Example #1
0
    def init_first_row(self, positive: bool):
        """
        Fills the first row of the CYK table, one cell per sequence symbol.

        Every grammar rule whose ``right1`` equals the symbol at a position is
        handed to ``__init_cell``, flagged with ``tmp_used`` and stored in
        ``rules_table[0][position]``. When no rule matches and the sentence is
        positive, a covering operator may create a rule for the symbol:
        terminal covering if the sequence has more than one symbol, otherwise
        start covering when the ``covering/is_start_covering_allowed`` setting
        equals ``"True"``. Finally ``cells_parsing_states_table[0][position]``
        is set to ``True``.

        :param positive: whether the sentence is a positive example; covering
            is attempted only for positive sentences
        :return: None
        """
        for position, symbol in enumerate(self.sequence):
            matched = False
            # TODO: dictionary can fasten this search
            for rule in self.grammar.get_rules():
                if rule.right1 == symbol:
                    matched = True
                    self.__init_cell(position, rule)
                    rule.tmp_used = True
                    self.rules_table[0][position].append(sCellRule(rule))

            if positive and not matched:
                # Pick the covering operator; the setting is consulted only
                # for single-symbol sequences.
                if len(self.sequence) > 1:
                    operator = self.terminal_covering
                elif self.__settings.get_value(
                        'covering', 'is_start_covering_allowed') == "True":
                    operator = self.start_covering
                else:
                    operator = None

                if operator is not None:
                    created = operator.add_new_rule(self.grammar, symbol)
                    self.__generated_rules_count += 1
                    created.tmp_used = True
                    self.__init_cell(position, created)
                    self.rules_table[0][position].append(sCellRule(created))

            self.cells_parsing_states_table[0][position] = True
Example #2
0
def __init_first_row(cyk_values: CykValues, is_sentence_positive: bool, settings: Settings):
    """
    Fills the first row of the CYK table and publishes it to the shared data.

    For each position of ``cyk_values.sequence`` every grammar rule whose
    ``right1`` equals the symbol is passed to ``__init_cell``, flagged with
    ``tmp_used`` and appended to ``rules_table[0][column]``. If nothing matched
    and the sentence is positive, terminal covering (sequence longer than one
    symbol) or start covering (when the ``covering/is_start_covering_allowed``
    setting equals ``"True"``) may add a new rule. The cell is then recorded
    in ``cyk_values.parallelData`` under the key ``'{}{}'.format(0, column)``.

    :param cyk_values: parsing state (sequence, grammar, tables, coverings)
    :param is_sentence_positive: covering is attempted only when this is true
    :param settings: settings object queried for the start-covering switch
    :return: None
    """
    for column, symbol in enumerate(cyk_values.sequence):
        cell_key = '{}{}'.format(0, column)
        found = False
        # TODO: dictionary can fasten this search
        for rule in cyk_values.grammar.get_rules():
            if rule.right1 == symbol:
                found = True
                __init_cell(cyk_values, column, rule)
                rule.tmp_used = True
                cyk_values.rules_table[0][column].append(sCellRule(rule))

        if is_sentence_positive and not found:
            # The settings lookup happens only for single-symbol sequences.
            if len(cyk_values.sequence) > 1:
                operator = cyk_values.terminal_covering
            elif settings.get_value('covering', 'is_start_covering_allowed') == "True":
                operator = cyk_values.start_covering
            else:
                operator = None

            if operator is not None:
                created = operator.add_new_rule(cyk_values.grammar, symbol)
                created.tmp_used = True
                __init_cell(cyk_values, column, created)
                cyk_values.rules_table[0][column].append(sCellRule(created))

        # Publish the finished cell to the shared structures.
        cyk_values.parallelData.cyk_parsed_cells[cell_key] = True
        cyk_values.parallelData.cyk_rules_for_cell[cell_key] = cyk_values.rules_table[0][column]
        cyk_values.parallelData.cyk_probability_array[cell_key] = cyk_values.probability_array[0][column]
Example #3
0
def __compute_rule(j, pr, rt, rules, sentence, pc):
    """
    Fills column ``j`` of the CYK table row by row, starting from row 1.

    For every row ``i`` with ``j + i < len(sentence)`` and every split ``k`` in
    ``range(i)``, each binary rule (``right2`` is not ``None``) is checked
    against the parent cells ``(k, j)`` and ``(i - k - 1, j + k + 1)``. When
    both parents hold a probability for the rule's right-hand symbols, the
    probability of the rule's left symbol in cell ``(i, j)`` is recomputed
    and an ``sCellRule`` is appended to that cell's rule list. After all
    splits of a row are processed the cell is marked as parsed in ``pc``.

    Cells are addressed by the key ``'{}{}'.format(row, column)``.
    NOTE(review): the key has no separator, so e.g. (1, 11) and (11, 1) both
    map to ``'111'``; left unchanged because the key format is presumably
    shared with the code that fills these mappings.

    :param j: column handled by this call
    :param pr: mapping from cell key to the cell's probabilities, indexed by
        symbol index
    :param rt: mapping from cell key to the cell's list of ``sCellRule``
    :param rules: iterable of grammar rules; copied with ``list`` once per row
    :param sentence: parsed sentence, only its length is used
    :param pc: mapping from cell key to a flag that is truthy once the cell
        has been parsed
    :return: None
    """
    i = 1

    while j + i < len(sentence):
        rls = list(rules)
        current_key = '{}{}'.format(i, j)
        for k in range(i):
            up_key = '{}{}'.format(k, j)
            cross_key = '{}{}'.format(i - k - 1, j + k + 1)
            for rule in rls:
                if rule.right2 is None:
                    continue
                first_rule_index = rule.right1.index
                second_rule_index = rule.right2.index

                # Busy-wait until BOTH parent cells are parsed. The previous
                # condition (`not a and not b`) stopped waiting as soon as
                # either one was ready, so the other cell could be read
                # before it was complete.
                while not (pc[up_key] and pc[cross_key]):
                    pass

                first_parent_prob = pr[up_key][first_rule_index]
                second_parent_prob = pr[cross_key][second_rule_index]
                if first_parent_prob is None or second_parent_prob is None:
                    continue

                rule.tmp_used = True
                rule_left_index = rule.left.index

                # The value is fetched, modified and stored back; presumably
                # required because pr/rt are shared proxies whose items are
                # copies -- confirm against the caller.
                current_cell_probability = pr[current_key]
                current_cell_probability[rule_left_index] = Stochastic.new_calculate_cell(
                    'BaumWelch', None, first_parent_prob, second_parent_prob,
                    current_cell_probability[rule_left_index], rule)
                pr[current_key] = current_cell_probability

                new_rule = sCellRule(rule, Coordinates(k, j),
                                     Coordinates(i - k - 1, j + k + 1))
                ru_table = rt[current_key]
                ru_table.append(new_rule)
                rt[current_key] = ru_table
        pc[current_key] = True
        i += 1
Example #4
0
def __handle_rule(i, j, cyk_probability_array, cyk_rules_for_cell, rules_list,
                  values):
    """
    Applies every binary rule to cell ``(i, j)`` over all of its split points.

    For each rule and each split ``k`` in ``range(i)`` the parent cells
    ``(k, j)`` and ``(i - k - 1, j + k + 1)`` are looked up. When both hold a
    probability for the rule's right-hand symbols, the rule is flagged with
    ``tmp_used``, the probability of its left symbol in cell ``(i, j)`` is
    recomputed through ``__calculate_cell`` and an ``sCellRule`` is appended
    to the cell's rule list. Cells are addressed by the key
    ``'{}{}'.format(row, column)``.

    :param i: row of the target cell
    :param j: column of the target cell
    :param cyk_probability_array: mapping from cell key to the cell's
        probabilities, indexed by symbol index
    :param cyk_rules_for_cell: mapping from cell key to the cell's rule list
    :param rules_list: iterable of grammar rules; copied before iterating
    :param values: not used by this function
    :return: None
    """
    target_key = '{}{}'.format(i, j)
    for rule in list(rules_list):
        for split in range(i):
            left_probs = cyk_probability_array['{}{}'.format(split, j)]
            right_probs = cyk_probability_array['{}{}'.format(
                i - split - 1, j + split + 1)]
            target_probs = cyk_probability_array[target_key]

            if rule.right2 is None:
                continue

            left_index = rule.right1.index
            right_index = rule.right2.index
            if left_probs[left_index] is None or right_probs[right_index] is None:
                continue

            rule.tmp_used = True
            head_index = rule.left.index

            target_probs[head_index] = __calculate_cell(
                left_probs[left_index], right_probs[right_index],
                target_probs[rule.left.index], rule)
            # Store the modified values back under the cell key.
            cyk_probability_array[target_key] = target_probs

            cell_rule = sCellRule(rule, Coordinates(split, j),
                                  Coordinates(i - split - 1, j + split + 1))
            cell_rules = cyk_rules_for_cell[target_key]
            cell_rules.append(cell_rule)
            cyk_rules_for_cell[target_key] = cell_rules
Example #5
0
def __apply_aggressive_and_final_covering(cyk_values: CykValues, i: int,
                                          j: int):
    """
    Performs aggressive or final covering on the given cell of the cyk table.

    A split point is drawn at random among those for which both parent cells
    ``(m, j)`` and ``(i - m - 1, j + m + 1)`` contain symbols, then one symbol
    is drawn at random from each parent. If ``i`` is not the last row index
    (``len(sequence) - 1``), aggressive covering is applied with the
    probability from the ``covering/aggressive_covering_probability`` setting;
    on the last row, final covering is applied when the
    ``covering/is_full_covering_allowed`` setting equals ``"True"``. A created
    rule is flagged with ``tmp_used``, recorded in ``rules_table[i][j]`` and
    passed to ``__calculate_cell``.

    :param cyk_values: parsing state (sequence, grammar, tables, coverings,
        settings)
    :param i: row of the cell
    :param j: column of the cell
    :return: None
    """
    valid_splits = []
    for m in range(i):
        tmp_symbols_1 = __get_cell_symbols(cyk_values, m, j)
        tmp_symbols_2 = __get_cell_symbols(cyk_values, i - m - 1, j + m + 1)
        if len(tmp_symbols_1) > 0 and len(tmp_symbols_2) > 0:
            valid_splits.append(m)
    if not valid_splits:
        return

    # The three random draws happen in the same order as before: split,
    # symbol of the first parent, symbol of the second parent.
    split = valid_splits[randint(0, len(valid_splits) - 1)]
    cross_row = i - split - 1
    cross_col = j + split + 1
    symbols_1 = __get_cell_symbols(cyk_values, split, j)
    symbols_2 = __get_cell_symbols(cyk_values, cross_row, cross_col)
    symbol_1 = symbols_1[randint(0, len(symbols_1) - 1)]
    symbol_2 = symbols_2[randint(0, len(symbols_2) - 1)]

    new_rule = None
    # Value comparison: `is not` on ints only works by accident for values
    # inside CPython's small-integer cache and misfires for larger rows.
    if i != len(cyk_values.sequence) - 1:
        aggressive_probability = float(
            cyk_values.settings.get_value(
                'covering', 'aggressive_covering_probability'))
        if RandomUtils.make_random_decision_with_probability(
                aggressive_probability):
            new_rule = cyk_values.aggressive_covering.add_new_rule(
                cyk_values.grammar, symbol_1, symbol_2)
    elif cyk_values.settings.get_value(
            'covering', 'is_full_covering_allowed') == "True":
        new_rule = cyk_values.final_covering.add_new_rule(
            cyk_values.grammar, symbol_1, symbol_2)

    if new_rule is None:
        return

    new_rule.tmp_used = True
    new_cell_rule = sCellRule(new_rule, Coordinates(split, j),
                              Coordinates(cross_row, cross_col))
    cyk_values.rules_table[i][j].append(new_cell_rule)
    # NOTE(review): the return value is discarded here; confirm that
    # __calculate_cell updates the probability table itself.
    __calculate_cell(
        cyk_values,
        cyk_values.probability_array[split][j][new_rule.right1.index],
        cyk_values.probability_array[cross_row][cross_col][
            new_rule.right2.index],
        cyk_values.probability_array[i][j][new_rule.left.index],
        new_rule)
Example #6
0
 def __apply_aggressive_and_final_covering(self, i: int, j: int):
     """
     Performs aggressive or final covering on the given cell of the cyk table.

     A split point is drawn at random among those for which both parent cells
     ``(m, j)`` and ``(i - m - 1, j + m + 1)`` contain symbols, then one
     symbol is drawn at random from each parent. If ``i`` is not the last row
     index (``len(self.sequence) - 1``), aggressive covering is applied with
     the probability from the ``covering/aggressive_covering_probability``
     setting; on the last row, final covering is applied when the
     ``covering/is_full_covering_allowed`` setting equals ``"True"``. A
     created rule increments the generated-rules counter, is flagged with
     ``tmp_used``, recorded in ``rules_table[i][j]`` and passed to the
     stochastic ``calculate_cell``.

     :param i: row of the cell
     :param j: column of the cell
     :return: None
     """
     valid_splits = []
     for m in self.iteration_generator(i):
         tmp_symbols_1 = self.__get_cell_symbols(m, j)
         tmp_symbols_2 = self.__get_cell_symbols(i - m - 1, j + m + 1)
         if len(tmp_symbols_1) > 0 and len(tmp_symbols_2) > 0:
             valid_splits.append(m)
     if not valid_splits:
         return

     # The three random draws happen in the same order as before: split,
     # symbol of the first parent, symbol of the second parent.
     split = valid_splits[randint(0, len(valid_splits) - 1)]
     cross_row = i - split - 1
     cross_col = j + split + 1
     symbols_1 = self.__get_cell_symbols(split, j)
     symbols_2 = self.__get_cell_symbols(cross_row, cross_col)
     symbol_1 = symbols_1[randint(0, len(symbols_1) - 1)]
     symbol_2 = symbols_2[randint(0, len(symbols_2) - 1)]

     new_rule = None
     # Value comparison: `is not` on ints only works by accident for values
     # inside CPython's small-integer cache and misfires for larger rows.
     if i != len(self.sequence) - 1:
         aggressive_probability = float(
             self.__settings.get_value(
                 'covering', 'aggressive_covering_probability'))
         if RandomUtils.make_random_decision_with_probability(
                 aggressive_probability):
             new_rule = self.aggressive_covering.add_new_rule(
                 self.grammar, symbol_1, symbol_2)
     elif self.__settings.get_value(
             'covering', 'is_full_covering_allowed') == "True":
         new_rule = self.final_covering.add_new_rule(
             self.grammar, symbol_1, symbol_2)

     if new_rule is None:
         return

     self.__generated_rules_count += 1
     new_rule.tmp_used = True
     new_cell_rule = sCellRule(new_rule, Coordinates(split, j),
                               Coordinates(cross_row, cross_col))
     self.rules_table[i][j].append(new_cell_rule)
     # Fresh Coordinates objects are built for the call, as before, so the
     # stored cell rule does not share them.
     self.__Stochastic.calculate_cell(
         self.mode, self.default_value, self.probability_array,
         Coordinates(split, j), Coordinates(cross_row, cross_col),
         Coordinates(i, j), new_rule)
Example #7
0
def __compute_rule(jobs_queue, pc, pr, rt, rules):
    """
    Worker loop that fills CYK cells taken from a job queue.

    Jobs are fetched without blocking; the function returns ``None`` when
    fetching fails (normally because the queue is empty) or when a ``None``
    job is received. For a job with coordinates ``(i, j)`` and every split
    ``k`` in ``range(i)``, each binary rule (``right2`` is not ``None``) is
    checked against the parent cells ``(k, j)`` and
    ``(i - k - 1, j + k + 1)``. When both parents hold a probability for the
    rule's right-hand symbols, the probability of the rule's left symbol in
    cell ``(i, j)`` is recomputed and an ``sCellRule`` is appended to that
    cell's rule list. The cell is then marked as parsed in ``pc``.

    Cells are addressed by the key ``'{}{}'.format(row, column)``.
    NOTE(review): the key has no separator, so e.g. (1, 11) and (11, 1) both
    map to ``'111'``; left unchanged because the key format is presumably
    shared with the code that fills these mappings.

    :param jobs_queue: queue of objects exposing ``i`` and ``j`` attributes
    :param pc: mapping from cell key to a flag that is truthy once the cell
        has been parsed
    :param pr: mapping from cell key to the cell's probabilities, indexed by
        symbol index
    :param rt: mapping from cell key to the cell's list of ``sCellRule``
    :param rules: iterable of grammar rules; copied with ``list`` per job
    :return: None
    """
    while True:
        try:
            cell_rule_indexes = jobs_queue.get(block=False)
        except Exception:
            # An empty queue is the expected way out. `Exception` instead of
            # a bare `except` keeps the best-effort exit but lets
            # KeyboardInterrupt / SystemExit propagate.
            return None

        if cell_rule_indexes is None:
            return None

        i = cell_rule_indexes.i
        j = cell_rule_indexes.j
        current_key = '{}{}'.format(i, j)

        rls = list(rules)
        for k in range(i):
            up_key = '{}{}'.format(k, j)
            cross_key = '{}{}'.format(i - k - 1, j + k + 1)
            for rule in rls:
                if rule.right2 is None:
                    continue
                first_rule_index = rule.right1.index
                second_rule_index = rule.right2.index

                # Busy-wait until BOTH parent cells are parsed. The previous
                # condition (`not a and not b`) stopped waiting as soon as
                # either one was ready, so the other cell could be read
                # before it was complete.
                while not (pc[up_key] and pc[cross_key]):
                    pass

                first_parent_prob = pr[up_key][first_rule_index]
                second_parent_prob = pr[cross_key][second_rule_index]
                if first_parent_prob is None or second_parent_prob is None:
                    continue

                rule.tmp_used = True
                rule_left_index = rule.left.index

                # The value is fetched, modified and stored back; presumably
                # required because pr/rt are shared proxies whose items are
                # copies -- confirm against the caller.
                current_cell_probability = pr[current_key]
                current_cell_probability[
                    rule_left_index] = Stochastic.new_calculate_cell(
                        'BaumWelch', None, first_parent_prob,
                        second_parent_prob,
                        current_cell_probability[rule_left_index], rule)
                pr[current_key] = current_cell_probability

                new_rule = sCellRule(rule, Coordinates(k, j),
                                     Coordinates(i - k - 1, j + k + 1))
                ru_table = rt[current_key]
                ru_table.append(new_rule)
                rt[current_key] = ru_table
        pc[current_key] = True
Example #8
0
    def parse_sentence(self, sequence: str, positive: bool, covering_on,
                       negative_covering):
        """
        Parses a sentence with the CYK algorithm, optionally applying covering.

        The probability array and the rules table are initialised for the
        sentence length and the first row is filled through
        ``init_first_row``. Then, for every cell ``(i, j)`` and every split
        ``k``, each binary rule whose right-hand symbols have a probability
        in the parent cells ``(k, j)`` and ``(i - k - 1, j + k + 1)`` is
        flagged with ``tmp_used``, the probability of its left symbol in
        ``(i, j)`` is recomputed and an ``sCellRule`` is stored in
        ``rules_table[i][j]``. If no non-terminal or start rule occurred in a
        cell, the sentence is positive, covering is switched on and the
        ``covering/is_full_covering_allowed`` setting equals ``"True"``,
        aggressive/final covering is applied to that cell.

        NOTE(review): ``init_first_row`` reads ``self.sequence``, which is not
        assigned here -- presumably the caller sets it; confirm.

        :param sequence: sentence to parse; used for logging and its length
        :param positive: whether the sentence belongs to the grammar
        :param covering_on: enables aggressive/final covering when truthy
        :param negative_covering: not used by this method
        :return: None
        """
        self.__logger.info(
            'Parsing sentence {0}. Belongs to grammar: {1}'.format(
                sequence, positive))

        sequence_length = len(sequence)
        self._init_probability_array(sequence_length,
                                     len(self.grammar.nonTerminalSymbols))
        self._init_rules_table(sequence_length)
        self.init_first_row(positive)

        # Iterate through upper triangle of the cyk matrix
        for i in self.iteration_generator(sequence_length):
            for j in self.iteration_generator(sequence_length - i):
                for k in self.iteration_generator(i):
                    # Rules are re-read for every split because covering can
                    # add rules to the grammar while parsing.
                    for rule in self.grammar.get_rules():
                        if rule.right2 is None:
                            continue

                        parent_cell_probability = self.probability_array[
                            k][j][rule.right1.index]
                        if parent_cell_probability is None:
                            continue
                        parent_cell_2_probability = self.probability_array[
                            i - k - 1][j + k + 1][rule.right2.index]
                        if parent_cell_2_probability is None:
                            continue

                        rule.tmp_used = True
                        rule_left_index = rule.left.index
                        current_cell_probability = self.probability_array[
                            i][j][rule_left_index]

                        self.probability_array[i][j][rule_left_index] = \
                            self.__Stochastic.new_calculate_cell(
                                self.mode, self.default_value,
                                parent_cell_probability,
                                parent_cell_2_probability,
                                current_cell_probability, rule)
                        new_rule = sCellRule(
                            rule, Coordinates(k, j),
                            Coordinates(i - k - 1, j + k + 1))
                        self.rules_table[i][j].append(new_rule)
                # Check if probability for cell found
                is_rule_occured = self.__find_if_non_terminal_or_start_rule_occured_in_cell(
                    i, j)
                # Aggressive and final covering
                if not is_rule_occured and positive and covering_on and \
                        self.settings.get_value('covering', 'is_full_covering_allowed') == "True":
                    self.__apply_aggressive_and_final_covering(i, j)
Example #9
0
    def start_execution(self):
        """
        Worker loop that fills CYK cells taken from the jobs storage.

        Jobs are fetched until a ``None`` job arrives; the method then returns
        what the rules/probability proxy reports as the rules to add, the
        rules table and the probability array. For a job ``(i, j)`` and every
        split ``k`` in ``range(i)`` the worker polls the parsing state of the
        parent cells ``(k, j)`` and ``(i - k - 1, j + k + 1)`` until neither
        of them is ``False``, then applies every binary rule whose right-hand
        symbols have a probability in those cells: the rule is flagged with
        ``tmp_used`` and the new cell rule and probability are pushed through
        the proxy. If no rule applied, ``[i, j]`` is registered through
        ``add_rule_to_add``. Finally the cell state is updated.

        NOTE(review): the proxies look like actor proxies whose calls return
        futures (hence ``.get()``) -- confirm. The returned tuple holds the
        raw call results, without ``.get()``, exactly as before.

        :return: tuple of (rules to add, rules table, probability array) as
            returned by ``rules_prob_proxy``
        """
        while True:
            job: CykIndexes = self.jobs_storage_proxy.get_cell_job().get()

            if job is None:
                return (self.rules_prob_proxy.get_rule_to_add(),
                        self.rules_prob_proxy.get_rules_table(),
                        self.rules_prob_proxy.get_probability_array())
            i = job.i
            j = job.j
            was = False
            for k in range(i):

                cell_up_state = self.parsing_state_proxy.get_cell_state(k, j).get()
                cell_cross_state = self.parsing_state_proxy.get_cell_state(i - k - 1, j + k + 1).get()

                # Poll until BOTH parent cells are parsed. The previous
                # condition (`and`) stopped polling as soon as either one was
                # ready, so the other cell could be read before completion.
                while cell_up_state is False or cell_cross_state is False:
                    cell_up_state = self.parsing_state_proxy.get_cell_state(k, j).get()
                    cell_cross_state = self.parsing_state_proxy.get_cell_state(i - k - 1, j + k + 1).get()

                # Fetched after the wait so the parents' results are included.
                rules = self.rules_storage_proxy.get_rules().get()
                probability_array = self.rules_prob_proxy.get_probability_array().get()

                for rule in rules:
                    if rule.right2 is None:
                        continue
                    first_rule_index = rule.right1.index
                    second_rule_index = rule.right2.index
                    parent_cell_probability = probability_array[k][j][first_rule_index]
                    if parent_cell_probability is None:
                        continue
                    parent_cell_2_probability = \
                        probability_array[i - k - 1][j + k + 1][second_rule_index]
                    if parent_cell_2_probability is None:
                        continue

                    rule.tmp_used = True
                    rule_left_index = rule.left.index
                    was = True
                    # NOTE(review): read from the array fetched for this
                    # split; if `.get()` returns a copy, updates pushed below
                    # for an earlier rule with the same left symbol are not
                    # visible here -- confirm.
                    current_cell_probability = probability_array[i][j][rule_left_index]
                    current_cell_probability = Stochastic.new_calculate_cell(
                        'BaumWelch', None, parent_cell_probability,
                        parent_cell_2_probability, current_cell_probability,
                        rule)
                    new_rule = sCellRule(rule, Coordinates(k, j), Coordinates(i - k - 1, j + k + 1))
                    self.rules_prob_proxy.update_rules_table(i, j, new_rule)
                    self.rules_prob_proxy.update_probability_array(i, j, rule_left_index, current_cell_probability)
            if not was:
                self.rules_prob_proxy.add_rule_to_add([i, j])
            self.parsing_state_proxy.update_cell_state(i, j)
Example #10
0
def new_first_row_init(cyk_values: CykValues):
    """
    Fills the first row of the CYK table and publishes it to the shared data.

    For every index produced by ``iteration_generator`` over the sequence
    length, each grammar rule whose ``right1`` equals the symbol at that index
    is passed to ``init_cell`` and the results are added to the cell's
    ``cell_rules``. If nothing matched and ``cyk_values.positive`` is truthy,
    the covering returned by ``choose_first_row_covering`` (if any) creates a
    new rule for the symbol. The cell is then flagged as parsed and recorded
    in ``cyk_values.parallelData`` under the key ``'{}{}'.format(0, index)``.

    :param cyk_values: parsing state (sequence, grammar, tables, shared data)
    :return: None
    """
    for column in iteration_generator(len(cyk_values.sequence)):
        # Snapshot of the grammar rules, taken anew for every cell.
        snapshot = list(cyk_values.grammar.get_rules())
        matching = []
        for rule in rules_generator(snapshot):
            if rule.right1 == cyk_values.sequence[column]:
                matching.append(init_cell(cyk_values, column, rule))

        if matching:
            cyk_values.rules_table[0][column].cell_rules.extend(matching)
        elif cyk_values.positive:
            operator = choose_first_row_covering(cyk_values)
            if operator is not None:
                created = operator.add_new_rule(cyk_values.grammar,
                                                cyk_values.sequence[column])
                init_cell(cyk_values, column, created)
                cyk_values.rules_table[0][column].cell_rules.append(
                    sCellRule(created))

        cell_key = '{}{}'.format(0, column)
        cyk_values.rules_table[0][column].parsed = True
        cyk_values.parallelData.cyk_parsed_cells[cell_key] = 1
        cyk_values.parallelData.cyk_probability_array[cell_key] = \
            cyk_values.probability_array[0][column]