def init_first_row(self, positive: bool):
    """
    Fills row 0 of the parsing tables, one cell per symbol of ``self.sequence``.

    Every grammar rule whose ``right1`` equals the symbol is initialised via
    ``__init_cell``, marked as used and stored in ``self.rules_table[0][i]``.
    If no rule matched and ``positive`` is truthy, a covering operator may
    create a new rule for the symbol: the terminal covering when the sequence
    has more than one symbol, otherwise the start covering, and only if the
    ``covering / is_start_covering_allowed`` setting equals ``"True"``.
    Finally the cell is flagged in ``self.cells_parsing_states_table``.

    :param positive: when truthy, covering is allowed for unmatched symbols
    :return: None
    """
    for column, symbol in enumerate(self.sequence):
        matched = False
        # TODO: dictionary can fasten this search
        for candidate in self.grammar.get_rules():
            if candidate.right1 == symbol:
                matched = True
                self.__init_cell(column, candidate)
                candidate.tmp_used = True
                self.rules_table[0][column].append(sCellRule(candidate))

        chosen_covering = None
        if positive and not matched:
            if len(self.sequence) > 1:
                chosen_covering = self.terminal_covering
            else:
                start_allowed = self.__settings.get_value(
                    'covering', 'is_start_covering_allowed')
                if start_allowed == "True":
                    chosen_covering = self.start_covering

        if chosen_covering is not None:
            created = chosen_covering.add_new_rule(self.grammar, symbol)
            self.__generated_rules_count += 1
            created.tmp_used = True
            self.__init_cell(column, created)
            self.rules_table[0][column].append(sCellRule(created))

        # The first-row cell is complete whether or not a rule was found.
        self.cells_parsing_states_table[0][column] = True
def __init_first_row(cyk_values: CykValues, is_sentence_positive: bool, settings: Settings):
    """
    Fills row 0 of the tables held by ``cyk_values`` and publishes each cell
    to ``cyk_values.parallelData``.

    For every symbol of ``cyk_values.sequence`` the grammar rules whose
    ``right1`` equals the symbol are initialised and appended to
    ``rules_table[0][i]``.  If none matched and the sentence is positive, a
    covering operator may add a rule: terminal covering for sequences longer
    than one symbol, otherwise start covering when the setting
    ``covering / is_start_covering_allowed`` equals ``"True"``.

    :param cyk_values: container of the sequence, grammar, tables and coverings
    :param is_sentence_positive: when truthy, covering is allowed
    :param settings: object queried through ``get_value(section, key)``
    :return: None
    """
    for column, symbol in enumerate(cyk_values.sequence):
        matched = False
        # TODO: dictionary can fasten this search
        for candidate in cyk_values.grammar.get_rules():
            if candidate.right1 == symbol:
                matched = True
                __init_cell(cyk_values, column, candidate)
                candidate.tmp_used = True
                cyk_values.rules_table[0][column].append(sCellRule(candidate))

        chosen_covering = None
        if is_sentence_positive and not matched:
            if len(cyk_values.sequence) > 1:
                chosen_covering = cyk_values.terminal_covering
            else:
                start_allowed = settings.get_value('covering', 'is_start_covering_allowed')
                if start_allowed == "True":
                    chosen_covering = cyk_values.start_covering

        if chosen_covering is not None:
            created = chosen_covering.add_new_rule(cyk_values.grammar, symbol)
            created.tmp_used = True
            __init_cell(cyk_values, column, created)
            cyk_values.rules_table[0][column].append(sCellRule(created))

        # Publish the finished cell under the "<row><column>" key.
        cell_key = '{}{}'.format(0, column)
        shared = cyk_values.parallelData
        shared.cyk_parsed_cells[cell_key] = True
        shared.cyk_rules_for_cell[cell_key] = cyk_values.rules_table[0][column]
        shared.cyk_probability_array[cell_key] = cyk_values.probability_array[0][column]
def __compute_rule(j, pr, rt, rules, sentence, pc):
    """
    Computes the cells ``(i, j)`` of one column for ``i = 1, 2, ...`` while
    ``j + i < len(sentence)``.

    For every split point ``k`` and every binary rule (``right2`` is not
    None) the probabilities of the two parent cells ``(k, j)`` and
    ``(i - k - 1, j + k + 1)`` are combined through
    ``Stochastic.new_calculate_cell`` and the rule is recorded in ``rt``.
    After all splits are processed the cell is flagged in ``pc``.

    All three mappings are keyed by ``'{}{}'.format(row, column)``.
    NOTE(review): the key has no separator, so e.g. (1, 12) and (11, 2) both
    map to ``'112'``; the format is kept because the code that fills these
    mappings is not visible here.

    :param j: column index handled by this call
    :param pr: mapping key -> per-symbol probability list of a cell
    :param rt: mapping key -> list of cell rules of a cell
    :param rules: iterable of grammar rules (copied once per row)
    :param sentence: the parsed sequence; only its length is used
    :param pc: mapping key -> truthy once the cell has been parsed
    :return: None
    """
    i = 1
    while j + i < len(sentence):
        rls = list(rules)
        cell_key = '{}{}'.format(i, j)
        for k in range(i):
            up_key = '{}{}'.format(k, j)
            cross_key = '{}{}'.format(i - k - 1, j + k + 1)
            for rule in rls:
                if rule.right2 is None:
                    continue
                first_rule_index = rule.right1.index
                second_rule_index = rule.right2.index
                # Spin until BOTH parent cells are parsed.  The previous
                # `not a and not b` test stopped waiting as soon as either
                # parent was ready, so the other could be read half-filled.
                # NOTE(review): this relies on whoever fills the cross cell
                # running concurrently - confirm against the caller.
                while not pc[up_key] or not pc[cross_key]:
                    continue
                first_parent_prob = pr[up_key][first_rule_index]
                second_parent_prob = pr[cross_key][second_rule_index]
                if first_parent_prob is not None and second_parent_prob is not None:
                    rule.tmp_used = True
                    rule_left_index = rule.left.index
                    # Read, modify and store back the whole entry
                    # (presumably because pr/rt are shared proxy mappings -
                    # TODO confirm).
                    current_cell_probability = pr[cell_key]
                    current_cell_probability[rule_left_index] = Stochastic.new_calculate_cell(
                        'BaumWelch', None, first_parent_prob, second_parent_prob,
                        current_cell_probability[rule_left_index], rule)
                    pr[cell_key] = current_cell_probability
                    new_rule = sCellRule(rule, Coordinates(k, j),
                                         Coordinates(i - k - 1, j + k + 1))
                    ru_table = rt[cell_key]
                    ru_table.append(new_rule)
                    rt[cell_key] = ru_table
        pc[cell_key] = True
        i = i + 1
def __handle_rule(i, j, cyk_probability_array, cyk_rules_for_cell, rules_list, values):
    """
    Applies every binary rule of ``rules_list`` to cell ``(i, j)``.

    For each rule and each split point ``k`` the parent cells ``(k, j)`` and
    ``(i - k - 1, j + k + 1)`` are looked up; when both hold a value for the
    rule's right-hand symbols, the cell probability is recomputed with
    ``__calculate_cell`` and the rule is appended to the cell's rule list.
    Both mappings are keyed by ``'{}{}'.format(row, column)`` and modified
    entries are stored back under their key.

    :param i: first index of the target cell
    :param j: second index of the target cell
    :param cyk_probability_array: mapping key -> per-symbol probabilities
    :param cyk_rules_for_cell: mapping key -> list of cell rules
    :param rules_list: iterable of grammar rules (copied before iterating)
    :param values: not used by this function
    :return: None
    """
    target_key = '{}{}'.format(i, j)
    for rule in list(rules_list):
        for split in range(i):
            cross_row = i - split - 1
            cross_col = j + split + 1
            # All three entries are fetched before the rule is inspected,
            # exactly as many lookups as before.
            upper = cyk_probability_array['{}{}'.format(split, j)]
            diagonal = cyk_probability_array['{}{}'.format(cross_row, cross_col)]
            current = cyk_probability_array[target_key]
            if rule.right2 is None:
                continue
            right1_idx = rule.right1.index
            right2_idx = rule.right2.index
            if upper[right1_idx] is not None and diagonal[right2_idx] is not None:
                rule.tmp_used = True
                left_idx = rule.left.index
                current[left_idx] = __calculate_cell(
                    upper[right1_idx], diagonal[right2_idx],
                    current[rule.left.index], rule)
                cyk_probability_array[target_key] = current
                cell_rule = sCellRule(rule, Coordinates(split, j),
                                      Coordinates(cross_row, cross_col))
                cell_rules = cyk_rules_for_cell[target_key]
                cell_rules.append(cell_rule)
                cyk_rules_for_cell[target_key] = cell_rules
def __apply_aggressive_and_final_covering(cyk_values: CykValues, i: int, j: int):
    """
    Performs aggressive or final covering on the given cell of the cyk table.

    Split points ``m`` whose parent cells ``(m, j)`` and
    ``(i - m - 1, j + m + 1)`` both contain symbols are collected; one of
    them and one symbol from each parent cell are picked at random.  When
    ``i`` is not ``len(sequence) - 1`` the aggressive covering adds a rule
    with the probability configured under
    ``covering / aggressive_covering_probability``; otherwise the final
    covering adds one if ``covering / is_full_covering_allowed`` equals
    ``"True"``.  A created rule is appended to ``rules_table[i][j]`` and
    passed to ``__calculate_cell``.

    :param cyk_values: container of the sequence, grammar, settings, tables
        and covering operators
    :param i: first index of the cell
    :param j: second index of the cell
    :return: None
    """
    new_rule = None
    valid_combinations_of_indexes = []
    for m in range(i):
        tmp_symbols_1 = __get_cell_symbols(cyk_values, m, j)
        tmp_symbols_2 = __get_cell_symbols(cyk_values, i - m - 1, j + m + 1)
        if len(tmp_symbols_1) > 0 and len(tmp_symbols_2) > 0:
            valid_combinations_of_indexes.append(m)

    if len(valid_combinations_of_indexes) > 0:
        # The three randint calls are kept in their original order so a
        # seeded run draws the same numbers.
        picked = randint(0, len(valid_combinations_of_indexes) - 1)
        split = valid_combinations_of_indexes[picked]
        cross_row = i - split - 1
        cross_col = j + split + 1
        symbols_1 = __get_cell_symbols(cyk_values, split, j)
        symbols_2 = __get_cell_symbols(cyk_values, cross_row, cross_col)
        index_1 = randint(0, len(symbols_1) - 1)
        index_2 = randint(0, len(symbols_2) - 1)

        # Compare by value: `is not` on ints tests object identity, which
        # only coincides with equality for CPython's cached small integers.
        if i != len(cyk_values.sequence) - 1:
            if RandomUtils.make_random_decision_with_probability(
                    float(cyk_values.settings.get_value(
                        'covering', 'aggressive_covering_probability'))):
                covering = cyk_values.aggressive_covering
                new_rule = covering.add_new_rule(
                    cyk_values.grammar, symbols_1[index_1], symbols_2[index_2])
        elif cyk_values.settings.get_value(
                'covering', 'is_full_covering_allowed') == "True":
            covering = cyk_values.final_covering
            new_rule = covering.add_new_rule(
                cyk_values.grammar, symbols_1[index_1], symbols_2[index_2])

        if new_rule is not None:
            new_rule.tmp_used = True
            new_cell_rule = sCellRule(
                new_rule,
                Coordinates(split, j),
                Coordinates(cross_row, cross_col))
            cyk_values.rules_table[i][j].append(new_cell_rule)
            # NOTE(review): the return value is discarded here - confirm
            # that __calculate_cell stores its result itself.
            __calculate_cell(
                cyk_values,
                cyk_values.probability_array[split][j][new_rule.right1.index],
                cyk_values.probability_array[cross_row][cross_col][new_rule.right2.index],
                cyk_values.probability_array[i][j][new_rule.left.index],
                new_rule)
def __apply_aggressive_and_final_covering(self, i: int, j: int):
    """
    Performs aggressive or final covering on the given cell of the cyk table.

    Split points ``m`` whose parent cells ``(m, j)`` and
    ``(i - m - 1, j + m + 1)`` both contain symbols are collected; one of
    them and one symbol from each parent cell are picked at random.  When
    ``i`` is not ``len(self.sequence) - 1`` the aggressive covering adds a
    rule with the probability configured under
    ``covering / aggressive_covering_probability``; otherwise the final
    covering adds one if ``covering / is_full_covering_allowed`` equals
    ``"True"``.  A created rule is counted, appended to
    ``self.rules_table[i][j]`` and passed to the stochastic cell calculation.

    :param i: first index of the cell
    :param j: second index of the cell
    :return: None
    """
    new_rule = None
    valid_combinations_of_indexes = []
    for m in self.iteration_generator(i):
        tmp_symbols_1 = self.__get_cell_symbols(m, j)
        tmp_symbols_2 = self.__get_cell_symbols(i - m - 1, j + m + 1)
        if len(tmp_symbols_1) > 0 and len(tmp_symbols_2) > 0:
            valid_combinations_of_indexes.append(m)

    if len(valid_combinations_of_indexes) > 0:
        # The three randint calls are kept in their original order so a
        # seeded run draws the same numbers.
        picked = randint(0, len(valid_combinations_of_indexes) - 1)
        split = valid_combinations_of_indexes[picked]
        cross_row = i - split - 1
        cross_col = j + split + 1
        symbols_1 = self.__get_cell_symbols(split, j)
        symbols_2 = self.__get_cell_symbols(cross_row, cross_col)
        index_1 = randint(0, len(symbols_1) - 1)
        index_2 = randint(0, len(symbols_2) - 1)

        # Compare by value: `is not` on ints tests object identity, which
        # only coincides with equality for CPython's cached small integers.
        if i != len(self.sequence) - 1:
            if RandomUtils.make_random_decision_with_probability(
                    float(self.__settings.get_value(
                        'covering', 'aggressive_covering_probability'))):
                covering = self.aggressive_covering
                new_rule = covering.add_new_rule(
                    self.grammar, symbols_1[index_1], symbols_2[index_2])
        elif self.__settings.get_value(
                'covering', 'is_full_covering_allowed') == "True":
            covering = self.final_covering
            new_rule = covering.add_new_rule(
                self.grammar, symbols_1[index_1], symbols_2[index_2])

        if new_rule is not None:
            self.__generated_rules_count += 1
            new_rule.tmp_used = True
            new_cell_rule = sCellRule(
                new_rule,
                Coordinates(split, j),
                Coordinates(cross_row, cross_col))
            self.rules_table[i][j].append(new_cell_rule)
            self.__Stochastic.calculate_cell(
                self.mode, self.default_value, self.probability_array,
                Coordinates(split, j),
                Coordinates(cross_row, cross_col),
                Coordinates(i, j),
                new_rule)
def __compute_rule(jobs_queue, pc, pr, rt, rules):
    """
    Worker loop: takes cell jobs from ``jobs_queue`` and computes each cell.

    The loop ends (returning None) when the non-blocking ``get`` fails or
    yields ``None``.  For a job with indices ``(i, j)``, every split point
    ``k`` and every binary rule (``right2`` is not None) combines the
    probabilities of the parent cells ``(k, j)`` and
    ``(i - k - 1, j + k + 1)`` through ``Stochastic.new_calculate_cell`` and
    records the rule in ``rt``.  The finished cell is then flagged in ``pc``.

    All three mappings are keyed by ``'{}{}'.format(row, column)``.
    NOTE(review): the key has no separator, so e.g. (1, 12) and (11, 2) both
    map to ``'112'``; the format is kept because the code that fills these
    mappings is not visible here.

    :param jobs_queue: queue-like object; items expose ``.i`` and ``.j``
    :param pc: mapping key -> truthy once the cell has been parsed
    :param pr: mapping key -> per-symbol probability list of a cell
    :param rt: mapping key -> list of cell rules of a cell
    :param rules: iterable of grammar rules (copied once per job)
    :return: None
    """
    while True:
        try:
            cell_rule_indexes = jobs_queue.get(block=False)
        except Exception:
            # Best effort, as before: an empty (or broken) queue ends the
            # worker.  Unlike a bare `except:` this no longer swallows
            # KeyboardInterrupt / SystemExit.
            return None
        if cell_rule_indexes is None:
            return None

        i = cell_rule_indexes.i
        j = cell_rule_indexes.j
        cell_key = '{}{}'.format(i, j)
        rls = list(rules)
        for k in range(i):
            up_key = '{}{}'.format(k, j)
            cross_key = '{}{}'.format(i - k - 1, j + k + 1)
            for rule in rls:
                if rule.right2 is None:
                    continue
                first_rule_index = rule.right1.index
                second_rule_index = rule.right2.index
                # Spin until BOTH parent cells are parsed.  The previous
                # `not a and not b` test stopped waiting as soon as either
                # parent was ready, so the other could be read half-filled.
                while not pc[up_key] or not pc[cross_key]:
                    continue
                first_parent_prob = pr[up_key][first_rule_index]
                second_parent_prob = pr[cross_key][second_rule_index]
                if first_parent_prob is not None and second_parent_prob is not None:
                    rule.tmp_used = True
                    rule_left_index = rule.left.index
                    # Read, modify and store back the whole entry
                    # (presumably because pr/rt are shared proxy mappings -
                    # TODO confirm).
                    current_cell_probability = pr[cell_key]
                    current_cell_probability[rule_left_index] = Stochastic.new_calculate_cell(
                        'BaumWelch', None, first_parent_prob, second_parent_prob,
                        current_cell_probability[rule_left_index], rule)
                    pr[cell_key] = current_cell_probability
                    new_rule = sCellRule(rule, Coordinates(k, j),
                                         Coordinates(i - k - 1, j + k + 1))
                    ru_table = rt[cell_key]
                    ru_table.append(new_rule)
                    rt[cell_key] = ru_table
        pc[cell_key] = True
def parse_sentence(self, sequence: str, positive: bool, covering_on, negative_covering):
    """
    Runs the CYK table fill for one sentence.

    The probability array and rules table are (re)initialised for
    ``len(sequence)``, the first row is filled by ``init_first_row`` and then
    every cell ``(i, j)`` is visited.  For each split point ``k`` and each
    binary rule whose two parent cells hold a value, the cell probability is
    updated through ``new_calculate_cell`` and the rule is appended to
    ``self.rules_table[i][j]``.  If afterwards no rule is reported for the
    cell and covering is enabled, aggressive/final covering is applied.

    :param sequence: the sentence; used here for logging and its length
    :param positive: whether the sentence belongs to the grammar
    :param covering_on: enables aggressive/final covering for empty cells
    :param negative_covering: not used by this method
    :return: None
    """
    self.__logger.info(
        'Parsing sentence {0}. Belongs to grammar: {1}'.format(
            sequence, positive))
    sequence_length = len(sequence)
    self._init_probability_array(sequence_length,
                                 len(self.grammar.nonTerminalSymbols))
    self._init_rules_table(sequence_length)
    self.init_first_row(positive)

    # Iterate through upper triangle of the cyk matrix
    for i in self.iteration_generator(sequence_length):
        for j in self.iteration_generator(sequence_length - i):
            for k in self.iteration_generator(i):
                # Rules are re-read on every pass because covering may have
                # added new ones to the grammar in the meantime.
                for rule in self.grammar.get_rules():
                    if rule.right2 is None:
                        continue
                    first_rule_index = rule.right1.index
                    second_rule_index = rule.right2.index
                    parent_cell_probability = \
                        self.probability_array[k][j][first_rule_index]
                    if parent_cell_probability is None:
                        continue
                    parent_cell_2_probability = \
                        self.probability_array[i - k - 1][j + k + 1][second_rule_index]
                    if parent_cell_2_probability is None:
                        continue

                    rule.tmp_used = True
                    rule_left_index = rule.left.index
                    current_cell_probability = \
                        self.probability_array[i][j][rule_left_index]
                    self.probability_array[i][j][rule_left_index] = \
                        self.__Stochastic.new_calculate_cell(
                            self.mode, self.default_value,
                            parent_cell_probability,
                            parent_cell_2_probability,
                            current_cell_probability, rule)
                    new_rule = sCellRule(
                        rule, Coordinates(k, j),
                        Coordinates(i - k - 1, j + k + 1))
                    self.rules_table[i][j].append(new_rule)

            # Check if probability for cell found
            is_rule_occured = \
                self.__find_if_non_terminal_or_start_rule_occured_in_cell(i, j)

            # Aggressive and final covering
            if not is_rule_occured and positive and covering_on and \
                    self.settings.get_value('covering', 'is_full_covering_allowed') == "True":
                self.__apply_aggressive_and_final_covering(i, j)
def start_execution(self):
    """
    Worker loop: processes cell jobs until a ``None`` job is received.

    For a job with indices ``(i, j)`` and each split point ``k`` the method
    polls the parsing-state proxy until both parent cells ``(k, j)`` and
    ``(i - k - 1, j + k + 1)`` stop reporting ``False``, then fetches the
    current rules and probability array and applies every binary rule whose
    parents both hold a value; results are sent to ``rules_prob_proxy``.
    If no rule applied at all, ``[i, j]`` is registered via
    ``add_rule_to_add``.  The cell is then marked through
    ``update_cell_state``.

    :return: on a ``None`` job, the tuple of the results of
        ``get_rule_to_add()``, ``get_rules_table()`` and
        ``get_probability_array()`` of ``rules_prob_proxy`` (returned as-is,
        without calling ``.get()`` on them)
    """
    while True:
        job: CykIndexes = self.jobs_storage_proxy.get_cell_job().get()
        if job is None:
            return (self.rules_prob_proxy.get_rule_to_add(),
                    self.rules_prob_proxy.get_rules_table(),
                    self.rules_prob_proxy.get_probability_array())

        i = job.i
        j = job.j
        was = False
        for k in range(i):
            cross_i = i - k - 1
            cross_j = j + k + 1
            cell_up_state = self.parsing_state_proxy.get_cell_state(k, j).get()
            cell_cross_state = self.parsing_state_proxy.get_cell_state(cross_i, cross_j).get()
            # Poll until BOTH parent cells are ready.  The previous `and`
            # stopped polling as soon as either one was ready, letting the
            # other be read before it was filled.
            while cell_up_state is False or cell_cross_state is False:
                cell_up_state = self.parsing_state_proxy.get_cell_state(k, j).get()
                cell_cross_state = self.parsing_state_proxy.get_cell_state(cross_i, cross_j).get()

            # Fetched per split point, after the wait, so the data reflects
            # the finished parent cells.
            rules = self.rules_storage_proxy.get_rules().get()
            probability_array = self.rules_prob_proxy.get_probability_array().get()
            for rule in rules:
                if rule.right2 is None:
                    continue
                first_rule_index = rule.right1.index
                second_rule_index = rule.right2.index
                if probability_array[k][j][first_rule_index] is not None \
                        and probability_array[cross_i][cross_j][second_rule_index] is not None:
                    rule.tmp_used = True
                    rule_left_index = rule.left.index
                    was = True
                    parent_cell_probability = probability_array[k][j][first_rule_index]
                    parent_cell_2_probability = \
                        probability_array[cross_i][cross_j][second_rule_index]
                    current_cell_probability = probability_array[i][j][rule_left_index]
                    current_cell_probability = Stochastic.new_calculate_cell(
                        'BaumWelch', None,
                        parent_cell_probability,
                        parent_cell_2_probability,
                        current_cell_probability, rule)
                    new_rule = sCellRule(rule, Coordinates(k, j),
                                         Coordinates(cross_i, cross_j))
                    self.rules_prob_proxy.update_rules_table(i, j, new_rule)
                    self.rules_prob_proxy.update_probability_array(
                        i, j, rule_left_index, current_cell_probability)

        if not was:
            self.rules_prob_proxy.add_rule_to_add([i, j])
        self.parsing_state_proxy.update_cell_state(i, j)
def new_first_row_init(cyk_values: CykValues):
    """
    Fills row 0 of the tables held by ``cyk_values`` and publishes each cell
    to ``cyk_values.parallelData``.

    For every index produced by ``iteration_generator`` over the sequence
    length, the grammar rules whose ``right1`` equals the symbol at that
    index are passed to ``init_cell`` and the results are added to the
    cell's ``cell_rules``.  If none matched and ``cyk_values.positive`` is
    truthy, the covering returned by ``choose_first_row_covering`` (if any)
    creates a rule for the symbol.  The cell is then marked as parsed.

    :param cyk_values: container of the sequence, grammar and tables
    :return: None
    """
    for column in iteration_generator(len(cyk_values.sequence)):
        # Re-read on every pass: covering below may add rules to the grammar.
        grammar_rules = list(cyk_values.grammar.get_rules())
        matched = []
        for rule in rules_generator(grammar_rules):
            if rule.right1 == cyk_values.sequence[column]:
                matched.append(init_cell(cyk_values, column, rule))

        if matched:
            cyk_values.rules_table[0][column].cell_rules.extend(matched)
        elif cyk_values.positive:
            covering = choose_first_row_covering(cyk_values)
            if covering is not None:
                created = covering.add_new_rule(cyk_values.grammar,
                                                cyk_values.sequence[column])
                init_cell(cyk_values, column, created)
                cyk_values.rules_table[0][column].cell_rules.append(
                    sCellRule(created))

        cyk_values.rules_table[0][column].parsed = True
        # Publish the finished cell under the "<row><column>" key.
        cell_key = '{}{}'.format(0, column)
        cyk_values.parallelData.cyk_parsed_cells[cell_key] = 1
        cyk_values.parallelData.cyk_probability_array[cell_key] = \
            cyk_values.probability_array[0][column]