def __generate_information_probability(self, related_node_target_links):
    """Calculate probabilities with target links.

    It creates the entropy and probabilities from related nodes in the
    whole AtomSpace by registering every n-gram combination of each
    related node's links to the statistics provider.

    Args:
        related_node_target_links: Target link tuples in related node list
    :param related_node_target_links: list[list[EqualLinkKey]]
    Returns:
        None. Results are accumulated into self.provider.
    """
    log.debug(
        "ConnectConflictInteractionInformation: Calculating probabilities "
        "(Total: " + str(len(related_node_target_links)) + ")"
    )

    current_ratio = 0
    # Register every link in related nodes to the provider.
    for i, target_equal_link in enumerate(related_node_target_links):
        current_ratio = self.__print_progress(
            "ConnectConflictInteractionInformation:PROB:",
            current_ratio, i, len(related_node_target_links), 30
        )
        # TODO: To prevent freeze during probability generation,
        # user can limit the max value of calculation.
        max_repeat_length = self.provider.n_gram \
            if 0 < self.provider.n_gram < len(target_equal_link) \
            else len(target_equal_link)

        # Make n-gram data in each related node.
        # A provider with a high n-gram gives more correct data,
        # but speed degrades rapidly.
        # (0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), ... (1, 1, 1)
        cartesian_binary_iterator = \
            itertools.product([False, True], repeat=max_repeat_length)

        # BUG FIX: consume the product iterator directly. The original
        # wrapped it in enumerate(), which made each "case" an
        # (index, tuple) pair, so the inner loop saw at most two bogus
        # selectors (an int and a tuple) instead of the boolean mask.
        for viable_case_binary in cartesian_binary_iterator:
            gram_data = list()
            for j, selector in enumerate(viable_case_binary):
                # Make each gram data.
                if selector:
                    gram_data.append(eq_link.key_to_link(
                        self.a,
                        target_equal_link[j],
                        self.ret[0],
                        target_equal_link[j].tv
                    ))
            # Register the generated gram_data.
            self.provider.add_one_rawdata_count(
                [data for data in gram_data if data is not None], 1
            )

    # Update provider's statistic data.
    PyProbabilityAtom().calculate_probabilities(self.provider)
    PyEntropyAtom().calculate_entropies(self.provider)
def __print_progress(
        self, msg, current_ratio, current_count, total, step=10
):
    """Log evaluation progress at roughly `step`-percent intervals.

    Args:
        msg: Prefix for the debug log message.
        current_ratio: Count threshold at which the next message is due.
        current_count: Number of items processed so far.
        total: Total number of items to process.
        step: Percentage interval between progress messages.
    Returns:
        The updated threshold for the next progress message.
    """
    # BUG FIX: guard against ZeroDivisionError when there is nothing to
    # process; behavior is unchanged for any positive total.
    if total <= 0:
        return current_ratio

    if current_ratio < current_count:
        # Advance the threshold by `step` percent of the total.
        current_ratio += total * step * 0.01
        log.debug(
            msg + ": " + str(current_count) + "/" + str(total) +
            " (" + str(100 * current_count / float(total)) + "%)"
        )
    return current_ratio
def __print_progress(self, msg, current_ratio, current_count, total, step=10):
    """Emit a debug log line tracking evaluation progress.

    A message is logged whenever `current_count` passes the stored
    threshold; the threshold then advances by `step` percent of `total`.
    Returns the (possibly advanced) threshold.
    """
    # Nothing to report yet: keep the threshold as-is.
    if current_count <= current_ratio:
        return current_ratio

    next_threshold = current_ratio + total * step * 0.01
    percentage = 100 * current_count / float(total)
    log.debug(
        msg + ": " + str(current_count) + "/" + str(total) +
        " (" + str(percentage) + "%)"
    )
    return next_threshold
def __generate_information_probability(self, related_node_target_links):
    """Calculate probabilities with target links.

    For every related node, enumerate all boolean n-gram combinations of
    its links and feed each selected subset to the statistics provider,
    then recompute the provider's probabilities and entropies.

    Args:
        related_node_target_links: Target link tuples in related node list
    :param related_node_target_links: list[list[EqualLinkKey]]
    Returns:
        None. Statistics are written into self.provider.
    """
    log.debug(
        "ConnectConflictInteractionInformation: Calculating probabilities "
        "(Total: " + str(len(related_node_target_links)) + ")")

    current_ratio = 0
    # Register every link in the related nodes with the provider.
    for i, target_equal_link in enumerate(related_node_target_links):
        current_ratio = self.__print_progress(
            "ConnectConflictInteractionInformation:PROB:",
            current_ratio, i, len(related_node_target_links), 30)

        # TODO: To prevent freeze during probability generation,
        # user can limit the max value of calculation.
        max_repeat_length = self.provider.n_gram \
            if 0 < self.provider.n_gram < len(target_equal_link) \
            else len(target_equal_link)

        # All boolean masks of length max_repeat_length:
        # (0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), ... (1, 1, 1)
        # Higher n-gram yields better statistics but is much slower.
        cartesian_binary_iterator = \
            itertools.product([False, True], repeat=max_repeat_length)

        # BUG FIX: iterate the product itself, not enumerate(product).
        # enumerate() turned each mask into an (index, mask) pair, so the
        # selector loop never saw the actual booleans.
        for viable_case_binary in cartesian_binary_iterator:
            gram_data = list()
            for j, selector in enumerate(viable_case_binary):
                # Include the j-th link only where the mask is True.
                if selector:
                    gram_data.append(
                        eq_link.key_to_link(self.a,
                                            target_equal_link[j],
                                            self.ret[0],
                                            target_equal_link[j].tv))
            # Register the generated gram_data.
            self.provider.add_one_rawdata_count(
                [data for data in gram_data if data is not None], 1)

    # Update provider's statistic data.
    PyProbabilityAtom().calculate_probabilities(self.provider)
    PyEntropyAtom().calculate_entropies(self.provider)
def __make_result_log(self, result_list, reverse):
    """Debug-log the interaction information of the leading results.

    Only the first 15 entries are printed; when `reverse` is set the
    list is traversed back-to-front first.
    """
    entries = reversed(result_list) if reverse else result_list
    for rank, result in enumerate(entries):
        # Log only the leading 15 results.
        if rank >= 15:
            continue
        label = ""
        for link in result['filtered_merged_links']:
            for node in link.out:
                # Collect concept-node names, skipping self.ret[0] itself.
                if node.t == types.ConceptNode and node != self.ret[0]:
                    label += node.name + ", "
        log.debug(label + ": " + str(result['interaction_information']))
def __evaluate_interaction_information(
        self,
        decided_atoms,
        conflict_link_cases,
        non_conflict_link_cases,
        non_duplicate_link_cases,
):
    """Evaluate the interaction information value for each available
    conflict link case, and return the link set that has the maximum
    information value.

    Args:
        decided_atoms: The source atoms to make new atom.
        conflict_link_cases: Conflicted link tuples list.
        non_conflict_link_cases: Non-conflict links list.
        non_duplicate_link_cases: Non-duplicated links list.
    :param decided_atoms: list[Atom]
    :param conflict_link_cases: list[list[EqualLinkKey]]
    :param non_conflict_link_cases: list[EqualLinkKey]
    :param non_duplicate_link_cases: list[EqualLinkKey]
    Returns:
        The link set with the maximum interaction information value, or
        an empty list if no case produced a usable result (last_status
        is then set to EMPTY_RESULT).
    :rtype: list[EqualLinkKey]
    """
    result_list = list()

    inheritance_nodes = list()
    for decided_atom in decided_atoms:
        inheritance_nodes += find_inheritance_nodes(self.a, decided_atom)

    # TODO: To prevent freeze during interaction information generation,
    # user can limit the max value of calculation.
    max_repeat_length = self.evaluate_n_gram_limit \
        if self.evaluate_n_gram_limit < self.provider.n_gram \
        else self.provider.n_gram

    log.debug(
        "ConnectConflictInteractionInformation: " +
        "Calculating interaction information " +
        "(Total: " + str(len(conflict_link_cases)) + ")")

    current_ratio = 0
    for i, conflict_link in enumerate(conflict_link_cases):
        current_ratio = self.__print_progress(
            "ConnectConflictInteractionInformation:II:",
            current_ratio, i, len(conflict_link_cases), 25)

        merged_links = list()
        merged_links.extend(non_conflict_link_cases)
        merged_links.extend(non_duplicate_link_cases)
        merged_links.extend(conflict_link)

        # Calculate n-gram data in each available link case. A provider
        # with a high n-gram gives more correct data, but is much slower.
        #
        # BUG FIX: the original built this with map()/filter() and then
        # called len() on it — on Python 3 map() returns an iterator with
        # no len(). A list comprehension is equivalent and version-safe.
        # Also replaced the `is 0` identity check with a truthiness test.
        filtered_merged_links = [
            eq_link.key_to_link(self.a, link_key, self.ret[0], link_key.tv)
            for link_key in merged_links
            # TODO: Currently connector excludes an InheritanceLink
            # to get valuable(funny) result.
            if self.a[link_key.h].out[0] not in inheritance_nodes and
            # Manually throw away the links that have low strength.
            link_key.tv.mean > self.inter_info_strength_above_limit
        ]
        if not filtered_merged_links:
            continue

        interaction_information = PyInteractionInformationAtom(). \
            calculate_interaction_information(
                filtered_merged_links,
                self.provider,
                max_repeat_length
            )

        result_list.append({
            "merged_links": merged_links,
            "filtered_merged_links": filtered_merged_links,
            "interaction_information": interaction_information
        })

    if len(result_list) < 1:
        self.last_status = blending_status.EMPTY_RESULT
        return []

    # Best (largest interaction information) case first.
    result_list = sorted(result_list,
                         key=(lambda x: x["interaction_information"]),
                         reverse=True)

    self.__make_result_log(result_list, reverse=False)
    return result_list[0]['merged_links']
def __evaluate_interaction_information(
        self,
        decided_atoms,
        conflict_link_cases,
        non_conflict_link_cases,
        non_duplicate_link_cases,
):
    """Evaluate interaction information for every conflict link case.

    Each case is merged with the non-conflict and non-duplicate links,
    filtered, scored via PyInteractionInformationAtom, and the link set
    with the highest score is returned.

    Args:
        decided_atoms: The source atoms to make new atom.
        conflict_link_cases: Conflicted link tuples list.
        non_conflict_link_cases: Non-conflict links list.
        non_duplicate_link_cases: Non-duplicated links list.
    :param decided_atoms: list[Atom]
    :param conflict_link_cases: list[list[EqualLinkKey]]
    :param non_conflict_link_cases: list[EqualLinkKey]
    :param non_duplicate_link_cases: list[EqualLinkKey]
    Returns:
        The winning link set, or [] (with last_status set to
        EMPTY_RESULT) when every case was filtered away.
    :rtype: list[EqualLinkKey]
    """
    result_list = list()

    inheritance_nodes = list()
    for decided_atom in decided_atoms:
        inheritance_nodes += find_inheritance_nodes(self.a, decided_atom)

    # TODO: To prevent freeze during interaction information generation,
    # user can limit the max value of calculation.
    max_repeat_length = self.evaluate_n_gram_limit \
        if self.evaluate_n_gram_limit < self.provider.n_gram \
        else self.provider.n_gram

    log.debug(
        "ConnectConflictInteractionInformation: " +
        "Calculating interaction information " +
        "(Total: " + str(len(conflict_link_cases)) + ")"
    )

    current_ratio = 0
    for i, conflict_link in enumerate(conflict_link_cases):
        current_ratio = self.__print_progress(
            "ConnectConflictInteractionInformation:II:",
            current_ratio, i, len(conflict_link_cases), 25
        )

        merged_links = list()
        merged_links.extend(non_conflict_link_cases)
        merged_links.extend(non_duplicate_link_cases)
        merged_links.extend(conflict_link)

        # Calculate n-gram data for this link case. Higher n-gram gives
        # more correct data at a rapidly growing cost.
        #
        # BUG FIX: replaced map()/filter() + len() with a list
        # comprehension — Python 3's map() is a lazy iterator that has no
        # len() — and replaced `is 0` (identity on an int literal) with a
        # plain truthiness test.
        filtered_merged_links = [
            eq_link.key_to_link(self.a, key, self.ret[0], key.tv)
            for key in merged_links
            # TODO: Currently connector excludes an InheritanceLink
            # to get valuable(funny) result.
            if self.a[key.h].out[0] not in inheritance_nodes and
            # Manually throw away the links that have low strength.
            key.tv.mean > self.inter_info_strength_above_limit
        ]
        if not filtered_merged_links:
            continue

        interaction_information = PyInteractionInformationAtom(). \
            calculate_interaction_information(
                filtered_merged_links,
                self.provider,
                max_repeat_length
            )

        result_list.append({
            "merged_links": merged_links,
            "filtered_merged_links": filtered_merged_links,
            "interaction_information": interaction_information
        })

    if len(result_list) < 1:
        self.last_status = blending_status.EMPTY_RESULT
        return []

    # Sort descending so the best-scoring case comes first.
    result_list = sorted(
        result_list,
        key=(lambda x: x["interaction_information"]),
        reverse=True
    )

    self.__make_result_log(result_list, reverse=False)
    return result_list[0]['merged_links']