def __generate_information_probability(
            self,
            related_node_target_links
    ):
        """Register link combinations to the provider, then update statistics.

        For every entry of `related_node_target_links`, each True/False
        selection over its first `max_repeat_length` link keys is converted
        to links and registered to `self.provider` with a count of 1.
        Finally the provider's probabilities and entropies are recalculated.

        Args:
            related_node_target_links: Target link tuples in related node list
            :param related_node_target_links: list[list[EqualLinkKey]]
        Returns:
            Nothing; the results are stored in `self.provider`.
            :rtype : None
        """
        total = len(related_node_target_links)
        log.debug(
            "ConnectConflictInteractionInformation: Calculating probabilities "
            "(Total: " + str(total) + ")"
        )

        current_ratio = 0
        # Register the every link in related nodes to provider.
        for i, target_equal_link in enumerate(related_node_target_links):
            current_ratio = self.__print_progress(
                "ConnectConflictInteractionInformation:PROB:",
                current_ratio, i, total, 30
            )
            # TODO: To prevent freeze during probability generation,
            # user can limit the max value of calculation.
            # A non-positive n_gram means "no limit": use every link.
            link_count = len(target_equal_link)
            n_gram = self.provider.n_gram
            max_repeat_length = \
                n_gram if 0 < n_gram < link_count else link_count

            # Make n-gram data in each related node.
            # The provider with high n-gram will provides more correct data,
            # but speed will going slower rapidly.
            # (0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), ... (1, 1, 1)
            cartesian_binary_iterator = \
                itertools.product([False, True], repeat=max_repeat_length)

            # Iterate the cases directly. Wrapping this iterator in
            # enumerate() would hand (index, case) pairs to the inner loop,
            # which would then treat the index and the whole tuple as the
            # selectors.
            for viable_case_binary in cartesian_binary_iterator:
                # Make each gram data from the selected positions only.
                gram_data = [
                    eq_link.key_to_link(
                        self.a,
                        target_equal_link[j],
                        self.ret[0],
                        target_equal_link[j].tv
                    )
                    for j, selector in enumerate(viable_case_binary)
                    if selector
                ]
                # Register the generated gram_data, skipping None entries.
                self.provider.add_one_rawdata_count(
                    [data for data in gram_data if data is not None], 1
                )

        # Update provider's statistic data.
        PyProbabilityAtom().calculate_probabilities(self.provider)
        PyEntropyAtom().calculate_entropies(self.provider)
 def __print_progress(
         self, msg, current_ratio, current_count, total, step=10
 ):
     """Log progress once the count passes the current threshold.

     Args:
         msg: Prefix of the debug message.
         current_ratio: Count threshold that must be exceeded before the
             next message is logged.
         current_count: Number of items processed so far.
         total: Total number of items.
         step: Distance between two messages, in percent of `total`.
     Returns:
         The threshold to pass to the next call: unchanged when nothing
         was logged, otherwise advanced by `step` percent of `total`.
     """
     # Threshold not passed yet: stay silent and keep it as it is.
     if current_ratio >= current_count:
         return current_ratio

     next_threshold = current_ratio + total * step * 0.01
     percent = 100 * current_count / float(total)
     log.debug("".join([
         msg, ": ", str(current_count), "/", str(total),
         " (", str(percent), "%)"
     ]))
     return next_threshold
Ejemplo n.º 3
0
 def __print_progress(self,
                      msg,
                      current_ratio,
                      current_count,
                      total,
                      step=10):
     """Emit a debug progress line when enough items have been processed.

     `current_ratio` is the count threshold for the next message. When
     `current_count` is above it, one line of the form
     "<msg>: <count>/<total> (<percent>%)" is logged and the threshold
     moves forward by `step` percent of `total`.

     Returns:
         The (possibly advanced) threshold, to be fed into the next call.
     """
     should_report = current_ratio < current_count
     if should_report:
         current_ratio = current_ratio + total * step * 0.01
         done_percent = 100 * current_count / float(total)
         pieces = [msg, ": ", str(current_count), "/", str(total),
                   " (", str(done_percent), "%)"]
         log.debug("".join(pieces))
     return current_ratio
Ejemplo n.º 4
0
    def __generate_information_probability(self, related_node_target_links):
        """Register link combinations to the provider, then update statistics.

        For every entry of `related_node_target_links`, each True/False
        selection over its first `max_repeat_length` link keys is converted
        to links and registered to `self.provider` with a count of 1.
        Finally the provider's probabilities and entropies are recalculated.

        Args:
            related_node_target_links: Target link tuples in related node list
            :param related_node_target_links: list[list[EqualLinkKey]]
        Returns:
            Nothing; the results are stored in `self.provider`.
            :rtype : None
        """
        total = len(related_node_target_links)
        log.debug(
            "ConnectConflictInteractionInformation: Calculating probabilities "
            "(Total: " + str(total) + ")")

        current_ratio = 0
        # Register the every link in related nodes to provider.
        for i, target_equal_link in enumerate(related_node_target_links):
            current_ratio = self.__print_progress(
                "ConnectConflictInteractionInformation:PROB:", current_ratio,
                i, total, 30)
            # TODO: To prevent freeze during probability generation,
            # user can limit the max value of calculation.
            # A non-positive n_gram means "no limit": use every link.
            link_count = len(target_equal_link)
            n_gram = self.provider.n_gram
            max_repeat_length = \
                n_gram if 0 < n_gram < link_count else link_count

            # Make n-gram data in each related node.
            # The provider with high n-gram will provides more correct data,
            # but speed will going slower rapidly.
            # (0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), ... (1, 1, 1)
            cartesian_binary_iterator = \
                itertools.product([False, True], repeat=max_repeat_length)

            # Iterate the cases directly. Wrapping this iterator in
            # enumerate() would hand (index, case) pairs to the inner loop,
            # which would then treat the index and the whole tuple as the
            # selectors.
            for viable_case_binary in cartesian_binary_iterator:
                # Make each gram data from the selected positions only.
                gram_data = [
                    eq_link.key_to_link(self.a, target_equal_link[j],
                                        self.ret[0],
                                        target_equal_link[j].tv)
                    for j, selector in enumerate(viable_case_binary)
                    if selector
                ]
                # Register the generated gram_data, skipping None entries.
                self.provider.add_one_rawdata_count(
                    [data for data in gram_data if data is not None], 1)

        # Update provider's statistic data.
        PyProbabilityAtom().calculate_probabilities(self.provider)
        PyEntropyAtom().calculate_entropies(self.provider)
Ejemplo n.º 5
0
    def __make_result_log(self, result_list, reverse):
        """Write the leading interaction information results to the debug log.

        For each reported result, one line is logged containing the names
        of the ConceptNodes (other than `self.ret[0]`) found in the
        outgoing sets of its 'filtered_merged_links', followed by its
        'interaction_information' value.

        Args:
            result_list: Result dicts holding 'filtered_merged_links' and
                'interaction_information'.
            reverse: If true, the list is walked from its end.
        """
        # Only this many results are printed.
        max_logged = 15

        if reverse:
            result_list = reversed(result_list)

        for i, result in enumerate(result_list):
            if i >= max_logged:
                # Nothing beyond the limit is logged, so stop walking.
                break
            name = "".join(
                node.name + ", "
                for link in result['filtered_merged_links']
                for node in link.out
                if node.t == types.ConceptNode and node != self.ret[0]
            )
            log.debug(name + ": " + str(result['interaction_information']))
    def __make_result_log(self, result_list, reverse):
        """Write the leading interaction information results to the debug log.

        For each reported result, one line is logged containing the names
        of the ConceptNodes (other than `self.ret[0]`) found in the
        outgoing sets of its 'filtered_merged_links', followed by its
        'interaction_information' value.

        Args:
            result_list: Result dicts holding 'filtered_merged_links' and
                'interaction_information'.
            reverse: If true, the list is walked from its end.
        """
        # Only this many results are printed.
        max_logged = 15

        if reverse:
            result_list = reversed(result_list)

        for i, result in enumerate(result_list):
            if i >= max_logged:
                # Nothing beyond the limit is logged, so stop walking.
                break
            name = "".join(
                node.name + ", "
                for link in result['filtered_merged_links']
                for node in link.out
                if node.t == types.ConceptNode and node != self.ret[0]
            )
            log.debug(name + ": " + str(result['interaction_information']))
Ejemplo n.º 7
0
    def __evaluate_interaction_information(
        self,
        decided_atoms,
        conflict_link_cases,
        non_conflict_link_cases,
        non_duplicate_link_cases,
    ):
        """Evaluate interaction information value for each available conflict
        link cases, and returns one link set has maximum information value.

        Each conflict case is merged with the non-conflict and
        non-duplicate links. The merged keys are filtered and converted to
        links, and the interaction information of those links is calculated.
        Cases whose filtered link list is empty are skipped. If no case
        remains, `self.last_status` is set to
        `blending_status.EMPTY_RESULT`.

        Args:
            decided_atoms: The source atoms to make new atom.
            conflict_link_cases: Conflicted link tuples list.
            non_conflict_link_cases: Non-conflict links list.
            non_duplicate_link_cases: Non-duplicated links list.
            :param decided_atoms: list[Atom]
            :param conflict_link_cases: list[list[EqualLinkKey]]
            :param non_conflict_link_cases: list[EqualLinkKey]
            :param non_duplicate_link_cases: list[EqualLinkKey]
        Returns:
            A link set has maximum interaction information value, or an
            empty list when no case could be evaluated.
            :rtype: list[EqualLinkKey]
        """
        inheritance_nodes = list()
        for decided_atom in decided_atoms:
            inheritance_nodes += find_inheritance_nodes(self.a, decided_atom)

        # TODO: To prevent freeze during interaction information generation,
        # user can limit the max value of calculation.
        max_repeat_length = min(
            self.evaluate_n_gram_limit, self.provider.n_gram
        )

        total = len(conflict_link_cases)
        log.debug("ConnectConflictInteractionInformation: " +
                  "Calculating interaction information " + "(Total: " +
                  str(total) + ")")

        result_list = list()
        current_ratio = 0
        for i, conflict_link in enumerate(conflict_link_cases):
            current_ratio = self.__print_progress(
                "ConnectConflictInteractionInformation:II:", current_ratio, i,
                total, 25)
            merged_links = list(non_conflict_link_cases)
            merged_links.extend(non_duplicate_link_cases)
            merged_links.extend(conflict_link)

            # Calculate n-gram data in each available link cases.
            # The provider with high n-gram will provides more correct data,
            # but speed will going slower rapidly.
            #
            # Built as a real list (not a lazy map/filter) so that it can
            # be tested for emptiness and read again later from the result
            # dict.
            filtered_merged_links = [
                eq_link.key_to_link(self.a, x, self.ret[0], x.tv)
                for x in merged_links
                # TODO: Currently connector excludes
                # an InheritanceLink to get valuable(funny) result.
                if self.a[x.h].out[0] not in inheritance_nodes and
                # Manually throw away the links have low strength.
                x.tv.mean > self.inter_info_strength_above_limit
            ]

            if not filtered_merged_links:
                continue

            interaction_information = PyInteractionInformationAtom(). \
                calculate_interaction_information(
                    filtered_merged_links,
                    self.provider,
                    max_repeat_length
                )

            result_list.append({
                "merged_links": merged_links,
                "filtered_merged_links": filtered_merged_links,
                "interaction_information": interaction_information
            })

        if not result_list:
            self.last_status = blending_status.EMPTY_RESULT
            return []

        # Highest interaction information first.
        result_list = sorted(result_list,
                             key=(lambda x: x["interaction_information"]),
                             reverse=True)

        self.__make_result_log(result_list, reverse=False)

        return result_list[0]['merged_links']
    def __evaluate_interaction_information(
            self,
            decided_atoms,
            conflict_link_cases,
            non_conflict_link_cases,
            non_duplicate_link_cases,
    ):
        """Evaluate interaction information value for each available conflict
        link cases, and returns one link set has maximum information value.

        Each conflict case is merged with the non-conflict and
        non-duplicate links. The merged keys are filtered and converted to
        links, and the interaction information of those links is calculated.
        Cases whose filtered link list is empty are skipped. If no case
        remains, `self.last_status` is set to
        `blending_status.EMPTY_RESULT`.

        Args:
            decided_atoms: The source atoms to make new atom.
            conflict_link_cases: Conflicted link tuples list.
            non_conflict_link_cases: Non-conflict links list.
            non_duplicate_link_cases: Non-duplicated links list.
            :param decided_atoms: list[Atom]
            :param conflict_link_cases: list[list[EqualLinkKey]]
            :param non_conflict_link_cases: list[EqualLinkKey]
            :param non_duplicate_link_cases: list[EqualLinkKey]
        Returns:
            A link set has maximum interaction information value, or an
            empty list when no case could be evaluated.
            :rtype: list[EqualLinkKey]
        """
        inheritance_nodes = list()
        for decided_atom in decided_atoms:
            inheritance_nodes += find_inheritance_nodes(self.a, decided_atom)

        # TODO: To prevent freeze during interaction information generation,
        # user can limit the max value of calculation.
        max_repeat_length = min(
            self.evaluate_n_gram_limit, self.provider.n_gram
        )

        total = len(conflict_link_cases)
        log.debug(
            "ConnectConflictInteractionInformation: " +
            "Calculating interaction information " +
            "(Total: " + str(total) + ")"
        )

        result_list = list()
        current_ratio = 0
        for i, conflict_link in enumerate(conflict_link_cases):
            current_ratio = self.__print_progress(
                "ConnectConflictInteractionInformation:II:",
                current_ratio, i, total, 25
            )
            merged_links = list(non_conflict_link_cases)
            merged_links.extend(non_duplicate_link_cases)
            merged_links.extend(conflict_link)

            # Calculate n-gram data in each available link cases.
            # The provider with high n-gram will provides more correct data,
            # but speed will going slower rapidly.
            #
            # Built as a real list (not a lazy map/filter) so that it can
            # be tested for emptiness and read again later from the result
            # dict.
            filtered_merged_links = [
                eq_link.key_to_link(self.a, x, self.ret[0], x.tv)
                for x in merged_links
                # TODO: Currently connector excludes
                # an InheritanceLink to get valuable(funny) result.
                if self.a[x.h].out[0] not in inheritance_nodes and
                # Manually throw away the links have low strength.
                x.tv.mean > self.inter_info_strength_above_limit
            ]

            if not filtered_merged_links:
                continue

            interaction_information = PyInteractionInformationAtom(). \
                calculate_interaction_information(
                    filtered_merged_links,
                    self.provider,
                    max_repeat_length
                )

            result_list.append({
                "merged_links": merged_links,
                "filtered_merged_links": filtered_merged_links,
                "interaction_information": interaction_information
            })

        if not result_list:
            self.last_status = blending_status.EMPTY_RESULT
            return []

        # Highest interaction information first.
        result_list = sorted(
            result_list,
            key=(lambda x: x["interaction_information"]),
            reverse=True
        )

        self.__make_result_log(result_list, reverse=False)

        return result_list[0]['merged_links']