Example #1
0
    def unite_recursively(self):
        """Merge supertype clusters pairwise until no pair is left to merge.

        Each round asks ``self._find_clusters_to_be_united()`` for two
        clusters, builds their union, drops weakly contributing tasks from
        that union, and then replaces the two source clusters with the union
        in ``self.task_distance_pairs[constants.SUPERTYPE_NAME]``.

        The method used to call itself once per round.  It is now a loop so
        that a long chain of merges cannot exceed Python's recursion limit;
        the work done in each round is unchanged.

        Returns:
            None.  ``self.task_distance_pairs`` is modified in place.
        """
        threshold = constants.THRESHOLD_FOR_REMOVING_FROM_PART_OF  # around 0.2
        supertype_key = constants.SUPERTYPE_NAME

        while True:
            try:
                cluster_a, cluster_b = self._find_clusters_to_be_united()
            except ValueError:  # [] came back: nothing left to unite
                return

            united_new_cluster = cluster_a.union(cluster_b)
            # Tasks that appear in exactly one of the two clusters.
            not_shared_tasks = cluster_a.difference(cluster_b).union(
                cluster_b.difference(cluster_a))

            evaluator = TaskGraphEvaluator(self.graph)
            for task_name in not_shared_tasks:
                contribution = float(
                    evaluator.contribution_with_task_name(task_name)[0])
                # Recomputed on every pass on purpose: the united cluster
                # shrinks whenever a task is removed below.
                average_of_contribution = evaluator.contribution_with_cluster(
                    united_new_cluster) / len(united_new_cluster)
                if contribution < average_of_contribution * threshold:
                    # Message: "<task> was excluded because it most likely
                    # belongs to a different route."
                    print('%sは別ルートの可能性が高いので排除しました' % task_name)
                    united_new_cluster.remove(task_name)

            clusters = self.task_distance_pairs[supertype_key]
            clusters.append(united_new_cluster)
            clusters.remove(cluster_a)
            clusters.remove(cluster_b)
 def _cluster_contribution_url(self, cluster):
     """Bundle a cluster with its contribution and its tasks' first-aspect URLs.

     For every distinct task name in ``cluster`` the first element returned
     by ``self._aspects_with_task_name`` is read and its ``'url'`` value is
     collected.

     Returns:
         A tuple ``(cluster, contribution, urls)`` where ``contribution`` is
         the value of ``TaskGraphEvaluator.contribution_with_cluster`` for
         the cluster and ``urls`` is a set.  If an ``IndexError`` is raised
         while collecting (for example a task has no aspects), ``urls`` is
         an empty set.
     """
     scorer = TaskGraphEvaluator(self.graph)
     unique_names = set(cluster)

     collected_urls = set()
     try:
         for name in unique_names:
             first_aspect = self._aspects_with_task_name(name)[0]
             collected_urls.add(first_aspect['url'])
     except IndexError:
         # All-or-nothing: one failing task discards every URL gathered.
         collected_urls = set()

     return (cluster, scorer.contribution_with_cluster(cluster), collected_urls)
 def _select_subtype_tasks(self, pairs, subtype):
     """
     SUBTYPEがあるときのみ使う
     pairs => {('ハウスクリーニング_を_利用する', 25),
     ('新聞_も_分別する', -33), ('ブランド_を_教える', -18)}
     """
     try:
         task_names = {pair[0] for pair in pairs}
     except TypeError:
         pdb.set_trace()
     evaluator = TaskGraphEvaluator(self.graph)
     result = (task_names, evaluator.contribution_with_cluster(task_names), subtype)
     return result
Example #4
0
 def _select_subtype_tasks(self, pairs, subtype):
     """
     SUBTYPEがあるときのみ使う
     pairs => {('ハウスクリーニング_を_利用する', 25),
     ('新聞_も_分別する', -33), ('ブランド_を_教える', -18)}
     """
     try:
         task_names = {pair[0] for pair in pairs}
     except TypeError:
         pdb.set_trace()
     evaluator = TaskGraphEvaluator(self.graph)
     result = (task_names, evaluator.contribution_with_cluster(task_names),
               subtype)
     return result
    def _cluster_contribution_url_intersection(self, cluster):
        """Bundle a cluster with its contribution and the URLs all its tasks share.

        For every distinct task name in ``cluster`` the ``'url'`` values of
        the aspects returned by ``self._aspects_with_task_name`` are
        collected, and the per-task URL sets are intersected.

        Returns:
            A tuple ``(cluster, contribution, url_set)`` where
            ``contribution`` is the value of
            ``TaskGraphEvaluator.contribution_with_cluster`` for the cluster
            and ``url_set`` holds the URLs common to every task.  It is an
            empty set when the cluster has no tasks or the tasks share no
            URL.
        """
        task_names = set(cluster)

        # ``None`` means "no task seen yet".  An empty set cannot serve as
        # that marker: once the running intersection became empty, the next
        # task's URLs would overwrite it and the result would no longer be
        # an intersection (and would depend on set iteration order).
        url_set = None
        for task_name in task_names:
            aspects = self._aspects_with_task_name(task_name)
            urls = {aspect['url'] for aspect in aspects}
            if url_set is None:
                url_set = urls
            else:
                url_set = url_set.intersection(urls)
        if url_set is None:
            url_set = set()

        evaluator = TaskGraphEvaluator(self.graph)
        # cluster => TaskCluster({'', '', ...})
        result = (cluster, evaluator.contribution_with_cluster(cluster), url_set)
        return result
Example #6
0
    def _cluster_contribution_url_intersection(self, cluster):
        """Bundle a cluster with its contribution and the URLs all its tasks share.

        For every distinct task name in ``cluster`` the ``'url'`` values of
        the aspects returned by ``self._aspects_with_task_name`` are
        collected, and the per-task URL sets are intersected.

        Returns:
            A tuple ``(cluster, contribution, url_set)`` where
            ``contribution`` is the value of
            ``TaskGraphEvaluator.contribution_with_cluster`` for the cluster
            and ``url_set`` holds the URLs common to every task.  It is an
            empty set when the cluster has no tasks or the tasks share no
            URL.
        """
        task_names = set(cluster)

        # ``None`` means "no task seen yet".  An empty set cannot serve as
        # that marker: once the running intersection became empty, the next
        # task's URLs would overwrite it and the result would no longer be
        # an intersection (and would depend on set iteration order).
        url_set = None
        for task_name in task_names:
            aspects = self._aspects_with_task_name(task_name)
            urls = {aspect['url'] for aspect in aspects}
            if url_set is None:
                url_set = urls
            else:
                url_set = url_set.intersection(urls)
        if url_set is None:
            url_set = set()

        evaluator = TaskGraphEvaluator(self.graph)
        # cluster => TaskCluster({'', '', ...})
        result = (cluster, evaluator.contribution_with_cluster(cluster),
                  url_set)
        return result
    def unite_recursively(self):
        """Merge supertype clusters pairwise until no pair is left to merge.

        Each round asks ``self._find_clusters_to_be_united()`` for two
        clusters, builds their union, drops weakly contributing tasks from
        that union, and then replaces the two source clusters with the union
        in ``self.task_distance_pairs[constants.SUPERTYPE_NAME]``.

        The method used to call itself once per round.  It is now a loop so
        that a long chain of merges cannot exceed Python's recursion limit;
        the work done in each round is unchanged.

        Returns:
            None.  ``self.task_distance_pairs`` is modified in place.
        """
        threshold = constants.THRESHOLD_FOR_REMOVING_FROM_PART_OF  # around 0.2
        supertype_key = constants.SUPERTYPE_NAME

        while True:
            try:
                cluster_a, cluster_b = self._find_clusters_to_be_united()
            except ValueError:  # [] came back: nothing left to unite
                return

            united_new_cluster = cluster_a.union(cluster_b)
            # Tasks that appear in exactly one of the two clusters.
            not_shared_tasks = cluster_a.difference(cluster_b).union(
                cluster_b.difference(cluster_a))

            evaluator = TaskGraphEvaluator(self.graph)
            for task_name in not_shared_tasks:
                contribution = float(
                    evaluator.contribution_with_task_name(task_name)[0])
                # Recomputed on every pass on purpose: the united cluster
                # shrinks whenever a task is removed below.
                average_of_contribution = evaluator.contribution_with_cluster(
                    united_new_cluster) / len(united_new_cluster)
                if contribution < average_of_contribution * threshold:
                    # Message: "<task> was excluded because it most likely
                    # belongs to a different route."
                    print('%sは別ルートの可能性が高いので排除しました' % task_name)
                    united_new_cluster.remove(task_name)

            clusters = self.task_distance_pairs[supertype_key]
            clusters.append(united_new_cluster)
            clusters.remove(cluster_a)
            clusters.remove(cluster_b)