def unite_recursively(self):
    """Merge pairs of supertype clusters until no pair is left to merge.

    Each round asks ``self._find_clusters_to_be_united()`` for two clusters
    and builds their union. Every task that belongs to only one of the two
    clusters is then checked: if its contribution is below ``threshold``
    times the union's current average contribution, it is removed from the
    union. Finally the union is appended to
    ``self.task_distance_pairs[constants.SUPERTYPE_NAME]`` and the two source
    clusters are removed from that list.

    The earlier implementation called itself once per merge. This version
    loops instead, so the number of merges is no longer bounded by the
    interpreter's recursion limit. The method name is kept for callers.

    NOTE(review): this method is defined more than once in this file; if
    both definitions live in the same class, the later one shadows the
    earlier one.

    Returns:
        None. ``self.task_distance_pairs`` is updated in place.
    """
    threshold = constants.THRESHOLD_FOR_REMOVING_FROM_PART_OF  # roughly 0.2
    while True:
        try:
            cluster_a, cluster_b = self._find_clusters_to_be_united()
        except ValueError:
            # Per the original note the finder yields [] when nothing is left
            # to unite; unpacking that raises ValueError and ends the process.
            return

        united_new_cluster = cluster_a.union(cluster_b)
        not_shared_tasks = cluster_a.difference(cluster_b).union(
            cluster_b.difference(cluster_a))
        evaluator = TaskGraphEvaluator(self.graph)

        for task_name in not_shared_tasks:
            contribution = float(
                evaluator.contribution_with_task_name(task_name)[0])
            # Not hoisted out of the loop: the removals below change the
            # cluster, so its average can differ between iterations.
            average_of_contribution = (
                evaluator.contribution_with_cluster(united_new_cluster)
                / len(united_new_cluster))
            if contribution < average_of_contribution * threshold:
                # Message (Japanese): "<task> was excluded because it most
                # likely belongs to a different route".
                print('%sは別ルートの可能性が高いので排除しました' % task_name)
                united_new_cluster.remove(task_name)

        # Append first, then drop the two sources (same order as before).
        supertype_clusters = self.task_distance_pairs[constants.SUPERTYPE_NAME]
        supertype_clusters.append(united_new_cluster)
        supertype_clusters.remove(cluster_a)
        supertype_clusters.remove(cluster_b)
def _cluster_contribution_url(self, cluster):
    """Return ``cluster`` with its contribution and its tasks' first-aspect URLs.

    For every task name in ``cluster`` the ``'url'`` entry of the first item
    returned by ``self._aspects_with_task_name`` is collected into a set.

    NOTE(review): if an ``IndexError`` is raised while collecting (for
    example a task with no aspects), the whole URL set is replaced by an
    empty set rather than only skipping that task. Confirm this
    all-or-nothing fallback is intended.

    Returns:
        A tuple ``(cluster, contribution, urls)`` where ``contribution`` is
        ``TaskGraphEvaluator(self.graph).contribution_with_cluster(cluster)``.
    """
    scorer = TaskGraphEvaluator(self.graph)
    names = {name for name in cluster}

    first_urls = set()
    try:
        for name in names:
            aspects_of_task = self._aspects_with_task_name(name)
            first_urls.add(aspects_of_task[0]['url'])
    except IndexError:
        # Discard anything gathered so far, matching the original fallback.
        first_urls = set()

    return (cluster, scorer.contribution_with_cluster(cluster), first_urls)
def _select_subtype_tasks(self, pairs, subtype): """ SUBTYPEがあるときのみ使う pairs => {('ハウスクリーニング_を_利用する', 25), ('新聞_も_分別する', -33), ('ブランド_を_教える', -18)} """ try: task_names = {pair[0] for pair in pairs} except TypeError: pdb.set_trace() evaluator = TaskGraphEvaluator(self.graph) result = (task_names, evaluator.contribution_with_cluster(task_names), subtype) return result
def _cluster_contribution_url_intersection(self, cluster):
    """Return ``cluster`` with its contribution and the URLs shared by all tasks.

    For every task name in ``cluster`` the ``'url'`` values of all aspects
    returned by ``self._aspects_with_task_name`` are gathered, and the
    per-task URL sets are intersected.

    The previous implementation used "the running set is empty" to mean
    "not started yet". Once the intersection became empty, the next task's
    URLs silently replaced it, so the result depended on set iteration order
    and could contain URLs that are not common to every task. A ``None``
    sentinel now marks the "not started" state.

    Args:
        cluster: iterable of task names (the original comment describes it
            as ``TaskCluster({...})``).

    Returns:
        A tuple ``(cluster, contribution, url_set)`` where ``contribution``
        is ``TaskGraphEvaluator(self.graph).contribution_with_cluster(cluster)``
        and ``url_set`` is the set of URLs present for every task (an empty
        set when ``cluster`` has no tasks).
    """
    url_set = None  # None means no task has been processed yet.
    for task_name in set(cluster):
        aspects = self._aspects_with_task_name(task_name)
        urls = {aspect['url'] for aspect in aspects}
        url_set = urls if url_set is None else url_set.intersection(urls)
    if url_set is None:
        url_set = set()

    evaluator = TaskGraphEvaluator(self.graph)
    return (cluster, evaluator.contribution_with_cluster(cluster), url_set)
def unite_recursively(self):
    """Merge pairs of supertype clusters until no pair is left to merge.

    Each round asks ``self._find_clusters_to_be_united()`` for two clusters
    and builds their union. Every task that belongs to only one of the two
    clusters is then checked: if its contribution is below ``threshold``
    times the union's current average contribution, it is removed from the
    union. Finally the union is appended to
    ``self.task_distance_pairs[constants.SUPERTYPE_NAME]`` and the two source
    clusters are removed from that list.

    The earlier implementation called itself once per merge. This version
    loops instead, so the number of merges is no longer bounded by the
    interpreter's recursion limit. The method name is kept for callers.

    NOTE(review): this method is defined more than once in this file; if
    both definitions live in the same class, the later one shadows the
    earlier one.

    Returns:
        None. ``self.task_distance_pairs`` is updated in place.
    """
    threshold = constants.THRESHOLD_FOR_REMOVING_FROM_PART_OF  # roughly 0.2
    while True:
        try:
            cluster_a, cluster_b = self._find_clusters_to_be_united()
        except ValueError:
            # Per the original note the finder yields [] when nothing is left
            # to unite; unpacking that raises ValueError and ends the process.
            return

        united_new_cluster = cluster_a.union(cluster_b)
        not_shared_tasks = cluster_a.difference(cluster_b).union(
            cluster_b.difference(cluster_a))
        evaluator = TaskGraphEvaluator(self.graph)

        for task_name in not_shared_tasks:
            contribution = float(
                evaluator.contribution_with_task_name(task_name)[0])
            # Not hoisted out of the loop: the removals below change the
            # cluster, so its average can differ between iterations.
            average_of_contribution = (
                evaluator.contribution_with_cluster(united_new_cluster)
                / len(united_new_cluster))
            if contribution < average_of_contribution * threshold:
                # Message (Japanese): "<task> was excluded because it most
                # likely belongs to a different route".
                print('%sは別ルートの可能性が高いので排除しました' % task_name)
                united_new_cluster.remove(task_name)

        # Append first, then drop the two sources (same order as before).
        supertype_clusters = self.task_distance_pairs[constants.SUPERTYPE_NAME]
        supertype_clusters.append(united_new_cluster)
        supertype_clusters.remove(cluster_a)
        supertype_clusters.remove(cluster_b)