def get_target_groups(self):
    """
    Return the clustered dependency mapping, building and caching it on first use.

    On the first call the result is computed in these steps and stored in
    self.__moduleDependenciesClustered; later calls return the stored object
    without recomputing:

      1. read the module dependencies from self.__inputDictIter,
      2. apply self.join_and_delete_modules with self.__operationDictIter,
         which also receives a dict for the deleted dependencies,
      3. convert both the remaining and the deleted module dependencies to
         file lists,
      4. merge them via self.join_missing_files,
      5. convert the value sets to tuples and transpose the mapping.

    @return: The result of CollectionTools.transpose applied to the
             tuple-valued file dependencies (presumably a mapping from
             dependency groups to their targets -- confirm against
             CollectionTools.transpose).
    """
    # Identity check on purpose: "== None" would call the cached object's
    # __eq__, which may be overridden or return a non-boolean value.
    if self.__moduleDependenciesClustered is None:
        moduleDependencies = OperationProcessor.read_dependencies(self.__inputDictIter)

        # Handed to join_and_delete_modules so that it can record what it
        # removed; the content is converted and merged back in below.
        deletedDependencies = dict()
        moduleDependencies = self.join_and_delete_modules(moduleDependencies,
                                                          self.__operationDictIter,
                                                          deletedDependencies)

        fileDependencies = LocalCollectionTools.convert_module_list_to_file_list(moduleDependencies)
        deletedFileDependencies = LocalCollectionTools.convert_module_list_to_file_list(deletedDependencies)
        fileDependencies = self.join_missing_files(fileDependencies, deletedFileDependencies)

        fileDependencies = CollectionTools.value_set_to_tuple(fileDependencies)
        self.__moduleDependenciesClustered = CollectionTools.transpose(fileDependencies)
    return self.__moduleDependenciesClustered
def find_module_clusters(inModuleDependencies, distanceThreshold, sizeThreshold, size_fun=len):
    """
    Try to further reduce the number of clusters by repeatedly merging pairs
    of existing module clusters.

    In every pass find_min_dist selects the pair to merge, using distance_rel
    as the distance function and distanceThreshold as the limit. The loop
    ends as soon as find_min_dist returns None.

    Only clusters whose size is strictly smaller than sizeThreshold are
    offered as merge candidates, so clusters that have reached the threshold
    are not reduced any further.

    @param inModuleDependencies: Tuple-valued dictionary of the existing
                                 clusters; keys are expected to be iterables
                                 of source elements.
    @param distanceThreshold: Distance limit passed on to find_min_dist.
    @param sizeThreshold: Clusters with size_fun(value) >= sizeThreshold are
                          excluded from the merge candidates.
    @param size_fun: Calculates the size of a value of inModuleDependencies;
                     the default is the length of the value.
    @return: A tuple-valued dictionary, which maps keys of tuples of source
             elements to tuples of target elements.
    """
    moduleDependencies = CollectionTools.value_tuple_to_set(inModuleDependencies)

    # Repeat until a pass no longer finds a pair to merge.
    while True:
        # Determine the subset of keys whose values stay below the size threshold.
        keysBelowThreshold = [key for key, value in moduleDependencies.items()
                              if size_fun(value) < sizeThreshold]
        logging.info("Keys below size threshold %i", len(keysBelowThreshold))

        # Determine the distance for all pairs and pick the pair with the
        # minimal distance.
        minKeyPair = find_min_dist(moduleDependencies.keys(), keysBelowThreshold,
                                   distanceThreshold, distance_rel)
        if minKeyPair is None:
            break

        # TODO join_modules only merges the two clusters, not the actual modules.
        # TODO join_modules would have to be extended for the case that the
        #      keys are already iterables.
        # TODO or is the current behaviour actually preferable?
        logging.info("Joining %s and %s (distance %f)",
                     minKeyPair[0], minKeyPair[1],
                     distance_rel(minKeyPair[0], minKeyPair[1]))
        # The key of the merged cluster is the sorted union of both old keys.
        mergedKey = tuple(sorted(set(minKeyPair[0]) | set(minKeyPair[1])))
        BasicOperations.join_modules(moduleDependencies, mergedKey,
                                     [minKeyPair[0], minKeyPair[1]])

    return CollectionTools.value_set_to_tuple(moduleDependencies)