Example #1
0
 def get_target_groups(self):
     """
     Return the clustered module dependencies, computing them on first use.

     The result is cached in self.__moduleDependenciesClustered: as long as
     that attribute is not None it is returned unchanged and none of the
     processing steps below are executed.

     @return: The cached value, i.e. the result of CollectionTools.transpose
     applied to the file-level dependencies.
     """
     # Identity check on purpose: "== None" would call a custom __eq__ on
     # the cached object and could wrongly trigger a recomputation.
     if self.__moduleDependenciesClustered is not None:
         return self.__moduleDependenciesClustered

     moduleDependencies = OperationProcessor.read_dependencies(self.__inputDictIter)

     # Passed into join_and_delete_modules as a third argument; presumably
     # it is filled with the dependencies of deleted modules -- confirm
     # against join_and_delete_modules.
     deletedDependencies = dict()
     moduleDependencies = self.join_and_delete_modules(moduleDependencies,
                                                       self.__operationDictIter,
                                                       deletedDependencies)

     # Convert both the surviving and the deleted dependencies from module
     # level to file level, then combine them.
     fileDependencies = LocalCollectionTools.convert_module_list_to_file_list(moduleDependencies)
     deletedFileDependencies = LocalCollectionTools.convert_module_list_to_file_list(deletedDependencies)
     fileDependencies = self.join_missing_files(fileDependencies, deletedFileDependencies)
     fileDependencies = CollectionTools.value_set_to_tuple(fileDependencies)

     self.__moduleDependenciesClustered = CollectionTools.transpose(fileDependencies)
     return self.__moduleDependenciesClustered
Example #2
0
def find_module_clusters(inModuleDependencies, distanceThreshold, sizeThreshold, size_fun=len):
    """
    Try to further reduce the number of clusters by merging the existing
    module clusters, if their distance is less than distanceThreshold.

    Merging is repeated until find_min_dist no longer returns a pair of
    keys to join.

    @param inModuleDependencies: The dependencies to cluster; converted with
    CollectionTools.value_tuple_to_set before processing.
    @param distanceThreshold: Passed to find_min_dist as the distance limit
    for merge candidates.
    @param sizeThreshold: Only keys whose value size (according to size_fun)
    is strictly below this threshold are handed to find_min_dist as
    candidates, i.e. clusters that have reached sizeThreshold are not
    reduced any further.
    @param size_fun: Calculates the size of a value of inModuleDependencies;
    the default is the length of the value.

    @return: A tuple-valued dictionary, which maps keys of tuples of source elements
    to tuples of target elements.
    """
    moduleDependencies = CollectionTools.value_tuple_to_set(inModuleDependencies)

    # Repeat until a pass finds no more pair to merge.
    while True:
        # Determine the subset of keys whose values stay below the size threshold.
        keysBelowThreshold = [key for key, value in moduleDependencies.items()
                              if size_fun(value) < sizeThreshold]

        logging.info("Keys below size threshold %i", len(keysBelowThreshold))

        # Determine the distance for all pairs and pick the pair with the
        # minimal distance; None means nothing is left to merge.
        minKeyPair = find_min_dist(moduleDependencies.keys(),
                                   keysBelowThreshold,
                                   distanceThreshold,
                                   distance_rel)
        if minKeyPair is None:
            break

        firstKey = minKeyPair[0]
        secondKey = minKeyPair[1]

        # TODO join_modules only merges the two clusters, not the actual modules.
        # TODO join_modules would have to be extended for the case that the keys
        #      are already iterables.
        # TODO or is the current behaviour actually the better one?
        logging.info("Joining %s and %s (distance %f)",
                     firstKey, secondKey,
                     distance_rel(firstKey, secondKey))
        # The key of the merged cluster is the sorted union of both keys' elements.
        BasicOperations.join_modules(moduleDependencies,
                                     tuple(sorted(set(firstKey) | set(secondKey))),
                                     [firstKey, secondKey])

    return CollectionTools.value_set_to_tuple(moduleDependencies)