def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
     """Cluster ``self.termDocMatrix`` with a new ``NMFCluster``.

     The clusterer is stored on ``self.nmfCluster`` and the requested
     number of bases on ``self.initialNBases``. After clustering, the
     first two dimensions of the clusterer's ``W`` attribute are printed.

     Args:
         initialNBases: Forwarded as the second argument of
             ``NMFCluster.cluster`` (default 200).
         algorithm: Value assigned to the clusterer's ``algorithm``
             attribute before clustering (default ``'SVD'``).
     """
     clusterer = NMFCluster()
     self.nmfCluster = clusterer
     clusterer.algorithm = algorithm
     self.initialNBases = initialNBases
     clusterer.cluster(self.termDocMatrix, self.initialNBases)

     # Report the size of W as "rows, columns".
     wShape = clusterer.W.shape
     print('W: %d, %d' % (wShape[0], wShape[1]))
    def SVDNMFFindGlitchesForSink(self, sink):
        """Cluster the term-document matrix of ``sink`` and report anomalies.

        Builds the matrix via ``self._getTermDocMatrixForSink``, clusters it
        with an ``NMFCluster`` configured from ``self.args.cluster_algo``,
        emits the cluster statistics, prints every anomaly result and
        finally hands the results to ``self.outputCode``.

        Args:
            sink: Passed unchanged to ``self._getTermDocMatrixForSink``.
        """
        sinkMatrix = self._getTermDocMatrixForSink(sink)

        # Imported here, as in the original, so the dependency is only
        # loaded when this method actually runs.
        from mlutils.clustering.NMFCluster import NMFCluster
        clusterer = NMFCluster()
        clusterer.algorithm = self.args.cluster_algo
        clusterer.cluster(sinkMatrix)

        self.outputClusterStatistics(clusterer)

        ranking = self.anomalyDetection(clusterer)

        print('Global Anomaly Ranking:')
        for entry in ranking:
            print(entry)

        self.outputCode(ranking, clusterer)
Exemple #3
0
def main(projectRoot, sinkOfInterest, outputFilename, configuration):
    """Cluster the embedded snippets for one sink and pickle the anomalies.

    Args:
        projectRoot: Passed to ``sinkSnippetEmbedder`` and
            ``anomalyDetection``.
        sinkOfInterest: Passed to ``sinkSnippetEmbedder``.
        outputFilename: Path the anomaly results are pickled to. When
            ``None``, the results are computed but nothing is written.
        configuration: Passed to ``NMFCluster`` and ``sinkSnippetEmbedder``.

    Returns:
        None. If ``sinkSnippetEmbedder`` yields ``None``, a message is
        printed and the function returns before clustering.
    """
    nmfCluster = NMFCluster(configuration)

    termDocMatrix = sinkSnippetEmbedder(projectRoot, sinkOfInterest,
                                        configuration)
    # Identity test, not `== None`: equality can be overloaded by the
    # matrix type (numpy arrays compare element-wise), identity cannot.
    if termDocMatrix is None:
        print('termDocMatrix empty')
        return

    nmfCluster.cluster(termDocMatrix)
    # Optional debugging aid (disabled):
    # basis_vector_printing_thresh = 0.1
    # nmfCluster.printPrototypes(basis_vector_printing_thresh)
    anomalyResults = anomalyDetection(projectRoot, nmfCluster)

    if outputFilename is not None:
        # Binary mode is what pickle expects; the `with` block guarantees
        # the handle is flushed and closed even if dumping raises.
        with open(outputFilename, 'wb') as outputFile:
            pickle.dump(anomalyResults, outputFile)