def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
    """Cluster ``self.termDocMatrix`` with a freshly created NMFCluster.

    The new clusterer is stored on ``self.nmfCluster`` and the requested
    number of bases on ``self.initialNBases``; both are set before the
    clustering call, so they remain in place even if clustering fails.

    Args:
        initialNBases: Value forwarded as the second argument of
            ``NMFCluster.cluster`` (presumably the initial number of
            basis vectors -- confirm against NMFCluster).
        algorithm: Value assigned to the clusterer's ``algorithm``
            attribute before clustering.

    Side effects:
        Prints the first two entries of the resulting ``W.shape``.
    """
    clusterer = NMFCluster()
    self.nmfCluster = clusterer
    clusterer.algorithm = algorithm

    self.initialNBases = initialNBases
    clusterer.cluster(self.termDocMatrix, self.initialNBases)

    # Report the dimensions of the W matrix produced by clustering.
    wShape = clusterer.W.shape
    print('W: %d, %d' % (wShape[0], wShape[1]))
def SVDNMFFindGlitchesForSink(self, sink):
    """Cluster the term-document matrix of ``sink`` and report anomalies.

    Steps, in order:
      1. Fetch the matrix via ``self._getTermDocMatrixForSink(sink)``.
      2. Cluster it with an NMFCluster whose ``algorithm`` is taken from
         ``self.args.cluster_algo``.
      3. Emit cluster statistics via ``self.outputClusterStatistics``.
      4. Run ``self.anomalyDetection`` and print each result on its own
         line under a 'Global Anomaly Ranking:' header.
      5. Hand the results and the clusterer to ``self.outputCode``.

    Args:
        sink: Identifier passed through to ``_getTermDocMatrixForSink``.
    """
    sinkMatrix = self._getTermDocMatrixForSink(sink)

    # Imported at call time, after the matrix has been fetched, exactly
    # as the original code did.
    from mlutils.clustering.NMFCluster import NMFCluster

    clusterer = NMFCluster()
    clusterer.algorithm = self.args.cluster_algo
    clusterer.cluster(sinkMatrix)

    self.outputClusterStatistics(clusterer)
    ranking = self.anomalyDetection(clusterer)

    print('Global Anomaly Ranking:')
    for entry in ranking:
        print(entry)

    self.outputCode(ranking, clusterer)
def main(projectRoot, sinkOfInterest, outputFilename, configuration):
    """Embed the snippets for one sink, cluster them and detect anomalies.

    Args:
        projectRoot: Passed to ``sinkSnippetEmbedder`` and
            ``anomalyDetection``.
        sinkOfInterest: Sink passed to ``sinkSnippetEmbedder``.
        outputFilename: Path the anomaly results are pickled to, or
            ``None`` to skip writing them.
        configuration: Passed to the ``NMFCluster`` constructor and to
            ``sinkSnippetEmbedder``.

    Returns:
        None. When the embedder yields no matrix, 'termDocMatrix empty'
        is printed and the function returns before clustering.
    """
    nmfCluster = NMFCluster(configuration)
    termDocMatrix = sinkSnippetEmbedder(projectRoot, sinkOfInterest,
                                        configuration)

    # Identity test: `== None` would invoke the matrix object's own
    # equality operator, which for array-like types may be element-wise
    # and is then not usable as an `if` condition.
    if termDocMatrix is None:
        print('termDocMatrix empty')
        return

    nmfCluster.cluster(termDocMatrix)

    # Disabled debugging aid kept from the original code:
    #   basis_vector_printing_thresh = 0.1
    #   nmfCluster.printPrototypes(basis_vector_printing_thresh)

    anomalyResults = anomalyDetection(projectRoot, nmfCluster)

    if outputFilename is not None:
        # Binary mode is the portable choice for pickle data, and the
        # `with` block guarantees the file is flushed and closed even if
        # pickling raises.
        with open(outputFilename, 'wb') as outputFile:
            pickle.dump(anomalyResults, outputFile)