def profileModelSelection(self):
    """
    Profile ``modelSelection()`` of an ArnetMinerDataset built with
    ``runLSI=False``, reading the 100000-entry DBLP citation file and with
    the overwrite flags for the dataset, vectoriser and model switched on.
    """
    # The local must keep the name ``dataset``: the command string handed to
    # ProfileUtils.profile refers to it by name and is presumably evaluated
    # against the globals()/locals() passed alongside it.
    dataset = ArnetMinerDataset(runLSI=False)
    dataset.dataFilename = dataset.dataDir + "DBLP-citation-100000.txt"

    # Switch on all three overwrite flags in one statement.
    dataset.overwrite = dataset.overwriteVectoriser = dataset.overwriteModel = True

    ProfileUtils.profile('dataset.modelSelection()', globals(), locals())
def profileComputeLDA(self):
    """
    Profile ``computeLDA()`` of an ArnetMinerDataset created for the
    "Boosting" field, reading the 100000-entry DBLP citation file with
    ``k = 200`` and ``maxRelevantAuthors = 100``.
    """
    field = "Boosting"
    # NOTE(review): other call sites construct the dataset with the keyword
    # ``runLSI=...``; confirm the constructor still takes the field
    # positionally.
    # The local must keep the name ``dataset``: the command string handed to
    # ProfileUtils.profile refers to it by name.
    dataset = ArnetMinerDataset(field)
    dataset.dataFilename = dataset.dataDir + "DBLP-citation-100000.txt"

    # Switch on all three overwrite flags in one statement.
    dataset.overwrite = dataset.overwriteVectoriser = dataset.overwriteModel = True

    # Settings applied for this profiling run.
    dataset.maxRelevantAuthors = 100
    dataset.k = 200

    ProfileUtils.profile('dataset.computeLDA()', globals(), locals())
args = parser.parse_args()

# Experiment settings.
averagePrecisionN = 50
similarityCutoff = 0.30
ns = numpy.arange(5, 105, 5)  # 5, 10, ..., 100

# LSI is used unless the runLDA argument is set.
runLSI = not args.runLDA
dataset = ArnetMinerDataset(runLSI=runLSI)

# Input file selection; uncomment one of the alternatives to switch dataset.
#dataset.dataFilename = dataset.dataDir + "DBLP-citation-100000.txt"
dataset.dataFilename = dataset.dataDir + "DBLP-citation-1000000.txt"
#dataset.dataFilename = dataset.dataDir + "DBLP-citation-7000000.txt"
#dataset.dataFilename = dataset.dataDir + "DBLP-citation-Feb21.txt"

# Switch on the overwrite flags for the graph, model and vectoriser.
dataset.overwriteGraph = True
dataset.overwriteModel = True
dataset.overwriteVectoriser = True

dataset.vectoriseDocuments()
dataset.loadVectoriser()

# Load the document-term matrix from its Matrix Market file and set every
# stored entry to 1, so the sums below count stored entries rather than
# adding up their original values.
X = scipy.io.mmread(dataset.docTermMatrixFilename + ".mtx")
X = X.tocsc()
X.data[:] = 1
print(numpy.max(X.data), numpy.min(X.data))

# The builtin ``int`` replaces ``numpy.int``, which was only an alias for it
# and has been removed from NumPy (deprecated in 1.20, gone in 1.24).
# NOTE(review): sum(0) reduces along axis 0 (one total per column of X) and
# sum(1) along axis 1 (one total per row), so the names look swapped; they
# are kept unchanged in case later code relies on them.
rowSums = numpy.array(X.sum(0), int).flatten()
colSums = numpy.array(X.sum(1), int).flatten()