from sandbox.util.IdIndexer import IdIndexer
from wallhack.influence2.ArnetMinerDataset import ArnetMinerDataset
from wallhack.influence2.GraphRanker import GraphRanker
from wallhack.influence2.RankAggregator import RankAggregator
from sandbox.util.Latex import Latex
from sandbox.util.Util import Util
from sandbox.util.Evaluator import Evaluator

# Script prologue: sets the output switches, builds the ArnetMiner dataset,
# optionally reads the article data, and creates the graph ranker together
# with the list of ranking-method names.
#
# NOTE(review): this text had lost its line breaks and indentation; the
# layout below is a reconstruction. In particular the extent of the
# ``if printDocuments:`` body is assumed to be the two prints and the read
# between them -- confirm against the original file.

# Debug logging is left switched off, as in the original:
#logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

# --- Output switches -------------------------------------------------------
ranLSI = True
printOutputLists = False
printPrecisions = False
printDocuments = True

numpy.set_printoptions(
    suppress=True,
    precision=3,
    linewidth=100,
)

# --- Dataset ---------------------------------------------------------------
dataset = ArnetMinerDataset(runLSI=ranLSI)
# Optional restriction to a single field, disabled in the original:
#dataset.fields = ["Intelligent Agents"]

if printDocuments:
    # The article data is only loaded when documents are going to be printed.
    print("Reading article data")
    authorList, documentList, citationList = dataset.readAuthorsAndDocuments(
        useAbstract=False
    )
    print("Done")

# Evenly spaced values 5, 10, ..., 50.
ns = numpy.arange(5, 55, 5)
# One slot per entry of dataset.fields, initialised to zero.
bestaverageTestPrecisions = numpy.zeros(len(dataset.fields))

# --- Ranking methods -------------------------------------------------------
computeInfluence = True
graphRanker = GraphRanker(
    k=100,
    numRuns=100,
    computeInfluence=computeInfluence,
    p=0.05,
    inputRanking=[1, 2],
)
# "MC2" is appended by hand after the names reported by the ranker.
methodNames = graphRanker.getNames()
methodNames.append("MC2")
# Experiment setup: configures logging and numpy printing, seeds numpy's
# random generator, parses the command line, then builds and configures the
# ArnetMiner dataset (running model selection when authors are not known).
#
# NOTE(review): this text had lost its line breaks; the layout below is a
# reconstruction of the original statement sequence, in the same order.

# --- Logging / numeric output ----------------------------------------------
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.set_printoptions(
    suppress=True,
    precision=3,
    linewidth=160,
)
numpy.random.seed(21)  # fixed seed for numpy's global random generator

# --- Command line ----------------------------------------------------------
parser = argparse.ArgumentParser(description='Run reputation evaluation experiments')
# TODO(review): "Dirchlet" in the help text below is a misspelling of
# "Dirichlet"; the string is kept unchanged here.
parser.add_argument(
    "-r",
    "--runLDA",
    action="store_true",
    help="Run Latent Dirchlet Allocation",
)
parser.add_argument(
    "-d",
    "--useDocs",
    action="store_true",
    help="Use document database to find relevant authors",
)
args = parser.parse_args()

# --- Evaluation parameters -------------------------------------------------
averagePrecisionN = 20
ns = numpy.arange(5, 55, 5)  # 5, 10, ..., 50

# Both flags are opt-in switches, so the defaults are LSI and known authors.
runLSI = not args.runLDA
knownAuthors = not args.useDocs

# --- Dataset ---------------------------------------------------------------
dataset = ArnetMinerDataset(runLSI=runLSI, knownAuthors=knownAuthors)

# Alternative data files that were commented out in the original:
#   DBLP-citation-100000.txt, DBLP-citation-1000000.txt,
#   DBLP-citation-5000000.txt, DBLP-citation-7000000.txt
dataset.dataFilename = dataset.dataDir + "DBLP-citation-Feb21.txt"

dataset.minDf = 10**-4
dataset.ks = list(range(100, 700, 100))  # [100, 200, 300, 400, 500, 600]
dataset.minDfs = [10**-3, 10**-4]

# Overwrite flags for the graph, model and vectoriser are all enabled.
dataset.overwriteGraph = True
dataset.overwriteModel = True
dataset.overwriteVectoriser = True

# Model selection is only run when the document database is used (--useDocs).
if not knownAuthors:
    dataset.modelSelection()