import argparse
import logging
import sys

import numpy

from sandbox.util.Evaluator import Evaluator
from sandbox.util.IdIndexer import IdIndexer
from sandbox.util.Latex import Latex
from sandbox.util.Util import Util
from wallhack.influence2.ArnetMinerDataset import ArnetMinerDataset
from wallhack.influence2.GraphRanker import GraphRanker
from wallhack.influence2.RankAggregator import RankAggregator

# Configure logging before any project code runs. logging.basicConfig() does
# nothing once the root logger already has a handler, and the module-level
# logging.debug()/info() helpers install a default handler on first use. If it
# were configured only after the dataset has been loaded, any earlier root
# logger call would silently discard the DEBUG level and stdout stream.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

# Script switches.
ranLSI = True
printOutputLists = False
printPrecisions = False
printDocuments = True

# Print options in effect while the data is being loaded; they are widened
# again further down, once the rankers have been set up.
numpy.set_printoptions(suppress=True, precision=3, linewidth=100)
dataset = ArnetMinerDataset(runLSI=ranLSI)
#dataset.fields = ["Intelligent Agents"]

if printDocuments:
    print("Reading article data")
    authorList, documentList, citationList = dataset.readAuthorsAndDocuments(useAbstract=False)
    print("Done")

ns = numpy.arange(5, 55, 5)  # 5, 10, ..., 50
# One slot per entry of dataset.fields, initialised to zero.
bestaverageTestPrecisions = numpy.zeros(len(dataset.fields))

computeInfluence = True
graphRanker = GraphRanker(k=100, numRuns=100, computeInfluence=computeInfluence, p=0.05, inputRanking=[1, 2])
# Names reported by the ranker, plus one extra entry labelled "MC2".
methodNames = graphRanker.getNames()
methodNames.append("MC2")

numpy.set_printoptions(suppress=True, precision=3, linewidth=160)
# Fixed seed so that subsequent numpy.random draws are repeatable.
numpy.random.seed(21)

# Command-line options. Both are opt-in switches (store_true), so running the
# script with no arguments gives runLSI=True and knownAuthors=True.
# NOTE(review): the arguments are parsed only after the set-up code earlier in
# the script has run, so --help or a bad flag is reported late.
parser = argparse.ArgumentParser(description='Run reputation evaluation experiments')
parser.add_argument("-r", "--runLDA", action="store_true", help="Run Latent Dirichlet Allocation")
parser.add_argument("-d", "--useDocs", action="store_true", help="Use document database to find relevant authors")
args = parser.parse_args()

averagePrecisionN = 20
ns = numpy.arange(5, 55, 5)  # 5, 10, ..., 50
# The flags are phrased as "use the alternative", so the settings the rest of
# the script consumes are their negations.
runLSI = not args.runLDA
knownAuthors = not args.useDocs

# Build the dataset from the command-line switches. This rebinds `dataset`,
# replacing the instance created earlier in the script.
dataset = ArnetMinerDataset(knownAuthors=knownAuthors, runLSI=runLSI)

# Other data files that have been swapped in here instead of the one below:
#   DBLP-citation-100000.txt, DBLP-citation-1000000.txt,
#   DBLP-citation-5000000.txt, DBLP-citation-7000000.txt
dataset.dataFilename = dataset.dataDir + "DBLP-citation-Feb21.txt"

# Single minDf setting, followed by the lists of ks and minDfs values.
dataset.minDf = 10**-4
dataset.ks = list(range(100, 700, 100))  # 100, 200, ..., 600
dataset.minDfs = [10**-3, 10**-4]

# Turn on all three overwrite flags.
dataset.overwriteGraph = dataset.overwriteModel = dataset.overwriteVectoriser = True

# Model selection is only run when the document database is used (--useDocs).
if not knownAuthors:
    dataset.modelSelection()