def main(year, min_count=None, outfile=None):
    """ Main function for training and evaluating AAE methods on IREON data """
    if CLEAN:
        # Preprocessing-only mode: write out the cleaned corpus and stop.
        print("Loading data from", DATA_PATH)
        papers = load(DATA_PATH)
        print("Cleaning data...")
        clean(CLEAN_DATA_PATH, papers)
        print("Clean data in {}".format(CLEAN_DATA_PATH))
        return

    print("Loading data from", CLEAN_DATA_PATH)
    papers = load(CLEAN_DATA_PATH)
    print("Unpacking IREON data...")
    bags_of_papers, ids, side_info = unpack_papers(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2)
    print("Loading pre-trained embedding", W2V_PATH)

    # Evaluate on the partial citation lists alone (no metadata).
    with open(outfile, 'a') as fh:
        print("~ Partial List", "~" * 42, file=fh)
    evaluation(BASELINES + RECOMMENDERS)

    # Evaluate again with paper titles as additional side information.
    with open(outfile, 'a') as fh:
        print("~ Partial List + Titles", "~" * 42, file=fh)
    evaluation(TITLE_ENHANCED)
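# `log` above mirrors print() into the results file. A minimal sketch of such
# a helper, assuming this signature -- the repository's own implementation may
# differ:

def log(*print_args, logfile=None, **kwargs):
    """Print to stdout and, if a logfile is given, append the same output."""
    print(*print_args, **kwargs)
    if logfile is not None:
        with open(logfile, 'a') as fh:
            print(*print_args, file=fh, **kwargs)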
def main(year, min_count=None, outfile=None, drop=1):
    """ Main function for training and evaluating AAE methods on IREON data """
    if CLEAN:
        # Preprocessing-only mode: write out the cleaned corpus and stop.
        print("Loading data from", DATA_PATH)
        papers = load(DATA_PATH)
        print("Cleaning data...")
        clean(CLEAN_DATA_PATH, papers)
        print("Clean data in {}".format(CLEAN_DATA_PATH))
        return

    print("Loading data from", CLEAN_DATA_PATH)
    papers = load(CLEAN_DATA_PATH)
    print("Unpacking IREON data...")
    # bags_of_papers, ids, side_info = unpack_papers(papers)
    bags_of_papers, ids, side_info = unpack_papers_conditions(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)

    # Optionally compute mutual information between items and labels, append
    # it to mi.csv, and exit. `args` is parsed at module level (see the CLI
    # sketch below).
    if args.compute_mi:
        from aaerec.utils import compute_mutual_info
        print("[MI] Dataset: IREON (fiv)")
        print("[MI] min Count:", min_count)
        tmp = bags.build_vocab(min_count=min_count, max_features=None)
        mi = compute_mutual_info(tmp, conditions=None, include_labels=True,
                                 normalize=True)
        with open('mi.csv', 'a') as mifile:
            print('IREON', min_count, mi, sep=',', file=mifile)
        print("=" * 78)
        exit(0)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2, drop=drop)

    # Use only the partial citations/labels list (no additional metadata)
    with open(outfile, 'a') as fh:
        print("~ Partial List", "~" * 42, file=fh)
    evaluation(BASELINES + RECOMMENDERS)

    # Use additional metadata (as defined in CONDITIONS) for all models
    # but SVD, which uses only titles
    with open(outfile, 'a') as fh:
        print("~ Conditioned Models", "~" * 42, file=fh)
    evaluation(CONDITIONED_MODELS)
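# The IREON main above reads a module-level `args` (for --compute-mi) besides
# its keyword parameters. A minimal CLI driver sketch, assuming these flag
# names -- they are hypothetical, not the script's documented interface:

if __name__ == '__main__':
    import argparse

    PARSER = argparse.ArgumentParser()
    PARSER.add_argument('year', type=int,
                        help='split year: older papers train, newer ones test')
    PARSER.add_argument('-m', '--min-count', dest='min_count', type=int,
                        default=None, help='drop items rarer than this count')
    PARSER.add_argument('-o', '--outfile', default='ireon-results.log',
                        help='file that evaluation results are appended to')
    PARSER.add_argument('--drop', type=float, default=1,
                        help='count (>=1) or fraction (<1) of items dropped per test bag')
    PARSER.add_argument('--compute-mi', dest='compute_mi', action='store_true',
                        help='only compute mutual information, then exit')
    # `args` stays module-level so main() can read args.compute_mi as a global.
    args = PARSER.parse_args()
    main(args.year, min_count=args.min_count, outfile=args.outfile,
         drop=args.drop)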
def main(year, dataset, min_count=None, outfile=None, drop=1):
    """ Main function for training and evaluating AAE methods on DBLP data """
    path = DATA_PATH + ("dblp-ref/" if dataset == "dblp" else "acm.txt")
    print("Loading data from", path)
    papers = papers_from_files(path, dataset, n_jobs=4)
    print("Unpacking {} data...".format(dataset))
    bags_of_papers, ids, side_info = unpack_papers(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)

    # Optionally compute mutual information between items and labels, append
    # it to mi.csv, and exit.
    if args.compute_mi:
        from aaerec.utils import compute_mutual_info
        print("[MI] Dataset:", dataset)
        print("[MI] min Count:", min_count)
        tmp = bags.build_vocab(min_count=min_count, max_features=None)
        mi = compute_mutual_info(tmp, conditions=None, include_labels=True,
                                 normalize=True)
        with open('mi.csv', 'a') as mifile:
            print(dataset, min_count, mi, sep=',', file=mifile)
        print("=" * 78)
        exit(0)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2, drop=drop)

    # To evaluate the baselines and the recommenders without metadata
    # (or just the recommenders without metadata):
    # with open(outfile, 'a') as fh:
    #     print("~ Partial List", "~" * 42, file=fh)
    # evaluation(BASELINES + RECOMMENDERS)
    # evaluation(RECOMMENDERS, batch_size=1000)

    with open(outfile, 'a') as fh:
        print("~ Partial List + Titles + Author + Venue", "~" * 42, file=fh)
    # To evaluate SVD with titles:
    # evaluation(TITLE_ENHANCED)
    evaluation(CONDITIONED_MODELS, batch_size=1000)
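# All four mains funnel the corpus through the same triple before wrapping it
# in Bags. Illustrative toy values, assuming this layout (a list of citation
# bags, parallel owner ids, and per-attribute side-info dicts keyed by owner
# id) -- not data or structures taken from the repository:

bags_of_papers = [['p1', 'p2'], ['p2', 'p3', 'p4']]  # cited paper ids per document
ids = ['doc_a', 'doc_b']                             # one owner id per bag
side_info = {'title': {'doc_a': 'Adversarial Autoencoders ...',
                       'doc_b': 'Citation Recommendation ...'}}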
def main(year, dataset, min_count=None, outfile=None):
    """ Main function for training and evaluating AAE methods on DBLP data """
    path = DATA_PATH + ("dblp-ref/" if dataset == "dblp" else "acm.txt")
    print("Loading data from", path)
    papers = papers_from_files(path, dataset, n_jobs=4)
    print("Unpacking {} data...".format(dataset))
    bags_of_papers, ids, side_info = unpack_papers(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2)
    print("Loading pre-trained embedding", W2V_PATH)

    # Evaluate on the partial citation lists alone (no metadata).
    with open(outfile, 'a') as fh:
        print("~ Partial List", "~" * 42, file=fh)
    evaluation(BASELINES + RECOMMENDERS)

    # Evaluate again with paper titles as additional side information.
    with open(outfile, 'a') as fh:
        print("~ Partial List + Titles", "~" * 42, file=fh)
    evaluation(TITLE_ENHANCED)
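# A matching driver sketch for the DBLP/ACM mains, assuming a positional
# `dataset` choice; flag names are hypothetical, not the scripts' actual CLI:

if __name__ == '__main__':
    import argparse

    PARSER = argparse.ArgumentParser()
    PARSER.add_argument('dataset', choices=['dblp', 'acm'],
                        help='which citation corpus to load')
    PARSER.add_argument('year', type=int,
                        help='split year: older papers train, newer ones test')
    PARSER.add_argument('-m', '--min-count', dest='min_count', type=int,
                        default=None, help='drop items rarer than this count')
    PARSER.add_argument('-o', '--outfile', default='dblp-results.log',
                        help='file that evaluation results are appended to')
    args = PARSER.parse_args()
    main(args.year, args.dataset, min_count=args.min_count,
         outfile=args.outfile)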