Example #1
def main(year, min_count=None, outfile=None):
    """ Main function for training and evaluating AAE methods on IREON data """
    if CLEAN:
        print("Loading data from", DATA_PATH)
        papers = load(DATA_PATH)
        print("Cleaning data...")
        clean(CLEAN_DATA_PATH, papers)
        print("Clean data in {}".format(CLEAN_DATA_PATH))
        return

    print("Loading data from", CLEAN_DATA_PATH)
    papers = load(CLEAN_DATA_PATH)
    print("Unpacking IREON data...")
    bags_of_papers, ids, side_info = unpack_papers(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2)
    print("Loading pre-trained embedding", W2V_PATH)

    with open(outfile, 'a') as fh:
        print("~ Partial List", "~" * 42, file=fh)
    evaluation(BASELINES + RECOMMENDERS)

    with open(outfile, 'a') as fh:
        print("~ Partial List + Titles", "~" * 42, file=fh)
    evaluation(TITLE_ENHANCED)
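
The calls to log(..., logfile=outfile) above rely on a helper the excerpt does not show. A minimal sketch, assuming log simply mirrors print output into the logfile (consistent with the open(outfile, 'a') appends inside main); the exact definition in the source may differ:

# Hedged sketch of the assumed `log` helper, not taken from the excerpt.
def log(*print_args, logfile=None):
    """ Print to stdout and, if a logfile is given, append the same line there """
    if logfile:
        with open(logfile, 'a') as fhandle:
            print(*print_args, file=fhandle)
    print(*print_args)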
Example #2
def main(year, min_count=None, outfile=None, drop=1):
    """ Main function for training and evaluating AAE methods on IREON data """
    if CLEAN:
        print("Loading data from", DATA_PATH)
        papers = load(DATA_PATH)
        print("Cleaning data...")
        clean(CLEAN_DATA_PATH, papers)
        print("Clean data in {}".format(CLEAN_DATA_PATH))
        return

    print("Loading data from", CLEAN_DATA_PATH)
    papers = load(CLEAN_DATA_PATH)
    print("Unpacking IREON data...")
    # Condition-aware variant of unpack_papers: also extracts the metadata used as conditions
    # bags_of_papers, ids, side_info = unpack_papers(papers)
    bags_of_papers, ids, side_info = unpack_papers_conditions(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)
    # `args` is a module-level argparse namespace (not shown in this excerpt)
    if args.compute_mi:
        from aaerec.utils import compute_mutual_info
        print("[MI] Dataset: IREON (fiv)")
        print("[MI] min Count:", min_count)
        tmp = bags.build_vocab(min_count=min_count, max_features=None)
        mi = compute_mutual_info(tmp,
                                 conditions=None,
                                 include_labels=True,
                                 normalize=True)
        with open('mi.csv', 'a') as mifile:
            print('IREON', min_count, mi, sep=',', file=mifile)
        print("=" * 78)
        exit(0)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2, drop=drop)

    # Use only partial citations/labels list (no additional metadata)
    with open(outfile, 'a') as fh:
        print("~ Partial List", "~" * 42, file=fh)
    evaluation(BASELINES + RECOMMENDERS)
    # Use additional metadata (as defined in CONDITIONS for all models but SVD, which uses only titles)
    with open(outfile, 'a') as fh:
        print("~ Conditioned Models", "~" * 42, file=fh)
    evaluation(CONDITIONED_MODELS)
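
Example #2 (and #3) reads a module-level args object that the excerpt never defines. Below is a minimal sketch of an argparse setup that would supply it; only the compute_mi attribute is implied by the code above, so every flag name, default, and help text here is an assumption for illustration:

# Hedged sketch: the excerpt assumes a module-level `args` namespace.
# Only the `compute_mi` attribute is implied by the code; the remaining
# flags and defaults are illustrative assumptions.
import argparse

PARSER = argparse.ArgumentParser()
PARSER.add_argument('year', type=int,
                    help="Year used to split the evaluation set")
PARSER.add_argument('-m', '--min-count', type=int, default=None,
                    help="Minimum occurrence count for keeping an item")
PARSER.add_argument('-o', '--outfile', default='results.log',
                    help="Log file that the evaluation appends to")
PARSER.add_argument('-dr', '--drop', default=1,
                    help="Number or fraction of items to drop from each test bag")
PARSER.add_argument('--compute-mi', default=False, action='store_true',
                    help="Only compute mutual information, append to mi.csv, then exit")
args = PARSER.parse_args()

if __name__ == '__main__':
    main(args.year, min_count=args.min_count, outfile=args.outfile,
         drop=args.drop)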
Example #3
def main(year, dataset, min_count=None, outfile=None, drop=1):
    """ Main function for training and evaluating AAE methods on DBLP data """
    path = DATA_PATH + ("dblp-ref/" if dataset == "dblp" else "acm.txt")
    print("Loading data from", path)
    papers = papers_from_files(path, dataset, n_jobs=4)
    print("Unpacking {} data...".format(dataset))
    bags_of_papers, ids, side_info = unpack_papers(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)
    # `args` is a module-level argparse namespace (not shown in this excerpt)
    if args.compute_mi:
        from aaerec.utils import compute_mutual_info
        print("[MI] Dataset:", dataset)
        print("[MI] min Count:", min_count)
        tmp = bags.build_vocab(min_count=min_count, max_features=None)
        mi = compute_mutual_info(tmp, conditions=None, include_labels=True,
                                 normalize=True)
        with open('mi.csv', 'a') as mifile:
            print(dataset, min_count, mi, sep=',', file=mifile)

        print("=" * 78)
        exit(0)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2, drop=drop)

    # To evaluate the baselines and recommenders without metadata, uncomment one of the following:
    # with open(outfile, 'a') as fh:
    #     print("~ Partial List", "~" * 42, file=fh)
    # evaluation(BASELINES + RECOMMENDERS)
    # evaluation(RECOMMENDERS, batch_size=1000)

    with open(outfile, 'a') as fh:
        print("~ Partial List + Titles + Author + Venue", "~" * 42, file=fh)
    # To evaluate SVD with titles
    # evaluation(TITLE_ENHANCED)
    evaluation(CONDITIONED_MODELS, batch_size=1000)
Example #4
def main(year, dataset, min_count=None, outfile=None):
    """ Main function for training and evaluating AAE methods on DBLP data """
    path = DATA_PATH + ("dblp-ref/" if dataset == "dblp" else "acm.txt")
    print("Loading data from", path)
    papers = papers_from_files(path, dataset, n_jobs=4)
    print("Unpacking {} data...".format(dataset))
    bags_of_papers, ids, side_info = unpack_papers(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2)
    print("Loading pre-trained embedding", W2V_PATH)

    with open(outfile, 'a') as fh:
        print("~ Partial List", "~" * 42, file=fh)
    evaluation(BASELINES + RECOMMENDERS)

    with open(outfile, 'a') as fh:
        print("~ Partial List + Titles", "~" * 42, file=fh)
    evaluation(TITLE_ENHANCED)