Beispiel #1
0
def plot_outcomes():
    """Scatter-plot the two "found" values per key against its first outcome.

    ``getFracFound("fraction_found.csv")`` supplies a mapping of key to a
    pair of values; ``parse_outputs("data/Outcomes.csv")`` supplies, for the
    same keys, a sequence whose first element is used as the y value.
    Every (first value, second value, outcome) triple is echoed to stdout
    before being converted to float.  Both series share the same y values
    and are drawn on one figure: the first value in blue labelled "people",
    the second in red labelled "papers".  The figure is then shown.
    """
    fractions_by_key = getFracFound("fraction_found.csv")
    outcomes_by_key = parse_outputs("data/Outcomes.csv")

    people_x, papers_x, outcome_y = [], [], []
    for key, pair in fractions_by_key.iteritems():
        people_frac, papers_frac = pair
        first_outcome = outcomes_by_key[key][0]
        # Echo the raw (unconverted) values so bad rows are visible
        # before float() gets a chance to reject them.
        print("%s %s %s" % (people_frac, papers_frac, first_outcome))
        people_x.append(float(people_frac))
        papers_x.append(float(papers_frac))
        outcome_y.append(float(first_outcome))

    # Same y axis for both series; only the x values, colour and label differ.
    series = (
        (people_x, "b", "people"),
        (papers_x, "r", "papers"),
    )
    for x_values, colour, name in series:
        plt.scatter(x_values, outcome_y, color=colour, label=name)

    plt.ylabel("Percentage 4*")
    plt.xlabel("Percentage Found")
    plt.legend()
    plt.show()
Beispiel #2
0
        machines['inbox'].run(
                "tc qdisc del dev {iface} root".format(
                    iface=get_iface(config,'inbox')['dev']
                ), sudo=True
        )

        agenda.subtask("collecting results")
        for (m, fname) in config['iteration_outputs']:
            if 'self' not in config or m != config['self']:
                try:
                    if fname.startswith("~/"):
                        fname = fname[2:]
                    m.get(fname, local=os.path.join(config['local_iteration_dir'], os.path.basename(fname)))
                except Exception as e:
                    warn("could not get file {}: {}".format(fname, e), exit=False)

    zulip_notify("{total_exps} experiment(s) finished in **{elapsed}** seconds.".format(
        total_exps=total_exps,
        elapsed=round(total_elapsed,3),
    ), dry=args.dry_run)

    agenda.section("parsing results")
    if not args.dry_run:
        parse_args = {'downsample' : config['args'].downsample}
        if config['args'].rows:
            parse_args['rows'] = config['args'].rows
        if config['args'].cols:
            parse_args['cols'] = config['args'].cols
        config['structure']['bundler_root'] = '.'
        parse_outputs(config, parse_args)
Beispiel #3
0
def main():
    """Build the ranking reports and write them under ``results/``.

    Loads all data via ``load_all_data()``, builds the selection graph with
    ``rank_digraph`` and takes its stationary distribution, then writes four
    text reports:

    * ``rank_institution_by_others.txt`` -- institutions ordered by the
      counts returned in ``count_inst`` by ``out_of_institution``.
    * ``rank_venue_by_ref_paper_ratio.txt`` -- venues ordered by
      ``count_venues[v] * 100 / baseline_venue_count[v]``; only venues with
      a baseline count above 4 are listed.
    * ``rank_venue_stationary.txt`` -- graph nodes ordered by their
      stationary-distribution weight (zero-weight nodes omitted).
    * ``rank_institution_stationary.txt`` -- institutions ordered by the
      summed top-12 venue weights of their authors, each author's sum being
      divided by the number of selected papers of that author's own
      institution; shown next to the rank derived from
      ``data/Outcomes.csv``.

    A single blank line is printed to stdout after the first report.
    """
    (authors_list, authors_map, papers_list, inst_papers_selected,
     institutions, author_papers, inst_papers,
     baseline_venue_count) = load_all_data()
    (count_authors, count_inst, count_venues) = out_of_institution(
        papers_list, authors_map)

    G = rank_digraph(authors_map, inst_papers_selected, author_papers, True)

    # Test new method: the result is not consumed yet, but the call is kept
    # so whatever rank_paper_digraph does (validation, output) still runs.
    rank_paper_digraph(authors_map, inst_papers_selected, author_papers,
                       papers_list)
    (all_nodes, dist) = get_stationary_distribution(G)

    compare_venue_ratios(author_papers, papers_list)

    # Institutions listed by papers used by other institutions.
    with open("results/rank_institution_by_others.txt", "w") as report:
        report.write("Rank Institutions by number of papers used by "
                     "*others* in the REF\n")
        for inst, cnt in count_inst.most_common():
            report.write("%3d | %s\n" % (cnt, institutions[inst]))

    print("")

    # Venues listed by the ratio of REF-submitted to available papers.
    venue_ratios = []
    for venue, cnt in count_venues.most_common():
        baseline = baseline_venue_count[venue]
        if baseline > 0:  # avoid dividing by a zero baseline
            venue_ratios.append((float(cnt) * 100 / baseline, venue))

    with open("results/rank_venue_by_ref_paper_ratio.txt", "w") as report:
        report.write("Rank venues by ratio of REF submitted papers vs. "
                     "available papers\n")
        for ratio, venue in sorted(venue_ratios, reverse=True):
            # Venues with a baseline of 4 or fewer papers are left out.
            if baseline_venue_count[venue] > 4:
                report.write("%2.2f\t%3d\t%s\n"
                             % (ratio, count_venues[venue], venue))

    # Graph nodes ordered by stationary-distribution weight, heaviest first.
    venues = sorted([(node, dist[i]) for i, node in enumerate(all_nodes)],
                    reverse=True, key=lambda pair: pair[1])
    with open("results/rank_venue_stationary.txt", "w") as report:
        report.write("Rank venues by the rank of the stationary distribution "
                     "in the selection graph\n")
        for venue, weight in venues:
            if weight > 0.0:
                report.write("%2.2f | %s\n" % (1000 * weight, venue))

    # Score institutions by quality-research mass.
    venues_juice = dict(venues)
    inst_juice_by_author12 = defaultdict(float)
    for a in author_papers:
        author_inst = authors_map[a]
        # Normalise by the selected-paper count of the author's OWN
        # institution.  (Previously this read a loop variable left over from
        # the first report, so every author was divided by the same,
        # unrelated institution's count.)
        if author_inst not in inst_papers_selected:
            continue
        selected_count = len(inst_papers_selected[author_inst])
        if selected_count == 0:
            continue  # nothing to normalise by; would divide by zero

        juices = sorted(
            (venues_juice[booktitle]
             for authors, title, booktitle, year in author_papers[a]
             if booktitle in venues_juice),
            reverse=True)
        # Only the author's 12 best-weighted outputs count.
        inst_juice_by_author12[author_inst] += sum(juices[:12]) / selected_count

    from parse_outputs import parse_outputs
    outputs = parse_outputs("data/Outcomes.csv")

    def ref_rank_f(x):
        # Weighted sum of the first four outcome columns (weights 4, 3, 2, 1).
        row = outputs[x]
        return row[0] * 4 + row[1] * 3 + row[2] * 2 + row[3] * 1

    ref_sorted_inst = sorted(outputs, reverse=True, key=ref_rank_f)
    ref_rank = dict((inst, i) for i, inst in enumerate(ref_sorted_inst))

    ranked_institutions = sorted(
        institutions, reverse=True,
        key=lambda inst: inst_juice_by_author12[inst])
    with open("results/rank_institution_stationary.txt", "w") as report:
        report.write("Rank institutions by the rank of the stationary "
                     "distribution in the selection graph of the venues "
                     "their staff publish\n")
        for i, inst in enumerate(ranked_institutions):
            score = inst_juice_by_author12[inst]
            if score > 0.005:
                # Columns: our rank (score) | outcome rank (difference) | name
                report.write("%d (%2.2f) | %d (%+d) | %s\n"
                             % (i + 1, score, ref_rank[inst] + 1,
                                ref_rank[inst] - i, institutions[inst]))