def plot_outcomes():
    """Scatter-plot the first outcome value of every entry against its two
    "found" fractions.

    Loads ``fraction_found.csv`` through ``getFracFound`` and
    ``data/Outcomes.csv`` through ``parse_outputs``.  For each key of the
    first mapping, the pair of values is plotted on the x axis (blue series
    labelled "people", red series labelled "papers") against element ``[0]``
    of the matching outcome row on the y axis.  Each triple is also echoed to
    stdout.  The figure is displayed with ``plt.show()``; nothing is returned.

    Raises:
        KeyError: if a key of the fraction mapping has no outcome row.
    """
    dblp_dict = getFracFound("fraction_found.csv")
    outcome_dict = parse_outputs("data/Outcomes.csv")

    found_people = []
    found_papers = []
    frac_4 = []
    # ``items()`` and a single-argument parenthesised print behave the same on
    # Python 2 and Python 3 (the original ``iteritems()`` / print statement
    # only worked on Python 2).
    for k, (v1, v2) in dblp_dict.items():
        outcome = outcome_dict[k][0]
        print("%s %s %s" % (v1, v2, outcome))
        # One loop fills all three lists so their positions stay aligned.
        found_people.append(float(v1))
        found_papers.append(float(v2))
        frac_4.append(float(outcome))

    plt.scatter(found_people, frac_4, color="b", label="people")
    plt.scatter(found_papers, frac_4, color="r", label="papers")
    plt.ylabel("Percentage 4*")
    plt.xlabel("Percentage Found")
    plt.legend()
    plt.show()
# Delete the root qdisc on the inbox machine's network device.
inbox_machine = machines['inbox']
qdisc_cleanup = "tc qdisc del dev {iface} root".format(
    iface=get_iface(config, 'inbox')['dev']
)
inbox_machine.run(qdisc_cleanup, sudo=True)

agenda.subtask("collecting results")
for (host, remote_path) in config['iteration_outputs']:
    # Entries whose machine equals config['self'] (when that key exists) are
    # not fetched.
    should_fetch = 'self' not in config or host != config['self']
    if not should_fetch:
        continue
    try:
        # Drop a leading "~/" before handing the path to the machine.
        if remote_path.startswith("~/"):
            remote_path = remote_path[2:]
        destination = os.path.join(
            config['local_iteration_dir'],
            os.path.basename(remote_path),
        )
        host.get(remote_path, local=destination)
    except Exception as err:
        # Best effort: a file that cannot be fetched is reported, not fatal.
        warn("could not get file {}: {}".format(remote_path, err), exit=False)

finished_msg = "{total_exps} experiment(s) finished in **{elapsed}** seconds.".format(
    total_exps=total_exps,
    elapsed=round(total_elapsed, 3),
)
zulip_notify(finished_msg, dry=args.dry_run)

agenda.section("parsing results")
if not args.dry_run:
    cli_args = config['args']
    parse_args = {'downsample': cli_args.downsample}
    # rows / cols are forwarded only when they are set to a truthy value.
    for option in ('rows', 'cols'):
        value = getattr(cli_args, option)
        if value:
            parse_args[option] = value
    config['structure']['bundler_root'] = '.'
    parse_outputs(config, parse_args)
def main():
    """Load the data set and write four ranking reports under ``results/``.

    Reports written (each starts with a one-line title):

    * ``rank_institution_by_others.txt`` -- every entry of ``count_inst``
      (as returned by ``out_of_institution``), most common first.
    * ``rank_venue_by_ref_paper_ratio.txt`` -- venues ordered by
      ``100 * count_venues[venue] / baseline_venue_count[venue]``; only
      venues whose baseline count exceeds 4 are listed.
    * ``rank_venue_stationary.txt`` -- nodes of the ``rank_digraph`` graph
      ordered by their value in the stationary distribution (positive
      values only, printed multiplied by 1000).
    * ``rank_institution_stationary.txt`` -- institutions ordered by the
      summed top-12 venue scores of their authors, next to their position
      in the ranking derived from ``data/Outcomes.csv``.

    Also calls ``compare_venue_ratios`` and prints one blank line to stdout.
    Returns nothing.
    """
    (authors_list, authors_map, papers_list, inst_papers_selected,
     institutions, author_papers, inst_papers,
     baseline_venue_count) = load_all_data()
    (count_authors, count_inst, count_venues) = out_of_institution(
        papers_list, authors_map)

    G = rank_digraph(authors_map, inst_papers_selected, author_papers, True)
    # Test new method: the result is not used yet; the call is kept so any
    # side effects of building the paper digraph still happen.
    rank_paper_digraph(authors_map, inst_papers_selected, author_papers,
                       papers_list)
    (all_nodes, dist) = get_stationary_distribution(G)
    compare_venue_ratios(author_papers, papers_list)

    # Institutions listed by papers used by other institutions.
    with open("results/rank_institution_by_others.txt", "w") as report:
        report.write("Rank Institutions by number of papers used by "
                     "*others* in the REF\n")
        for inst, cnt in count_inst.most_common():
            report.write("%3d | %s\n" % (cnt, institutions[inst]))
    print("")

    # Venues listed by the ratio of REF-submitted to available papers.
    venue_ratios = [
        (float(cnt) * 100 / baseline_venue_count[venue], venue)
        for venue, cnt in count_venues.most_common()
        if baseline_venue_count[venue] > 0
    ]
    with open("results/rank_venue_by_ref_paper_ratio.txt", "w") as report:
        report.write("Rank venues by ratio of REF submitted papers vs. "
                     "available papers\n")
        for ratio, venue in sorted(venue_ratios, reverse=True):
            # Venues with a baseline of 4 papers or fewer are left out.
            if baseline_venue_count[venue] > 4:
                report.write("%2.2f\t%3d\t%s\n"
                             % (ratio, count_venues[venue], venue))

    # Venues listed by their stationary-distribution value.
    venues = sorted(
        [(node, dist[i]) for i, node in enumerate(all_nodes)],
        reverse=True, key=lambda pair: pair[1])
    with open("results/rank_venue_stationary.txt", "w") as report:
        report.write("Rank venues by the rank of the stationary distribution "
                     "in the selection graph\n")
        for venue, weight in venues:
            if weight > 0.0:
                report.write("%2.2f | %s\n" % (1000 * weight, venue))

    # Score institutions by quality-research mass: every author contributes
    # the sum of the best venue scores among their papers, divided by the
    # number of papers selected by the author's institution.
    venues_juice = dict(venues)
    inst_juice_by_author12 = defaultdict(float)
    inst_juice_by_author4 = defaultdict(float)
    inst_juice_by_author_all = defaultdict(float)
    for a in author_papers:
        home = authors_map[a]
        # BUG FIX: the divisor used to be len(inst_papers_selected[inst]) with
        # ``inst`` left over from the first report loop, so every author was
        # normalised by the same unrelated institution.
        # NOTE(review): assumes inst_papers_selected is keyed by the same
        # institution ids as the values of authors_map -- confirm.
        try:
            n_selected = len(inst_papers_selected[home])
        except KeyError:
            n_selected = 0
        if not n_selected:
            # Nothing to normalise by; the institution's score is unchanged.
            continue

        juices = sorted(
            (venues_juice[booktitle]
             for _authors, _title, booktitle, _year in author_papers[a]
             if booktitle in venues_juice),
            reverse=True)
        inst_juice_by_author12[home] += sum(juices[:12]) / n_selected
        # Include only 4 outputs as in the REF.
        inst_juice_by_author4[home] += sum(juices[:4]) / n_selected
        inst_juice_by_author_all[home] += sum(juices) / n_selected

    from parse_outputs import parse_outputs
    outputs = parse_outputs("data/Outcomes.csv")

    def ref_rank_f(x):
        # Weighted sum of the first four outcome columns (weights 4, 3, 2, 1).
        row = outputs[x]
        return row[0] * 4 + row[1] * 3 + row[2] * 2 + row[3] * 1

    ref_sorted_inst = sorted(outputs, reverse=True, key=ref_rank_f)
    ref_rank = {inst: i for i, inst in enumerate(ref_sorted_inst)}

    by_juice12 = sorted(institutions, reverse=True,
                        key=lambda inst: inst_juice_by_author12[inst])
    with open("results/rank_institution_stationary.txt", "w") as report:
        report.write("Rank institutions by the rank of the stationary "
                     "distribution in the selection graph of the venues "
                     "their staff publish\n")
        for i, inst in enumerate(by_juice12):
            if inst_juice_by_author12[inst] > 0.005:
                # Columns: position (score) | outcome rank (rank delta) | name
                report.write("%d (%2.2f) | %d (%+d) | %s\n"
                             % (i + 1, inst_juice_by_author12[inst],
                                ref_rank[inst] + 1, ref_rank[inst] - i,
                                institutions[inst]))