def get_citation_count(submission_csv, citation_dict):
    """Load the citation counts of one paper into ``citation_dict``.

    The paper number is parsed from ``submission_csv``, which must contain
    ``paper<digits>.csv``. A fresh dict is stored in ``citation_dict`` under
    that number (replacing any existing entry), and every row produced by
    ``iterate_csv(submission_csv)`` is unpacked as ``(email, citcount)`` and
    recorded as ``email -> int(citcount)``.

    Args:
        submission_csv: Path of the per-paper CSV file.
        citation_dict: Mapping updated in place, keyed by paper number.

    Raises:
        ValueError: If no ``paper<digits>.csv`` fragment is found in
            ``submission_csv``, if a row does not unpack into exactly two
            fields, or if a count cannot be converted to ``int``.
    """
    # Raw string: "\d" in a plain literal is an invalid string escape.
    # The dot is escaped so that only a literal ".csv" suffix matches;
    # unescaped, "paper123csv" would match and yield paper number 12.
    number = re.search(r".*paper(\d+)\.csv", submission_csv)
    if not number:
        raise ValueError("Couldn't get paper number in string: "
                         + submission_csv)

    paper_number = int(number.group(1))
    counts = {}
    citation_dict[paper_number] = counts
    for email, citcount in iterate_csv(submission_csv):
        counts[email] = int(citcount)
def read_expertise_db(expertise_csv, exp_list):
    """Map each row's fourth column to the expertises marked in that row.

    For every row produced by ``iterate_csv(expertise_csv)``, the cells from
    index 4 onward are paired positionally with ``exp_list``. A label is kept
    when its cell, stripped of surrounding whitespace, equals '1' or '2'.

    Args:
        expertise_csv: Passed to ``iterate_csv`` to obtain the rows.
        exp_list: Expertise labels, one per cell from index 4 onward.

    Returns:
        A dict keyed by ``row[3]``; each value is a dict with a single
        ``'expertises'`` entry holding the list of selected labels.

    Raises:
        ValueError: If a row's number of cells from index 4 onward differs
            from ``len(exp_list)``. Both lists are printed before raising.
    """
    selected_marks = ('1', '2')
    db = {}
    for row in iterate_csv(expertise_csv):
        key = row[3]
        marks = row[4:]
        db[key] = {}
        if len(exp_list) != len(marks):
            # Dump both sides so the mismatch can be inspected.
            print(exp_list)
            print(marks)
            raise ValueError("Exp list and expertise db topics do not match "
                             "%d vs. %d" % (len(exp_list), len(marks)))
        db[key]['expertises'] = [
            label
            for label, mark in zip(exp_list, marks)
            if mark.strip() in selected_marks
        ]
    return db
def main():
    """Convert a reviewed conflict report into an assignment CSV.

    ``sys.argv[1]`` is the report to read and ``sys.argv[2]`` the file to
    write. Each report row is unpacked into four fields
    ``(valid, paper, email, reasons)``; rows whose first field is ``'x'``
    produce a ``<paper>,conflict,<email>`` line under the
    ``paper,assignment,email`` header. The output ends with a newline.
    """
    report_path = sys.argv[1]
    out_path = sys.argv[2]

    lines = ['paper,assignment,email']
    lines.extend(
        "%s,conflict,%s" % (paper, email)
        for valid, paper, email, _reasons in iterate_csv(report_path)
        if valid == 'x'
    )

    with open(out_path, 'w') as out:
        out.write('\n'.join(lines) + '\n')
def main():
    """Convert a reviewed conflict report into a typed assignment CSV.

    Arguments are taken from ``sys.argv``: the report to read (1), the file
    to write (2) and the report type (3).

    * Type ``"susp"``: rows are unpacked into five fields
      ``(valid, paper, email, reasons, comment)`` and accepted rows are
      written as ``<paper>,conflict,<email>``.
    * Any other type: rows are unpacked into four fields
      ``(valid, paper, email, reasons)`` and accepted rows are written as
      ``<paper>,conflict,<email>,chair-confirmed``.

    A row is accepted when its first field is ``'x'``. The output starts with
    the ``paper,assignment,email,conflicttype`` header and ends with a
    newline.
    """
    report_path = sys.argv[1]
    out_path = sys.argv[2]
    report_type = sys.argv[3]

    lines = ['paper,assignment,email,conflicttype']
    if report_type != "susp":
        lines.extend(
            "%s,conflict,%s,chair-confirmed" % (paper, email)
            for valid, paper, email, _reasons in iterate_csv(report_path)
            if valid == 'x'
        )
    else:
        # NOTE(review): these rows carry three fields although the header
        # names four columns - confirm the consumer accepts a missing
        # conflicttype.
        lines.extend(
            "%s,conflict,%s" % (paper, email)
            for valid, paper, email, _reasons, _comment
            in iterate_csv(report_path)
            if valid == 'x'
        )

    with open(out_path, 'w') as out:
        out.write('\n'.join(lines) + '\n')
def read_expertise_to_topics(topics_to_expertise_csv):
    """Read the expertise/topic table and index it in both directions.

    Each row produced by ``iterate_csv(topics_to_expertise_csv)`` holds an
    expertise in its first cell followed by topic cells; empty (falsy) topic
    cells are ignored.

    Returns:
        A 4-tuple ``(e_to_t, t_to_e, exp_list, topic_list)``:

        * ``e_to_t`` - dict mapping an expertise to its list of topics;
        * ``t_to_e`` - dict mapping a topic to the list of expertises whose
          rows mention it;
        * ``exp_list`` - expertises in row order, one entry per row;
        * ``topic_list`` - a ``set`` (despite its name) of every topic seen.
    """
    expertise_to_topics = {}
    topic_to_expertises = {}
    expertises_in_order = []
    all_topics = set()

    for row in iterate_csv(topics_to_expertise_csv):
        expertise = row[0]
        expertises_in_order.append(expertise)

        topics = list(filter(None, row[1:]))
        expertise_to_topics[expertise] = topics
        all_topics.update(topics)
        for topic in topics:
            topic_to_expertises.setdefault(topic, []).append(expertise)

    return (expertise_to_topics, topic_to_expertises,
            expertises_in_order, all_topics)
def main():
    """Cross-reference submissions with PC member data and write reports.

    Positional command-line arguments (``sys.argv[1]`` to ``sys.argv[9]``):

    1. institutions CSV
    2. submissions JSON
    3. HotCRP PC member CSV
    4. PC member paper database CSV
    5. output path for the 'proper' report
    6. output path for the 'collaborators_field' report
    7. output path for the 'declared_by_pc_members' report
    8. output path for the 'dblp' report
    9. output path for the 'fake_conflicts' report
    """
    institutions_csv = sys.argv[1]
    submissions_json = sys.argv[2]
    hotcrp_pc_member_csv = sys.argv[3]
    pc_member_paper_db_csv = sys.argv[4]
    out_proper = sys.argv[5]
    out_paper_collabs_field = sys.argv[6]
    out_pc_collabs_field = sys.argv[7]
    out_dblp = sys.argv[8]
    out_fake = sys.argv[9]

    # Step 1: read all the inputs (institutions_csv, paper data from hotcrp,
    # pc info from hotcrp and paper db from dblp):
    print("Reading institutions csv...")
    institutions = Institutions(institutions_csv)

    print("Reading submissions:")
    submissions = []
    for paper in tqdm(get_dict_json(submissions_json)):
        submissions.append(Submission.from_json(paper, institutions))

    print("Reading hotcrp pc members:")
    hotcrp_pc_members = {}
    for line in tqdm(iterate_csv(hotcrp_pc_member_csv, encoding='utf-8')):
        member = PCMember.from_hotcrp_csv(line, institutions)
        hotcrp_pc_members[member.email] = member

    print("Reading pc papers:")
    # Start the dblp view from conflict-free copies of the hotcrp members.
    dblp_pc_members = {}
    for member_email, member in hotcrp_pc_members.items():
        dblp_pc_members[member_email] = member.copy_no_conflicts()
    for row in tqdm(iterate_csv(pc_member_paper_db_csv)):
        (email, _member_id, _firstname, _lastname, _keys, valid,
         pub_key, _pub_title, _pub_year, _pub_authors) = row
        if valid != "x":
            continue
        pub = Publication.from_key(pub_key, institutions)
        dblp_pc_members[email].add_publication(pub)

    print("Cross referencing conflicts")
    for submission in tqdm(submissions):
        # Step 2: list conflicts that are declared by authors properly
        # Step 3: list conflicts that are declared by authors
        # in the collaborators field
        for member in hotcrp_pc_members.values():
            submission.add_collaborator_conflict(member)
        # Step 4: list conflicts declared by pc members but undeclared
        # by paper authors
        for member in hotcrp_pc_members.values():
            submission.add_conflicts_from_pc_member(member)
        # Step 5: list conflicts not declared by anyone but caught by dblp
        for dblp_member in dblp_pc_members.values():
            submission.add_conflicts_from_dblp(dblp_member)
        # Step 6: pair each dblp member with its hotcrp counterpart
        for member_email, dblp_member in dblp_pc_members.items():
            submission.add_fake_conflicts(hotcrp_pc_members[member_email],
                                          dblp_member)

    print_reports(submissions, 'proper', out_proper)
    print_reports(submissions, 'collaborators_field',
                  out_paper_collabs_field)
    print_reports(submissions, 'declared_by_pc_members',
                  out_pc_collabs_field)
    print_reports(submissions, 'dblp', out_dblp)
    print_reports(submissions, 'fake_conflicts', out_fake)
def __init__(self, csv):
    """Index the rows read from ``csv`` by their first cell.

    Args:
        csv: Passed to ``iterate_csv``. Every row it yields is stored whole
            in ``self.insts`` under ``row[0]``; a later row with the same
            first cell replaces the earlier one.
    """
    self.insts = {row[0]: row for row in iterate_csv(csv)}