Example #1
0
def get_citation_count(submission_csv, citation_dict):
    """Load the citation counts of one paper into ``citation_dict``.

    The paper number is parsed from ``submission_csv``, which must contain
    ``paper<digits>.csv``.  Any existing entry for that paper number is
    replaced by a fresh ``{email: count}`` mapping built from the rows that
    ``iterate_csv`` produces for the file.

    Args:
        submission_csv: Path (string) whose name embeds the paper number.
        citation_dict: Mapping updated in place, keyed by paper number (int).

    Raises:
        ValueError: If no paper number can be found in ``submission_csv``,
            if a row does not unpack into exactly two fields, or if a count
            cannot be converted to ``int``.
    """
    # Raw string avoids the invalid "\d" escape sequence; the dot before
    # "csv" is escaped so that only a literal ".csv" is accepted.  The
    # leading greedy ".*" is kept so the last "paper<N>.csv" in the path wins.
    match = re.search(r".*paper(\d+)\.csv", submission_csv)
    if not match:
        raise ValueError("Couldn't get paper number in string: " +
                         submission_csv)
    paper_number = int(match.group(1))

    # Register the (empty) mapping before reading so the paper is present
    # even when the CSV yields no rows.
    counts = {}
    citation_dict[paper_number] = counts
    for email, citcount in iterate_csv(submission_csv):
        counts[email] = int(citcount)
Example #2
0
def read_expertise_db(expertise_csv, exp_list):
    """Build a mapping from column 3 of each row to its selected expertises.

    For every row produced by ``iterate_csv(expertise_csv)``, the fields from
    index 4 onwards are paired positionally with ``exp_list``.  A label is
    kept when its field, stripped of surrounding whitespace, is ``'1'`` or
    ``'2'``.

    Args:
        expertise_csv: Source handed to ``iterate_csv``.
        exp_list: Expertise labels, one per field after the first four.

    Returns:
        dict: ``{row[3]: {'expertises': [label, ...]}}``.

    Raises:
        ValueError: If a row's trailing field count differs from
            ``len(exp_list)``; both lists are printed first.
    """
    selected = ('1', '2')
    db = {}
    for row in iterate_csv(expertise_csv):
        key = row[3]
        marks = row[4:]
        if len(exp_list) != len(marks):
            # Dump both sides to make the mismatch easy to diagnose.
            print(exp_list)
            print(marks)
            raise ValueError("Exp list and expertise db topics do not match "
                             "%d vs. %d" % (len(exp_list), len(marks)))
        chosen = []
        for label, mark in zip(exp_list, marks):
            if mark.strip() in selected:
                chosen.append(label)
        db[key] = {'expertises': chosen}
    return db
Example #3
0
def main():
    """Turn a four-column report into a conflict assignment CSV.

    ``sys.argv[1]`` is the report read through ``iterate_csv``;
    ``sys.argv[2]`` is the output path.  Each report row must unpack into
    ``(valid, paper, email, reasons)``; rows whose ``valid`` field equals
    ``'x'`` produce a ``<paper>,conflict,<email>`` line below the header.
    """
    report = sys.argv[1]
    output_csv = sys.argv[2]

    header = 'paper,assignment,email'
    conflicts = [
        "%s,conflict,%s" % (paper, email)
        for valid, paper, email, _reasons in iterate_csv(report)
        if valid == 'x'
    ]

    # One write: header, conflict lines, and a trailing newline.
    with open(output_csv, 'w') as out:
        out.write('\n'.join([header] + conflicts) + '\n')
Example #4
0
def main():
    """Turn a report into a conflict assignment CSV, by report type.

    ``sys.argv[1]`` is the report read through ``iterate_csv``,
    ``sys.argv[2]`` the output path and ``sys.argv[3]`` the report type.

    * Type ``"susp"``: rows must unpack into five fields
      ``(valid, paper, email, reasons, comment)``; selected rows are written
      as ``<paper>,conflict,<email>``.
    * Any other type: rows must unpack into four fields
      ``(valid, paper, email, reasons)``; selected rows are written as
      ``<paper>,conflict,<email>,chair-confirmed``.

    A row is selected when its ``valid`` field equals ``'x'``.
    """
    report = sys.argv[1]
    output_csv = sys.argv[2]
    types = sys.argv[3]

    lines = ['paper,assignment,email,conflicttype']
    if types == "susp":
        # NOTE(review): these lines have no conflicttype field although the
        # header declares one -- kept as in the original; confirm intent.
        lines.extend(
            "%s,conflict,%s" % (paper, email)
            for valid, paper, email, _reasons, _comment in iterate_csv(report)
            if valid == 'x'
        )
    else:
        lines.extend(
            "%s,conflict,%s,chair-confirmed" % (paper, email)
            for valid, paper, email, _reasons in iterate_csv(report)
            if valid == 'x'
        )

    with open(output_csv, 'w') as out:
        out.write('\n'.join(lines) + '\n')
Example #5
0
def read_expertise_to_topics(topics_to_expertise_csv):
    """Read the expertise/topic table and index it in both directions.

    Each row produced by ``iterate_csv`` holds an expertise in its first
    field, followed by topic fields; empty topic fields are ignored.

    Args:
        topics_to_expertise_csv: Source handed to ``iterate_csv``.

    Returns:
        tuple: ``(e_to_t, t_to_e, exp_list, topic_list)`` where

        * ``e_to_t`` maps each expertise to its list of topics,
        * ``t_to_e`` maps each topic to the list of expertises naming it,
        * ``exp_list`` lists the expertises in row order,
        * ``topic_list`` is the set of all topics seen.
    """
    expertises = []
    all_topics = set()
    by_expertise = {}
    by_topic = {}

    for row in iterate_csv(topics_to_expertise_csv):
        expertise = row[0]
        expertises.append(expertise)

        topics = [field for field in row[1:] if field]
        by_expertise[expertise] = topics
        all_topics.update(topics)
        for topic in topics:
            by_topic.setdefault(topic, []).append(expertise)

    return by_expertise, by_topic, expertises, all_topics
Example #6
0
def main():
    """Cross-reference submissions with PC members and write five reports.

    Positional command-line arguments (``sys.argv``):

    1. institutions CSV
    2. submissions JSON
    3. HotCRP PC member CSV
    4. PC member paper database CSV
    5. output path for the ``'proper'`` report
    6. output path for the ``'collaborators_field'`` report
    7. output path for the ``'declared_by_pc_members'`` report
    8. output path for the ``'dblp'`` report
    9. output path for the ``'fake_conflicts'`` report
    """
    argv = sys.argv
    institutions_path = argv[1]
    submissions_path = argv[2]
    pc_members_path = argv[3]
    pc_papers_path = argv[4]

    proper_path = argv[5]
    paper_collabs_path = argv[6]
    pc_collabs_path = argv[7]
    dblp_path = argv[8]
    fake_path = argv[9]

    # Step 1: load every input -- institutions, submissions, the PC roster
    # and the PC members' publication database.
    print("Reading institutions csv...")
    institutions = Institutions(institutions_path)

    print("Reading submissions:")
    raw_submissions = get_dict_json(submissions_path)
    submissions = [Submission.from_json(entry, institutions)
                   for entry in tqdm(raw_submissions)]

    print("Reading hotcrp pc members:")
    pc_rows = tqdm(iterate_csv(pc_members_path, encoding='utf-8'))
    pc_member_list = [PCMember.from_hotcrp_csv(pc_row, institutions)
                      for pc_row in pc_rows]
    hotcrp_members = {member.email: member for member in pc_member_list}

    print("Reading pc papers:")
    # Start from conflict-free copies of the roster, keyed identically, and
    # attach the publications flagged 'x' in the paper database.
    dblp_members = {}
    for key, member in hotcrp_members.items():
        dblp_members[key] = member.copy_no_conflicts()

    for paper_row in tqdm(iterate_csv(pc_papers_path)):
        (email, _member_id, _firstname, _lastname, _keys, valid,
         pub_key, _pub_title, _pub_year, _pub_authors) = paper_row

        if valid != "x":
            continue
        publication = Publication.from_key(pub_key, institutions)
        dblp_members[email].add_publication(publication)

    print("Cross referencing conflicts")
    for submission in tqdm(submissions):
        # Steps 2-3: conflicts declared by the authors, either properly or
        # through the collaborators field.
        for member in hotcrp_members.values():
            submission.add_collaborator_conflict(member)

        # Step 4: conflicts declared by PC members but not by the authors.
        for member in hotcrp_members.values():
            submission.add_conflicts_from_pc_member(member)

        # Step 5: conflicts nobody declared but that the dblp data reveals.
        for member in dblp_members.values():
            submission.add_conflicts_from_dblp(member)

        # Step 6: compare each roster entry with its dblp counterpart.
        for key, dblp_member in dblp_members.items():
            submission.add_fake_conflicts(hotcrp_members[key], dblp_member)

    report_targets = (
        ('proper', proper_path),
        ('collaborators_field', paper_collabs_path),
        ('declared_by_pc_members', pc_collabs_path),
        ('dblp', dblp_path),
        ('fake_conflicts', fake_path),
    )
    for report_kind, destination in report_targets:
        print_reports(submissions, report_kind, destination)
Example #7
0
 def __init__(self, csv):
     """Index the rows of ``csv`` by their first field.

     ``self.insts`` maps ``row[0]`` to the full row for every row produced
     by ``iterate_csv(csv)``; a later row with the same first field
     replaces an earlier one.
     """
     self.insts = {row[0]: row for row in iterate_csv(csv)}