def get_coauthors_from_pubs(rc, pubs, not_person):
    """Collect the distinct co-authors found in ``pubs``, newest paper first.

    Each author string of each publication is paired with a date built from
    the publication's ``year`` and ``month`` (day fixed to 1). Author names
    are resolved against the "people" collection and then "contacts"; a name
    that resolves nowhere is identified by the raw author string. Only the
    first (i.e. most recent) entry per resolved ``_id`` is kept, and entries
    whose author string equals the ``_id``, ``name`` or one of the ``aka``
    values of ``not_person`` are dropped.

    Parameters
    ----------
    rc : object
        Run control; only ``rc.client`` is used here.
    pubs : iterable of dict
        Publications carrying ``year``, ``month`` and optionally ``author``.
    not_person : dict
        The person to exclude; must contain ``_id``, ``name`` and ``aka``.

    Returns
    -------
    list of tuple
        ``(author_string, publication_date)`` pairs.
    """
    excluded_names = [not_person['_id'], not_person['name']] + not_person['aka']

    dated_authors = []
    for pub in pubs:
        pub_date = dt.date(int(pub.get("year")), month_to_int(pub.get("month")), 1)
        for author in pub.get('author', []):
            dated_authors.append((author, pub_date))
    dated_authors.sort(key=lambda pair: pair[1], reverse=True)

    def _lookup(collection_name, author):
        # one fuzzy, case-insensitive search in the named collection
        return fuzzy_retrieval(
            all_docs_from_collection(rc.client, collection_name),
            ["name", "aka", "_id"], author, case_sensitive=False)

    seen_ids, coauthors = [], []
    for entry in dated_authors:
        author = entry[0]
        match = (_lookup("people", author)
                 or _lookup("contacts", author)
                 or {'_id': author})
        match_id = match['_id']
        if match_id in seen_ids or author in excluded_names:
            continue
        coauthors.append(entry)
        seen_ids.append(match_id)
    return coauthors
def get_advisors_name_inst(advisee, rc):
    """Yield the advisee's PhD and postdoc advisors.

    The advisee's ``employment`` and ``education`` entries that carry an
    ``advisor`` key are scanned. An entry contributes a PhD advisor when its
    ``degree`` mentions "phd" or "dphil" (case, periods and spaces ignored,
    so "Ph.D." and "D.Phil." match), and a postdoc advisor when it has an
    ``organization`` key. Each advisor is then looked up in the "contacts"
    collection; advisors that are not found there are silently skipped.

    Parameters
    ----------
    advisee : dict
        Person document of the advisee.
    rc : object
        Run control; only ``rc.client`` is used here.

    Yields
    ------
    tuple
        ``(last name, first name, institution name)``. The institution is the
        canonical name from the "institutions" collection when it resolves,
        otherwise the raw ``institution`` value of the contact (a warning is
        printed in that case).
    """
    def _is_doctorate(entry):
        # The original test was ``'phd' or "dphil" in ...`` which is always
        # true because the non-empty literal 'phd' is truthy on its own.
        degree = str(entry.get("degree") or "").lower()
        degree = degree.replace(".", "").replace(" ", "")
        return "phd" in degree or "dphil" in degree

    my_eme = advisee.get("employment", []) + advisee.get("education", [])
    relevant_emes = [i for i in my_eme if "advisor" in i]
    phd_advisors = [(i.get("advisor"), "phd") for i in relevant_emes
                    if _is_doctorate(i)]
    pdoc_advisors = [(i.get("advisor"), "postdoc") for i in relevant_emes
                     if "organization" in i]
    advisors = phd_advisors + pdoc_advisors
    for advisor in advisors:
        adv = fuzzy_retrieval(all_docs_from_collection(rc.client, "contacts"),
                              ['aka', 'name', '_id'], advisor[0],
                              case_sensitive=False)
        if not adv:
            continue
        advisor_name = HumanName(adv.get("name"))
        inst = fuzzy_retrieval(
            all_docs_from_collection(rc.client, "institutions"),
            ['aka', 'name', '_id'], adv.get("institution"),
            case_sensitive=False)
        if inst:
            yield advisor_name.last, advisor_name.first, inst.get("name", "")
        else:
            print("WARNING: {} not in institutions".format(
                adv.get("institution")))
            yield advisor_name.last, advisor_name.first, adv.get(
                "institution")
def latex(self):
    """Render the current-and-pending LaTeX report for each group's PI.

    For every group the PI is looked up by ``pi_name``, the grants are split
    into current and pending ones, both sets are narrowed with
    ``filter_grants`` for that PI, every remaining grant receives
    ``award_start_date`` / ``award_end_date`` strings, and the
    "current_pending.tex" template is rendered to "cpp.tex" and built.
    """
    current_fields = (
        "begin_year",
        "end_year",
        "begin_month",
        "begin_day",
        "end_month",
        "end_day",
    )
    pending_fields = ("begin_year", "begin_month", "begin_day")
    date_template = "{2}-{1}-{0}"

    for group in self.gtx["groups"]:
        pi = fuzzy_retrieval(self.gtx["people"], ["aka", "name"],
                             group["pi_name"])
        all_grants = list(self.gtx["grants"])

        current_grants = []
        for g in all_grants:
            # fields missing from the grant default to 1
            if is_current(*[g.get(field, 1) for field in current_fields]):
                current_grants.append(g)
        pending_grants = []
        for g in all_grants:
            if is_pending(*[g[field] for field in pending_fields]):
                pending_grants.append(g)

        pi_names = {pi["name"]}
        current_grants, _, _ = filter_grants(
            current_grants, pi_names, pi=False, multi_pi=True
        )
        pi_names = {pi["name"]}
        pending_grants, _, _ = filter_grants(
            pending_grants, pi_names, pi=False, multi_pi=True
        )

        for grant in pending_grants + current_grants:
            start = date_template.format(
                grant["begin_day"],
                month_to_int(grant["begin_month"]),
                grant["begin_year"],
            )
            end = date_template.format(
                grant["end_day"],
                month_to_int(grant["end_month"]),
                grant["end_year"],
            )
            grant.update(award_start_date=start, award_end_date=end)

        self.render(
            "current_pending.tex",
            "cpp.tex",
            pi=pi,
            pending=pending_grants,
            current=current_grants,
            pi_upper=pi["name"].upper(),
            group=group,
        )
        self.pdf("cpp")
def retrieve_names_and_insts(rc, collabs, not_person_akas=None):
    """Resolve name, id and institution for each collaborator, de-duplicated.

    Every collaborator dict is updated in place: ``name`` becomes a
    ``HumanName`` (when the matched person has a name), ``_id`` becomes the
    matched person's id and ``institution`` becomes the canonical institution
    name, or the raw organization when it is not in "institutions". People
    are searched in "people" first and then in "contacts"; when neither
    matches, a warning is printed and the collaborator's own name is used as
    both id and name.

    Parameters
    ----------
    rc : object
        Run control; only ``rc.client`` is used here.
    collabs : iterable of dict
        Collaborator records, each with at least a ``name`` key.
    not_person_akas : list, optional
        Names to exclude from the result. Defaults to no exclusions.

    Returns
    -------
    list of dict
        The first collaborator seen for each distinct ``_id`` whose name is
        not in ``not_person_akas``.
    """
    # ``None`` instead of a shared mutable ``[]`` default
    excluded_names = [] if not_person_akas is None else not_person_akas
    collab_buffer, my_collab_set = [], []
    for collab in collabs:
        person = fuzzy_retrieval(all_docs_from_collection(rc.client, "people"),
                                 ["name", "aka", "_id"], collab["name"],
                                 case_sensitive=False)
        if not person:
            person = fuzzy_retrieval(
                all_docs_from_collection(rc.client, "contacts"),
                ["name", "aka", "_id"], collab["name"],
                case_sensitive=False)
        if not person:
            if collab['name'] == "missing name":
                print(
                    f"WARNING: a {collab.get('advis_type')} appointment "
                    f"was found for the target {collab.get('type')} but "
                    f"no name was specified. Please add an 'advisor' field "
                    f"for that education/employment entry in the database."
                )
            else:
                print(
                    f"WARNING: {collab['name']} not found in contacts or people."
                )
            # fall back to a stub person built from the collaborator's name
            person = {'_id': collab["name"], "name": collab["name"]}

        if person.get("name", ""):
            collab["name"] = HumanName(person.get("name", ""))
        else:
            print("missing_person", person)
        collab["_id"] = person.get('_id')

        pinst = get_recent_org(person)
        inst = fuzzy_retrieval(
            all_docs_from_collection(rc.client, "institutions"),
            ["name", "aka", "_id"], pinst, case_sensitive=False)
        if inst:
            collab["institution"] = inst["name"]
        else:
            collab["institution"] = pinst
            print(
                f"WARNING: {pinst} for {person.get('_id')} missing from institutions"
            )

        if (collab["_id"] not in collab_buffer
                and collab["name"] not in excluded_names):
            my_collab_set.append(collab)
            collab_buffer.append(collab["_id"])
    return my_collab_set
def query_ppl(self, target):
    """Query the data base for the target's collaborators' information.

    Looks ``target`` up in the "people" collection, gathers the person's
    publications (filtered by ``filter_since_date``), derives co-authors,
    advisors and advisees, and merges them into one list sorted by last
    name.

    Returns a dict with ``person_info`` (the person document, whose ``name``
    is replaced by a ``HumanName``) and ``collabs`` (the merged list).

    Raises ``RuntimeError`` when the target is not found in "people".
    """
    rc = self.rc
    gtx = self.gtx
    person = fuzzy_retrieval(all_docs_from_collection(rc.client, "people"),
                             ['aka', 'name', '_id'], target,
                             case_sensitive=False)
    if not person:
        # NOTE(review): the message is encoded, so the exception carries
        # bytes rather than str.
        raise RuntimeError("Person {} not found in people.".format(target).encode('utf-8'))
    pubs = get_person_pubs(gtx["citations"], person)
    pubs = filter_since_date(pubs, rc)
    try:
        if rc.verbose:
            for pub in pubs:
                print(f"{pub.get('title')}, ({pub.get('year')})")
    except AttributeError:
        # presumably guards against an rc without a ``verbose`` attribute;
        # an AttributeError raised while printing is swallowed as well
        pass
    my_collabs = get_coauthors_from_pubs(rc, pubs, person)
    advisors = get_advisors_name_inst(person, rc)
    advisees = get_advisees_name_inst(all_docs_from_collection(rc.client, "people"),
                                      person, rc)
    collabs = []
    # NOTE(review): ``+`` and the ``.get("name")`` calls below assume both
    # helpers return lists of dicts here — confirm against their definitions.
    adviseors = advisors + advisees
    for collab in my_collabs:
        # stays True unless the co-author is also an advisor/advisee
        col_bool = True
        for advis in adviseors:
            if collab.get("name").last == advis.get("name").last \
                    and collab.get("name").first == advis.get("name").first:
                col_bool = False
                # keep the more recent interaction date on the advisor/advisee
                if advis.get("interaction_date"):
                    if collab.get("interaction_date", dt.date.today()) > advis.get("interaction_date", dt.date.today()):
                        advis.update({"interaction_date": collab.get("interaction_date")})
                # NOTE(review): this repeats the comparison above without
                # defaults; a missing date on either side raises TypeError,
                # which is reported and re-raised. The nesting of this ``try``
                # was reconstructed from a whitespace-collapsed source —
                # confirm.
                try:
                    if collab.get("interaction_date") > advis.get("interaction_date"):
                        advis.update({"interaction_date": collab.get("interaction_date")})
                except TypeError:
                    print(f"ERROR: incorrect dates for an education/employment in {collab.get('name')}")
                    print(f"collab date: {collab.get('interaction_date')}, advisee date: {advis.get('interaction_date')}")
                    raise
        if col_bool == True:
            collabs.append(collab)
    collabs.extend(advisees)
    collabs.extend(advisors)
    collabs.sort(key=lambda d: d['name'].last)
    if rc.verbose:
        output = [f"{my_collab.get('name').last}, "
                  f"{my_collab.get('name').first}, "
                  f"{my_collab.get('institution')}, "
                  f"{my_collab.get('interaction_date')}, "
                  f"{my_collab.get('advis_type', '')}, "
                  f"{my_collab.get('type')}\n" for my_collab in collabs]
        print(*output)
    person["name"] = HumanName(person.get("name"))
    results = {
        'person_info': person,
        'collabs': collabs
    }
    return results
def latex(self):
    """Render and build a resume for one selected person or for everybody.

    When ``rc.people`` is set, only its first entry is looked up (fuzzily)
    in the people collection; otherwise every person is processed. For each
    person the publications, employment, education, projects, grants and
    honors are collected, "resume.tex" is rendered to "<_id>.tex" and the
    PDF is built.
    """
    rc = self.rc
    if not rc.people:
        people = self.gtx["people"]
    else:
        selected = fuzzy_retrieval(self.gtx["people"],
                                   ["aka", "_id", "name"], rc.people[0])
        people = [selected]

    for p in people:
        person_id_source = p
        names = frozenset(p.get("aka", []) + [p["name"]])
        citations = all_docs_from_collection(rc.client, "citations")
        pubs = filter_publications(citations, names, reverse=True)
        bibfile = make_bibtex_file(pubs, pid=person_id_source["_id"],
                                   person_dir=self.bldir)

        # employment: drop entries hidden from the CV, then normalise titles
        emp = []
        for job in p.get("employment", []):
            if not job.get("not_in_cv", False):
                emp.append(job)
        for job in emp:
            job['position'] = job.get('position_full',
                                      job.get('position').title())
        emp.sort(key=ene_date_key, reverse=True)

        # education is sorted in place on the person document
        edu = p.get("education", [])
        edu.sort(key=ene_date_key, reverse=True)

        projs = filter_projects(
            all_docs_from_collection(rc.client, "projects"), names)

        grants = list(all_docs_from_collection(rc.client, "grants"))
        proposals = list(all_docs_from_collection(rc.client, "proposals"))
        grants = merge_collections_superior(proposals, grants, "proposal_id")
        pi_grants, pi_amount, _ = filter_grants(grants, names, pi=True)
        coi_result = filter_grants(grants, names, pi=False)
        coi_grants, coi_amount, coi_sub_amount = coi_result

        aghs = awards_grants_honors(p, "honors")
        self.render(
            "resume.tex",
            p["_id"] + ".tex",
            p=p,
            title=p.get("name", ""),
            aghs=aghs,
            pubs=pubs,
            names=names,
            bibfile=bibfile,
            education=edu,
            employment=emp,
            projects=projs,
            pi_grants=pi_grants,
            pi_amount=pi_amount,
            coi_grants=coi_grants,
            coi_amount=coi_amount,
            coi_sub_amount=coi_sub_amount,
        )
        self.pdf(p["_id"])
def get_person(person_id, rc):
    """Return the document matching ``person_id`` from people or contacts.

    The "people" collection is searched first and "contacts" second, both
    with a case-insensitive fuzzy match on ``name``, ``aka`` and ``_id``.
    When neither collection has a match a warning is printed and ``None``
    is returned.
    """
    for collection_name in ("people", "contacts"):
        candidate = fuzzy_retrieval(
            all_docs_from_collection(rc.client, collection_name),
            ["name", "aka", "_id"],
            person_id,
            case_sensitive=False,
        )
        if candidate:
            return candidate
    message = "WARNING: {} missing from people and contacts. Check aka."
    print(message.format(person_id))
    return None
def query_people_and_institutions(rc, names):
    """Resolve names to canonical people, institutions and activity dates.

    Each entry of ``names`` is indexable: element 0 is the name to resolve
    and element 1 is passed through unchanged as the "latest active" value.
    A name is searched in "people" first (its institution then comes from
    ``get_recent_org``) and in "contacts" second (its institution is the
    contact's ``institution`` field, or "missing" when absent). The
    institution is replaced by its canonical name when it is found in
    "institutions"; otherwise the raw value is kept and a warning printed.

    Names found in neither collection are reported and skipped entirely, so
    the three returned lists always have the same length and stay aligned
    index by index.

    Returns
    -------
    tuple of list
        ``(people, institutions, latest_active)``.
    """
    def _lookup(collection_name, key):
        # one fuzzy, case-insensitive search in the named collection
        return fuzzy_retrieval(
            all_docs_from_collection(rc.client, collection_name),
            ["name", "aka", "_id"], key, case_sensitive=False)

    people, institutions, latest_active = [], [], []
    for person_name in names:
        person_found = _lookup("people", person_name[0])
        if person_found:
            inst_key = get_recent_org(person_found)
            inst = _lookup("institutions", inst_key)
        else:
            person_found = _lookup("contacts", person_name[0])
            if not person_found:
                # print the text itself, not the repr of encoded bytes
                print("WARNING: {} not found in contacts or people. Check aka"
                      .format(person_name[0]))
                # skip the whole entry so the result lists stay aligned
                continue
            if "institution" in person_found:
                inst_key = person_found["institution"]
                inst = _lookup("institutions", inst_key)
            else:
                # contact without an institution: nothing to look up
                inst_key, inst = "missing", None

        people.append(person_found['name'])
        if inst:
            institutions.append(inst["name"])
        else:
            institutions.append(inst_key)
            print("WARNING: {} missing from institutions".format(inst_key))
        latest_active.append(person_name[1])
    return people, institutions, latest_active
def sout(self):
    """Print the appointments active on a date or during its semester.

    The reference date is ``rc.date`` ("YYYY-MM-DD") or today. With
    ``rc.semester`` set, the window is the semester from
    ``SEMESTER_START_MONTH`` that contains the reference date (first day of
    its start month through the last day of its end month) and every
    appointment overlapping that window is printed; otherwise only
    appointments that include the reference date are printed.

    Each printed line holds the person id, grant id, account number (empty
    when the grant is unknown or has no alias), loading, begin date and end
    date.

    Side effect: the module-level ``_future_grant`` is re-dated to start on
    the reference date and is appended to the merged grants.
    """
    rc = self.rc
    since, before = None, None
    if rc.date:
        ondate = date(*[int(num) for num in rc.date.split('-')])
    else:
        ondate = date.today()

    if rc.semester:
        for v in SEMESTER_START_MONTH.values():
            if v[0] <= ondate.month <= v[1]:
                since = date(ondate.year, v[0], 1)
                # monthrange() returns (weekday of day 1, days in month);
                # the day count is element 1.
                last_day = calendar.monthrange(ondate.year, v[1])[1]
                before = date(ondate.year, v[1], last_day)

    people = self.gtx['people']
    jg = self.gtx['grants']
    proposals = self.gtx["proposals"]
    grants = merge_collections_superior(proposals, jg, "proposal_id")
    _future_grant["begin_date"] = ondate
    _future_grant["end_date"] = ondate + timedelta(days=2190)
    _future_grant["budget"][0]["begin_date"] = ondate
    _future_grant["budget"][0]["end_date"] = ondate + timedelta(days=2190)
    grants.append(_future_grant)

    for person in people:
        p_appt = person.get('appointments', None)
        if not p_appt:
            continue
        for appt_id, appt in p_appt.items():
            grantid = appt.get('grant')
            if not grantid:
                # report the appointment key (was an undefined name ``k``)
                print("No grant found in {} appt {}".format(
                    person.get('_id'), appt_id))
            grant = fuzzy_retrieval(grants, ["name", "_id", "alias"],
                                    grantid)
            if not grant:
                print("No grant found for {}".format(grantid))
            try:
                accountnr = grant.get('account', grant['alias'])
            except (AttributeError, KeyError, TypeError):
                # no grant matched, or the grant has no alias
                accountnr = ''
            loading = appt.get('loading')
            appt_dates = get_dates(appt)
            bd = appt_dates.get('begin_date')
            ed = appt_dates.get('end_date')
            if since and before:
                is_listed = (ed >= since) and (bd <= before)
            else:
                is_listed = bd <= ondate <= ed
            if is_listed:
                print(person.get('_id'), grantid, accountnr, loading,
                      bd.strftime("%Y-%m-%d"), ed.strftime("%Y-%m-%d"))
def sout(self):
    """Print the contacts that match the command-line search criteria.

    The search terms come from ``rc.name``, ``rc.inst``, ``rc.notes`` and
    ``rc.filter``. When ``rc.date`` is given, the matches are further
    restricted to contacts whose date lies within ``rc.range`` months before
    or after it. A matching contact's ``institution`` is replaced in place by
    the canonical institution name when one is found. With ``rc.verbose`` a
    multi-line record is printed per contact, otherwise a one-line summary.
    """
    rc = self.rc
    collection = self.gtx["contacts"]

    list_search = []
    for field, wanted in (("name", rc.name),
                          ("institution", rc.inst),
                          ("notes", rc.notes)):
        if wanted:
            list_search.extend([field, wanted])
    if rc.filter:
        list_search.extend(rc.filter)

    found = search_collection(collection, list_search)
    filtered_contacts_id = found.strip(' \n').split(' \n')

    if rc.date:
        centre = date_parser.parse(rc.date).date()
        window = {
            "begin_date": (centre - dateutil.relativedelta.relativedelta(
                months=int(rc.range))).isoformat(),
            "end_date": (centre + dateutil.relativedelta.relativedelta(
                months=int(rc.range))).isoformat(),
        }
        in_window = []
        for contact in collection:
            contact_date = get_dates(contact)['date']
            if is_current(window, now=contact_date):
                in_window.append(contact.get('_id'))
        filtered_contacts_id = [
            cid for cid in filtered_contacts_id if cid in in_window
        ]

    verbose_keys = ['_id', 'email', 'institution', 'department', 'notes',
                    'aka']
    pieces = []
    for contact in collection:
        if contact.get('_id') not in filtered_contacts_id:
            continue
        canonical = fuzzy_retrieval(self.gtx['institutions'],
                                    ['name', '_id', 'aka'],
                                    contact.get('institution'))
        if canonical:
            contact['institution'] = canonical.get('name')

        if not rc.verbose:
            pieces.append(
                f"{contact.get('name')} | {contact.get('_id')} |"
                f" institution: {contact.get('institution')} |"
                f" email: {contact.get('email', 'missing')}\n")
            continue

        pieces.append(f"{contact.get('name')}\n")
        for k in verbose_keys:
            value = contact.get(k)
            if not value:
                continue
            if isinstance(value, list):
                lst_expanded = '\n -'.join(map(str, value))
                pieces.append(f" {k}:\n -{lst_expanded}\n")
            else:
                pieces.append(f" {k}: {value}\n")

    print("".join(pieces).strip('\n'))
    return
def test_fuzzy_retrieval():
    """A lower-case id resolves to the only document in the collection."""
    aliases = [
        'Scopatz',
        'Scopatz, A',
        'Scopatz, A.',
        'Scopatz, A M',
        'Anthony Michael Scopatz',
    ]
    person = {'_id': 'scopatz', 'aka': aliases, 'name': 'Anthony Scopatz'}
    found = fuzzy_retrieval([person], ['aka', 'name', '_id'], 'scopatz')
    assert found == person
def latex(self):
    """Render one plain-text proposal review report per review document.

    String-valued ``names``, ``institutions`` and ``freewrite`` fields are
    wrapped into single-element lists (in place on the review document).
    Institutions are replaced by their canonical names when they resolve in
    the institutions collection. The output file is named
    "<id key>_<reviewer>.txt".
    """
    def _listify(review, field):
        # normalise a scalar string field to a one-element list, in place
        if isinstance(review[field], str):
            review[field] = [review[field]]

    for rev in self.gtx["proposalReviews"]:
        outname = "{}_{}".format(_id_key(rev), rev["reviewer"])

        _listify(rev, "names")
        multiauth = len(rev["names"]) > 1
        firstauthorlastname = HumanName(rev["names"][0]).last

        _listify(rev, "institutions")
        institution_names = []
        for raw_inst in rev["institutions"]:
            match = fuzzy_retrieval(self.gtx["institutions"],
                                    ["aka", "name", "_id"], raw_inst)
            institution_names.append(match["name"] if match else raw_inst)

        _listify(rev, "freewrite")

        self.render(
            "propreport.txt",
            "{}.txt".format(outname),
            trim_blocks=True,
            agency=rev["agency"],
            appropriateness=rev["doe_appropriateness_of_approach"],
            title=rev["title"],
            institution=institution_names[0],
            multiauthor=multiauth,
            firstAuthorLastName=firstauthorlastname,
            competency=rev["competency_of_team"],
            adequacy=rev["adequacy_of_resources"],
            does_what=rev["does_what"],
            relevance=rev["doe_relevance_to_program_mission"],
            budget=rev["doe_reasonableness_of_budget"],
            does_how=rev["does_how"],
            goals=rev["goals"],
            importance=rev["importance"],
            summary=rev["summary"],
            freewrite=rev["freewrite"],
            broader_impacts=rev["nsf_broader_impacts"],
            creativity_originality=rev["nsf_create_original_transformative"],
            benefit_to_society=rev["nsf_pot_to_benefit_society"],
            plan_good=rev["nsf_plan_good"],
            advance_knowledge=rev["nsf_pot_to_advance_knowledge"],
        )
def test_fuzzy_retrieval():
    """Exact matches are case sensitive unless ``case_sensitive=False``."""
    person = dict(
        _id="scopatz",
        aka=[
            "Scopatz",
            "Scopatz, A",
            "Scopatz, A.",
            "Scopatz, A M",
            "Anthony Michael Scopatz",
        ],
        name="Anthony Scopatz",
    )

    # the id matches verbatim
    by_id = fuzzy_retrieval([person], ["aka", "name", "_id"], "scopatz")
    assert by_id == person

    # a lower-cased alias does not match under the default settings
    strict = fuzzy_retrieval([person], ["aka", "name", "_id"], "scopatz, a")
    assert strict is None

    # but it does once case is ignored
    relaxed = fuzzy_retrieval(
        [person], ["aka", "name", "_id"], "scopatz, a", case_sensitive=False
    )
    assert relaxed == person
def format_last_first_instutition_names(rc, ppl_names, excluded_inst_name=None):
    """Yield formatted name/institution rows for each person tuple.

    Every element of ``ppl_names`` is indexed at positions 0 (person name),
    1 (institution) and 2 (passed through unchanged). The institution is
    resolved against the "institutions" collection only to decide whether
    the person belongs to ``excluded_inst_name``; people whose resolved
    institution name equals it are skipped.

    This is a generator: nothing is looked up until it is iterated.

    Yields
    ------
    tuple
        ``(last name, "first middle", ppl_tup[1], " ", ppl_tup[2])`` — note
        that the institution yielded is the raw input value, not the
        resolved canonical name.
    """
    for ppl_tup in ppl_names:
        inst = fuzzy_retrieval(
            all_docs_from_collection(rc.client, "institutions"),
            ['aka', 'name', '_id'], ppl_tup[1],
            case_sensitive=False)
        if inst:
            inst_name = inst.get("name", "")
        else:
            inst_name = ppl_tup[1]
        # remove all people who are in the institution of the person
        if inst_name != excluded_inst_name:
            name = HumanName(ppl_tup[0])
            yield (name.last, " ".join([name.first, name.middle]),
                   ppl_tup[1], " ", ppl_tup[2])
def query_ppl(self, target, **filters):
    """Build the NSF collaborator tables for ``target``.

    The target is looked up in "people"; its publications (optionally
    limited by a ``since_date`` filter) provide the co-authors, which are
    combined with advisors, advisees and co-editors.

    Returns a dict with ``person_info``, the formatted tables ``ppl_tab1``,
    ``ppl_tab3``, ``ppl_tab4``, ``ppl_tab5`` and the sorted union
    ``ppl_3tups``.

    Raises ``RuntimeError`` when the target is not found in "people".
    """
    rc, gtx = self.rc, self.gtx

    people_docs = all_docs_from_collection(rc.client, "people")
    person = fuzzy_retrieval(people_docs, ['aka', 'name', '_id'], target,
                             case_sensitive=False)
    if not person:
        message = "Person {} not found in people.".format(target)
        raise RuntimeError(message.encode('utf-8'))

    pubs = get_person_pubs(gtx["citations"], person)
    if 'since_date' in filters:
        pubs = filter_since_date(pubs, filters.get('since_date'))
    try:
        if rc.verbose:
            for pub in pubs:
                print(f"{pub.get('title')}, ({pub.get('year')})")
    except AttributeError:
        pass

    # co-authors and their institutions
    my_collabs = get_coauthors_from_pubs(pubs, person)
    people, institutions = query_people_and_institutions(rc, my_collabs)
    ppl_names = set(zip(people, institutions))
    collab_3tups = set(format_last_first_instutition_names(rc, ppl_names))

    # advising relationships
    advisors_3tups = set(get_advisors_name_inst(person, rc))
    advisees_3tups = set(get_advisees_name_inst(gtx["people"], person, rc))
    everyone = collab_3tups | advisors_3tups | advisees_3tups
    ppl_3tups = sorted(everyone)

    person_3tups = make_person_3tups(person, rc)
    coeditors_info = find_coeditors(person, rc)

    ppl_tab1 = format_to_nsf(person_3tups, '')
    advisor_rows = format_to_nsf(advisors_3tups, 'G:')
    advisee_rows = format_to_nsf(advisees_3tups, 'T:')
    ppl_tab3 = advisor_rows + advisee_rows
    ppl_tab4 = format_to_nsf(collab_3tups, 'A:')
    ppl_tab5 = format_to_nsf(coeditors_info, 'E:')

    return {
        'person_info': person,
        'ppl_tab1': ppl_tab1,
        'ppl_tab3': ppl_tab3,
        'ppl_tab4': ppl_tab4,
        'ppl_tab5': ppl_tab5,
        'ppl_3tups': ppl_3tups,
    }
def get_inst_name(person, rc):
    """Return the name of the institution the person belongs to.

    The institution key is taken from ``get_recent_org`` when the person has
    an ``employment`` entry, else from the person's ``institution`` field,
    else it is the empty string. The key is resolved against the
    "institutions" collection; when it does not resolve, a warning is
    printed and the unresolved key itself is returned.
    """
    if 'employment' in person:
        inst_key = get_recent_org(person)
    elif 'institution' in person:
        inst_key = person.get('institution')
    else:
        inst_key = ''

    institutions = all_docs_from_collection(rc.client, "institutions")
    match = fuzzy_retrieval(institutions, ["name", "aka", "_id"], inst_key,
                            case_sensitive=False)
    if match is None:
        print(f"WARNING: {inst_key} is not found in institutions.")
        return inst_key
    return match.get("name")
def latex(self):
    """Render the current-and-pending LaTeX report for each group's PI.

    Grants are classified with ``is_current`` / ``is_pending`` from their
    begin and end date fields, narrowed to the PI with ``filter_grants``,
    stamped with ``award_start_date`` / ``award_end_date`` strings and handed
    to the 'current_pending.tex' template, which is written to 'cpp.tex' and
    built.
    """
    def _stamp(grant, prefix):
        # "<year>-<month number>-<day>" for the begin_* or end_* fields
        return '{2}-{1}-{0}'.format(
            grant[prefix + '_day'],
            month_to_int(grant[prefix + '_month']),
            grant[prefix + '_year'])

    span_keys = ['begin_day', 'begin_month', 'begin_year',
                 'end_day', 'end_month', 'end_year']
    start_keys = ['begin_day', 'begin_month', 'begin_year']

    for group in self.gtx['groups']:
        pi = fuzzy_retrieval(self.gtx['people'], ['aka', 'name'],
                             group['pi_name'])
        grants = list(self.gtx['grants'])

        # missing span fields default to 1
        current_grants = [
            g for g in grants
            if is_current(*[g.get(key, 1) for key in span_keys])
        ]
        pending_grants = [
            g for g in grants
            if is_pending(*[g[key] for key in start_keys])
        ]

        current_grants, _, _ = filter_grants(
            current_grants, {pi['name']}, pi=False, multi_pi=True)
        pending_grants, _, _ = filter_grants(
            pending_grants, {pi['name']}, pi=False, multi_pi=True)

        grants = pending_grants + current_grants
        for grant in grants:
            begin_stamp = _stamp(grant, 'begin')
            end_stamp = _stamp(grant, 'end')
            grant.update(award_start_date=begin_stamp,
                         award_end_date=end_stamp)

        self.render('current_pending.tex', 'cpp.tex',
                    pi=pi,
                    pending=pending_grants,
                    current=current_grants,
                    pi_upper=pi['name'].upper(),
                    group=group)
        self.pdf('cpp')
def get_advisees_name_inst(coll, advisor, rc):
    """Yield the people in ``coll`` who were advised by ``advisor``.

    A person counts as an advisee when one of their ``education`` entries
    names, as ``advisor``, any of the advisor's ``aka`` values, ``name`` or
    ``_id``. Only the first such entry per person is used.

    Yields
    ------
    tuple
        ``(last name, "first middle", institution)``, where the institution
        is the canonical name from "institutions" when it resolves, else the
        raw ``institution`` of the education entry (with a printed warning).
    """
    advisor_names = advisor.get('aka', [])
    advisor_names = advisor_names + [advisor.get('name'), advisor.get('_id')]

    for person in coll:
        for edu in person.get("education", []):
            if 'advisor' not in edu or edu['advisor'] not in advisor_names:
                continue
            # if edu['status'] == 'postdoc'
            parsed = HumanName(person.get("name"))
            inst_name = edu.get("institution")
            inst = fuzzy_retrieval(
                all_docs_from_collection(rc.client, "institutions"),
                ['aka', 'name', '_id'], inst_name,
                case_sensitive=False)
            first_name = " ".join([parsed.first, parsed.middle])
            if inst is not None:
                yield parsed.last, first_name, inst.get('name', "")
            else:
                print("WARNING: {} not in institutions".format(inst_name))
                yield parsed.last, first_name, inst_name
            # one row per person: stop at the first matching entry
            break
def sout(self):
    """Check appointments against grant burn and report problem cases.

    Collects all 'gra', 'ss' and 'pd' appointments, computes a day-by-day
    burn for every grant that has appointments (plus the module-level
    ``_future_grant``), and prints:

    * appointments on days the grant has no burn entry for ("outdated"),
    * appointments on days where the relevant burn value is below -5
      ("depleted"),
    * grants whose projected end amount exceeds 15.25 ("underspent") or is
      below -30.5 ("overspent").

    Unless ``rc.no_plot`` is set, per-grant and cumulative burn plots are
    built, and shown unless ``rc.no_gui`` is set.

    Raises ``RuntimeError`` when an appointment names a grant that is not
    among the appointed grants, or when a grant's begin/end date differs
    from the span of its budget periods.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source — confirm against the repository.
    """
    rc = self.rc
    outdated, depleted, underspent, overspent = [], [], [], []
    people = list(self.gtx['people'])
    all_appts = collect_appts(people, filter_key='type', filter_value='gra')
    all_appts.extend(
        collect_appts(people, filter_key='type', filter_value='ss'))
    all_appts.extend(
        collect_appts(people, filter_key='type', filter_value='pd'))
    if rc.projection_from_date:
        projection_from_date = date_parser.parse(
            rc.projection_from_date).date()
    else:
        projection_from_date = date.today()

    # collecting amounts and time interval for all grants
    # The shared module-level _future_grant is re-dated in place to run for
    # 2190 days from the projection date.
    _future_grant["begin_date"] = projection_from_date
    _future_grant["end_date"] = projection_from_date + timedelta(days=2190)
    _future_grant["budget"][0]["begin_date"] = projection_from_date
    _future_grant["budget"][0][
        "end_date"] = projection_from_date + timedelta(days=2190)
    _future_grant["burn"] = grant_burn(_future_grant, all_appts)
    all_grants = merge_collections_superior(self.gtx["proposals"],
                                            self.gtx["grants"],
                                            "proposal_id")
    all_grants.append(_future_grant)
    # drop grants whose _id or alias is blacklisted
    most_grants_id = [
        grant for grant in all_grants if grant.get('_id') not in BLACKLIST
    ]
    most_grants = [
        grant for grant in most_grants_id
        if grant.get('alias') not in BLACKLIST
    ]
    collecting_grants_with_appts = []
    for person in self.gtx['people']:
        appts = collect_appts([person],
                              filter_key='type',
                              filter_value='gra')
        appts.extend(
            collect_appts([person], filter_key='type', filter_value='ss'))
        appts.extend(
            collect_appts([person], filter_key='type', filter_value='pd'))
        if len(appts) > 0:
            # the person document is annotated in place with its appointments
            person.update({"appts": appts})
            collecting_grants_with_appts.extend(
                [appt.get("grant") for appt in appts])
    grants_with_appts = list(set(collecting_grants_with_appts))
    # keep only grants referenced (by _id or alias) by some appointment
    appointed_grants = [
        grant for grant in most_grants
        if grant.get("_id") in grants_with_appts
        or grant.get("alias") in grants_with_appts
    ]
    # overall date span covered by the appointed grants
    grants_end, grants_begin = None, None
    for grant in appointed_grants:
        grant['burn'] = grant_burn(grant, all_appts)
        grant_begin = get_dates(grant)['begin_date']
        grant_end = get_dates(grant)['end_date']
        grant.update({"begin_date": grant_begin, "end_date": grant_end})
        if not grants_begin or grant_begin < grants_begin:
            grants_begin = grant_begin
        if not grants_end or grant_end > grants_end:
            grants_end = grant_end

    # checking appointments
    # NOTE(review): cum_months_to_cover is never updated below, so the value
    # printed in the underspend summary is always 0.
    cum_months_to_cover = 0
    for person in self.gtx['people']:
        if not person.get("appts"):
            continue
        appts = person.get("appts")
        person_dates = group_member_employment_start_end(person, "bg")
        # NOTE(review): last_emp, months_to_cover and emps are computed but
        # not used anywhere later in this function.
        last_emp, months_to_cover = 0, 0
        emps = [
            person_date for person_date in person_dates
            if not person_date.get("permanent")
        ]
        emps.sort(key=lambda x: x.get('end_date', 0))
        # called for its side effects; the return value is discarded
        is_fully_appointed(
            person, min(get_dates(appt)['begin_date'] for appt in appts),
            max(get_dates(appt)['end_date'] for appt in appts))
        for appt in appts:
            if appt.get("grant") in BLACKLIST:
                continue
            this_grant = fuzzy_retrieval(appointed_grants, ["_id", "alias"],
                                         appt.get('grant'))
            if not this_grant:
                raise RuntimeError(
                    " grant: {}, person: {}, appointment: {}, grant not found in grants database"
                    .format(appt.get("grant"), person.get("_id"),
                            appt.get("_id")))
            appt_begin, appt_end = get_dates(
                appt)['begin_date'], get_dates(appt)['end_date']
            # flags track whether we are inside an already-reported stretch,
            # so each stretch is reported once rather than once per day
            outdated_period, depleted_period = False, False
            for x in range((appt_end - appt_begin).days + 1):
                day = appt_begin + relativedelta(days=x)
                if not outdated_period:
                    if not this_grant.get('burn'):
                        print(this_grant.get('_id'))
                    if not this_grant['burn'].get(day):
                        # first day of the appointment without a burn entry
                        outdated_period = True
                        outdated.append(
                            " person: {}, appointment: {}, grant: {},\n"
                            " from {} until {}".format(
                                person.get('_id'), appt.get('_id'),
                                appt.get('grant'),
                                str(day) if day < this_grant['begin_date']
                                else this_grant['end_date'] +
                                relativedelta(days=1),
                                str(min(appt_end, this_grant['begin_date']))
                                if day < this_grant['begin_date'] else
                                str(day)))
                else:
                    if this_grant['burn'].get(day):
                        outdated_period = False
                if not (depleted_period or outdated_period):
                    day_burn, this_burn = 0, this_grant['burn']
                    # pick the burn counter that matches the appointment type
                    if appt.get('type') == 'gra':
                        day_burn = this_burn[day]['student_days']
                    elif appt.get('type') == 'pd':
                        day_burn = this_burn[day]['postdoc_days']
                    elif appt.get('type') == 'ss':
                        day_burn = this_burn[day]['ss_days']
                    if day_burn < -5:
                        # FIXME change to display depleted until next >-5 amt instead of appt_end
                        depleted.append(
                            " person: {}, appointment: {}, grant: {},\n"
                            " from {} until {}".format(
                                person['_id'], appt['_id'],
                                appt.get('grant'), str(day),
                                str(appt_end)))
                        depleted_period = True

    # setup for plotting grants
    datearray, cum_student, cum_pd, cum_ss = [], None, None, None
    if not rc.no_plot:
        # one entry per day across the whole span of the appointed grants
        for x in range((grants_end - grants_begin).days + 1):
            datearray.append(grants_begin + relativedelta(days=x))
        cum_student, cum_pd, cum_ss = [0.0] * len(datearray), [
            0.0
        ] * len(datearray), [0.0] * len(datearray)
    plots = []

    # calculating grant surplus and deficit
    cum_underspend = 0
    for grant in appointed_grants:
        tracking = [
            balance for balance in grant.get('tracking', []) if balance
        ]
        # if all_grants[grant]:
        #     tracking = [balance for balance in all_grants[grant].get('tracking',[]) if balance]
        # else:
        #     tracking = []
        if len(tracking) > 0:
            tracking.sort(key=lambda x: x[0])
            recent_balance = tracking[-1]
            # NOTE(review): this divides the stored tracking entry in place,
            # so the grant's own 'tracking' data is modified.
            recent_balance[1] = recent_balance[1] / MONTHLY_COST_QUANTUM
        else:
            recent_balance = [projection_from_date, 0]
        budget_begin = min(
            get_dates(period)['begin_date']
            for period in grant.get('budget'))
        budget_end = max(
            get_dates(period)['end_date'] for period in grant.get('budget'))
        if grant['begin_date'] != budget_begin:
            raise RuntimeError(
                f"grant {grant.get('alias')} does not have a correct budget begin date. "
                f"grant begin: {grant['begin_date']} budget begin: {budget_begin}"
            )
        elif grant['end_date'] != budget_end:
            raise RuntimeError(
                f"grant {grant.get('alias')} does not have a correct budget end date."
                f" grant end: {grant['end_date']} budget end: {budget_end}"
            )
        days_to_go = (grant['end_date'] - projection_from_date).days
        this_burn = grant['burn']
        # total remaining amount on the grant's last day, over all three types
        end_amount = this_burn.get(grant['end_date'])['student_days'] + \
            this_burn.get(grant['end_date'])['ss_days'] + \
            this_burn.get(grant['end_date'])['postdoc_days']
        if end_amount > 15.25:
            # NOTE(review): days_to_go is 0 when the grant ends on the
            # projection date, which would raise ZeroDivisionError here.
            underspent.append((grant['end_date'], grant.get("alias"),
                               round(end_amount / 30.5, 2),
                               round(end_amount / days_to_go, 2)))
            cum_underspend += end_amount
        elif end_amount < -30.5:
            overspent.append(
                " end: {}, grant: {}, overspend amount: {} months".
                format(str(grant['end_date']), grant.get("alias"),
                       round(end_amount / 30.5, 2)))
        # values for individual and cumulative grant burn plots
        if not rc.no_plot:
            grant_dates = [
                grant['begin_date'] + relativedelta(days=x)
                for x in range((grant['end_date'] -
                                grant['begin_date']).days + 1)
            ]
            this_student, this_pd, this_ss = [0.0] * len(grant_dates), [0.0] * len(grant_dates), \
                [0.0] * len(grant_dates)
            # counter indexes the per-grant arrays; x indexes the global ones
            counter = 0
            for x in range(len(datearray)):
                day_burn = this_burn.get(datearray[x])
                if day_burn:
                    this_student[counter] = day_burn['student_days']
                    this_pd[counter] = day_burn['postdoc_days']
                    this_ss[counter] = day_burn['ss_days']
                    cum_student[x] += day_burn['student_days']
                    cum_pd[x] += day_burn['postdoc_days']
                    cum_ss[x] += day_burn['ss_days']
                    counter += 1
            # non-verbose: only plot grants whose last date is no earlier
            # than 730 days before the projection date; verbose: plot all
            if not rc.verbose:
                if max(grant_dates) >= projection_from_date - timedelta(
                        days=730):
                    plots.append(
                        plotter(grant_dates,
                                student=this_student,
                                pd=this_pd,
                                ss=this_ss,
                                title=grant.get("alias"))[0])
            else:
                plots.append(
                    plotter(grant_dates,
                            student=this_student,
                            pd=this_pd,
                            ss=this_ss,
                            title=grant.get("alias"))[0])

    if outdated:
        # messages are ordered by their last ten characters
        outdated.sort(key=lambda mess: mess[-10:])
        print("appointments on outdated grants:")
        for appt in outdated:
            print(appt)
    if depleted:
        depleted.sort(key=lambda mess: mess[-10:])
        print("appointments on depleted grants:")
        for appt in depleted:
            print(appt)
    if underspent:
        underspent.sort(key=lambda x: x[0])
        print("underspent grants:")
        for grant_info in underspent:
            # NOTE(review): recent_balance is whatever the last iteration of
            # the grant loop above left behind, not this grant's balance.
            print(
                f" {grant_info[1]}: end: {grant_info[0]}\n"
                f" projected underspend: {grant_info[2]} months, "
                f"balance as of {recent_balance[0]}: {recent_balance[1]}\n"
                f" required ss+gra burn: {grant_info[3]}")
        print(
            f"cumulative underspend = {round(cum_underspend/30.5, 2)} months, cumulative months to support = {round(cum_months_to_cover, 2)}"
        )
    if overspent:
        print("overspent grants:")
        for grant in overspent:
            print(grant)
    if not rc.no_plot:
        for plot in plots:
            if not rc.no_gui:
                plt.show()
        cum_plot, cum_ax, outp = plotter(datearray,
                                         student=cum_student,
                                         pd=cum_pd,
                                         ss=cum_ss,
                                         title="Cumulative burn")
        if not rc.no_gui:
            plt.show()
        print(outp)
    return
def latex(self):
    """Render the plain-text report for one grant over a reporting period.

    The period runs from ``rc.from_date`` (required) to ``rc.to_date``
    (today when omitted). Exactly one grant must be given in ``rc.grants``.
    The report gathers the grant's projecta, the leads' accepted
    presentations, publications tagged with the grant, the people with
    months on the grant, and the projecta collaborators, and renders them
    with the "grantreport.txt" template into
    "<grant>_report_<begin>_<end>.txt".

    Raises
    ------
    ValueError
        When no start date is given.
    RuntimeError
        When no grant, or more than one grant, is given.
    """
    rc = self.rc
    # Convert date strings to date objects
    if not rc.from_date:
        raise ValueError(
            "ERROR: need begin for the report period. "
            "Please rerun specifying --from and --to in YYYY-MM-DD format."
        )
    rp_start_date = date_parser.parse(rc.from_date).date()
    if not rc.to_date:
        rp_end_date = date.today()
    else:
        rp_end_date = date_parser.parse(rc.to_date).date()
    report_dates = {'begin_date': rp_start_date, 'end_date': rp_end_date}

    # NSF Grant _id
    if not rc.grants:
        raise RuntimeError(
            "Error: no grant specified. Please rerun specifying a grant")
    if isinstance(rc.grants, str):
        rc.grants = [rc.grants]
    if len(rc.grants) > 1:
        raise RuntimeError(
            "Error: more than one grant specified. Please rerun with "
            "only a single grant.")
    grant_id = rc.grants[0]

    # Projecta associated with the grant, excluding checklist deliverables.
    # (An earlier, disabled draft selected projecta by whether they finished
    # in the reporting period or were still current.)
    institutions_coll = self.gtx["institutions"]
    grant_prums = [
        prum for prum in self.gtx['projecta']
        if grant_id in prum.get('grants', [])
        and "checklist" not in prum.get("deliverable").get("scope")
    ]

    # Get people associated with grant
    grant_prums_finished_this_period = [
        prum for prum in grant_prums
        if is_current(report_dates, get_dates(prum).get('end_date'))
    ]
    grant_prum_leads = list(set([prum['lead'] for prum in grant_prums]))
    grant_prum_collaborators = list(
        set([
            collab for prum in grant_prums
            for collab in prum.get('collaborators', [])
        ]))
    # NOTE(review): computed but not used below; only the leads count as
    # grant people.
    grant_prum_group_members = list(
        set([
            grp_mbr for prum in grant_prums
            for grp_mbr in prum.get('group_members', [])
        ]))
    grant_people = grant_prum_leads

    # Accomplishments: unfinished projecta are major activities, projecta
    # that ended in the reporting period are significant results
    major_activities = []
    significant_results = []
    for prum in grant_prums:
        if prum['status'] == "finished":
            continue
        else:
            major_activities.append(prum)
    for prum in grant_prums_finished_this_period:
        significant_results.append(prum)

    # Opportunities for Training and Professional Development
    training_and_professional_development = []
    # presentations
    for id in grant_people:
        training_and_professional_development.extend(
            filter_presentations(self.gtx["people"],
                                 self.gtx["presentations"],
                                 institutions_coll,
                                 id,
                                 types=["all"],
                                 since=rp_start_date,
                                 before=rp_end_date,
                                 statuses=["accepted"]))
    # TODO: thesis defenses are not reported yet (a disabled draft looked
    # for PhD education entries ending within the reporting years).

    # Products: publications tagged with this grant
    publications = [
        publ for publ in self.gtx["citations"]
        if grant_id in publ.get("grant", "")
    ]
    for publ in publications:
        doi = publ.get('doi')
        if doi and doi != 'tbd':
            # NOTE(review): this rebinds the loop variable only; the entry
            # stored in ``publications`` is not replaced, so the author
            # normalisation below is applied to the crossref result instead
            # of the listed publication. Confirm the intent.
            publ = get_formatted_crossref_reference(doi)
        names = [
            HumanName(author).full_name for author in publ.get("author")
        ]
        publ['author'] = names

    # Participants/Organizations
    participants = []
    for person in self.gtx["people"]:
        months_on_grant, months_left = self.months_on(
            grant_id, person, rp_start_date, rp_end_date)
        if months_on_grant > 0:
            participants.append({
                "name": person.get("name"),
                "email": person.get("email"),
                "position": person.get('position'),
                "months_on_grant": int(round(months_on_grant, 0))
            })

    collaborators = {}
    missing_contacts = []
    for id in grant_prum_collaborators:
        for contact in self.gtx["contacts"]:
            if contact["_id"] == id:
                name = contact.get("name")
                aka = contact.get("aka")
                institution_id = contact.get("institution")
                institution = fuzzy_retrieval(institutions_coll,
                                              ["name", "aka", "_id"],
                                              institution_id)
                if institution:
                    inst_name = institution.get("name")
                else:
                    print(
                        f"WARNING: institution {institution_id} not found "
                        f"in institutions collection")
                    inst_name = institution_id
                collaborators[id] = {
                    "aka": aka,
                    "name": name,
                    "institution": inst_name
                }
    # collaborators named in projecta but absent from the contacts collection
    missing_contacts = [
        id for id in grant_prum_collaborators if not collaborators.get(id)
    ]
    missing_contacts = list(set(missing_contacts))
    for person_id in missing_contacts:
        print(
            f"WARNING contact {person_id} not found in contacts collection"
        )

    # Impacts
    begin_date_str = rp_start_date.isoformat()
    end_date_str = rp_end_date.isoformat()
    self.render(
        "grantreport.txt",
        f"{grant_id}_report_{begin_date_str}_{end_date_str}.txt",
        begin_date=begin_date_str,
        end_date=end_date_str,
        majorActivities=major_activities,
        significantResults=significant_results,
        trainingAndProfessionalDevelopment=
        training_and_professional_development,
        # defendedTheses=defended_theses,
        products=publications,
        grantPeople=grant_people,
        participants=participants,
        collaborators=collaborators,
        hline=
        "------------------------------------------------------------------------------"
    )
def sout(self):
    """Print the members of the group named by ``rc.groupname``.

    With ``rc.filter`` set, only the people matching the filter are printed
    (a ``collection_str`` dump, or a two-line summary per person when
    ``rc.verbose`` is set) and nothing else happens.  Otherwise people are
    selected by ``rc.current`` / ``rc.prior``, ranked by the first element of
    their largest ``position_key`` within the group, and printed under
    section headings; ``rc.verbose`` adds contact details and positions.
    """
    gtx = self.gtx
    rc = self.rc
    if rc.filter:
        collection = key_value_pair_filter(gtx["people"], rc.filter)
    else:
        collection = gtx["people"]
    group = fuzzy_retrieval(gtx['groups'], ["_id", "aka", "name"],
                            rc.groupname)
    group_id = group.get("_id")

    if rc.filter:
        if not rc.verbose:
            print(collection_str(collection, rc.keys), end="")
            return
        for person in collection:
            print("{}, {} | group_id: {}".format(
                person.get('name'), person.get('position'),
                person.get('_id')))
            print(" orcid: {} | github_id: {}".format(
                person.get('orcid_id'), person.get('github_id')))
        # A filtered run never produces the grouped listing below.
        return

    people = []
    for person in gtx["people"]:
        if rc.current:
            wanted = bool(person.get('active'))
        elif rc.prior:
            wanted = not person.get('active')
        else:
            wanted = True
        if wanted:
            people.append(person)

    # Keep each person's own position list next to the person so the verbose
    # listing below reports that person's positions (not the positions of
    # whoever happened to be processed last).
    ranked = []
    for person in people:
        employment = person.get('employment') or []
        not_current_positions = [
            emp for emp in employment if not is_current(emp)
        ]
        not_current_positions.sort(key=lambda x: get_dates(x)["end_date"])
        current_positions = [emp for emp in employment if is_current(emp)]
        current_positions.sort(key=lambda x: get_dates(x)["begin_date"])
        positions = not_current_positions + current_positions
        position_keys = [
            position_key(position) for position in positions
            if position.get("group", "") == group_id
        ]
        if position_keys:
            person["position_key"] = max(position_keys)[0]
            ranked.append((person, positions))
        else:
            print(
                f"Person {person['name']} has no positions in group {group_id}"
            )
    ranked.sort(key=lambda entry: entry[0]['position_key'], reverse=True)

    position_names = {
        1: "Undergrads",
        2.5: "Masters Students",
        2: "Visiting Students",
        3: "Graduate Students",
        4: "Post Docs",
        5: "Visitors",
        8: "Assistant Scientists",
        9: "Associate Scientists",
        10: "Scientists",
        11: "PI"
    }
    # ``accounting`` tracks the rank of the section currently being printed;
    # a new heading is emitted every time the rank drops.
    accounting = 12
    for person, positions in ranked:
        if person.get('position_key') < accounting:
            accounting = person.get('position_key')
            print(
                f" -- {position_names.get(accounting,position_names.get(5))} --"
            )
        if not rc.verbose:
            print("{}".format(person.get('name')))
            continue
        print("{}, {}".format(person.get('name'), person.get('position')))
        print(" email: {} | group_id: {}".format(
            person.get('email'), person.get('_id')))
        print(" github_id: {} | orcid: {}".format(
            person.get('github_id'), person.get('orcid_id')))
        for position in positions:
            if is_current(position):
                organization = position.get("organization")
                inst = fuzzy_retrieval(gtx["institutions"],
                                       ["aka", "name", "_id"], organization)
                if inst:
                    instname = inst.get("name")
                else:
                    print(
                        f"WARNING: {position.get('organization')} not in institutions collection"
                    )
                    # Fall back to the raw identifier instead of reusing a
                    # name left over from an earlier position.
                    instname = organization
                print(" current organization: {}".format(instname))
                if 'full_position' in position:
                    shown_position = position.get('full_position')
                else:
                    shown_position = position.get('position').title()
                print(" current position: {}".format(shown_position))
            if not person.get('active'):
                if position.get('group') == "bg":
                    print(" billinge group position: {}".format(
                        position.get('position')))
    return
def latex(self):
    """Render the current-and-pending LaTeX document for every group.

    For each group the PI is looked up from ``group["pi_name"]``, grants and
    proposals are merged, team-member names are normalised against the
    people collection, and the grants are split into current and pending
    lists.  Current grants without a truthy ``cpp_info.cppflag`` are left
    out.  The result is rendered through ``current_pending.tex`` and
    compiled to PDF.
    """
    gtx = self.gtx
    rc = self.rc
    for group in self.gtx["groups"]:
        gtx["grants"] = list(sorted(
            all_docs_from_collection(rc.client, "grants"), key=_id_key
        ))
        gtx["proposals"] = list(sorted(
            all_docs_from_collection(rc.client, "proposals"), key=_id_key
        ))
        grp = group["_id"]
        pi = fuzzy_retrieval(
            self.gtx["people"], ["aka", "name"], group["pi_name"]
        )
        pi['initials'] = "".join(
            part[0] for part in pi["name"].split()
        ).upper()

        grants = merge_collections_all(self.gtx["proposals"],
                                       self.gtx["grants"],
                                       "proposal_id")
        for g in grants:
            g['end_date'] = get_dates(g).get('end_date')
            g['begin_date'] = get_dates(g).get('begin_date',
                                                dt.date(1900, 1, 2))
            for person in g.get("team", []):
                rperson = fuzzy_retrieval(
                    self.gtx["people"], ["aka", "name"], person["name"]
                )
                if rperson:
                    person["name"] = rperson["name"]
            if g.get('budget'):
                amounts = [i.get('amount') for i in g.get('budget')]
                g['subaward_amount'] = sum(amounts)

        current_grants = [dict(g) for g in grants if is_current(g)]
        current_grants, _, _ = filter_grants(
            current_grants, {pi["name"]}, pi=False, multi_pi=True
        )
        current_grants = [g for g in current_grants
                          if g.get("status") != "declined"]
        for g in current_grants:
            if g.get('budget'):
                amounts = [i.get('amount') for i in g.get('budget')]
                g['subaward_amount'] = sum(amounts)

        pending_grants = [
            g for g in self.gtx["proposals"] if is_pending(g["status"])
        ]
        for g in pending_grants:
            for person in g["team"]:
                rperson = fuzzy_retrieval(
                    self.gtx["people"], ["aka", "name"], person["name"]
                )
                if rperson:
                    person["name"] = rperson["name"]
        pending_grants, _, _ = filter_grants(
            pending_grants, {pi["name"]}, pi=False, multi_pi=True
        )

        # Add month/day/year strings for the template to every listed grant.
        for grant in pending_grants + current_grants:
            begin, end = grant.get("begin_date"), grant.get("end_date")
            grant.update(
                award_start_date="{}/{}/{}".format(
                    begin.month, begin.day, begin.year),
                award_end_date="{}/{}/{}".format(
                    end.month, end.day, end.year),
            )

        # A grant with no ``cpp_info`` at all is treated like one whose
        # ``cppflag`` is unset instead of raising AttributeError on None.
        badids = {
            g["_id"] for g in current_grants
            if not (g.get('cpp_info') or {}).get('cppflag', "")
        }
        current_grants = [
            g for g in current_grants if g["_id"] not in badids
        ]

        piname = HumanName(pi["name"])
        outfile = "current-pending-{}-{}".format(grp, piname.last.lower())
        self.render(
            "current_pending.tex",
            outfile + ".tex",
            pi=pi,
            pending=pending_grants,
            current=current_grants,
            pi_upper=pi["name"].upper(),
            group=group,
        )
        self.pdf(outfile)
def latex(self):
    """Render a CV (``cv.tex``) and its PDF for each selected person.

    When ``rc.people`` is given only its first entry is built; otherwise a
    CV is produced for everybody in the people collection.
    """
    rc = self.rc
    gtx = self.gtx
    people = (
        [fuzzy_retrieval(gtx["people"], ["aka", "_id", "name"], rc.people[0])]
        if rc.people else gtx["people"]
    )

    for p in people:
        person_id = p["_id"]
        names = frozenset(p.get("aka", []) + [p["name"], person_id])
        begin_period = date(1650, 1, 1)

        # publications and the bibtex file derived from them
        citations = all_docs_from_collection(rc.client, "citations")
        pubs = filter_publications(citations, names, reverse=True)
        bibfile = make_bibtex_file(pubs, pid=person_id,
                                   person_dir=self.bldir)

        # employment, education and teaching entries
        emps = [
            entry for entry in p.get("employment", [])
            if not entry.get("not_in_cv", False)
        ]
        for entry in emps:
            titled = entry.get('position').title()
            entry['position'] = entry.get('position_full', titled)
        emps.sort(key=ene_date_key, reverse=True)
        edu = p.get("education", [])
        edu.sort(key=ene_date_key, reverse=True)
        for course in p.get("teaching", []):
            course['position'] = course.get('position').title()

        # projects, grants and presentations
        projs = filter_projects(
            all_docs_from_collection(rc.client, "projects"), names)
        just_grants = list(all_docs_from_collection(rc.client, "grants"))
        just_proposals = list(
            all_docs_from_collection(rc.client, "proposals"))
        grants = merge_collections_superior(just_proposals, just_grants,
                                            "proposal_id")
        presentations = filter_presentations(
            self.gtx["people"], self.gtx["presentations"],
            self.gtx["institutions"], p.get("_id"), statuses=["accepted"])
        for grant in grants:
            for member in grant.get("team"):
                dereference_institution(member, self.gtx["institutions"])
        pi_grants, pi_amount, _ = filter_grants(grants, names, pi=True)
        coi_grants, coi_amount, coi_sub_amount = filter_grants(
            grants, names, pi=False)

        aghs = awards_grants_honors(p, "honors")
        service = awards_grants_honors(p, "service", funding=False)

        # TODO: pull this out so we can use it everywhere
        for entry in emps + edu:
            dereference_institution(entry, self.gtx["institutions"])

        # advisees, one query per role
        def advisees(role):
            return filter_employment_for_advisees(
                self.gtx["people"], begin_period, role, person_id)

        undergrads = advisees("undergrad")
        masters = advisees("ms")
        currents = advisees("phd")
        graduateds = advisees("phd")
        postdocs = remove_duplicate_docs(advisees("postdoc"), "name")
        visitors = remove_duplicate_docs(
            advisees("visitor-unsupported"), "name")
        # graduated PhDs are the inactive ones, current PhDs the active ones
        graduateds[:] = [g for g in graduateds if not g.get("active")]
        currents[:] = [g for g in currents if g.get("active")]

        self.render(
            "cv.tex",
            person_id + ".tex",
            p=p,
            title=p.get("name", ""),
            aghs=aghs,
            service=service,
            undergrads=undergrads,
            masters=masters,
            currentphds=currents,
            graduatedphds=graduateds,
            postdocs=postdocs,
            visitors=visitors,
            pubs=pubs,
            names=names,
            bibfile=bibfile,
            education=edu,
            employment=emps,
            presentations=presentations,
            sentencecase=sentencecase,
            monthstyle=month_fullnames,
            projects=projs,
            pi_grants=pi_grants,
            pi_amount=pi_amount,
            coi_grants=coi_grants,
            coi_amount=coi_amount,
            coi_sub_amount=coi_sub_amount,
        )
        self.pdf(person_id)
def latex(self):
    """Render the annual report (``columbia_annual_report.tex``) and its PDF.

    Requires ``rc.people`` (the first entry is the report's subject) and
    ``rc.from_date``; ``rc.to_date`` is optional and defaults to one year
    minus a day after ``rc.from_date``.

    Raises
    ------
    RuntimeError
        If ``rc.people`` or ``rc.from_date`` is not set.
    """
    rc = self.rc
    if not rc.people:
        raise RuntimeError("ERROR: please rerun specifying --people name")
    if not rc.from_date:
        raise RuntimeError("ERROR: please rerun specifying --from")
    build_target = get_id_from_name(
        all_docs_from_collection(rc.client, "people"), rc.people[0])
    begin_year = int(rc.from_date.split("-")[0])
    begin_period = date_parser.parse(rc.from_date).date()
    pre_begin_period = begin_period - relativedelta(years=1)
    if rc.to_date:
        to_date = date_parser.parse(rc.to_date).date()
        end_period = to_date
        post_end_period = to_date + relativedelta(years=1)
    else:
        end_period = begin_period + relativedelta(years=1) - relativedelta(
            days=1)
        post_end_period = begin_period + relativedelta(
            years=2) - relativedelta(days=1)

    me = [p for p in self.gtx["people"] if p["_id"] == build_target][0]
    me["begin_period"] = begin_period.strftime("%m/%d/%Y")
    me["pre_begin_period"] = pre_begin_period.strftime("%m/%d/%Y")
    me["end_period"] = end_period.strftime("%m/%d/%Y")
    me["post_end_period"] = post_end_period.strftime("%m/%d/%Y")
    projs = filter_projects(self.gtx["projects"], set([build_target]),
                            group="bg")

    #########
    # highlights
    #########
    for proj in projs:
        if proj.get('highlights'):
            proj["current_highlights"] = False
            for highlight in proj.get('highlights'):
                highlight_date = dt.date(
                    highlight.get("year"),
                    month_to_int(highlight.get("month", 1)), 1)
                if begin_period < highlight_date < end_period:
                    highlight["is_current"] = True
                    proj["current_highlights"] = True

    #########
    # current and pending
    #########
    pi = fuzzy_retrieval(self.gtx["people"], ["aka", "name", "_id"],
                         build_target)
    grants = merge_collections_superior(self.gtx["proposals"],
                                        self.gtx["grants"], "proposal_id")
    for g in grants:
        for person in g["team"]:
            rperson = fuzzy_retrieval(self.gtx["people"], ["aka", "name"],
                                      person["name"])
            if rperson:
                person["name"] = rperson["name"]
        if g.get('budget'):
            amounts = [i.get('amount') for i in g.get('budget')]
            g['subaward_amount'] = sum(amounts)
    current_grants = [dict(g) for g in grants if is_current(g)]
    current_grants, _, _ = filter_grants(current_grants, {pi["name"]},
                                         pi=False, multi_pi=True)
    pending_grants = [
        g for g in self.gtx["proposals"] if g["status"] == "pending"
    ]
    for g in pending_grants:
        for person in g["team"]:
            rperson = fuzzy_retrieval(self.gtx["people"], ["aka", "name"],
                                      person["name"])
            if rperson:
                person["name"] = rperson["name"]
    pending_grants, _, _ = filter_grants(pending_grants, {pi["name"]},
                                         pi=False, multi_pi=True)
    for grant in pending_grants + current_grants:
        grant.update(
            award_start_date="{2}/{1}/{0}".format(
                grant["begin_day"],
                month_to_int(grant["begin_month"]),
                grant["begin_year"],
            ),
            award_end_date="{2}/{1}/{0}".format(
                grant["end_day"],
                month_to_int(grant["end_month"]),
                grant["end_year"],
            ),
        )
    # Only grants with a truthy ``cpp_info.cppflag`` stay in the current
    # list; a grant without ``cpp_info`` counts as not flagged.
    badids = {
        g["_id"] for g in current_grants
        if not (g.get('cpp_info') or {}).get('cppflag', "")
    }
    current_grants = [g for g in current_grants if g["_id"] not in badids]

    #########
    # advising
    #########
    undergrads = filter_employment_for_advisees(self.gtx["people"],
                                                begin_period, "undergrad")
    masters = filter_employment_for_advisees(self.gtx["people"],
                                             begin_period, "ms")
    currents = filter_employment_for_advisees(self.gtx["people"],
                                              begin_period, "phd")
    graduateds = filter_employment_for_advisees(
        self.gtx["people"], begin_period.replace(year=begin_year - 5),
        "phd")
    postdocs = filter_employment_for_advisees(self.gtx["people"],
                                              begin_period, "postdoc")
    visitors = filter_employment_for_advisees(self.gtx["people"],
                                              begin_period,
                                              "visitor-unsupported")
    graduateds = [g for g in graduateds if not g.get("active")]
    currents = [g for g in currents if g.get("active")]

    ######################
    # service
    #####################
    def fresh_me():
        # Every filter call gets its own deep copy of ``me``, as in the
        # original code (presumably because the filter_* helpers modify the
        # documents they receive -- confirm against their implementation).
        return [deepcopy(me)]

    dept_service = filter_service(fresh_me(), begin_period, "department")
    school_service = filter_service(fresh_me(), begin_period, "school")
    uni_service = filter_service(fresh_me(), begin_period, "university")
    uni_service.extend(school_service)
    prof_service = filter_service(fresh_me(), begin_period, "profession")
    outreach = filter_service(fresh_me(), begin_period, "outreach")
    lab = filter_facilities(fresh_me(), begin_period, "research")
    shared = filter_facilities(fresh_me(), begin_period, "shared")
    fac_other = filter_facilities(fresh_me(), begin_period, "other")
    fac_teaching = filter_facilities(fresh_me(), begin_period, "teaching")
    fac_wishlist = filter_facilities(fresh_me(), begin_period,
                                     "research_wish", verbose=False)
    tch_wishlist = filter_facilities(fresh_me(), begin_period,
                                     "teaching_wish")
    curric_dev = filter_activities(fresh_me(), begin_period, "teaching")
    other_activities = filter_activities(fresh_me(), begin_period, "other")

    ##########################
    # Presentation list
    ##########################
    def presentations_of(types, statuses):
        return filter_presentations(self.gtx["people"],
                                    self.gtx["presentations"],
                                    self.gtx["institutions"],
                                    build_target,
                                    types=types,
                                    since=begin_period,
                                    before=end_period,
                                    statuses=statuses)

    keypres = presentations_of(["award", "plenary", "keynote"],
                               ["accepted"])
    invpres = presentations_of(["invited"], ["accepted"])
    sempres = presentations_of(["colloquium", "seminar"], ["accepted"])
    declpres = presentations_of(["all"], ["declined"])

    #########################
    # Awards
    #########################
    ahs = awards(me, since=begin_period)

    ########################
    # Publications
    ########################
    names = frozenset(me.get("aka", []) + [me["name"]])
    pubs = filter_publications(all_docs_from_collection(
        rc.client, "citations"),
        names,
        reverse=True,
        bold=False,
        since=begin_period)
    bibfile = make_bibtex_file(pubs, pid=me["_id"], person_dir=self.bldir)
    # Exact match: the previous ``in "article"`` was a substring test and
    # raised TypeError for entries without an entrytype.
    articles = [prc for prc in pubs if prc.get("entrytype") == "article"]
    nonarticletypes = [
        "book", "inbook", "proceedings", "inproceedings", "incollection",
        "unpublished", "phdthesis", "misc"
    ]
    nonarticles = [
        prc for prc in pubs if prc.get("entrytype") in nonarticletypes
    ]
    # The list is passed to the template as-is.  The former follow-up loop
    # replaced it with a single dict and called ``pubs.pop(<dict>)``, which
    # raises TypeError; ``pubs`` is not used after this point, so that loop
    # is dropped.
    peer_rev_conf_pubs = [prc for prc in pubs if prc.get("peer_rev_conf")]

    ##############
    # TODO: add Current Projects to Research summary section
    ##############

    #############
    # IP
    #############
    patents = filter_patents(self.gtx["patents"], self.gtx["people"],
                             build_target, since=begin_period)
    licenses = filter_licenses(self.gtx["patents"], self.gtx["people"],
                               build_target, since=begin_period)
    #############
    # hindex
    #############
    # last entry after sorting by date
    hindex = sorted(me["hindex"], key=doc_date_key).pop()

    #########################
    # render
    #########################
    self.render(
        "columbia_annual_report.tex",
        "billinge-ann-report.tex",
        pi=pi,
        p=me,
        projects=projs,
        pending=pending_grants,
        current=current_grants,
        undergrads=undergrads,
        masters=masters,
        currentphds=currents,
        graduatedphds=graduateds,
        postdocs=postdocs,
        visitors=visitors,
        dept_service=dept_service,
        uni_service=uni_service,
        prof_service=prof_service,
        outreach=outreach,
        lab=lab,
        shared=shared,
        facilities_other=fac_other,
        fac_teaching=fac_teaching,
        fac_wishlist=fac_wishlist,
        tch_wishlist=tch_wishlist,
        curric_dev=curric_dev,
        other_activities=other_activities,
        keypres=keypres,
        invpres=invpres,
        sempres=sempres,
        declpres=declpres,
        sentencecase=sentencecase,
        monthstyle=month_fullnames,
        ahs=ahs,
        pubs=articles,
        nonarticles=nonarticles,
        peer_rev_conf_pubs=peer_rev_conf_pubs,
        bibfile=bibfile,
        patents=patents,
        licenses=licenses,
        hindex=hindex,
    )
    self.pdf("billinge-ann-report")
def meetings(self):
    """Render the group-meetings page (``grpmeetings.html``).

    Lead, scribe and presenter identifiers are replaced by the matching
    person's name.  When no match is found a warning is printed and the
    value stored in the meeting document is left as it is.  Meetings dated
    today or later are listed as future meetings (ascending by date), all
    others as past meetings (most recent first).
    """
    rc = self.rc
    mtgsi = all_docs_from_collection(rc.client, "meetings")
    today = dt.date.today()
    pp_mtgs, f_mtgs = [], []

    def find_person(value):
        return fuzzy_retrieval(
            all_docs_from_collection(rc.client, "people"),
            ["_id", "name", "aka"], value)

    for mtg in mtgsi:
        if not mtg.get('lead'):
            print("{} missing a meeting lead".format(mtg["_id"]))
        if not mtg.get('scribe'):
            print("{} missing a meeting scribe".format(mtg["_id"]))

        lead = find_person(mtg.get("lead"))
        if lead:
            mtg["lead"] = lead["name"]
        else:
            print("{} lead {} not found in people".format(
                mtg["_id"], mtg.get("lead")))

        scribe = find_person(mtg.get("scribe"))
        if scribe:
            mtg["scribe"] = scribe["name"]
        else:
            print("{} scribe {} not found in people".format(
                mtg["_id"], mtg.get("scribe")))

        if mtg.get("journal_club"):
            jclub = mtg["journal_club"]
            prsn = find_person(jclub.get("presenter"))
            if prsn:
                jclub["presenter"] = prsn["name"]
            else:
                print("{} Jclub presenter {} not found in people".format(
                    mtg["_id"], jclub.get("presenter")))
            if jclub.get("doi", "tbd").casefold() != 'tbd':
                ref, _ = get_formatted_crossref_reference(jclub.get("doi"))
                jclub["doi"] = ref

        if mtg.get("presentation"):
            presentation = mtg["presentation"]
            prsn = find_person(presentation.get("presenter"))
            # "hold" is a placeholder slot, not a person
            if presentation.get("presenter") == "hold":
                prsn = {"name": "Hold"}
            if prsn:
                presentation["presenter"] = prsn["name"]
            else:
                print("{} presenter {} not found in people".format(
                    mtg["_id"], presentation.get("presenter")))
            presentation["link"] = presentation.get("link", "tbd")

        mtg['date'] = dt.date(mtg.get("year"), mtg.get("month"),
                              mtg.get("day"))
        mtg['datestr'] = mtg['date'].strftime('%m/%d/%Y')
        if mtg['date'] >= today:
            f_mtgs.append(mtg)
        else:
            pp_mtgs.append(mtg)

    pp_mtgs = sorted(pp_mtgs, key=lambda x: x.get('date'), reverse=True)
    f_mtgs = sorted(f_mtgs, key=lambda x: x.get('date'))
    self.render("grpmeetings.html", "grpmeetings.html",
                title="Group Meetings",
                ppmeetings=pp_mtgs, fmeetings=f_mtgs)
def latex(self):
    """Render one presentation list (``preslist.tex``) per group member.

    For every member of every group, the accepted presentations on which
    the member is an author are collected, their author, date, institution
    and department fields are expanded for the template, and -- if any
    remain -- a ``presentations-<group>-<member>`` document is rendered and
    compiled to PDF.
    """
    for group in self.gtx["groups"]:
        grp = group["_id"]
        grpmember_ids = group_member_ids(self.gtx['people'], grp)
        for member in grpmember_ids:
            # work on a private copy: the entries are rewritten below
            presentations = deepcopy(self.gtx["presentations"])
            types = ["all"]
            statuses = ['accepted']

            # only list the talk if the group member is an author
            firstclean = []
            for pres in presentations:
                pauthors = pres["authors"]
                if isinstance(pauthors, str):
                    pauthors = [pauthors]
                authors = [
                    fuzzy_retrieval(
                        self.gtx["people"],
                        ["aka", "name", "_id"],
                        author,
                        case_sensitive=False,
                    )
                    for author in pauthors
                ]
                authorids = [
                    author["_id"]
                    for author in authors
                    if author is not None
                ]
                if member in authorids:
                    firstclean.append(pres)
            # only list the presentation if its status is wanted
            secondclean = [
                pres for pres in firstclean
                if pres["status"] in statuses or "all" in statuses
            ]
            # only list the presentation if its type is wanted
            presclean = [
                pres for pres in secondclean
                if pres["type"] in types or "all" in types
            ]

            for pres in presclean:
                # build the author string, using the name on file for
                # every author found in the people collection
                pauthors = pres["authors"]
                if isinstance(pauthors, str):
                    pauthors = [pauthors]
                resolved = []
                for author in pauthors:
                    known = fuzzy_retrieval(
                        self.gtx["people"],
                        ["aka", "name", "_id"],
                        author,
                        case_sensitive=False,
                    )
                    resolved.append(
                        author if known is None else known["name"])
                pres["authors"] = ", ".join(resolved)

                presdates = get_dates(pres)
                pres["date"] = presdates.get("begin_date")
                for be in ['begin', 'end']:
                    when = presdates.get(f"{be}_date")
                    if when:
                        pres[f"{be}_day"] = when.day
                        pres[f"{be}_month"] = when.month
                        pres[f"{be}_year"] = when.year
                for day in ["begin_day", "end_day"]:
                    pres["{}_suffix".format(day)] = number_suffix(
                        pres.get(day, None)
                    )

                if "institution" in pres:
                    inst = pres["institution"]
                    try:
                        pres["institution"] = fuzzy_retrieval(
                            self.gtx["institutions"],
                            ["aka", "name", "_id"],
                            pres["institution"],
                            case_sensitive=False,
                        )
                        if pres["institution"] is None:
                            print(
                                "WARNING: institution {} in {} not found in "
                                "institutions.yml.  Preslist will build "
                                "but to avoid errors please add and "
                                "rerun".format(inst, pres["_id"])
                            )
                            pres["institution"] = {
                                "_id": inst,
                                "department": {"name": ""}}
                    except Exception:
                        # best effort: keep building with a stub institution
                        print("no institute {} in institutions "
                              "collection".format(inst))
                        pres["institution"] = {
                            "_id": inst, "department": {"name": ""}}
                    if "department" in pres:
                        try:
                            pres["department"] = pres["institution"][
                                "departments"
                            ][pres["department"]]
                        except Exception:
                            print(
                                "WARNING: department {} not found in"
                                " {} in institutions.yml.  Pres list will"
                                " build but please check this entry carefully and"
                                " please add the dept to the institution!".format(
                                    pres["department"],
                                    pres["institution"]["_id"],
                                )
                            )
                    else:
                        pres["department"] = {"name": ""}

            if len(presclean) > 0:
                presclean = sorted(
                    presclean,
                    key=lambda k: k.get("date", None),
                    reverse=True,
                )
                outfile = "presentations-" + grp + "-" + member
                # compare ids by value; identity (``is``) only matches when
                # both strings happen to be the same object
                pi = [
                    person
                    for person in self.gtx["people"]
                    if person["_id"] == member
                ][0]
                self.render(
                    "preslist.tex",
                    outfile + ".tex",
                    pi=pi,
                    presentations=presclean,
                    sentencecase=sentencecase,
                    monthstyle=month_fullnames,
                )
                self.pdf(outfile)
def excel(self):
    """Fill the reimbursement workbook template for each matching expense.

    An expense is processed when it is not ``direct_billed`` and its payee
    is one of the people named in ``rc.people``.  The filled workbook is
    saved as ``<expense _id>.xlsx`` in ``self.bldir``.

    Raises
    ------
    TypeError
        If an item's ``unsegregated_expense`` is neither a number nor
        ``'tbd'``.
    RuntimeError
        If the grant percentages of an expense do not sum to 100.
    """
    gtx = self.gtx
    rc = self.rc
    if isinstance(rc.people, str):
        # a single name must become a one-element list; otherwise the
        # loop below would iterate over its characters
        rc.people = [rc.people]
    # the selected people do not depend on the expense, so look them up once
    chosen_ones = [
        fuzzy_retrieval(gtx["people"], ["name", "aka", "_id"], one)
        for one in rc.people
    ]
    for ex in gtx["expenses"]:
        payee = fuzzy_retrieval(gtx["people"], ["name", "aka", "_id"],
                                ex["payee"])
        if ex["payee"] == "direct_billed":
            continue
        for chosen_one in chosen_ones:
            if payee.get("name") != chosen_one.get("name"):
                continue
            if isinstance(ex["grants"], str):
                ex["grants"] = [ex["grants"]]
                grant_fractions = [1.0]
            else:
                grant_fractions = [
                    float(percent) / 100.0
                    for percent in ex["grant_percentages"]
                ]

            # open the template
            wb = openpyxl.load_workbook(self.template)
            ws = wb["T&B"]

            grants = [
                fuzzy_retrieval(gtx["grants"], ["alias", "name", "_id"],
                                grant)
                for grant in ex["grants"]
            ]
            ha = payee["home_address"]
            ws["B17"] = payee["name"]
            ws["B20"] = ha["street"]
            ws["B23"] = ha["city"]
            ws["G23"] = ha["state"]
            ws["L23"] = ha["zip"]
            ws["B36"] = ex["overall_purpose"]

            # Items start at row 42 of the main sheet; once a row would pass
            # 49 the remaining items go to the "Extra_Page" sheet, which
            # uses different columns.
            j = 42
            total_amount = 0
            item_ws = wb["T&B"]
            purpose_column = 4
            ue_column = 13
            se_column = 16
            dates = []
            for i, item in enumerate(ex["itemized_expenses"]):
                r = j + i
                if r > 49:
                    item_ws = wb["Extra_Page"]
                    j = 0
                    r = j + i
                    purpose_column = 5
                    ue_column = 12
                    se_column = 14
                dates.append(mdy_date(**item))
                item_ws.cell(row=r, column=2, value=i)
                item_ws.cell(row=r, column=3, value=mdy(**item))
                item_ws.cell(row=r, column=purpose_column,
                             value=item["purpose"])
                item_ws.cell(
                    row=r,
                    column=ue_column,
                    value=item.get("unsegregated_expense", 0),
                )
                try:
                    total_amount += item.get("unsegregated_expense", 0)
                except TypeError:
                    if item.get("unsegregated_expense", 0) == 'tbd':
                        print("WARNING: unsegregated expense in {} is "
                              "tbd".format(ex["_id"]))
                        item["unsegregated_expense"] = 0
                    else:
                        raise TypeError(
                            "unsegregated expense in {} is not "
                            "a number".format(ex["_id"]))
                item_ws.cell(row=r, column=se_column,
                             value=item.get("segregated_expense", 0))

            split_total = sum(
                fraction * total_amount for fraction in grant_fractions)
            if abs(split_total - total_amount) >= 0.01:
                raise RuntimeError("grant percentages do not sum to 100")
            # one row per grant, starting at row 55
            for offset, (grant, fraction) in enumerate(
                    zip(grants, grant_fractions)):
                row = 55 + offset
                ws["C{}".format(row)] = grant.get("account", "")
                ws["K{}".format(row)] = total_amount * float(fraction)

            if ex.get("expense_type", "business") == "business":
                spots = ("G10", "L11", "O11")
            else:
                spots = ("G7", "L8", "O8")

            ws[spots[0]] = "X"
            ws[spots[1]] = mdy(
                **{
                    k: getattr(min(dates), k)
                    for k in ["month", "day", "year"]
                })
            ws[spots[2]] = mdy(
                **{
                    k: getattr(max(dates), k)
                    for k in ["month", "day", "year"]
                })

            wb.save(os.path.join(self.bldir, ex["_id"] + ".xlsx"))
def db_updater(self):
    """Report appointments, monthly loadings and expenses for ``rc.grant``.

    The reporting window is ``rc.begin_date`` .. ``rc.end_date``; either end
    falls back to the grant's own dates.  The method prints the
    appointments that overlap the window, each person's average loading per
    month multiplied by ``MONTH_COST``, and the reimbursements charged to
    the grant.  Unless ``rc.no_plot`` is set it also plots the daily loading
    per person and the combined loading.

    Raises
    ------
    ValueError
        If the grant cannot be found.
    """
    rc = self.rc
    print(
        f"Instructions/Notes:\n"
        f" Quarters are: Q1 July thru Sept, Q2 Oct - Dec, Q3 Jan - Mar, Q4 Apr - Jun\n"
        f" Grad salaries are about ${MONTH_COST} per month")
    grant_id = rc.grant
    begin_date, end_date = None, None
    print("Collecting Appointments for grant {}:".format(grant_id))
    expenses = self.gtx['expenses']
    people = self.gtx['people']
    jg = self.gtx['grants']
    proposals = self.gtx["proposals"]
    grants = merge_collections_superior(proposals, jg, "proposal_id")
    grant = fuzzy_retrieval(grants, ["name", "_id", "alias"], grant_id)
    if not grant:
        raise ValueError(f"ERROR: grant {grant_id} not found in grants")
    if rc.begin_date:
        begin_date = date_parser.parse(rc.begin_date).date()
    if rc.end_date:
        end_date = date_parser.parse(rc.end_date).date()
    if not begin_date:
        begin_date = get_dates(grant)['begin_date']
    if not end_date:
        end_date = get_dates(grant)['end_date']
    plot_date_list = daterange(begin_date, end_date)

    # (person id, begin, end, loading, months on grant) per appointment
    appts = []
    for person in people:
        person_appts = person.get('appointments', None)
        if not person_appts:
            continue
        for p_appt in person_appts.values():
            if p_appt.get('grant') != grant_id:
                continue
            loading = p_appt.get('loading')
            bd = get_dates(p_appt).get("begin_date")
            ed = get_dates(p_appt).get("end_date")
            months_on_grant = (ed - bd).days / 30.4 * loading
            appts.append((person['_id'], bd, ed, loading, months_on_grant))
    appts.sort(key=lambda x: (x[0], x[1]))

    folks = []
    for app in appts:
        if app[1] < end_date and app[2] >= begin_date:
            print("{0}, from {1} to {2}, loading {3}. Total months: "
                  "{4:6.2f}".format(app[0], app[1].strftime("%Y-%m-%d"),
                                    app[2].strftime("%Y-%m-%d"), app[3],
                                    app[4]))
            folks.append(app[0])
    folks = list(set(folks))

    # Start of every complete reporting month: the first plotted day plus
    # each later first-of-month, without the last entry (whose month is cut
    # off by the end of the window).  Computed here, independent of the
    # people loop, so it is also a valid (possibly empty) list when nobody
    # is appointed on the grant.
    months = [
        day for position, day in enumerate(plot_date_list)
        if position == 0 or day.day == 1
    ]
    if months:
        months.pop()

    plots = []
    people_loadings = []
    loadingc = np.zeros(len(plot_date_list))
    for folk in folks:
        # daily loading of this person, summed over their appointments
        loadinga = np.zeros(len(plot_date_list))
        for app in appts:
            if app[0] == folk:
                loadingl = [
                    app[3] if app[1] <= day <= app[2] else 0
                    for day in plot_date_list
                ]
                loadinga = loadinga + np.array(loadingl)
        loadingc = loadingc + loadinga

        # average daily loading per month times MONTH_COST; one value per
        # entry of ``months``
        loadingm, accum, days = [], 0, 0
        for day, load in zip(plot_date_list, loadinga):
            if day.day == 1 and days != 0:
                loadingm.append(accum * MONTH_COST / days)
                accum, days = 0, 0
            accum = accum + load
            days += 1
        people_loadings.append((folk, loadinga, loadingm))

        if not rc.no_plot:
            fig, ax = plt.subplots()
            ax.plot_date(plot_date_list, loadinga, ls='-', marker="",
                         label=folk)
            ax.set_xlabel('date')
            ax.set_ylabel(f"loading for student {folk}")
            ax.legend(loc='best')
            fig.autofmt_xdate()
            plots.append(fig)

    if not rc.no_plot:
        fig, ax = plt.subplots()
        ax.plot_date(plot_date_list, loadingc, ls='-', marker="")

    print("\n-----------\nLoadings by month\n------------")
    for index, month in enumerate(months):
        print(f"{month.isoformat()}:")
        for folk, _, monthly in people_loadings:
            if monthly[index] > 0:
                print(
                    f" {folk}\tloading: {round(monthly[index], 2)}"
                )

    print("\n----------------\nExpenses\n----------------")
    expenses_on_grant = [
        expense for expense in expenses
        if grant_id in expense.get('grants')
    ]
    if len(expenses_on_grant) > 1:
        expenses_on_grant.sort(key=lambda x: get_dates(x).get('end_date'))

    # a reimbursement recorded with amount 0 is filled in from the
    # itemized expenses
    for expense in expenses_on_grant:
        for reimb in expense.get('reimbursements'):
            if reimb.get('amount') == 0:
                amt = 0
                for exp_item in expense.get('itemized_expenses', []):
                    amt += exp_item.get('unsegregated_expense')
                    amt += exp_item.get('prepaid_expense', 0)
                reimb['amount'] = amt

    total_spend, all_reimb_dates, all_reimb_amts = 0, [], []
    for e in expenses_on_grant:
        reimb_amts = [
            round(i.get('amount'), 2)
            for i in e.get('reimbursements', [{}])
        ]
        reimb_dates = [
            get_dates(i).get('date', get_dates(e).get('end_date'))
            for i in e.get('reimbursements', [{}])
        ]
        all_reimb_dates.extend(reimb_dates)
        all_reimb_amts.extend(reimb_amts)
        total_spend += sum(reimb_amts)
        for reim_date, amt in zip(reimb_dates, reimb_amts):
            print(
                f"{reim_date} (reimb date), {get_dates(e).get('end_date')} (expense date): amount: "
                f"{amt}, ")
        print(f" payee: {e.get('payee')} "
              f"purpose: {e.get('overall_purpose')[:60]}")
    for month in months:
        if month >= begin_date:
            month_spend = 0
            for amt, dte in zip(all_reimb_amts, all_reimb_dates):
                if month.year == dte.year and month.month == dte.month:
                    month_spend += amt
            print(f"{month}: expenses monthly total = {month_spend}")
    print(f"Total spend = {round(total_spend, 2)}")
    # one call displays every open figure
    if plots:
        plt.show()
def latex(self):
    """Render the current-and-pending grants document for every group.

    For each group in ``self.gtx["groups"]`` the PI is looked up in the
    people collection, proposals and grants are merged on ``proposal_id``,
    and two lists are built and filtered with ``filter_grants`` for that
    PI: grants for which ``is_current`` is true, and proposals whose
    status satisfies ``is_pending``.  Every grant receives
    ``award_start_date`` / ``award_end_date`` strings in year/month/day
    order, current grants without a truthy ``cpp_info["cppflag"]`` are
    dropped, and the result is rendered through ``current_pending.tex``
    and handed to ``self.pdf``.

    NOTE(review): if ``fuzzy_retrieval`` finds no person for
    ``group["pi_name"]`` the subsequent ``pi["name"]`` access fails;
    confirm whether callers guarantee that the PI exists.
    """

    def use_canonical_names(grant):
        """Swap each team member's name for the matching people-record name."""
        for person in grant["team"]:
            rperson = fuzzy_retrieval(
                self.gtx["people"], ["aka", "name"], person["name"]
            )
            if rperson:
                person["name"] = rperson["name"]

    def set_subaward_amount(grant):
        """Store the sum of the budget amounts, when a budget is present."""
        budget = grant.get('budget')
        if budget:
            grant['subaward_amount'] = sum(i.get('amount') for i in budget)

    for group in self.gtx["groups"]:
        grp = group["_id"]
        pi = fuzzy_retrieval(
            self.gtx["people"], ["aka", "name"], group["pi_name"]
        )
        pi['initials'] = "".join(
            part[0] for part in pi["name"].split()
        ).upper()

        grants = merge_collections(
            self.gtx["proposals"], self.gtx["grants"], "proposal_id"
        )
        for g in grants:
            use_canonical_names(g)
            set_subaward_amount(g)

        # Shallow copies, so keys added to current grants below do not
        # leak back into the merged records.
        current_grants = [dict(g) for g in grants if is_current(g)]
        current_grants, _, _ = filter_grants(
            current_grants, {pi["name"]}, pi=False, multi_pi=True
        )
        for g in current_grants:
            set_subaward_amount(g)

        pending_grants = [
            g for g in self.gtx["proposals"] if is_pending(g["status"])
        ]
        for g in pending_grants:
            use_canonical_names(g)
        pending_grants, _, _ = filter_grants(
            pending_grants, {pi["name"]}, pi=False, multi_pi=True
        )

        # Dates are formatted for every grant, including current grants
        # that are removed just below, exactly as before.
        for grant in pending_grants + current_grants:
            grant.update(
                award_start_date="{2}/{1}/{0}".format(
                    grant["begin_day"],
                    month_to_int(grant["begin_month"]),
                    grant["begin_year"],
                ),
                award_end_date="{2}/{1}/{0}".format(
                    grant["end_day"],
                    month_to_int(grant["end_month"]),
                    grant["end_year"],
                ),
            )

        # Keep only current grants flagged for this report.  A grant with
        # no ``cpp_info`` at all counts as unflagged instead of raising
        # AttributeError on ``None.get``.
        unflagged_ids = {
            g["_id"]
            for g in current_grants
            if not (g.get('cpp_info') or {}).get('cppflag', "")
        }
        current_grants = [
            g for g in current_grants if g["_id"] not in unflagged_ids
        ]

        piname = HumanName(pi["name"])
        outfile = "current-pending-{}-{}".format(grp, piname.last.lower())
        self.render(
            "current_pending.tex",
            outfile + ".tex",
            pi=pi,
            pending=pending_grants,
            current=current_grants,
            pi_upper=pi["name"].upper(),
            group=group,
        )
        self.pdf(outfile)
def latex(self):
    """Render one presentation list per group member.

    For every group in ``self.gtx["groups"]`` and every member id returned
    by ``group_member_ids``, the presentations on which that member is an
    author and whose status is ``accepted`` are selected from a private
    copy of ``self.gtx["presentations"]``.  Each selected record is
    rewritten for display (joined author string, integer ``begin_month``,
    a ``date``, day suffixes, institution and department records), the
    records are sorted newest first and rendered through ``preslist.tex``,
    and the output is passed to ``self.pdf``.  Members without any
    selected presentation produce no output.

    The process exits through ``sys.exit`` when a selected presentation
    has no ``begin_month``, ``begin_year`` or ``begin_day``.
    """
    # just a reminder placeholder how to access these.  These
    # print statements will be removed when the builder is updated
    # to use them!
    print(self.rc.from_date)
    print(self.rc.to_date)
    print(self.rc.people)
    print(self.rc.grants)

    # "all" in either list switches the corresponding filter off.
    types = ["all"]  # e.g. ['invited'] to list invited talks only
    statuses = ['accepted']  # e.g. ["all"] to list every status

    def find_person(name):
        """Look ``name`` up in the people collection, ignoring case."""
        return fuzzy_retrieval(
            self.gtx["people"],
            ["aka", "name", "_id"],
            name,
            case_sensitive=False,
        )

    for group in self.gtx["groups"]:
        grp = group["_id"]
        for member in group_member_ids(self.gtx['people'], grp):
            # The records are rewritten in place below, so every member
            # starts from a fresh copy.
            presentations = deepcopy(self.gtx["presentations"])

            # build the filtered collection: the member must be an
            # author, then the status filter, then the type filter
            presclean = []
            for pres in presentations:
                pauthors = pres["authors"]
                if isinstance(pauthors, str):
                    pauthors = [pauthors]
                found = (find_person(author) for author in pauthors)
                authorids = [p["_id"] for p in found if p is not None]
                if member not in authorids:
                    continue
                if not (pres["status"] in statuses or "all" in statuses):
                    continue
                if pres["type"] in types or "all" in types:
                    presclean.append(pres)

            for pres in presclean:
                # build author list, preferring the canonical name of
                # every author known to the people collection
                pauthors = pres["authors"]
                if isinstance(pauthors, str):
                    pauthors = [pauthors]
                names = []
                for author in pauthors:
                    person = find_person(author)
                    names.append(
                        author if person is None else person["name"]
                    )
                pres["authors"] = ", ".join(names)

                # fixme: make this a more generic date loading function?
                if pres.get("begin_month"):
                    pres["begin_month"] = month_to_int(pres["begin_month"])
                else:
                    sys.exit("no begin_month in {}".format(pres["_id"]))
                if not pres.get("begin_year"):
                    sys.exit("no begin_year in {}".format(pres["_id"]))
                if not pres.get("begin_day"):
                    sys.exit("no begin_day in {}".format(pres["_id"]))
                pres["date"] = datetime.date(
                    pres["begin_year"],
                    pres["begin_month"],
                    pres["begin_day"],
                )
                for day in ["begin_day", "end_day"]:
                    pres["{}_suffix".format(day)] = number_suffix(
                        pres.get(day, None))

                if "institution" in pres:
                    inst = pres["institution"]
                    # An unknown institution only warns and falls back to
                    # a stub record, so the list still builds.
                    try:
                        pres["institution"] = fuzzy_retrieval(
                            self.gtx["institutions"],
                            ["aka", "name", "_id"],
                            pres["institution"],
                            case_sensitive=False,
                        )
                        if pres["institution"] is None:
                            print(
                                "WARNING: institution {} in {} not found in "
                                "institutions.yml. Preslist will build "
                                "but to avoid errors please add and "
                                "rerun".format(inst, pres["_id"]))
                            pres["institution"] = {
                                "_id": inst,
                                "department": {
                                    "name": ""
                                }
                            }
                    except Exception:
                        # Exception rather than a bare except, so that
                        # SystemExit and KeyboardInterrupt still propagate.
                        print("no institute {} in institutions collection".
                              format(inst))
                        pres["institution"] = {
                            "_id": inst,
                            "department": {
                                "name": ""
                            }
                        }
                    if "department" in pres:
                        try:
                            pres["department"] = pres["institution"][
                                "departments"][pres["department"]]
                        except (KeyError, TypeError):
                            # Unknown department: keep the raw value and
                            # warn rather than abort the build.
                            print(
                                "WARNING: department {} not found in"
                                " {} in institutions.yml. Pres list will"
                                " build but please check this entry carefully and"
                                " please add the dept to the institution!".
                                format(
                                    pres["department"],
                                    pres["institution"]["_id"],
                                ))
                    else:
                        pres["department"] = {"name": ""}

            if not presclean:
                continue
            presclean = sorted(
                presclean,
                key=lambda k: k.get("date", None),
                reverse=True,
            )
            outfile = "presentations-" + grp + "-" + member
            # Compare ids by value: identity (`is`) only matches when both
            # sides happen to be the very same string object.
            pi = [
                person for person in self.gtx["people"]
                if person["_id"] == member
            ][0]
            self.render(
                "preslist.tex",
                outfile + ".tex",
                pi=pi,
                presentations=presclean,
                sentencecase=sentencecase,
                monthstyle=month_fullnames,
            )
            self.pdf(outfile)