def find_person(name: str) -> int:
    """Return the ID of the single person matching `name`, or 0.

    `name` is split at its first space into a first name and a last name,
    which are handed to `db.get_person`. The first returned ID is used only
    when the lookup yields exactly one distinct ID. Zero matches, several
    distinct matches, or any exception raised on the way (for example a name
    that contains no space) all produce 0.

    NOTE(review): `sup_cur` is not a parameter of this function; presumably
    it is provided by an enclosing or module scope -- confirm.
    """
    try:
        given, family = name.split(' ', 1)
        candidates = list(db.get_person(sup_cur, given, family))
        if len(set(candidates)) != 1:
            return 0
        return candidates[0]
    except Exception:
        # Every failure is reported to the caller as "no match".
        return 0
def add_developers(sup_cur: db.Cursor) -> None:
    """Make sure the authors of tools-related publications exist as people.

    The PMIDs come from `tools.MetabolomicsToolsWiki.pmids()` and are narrowed
    to those for which `db.get_pubmed_publications` returns a publication.
    For every author of those publications the function:

    * skips the author (with a message) if the forename or surname is empty;
    * skips the author (with a warning) if `db.get_person` returns more than
      one match;
    * reuses the single match, or otherwise calls `db.add_person`;
    * prints each non-blank affiliation found under the author element.

    Affiliations are only printed here; nothing is stored for them.

    Args:
        sup_cur: Cursor passed through to the `db` helper functions.
    """
    wiki_pmids = set(tools.MetabolomicsToolsWiki.pmids())
    total = len(wiki_pmids)
    if not total:
        return

    publications = db.get_pubmed_publications(sup_cur, wiki_pmids)
    known_pmids = wiki_pmids.intersection(publications.keys())
    print(f"Found {len(known_pmids)} of {total} tools-related publications "
          "in the Supplemental database.")
    if not known_pmids:
        return

    for pmid in known_pmids:
        for author in parse_author_list(publications[pmid]):
            forename = author.findtext("ForeName", "").strip()
            lastname = author.findtext("LastName", "").strip()

            # Both name parts are required to look a person up.
            if not forename:
                print(f"PMID {pmid}: missing forename of author {lastname}")
                continue
            if not lastname:
                print(f"PMID {pmid}: missing surname of author {forename}")
                continue

            matches = list(db.get_person(sup_cur, forename, lastname))
            if not matches:
                # Unknown author: create a new person record.
                pid = db.add_person(sup_cur, forename, lastname, "", "")
                if not pid:
                    print(f"PMID {pmid}: WARNING failed to add person: "
                          f"{forename} {lastname}")
                    continue
                print(f"PMID {pmid}: added {forename} {lastname}: {pid}")
            elif len(matches) == 1:
                pid = matches[0]
                print(f"PMID {pmid}: found {forename} {lastname}: {pid}")
            else:
                # Ambiguous name: do not guess which person is meant.
                print(f"PMID {pmid}: WARNING! Found {len(matches)} people "
                      f" named {forename} {lastname}: {matches}")
                continue

            for element in author.findall(".//Affiliation"):
                affiliation = element.text
                if affiliation is not None and affiliation.strip() != '':
                    print(f"PMID {pmid}: affiliation for {forename} {lastname}"
                          f": {affiliation.strip()}")
def test_case_insensitive_name_matching(self):
    # Looking up the all-lower-case name "james bond" must yield exactly
    # the person ID 7.
    # NOTE(review): the meaning of the fourth positional argument (False)
    # is defined by db.get_person and is not visible here.
    cur = self.conn.cursor()
    found = list(db.get_person(cur, "james", "bond", False))
    self.assertListEqual([7], found)
def get_studies(mwb_cur: db.Cursor, sup_cur: db.Cursor,
                people: Dict[int, Person], orgs: Dict[int, Organization],
                embargoed: List[str]) -> Dict[str, Study]:
    """Build a `Study` for every Workbench study whose status is 1.

    Each row's `last_name`, `first_name`, `institute`, `department` and
    `laboratory` columns are treated as ';'-separated lists. Organizations
    are resolved by name against the `organizations` table (non-withheld
    rows only) and people through `db.get_person`.

    Args:
        mwb_cur: Cursor on which the study query is executed.
        sup_cur: Cursor used for the `organizations` queries and passed to
            `db.get_person`.
        people: Indexed by the person ID returned by `db.get_person`; the
            entry's `person_id` is appended to `study.runner`.
        orgs: Indexed by `organizations.id`; the entry's `org_id` is appended
            to the study's institutes / departments / labs.
        embargoed: Study IDs that must be skipped.

    Returns:
        The studies keyed by study ID. Embargoed studies and studies rejected
        by `is_valid_study` are left out.

    Raises:
        SystemExit: With status 1, after printing a diagnostic, when an
            institute or a person cannot be resolved.
        IndexError: If a row lists fewer first names than last names.
    """
    print("Gathering Workbench Studies")
    studies: Dict[str, Study] = {}
    mwb_cur.execute("""\
        SELECT study.study_id, study.study_title,
               COALESCE(study.study_type, ''),
               COALESCE(study.study_summary, ''),
               study.submit_date, study.project_id,
               study.last_name, study.first_name, study.institute,
               study.department, study.laboratory
        FROM study, study_status_prod
        WHERE study.study_id = study_status_prod.study_id
        AND study_status_prod.status = 1""")
    for row in mwb_cur:
        submit_date = ""
        if row[4]:
            submit_date = f"{row[4]}T00:00:00"

        study = Study(study_id=row[0].replace('\n', ''),
                      study_title=row[1].replace('\n', '').replace('"', '\\"'),
                      study_type=row[2].replace('\n', ''),
                      summary=row[3].replace('\n', '').replace('"', '\\"'),
                      submit_date=submit_date,
                      project_id=row[5].replace('\n', ''))

        # Exclude embargoed studies.
        if study.study_id in embargoed:
            print(f"Skipping embargoed study: {study.study_id}")
            continue

        # Skip invalid studies
        if not is_valid_study(study):
            continue

        last_names: str = row[6]
        first_names: str = row[7]
        institutes = row[8]
        departments = row[9]
        labs = row[10]

        institute_list = [inst.strip() for inst in institutes.split(';')]
        # department / laboratory may be NULL, in which case there is
        # nothing to split.
        try:
            department_list = [dept.strip() for dept in departments.split(';')]
        except AttributeError:
            department_list = []
        try:
            lab_list = [lab.strip() for lab in labs.split(';')]
        except AttributeError:
            lab_list = []

        max_range = max(len(institute_list), len(department_list),
                        len(lab_list))

        # Reset for every study. Previously this name was only bound when a
        # department matched, so the laboratory lookup below either raised
        # UnboundLocalError or reused a department resolved for an earlier
        # study. Within one study the last resolved department is still
        # carried over from one position to the next.
        department_id = None

        for i in range(0, max_range):
            # If there are not enough institutes, default to first
            try:
                sup_cur.execute(
                    """
                    SELECT id, parent_id
                    FROM organizations
                    WHERE name=%s AND type='institute'
                    AND withheld = FALSE
                    """, (institute_list[i], ))
                try:
                    # Indexing the result of fetchone() raises TypeError
                    # when no row matched.
                    inst_id = sup_cur.fetchone()[0]
                    study.institutes.append(orgs[inst_id].org_id)
                except TypeError:
                    print("Error: Organization does not exist.")
                    print("Organization for study " + study.study_id)
                    print("Organization name: " + institute_list[i])
                    # Non-zero status: this is a failure, not a clean exit.
                    sys.exit(1)
            except IndexError:
                sup_cur.execute(
                    """
                    SELECT id, parent_id
                    FROM organizations
                    WHERE name=%s AND type='institute'
                    AND withheld = FALSE
                    """, (institute_list[0], ))
                inst_id = sup_cur.fetchone()[0]

            # If there are not enough departments, default to first
            if departments:
                try:
                    sup_cur.execute(
                        """
                        SELECT id, parent_id
                        FROM organizations
                        WHERE name=%s AND type='department'
                        AND withheld = FALSE
                        """, (department_list[i], ))
                    try:
                        dept_options = {org_row[0]: org_row[1]
                                        for org_row in sup_cur}
                        # Several departments may share a name; keep the
                        # ones that belong to the current institute.
                        for dept_id, parent in dept_options.items():
                            if inst_id == parent:
                                department_id = dept_id
                                study.departments.append(orgs[dept_id].org_id)
                    except TypeError:
                        print("Error: Organization does not exist.")
                        print("Organization for study " + study.study_id)
                        print("Organization name: " + department_list[i])
                        sys.exit(1)
                except IndexError:
                    sup_cur.execute(
                        """
                        SELECT id, parent_id
                        FROM organizations
                        WHERE name=%s AND type='department'
                        AND withheld = FALSE
                        """, (department_list[0], ))
                    dept_options = {org_row[0]: org_row[1]
                                    for org_row in sup_cur}
                    for dept_id, parent in dept_options.items():
                        if inst_id == parent:
                            department_id = dept_id

            if labs:
                try:
                    sup_cur.execute(
                        """
                        SELECT id, parent_id
                        FROM organizations
                        WHERE name=%s AND type='laboratory'
                        AND withheld = FALSE
                        """, (lab_list[i], ))
                    try:
                        lab_options = {org_row[0]: org_row[1]
                                       for org_row in sup_cur}
                        for lab_id, parent in lab_options.items():
                            # Only attach labs below a department that was
                            # actually resolved for this study.
                            if (department_id is not None
                                    and department_id == parent):
                                study.labs.append(orgs[lab_id].org_id)
                    except TypeError:
                        print("Error: Organization does not exist.")
                        print("Organization for study " + study.study_id)
                        print("Organization name: " + lab_list[i])
                        sys.exit(1)
                except IndexError:
                    # Fewer labs than positions: nothing to add here.
                    pass

        last_name_list = [ln.strip() for ln in last_names.split(';')]
        first_name_list = [fn.strip() for fn in first_names.split(';')]

        for i, last_name in enumerate(last_name_list):
            first_name = first_name_list[i]
            ids = list(db.get_person(sup_cur, first_name, last_name))
            try:
                person_id = ids[0]
                study.runner.append(people[person_id].person_id)
            except (IndexError, KeyError, TypeError):
                print("Error: Person does not exist.")
                print("Runner for study " + study.study_id)
                print("Last name: " + last_name + '.')
                print("First name: " + first_name + '.')
                sys.exit(1)

        studies[study.study_id] = study
    return studies
def get_projects(mwb_cur: db.Cursor, sup_cur: db.Cursor,
                 people: Dict[int, Person],
                 orgs: List[Organization]) -> Mapping[str, Project]:
    """Build a `Project` for every row of the Workbench `project` table.

    Each row's `last_name`, `first_name`, `institute`, `department` and
    `laboratory` columns are treated as ';'-separated lists. Organizations
    are resolved by name against the `organizations` table (non-withheld
    rows only) and people through `db.get_person`.

    Args:
        mwb_cur: Cursor on which the project query is executed.
        sup_cur: Cursor used for the `organizations` queries and passed to
            `db.get_person`.
        people: Indexed by the person ID returned by `db.get_person`; the
            entry's `person_id` is appended to `project.pi`.
        orgs: Indexed by `organizations.id`; the entry's `org_id` is appended
            to the project's institutes / departments / labs.
            NOTE(review): annotated as a List but indexed by organization
            row ID -- possibly meant to be Dict[int, Organization]; confirm
            against the caller.

    Returns:
        The projects keyed by project ID.

    Raises:
        SystemExit: With status 1, after printing a diagnostic, when an
            institute or a person cannot be resolved.
        IndexError: If a row lists fewer first names than last names.
    """
    print("Gathering Workbench Projects")
    projects = {}
    mwb_cur.execute("""\
        SELECT project_id, project_title, COALESCE(project_type, ''),
               COALESCE(project_summary, ''), COALESCE(doi, ''),
               COALESCE(funding_source, ''),
               last_name, first_name, institute, department, laboratory
        FROM project
        """)
    for row in mwb_cur:
        project = Project(
            project_id=row[0].replace('\n', ''),
            project_title=row[1].replace('\n', '').replace('"', '\\"'),
            project_type=row[2].replace('\n', ''),
            summary=row[3].replace('\n', '').replace('"', '\\"'),
            doi=row[4].replace('\n', ''),
            funding_source=row[5].replace('\n', ''))

        last_names: str = row[6]
        first_names: str = row[7]
        institutes = row[8]
        departments = row[9]
        labs = row[10]

        institute_list = [inst.strip() for inst in institutes.split(';')]
        # department / laboratory may be NULL, in which case there is
        # nothing to split.
        try:
            department_list = [dept.strip() for dept in departments.split(';')]
        except AttributeError:
            department_list = []
        try:
            lab_list = [lab.strip() for lab in labs.split(';')]
        except AttributeError:
            lab_list = []

        max_range = max(len(institute_list), len(department_list),
                        len(lab_list))

        # Reset for every project. Previously this name was only bound when
        # a department matched, so the laboratory lookup below either raised
        # UnboundLocalError or reused a department resolved for an earlier
        # project. Within one project the last resolved department is still
        # carried over from one position to the next.
        department_id = None

        for i in range(0, max_range):
            # If there are not enough institutes, default to first
            try:
                sup_cur.execute(
                    """
                    SELECT id, parent_id
                    FROM organizations
                    WHERE name=%s AND type='institute'
                    AND withheld = FALSE
                    """, (institute_list[i], ))
                try:
                    # Indexing the result of fetchone() raises TypeError
                    # when no row matched.
                    inst_id = sup_cur.fetchone()[0]
                    project.institutes.append(orgs[inst_id].org_id)
                except TypeError:
                    print("Error: Organization does not exist.")
                    print("Organization for project " + project.project_id)
                    print("Organization name: " + institute_list[i])
                    # Non-zero status: this is a failure, not a clean exit.
                    sys.exit(1)
            except IndexError:
                sup_cur.execute(
                    """
                    SELECT id, parent_id
                    FROM organizations
                    WHERE name=%s AND type='institute'
                    AND withheld = FALSE
                    """, (institute_list[0], ))
                inst_id = sup_cur.fetchone()[0]

            # If there are not enough departments, default to first
            if departments:
                try:
                    sup_cur.execute(
                        """
                        SELECT id, parent_id
                        FROM organizations
                        WHERE name=%s AND type='department'
                        AND withheld = FALSE
                        """, (department_list[i], ))
                    try:
                        dept_options = {org_row[0]: org_row[1]
                                        for org_row in sup_cur}
                        # Several departments may share a name; keep the
                        # ones that belong to the current institute.
                        for dept_id, parent in dept_options.items():
                            if inst_id == parent:
                                department_id = dept_id
                                org_id = orgs[dept_id].org_id
                                project.departments.append(org_id)
                    except TypeError:
                        print("Error: Organization does not exist.")
                        print("Organization for project " + project.project_id)
                        print("Organization name: " + department_list[i])
                        sys.exit(1)
                except IndexError:
                    sup_cur.execute(
                        """
                        SELECT id, parent_id
                        FROM organizations
                        WHERE name=%s AND type='department'
                        AND withheld = FALSE
                        """, (department_list[0], ))
                    dept_options = {org_row[0]: org_row[1]
                                    for org_row in sup_cur}
                    for dept_id, parent in dept_options.items():
                        if inst_id == parent:
                            department_id = dept_id

            if labs:
                try:
                    sup_cur.execute(
                        """
                        SELECT id, parent_id
                        FROM organizations
                        WHERE name=%s AND type='laboratory'
                        AND withheld = FALSE
                        """, (lab_list[i], ))
                    try:
                        lab_options = {org_row[0]: org_row[1]
                                       for org_row in sup_cur}
                        for lab_id, parent in lab_options.items():
                            # Only attach labs below a department that was
                            # actually resolved for this project.
                            if (department_id is not None
                                    and department_id == parent):
                                project.labs.append(orgs[lab_id].org_id)
                    except TypeError:
                        print("Error: Organization does not exist.")
                        print("Organization for project " + project.project_id)
                        print("Organization name: " + lab_list[i])
                        sys.exit(1)
                except IndexError:
                    # Fewer labs than positions: nothing to add here.
                    pass

        last_name_list = [ln.strip() for ln in last_names.split(';')]
        first_name_list = [fn.strip() for fn in first_names.split(';')]

        for i, last_name in enumerate(last_name_list):
            first_name = first_name_list[i]
            ids = list(db.get_person(sup_cur, first_name, last_name))
            try:
                person_id = ids[0]
                project.pi.append(people[person_id].person_id)
            except (IndexError, KeyError, TypeError):
                print("Error: Person does not exist.")
                print("PI for project " + project.project_id)
                print("Last name: " + last_name)
                print("First name: " + first_name)
                sys.exit(1)

        projects[project.project_id] = project
    return projects