def _funding_date_parts(funding, date_key):
    """Return the (year, month, day) values stored under ``funding[date_key]``.

    Each part is read from ``funding[date_key][part]["value"]``. A missing or
    null date, or a missing or null part, yields None for that part instead
    of raising.
    """
    date = funding.get(date_key) or {}
    return tuple((date.get(part) or {}).get("value")
                 for part in ("year", "month", "day"))


def crosswalk_funding(orcid_profile, person_uri, graph):
    """Add triples to ``graph`` for every GRANT funding in an ORCID profile.

    Reads ``orcid_profile["orcid-profile"]["orcid-activities"]["funding-list"]
    ["funding"]``; entries whose ``funding-type`` is not ``"GRANT"`` are
    skipped. For each grant the function adds the grant itself, a principal
    investigator role tied to ``person_uri``, a date interval, the award
    amount, the awarding organization and any external identifiers.

    :param orcid_profile: dict holding the ORCID profile.
    :param person_uri: URI of the person the fundings belong to.
    :param graph: graph that receives the triples via ``graph.add``.
    :return: None; ``graph`` is modified in place.
    """
    activities = orcid_profile["orcid-profile"].get("orcid-activities") or {}
    fundings = (activities.get("funding-list") or {}).get("funding") or []

    for funding in fundings:
        if funding["funding-type"] != "GRANT":
            continue

        title = funding["funding-title"]["title"]["value"]
        grant_uri = ns.D[to_hash_identifier(PREFIX_GRANT, (title,))]
        # Type
        graph.add((grant_uri, RDF.type, VIVO.Grant))
        # Person
        graph.add((grant_uri, VIVO.relates, person_uri))
        # Title
        graph.add((grant_uri, RDFS.label, Literal(title)))

        # Role
        role_uri = grant_uri + "-role"
        graph.add((role_uri, RDF.type, VIVO.PrincipalInvestigatorRole))
        # Inheres in
        graph.add((role_uri, OBO.RO_0000052, person_uri))
        graph.add((role_uri, VIVO.relatedBy, grant_uri))

        # Date interval
        interval_uri = grant_uri + "-interval"
        interval_start_uri = interval_uri + "-start"
        interval_end_uri = interval_uri + "-end"
        (start_year, start_month, start_day) = _funding_date_parts(
            funding, "start-date")
        # The end date parts are read from (and guarded by) "end-date";
        # previously the guards inspected "start-date" by mistake.
        (end_year, end_month, end_day) = _funding_date_parts(
            funding, "end-date")
        # Start date is added before end date, as in the original call order.
        start_added = add_date(interval_start_uri, start_year, graph,
                               start_month, start_day)
        end_added = add_date(interval_end_uri, end_year, graph,
                             end_month, end_day)
        add_date_interval(interval_uri, grant_uri, graph,
                          interval_start_uri if start_added else None,
                          interval_end_uri if end_added else None)

        # Award amount
        amount_value = (funding.get("amount") or {}).get("value")
        if amount_value is not None:
            award_amount = "${:,}".format(int(amount_value))
            graph.add((grant_uri, VIVO.totalAwardAmount,
                       Literal(award_amount)))

        # Awarded by
        organization = funding.get("organization")
        if organization:
            organization_name = organization["name"]
            organization_uri = ns.D[to_hash_identifier(
                PREFIX_ORGANIZATION, (organization_name,))]
            graph.add((organization_uri, RDF.type, FOAF.Organization))
            graph.add((organization_uri, RDFS.label,
                       Literal(organization_name)))
            graph.add((grant_uri, VIVO.assignedBy, organization_uri))

        # Identifiers
        external_identifiers = (
            funding.get("funding-external-identifiers") or {}
        ).get("funding-external-identifier") or []
        for external_identifier in external_identifiers:
            identifier_value = external_identifier.get(
                "funding-external-identifier-value")
            if identifier_value is not None:
                graph.add((grant_uri, VIVO.sponsorAwardId,
                           Literal(identifier_value)))

            identifier_url = (external_identifier.get(
                "funding-external-identifier-url") or {}).get("value")
            if identifier_url:
                vcard_uri = ns.D[to_hash_identifier(
                    "vcard", (identifier_url,))]
                graph.add((vcard_uri, RDF.type, VCARD.Kind))
                # Has contact info
                graph.add((grant_uri, OBO.ARG_2000028, vcard_uri))
                # Url vcard
                vcard_url_uri = vcard_uri + "-url"
                graph.add((vcard_url_uri, RDF.type, VCARD.URL))
                graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri))
                graph.add((vcard_url_uri, VCARD.url,
                           Literal(identifier_url, datatype=XSD.anyURI)))
def crosswalk_work(self, work, person_uri, person_surname, graph):
    """Add triples to ``graph`` describing a single ORCID work.

    Work metadata may be available from the orcid profile, bibtex contained
    in the orcid profile, and/or the crossref record. The preferred order
    (in general) for getting metadata is crossref, bibtex, orcid.
    Note that datacite records were considered, but not found to have
    additional/better metadata.

    Works whose ``type`` is not a key of ``work_type_map`` are ignored.

    :param work: dict holding one work from the ORCID profile.
    :param person_uri: URI of the profile owner.
    :param person_surname: surname of the profile owner; a contributor with
        the same surname (case-insensitive) is treated as the owner.
    :param graph: graph that receives the triples via ``graph.add``.
    :return: None; ``graph`` is modified in place.
    """
    # Work Type
    work_type = work["type"]
    if work_type not in work_type_map:
        return

    # Extract
    # Get external identifiers so that can get DOI
    external_identifiers = WorksCrosswalk._get_work_identifiers(work)
    doi = external_identifiers.get("DOI")
    crossref_record = WorksCrosswalk._fetch_crossref_doi(doi) if doi else {}

    # Bibtex
    bibtex = WorksCrosswalk._parse_bibtex(work)

    # Get title so that can construct work uri
    title = (WorksCrosswalk._get_crossref_title(crossref_record)
             or bibtex.get("title")
             or WorksCrosswalk._get_orcid_title(work))

    # Work-type
    work_class = work_type_map[work_type]
    if (work_type == "TRANSLATION" and bibtex
            and bibtex["ENTRYTYPE"] in bibtex_type_map):
        work_class = bibtex_type_map[bibtex["ENTRYTYPE"]]

    # Construct work uri
    work_uri = self.identifier_strategy.to_uri(work_class, {"name": title})
    graph.add((work_uri, RDF.type, work_class))

    # Title
    graph.add((work_uri, RDFS.label, Literal(title)))

    # Publication date
    (publication_year, publication_month, publication_day) = (
        WorksCrosswalk._get_crossref_publication_date(crossref_record)
        or WorksCrosswalk._get_orcid_publication_date(work)
        or WorksCrosswalk._get_bibtext_publication_date(bibtex)
        or (None, None, None))
    date_uri = add_date(publication_year, graph, self.identifier_strategy,
                        publication_month, publication_day)
    if date_uri:
        graph.add((work_uri, VIVO.dateTimeValue, date_uri))

    # Subjects
    subjects = (crossref_record["subject"]
                if crossref_record and "subject" in crossref_record
                else None)
    for subject in subjects or ():
        subject_uri = self.identifier_strategy.to_uri(
            SKOS.Concept, {"name": subject})
        graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
        if self.create_strategy.should_create(SKOS.Concept, subject_uri):
            graph.add((subject_uri, RDF.type, SKOS.Concept))
            graph.add((subject_uri, RDFS.label, Literal(subject)))

    # Contributors (an array of (first_name, surname, VIVO type,
    # e.g., VIVO.Authorship))
    bibtex_contributors = []
    bibtex_contributors.extend(WorksCrosswalk._get_bibtex_authors(bibtex))
    bibtex_contributors.extend(WorksCrosswalk._get_bibtex_editors(bibtex))
    # Orcid is better for translations because has translator role
    if work_type == "TRANSLATION":
        contributors = WorksCrosswalk._get_orcid_contributors(work)
    else:
        contributors = (WorksCrosswalk._get_crossref_authors(crossref_record)
                        or bibtex_contributors
                        or WorksCrosswalk._get_orcid_contributors(work))

    if not contributors:
        # Add person as author or editor.
        # None, None means this person.
        if work_type in ("EDITED_BOOK",):
            default_type = VIVO.Editorship
        elif work_type == "TRANSLATION":
            # Translator is a predicate, not a -ship class.
            default_type = "TRANSLATOR"
        else:
            default_type = VIVO.Authorship
        # A fresh list is built so a None result from the helpers above
        # does not break the fallback.
        contributors = [(None, None, default_type)]

    for (first_name, surname, vivo_type) in contributors:
        if not surname or person_surname.lower() == surname.lower():
            contributor_uri = person_uri
        else:
            contributor_uri = self.identifier_strategy.to_uri(
                FOAF.Person,
                {"first_name": first_name, "surname": surname})
            if self.create_strategy.should_create(FOAF.Person,
                                                  contributor_uri):
                graph.add((contributor_uri, RDF.type, FOAF.Person))
                full_name = join_if_not_empty((first_name, surname))
                graph.add((contributor_uri, RDFS.label, Literal(full_name)))

        # Translation is a special case
        if vivo_type == "TRANSLATOR":
            graph.add((contributor_uri, BIBO.translator, work_uri))
        # So is patent assignee
        elif work_type == "PATENT":
            graph.add((contributor_uri, VIVO.assigneeFor, work_uri))
        else:
            contributorship_uri = self.identifier_strategy.to_uri(
                vivo_type,
                {"contributor_uri": contributor_uri, "work_uri": work_uri})
            graph.add((contributorship_uri, RDF.type, vivo_type))
            graph.add((contributorship_uri, VIVO.relates, work_uri))
            graph.add((contributorship_uri, VIVO.relates, contributor_uri))

    # Publisher
    publisher = crossref_record.get("publisher") or bibtex.get("publisher")
    if publisher:
        publisher_uri = self.identifier_strategy.to_uri(
            FOAF.Organization, {"name": publisher})
        graph.add((work_uri, VIVO.publisher, publisher_uri))
        if self.create_strategy.should_create(FOAF.Organization,
                                              publisher_uri):
            graph.add((publisher_uri, RDF.type, FOAF.Organization))
            graph.add((publisher_uri, RDFS.label, Literal(publisher)))

    # Volume
    volume = crossref_record.get("volume") or bibtex.get("volume")
    if volume:
        graph.add((work_uri, BIBO.volume, Literal(volume)))

    # Issue
    issue = crossref_record.get("issue") or bibtex.get("number")
    if issue:
        graph.add((work_uri, BIBO.issue, Literal(issue)))

    # Pages
    pages = crossref_record.get("page") or bibtex.get("pages")
    start_page = None
    end_page = None
    if pages and "-" in pages:
        # maxsplit=1 guarantees exactly two parts; with maxsplit=2 a value
        # containing two separators produced three parts and the unpacking
        # raised ValueError.
        (start_page, end_page) = re.split(r" *-+ *", pages, maxsplit=1)
    if start_page:
        graph.add((work_uri, BIBO.pageStart, Literal(start_page)))
    if end_page:
        graph.add((work_uri, BIBO.pageEnd, Literal(end_page)))

    # Identifiers
    # Add doi in bibtex, but not orcid profile
    if bibtex and "doi" in bibtex and "DOI" not in external_identifiers:
        external_identifiers["DOI"] = bibtex["doi"]
    # Add isbn in bibtex, but not orcid profile
    if bibtex and "isbn" in bibtex and "ISBN" not in external_identifiers:
        external_identifiers["ISBN"] = bibtex["isbn"]

    # items() rather than the Python-2-only iteritems().
    for identifier_type, identifier in external_identifiers.items():
        identifier_url = None
        if identifier_type in ("PAT", "OTHER-ID") and work_type == "PATENT":
            identifier_predicate = VIVO.patentNumber
        elif identifier_type == "ISBN":
            clean_isbn = identifier.replace("-", "")
            if len(clean_isbn) <= 10:
                identifier_predicate = BIBO.isbn10
            else:
                identifier_predicate = BIBO.isbn13
        else:
            (identifier_predicate, url_template) = identifier_map.get(
                identifier_type, (None, None))
            if url_template:
                identifier_url = url_template % identifier

        if identifier_predicate:
            graph.add((work_uri, identifier_predicate, Literal(identifier)))
        if identifier_url:
            self._add_work_url(identifier_url, work_uri, graph)

    orcid_url = (work.get("url", {}) or {}).get("value")
    if orcid_url and WorksCrosswalk._use_url(orcid_url):
        self._add_work_url(orcid_url, work_uri, graph)
    bibtex_url = bibtex.get("link")
    if (bibtex_url and WorksCrosswalk._use_url(bibtex_url)
            and orcid_url != bibtex_url):
        self._add_work_url(bibtex_url, work_uri, graph)

    # Series
    # TODO: Figure out how to model series in VIVO-ISF.

    # Journal
    # If Crossref has a journal use it
    journal = WorksCrosswalk._get_crossref_journal(crossref_record)
    issns = []
    if journal:
        issns = crossref_record.get("ISSN", [])
    # Otherwise, only use for some work types.
    elif work_type in journal_map:
        journal = bibtex.get("journal")
        if journal:
            if "issn" in bibtex:
                issns = [bibtex["issn"]]
        else:
            journal = (work.get("journal-title", {}) or {}).get("value")

    if journal:
        journal_class = journal_map.get(work_type, BIBO.Journal)
        journal_uri = self.identifier_strategy.to_uri(
            journal_class, {"name": journal})
        graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))
        if self.create_strategy.should_create(journal_class, journal_uri):
            graph.add((journal_uri, RDF.type, journal_class))
            graph.add((journal_uri, RDFS.label, Literal(journal)))
            for issn in issns:
                graph.add((journal_uri, BIBO.issn, Literal(issn)))

    if work_type in ("BOOK_CHAPTER",):
        book_title = bibtex.get("booktitle")
        if book_title:
            book_uri = self.identifier_strategy.to_uri(
                BIBO.Book, {"name": book_title})
            graph.add((work_uri, VIVO.hasPublicationVenue, book_uri))
            if self.create_strategy.should_create(BIBO.Book, book_uri):
                graph.add((book_uri, RDF.type, BIBO.Book))
                graph.add((book_uri, RDFS.label, Literal(book_title)))

    if work_type in ("CONFERENCE_PAPER",):
        proceeding = (bibtex.get("journal")
                      or (work.get("journal-title", {}) or {}).get("value"))
        if proceeding:
            proceeding_uri = self.identifier_strategy.to_uri(
                BIBO.Proceedings, {"name": proceeding})
            graph.add((work_uri, VIVO.hasPublicationVenue, proceeding_uri))
            if self.create_strategy.should_create(BIBO.Proceedings,
                                                  proceeding_uri):
                graph.add((proceeding_uri, RDF.type, BIBO.Proceedings))
                graph.add((proceeding_uri, RDFS.label, Literal(proceeding)))
def gen_triples(cr_result, matchlist, publisher_list, journal_list, doi=None):
    """Add triples for one publication record to the module-level graph ``g``.

    Besides its parameters the function relies on names defined outside this
    block: the graph ``g``, the namespace ``D``, the ``subjectlist`` cache and
    helpers such as ``uri_gen``, ``add_date`` and ``name_lookup``.

    :param cr_result: dict describing the publication; keys read here are
        "type" (required), "title", "subject", "container-title",
        "publisher", "issue", "volume", "page" and "author".
    :param matchlist: indexable pair whose first element is a list of author
        full names and whose second element holds the value handed to
        ``assign_authorship`` at the same position. It is rebound to the
        result of ``name_selecter`` for unmatched authors, but the rebound
        value is not returned to the caller.
    :param publisher_list: dict mapping publisher name to URI string;
        updated in place when a new publisher is created.
    :param journal_list: dict mapping journal name to URI string; updated in
        place when a new journal is created.
    :param doi: optional DOI, added as ``BIBO.doi`` and used in the prompt
        shown for unknown publication types.
    :return: None.
    """
    pub_uri = uri_gen('pub', g)

    # Article info
    subjects = cr_result["subject"] if "subject" in cr_result else None

    # Title: may be a list of titles (first one wins) or a plain string.
    # The previous code indexed [0] before checking for emptiness
    # (IndexError on an empty list), called .strip() on a list, and kept
    # only the first character of a string title.
    raw_title = cr_result.get("title")
    if isinstance(raw_title, (list, tuple)):
        title = raw_title[0] if raw_title and raw_title[0] else None
    elif raw_title:
        title = raw_title.strip()
    else:
        title = None

    # Publication type
    publication_types = {
        'journal-article': BIBO.AcademicArticle,
        'book-chapter': BIBO.Chapter,
        'dataset': VIVO.Dataset,
        'proceedings-article': VIVO.ConferencePaper,
        'abstract': VIVO.Abstract,
    }
    record_type = cr_result["type"]
    if record_type in publication_types:
        pubtype = publication_types[record_type]
    else:
        # Unknown type: ask the operator interactively for a type URI.
        pubtype = URIRef(raw_input('Unknown publication type for {}.'
                                   ' Enter a valid URI for the type'
                                   .format(doi)))

    # Choose the longer (hopefully non-abbreviated) title
    journal = (max(cr_result["container-title"], key=len)
               if "container-title" in cr_result
               and cr_result["container-title"]
               else None)

    if journal:
        if journal in journal_list:
            journal_uri = journal_list[journal]
            print('found existing '+journal)
            g.add((D[pub_uri], VIVO.hasPublicationVenue,
                   URIRef(journal_uri)))
        else:
            journal_uri = D[uri_gen('n', g)]
            journal_list[journal] = str(journal_uri)
            g.add((D[pub_uri], VIVO.hasPublicationVenue,
                   URIRef(journal_uri)))
            # The venue class follows the publication type.
            if pubtype == VIVO.ConferencePaper:
                g.add((URIRef(journal_uri), RDF.type, BIBO.Proceedings))
            elif pubtype == BIBO.Chapter:
                g.add((URIRef(journal_uri), RDF.type, BIBO.Book))
            else:
                g.add((URIRef(journal_uri), RDF.type, BIBO.Journal))
            g.add((URIRef(journal_uri), RDFS.label, Literal(journal)))

            # The publisher is only attached when the venue is newly created.
            if "publisher" in cr_result:
                publisher = cr_result["publisher"]
                if publisher in publisher_list:
                    publisher_uri = publisher_list[publisher]
                else:
                    publisher_uri = D[uri_gen('n', g)]
                    g.add((URIRef(publisher_uri), RDF.type, VIVO.Publisher))
                    g.add((URIRef(publisher_uri), RDFS.label,
                           Literal(publisher)))
                    publisher_list[publisher] = str(publisher_uri)
                    print('Created new publisher "' + publisher + '"')
                g.add((URIRef(journal_uri), VIVO.publisher,
                       URIRef(publisher_uri)))
            print('Made new '+journal)

    issue = cr_result["issue"] if "issue" in cr_result else None
    volume = cr_result["volume"] if "volume" in cr_result else None
    pages = (cr_result["page"]
             if "page" in cr_result and 'n/a' not in cr_result["page"]
             else None)

    # Authors
    authors = parse_authors(cr_result) if "author" in cr_result else None

    # Publication date
    date = parse_publication_date(cr_result)
    if date:
        (publication_year, publication_month, publication_day) = date
    else:
        (publication_year, publication_month, publication_day) = (
            None, None, None)
    # NOTE(review): the dateTimeValue link is added even when no date was
    # parsed; confirm how add_date treats a None year.
    date_uri = uri_gen('n', g)
    g.add((D[pub_uri], VIVO.dateTimeValue, D[date_uri]))
    add_date(D[date_uri], publication_year, g, publication_month,
             publication_day)

    # Add things to the graph
    if pubtype:
        g.add((D[pub_uri], RDF.type, pubtype))
    if doi:
        g.add((D[pub_uri], BIBO.doi, Literal(doi)))
    if issue:
        g.add((D[pub_uri], BIBO.issue, Literal(issue)))
    if volume:
        g.add((D[pub_uri], BIBO.volume, Literal(volume)))
    if title:
        g.add((D[pub_uri], RDFS.label, Literal(title)))

    # Loop through the list of authors, trying to check for existing
    # authors in the database
    if authors:
        for rank, (first_name, surname) in enumerate(authors, 1):
            full_name = join_if_not_empty((first_name, surname))
            if full_name in matchlist[0]:
                pos = matchlist[0].index(full_name)
                assign_authorship(matchlist[1][pos], g, pub_uri, full_name,
                                  matchlist, rank)
            else:
                roll = name_lookup(surname)
                matchlist = name_selecter(roll, full_name, g, first_name,
                                          surname, pub_uri, matchlist, rank)

    # Subjects
    if subjects:
        for subject in subjects:
            # NEED TO FIND SUBJECT IN VIVO
            concept_uri = get_subject(subject, g)
            if concept_uri:
                # Found an existing concept.
                g.add((D[pub_uri], VIVO.hasSubjectArea, URIRef(concept_uri)))
            elif subject in subjectlist[0]:
                # A concept for this subject was already created this round.
                match = subjectlist[0].index(subject)
                subject_uri = subjectlist[1][match]
                g.add((D[pub_uri], VIVO.hasSubjectArea, D[subject_uri]))
            else:
                # Create a new concept and remember it for later records.
                subject_uri = uri_gen('sub', g)
                subjectlist[0].append(subject)
                subjectlist[1].append(subject_uri)
                g.add((D[pub_uri], VIVO.hasSubjectArea, D[subject_uri]))
                g.add((D[subject_uri], RDF.type, SKOS.Concept))
                g.add((D[subject_uri], RDFS.label, Literal(subject)))

    # Pages: "start-end"; a value without a dash only yields a start page.
    if pages:
        page_parts = pages.split("-")
        g.add((D[pub_uri], BIBO.pageStart, Literal(page_parts[0])))
        if len(page_parts) > 1:
            g.add((D[pub_uri], BIBO.pageEnd, Literal(page_parts[1])))
def crosswalk(self, orcid_profile, person_uri, graph):
    """Add triples to ``graph`` for every GRANT funding in an ORCID profile.

    Reads ``orcid_profile["activities-summary"]["fundings"]["group"]`` and,
    for each ``funding-summary`` entry whose ``type`` is ``"GRANT"``, adds
    the grant, a principal investigator role tied to ``person_uri``, a date
    interval, the award amount, the awarding organization and any external
    identifiers. URIs come from ``self.identifier_strategy``; organizations
    are only described when ``self.create_strategy`` allows it.

    :param orcid_profile: dict holding the ORCID profile.
    :param person_uri: URI of the person the fundings belong to.
    :param graph: graph that receives the triples via ``graph.add``.
    :return: None; ``graph`` is modified in place.
    """
    fundings = orcid_profile["activities-summary"].get("fundings") or {}

    # Funding
    for funding_group in fundings.get("group") or []:
        for funding in funding_group["funding-summary"]:
            if funding["type"] != "GRANT":
                continue

            title = funding["title"]["title"]["value"]
            grant_uri = self.identifier_strategy.to_uri(
                VIVO.Grant, {"title": title})
            # Type
            graph.add((grant_uri, RDF.type, VIVO.Grant))
            # Person
            graph.add((grant_uri, VIVO.relates, person_uri))
            # Title
            graph.add((grant_uri, RDFS.label, Literal(title)))

            # Role
            role_uri = self.identifier_strategy.to_uri(
                VIVO.PrincipalInvestigatorRole, {"grant_uri": grant_uri})
            graph.add((role_uri, RDF.type, VIVO.PrincipalInvestigatorRole))
            # Inheres in
            graph.add((role_uri, OBO.RO_0000052, person_uri))
            graph.add((role_uri, VIVO.relatedBy, grant_uri))

            # Date interval
            (start_year, start_month, start_day) = \
                FundingCrosswalk._get_date_parts("start-date", funding)
            (end_year, end_month, end_day) = \
                FundingCrosswalk._get_date_parts("end-date", funding)
            # Start date is added before end date, as in the original
            # argument evaluation order.
            start_uri = add_date(start_year, graph, self.identifier_strategy,
                                 start_month, start_day)
            end_uri = add_date(end_year, graph, self.identifier_strategy,
                               end_month, end_day)
            add_date_interval(grant_uri, graph, self.identifier_strategy,
                              start_uri, end_uri)

            # Award amount
            funding_amount = funding.get("amount")
            if funding_amount is not None:
                value = funding_amount.get("value")
                if value is not None:
                    award_amount = "${:,}".format(int(value))
                    graph.add((grant_uri, VIVO.totalAwardAmount,
                               Literal(award_amount)))

            # Awarded by
            if "organization" in funding:
                organization_name = funding["organization"]["name"]
                organization_uri = self.identifier_strategy.to_uri(
                    FOAF.Organization, {"name": organization_name})
                graph.add((grant_uri, VIVO.assignedBy, organization_uri))
                if self.create_strategy.should_create(FOAF.Organization,
                                                      organization_uri):
                    graph.add((organization_uri, RDF.type,
                               FOAF.Organization))
                    graph.add((organization_uri, RDFS.label,
                               Literal(organization_name)))

            # Identifiers
            external_ids = funding.get("external-ids")
            if not external_ids:
                continue
            for external_identifier in external_ids["external-id"]:
                # Guard on the key that is actually read. The previous check
                # tested "funding-external-identifier-value", so the sponsor
                # award id was never added, or raised KeyError.
                identifier_value = external_identifier.get(
                    "external-id-value")
                if identifier_value is not None:
                    graph.add((grant_uri, VIVO.sponsorAwardId,
                               Literal(identifier_value)))

                identifier_url = (external_identifier.get(
                    "external-id-url", {}) or {}).get("value")
                if identifier_url:
                    vcard_uri = self.identifier_strategy.to_uri(
                        VCARD.Kind, {"url": identifier_url})
                    graph.add((vcard_uri, RDF.type, VCARD.Kind))
                    # Has contact info
                    graph.add((grant_uri, OBO.ARG_2000028, vcard_uri))
                    # Url vcard
                    vcard_url_uri = self.identifier_strategy.to_uri(
                        VCARD.URL, {"vcard_uri": vcard_uri})
                    graph.add((vcard_url_uri, RDF.type, VCARD.URL))
                    graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri))
                    graph.add((vcard_url_uri, VCARD.url,
                               Literal(identifier_url,
                                       datatype=XSD.anyURI)))
def crosswalk_works(orcid_profile, person_uri, graph, subjectlist, journlist, orglist):
    """Add triples to ``graph`` for every work listed in an ORCID profile.

    Reads ``orcid_profile["orcid-profile"]["orcid-activities"]["orcid-works"]
    ["orcid-work"]``. Every work gets a label, an authorship for
    ``person_uri``, a date, subjects, a DOI (when present) and a publisher;
    JOURNAL_ARTICLE, BOOK and DATA_SET works additionally get a type, and
    journal articles get journal, volume, issue and page details.

    :param orcid_profile: dict holding the ORCID profile.
    :param person_uri: URI of the profile owner.
    :param graph: graph that receives the triples via ``graph.add``.
    :param subjectlist: pair of parallel lists (names, URIs) caching subjects
        already created; appended to in place.
    :param journlist: pair of parallel lists (names, URIs) caching journals
        already created; appended to in place.
    :param orglist: pair of parallel lists (names, URIs) caching
        organizations already created; appended to in place.
    :return: None; ``graph`` and the three caches are modified in place.
    """
    person_surname = orcid_profile["orcid-profile"]["orcid-bio"][
        "personal-details"]["family-name"]["value"]

    # Publications
    activities = orcid_profile["orcid-profile"].get("orcid-activities") or {}
    works = (activities.get("orcid-works") or {}).get("orcid-work") or []

    for work in works:
        # --- Extract ---
        # Get external identifiers so that can get DOI
        external_identifiers = _get_work_identifiers(work)
        doi = external_identifiers.get("DOI")
        doi_record = fetch_crossref_doi(doi) if doi else None

        # Bibtex
        bibtex = _parse_bibtex(work)
        # Work Type
        work_type = work["work-type"]
        # Title
        title = work["work-title"]["title"]["value"]
        work_uri = ns.D[to_hash_identifier(PREFIX_DOCUMENT,
                                           (title, work_type))]

        # Publication date
        if doi_record:
            (publication_year, publication_month, publication_day) = \
                _get_doi_publication_date(doi_record)
        else:
            (publication_year, publication_month, publication_day) = \
                _get_publication_date(work)

        # Subjects
        subjects = (doi_record["subject"]
                    if doi_record and "subject" in doi_record else None)
        # Authors
        # TODO: Get from ORCID profile if no doi record
        authors = _get_doi_authors(doi_record) if doi_record else None
        # Publisher
        publisher = bibtex.get("publisher")

        # --- Add triples ---
        # Title
        graph.add((work_uri, RDFS.label, Literal(title)))

        # Person (via Authorship)
        authorship_uri = work_uri + "-auth"
        graph.add((authorship_uri, RDF.type, VIVO.Authorship))
        graph.add((authorship_uri, VIVO.relates, work_uri))
        graph.add((authorship_uri, VIVO.relates, person_uri))

        # Other authors
        if authors:
            for (first_name, surname) in authors:
                # An author sharing the owner's surname is assumed to be
                # the owner, who already has an authorship above.
                if person_surname.lower() == surname.lower():
                    continue
                author_uri = ns.D[to_hash_identifier(
                    PREFIX_PERSON, (first_name, surname))]
                graph.add((author_uri, RDF.type, FOAF.Person))
                full_name = join_if_not_empty((first_name, surname))
                graph.add((author_uri, RDFS.label, Literal(full_name)))

                # NOTE(review): this URI depends only on the author, so
                # the same Authorship node is reused for every work by
                # that author - confirm whether that is intended.
                authorship_uri = author_uri + "-auth"
                graph.add((authorship_uri, RDF.type, VIVO.Authorship))
                graph.add((authorship_uri, VIVO.relates, work_uri))
                graph.add((authorship_uri, VIVO.relates, author_uri))

        # Date
        date_uri = work_uri + "-date"
        graph.add((work_uri, VIVO.dateTimeValue, date_uri))
        add_date(date_uri, publication_year, graph, publication_month,
                 publication_day)

        # Subjects
        if subjects:
            for subject in subjects:
                if subject in subjectlist[0]:
                    match = subjectlist[0].index(subject)
                    subject_uri = subjectlist[1][match]
                    graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
                else:
                    subject_uri = ns.D[to_hash_identifier("sub", (subject,))]
                    subjectlist[0].append(subject)
                    subjectlist[1].append(subject_uri)
                    graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
                    graph.add((subject_uri, RDF.type, SKOS.Concept))
                    graph.add((subject_uri, RDFS.label, Literal(subject)))

        # Identifier
        if doi:
            graph.add((work_uri, BIBO.doi, Literal(doi)))
            # Also add as a website
            identifier_url = "http://dx.doi.org/%s" % doi
            vcard_uri = ns.D[to_hash_identifier("vcard", (identifier_url,))]
            graph.add((vcard_uri, RDF.type, VCARD.Kind))
            # Has contact info
            graph.add((work_uri, OBO.ARG_2000028, vcard_uri))
            # Url vcard
            vcard_url_uri = vcard_uri + "-url"
            graph.add((vcard_url_uri, RDF.type, VCARD.URL))
            graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri))
            graph.add((vcard_url_uri, VCARD.url,
                       Literal(identifier_url, datatype=XSD.anyURI)))

        # Publisher
        if publisher:
            if publisher in orglist[0]:
                match = orglist[0].index(publisher)
                publisher_uri = orglist[1][match]
                graph.add((work_uri, VIVO.publisher, publisher_uri))
            else:
                publisher_uri = ns.D[to_hash_identifier(
                    PREFIX_ORGANIZATION, (publisher,))]
                orglist[0].append(publisher)
                orglist[1].append(publisher_uri)
                graph.add((publisher_uri, RDF.type, FOAF.Organization))
                graph.add((publisher_uri, RDFS.label, Literal(publisher)))
                graph.add((work_uri, VIVO.publisher, publisher_uri))

        if work_type == "JOURNAL_ARTICLE":
            # --- Extract ---
            journal = bibtex.get("journal")
            volume = bibtex.get("volume")
            number = bibtex.get("number")
            pages = bibtex.get("pages")
            start_page = None
            end_page = None
            if pages and "-" in pages:
                # maxsplit=1 guarantees exactly two parts; maxsplit=2 could
                # yield three and make the unpacking raise ValueError.
                (start_page, end_page) = re.split(r" *-+ *", pages,
                                                  maxsplit=1)

            # --- Add triples ---
            # Type
            graph.add((work_uri, RDF.type, BIBO.AcademicArticle))
            # Journal
            if journal:
                if journal in journlist[0]:
                    match = journlist[0].index(journal)
                    journal_uri = journlist[1][match]
                    graph.add((work_uri, VIVO.hasPublicationVenue,
                               journal_uri))
                else:
                    journal_uri = ns.D[to_hash_identifier(
                        PREFIX_JOURNAL, (BIBO.Journal, journal))]
                    journlist[0].append(journal)
                    journlist[1].append(journal_uri)
                    graph.add((journal_uri, RDF.type, BIBO.Journal))
                    graph.add((journal_uri, RDFS.label, Literal(journal)))
                    graph.add((work_uri, VIVO.hasPublicationVenue,
                               journal_uri))
            # Volume
            if volume:
                graph.add((work_uri, BIBO.volume, Literal(volume)))
            # Number
            if number:
                graph.add((work_uri, BIBO.issue, Literal(number)))
            # Pages
            if start_page:
                graph.add((work_uri, BIBO.pageStart, Literal(start_page)))
            if end_page:
                graph.add((work_uri, BIBO.pageEnd, Literal(end_page)))
        elif work_type == "BOOK":
            # Type
            graph.add((work_uri, RDF.type, BIBO.Book))
        elif work_type == "DATA_SET":
            # Type
            graph.add((work_uri, RDF.type, VIVO.Dataset))
def crosswalk(self, orcid_profile, person_uri, graph):
    """Add triples to ``graph`` for every education entry in an ORCID profile.

    Reads ``orcid_profile["activities-summary"]["educations"]
    ["education-summary"]``. Each entry yields an educational process with
    the organization and ``person_uri`` as participants, a date interval
    built from the start/end years and, when a degree name (``role-title``)
    is present, an awarded degree linked to the process, the organization,
    the academic degree and the person.

    :param orcid_profile: dict holding the ORCID profile.
    :param person_uri: URI of the person the educations belong to.
    :param graph: graph that receives the triples via ``graph.add``.
    :return: None; ``graph`` is modified in place.
    """
    # Education
    educations = orcid_profile["activities-summary"].get("educations") or {}
    for education in educations.get("education-summary") or []:
        # Gather some values
        degree_name = education.get("role-title")
        organization = education["organization"]
        organization_name = organization["name"]
        # Each level may be missing or null; the previous
        # .get("year", {}) returned None for a null year and then raised
        # AttributeError, and a missing date key raised KeyError.
        start_date_year = ((education.get("start-date") or {})
                           .get("year") or {}).get("value")
        end_date_year = ((education.get("end-date") or {})
                         .get("year") or {}).get("value")

        # Organization
        organization_uri = self.identifier_strategy.to_uri(
            FOAF.Organization, {"name": organization_name})
        if self.create_strategy.should_create(FOAF.Organization,
                                              organization_uri):
            graph.add((organization_uri, RDF.type, FOAF.Organization))
            graph.add((organization_uri, RDFS.label,
                       Literal(organization_name)))
            if "address" in organization:
                city = organization["address"]["city"]
                state = organization["address"]["region"]
                address_uri = ns.D[to_hash_identifier("geo", (city, state))]
                graph.add((address_uri, RDF.type, VIVO.GeographicLocation))
                graph.add((organization_uri, OBO.RO_0001025, address_uri))
                graph.add((address_uri, RDFS.label,
                           Literal("%s, %s" % (city, state))))

        # Output of educational process
        educational_process_uri = self.identifier_strategy.to_uri(
            VIVO.EducationalProcess,
            {"organization_name": organization_name,
             "degree_name": degree_name,
             "start_year": start_date_year,
             "end_year": end_date_year})
        graph.add((educational_process_uri, RDF.type,
                   VIVO.EducationalProcess))
        # Has participants
        graph.add((educational_process_uri, OBO.RO_0000057,
                   organization_uri))
        graph.add((educational_process_uri, OBO.RO_0000057, person_uri))

        # Department
        if education.get("department-name"):
            graph.add((educational_process_uri, VIVO.departmentOrSchool,
                       Literal(education["department-name"])))

        # Interval (start date is added before end date)
        start_date_uri = add_date(start_date_year, graph,
                                  self.identifier_strategy)
        end_date_uri = add_date(end_date_year, graph,
                                self.identifier_strategy)
        add_date_interval(educational_process_uri, graph,
                          self.identifier_strategy,
                          start_date_uri, end_date_uri)

        # A present-but-null role-title used to produce a degree labelled
        # with None; only emit degree triples for a real name.
        if degree_name:
            # Awarded degree
            awarded_degree_uri = self.identifier_strategy.to_uri(
                VIVO.AwardedDegree,
                {"educational_process_uri": educational_process_uri})
            graph.add((awarded_degree_uri, RDF.type, VIVO.AwardedDegree))
            graph.add((awarded_degree_uri, RDFS.label,
                       Literal(degree_name)))
            # Assigned by organization
            graph.add((awarded_degree_uri, VIVO.assignedBy,
                       organization_uri))
            # Related to educational process
            graph.add((awarded_degree_uri, OBO.RO_0002353,
                       educational_process_uri))
            # Relates to degree
            degree_uri = self.identifier_strategy.to_uri(
                VIVO.AcademicDegree, {"name": degree_name})
            graph.add((awarded_degree_uri, VIVO.relates, degree_uri))
            if self.create_strategy.should_create(VIVO.AcademicDegree,
                                                  degree_uri):
                graph.add((degree_uri, RDF.type, VIVO.AcademicDegree))
                graph.add((degree_uri, RDFS.label, Literal(degree_name)))
            # Relates to person
            graph.add((awarded_degree_uri, VIVO.relates, person_uri))
def crosswalk(self, orcid_profile, person_uri, graph):
    """Add triples to ``graph`` for every education entry in an ORCID profile.

    Reads ``orcid_profile["activities-summary"]["educations"]
    ["education-summary"]``. Each entry yields an educational process with
    the organization and ``person_uri`` as participants, a date interval
    built from the start/end years and, when a degree name (``role-title``)
    is present, an awarded degree linked to the process, the organization,
    the academic degree and the person.

    :param orcid_profile: dict holding the ORCID profile.
    :param person_uri: URI of the person the educations belong to.
    :param graph: graph that receives the triples via ``graph.add``.
    :return: None; ``graph`` is modified in place.
    """
    # Education
    educations = orcid_profile["activities-summary"].get("educations") or {}
    for education in educations.get("education-summary") or []:
        # Gather some values
        degree_name = education.get("role-title")
        organization = education["organization"]
        organization_name = organization["name"]
        # Tolerate a missing/null date and a null year. The previous
        # .get("year", {}) handed back None for a null year (AttributeError)
        # and indexing a missing date key raised KeyError.
        start_date = education.get("start-date") or {}
        end_date = education.get("end-date") or {}
        start_date_year = (start_date.get("year") or {}).get("value")
        end_date_year = (end_date.get("year") or {}).get("value")

        # Organization
        organization_uri = self.identifier_strategy.to_uri(
            FOAF.Organization, {"name": organization_name})
        if self.create_strategy.should_create(FOAF.Organization,
                                              organization_uri):
            graph.add((organization_uri, RDF.type, FOAF.Organization))
            graph.add((organization_uri, RDFS.label,
                       Literal(organization_name)))
            if "address" in organization:
                city = organization["address"]["city"]
                state = organization["address"]["region"]
                address_uri = ns.D[to_hash_identifier("geo", (city, state))]
                graph.add((address_uri, RDF.type, VIVO.GeographicLocation))
                graph.add((organization_uri, OBO.RO_0001025, address_uri))
                graph.add((address_uri, RDFS.label,
                           Literal("%s, %s" % (city, state))))

        # Output of educational process
        educational_process_uri = self.identifier_strategy.to_uri(
            VIVO.EducationalProcess,
            {"organization_name": organization_name,
             "degree_name": degree_name,
             "start_year": start_date_year,
             "end_year": end_date_year})
        graph.add((educational_process_uri, RDF.type,
                   VIVO.EducationalProcess))
        # Has participants
        graph.add((educational_process_uri, OBO.RO_0000057,
                   organization_uri))
        graph.add((educational_process_uri, OBO.RO_0000057, person_uri))

        # Department
        if education.get("department-name"):
            graph.add((educational_process_uri, VIVO.departmentOrSchool,
                       Literal(education["department-name"])))

        # Interval (start date is added before end date)
        start_date_uri = add_date(start_date_year, graph,
                                  self.identifier_strategy)
        end_date_uri = add_date(end_date_year, graph,
                                self.identifier_strategy)
        add_date_interval(educational_process_uri, graph,
                          self.identifier_strategy,
                          start_date_uri, end_date_uri)

        # Skip the degree triples when role-title is absent or null; a null
        # value used to produce a degree labelled with None.
        if not degree_name:
            continue

        # Awarded degree
        awarded_degree_uri = self.identifier_strategy.to_uri(
            VIVO.AwardedDegree,
            {"educational_process_uri": educational_process_uri})
        graph.add((awarded_degree_uri, RDF.type, VIVO.AwardedDegree))
        graph.add((awarded_degree_uri, RDFS.label, Literal(degree_name)))
        # Assigned by organization
        graph.add((awarded_degree_uri, VIVO.assignedBy, organization_uri))
        # Related to educational process
        graph.add((awarded_degree_uri, OBO.RO_0002353,
                   educational_process_uri))
        # Relates to degree
        degree_uri = self.identifier_strategy.to_uri(
            VIVO.AcademicDegree, {"name": degree_name})
        graph.add((awarded_degree_uri, VIVO.relates, degree_uri))
        if self.create_strategy.should_create(VIVO.AcademicDegree,
                                              degree_uri):
            graph.add((degree_uri, RDF.type, VIVO.AcademicDegree))
            graph.add((degree_uri, RDFS.label, Literal(degree_name)))
        # Relates to person
        graph.add((awarded_degree_uri, VIVO.relates, person_uri))
def crosswalk(self, orcid_profile, person_uri, graph):
    """Add triples describing the person's works (publications) to ``graph``.

    Work metadata may be available from the orcid profile, bibtex contained
    in the orcid profile, and/or the crossref record. The preferred order (in
    general) for getting metadata is crossref, bibtex, orcid. Datacite
    records were considered, but not found to have additional/better
    metadata.

    Works whose ``work-type`` is not in ``work_type_map`` are skipped.

    :param orcid_profile: ORCID profile as a dict rooted at
        ``"orcid-profile"``.
    :param person_uri: URI of the person who owns the profile.
    :param graph: graph that receives the triples through ``graph.add``.
    """
    orcid_works = (orcid_profile["orcid-profile"].get("orcid-activities") or {}).get("orcid-works") or {}
    for work in orcid_works.get("orcid-work") or []:
        # Work type
        work_type = work["work-type"]
        if work_type not in work_type_map:
            continue

        # Extract
        # Get external identifiers so that can get DOI
        external_identifiers = WorksCrosswalk._get_work_identifiers(work)
        doi = external_identifiers.get("DOI")
        # Normalise a missing/failed lookup to {} so the .get() calls below
        # are always safe.
        crossref_record = (WorksCrosswalk._fetch_crossref_doi(doi) if doi else None) or {}

        # Bibtex
        bibtex = WorksCrosswalk._parse_bibtex(work)

        # Get title so that can construct work uri
        title = WorksCrosswalk._get_crossref_title(crossref_record) \
            or bibtex.get("title") \
            or WorksCrosswalk._get_orcid_title(work)

        # Work class; a translation takes the class of the translated thing
        # when bibtex says what it is.
        work_class = work_type_map[work_type]
        if work_type == "TRANSLATION" and bibtex and bibtex["ENTRYTYPE"] in bibtex_type_map:
            work_class = bibtex_type_map[bibtex["ENTRYTYPE"]]

        # Construct work uri
        work_uri = self.identifier_strategy.to_uri(work_class, {"name": title})
        graph.add((work_uri, RDF.type, work_class))

        # Title
        graph.add((work_uri, RDFS.label, Literal(title)))

        # Publication date
        (publication_year, publication_month, publication_day) = \
            WorksCrosswalk._get_crossref_publication_date(crossref_record) \
            or WorksCrosswalk._get_orcid_publication_date(work) \
            or WorksCrosswalk._get_bibtext_publication_date(bibtex) \
            or (None, None, None)
        date_uri = add_date(publication_year, graph, self.identifier_strategy,
                            publication_month, publication_day)
        if date_uri:
            graph.add((work_uri, VIVO.dateTimeValue, date_uri))

        # Subjects
        for subject in crossref_record.get("subject") or []:
            subject_uri = self.identifier_strategy.to_uri(SKOS.Concept, {"name": subject})
            graph.add((work_uri, VIVO.hasSubjectArea, subject_uri))
            if self.create_strategy.should_create(SKOS.Concept, subject_uri):
                graph.add((subject_uri, RDF.type, SKOS.Concept))
                graph.add((subject_uri, RDFS.label, Literal(subject)))

        # Contributors (an array of (first_name, surname, VIVO type, e.g., VIVO.Authorship))
        bibtex_contributors = []
        bibtex_contributors.extend(WorksCrosswalk._get_bibtex_authors(bibtex))
        bibtex_contributors.extend(WorksCrosswalk._get_bibtex_editors(bibtex))
        # Orcid is better for translations because has translator role
        if work_type == "TRANSLATION":
            contributors = WorksCrosswalk._get_orcid_contributors(work)
        else:
            contributors = WorksCrosswalk._get_crossref_authors(crossref_record) or bibtex_contributors \
                or WorksCrosswalk._get_orcid_contributors(work)

        # The family name may be null in a profile; an empty surname never
        # matches a named contributor.
        family_name = orcid_profile["orcid-profile"]["orcid-bio"]["personal-details"].get("family-name") or {}
        person_surname = family_name.get("value") or ""

        if not contributors:
            # Add person as author or editor.
            # None, None means this person.
            if work_type in ("EDITED_BOOK",):
                default_role = VIVO.Editorship
            elif work_type == "TRANSLATION":
                # Translator is a predicate, not a -ship class.
                default_role = "TRANSLATOR"
            else:
                default_role = VIVO.Authorship
            contributors = [(None, None, default_role)]

        for (first_name, surname, vivo_type) in contributors:
            if not surname or person_surname.lower() == surname.lower():
                contributor_uri = person_uri
            else:
                contributor_uri = self.identifier_strategy.to_uri(
                    FOAF.Person, {"first_name": first_name, "surname": surname})
                if self.create_strategy.should_create(FOAF.Person, contributor_uri):
                    graph.add((contributor_uri, RDF.type, FOAF.Person))
                    full_name = join_if_not_empty((first_name, surname))
                    graph.add((contributor_uri, RDFS.label, Literal(full_name)))

            # Translation is a special case
            if vivo_type == "TRANSLATOR":
                graph.add((contributor_uri, BIBO.translator, work_uri))
            # So is patent assignee
            elif work_type == "PATENT":
                graph.add((contributor_uri, VIVO.assigneeFor, work_uri))
            else:
                contributorship_uri = self.identifier_strategy.to_uri(
                    vivo_type, {"contributor_uri": contributor_uri, "work_uri": work_uri})
                graph.add((contributorship_uri, RDF.type, vivo_type))
                graph.add((contributorship_uri, VIVO.relates, work_uri))
                graph.add((contributorship_uri, VIVO.relates, contributor_uri))

        # Publisher
        publisher = crossref_record.get("publisher") or bibtex.get("publisher")
        if publisher:
            publisher_uri = self.identifier_strategy.to_uri(FOAF.Organization, {"name": publisher})
            graph.add((work_uri, VIVO.publisher, publisher_uri))
            if self.create_strategy.should_create(FOAF.Organization, publisher_uri):
                graph.add((publisher_uri, RDF.type, FOAF.Organization))
                graph.add((publisher_uri, RDFS.label, Literal(publisher)))

        # Volume
        volume = crossref_record.get("volume") or bibtex.get("volume")
        if volume:
            graph.add((work_uri, BIBO.volume, Literal(volume)))

        # Issue
        issue = crossref_record.get("issue") or bibtex.get("number")
        if issue:
            graph.add((work_uri, BIBO.issue, Literal(issue)))

        # Pages
        pages = crossref_record.get("page") or bibtex.get("pages")
        start_page = None
        end_page = None
        if pages and "-" in pages:
            # maxsplit=1 guarantees exactly two parts; a larger maxsplit can
            # return three parts and break the two-name unpacking.
            (start_page, end_page) = re.split(" *-+ *", pages, maxsplit=1)
        if start_page:
            graph.add((work_uri, BIBO.pageStart, Literal(start_page)))
        if end_page:
            graph.add((work_uri, BIBO.pageEnd, Literal(end_page)))

        # Identifiers
        # Add doi in bibtex, but not orcid profile
        if bibtex and "doi" in bibtex and "DOI" not in external_identifiers:
            external_identifiers["DOI"] = bibtex["doi"]
        # Add isbn in bibtex, but not orcid profile
        if bibtex and "isbn" in bibtex and "ISBN" not in external_identifiers:
            external_identifiers["ISBN"] = bibtex["isbn"]

        # .items() works on both Python 2 and Python 3 (iteritems does not).
        for identifier_type, identifier in external_identifiers.items():
            identifier_url = None
            if identifier_type in ("PAT", "OTHER_ID") and work_type == "PATENT":
                identifier_predicate = VIVO.patentNumber
            elif identifier_type == "ISBN":
                clean_isbn = identifier.replace("-", "")
                if len(clean_isbn) <= 10:
                    identifier_predicate = BIBO.isbn10
                else:
                    identifier_predicate = BIBO.isbn13
            else:
                (identifier_predicate, url_template) = identifier_map.get(identifier_type, (None, None))
                if url_template:
                    identifier_url = url_template % identifier

            if identifier_predicate:
                graph.add((work_uri, identifier_predicate, Literal(identifier)))
            if identifier_url:
                self._add_work_url(identifier_url, work_uri, graph)

        orcid_url = (work.get("url", {}) or {}).get("value")
        if orcid_url and WorksCrosswalk._use_url(orcid_url):
            self._add_work_url(orcid_url, work_uri, graph)
        bibtex_url = bibtex.get("link")
        if bibtex_url and WorksCrosswalk._use_url(bibtex_url) and orcid_url != bibtex_url:
            self._add_work_url(bibtex_url, work_uri, graph)

        # Series
        # TODO: Figure out how to model series in VIVO-ISF (bibtex "series").

        # Journal
        # If Crossref has a journal use it
        issns = []
        journal = WorksCrosswalk._get_crossref_journal(crossref_record)
        if journal:
            issns = crossref_record.get("ISSN") or []
        # Otherwise, only use for some work types.
        elif work_type in journal_map:
            journal = bibtex.get("journal")
            if journal:
                if "issn" in bibtex:
                    issns = [bibtex["issn"]]
            else:
                journal = (work.get("journal-title", {}) or {}).get("value")

        if journal:
            journal_class = journal_map.get(work_type, BIBO.Journal)
            journal_uri = self.identifier_strategy.to_uri(journal_class, {"name": journal})
            graph.add((work_uri, VIVO.hasPublicationVenue, journal_uri))
            if self.create_strategy.should_create(journal_class, journal_uri):
                graph.add((journal_uri, RDF.type, journal_class))
                graph.add((journal_uri, RDFS.label, Literal(journal)))
                for issn in issns:
                    graph.add((journal_uri, BIBO.issn, Literal(issn)))

        if work_type in ("BOOK_CHAPTER",):
            book_title = bibtex.get("booktitle")
            if book_title:
                book_uri = self.identifier_strategy.to_uri(BIBO.Book, {"name": book_title})
                graph.add((work_uri, VIVO.hasPublicationVenue, book_uri))
                if self.create_strategy.should_create(BIBO.Book, book_uri):
                    graph.add((book_uri, RDF.type, BIBO.Book))
                    graph.add((book_uri, RDFS.label, Literal(book_title)))

        if work_type in ("CONFERENCE_PAPER",):
            proceeding = bibtex.get("journal") or (work.get("journal-title", {}) or {}).get("value")
            if proceeding:
                proceeding_uri = self.identifier_strategy.to_uri(BIBO.Proceedings, {"name": proceeding})
                graph.add((work_uri, VIVO.hasPublicationVenue, proceeding_uri))
                if self.create_strategy.should_create(BIBO.Proceedings, proceeding_uri):
                    graph.add((proceeding_uri, RDF.type, BIBO.Proceedings))
                    graph.add((proceeding_uri, RDFS.label, Literal(proceeding)))
def crosswalk(self, orcid_profile, person_uri, graph):
    """Add triples describing the person's grants to ``graph``.

    Walks ``activities-summary / fundings / group / funding-summary`` and,
    for each funding whose ``type`` is ``"GRANT"``, adds the grant, a
    principal-investigator role for the person, the date interval, the award
    amount, the awarding organization, and any external identifiers (with
    their URLs as vcards).

    :param orcid_profile: ORCID record as a dict; ``"activities-summary"``
        must be present.
    :param person_uri: URI of the person the fundings belong to.
    :param graph: graph that receives the triples through ``graph.add``.
    """
    # A missing or null "fundings" section simply means nothing to add.
    fundings = orcid_profile["activities-summary"].get("fundings") or {}
    for funding_group in fundings.get("group") or []:
        for funding in funding_group["funding-summary"]:
            if funding["type"] != "GRANT":
                continue

            title = funding["title"]["title"]["value"]
            grant_uri = self.identifier_strategy.to_uri(VIVO.Grant, {"title": title})
            # Type
            graph.add((grant_uri, RDF.type, VIVO.Grant))
            # Person
            graph.add((grant_uri, VIVO.relates, person_uri))
            # Title
            graph.add((grant_uri, RDFS.label, Literal(title)))

            # Role
            role_uri = self.identifier_strategy.to_uri(VIVO.PrincipalInvestigatorRole, {"grant_uri": grant_uri})
            graph.add((role_uri, RDF.type, VIVO.PrincipalInvestigatorRole))
            # Inheres in
            graph.add((role_uri, OBO.RO_0000052, person_uri))
            graph.add((role_uri, VIVO.relatedBy, grant_uri))

            # Date interval
            (start_year, start_month, start_day) = FundingCrosswalk._get_date_parts("start-date", funding)
            (end_year, end_month, end_day) = FundingCrosswalk._get_date_parts("end-date", funding)
            add_date_interval(
                grant_uri, graph, self.identifier_strategy,
                add_date(start_year, graph, self.identifier_strategy, start_month, start_day),
                add_date(end_year, graph, self.identifier_strategy, end_month, end_day))

            # Award amount
            funding_amount = funding.get("amount")
            if funding_amount is not None:
                value = funding_amount.get("value")
                if value is not None:
                    award_amount = "${:,}".format(int(value))
                    graph.add((grant_uri, VIVO.totalAwardAmount, Literal(award_amount)))

            # Awarded by
            organization = funding.get("organization")
            if organization:
                organization_name = organization["name"]
                organization_uri = self.identifier_strategy.to_uri(FOAF.Organization, {"name": organization_name})
                graph.add((grant_uri, VIVO.assignedBy, organization_uri))
                if self.create_strategy.should_create(FOAF.Organization, organization_uri):
                    graph.add((organization_uri, RDF.type, FOAF.Organization))
                    graph.add((organization_uri, RDFS.label, Literal(organization_name)))

            # Identifiers
            external_ids = (funding.get("external-ids") or {}).get("external-id") or []
            for external_identifier in external_ids:
                # Test the same key that is read; the previous guard looked
                # for "funding-external-identifier-value", so the award id
                # was never added for records using "external-id-value".
                identifier_value = external_identifier.get("external-id-value")
                if identifier_value is not None:
                    graph.add((grant_uri, VIVO.sponsorAwardId, Literal(identifier_value)))

                identifier_url = (external_identifier.get("external-id-url", {}) or {}).get("value")
                if identifier_url:
                    vcard_uri = self.identifier_strategy.to_uri(VCARD.Kind, {"url": identifier_url})
                    graph.add((vcard_uri, RDF.type, VCARD.Kind))
                    # Has contact info
                    graph.add((grant_uri, OBO.ARG_2000028, vcard_uri))
                    # Url vcard
                    vcard_url_uri = self.identifier_strategy.to_uri(VCARD.URL, {"vcard_uri": vcard_uri})
                    graph.add((vcard_url_uri, RDF.type, VCARD.URL))
                    graph.add((vcard_uri, VCARD.hasURL, vcard_url_uri))
                    graph.add((vcard_url_uri, VCARD.url, Literal(identifier_url, datatype=XSD.anyURI)))
def crosswalk_affiliations(orcid_profile, person_uri, graph):
    """Add triples describing the person's education affiliations to ``graph``.

    Reads ``orcid-profile / orcid-activities / affiliations / affiliation``
    and, for every affiliation whose ``type`` is ``"EDUCATION"``, adds the
    organization (and its location when an address is present), a
    ``VIVO.EducationalProcess`` with participants, optional department and
    date interval, and, when a role title is given, the awarded degree.

    :param orcid_profile: ORCID profile as a dict rooted at
        ``"orcid-profile"``.
    :param person_uri: URI of the person the affiliations belong to.
    :param graph: graph that receives the triples through ``graph.add``.
    """
    activities = orcid_profile["orcid-profile"].get("orcid-activities") or {}
    # "affiliations" may be present but null, so guard with `or` rather than
    # relying on a .get() default.
    affiliations = (activities.get("affiliations") or {}).get("affiliation") or []
    for affiliation in affiliations:
        if affiliation["type"] != "EDUCATION":
            continue

        # Gather some values
        degree_name = affiliation.get("role-title")
        organization = affiliation["organization"]
        organization_name = organization["name"]
        # Dates (and the year inside them) may be absent or null.
        start_date_year = ((affiliation.get("start-date") or {}).get("year") or {}).get("value")
        end_date_year = ((affiliation.get("end-date") or {}).get("year") or {}).get("value")

        # Organization
        organization_uri = ns.D[to_hash_identifier(PREFIX_ORGANIZATION, (organization_name,))]
        graph.add((organization_uri, RDF.type, FOAF.Organization))
        graph.add((organization_uri, RDFS.label, Literal(organization_name)))
        address = organization.get("address") or {}
        city = address.get("city")
        state = address.get("region")
        if city or state:
            address_uri = ns.D[to_hash_identifier("geo", (city, state))]
            graph.add((address_uri, RDF.type, VIVO.GeographicLocation))
            graph.add((organization_uri, OBO.RO_0001025, address_uri))
            # Only join the parts that exist so a null region does not yield
            # a label such as "London, None".
            address_label = ", ".join(part for part in (city, state) if part)
            graph.add((address_uri, RDFS.label, Literal(address_label)))

        # Output of educational process
        educational_process_uri = ns.D[to_hash_identifier(
            PREFIX_EDUCATIONAL_PROCESS,
            (organization_name, degree_name, start_date_year, end_date_year))]
        graph.add((educational_process_uri, RDF.type, VIVO.EducationalProcess))
        # Has participants
        graph.add((educational_process_uri, OBO.RO_0000057, organization_uri))
        graph.add((educational_process_uri, OBO.RO_0000057, person_uri))
        # Department
        if affiliation.get("department-name"):
            graph.add((educational_process_uri, VIVO.departmentOrSchool,
                       Literal(affiliation["department-name"])))

        # Interval
        interval_uri = educational_process_uri + "-interval"
        interval_start_uri = interval_uri + "-start"
        interval_end_uri = interval_uri + "-end"
        # An endpoint URI is passed on only when add_date returned a truthy
        # result for it.
        add_date_interval(
            interval_uri, educational_process_uri, graph,
            interval_start_uri if add_date(interval_start_uri, start_date_year, graph) else None,
            interval_end_uri if add_date(interval_end_uri, end_date_year, graph) else None)

        # The key can be present with a null value, so test the value itself
        # rather than key membership.
        if degree_name:
            # Awarded degree
            awarded_degree_uri = educational_process_uri + "-dgre"
            graph.add((awarded_degree_uri, RDF.type, VIVO.AwardedDegree))
            graph.add((awarded_degree_uri, RDFS.label, Literal(degree_name)))
            # Assigned by organization
            graph.add((awarded_degree_uri, VIVO.assignedBy, organization_uri))
            # Related to educational process
            graph.add((awarded_degree_uri, OBO.RO_0002353, educational_process_uri))
            # Relates to degree
            degree_uri = ns.D[to_hash_identifier(PREFIX_DEGREE, (degree_name,))]
            graph.add((degree_uri, RDF.type, VIVO.AcademicDegree))
            graph.add((degree_uri, RDFS.label, Literal(degree_name)))
            graph.add((awarded_degree_uri, VIVO.relates, degree_uri))
            # Relates to person
            graph.add((awarded_degree_uri, VIVO.relates, person_uri))