def orcid_author_search_parser(author_name):
    """
    Parse the ORCID search results for an author name into a list of dicts.

    The XML returned by ``ORCID.orcid_author_search`` is first saved to
    ``data/orcid_author_search.xml`` and then parsed back from that file.

    :param author_name: name handed to ``ORCID.orcid_author_search``.
    :return: list with one dict per ``orcid-search-result`` element. Every
        dict has an ``othernames`` list plus ``orcid``, ``firstname`` and
        ``lastname`` when the matching elements exist, and has been passed
        through ``ORCID_Parser.generate_author_other_names``.
    """
    out_file = "data/orcid_author_search.xml"
    # The context manager closes the file even when the lookup or the write
    # raises, which a bare open()/close() pair does not guarantee.
    with open(out_file, "w") as fout:
        print >> fout, ORCID.orcid_author_search(author_name, kind="xml").encode('utf-8')

    root_element = ET.parse(out_file).getroot()
    ns = '{http://www.orcid.org/ns/orcid}'
    result_path = ns + 'orcid-search-results/' + ns + 'orcid-search-result'
    id_path = ns + 'orcid-identifier/' + ns + 'path'
    details_path = ns + 'orcid-bio/' + ns + 'personal-details'
    other_name_path = ns + 'other-names/' + ns + 'other-name'

    authors = []
    for result in root_element.findall(result_path):
        author = {'othernames': []}
        for profile in result.findall(ns + 'orcid-profile'):
            # For scalar fields the last matching element wins.
            for path in profile.findall(id_path):
                author['orcid'] = path.text
            for details in profile.findall(details_path):
                for given in details.findall(ns + 'given-names'):
                    author['firstname'] = given.text
                for family in details.findall(ns + 'family-name'):
                    author['lastname'] = family.text
                for other in details.findall(other_name_path):
                    author['othernames'].append(other.text)
        author = ORCID_Parser.generate_author_other_names(author)
        authors.append(author)
    return authors
def orcid_author_search_parser(author_name):
    """
    Parse the ORCID search results for an author name into a list of dicts.

    The XML returned by ``ORCID.orcid_author_search`` is first saved to
    ``data/orcid_author_search.xml`` and then parsed back from that file.

    :param author_name: name handed to ``ORCID.orcid_author_search``.
    :return: list with one dict per ``orcid-search-result`` element. Every
        dict has an ``othernames`` list plus ``orcid``, ``firstname`` and
        ``lastname`` when the matching elements exist, and has been passed
        through ``ORCID_Parser.generate_author_other_names``.
    """
    out_file = "data/orcid_author_search.xml"
    # The context manager closes the file even when the lookup or the write
    # raises, which a bare open()/close() pair does not guarantee.
    with open(out_file, "w") as fout:
        print >> fout, ORCID.orcid_author_search(author_name, kind="xml").encode('utf-8')

    root_element = ET.parse(out_file).getroot()
    ns = '{http://www.orcid.org/ns/orcid}'
    result_path = ns + 'orcid-search-results/' + ns + 'orcid-search-result'
    id_path = ns + 'orcid-identifier/' + ns + 'path'
    details_path = ns + 'orcid-bio/' + ns + 'personal-details'
    other_name_path = ns + 'other-names/' + ns + 'other-name'

    authors = []
    for result in root_element.findall(result_path):
        author = {'othernames': []}
        for profile in result.findall(ns + 'orcid-profile'):
            # For scalar fields the last matching element wins.
            for path in profile.findall(id_path):
                author['orcid'] = path.text
            for details in profile.findall(details_path):
                for given in details.findall(ns + 'given-names'):
                    author['firstname'] = given.text
                for family in details.findall(ns + 'family-name'):
                    author['lastname'] = family.text
                for other in details.findall(other_name_path):
                    author['othernames'].append(other.text)
        author = ORCID_Parser.generate_author_other_names(author)
        authors.append(author)
    return authors
from lookup.ORCID import ORCID
import json

# ORCID iD whose profile and works are written to the report file.
AUTHOR_ORCID = "0000-0001-9558-179X"


def banner(msg):
    """Write msg to the report file ``fout`` between two rules of 70 '='."""
    rule = 70 * "="
    print >> fout, rule
    print >> fout, msg
    print >> fout, rule


# The with block closes the report file even if one of the lookups raises;
# a bare open() at module level would leave the handle open.
with open("orcid_author_details.txt", "w") as fout:
    banner("orcid author details xml")
    print >> fout, ORCID.orcid_author_get(AUTHOR_ORCID, kind="xml").encode('utf-8')

    banner("orcid author details json")
    print >> fout, json.dumps(ORCID.orcid_author_get(AUTHOR_ORCID, kind="json"), indent=2).encode('utf-8')

    banner("orcid author works xml")
    print >> fout, ORCID.orcid_author_works_get(AUTHOR_ORCID, kind="xml").encode('utf-8')

    banner("orcid author works json")
    print >> fout, json.dumps(ORCID.orcid_author_works_get(AUTHOR_ORCID, kind="json"), indent=2).encode('utf-8')
from lookup.ORCID import ORCID
import json

# Author name submitted to the ORCID search for both output formats.
AUTHOR_NAME = "gregor von laszewski"


def banner(msg):
    """Write msg to the report file ``fout`` between two rules of 70 '='."""
    rule = 70 * "="
    print >> fout, rule
    print >> fout, msg
    print >> fout, rule


# The with block closes the report file even if one of the lookups raises;
# a bare open() at module level would leave the handle open.
with open("orcid_author_details_by_name.txt", "w") as fout:
    banner("orcid author details xml")
    print >> fout, ORCID.orcid_author_search(AUTHOR_NAME, kind="xml").encode('utf-8')

    banner("orcid author details json")
    print >> fout, json.dumps(ORCID.orcid_author_search(AUTHOR_NAME, kind="json"), indent=2).encode('utf-8')
def orcid_author_get_parser(orcid):
    """
    Parse the ORCID profile of one author into a dictionary.

    The XML returned by ``ORCID.orcid_author_get`` is first saved to
    ``data/orcid_author_get.xml`` and then parsed back from that file.

    :param orcid: ORCID iD handed to ``ORCID.orcid_author_get``.
    :return: dict that always has the lists ``othernames``, ``urls`` and
        ``identifiers`` and, when the matching elements exist, ``orcid``,
        ``firstname``, ``lastname`` and ``email``. ``identifiers`` gets one
        dict per ``external-identifier`` element; that dict maps
        ``ResearcherID`` or ``ScopusID`` to the reference value and is empty
        for any other identifier name.
    """
    out_file = "data/orcid_author_get.xml"
    # The context manager closes the file even when the lookup or the write
    # raises, which a bare open()/close() pair does not guarantee.
    with open(out_file, "w") as fout:
        print >> fout, ORCID.orcid_author_get(orcid, kind="xml").encode('utf-8')

    root_element = ET.parse(out_file).getroot()
    ns = '{http://www.orcid.org/ns/orcid}'
    # Name found in external-id-common-name -> key used in the result.
    known_ids = {'ResearcherID': 'ResearcherID',
                 'Scopus Author ID': 'ScopusID'}
    url_path = ns + 'researcher-urls/' + ns + 'researcher-url/' + ns + 'url'
    external_id_path = (ns + 'external-identifiers/'
                        + ns + 'external-identifier')

    author = {'othernames': [], 'urls': [], 'identifiers': []}
    for profile in root_element.findall(ns + 'orcid-profile'):
        # For scalar fields the last matching element wins.
        for path in profile.findall(ns + 'orcid-identifier/' + ns + 'path'):
            author['orcid'] = path.text
        for bio in profile.findall(ns + 'orcid-bio'):
            for details in bio.findall(ns + 'personal-details'):
                for given in details.findall(ns + 'given-names'):
                    author['firstname'] = given.text
                for family in details.findall(ns + 'family-name'):
                    author['lastname'] = family.text
                for other in details.findall(ns + 'other-names/'
                                             + ns + 'other-name'):
                    author['othernames'].append(other.text)
            for url in bio.findall(url_path):
                author['urls'].append(url.text)
            for email in bio.findall(ns + 'contact-details/' + ns + 'email'):
                author['email'] = email.text
            for external in bio.findall(external_id_path):
                # key starts from None for every identifier so that a name
                # from a previous element can never leak into this one.
                key = None
                for name in external.findall(ns + 'external-id-common-name'):
                    key = known_ids.get(name.text)
                references = external.findall(ns + 'external-id-reference')
                identifier = {}
                # Looking both elements up by tag removes the dependency on
                # the name element preceding the reference element.
                if key is not None and references:
                    identifier[key] = references[-1].text
                author['identifiers'].append(identifier)
    return author
def orcid_author_works_get_parser(orcid):
    """
    Parse the works listed in an author's ORCID profile into a dictionary.

    The XML returned by ``ORCID.orcid_author_works_get`` is first saved to
    ``data/orcid_author_works_get.xml`` and then parsed back from that file.

    :param orcid: ORCID iD handed to ``ORCID.orcid_author_works_get``.
    :return: dict with a ``works`` list and, when present, ``orcid``. Every
        work dict has ``identifiers``, ``authorIDs``, ``title``, ``doi`` and
        ``authors``; ``journalTitle``, ``year`` and ``work-citation-type``
        are added when the matching elements exist. A work without a DOI
        identifier is looked up by title through
        ``IEEE_Parser.ieee_publication_search_parser``; its ``doi`` is taken
        from the result only when exactly one publication is returned and is
        ``''`` otherwise.
    """
    out_file = "data/orcid_author_works_get.xml"
    # The context manager closes the file even when the lookup or the write
    # raises, which a bare open()/close() pair does not guarantee.
    with open(out_file, "w") as fout:
        print >> fout, ORCID.orcid_author_works_get(orcid, kind="xml").encode('utf-8')

    root_element = ET.parse(out_file).getroot()
    ns = '{http://www.orcid.org/ns/orcid}'
    work_path = (ns + 'orcid-activities/' + ns + 'orcid-works/'
                 + ns + 'orcid-work')
    external_id_path = (ns + 'work-external-identifiers/'
                        + ns + 'work-external-identifier')

    author = {'works': []}
    for profile in root_element.findall(ns + 'orcid-profile'):
        for path in profile.findall(ns + 'orcid-identifier/' + ns + 'path'):
            author['orcid'] = path.text

        for work_elem in profile.findall(work_path):
            work = {'identifiers': [], 'authorIDs': []}

            # For scalar fields the last matching element wins.
            for title in work_elem.findall(ns + 'work-title/' + ns + 'title'):
                work['title'] = title.text
            for journal in work_elem.findall(ns + 'journal-title'):
                work['journalTitle'] = journal.text
            for year in work_elem.findall(ns + 'publication-date/'
                                          + ns + 'year'):
                work['year'] = year.text

            for citation_elem in work_elem.findall(ns + 'work-citation'):
                for type_elem in citation_elem.findall(
                        ns + 'work-citation-type'):
                    work['work-citation-type'] = type_elem.text
                # .get() so that a citation without a declared type is
                # skipped instead of raising KeyError.
                citation_type = work.get('work-citation-type')
                for text_elem in citation_elem.findall(ns + 'citation'):
                    citation = text_elem.text
                    if citation_type == 'bibtex':
                        work['authors'] = \
                            ORCID_Parser.get_authors_list_from_bibtex(
                                citation)
                    elif citation_type == 'formatted-unspecified':
                        work['authors'] = \
                            ORCID_Parser.get_authors_list_from_unformattedtext(
                                citation)

            for external in work_elem.findall(external_id_path):
                # key starts from None for every identifier so that a type
                # from a previous element can never leak into this one.
                key = None
                for type_elem in external.findall(
                        ns + 'work-external-identifier-type'):
                    key = 'doi' if type_elem.text == 'doi' else None
                ids = external.findall(ns + 'work-external-identifier-id')
                identifier = {}
                if key is not None and ids:
                    identifier[key] = ids[-1].text
                    work[key] = ids[-1].text
                work['identifiers'].append(identifier)

            if 'title' not in work:
                work['title'] = ''
            if 'doi' not in work:
                publications = IEEE_Parser.ieee_publication_search_parser(
                    work['title'])
                # Only an unambiguous single hit is trusted for the DOI.
                if len(publications) == 1:
                    work['doi'] = next(iter(publications))['doi']
                else:
                    work['doi'] = ''
            if 'authors' not in work:
                work['authors'] = []
            author['works'].append(work)
    return author
def banner(msg):
    """Print msg on standard output between two rules of 70 '=' characters."""
    rule = 70 * "="
    print(rule)
    print(msg)
    print(rule)


# Look the same DOI up through each service and print every response.
banner("doi turtle")
# NOTE(review): kind="turle" looks like a misspelling of "turtle"; left
# untouched because DOI.doi_get is not visible here -- confirm and fix.
print(DOI.doi_get("10.1109/GCE.2010.5676126", kind="turle"))

banner("doi bibtex")
print(DOI.doi_get("10.1109/GCE.2010.5676126"))

banner("doi json")
pprint(DOI.doi_get("10.1109/GCE.2010.5676126", kind="json"))

banner("ieee xml")
print(IEEE.ieee_doi_get("10.1109/GCE.2010.5676126", kind="xml"))

banner("ieee json")
print(IEEE.ieee_doi_get("10.1109/GCE.2010.5676126", kind="json"))

banner("orcid xml")
print(ORCID.orcid_doi_get("10.1109/GCE.2010.5676126", kind="xml"))

banner("orcid json")
print(json.dumps(ORCID.orcid_doi_get("10.1109/GCE.2010.5676126", kind="json"),
                 indent=2))

banner("crossref json")
pprint(CrossRef.crossref_doi_get("10.1109/GCE.2010.5676126", kind="json"))
def orcid_author_get_parser(orcid):
    """
    Parse the ORCID profile of one author into a dictionary.

    The XML returned by ``ORCID.orcid_author_get`` is first saved to
    ``data/orcid_author_get.xml`` and then parsed back from that file.

    :param orcid: ORCID iD handed to ``ORCID.orcid_author_get``.
    :return: dict that always has the lists ``othernames``, ``urls`` and
        ``identifiers`` and, when the matching elements exist, ``orcid``,
        ``firstname``, ``lastname`` and ``email``. ``identifiers`` gets one
        dict per ``external-identifier`` element; that dict maps
        ``ResearcherID`` or ``ScopusID`` to the reference value and is empty
        for any other identifier name.
    """
    out_file = "data/orcid_author_get.xml"
    # The context manager closes the file even when the lookup or the write
    # raises, which a bare open()/close() pair does not guarantee.
    with open(out_file, "w") as fout:
        print >> fout, ORCID.orcid_author_get(orcid, kind="xml").encode('utf-8')

    root_element = ET.parse(out_file).getroot()
    ns = '{http://www.orcid.org/ns/orcid}'
    # Name found in external-id-common-name -> key used in the result.
    known_ids = {'ResearcherID': 'ResearcherID',
                 'Scopus Author ID': 'ScopusID'}
    url_path = ns + 'researcher-urls/' + ns + 'researcher-url/' + ns + 'url'
    external_id_path = (ns + 'external-identifiers/'
                        + ns + 'external-identifier')

    author = {'othernames': [], 'urls': [], 'identifiers': []}
    for profile in root_element.findall(ns + 'orcid-profile'):
        # For scalar fields the last matching element wins.
        for path in profile.findall(ns + 'orcid-identifier/' + ns + 'path'):
            author['orcid'] = path.text
        for bio in profile.findall(ns + 'orcid-bio'):
            for details in bio.findall(ns + 'personal-details'):
                for given in details.findall(ns + 'given-names'):
                    author['firstname'] = given.text
                for family in details.findall(ns + 'family-name'):
                    author['lastname'] = family.text
                for other in details.findall(ns + 'other-names/'
                                             + ns + 'other-name'):
                    author['othernames'].append(other.text)
            for url in bio.findall(url_path):
                author['urls'].append(url.text)
            for email in bio.findall(ns + 'contact-details/' + ns + 'email'):
                author['email'] = email.text
            for external in bio.findall(external_id_path):
                # key starts from None for every identifier so that a name
                # from a previous element can never leak into this one.
                key = None
                for name in external.findall(ns + 'external-id-common-name'):
                    key = known_ids.get(name.text)
                references = external.findall(ns + 'external-id-reference')
                identifier = {}
                # Looking both elements up by tag removes the dependency on
                # the name element preceding the reference element.
                if key is not None and references:
                    identifier[key] = references[-1].text
                author['identifiers'].append(identifier)
    return author
def orcid_author_works_get_parser(orcid):
    """
    Parse the works listed in an author's ORCID profile into a dictionary.

    The XML returned by ``ORCID.orcid_author_works_get`` is first saved to
    ``data/orcid_author_works_get.xml`` and then parsed back from that file.

    :param orcid: ORCID iD handed to ``ORCID.orcid_author_works_get``.
    :return: dict with a ``works`` list and, when present, ``orcid``. Every
        work dict has ``identifiers``, ``authorIDs``, ``title``, ``doi`` and
        ``authors``; ``journalTitle``, ``year`` and ``work-citation-type``
        are added when the matching elements exist. A work without a DOI
        identifier is looked up by title through
        ``IEEE_Parser.ieee_publication_search_parser``; its ``doi`` is taken
        from the result only when exactly one publication is returned and is
        ``''`` otherwise.
    """
    out_file = "data/orcid_author_works_get.xml"
    # The context manager closes the file even when the lookup or the write
    # raises, which a bare open()/close() pair does not guarantee.
    with open(out_file, "w") as fout:
        print >> fout, ORCID.orcid_author_works_get(orcid, kind="xml").encode('utf-8')

    root_element = ET.parse(out_file).getroot()
    ns = '{http://www.orcid.org/ns/orcid}'
    work_path = (ns + 'orcid-activities/' + ns + 'orcid-works/'
                 + ns + 'orcid-work')
    external_id_path = (ns + 'work-external-identifiers/'
                        + ns + 'work-external-identifier')

    author = {'works': []}
    for profile in root_element.findall(ns + 'orcid-profile'):
        for path in profile.findall(ns + 'orcid-identifier/' + ns + 'path'):
            author['orcid'] = path.text

        for work_elem in profile.findall(work_path):
            work = {'identifiers': [], 'authorIDs': []}

            # For scalar fields the last matching element wins.
            for title in work_elem.findall(ns + 'work-title/' + ns + 'title'):
                work['title'] = title.text
            for journal in work_elem.findall(ns + 'journal-title'):
                work['journalTitle'] = journal.text
            for year in work_elem.findall(ns + 'publication-date/'
                                          + ns + 'year'):
                work['year'] = year.text

            for citation_elem in work_elem.findall(ns + 'work-citation'):
                for type_elem in citation_elem.findall(
                        ns + 'work-citation-type'):
                    work['work-citation-type'] = type_elem.text
                # .get() so that a citation without a declared type is
                # skipped instead of raising KeyError.
                citation_type = work.get('work-citation-type')
                for text_elem in citation_elem.findall(ns + 'citation'):
                    citation = text_elem.text
                    if citation_type == 'bibtex':
                        work['authors'] = \
                            ORCID_Parser.get_authors_list_from_bibtex(
                                citation)
                    elif citation_type == 'formatted-unspecified':
                        work['authors'] = \
                            ORCID_Parser.get_authors_list_from_unformattedtext(
                                citation)

            for external in work_elem.findall(external_id_path):
                # key starts from None for every identifier so that a type
                # from a previous element can never leak into this one.
                key = None
                for type_elem in external.findall(
                        ns + 'work-external-identifier-type'):
                    key = 'doi' if type_elem.text == 'doi' else None
                ids = external.findall(ns + 'work-external-identifier-id')
                identifier = {}
                if key is not None and ids:
                    identifier[key] = ids[-1].text
                    work[key] = ids[-1].text
                work['identifiers'].append(identifier)

            if 'title' not in work:
                work['title'] = ''
            if 'doi' not in work:
                publications = IEEE_Parser.ieee_publication_search_parser(
                    work['title'])
                # Only an unambiguous single hit is trusted for the DOI.
                if len(publications) == 1:
                    work['doi'] = next(iter(publications))['doi']
                else:
                    work['doi'] = ''
            if 'authors' not in work:
                work['authors'] = []
            author['works'].append(work)
    return author