def check_records(records):
    """Rebuild the author fields of Springer records from their attached XML.

    For every record accepted by ``is_springer``, each latest attached file
    whose format is ``.xml`` is read and parsed: ``JATSParser`` is used when
    the content contains the ``-//NLM//DTD JATS`` marker, ``NLMParser``
    otherwise.  Existing ``100`` and ``700`` fields are removed, then the
    first parsed author is added as a ``100__`` field and every further
    author as a ``700__`` field.

    Each author field carries subfield ``a`` (the name) plus, when the parser
    supplied them, ``j`` (ORCID), ``v`` (one per affiliation) and ``m``
    (email).

    :param records: iterable of record objects; each one must provide
        ``record_id``, item deletion, ``warn`` and ``add_field``.
    :return: None; the records are modified in place.
    """
    for record in records:
        if not is_springer(record):
            continue

        rec_doc = BibRecDocs(int(record.record_id))
        for doc in rec_doc.list_latest_files():
            if doc.get_format() != '.xml':
                continue

            # Context manager guarantees the handle is closed even if read()
            # raises; the original left the file open.
            with open(doc.get_full_path()) as xml_file:
                content = xml_file.read()

            # Remove the old author fields independently: a record without a
            # '100' field must still lose its stale '700' fields, otherwise
            # the authors added below would be duplicated.
            # NOTE(review): removal happens before parsing, so a parse failure
            # leaves the record without author fields - confirm this is wanted.
            removed_any = False
            for tag in ('100', '700'):
                try:
                    del record[tag]
                except Exception:
                    # Best effort: the tag is simply not present on the record.
                    continue
                removed_any = True
            if removed_any:
                record.amended = True

            try:
                if "-//NLM//DTD JATS" in content:
                    parser = JATSParser()
                else:
                    parser = NLMParser()
                authors = parser.get_authors(parseString(content))
            except Exception:
                # 'Exception' rather than a bare except so that SystemExit and
                # KeyboardInterrupt are not swallowed.
                record.warn('Problem with parsing XML.')
                continue

            first_author = True
            for author in authors:
                if author.get('surname'):
                    # Fall back to initials when no given name is available.
                    subfields = [
                        ('a', '%s, %s' % (author.get('surname'),
                                          author.get('given_name') or author.get('initials', '')))
                    ]
                else:
                    subfields = [('a', '%s' % (author.get('name', '')))]

                if 'orcid' in author:
                    subfields.append(('j', author['orcid']))

                if 'affiliation' in author:
                    for aff in author["affiliation"]:
                        subfields.append(('v', aff))
                    # NOTE(review): presumably derives nation subfields from
                    # the affiliations just added; the nesting level of this
                    # call should be confirmed against version control.
                    add_nations_field(subfields)

                if author.get('email'):
                    subfields.append(('m', author['email']))

                if first_author:
                    record.add_field('100__', value='', subfields=subfields)
                    first_author = False
                else:
                    record.add_field('700__', value='', subfields=subfields)
def _read_text(path):
    """Return the whole content of the file at *path*, closing it afterwards."""
    with open(path) as handle:
        return handle.read()


def _drop_author_fields(record):
    """Delete the '100' and '700' fields of *record*, each one independently.

    Sets ``record.amended`` when at least one of the fields was removed.
    """
    removed_any = False
    for tag in ('100', '700'):
        try:
            del record[tag]
        except Exception:
            # Best effort: the tag is not present on this record.
            continue
        removed_any = True
    if removed_any:
        record.amended = True


def _parse_authors(content):
    """Return the authors parsed from *content*, choosing the parser by DTD marker."""
    if "-//NLM//DTD JATS" in content:
        parser = JATSParser()
    else:
        parser = NLMParser()
    return parser.get_authors(parseString(content))


def _author_subfields(author):
    """Build the (code, value) subfield list describing one parsed *author*."""
    if author.get('surname'):
        # Fall back to initials when no given name is available.
        forename = author.get('given_name') or author.get('initials', '')
        subfields = [('a', '%s, %s' % (author.get('surname'), forename))]
    else:
        subfields = [('a', '%s' % (author.get('name', '')))]

    if 'orcid' in author:
        subfields.append(('j', author['orcid']))

    if 'affiliation' in author:
        for aff in author["affiliation"]:
            subfields.append(('v', aff))
        # NOTE(review): presumably derives nation subfields from the
        # affiliations just added; confirm the intended nesting of this call.
        add_nations_field(subfields)

    if author.get('email'):
        subfields.append(('m', author['email']))

    return subfields


def check_records(records):
    """Rebuild the author fields of Springer records from their attached XML.

    For every record accepted by ``is_springer``, each latest attached file
    with format ``.xml`` is read and parsed.  The record's existing ``100``
    and ``700`` fields are removed, the first parsed author is added as a
    ``100__`` field and all remaining authors as ``700__`` fields.  When the
    XML cannot be parsed a warning is attached to the record and the file is
    skipped.

    Compared with the earlier implementation, the XML file handle is now
    closed, the two old fields are deleted independently (so a missing '100'
    no longer leaves stale '700' entries behind), and only ``Exception``
    subclasses are caught.

    :param records: iterable of record objects; each one must provide
        ``record_id``, item deletion, ``warn`` and ``add_field``.
    :return: None; the records are modified in place.
    """
    for record in records:
        if not is_springer(record):
            continue

        latest_files = BibRecDocs(int(record.record_id)).list_latest_files()
        for doc in latest_files:
            if doc.get_format() != '.xml':
                continue

            content = _read_text(doc.get_full_path())

            # NOTE(review): the old fields are dropped before parsing, so a
            # parse failure leaves the record without author fields - confirm.
            _drop_author_fields(record)

            try:
                authors = _parse_authors(content)
            except Exception:
                record.warn('Problem with parsing XML.')
                continue

            for position, author in enumerate(authors):
                # The first author goes to 100__, every later one to 700__.
                tag = '100__' if position == 0 else '700__'
                record.add_field(tag, value='', subfields=_author_subfields(author))
class JATSUtilsTests(unittest.TestCase):
    """Tests for helper methods of ``JATSParser``."""

    def setUp(self):
        """Store a new ``JATSParser`` on the test case as ``jats_parser``."""
        self.jats_parser = JATSParser()

    def test_get_orcid(self):
        """``_get_orcid`` returns the bare identifier from an ORCID contrib-id.

        See http://jats.nlm.nih.gov/archiving/tag-library/1.1d1/n-dsw0.html
        for orcid in contrib tag.
        """
        contrib_markup = """<contrib> <contrib-id contrib-id-type="orcid">http://orcid.org/1792-3336-9172-961X</contrib-id> <name><surname>Fauller</surname> <given-names>Betty Lou</given-names> </name> <degrees>BA, MA</degrees> </contrib>"""
        expected_orcid = '1792-3336-9172-961X'

        contrib_dom = parseString(contrib_markup)
        extracted_orcid = self.jats_parser._get_orcid(contrib_dom)

        self.assertEqual(extracted_orcid, expected_orcid)
def setUp(self):
    """Create a ``JATSParser`` and store it on ``self`` as ``jats_parser``."""
    parser = JATSParser()
    self.jats_parser = parser