def test_experiments_from_693__e__0_and_e():
    """Convert two 693 fields where only the first has a ``0`` subfield.

    The field carrying a ``0`` is expected as a curated entry with a record
    reference; the field without one is expected as a non-curated entry.
    """
    experiments_schema = load_schema('jobs')['properties']['experiments']

    marcxml = (
        '<record>'
        ' <datafield tag="693" ind1=" " ind2=" ">'
        ' <subfield code="e">CERN-LHC-ATLAS</subfield>'
        ' <subfield code="0">1108541</subfield>'
        ' </datafield>'
        ' <datafield tag="693" ind1=" " ind2=" ">'
        ' <subfield code="e">IHEP-CEPC</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1393583/export/xme

    with_reference = {
        'curated_relation': True,
        'name': 'CERN-LHC-ATLAS',
        'record': {
            '$ref': 'http://localhost:5000/api/experiments/1108541',
        },
    }
    without_reference = {
        'curated_relation': False,
        'name': 'IHEP-CEPC',
    }
    wanted = [with_reference, without_reference]

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['experiments'], experiments_schema) is None
    assert wanted == converted['experiments']
def test_contact_details_from_multiple_marcxml_270():
    """Convert two 270 fields into two ``contact_details`` entries.

    The first field has an ``m`` and a ``p`` subfield, the second only a
    ``p``; the second expected entry therefore carries a name only.
    """
    contacts_schema = load_schema('jobs')['properties']['contact_details']

    # NOTE(review): the address literals below look like redaction
    # placeholders rather than real values -- confirm against the record.
    marcxml = (
        '<record> '
        ' <datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="p">Manfred Lindner</subfield>'
        ' </datafield>'
        ' <datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="p">Wynton Marsalis</subfield>'
        ' </datafield>'
        '</record>'
    )

    wanted = [
        {
            'name': 'Manfred Lindner',
            'email': '*****@*****.**',
        },
        {
            'name': 'Wynton Marsalis',
        },
    ]

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['contact_details'], contacts_schema) is None
    assert wanted == converted['contact_details']
def test_institutions_from_110__double_a_z():
    """Convert one 110 field holding two ``a`` and two ``z`` subfields.

    Two curated institutions are expected, each name matched with the
    record reference built from the ``z`` value in the same position.
    """
    institutions_schema = load_schema('jobs')['properties']['institutions']

    marcxml = (
        '<datafield tag="110" ind1=" " ind2=" ">'
        ' <subfield code="a">Indiana U.</subfield>'
        ' <subfield code="a">NIST, Wash., D.C.</subfield>'
        ' <subfield code="z">902874</subfield>'
        ' <subfield code="z">903056</subfield>'
        '</datafield>'
    )  # record/1328021/export/xme

    indiana = {
        'curated_relation': True,
        'name': 'Indiana U.',
        'record': {
            '$ref': 'http://localhost:5000/api/institutions/902874',
        },
    }
    nist = {
        'curated_relation': True,
        'name': 'NIST, Wash., D.C.',
        'record': {
            '$ref': 'http://localhost:5000/api/institutions/903056',
        },
    }
    wanted = [indiana, nist]

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['institutions'], institutions_schema) is None
    assert wanted == converted['institutions']
def test_institutions_from_double_110__a():
    """Convert two 110 fields, each with a single ``a`` subfield.

    With no ``z`` subfield present, both expected institutions are
    non-curated and carry no record reference.
    """
    institutions_schema = load_schema('jobs')['properties']['institutions']

    marcxml = (
        '<record>'
        ' <datafield tag="110" ind1=" " ind2=" ">'
        ' <subfield code="a">Coll. William and Mary</subfield>'
        ' </datafield>'
        ' <datafield tag="110" ind1=" " ind2=" ">'
        ' <subfield code="a">Jefferson Lab</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1427342

    wanted = [
        {'curated_relation': False, 'name': 'Coll. William and Mary'},
        {'curated_relation': False, 'name': 'Jefferson Lab'},
    ]

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['institutions'], institutions_schema) is None
    assert wanted == converted['institutions']
def test_date_closed_from_046__i_and_046__l_an_url():
    """Convert 046 fields where ``i`` is a date and ``l`` holds a URL.

    The ``i`` value is expected as ``deadline_date`` and the URL found in
    ``l`` is expected under ``urls``.
    """
    properties = load_schema('jobs')['properties']
    deadline_schema = properties['deadline_date']
    urls_schema = properties['urls']

    marcxml = (
        '<record>'
        ' <datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="i">2012-06-01</subfield>'
        ' </datafield>'
        ' <datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="l">http://www.pma.caltech.edu/physics-search</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/963314

    wanted_deadline = '2012-06-01'
    wanted_urls = [
        {'value': 'http://www.pma.caltech.edu/physics-search'},
    ]

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['deadline_date'], deadline_schema) is None
    assert wanted_deadline == converted['deadline_date']

    assert validate(converted['urls'], urls_schema) is None
    assert wanted_urls == converted['urls']
def test_contact_details_and_reference_letters_from_270__m_o_p_url():
    """Convert a 270 field whose ``o`` subfield is a URL.

    The ``m``/``p`` pair is expected under ``contact_details`` and the URL
    from ``o`` under ``reference_letters.urls``.
    """
    properties = load_schema('jobs')['properties']
    contacts_schema = properties['contact_details']
    letters_schema = properties['reference_letters']

    # NOTE(review): the address literals below look like redaction
    # placeholders rather than real values -- confirm against the record.
    marcxml = (
        '<datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="o">https://academicjobsonline.org/ajo/jobs/12729</subfield>'
        ' <subfield code="p">Kenichi Hatakeyama</subfield>'
        '</datafield>'
    )  # record/1711401

    wanted_contacts = [
        {
            'name': 'Hatakeyama, Kenichi',
            'email': '*****@*****.**',
        },
    ]
    wanted_letters = {
        'urls': [{
            'value': 'https://academicjobsonline.org/ajo/jobs/12729'
        }],
    }

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['contact_details'], contacts_schema) is None
    assert validate(converted['reference_letters'], letters_schema) is None
    assert wanted_contacts == converted['contact_details']
    assert wanted_letters == converted['reference_letters']
def test_contact_details_and_reference_letters_from_270__m_o_p():
    """Convert a 270 field with one ``m``, one ``o`` and one ``p`` subfield.

    The ``m``/``p`` pair is expected under ``contact_details`` and the
    ``o`` value under ``reference_letters.emails``.
    """
    properties = load_schema('jobs')['properties']
    contacts_schema = properties['contact_details']
    letters_schema = properties['reference_letters']

    # NOTE(review): the address literals below look like redaction
    # placeholders rather than real values -- confirm against the record.
    marcxml = (
        '<datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="o">[email protected]</subfield>'
        ' <subfield code="p">Jess McIver</subfield>'
        '</datafield>'
    )  # record/1736228

    wanted_contacts = [
        {
            'name': 'McIver, Jess',
            'email': '*****@*****.**',
        },
    ]
    wanted_letters = {
        'emails': ['*****@*****.**'],
    }

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['contact_details'], contacts_schema) is None
    assert validate(converted['reference_letters'], letters_schema) is None
    assert wanted_contacts == converted['contact_details']
    assert wanted_letters == converted['reference_letters']
def test_closed_date_from_046__l_fake_date():
    """A 046 ``l`` value of ``0000`` must not produce a ``closed_date``."""
    marcxml = (
        '<datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="l">0000</subfield>'
        '</datafield>'
    )  # record/958863

    converted = jobs.do(create_record(marcxml))

    assert 'closed_date' not in converted
def test_deadline_date_from_046__i__fake_date():
    """A 046 ``i`` value of ``0000`` must not produce a ``deadline_date``."""
    # NOTE(review): a second function with this exact name is defined later
    # in this file and expects '3000' instead. The later definition rebinds
    # the name, so this body is shadowed -- confirm which expectation is
    # current.
    marcxml = (
        '<datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="i">0000</subfield>'
        '</datafield>'
    )  # record/959114

    converted = jobs.do(create_record(marcxml))

    assert 'deadline_date' not in converted
def test_ranks_from_marcxml_656_with_single_a():
    """A single 656 ``a`` of ``Senior`` is expected as ``['SENIOR']``."""
    ranks_schema = load_schema('jobs')['properties']['ranks']

    marcxml = (
        '<datafield tag="656" ind1=" " ind2=" ">'
        ' <subfield code="a">Senior</subfield>'
        '</datafield>'
    )

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['ranks'], ranks_schema) is None
    assert converted['ranks'] == ['SENIOR']
def test_status_from_marcxml_980_JOB():
    """A 980 ``a`` of ``JOB`` is expected to give the status ``open``."""
    status_schema = load_schema('jobs')['properties']['status']

    marcxml = (
        '<datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">JOB</subfield>'
        '</datafield>'
    )  # /record/1736229

    wanted = 'open'

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['status'], status_schema) is None
    assert wanted == converted['status']
def test_regions_from_043__a_corrects_misspellings():
    """A 043 ``a`` of ``United States`` is expected as ``North America``."""
    regions_schema = load_schema('jobs')['properties']['regions']

    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        ' <subfield code="a">United States</subfield>'
        '</datafield>'
    )

    wanted = ['North America']

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['regions'], regions_schema) is None
    assert wanted == converted['regions']
def test_position_from_245__a():
    """The 245 ``a`` value is expected unchanged as ``position``."""
    position_schema = load_schema('jobs')['properties']['position']

    marcxml = (
        '<datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">Neutrino Physics</subfield>'
        '</datafield>'
    )  # record/1467312

    wanted = 'Neutrino Physics'

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['position'], position_schema) is None
    assert wanted == converted['position']
def test_closed_date_from_046__l():
    """A dated 046 ``l`` subfield is expected unchanged as ``closed_date``."""
    closed_schema = load_schema('jobs')['properties']['closed_date']

    marcxml = (
        '<datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="l">2008-02-11</subfield>'
        '</datafield>'
    )  # record/934304

    wanted = '2008-02-11'

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['closed_date'], closed_schema) is None
    assert wanted == converted['closed_date']
def test_deadline_date_from_046__i():
    """A dated 046 ``i`` subfield is expected unchanged as ``deadline_date``."""
    deadline_schema = load_schema('jobs')['properties']['deadline_date']

    marcxml = (
        '<datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="i">2015-12-15</subfield>'
        '</datafield>'
    )  # record/1310294

    wanted = '2015-12-15'

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['deadline_date'], deadline_schema) is None
    assert wanted == converted['deadline_date']
def test_regions_from_043__a_splits_on_commas():
    """A comma-separated 043 ``a`` value is expected as separate regions."""
    regions_schema = load_schema('jobs')['properties']['regions']

    marcxml = (
        '<datafield tag="043" ind1=" " ind2=" ">'
        ' <subfield code="a">Asia, North America</subfield>'
        '</datafield>'
    )

    wanted = ['Asia', 'North America']

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['regions'], regions_schema) is None
    assert wanted == converted['regions']
def test_deadline_date_from_046__i__fake_date():
    """A 046 ``i`` value of ``0000`` is expected as deadline ``3000``."""
    # NOTE(review): an earlier function in this file has this exact name
    # and asserts that 'deadline_date' is absent. This later definition
    # rebinds the name and shadows it -- confirm the earlier one is obsolete.
    deadline_schema = load_schema('jobs')['properties']['deadline_date']

    marcxml = (
        '<datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="i">0000</subfield>'
        '</datafield>'
    )  # record/959114

    converted = jobs.do(create_record(marcxml))

    wanted = '3000'

    assert validate(converted['deadline_date'], deadline_schema) is None
    assert wanted == converted['deadline_date']
def test_description_from_520__a():
    """The 520 ``a`` value, ``<br />`` tags included, becomes ``description``."""
    description_schema = load_schema('jobs')['properties']['description']

    marcxml = (
        '<datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a">(1) Conduct independent research in string theory related theoretical sciences;<br /> <br /> (2) Advising graduate students in their research;<br /> <br /> (3) A very small amount of teaching of undergraduate courses.&nbsp;</subfield>'
        '</datafield>'
    )  # record/1239755

    wanted = '(1) Conduct independent research in string theory related theoretical sciences;<br /> <br /> (2) Advising graduate students in their research;<br /> <br /> (3) A very small amount of teaching of undergraduate courses. '

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['description'], description_schema) is None
    assert wanted == converted['description']
def test_description_from_520__a_sanitizes_html():
    """Check the ``description`` produced from a 520 ``a`` with rich HTML.

    The input contains an XML-declaration comment, ``<div>`` wrappers,
    ``<br />`` tags and ``&nbsp;``/``&rsquo;`` entities. The expected text
    has no comment and no ``<div>`` tags, uses ``<br>`` instead of
    ``<br />``, and keeps the ``<a href=...>`` links.
    """
    description_schema = load_schema('jobs')['properties']['description']

    marcxml = (
        '<datafield tag="520" ind1=" " ind2=" ">'
        ' <subfield code="a"><!--?xml version="1.0" encoding="UTF-8"?--> <div> There is an opening for a software developer to contribute to CMS workflow management software development, including the evolution of the software in preparation for the HL LHC era. &nbsp;The qualifications expected of a successful candidate for this position are listed in the posting linked below. &nbsp;This position will be part of the Notre Dame Center for Research Computing scientific workflows and dynamic distributed applications team. This team collaborates with faculty and researchers within the Notre Dame community on challenges involving leveraging distributed computing resources to accelerate scientific discovery. The group focuses on topics like managing complex scientific workflows involving large amounts of data across one or more distributed systems, porting scientific workflows to new computational environments, such as some of the world&rsquo;s largest HPC facilities or commercial cloud resources, and developing tools to enable the creation of dynamic scientific applications that scale from laptop to cluster to cloud.</div> <div> <br /> Job posting (qualifications and link to apply):&nbsp;<a href="https://jobs.nd.edu/postings/15810">https://jobs.nd.edu/postings/15810</a><br /> <br /> Notre Dame scientific workflows and dynamic distributed applications team webpage: &nbsp;<a href="http://workflow-team.crc.nd.edu/">http://workflow-team.crc.nd.edu</a></div> <br /></subfield>'
        '</datafield>'
    )  # record/1239755

    # The expected text contains a line break after the first sentence. A
    # single-quoted literal cannot span physical lines, so the break is
    # written as an explicit escape and the literal is split right after it.
    # NOTE(review): confirm that the character at this position really is a
    # plain newline in the converter's output.
    wanted = (
        'There is an opening for a software developer to contribute to CMS workflow management software development, including the evolution of the software in preparation for the HL LHC era. \n'
        'The qualifications expected of a successful candidate for this position are listed in the posting linked below. This position will be part of the Notre Dame Center for Research Computing scientific workflows and dynamic distributed applications team. This team collaborates with faculty and researchers within the Notre Dame community on challenges involving leveraging distributed computing resources to accelerate scientific discovery. The group focuses on topics like managing complex scientific workflows involving large amounts of data across one or more distributed systems, porting scientific workflows to new computational environments, such as some of the world’s largest HPC facilities or commercial cloud resources, and developing tools to enable the creation of dynamic scientific applications that scale from laptop to cluster to cloud. <br> Job posting (qualifications and link to apply): <a href="https://jobs.nd.edu/postings/15810">https://jobs.nd.edu/postings/15810</a><br> <br> Notre Dame scientific workflows and dynamic distributed applications team webpage: <a href="http://workflow-team.crc.nd.edu/">http://workflow-team.crc.nd.edu</a> <br>'
    )

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['description'], description_schema) is None
    assert wanted == converted['description']
def test_arxiv_categories_from_65017__a():
    """A 650 (ind1=1, ind2=7) ``a`` of ``hep-ex`` is expected unchanged."""
    categories_schema = load_schema('jobs')['properties']['arxiv_categories']

    marcxml = (
        '<datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="a">hep-ex</subfield>'
        '</datafield>'
    )  # record/1736229

    wanted = ['hep-ex']

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['arxiv_categories'], categories_schema) is None
    assert wanted == converted['arxiv_categories']
def test_arxiv_categories_from_65017__a_physics_acc_phys():
    """``physics.acc-phys`` in 650 ``a`` is expected as ``physics.acc-ph``."""
    categories_schema = load_schema('jobs')['properties']['arxiv_categories']

    marcxml = (
        '<record>'
        ' <datafield tag="650" ind1="1" ind2="7">'
        ' <subfield code="a">physics.acc-phys</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1731774

    wanted = ['physics.acc-ph']

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['arxiv_categories'], categories_schema) is None
    assert wanted == converted['arxiv_categories']
def test_experiments_from_693__e():
    """A 693 field with only ``e`` is expected as one non-curated entry."""
    experiments_schema = load_schema('jobs')['properties']['experiments']

    marcxml = (
        '<datafield tag="693" ind1=" " ind2=" ">'
        ' <subfield code="e">ALIGO</subfield>'
        '</datafield>'
    )  # record/1375852

    wanted = [
        {'curated_relation': False, 'name': 'ALIGO'},
    ]

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['experiments'], experiments_schema) is None
    assert wanted == converted['experiments']
def test_contact_details_from_marcxml_270_single_p_double_m():
    """Convert a 270 field with two ``m`` subfields and a single ``p``.

    The one expected contact entry carries the name only, without an email.
    """
    contacts_schema = load_schema('jobs')['properties']['contact_details']

    # NOTE(review): the address literals below look like redaction
    # placeholders rather than real values -- confirm against the record.
    marcxml = (
        '<datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="p">Manfred Lindner</subfield>'
        '</datafield>'
    )

    wanted = [
        {'name': 'Manfred Lindner'},
    ]

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['contact_details'], contacts_schema) is None
    assert wanted == converted['contact_details']
def test_experiments_from_triple_693__e__0():
    """Convert three 693 fields, each carrying both ``e`` and ``0``.

    Three curated entries with record references are expected, in the same
    order as the input fields.
    """
    experiments_schema = load_schema('jobs')['properties']['experiments']

    marcxml = (
        '<record>'
        ' <datafield tag="693" ind1=" " ind2=" ">'
        ' <subfield code="e">CERN-NA-049</subfield>'
        ' <subfield code="0">1110308</subfield>'
        ' </datafield>'
        ' <datafield tag="693" ind1=" " ind2=" ">'
        ' <subfield code="e">CERN-NA-061</subfield>'
        ' <subfield code="0">1108234</subfield>'
        ' </datafield>'
        ' <datafield tag="693" ind1=" " ind2=" ">'
        ' <subfield code="e">CERN-LHC-ALICE</subfield>'
        ' <subfield code="0">1110642</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1469159

    na49 = {
        'curated_relation': True,
        'name': 'CERN-NA-049',
        'record': {
            '$ref': 'http://localhost:5000/api/experiments/1110308',
        },
    }
    na61 = {
        'curated_relation': True,
        'name': 'CERN-NA-061',
        'record': {
            '$ref': 'http://localhost:5000/api/experiments/1108234',
        },
    }
    alice = {
        'curated_relation': True,
        'name': 'CERN-LHC-ALICE',
        'record': {
            '$ref': 'http://localhost:5000/api/experiments/1110642',
        },
    }
    wanted = [na49, na61, alice]

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['experiments'], experiments_schema) is None
    assert wanted == converted['experiments']
def test_ranks_from_marcxml_double_656():
    """Two 656 fields are expected as two upper-cased ranks, in input order."""
    ranks_schema = load_schema('jobs')['properties']['ranks']

    marcxml = (
        '<record>'
        ' <datafield tag="656" ind1=" " ind2=" ">'
        ' <subfield code="a">Senior</subfield>'
        ' </datafield>'
        ' <datafield tag="656" ind1=" " ind2=" ">'
        ' <subfield code="a">Junior</subfield>'
        ' </datafield>'
        '</record>'
    )

    wanted = ['SENIOR', 'JUNIOR']

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['ranks'], ranks_schema) is None
    assert wanted == converted['ranks']
def test_position_from_245__a_with_external_job_identifier():
    """Convert a 245 ``a`` title that ends in a parenthesised identifier.

    The title text is expected as ``position`` and the parenthesised number
    as ``external_job_identifier``.
    """
    properties = load_schema('jobs')['properties']
    position_schema = properties['position']
    identifier_schema = properties['external_job_identifier']

    marcxml = (
        '<datafield tag="245" ind1=" " ind2=" ">'
        ' <subfield code="a">Director of Accelerator Operations (12010)</subfield>'
        '</datafield>'
    )  # record/1467312

    wanted_position = 'Director of Accelerator Operations'
    wanted_identifier = '12010'

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['position'], position_schema) is None
    assert validate(
        converted['external_job_identifier'], identifier_schema) is None
    assert wanted_position == converted['position']
    assert wanted_identifier == converted['external_job_identifier']
def test_accelerator_experiments_from_693__e__0():
    """Convert a 693 field with ``e`` and ``0`` to ``accelerator_experiments``.

    One curated entry is expected, with the ``e`` value under
    ``legacy_name`` and a record reference built from ``0``.
    """
    accelerator_schema = (
        load_schema('jobs')['properties']['accelerator_experiments'])

    marcxml = (
        '<datafield tag="693" ind1=" " ind2=" ">'
        ' <subfield code="e">CERN-LHC-ATLAS</subfield>'
        ' <subfield code="0">1108541</subfield>'
        '</datafield>'
    )  # record/1332138

    wanted = [
        {
            'curated_relation': True,
            'legacy_name': 'CERN-LHC-ATLAS',
            'record': {
                '$ref': 'http://localhost:5000/api/experiments/1108541',
            },
        },
    ]

    converted = jobs.do(create_record(marcxml))

    assert validate(
        converted['accelerator_experiments'], accelerator_schema) is None
    assert wanted == converted['accelerator_experiments']
def test_contact_details_and_reference_letters_from_270__m_o_p_repeated():
    """Convert a 270 field with repeated ``m``, ``o`` and ``p`` subfields.

    Two contact entries are expected, plus both ``o`` values under
    ``reference_letters.emails``.
    """
    properties = load_schema('jobs')['properties']
    contacts_schema = properties['contact_details']
    letters_schema = properties['reference_letters']

    # NOTE(review): the address literals below look like redaction
    # placeholders rather than real values -- confirm against the record.
    marcxml = (
        '<datafield tag="270" ind1=" " ind2=" ">'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="m">[email protected]</subfield>'
        ' <subfield code="o">[email protected]</subfield>'
        ' <subfield code="o">[email protected]</subfield>'
        ' <subfield code="p">Mikhail Zubkov</subfield>'
        ' <subfield code="p">Meir Lewkowicz</subfield>'
        '</datafield>'
    )  # record/1717472

    wanted_contacts = [
        {
            'name': 'Zubkov, Mikhail',
            'email': '*****@*****.**',
        },
        {
            'name': 'Lewkowicz, Meir',
            'email': '*****@*****.**',
        },
    ]
    wanted_letters = {
        'emails': ['*****@*****.**', '*****@*****.**'],
    }

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['contact_details'], contacts_schema) is None
    assert validate(converted['reference_letters'], letters_schema) is None
    assert wanted_contacts == converted['contact_details']
    assert wanted_letters == converted['reference_letters']
def test_date_closed_from_046__i_and_046__l_an_email():
    """Convert 046 fields where ``l`` holds an address and ``i`` a year.

    The ``i`` value is expected as ``deadline_date`` and the ``l`` value is
    expected under ``reference_email``.
    """
    properties = load_schema('jobs')['properties']
    deadline_schema = properties['deadline_date']
    reference_email_schema = properties['reference_email']

    # NOTE(review): the address literals below look like redaction
    # placeholders rather than real values -- confirm against the record.
    marcxml = (
        '<record>'
        ' <datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="l">[email protected]</subfield>'
        ' </datafield>'
        ' <datafield tag="046" ind1=" " ind2=" ">'
        ' <subfield code="i">8888</subfield>'
        ' </datafield>'
        '</record>'
    )  # record/1089529

    wanted_deadline = '8888'
    wanted_reference_email = ['*****@*****.**']

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['deadline_date'], deadline_schema) is None
    assert wanted_deadline == converted['deadline_date']

    assert validate(
        converted['reference_email'], reference_email_schema) is None
    assert wanted_reference_email == converted['reference_email']
def test_deleted_and_status_from_marcxml_980_a_c():
    """Convert two 980 fields: ``a`` = ``JOB`` and ``c`` = ``DELETED``.

    The expected output has ``status`` equal to ``open`` and ``deleted``
    set to ``True``.
    """
    properties = load_schema('jobs')['properties']
    status_schema = properties['status']
    deleted_schema = properties['deleted']

    marcxml = (
        '<record>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="a">JOB</subfield>'
        ' </datafield>'
        ' <datafield tag="980" ind1=" " ind2=" ">'
        ' <subfield code="c">DELETED</subfield>'
        ' </datafield>'
        '</record>'
    )  # /record/1253987

    wanted_status = 'open'
    wanted_deleted = True

    converted = jobs.do(create_record(marcxml))

    assert validate(converted['status'], status_schema) is None
    assert validate(converted['deleted'], deleted_schema) is None
    assert wanted_status == converted['status']
    assert wanted_deleted == converted['deleted']