def test_author_conversion_with_no_v(self):
    """Check the 100 author field after conversion when the input has $v.

    The input record has a single 100 field with ``$a Mokhov, N.V.`` and
    ``$v Fermilab``. For every converted record the 100 $v lookup must be
    empty and the 100 $a lookup must be exactly ``Mokhov, N V``.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="100" ind1=" " ind2=" "> '
        '<subfield code="a">Mokhov, N.V.</subfield> '
        '<subfield code="v">Fermilab</subfield> '
        '</datafield> '
        '</record></collection> '
    )

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()

        v_values = record_get_field_values(cds_record, tag="100", code="v")
        self.assertEqual(v_values, [])

        a_values = record_get_field_values(cds_record, tag="100", code="a")
        self.assertEqual(a_values, ["Mokhov, N V"])
def test_link_conversion_with_no_w(self):
    """Check the 856 4_ link field after conversion when the input has $w.

    The input link carries $u (URL), $w and $y. For every converted record
    the 856 4_ $w lookup must be empty while $u still holds the same URL.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="856" ind1="4" ind2=" "> '
        '<subfield code="u">http://www.adsabs.harvard.edu/abs/1990NuPhS..13..535R</subfield> '
        '<subfield code="w">1990NuPhS..13..535R</subfield> '
        '<subfield code="y">ADSABS</subfield> '
        '</datafield> '
        '</record></collection> '
    )
    expected_urls = ["http://www.adsabs.harvard.edu/abs/1990NuPhS..13..535R"]

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()

        w_values = record_get_field_values(
            cds_record, tag="856", ind1="4", code="w")
        self.assertEqual(w_values, [])

        u_values = record_get_field_values(
            cds_record, tag="856", ind1="4", code="u")
        self.assertEqual(u_values, expected_urls)
def test_experiments_conversion_with_no_a(self):
    """Check the 693 experiment field when the result has no $a.

    The input has a single 693 field with ``$e CERN-CAST``. For every
    converted record the 693 $a lookup must be empty and 693 $e must be
    exactly ``CAST``.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="693" ind1=" " ind2=" "> '
        '<subfield code="e">CERN-CAST</subfield> '
        '</datafield> '
        '</record></collection> '
    )

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()

        a_values = record_get_field_values(cds_record, tag="693", code="a")
        self.assertEqual(a_values, [])

        e_values = record_get_field_values(cds_record, tag="693", code="e")
        self.assertEqual(e_values, ["CAST"])
def test_pubnote_conversion_with_pos_special_case(self):
    """Check 773 $p and $v for a publication note whose journal is PoS.

    The input 773 field has ``$p PoS`` and ``$v QFTHEP2011`` (plus $c, $w
    and $y). For every converted record 773 $p must be ``PoS`` and 773 $v
    must be ``QFTHEP2011``.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="773" ind1=" " ind2=" "> '
        '<subfield code="c">018</subfield> '
        '<subfield code="p">PoS</subfield> '
        '<subfield code="v">QFTHEP2011</subfield> '
        '<subfield code="w">C11-09-24</subfield> '
        '<subfield code="y">2013</subfield> '
        '</datafield> '
        '</record></collection> '
    )

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()

        p_values = record_get_field_values(cds_record, tag="773", code="p")
        self.assertEqual(p_values, ["PoS"])

        v_values = record_get_field_values(cds_record, tag="773", code="v")
        self.assertEqual(v_values, ["QFTHEP2011"])
def test_ignore_999(self):
    """Check that the tag 999 lookup is empty after conversion.

    The input record holds three 999 fields with different indicators
    (``C5``, blank/blank and blank/``3``). For every converted record,
    looking up tag 999 must return an empty list.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        ' <collection> <record> '
        '<datafield tag="999" ind1="C" ind2="5"> '
        '<subfield code="h">I. Krajcar Bronić, B. Grosswendt Nuclear</subfield> '
        '<subfield code="m">Instruments and Methods in Physics Research Section B 142 , p. 219 Article | PDF (618 K) | View Record in Scopus | Citing articles (12)</subfield> '
        '<subfield code="o">16</subfield> '
        '<subfield code="y">1998</subfield> '
        '</datafield> '
        '<datafield tag="999" ind1=" " ind2=" "> '
        '<subfield code="o">16</subfield> '
        '<subfield code="y">1998</subfield> '
        '</datafield> '
        '<datafield tag="999" ind1=" " ind2="3"> '
        '<subfield code="o">16</subfield> '
        '<subfield code="y">1998</subfield> '
        '</datafield> '
        '</record> </collection> '
    )

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()
        remaining_999 = record_get_field_values(cds_record, tag="999")
        self.assertEqual(remaining_999, [])
def test_experiments_conversion_with_leading_zero(self):
    """Check the 693 experiment field for an input of ``CERN-RD-053``.

    For every converted record 693 $a must be ``Not applicable`` and
    693 $e must be ``RD53`` (no leading zero in the number).
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="693" ind1=" " ind2=" "> '
        '<subfield code="e">CERN-RD-053</subfield> '
        '</datafield> '
        '</record></collection> '
    )

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()

        a_values = record_get_field_values(cds_record, tag="693", code="a")
        self.assertEqual(a_values, ["Not applicable"])

        e_values = record_get_field_values(cds_record, tag="693", code="e")
        self.assertEqual(e_values, ["RD53"])
def test_add_collaboration_to_710g(self):
    """Check that 710 $g values end in ' Collaboration' after conversion.

    The input has 710 $g values ``ATLAS`` and ``CMS Collaboration`` plus an
    unrelated 800 $g. For every converted record the 710 $g lookup must be
    ``["ATLAS Collaboration", "CMS Collaboration"]``; the 800 field is not
    inspected.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        ' <collection> <record> '
        '<datafield tag="710" ind1=" " ind2=" "> '
        '<subfield code="g">ATLAS</subfield> '
        '</datafield> '
        '<datafield tag="710" ind1=" " ind2=" "> '
        '<subfield code="g">CMS Collaboration</subfield> '
        '</datafield> '
        '<datafield tag="800" ind1=" " ind2=" "> '
        '<subfield code="g">another field</subfield> '
        '</datafield> '
        '</record> </collection> '
    )
    expected_names = ["ATLAS Collaboration", "CMS Collaboration"]

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()
        collaboration_names = record_get_field_values(
            cds_record, tag="710", code="g")
        self.assertEqual(collaboration_names, expected_names)
def test_article_to_publication(self):
    """Check that 542 $e is exposed as 542 $3, with 'Article' rewritten.

    Two records are converted. The first has ``$a Another Field`` and
    ``$e Article``: its 542 $3 must be ``publication`` and its $a must be
    unchanged. The second has ``$e AnotherValue``: its 542 $3 must be
    ``AnotherValue``.

    NOTE(review): this file contains a later definition with the same
    name; if both sit in the same class, the later one replaces this
    method and this test never runs.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        ' <collection> <record> '
        '<datafield tag="542" ind1=" " ind2=" "> '
        '<subfield code="a">Another Field</subfield> '
        '<subfield code="e">Article</subfield> '
        '</datafield> '
        '</record> <record> '
        '<datafield tag="542" ind1=" " ind2=" "> '
        '<subfield code="e">AnotherValue</subfield> '
        '</datafield> '
        '</record> </collection> '
    )

    # Unpacking requires the source to yield exactly two records.
    first, second = Inspire2CDS.from_source(xml)

    first_cds = first.get_record()
    first_3 = record_get_field_values(first_cds, tag="542", code="3")
    self.assertEqual(first_3, ["publication"])
    first_a = record_get_field_values(first_cds, tag="542", code="a")
    self.assertEqual(first_a, ["Another Field"])

    second_cds = second.get_record()
    second_3 = record_get_field_values(second_cds, tag="542", code="3")
    self.assertEqual(second_3, ["AnotherValue"])
def test_thesis_conversion_supervisors(self):
    """Check that 701 $a names of a THESIS record end up in 700 with $e.

    The input has two 701 $a names and ``980 $a THESIS``. For every
    converted record 980 $a must stay ``THESIS``, 700 $a must list both
    names in order, and 700 $e must be ``dir.`` for each of them.

    NOTE(review): this file contains a later definition with the same
    name; if both sit in the same class, the later one replaces this
    method and this test never runs.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="701" ind1=" " ind2=" "> '
        '<subfield code="a">Besançon, Marc</subfield> '
        '</datafield> '
        '<datafield tag="701" ind1=" " ind2=" "> '
        '<subfield code="a">Ferri, Frederico</subfield> '
        '</datafield> '
        '<datafield tag="980" ind1=" " ind2=" "> '
        '<subfield code="a">THESIS</subfield> '
        '</datafield> '
        '</record></collection> '
    )

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()

        collections = record_get_field_values(cds_record, tag="980", code="a")
        self.assertEqual(collections, ["THESIS"])

        names_700 = record_get_field_values(cds_record, tag="700", code="a")
        self.assertEqual(names_700, ["Besançon, Marc", "Ferri, Frederico"])

        roles_700 = record_get_field_values(cds_record, tag="700", code="e")
        self.assertEqual(roles_700, ["dir.", "dir."])
def test_thesis_conversion(self):
    """Check how the 502 subfields of a THESIS record are re-lettered.

    The input 502 field has ``$b Diploma``, ``$c Freiburg U.`` and
    ``$d 2005``. For every converted record 980 $a must stay ``THESIS``
    and the 502 values must appear one letter earlier: $a ``Diploma``,
    $b ``Freiburg U.``, $c ``2005``.

    NOTE(review): this file contains a later definition with the same
    name; if both sit in the same class, the later one replaces this
    method and this test never runs.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="502" ind1=" " ind2=" "> '
        '<subfield code="b">Diploma</subfield> '
        '<subfield code="c">Freiburg U.</subfield> '
        '<subfield code="d">2005</subfield> '
        '</datafield> '
        '<datafield tag="980" ind1=" " ind2=" "> '
        '<subfield code="a">THESIS</subfield> '
        '</datafield> '
        '</record></collection> '
    )

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()

        collections = record_get_field_values(cds_record, tag="980", code="a")
        self.assertEqual(collections, ["THESIS"])

        values_502a = record_get_field_values(cds_record, tag="502", code="a")
        self.assertEqual(values_502a, ["Diploma"])

        values_502b = record_get_field_values(cds_record, tag="502", code="b")
        self.assertEqual(values_502b, ["Freiburg U."])

        values_502c = record_get_field_values(cds_record, tag="502", code="c")
        self.assertEqual(values_502c, ["2005"])
def test_article_773(self):
    """Check the 980 $a value produced for three different 773 inputs.

    * Record 1: 773 with $c/$p/$v/$y and ``980 $a PUBLISHED`` must give
      ``["ARTICLE"]``.
    * Record 2: the same 773 with ``980 $a CONFERENCEPAPER`` must give
      ``ConferencePaper`` and ``ARTICLE`` (compared order-insensitively).
    * Record 3: 773 with only $w and no 980 must give ``["PREPRINT"]``.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        ' <collection> <record> '
        '<datafield tag="773" ind1=" " ind2=" "> '
        '<subfield code="x">Phys. Rev. D 91 (2015) 021302 (Rapid Communication)</subfield> '
        '<subfield code="v">D91</subfield> '
        '<subfield code="p">Phys.Rev.</subfield> '
        '<subfield code="y">2015</subfield> '
        '<subfield code="c">021302</subfield> '
        '</datafield> '
        '<datafield tag="980" ind1=" " ind2=" "> '
        '<subfield code="a">PUBLISHED</subfield> '
        '</datafield> '
        '</record> <record> '
        '<datafield tag="773" ind1=" " ind2=" "> '
        '<subfield code="x">Phys. Rev. D 91 (2015) 021302 (Rapid Communication)</subfield> '
        '<subfield code="v">D91</subfield> '
        '<subfield code="p">Phys.Rev.</subfield> '
        '<subfield code="y">2015</subfield> '
        '<subfield code="c">021302</subfield> '
        '</datafield> '
        '<datafield tag="980" ind1=" " ind2=" "> '
        '<subfield code="a">CONFERENCEPAPER</subfield> '
        '</datafield> '
        '</record> <record> '
        '<datafield tag="773" ind1=" " ind2=" "> '
        '<subfield code="w">C10-09-06.10</subfield> '
        '</datafield> '
        '</record> </collection> '
    )

    parsed = list(Inspire2CDS.from_source(xml))

    # Record 1: full publication note, marked PUBLISHED.
    published_cds = parsed[0].get_record()
    published_980a = record_get_field_values(
        published_cds, tag="980", code="a")
    self.assertEqual(published_980a, ["ARTICLE"])

    # Record 2: full publication note, marked CONFERENCEPAPER.
    conference_cds = parsed[1].get_record()
    conference_980a = record_get_field_values(
        conference_cds, tag="980", code="a")
    self.assertEqual(
        sorted(conference_980a), sorted(["ConferencePaper", "ARTICLE"]))

    # Record 3: 773 carries only $w.
    bare_cds = parsed[2].get_record()
    bare_980a = record_get_field_values(bare_cds, tag="980", code="a")
    self.assertEqual(bare_980a, ["PREPRINT"])
def test_article_to_publication(self):
    """Check subfield-level conversion of 542 $e into 542 $3.

    Record 1 holds three 542 fields, checked in order:

    * ``$g, $e Not Article, $a`` -> ``$e`` becomes ``$3`` in place with its
      value unchanged;
    * ``$a, $e Article`` -> ``$e`` becomes ``$3 publication``;
    * ``$a`` only -> left as is.

    Record 2 holds a single 542 ``$e AnotherValue`` which must become
    ``$3 AnotherValue``.

    NOTE(review): an earlier definition with the same name exists in this
    file; if both sit in the same class, only this later one runs.
    """
    # Only the instance-level helpers are needed here; the value-level
    # helper that used to be imported alongside them was never used.
    from harvestingkit.bibrecord import (
        field_get_subfield_instances,
        record_get_field_instances,
    )
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        ' <collection> <record> '
        '<datafield tag="542" ind1=" " ind2=" "> '
        '<subfield code="g">Another Field</subfield> '
        '<subfield code="e">Not Article</subfield> '
        '<subfield code="a">Another Field</subfield> '
        '</datafield> '
        '<datafield tag="542" ind1=" " ind2=" "> '
        '<subfield code="a">Another Field</subfield> '
        '<subfield code="e">Article</subfield> '
        '</datafield> '
        '<datafield tag="542" ind1=" " ind2=" "> '
        '<subfield code="a">Only `a` subfield</subfield> '
        '</datafield> '
        '</record> <record> '
        '<datafield tag="542" ind1=" " ind2=" "> '
        '<subfield code="e">AnotherValue</subfield> '
        '</datafield> '
        '</record> </collection> '
    )

    # Unpacking requires the source to yield exactly two records.
    rec1, rec2 = Inspire2CDS.from_source(xml)

    converted_rec1 = rec1.get_record()
    rec1_542_fields = record_get_field_instances(converted_rec1, '542')

    rec1_542_0_subs = field_get_subfield_instances(rec1_542_fields[0])
    self.assertEqual(
        rec1_542_0_subs,
        [('g', 'Another Field'),
         ('3', 'Not Article'),
         ('a', 'Another Field')])

    rec1_542_1_subs = field_get_subfield_instances(rec1_542_fields[1])
    self.assertEqual(
        rec1_542_1_subs,
        [('a', 'Another Field'), ('3', 'publication')])

    rec1_542_2_subs = field_get_subfield_instances(rec1_542_fields[2])
    self.assertEqual(
        rec1_542_2_subs,
        [('a', 'Only `a` subfield')])

    converted_rec2 = rec2.get_record()
    rec2_542_fields = record_get_field_instances(converted_rec2, '542')
    rec2_542_0_subs = field_get_subfield_instances(rec2_542_fields[0])
    self.assertEqual(
        rec2_542_0_subs,
        [('3', 'AnotherValue')])
def test_conference_info_date_parsing(self):
    """Check the 111 $d and $g values built from conference dates.

    The input CONFERENCES record has five 111 fields: two with both $x and
    $y plus $c, one with $x and $c only, one with $y only and one with $x
    only. For every converted record:

    * 111 $d must contain exactly ``21-24 Mar 2016`` and
      ``30 Mar-03 Apr 2016``;
    * 111 $g must contain exactly ``somewhere20160330``,
      ``somewhere20160321`` and ``someplace20160521``.

    Order is not significant for either list.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="111" ind1=" " ind2=" "> '
        '<subfield code="x">2016-03-21</subfield> '
        '<subfield code="y">2016-03-24</subfield> '
        '<subfield code="c">Somewhere, Someplace</subfield> '
        '</datafield> '
        '<datafield tag="111" ind1=" " ind2=" "> '
        '<subfield code="x">2016-03-30</subfield> '
        '<subfield code="y">2016-04-03</subfield> '
        '<subfield code="c">Somewhere</subfield> '
        '</datafield> '
        '<datafield tag="111" ind1=" " ind2=" "> '
        '<subfield code="x">2016-05-21</subfield> '
        '<subfield code="c">Someplace</subfield> '
        '</datafield> '
        '<datafield tag="111" ind1=" " ind2=" "> '
        '<subfield code="y">2016-03-21</subfield> '
        '</datafield> '
        '<datafield tag="111" ind1=" " ind2=" "> '
        '<subfield code="x">2016-03-24</subfield> '
        '</datafield> '
        '<datafield tag="980" ind1=" " ind2=" "> '
        '<subfield code="a">CONFERENCES</subfield> '
        '</datafield> '
        '</record> </collection> '
    )

    for record in Inspire2CDS.from_source(xml):
        converted_record = record.get_record()

        # Use TestCase assertions instead of bare ``assert``: they are not
        # stripped under ``python -O`` and report the differing values on
        # failure. Comparing sorted lists is equivalent to the former
        # "length == N and each of N distinct values is present" checks.
        fields_111_d = record_get_field_values(
            converted_record, tag="111", code="d")
        self.assertEqual(
            sorted(fields_111_d),
            sorted(['21-24 Mar 2016', '30 Mar-03 Apr 2016']))

        fields_111_g = record_get_field_values(
            converted_record, tag="111", code="g")
        self.assertEqual(
            sorted(fields_111_g),
            sorted(['somewhere20160330',
                    'somewhere20160321',
                    'someplace20160521']))
def test_thesis_conversion(self):
    """Check how the 502 subfields of a THESIS record are re-lettered.

    The input 502 field has ``$b Diploma``, ``$c Freiburg U.`` and
    ``$d 2005``. For every converted record 980 $a must stay ``THESIS``
    and the 502 values must appear one letter earlier: $a ``Diploma``,
    $b ``Freiburg U.``, $c ``2005``.

    NOTE(review): an earlier definition with the same name exists in this
    file; if both sit in the same class, only this later one runs.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="502" ind1=" " ind2=" "> '
        '<subfield code="b">Diploma</subfield> '
        '<subfield code="c">Freiburg U.</subfield> '
        '<subfield code="d">2005</subfield> '
        '</datafield> '
        '<datafield tag="980" ind1=" " ind2=" "> '
        '<subfield code="a">THESIS</subfield> '
        '</datafield> '
        '</record></collection> '
    )

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()

        collections = record_get_field_values(cds_record, tag="980", code="a")
        self.assertEqual(collections, ["THESIS"])

        degree = record_get_field_values(cds_record, tag="502", code="a")
        self.assertEqual(degree, ["Diploma"])

        institution = record_get_field_values(cds_record, tag="502", code="b")
        self.assertEqual(institution, ["Freiburg U."])

        year = record_get_field_values(cds_record, tag="502", code="c")
        self.assertEqual(year, ["2005"])
def test_thesis_conversion_supervisors(self):
    """Check that 701 $a names of a THESIS record end up in 700 with $e.

    The input has two 701 $a names and ``980 $a THESIS``. For every
    converted record 980 $a must stay ``THESIS``, 700 $a must list both
    names in order, and 700 $e must be ``dir.`` for each of them.

    NOTE(review): an earlier definition with the same name exists in this
    file; if both sit in the same class, only this later one runs.
    """
    from harvestingkit.bibrecord import record_get_field_values
    from harvestingkit.inspire_cds_package.from_inspire import Inspire2CDS

    xml = (
        '<collection> <record> '
        '<datafield tag="701" ind1=" " ind2=" "> '
        '<subfield code="a">Besançon, Marc</subfield> '
        '</datafield> '
        '<datafield tag="701" ind1=" " ind2=" "> '
        '<subfield code="a">Ferri, Frederico</subfield> '
        '</datafield> '
        '<datafield tag="980" ind1=" " ind2=" "> '
        '<subfield code="a">THESIS</subfield> '
        '</datafield> '
        '</record></collection> '
    )
    expected_names = ["Besançon, Marc", "Ferri, Frederico"]
    expected_roles = ["dir.", "dir."]

    for source_record in Inspire2CDS.from_source(xml):
        cds_record = source_record.get_record()

        collections = record_get_field_values(cds_record, tag="980", code="a")
        self.assertEqual(collections, ["THESIS"])

        names_700 = record_get_field_values(cds_record, tag="700", code="a")
        self.assertEqual(names_700, expected_names)

        roles_700 = record_get_field_values(cds_record, tag="700", code="e")
        self.assertEqual(roles_700, expected_roles)