def test_arxiv_to_classic(self):
    """Serialize every arXiv OAI fixture and compare it with its tagged file.

    Each ``data/arxiv.test/oai*`` file next to this module is parsed with
    ``arxiv.ArxivParser`` and written with ``classic.Tagged`` into an
    in-memory buffer.  The resulting text must equal the content of
    ``tagged/<name>.tagged``; on Python 3 a ``tagged/python3/<name>.tagged``
    file is used instead when it exists.
    """
    is_py3 = sys.version_info > (3, )
    # Python 3 hands the parser a binary stream; Python 2 used
    # universal-newline text for both the input and the expected file.
    input_mode = 'rb' if is_py3 else 'rU'
    expected_mode = 'r' if is_py3 else 'rU'
    testfiles = glob.glob(
        os.path.join(os.path.dirname(__file__), 'data/arxiv.test/oai*'))
    for f in testfiles:
        # Build the expected path from its components.  A plain
        # str.replace('/oai', ...) on the full path would also rewrite a
        # '/oai' that happens to occur in a parent directory name.
        tagged_dir = os.path.join(os.path.dirname(f), 'tagged')
        tagged_name = os.path.basename(f) + '.tagged'
        b = os.path.join(tagged_dir, tagged_name)
        if is_py3:
            # Python 3 orders the properties dictionary differently
            py3_b = os.path.join(tagged_dir, 'python3', tagged_name)
            if os.path.exists(py3_b):
                b = py3_b
        with open(f, input_mode) as fp:
            document = arxiv.ArxivParser().parse(fp)
            outputfp = StringIO()
            try:
                classic.Tagged().write(document, outputfp)
                testoutput = outputfp.getvalue()
            finally:
                # Release the buffer even when serialization raises.
                outputfp.close()
        with open(b, expected_mode) as bp:
            self.assertEqual(testoutput, bp.read())
Ejemplo n.º 2
0
class TestATel(unittest.TestCase):
    """Check the serialized output of ``atel.ATelParser`` against a stored file."""

    def setUp(self):
        """Replace ``urllib2.urlopen`` with a mock for the duration of each test."""
        self.patcher = patch('urllib2.urlopen')
        self.urlopen_mock = self.patcher.start()
        # Undo the patch after every test, pass or fail, so the mock cannot
        # leak into other test cases.
        self.addCleanup(self.patcher.stop)

    def test_output(self):
        """Parse the RSS fixture and compare the tagged output with ``atel.tag``.

        The mocked ``urlopen`` returns the content of
        ``data/stubdata/input/ATel_rss.xml``.  Every record produced by the
        parser is written with ``classic.Tagged`` to a scratch file, which
        must match ``data/stubdata/serialized/atel.tag`` byte for byte.
        """
        parser = atel.ATelParser()
        mock_infile = os.path.join(os.path.dirname(__file__), "data/stubdata/input/ATel_rss.xml")
        with open(mock_infile) as fp:
            mock_data = fp.read()
        self.urlopen_mock.return_value = MockResponse(mock_data)
        atel_url = 'http://www.astronomerstelegram.org/?adsbiblio'
        test_data = parser.parse(atel_url, data_tag='item')
        test_outfile = "test_atel.tag"
        standard_outfile = os.path.join(os.path.dirname(__file__), "data/stubdata/serialized/atel.tag")
        serializer = classic.Tagged()
        try:
            # Mode 'w' truncates anything an earlier aborted run left behind,
            # so no separate best-effort removal is needed beforehand.
            with open(test_outfile, 'w') as outputfp:
                for d in test_data:
                    serializer.write(d, outputfp)
            # shallow=False forces a content comparison instead of trusting
            # matching os.stat() signatures.
            self.assertTrue(
                filecmp.cmp(test_outfile, standard_outfile, shallow=False),
                "%s differs from %s" % (test_outfile, standard_outfile))
        finally:
            # Remove the scratch file even when the comparison fails.
            if os.path.exists(test_outfile):
                os.remove(test_outfile)
Ejemplo n.º 3
0
 def test_output(self):
     """Compare serialized HST proposal records with the stored tagged file.

     ``self.get_batch_mock`` is primed with the decoded JSON fixture, the
     parser is called with ``test=True``, and every record it returns is
     written with ``classic.Tagged`` to a scratch file that must match
     ``data/stubdata/serialized/hstprop.tag`` byte for byte.
     """
     parser = hstprop.HSTParser()
     mock_infile = os.path.join(os.path.dirname(__file__),
                                "data/stubdata/input/hstprop.json")
     with open(mock_infile) as fp:
         mock_data = json.load(fp)
     self.get_batch_mock.return_value = mock_data
     api_url = 'https://proper.stsci.edu/proper/adsProposalSearch/query_test'
     token = 'foo'
     test_data = parser.parse(api_url,
                              api_key=token,
                              fromDate='2019-01-01',
                              maxRecords=1,
                              test=True)
     test_outfile = "test_hst.tag"
     standard_outfile = os.path.join(
         os.path.dirname(__file__), "data/stubdata/serialized/hstprop.tag")
     serializer = classic.Tagged()
     try:
         # Mode 'w' truncates anything an earlier aborted run left behind.
         with open(test_outfile, 'w') as outputfp:
             for d in test_data:
                 serializer.write(d, outputfp)
         # shallow=False forces a content comparison.
         self.assertTrue(
             filecmp.cmp(test_outfile, standard_outfile, shallow=False),
             "%s differs from %s" % (test_outfile, standard_outfile))
     finally:
         # Remove the scratch file even when the comparison fails.
         if os.path.exists(test_outfile):
             os.remove(test_outfile)
Ejemplo n.º 4
0
class TestProcSci(unittest.TestCase):
    """Check the serialized output of ``procsci.PoSParser`` against a stored file."""

    def setUp(self):
        """Replace ``urllib.urlopen`` with a mock for the duration of each test."""
        self.patcher = patch('urllib.urlopen')
        self.urlopen_mock = self.patcher.start()
        # Undo the patch after every test, pass or fail, so the mock cannot
        # leak into other test cases.
        self.addCleanup(self.patcher.stop)

    def test_output(self):
        """Parse the PoS HTML fixture and compare the output with ``procsci_299.tag``.

        The mocked ``urlopen`` returns the content of
        ``data/stubdata/input/pos_sissa_it_299.html``.  Every record produced
        by the parser is written with ``classic.Tagged`` to a scratch file,
        which must match ``data/stubdata/serialized/procsci_299.tag`` byte
        for byte.
        """
        parser = procsci.PoSParser()
        mock_infile = os.path.join(os.path.dirname(__file__), "data/stubdata/input/pos_sissa_it_299.html")
        with open(mock_infile, 'rU') as fp:
            mock_data = fp.read()
        self.urlopen_mock.return_value = MockResponse(mock_data)
        test_data = parser.parse("https://pos.sissa.it/299")
        test_outfile = "test_pos.tag"
        standard_outfile = os.path.join(os.path.dirname(__file__), "data/stubdata/serialized/procsci_299.tag")
        serializer = classic.Tagged()
        try:
            # Mode 'w' truncates anything an earlier aborted run left behind,
            # so no separate best-effort removal is needed beforehand.
            with open(test_outfile, 'w') as outputfp:
                for d in test_data:
                    serializer.write(d, outputfp)
            # shallow=False forces a content comparison instead of trusting
            # matching os.stat() signatures.
            self.assertTrue(
                filecmp.cmp(test_outfile, standard_outfile, shallow=False),
                "%s differs from %s" % (test_outfile, standard_outfile))
        finally:
            # Remove the scratch file even when the comparison fails.
            if os.path.exists(test_outfile):
                os.remove(test_outfile)
Ejemplo n.º 5
0
class TestJOSS(unittest.TestCase):
    """Check the serialized output of ``joss.JOSSParser`` against a stored file."""

    def setUp(self):
        """Replace ``urllib2.urlopen`` with a mock for the duration of each test."""
        self.patcher = patch('urllib2.urlopen')
        self.urlopen_mock = self.patcher.start()
        # Undo the patch after every test, pass or fail, so the mock cannot
        # leak into other test cases.
        self.addCleanup(self.patcher.stop)

    def test_output(self):
        """Parse the Atom fixture and compare the tagged output with ``joss.tag``.

        The mocked ``urlopen`` returns the content of
        ``data/stubdata/input/joss_atom.xml``.  Every record produced by the
        parser is written with ``classic.Tagged`` to a scratch file, which
        must match ``data/stubdata/serialized/joss.tag`` byte for byte.
        """
        parser = joss.JOSSParser()
        mock_infile = os.path.join(os.path.dirname(__file__), "data/stubdata/input/joss_atom.xml")
        with open(mock_infile) as fp:
            mock_data = fp.read()
        self.urlopen_mock.return_value = MockResponse(mock_data)
        joss_url = 'https://joss.theoj.org/papers/published.atom'
        test_data = parser.parse(joss_url, since='2019-07-10', page=1)
        test_outfile = "test_joss.tag"
        standard_outfile = os.path.join(os.path.dirname(__file__), "data/stubdata/serialized/joss.tag")
        serializer = classic.Tagged()
        try:
            # Mode 'w' truncates anything an earlier aborted run left behind,
            # so no separate best-effort removal is needed beforehand.
            with open(test_outfile, 'w') as outputfp:
                for d in test_data:
                    serializer.write(d, outputfp)
            # shallow=False forces a content comparison instead of trusting
            # matching os.stat() signatures.
            self.assertTrue(
                filecmp.cmp(test_outfile, standard_outfile, shallow=False),
                "%s differs from %s" % (test_outfile, standard_outfile))
        finally:
            # Remove the scratch file even when the comparison fails.
            if os.path.exists(test_outfile):
                os.remove(test_outfile)
    def test_pnas_parser(self):
        """Parse the PNAS XML fixture and compare its tagged form with the reference.

        The fixture text is handed to ``pnas.PNASParser``; the result is
        written with ``classic.Tagged`` to ``serialized/test_pnas.tag`` and
        compared with ``serialized/pnas.tag`` (``serialized/python3/pnas.tag``
        on Python 3).  The scratch file is removed after a successful
        comparison.
        """
        serialized_dir = os.path.join(self.stubdata_dir, 'serialized')
        source_path = os.path.join(self.stubdata_dir, 'input',
                                   'pnas_117_36_21873.xml')
        with open(source_path, open_mode_u) as handle:
            xml_text = handle.read()
        record = pnas.PNASParser().parse(xml_text)

        writer = classic.Tagged()
        produced = os.path.join(serialized_dir, 'test_pnas.tag')
        # Python 3 has its own reference file in a 'python3' subdirectory.
        if sys.version_info > (3, ):
            reference_parts = ('python3', 'pnas.tag')
        else:
            reference_parts = ('pnas.tag', )
        reference = os.path.join(serialized_dir, *reference_parts)

        # Best-effort removal of a scratch file left by an earlier run.
        try:
            os.remove(produced)
        except Exception:
            pass

        with open(produced, 'w') as sink:
            writer.write(record, sink)

        self.assertEqual(filecmp.cmp(produced, reference), True)
        os.remove(produced)
Ejemplo n.º 7
0
    def test_pnas_parser(self):
        """Parse each link of the PNAS feed fixture and compare the tagged output.

        ``self.requests_mock`` is primed with the HTML fixture, so every
        ``parser.parse`` call in the loop sees the same mocked response.
        Only the result of the last feed entry is serialized; it is written
        with ``classic.Tagged`` to ``serialized/test_pnas.tag`` and compared
        with ``serialized/pnas.tag`` (``serialized/python3/pnas.tag`` on
        Python 3).
        """
        mock_infile = os.path.join(self.stubdata_dir, 'input', 'pnas_feedparser.resp')
        mock_html_file = os.path.join(self.stubdata_dir, 'input', 'pnas_resp.html')
        with open(mock_infile, open_mode_u) as fp:
            feed = json.load(fp)
        with open(mock_html_file, open_mode_u) as fp:
            mock_html = fp.read()
        self.requests_mock.return_value.text = MockResponse(mock_html)

        # With no entries the loop never binds ``output``; fail with a clear
        # message instead of a NameError further down.
        self.assertTrue(feed['entries'], "feed fixture %s has no entries" % mock_infile)
        for _item in feed['entries']:
            absURL = _item['link']
            parser = pnas.PNASParser()
            output = parser.parse(absURL)

        serializer = classic.Tagged()
        test_outfile = os.path.join(self.stubdata_dir, 'serialized', 'test_pnas.tag')
        if sys.version_info > (3,):
            standard_outfile = os.path.join(self.stubdata_dir, 'serialized', 'python3', 'pnas.tag')
        else:
            standard_outfile = os.path.join(self.stubdata_dir, 'serialized', 'pnas.tag')
        try:
            # Mode 'w' truncates anything an earlier aborted run left behind.
            with open(test_outfile, 'w') as fo:
                serializer.write(output, fo)
            # shallow=False forces a content comparison.
            self.assertTrue(
                filecmp.cmp(test_outfile, standard_outfile, shallow=False),
                "%s differs from %s" % (test_outfile, standard_outfile))
        finally:
            # Remove the scratch file even when the comparison fails.
            if os.path.exists(test_outfile):
                os.remove(test_outfile)
Ejemplo n.º 8
0
 def test_arxiv_to_classic(self):
     """Serialize every arXiv OAI fixture and compare it with its tagged file.

     Each ``data/arxiv.test/oai*`` file next to this module is parsed with
     ``arxiv.ArxivParser`` and written with ``classic.Tagged`` into an
     in-memory buffer; the text must equal ``tagged/<name>.tagged``.
     """
     testfiles = glob.glob(os.path.join(os.path.dirname(__file__), 'data/arxiv.test/oai*'))
     for f in testfiles:
         # Build the expected path from its components.  A plain
         # str.replace('/oai', ...) on the full path would also rewrite a
         # '/oai' that happens to occur in a parent directory name.
         b = os.path.join(os.path.dirname(f), 'tagged', os.path.basename(f) + '.tagged')
         with open(f, 'rU') as fp:
             document = arxiv.ArxivParser().parse(fp)
             outputfp = cStringIO.StringIO()
             try:
                 classic.Tagged().write(document, outputfp)
                 testoutput = outputfp.getvalue()
             finally:
                 # Release the buffer even when serialization raises.
                 outputfp.close()
         with open(b, 'rU') as bp:
             self.assertEqual(testoutput, bp.read())
Ejemplo n.º 9
0
 def test_proquest_parser(self):
     """Serialize the ProQuest parser results and compare them with the reference.

     Every record in ``parser.results`` is written with ``classic.Tagged``
     to ``test_proquest.UNX.new`` inside ``self.outputdir``; that file must
     compare equal to ``SAO_NASA_Sep_2020.UNX.new`` in the same directory.
     The scratch file is removed after a successful comparison.
     """
     parser = proquest.ProQuestParser('SAO_NASA_Sep_2020.UNX')
     # The return value is not used; the records are read from
     # parser.results below.
     parser.parse()
     writer = classic.Tagged()
     reference = os.path.join(self.outputdir, 'SAO_NASA_Sep_2020.UNX.new')
     produced = os.path.join(self.outputdir, 'test_proquest.UNX.new')

     # Best-effort removal of a scratch file left by an earlier run.
     try:
         os.remove(produced)
     except Exception:
         pass

     with open(produced, 'w') as sink:
         for record in parser.results:
             writer.write(record, sink)

     self.assertEqual(filecmp.cmp(produced, reference), True)
     os.remove(produced)
Ejemplo n.º 10
0
 def test_output(self):
     """Parse the PoS HTML fixture and compare the output with ``procsci_299.tag``.

     ``self.urlopen_mock`` returns the content of the HTML fixture.  Every
     record produced by the parser is written with ``classic.Tagged`` to a
     scratch file, which must match the stored tagged file byte for byte.
     Both fixture paths are relative to the current working directory.
     """
     parser = procsci.PoSParser()
     mock_infile = "test_data/stubdata/input/pos_sissa_it_299.html"
     with open(mock_infile, 'rU') as fp:
         mock_data = fp.read()
     self.urlopen_mock.return_value = MockResponse(mock_data)
     test_data = parser.parse("https://pos.sissa.it/299")
     test_outfile = "test_pos.tag"
     standard_outfile = "test_data/stubdata/serialized/procsci_299.tag"
     serializer = classic.Tagged()
     try:
         # Mode 'w' truncates anything an earlier aborted run left behind,
         # which replaces the former bare ``except:`` around os.remove (it
         # also swallowed KeyboardInterrupt and SystemExit).
         with open(test_outfile, 'w') as outputfp:
             for d in test_data:
                 serializer.write(d, outputfp)
         # shallow=False forces a content comparison.
         self.assertTrue(
             filecmp.cmp(test_outfile, standard_outfile, shallow=False),
             "%s differs from %s" % (test_outfile, standard_outfile))
     finally:
         # Remove the scratch file even when the comparison fails.
         if os.path.exists(test_outfile):
             os.remove(test_outfile)
Ejemplo n.º 11
0
 def test_output(self):
     """Parse the PoS HTML fixture and compare the output with ``procsci_299.tag``.

     ``self.requests_mock`` is primed with the content of
     ``data/stubdata/input/pos_sissa_it_299.html``.  Every record produced
     by the parser is written with ``classic.Tagged`` to a scratch file,
     which must match ``data/stubdata/serialized/procsci_299.tag`` byte for
     byte.
     """
     parser = procsci.PoSParser()
     mock_infile = os.path.join(os.path.dirname(__file__), "data/stubdata/input/pos_sissa_it_299.html")
     with open(mock_infile, open_mode_u) as fp:
         mock_data = fp.read()
     self.requests_mock.return_value.text = MockResponse(mock_data)
     test_data = parser.parse("https://pos.sissa.it/299_test")
     test_outfile = "test_pos.tag"
     standard_outfile = os.path.join(os.path.dirname(__file__), "data/stubdata/serialized/procsci_299.tag")
     serializer = classic.Tagged()
     try:
         # Mode 'w' truncates anything an earlier aborted run left behind.
         with open(test_outfile, 'w') as outputfp:
             for d in test_data:
                 serializer.write(d, outputfp)
         # shallow=False forces a content comparison.
         self.assertTrue(
             filecmp.cmp(test_outfile, standard_outfile, shallow=False),
             "%s differs from %s" % (test_outfile, standard_outfile))
     finally:
         # Remove the scratch file even when the comparison fails.
         if os.path.exists(test_outfile):
             os.remove(test_outfile)
 def test_proquest_parser(self):
     """Serialize the ProQuest parser results and compare them with the reference.

     The MARC fixture and its companion open-access CSV are read from the
     ``input`` directory under ``self.stubdata_dir`` and handed to
     ``proquest.ProQuestParser``.  Every record in ``parser.results`` is
     written with ``classic.Tagged`` to ``test_proquest.UNX.new`` inside
     ``self.outputdir``; that file must match ``SAO_NASA_Sep_2020.UNX.new``
     in the same directory byte for byte.
     """
     # os.path.join works whether or not stubdata_dir ends with a separator;
     # the previous string concatenation silently required one.
     input_dir = os.path.join(self.stubdata_dir, 'input')
     marc_filename = os.path.join(input_dir, 'SAO_NASA_Sep_2020.UNX')
     # Named explicitly rather than via str.replace('.UNX', ...) on the full
     # path, which would also rewrite a '.UNX' in a directory name.
     oa_filename = os.path.join(input_dir, 'SAO_NASA_Sep_2020_OpenAccessTitles.csv')
     with open(marc_filename) as fp:
         marcdata = fp.read()
     with open(oa_filename) as fp:
         oadata = fp.read()
     parser = proquest.ProQuestParser(marcdata, oadata)
     # The return value is not used; the records are read from
     # parser.results below.
     parser.parse()
     serializer = classic.Tagged()
     standard_outfile = os.path.join(self.outputdir,
                                     'SAO_NASA_Sep_2020.UNX.new')
     test_outfile = os.path.join(self.outputdir, 'test_proquest.UNX.new')
     try:
         # Mode 'w' truncates anything an earlier aborted run left behind.
         with open(test_outfile, 'w') as fo:
             for rec in parser.results:
                 serializer.write(rec, fo)
         # shallow=False forces a content comparison.
         self.assertTrue(
             filecmp.cmp(test_outfile, standard_outfile, shallow=False),
             "%s differs from %s" % (test_outfile, standard_outfile))
     finally:
         # Remove the scratch file even when the comparison fails.
         if os.path.exists(test_outfile):
             os.remove(test_outfile)
Ejemplo n.º 13
0
 def test_output(self):
     """Parse the ATel RSS fixture and compare the tagged output with ``atel.tag``.

     ``self.urlopen_mock`` returns the content of
     ``data/stubdata/input/ATel_rss.xml``.  Every record produced by the
     parser is written with ``classic.Tagged`` to a scratch file, which
     must match ``data/stubdata/serialized/atel.tag`` byte for byte.
     """
     parser = atel.ATelParser()
     mock_infile = os.path.join(os.path.dirname(__file__),
                                "data/stubdata/input/ATel_rss.xml")
     with open(mock_infile) as fp:
         mock_data = fp.read()
     self.urlopen_mock.return_value = MockResponse(mock_data)
     atel_url = 'http://www.astronomerstelegram.org/?adsbiblio.test'
     test_data = parser.parse(atel_url, data_tag='item')
     test_outfile = "test_atel.tag"
     standard_outfile = os.path.join(os.path.dirname(__file__),
                                     "data/stubdata/serialized/atel.tag")
     serializer = classic.Tagged()
     try:
         # Mode 'w' truncates anything an earlier aborted run left behind.
         with open(test_outfile, 'w') as outputfp:
             for d in test_data:
                 serializer.write(d, outputfp)
         # shallow=False forces a content comparison.
         self.assertTrue(
             filecmp.cmp(test_outfile, standard_outfile, shallow=False),
             "%s differs from %s" % (test_outfile, standard_outfile))
     finally:
         # Remove the scratch file even when the comparison fails.
         if os.path.exists(test_outfile):
             os.remove(test_outfile)
Ejemplo n.º 14
0
 def test_output(self):
     """Parse the JOSS Atom fixture and compare the tagged output with ``joss.tag``.

     ``self.urlopen_mock`` returns the content of
     ``data/stubdata/input/joss_atom.xml``.  Every record produced by the
     parser is written with ``classic.Tagged`` to a scratch file, which
     must match ``data/stubdata/serialized/joss.tag`` (or the copy under
     ``serialized/python3`` on Python 3) byte for byte.
     """
     parser = joss.JOSSParser()
     mock_infile = os.path.join(os.path.dirname(__file__), "data/stubdata/input/joss_atom.xml")
     with open(mock_infile) as fp:
         mock_data = fp.read()
     self.urlopen_mock.return_value = MockResponse(mock_data)
     joss_url = 'https://joss.theoj.org/papers/published.atom.test'
     test_data = parser.parse(joss_url, since='2019-07-10', page=1)
     test_outfile = "test_joss.tag"
     if sys.version_info > (3,):
         standard_outfile = os.path.join(os.path.dirname(__file__), "data/stubdata/serialized/python3/joss.tag")
     else:
         standard_outfile = os.path.join(os.path.dirname(__file__), "data/stubdata/serialized/joss.tag")
     serializer = classic.Tagged()
     try:
         # Mode 'w' truncates anything an earlier aborted run left behind.
         with open(test_outfile, 'w') as outputfp:
             for d in test_data:
                 serializer.write(d, outputfp)
         # shallow=False forces a content comparison.
         self.assertTrue(
             filecmp.cmp(test_outfile, standard_outfile, shallow=False),
             "%s differs from %s" % (test_outfile, standard_outfile))
     finally:
         # Remove the scratch file even when the comparison fails.
         if os.path.exists(test_outfile):
             os.remove(test_outfile)