def test_is_valid(self):
        """Check ``TurboBeeMsg`` validity, timestamp and value round-trips.

        Covers, in order: validity with a short and a very long ``qid``,
        ``get_timestamp``/``get_datetime`` round-tripping, validity with an
        unknown content type and a non-string value, and what ``get_value``
        returns for text, binary and json content types.
        """
        sample = u'\ud789'  # a single non-ASCII character
        sample_utf8 = sample.encode('utf8')

        msg = TurboBeeMsg()

        # qid: both a short and a very long identifier are accepted
        msg.qid = 'foo'
        self.assertTrue(msg.is_valid())
        msg.qid = 'unlimiteeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeed' * 512
        self.assertTrue(msg.is_valid())

        # timestamp <-> datetime conversion must be lossless
        msg.status = Status.new
        moment = datetime.utcnow()
        msg.created = msg.get_timestamp(moment)
        self.assertEqual(moment, msg.get_datetime(msg.created))

        # a message whose content type is reset to 0 (unknown) is still valid
        msg.value = 'foobar'
        msg.ctype = msg.ContentType.text
        msg.ctype = 0  # unknown
        self.assertTrue(msg.is_valid())

        # non-string values are accepted as well
        msg.set_value(1)
        self.assertTrue(msg.is_valid())

        # text and binary content both come back as utf-8 encoded bytes
        msg.set_value(sample)
        for content_type in (msg.ContentType.text, msg.ContentType.binary):
            msg.ctype = content_type
            self.assertEqual(msg.get_value(), sample_utf8)

        # json content round-trips as the original structure
        payload = {'foo': sample}
        msg.set_value(payload, msg.ContentType.json)
        self.assertEqual(msg.get_value(), {'foo': sample})

        # an explicit text content type behaves like the default one
        msg.set_value(sample, msg.ContentType.text)
        self.assertEqual(msg.get_value(), sample_utf8)
Beispiel #2
0
    def _retrieve_abstract_template(self, url):
        """Harvest the page at ``url`` and convert it into a reusable template.

        The fetched HTML is rewritten as follows:

        * everything from the element carrying the first ``data-highwire``
          marker up to the end of the element carrying the last one is
          replaced by ``{{tags}}``;
        * the content between the ``<article ...>`` opening tag and
          ``</article`` is replaced by ``{{abstract}}``;
        * every occurrence of the bibcode parsed from ``url`` (if any) is
          replaced by ``{{bibcode}}``;
        * the ``<noscript>`` block containing ``id="noscriptmsg"`` and the
          first bare ``<noscript>`` block that starts with ``<style>`` are
          removed.

        :param url: address of the abstract page to harvest
        :return: the template as utf-8 decoded text
        :raises Exception: if the page lacks the ``ShowAbstract`` widget, if
            the ``data-highwire`` section cannot be delimited, or if the
            ``<article>`` element cannot be delimited (missing, out of order,
            or with an opening tag that is never closed)
        """
        msg = TurboBeeMsg(target=url)
        parts = self._parse_bbb_url(url)

        # One initial attempt plus up to three retries.  There is no explicit
        # failure here: whatever ``msg`` holds afterwards is checked below.
        retries = 0
        while not self.harvest_webpage(msg) and retries < 3:
            self.logger.warn('Retrying to fetch: ' + url)
            retries += 1

        html = msg.get_value().decode('utf8')

        # some basic checks
        if 'data-widget="ShowAbstract"' not in html:
            raise Exception(
                'Cannot process fetched page, or data-widget for {}'.format(
                    url))

        # --- tags section -------------------------------------------------
        # TODO; find the sections and replace them with symbolic names {tags}, {abstract}....
        marker = 'data-highwire'
        first = html.find(marker)
        start = end = -1
        if first > -1:
            # '<' opening the element that carries the first marker
            start = html.rfind('<', 0, first)
            # '>' closing the element that carries the last marker
            end = html.find('>', html.rfind(marker))
        # -1 is checked explicitly so that a missing marker (or an unclosed
        # element) is reported instead of being used as a slice index
        if start == -1 or end == -1:
            raise Exception("Cannot find tags section")
        html = html[:start] + '{{tags}}' + html[end + 1:]

        # --- abstract section ---------------------------------------------
        article = html.find('<article')
        article_end = html.find('</article')
        opening_end = -1
        if article > -1 and article_end > -1:
            # '>' that terminates the opening tag; it has to come before the
            # closing tag, which also rejects a closing tag that precedes
            # the opening one
            opening_end = html.find('>', article, article_end)
        if opening_end == -1:
            raise Exception(
                'Cannot process fetched page, cannot find abstract section for {}'
                .format(url))
        html = html[:opening_end + 1] + '{{abstract}}' + html[article_end:]

        # --- bibcode ------------------------------------------------------
        if 'bibcode' in parts and parts['bibcode']:
            html = html.replace(parts['bibcode'], u'{{bibcode}}')

        # --- noscript blocks ----------------------------------------------
        closing = '</noscript>'

        # finally, cut out noscript warning that says javascript is required
        warning_at = html.find('id="noscriptmsg"')  # div id within noscript
        if warning_at > -1:
            block_start = html.rfind('<noscript', 0, warning_at)
            if block_start > -1:
                block_end = html.find(closing, block_start)
                # only cut when both ends of the block were really found
                if block_end > -1:
                    html = html[:block_start] + html[block_end + len(closing):]

        # and the first noscript that includes a style, it hides the abstract div
        opening = html.find('<noscript>')
        if opening > -1:
            content_at = opening + len('<noscript>')
            if html[content_at:].lstrip().startswith('<style>'):
                # look for the closing tag after this opening tag, not from
                # the start of the document
                block_end = html.find(closing, content_at)
                if block_end > -1:
                    html = html[:opening] + html[block_end + len(closing):]

        return html
    def test_build_abstract_page(self):
        """Build a static abstract page from a canned template and API reply.

        ``_load_url`` is patched to return the ``abs.html`` fixture stored
        next to this test module and ``search_api`` is patched to return a
        canned search response for bibcode ``2019LRR....22....1I``.  The page
        stored in the message by ``build_static_page`` must contain the
        ``og:image`` tag and the abstract text, and must not contain the
        JavaScript warning or the ``#app-container`` reference.
        """
        msg = TurboBeeMsg(
            target=
            'https://ui.adsabs.harvard.edu/#abs/2019LRR....22....1I/abstract')

        # join() keeps the path relative when dirname(__file__) is empty,
        # where plain concatenation would point at the filesystem root
        fixture_path = os.path.join(os.path.dirname(__file__), 'abs.html')
        with open(fixture_path, 'r') as f:
            html = f.read()

        # named to avoid shadowing the stdlib ``json`` module
        search_response = {
            u'responseHeader': {
                u'status': 0,
                u'QTime': 12,
                u'params': {
                    u'x-amzn-trace-id':
                    u'Root=1-5c89826f-96274e0a2ef0646da1cdc944',
                    u'rows': u'10',
                    u'q': u'identifier:"2019LRR....22....1I"',
                    u'start': u'0',
                    u'wt': u'json',
                    u'fl':
                    u'[citations],abstract,aff,author,bibcode,citation_count,comment,data,doi,esources,first_author,id,isbn,issn,issue,keyword,links_data,page,property,pub,pub_raw,pubdate,pubnote,read_count,title,volume,year'
                }
            },
            u'response': {
                u'start': 0,
                u'numFound': 1,
                u'docs': [{
                    u'read_count': 514,
                    u'pubdate': u'2019-12-00',
                    u'first_author': u'Ishak, Mustapha',
                    u'abstract':
                    u'We review recent developments and results in testing general relativity (GR) at cosmological scales. The subject has witnessed rapid growth during the last two decades with the aim of addressing the question of cosmic acceleration and the dark energy associated with it. However, with the advent of precision cosmology, it has also become a well-motivated endeavor by itself to test gravitational physics at cosmic scales. We overview cosmological probes of gravity, formalisms and parameterizations for testing deviations from GR at cosmological scales, selected modified gravity (MG) theories, gravitational screening mechanisms, and computer codes developed for these tests. We then provide summaries of recent cosmological constraints on MG parameters and selected MG models. We supplement these cosmological constraints with a summary of implications from the recent binary neutron star merger event. Next, we summarize some results on MG parameter forecasts with and without astrophysical systematics that will dominate the uncertainties. The review aims at providing an overall picture of the subject and an entry point to students and researchers interested in joining the field. It can also serve as a quick reference to recent results and constraints on testing gravity at cosmological scales.',
                    u'links_data': [
                        u'{"access": "open", "instances": "", "title": "", "type": "preprint", "url": "http://arxiv.org/abs/1806.10122"}',
                        u'{"access": "open", "instances": "", "title": "", "type": "electr", "url": "https://doi.org/10.1007%2Fs41114-018-0017-4"}'
                    ],
                    u'year': u'2019',
                    u'pubnote': [
                        u'Invited review article for Living Reviews in Relativity. 201 pages, 17 figures. Matches published version; doi:10.1007/s41114-018-0017-4'
                    ],
                    u'id': u'15558883',
                    u'bibcode': u'2019LRR....22....1I',
                    u'author': [u'Ishak, Mustapha'],
                    u'aff': [
                        u'Department of Physics, The University of Texas at Dallas, Richardson, TX, USA'
                    ],
                    u'esources': [u'EPRINT_HTML', u'EPRINT_PDF', u'PUB_HTML'],
                    u'issue': u'1',
                    u'pub_raw':
                    u'Living Reviews in Relativity, Volume 22, Issue 1, article id. 1, <NUMPAGES>204</NUMPAGES> pp.',
                    u'pub': u'Living Reviews in Relativity',
                    u'volume': u'22',
                    u'doi': [u'10.1007/s41114-018-0017-4'],
                    u'keyword': [
                        u'Tests of relativistic gravity',
                        u'Theories of gravity', u'Modified gravity',
                        u'Cosmological tests', u'Post-Friedmann limit',
                        u'Gravitational waves',
                        u'Astrophysics - Cosmology and Nongalactic Astrophysics',
                        u'Astrophysics - Astrophysics of Galaxies',
                        u'General Relativity and Quantum Cosmology'
                    ],
                    u'title': [u'Testing general relativity in cosmology'],
                    u'citation_count': 18,
                    u'[citations]': {
                        u'num_citations': 18,
                        u'num_references': 928
                    },
                    u'property': [
                        u'ESOURCE', u'ARTICLE', u'REFEREED', u'PUB_OPENACCESS',
                        u'EPRINT_OPENACCESS', u'EPRINT_OPENACCESS',
                        u'OPENACCESS'
                    ],
                    u'page': [u'1']
                }]
            }
        }

        # neither the network nor the search service is touched: the template
        # comes from the fixture and the metadata from the canned response
        with patch.object(self.app, '_load_url', return_value=html), \
            patch.object(self.app, 'search_api', return_value=search_response):
            assert self.app.build_static_page(msg)
            page = msg.get_value().decode('utf8')
            assert u'og:image' in page
            assert u'We review recent' in page
            assert u'ADS requires JavaScript' not in page
            assert u'#app-container' not in page