Exemple #1
0
    def setUpClass(cls) -> None:
        """Prepare the shared database (in-memory SQLite) for this test class.

        Steps, in order: bind the ``browse.services.database`` module to a
        mocked application object, create the schema, insert two member
        institutions with their IP ranges, and finally execute every
        ``*.sql`` file found under ``data/db/sql``.
        """
        from browse.services import database

        cls.database_service = database
        db = database.db
        models = database.models

        # Stand-in application carrying only the attributes assigned below.
        fake_app = mock.MagicMock()
        fake_app.config = {
            'SQLALCHEMY_DATABASE_URI': DATABASE_URL,
            'SQLALCHEMY_TRACK_MODIFICATIONS': False
        }
        fake_app.extensions = {}
        fake_app.root_path = ''

        db.init_app(fake_app)
        db.app = fake_app
        db.create_all()
        db.session.commit()

        # --- Institution 1: Cornell, one inclusive range and one exclusion.
        cornell = models.MemberInstitution(
            id=1, name='Cornell University', label='Cornell University')
        db.session.add(cornell)

        cornell_range = models.MemberInstitutionIP(
            id=1,
            sid=cornell.id,
            start=2152988672,  # 128.84.0.0
            end=2153054207,    # 128.84.255.255
            exclude=0)
        db.session.add(cornell_range)

        cornell_excluded_range = models.MemberInstitutionIP(
            id=2,
            sid=cornell.id,
            start=2152991233,  # 128.84.10.1
            end=2152991242,    # 128.84.10.10
            exclude=1)
        db.session.add(cornell_excluded_range)

        # --- Institution 2: its range overlaps Cornell's excluded range.
        other = models.MemberInstitution(
            id=2, name='Other University', label='Other University')
        db.session.add(other)

        other_range = models.MemberInstitutionIP(
            id=3,
            sid=other.id,
            start=2152991236,  # 128.84.10.4
            end=2152991242,    # 128.84.10.10
            exclude=0)
        db.session.add(other_range)
        db.session.commit()

        # Load the remaining fixture data from the SQL dump files.
        fixtures_dir = path_of_for_test('data/db/sql')
        scripts: List[str] = glob.glob(f'{fixtures_dir}/*.sql')
        execute_sql_files(scripts, db.engine)
        db.session.commit()
Exemple #2
0
 def test_split_long_author_list(self):
     """Check the shape of the value returned for a long author list.

     The authors of 1411.4413 are converted with ``queries_for_authors``
     and passed to ``split_long_author_list`` with ``20`` as the second
     argument. The result must be a 3-tuple of (list, non-empty list, int).
     """
     f1 = path_of_for_test(
         'data/abs_files/ftp/arxiv/papers/1411/1411.4413.abs')
     meta: metadata = AbsMetaSession.parse_abs_file(filename=f1)
     alst = split_long_author_list(queries_for_authors(str(meta.authors)),
                                   20)
     self.assertIsInstance(alst, tuple)
     # Compare the length by value: identity comparison of ints only works
     # by accident of the interpreter's small-integer caching.
     self.assertEqual(len(alst), 3)
     self.assertIsInstance(alst[0], list)
     self.assertIsInstance(alst[1], list)
     self.assertGreater(len(alst[1]), 0)
     self.assertIsInstance(alst[2], int)
Exemple #3
0
    def test_split_with_collaboration(self):
        """Verify splitting and query conversion of a collaboration-led list.

        Uses the authors of 0808.4142 and checks both the raw token list
        from ``split_authors`` and the output of ``queries_for_authors``.
        """
        abs_path = path_of_for_test(
            'data/abs_files/ftp/arxiv/papers/0808/0808.4142.abs')
        meta: metadata = AbsMetaSession.parse_abs_file(filename=abs_path)

        # Raw tokens: names and separators alternate.
        expected_tokens = [
            'D0 Collaboration', ':', 'V. Abazov', ',', 'et al']
        self.assertListEqual(split_authors(str(meta.authors)),
                             expected_tokens)

        # Query form: names become 2-tuples, the other items stay strings.
        expected_queries = [
            ('D0 Collaboration', 'D0 Collaboration'),
            ': ',
            ('V. Abazov', 'Abazov, V'),
            ', ',
            'et al',
        ]
        self.assertListEqual(queries_for_authors(str(meta.authors)),
                             expected_queries)
Exemple #4
0
    def test_all_trackback_pings(self) -> None:
        """Test if all trackback pings are counted.

        The reference count is obtained by running ``grep_f_count`` over the
        ``arXiv_trackback_pings.sql`` file for its INSERT statements. It must
        equal both the value of ``count_all_trackback_pings()`` and the
        number of items returned by ``get_all_trackback_pings()``.
        """
        doc_sql_file = path_of_for_test(
            'data/db/sql/arXiv_trackback_pings.sql')

        count_from_file = grep_f_count(
            doc_sql_file, '''INSERT INTO `arXiv_trackback_pings`''')
        service = TestBrowseDatabaseService.database_service
        count_from_db: int = service.count_all_trackback_pings()
        count_from_db_list: int = len(service.get_all_trackback_pings())

        # Guard first: without a reference count the comparisons below
        # would be meaningless.
        self.assertIsNotNone(count_from_file,
                             'count of trackback pings is defined')
        self.assertEqual(count_from_db, count_from_file,
                         'Count of all trackback pings are correct')
        self.assertEqual(count_from_db_list, count_from_file,
                         'All trackback pings are returned')
Exemple #5
0
    def test_split_strange_author_list(self):
        """Test odd author list that shows '0 additional authors' ARXIVNG-2083

        The authors of 1902.05884 are split with ``100`` as the second
        argument. The first list must hold 101 tuple items, and the second
        list and the trailing integer must both be empty/zero.
        """
        f1 = path_of_for_test(
            'data/abs_files/ftp/arxiv/papers/1902/1902.05884.abs')
        meta: metadata = AbsMetaSession.parse_abs_file(filename=f1)
        alst = split_long_author_list(queries_for_authors(str(meta.authors)),
                                      100)

        self.assertIsInstance(alst, tuple)
        # Compare the length by value: identity comparison of ints only works
        # by accident of the interpreter's small-integer caching.
        self.assertEqual(len(alst), 3)

        self.assertIsInstance(alst[0], list)
        self.assertIsInstance(alst[1], list)
        self.assertIsInstance(alst[2], int)

        # Only tuple items are counted; plain-string items are ignored.
        tuple_count = sum(1 for item in alst[0] if isinstance(item, tuple))
        self.assertEqual(tuple_count, 101)

        self.assertEqual(len(alst[1]), 0,
                         "Back list on 1902.05884 should be empty")
        self.assertEqual(alst[2], 0,
                         "Back list size on 1902.05884 should be empty")
"""Tests for arXiv abstract (.abs) parser."""
import os
from unittest import TestCase
from datetime import datetime
from dateutil.tz import tzutc
from browse.domain.metadata import DocMetadata, Submitter, SourceType, \
    VersionEntry
from browse.services.document.metadata import AbsMetaSession
from tests import path_of_for_test

# Root of the .abs test files; walked recursively by test_bulk_parsing.
ABS_FILES = path_of_for_test('data/abs_files')


class TestAbsParser(TestCase):
    """Test parsing metadata from .abs files."""

    def test_bulk_parsing(self):
        """Parse all nonempty .abs files in test set.

        Walks ``ABS_FILES`` recursively and requires every non-empty
        ``*.abs`` file to parse into a ``DocMetadata``. Fails if no file
        was exercised, so a missing or empty data directory cannot make
        the test pass vacuously.
        """
        num_files_tested = 0
        from_regex = r'(?m)From:\s+[^<]+<[^@]+@[^>]+>'
        self.assertRegex('From: J Doe <*****@*****.**>', from_regex)
        for dir_name, _subdirs, file_list in os.walk(ABS_FILES):
            for fname in file_list:
                # Cheap name check first, so only .abs files are stat'ed.
                if not fname.endswith('.abs'):
                    continue
                fname_path = os.path.join(dir_name, fname)
                # skip any empty files
                if os.stat(fname_path).st_size == 0:
                    continue
                num_files_tested += 1
                dm = AbsMetaSession.parse_abs_file(filename=fname_path)
                self.assertIsInstance(dm, DocMetadata)
        self.assertGreater(num_files_tested, 0,
                           'at least one .abs file should have been parsed')
Exemple #7
0
"""Tests for Google Scholar metadata tag creation. """

import html
import json
import os
import pprint
from unittest import TestCase
from tests import path_of_for_test

from browse.domain.metadata import DocMetadata
from browse.services.util.metatags import meta_tag_metadata
from browse.services.document.metadata import AbsMetaSession
from app import app

# Paths to test data, resolved through tests.path_of_for_test.
CLASSIC_RESULTS_FILE = path_of_for_test(
    'data/classic_scholar_metadata_tags.json')
ABS_FILES = path_of_for_test('data/abs_files')


class TestAgainstClassicResults(TestCase):
    """Test google scholar metadata created from abs files against classic exresults. """
    def setUp(self):
        app.testing = True
        app.config['APPLICATION_ROOT'] = ''
        app.config['SERVER_NAME'] = 'dev.arxiv.org'
        self.app = app

    def test_same_as_classic(self):

        bad_data = [
            '1501.00001v1',
Exemple #8
0
 def test_collaboration_at_front(self):
     """Check author/affiliation parsing when a collaboration comes first.

     Parses the raw author string of 0808.4142 and compares the result
     of ``parse_author_affil`` with the expected list of entries.
     """
     abs_path = path_of_for_test(
         'data/abs_files/ftp/arxiv/papers/0808/0808.4142.abs')
     parsed_meta = AbsMetaSession.parse_abs_file(filename=abs_path)
     expected = [
         ['D0 Collaboration', '', ''],
         ['Abazov', 'V.', ''],
     ]
     actual = parse_author_affil(parsed_meta.authors.raw)
     self.assertListEqual(actual, expected)