def setUpClass(cls) -> None:
    """Initialize a database session with in-memory SQLite.

    Builds the schema, inserts member-institution fixtures used by the
    institution-lookup tests, then loads the bulk .sql fixture files.
    """
    from browse.services import database
    # Expose the module on the class so individual tests can reach the
    # service under test without re-importing.
    cls.database_service = database
    # Stand-in for a Flask app: just enough attributes (config,
    # extensions, root_path) for Flask-SQLAlchemy's init_app to work
    # without starting the real application.
    mock_app = mock.MagicMock()
    mock_app.config = {
        'SQLALCHEMY_DATABASE_URI': DATABASE_URL,
        'SQLALCHEMY_TRACK_MODIFICATIONS': False
    }
    mock_app.extensions = {}
    mock_app.root_path = ''
    database.db.init_app(mock_app)
    # NOTE(review): assigning db.app directly is a legacy Flask-SQLAlchemy
    # pattern — presumably needed so create_all() runs without an app
    # context; confirm against the pinned Flask-SQLAlchemy version.
    database.db.app = mock_app
    database.db.create_all()
    database.db.session.commit()

    # Fixture: Cornell with one included IP range and one excluded
    # sub-range inside it (exercises the exclude=1 path).
    inst_cornell = database.models.MemberInstitution(
        id=1, name='Cornell University', label='Cornell University')
    database.db.session.add(inst_cornell)
    inst_cornell_ip = database.models.MemberInstitutionIP(
        id=1,
        sid=inst_cornell.id,
        start=2152988672,  # 128.84.0.0
        end=2153054207,    # 128.84.255.255
        exclude=0)
    database.db.session.add(inst_cornell_ip)
    inst_cornell_ip_exclude = database.models.MemberInstitutionIP(
        id=2,
        sid=inst_cornell.id,
        start=2152991233,  # 128.84.10.1
        end=2152991242,    # 128.84.10.10
        exclude=1
    )
    database.db.session.add(inst_cornell_ip_exclude)

    # Fixture: a second institution whose range overlaps Cornell's
    # excluded sub-range (exercises overlapping-membership lookups).
    inst_other = database.models.MemberInstitution(
        id=2, name='Other University', label='Other University')
    database.db.session.add(inst_other)
    inst_other_ip = database.models.MemberInstitutionIP(
        id=3,
        sid=inst_other.id,
        start=2152991236,  # 128.84.10.4
        end=2152991242,    # 128.84.10.10
        exclude=0)
    database.db.session.add(inst_other_ip)
    database.db.session.commit()

    # Load the bulk SQL fixtures (documents, trackback pings, ...)
    # straight through the engine.
    sql_dir = path_of_for_test('data/db/sql')
    sql_files: List[str] = glob.glob(f'{sql_dir}/*.sql')
    execute_sql_files(sql_files, database.db.engine)
    database.db.session.commit()
def test_split_long_author_list(self):
    """A long author list splits into a (front, back, back_size) triple.

    Fix: the original used ``assertIs`` to compare ``len(alst)`` with 3,
    an identity check on ints that only passes because of CPython's
    small-int interning; ``assertEqual`` expresses the intended value
    comparison. ``assertIs(type(x), T)`` is kept where an exact-type
    (not subclass) check is intended.
    """
    abs_path = path_of_for_test(
        'data/abs_files/ftp/arxiv/papers/1411/1411.4413.abs')
    meta = AbsMetaSession.parse_abs_file(filename=abs_path)
    alst = split_long_author_list(
        queries_for_authors(str(meta.authors)), 20)
    self.assertIs(type(alst), tuple)
    self.assertEqual(len(alst), 3)
    self.assertIs(type(alst[0]), list)
    self.assertIs(type(alst[1]), list)
    # With a 20-author cutoff this paper must have a non-empty back list.
    self.assertGreater(len(alst[1]), 0)
    self.assertIs(type(alst[2]), int)
def test_split_with_collaboration(self):
    """A leading collaboration name survives author splitting intact."""
    abs_file = path_of_for_test(
        'data/abs_files/ftp/arxiv/papers/0808/0808.4142.abs')
    meta = AbsMetaSession.parse_abs_file(filename=abs_file)
    raw_authors = str(meta.authors)

    # Raw split keeps the collaboration, the colon, and the et-al tail.
    self.assertListEqual(
        split_authors(raw_authors),
        ['D0 Collaboration', ':', 'V. Abazov', ',', 'et al'])

    # Query form pairs each name with its search-query variant.
    self.assertListEqual(
        queries_for_authors(raw_authors),
        [('D0 Collaboration', 'D0 Collaboration'), ': ',
         ('V. Abazov', 'Abazov, V'), ', ', 'et al'])
def test_all_trackback_pings(self) -> None:
    """Test if all trackback pings are counted.

    Fixes: ``.__len__()`` replaced with the idiomatic ``len()`` call,
    and the ``if count is not None / else assertIsNotNone`` contortion
    flattened — ``assertIsNotNone`` already fails the test when the
    grep count could not be determined.
    """
    doc_sql_file = path_of_for_test(
        'data/db/sql/arXiv_trackback_pings.sql')
    # Expected count comes from grepping the fixture SQL for INSERTs.
    count_from_file = grep_f_count(
        doc_sql_file, '''INSERT INTO `arXiv_trackback_pings`''')
    self.assertIsNotNone(count_from_file,
                         'count of trackback pings is defined')

    count_from_db: int = TestBrowseDatabaseService.database_service\
        .count_all_trackback_pings()
    self.assertEqual(count_from_db, count_from_file,
                     'Count of all trackback pings are correct')

    count_from_db_list: int = len(
        TestBrowseDatabaseService.database_service
        .get_all_trackback_pings())
    self.assertEqual(count_from_db_list, count_from_file,
                     'All trackback pings are returned')
def test_split_strange_author_list(self):
    """Test odd author list that shows '0 additional authors' ARXIVNG-2083.

    Fixes: ``assertIs`` on int results (identity check that only works
    via small-int interning) replaced with ``assertEqual``; the
    tuple-count now uses a generator ``sum`` instead of
    ``len(list(filter(lambda ...)))``; the final assertion message said
    "should be empty" about an integer size — it should read "should
    be 0".
    """
    abs_path = path_of_for_test(
        'data/abs_files/ftp/arxiv/papers/1902/1902.05884.abs')
    meta = AbsMetaSession.parse_abs_file(filename=abs_path)
    alst = split_long_author_list(
        queries_for_authors(str(meta.authors)), 100)
    self.assertIs(type(alst), tuple)
    self.assertEqual(len(alst), 3)
    self.assertIs(type(alst[0]), list)
    self.assertIs(type(alst[1]), list)
    self.assertIs(type(alst[2]), int)
    # All 101 author entries should land in the front list as tuples.
    self.assertEqual(
        sum(1 for item in alst[0] if isinstance(item, tuple)), 101)
    self.assertEqual(len(alst[1]), 0,
                     "Back list on 1902.05884 should be empty")
    self.assertEqual(alst[2], 0,
                     "Back list size on 1902.05884 should be 0")
"""Tests for arXiv abstract (.abs) parser.""" import os from unittest import TestCase from datetime import datetime from dateutil.tz import tzutc from browse.domain.metadata import DocMetadata, Submitter, SourceType, \ VersionEntry from browse.services.document.metadata import AbsMetaSession from tests import path_of_for_test ABS_FILES = path_of_for_test('data/abs_files') class TestAbsParser(TestCase): """Test parsing metadata from .abs files.""" def test_bulk_parsing(self): """Parse all nonempty .abs files in test set.""" num_files_tested = 0 from_regex = r'(?m)From:\s+[^<]+<[^@]+@[^>]+>' self.assertRegex('From: J Doe <*****@*****.**>', from_regex) for dir_name, subdir_list, file_list in os.walk(ABS_FILES): for fname in file_list: fname_path = os.path.join(dir_name, fname) # skip any empty files if os.stat(fname_path).st_size == 0: continue if not fname_path.endswith('.abs'): continue num_files_tested += 1 dm = AbsMetaSession.parse_abs_file(filename=fname_path) self.assertIsInstance(dm, DocMetadata)
"""Tests for Google Scholar metadata tag creation. """ import html import json import os import pprint from unittest import TestCase from tests import path_of_for_test from browse.domain.metadata import DocMetadata from browse.services.util.metatags import meta_tag_metadata from browse.services.document.metadata import AbsMetaSession from app import app CLASSIC_RESULTS_FILE = path_of_for_test( 'data/classic_scholar_metadata_tags.json') ABS_FILES = path_of_for_test('data/abs_files') class TestAgainstClassicResults(TestCase): """Test google scholar metadata created from abs files against classic exresults. """ def setUp(self): app.testing = True app.config['APPLICATION_ROOT'] = '' app.config['SERVER_NAME'] = 'dev.arxiv.org' self.app = app def test_same_as_classic(self): bad_data = [ '1501.00001v1',
def test_collaboration_at_front(self):
    """A collaboration listed first is parsed as its own affil entry."""
    abs_file = path_of_for_test(
        'data/abs_files/ftp/arxiv/papers/0808/0808.4142.abs')
    parsed = AbsMetaSession.parse_abs_file(filename=abs_file)
    affil_list = parse_author_affil(parsed.authors.raw)
    expected = [['D0 Collaboration', '', ''],
                ['Abazov', 'V.', '']]
    self.assertListEqual(affil_list, expected)