Code example #1
File: test_desy.py Project: drjova/hepcrawl
def get_expected_fixture(response_file_name):
    expected_record = expected_json_results_from_file(
        'responses/desy',
        response_file_name,
        test_suite='unit',
    )
    return expected_record
Code example #2
File: test_desy.py Project: miguelgrc/hepcrawl
def get_expected_fixture(response_file_name):
    expected_record = expected_json_results_from_file(
        'responses/desy',
        response_file_name,
        test_suite='unit',
    )
    return expected_record
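Both examples above are thin wrappers around expected_json_results_from_file. As a rough sketch of what such a helper might do, assuming the fixture files live under a tests/<test_suite>/... directory layout (the real helper ships with hepcrawl's test library and may resolve paths differently):

import json
import os


def expected_json_results_from_file(*path_chunks, **kwargs):
    # Illustrative sketch only: the tests/<test_suite>/... layout below is an
    # assumption, not necessarily how hepcrawl locates its fixtures.
    test_suite = kwargs.get('test_suite', 'functional')
    fixture_path = os.path.join('tests', test_suite, *path_chunks)
    with open(fixture_path) as fixture_file:
        return json.load(fixture_file)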
Code example #3
    yield {
        'CRAWLER_HOST_URL': 'http://*****:*****',
        # ... (the rest of this fixture is masked/elided in the source listing)
    }


@pytest.mark.parametrize('expected_results', [
    expected_json_results_from_file(
        'cds',
        'fixtures',
        'cds_smoke_records_expected.json',
    ),
],
                         ids=[
                             'smoke',
                         ])
def test_cds(set_up_local_environment, expected_results):
    crawler = get_crawler_instance(
        set_up_local_environment.get('CRAWLER_HOST_URL'))

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
Code example #4
File: test_arxiv.py Project: ammirate/hepcrawl
def get_configuration_single():
    return {
        'CRAWLER_HOST_URL': 'http://*****:*****',
        # ... (the rest of this configuration is masked/elided in the source listing)
    }


@pytest.mark.parametrize('expected_results, config, spider', [
    (
        expected_json_results_from_file(
            'arxiv',
            'fixtures',
            'arxiv_expected.json',
        ),
        get_configuration(),
        'arXiv',
    ),
    (
        expected_json_results_from_file(
            'arxiv',
            'fixtures',
            'arxiv_expected_single.json',
        ),
        get_configuration_single(),
        'arXiv_single',
    ),
],
Code example #5
File: test_desy.py Project: katrinleinweber/hepcrawl
def cleanup():
    # The test must wait until the docker environment is up (takes about 10
    # seconds).
    sleep(10)
    yield

    clean_dir(path=os.path.join(os.getcwd(), '.scrapy'))
    clean_dir('/tmp/file_urls')
    clean_dir('/tmp/DESY')


@pytest.mark.parametrize('expected_results, settings', [
    (
        expected_json_results_from_file(
            'desy',
            'fixtures',
            'desy_records_ftp_expected.json',
        ),
        get_ftp_settings(),
    ),
    (
        expected_json_results_from_file(
            'desy',
            'fixtures',
            'desy_records_local_expected.json',
        ),
        get_local_settings(),
    ),
],
                         ids=[
                             'ftp package',
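The clean_dir calls in the surrounding examples rely on a small test-library helper. A minimal sketch of what it presumably does, assuming it simply wipes a directory tree (the default path here is an assumption, not taken from hepcrawl):

import os
import shutil


def clean_dir(path='/tmp/WSP'):
    # Sketch under assumptions: remove the directory tree if it exists.
    if os.path.exists(path):
        shutil.rmtree(path, ignore_errors=True)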
Code example #6
    clean_dir()
    clean_dir(path=os.path.join(os.getcwd(), '.scrapy'))

    _, dirs, files = next(os.walk(package_location))
    for dir_name in dirs:
        clean_dir(os.path.join(package_location, dir_name))
    for file_name in files:
        if not file_name.endswith('.zip'):
            os.unlink(os.path.join(package_location, file_name))


@pytest.mark.parametrize('expected_results, settings', [
    (
        expected_json_results_from_file(
            'wsp',
            'fixtures',
            'wsp_smoke_records.json',
        ),
        get_ftp_settings(),
    ),
    (
        expected_json_results_from_file(
            'wsp',
            'fixtures',
            'wsp_smoke_records.json',
        ),
        get_local_settings(),
    ),
],
                         ids=[
                             'ftp',
Code example #7
File: test_arxiv.py Project: drjova/hepcrawl
        'CRAWLER_HOST_URL': 'http://scrapyd:6800',
        'CRAWLER_PROJECT': 'hepcrawl',
        'CRAWLER_ARGUMENTS': {
            'identifier': 'oai:arXiv.org:1401.2122',
            'url': 'http://arxiv-http-server.local/oai2',
        }
    }


@pytest.mark.parametrize(
    'expected_results, config, spider',
    [
        (
            expected_json_results_from_file(
                'arxiv',
                'fixtures',
                'arxiv_expected.json',
            ),
            get_configuration(),
            'arXiv',
        ),
        (
            expected_json_results_from_file(
                'arxiv',
                'fixtures',
                'arxiv_expected_single.json',
            ),
            get_configuration_single(),
            'arXiv_single',
        ),
    ],
Code example #8
File: test_cds.py Project: miguelgrc/hepcrawl
def get_configuration_single():
    return {
        'CRAWLER_HOST_URL': 'http://*****:*****',
        # ... (the rest of this configuration is masked/elided in the source listing)
    }


@pytest.mark.parametrize('expected_results, config, spider', [
    (
        expected_json_results_from_file(
            'cds',
            'fixtures',
            'cds_expected.json',
        ),
        get_configuration(),
        'CDS',
    ),
    (
        expected_json_results_from_file(
            'cds',
            'fixtures',
            'cds_single_expected.json',
        ),
        get_configuration_single(),
        'CDS_single',
    ),
],
Code example #9
File: test_wsp.py Project: drjova/hepcrawl
    _, dirs, files = next(os.walk(package_location))
    for dir_name in dirs:
        clean_dir(os.path.join(package_location, dir_name))
    for file_name in files:
        if not file_name.endswith('.zip'):
            os.unlink(os.path.join(package_location, file_name))


@pytest.mark.parametrize(
    'expected_results, settings',
    [
        (
            expected_json_results_from_file(
                'wsp',
                'fixtures',
                'wsp_smoke_records.json',
            ),
            get_ftp_settings(),
        ),
        (
            expected_json_results_from_file(
                'wsp',
                'fixtures',
                'wsp_smoke_records.json',
            ),
            get_local_settings(),
        ),
    ],
    ids=[
        'ftp',
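The DESY and WSP tests above pass get_ftp_settings() / get_local_settings() into the parametrization. A sketch of the shape such helpers are expected to return, modelled on the configuration dicts visible in examples #7 and #13 (the package path and argument keys here are assumptions):

def get_local_settings():
    # Assumed fixture location; the real tests compute this from the package
    # under test.
    package_location = '/tmp/WSP/world_scientific_package.zip'
    return {
        'CRAWLER_HOST_URL': 'http://scrapyd:6800',
        'CRAWLER_PROJECT': 'hepcrawl',
        'CRAWLER_ARGUMENTS': {
            'source_file': 'file://' + package_location,
        },
    }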
Code example #10
File: test_desy.py Project: drjova/hepcrawl
    # The test must wait until the docker environment is up (takes about 10
    # seconds).
    sleep(10)
    yield

    clean_dir(path=os.path.join(os.getcwd(), '.scrapy'))
    clean_dir('/tmp/file_urls')
    clean_dir('/tmp/DESY')


@pytest.mark.parametrize(
    'expected_results, settings',
    [
        (
            expected_json_results_from_file(
                'desy',
                'fixtures',
                'desy_records_ftp_expected.json',
            ),
            get_ftp_settings(),
        ),
        (
            expected_json_results_from_file(
                'desy',
                'fixtures',
                'desy_records_local_expected.json',
            ),
            get_local_settings(),
        ),

    ],
    ids=[
Code example #11
File: test_arxiv.py Project: michamos/hepcrawl
    yield {
        'CRAWLER_HOST_URL': 'http://*****:*****',
        # ... (the rest of this fixture is masked/elided in the source listing)
    }


@pytest.mark.parametrize('expected_results', [
    expected_json_results_from_file(
        'arxiv',
        'fixtures',
        'arxiv_smoke_record.json',
    ),
],
                         ids=[
                             'smoke',
                         ])
def test_arxiv(set_up_local_environment, expected_results):
    crawler = get_crawler_instance(
        set_up_local_environment.get('CRAWLER_HOST_URL'))

    results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
Code example #12
File: test_pos.py Project: katrinleinweber/hepcrawl
                'https://http-server.local/contribution?id='
            ),
            'base_proceedings_url': (
                'https://http-server.local/cgi-bin/reader/conf.cgi?confid='
            ),
        }
    }


@pytest.mark.parametrize(
    'expected_results, config',
    [
        (
            expected_json_results_from_file(
                'pos',
                'fixtures',
                'pos_conference_proceedings_records.json',
            ),
            get_configuration(),
        ),
    ],
    ids=[
        'smoke',
    ]
)
def test_pos_conference_paper_record_and_proceedings_record(
    expected_results,
    config,
):
    crawler = get_crawler_instance(config['CRAWLER_HOST_URL'])
Code example #13
File: test_cds.py Project: drjova/hepcrawl
        'CRAWLER_HOST_URL': 'http://scrapyd:6800',
        'CRAWLER_PROJECT': 'hepcrawl',
        'CRAWLER_ARGUMENTS': {
            'source_file': 'file://' + package_location,
        }
    }

    clean_dir()


@pytest.mark.parametrize(
    'expected_results',
    [
        expected_json_results_from_file(
            'cds',
            'fixtures',
            'cds_smoke_records_expected.json',
        ),
    ],
    ids=[
        'smoke',
    ]
)
def test_cds(set_up_local_environment, expected_results):
    crawler = get_crawler_instance(
        set_up_local_environment.get('CRAWLER_HOST_URL')
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
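The listing above is cut off before the assertions. As an assumed illustration (not the verbatim hepcrawl code) of how such a smoke test typically finishes, the harvested records can be compared with the expected JSON fixtures regardless of order; the 'record' key below is a hypothetical placeholder for however the crawl results expose each record:

def assert_records_match(crawl_results, expected_results):
    # Assumed continuation: pull the records out of the crawl results and
    # compare them to the expected fixtures, ignoring ordering.
    gotten_records = [result['record'] for result in crawl_results]
    assert sorted(gotten_records, key=repr) == sorted(expected_results, key=repr)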