コード例 #1
0
def games(api_version, from_date, to_date):
    """Crawl games for a date range and store a CSV record of the job run.

    The crawler for ``api_version`` is run over ``from_date``..``to_date``.
    Whether or not the crawl raises, a two-line CSV (header line, value line)
    describing the run is stored through ``Storage.store_job`` under
    ``<execution_date>/<job id>.csv``.

    Bucket names are read from the ``DEST_BUCKET`` (default ``output``) and
    ``JOB_BUCKET`` (default ``jobs``) environment variables; the S3 endpoint
    is read from ``S3_ENDPOINT_URL``.

    Args:
        api_version: Version handed to ``API_FACTORY.adapter_for_version``.
        from_date: Start of the query range; must support ``strftime``.
        to_date: End of the query range; must support ``strftime``.
    """
    # Read the clock once so execution_date and execution_ts always describe
    # the same instant (two separate utcnow() calls could straddle midnight).
    # NOTE: utcnow() is naive, so the %z directive below renders as ''.
    started_at = datetime.utcnow()
    meta = JobMetadata(
        id=str(uuid.uuid4()),
        app_version=__version__,
        execution_date=started_at.strftime("%Y/%m/%d"),
        execution_ts=started_at.strftime("%Y-%m-%dT%H:%M:%S.%f%z"),
        query_start_date=from_date.strftime(DATE_FORMATS[0]),
        query_stop_date=to_date.strftime(DATE_FORMATS[0]),
        job_successful='True',
        job_exception=''
    )
    bucket = os.environ.get('DEST_BUCKET', 'output')
    jobs = os.environ.get('JOB_BUCKET', 'jobs')
    s3client = boto3.client('s3', config=Config(signature_version='s3v4'),
                            endpoint_url=os.environ.get('S3_ENDPOINT_URL'))
    storage = Storage(bucket, jobs, s3client)
    try:
        api_adapters = API_FACTORY.adapter_for_version(api_version)
        api = api_adapters.api()
        crawler = api_adapters.crawler(api, storage)
        crawler.crawl(from_date, to_date)
    except Exception as e:
        # The failure is reported and recorded in the job metadata rather
        # than re-raised, so the job record below is still written.
        click.echo('JOB RUN FAILED')
        click.echo(e)
        meta.job_successful = 'False'
        # Commas would split the value across CSV columns, so blank them out.
        meta.job_exception = repr(e).replace(',', ' ')
    finally:
        # Convert the metadata once and reuse it for both CSV lines.
        record = asdict(meta)
        meta_keys = ','.join(record)
        meta_values = ','.join(record.values())
        storage_key = f'{meta.execution_date}/{meta.id}.csv'
        csv_string = f'{meta_keys}\n{meta_values}'
        storage.store_job(storage_key, csv_string)
コード例 #2
0
def test_storage_store_job():
    """Check that store_job reports success and writes to the job bucket.

    The job bucket is the second constructor argument of ``Storage``; the
    key and body must be forwarded unchanged to ``put_object``.
    """
    client = Mock()
    job_key = '1/2/3/4.csv'
    payload = 'foo bar baz'

    outcome = Storage('testbucket', 'jobbucket', client).store_job(job_key, payload)

    assert outcome is True
    client.put_object.assert_called_with(
        Bucket='jobbucket',
        Key=job_key,
        Body=payload,
    )
コード例 #3
0
def test_storage_store_game():
    """Check that store_game reports success and writes to the data bucket.

    The data bucket is the first constructor argument of ``Storage``; the
    ``StorageKey('a', 'b', 'c', 'd')`` is expected to become the object key
    ``a/b/c/d.csv``.
    """
    client = Mock()
    payload = 'foo bar baz'
    game_key = StorageKey('a', 'b', 'c', 'd')

    outcome = Storage('testbucket', 'jobbucket', client).store_game(game_key, payload)

    assert outcome is True
    client.put_object.assert_called_with(
        Bucket='testbucket',
        Key='a/b/c/d.csv',
        Body=payload,
    )
コード例 #4
0
def test_crawl(schedule_data, game_2019030314_data):
    """Crawl a mocked schedule and check that both games are stored.

    The schedule endpoint and two boxscore endpoints are stubbed with
    ``requests_mock`` (both boxscores serve the same fixture payload). The
    crawl must issue exactly two ``put_object`` calls against the data
    bucket, with the expected dated keys and the CSV header in each body.
    """
    data_bucket = 'testdatabucket'
    job_bucket = 'testjobbucket'
    first_game = '2019030314'
    second_game = '2019030325'
    expected_header = ','.join(header)

    schedule_url = 'https://statsapi.web.nhl.com/api/v1/schedule?startDate=2020-01-01&endDate=2020-01-02'

    with requests_mock.Mocker() as http:
        http.get(schedule_url, json=schedule_data, status_code=200)
        for game_id in (first_game, second_game):
            http.get(
                f'https://statsapi.web.nhl.com/api/v1/game/{game_id}/boxscore',
                json=game_2019030314_data,
                status_code=200,
            )

        s3 = Mock()
        crawler = Crawler(NHLApi(), Storage(data_bucket, job_bucket, s3))
        crawler.crawl(datetime(2020, 1, 1), datetime(2020, 1, 2))

        put_object = s3.put_object
        assert put_object.call_count == 2

        first_put = put_object.call_args_list[0].kwargs
        second_put = put_object.call_args_list[1].kwargs
        stored = (first_put, second_put)

        # Both objects must land in the data bucket, not the job bucket.
        assert first_put.get('Bucket') == second_put.get('Bucket') == data_bucket

        expected_keys = (
            f'2020/09/13/{first_game}.csv',
            f'2020/09/14/{second_game}.csv',
        )
        for put_kwargs, expected_key in zip(stored, expected_keys):
            assert put_kwargs.get('Key') == expected_key

        for put_kwargs in stored:
            assert expected_header in put_kwargs.get('Body')
コード例 #5
0
    def test_flatten_json(self, transformer_class):
        """Check that flatten_json collapses nested lists and dicts.

        Each leaf value is expected under a key made of the ``player_``
        prefix followed by the path of dict keys and list indices joined
        with underscores.
        """
        job_uuid = 1
        transformer = transformer_class(Storage(None, None), job_uuid)

        nested = {
            "a": [
                {"b": [{"c": 1}, {"d": 2}]},
                {"bb": [{"cc": 11}, {"dd": 22}]},
            ],
        }
        flattened = transformer.flatten_json(nested)

        assert flattened == {
            'player_a_0_b_0_c': 1,
            'player_a_0_b_1_d': 2,
            'player_a_1_bb_0_cc': 11,
            'player_a_1_bb_1_dd': 22,
        }
コード例 #6
0
    def test_init(self, transformer_class):
        """Check that the transformer exposes the uuid it was built with."""
        expected_uuid = 1

        instance = transformer_class(Storage(None, None), expected_uuid)

        assert instance.uuid == expected_uuid
コード例 #7
0
    def test_init(self, crawler_class):
        """Check that the crawler exposes the uuid it was built with."""
        expected_uuid = 1
        api = NHLApi()
        store = Storage(None, None)

        instance = crawler_class(api, store, expected_uuid)

        assert instance.uuid == expected_uuid
コード例 #8
0
def test_crawl_no_games():
    """Check that nothing is stored when the schedule reports zero games.

    The schedule endpoint is stubbed to return ``{'totalGames': 0}``; after
    the crawl, ``put_object`` must not have been called at all.
    """
    data_bucket = 'testdatabucket'
    job_bucket = 'testjobbucket'
    schedule_url = 'https://statsapi.web.nhl.com/api/v1/schedule?startDate=2020-01-01&endDate=2020-01-02'
    empty_schedule = {'totalGames': 0}

    with requests_mock.Mocker() as http:
        http.get(schedule_url, json=empty_schedule, status_code=200)

        s3 = Mock()
        crawler = Crawler(NHLApi(), Storage(data_bucket, job_bucket, s3))
        crawler.crawl(datetime(2020, 1, 1), datetime(2020, 1, 2))

        assert s3.put_object.call_count == 0