Example #1
0
    def test_get_schema_handles_single_types(self):
        """A single scalar field type is emitted as ['null', <type>], and the
        report-id / report-time bookkeeping fields are always appended."""
        field_list = [
            {'name': 'uniqueReachAverageImpressionFrequency',
             'type': 'string'},
        ]

        schema = get_schema("some_stream", field_list)

        # NOTE(review): 'addtionalProperties' (sic) deliberately matches the
        # misspelled key the implementation emits; fix it there first.
        self.assertDictEqual(
            {
                'type': 'object',
                'properties': {
                    'uniqueReachAverageImpressionFrequency': {
                        'type': ['null', 'string']
                    },
                    REPORT_ID_FIELD: {'type': 'integer'},
                    SINGER_REPORT_FIELD: {
                        'type': 'string',
                        'format': 'date-time'
                    },
                },
                'addtionalProperties': False,
            },
            schema)
Example #2
0
    def test_get_schema_handles_multiple_types(self):
        """Multi-type DCM fields map to ['null', <json types...>]:
        'double' -> 'number' and 'long' -> 'integer', order preserved."""
        long_string_fields = [
            'uniqueReachClickReach',
            'uniqueReachImpressionReach',
            'conversionId (required)',
            'uniqueReachTotalReach',
        ]
        field_list = [{'name': 'uniqueReachAverageImpressionFrequency',
                       'type': ['double', 'string']}]
        field_list += [{'name': field, 'type': ['long', 'string']}
                       for field in long_string_fields]

        schema = get_schema("some_stream", field_list)

        expected_props = {
            'uniqueReachAverageImpressionFrequency': {
                'type': ['null', 'number', 'string']
            },
        }
        expected_props.update({
            field: {'type': ['null', 'integer', 'string']}
            for field in long_string_fields
        })
        expected_props[REPORT_ID_FIELD] = {'type': 'integer'}
        expected_props[SINGER_REPORT_FIELD] = {
            'type': 'string',
            'format': 'date-time'
        }

        # NOTE(review): 'addtionalProperties' (sic) deliberately matches the
        # misspelled key the implementation emits; fix it there first.
        self.assertDictEqual(
            {
                'type': 'object',
                'properties': expected_props,
                'addtionalProperties': False,
            },
            schema)
Example #3
0
def sync_report(service, field_type_lookup, profile_id, report_config):
    """Run one DCM report and stream its rows as singer records.

    Writes the stream's schema, triggers a report run, then polls the file
    status with backoff (next_sleep_interval) until it is available, fails
    terminally, or MAX_RETRY_ELAPSED_TIME is exceeded.

    Args:
        service: authorized DCM API client (googleapiclient-style resource).
        field_type_lookup: mapping used by get_fields to type report columns.
        profile_id: DCM profile id the report belongs to.
        report_config: dict with 'report_id', 'stream_name', 'stream_alias'.

    Raises:
        Exception: if the report file reaches a terminal non-available status,
            or processing exceeds MAX_RETRY_ELAPSED_TIME.
    """
    report_id = report_config['report_id']
    stream_name = report_config['stream_name']
    stream_alias = report_config['stream_alias']

    LOGGER.info("%s: Starting sync", stream_name)

    report = (service.reports().get(profileId=profile_id,
                                    reportId=report_id).execute())

    fieldmap = get_fields(field_type_lookup, report)
    schema = get_schema(stream_name, fieldmap)
    singer.write_schema(stream_name, schema, [], stream_alias=stream_alias)

    with singer.metrics.job_timer('run_report'):
        # Timestamp of when the run was requested; attached to emitted rows.
        report_time = datetime.utcnow().isoformat() + 'Z'
        report_file = (service.reports().run(profileId=profile_id,
                                             reportId=report_id).execute())

        report_file_id = report_file['id']

        sleep = 0
        start_time = time.time()
        while True:
            report_file = (service.files().get(
                reportId=report_id, fileId=report_file_id).execute())

            status = report_file['status']
            if status == 'REPORT_AVAILABLE':
                process_file(service, fieldmap, report_config, report_file_id,
                             report_time)
                break
            elif status != 'PROCESSING':
                # Terminal failure state (e.g. FAILED / CANCELLED).
                message = '{}: report_id {} / file_id {} - File status is {}, processing failed'.format(
                    stream_name, report_id, report_file_id, status)
                LOGGER.error(message)
                # BUG FIX: removed a duplicated, unreachable second
                # `raise Exception(message)` that followed this line.
                raise Exception(message)
            elif time.time() - start_time > MAX_RETRY_ELAPSED_TIME:
                # BUG FIX: the original passed `status` as the '{} secs'
                # argument, so MAX_RETRY_ELAPSED_TIME was silently dropped
                # from the message (5 args for 4 placeholders).
                message = '{}: report_id {} / file_id {} - File processing deadline exceeded ({} secs)'.format(
                    stream_name, report_id, report_file_id,
                    MAX_RETRY_ELAPSED_TIME)
                LOGGER.error(message)
                raise Exception(message)

            sleep = next_sleep_interval(sleep)
            LOGGER.info(
                '{}: report_id {} / file_id {} - File status is {}, sleeping for {} seconds'
                .format(stream_name, report_id, report_file_id, status, sleep))
            time.sleep(sleep)
def discover_streams(service, config):
    """Return a singer catalog dict with one stream per DCM report.

    Lists the profile's reports, derives a sanitized stream name and a
    tap_stream_id of '<stream_name>_<report_id>' for each, builds the JSON
    schema from the report's fields, and marks every property's inclusion
    as 'automatic'.
    """
    profile_id = config.get('profile_id')

    report_items = (service.reports().list(
        profileId=profile_id).execute().get('items'))

    # Key each report by (stream_name, tap_stream_id); iterate in id order.
    configs_by_stream = {}
    for item in sorted(report_items, key=lambda r: r['id']):
        name = sanitize_name(item['name'])
        configs_by_stream[(name, '{}_{}'.format(name, item['id']))] = item

    type_lookup = get_field_type_lookup()
    catalog = Catalog([])

    for (name, tap_stream_id), item in configs_by_stream.items():
        schema_dict = get_schema(name, get_fields(type_lookup, item))

        # Stream-level metadata carries the originating report id.
        stream_metadata = [{
            'metadata': {
                'tap-doubleclick-campaign-manager.report-id': item['id']
            },
            'breadcrumb': []
        }]
        # Every discovered property is always included.
        stream_metadata.extend(
            {
                'metadata': {'inclusion': 'automatic'},
                'breadcrumb': ['properties', prop]
            }
            for prop in schema_dict['properties'])

        catalog.streams.append(CatalogEntry(
            stream=name,
            stream_alias=name,
            tap_stream_id=tap_stream_id,
            key_properties=[],
            schema=Schema.from_dict(schema_dict),
            metadata=stream_metadata))

    return catalog.to_dict()