Example #1
def sync_events():
    schema = load_schema("events")
    singer.write_schema("events", schema, [])

    for export_bundle in request_export_bundles():
        with metrics.record_counter("events") as counter:
            for event in download_events(export_bundle['Id']):
                transform_event(event)
                counter.increment()
                singer.write_record("events", event)
            stop_timestamp = datetime.datetime.utcfromtimestamp(export_bundle['Stop'])
            utils.update_state(STATE, "events", stop_timestamp)
            singer.write_state(STATE)
Example #2
def sync_sales_activities():
    """Sync all sales activities, call out to individual filters
    """

    bookmark_property = 'updated_at'
    endpoint = 'sales_activities'
    state_entity = endpoint
    start = get_start(state_entity)
    singer.write_schema(endpoint,
                        tap_utils.load_schema(endpoint),
                        ["id"],
                        bookmark_properties=[bookmark_property])
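    # Fetch all sales activities and emit only those updated on or after the saved bookmark.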
    sales = gen_request(get_url(endpoint))
    for sale in sales:
        if sale[bookmark_property] >= start:
            LOGGER.info("Sale {}: Syncing details".format(sale['id']))
            singer.write_record("sale_activities", sale,
                                time_extracted=singer.utils.now())
Example #3
def sync_deals_by_filter(bookmark_prop, fil):
    """
    Iterate over all deal filter to sync all deal data
    """
    endpoint = 'deals'
    fil_id = fil['id']
    state_entity = endpoint + "_" + str(fil_id)
    start = get_start(state_entity)
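    # Fetch deals for this filter view and emit only those updated on or after the saved bookmark.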
    deals = gen_request(get_url(endpoint, query='view/'+str(fil_id)))
    for deal in deals:
        if deal[bookmark_prop] >= start:
            # get all sub-entities and save them
            deal['amount'] = float(deal['amount'])  # cast amount to float
            deal['custom_field'] = json.dumps(
                deal['custom_field'])  # Make JSON String to store
            LOGGER.info("Deal {}: Syncing details".format(deal['id']))
            singer.write_record(
                "deals", deal, time_extracted=singer.utils.now())
Example #4
def get_all_pull_requests(schemas, config, state, mdata):
    '''
    https://developer.github.com/v3/pulls/#list-pull-requests
    '''
    repo_path = config['repository']
    with metrics.record_counter('pull_requests') as counter:
        with metrics.record_counter('reviews') as reviews_counter:
            for response in authed_get_all_pages(
                    'pull_requests',
                    'https://api.github.com/repos/{}/pulls?state=all'.format(
                        repo_path)):
                pull_requests = response.json()
                extraction_time = singer.utils.now()
                for pr in pull_requests:
                    pr_num = pr.get('number')

                    # transform and write pull_request record
                    with singer.Transformer() as transformer:
                        rec = transformer.transform(
                            pr,
                            schemas['pull_requests'],
                            metadata=metadata.to_map(mdata))
                    singer.write_record('pull_requests',
                                        rec,
                                        time_extracted=extraction_time)
                    singer.write_bookmark(
                        state, 'pull_requests', 'since',
                        singer.utils.strftime(extraction_time))
                    counter.increment()

                    # sync reviews if that schema is present (only there if selected)
                    if schemas.get('reviews'):
                        for review_rec in get_reviews_for_pr(
                                pr_num, schemas['reviews'], config, state,
                                mdata):
                            singer.write_record('reviews',
                                                review_rec,
                                                time_extracted=extraction_time)
                            singer.write_bookmark(
                                state, 'reviews', 'since',
                                singer.utils.strftime(extraction_time))
                            reviews_counter.increment()

    return state
Example #5
def sync_table_file(config: Dict, s3_path: str, table_spec: Dict, stream: Dict) -> int:
    """
    Sync a given CSV file found in S3
    :param config: tap configuration
    :param s3_path: file path given by S3
    :param table_spec: table specification
    :param stream: Stream data
    :return: number of streamed records
    """
    LOGGER.info('Syncing file "%s".', s3_path)

    bucket = config['bucket']
    table_name = table_spec['table_name']

    s3_file_handle = s3.get_file_handle(config, s3_path)
    # We observed data whose field size exceeded the default maximum of
    # 131072. We believe the primary consequence of the following setting
    # is that a malformed, wide CSV would potentially parse into a single
    # large field rather than giving this error, but we also think the
    # chances of that are very small and at any rate the source data would
    # need to be fixed. The other consequence of this could be larger
    # memory consumption but that's acceptable as well.
    csv.field_size_limit(sys.maxsize)
    iterator = get_row_iterator(s3_file_handle._raw_stream, table_spec)  # pylint:disable=protected-access

    records_synced = 0

    for row in iterator:
        custom_columns = {
            s3.SDC_SOURCE_BUCKET_COLUMN: bucket,
            s3.SDC_SOURCE_FILE_COLUMN: s3_path,

            # 1-based line number in the file: +1 for the header row, +1 because records_synced is zero-based
            s3.SDC_SOURCE_LINENO_COLUMN: records_synced + 2
        }
        rec = {**row, **custom_columns}

        with Transformer() as transformer:
            to_write = transformer.transform(rec, stream['schema'], metadata.to_map(stream['metadata']))

        write_record(table_name, to_write)
        records_synced += 1

    return records_synced
Example #6
    def sync(self):
        current_bookmark_str = singer.bookmarks.get_bookmark(
            state=self.state,
            tap_stream_id=self.tap_stream_id,
            key=self.bookmark_properties)

        if current_bookmark_str is not None:
            self.params.update({self.api_bookmark_param: current_bookmark_str})

        singer.bookmarks.write_bookmark(state=self.state,
                                        tap_stream_id=self.tap_stream_id,
                                        key=self.bookmark_properties,
                                        val=singer.utils.strftime(
                                            singer.utils.now()))

        with singer.metrics.job_timer(job_type=f"sync_{self.tap_stream_id}"):
            with singer.metrics.record_counter(
                    endpoint=self.tap_stream_id) as counter:
                project_params = self.config.get("streams",
                                                 {}).get("projects", {})
                project_params.update({"fields": "id"})
                for project in self._yield_records(entity='projects',
                                                   params=project_params):
                    # Reset the offset after each Project iteration.
                    self.params.update({"offset": 0})
                    for story in self._yield_records(
                            entity=f"projects/{project.get('id')}/stories",
                            params=self.params):
                        for endpoint in self.expand_endpoints:
                            records = list(self._yield_records(
                                entity=(f"projects/{project.get('id')}/stories"
                                        f"/{story.get('id')}/{endpoint}"),
                                params={}))
                            story[endpoint] = records
                        with singer.Transformer() as transformer:
                            transformed_record = transformer.transform(
                                data=story, schema=self.schema)
                            singer.write_record(
                                stream_name=self.tap_stream_id,
                                time_extracted=singer.utils.now(),
                                record=transformed_record)
                            counter.increment()
Example #7
def sync_contacts(STATE, stream):
    '''Sync contacts from the Autopilot API

    The API returns data in the following format

    {
        "contacts": [{...},{...}],
        "total_contacts": 400,
        "bookmark": "person_9EAF39E4-9AEC-4134-964A-D9D8D54162E7"
    }

    Params:
    STATE - State dictionary
    stream - Stream dictionary from the catalog
    '''
    tap_stream_id = stream['tap_stream_id']
    singer.write_schema(tap_stream_id,
                        stream['schema'],
                        ["contact_id"])

    start = utils.strptime_with_tz(get_start(STATE, tap_stream_id, "updated_at"))

    LOGGER.info("Only syncing contacts updated since " + utils.strftime(start))
    max_updated_at = start

    for row in gen_request(STATE, get_url(tap_stream_id)):
        updated_at = None
        if "updated_at" in row:
            updated_at = utils.strptime_with_tz(
                _transform_datetime( # pylint: disable=protected-access
                    row["updated_at"],
                    UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING))

        if not updated_at or updated_at >= start:
            singer.write_record(tap_stream_id, transform_contact(row))

        if updated_at and updated_at > max_updated_at:
            max_updated_at = updated_at

    STATE = singer.write_bookmark(STATE, tap_stream_id, "updated_at", utils.strftime(max_updated_at))
    singer.write_state(STATE)

    LOGGER.info("Completed Contacts Sync")
    return STATE
Example #8
def get_rate_cards_rates(name,
                         schema,
                         state,
                         url,
                         start_date,
                         replication_key,
                         replication_method,
                         sync=False,
                         stream_rate_cards=None,
                         mdata=None):
    with metrics.record_counter(name) as counter:
        for rate_card in get_data('rate_cards',
                                  stream_rate_cards.schema.to_dict(),
                                  state,
                                  url,
                                  start_date,
                                  stream_rate_cards.replication_key,
                                  stream_rate_cards.replication_method,
                                  mdata=stream_rate_cards.metadata,
                                  sync=False,
                                  by_pass_date=True):
            rate_card_id = rate_card.get('id')
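            # Fetch this rate card's rates; emit each one and advance the stream bookmark as we go.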
            response = request_get(url + f'rate_cards/{rate_card_id}/{name}')
            if response:
                bookmark = singer.get_bookmark(state, name, replication_key)
                if bookmark is None:
                    bookmark = start_date
                new_bookmark = bookmark

                records = response.json()
                extraction_time = singer.utils.now()
                for record in records:
                    with singer.Transformer() as transformer:
                        record['rate_card_id'] = rate_card_id
                        rec = transformer.transform(
                            record, schema, metadata=metadata.to_map(mdata))
                        new_bookmark = max(new_bookmark, rec[replication_key])
                        singer.write_record(name,
                                            rec,
                                            time_extracted=extraction_time)
                        counter.increment()
                    singer.write_bookmark(state, name, replication_key,
                                          new_bookmark)
    return state
Example #9
    def sync(self, client, **kwargs):
        startdate = kwargs['startdate']
        start, end = self.get_absolute_start_end_time(
            startdate, lookback=int(self.config.get('lookback')))

        max_bookmark_dttm = start

        with singer.metrics.record_counter(endpoint=self.name) as counter:
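            # Walk the date range in fixed-size windows, keeping the newest replication-key value as the bookmark.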
            while start != end:
                start_str = start.strftime(INVOICE_DATETIME_FMT)
                next_window_str = start_str
                results = client.get_paginated_data(self.api_method,
                                                    self.version,
                                                    self.endpoint,
                                                    data_key=self.data_key,
                                                    params=self.build_params(),
                                                    body=self.build_body(
                                                        start_str,
                                                        next_window_str))

                max_bookmark_value = strftime(max_bookmark_dttm)
                with Transformer(
                        integer_datetime_fmt="no-integer-datetime-parsing"
                ) as transformer:
                    for page in results:
                        for record in page.get(self.data_key):
                            transformed_record = self.transform(record)

                            record_timestamp = strptime_to_utc(
                                transformed_record[self.replication_key])
                            if record_timestamp > max_bookmark_dttm:
                                max_bookmark_value = strftime(record_timestamp)

                            singer.write_record(
                                stream_name=self.name,
                                record=transformer.transform(
                                    data=transformed_record,
                                    schema=self.stream_schema,
                                    metadata=self.stream_metadata),
                                time_extracted=singer.utils.now())
                            counter.increment()
                start = start + timedelta(days=DATE_WINDOW_SIZE)
                self.update_bookmark(self.name, max_bookmark_value)
            return counter.value
Example #10
def process_worksheet(gsheets_loader, sheet_name, worksheet, config):
    if worksheet is None:
        name_with_worksheet = sheet_name
    else:
        name_with_worksheet = sheet_name + "_" + worksheet

    if 'singular_table_name' in config and config['singular_table_name']:
        stream_name = underscore(parameterize(name_with_worksheet))
    else:
        stream_name = tableize(parameterize(name_with_worksheet))

    schema = gsheets_loader.get_schema(sheet_name, worksheet)
    records = gsheets_loader.get_data(sheet_name, worksheet)

    # additional data transformations
    column_mapping = None
    if 'underscore_columns' in config and config['underscore_columns']:
        column_mapping = {'id': 'id'}
        props = {}
        for k, v in schema['properties'].items():
            kt = underscore(parameterize(k))
            props[kt] = v
            column_mapping[k] = kt
        schema['properties'] = props

    schema['properties']['id'] = {'type': 'integer'}

    for i, record in enumerate(records, start=1):
        record['id'] = i

    # write the schema, then the records
    singer.write_schema(
        stream_name=stream_name,
        schema=schema,
        key_properties=['id']
    )

    for record in records:
        if column_mapping is not None:
            record_transformed = {column_mapping[k]: v for k, v in record.items()}
        else:
            record_transformed = record

        singer.write_record(stream_name, record_transformed)
Example #11
    def sync(self):
        with singer.metrics.job_timer(job_type=f"sync_{self.tap_stream_id}"):
            with singer.metrics.record_counter(
                    endpoint=self.tap_stream_id) as counter:
                client = self._get_auth_client()
                params = {
                    "start_date":
                    singer.utils.strftime(singer.utils.now() -
                                          timedelta(days=365),
                                          format_str='%Y-%m-01'),
                    "end_date":
                    singer.utils.strftime(singer.utils.now(),
                                          format_str="%Y-%m-%d"),
                    "accounting_method":
                    "Accrual",
                    "summarize_column_by":
                    "Month"
                }
                resp = self._get(auth_client=client,
                                 report_entity='ProfitAndLoss',
                                 params=params)
                rows = self._transform_columns_into_rows(resp)

                for i, row in enumerate(rows):
                    if row.get("StartDate") is None:
                        continue
                    input = []
                    data = self._get_row_data(resp=resp,
                                              column_enum=i + 1,
                                              input=input)
                    new_data = {}
                    for line in data:
                        new_data.update(line)
                    row["ReportData"] = new_data
                    row["SyncTimestampUtc"] = singer.utils.strftime(
                        singer.utils.now(), "%Y-%m-%dT%H:%M:%SZ")

                    with singer.Transformer() as transformer:
                        transformed_record = transformer.transform(
                            data=row, schema=self.schema)
                        singer.write_record(stream_name=self.stream,
                                            time_extracted=singer.utils.now(),
                                            record=transformed_record)
                        counter.increment()
Example #12
    def sync(self, mdata):

        schema = self.load_schema()
        bookmark = singer.get_bookmark(state=self.state,
                                       tap_stream_id=self.name,
                                       key=self.replication_key)
        if bookmark is None:
            bookmark = self.config.get('start_date')
        new_bookmark = bookmark

        # pylint: disable=unused-variable
        with singer.metrics.job_timer(job_type='list_users') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:
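                # Page through users and emit only those updated after the saved bookmark.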
                users_list = self.client.get_users(limit=100)

                for page in users_list:
                    users = page.get('members')
                    transformed_users = transform_json(
                        stream=self.name,
                        data=users,
                        date_fields=self.date_fields)
                    for user in transformed_users:
                        with singer.Transformer(
                                integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                                as transformer:
                            transformed_record = transformer.transform(
                                data=user,
                                schema=schema,
                                metadata=metadata.to_map(mdata))
                            new_bookmark = max(
                                new_bookmark,
                                transformed_record.get('updated'))
                            if transformed_record.get('updated') > bookmark:
                                if self.write_to_singer:
                                    singer.write_record(
                                        stream_name=self.name,
                                        time_extracted=singer.utils.now(),
                                        record=transformed_record)
                                    counter.increment()

        self.state = singer.write_bookmark(state=self.state,
                                           tap_stream_id=self.name,
                                           key=self.replication_key,
                                           val=new_bookmark)
Example #13
def get_all_events(schemas, repo_path, state, mdata):
    # Incremental sync off `created_at`
    # https://developer.github.com/v3/issues/events/#list-events-for-a-repository
    # 'https://api.github.com/repos/{}/issues/events?sort=created_at&direction=desc'.format(repo_path)

    bookmark_value = get_bookmark(state, repo_path, "events", "since")
    if bookmark_value:
        bookmark_time = singer.utils.strptime_to_utc(bookmark_value)
    else:
        bookmark_time = 0

    with metrics.record_counter('events') as counter:
        for response in authed_get_all_pages(
                'events',
                'https://api.github.com/repos/{}/events?sort=created_at&direction=desc'
                .format(repo_path)):
            events = response.json()
            extraction_time = singer.utils.now()
            for r in events:
                r['_sdc_repository'] = repo_path

                # skip records that haven't been updated since the last run;
                # the GitHub API doesn't currently allow a ?since param for events,
                # so once we find the first piece of old data we can return,
                # thanks to the descending sort
                updated_at = r.get('created_at') if r.get(
                    'updated_at') is None else r.get('updated_at')
                if bookmark_time and singer.utils.strptime_to_utc(
                        updated_at) < bookmark_time:
                    return state

                # transform and write release record
                with singer.Transformer() as transformer:
                    rec = transformer.transform(
                        r, schemas, metadata=metadata.to_map(mdata))
                singer.write_record('events',
                                    rec,
                                    time_extracted=extraction_time)
                singer.write_bookmark(
                    state, repo_path, 'events',
                    {'since': singer.utils.strftime(extraction_time)})
                counter.increment()

    return state
Example #14
def sync_report_for_day(stream_name, stream_schema, sdk_client, start, field_list): # pylint: disable=too-many-locals
    report_downloader = sdk_client.GetReportDownloader(version=VERSION)
    customer_id = sdk_client.client_customer_id
    report = {
        'reportName': 'Seems this is required',
        'dateRangeType': 'CUSTOM_DATE',
        'reportType': stream_name,
        'downloadFormat': 'CSV',
        'selector': {
            'fields': field_list,
            'dateRange': {'min': start.strftime('%Y%m%d'),
                          'max': start.strftime('%Y%m%d')}}}

    # Fetch the report as a CSV string
    with metrics.http_request_timer(stream_name):
        result = attempt_download_report(report_downloader, report)

    headers, values = parse_csv_string(result)
    with metrics.record_counter(stream_name) as counter:
        time_extracted = utils.now()

        for _, val in enumerate(values):
            obj = dict(zip(get_xml_attribute_headers(stream_schema, headers), val))
            obj['_sdc_customer_id'] = customer_id
            obj['_sdc_report_datetime'] = REPORT_RUN_DATETIME
            with Transformer(singer.UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
                bumble_bee.pre_hook = transform_pre_hook
                obj = bumble_bee.transform(obj, stream_schema)

            singer.write_record(stream_name, obj, time_extracted=time_extracted)
            counter.increment()

        if start > get_start_for_stream(sdk_client.client_customer_id, stream_name):
            LOGGER.info('updating bookmark: %s > %s', start, get_start_for_stream(sdk_client.client_customer_id, stream_name))
            bookmarks.write_bookmark(STATE,
                                     state_key_name(sdk_client.client_customer_id, stream_name),
                                     'date',
                                     start.strftime(utils.DATETIME_FMT))
            singer.write_state(STATE)
        else:
            LOGGER.info('not updating bookmark: %s <= %s', start, get_start_for_stream(sdk_client.client_customer_id, stream_name))

        LOGGER.info("Done syncing %s records for the %s report for customer_id %s on %s",
                    counter.value, stream_name, customer_id, start)
Example #15
    def paginate(self, offset, count, ext_time, path, stream_id):
        if len(self.state) < 14:
            start_date = singer.utils.strptime_with_tz(
                self.config['start_date'])
        else:
            start_date = singer.utils.strptime_with_tz(self.state[stream_id])
        start_date = start_date.strftime('%m/%d/%YT%H:%M:%S')
        ext_time = start_date
        while (int(count) > int(offset)
               and (int(count) - int(offset)) >= -100):
            url = "https://api.merchantos.com/API/Account/" + str(
                self.config['customer_ids']) + "/" + str(
                    stream_id) + ".json?offset="
            relation = self.create_relation()
            page = self.client.request(stream_id, "GET",
                                       (url + str(offset) + relation))
            info = page['@attributes']
            count = info['count']
            if int(count) <= 100:
                offset = 300
            else:
                offset = int(info['offset']) + 100
            data = page[str(stream_id)]
            if isinstance(data, dict):
                # A single record comes back as a bare dict; wrap it so the
                # loop below can treat both response shapes the same way.
                data = [data]
            for record in data:
                if record['timeStamp'] >= ext_time:
                    ext_time = record['timeStamp']
                singer.write_record(stream_id, record)
                with metrics.record_counter(stream_id) as counter:
                    counter.increment()
            path.append(ext_time)
            self.update_start_date_bookmark(path, str(stream_id))
Example #16
    def sync(self, mdata):

        schema = self.load_schema()

        with singer.metrics.job_timer(job_type='list_conversations') as timer:
            with singer.metrics.record_counter(endpoint=self.name) as counter:
                for channel in self.channels():
                    with singer.Transformer(
                            integer_datetime_fmt="unix-seconds-integer-datetime-parsing"
                    ) as transformer:
                        transformed_record = transformer.transform(
                            data=channel,
                            schema=schema,
                            metadata=metadata.to_map(mdata))
                        singer.write_record(stream_name=self.name,
                                            time_extracted=singer.utils.now(),
                                            record=transformed_record)
                        counter.increment()
Example #17
    async def sync_stats(self, schema, period: pendulum.period = None):
        """Output the stats in the period."""
        stream = "stats"
        loop = asyncio.get_event_loop()

        singer.write_schema(stream, schema, ["service_id", "start_time"])
        bookmark = get_bookmark(self.state, stream, "from")
        if bookmark is not None:
            if "UTC" in bookmark:
                bookmark = datetime.datetime.strptime(
                    bookmark, '%Y-%m-%d %H:%M:%S UTC').isoformat()
            start_date = pendulum.parse(bookmark).int_timestamp
        else:
            start_date = pendulum.parse(
                self._config['start_date']).int_timestamp
        end_date = pendulum.now().int_timestamp
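        # Fetch stats for the [start_date, end_date] window and enrich each entry with its service details.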
        result = await loop.run_in_executor(None, self.client.stats,
                                            start_date, end_date)
        LOGGER.info("stats results: %s", result)
        if result:
            for n in result['data']:
                service_result = await loop.run_in_executor(
                    None, self.client.service, n)
                for i in result['data'][n]:
                    i['service_name'] = service_result['name']
                    i['service_versions'] = json.dumps(
                        service_result['versions'])
                    i['service_customer_id'] = service_result['customer_id']
                    i['service_publish_key'] = service_result['publish_key']
                    i['service_comment'] = service_result['comment']
                    i['service_deleted_at'] = service_result['deleted_at']
                    i['service_updated_at'] = service_result['updated_at']
                    i['service_created_at'] = service_result['created_at']
                    singer.write_record(stream, i)
            try:
                end_temp = datetime.datetime.strptime(result['meta']["to"],
                                                      '%Y-%m-%d %H:%M:%S UTC')
                end = end_temp.isoformat()
                self.state = write_bookmark(self.state, stream, "from", end)
            except Exception:
                # print("what fails is:" + result['meta']["to"])
                sys.stderr.write("what fails is:" + result['meta']["to"] +
                                 "\n")
Example #18
def sync_deal_pipelines(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get("metadata"))
    schema = load_schema("deal_pipelines")
    singer.write_schema("deal_pipelines", schema, ["pipelineId"],
                        catalog.get("stream_alias"))
    LOGGER.info("sync_deal_pipelines")
    data = request(get_url("deal_pipelines")).json()
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
        for row in data:
            record = bumble_bee.transform(row, schema, mdata)
            singer.write_record(
                "deal_pipelines",
                record,
                catalog.get("stream_alias"),
                time_extracted=utils.now(),
            )
    singer.write_state(STATE)
    return STATE
Example #19
def sync_contact_lists(STATE, ctx):
    catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE))
    mdata = metadata.to_map(catalog.get("metadata"))
    schema = load_schema("contact_lists")
    bookmark_key = "updatedAt"
    singer.write_schema("contact_lists", schema, ["listId"], [bookmark_key],
                        catalog.get("stream_alias"))

    start = get_start(STATE, "contact_lists", bookmark_key)
    max_bk_value = start

    LOGGER.info("sync_contact_lists from %s", start)

    url = get_url("contact_lists")
    params = {"count": 250}
    with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
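        # Page through contact lists 250 at a time, keeping the newest updatedAt seen as the new bookmark.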
        for row in gen_request(
                STATE,
                "contact_lists",
                url,
                params,
                "lists",
                "has-more",
            ["offset"],
            ["offset"],
        ):
            record = bumble_bee.transform(row, schema, mdata)

            if record[bookmark_key] >= start:
                singer.write_record(
                    "contact_lists",
                    record,
                    catalog.get("stream_alias"),
                    time_extracted=utils.now(),
                )
            if record[bookmark_key] >= max_bk_value:
                max_bk_value = record[bookmark_key]

    STATE = singer.write_bookmark(STATE, "contact_lists", bookmark_key,
                                  max_bk_value)
    singer.write_state(STATE)

    return STATE
Example #20
def sync_time_filtered(entity):
    bookmark_property = 'updated_at'

    singer.write_schema(entity,
                        utils.load_schema(entity), ["id"],
                        bookmark_properties=[bookmark_property])
    start = get_start(entity)

    logger.info("Syncing {} from {}".format(entity, start))
    for row in gen_request(get_url(entity)):
        if row[bookmark_property] >= start:
            if 'custom_fields' in row:
                row['custom_fields'] = transform_dict(row['custom_fields'],
                                                      force_str=True)

            utils.update_state(STATE, entity, row[bookmark_property])
            singer.write_record(entity, row, time_extracted=singer.utils.now())

    singer.write_state(STATE)
Example #21
    async def sync_custom_usage(self, schema):
        """Get hourly usage for custom metric."""
        stream = "custom_usage"
        loop = asyncio.get_event_loop()
        singer.write_schema(stream, schema, ["hour", "account"])
        custom_usage = await loop.run_in_executor(None,
                                                  self.client.hourly_request,
                                                  self.state, self.config,
                                                  "timeseries", stream)
        if custom_usage:
            for c in custom_usage['usage']:
                c['account'] = self.config['account']
                singer.write_record(stream, c)
            if custom_usage['usage']:
                self.state = write_bookmark(self.state, stream, "since",
                                            custom_usage['usage'][-1]['hour'])
Example #22
    def sync(self):
        most_recent_date = self.params["last_seen_at"]
        record_metadata = singer.metadata.to_map(self.metadata)

        with singer.metrics.job_timer(job_type=f"list_{self.tap_stream_id}"), \
          singer.metrics.record_counter(endpoint=self.tap_stream_id) as counter, \
          singer.Transformer() as transformer:
            for page in self._list_resource(url_suffix="/customers/all", params=self.params):
                for record in page.get(self.tap_stream_id):
                    transformed_record = transformer.transform(
                        data=record, schema=self.schema, metadata=record_metadata)
                    singer.write_record(stream_name=self.stream,
                                        time_extracted=singer.utils.now(),
                                        record=transformed_record)
                    counter.increment()
                    if transformed_record["last_seen_at"] > most_recent_date:
                        most_recent_date = transformed_record["last_seen_at"]

                singer.bookmarks.write_bookmark(state=self.state,
                                                tap_stream_id=self.tap_stream_id,
                                                key="last_seen_at",
                                                val=most_recent_date)
                singer.write_state(self.state)

        singer.bookmarks.write_bookmark(state=self.state, tap_stream_id=self.tap_stream_id, key="last_seen_at", val=most_recent_date)
Example #23
def do_sync(account, annotated_schemas, state):

    for stream in get_streams_to_sync(account, annotated_schemas, state):
        LOGGER.info('Syncing %s, fields %s', stream.name, stream.fields())
        schema = load_schema(stream)
        singer.write_schema(stream.name, schema, stream.key_properties)

        with singer.stats.Counter(source=stream.name) as stats:

            for message in stream:
                if 'record' in message:
                    stats.add(record_count=1)
                    record = singer.transform.transform(
                        message['record'], schema)
                    singer.write_record(stream.name, record)
                elif 'state' in message:
                    singer.write_state(message['state'])
                else:
                    raise Exception('Unrecognized message {}'.format(message))
Example #24
def getaccount():
    accounts = requests.get('https://api.awin.com/accounts?accessToken=' +
                            AUTH['accessToken'],
                            headers={"User-Agent": AUTH['user_agent']})
    if accounts.status_code == 200:
        singer.write_schema("Accounts", ACCOUNT_SCHEMA, ["accountId"])
        for account in accounts.json()['accounts']:
            if account['accountType'] == 'advertiser':
                ADVERTISERS.append(account['accountId'])
            if account['accountType'] == 'publisher':
                PUBLISHERS.append(account['publisher'])
            account["startDate"] = str(
                parse(STATE['last_fetched']) + timedelta(days=1))
            account["endDate"] = str(parse(STATE['last_fetched']) + \
                                           timedelta(days=AUTH['increment']))
            singer.write_record("Accounts", account)
    else:
        LOGGER.error(accounts.json()['error'])
        sys.exit(1)
Example #25
def getcampaign():
    campaigns = CLIENT.service.getCampaigns()
    singer.write_schema('campaigns', CAMPAIGN_SCHEMA, ['campaignID'])
    campaigns_list = []
    for acmp in campaigns:
        for cmp in acmp[1:]:
            camp = dict(cmp[0])
            if camp['campaignBid'] is not None:
                camp['biddingStrategy'] = camp['campaignBid'][
                    'biddingStrategy']
                if camp['campaignBid']['cpcBid'] is not None:
                    camp['cpc'] = camp['campaignBid']['cpcBid']['cpc']
                if camp['campaignBid']['cpaBid'] is not None:
                    camp['cpa'] = camp['campaignBid']['cpaBid']['cpa']
            camp['categoryBids'] = str(camp['categoryBids'])
            camp.pop('campaignBid')
            campaigns_list.append(camp['campaignID'])
            singer.write_record('campaigns', camp)
    return campaigns_list
Example #26
 def sync(self, state, stream_schema, stream_metadata, config, transformer):
     start_time = singer.get_bookmark(state, self.tap_stream_id,
                                      self.replication_key,
                                      config['start_date'])
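     # Sync customers window by window, writing the bookmark and state after each window completes.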
     for window_start, window_end in get_date_windows(start_time):
         LOGGER.info("Searching for customers from %s to %s", window_start,
                     window_end)
         for page, _ in self.client.get_customers(window_start, window_end):
             for record in page:
                 transformed_record = transformer.transform(
                     record, stream_schema, stream_metadata)
                 singer.write_record(
                     self.tap_stream_id,
                     transformed_record,
                 )
         state = singer.write_bookmark(state, self.tap_stream_id,
                                       self.replication_key, window_end)
         singer.write_state(state)
     return state
Example #27
    async def sync_trace_search(self, schema):
        stream = "trace_search"
        loop = asyncio.get_event_loop()
        singer.write_schema(stream, schema, ["hour", "account"])
        trace_search = await loop.run_in_executor(None,
                                                  self.client.hourly_request,
                                                  self.state, self.config,
                                                  f"traces", stream)
        if trace_search:
            for trace in trace_search['usage']:
                trace['account'] = self.config['account']
                singer.write_record(stream, trace)

            if trace_search['usage']:
                self.state = write_bookmark(self.state, stream, "since",
                                            trace_search['usage'][-1]['hour'])
Example #28
def _sync_contact_vids(catalog, vids, schema, bumble_bee):
    if len(vids) == 0:
        return

    data = request(get_url("contacts_detail"),
                   params={
                       'vid': vids,
                       'showListMemberships': True,
                       "formSubmissionMode": "all"
                   }).json()
    time_extracted = utils.now()
    mdata = metadata.to_map(catalog.get('metadata'))

    for record in data.values():
        record = bumble_bee.transform(record, schema, mdata)
        singer.write_record("contacts",
                            record,
                            catalog.get('stream_alias'),
                            time_extracted=time_extracted)
Example #29
def getaggreportcreative():
    singer.write_schema("AggReport", AGGREGATED_CREATIVE_SCHEMA,
                        ["advertiserId", "publisherId", "region"])
    for advertiser in ADVERTISERS:
        reportdataset = requests.get(
            'https://api.awin.com/advertisers/' + str(advertiser) +
            '/reports/creative',
            params=STATE['aggregatedByCreative'],
            headers={"User-Agent": AUTH['user_agent']})
        if reportdataset.status_code == 200:
            for data in reportdataset.json():
                data["startDate"] = str(
                    parse(STATE['last_fetched']) + timedelta(days=1))
                data["endDate"] = str(parse(STATE['last_fetched']) + \
                                               timedelta(days=AUTH['increment']))
                singer.write_record("AggReport", data)
        else:
            LOGGER.error(
                "Error" + str(reportdataset.content).replace('\n', ' ') +
                " while extracting data in report creative for advertiser: " +
                str(advertiser))
            sys.exit(1)
    time.sleep(5)
    for publisher in PUBLISHERS:
        reportdataset = requests.get(
            'https://api.awin.com/publishers/' + str(publisher) +
            '/reports/creative',
            params=STATE['aggregatedByCreative'],
            headers={"User-Agent": AUTH['user_agent']})
        if reportdataset.status_code == 200:
            for data in reportdataset.json():
                data["startDate"] = str(
                    parse(STATE['last_fetched']) + timedelta(days=1))
                data["endDate"] = str(parse(STATE['last_fetched']) + \
                                               timedelta(days=AUTH['increment']))
                singer.write_record("AggReport", data)
        else:
            LOGGER.error(
                "Error " + str(reportdataset.content).replace('\n', ' ') +
                " while extracting data in report creative for publisher: " +
                str(publisher))
            sys.exit(1)
    time.sleep(5)
Example #30
def downloadcategoryreport():
    filters = STATE
    cate = getcategory()
    if not cate:
        LOGGER.info('0 rows for category report')
    else:
        filters['reportSelector'] = {'CategoryIDs': cate}
        filters['reportType'] = 'Category'
        jobid = CLIENT.service.scheduleReportJob(filters)
        while True:
            if CLIENT.service.getJobStatus(jobid) == 'Completed':
                tab = ET.parse(urlopen(CLIENT.service.getReportDownloadUrl(jobid))) \
                        .getroot()[0]
                break
        rows = [i for i in tab if i.tag == 'rows'][0]
        singer.write_schema('categoriemetrics', CATEGORY_METRICS_SCHEMA,
                            ['categoryID', 'dateTime'])
        for row in rows:
            singer.write_record('categoriemetrics', row.attrib)
Example #31
def process_records(stream, mdata, max_modified, records, filter_field, fks):
    schema = stream.schema.to_dict()
    with metrics.record_counter(stream.tap_stream_id) as counter:
        for record in records:
            record_flat = {
                'id': record['id']
            }
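            # Flatten JSON:API attributes onto the top-level record.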
            for prop, value in record['attributes'].items():
                if prop == 'id':
                    raise Exception('Error flattening Outreach record - conflict with `id` key')
                record_flat[prop] = value

            if 'relationships' in record:
                for prop, value in record['relationships'].items():
                    if 'data' not in value and 'links' not in value:
                        raise Exception('Only `data` or `links` expected in relationships')

                    fk_field_name = '{}Id'.format(prop)

                    if 'data' in value and fk_field_name in fks:
                        data_value = value['data']
                        if data_value is not None and 'id' not in data_value:
                            raise Exception('null or `id` field expected for `data` relationship')

                        if fk_field_name in record_flat:
                            raise Exception(
                                '`{}` exists as both an attribute and generated relationship name'.format(fk_field_name))

                        if data_value is None:
                            record_flat[fk_field_name] = None
                        else:
                            record_flat[fk_field_name] = data_value['id']

            if filter_field in record_flat and record_flat[filter_field] > max_modified:
                max_modified = record_flat[filter_field]

            with Transformer() as transformer:
                record_typed = transformer.transform(record_flat,
                                                     schema,
                                                     mdata)
            singer.write_record(stream.tap_stream_id, record_typed)
            counter.increment()
        return max_modified