def get_all_stargazers(schema, config, state, mdata):
    '''
    https://developer.github.com/v3/activity/starring/#list-stargazers
    '''
    repo_path = config['repository']
    if bookmarks.get_bookmark(state, "stargazers", 'since'):
        query_string = '&since={}'.format(
            bookmarks.get_bookmark(state, "stargazers", 'since'))
    else:
        query_string = ''

    stargazers_headers = {'Accept': 'application/vnd.github.v3.star+json'}
    last_stargazer_time = None
    with metrics.record_counter('stargazers') as counter:
        for response in authed_get_all_pages(
                'stargazers',
                'https://api.github.com/repos/{}/stargazers?sort=updated&direction=asc{}'
                .format(repo_path, query_string),
                stargazers_headers):
            stargazers = response.json()
            extraction_time = singer.utils.now()
            for stargazer in stargazers:
                with singer.Transformer() as transformer:
                    rec = transformer.transform(
                        stargazer, schema, metadata=metadata.to_map(mdata))
                rec['user_id'] = rec['user']['id']
                singer.write_record('stargazers', rec,
                                    time_extracted=extraction_time)
                singer.write_bookmark(state, 'stargazers', 'since',
                                      singer.utils.strftime(extraction_time))
                counter.increment()

    return state
def get_all_commits(schema, config, state, mdata):
    '''
    https://developer.github.com/v3/repos/commits/#list-commits-on-a-repository
    '''
    repo_path = config['repository']
    if bookmarks.get_bookmark(state, "commits", 'since'):
        query_string = '?since={}'.format(
            bookmarks.get_bookmark(state, "commits", 'since'))
    else:
        query_string = ''

    latest_commit_time = None
    with metrics.record_counter('commits') as counter:
        for response in authed_get_all_pages(
                'commits',
                'https://api.github.com/repos/{}/commits{}'.format(
                    repo_path, query_string)):
            commits = response.json()
            extraction_time = singer.utils.now()
            for commit in commits:
                with singer.Transformer() as transformer:
                    rec = transformer.transform(
                        commit, schema, metadata=metadata.to_map(mdata))
                singer.write_record('commits', rec,
                                    time_extracted=extraction_time)
                singer.write_bookmark(state, 'commits', 'since',
                                      singer.utils.strftime(extraction_time))
                counter.increment()

    return state
def get_all_issues(schema, config, state, mdata):
    '''
    https://developer.github.com/v3/issues/#list-issues-for-a-repository
    '''
    repo_path = config['repository']
    if bookmarks.get_bookmark(state, "issues", 'since'):
        query_string = '&since={}'.format(
            bookmarks.get_bookmark(state, "issues", 'since'))
    else:
        query_string = ''

    last_issue_time = None
    with metrics.record_counter('issues') as counter:
        for response in authed_get_all_pages(
                'issues',
                'https://api.github.com/repos/{}/issues?sort=updated&direction=asc{}'
                .format(repo_path, query_string)):
            issues = response.json()
            extraction_time = singer.utils.now()
            for issue in issues:
                with singer.Transformer() as transformer:
                    rec = transformer.transform(
                        issue, schema, metadata=metadata.to_map(mdata))
                singer.write_record('issues', rec,
                                    time_extracted=extraction_time)
                singer.write_bookmark(state, 'issues', 'since',
                                      singer.utils.strftime(extraction_time))
                counter.increment()

    return state
def get_sync_domain(state, stream, model_name):
    """
    Return a domain (a filter expression) that can be used to filter
    records. If the stream's replication method is FULL_TABLE, return an
    empty domain so every record is fetched.
    """
    domain = []
    for entry in stream.metadata:
        # stream metadata will have empty breadcrumb
        if not entry['breadcrumb'] and \
                entry['metadata'].get('replication-method', None) == 'FULL_TABLE':
            return domain

    last_updated_at = get_bookmark(state, stream.tap_stream_id,
                                   'last_updated_at')
    if last_updated_at:
        last_updated_at = utils.strptime(last_updated_at)
        domain.extend([
            'OR',
            [('write_date', '>', last_updated_at)],
            [('create_date', '>', last_updated_at)],
        ])

    last_record_id = get_bookmark(state, stream.tap_stream_id,
                                  'last_record_id')
    if last_record_id:
        domain.append(('id', '>', last_record_id))

    return domain
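# A minimal usage sketch for get_sync_domain, not part of the original tap:
# the stream stub and bookmark values below are hypothetical. With a saved
# bookmark, an incremental stream yields an Odoo-style OR filter over
# write_date/create_date plus an id cursor.
def _demo_get_sync_domain():
    from types import SimpleNamespace
    stream = SimpleNamespace(
        tap_stream_id='res_partner',
        metadata=[{'breadcrumb': [],
                   'metadata': {'replication-method': 'INCREMENTAL'}}],
    )
    state = {'bookmarks': {'res_partner': {
        'last_updated_at': '2020-01-01T00:00:00Z',
        'last_record_id': 42,
    }}}
    domain = get_sync_domain(state, stream, 'res.partner')
    # Expected shape:
    # ['OR', [('write_date', '>', <datetime>)],
    #        [('create_date', '>', <datetime>)], ('id', '>', 42)]
    return domain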
def get_or_create_export_for_leads(client, state, stream, export_start, config):
    export_id = bookmarks.get_bookmark(state, "leads", "export_id")
    # check if export is still valid
    if export_id is not None and not client.export_available("leads", export_id):
        singer.log_info("Export %s no longer available.", export_id)
        export_id = None

    if export_id is None:
        # Corona mode is required to query by "updatedAt", otherwise a full
        # sync is required using "createdAt".
        query_field = "updatedAt" if client.use_corona else "createdAt"
        max_export_days = int(config.get('max_export_days', MAX_EXPORT_DAYS))
        export_end = get_export_end(export_start, end_days=max_export_days)
        query = {query_field: {"startAt": export_start.isoformat(),
                               "endAt": export_end.isoformat()}}

        # Create the new export and store the id and end date in state.
        # Does not start the export (must POST to the "enqueue" endpoint).
        fields = []
        for entry in stream['metadata']:
            if len(entry['breadcrumb']) > 0 and \
                    (entry['metadata'].get('selected') or
                     entry['metadata'].get('inclusion') == 'automatic'):
                fields.append(entry['breadcrumb'][-1])

        export_id = client.create_export("leads", fields, query)
        state = update_state_with_export_info(
            state, stream, export_id=export_id,
            export_end=export_end.isoformat())
    else:
        export_end = pendulum.parse(
            bookmarks.get_bookmark(state, "leads", "export_end"))

    return export_id, export_end
def get_or_create_export_for_activities(client, state, stream, export_start, config):
    export_id = bookmarks.get_bookmark(state, stream["tap_stream_id"],
                                       "export_id")
    if export_id is not None and not client.export_available(
            "activities", export_id):
        singer.log_info("Export %s no longer available.", export_id)
        export_id = None

    if export_id is None:
        # The activity id is in the top-most breadcrumb of the metadata.
        # Activity ids correspond to activity type id in Marketo.
        # We need the activity type id to build the query.
        activity_metadata = metadata.to_map(stream["metadata"])
        activity_type_id = metadata.get(activity_metadata, (),
                                        'marketo.activity-id')

        # Activities must be queried by `createdAt` even though
        # that is not a real field. `createdAt` proxies `activityDate`.
        # The activity type id must also be included in the query. The
        # largest date range that can be used for activities is 30 days.
        max_export_days = int(config.get('max_export_days', MAX_EXPORT_DAYS))
        export_end = get_export_end(export_start, end_days=max_export_days)
        query = {
            "createdAt": {
                "startAt": export_start.isoformat(),
                "endAt": export_end.isoformat()
            },
            "activityTypeIds": [activity_type_id]
        }

        # Create the new export and store the id and end date in state.
        # Does not start the export (must POST to the "enqueue" endpoint).
        try:
            export_id = client.create_export("activities", ACTIVITY_FIELDS,
                                             query)
        except ApiQuotaExceeded as e:
            # The main reason we wrap the ApiQuotaExceeded exception in a
            # new one is to be able to tell the customer what their
            # configured max_export_days is.
            raise ApiQuotaExceeded(
                ("You may wish to consider changing the "
                 "`max_export_days` config value to a lower number if "
                 "you're unable to sync a single {} day window within "
                 "your current API quota.").format(max_export_days)) from e
        state = update_state_with_export_info(
            state, stream, export_id=export_id,
            export_end=export_end.isoformat())
    else:
        export_end = pendulum.parse(
            bookmarks.get_bookmark(state, stream["tap_stream_id"],
                                   "export_end"))

    return export_id, export_end
def test_empty_state(self):
    empty_state = {}

    # Case with no value to fall back on
    self.assertIsNone(
        bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key'))

    # Case with a given default
    self.assertEqual(
        bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key',
                               'default_value'),
        'default_value')
def sync_paginated(client, state, stream):
    # http://developers.marketo.com/rest-api/endpoint-reference/lead-database-endpoint-reference/#!/Campaigns/getCampaignsUsingGET
    # http://developers.marketo.com/rest-api/endpoint-reference/lead-database-endpoint-reference/#!/Static_Lists/getListsUsingGET
    #
    # Campaigns and Static Lists are paginated with a max return of 300
    # items per page. There are no filters that can be used to only
    # return updated records.
    replication_key = determine_replication_key(stream['tap_stream_id'])
    singer.write_schema(stream["tap_stream_id"], stream["schema"],
                        stream["key_properties"],
                        bookmark_properties=[replication_key])
    start_date = bookmarks.get_bookmark(state, stream["tap_stream_id"],
                                        replication_key)
    params = {"batchSize": 300}
    endpoint = "rest/v1/{}.json".format(stream["tap_stream_id"])

    # Paginated requests use paging tokens for retrieving the next page
    # of results. These tokens are stored in the state for resuming
    # syncs. If a paging token exists in state, use it.
    next_page_token = bookmarks.get_bookmark(state, stream["tap_stream_id"],
                                             "next_page_token")
    if next_page_token:
        params["nextPageToken"] = next_page_token

    # Keep querying pages of data until no next page token.
    record_count = 0
    job_started = pendulum.utcnow().isoformat()
    while True:
        data = client.request("GET", endpoint,
                              endpoint_name=stream["tap_stream_id"],
                              params=params)
        time_extracted = utils.now()

        # Each row just needs the values formatted. If the record is
        # newer than the original start date, stream the record. Finally,
        # update the bookmark if newer than the existing bookmark.
        for row in data["result"]:
            record = format_values(stream, row)
            if record[replication_key] >= start_date:
                record_count += 1
                singer.write_record(stream["tap_stream_id"], record,
                                    time_extracted=time_extracted)

        # No next page, results are exhausted.
        if "nextPageToken" not in data:
            break

        # Store the next page token in state and continue.
        params["nextPageToken"] = data["nextPageToken"]
        state = bookmarks.write_bookmark(state, stream["tap_stream_id"],
                                         "next_page_token",
                                         data["nextPageToken"])
        singer.write_state(state)

    # Once all results are exhausted, unset the next page token bookmark
    # so the subsequent sync starts from the beginning.
    state = bookmarks.write_bookmark(state, stream["tap_stream_id"],
                                     "next_page_token", None)
    state = bookmarks.write_bookmark(state, stream["tap_stream_id"],
                                     replication_key, job_started)
    singer.write_state(state)
    return state, record_count
def sync_leads(client, state, stream):
    # http://developers.marketo.com/rest-api/bulk-extract/bulk-lead-extract/
    replication_key = determine_replication_key(stream["tap_stream_id"])
    singer.write_schema("leads", stream["schema"], stream["key_properties"],
                        bookmark_properties=[replication_key])
    initial_bookmark = pendulum.parse(
        bookmarks.get_bookmark(state, "leads", replication_key))
    export_start = pendulum.parse(
        bookmarks.get_bookmark(state, "leads", replication_key))
    if client.use_corona:
        export_start = export_start.subtract(days=ATTRIBUTION_WINDOW_DAYS)

    job_started = pendulum.utcnow()
    record_count = 0
    max_bookmark = initial_bookmark
    while export_start < job_started:
        export_id, export_end = get_or_create_export_for_leads(
            client, state, stream, export_start)
        state = wait_for_export(client, state, stream, export_id)
        for row in stream_rows(client, "leads", export_id):
            time_extracted = utils.now()

            record = format_values(stream, row)
            record_bookmark = pendulum.parse(record[replication_key])

            if client.use_corona:
                max_bookmark = export_end
                singer.write_record("leads", record,
                                    time_extracted=time_extracted)
                record_count += 1
            elif record_bookmark >= initial_bookmark:
                max_bookmark = max(max_bookmark, record_bookmark)
                singer.write_record("leads", record,
                                    time_extracted=time_extracted)
                record_count += 1

        # Now that one of the exports is finished, update the bookmark
        state = update_state_with_export_info(
            state, stream, bookmark=max_bookmark.isoformat())
        export_start = export_end

    return state, record_count
def handle_resource(resource, schemas, id_field, state, mdata):
    extraction_time = singer.utils.now()
    endpoint = get_endpoint(resource)
    bookmark = get_bookmark(state, resource, "since")
    qs = {} if resource not in CAN_FILTER else {"updated_after_utc": bookmark}

    with metrics.record_counter(resource) as counter:
        for page in get_all_pages(resource, endpoint, qs):
            for row in page:
                # Handle custom fields if present
                if resource in HAS_CUSTOM_FIELDS:
                    row = transform_custom_field(id_field, row)
                write_record(row, resource, schemas[resource], mdata,
                             extraction_time)
                counter.increment()
                if "links" in schemas:
                    handle_links(
                        resource,
                        row[id_field],
                        schemas["links"],
                        mdata,
                        extraction_time,
                    )

    return write_bookmark(state, resource, extraction_time)
def sync(self):
    bookmark = bookmarks.get_bookmark(
        self.state,
        self.stream_name,
        self.replication_keys[0],
        MIN_TIME,
    )
    start_dt = utils.strptime_to_utc(bookmark)
    resp = self.client.request_feed(self.stream_name)

    max_dt = start_dt
    for station in resp.get("data").get("stations"):
        last_reported = station.get("last_reported")
        last_reported_dt = datetime.fromtimestamp(last_reported).replace(
            tzinfo=pytz.UTC)
        if last_reported_dt > max_dt:
            max_dt = last_reported_dt
        if last_reported_dt > start_dt:
            yield station

    bookmarks.write_bookmark(
        self.state,
        self.stream_name,
        self.replication_keys[0],
        utils.strftime(max_dt),
    )
    messages.write_state(self.state)
def hourly_request(self, state, config, query, stream):
    # Manage the start date condition if it is more than two months back.
    try:
        bookmark = get_bookmark(state, stream, "since")
        if bookmark:
            start_date = bookmark
        else:
            start_date = config['start_hour']

        # The start date can't be more than two months back, otherwise
        # Datadog rejects the request.
        min_date = (datetime.today() + relativedelta(months=-2) +
                    relativedelta(days=1)).strftime('%Y-%m-%dT%H')
        if start_date < min_date:
            start_date = min_date

        # Querying too close to the present returns partial data for those
        # days, so always step back a few days to refresh.
        max_date = (datetime.today() +
                    relativedelta(days=-5)).strftime('%Y-%m-%dT%H')
        if start_date > max_date:
            start_date = max_date

        if start_date != datetime.utcnow().strftime('%Y-%m-%dT%H'):
            data = {
                'start_hr': start_date,
                'end_hr': datetime.utcnow().strftime('%Y-%m-%dT%H')
            }
            traces = self._get(query, data=data)
            return traces.json()
        else:
            return None
    except Exception as error:
        LOGGER.error(error)
        return None
def sync_activities(client, state, stream, config):
    # http://developers.marketo.com/rest-api/bulk-extract/bulk-activity-extract/
    replication_key = determine_replication_key(stream['tap_stream_id'])
    singer.write_schema(stream["tap_stream_id"], stream["schema"],
                        stream["key_properties"],
                        bookmark_properties=[replication_key])
    export_start = pendulum.parse(
        bookmarks.get_bookmark(state, stream["tap_stream_id"],
                               replication_key))
    job_started = pendulum.utcnow()
    record_count = 0
    while export_start < job_started:
        export_id, export_end = get_or_create_export_for_activities(
            client, state, stream, export_start, config)
        state = wait_for_export(client, state, stream, export_id)
        for row in stream_rows(client, "activities", export_id):
            time_extracted = utils.now()

            row = flatten_activity(row, stream)
            record = format_values(stream, row)

            singer.write_record(stream["tap_stream_id"], record,
                                time_extracted=time_extracted)
            record_count += 1

        state = update_state_with_export_info(
            state, stream, bookmark=export_start.isoformat())
        export_start = export_end

    return state, record_count
def sync_programs(client, state, stream):
    # http://developers.marketo.com/rest-api/assets/programs/#by_date_range
    #
    # Programs are queryable via their updatedAt time but require an
    # end date as well. As there is no max time range for the query,
    # query from the bookmark value until current.
    #
    # The Programs endpoint uses offsets with a return limit of 200
    # per page. If requesting past the final program, an error message
    # is returned to indicate that the endpoint has been fully synced.
    replication_key = determine_replication_key(stream['tap_stream_id'])
    singer.write_schema("programs", stream["schema"],
                        stream["key_properties"],
                        bookmark_properties=[replication_key])
    start_date = bookmarks.get_bookmark(state, "programs", replication_key)
    end_date = pendulum.utcnow().isoformat()
    params = {
        "maxReturn": 200,
        "offset": 0,
        "earliestUpdatedAt": start_date,
        "latestUpdatedAt": end_date,
    }
    endpoint = "rest/asset/v1/programs.json"

    record_count = 0
    while True:
        data = client.request("GET", endpoint, endpoint_name="programs",
                              params=params)

        # If the no asset message is in the warnings, we have exhausted
        # the search results and can end the sync.
        if "warnings" in data and NO_ASSET_MSG in data["warnings"]:
            break

        time_extracted = utils.now()

        # Each row just needs the values formatted. If the record is
        # newer than the original start date, stream the record.
        for row in data["result"]:
            record = format_values(stream, row)
            if record[replication_key] >= start_date:
                record_count += 1
                singer.write_record("programs", record,
                                    time_extracted=time_extracted)

        # Increment the offset by the return limit for the next query.
        params["offset"] += params["maxReturn"]

    # Now that we've finished every page we can update the bookmark to
    # the end of the query.
    state = bookmarks.write_bookmark(state, "programs", replication_key,
                                     end_date)
    singer.write_state(state)
    return state, record_count
def validate_state(config, catalog, state):
    for stream in catalog["streams"]:
        for mdata in stream['metadata']:
            if mdata['breadcrumb'] == [] and \
                    mdata['metadata'].get('selected') != True:
                # If a stream is deselected while it's the current stream,
                # unset the current stream.
                if stream["tap_stream_id"] == get_currently_syncing(state):
                    set_currently_syncing(state, None)
                break

        replication_key = determine_replication_key(stream['tap_stream_id'])
        if not replication_key:
            continue

        # If there's no bookmark for a stream (new integration, newly selected,
        # reset, etc) we need to use the default start date from the config.
        bookmark = get_bookmark(state, stream["tap_stream_id"],
                                replication_key)
        if bookmark is None:
            state = write_bookmark(state, stream["tap_stream_id"],
                                   replication_key, config["start_date"])

    singer.write_state(state)
    return state
async def sync_bills(self, schema, period: pendulum.period = None):
    """Output the `bills` in the period."""
    stream = "bills"
    loop = asyncio.get_event_loop()

    if not period:
        # build a default period from the last bookmark
        bookmark = get_bookmark(self.state, stream, "start_time")
        start = pendulum.parse(bookmark)
        end = pendulum.now()
        period = pendulum.period(start, end)

    singer.write_schema(stream, schema, ["invoice_id"])
    for at in period.range("months"):
        result = await loop.run_in_executor(None, self.client.bill, at)
        if result:
            singer.write_record(stream, result)
            try:
                end = datetime.datetime.strptime(
                    result["end_time"], "%Y-%m-%dT%H:%M:%SZ").isoformat()
                self.state = write_bookmark(self.state, stream,
                                            "start_time", end)
            except ValueError:
                # Malformed end_time; report it and keep the old bookmark.
                sys.stderr.write("what fails is:" + result['end_time'] + "\n")
async def sync_top_average_metrics(self, schema):
    stream = "top_average_metrics"
    loop = asyncio.get_event_loop()
    bookmark = get_bookmark(self.state, "top_average_metrics", "since")
    if bookmark:
        start_date = urllib.parse.quote(bookmark)
    else:
        start_date = self.config['start_month']

    today = datetime.today()
    end_date = datetime(today.year, today.month, 1)
    month_data = datetime.strptime(start_date, '%Y-%m')

    singer.write_schema(stream, schema, ["month", "metric_name", "account"])
    while month_data <= end_date:
        month_str = datetime.strftime(month_data, '%Y-%m')
        date_str = datetime.strftime(month_data, '%Y-%m-%d')
        top_average_metrics = await loop.run_in_executor(
            None, self.client.top_avg_metrics, month_str)
        if top_average_metrics:
            for t in top_average_metrics['usage']:
                t["month"] = date_str
                t['account'] = self.config['account']
                singer.write_record(stream, t)
            self.state = write_bookmark(self.state, stream, "since",
                                        month_str)
        month_data = month_data + relativedelta(months=+1)
def sync(self):
    bookmark = bookmarks.get_bookmark(
        self.state,
        self.stream_name,
        self.replication_keys[0],
        MIN_TIME,
    )
    start_dt = utils.strptime_to_utc(bookmark)
    resp = self.client.request_feed(self.stream_name)
    last_updated = resp.get("last_updated").replace(tzinfo=pytz.UTC)
    if start_dt >= last_updated:
        return

    for station in resp.get("data").get("stations"):
        # Delete array/complex properties because we're not powerful enough yet
        if "rental_methods" in station:
            del station["rental_methods"]
        if "rental_uris" in station:
            del station["rental_uris"]
        yield station

    bookmarks.write_bookmark(
        self.state,
        self.stream_name,
        self.replication_keys[0],
        utils.strftime(last_updated),
    )
    messages.write_state(self.state)
def get_attribution_window_bookmark(state, advertiser_ids, stream_name):
    """Get attribution window for stream from Singer State."""
    mid_bk_value = bookmarks.get_bookmark(
        state,
        state_key_name(advertiser_ids, stream_name),
        "last_attribution_window_date",
    )
    return utils.strptime_with_tz(mid_bk_value) if mid_bk_value else None
def sync_products(client, catalog, state, start_date, end_date, stream_id,
                  stream_config):
    stream_id = 'products'
    write_schema(catalog, stream_id)

    # Rip this out once all bookmarks are converted
    if isinstance(state.get('bookmarks', {}).get(stream_id), str):
        # Old style bookmark found. Use it and delete it
        last_date = state['bookmarks'].pop(stream_id)
        # Write this bookmark in the new style
        bookmarks.write_bookmark(state, stream_id, 'datetime', last_date)
        singer.write_state(state)

    last_date = bookmarks.get_bookmark(state, stream_id, 'datetime',
                                       start_date)

    def products_transform(record):
        out = {}
        for key, value in record.items():
            if key not in DEPRECATED_PRODUCT_FIELDS:
                out[key] = value
        return out

    page = 1
    limit = 200
    num_results = None
    max_datetime = last_date
    while is_next_page(limit, num_results):
        LOGGER.info('Syncing products - page {}'.format(page))

        updated_min = strptime_to_utc(last_date).strftime('%Y-%m-%d')
        data = client.get('/get-product/',
                          params={
                              'updated_at_min': updated_min,
                              'page': page,
                              'count': limit
                          },
                          endpoint=stream_id)
        page += 1

        records = data['products']
        if records:
            num_results = len(records)
            max_page_datetime = max(map(lambda x: x['updated_at'], records))
            if max_page_datetime > max_datetime:
                max_datetime = max_page_datetime
            persist_records(catalog, stream_id,
                            map(products_transform, records))
        else:
            num_results = 0

    bookmarks.write_bookmark(state, stream_id, 'datetime', max_datetime)
    singer.write_state(state)
def get_bookmark(state: dict, project: str, stream_name: str,
                 bookmark_key: str) -> Optional[str]:
    """
    Retrieve a bookmark from state; it marks when records for this key
    were last synced.
    """
    stream_bookmark = bookmarks.get_bookmark(state, project, stream_name)
    if stream_bookmark is not None:
        return stream_bookmark.get(bookmark_key)
    return None
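# Companion sketch (hypothetical project/stream values, not part of the
# original tap): this wrapper nests streams one level deeper than singer's
# default layout, keyed by project.
def _demo_project_scoped_bookmark():
    state = {'bookmarks': {'my-project': {
        'issues': {'since': '2020-01-01T00:00:00Z'}}}}
    assert get_bookmark(state, 'my-project', 'issues', 'since') == \
        '2020-01-01T00:00:00Z'
    # A missing project or key falls through to None.
    assert get_bookmark(state, 'other-project', 'issues', 'since') is None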
def incidents(self, state, config):
    try:
        bookmark = get_bookmark(state, "incidents", "since")
        query_base = "incidents?limit=100&total=true&utc=true"
        if bookmark:
            start_date = datetime.datetime.strptime(
                bookmark, '%Y-%m-%dT%H:%M:%S.%f')
        else:
            start_date = datetime.datetime.strptime(
                config['start_date'], '%Y-%m-%d')

        r = relativedelta.relativedelta(datetime.datetime.utcnow(),
                                        start_date)
        result = {}
        if r.years > 0 or r.months >= 5:
            # The date range is too wide for a single request, so walk
            # forward in roughly five-month windows.
            while r.years > 0 or r.months >= 5:
                until = start_date + datetime.timedelta(5 * 365 / 12)
                query = query_base + "&since=" + urllib.parse.quote(
                    start_date.isoformat()) + "&until=" + urllib.parse.quote(
                        until.isoformat())
                iterable = self._get(query)
                if 'incidents' in result:
                    result['incidents'].extend(iterable['incidents'])
                else:
                    result = iterable
                offset = 0
                while iterable['more']:
                    offset = offset + result['limit']
                    # Build each page URL from the window query so offset
                    # parameters don't accumulate across pages.
                    page_query = query + "&offset=" + str(offset)
                    iterable = self._get(page_query)
                    result['incidents'].extend(iterable['incidents'])
                start_date = until
                r = relativedelta.relativedelta(
                    datetime.datetime.utcnow(), start_date)

        # Final window: from start_date up to now.
        query = query_base + "&since=" + urllib.parse.quote(
            start_date.isoformat()) + "&until=" + urllib.parse.quote(
                datetime.datetime.utcnow().isoformat())
        iterable = self._get(query)
        if 'incidents' in result:
            result['incidents'].extend(iterable['incidents'])
        else:
            result = iterable
        offset = 0
        while iterable['more']:
            offset = offset + result['limit']
            page_query = query + "&offset=" + str(offset)
            iterable = self._get(page_query)
            result['incidents'].extend(iterable['incidents'])
        return result
    except Exception as e:
        LOGGER.error(e)
        return None
def is_first_run(tap_stream_id: str, state: Dict[str, Any]) -> bool:
    """Checks bookmarks to determine if it is a stream's first run"""
    value = get_bookmark(state, tap_stream_id,
                         "wrote_initial_activate_version", default=False)
    if not isinstance(value, bool):
        return True
    return not value
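# Behaviour sketch for is_first_run (the state values below are
# hypothetical): the flag must be a literal bool; anything else is treated
# as a first run.
def _demo_is_first_run():
    assert is_first_run('users', {}) is True
    done = {'bookmarks': {'users': {'wrote_initial_activate_version': True}}}
    assert is_first_run('users', done) is False
    # A corrupted, non-boolean flag also counts as a first run.
    bad = {'bookmarks': {'users': {'wrote_initial_activate_version': 'yes'}}}
    assert is_first_run('users', bad) is True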
def translate_state(state, catalog, repositories):
    '''
    This tap used to only support a single repository, in which case the
    state took the shape of:
    {
      "bookmarks": {
        "commits": {
          "since": "2018-11-14T13:21:20.700360Z"
        }
      }
    }
    The tap now supports multiple repos, so this function should be called
    at the beginning of each run to ensure the state is translated to the
    new format:
    {
      "bookmarks": {
        "singer-io/tap-adwords": {
          "commits": {
            "since": "2018-11-14T13:21:20.700360Z"
          }
        },
        "singer-io/tap-salesforce": {
          "commits": {
            "since": "2018-11-14T13:21:20.700360Z"
          }
        }
      }
    }
    '''
    nested_dict = lambda: collections.defaultdict(nested_dict)
    new_state = nested_dict()

    for stream in catalog['streams']:
        stream_name = stream['tap_stream_id']
        for repo in repositories:
            if bookmarks.get_bookmark(state, repo, stream_name):
                return state
            if bookmarks.get_bookmark(state, stream_name, 'since'):
                new_state['bookmarks'][repo][stream_name]['since'] = \
                    bookmarks.get_bookmark(state, stream_name, 'since')

    return new_state
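# Round-trip sketch for translate_state (the catalog and repository values
# are hypothetical): an old stream-keyed state is re-keyed under each
# configured repository, while an already-migrated state passes through.
def _demo_translate_state():
    catalog = {'streams': [{'tap_stream_id': 'commits'}]}
    old = {'bookmarks': {'commits': {'since': '2018-11-14T13:21:20.700360Z'}}}
    new = translate_state(old, catalog, ['singer-io/tap-adwords'])
    assert new['bookmarks']['singer-io/tap-adwords']['commits']['since'] == \
        '2018-11-14T13:21:20.700360Z'
    # A state already keyed by repository is returned unchanged.
    assert translate_state(new, catalog, ['singer-io/tap-adwords']) is new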
def translate_state(state, catalog, organizations):
    """
    This tap used to only support a single organization, in which case the
    state took the shape of:
    {
      "bookmarks": {
        "commits": {
          "since": "2018-11-14T13:21:20.700360Z"
        }
      }
    }
    The tap now supports multiple organizations, so this function should be
    called at the beginning of each run to ensure the state is translated to
    the new format:
    {
      "bookmarks": {
        "singer-io": {
          "commits": {
            "since": "2018-11-14T13:21:20.700360Z"
          }
        },
        "stoplightio": {
          "commits": {
            "since": "2018-11-14T13:21:20.700360Z"
          }
        }
      }
    }
    """
    nested_dict = lambda: collections.defaultdict(nested_dict)
    new_state = nested_dict()

    for stream in catalog["streams"]:
        stream_name = stream["tap_stream_id"]
        for org in organizations:
            if bookmarks.get_bookmark(state, org, stream_name):
                return state
            if bookmarks.get_bookmark(state, stream_name, "since"):
                new_state["bookmarks"][org][stream_name]["since"] = \
                    bookmarks.get_bookmark(state, stream_name, "since")

    return new_state
def get_attendances(site_id, state):
    bookmark_value = get_bookmark(state, "attendances", "since")
    for page in get_all_pages(
            "attendances",
            "/sites/{}/attendances".format(site_id),
            bookmark_value
    ):
        rows = page["data"]
        for attendance in rows:
            yield attendance
    return state
def test_non_empty_state(self):
    stream_id_1 = 'customers'
    bookmark_key_1 = 'datetime'
    bookmark_val_1 = 123456789
    non_empty_state = {
        'bookmarks': {
            stream_id_1: {
                bookmark_key_1: bookmark_val_1
            }
        }
    }

    #
    # Cases with no value to fall back on
    #

    # Bad stream, bad key
    self.assertIsNone(
        bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key'))

    # Good stream, bad key
    self.assertIsNone(
        bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key'))

    # Good stream, good key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, stream_id_1,
                               bookmark_key_1),
        bookmark_val_1)

    #
    # Cases with a given default
    #

    # Bad stream, bad key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key',
                               'default_value'),
        'default_value')

    # Bad stream, good key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, 'some_stream',
                               bookmark_key_1, 'default_value'),
        'default_value')

    # Good stream, bad key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key',
                               'default_value'),
        'default_value')

    # Good stream, good key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, stream_id_1,
                               bookmark_key_1, 'default_value'),
        bookmark_val_1)
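# Companion sketch, not part of the original suite: write_bookmark builds
# the nested state that get_bookmark reads back, so the two round-trip.
def test_write_then_read(self):
    state = bookmarks.write_bookmark(
        {}, 'customers', 'datetime', '2020-01-01T00:00:00Z')
    # write_bookmark nests values under 'bookmarks' -> stream -> key
    self.assertEqual(
        state,
        {'bookmarks': {'customers': {'datetime': '2020-01-01T00:00:00Z'}}})
    self.assertEqual(
        bookmarks.get_bookmark(state, 'customers', 'datetime'),
        '2020-01-01T00:00:00Z')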
def top_avg_metrics(self, state, config):
    try:
        bookmark = get_bookmark(state, "top_average_metrics", "since")
        if bookmark:
            start_date = urllib.parse.quote(bookmark)
        else:
            start_date = config['start_month']
        data = {'month': start_date}
        query = "top_avg_metrics"
        metrics = self._get(query, data=data)
        return metrics.json()
    except Exception:
        return None
def get_or_create_export_for_leads(client, state, stream, export_start):
    export_id = bookmarks.get_bookmark(state, "leads", "export_id")
    # check if export is still valid
    if export_id is not None and not client.export_available(
            "leads", export_id):
        singer.log_info("Export %s no longer available.", export_id)
        export_id = None

    if export_id is None:
        # Corona mode is required to query by "updatedAt", otherwise a full
        # sync is required using "createdAt".
        query_field = "updatedAt" if client.use_corona else "createdAt"
        export_end = get_export_end(export_start)
        query = {
            query_field: {
                "startAt": export_start.isoformat(),
                "endAt": export_end.isoformat()
            }
        }

        # Create the new export and store the id and end date in state.
        # Does not start the export (must POST to the "enqueue" endpoint).
        fields = [
            f for f, s in stream["schema"]["properties"].items()
            if s.get("selected") or (s.get("inclusion") == "automatic")
        ]
        export_id = client.create_export("leads", fields, query)
        state = update_state_with_export_info(
            state, stream, export_id=export_id,
            export_end=export_end.isoformat())
    else:
        export_end = pendulum.parse(
            bookmarks.get_bookmark(state, "leads", "export_end"))

    return export_id, export_end
def issues(self, project_id, state):
    bookmark = get_bookmark(state, "issues", "start")
    query = f"projects/rise-people/{project_id}/issues/"
    if bookmark:
        date_filter = urllib.parse.quote("lastSeen:>=" + bookmark)
        query += "?query=" + date_filter
    response = self._get(query)
    issues = response.json()
    url = response.url
    # Follow pagination links until the API reports no further results.
    while (response.links is not None and len(response.links) > 0
           and response.links['next']['results'] == 'true'):
        url = response.links['next']['url']
        response = self.session.get(url)
        issues += response.json()
    return issues