def get_all_stargazers(schema, config, state, mdata):
    '''
    https://developer.github.com/v3/activity/starring/#list-stargazers
    '''
    repo_path = config['repository']
    if bookmarks.get_bookmark(state, "stargazers", 'since'):
        query_string = '&since={}'.format(
            bookmarks.get_bookmark(state, "stargazers", 'since'))
    else:
        query_string = ''

    stargazers_headers = {'Accept': 'application/vnd.github.v3.star+json'}
    last_stargazer_time = None
    with metrics.record_counter('stargazers') as counter:
        for response in authed_get_all_pages(
                'stargazers',
                'https://api.github.com/repos/{}/stargazers?sort=updated&direction=asc{}'
                .format(repo_path, query_string),
                stargazers_headers):
            stargazers = response.json()
            extraction_time = singer.utils.now()
            for stargazer in stargazers:
                with singer.Transformer() as transformer:
                    rec = transformer.transform(
                        stargazer, schema, metadata=metadata.to_map(mdata))
                rec['user_id'] = rec['user']['id']
                singer.write_record('stargazers', rec,
                                    time_extracted=extraction_time)
                singer.write_bookmark(state, 'stargazers', 'since',
                                      singer.utils.strftime(extraction_time))
                counter.increment()

    return state
def get_all_commits(schema, config, state, mdata):
    '''
    https://developer.github.com/v3/repos/commits/#list-commits-on-a-repository
    '''
    repo_path = config['repository']
    if bookmarks.get_bookmark(state, "commits", 'since'):
        query_string = '?since={}'.format(
            bookmarks.get_bookmark(state, "commits", 'since'))
    else:
        query_string = ''

    latest_commit_time = None
    with metrics.record_counter('commits') as counter:
        for response in authed_get_all_pages(
                'commits',
                'https://api.github.com/repos/{}/commits{}'.format(
                    repo_path, query_string)):
            commits = response.json()
            extraction_time = singer.utils.now()
            for commit in commits:
                with singer.Transformer() as transformer:
                    rec = transformer.transform(
                        commit, schema, metadata=metadata.to_map(mdata))
                singer.write_record('commits', rec,
                                    time_extracted=extraction_time)
                singer.write_bookmark(state, 'commits', 'since',
                                      singer.utils.strftime(extraction_time))
                counter.increment()

    return state
def get_all_issues(schema, config, state, mdata):
    '''
    https://developer.github.com/v3/issues/#list-issues-for-a-repository
    '''
    repo_path = config['repository']
    if bookmarks.get_bookmark(state, "issues", 'since'):
        query_string = '&since={}'.format(
            bookmarks.get_bookmark(state, "issues", 'since'))
    else:
        query_string = ''

    last_issue_time = None
    with metrics.record_counter('issues') as counter:
        for response in authed_get_all_pages(
                'issues',
                'https://api.github.com/repos/{}/issues?sort=updated&direction=asc{}'
                .format(repo_path, query_string)):
            issues = response.json()
            extraction_time = singer.utils.now()
            for issue in issues:
                with singer.Transformer() as transformer:
                    rec = transformer.transform(
                        issue, schema, metadata=metadata.to_map(mdata))
                singer.write_record('issues', rec,
                                    time_extracted=extraction_time)
                singer.write_bookmark(state, 'issues', 'since',
                                      singer.utils.strftime(extraction_time))
                counter.increment()

    return state
def get_sync_domain(state, stream, model_name):
    """
    Return a domain (a filter expression) that can be used to filter
    records. If the stream's replication method is FULL_TABLE, return an
    empty domain so every record is fetched.
    """
    domain = []
    for entry in stream.metadata:
        # stream metadata will have empty breadcrumb
        if not entry['breadcrumb'] and \
                entry['metadata'].get('replication-method', None) == 'FULL_TABLE':
            return domain

    last_updated_at = get_bookmark(state, stream.tap_stream_id,
                                   'last_updated_at')
    if last_updated_at:
        last_updated_at = utils.strptime(last_updated_at)
        domain.extend([
            'OR',
            [('write_date', '>', last_updated_at)],
            [('create_date', '>', last_updated_at)],
        ])

    last_record_id = get_bookmark(state, stream.tap_stream_id,
                                  'last_record_id')
    if last_record_id:
        domain.append(('id', '>', last_record_id))

    return domain
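# A minimal usage sketch for get_sync_domain, not part of the original tap:
# the stream stub and bookmark values below are hypothetical. With a saved
# bookmark, an incremental stream yields an Odoo-style OR filter over
# write_date/create_date plus an id cursor.
def _demo_get_sync_domain():
    from types import SimpleNamespace
    stream = SimpleNamespace(
        tap_stream_id='res_partner',
        metadata=[{'breadcrumb': [],
                   'metadata': {'replication-method': 'INCREMENTAL'}}],
    )
    state = {'bookmarks': {'res_partner': {
        'last_updated_at': '2020-01-01T00:00:00Z',
        'last_record_id': 42,
    }}}
    domain = get_sync_domain(state, stream, 'res.partner')
    # Expected shape:
    # ['OR', [('write_date', '>', <datetime>)],
    #        [('create_date', '>', <datetime>)], ('id', '>', 42)]
    return domain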
def get_or_create_export_for_leads(client, state, stream, export_start, config):
    export_id = bookmarks.get_bookmark(state, "leads", "export_id")
    # check if export is still valid
    if export_id is not None and not client.export_available("leads", export_id):
        singer.log_info("Export %s no longer available.", export_id)
        export_id = None

    if export_id is None:
        # Corona mode is required to query by "updatedAt", otherwise a full
        # sync is required using "createdAt".
        query_field = "updatedAt" if client.use_corona else "createdAt"
        max_export_days = int(config.get('max_export_days', MAX_EXPORT_DAYS))
        export_end = get_export_end(export_start, end_days=max_export_days)
        query = {query_field: {"startAt": export_start.isoformat(),
                               "endAt": export_end.isoformat()}}

        # Create the new export and store the id and end date in state.
        # Does not start the export (must POST to the "enqueue" endpoint).
        fields = []
        for entry in stream['metadata']:
            if len(entry['breadcrumb']) > 0 and \
                    (entry['metadata'].get('selected') or
                     entry['metadata'].get('inclusion') == 'automatic'):
                fields.append(entry['breadcrumb'][-1])

        export_id = client.create_export("leads", fields, query)
        state = update_state_with_export_info(
            state, stream, export_id=export_id,
            export_end=export_end.isoformat())
    else:
        export_end = pendulum.parse(
            bookmarks.get_bookmark(state, "leads", "export_end"))

    return export_id, export_end
def get_or_create_export_for_activities(client, state, stream, export_start, config):
    export_id = bookmarks.get_bookmark(state, stream["tap_stream_id"],
                                       "export_id")
    if export_id is not None and not client.export_available(
            "activities", export_id):
        singer.log_info("Export %s no longer available.", export_id)
        export_id = None

    if export_id is None:
        # The activity id is in the top-most breadcrumb of the metadata.
        # Activity ids correspond to activity type id in Marketo.
        # We need the activity type id to build the query.
        activity_metadata = metadata.to_map(stream["metadata"])
        activity_type_id = metadata.get(activity_metadata, (),
                                        'marketo.activity-id')

        # Activities must be queried by `createdAt` even though
        # that is not a real field. `createdAt` proxies `activityDate`.
        # The activity type id must also be included in the query. The
        # largest date range that can be used for activities is 30 days.
        max_export_days = int(config.get('max_export_days', MAX_EXPORT_DAYS))
        export_end = get_export_end(export_start, end_days=max_export_days)
        query = {
            "createdAt": {
                "startAt": export_start.isoformat(),
                "endAt": export_end.isoformat()
            },
            "activityTypeIds": [activity_type_id]
        }

        # Create the new export and store the id and end date in state.
        # Does not start the export (must POST to the "enqueue" endpoint).
        try:
            export_id = client.create_export("activities", ACTIVITY_FIELDS,
                                             query)
        except ApiQuotaExceeded as e:
            # The main reason we wrap the ApiQuotaExceeded exception in a
            # new one is to be able to tell the customer what their
            # configured max_export_days is.
            raise ApiQuotaExceeded(
                ("You may wish to consider changing the "
                 "`max_export_days` config value to a lower number if "
                 "you're unable to sync a single {} day window within "
                 "your current API quota.").format(max_export_days)) from e
        state = update_state_with_export_info(
            state, stream, export_id=export_id,
            export_end=export_end.isoformat())
    else:
        export_end = pendulum.parse(
            bookmarks.get_bookmark(state, stream["tap_stream_id"],
                                   "export_end"))

    return export_id, export_end
def test_empty_state(self):
    empty_state = {}

    # Case with no value to fall back on
    self.assertIsNone(
        bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key'))

    # Case with a given default
    self.assertEqual(
        bookmarks.get_bookmark(empty_state, 'some_stream', 'my_key',
                               'default_value'),
        'default_value')
def sync_paginated(client, state, stream):
    # http://developers.marketo.com/rest-api/endpoint-reference/lead-database-endpoint-reference/#!/Campaigns/getCampaignsUsingGET
    # http://developers.marketo.com/rest-api/endpoint-reference/lead-database-endpoint-reference/#!/Static_Lists/getListsUsingGET
    #
    # Campaigns and Static Lists are paginated with a max return of 300
    # items per page. There are no filters that can be used to only
    # return updated records.
    replication_key = determine_replication_key(stream['tap_stream_id'])
    singer.write_schema(stream["tap_stream_id"], stream["schema"],
                        stream["key_properties"],
                        bookmark_properties=[replication_key])
    start_date = bookmarks.get_bookmark(state, stream["tap_stream_id"],
                                        replication_key)
    params = {"batchSize": 300}
    endpoint = "rest/v1/{}.json".format(stream["tap_stream_id"])

    # Paginated requests use paging tokens for retrieving the next page
    # of results. These tokens are stored in the state for resuming
    # syncs. If a paging token exists in state, use it.
    next_page_token = bookmarks.get_bookmark(state, stream["tap_stream_id"],
                                             "next_page_token")
    if next_page_token:
        params["nextPageToken"] = next_page_token

    # Keep querying pages of data until no next page token.
    record_count = 0
    job_started = pendulum.utcnow().isoformat()
    while True:
        data = client.request("GET", endpoint,
                              endpoint_name=stream["tap_stream_id"],
                              params=params)
        time_extracted = utils.now()

        # Each row just needs the values formatted. If the record is
        # newer than the original start date, stream the record. Finally,
        # update the bookmark if newer than the existing bookmark.
        for row in data["result"]:
            record = format_values(stream, row)
            if record[replication_key] >= start_date:
                record_count += 1
                singer.write_record(stream["tap_stream_id"], record,
                                    time_extracted=time_extracted)

        # No next page, results are exhausted.
        if "nextPageToken" not in data:
            break

        # Store the next page token in state and continue.
        params["nextPageToken"] = data["nextPageToken"]
        state = bookmarks.write_bookmark(state, stream["tap_stream_id"],
                                         "next_page_token",
                                         data["nextPageToken"])
        singer.write_state(state)

    # Once all results are exhausted, unset the next page token bookmark
    # so the subsequent sync starts from the beginning.
    state = bookmarks.write_bookmark(state, stream["tap_stream_id"],
                                     "next_page_token", None)
    state = bookmarks.write_bookmark(state, stream["tap_stream_id"],
                                     replication_key, job_started)
    singer.write_state(state)
    return state, record_count
def sync_leads(client, state, stream):
    # http://developers.marketo.com/rest-api/bulk-extract/bulk-lead-extract/
    replication_key = determine_replication_key(stream["tap_stream_id"])
    singer.write_schema("leads", stream["schema"], stream["key_properties"],
                        bookmark_properties=[replication_key])
    initial_bookmark = pendulum.parse(
        bookmarks.get_bookmark(state, "leads", replication_key))
    export_start = pendulum.parse(
        bookmarks.get_bookmark(state, "leads", replication_key))
    if client.use_corona:
        export_start = export_start.subtract(days=ATTRIBUTION_WINDOW_DAYS)

    job_started = pendulum.utcnow()
    record_count = 0
    max_bookmark = initial_bookmark
    while export_start < job_started:
        export_id, export_end = get_or_create_export_for_leads(
            client, state, stream, export_start)
        state = wait_for_export(client, state, stream, export_id)
        for row in stream_rows(client, "leads", export_id):
            time_extracted = utils.now()

            record = format_values(stream, row)
            record_bookmark = pendulum.parse(record[replication_key])

            if client.use_corona:
                max_bookmark = export_end
                singer.write_record("leads", record,
                                    time_extracted=time_extracted)
                record_count += 1
            elif record_bookmark >= initial_bookmark:
                max_bookmark = max(max_bookmark, record_bookmark)
                singer.write_record("leads", record,
                                    time_extracted=time_extracted)
                record_count += 1

        # Now that one of the exports is finished, update the bookmark
        state = update_state_with_export_info(
            state, stream, bookmark=max_bookmark.isoformat())
        export_start = export_end

    return state, record_count
def handle_resource(resource, schemas, id_field, state, mdata):
    extraction_time = singer.utils.now()
    endpoint = get_endpoint(resource)
    bookmark = get_bookmark(state, resource, "since")
    qs = {} if resource not in CAN_FILTER else {"updated_after_utc": bookmark}

    with metrics.record_counter(resource) as counter:
        for page in get_all_pages(resource, endpoint, qs):
            for row in page:
                # Handle custom fields if present
                if resource in HAS_CUSTOM_FIELDS:
                    row = transform_custom_field(id_field, row)
                write_record(row, resource, schemas[resource], mdata,
                             extraction_time)
                counter.increment()
                if "links" in schemas:
                    handle_links(
                        resource,
                        row[id_field],
                        schemas["links"],
                        mdata,
                        extraction_time,
                    )

    return write_bookmark(state, resource, extraction_time)
def sync(self):
    bookmark = bookmarks.get_bookmark(
        self.state,
        self.stream_name,
        self.replication_keys[0],
        MIN_TIME,
    )
    start_dt = utils.strptime_to_utc(bookmark)
    resp = self.client.request_feed(self.stream_name)

    max_dt = start_dt
    for station in resp.get("data").get("stations"):
        last_reported = station.get("last_reported")
        last_reported_dt = datetime.fromtimestamp(last_reported).replace(
            tzinfo=pytz.UTC)
        if last_reported_dt > max_dt:
            max_dt = last_reported_dt
        if last_reported_dt > start_dt:
            yield station

    bookmarks.write_bookmark(
        self.state,
        self.stream_name,
        self.replication_keys[0],
        utils.strftime(max_dt),
    )
    messages.write_state(self.state)
def hourly_request(self, state, config, query, stream):
    # Manage the start date condition if it is more than two months back.
    try:
        bookmark = get_bookmark(state, stream, "since")
        if bookmark:
            start_date = bookmark
        else:
            start_date = config['start_hour']

        # The start date can't be more than two months back, otherwise
        # Datadog rejects the request.
        min_date = (datetime.today() + relativedelta(months=-2) +
                    relativedelta(days=1)).strftime('%Y-%m-%dT%H')
        if start_date < min_date:
            start_date = min_date

        # Querying too close to the present returns partial data for those
        # days, so always step back a few days to refresh.
        max_date = (datetime.today() +
                    relativedelta(days=-5)).strftime('%Y-%m-%dT%H')
        if start_date > max_date:
            start_date = max_date

        if start_date != datetime.utcnow().strftime('%Y-%m-%dT%H'):
            data = {
                'start_hr': start_date,
                'end_hr': datetime.utcnow().strftime('%Y-%m-%dT%H')
            }
            traces = self._get(query, data=data)
            return traces.json()
        else:
            return None
    except Exception as error:
        LOGGER.error(error)
        return None
def sync_activities(client, state, stream, config):
    # http://developers.marketo.com/rest-api/bulk-extract/bulk-activity-extract/
    replication_key = determine_replication_key(stream['tap_stream_id'])
    singer.write_schema(stream["tap_stream_id"], stream["schema"],
                        stream["key_properties"],
                        bookmark_properties=[replication_key])
    export_start = pendulum.parse(
        bookmarks.get_bookmark(state, stream["tap_stream_id"],
                               replication_key))
    job_started = pendulum.utcnow()
    record_count = 0
    while export_start < job_started:
        export_id, export_end = get_or_create_export_for_activities(
            client, state, stream, export_start, config)
        state = wait_for_export(client, state, stream, export_id)
        for row in stream_rows(client, "activities", export_id):
            time_extracted = utils.now()

            row = flatten_activity(row, stream)
            record = format_values(stream, row)

            singer.write_record(stream["tap_stream_id"], record,
                                time_extracted=time_extracted)
            record_count += 1

        state = update_state_with_export_info(
            state, stream, bookmark=export_start.isoformat())
        export_start = export_end

    return state, record_count
def sync_programs(client, state, stream):
    # http://developers.marketo.com/rest-api/assets/programs/#by_date_range
    #
    # Programs are queryable via their updatedAt time but require an
    # end date as well. As there is no max time range for the query,
    # query from the bookmark value until current.
    #
    # The Programs endpoint uses offsets with a return limit of 200
    # per page. If requesting past the final program, an error message
    # is returned to indicate that the endpoint has been fully synced.
    replication_key = determine_replication_key(stream['tap_stream_id'])
    singer.write_schema("programs", stream["schema"],
                        stream["key_properties"],
                        bookmark_properties=[replication_key])
    start_date = bookmarks.get_bookmark(state, "programs", replication_key)
    end_date = pendulum.utcnow().isoformat()
    params = {
        "maxReturn": 200,
        "offset": 0,
        "earliestUpdatedAt": start_date,
        "latestUpdatedAt": end_date,
    }
    endpoint = "rest/asset/v1/programs.json"

    record_count = 0
    while True:
        data = client.request("GET", endpoint, endpoint_name="programs",
                              params=params)

        # If the no asset message is in the warnings, we have exhausted
        # the search results and can end the sync.
        if "warnings" in data and NO_ASSET_MSG in data["warnings"]:
            break

        time_extracted = utils.now()

        # Each row just needs the values formatted. If the record is
        # newer than the original start date, stream the record.
        for row in data["result"]:
            record = format_values(stream, row)
            if record[replication_key] >= start_date:
                record_count += 1
                singer.write_record("programs", record,
                                    time_extracted=time_extracted)

        # Increment the offset by the return limit for the next query.
        params["offset"] += params["maxReturn"]

    # Now that we've finished every page we can update the bookmark to
    # the end of the query.
    state = bookmarks.write_bookmark(state, "programs", replication_key,
                                     end_date)
    singer.write_state(state)
    return state, record_count
def validate_state(config, catalog, state):
    for stream in catalog["streams"]:
        for mdata in stream['metadata']:
            if mdata['breadcrumb'] == [] and \
                    mdata['metadata'].get('selected') != True:
                # If a stream is deselected while it's the current stream,
                # unset the current stream.
                if stream["tap_stream_id"] == get_currently_syncing(state):
                    set_currently_syncing(state, None)
                break

        replication_key = determine_replication_key(stream['tap_stream_id'])
        if not replication_key:
            continue

        # If there's no bookmark for a stream (new integration, newly selected,
        # reset, etc) we need to use the default start date from the config.
        bookmark = get_bookmark(state, stream["tap_stream_id"],
                                replication_key)
        if bookmark is None:
            state = write_bookmark(state, stream["tap_stream_id"],
                                   replication_key, config["start_date"])

    singer.write_state(state)
    return state
async def sync_bills(self, schema, period: pendulum.period = None):
    """Output the `bills` in the period."""
    stream = "bills"
    loop = asyncio.get_event_loop()

    if not period:
        # build a default period from the last bookmark
        bookmark = get_bookmark(self.state, stream, "start_time")
        start = pendulum.parse(bookmark)
        end = pendulum.now()
        period = pendulum.period(start, end)

    singer.write_schema(stream, schema, ["invoice_id"])
    for at in period.range("months"):
        result = await loop.run_in_executor(None, self.client.bill, at)
        if result:
            singer.write_record(stream, result)
            try:
                end = datetime.datetime.strptime(
                    result["end_time"], "%Y-%m-%dT%H:%M:%SZ").isoformat()
                self.state = write_bookmark(self.state, stream,
                                            "start_time", end)
            except ValueError:
                # Malformed end_time; report it and keep the old bookmark.
                sys.stderr.write("what fails is:" + result['end_time'] + "\n")
async def sync_top_average_metrics(self, schema):
    stream = "top_average_metrics"
    loop = asyncio.get_event_loop()
    bookmark = get_bookmark(self.state, "top_average_metrics", "since")
    if bookmark:
        start_date = urllib.parse.quote(bookmark)
    else:
        start_date = self.config['start_month']

    today = datetime.today()
    end_date = datetime(today.year, today.month, 1)
    month_data = datetime.strptime(start_date, '%Y-%m')

    singer.write_schema(stream, schema, ["month", "metric_name", "account"])
    while month_data <= end_date:
        month_str = datetime.strftime(month_data, '%Y-%m')
        date_str = datetime.strftime(month_data, '%Y-%m-%d')
        top_average_metrics = await loop.run_in_executor(
            None, self.client.top_avg_metrics, month_str)
        if top_average_metrics:
            for t in top_average_metrics['usage']:
                t["month"] = date_str
                t['account'] = self.config['account']
                singer.write_record(stream, t)
            self.state = write_bookmark(self.state, stream, "since",
                                        month_str)
        month_data = month_data + relativedelta(months=+1)
def sync(self):
    bookmark = bookmarks.get_bookmark(
        self.state,
        self.stream_name,
        self.replication_keys[0],
        MIN_TIME,
    )
    start_dt = utils.strptime_to_utc(bookmark)
    resp = self.client.request_feed(self.stream_name)
    last_updated = resp.get("last_updated").replace(tzinfo=pytz.UTC)
    if start_dt >= last_updated:
        return

    for station in resp.get("data").get("stations"):
        # Delete array/complex properties because we're not powerful enough yet
        if "rental_methods" in station:
            del station["rental_methods"]
        if "rental_uris" in station:
            del station["rental_uris"]
        yield station

    bookmarks.write_bookmark(
        self.state,
        self.stream_name,
        self.replication_keys[0],
        utils.strftime(last_updated),
    )
    messages.write_state(self.state)
def get_attribution_window_bookmark(state, advertiser_ids, stream_name):
    """Get attribution window for stream from Singer State."""
    mid_bk_value = bookmarks.get_bookmark(
        state,
        state_key_name(advertiser_ids, stream_name),
        "last_attribution_window_date",
    )
    return utils.strptime_with_tz(mid_bk_value) if mid_bk_value else None
def sync_products(client, catalog, state, start_date, end_date, stream_id,
                  stream_config):
    stream_id = 'products'
    write_schema(catalog, stream_id)

    # Rip this out once all bookmarks are converted
    if isinstance(state.get('bookmarks', {}).get(stream_id), str):
        # Old style bookmark found. Use it and delete it
        last_date = state['bookmarks'].pop(stream_id)
        # Write this bookmark in the new style
        bookmarks.write_bookmark(state, stream_id, 'datetime', last_date)
        singer.write_state(state)

    last_date = bookmarks.get_bookmark(state, stream_id, 'datetime',
                                       start_date)

    def products_transform(record):
        out = {}
        for key, value in record.items():
            if key not in DEPRECATED_PRODUCT_FIELDS:
                out[key] = value
        return out

    page = 1
    limit = 200
    num_results = None
    max_datetime = last_date
    while is_next_page(limit, num_results):
        LOGGER.info('Syncing products - page {}'.format(page))

        updated_min = strptime_to_utc(last_date).strftime('%Y-%m-%d')
        data = client.get('/get-product/',
                          params={
                              'updated_at_min': updated_min,
                              'page': page,
                              'count': limit
                          },
                          endpoint=stream_id)
        page += 1

        records = data['products']
        if records:
            num_results = len(records)
            max_page_datetime = max(map(lambda x: x['updated_at'], records))
            if max_page_datetime > max_datetime:
                max_datetime = max_page_datetime
            persist_records(catalog, stream_id,
                            map(products_transform, records))
        else:
            num_results = 0

    bookmarks.write_bookmark(state, stream_id, 'datetime', max_datetime)
    singer.write_state(state)
def get_bookmark(state: dict, project: str, stream_name: str,
                 bookmark_key: str) -> Optional[str]:
    """
    Retrieve a bookmark from state; it marks when records for this key
    were last synced.
    """
    stream_bookmark = bookmarks.get_bookmark(state, project, stream_name)
    if stream_bookmark is not None:
        return stream_bookmark.get(bookmark_key)
    return None
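# Companion sketch (hypothetical project/stream values, not part of the
# original tap): this wrapper nests streams one level deeper than singer's
# default layout, keyed by project.
def _demo_project_scoped_bookmark():
    state = {'bookmarks': {'my-project': {
        'issues': {'since': '2020-01-01T00:00:00Z'}}}}
    assert get_bookmark(state, 'my-project', 'issues', 'since') == \
        '2020-01-01T00:00:00Z'
    # A missing project or key falls through to None.
    assert get_bookmark(state, 'other-project', 'issues', 'since') is None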
def incidents(self, state, config):
    try:
        bookmark = get_bookmark(state, "incidents", "since")
        query_base = "incidents?limit=100&total=true&utc=true"
        if bookmark:
            start_date = datetime.datetime.strptime(
                bookmark, '%Y-%m-%dT%H:%M:%S.%f')
        else:
            start_date = datetime.datetime.strptime(
                config['start_date'], '%Y-%m-%d')

        r = relativedelta.relativedelta(datetime.datetime.utcnow(),
                                        start_date)
        result = {}
        if r.years > 0 or r.months >= 5:
            # The date range is too wide for a single request, so walk
            # forward in roughly five-month windows.
            while r.years > 0 or r.months >= 5:
                until = start_date + datetime.timedelta(5 * 365 / 12)
                query = query_base + "&since=" + urllib.parse.quote(
                    start_date.isoformat()) + "&until=" + urllib.parse.quote(
                        until.isoformat())
                iterable = self._get(query)
                if 'incidents' in result:
                    result['incidents'].extend(iterable['incidents'])
                else:
                    result = iterable
                offset = 0
                while iterable['more']:
                    offset = offset + result['limit']
                    # Build each page URL from the window query so offset
                    # parameters don't accumulate across pages.
                    page_query = query + "&offset=" + str(offset)
                    iterable = self._get(page_query)
                    result['incidents'].extend(iterable['incidents'])
                start_date = until
                r = relativedelta.relativedelta(
                    datetime.datetime.utcnow(), start_date)

        # Final window: from start_date up to now.
        query = query_base + "&since=" + urllib.parse.quote(
            start_date.isoformat()) + "&until=" + urllib.parse.quote(
                datetime.datetime.utcnow().isoformat())
        iterable = self._get(query)
        if 'incidents' in result:
            result['incidents'].extend(iterable['incidents'])
        else:
            result = iterable
        offset = 0
        while iterable['more']:
            offset = offset + result['limit']
            page_query = query + "&offset=" + str(offset)
            iterable = self._get(page_query)
            result['incidents'].extend(iterable['incidents'])
        return result
    except Exception as e:
        LOGGER.error(e)
        return None
def is_first_run(tap_stream_id: str, state: Dict[str, Any]) -> bool:
    """Checks bookmarks to determine if it is a stream's first run"""
    value = get_bookmark(state, tap_stream_id,
                         "wrote_initial_activate_version", default=False)
    if not isinstance(value, bool):
        return True
    return not value
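# Behaviour sketch for is_first_run (the state values below are
# hypothetical): the flag must be a literal bool; anything else is treated
# as a first run.
def _demo_is_first_run():
    assert is_first_run('users', {}) is True
    done = {'bookmarks': {'users': {'wrote_initial_activate_version': True}}}
    assert is_first_run('users', done) is False
    # A corrupted, non-boolean flag also counts as a first run.
    bad = {'bookmarks': {'users': {'wrote_initial_activate_version': 'yes'}}}
    assert is_first_run('users', bad) is True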
def translate_state(state, catalog, repositories):
    '''
    This tap used to only support a single repository, in which case the
    state took the shape of:
    {
      "bookmarks": {
        "commits": {
          "since": "2018-11-14T13:21:20.700360Z"
        }
      }
    }
    The tap now supports multiple repos, so this function should be called
    at the beginning of each run to ensure the state is translated to the
    new format:
    {
      "bookmarks": {
        "singer-io/tap-adwords": {
          "commits": {
            "since": "2018-11-14T13:21:20.700360Z"
          }
        },
        "singer-io/tap-salesforce": {
          "commits": {
            "since": "2018-11-14T13:21:20.700360Z"
          }
        }
      }
    }
    '''
    nested_dict = lambda: collections.defaultdict(nested_dict)
    new_state = nested_dict()

    for stream in catalog['streams']:
        stream_name = stream['tap_stream_id']
        for repo in repositories:
            if bookmarks.get_bookmark(state, repo, stream_name):
                return state
            if bookmarks.get_bookmark(state, stream_name, 'since'):
                new_state['bookmarks'][repo][stream_name]['since'] = \
                    bookmarks.get_bookmark(state, stream_name, 'since')

    return new_state
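# Round-trip sketch for translate_state (the catalog and repository values
# are hypothetical): an old stream-keyed state is re-keyed under each
# configured repository, while an already-migrated state passes through.
def _demo_translate_state():
    catalog = {'streams': [{'tap_stream_id': 'commits'}]}
    old = {'bookmarks': {'commits': {'since': '2018-11-14T13:21:20.700360Z'}}}
    new = translate_state(old, catalog, ['singer-io/tap-adwords'])
    assert new['bookmarks']['singer-io/tap-adwords']['commits']['since'] == \
        '2018-11-14T13:21:20.700360Z'
    # A state already keyed by repository is returned unchanged.
    assert translate_state(new, catalog, ['singer-io/tap-adwords']) is new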
def translate_state(state, catalog, organizations):
    """
    This tap used to only support a single organization, in which case the
    state took the shape of:
    {
      "bookmarks": {
        "commits": {
          "since": "2018-11-14T13:21:20.700360Z"
        }
      }
    }
    The tap now supports multiple organizations, so this function should be
    called at the beginning of each run to ensure the state is translated to
    the new format:
    {
      "bookmarks": {
        "singer-io": {
          "commits": {
            "since": "2018-11-14T13:21:20.700360Z"
          }
        },
        "stoplightio": {
          "commits": {
            "since": "2018-11-14T13:21:20.700360Z"
          }
        }
      }
    }
    """
    nested_dict = lambda: collections.defaultdict(nested_dict)
    new_state = nested_dict()

    for stream in catalog["streams"]:
        stream_name = stream["tap_stream_id"]
        for org in organizations:
            if bookmarks.get_bookmark(state, org, stream_name):
                return state
            if bookmarks.get_bookmark(state, stream_name, "since"):
                new_state["bookmarks"][org][stream_name]["since"] = \
                    bookmarks.get_bookmark(state, stream_name, "since")

    return new_state
def get_attendances(site_id, state):
    bookmark_value = get_bookmark(state, "attendances", "since")
    for page in get_all_pages(
            "attendances",
            "/sites/{}/attendances".format(site_id),
            bookmark_value
    ):
        rows = page["data"]
        for attendance in rows:
            yield attendance
    return state
def test_non_empty_state(self):
    stream_id_1 = 'customers'
    bookmark_key_1 = 'datetime'
    bookmark_val_1 = 123456789
    non_empty_state = {
        'bookmarks': {
            stream_id_1: {
                bookmark_key_1: bookmark_val_1
            }
        }
    }

    #
    # Cases with no value to fall back on
    #

    # Bad stream, bad key
    self.assertIsNone(
        bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key'))

    # Good stream, bad key
    self.assertIsNone(
        bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key'))

    # Good stream, good key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, stream_id_1,
                               bookmark_key_1),
        bookmark_val_1)

    #
    # Cases with a given default
    #

    # Bad stream, bad key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, 'some_stream', 'my_key',
                               'default_value'),
        'default_value')

    # Bad stream, good key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, 'some_stream',
                               bookmark_key_1, 'default_value'),
        'default_value')

    # Good stream, bad key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, stream_id_1, 'my_key',
                               'default_value'),
        'default_value')

    # Good stream, good key
    self.assertEqual(
        bookmarks.get_bookmark(non_empty_state, stream_id_1,
                               bookmark_key_1, 'default_value'),
        bookmark_val_1)
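# Companion sketch, not part of the original suite: write_bookmark builds
# the nested state that get_bookmark reads back, so the two round-trip.
def test_write_then_read(self):
    state = bookmarks.write_bookmark(
        {}, 'customers', 'datetime', '2020-01-01T00:00:00Z')
    # write_bookmark nests values under 'bookmarks' -> stream -> key
    self.assertEqual(
        state,
        {'bookmarks': {'customers': {'datetime': '2020-01-01T00:00:00Z'}}})
    self.assertEqual(
        bookmarks.get_bookmark(state, 'customers', 'datetime'),
        '2020-01-01T00:00:00Z')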
def top_avg_metrics(self, state, config):
    try:
        bookmark = get_bookmark(state, "top_average_metrics", "since")
        if bookmark:
            start_date = urllib.parse.quote(bookmark)
        else:
            start_date = config['start_month']
        data = {'month': start_date}
        query = "top_avg_metrics"
        metrics = self._get(query, data=data)
        return metrics.json()
    except Exception:
        return None
def get_or_create_export_for_leads(client, state, stream, export_start):
    export_id = bookmarks.get_bookmark(state, "leads", "export_id")
    # check if export is still valid
    if export_id is not None and not client.export_available(
            "leads", export_id):
        singer.log_info("Export %s no longer available.", export_id)
        export_id = None

    if export_id is None:
        # Corona mode is required to query by "updatedAt", otherwise a full
        # sync is required using "createdAt".
        query_field = "updatedAt" if client.use_corona else "createdAt"
        export_end = get_export_end(export_start)
        query = {
            query_field: {
                "startAt": export_start.isoformat(),
                "endAt": export_end.isoformat()
            }
        }

        # Create the new export and store the id and end date in state.
        # Does not start the export (must POST to the "enqueue" endpoint).
        fields = [
            f for f, s in stream["schema"]["properties"].items()
            if s.get("selected") or (s.get("inclusion") == "automatic")
        ]
        export_id = client.create_export("leads", fields, query)
        state = update_state_with_export_info(
            state, stream, export_id=export_id,
            export_end=export_end.isoformat())
    else:
        export_end = pendulum.parse(
            bookmarks.get_bookmark(state, "leads", "export_end"))

    return export_id, export_end
def issues(self, project_id, state):
    bookmark = get_bookmark(state, "issues", "start")
    query = f"projects/rise-people/{project_id}/issues/"
    if bookmark:
        date_filter = urllib.parse.quote("lastSeen:>=" + bookmark)
        query += "?query=" + date_filter
    response = self._get(query)
    issues = response.json()
    url = response.url
    # Follow pagination links until the API reports no further results.
    while (response.links is not None and len(response.links) > 0
           and response.links['next']['results'] == 'true'):
        url = response.links['next']['url']
        response = self.session.get(url)
        issues += response.json()
    return issues