def do_sync(self): LOGGER.info("Starting sync.") streams = self.get_streams_to_replicate() stream_map = {s.NAME: s for s in streams} for available_stream in AVAILABLE_STREAMS: if available_stream.NAME not in stream_map: continue stream = stream_map[available_stream.NAME] try: stream.state = self.state stream.sync() self.state = stream.state except OSError as e: LOGGER.error(str(e)) exit(e.errno) except Exception as e: LOGGER.error(str(e)) LOGGER.error('Failed to sync endpoint {}, moving on!' .format(stream.TABLE)) raise e save_state(self.state)
def do_sync(self): LOGGER.info("Starting sync.") streams = self.get_streams_to_replicate() for stream in streams: # Don't sync substreams directly -- sync them # via their parents if len(stream.REQUIRES) > 0: continue try: stream.state = self.state stream.sync() self.state = stream.state except OSError as e: LOGGER.error(str(e)) exit(e.errno) except Exception as e: LOGGER.error(str(e)) LOGGER.error('Failed to sync endpoint {}, moving on!' .format(stream.TABLE)) raise e LOGGER.info('saving state with {}'.format(self.state)) save_state(self.state)
def sync_data(self, parent_ids=None):
    if parent_ids is None:
        raise RuntimeError(
            'Cannot pull tweet engagement without parent tweet IDs')

    self.write_schema()

    start = self.get_start_for_tweet_ids(parent_ids)

    LOGGER.info(
        "Pulling data from {} for a batch of 25 tweets".format(start))

    table = self.TABLE
    url = self.get_url()

    while True:
        start = self.get_start_for_tweet_ids(parent_ids)
        end = min(datetime.datetime.utcnow(),
                  start + datetime.timedelta(weeks=4))

        # Stop once the window has caught up to within three hours of now.
        if start > (end - datetime.timedelta(hours=3)):
            break

        body = self.get_body(start, end, parent_ids)

        self.request_start = datetime.datetime.utcnow()
        result = self.client.make_request(url, self.API_METHOD, body=body)
        data = self.get_stream_data(result)

        with singer.metrics.record_counter(endpoint=table) as counter:
            for obj in data:
                singer.write_records(table, [self.filter_keys(obj)])

                self.state = incorporate(
                    self.state,
                    'tweet_engagements.{}'.format(obj.get('tweet_id')),
                    'date',
                    obj.get('date'))

                counter.increment()

        save_state(self.state)

        # Pace requests so at most one is made every `max_sleep` seconds.
        # total_seconds() goes negative once the budget is already spent,
        # in which case the sleep is skipped entirely.
        max_sleep = 35
        sleep_seconds = min(
            max_sleep,
            ((self.request_start + datetime.timedelta(seconds=max_sleep)) -
             datetime.datetime.utcnow()).total_seconds())

        if sleep_seconds > 0:
            LOGGER.info("Sleeping for {} seconds before making "
                        "next request".format(sleep_seconds))
            time.sleep(sleep_seconds)

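# The engagement stream above is a substream: per the do_sync variant that
# skips streams with REQUIRES, it is driven by its parent rather than synced
# directly. The following is a minimal sketch, not taken from the source, of
# how a parent tweets stream might feed it batches of IDs. The method name
# `sync_substreams`, the `substreams` attribute, and the argument `tweet_ids`
# are assumptions; the batch size of 25 echoes the log message above.
def sync_substreams(self, tweet_ids):
    BATCH_SIZE = 25  # assumed to match the "batch of 25 tweets" above

    for substream in self.substreams:
        substream.state = self.state

        for offset in range(0, len(tweet_ids), BATCH_SIZE):
            substream.sync_data(
                parent_ids=tweet_ids[offset:offset + BATCH_SIZE])

        self.state = substream.state
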
def do_sync(self): LOGGER.info("Starting sync.") streams = self.get_streams_to_replicate() for stream in streams: try: stream.state = self.state stream.sync() self.state = stream.state except OSError as e: LOGGER.error(str(e)) exit(e.errno) except Exception as e: LOGGER.error(str(e)) LOGGER.error('Failed to sync endpoint {}, moving on!'.format( stream.TABLE)) raise e save_state(self.state)
def sync_data(self):
    table = self.TABLE
    done = False

    filters = self.get_filters()

    start_date = get_last_record_value_for_table(self.state, table)

    if start_date is None:
        start_date = get_config_start_date(self.config)
    else:
        start_date = start_date.replace(tzinfo=pytz.UTC)

    td = self.get_interval()
    end_date = start_date + td

    while not done:
        max_date = start_date

        LOGGER.info("Querying {} starting at {}".format(table, start_date))

        body = {
            "startMSeconds": int(start_date.timestamp() * 1000),
            "endMSeconds": int(end_date.timestamp() * 1000),
        }

        if filters is not None:
            body["filters"] = filters

        LOGGER.info(body)

        try:
            response = self.client.make_request(
                self.get_url(), "POST", body=body)
        except RuntimeError as e:
            if "502" in str(e) or "504" in str(e):
                # try one more time
                response = self.client.make_request(
                    self.get_url(), "POST", body=body)
            else:
                raise e

        to_write = self.get_stream_data(response)

        with singer.metrics.record_counter(endpoint=table) as ctr:
            singer.write_records(table, to_write)
            ctr.increment(amount=len(to_write))

        for item in to_write:
            max_date = max(max_date, self.get_time_for_state(item))

        self.state = incorporate(self.state, table, "start_date", start_date)

        if max_date > datetime.datetime.now(pytz.UTC):
            done = True

        if len(to_write) == 0:
            LOGGER.info("Advancing one full interval.")

            if end_date > datetime.datetime.now(pytz.UTC):
                done = True
            else:
                start_date = end_date
        elif start_date == max_date:
            LOGGER.info("Advancing one millisecond.")
            start_date = start_date + datetime.timedelta(milliseconds=1)
        else:
            LOGGER.info("Advancing by one page.")
            start_date = max_date

        end_date = start_date + td

    save_state(self.state)

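# The single inline retry above handles transient 502/504 responses. Below is
# a sketch of how that could be generalised to a bounded retry with backoff,
# assuming (as the code above does) that the client surfaces HTTP failures as
# RuntimeError with the status code in the message and that LOGGER is the
# module-level logger used throughout. The helper name, attempt count, and
# backoff are illustrative, not part of the tap.
import time


def request_with_retries(client, url, method, body,
                         max_attempts=3, backoff_seconds=5):
    for attempt in range(1, max_attempts + 1):
        try:
            return client.make_request(url, method, body=body)
        except RuntimeError as e:
            transient = "502" in str(e) or "504" in str(e)

            if not transient or attempt == max_attempts:
                raise

            LOGGER.info("Transient error ({}), retrying in {} seconds".format(
                e, backoff_seconds))
            time.sleep(backoff_seconds)
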
def sync_data(self):
    table = self.TABLE
    done = False

    start_date = get_last_record_value_for_table(self.state, table)

    if start_date is None:
        start_date = get_config_start_date(self.config)
    else:
        start_date = start_date.replace(tzinfo=pytz.UTC)

    td = datetime.timedelta(hours=1)
    end_date = start_date + td

    while not done:
        max_date = start_date

        LOGGER.info("Querying {} starting at {}".format(table, start_date))

        body = {
            "filters": {
                "environment": self.config.get("environment"),
                "lastUpdated": {
                    "gte": int(start_date.timestamp() * 1000),
                    "lte": int(end_date.timestamp() * 1000),
                },
            }
        }

        LOGGER.info(body)

        response = self.client.make_request(self.get_url(), "POST", body=body)

        to_write = self.get_stream_data(response)

        with singer.metrics.record_counter(endpoint=table) as ctr:
            singer.write_records(table, to_write)
            ctr.increment(amount=len(to_write))

        for item in to_write:
            max_date = max(max_date, self.get_time_for_state(item))

        self.state = incorporate(self.state, table, "start_date", start_date)

        if max_date > datetime.datetime.now(pytz.UTC):
            done = True

        if len(to_write) == 0:
            LOGGER.info("Advancing one full interval.")

            if end_date > datetime.datetime.now(pytz.UTC):
                done = True
            else:
                start_date = end_date
        elif start_date == max_date:
            LOGGER.info("Advancing one second.")
            start_date = start_date + datetime.timedelta(seconds=1)
        else:
            LOGGER.info("Advancing by one page.")
            start_date = max_date

        end_date = start_date + td

    save_state(self.state)

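# The sync methods in this section all lean on a few state helpers --
# save_state, incorporate, and get_last_record_value_for_table -- whose
# definitions are not shown. The following is a minimal sketch of what they
# might look like, assuming state is a dict of per-table bookmarks emitted
# with singer.write_state and that python-dateutil is available; the real
# helpers in the tap may differ.
import datetime

import dateutil.parser
import singer


def save_state(state):
    # Emit the current state message so the runner/target can persist it.
    singer.write_state(state)


def incorporate(state, table, field, value):
    # Record `value` as the bookmark for `table` if it is newer than the
    # value already stored (datetimes are stored as ISO 8601 strings).
    if value is None:
        return state

    if isinstance(value, datetime.datetime):
        value = value.isoformat()

    current = state.get('bookmarks', {}).get(table, {}).get(field)

    if current is None or value > current:
        state.setdefault('bookmarks', {}).setdefault(table, {})[field] = value

    return state


def get_last_record_value_for_table(state, table):
    # Return the saved bookmark for `table` as a datetime, or None if the
    # table has never been synced.
    raw = state.get('bookmarks', {}).get(table, {}).get('start_date')

    if raw is None:
        return None

    return dateutil.parser.parse(raw)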