def do_sync(self):
    """
    Sync the products stream.

    Requests "/products/mine", emits a RecordMessage on the 'products'
    stream for each product whose updated date (or added date, when the
    updated date is empty) is later than the current bookmark, writes the
    newest date seen as the 'last_record' bookmark and stores the ids of
    all returned products on ``self.product_ids``.
    """
    cutoff = str_to_date(self.bookmark_date)
    latest_seen = cutoff

    response = self.client.make_request("/products/mine")
    seen_ids = []

    with singer.metrics.Counter('record_count', {'endpoint': 'products'}) as counter:
        for raw_product in response.json().values():
            # Every product id is remembered, whether or not it is emitted
            seen_ids.append(raw_product['id'])

            # Fall back to the added date when there is no updated date
            changed_on = str_to_date(
                raw_product['updated_date'] or raw_product['added_date'])
            cleaned_product = tidy_dates(raw_product)

            # Only upsert messages which have changed since the bookmark
            if changed_on > cutoff:
                message = singer.RecordMessage(
                    stream='products',
                    record=cleaned_product,
                )
                singer.write_message(message)

            if changed_on > latest_seen:
                latest_seen = changed_on

            # NOTE(review): the counter ticks once per product fetched, not
            # once per record emitted - confirm this is the intended metric.
            counter.increment()

    self.state = singer.write_bookmark(
        self.state, self.STREAM_NAME, 'last_record', date_to_str(latest_seen))

    self.product_ids = seen_ids
def do_sync(self):
    """
    Main sync functionality

    Allows for differences in schemas between catalog and the actual
    received data to unravel lists.
    This permits the user to get more granular ratings info
    (e.g. number of reviews for each rating)

    The range from the bookmark date up to yesterday is requested in
    windows of at most 28 days. After each window the 'last_record'
    bookmark is written with the greatest ``entry['date']`` seen so far in
    this run (never less than the bookmark the run started with).

    If a request raises ``RequestError`` the sync stops silently; bookmarks
    written for windows that already completed remain in ``self.state``.
    """
    one_day = datetime.timedelta(days=1)
    # Evaluate "yesterday" once so every window uses the same upper bound
    yesterday = datetime.date.today() - one_day
    window_start = str_to_date(self.bookmark_date).date()

    # Carried across windows: resetting this per window would let a later,
    # empty window overwrite the bookmark with the original (older) value.
    new_bookmark_date = self.bookmark_date

    schema_fields = self.schema['properties']

    while window_start <= yesterday:
        window_end = min(
            window_start + datetime.timedelta(days=28), yesterday)

        try:
            response = self.client.make_request(
                self.URI.format(
                    window_start.strftime('%Y-%m-%d'),
                    window_end.strftime('%Y-%m-%d')))
        except RequestError:
            return

        with singer.metrics.Counter(
                'record_count', {'endpoint': self.STREAM_NAME}) as counter:
            for entry in self.traverse_nested_dicts(
                    response.json(), self.RESPONSE_LEVELS):
                new_bookmark_date = max(new_bookmark_date, entry['date'])

                # Schema fields absent from the entry, and entry fields
                # absent from the schema
                missing_fields = [
                    field for field in schema_fields if field not in entry
                ]
                extra_fields = [
                    field for field in entry if field not in schema_fields
                ]

                if missing_fields and extra_fields:
                    # Flatten the values of the unexpected fields and assign
                    # them, in order, to the schema fields still unfilled
                    unravelled = list(
                        itertools.chain.from_iterable(
                            entry[field] for field in extra_fields))
                    for position, field in enumerate(missing_fields):
                        entry[field] = unravelled[position]
                    for field in extra_fields:
                        del entry[field]

                entry = strings_to_floats(entry)

                singer.write_message(
                    singer.RecordMessage(
                        stream=self.STREAM_NAME,
                        record=entry,
                    ))
                counter.increment()

        self.state = singer.write_bookmark(
            self.state, self.STREAM_NAME, 'last_record', new_bookmark_date)

        if window_end == yesterday:
            break
        # The next window begins on the day the previous one ended
        window_start = window_end
def do_sync(self):
    """
    Main sync functionality

    Most of the streams use this
    A few of the streams work differently and override this method

    Requests ``self.URI`` formatted with the bookmark date (YYYY-MM-DD),
    emits every entry yielded by ``traverse_nested_dicts`` as a record on
    ``self.STREAM_NAME`` and writes the greatest ``entry['date']`` seen
    (or the unchanged bookmark) as the 'last_record' bookmark.
    Returns without emitting or bookmarking when the request raises
    ``RequestError``.
    """
    since = str_to_date(self.bookmark_date).strftime('%Y-%m-%d')

    try:
        response = self.client.make_request(self.URI.format(since))
    except RequestError:
        return

    latest_date = self.bookmark_date
    metric_tags = {'endpoint': self.STREAM_NAME}

    with singer.metrics.Counter('record_count', metric_tags) as counter:
        raw_entries = self.traverse_nested_dicts(
            response.json(), self.RESPONSE_LEVELS)
        for raw_entry in raw_entries:
            # Track the newest date before the entry is converted
            if raw_entry['date'] > latest_date:
                latest_date = raw_entry['date']

            record = strings_to_floats(raw_entry)
            message = singer.RecordMessage(
                stream=self.STREAM_NAME,
                record=record,
            )
            singer.write_message(message)
            counter.increment()

    self.state = singer.write_bookmark(
        self.state, self.STREAM_NAME, 'last_record', latest_date)
def do_sync(self):
    """
    Sync daily ranks for the products in ``self.product_ids``.

    Starting at the bookmark date, requests
    ``/ranks/{ids}/daily/{start}/{end}`` in consecutive 28-day windows
    until the window start passes today. For each rank entry one record is
    emitted per date in the response's ``dates`` list, pairing the date
    with the entry's position and delta at the same index. After each
    window the 'last_record' bookmark is written with the latest date for
    which records were emitted so far.

    Returns immediately, leaving ``self.state`` untouched, when there are
    no product ids.
    """
    if not self.product_ids:
        # An empty id list would produce a malformed "/ranks//daily/..." URI
        return

    start_date = str_to_date(self.bookmark_date)
    new_bookmark_date = start_date
    product_ids = ';'.join(
        str(product_id) for product_id in self.product_ids)

    while start_date.date() <= date.today():
        end_date = start_date + timedelta(days=28)
        uri = '/ranks/{}/daily/{}/{}'.format(
            product_ids,
            start_date.strftime('%Y-%m-%d'),
            end_date.strftime('%Y-%m-%d'))

        data = self.client.make_request(uri).json()
        rank_dates = data['dates']
        rank_data = data['data']

        with singer.metrics.Counter('record_count', {'endpoint': 'ranks'}) as counter:
            for rank_entry in rank_data:
                positions = rank_entry['positions']
                deltas = rank_entry['deltas']
                for i, rank_date in enumerate(rank_dates):
                    record_data = dict(
                        country=rank_entry['country'],
                        category=rank_entry['category'],
                        product_id=rank_entry['product_id'],
                        position=positions[i],
                        delta=deltas[i],
                        date=rank_date,
                    )
                    record_data = strings_to_floats(record_data)

                    singer.write_message(
                        singer.RecordMessage(
                            stream=self.STREAM_NAME,
                            record=record_data,
                        ))
                    counter.increment()

        # All rank entries share the same date list, so parse it once per
        # window, and only advance the bookmark when records were emitted.
        if rank_data:
            for rank_date in rank_dates:
                new_bookmark_date = max(
                    new_bookmark_date, str_to_date(rank_date))

        self.state = singer.write_bookmark(
            self.state, self.STREAM_NAME, 'last_record',
            date_to_str(new_bookmark_date))

        start_date = end_date