def sync(self, mdata, channel_id, ts):
    """Sync all threaded replies for the message identified by ``ts``.

    Fetches the thread from the Slack client bounded by the configured
    absolute date range, transforms each reply, and emits it as a
    singer record (when ``write_to_singer`` is enabled).
    """
    schema = self.load_schema()
    start, end = self.get_absolute_date_range(self.config.get('start_date'))

    # pylint: disable=unused-variable
    with singer.metrics.job_timer(job_type='list_threads') as timer:
        with singer.metrics.record_counter(endpoint=self.name) as counter:
            thread_pages = self.client.get_thread(
                channel=channel_id,
                ts=ts,
                inclusive="true",
                oldest=int(start.timestamp()),
                latest=int(end.timestamp()))
            for reply_page in thread_pages:
                replies = transform_json(
                    stream=self.name,
                    data=reply_page.get('messages', []),
                    date_fields=self.date_fields,
                    channel_id=channel_id)
                for reply in replies:
                    with singer.Transformer(
                            integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                            as transformer:
                        record = transformer.transform(
                            data=reply,
                            schema=schema,
                            metadata=metadata.to_map(mdata))
                        if self.write_to_singer:
                            singer.write_record(
                                stream_name=self.name,
                                time_extracted=singer.utils.now(),
                                record=record)
                            counter.increment()
def sync(self, mdata):
    """Emit one singer record per Slack conversation (channel)."""
    schema = self.load_schema()

    # pylint: disable=unused-variable
    with singer.metrics.job_timer(job_type='list_conversations') as timer:
        with singer.metrics.record_counter(endpoint=self.name) as counter:
            for raw_channel in self.channels():
                # transform_json expects a list; wrap the single channel.
                channel_rows = transform_json(
                    stream=self.name,
                    data=[raw_channel],
                    date_fields=self.date_fields)
                with singer.Transformer(
                        integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                        as transformer:
                    record = transformer.transform(
                        data=channel_rows[0],
                        schema=schema,
                        metadata=metadata.to_map(mdata))
                    if self.write_to_singer:
                        singer.write_record(
                            stream_name=self.name,
                            time_extracted=singer.utils.now(),
                            record=record)
                        counter.increment()
def sync(self, mdata):
    """Incrementally sync Slack users against the stored bookmark.

    Users whose ``updated`` value is newer than the saved bookmark are
    emitted; the bookmark is advanced to the newest ``updated`` value
    seen during the run.
    """
    schema = self.load_schema()

    saved_bookmark = singer.get_bookmark(state=self.state,
                                         tap_stream_id=self.name,
                                         key=self.replication_key)
    # Fall back to the configured start_date on the first-ever sync.
    bookmark = (self.config.get('start_date')
                if saved_bookmark is None else saved_bookmark)
    new_bookmark = bookmark

    # pylint: disable=unused-variable
    with singer.metrics.job_timer(job_type='list_users') as timer:
        with singer.metrics.record_counter(endpoint=self.name) as counter:
            for page in self.client.get_users(limit=100):
                members = transform_json(stream=self.name,
                                         data=page.get('members'),
                                         date_fields=self.date_fields)
                for member in members:
                    with singer.Transformer(
                            integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                            as transformer:
                        record = transformer.transform(
                            data=member,
                            schema=schema,
                            metadata=metadata.to_map(mdata))
                        # Track the newest 'updated' value seen this run.
                        new_bookmark = max(new_bookmark,
                                           record.get('updated'))
                        # Only emit users modified since the last bookmark.
                        if record.get('updated') > bookmark:
                            if self.write_to_singer:
                                singer.write_record(
                                    stream_name=self.name,
                                    time_extracted=singer.utils.now(),
                                    record=record)
                                counter.increment()

    self.state = singer.write_bookmark(state=self.state,
                                       tap_stream_id=self.name,
                                       key=self.replication_key,
                                       val=new_bookmark)
def sync(self, mdata):
    """Sync the remote-files stream in date windows.

    Starting from the saved bookmark (or the configured start_date),
    requests files in windows of ``date_window_size`` days, emits every
    transformed file record whose ``timestamp`` is at/after the absolute
    start, and writes the stream bookmark from ``min_bookmark`` after
    each page.
    """
    schema = self.load_schema()

    # pylint: disable=unused-variable
    with singer.metrics.job_timer(job_type='list_files') as timer:
        with singer.metrics.record_counter(endpoint=self.name) as counter:
            bookmark_date = self.get_bookmark(
                self.name, self.config.get('start_date'))
            start, end = self.get_absolute_date_range(bookmark_date)

            # Window the requests based on the tap configuration
            date_window_start = start
            date_window_end = start + timedelta(
                days=int(self.date_window_size))
            min_bookmark = start
            max_bookmark = start
            while date_window_start < date_window_end:
                remote_files_list = self.client.get_remote_files(
                    from_ts=int(date_window_start.timestamp()),
                    to_ts=int(date_window_end.timestamp()))
                for page in remote_files_list:
                    remote_files = page.get('files')
                    transformed_files = transform_json(
                        stream=self.name,
                        data=remote_files,
                        date_fields=self.date_fields)
                    for file in transformed_files:
                        with singer.Transformer(
                                integer_datetime_fmt=
                                "unix-seconds-integer-datetime-parsing"
                        ) as transformer:
                            transformed_record = transformer.transform(
                                data=file,
                                schema=schema,
                                metadata=metadata.to_map(mdata))
                            # Raw epoch-seconds timestamp from the file payload.
                            record_timestamp = \
                                file.get('timestamp', '')
                            record_timestamp_int = int(record_timestamp)
                            # Skip records older than the absolute start of the range.
                            if record_timestamp_int >= start.timestamp():
                                if self.write_to_singer:
                                    singer.write_record(
                                        stream_name=self.name,
                                        time_extracted=singer.utils.now(),
                                        record=transformed_record)
                                    counter.increment()
                            # NOTE(review): comparisons use utcfromtimestamp(...)
                            # made aware with tzinfo=utc, but the assignments use
                            # naive local-time fromtimestamp(...) — looks
                            # inconsistent; confirm intended bookmark timezone.
                            if datetime.utcfromtimestamp(
                                    record_timestamp_int).replace(
                                        tzinfo=utc
                                    ) > max_bookmark.replace(
                                        tzinfo=utc):
                                # Records are sorted by most recent first, so this
                                # should only fire once every sync, per channel
                                max_bookmark = datetime.fromtimestamp(
                                    record_timestamp_int)
                            elif datetime.utcfromtimestamp(
                                    record_timestamp_int).replace(
                                        tzinfo=utc) < min_bookmark:
                                # The min bookmark tracks how far back we've synced
                                # during the sync, since the records are ordered
                                # newest -> oldest
                                min_bookmark = datetime.fromtimestamp(
                                    record_timestamp_int)
                    # NOTE(review): the bookmark is persisted from min_bookmark;
                    # max_bookmark is computed but never written — confirm intended.
                    self.update_bookmarks(
                        self.name,
                        min_bookmark.strftime(DATETIME_FORMAT))
                # Update the date window
                date_window_start = date_window_end
                date_window_end = date_window_start + timedelta(
                    days=self.date_window_size)
                if date_window_end > end:
                    date_window_end = end
def sync(self, mdata):
    """Sync conversation history (messages) for every channel.

    For each channel, requests messages in windows of
    ``date_window_size`` days starting from the channel's bookmark (or
    the configured start_date).  When the 'threads' stream is selected,
    the threaded replies of each message carrying a ``thread_ts`` are
    synced via a ThreadsStream as well.  Per-channel bookmarks are
    written from ``min_bookmark`` as windows complete.
    """
    schema = self.load_schema()
    threads_stream = None
    threads_mdata = None

    # If threads are also being synced we'll need to do that for each message
    for catalog_entry in self.catalog.get_selected_streams(self.state):
        if catalog_entry.stream == 'threads':
            threads_mdata = catalog_entry.metadata
            threads_stream = ThreadsStream(client=self.client,
                                           config=self.config,
                                           catalog=self.catalog,
                                           state=self.state)

    # pylint: disable=unused-variable
    with singer.metrics.job_timer(
            job_type='list_conversation_history') as timer:
        with singer.metrics.record_counter(endpoint=self.name) as counter:
            for channel in self.channels():
                channel_id = channel.get('id')
                bookmark_date = self.get_bookmark(
                    channel_id, self.config.get('start_date'))
                start, end = self.get_absolute_date_range(bookmark_date)

                # Window the requests based on the tap configuration
                date_window_start = start
                date_window_end = start + timedelta(
                    days=int(self.date_window_size))
                min_bookmark = start
                max_bookmark = start
                while date_window_start < date_window_end:
                    messages = self.client \
                        .get_messages(channel=channel_id,
                                      oldest=int(date_window_start.timestamp()),
                                      latest=int(date_window_end.timestamp()))
                    if messages:
                        for page in messages:
                            messages = page.get('messages')
                            transformed_messages = transform_json(
                                stream=self.name,
                                data=messages,
                                date_fields=self.date_fields,
                                channel_id=channel_id)
                            for message in transformed_messages:
                                # Prepend channel_id; message keys win on collision.
                                data = {'channel_id': channel_id}
                                data = {**data, **message}
                                # If threads are being synced then the message data for the
                                # message the threaded replies are in response to will be
                                # synced to the messages table as well as the threads table
                                if threads_stream and data.get(
                                        'thread_ts'):
                                    # If threads is selected we need to sync all the
                                    # threaded replies to this message
                                    threads_stream.write_schema()
                                    threads_stream.sync(
                                        mdata=threads_mdata,
                                        channel_id=channel_id,
                                        ts=data.get('thread_ts'))
                                    threads_stream.write_state()
                                with singer.Transformer(
                                        integer_datetime_fmt=
                                        "unix-seconds-integer-datetime-parsing"
                                ) as transformer:
                                    transformed_record = transformer.transform(
                                        data=data,
                                        schema=schema,
                                        metadata=metadata.to_map(mdata))
                                    # Slack 'ts' looks like "1614120000.000200";
                                    # keep the whole-seconds part only.
                                    record_timestamp = data.get(
                                        'ts', '').partition('.')[0]
                                    record_timestamp_int = int(
                                        record_timestamp)
                                    # Skip records older than the absolute start.
                                    if record_timestamp_int >= start.timestamp(
                                    ):
                                        if self.write_to_singer:
                                            singer.write_record(
                                                stream_name=self.name,
                                                time_extracted=singer.
                                                utils.now(),
                                                record=transformed_record)
                                            counter.increment()
                                    # NOTE(review): comparisons use aware
                                    # utcfromtimestamp(...) values but the
                                    # assignments use naive local-time
                                    # fromtimestamp(...) — confirm intended
                                    # bookmark timezone.
                                    if datetime.utcfromtimestamp(
                                            record_timestamp_int
                                    ).replace(tzinfo=utc
                                              ) > max_bookmark.replace(
                                                  tzinfo=utc):
                                        # Records are sorted by most recent first, so this
                                        # should only fire once every sync, per channel
                                        max_bookmark = datetime.fromtimestamp(
                                            record_timestamp_int)
                                    elif datetime.utcfromtimestamp(
                                            record_timestamp_int
                                    ).replace(
                                            tzinfo=utc) < min_bookmark:
                                        # The min bookmark tracks how far back we've synced
                                        # during the sync, since the records are ordered
                                        # newest -> oldest
                                        min_bookmark = datetime.fromtimestamp(
                                            record_timestamp_int)
                        # NOTE(review): bookmark is persisted from min_bookmark;
                        # max_bookmark is never written — confirm intended.
                        self.update_bookmarks(
                            channel_id,
                            min_bookmark.strftime(DATETIME_FORMAT))
                        # Update the date window
                        date_window_start = date_window_end
                        date_window_end = date_window_start + timedelta(
                            days=self.date_window_size)
                        if date_window_end > end:
                            date_window_end = end
                    else:
                        # NOTE(review): an empty window collapses the window
                        # (start = end) and ends the while loop for this
                        # channel, leaving any later windows unsynced —
                        # confirm this early exit is intended.
                        date_window_start = date_window_end