Esempio n. 1
0
    def sync(self, mdata, channel_id, ts):
        """Fetch the replies of one thread and emit each reply as a record.

        The requested time range comes from ``get_absolute_date_range``
        applied to the configured ``start_date``.

        :param mdata: stream metadata, in the form accepted by
            ``metadata.to_map``
        :param channel_id: channel passed to ``client.get_thread`` and to
            ``transform_json``
        :param ts: thread timestamp passed to ``client.get_thread``
        """
        schema = self.load_schema()
        # The metadata map is identical for every record, so build it once
        # instead of once per transformed message.
        mdata_map = metadata.to_map(mdata)
        start, end = self.get_absolute_date_range(
            self.config.get('start_date'))

        # The timer is only needed for its enter/exit side effects, so it is
        # not bound to a name.
        with singer.metrics.job_timer(job_type='list_threads'), \
                singer.metrics.record_counter(endpoint=self.name) as counter:
            replies = self.client.get_thread(channel=channel_id,
                                             ts=ts,
                                             inclusive="true",
                                             oldest=int(start.timestamp()),
                                             latest=int(end.timestamp()))

            for page in replies:
                transformed_threads = transform_json(
                    stream=self.name,
                    data=page.get('messages', []),
                    date_fields=self.date_fields,
                    channel_id=channel_id)
                for message in transformed_threads:
                    with singer.Transformer(
                            integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                            as transformer:
                        transformed_record = transformer.transform(
                            data=message,
                            schema=schema,
                            metadata=mdata_map)
                        # Records are only emitted (and counted) when output
                        # is enabled for this stream instance.
                        if self.write_to_singer:
                            singer.write_record(
                                stream_name=self.name,
                                time_extracted=singer.utils.now(),
                                record=transformed_record)
                            counter.increment()
Esempio n. 2
0
    def sync(self, mdata):
        """Emit one record for every channel yielded by ``self.channels()``.

        :param mdata: stream metadata, in the form accepted by
            ``metadata.to_map``
        """
        schema = self.load_schema()
        # The metadata map is identical for every channel, so build it once
        # instead of once per record.
        mdata_map = metadata.to_map(mdata)

        # The timer is only needed for its enter/exit side effects, so it is
        # not bound to a name.
        with singer.metrics.job_timer(job_type='list_conversations'), \
                singer.metrics.record_counter(endpoint=self.name) as counter:
            for channel in self.channels():
                # transform_json is given a one-element list; the single
                # transformed channel is read back from index 0 below.
                transformed_channel = transform_json(
                    stream=self.name,
                    data=[channel],
                    date_fields=self.date_fields)
                with singer.Transformer(
                        integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                        as transformer:
                    transformed_record = transformer.transform(
                        data=transformed_channel[0],
                        schema=schema,
                        metadata=mdata_map)
                    if self.write_to_singer:
                        singer.write_record(
                            stream_name=self.name,
                            time_extracted=singer.utils.now(),
                            record=transformed_record)
                        counter.increment()
Esempio n. 3
0
    def sync(self, mdata):
        """Emit users whose ``updated`` value is newer than the bookmark.

        The bookmark is read from state under ``self.replication_key`` and
        falls back to the configured ``start_date``. After all pages are
        processed, the largest ``updated`` value seen is written back as the
        new bookmark.

        :param mdata: stream metadata, in the form accepted by
            ``metadata.to_map``
        """
        schema = self.load_schema()
        # The metadata map is identical for every user, so build it once.
        mdata_map = metadata.to_map(mdata)
        bookmark = singer.get_bookmark(state=self.state, tap_stream_id=self.name,
                                       key=self.replication_key)
        if bookmark is None:
            bookmark = self.config.get('start_date')
        new_bookmark = bookmark

        # The timer is only needed for its enter/exit side effects, so it is
        # not bound to a name.
        with singer.metrics.job_timer(job_type='list_users'), \
                singer.metrics.record_counter(endpoint=self.name) as counter:
            users_list = self.client.get_users(limit=100)

            for page in users_list:
                # Default to an empty list so a page without 'members' does
                # not hand None to transform_json.
                users = page.get('members', [])
                transformed_users = transform_json(stream=self.name, data=users,
                                                   date_fields=self.date_fields)
                for user in transformed_users:
                    with singer.Transformer(
                            integer_datetime_fmt="unix-seconds-integer-datetime-parsing") \
                            as transformer:
                        transformed_record = transformer.transform(
                            data=user, schema=schema, metadata=mdata_map)
                        updated = transformed_record.get('updated')
                        if updated is None:
                            # Without an 'updated' value the record cannot be
                            # compared to the bookmark (comparing None would
                            # raise TypeError). Emit it rather than drop it,
                            # and leave the bookmark untouched.
                            is_new = True
                        else:
                            new_bookmark = max(new_bookmark, updated)
                            is_new = updated > bookmark
                        if is_new and self.write_to_singer:
                            singer.write_record(stream_name=self.name,
                                                time_extracted=singer.utils.now(),
                                                record=transformed_record)
                            counter.increment()

        self.state = singer.write_bookmark(state=self.state, tap_stream_id=self.name,
                                           key=self.replication_key, val=new_bookmark)
Esempio n. 4
0
    def sync(self, mdata):
        """Sync remote files in consecutive date windows.

        The range starts at the stream bookmark (falling back to the
        configured ``start_date``) and is walked in windows of
        ``self.date_window_size`` days. Records whose ``timestamp`` is at or
        after the range start are emitted; the bookmark is updated after
        every page.

        :param mdata: stream metadata, in the form accepted by
            ``metadata.to_map``
        :raises ValueError: if a record's ``timestamp`` is missing or cannot
            be converted with ``int``
        """
        schema = self.load_schema()
        # The metadata map is identical for every record, so build it once.
        mdata_map = metadata.to_map(mdata)

        # The timer is only needed for its enter/exit side effects, so it is
        # not bound to a name.
        with singer.metrics.job_timer(job_type='list_files'), \
                singer.metrics.record_counter(endpoint=self.name) as counter:

            bookmark_date = self.get_bookmark(
                self.name, self.config.get('start_date'))
            start, end = self.get_absolute_date_range(bookmark_date)
            start_ts = start.timestamp()

            # Coerce the window size once so the first window and every
            # following one use the same value, even if the setting is
            # stored as a string.
            window_size = timedelta(days=int(self.date_window_size))

            # Window the requests based on the tap configuration
            date_window_start = start
            date_window_end = start + window_size
            min_bookmark = start
            max_bookmark = start

            while date_window_start < date_window_end:
                remote_files_list = self.client.get_remote_files(
                    from_ts=int(date_window_start.timestamp()),
                    to_ts=int(date_window_end.timestamp()))

                for page in remote_files_list:
                    # Default to an empty list so a page without 'files'
                    # does not hand None to transform_json.
                    remote_files = page.get('files', [])
                    transformed_files = transform_json(
                        stream=self.name,
                        data=remote_files,
                        date_fields=self.date_fields)
                    for remote_file in transformed_files:
                        with singer.Transformer(
                                integer_datetime_fmt=
                                "unix-seconds-integer-datetime-parsing"
                        ) as transformer:
                            transformed_record = transformer.transform(
                                data=remote_file,
                                schema=schema,
                                metadata=mdata_map)
                            record_timestamp_int = int(
                                remote_file.get('timestamp', ''))

                            if record_timestamp_int >= start_ts:
                                if self.write_to_singer:
                                    singer.write_record(
                                        stream_name=self.name,
                                        time_extracted=singer.utils.now(),
                                        record=transformed_record)
                                    counter.increment()

                                # Build the record time as a UTC-aware
                                # datetime once and store bookmarks in that
                                # same form, so comparisons never mix naive
                                # local times with aware ones.
                                record_dttm = datetime.fromtimestamp(
                                    record_timestamp_int, tz=utc)
                                if record_dttm > max_bookmark.replace(
                                        tzinfo=utc):
                                    # Original author's note: records are
                                    # sorted most recent first, so this
                                    # should only fire once per sync.
                                    max_bookmark = record_dttm
                                elif record_dttm < min_bookmark:
                                    # The min bookmark tracks how far back
                                    # we've synced during the sync, since
                                    # the records are ordered
                                    # newest -> oldest
                                    min_bookmark = record_dttm
                    self.update_bookmarks(
                        self.name, min_bookmark.strftime(DATETIME_FORMAT))
                # Update the date window, never letting it run past the end
                # of the range.
                date_window_start = date_window_end
                date_window_end = date_window_start + window_size
                if date_window_end > end:
                    date_window_end = end
Esempio n. 5
0
    def sync(self, mdata):
        """Sync channel messages in date windows, plus threads if selected.

        For every channel from ``self.channels()`` the range starts at that
        channel's bookmark (falling back to the configured ``start_date``)
        and is walked in windows of ``self.date_window_size`` days. Messages
        whose ``ts`` is at or after the range start are emitted. When the
        ``threads`` stream is selected in the catalog, each message carrying
        a ``thread_ts`` also triggers a sync of that thread.

        :param mdata: stream metadata, in the form accepted by
            ``metadata.to_map``
        :raises ValueError: if a message's ``ts`` is missing or its integer
            part cannot be converted with ``int``
        """
        schema = self.load_schema()
        # The metadata map is identical for every record, so build it once.
        mdata_map = metadata.to_map(mdata)
        threads_stream = None
        threads_mdata = None

        # If threads are also being synced we'll need to do that for each message
        for catalog_entry in self.catalog.get_selected_streams(self.state):
            if catalog_entry.stream == 'threads':
                threads_mdata = catalog_entry.metadata
                threads_stream = ThreadsStream(client=self.client,
                                               config=self.config,
                                               catalog=self.catalog,
                                               state=self.state)

        # The timer is only needed for its enter/exit side effects, so it is
        # not bound to a name.
        with singer.metrics.job_timer(
                job_type='list_conversation_history'), \
                singer.metrics.record_counter(endpoint=self.name) as counter:
            for channel in self.channels():
                channel_id = channel.get('id')

                bookmark_date = self.get_bookmark(
                    channel_id, self.config.get('start_date'))
                start, end = self.get_absolute_date_range(bookmark_date)
                start_ts = start.timestamp()

                # Coerce the window size once so the first window and every
                # following one use the same value, even if the setting is
                # stored as a string.
                window_size = timedelta(days=int(self.date_window_size))

                # Window the requests based on the tap configuration
                date_window_start = start
                date_window_end = start + window_size
                min_bookmark = start
                max_bookmark = start

                while date_window_start < date_window_end:

                    messages = self.client \
                        .get_messages(channel=channel_id,
                                      oldest=int(date_window_start.timestamp()),
                                      latest=int(date_window_end.timestamp()))

                    if not messages:
                        # A falsy result ends windowing for this channel,
                        # as before; the next channel starts afresh.
                        break

                    for page in messages:
                        # Use a separate name so the iterable being looped
                        # over is not rebound mid-loop. Default to an empty
                        # list so a page without 'messages' does not hand
                        # None to transform_json.
                        page_messages = page.get('messages', [])
                        transformed_messages = transform_json(
                            stream=self.name,
                            data=page_messages,
                            date_fields=self.date_fields,
                            channel_id=channel_id)
                        for message in transformed_messages:
                            # Keys in the message take precedence over the
                            # channel_id seeded here.
                            data = {'channel_id': channel_id, **message}

                            # If threads are being synced then the message data for the
                            # message the threaded replies are in response to will be
                            # synced to the messages table as well as the threads table
                            if threads_stream and data.get('thread_ts'):
                                # If threads is selected we need to sync all the
                                # threaded replies to this message
                                threads_stream.write_schema()
                                threads_stream.sync(
                                    mdata=threads_mdata,
                                    channel_id=channel_id,
                                    ts=data.get('thread_ts'))
                                threads_stream.write_state()
                            with singer.Transformer(
                                    integer_datetime_fmt=
                                    "unix-seconds-integer-datetime-parsing"
                            ) as transformer:
                                transformed_record = transformer.transform(
                                    data=data,
                                    schema=schema,
                                    metadata=mdata_map)
                                # Only the part of 'ts' before the first '.'
                                # is used for comparisons.
                                record_timestamp = data.get(
                                    'ts', '').partition('.')[0]
                                record_timestamp_int = int(record_timestamp)
                                if record_timestamp_int >= start_ts:
                                    if self.write_to_singer:
                                        singer.write_record(
                                            stream_name=self.name,
                                            time_extracted=singer.utils.now(),
                                            record=transformed_record)
                                        counter.increment()

                                    # Build the record time as a UTC-aware
                                    # datetime once and store bookmarks in
                                    # that same form, so comparisons never
                                    # mix naive local times with aware ones.
                                    record_dttm = datetime.fromtimestamp(
                                        record_timestamp_int, tz=utc)
                                    if record_dttm > max_bookmark.replace(
                                            tzinfo=utc):
                                        # Original author's note: records are
                                        # sorted most recent first, so this
                                        # should only fire once every sync,
                                        # per channel
                                        max_bookmark = record_dttm
                                    elif record_dttm < min_bookmark:
                                        # The min bookmark tracks how far
                                        # back we've synced during the sync,
                                        # since the records are ordered
                                        # newest -> oldest
                                        min_bookmark = record_dttm
                        self.update_bookmarks(
                            channel_id,
                            min_bookmark.strftime(DATETIME_FORMAT))
                    # Update the date window, never letting it run past the
                    # end of the range.
                    date_window_start = date_window_end
                    date_window_end = date_window_start + window_size
                    if date_window_end > end:
                        date_window_end = end