Example #1
    def do_sync(self):
        LOGGER.info("Starting sync.")

        streams = self.get_streams_to_replicate()
        stream_map = {s.NAME: s for s in streams}

        for available_stream in AVAILABLE_STREAMS:
            if available_stream.NAME not in stream_map:
                continue

            stream = stream_map[available_stream.NAME]
            try:
                # Hand the shared state to the stream, sync, then read
                # the updated bookmarks back.
                stream.state = self.state
                stream.sync()
                self.state = stream.state
            except OSError as e:
                LOGGER.error(str(e))
                exit(e.errno)

            except Exception as e:
                LOGGER.error(str(e))
                LOGGER.error('Failed to sync endpoint {}, moving on!'
                             .format(stream.TABLE))
                raise

        save_state(self.state)
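
The runner above threads one shared state dict through every stream: it seeds `stream.state`, lets `sync()` advance the stream's bookmark, then reads the state back before persisting it. A minimal, self-contained sketch of that round trip (the `Stream` class and `save_state` here are hypothetical stand-ins, not the tap's real classes):

    import json

    class Stream:
        NAME = 'users'
        TABLE = 'users'

        def __init__(self):
            self.state = {}

        def sync(self):
            # A real stream would emit records here and record a bookmark.
            self.state[self.TABLE] = {'last_record': '2020-01-01T00:00:00Z'}

    def save_state(state):
        # Singer taps emit state as a JSON message on stdout.
        print(json.dumps({'type': 'STATE', 'value': state}))

    state = {}
    stream = Stream()
    stream.state = state      # seed the stream with the shared state
    stream.sync()             # the stream advances its own bookmark
    state = stream.state      # read the updated state back
    save_state(state)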
Example #2
    def do_sync(self):
        LOGGER.info("Starting sync.")

        streams = self.get_streams_to_replicate()

        for stream in streams:
            # Don't sync substreams directly -- sync them
            # via their parents
            if len(stream.REQUIRES) > 0:
                continue

            try:
                stream.state = self.state
                stream.sync()
                self.state = stream.state
            except OSError as e:
                LOGGER.error(str(e))
                exit(e.errno)

            except Exception as e:
                LOGGER.error(str(e))
                LOGGER.error('Failed to sync endpoint {}, moving on!'
                             .format(stream.TABLE))
                raise

        LOGGER.info('saving state with {}'.format(self.state))
        save_state(self.state)
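
Example #2 differs from Example #1 in one respect: any stream whose `REQUIRES` list is non-empty is skipped, on the assumption that its parent stream will sync it with the right parent IDs. A small sketch of that split, using hypothetical stream classes:

    class TweetsStream:
        NAME = 'tweets'
        REQUIRES = []            # top-level stream: synced directly

    class EngagementsStream:
        NAME = 'tweet_engagements'
        REQUIRES = ['tweets']    # substream: synced via its parent

    streams = [TweetsStream(), EngagementsStream()]
    top_level = [s for s in streams if len(s.REQUIRES) == 0]
    assert [s.NAME for s in top_level] == ['tweets']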
Example #3
    def sync_data(self, parent_ids=None):
        if parent_ids is None:
            raise RuntimeError(
                'Cannot pull tweet engagements without parent tweet IDs')

        self.write_schema()

        start = self.get_start_for_tweet_ids(parent_ids)

        LOGGER.info(
            "Pulling data from {} for a batch of 25 tweets".format(start))

        table = self.TABLE

        url = self.get_url()

        while True:
            # Query at most a four-week window, and stop once the
            # bookmark is within three hours of the current time.
            start = self.get_start_for_tweet_ids(parent_ids)
            end = min(datetime.datetime.utcnow(),
                      start + datetime.timedelta(weeks=4))

            if start > (end - datetime.timedelta(hours=3)):
                break

            body = self.get_body(start, end, parent_ids)

            self.request_start = datetime.datetime.utcnow()

            result = self.client.make_request(url, self.API_METHOD, body=body)

            data = self.get_stream_data(result)

            with singer.metrics.record_counter(endpoint=table) as counter:
                for index, obj in enumerate(data):
                    singer.write_records(table, [self.filter_keys(obj)])

                    self.state = incorporate(
                        self.state,
                        'tweet_engagements.{}'.format(obj.get('tweet_id')),
                        'date', obj.get('date'))

                    counter.increment()

            save_state(self.state)

            max_sleep = 35
            # Sleep out the remainder of a 35-second window measured
            # from the start of the request.
            sleep_seconds = min(
                max_sleep,
                ((self.request_start + datetime.timedelta(seconds=max_sleep)) -
                 datetime.datetime.utcnow()).seconds)

            if sleep_seconds > 0:
                LOGGER.info("Sleeping for {} seconds before making "
                            "next request".format(sleep_seconds))
                time.sleep(sleep_seconds)
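
The sleep at the end of the loop throttles to roughly one request per 35 seconds. One caveat worth knowing: `timedelta.seconds` on a negative delta wraps to a large positive value (Python normalizes a negative timedelta as `days=-1` plus a positive seconds field), so a request that already took longer than 35 seconds still sleeps the full 35. A sketch of the same throttle using `total_seconds()`, which avoids the wrap (`throttle` is an illustrative helper, not part of the tap):

    import datetime
    import time

    def throttle(request_start, max_sleep=35):
        # Seconds left until one full interval has elapsed since the
        # request started; negative when the request overran it.
        remaining = (
            request_start + datetime.timedelta(seconds=max_sleep)
            - datetime.datetime.utcnow()
        ).total_seconds()
        sleep_seconds = min(max_sleep, max(0, remaining))
        if sleep_seconds > 0:
            time.sleep(sleep_seconds)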
Example #4
    def do_sync(self):
        LOGGER.info("Starting sync.")

        streams = self.get_streams_to_replicate()

        for stream in streams:
            try:
                stream.state = self.state
                stream.sync()
                self.state = stream.state
            except OSError as e:
                LOGGER.error(str(e))
                exit(e.errno)

            except Exception as e:
                LOGGER.error(str(e))
                LOGGER.error('Failed to sync endpoint {}, moving on!'.format(
                    stream.TABLE))
                raise

        save_state(self.state)
Example #5
    def sync_data(self):
        table = self.TABLE
        done = False

        filters = self.get_filters()
        start_date = get_last_record_value_for_table(self.state, table)

        if start_date is None:
            start_date = get_config_start_date(self.config)
        else:
            start_date = start_date.replace(tzinfo=pytz.UTC)

        td = self.get_interval()

        end_date = start_date + td

        while not done:
            max_date = start_date

            LOGGER.info("Querying {} starting at {}".format(table, start_date))

            body = {
                "startMSeconds": int(start_date.timestamp() * 1000),
                "endMSeconds": int(end_date.timestamp() * 1000),
            }

            if filters is not None:
                body["filters"] = filters

            LOGGER.info(body)

            try:
                response = self.client.make_request(self.get_url(),
                                                    "POST",
                                                    body=body)
            except RuntimeError as e:
                if "502" in str(e) or "504" in str(e):
                    # try one more time
                    response = self.client.make_request(self.get_url(),
                                                        "POST",
                                                        body=body)

                else:
                    raise

            to_write = self.get_stream_data(response)

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

                for item in to_write:
                    max_date = max(max_date, self.get_time_for_state(item))

            self.state = incorporate(self.state, table, "start_date",
                                     start_date)

            if max_date > datetime.datetime.now(pytz.UTC):
                done = True

            if len(to_write) == 0:
                LOGGER.info("Advancing one full interval.")

                if end_date > datetime.datetime.now(pytz.UTC):
                    done = True
                else:
                    start_date = end_date

            elif start_date == max_date:
                LOGGER.info("Advancing one millisecond.")
                start_date = start_date + datetime.timedelta(milliseconds=1)
            else:
                LOGGER.info("Advancing by one page.")
                start_date = max_date

            end_date = start_date + td

            save_state(self.state)
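
The `RuntimeError` handler above retries exactly once on a gateway error (502 or 504) and re-raises anything else. The same single-retry policy as a standalone helper, sketched under the assumption that the client raises `RuntimeError` with the status code in its message (as the example implies):

    def request_with_retry(client, url, body):
        # Retry once on transient gateway errors; everything else
        # propagates to the caller unchanged.
        try:
            return client.make_request(url, 'POST', body=body)
        except RuntimeError as e:
            if '502' in str(e) or '504' in str(e):
                return client.make_request(url, 'POST', body=body)
            raise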
Example #6
    def sync_data(self):
        table = self.TABLE
        done = False

        start_date = get_last_record_value_for_table(self.state, table)

        if start_date is None:
            start_date = get_config_start_date(self.config)
        else:
            start_date = start_date.replace(tzinfo=pytz.UTC)

        td = datetime.timedelta(hours=1)

        end_date = start_date + td

        while not done:
            max_date = start_date

            LOGGER.info("Querying {} starting at {}".format(table, start_date))

            body = {
                "filters": {
                    "environment": self.config.get("environment"),
                    "lastUpdated": {
                        "gte": int(start_date.timestamp() * 1000),
                        "lte": int(end_date.timestamp() * 1000),
                    },
                }
            }

            LOGGER.info(body)

            response = self.client.make_request(self.get_url(),
                                                "POST",
                                                body=body)

            to_write = self.get_stream_data(response)

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

                for item in to_write:
                    max_date = max(max_date, self.get_time_for_state(item))

            self.state = incorporate(self.state, table, "start_date",
                                     start_date)

            if max_date > datetime.datetime.now(pytz.UTC):
                done = True

            if len(to_write) == 0:
                LOGGER.info("Advancing one full interval.")

                if end_date > datetime.datetime.now(pytz.UTC):
                    done = True
                else:
                    start_date = end_date

            elif start_date == max_date:
                LOGGER.info("Advancing one second.")
                start_date = start_date + datetime.timedelta(seconds=1)

            else:
                LOGGER.info("Advancing by one page.")
                start_date = max_date

            end_date = start_date + td

            save_state(self.state)
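
Examples #5 and #6 share the same windowed pagination: query `[start_date, end_date]`, then advance by a full interval when the window came back empty, by one tick when the page did not move the bookmark, or jump to the newest record time otherwise. The advance rule condensed into one hypothetical function (`tick` is one millisecond in Example #5 and one second in Example #6):

    import datetime
    import pytz

    def next_start(start_date, end_date, max_date, page_size,
                   tick=datetime.timedelta(seconds=1)):
        # Returns the next window start, or None once caught up.
        now = datetime.datetime.now(pytz.UTC)
        if max_date > now or (page_size == 0 and end_date > now):
            return None               # reached the present: stop
        if page_size == 0:
            return end_date           # empty window: skip a full interval
        if start_date == max_date:
            return start_date + tick  # page didn't advance: nudge forward
        return max_date               # jump to the newest record seen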