Esempio n. 1
0
    def sync_data(self):
        table = self.TABLE
        api_method = self.API_METHOD
        done = False

        # Attempt to get the bookmark date from the state file (if one exists and is supplied).
        LOGGER.info('Attempting to get the most recent bookmark_date for entity {}.'.format(self.ENTITY))
        bookmark_date = get_last_record_value_for_table(self.state, table, 'bookmark_date')

        # If there is no bookmark date, fall back to using the start date from the config file.
        if bookmark_date is None:
            LOGGER.info('Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.')
            bookmark_date = get_config_start_date(self.config)
        else:
            bookmark_date = parse(bookmark_date)

        # Convert bookmarked start date to POSIX.
        bookmark_date_posix = int(bookmark_date.timestamp())
        
        # Create params for filtering
        if self.ENTITY == 'event':
            params = {"occurred_at[after]": bookmark_date_posix}
            bookmark_key = 'occurred_at'
        elif self.ENTITY in ['promotional_credit','comment']:
            params = {"created_at[after]": bookmark_date_posix}
            bookmark_key = 'created_at'
        else:
            params = {"updated_at[after]": bookmark_date_posix}
            bookmark_key = 'updated_at'

        # Add sort_by[asc] to prevent data overwrite by oldest deleted records
        if self.SORT_BY is not None:
            params['sort_by[asc]'] = self.SORT_BY

        LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

        while not done:
            max_date = bookmark_date

            response = self.client.make_request(
                url=self.get_url(),
                method=api_method,
                params=params)

            if 'api_error_code' in response.keys():
                if response['api_error_code'] == 'configuration_incompatible':
                    LOGGER.error('{} is not configured'.format(response['error_code']))
                    break

            records = response.get('list')
            
            to_write = self.get_stream_data(records)
            
            if self.config.get('include_deleted') not in ['false','False', False]:
                if self.ENTITY == 'event':
                    for event in to_write:
                        if event["event_type"] == 'plan_deleted':
                            Util.plans.append(event['content']['plan'])
                        elif event['event_type'] == 'addon_deleted':
                            Util.addons.append(event['content']['addon'])
                        elif event['event_type'] == 'coupon_deleted':
                            Util.coupons.append(event['content']['coupon'])
                if self.ENTITY == 'plan':
                    for plan in Util.plans:
                        to_write.append(plan)
                if self.ENTITY == 'addon':
                    for addon in Util.addons:
                        to_write.append(addon)
                if self.ENTITY == 'coupon':
                    for coupon in Util.coupons:
                        to_write.append(coupon) 

            
            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))
                
                for item in to_write:
                    #if item.get(bookmark_key) is not None:
                    max_date = max(
                        max_date,
                        parse(item.get(bookmark_key))
                    )

            self.state = incorporate(
                self.state, table, 'bookmark_date', max_date)

            if not response.get('next_offset'):
                LOGGER.info("Final offset reached. Ending sync.")
                done = True
            else:
                LOGGER.info("Advancing by one offset.")
                params['offset'] = response.get('next_offset')
                bookmark_date = max_date

        save_state(self.state)
Esempio n. 2
0
    def sync_data(self):
        table = self.TABLE
        api_method = self.API_METHOD
        done = False
        sync_interval_in_mins = 2

        # Attempt to get the bookmark date from the state file (if one exists and is supplied).
        LOGGER.info(
            'Attempting to get the most recent bookmark_date for entity {}.'.
            format(self.ENTITY))
        bookmark_date = get_last_record_value_for_table(
            self.state, table, 'bookmark_date')

        # If there is no bookmark date, fall back to using the start date from the config file.
        if bookmark_date is None:
            LOGGER.info(
                'Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.'
            )
            bookmark_date = get_config_start_date(self.config)
        else:
            bookmark_date = parse(bookmark_date)

        # Convert bookmarked start date to POSIX.
        bookmark_date_posix = int(bookmark_date.timestamp())
        to_date = datetime.now(
            pytz.utc) - timedelta(minutes=sync_interval_in_mins)
        to_date_posix = int(to_date.timestamp())
        sync_window = str([bookmark_date_posix, to_date_posix])
        LOGGER.info("Sync Window {} for schema {}".format(sync_window, table))

        # Create params for filtering
        if self.ENTITY == 'event':
            params = {"occurred_at[between]": sync_window}
            bookmark_key = 'occurred_at'
        elif self.ENTITY in ['promotional_credit', 'comment']:
            params = {"created_at[between]": sync_window}
            bookmark_key = 'created_at'
        else:
            params = {"updated_at[between]": sync_window}
            bookmark_key = 'updated_at'

        # Add sort_by[asc] to prevent data overwrite by oldest deleted records
        if self.SORT_BY is not None:
            params['sort_by[asc]'] = self.SORT_BY

        LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

        while not done:
            max_date = to_date

            response = self.client.make_request(url=self.get_url(),
                                                method=api_method,
                                                params=params)

            if 'api_error_code' in response.keys():
                if response['api_error_code'] == 'configuration_incompatible':
                    LOGGER.error('{} is not configured'.format(
                        response['error_code']))
                    break

            records = response.get('list')

            # List of deleted "plans, addons and coupons" from the /events endpoint
            deleted_records = []

            if self.config.get('include_deleted') not in [
                    'false', 'False', False
            ]:
                if self.ENTITY == 'event':
                    # Parse "event_type" from events records and collect deleted plan/addon/coupon from events
                    for record in records:
                        event = record.get(self.ENTITY)
                        if event["event_type"] == 'plan_deleted':
                            Util.plans.append(event['content']['plan'])
                        elif event['event_type'] == 'addon_deleted':
                            Util.addons.append(event['content']['addon'])
                        elif event['event_type'] == 'coupon_deleted':
                            Util.coupons.append(event['content']['coupon'])
                # We need additional transform for deleted records as "to_write" already contains transformed data
                if self.ENTITY == 'plan':
                    for plan in Util.plans:
                        deleted_records.append(self.transform_record(plan))
                if self.ENTITY == 'addon':
                    for addon in Util.addons:
                        deleted_records.append(self.transform_record(addon))
                if self.ENTITY == 'coupon':
                    for coupon in Util.coupons:
                        deleted_records.append(self.transform_record(coupon))

            # Get records from API response and transform
            to_write = self.get_stream_data(records)

            with singer.metrics.record_counter(endpoint=table) as ctr:
                # Combine transformed records and deleted data of  "plan, addon and coupon" collected from events endpoint
                to_write = to_write + deleted_records
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

            # update max_date with minimum of (max_replication_key) or (now - 2 minutes)
            # this will make sure that bookmark does not go beyond (now - 2 minutes)
            # so, no data will be missed due to API latency
            max_date = min(max_date, to_date)
            self.state = incorporate(self.state, table, 'bookmark_date',
                                     max_date)

            if not response.get('next_offset'):
                LOGGER.info("Final offset reached. Ending sync.")
                done = True
            else:
                LOGGER.info("Advancing by one offset.")
                params['offset'] = response.get('next_offset')
                bookmark_date = max_date

        save_state(self.state)
Esempio n. 3
0
    def sync_data(self):
        table = self.TABLE
        api_method = self.API_METHOD
        done = False

        # Attempt to get the bookmark date from the state file (if one exists and is supplied).
        LOGGER.info(
            'Attempting to get the most recent bookmark_date for entity {}.'.
            format(self.ENTITY))
        bookmark_date = get_last_record_value_for_table(
            self.state, table, 'bookmark_date')

        # If there is no bookmark date, fall back to using the start date from the config file.
        if bookmark_date is None:
            LOGGER.info(
                'Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead.'
            )
            bookmark_date = get_config_start_date(self.config)
        else:
            bookmark_date = parse(bookmark_date)

        # Convert bookmarked start date to POSIX.
        bookmark_date_posix = int(bookmark_date.timestamp())

        # Create params for filtering
        if self.ENTITY == 'event':
            params = {"occurred_at[after]": bookmark_date_posix}
            bookmark_key = 'occurred_at'
        else:
            params = {"updated_at[after]": bookmark_date_posix}
            bookmark_key = 'updated_at'

        LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

        while not done:
            max_date = bookmark_date

            response = self.client.make_request(url=self.get_url(),
                                                method=api_method,
                                                params=params)

            to_write = self.get_stream_data(response.get('list'))

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

                for item in to_write:
                    max_date = max(max_date, parse(item.get(bookmark_key)))

            self.state = incorporate(self.state, table, 'bookmark_date',
                                     max_date)

            if not response.get('next_offset'):
                LOGGER.info("Final offset reached. Ending sync.")
                done = True
            else:
                LOGGER.info("Advancing by one offset.")
                params['offset'] = response.get('next_offset')
                bookmark_date = max_date

        save_state(self.state)
Esempio n. 4
0
    def sync_data(self):
        table = self.TABLE
        api_method = self.API_METHOD
        done = False

        # Attempt to get the bookmark date from the state file (if one exists and is supplied).
        LOGGER.info(
            "Attempting to get the most recent bookmark_date for entity {}.".
            format(self.ENTITY))
        bookmark_date = get_last_record_value_for_table(
            self.state, table, "bookmark_date")

        # If there is no bookmark date, fall back to using the start date from the config file.
        if bookmark_date is None:
            LOGGER.info(
                "Could not locate bookmark_date from STATE file. Falling back to start_date from config.json instead."
            )
            bookmark_date = get_config_start_date(self.config)
        else:
            bookmark_date = parse(bookmark_date)

        # Convert bookmarked start date to POSIX.
        bookmark_date_posix = int(bookmark_date.timestamp())

        # Create params for filtering
        if self.ENTITY == "event":
            params = {"occurred_at[after]": bookmark_date_posix}
            bookmark_key = "occurred_at"
        elif self.ENTITY == "promotional_credit":
            params = {"created_at[after]": bookmark_date_posix}
            bookmark_key = "created_at"
        else:
            params = {"updated_at[after]": bookmark_date_posix}
            bookmark_key = "updated_at"

        LOGGER.info("Querying {} starting at {}".format(table, bookmark_date))

        while not done:
            max_date = bookmark_date

            response = self.client.make_request(url=self.get_url(),
                                                method=api_method,
                                                params=params)

            if "api_error_code" in response.keys():
                if response["api_error_code"] == "configuration_incompatible":
                    LOGGER.error("{} is not configured".format(
                        response["error_code"]))
                    break

            records = response.get("list")

            to_write = self.get_stream_data(records)

            if self.ENTITY == "event":
                for event in to_write:
                    if event["event_type"] == "plan_deleted":
                        Util.plans.append(event["content"]["plan"])
                    elif event["event_type"] == "addon_deleted":
                        Util.addons.append(event["content"]["addon"])
                    elif event["event_type"] == "coupon_deleted":
                        Util.coupons.append(event["content"]["coupon"])
            if self.ENTITY == "plan":
                for plan in Util.plans:
                    to_write.append(plan)
            if self.ENTITY == "addon":
                for addon in Util.addons:
                    to_write.append(addon)
            if self.ENTITY == "coupon":
                for coupon in Util.coupons:
                    to_write.append(coupon)

            with singer.metrics.record_counter(endpoint=table) as ctr:
                singer.write_records(table, to_write)

                ctr.increment(amount=len(to_write))

                for item in to_write:
                    bookmark_key = item.get(bookmark_key, None)

                    if bookmark_key is not None:
                        bookmark_date = parse(bookmark_key)

                        LOGGER.info(f"BOOKMARK_KEY value: {bookmark_date}")

                        max_date = max(
                            ensure_tz_aware_dt(max_date),
                            ensure_tz_aware_dt(bookmark_date),
                        )
                    else:
                        pass
                        # LOGGER.info("BOOKMARK_KEY not found. Using max date.")

            self.state = incorporate(self.state, table, "bookmark_date",
                                     max_date)

            if not response.get("next_offset"):
                LOGGER.info("Final offset reached. Ending sync.")
                done = True
            else:
                LOGGER.info("Advancing by one offset.")
                params["offset"] = response.get("next_offset")
                bookmark_date = max_date

        save_state(self.state)