Example #1
    def sync_data(self):
        table = self.TABLE

        LOGGER.info('Syncing data for {}'.format(table))

        url = self.get_url()
        params = self.get_params(_next=None)
        resources = self.sync_paginated(url, params)

        if self.CACHE_RESULTS:
            stream_cache.add(table, resources)
            LOGGER.info('Added {} {}s to cache'.format(len(resources), table))

        LOGGER.info('Reached end of stream, moving on.')
        save_state(self.state)
        return self.state
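
Every example ends by calling a save_state helper that isn't shown. A minimal sketch of what it likely does, assuming only the standard singer-python API (the body here is a guess, not the authors' implementation):

    import singer

    LOGGER = singer.get_logger()

    def save_state(state):
        # Hypothetical helper: emit the accumulated state as a Singer
        # STATE message so the orchestrator can persist it between runs.
        LOGGER.info('Updating state.')
        singer.write_state(state)
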
Example #2
    def do_sync(self):
        LOGGER.info("Starting sync.")

        streams, opportunity_child_catalogs = self.get_streams_to_replicate()

        if streams:
            LOGGER.info('Will sync: %s', ', '.join([stream.TABLE for stream in streams]))

        for stream in streams:
            stream.state = self.state
            if stream.TABLE == 'opportunities':
                stream.sync(opportunity_child_catalogs)
            else:
                stream.sync()
            self.state = stream.state
        save_state(self.state)
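
For context, a tap structured like this is usually driven by a small entry point. A sketch using singer-python's argument parser; the Runner class and the required config key are hypothetical stand-ins:

    import singer.utils

    def main():
        # Parse the standard --config/--state/--catalog arguments and hand
        # them to a runner object that performs the sync shown above.
        args = singer.utils.parse_args(required_config_keys=['token'])
        runner = Runner(args.config, args.state, args.catalog)  # hypothetical
        runner.do_sync()
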
Example #3
    def sync_data_for_period(self, date, interval):
        table = self.TABLE

        updated_after = date
        updated_before = updated_after + interval

        LOGGER.info('Syncing data from {} to {}'.format(
            updated_after.isoformat(), updated_before.isoformat()))

        params = self.get_params(updated_after, updated_before)
        url = self.get_url()
        res = self.sync_paginated(url, params)

        self.state = incorporate(self.state, table, self.RANGE_FIELD,
                                 date.isoformat())

        save_state(self.state)
        return res
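
The incorporate helper is also not shown. A plausible sketch, assuming it merges a replication-key value into the state without ever moving the bookmark backwards (the max-style comparison is an assumption):

    import singer.bookmarks

    def incorporate(state, table, field, value):
        # Hypothetical bookmark merge: keep the larger of the new value and
        # any existing one. ISO-8601 timestamps compare correctly as strings.
        current = singer.bookmarks.get_bookmark(state, table, field)
        if current is None or value > current:
            return singer.bookmarks.write_bookmark(state, table, field, value)
        return state
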
Example #4
    def sync_data_for_period(self, date, interval, child_streams=None, stop_time=None):
        table = self.TABLE

        updated_after = date
        updated_before = updated_after + interval

        # Clamp the window end so the sync never runs past the stop time.
        if stop_time is not None and updated_before > stop_time:
            updated_before = stop_time

        LOGGER.info(
            'Syncing data from {} to {}'.format(
                updated_after.isoformat(),
                updated_before.isoformat()))

        params = self.get_params(updated_after, updated_before)
        url = self.get_url()
        asyncio.run(self.sync_paginated(url, params, updated_after, child_streams))

        self.state = incorporate(self.state,
                                 table,
                                 self.RANGE_FIELD,
                                 updated_before.isoformat())

        save_state(self.state)
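
A caller would invoke sync_data_for_period once per window until it catches up to the present. A sketch of that driver loop, assuming the bookmark lives under the last_record key that sync_paginated (below) writes and that the config carries a start_date; the seven-day window is illustrative:

    import datetime

    import dateutil.parser
    import pytz

    def sync_data(self):
        # Hypothetical driver: resume from the saved bookmark (or the
        # configured start date) and replay fixed-size windows up to now.
        # All datetimes are assumed to be timezone-aware (UTC).
        bookmark = singer.bookmarks.get_bookmark(self.state, self.TABLE, 'last_record')
        start = dateutil.parser.parse(bookmark or self.config['start_date'])
        stop_time = datetime.datetime.now(pytz.utc)
        interval = datetime.timedelta(days=7)

        while start < stop_time:
            self.sync_data_for_period(start, interval,
                                      child_streams=self.catalogs,  # hypothetical
                                      stop_time=stop_time)
            start += interval
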
Example #5
    def sync_paginated(self, url, params=None, updated_after=None, child_streams=None):
        table = self.TABLE

        transformer = singer.Transformer(singer.UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)
        applications_stream = OpportunityApplicationsStream(self.config,
                                                            self.state,
                                                            child_streams.get('opportunity_applications'),
                                                            self.client)
        offers_stream = OpportunityOffersStream(self.config,
                                                self.state,
                                                child_streams.get('opportunity_offers'),
                                                self.client)
        referrals_stream = OpportunityReferralsStream(self.config,
                                                      self.state,
                                                      child_streams.get('opportunity_referrals'),
                                                      self.client)
        resumes_stream = OpportunityResumesStream(self.config,
                                                  self.state,
                                                  child_streams.get('opportunity_resumes'),
                                                  self.client)
        # Set up looping parameters (page is for logging consistency)
        finished_paginating = False
        page = singer.bookmarks.get_bookmark(self.state, table, "next_page") or 1
        _next = singer.bookmarks.get_bookmark(self.state, table, "offset")
        if _next:
            params['offset'] = _next

        while not finished_paginating:
            try:
                result = self.client.make_request(url, self.API_METHOD, params=params)
            except OffsetInvalidException:
                LOGGER.warning('Found invalid offset "%s", retrying without offset.', params.get('offset'))
                params.pop("offset", None)
                _next = None
                page = 1
                result = self.client.make_request(url, self.API_METHOD, params=params)
            _next = result.get('next')

            data = self.get_stream_data(result['data'], transformer)

            LOGGER.info('Starting Opportunity child stream syncs')
            for opportunity in data:
                opportunity_id = opportunity['id']

                if child_streams.get('opportunity_applications'):
                    applications_stream.write_schema()
                    applications_stream.sync_data(opportunity_id)

                if child_streams.get('opportunity_offers'):
                    offers_stream.write_schema()
                    offers_stream.sync_data(opportunity_id)

                if child_streams.get('opportunity_referrals'):
                    referrals_stream.write_schema()
                    referrals_stream.sync_data(opportunity_id)

                if child_streams.get('opportunity_resumes'):
                    resumes_stream.write_schema()
                    resumes_stream.sync_data(opportunity_id)

            LOGGER.info('Finished Opportunity child stream syncs')

            with singer.metrics.record_counter(endpoint=table) as counter:
                self.write_schema()
                singer.write_records(table, data)
                counter.increment(len(data))

            LOGGER.info('Synced page {} for {}'.format(page, self.TABLE))
            page += 1

            if _next:
                params['offset'] = _next
                self.state = singer.bookmarks.write_bookmark(self.state, table, "offset", _next)
                self.state = singer.bookmarks.write_bookmark(self.state, table, "next_page", page)
                # Save the last_record bookmark when we're paginating to make sure we pick up there if interrupted
                self.state = singer.bookmarks.write_bookmark(self.state, table, "last_record", updated_after.isoformat())
                save_state(self.state)
            else:
                finished_paginating = True

        transformer.log_warning()
        self.state = singer.bookmarks.clear_bookmark(self.state, table, "offset")
        self.state = singer.bookmarks.clear_bookmark(self.state, table, "next_page")
        save_state(self.state)
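
For reference, state mid-pagination under this scheme would follow singer-python's standard bookmarks layout, roughly like this (cursor and timestamp values are illustrative):

    {
        "bookmarks": {
            "opportunities": {
                "offset": "opaque-cursor-from-next",  # illustrative value
                "next_page": 4,
                "last_record": "2021-06-01T00:00:00+00:00"
            }
        }
    }

Once the last page is fetched, offset and next_page are cleared so the next window starts from page 1.
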
Example #6
    async def sync_paginated(self, url, params=None, updated_after=None, child_streams=None):
        table = self.TABLE

        transformer = singer.Transformer(singer.UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)

        # Set up looping parameters (page is for logging consistency)
        finished_paginating = False
        page = singer.bookmarks.get_bookmark(self.state, table, "next_page") or 1
        _next = singer.bookmarks.get_bookmark(self.state, table, "offset")
        params["expand"] = self.EXPAND
        if _next:
            params['offset'] = _next

        while not finished_paginating:
            try:
                result = self.client.make_request(url, self.API_METHOD, params=params)
            except OffsetInvalidException:
                LOGGER.warning('Found invalid offset "%s", retrying without offset.', params.get('offset'))
                params.pop("offset", None)
                _next = None
                page = 1
                result = self.client.make_request(url, self.API_METHOD, params=params)
            _next = result.get('next')
            data = self.get_stream_data(result['data'], transformer)

            LOGGER.info('Starting Opportunity child stream syncs')
            tasks = []
            async with aiohttp.ClientSession() as session:
                for opportunity in data:
                    opportunity["links"] = []
                    opportunity_id = opportunity['id']
                    if opportunity_id is None:
                        LOGGER.info("oppurtunity id is null")
                        continue
                    for stream_name in child_streams:
                        child_streams[stream_name].write_schema()
                        task = asyncio.ensure_future(
                            child_streams[stream_name].sync_data(opportunity_id, async_session=session))
                        tasks.append(task)
                await asyncio.gather(*tasks)

            LOGGER.info('Finished Opportunity child stream syncs')

            with singer.metrics.record_counter(endpoint=table) as counter:
                singer.write_records(table, data)
                counter.increment(len(data))

            LOGGER.info('Synced page {} for {}'.format(page, self.TABLE))
            page += 1

            if _next:
                params['offset'] = _next
                self.state = singer.bookmarks.write_bookmark(self.state, table, "offset", _next)
                self.state = singer.bookmarks.write_bookmark(self.state, table, "next_page", page)
                # Save the last_record bookmark when we're paginating to make sure we pick up there if interrupted
                self.state = singer.bookmarks.write_bookmark(self.state, table, "last_record",
                                                             updated_after.isoformat())
                save_state(self.state)
            else:
                finished_paginating = True

        transformer.log_warning()
        self.state = singer.bookmarks.clear_bookmark(self.state, table, "offset")
        self.state = singer.bookmarks.clear_bookmark(self.state, table, "next_page")
        save_state(self.state)
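
Each child stream's sync_data has to be a coroutine for the gather above to work. A minimal sketch of one, assuming the shared aiohttp session is used for a per-opportunity GET; base_url, path, and headers are hypothetical attributes:

    async def sync_data(self, opportunity_id, async_session=None):
        # Hypothetical child fetch: GET this opportunity's sub-resource over
        # the shared session, then emit the rows under the child table.
        url = '{}/opportunities/{}/{}'.format(self.base_url, opportunity_id, self.path)
        async with async_session.get(url, headers=self.headers) as response:
            response.raise_for_status()
            body = await response.json()
        data = body.get('data', [])
        singer.write_records(self.TABLE, data)
        return data

Note that the parent's make_request call is still a blocking HTTP request; only the child fetches actually overlap.
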