Esempio n. 1
0
def do_sync(args):
    """Sync every selected stream in the catalog and persist the state.

    Loads config, state and catalog via ``load_config(args.config)``,
    ``load_state(args.state)`` and ``load_catalog(args.properties)``.  For
    each selected stream, the first class in ``AVAILABLE_STREAM_ACCESSORS``
    whose ``matches_catalog`` accepts the stream is instantiated.  The
    accessors are then synced in order, each one receiving the state left
    behind by the previous one, and the final state is saved.

    A failure inside one accessor is logged with its traceback and does not
    prevent the remaining accessors from running.
    """
    LOGGER.info("Starting sync.")

    config = load_config(args.config)
    state = load_state(args.state)
    catalog = load_catalog(args.properties)

    stream_accessors = []

    for stream_catalog in catalog.get('streams'):
        if not is_selected(stream_catalog):
            LOGGER.info("'{}' is not marked selected, skipping.".format(
                stream_catalog.get('stream')))
            continue

        # Only the first matching accessor class handles a given stream.
        for available_stream_accessor in AVAILABLE_STREAM_ACCESSORS:
            if available_stream_accessor.matches_catalog(stream_catalog):
                stream_accessors.append(
                    available_stream_accessor(config, state, stream_catalog))
                break

    for stream_accessor in stream_accessors:
        try:
            # Thread the state through: hand the accessor the latest state
            # and take back whatever it holds once its sync is finished.
            stream_accessor.state = state
            stream_accessor.sync()
            state = stream_accessor.state
        except Exception:
            # Deliberately best-effort: one failing endpoint must not stop
            # the others.  `exception` (rather than `error`) keeps the
            # traceback in the log so the failure can be diagnosed.
            LOGGER.exception('Failed to sync endpoint, moving on!')

    save_state(state)
Esempio n. 2
0
    def sync(self):
        """Write the stream schema, then emit unsubscribes window by window.

        Starting from ``self.get_start_date(self.TABLE)``, the method walks
        forward in six-hour windows until the window end passes the current
        UTC time.  Every window is read page by page through
        ``readUnsubscribes`` until a page with no results is returned.
        ``self.login()`` is called before each window and again before each
        page request.  After every page the window start is stored in the
        state under ``'start_date'`` and the state is saved.
        """
        primary_keys = self.catalog.get('key_properties')
        stream_table = self.TABLE

        singer.write_schema(
            self.catalog.get('stream'),
            self.catalog.get('schema'),
            key_properties=primary_keys)

        window_end = self.get_start_date(stream_table)
        window_size = timedelta(hours=6)

        LOGGER.info('Syncing unsubscribes.')

        while window_end < datetime.now(pytz.utc):
            self.login()

            # Slide the window: the previous end becomes the new start.
            window_start = window_end
            window_end = window_start + window_size
            LOGGER.info("Fetching unsubscribes from {} to {}".format(
                window_start, window_end))

            request_filter = self.make_filter(window_start, window_end)
            page = 1
            select_fields = get_field_selector(self.catalog.get('schema'))

            while True:
                self.login()
                LOGGER.info("... page {}".format(page))
                batch = self.client.service.readUnsubscribes(
                    request_filter, page)
                page += 1

                records = [
                    select_fields(suds.sudsobject.asdict(entry))
                    for entry in batch
                ]
                singer.write_records(stream_table, records)

                LOGGER.info("... {} results".format(len(batch)))

                # The bookmark is the window start, written after each page
                # (including the final, empty one).
                self.state = incorporate(
                    self.state,
                    stream_table,
                    'start_date',
                    window_start.isoformat())
                save_state(self.state)

                # An empty page marks the end of this window.
                if len(batch) == 0:
                    break

        LOGGER.info("Done syncing unsubscribes.")
Esempio n. 3
0
    def sync(self):
        """Write the stream schema, then emit outbound activities hourly.

        Starting from ``self.get_start_date(self.TABLE)``, the method walks
        forward in one-hour windows until the window end passes the current
        UTC time, calling ``self.login()`` before each window.  Inside a
        window, ``readRecentOutboundActivities`` is called repeatedly with
        the same filter object (its ``readDirection`` is set to ``'NEXT'``
        after each batch) until an empty batch is returned or a
        ``suds.WebFault`` whose fault string contains ``'116'`` is raised.
        Any other ``suds.WebFault`` propagates.

        Each record receives an ``'id'`` that is the MD5 hex digest of its
        truthy identifying field values joined with ``'|'``.  After each
        window the window start (microseconds dropped) is stored in the
        state under ``'createdDate'`` and the state is saved.
        """
        primary_keys = self.catalog.get('key_properties')
        stream_table = self.TABLE

        singer.write_schema(self.catalog.get('stream'),
                            self.catalog.get('schema'),
                            key_properties=primary_keys)

        window_end = self.get_start_date(stream_table)
        window_size = timedelta(hours=1)

        LOGGER.info('Syncing outbound activities.')

        while window_end < datetime.now(pytz.utc):
            self.login()

            # Slide the window: the previous end becomes the new start.
            window_start = window_end
            window_end = window_start + window_size
            LOGGER.info("Fetching activities from {} to {}".format(
                window_start, window_end))

            request_filter = self.make_filter(window_start, window_end)
            select_fields = get_field_selector(self.catalog.get('schema'))

            while True:
                try:
                    batch = self.client.service.readRecentOutboundActivities(
                        request_filter)
                except suds.WebFault as fault:
                    # NOTE(review): '116' presumably signals that there is
                    # nothing (more) to read - confirm against the API docs.
                    if '116' not in fault.fault.faultstring:
                        raise
                    break

                raw_records = [
                    suds.sudsobject.asdict(entry) for entry in batch
                ]
                records = [select_fields(raw) for raw in raw_records]

                for record in records:
                    hash_fields = [
                        'createdDate', 'activityType', 'contactId', 'listId',
                        'segmentId', 'keywordId', 'messageId'
                    ]

                    # Synthesize an id from whichever identifying fields
                    # are present and truthy on this record.
                    present_values = filter(
                        identity, project(record, hash_fields).values())
                    digest_input = '|'.join(present_values).encode('utf-8')
                    record['id'] = hashlib.md5(digest_input).hexdigest()

                singer.write_records(stream_table, records)

                LOGGER.info('... {} results'.format(len(batch)))

                # Subsequent calls with this filter continue from here.
                request_filter.readDirection = 'NEXT'

                if len(batch) == 0:
                    break

            bookmark = window_start.replace(microsecond=0).isoformat()
            self.state = incorporate(self.state, stream_table, 'createdDate',
                                     bookmark)

            save_state(self.state)

        LOGGER.info('Done syncing outbound activities.')
Esempio n. 4
0
    def sync(self):
        """Write the stream schema, then emit contacts window by window.

        Starting from ``self.get_start_date(self.TABLE)``, the method walks
        forward in six-hour windows until the window end passes the current
        UTC time.  Every window is read page by page through
        ``readContacts`` until a page with no results is returned.  The
        optional GeoIP / technology / RFM / engagement sections are only
        requested when ``self.any_selected`` reports at least one of their
        fields.  Each result's ``readOnlyContactData`` mapping is merged
        into the top level of the record before it is written.  After each
        window the window start (microseconds dropped) is stored in the
        state under ``self.REPLICATION_KEY`` and the state is saved.

        Raises:
            socket.timeout: after five consecutive timeouts of one request.
            Fault: when the service fault message does not contain '103'.
        """
        key_properties = self.catalog.get('key_properties')
        table = self.TABLE

        singer.write_schema(self.catalog.get('stream'),
                            self.catalog.get('schema'),
                            key_properties=key_properties)

        field_selector = get_field_selector(self.catalog,
                                            self.catalog.get('schema'))

        include_geo_ip_data = self.any_selected([
            'geoIPCity', 'geoIPStateRegion', 'geoIPZip', 'geoIPCountry',
            'geoIPCountryCode',
        ])

        # Every name needs its own trailing comma: adjacent string literals
        # are silently concatenated into one bogus field name otherwise.
        include_technology_data = self.any_selected([
            'primaryBrowser', 'mobileBrowser', 'primaryEmailClient',
            'mobileEmailClient', 'operatingSystem',
        ])

        include_rfm_data = self.any_selected([
            'firstOrderDate', 'lastOrderDate', 'lastOrderTotal',
            'totalOrders', 'totalRevenue', 'averageOrderValue',
        ])

        include_engagement_data = self.any_selected(
            ['lastDeliveryDate', 'lastOpenDate', 'lastClickDate'])

        if include_geo_ip_data:
            LOGGER.info('Including GEOIP data.')

        if include_technology_data:
            LOGGER.info('Including technology data.')

        if include_rfm_data:
            LOGGER.info('Including RFM data.')

        if include_engagement_data:
            LOGGER.info('Including engagement data.')

        LOGGER.info('Syncing contacts.')

        start = self.get_start_date(table)
        end = start
        interval = timedelta(hours=6)
        max_timeouts = 5

        def flatten(item):
            # Lift the nested read-only fields to the top level of the
            # record; a missing or None value contributes nothing.
            read_only_data = item.pop('readOnlyContactData', None) or {}
            return dict(item, **read_only_data)

        while end < datetime.now(pytz.utc):
            # Slide the window: the previous end becomes the new start.
            start = end
            end = start + interval
            LOGGER.info("Fetching contacts modified from {} to {}".format(
                start, end))

            _filter = self.make_filter(start, end)

            page_number = 1
            has_more = True
            # Counts consecutive timeouts of the current request.  It must
            # live outside the page loop: `continue` re-enters the loop
            # body, so initializing it there would reset it on every retry
            # and the limit below could never be reached.
            timeout_count = 0
            while has_more:
                try:
                    results = self.client.service.readContacts(
                        filter=_filter,
                        includeLists=True,
                        fields=[],
                        pageNumber=page_number,
                        includeSMSKeywords=True,
                        includeGeoIPData=include_geo_ip_data,
                        includeTechnologyData=include_technology_data,
                        includeRFMData=include_rfm_data,
                        includeEngagementData=include_engagement_data)

                except socket.timeout:
                    timeout_count += 1
                    if timeout_count >= max_timeouts:
                        LOGGER.error(
                            "Retried more than five times, moving on!")
                        raise
                    LOGGER.warning("Timeout caught, retrying request")
                    continue
                except Fault as e:
                    if '103' in e.message:
                        LOGGER.warning(
                            "Got signed out - logging in again and retrying")
                        self.login()
                        continue
                    else:
                        raise

                # The request went through: the next page starts with a
                # fresh timeout budget.
                timeout_count = 0

                LOGGER.info("... {} results".format(len(results)))
                extraction_time = singer.utils.now()
                for result in results:
                    result_dict = zeep.helpers.serialize_object(
                        result, target_cls=dict)
                    flattened = flatten(result_dict)
                    singer.write_record(table,
                                        field_selector(flattened),
                                        time_extracted=extraction_time)

                # An empty page marks the end of this window.
                if len(results) == 0:
                    has_more = False

                page_number += 1

            self.state = incorporate(self.state, table, self.REPLICATION_KEY,
                                     start.replace(microsecond=0).isoformat())

            save_state(self.state)

        LOGGER.info("Done syncing contacts.")
Esempio n. 5
0
    def sync(self):
        """Write the stream schema, then emit unsubscribes window by window.

        Starting from ``self.get_start_date(self.TABLE)``, the method walks
        forward in six-hour windows until the window end passes the current
        UTC time.  Every window is read page by page through
        ``readUnsubscribes`` until a page with no results is returned.  A
        service fault whose message contains '103' triggers a fresh
        ``self.login()`` and a retry of the same page.  After every page
        the window start is stored in the state under
        ``self.REPLICATION_KEY`` and the state is saved.

        Raises:
            Fault: when the service fault message does not contain '103'.
        """
        key_properties = self.catalog.get('key_properties')
        table = self.TABLE

        singer.write_schema(self.catalog.get('stream'),
                            self.catalog.get('schema'),
                            key_properties=key_properties)

        # Depends only on the catalog, so build it once instead of once per
        # window.
        field_selector = get_field_selector(self.catalog,
                                            self.catalog.get('schema'))

        start = self.get_start_date(table)
        end = start
        interval = timedelta(hours=6)

        LOGGER.info('Syncing unsubscribes.')

        while end < datetime.now(pytz.utc):
            # Slide the window: the previous end becomes the new start.
            start = end
            end = start + interval
            LOGGER.info("Fetching unsubscribes from {} to {}".format(
                start, end))

            has_more = True
            _filter = self.make_filter(start, end)
            page_number = 1

            while has_more:
                LOGGER.info("... page {}".format(page_number))
                # Keep the try body down to the request itself so that the
                # retry path can only ever re-issue the same page; the page
                # counter advances only after a successful response.
                try:
                    results = self.client.service.readUnsubscribes(
                        filter=_filter, pageNumber=page_number)
                except Fault as e:
                    if '103' in e.message:
                        LOGGER.warning(
                            "Got signed out - logging in again and retrying")
                        self.login()
                        continue
                    else:
                        raise

                page_number += 1

                singer.write_records(table, [
                    field_selector(
                        zeep.helpers.serialize_object(result,
                                                      target_cls=dict))
                    for result in results
                ])

                LOGGER.info("... {} results".format(len(results)))

                # An empty page marks the end of this window.
                if len(results) == 0:
                    has_more = False

                # The bookmark is the window start, written after each page
                # (including the final, empty one).
                self.state = incorporate(self.state,
                                         table, self.REPLICATION_KEY,
                                         start.isoformat())

                save_state(self.state)

        LOGGER.info("Done syncing unsubscribes.")