def sync(self):
    """Sync unsubscribe events from the start date forward, in
    six-hour windows, paging through the suds client until a window
    returns no results; bookmark each completed window in state."""
    table = self.TABLE
    singer.write_schema(
        self.catalog.get('stream'),
        self.catalog.get('schema'),
        key_properties=self.catalog.get('key_properties'))

    window_start = self.get_start_date(table)
    window_end = window_start
    window_size = timedelta(hours=6)

    LOGGER.info('Syncing unsubscribes.')

    while window_end < datetime.now(pytz.utc):
        self.login()
        window_start = window_end
        window_end = window_start + window_size
        LOGGER.info("Fetching unsubscribes from {} to {}".format(
            window_start, window_end))

        _filter = self.make_filter(window_start, window_end)
        select_fields = get_field_selector(self.catalog.get('schema'))
        page = 1

        while True:
            # Re-login before every page request to keep the session alive.
            self.login()
            LOGGER.info("... page {}".format(page))
            results = self.client.service.readUnsubscribes(_filter, page)
            page += 1

            singer.write_records(
                table,
                [select_fields(suds.sudsobject.asdict(item))
                 for item in results])

            LOGGER.info("... {} results".format(len(results)))
            if len(results) == 0:
                break

        # Bookmark the start of the window just completed.
        self.state = incorporate(
            self.state, table, 'start_date', window_start.isoformat())
        save_state(self.state)

    LOGGER.info("Done syncing unsubscribes.")
def sync(self):
    """Sync all contact lists via the zeep client, 5000 per page,
    retrying the current page after re-login when the session expires."""
    table = self.TABLE
    singer.write_schema(
        self.catalog.get('stream'),
        self.catalog.get('schema'),
        key_properties=self.catalog.get('key_properties'))

    select_fields = get_field_selector(self.catalog,
                                       self.catalog.get('schema'))

    LOGGER.info('Syncing lists.')

    _filter = self.make_filter()
    page = 1

    while True:
        LOGGER.info("... page {}".format(page))
        try:
            results = self.client.service.readLists(
                filter=_filter, pageNumber=page, pageSize=5000)
        except Fault as e:
            # Error 103 means the session expired; re-authenticate and
            # retry the same page.
            if '103' in e.message:
                LOGGER.warn(
                    "Got signed out - logging in again and retrying")
                self.login()
                continue
            raise

        LOGGER.info("... {} results".format(len(results)))
        page += 1

        singer.write_records(table, [
            select_fields(
                zeep.helpers.serialize_object(item, target_cls=dict))
            for item in results
        ])

        if len(results) == 0:
            break

    LOGGER.info("Done syncing lists.")
def sync(self):
    """Sync all contact lists via the suds client, 5000 per page,
    stopping when a page comes back empty."""
    table = self.TABLE
    singer.write_schema(
        self.catalog.get('stream'),
        self.catalog.get('schema'),
        key_properties=self.catalog.get('key_properties'))

    self.login()

    select_fields = get_field_selector(self.catalog.get('schema'))
    page = 1

    LOGGER.info('Syncing lists.')

    while True:
        # Re-login before every page request to keep the session alive.
        self.login()
        LOGGER.info("... page {}".format(page))

        # weird hack -- this just happens to work if we pass 1 as the
        # filter. Other values like None did not work
        results = self.client.service.readLists(1, page, 5000)

        LOGGER.info("... {} results".format(len(results)))
        page += 1

        singer.write_records(table, [
            select_fields(suds.sudsobject.asdict(item))
            for item in results
        ])

        if len(results) == 0:
            break

    LOGGER.info("Done syncing lists.")
def sync(self):
    """Sync recent outbound activities in one-hour windows.

    The API exposes no primary key for activities, so each record gets
    a synthetic ``id``: an md5 hash of its identifying fields. A fault
    containing error code 116 signals the end of a window's data.
    """
    table = self.TABLE
    singer.write_schema(
        self.catalog.get('stream'),
        self.catalog.get('schema'),
        key_properties=self.catalog.get('key_properties'))

    window_start = self.get_start_date(table)
    window_end = window_start
    window_size = timedelta(hours=1)

    # Fields combined to build the synthetic primary key.
    id_fields = [
        'createdDate', 'activityType', 'contactId', 'listId',
        'segmentId', 'keywordId', 'messageId'
    ]

    LOGGER.info('Syncing outbound activities.')

    while window_end < datetime.now(pytz.utc):
        self.login()
        window_start = window_end
        window_end = window_start + window_size
        LOGGER.info("Fetching activities from {} to {}".format(
            window_start, window_end))

        _filter = self.make_filter(window_start, window_end)
        select_fields = get_field_selector(self.catalog.get('schema'))

        while True:
            try:
                results = \
                    self.client.service.readRecentOutboundActivities(
                        _filter)
            except suds.WebFault as e:
                # Error 116 means no (more) data for this window.
                if '116' in e.fault.faultstring:
                    break
                raise

            parsed = [
                select_fields(suds.sudsobject.asdict(item))
                for item in results
            ]

            for record in parsed:
                # Derive a stable synthetic key from whichever
                # identifying fields are present (None values dropped).
                key_source = '|'.join(
                    filter(identity, project(record, id_fields).values()))
                record['id'] = hashlib.md5(
                    key_source.encode('utf-8')).hexdigest()

            singer.write_records(table, parsed)
            LOGGER.info('... {} results'.format(len(results)))

            # Subsequent reads continue forward from the last result.
            _filter.readDirection = 'NEXT'
            if len(results) == 0:
                break

        # Bookmark the start of the window just completed.
        self.state = incorporate(
            self.state, table, 'createdDate',
            window_start.replace(microsecond=0).isoformat())
        save_state(self.state)

    LOGGER.info('Done syncing outbound activities.')
def sync(self):
    """Sync contacts modified since the bookmarked start date.

    Walks forward in six-hour windows, paging through ``readContacts``.
    The optional data blocks (GeoIP, technology, RFM, engagement) are
    requested only when at least one of their fields is selected in the
    catalog. Timeouts are retried up to five times; an expired session
    (fault code 103) triggers a re-login and a retry of the same page.

    Fixes vs. the prior revision:
    - The technology and RFM field lists were missing commas, so
      adjacent string literals were implicitly concatenated (e.g.
      'primaryEmailClient' 'mobileEmailClient' became one bogus field
      name) and those selections could never be detected.
    - ``retry_count`` was reset to 0 on every loop iteration, so the
      five-retry limit on socket timeouts could never trigger.
    - The timeout warning message contained a raw line break.
    """
    table = self.TABLE
    singer.write_schema(
        self.catalog.get('stream'),
        self.catalog.get('schema'),
        key_properties=self.catalog.get('key_properties'))

    field_selector = get_field_selector(self.catalog,
                                        self.catalog.get('schema'))

    # Request optional data blocks only when one of their fields is
    # selected in the catalog.
    includeGeoIpData = self.any_selected([
        'geoIPCity', 'geoIPStateRegion', 'geoIPZip', 'geoIPCountry',
        'geoIPCountryCode'
    ])
    includeTechnologyData = self.any_selected([
        'primaryBrowser', 'mobileBrowser', 'primaryEmailClient',
        'mobileEmailClient', 'operatingSystem'
    ])
    includeRFMData = self.any_selected([
        'firstOrderDate', 'lastOrderDate', 'lastOrderTotal',
        'totalOrders', 'totalRevenue', 'averageOrderValue'
    ])
    includeEngagementData = self.any_selected(
        ['lastDeliveryDate', 'lastOpenDate', 'lastClickDate'])

    if includeGeoIpData:
        LOGGER.info('Including GEOIP data.')
    if includeTechnologyData:
        LOGGER.info('Including technology data.')
    if includeRFMData:
        LOGGER.info('Including RFM data.')
    if includeEngagementData:
        LOGGER.info('Including engagement data.')

    LOGGER.info('Syncing contacts.')

    start = self.get_start_date(table)
    end = start
    interval = timedelta(hours=6)

    def flatten(item):
        # Merge the nested read-only block into the top-level record.
        read_only_data = item.get('readOnlyContactData', {}) or {}
        item.pop('readOnlyContactData', None)
        return dict(item, **read_only_data)

    while end < datetime.now(pytz.utc):
        start = end
        end = start + interval
        LOGGER.info("Fetching contacts modified from {} to {}".format(
            start, end))

        _filter = self.make_filter(start, end)
        pageNumber = 1
        hasMore = True
        # Tracks consecutive timeouts; must live OUTSIDE the paging loop
        # or the retry limit can never be reached.
        retry_count = 0

        while hasMore:
            try:
                results = self.client.service.readContacts(
                    filter=_filter,
                    includeLists=True,
                    fields=[],
                    pageNumber=pageNumber,
                    includeSMSKeywords=True,
                    includeGeoIPData=includeGeoIpData,
                    includeTechnologyData=includeTechnologyData,
                    includeRFMData=includeRFMData,
                    includeEngagementData=includeEngagementData)
            except socket.timeout:
                retry_count += 1
                if retry_count >= 5:
                    LOGGER.error(
                        "Retried more than five times, moving on!")
                    raise
                LOGGER.warn("Timeout caught, retrying request")
                continue
            except Fault as e:
                # Error 103 means the session expired; re-authenticate
                # and retry the same page.
                if '103' in e.message:
                    LOGGER.warn(
                        "Got signed out - logging in again and retrying")
                    self.login()
                    continue
                else:
                    raise

            # A successful call resets the timeout budget.
            retry_count = 0

            LOGGER.info("... {} results".format(len(results)))
            extraction_time = singer.utils.now()

            for result in results:
                result_dict = zeep.helpers.serialize_object(
                    result, target_cls=dict)
                flattened = flatten(result_dict)
                singer.write_record(table,
                                    field_selector(flattened),
                                    time_extracted=extraction_time)

            if len(results) == 0:
                hasMore = False

            pageNumber = pageNumber + 1

        # Bookmark the start of the window just completed.
        self.state = incorporate(
            self.state, table, self.REPLICATION_KEY,
            start.replace(microsecond=0).isoformat())
        save_state(self.state)

    LOGGER.info("Done syncing contacts.")
def sync(self):
    """Sync unsubscribe events in six-hour windows via the zeep
    client, re-authenticating and retrying a page when the session
    expires (fault code 103); bookmark each completed window."""
    table = self.TABLE
    singer.write_schema(
        self.catalog.get('stream'),
        self.catalog.get('schema'),
        key_properties=self.catalog.get('key_properties'))

    window_start = self.get_start_date(table)
    window_end = window_start
    window_size = timedelta(hours=6)

    LOGGER.info('Syncing unsubscribes.')

    while window_end < datetime.now(pytz.utc):
        window_start = window_end
        window_end = window_start + window_size
        LOGGER.info("Fetching unsubscribes from {} to {}".format(
            window_start, window_end))

        _filter = self.make_filter(window_start, window_end)
        select_fields = get_field_selector(self.catalog,
                                           self.catalog.get('schema'))
        page = 1

        while True:
            LOGGER.info("... page {}".format(page))
            try:
                results = self.client.service.readUnsubscribes(
                    filter=_filter, pageNumber=page)
                page += 1
                singer.write_records(table, [
                    select_fields(
                        zeep.helpers.serialize_object(
                            item, target_cls=dict))
                    for item in results
                ])
            except Fault as e:
                # Error 103 means the session expired; re-authenticate
                # and retry the same page.
                if '103' in e.message:
                    LOGGER.warn(
                        "Got signed out - logging in again and retrying")
                    self.login()
                    continue
                raise

            LOGGER.info("... {} results".format(len(results)))
            if len(results) == 0:
                break

        # Bookmark the start of the window just completed.
        self.state = incorporate(
            self.state, table, self.REPLICATION_KEY,
            window_start.isoformat())
        save_state(self.state)

    LOGGER.info("Done syncing unsubscribes.")