def sync_data(self):
    cursor = request('Campaign',
                     FuelSDK.ET_Campaign,
                     self.auth_stub)

    for campaign in cursor:
        campaign = self.filter_keys_and_parse(campaign)
        singer.write_records(self.__class__.TABLE, [campaign])

def _get_fields(self, extensions):
    to_return = extensions.copy()

    result = request('DataExtensionField',
                     FuelSDK.ET_DataExtension_Column,
                     self.auth_stub)

    for field in result:
        extension_id = field.DataExtension.CustomerKey
        field = sudsobj_to_dict(field)
        field_name = field['Name']

        if field.get('IsPrimaryKey'):
            to_return = _merge_in(
                to_return,
                [extension_id, 'key_properties'],
                field_name)

        field_schema = {
            'type': [
                'null',
                _convert_extension_datatype(str(field.get('FieldType')))
            ],
            'description': str(field.get('Description')),
        }

        to_return = set_in(
            to_return,
            [extension_id, 'schema', 'properties', field_name],
            field_schema)

    return to_return

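# A minimal sketch of the FieldType -> JSON Schema mapping assumed by
# _convert_extension_datatype above. The real mapping lives elsewhere in this
# module; the cases below are illustrative assumptions, not the authoritative
# implementation. Unrecognized Marketing Cloud field types fall back to 'string'.
def _convert_extension_datatype_sketch(datatype):
    if datatype == 'Boolean':
        return 'boolean'
    elif datatype == 'Decimal':
        return 'number'
    elif datatype == 'Number':
        return 'integer'

    # Text, EmailAddress, Phone, Date, Locale, etc. are emitted as strings
    return 'string'
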
def pull_subscribers_batch(self, subscriber_keys):
    if not subscriber_keys:
        return

    table = self.__class__.TABLE
    _filter = {}

    if len(subscriber_keys) == 1:
        _filter = {
            'Property': 'SubscriberKey',
            'SimpleOperator': 'equals',
            'Value': subscriber_keys[0]
        }
    elif len(subscriber_keys) > 1:
        _filter = {
            'Property': 'SubscriberKey',
            'SimpleOperator': 'IN',
            'Value': subscriber_keys
        }
    else:
        LOGGER.info('Got empty set of subscriber keys, moving on')
        return

    stream = request('Subscriber',
                     FuelSDK.ET_Subscriber,
                     self.auth_stub,
                     _filter)

    for subscriber in stream:
        subscriber = self.filter_keys_and_parse(subscriber)
        subscriber = self.remove_sensitive_data(subscriber)
        singer.write_records(table, [subscriber])

def sync_data(self):
    table = self.__class__.TABLE
    selector = FuelSDK.ET_ContentArea

    search_filter = None
    retrieve_all_since = get_last_record_value_for_table(self.state, table)

    if retrieve_all_since is not None:
        search_filter = {
            'Property': 'ModifiedDate',
            'SimpleOperator': 'greaterThan',
            'Value': retrieve_all_since
        }

    stream = request('ContentAreaDataAccessObject',
                     selector,
                     self.auth_stub,
                     search_filter)

    for content_area in stream:
        content_area = self.filter_keys_and_parse(content_area)

        self.state = incorporate(self.state,
                                 table,
                                 'ModifiedDate',
                                 content_area.get('ModifiedDate'))

        singer.write_records(table, [content_area])

    save_state(self.state)

def sync_data(self):
    table = self.__class__.TABLE
    selector = FuelSDK.ET_List

    search_filter = None
    retrieve_all_since = get_last_record_value_for_table(
        self.state, table, self.config.get('start_date'))

    if retrieve_all_since is not None:
        search_filter = {
            'Property': 'ModifiedDate',
            'SimpleOperator': 'greaterThan',
            'Value': retrieve_all_since
        }

    stream = request('List', selector, self.auth_stub, search_filter)

    for _list in stream:
        _list = self.filter_keys_and_parse(_list)

        self.state = incorporate(self.state,
                                 table,
                                 'ModifiedDate',
                                 _list.get('ModifiedDate'))

        singer.write_records(table, [_list])

    save_state(self.state)

def sync_data(self):
    table = self.__class__.TABLE
    selector = FuelSDK.ET_Send

    search_filter = None

    # pass config to return the start date if no bookmark is found
    retrieve_all_since = get_last_record_value_for_table(
        self.state, table, self.config)

    if retrieve_all_since is not None:
        search_filter = {
            'Property': 'ModifiedDate',
            'SimpleOperator': 'greaterThan',
            'Value': retrieve_all_since
        }

    stream = request('Send',
                     selector,
                     self.auth_stub,
                     search_filter,
                     batch_size=self.batch_size)

    catalog_copy = copy.deepcopy(self.catalog)

    for send in stream:
        send = self.filter_keys_and_parse(send)

        self.state = incorporate(self.state,
                                 table,
                                 'ModifiedDate',
                                 send.get('ModifiedDate'))

        self.write_records_with_transform(send, catalog_copy, table)

    save_state(self.state)

def sync_data(self):
    table = self.__class__.TABLE
    selector = FuelSDK.ET_Email

    search_filter = None
    retrieve_all_since = get_last_record_value_for_table(self.state, table)

    if retrieve_all_since is not None:
        search_filter = {
            'Property': 'ModifiedDate',
            'SimpleOperator': 'greaterThan',
            'Value': retrieve_all_since
        }

    stream = request('Email', selector, self.auth_stub, search_filter)

    for email in stream:
        email = self.filter_keys_and_parse(email)

        self.state = incorporate(self.state,
                                 table,
                                 'ModifiedDate',
                                 email.get('ModifiedDate'))

        singer.write_records(table, [email])

    save_state(self.state)

def sync_data(self):
    table = self.__class__.TABLE
    subscriber_dao = SubscriberDataAccessObject(self.config,
                                                self.state,
                                                self.auth_stub,
                                                self.subscriber_catalog)

    # pass config to return the start date if no bookmark is found
    start = get_last_record_value_for_table(self.state, table, self.config)

    pagination_unit = self.config.get(
        'pagination__list_subscriber_interval_unit', 'days')
    pagination_quantity = self.config.get(
        'pagination__list_subscriber_interval_quantity', 1)

    unit = {pagination_unit: int(pagination_quantity)}

    end = increment_date(start, unit)

    all_subscribers_list = self._get_all_subscribers_list()

    while before_now(start):
        stream = request('ListSubscriber',
                         FuelSDK.ET_List_Subscriber,
                         self.auth_stub,
                         _get_list_subscriber_filter(
                             all_subscribers_list, start, unit),
                         batch_size=self.batch_size)

        batch_size = 100

        if self.replicate_subscriber:
            subscriber_dao.write_schema()

        catalog_copy = copy.deepcopy(self.catalog)

        for list_subscribers_batch in partition_all(stream, batch_size):
            for list_subscriber in list_subscribers_batch:
                list_subscriber = self.filter_keys_and_parse(list_subscriber)

                if list_subscriber.get('ModifiedDate'):
                    self.state = incorporate(
                        self.state,
                        table,
                        'ModifiedDate',
                        list_subscriber.get('ModifiedDate'))

                self.write_records_with_transform(list_subscriber,
                                                  catalog_copy,
                                                  table)

            if self.replicate_subscriber:
                # build the list of subscriber keys for this batch
                subscriber_keys = list(
                    map(_get_subscriber_key, list_subscribers_batch))

                # pass the list of 'subscriber_keys' to fetch subscriber details
                subscriber_dao.pull_subscribers_batch(subscriber_keys)

        save_state(self.state)

        start = end
        end = increment_date(start, unit)

def sync_data(self):
    table = self.__class__.TABLE
    endpoints = {
        'sent': FuelSDK.ET_SentEvent,
        'click': FuelSDK.ET_ClickEvent,
        'open': FuelSDK.ET_OpenEvent,
        'bounce': FuelSDK.ET_BounceEvent,
        'unsub': FuelSDK.ET_UnsubEvent
    }

    for event_name, selector in endpoints.items():
        search_filter = None

        start = get_last_record_value_for_table(self.state, event_name)

        if start is None:
            start = self.config.get('start_date')

        if start is None:
            raise RuntimeError('start_date not defined!')

        pagination_unit = self.config.get(
            'pagination__{}_interval_unit'.format(event_name), 'minutes')
        pagination_quantity = self.config.get(
            'pagination__{}_interval_quantity'.format(event_name), 10)

        unit = {pagination_unit: int(pagination_quantity)}

        end = increment_date(start, unit)

        while before_now(start):
            LOGGER.info("Fetching {} from {} to {}"
                        .format(event_name, start, end))

            search_filter = get_date_page('EventDate', start, unit)

            stream = request(event_name,
                             selector,
                             self.auth_stub,
                             search_filter)

            for event in stream:
                event = self.filter_keys_and_parse(event)

                self.state = incorporate(self.state,
                                         event_name,
                                         'EventDate',
                                         event.get('EventDate'))

                singer.write_records(table, [event])

            self.state = incorporate(self.state,
                                     event_name,
                                     'EventDate',
                                     start)

            save_state(self.state)

            start = end
            end = increment_date(start, unit)

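# A rough sketch of the date-window helpers that drive the loop above
# (increment_date, before_now, get_date_page). These are assumptions for
# illustration only; the real helpers live elsewhere in the tap and may parse
# dates and build SOAP filters differently.
import datetime

from dateutil.parser import parse as parse_datetime
from dateutil.relativedelta import relativedelta


def increment_date_sketch(date_string, unit):
    # unit is a dict such as {'minutes': 10}; relativedelta accepts it as kwargs
    incremented = parse_datetime(date_string) + relativedelta(**unit)
    return incremented.strftime('%Y-%m-%dT%H:%M:%SZ')


def before_now_sketch(date_string):
    # True while the window start is still in the past
    return (parse_datetime(date_string).replace(tzinfo=None)
            < datetime.datetime.utcnow())


def get_date_page_sketch(field, start, unit):
    # SOAP filter selecting records with `field` inside [start, start + unit)
    return {
        'LogicalOperator': 'AND',
        'LeftOperand': {
            'Property': field,
            'SimpleOperator': 'greaterThanOrEqual',
            'Value': start,
        },
        'RightOperand': {
            'Property': field,
            'SimpleOperator': 'lessThan',
            'Value': increment_date_sketch(start, unit),
        },
    }
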
def sync_data(self):
    table = self.__class__.TABLE
    subscriber_dao = SubscriberDataAccessObject(self.config,
                                                self.state,
                                                self.auth_stub,
                                                self.subscriber_catalog)

    start = get_last_record_value_for_table(self.state,
                                            table,
                                            self.config.get('start_date'))

    if start is None:
        start = self.config.get('start_date')

    pagination_unit = self.config.get(
        'pagination__list_subscriber_interval_unit', 'days')
    pagination_quantity = self.config.get(
        'pagination__list_subscriber_interval_quantity', 1)

    unit = {pagination_unit: int(pagination_quantity)}

    end = increment_date(start, unit)

    all_subscribers_list = self._get_all_subscribers_list()

    while before_now(start):
        stream = request(
            'ListSubscriber',
            FuelSDK.ET_List_Subscriber,
            self.auth_stub,
            _get_list_subscriber_filter(all_subscribers_list, start, unit))

        batch_size = 100

        if self.replicate_subscriber:
            subscriber_dao.write_schema()

        for list_subscribers_batch in partition_all(stream, batch_size):
            for list_subscriber in list_subscribers_batch:
                list_subscriber = self.filter_keys_and_parse(list_subscriber)

                if list_subscriber.get('ModifiedDate'):
                    self.state = incorporate(
                        self.state,
                        table,
                        'ModifiedDate',
                        list_subscriber.get('ModifiedDate'))

                list_subscriber = self.remove_sensitive_data(list_subscriber)
                singer.write_records(table, [list_subscriber])

            if self.replicate_subscriber:
                subscriber_keys = list(
                    map(_get_subscriber_key, list_subscribers_batch))

                subscriber_dao.pull_subscribers_batch(subscriber_keys)

        save_state(self.state)

        start = end
        end = increment_date(start, unit)

def sync_data(self):
    table = self.__class__.TABLE
    search_filter = None

    start = get_last_record_value_for_table(self.state,
                                            self.event_name,
                                            self.config.get('start_date'))

    if start is None:
        start = self.config.get('start_date')

    if start is None:
        raise RuntimeError('start_date not defined!')

    pagination_unit = self.config.get(
        'pagination__{}_interval_unit'.format(self.event_name), 'minutes')
    pagination_quantity = self.config.get(
        'pagination__{}_interval_quantity'.format(self.event_name), 10)

    unit = {pagination_unit: int(pagination_quantity)}

    end = increment_date(start, unit)

    while before_now(start):
        LOGGER.info("Fetching {} from {} to {}".format(
            self.event_name, start, end))

        search_filter = get_date_page('EventDate', start, unit)

        stream = request(self.event_name,
                         self.selector,
                         self.auth_stub,
                         search_filter)

        for event in stream:
            event = self.filter_keys_and_parse(event)

            self.state = incorporate(self.state,
                                     self.event_name,
                                     'EventDate',
                                     event.get('EventDate'))

            if event.get('SubscriberKey') is None:
                LOGGER.info(
                    "SubscriberKey is NULL so ignoring {} record "
                    "with SendID: {} and EventDate: {}".format(
                        self.event_name,
                        event.get('SendID'),
                        event.get('EventDate')))
                continue

            event = self.remove_sensitive_data(event)
            singer.write_records(table, [event])

        self.state = incorporate(self.state,
                                 self.event_name,
                                 'EventDate',
                                 start)

        save_state(self.state)

        start = end
        end = increment_date(start, unit)

def _get_extensions(self):
    result = request('DataExtension',
                     FuelSDK.ET_DataExtension,
                     self.auth_stub,
                     props=['CustomerKey', 'Name'],
                     batch_size=int(self.config.get('batch_size', 2500)))

    to_return = {}

    for extension in result:
        extension_name = str(extension.Name)
        customer_key = str(extension.CustomerKey)

        to_return[customer_key] = {
            'tap_stream_id': 'data_extension.{}'.format(customer_key),
            'stream': 'data_extension.{}'.format(extension_name),
            'key_properties': ['_CustomObjectKey'],
            'schema': {
                'type': 'object',
                'properties': {
                    '_CustomObjectKey': {
                        'type': ['null', 'string'],
                        'description': ('Hidden auto-incrementing primary '
                                        'key for data extension rows.'),
                    },
                    'CategoryID': {
                        'type': ['null', 'integer'],
                        'description': ('Specifies the identifier of the '
                                        'folder. (Taken from the parent '
                                        'data extension.)')
                    }
                }
            },
            'metadata': [{
                'breadcrumb': (),
                'metadata': {'inclusion': 'available'}
            }, {
                'breadcrumb': ('properties', '_CustomObjectKey'),
                'metadata': {'inclusion': 'available'}
            }, {
                'breadcrumb': ('properties', 'CategoryID'),
                'metadata': {'inclusion': 'available'}
            }]
        }

    return to_return

def pull_subscribers_batch(self, subscriber_keys):
    table = self.__class__.TABLE

    stream = request(
        'Subscriber', FuelSDK.ET_Subscriber, self.auth_stub, {
            'Property': 'SubscriberKey',
            'SimpleOperator': 'IN',
            'Value': subscriber_keys
        })

    for subscriber in stream:
        subscriber = self.filter_keys_and_parse(subscriber)
        singer.write_records(table, [subscriber])

def sync_data(self):
    cursor = request(
        'Campaign',
        FuelSDK.ET_Campaign,
        self.auth_stub,
        # use $pageSize and $page in the props for this stream,
        # as it is retrieved via the REST API
        props={"$pageSize": self.batch_size, "$page": 1, "page": 1})

    catalog_copy = copy.deepcopy(self.catalog)

    for campaign in cursor:
        campaign = self.filter_keys_and_parse(campaign)
        self.write_records_with_transform(campaign, catalog_copy, self.TABLE)

def sync_data(self):
    table = self.__class__.TABLE
    selector = FuelSDK.ET_Send

    list_sends_dao = ListSendDataAccessObject(self.config,
                                              self.state,
                                              self.auth_stub,
                                              self.listsend_catalog)
    link_sends_dao = LinkSendDataAccessObject(self.config,
                                              self.state,
                                              self.auth_stub,
                                              self.linksend_catalog)

    search_filter = None
    retrieve_all_since = get_last_record_value_for_table(
        self.state, table, self.config.get('start_date'))

    if self.REPLICATION_METHOD == "FULL_TABLE":
        retrieve_all_since = datetime.datetime.strptime(
            self.config.get('start_date'), "%Y-%m-%dT%H:%M:%SZ")

    if retrieve_all_since is not None:
        search_filter = {
            'Property': 'CreatedDate',
            'SimpleOperator': 'greaterThan',
            'Value': retrieve_all_since
        }

    stream = request('Send', selector, self.auth_stub, search_filter)

    if self.replicate_listsend:
        list_sends_dao.write_schema()

    if self.replicate_linksend:
        link_sends_dao.write_schema()

    for send in stream:
        send = self.filter_keys_and_parse(send)

        if self.replicate_listsend:
            list_sends_dao.sync_data_by_sendID(send.get('ID'))

        if self.replicate_linksend:
            link_sends_dao.sync_data_by_sendID(send.get('ID'))

        # write the record when it is newer than the bookmark date (INCREMENTAL)
        # or unconditionally when running FULL_TABLE
        if ((retrieve_all_since.strftime('%Y-%m-%d') < send.get('CreatedDate')[:10]
                and self.REPLICATION_METHOD == 'INCREMENTAL')
                or self.REPLICATION_METHOD == 'FULL_TABLE'):
            self.state = incorporate(self.state,
                                     table,
                                     'CreatedDate',
                                     send.get('CreatedDate'))

            singer.write_records(table, [send])

    save_state(self.state)

def sync_data(self):
    table = self.__class__.TABLE
    selector = FuelSDK.ET_ListSend

    # This endpoint is treated as FULL_TABLE because 'ModifiedDate' is not
    # retrievable, as discussed here:
    # https://salesforce.stackexchange.com/questions/354332/not-getting-modifieddate-for-listsend-endpoint
    stream = request('ListSend',
                     selector,
                     self.auth_stub,
                     batch_size=self.batch_size)

    catalog_copy = copy.deepcopy(self.catalog)

    for list_send in stream:
        list_send = self.filter_keys_and_parse(list_send)
        self.write_records_with_transform(list_send, catalog_copy, table)

def _get_all_subscribers_list(self):
    """
    Find the 'All Subscribers' list via the SOAP API, and return it.
    """
    result = request('List', FuelSDK.ET_List, self.auth_stub, {
        'Property': 'ListName',
        'SimpleOperator': 'equals',
        'Value': 'All Subscribers',
    })

    lists = list(result)

    if len(lists) != 1:
        msg = ('Found {} all subscriber lists, expected one!'
               .format(len(lists)))
        raise RuntimeError(msg)

    return sudsobj_to_dict(lists[0])

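# A rough sketch of how _get_list_subscriber_filter (used by the ListSubscriber
# sync above) might combine the 'All Subscribers' list with a date window. This
# is an assumption for illustration, not the tap's actual helper;
# get_date_page_sketch is the hypothetical helper sketched earlier.
def _get_list_subscriber_filter_sketch(all_subscribers_list, start, unit):
    return {
        'LogicalOperator': 'AND',
        'LeftOperand': {
            'Property': 'ListID',
            'SimpleOperator': 'equals',
            'Value': all_subscribers_list.get('ID'),
        },
        'RightOperand': get_date_page_sketch('ModifiedDate', start, unit),
    }
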
def _get_fields(self, extensions):
    to_return = extensions.copy()

    result = request(
        'DataExtensionField',
        FuelSDK.ET_DataExtension_Column,
        self.auth_stub)

    for field in result:
        extension_id = field.DataExtension.CustomerKey
        field = sudsobj_to_dict(field)
        field_name = field['Name']

        if field.get('IsPrimaryKey'):
            to_return = _merge_in(
                to_return,
                [extension_id, 'key_properties'],
                field_name)

        field_schema = {
            'type': [
                'null',
                _convert_extension_datatype(str(field.get('FieldType')))
            ],
            'description': str(field.get('Description')),
        }

        to_return = set_in(
            to_return,
            [extension_id, 'schema', 'properties', field_name],
            field_schema)

        # These fields are defaulted into the schema; do not add them to the
        # metadata again.
        if field_name not in {'_CustomObjectKey', 'CategoryID'}:
            to_return[extension_id]['metadata'].append({
                'breadcrumb': ('properties', field_name),
                'metadata': {'inclusion': 'available'}
            })

    return to_return

def _get_extensions(self):
    result = request('DataExtension',
                     FuelSDK.ET_DataExtension,
                     self.auth_stub,
                     props=['CustomerKey', 'Name'])

    to_return = {}

    for extension in result:
        extension_name = str(extension.Name)
        customer_key = str(extension.CustomerKey)

        to_return[customer_key] = {
            'tap_stream_id': 'data_extension.{}'.format(customer_key),
            'stream': 'data_extension.{}'.format(extension_name),
            'key_properties': ['_CustomObjectKey'],
            'schema': {
                'type': 'object',
                'inclusion': 'available',
                'selected-by-default': False,
                'properties': {
                    '_CustomObjectKey': {
                        'type': 'string',
                        'description': ('Hidden auto-incrementing primary '
                                        'key for data extension rows.'),
                    },
                    'CategoryID': {
                        'type': 'integer',
                        'description': ('Specifies the identifier of the '
                                        'folder. (Taken from the parent '
                                        'data extension.)')
                    }
                }
            },
        }

    return to_return

def sync_data_by_sendID(self, sendId):
    if not sendId:
        return

    table = self.__class__.TABLE
    _filter = {}

    if sendId:
        _filter = {
            'Property': 'SendID',
            'SimpleOperator': 'equals',
            'Value': sendId
        }
    else:
        LOGGER.info('No send id here, moving on')
        return

    stream = request(self.__class__.TABLE,
                     ET_LinkSend,
                     self.auth_stub,
                     _filter)

    for link_send in stream:
        link_send = self.filter_keys_and_parse(link_send)
        singer.write_records(table, [link_send])

def sync_data(self):
    table = self.__class__.TABLE
    endpoints = {
        'sent': FuelSDK.ET_SentEvent,
        'click': FuelSDK.ET_ClickEvent,
        'open': FuelSDK.ET_OpenEvent,
        'bounce': FuelSDK.ET_BounceEvent,
        'unsub': FuelSDK.ET_UnsubEvent
    }

    for event_name, selector in endpoints.items():
        search_filter = None

        # pass config to return the start date if no bookmark is found
        start = get_last_record_value_for_table(self.state,
                                                event_name,
                                                self.config)

        if start is None:
            raise RuntimeError('start_date not defined!')

        pagination_unit = self.config.get(
            'pagination__{}_interval_unit'.format(event_name), 'minutes')
        pagination_quantity = self.config.get(
            'pagination__{}_interval_quantity'.format(event_name), 10)

        unit = {pagination_unit: int(pagination_quantity)}

        end = increment_date(start, unit)

        while before_now(start):
            LOGGER.info("Fetching {} from {} to {}"
                        .format(event_name, start, end))

            search_filter = get_date_page('EventDate', start, unit)

            stream = request(event_name,
                             selector,
                             self.auth_stub,
                             search_filter,
                             batch_size=self.batch_size)

            catalog_copy = copy.deepcopy(self.catalog)

            for event in stream:
                event = self.filter_keys_and_parse(event)

                self.state = incorporate(self.state,
                                         event_name,
                                         'EventDate',
                                         event.get('EventDate'))

                if event.get('SubscriberKey') is None:
                    LOGGER.info(
                        "SubscriberKey is NULL so ignoring {} record "
                        "with SendID: {} and EventDate: {}".format(
                            event_name,
                            event.get('SendID'),
                            event.get('EventDate')))
                    continue

                self.write_records_with_transform(event, catalog_copy, table)

            self.state = incorporate(self.state,
                                     event_name,
                                     'EventDate',
                                     start)

            save_state(self.state)

            start = end
            end = increment_date(start, unit)

def sync_data(self):
    tap_stream_id = self.catalog.get('tap_stream_id')
    table = self.catalog.get('stream')
    (_, customer_key) = tap_stream_id.split('.', 1)

    keys = self.get_catalog_keys()
    keys.remove('CategoryID')

    replication_key = None

    start = get_last_record_value_for_table(self.state, table)

    if start is None:
        start = self.config.get('start_date')

    for key in ['ModifiedDate', 'JoinDate']:
        if key in keys:
            replication_key = key

    pagination_unit = self.config.get(
        'pagination__data_extension_interval_unit', 'days')
    pagination_quantity = self.config.get(
        'pagination__data_extension_interval_quantity', 7)

    unit = {pagination_unit: int(pagination_quantity)}

    end = increment_date(start, unit)

    parent_result = None
    parent_extension = None
    parent_result = request('DataExtension',
                            FuelSDK.ET_DataExtension,
                            self.auth_stub,
                            search_filter={
                                'Property': 'CustomerKey',
                                'SimpleOperator': 'equals',
                                'Value': customer_key,
                            },
                            props=['CustomerKey', 'CategoryID'])

    parent_extension = next(parent_result)
    parent_category_id = parent_extension.CategoryID

    while before_now(start) or replication_key is None:
        self._replicate(customer_key,
                        keys,
                        parent_category_id,
                        table,
                        partial=(replication_key is not None),
                        start=start,
                        end=end,
                        unit=unit,
                        replication_key=replication_key)

        if replication_key is None:
            return

        self.state = incorporate(self.state,
                                 table,
                                 replication_key,
                                 start)

        save_state(self.state)

        start = end
        end = increment_date(start, unit)