Ejemplo n.º 1
0
    def sync_data(self):
        """Fetch every Campaign record via the SOAP API and emit each one."""
        campaigns = request('Campaign', FuelSDK.ET_Campaign, self.auth_stub)

        for raw_campaign in campaigns:
            parsed = self.filter_keys_and_parse(raw_campaign)
            singer.write_records(self.__class__.TABLE, [parsed])
    def _get_fields(self, extensions):
        """Augment the extension catalog with per-field schema information.

        Retrieves every DataExtensionField via the SOAP API, records
        primary-key fields under 'key_properties', and adds a JSON-schema
        entry for each field under 'schema.properties'.
        """
        catalog = extensions.copy()

        fields = request('DataExtensionField',
                         FuelSDK.ET_DataExtension_Column, self.auth_stub)

        for raw_field in fields:
            parent_key = raw_field.DataExtension.CustomerKey
            field_dict = sudsobj_to_dict(raw_field)
            name = field_dict['Name']

            # Primary-key fields become key_properties of the extension.
            if field_dict.get('IsPrimaryKey'):
                catalog = _merge_in(catalog,
                                    [parent_key, 'key_properties'],
                                    name)

            schema_entry = {
                'type': [
                    'null',
                    _convert_extension_datatype(
                        str(field_dict.get('FieldType')))
                ],
                'description': str(field_dict.get('Description')),
            }

            catalog = set_in(catalog,
                             [parent_key, 'schema', 'properties', name],
                             schema_entry)

        return catalog
Ejemplo n.º 3
0
    def pull_subscribers_batch(self, subscriber_keys):
        """Fetch Subscriber records for the given keys and emit them.

        Uses an 'equals' filter for a single key and an 'IN' filter for
        several. An empty key list is a no-op.
        """
        if not subscriber_keys:
            # Fix: this message previously lived in an unreachable else
            # branch (this guard had already returned), so it never fired.
            LOGGER.info('Got empty set of subscriber keys, moving on')
            return

        table = self.__class__.TABLE

        if len(subscriber_keys) == 1:
            _filter = {
                'Property': 'SubscriberKey',
                'SimpleOperator': 'equals',
                'Value': subscriber_keys[0]
            }
        else:
            _filter = {
                'Property': 'SubscriberKey',
                'SimpleOperator': 'IN',
                'Value': subscriber_keys
            }

        stream = request('Subscriber', FuelSDK.ET_Subscriber, self.auth_stub,
                         _filter)

        for subscriber in stream:
            subscriber = self.filter_keys_and_parse(subscriber)
            subscriber = self.remove_sensitive_data(subscriber)
            singer.write_records(table, [subscriber])
Ejemplo n.º 4
0
    def sync_data(self):
        """Sync ContentArea records incrementally by ModifiedDate."""
        table = self.__class__.TABLE

        bookmark = get_last_record_value_for_table(self.state, table)

        search_filter = None
        if bookmark is not None:
            search_filter = {
                'Property': 'ModifiedDate',
                'SimpleOperator': 'greaterThan',
                'Value': bookmark
            }

        stream = request('ContentAreaDataAccessObject',
                         FuelSDK.ET_ContentArea, self.auth_stub,
                         search_filter)

        for raw in stream:
            record = self.filter_keys_and_parse(raw)
            self.state = incorporate(self.state, table, 'ModifiedDate',
                                     record.get('ModifiedDate'))
            singer.write_records(table, [record])

        save_state(self.state)
Ejemplo n.º 5
0
    def sync_data(self):
        """Sync List records incrementally by ModifiedDate."""
        table = self.__class__.TABLE

        bookmark = get_last_record_value_for_table(
            self.state, table, self.config.get('start_date'))

        search_filter = None
        if bookmark is not None:
            search_filter = {
                'Property': 'ModifiedDate',
                'SimpleOperator': 'greaterThan',
                'Value': bookmark
            }

        stream = request('List', FuelSDK.ET_List, self.auth_stub,
                         search_filter)

        for raw in stream:
            record = self.filter_keys_and_parse(raw)
            self.state = incorporate(self.state, table, 'ModifiedDate',
                                     record.get('ModifiedDate'))
            singer.write_records(table, [record])

        save_state(self.state)
Ejemplo n.º 6
0
    def sync_data(self):
        """Sync Send records incrementally by ModifiedDate."""
        table = self.__class__.TABLE

        # Config supplies the start date when no bookmark is found.
        bookmark = get_last_record_value_for_table(
            self.state, table, self.config)

        search_filter = None
        if bookmark is not None:
            search_filter = {
                'Property': 'ModifiedDate',
                'SimpleOperator': 'greaterThan',
                'Value': bookmark
            }

        stream = request('Send',
                         FuelSDK.ET_Send,
                         self.auth_stub,
                         search_filter,
                         batch_size=self.batch_size)

        catalog_copy = copy.deepcopy(self.catalog)

        for raw in stream:
            record = self.filter_keys_and_parse(raw)
            self.state = incorporate(self.state, table, 'ModifiedDate',
                                     record.get('ModifiedDate'))
            self.write_records_with_transform(record, catalog_copy, table)

        save_state(self.state)
Ejemplo n.º 7
0
    def sync_data(self):
        """Sync Email records incrementally by ModifiedDate."""
        table = self.__class__.TABLE

        bookmark = get_last_record_value_for_table(self.state, table)

        search_filter = None
        if bookmark is not None:
            search_filter = {
                'Property': 'ModifiedDate',
                'SimpleOperator': 'greaterThan',
                'Value': bookmark
            }

        stream = request('Email', FuelSDK.ET_Email, self.auth_stub,
                         search_filter)

        for raw in stream:
            record = self.filter_keys_and_parse(raw)
            self.state = incorporate(self.state, table, 'ModifiedDate',
                                     record.get('ModifiedDate'))
            singer.write_records(table, [record])

        save_state(self.state)
Ejemplo n.º 8
0
    def sync_data(self):
        """Sync ListSubscriber records in date-windowed pages.

        Walks from the bookmark (or the configured start date) up to now in
        configurable intervals; when subscriber replication is enabled,
        also pulls full Subscriber details for each 100-record batch.
        """
        table = self.__class__.TABLE
        subscriber_dao = SubscriberDataAccessObject(self.config, self.state,
                                                    self.auth_stub,
                                                    self.subscriber_catalog)

        # pass config to return start date if not bookmark is found
        start = get_last_record_value_for_table(self.state, table, self.config)

        # Window size per request page; defaults to one day.
        pagination_unit = self.config.get(
            'pagination__list_subscriber_interval_unit', 'days')
        pagination_quantity = self.config.get(
            'pagination__list_subscriber_interval_quantity', 1)

        unit = {pagination_unit: int(pagination_quantity)}

        end = increment_date(start, unit)

        all_subscribers_list = self._get_all_subscribers_list()

        while before_now(start):
            stream = request('ListSubscriber',
                             FuelSDK.ET_List_Subscriber,
                             self.auth_stub,
                             _get_list_subscriber_filter(
                                 all_subscribers_list, start, unit),
                             batch_size=self.batch_size)

            # Subscriber keys are pulled in fixed chunks of 100 per request.
            batch_size = 100

            if self.replicate_subscriber:
                subscriber_dao.write_schema()

            catalog_copy = copy.deepcopy(self.catalog)

            for list_subscribers_batch in partition_all(stream, batch_size):
                for list_subscriber in list_subscribers_batch:
                    list_subscriber = self.filter_keys_and_parse(
                        list_subscriber)

                    if list_subscriber.get('ModifiedDate'):
                        self.state = incorporate(
                            self.state, table, 'ModifiedDate',
                            list_subscriber.get('ModifiedDate'))

                    self.write_records_with_transform(list_subscriber,
                                                      catalog_copy, table)

                if self.replicate_subscriber:
                    # make the list of subscriber keys
                    subscriber_keys = list(
                        map(_get_subscriber_key, list_subscribers_batch))

                    # pass the list of 'subscriber_keys' to fetch subscriber details
                    subscriber_dao.pull_subscribers_batch(subscriber_keys)

            save_state(self.state)

            # Advance to the next time window.
            start = end
            end = increment_date(start, unit)
Ejemplo n.º 9
0
    def sync_data(self):
        """Sync all email event streams (sent/click/open/bounce/unsub).

        Each event type is paged through time windows from its own bookmark
        (or the configured start_date) up to now.
        """
        table = self.__class__.TABLE
        endpoints = {
            'sent': FuelSDK.ET_SentEvent,
            'click': FuelSDK.ET_ClickEvent,
            'open': FuelSDK.ET_OpenEvent,
            'bounce': FuelSDK.ET_BounceEvent,
            'unsub': FuelSDK.ET_UnsubEvent
        }

        for event_name, selector in endpoints.items():
            search_filter = None

            # Bookmarks are tracked per event type, not per table.
            start = get_last_record_value_for_table(self.state, event_name)

            if start is None:
                start = self.config.get('start_date')

            if start is None:
                raise RuntimeError('start_date not defined!')

            # Window size is configurable per event type; default 10 minutes.
            pagination_unit = self.config.get(
                'pagination__{}_interval_unit'.format(event_name), 'minutes')
            pagination_quantity = self.config.get(
                'pagination__{}_interval_quantity'.format(event_name), 10)

            unit = {pagination_unit: int(pagination_quantity)}

            end = increment_date(start, unit)

            while before_now(start):
                LOGGER.info("Fetching {} from {} to {}"
                            .format(event_name, start, end))

                search_filter = get_date_page('EventDate', start, unit)

                stream = request(event_name,
                                 selector,
                                 self.auth_stub,
                                 search_filter)

                for event in stream:
                    event = self.filter_keys_and_parse(event)

                    self.state = incorporate(self.state,
                                             event_name,
                                             'EventDate',
                                             event.get('EventDate'))

                    # NOTE: all event types write to the same table while
                    # state is kept per event_name.
                    singer.write_records(table, [event])

                # Mark the full window as processed even if it was empty.
                self.state = incorporate(self.state,
                                         event_name,
                                         'EventDate',
                                         start)

                save_state(self.state)

                start = end
                end = increment_date(start, unit)
Ejemplo n.º 10
0
    def sync_data(self):
        """Sync ListSubscriber records in date-windowed pages.

        Walks from the bookmark (or the configured start date) up to now in
        configurable intervals; when subscriber replication is enabled,
        also pulls full Subscriber details for each 100-record batch.
        """
        table = self.__class__.TABLE
        subscriber_dao = SubscriberDataAccessObject(self.config, self.state,
                                                    self.auth_stub,
                                                    self.subscriber_catalog)

        start = get_last_record_value_for_table(self.state, table,
                                                self.config.get('start_date'))

        if start is None:
            start = self.config.get('start_date')

        pagination_unit = self.config.get(
            'pagination__list_subscriber_interval_unit', 'days')
        # Fix: the quantity key was misspelled
        # ('pagination__list_subsctiber_interval_quantity'), so a configured
        # interval quantity was silently ignored; spelling now matches the
        # unit key above.
        pagination_quantity = self.config.get(
            'pagination__list_subscriber_interval_quantity', 1)

        unit = {pagination_unit: int(pagination_quantity)}

        end = increment_date(start, unit)

        all_subscribers_list = self._get_all_subscribers_list()

        while before_now(start):
            stream = request(
                'ListSubscriber', FuelSDK.ET_List_Subscriber, self.auth_stub,
                _get_list_subscriber_filter(all_subscribers_list, start, unit))

            # Subscriber keys are pulled in fixed chunks of 100 per request.
            batch_size = 100

            if self.replicate_subscriber:
                subscriber_dao.write_schema()

            for list_subscribers_batch in partition_all(stream, batch_size):
                for list_subscriber in list_subscribers_batch:
                    list_subscriber = self.filter_keys_and_parse(
                        list_subscriber)

                    if list_subscriber.get('ModifiedDate'):
                        self.state = incorporate(
                            self.state, table, 'ModifiedDate',
                            list_subscriber.get('ModifiedDate'))
                    list_subscriber = self.remove_sensitive_data(
                        list_subscriber)
                    singer.write_records(table, [list_subscriber])

                if self.replicate_subscriber:
                    subscriber_keys = list(
                        map(_get_subscriber_key, list_subscribers_batch))

                    subscriber_dao.pull_subscribers_batch(subscriber_keys)

            save_state(self.state)

            start = end
            end = increment_date(start, unit)
Ejemplo n.º 11
0
    def sync_data(self):
        """Sync one email event stream in date-windowed pages.

        Pages from the bookmark (or the configured start_date) to now,
        skipping events whose SubscriberKey is NULL.
        """
        table = self.__class__.TABLE

        search_filter = None

        # Bookmark is tracked per event name rather than per table.
        start = get_last_record_value_for_table(self.state, self.event_name,
                                                self.config.get('start_date'))

        if start is None:
            start = self.config.get('start_date')

        if start is None:
            raise RuntimeError('start_date not defined!')

        # Window size is configurable per event type; default 10 minutes.
        pagination_unit = self.config.get(
            'pagination__{}_interval_unit'.format(self.event_name), 'minutes')
        pagination_quantity = self.config.get(
            'pagination__{}_interval_quantity'.format(self.event_name), 10)

        unit = {pagination_unit: int(pagination_quantity)}

        end = increment_date(start, unit)

        while before_now(start):
            LOGGER.info("Fetching {} from {} to {}".format(
                self.event_name, start, end))

            search_filter = get_date_page('EventDate', start, unit)

            stream = request(self.event_name, self.selector, self.auth_stub,
                             search_filter)

            for event in stream:
                event = self.filter_keys_and_parse(event)

                self.state = incorporate(self.state, self.event_name,
                                         'EventDate', event.get('EventDate'))

                # Skip events that cannot be tied to a subscriber.
                if event.get('SubscriberKey') is None:
                    LOGGER.info(
                        "SubscriberKey is NULL so ignoring {} record with SendID: {} and EventDate: {}"
                        .format(self.event_name, event.get('SendID'),
                                event.get('EventDate')))
                    continue
                event = self.remove_sensitive_data(event)
                singer.write_records(table, [event])

            # Mark the whole window processed even when it yielded nothing.
            self.state = incorporate(self.state, self.event_name, 'EventDate',
                                     start)

            save_state(self.state)

            start = end
            end = increment_date(start, unit)
Ejemplo n.º 12
0
    def _get_extensions(self):
        """Discover DataExtension streams and build a catalog entry for each.

        Every entry carries the two fields shared by all extensions
        (_CustomObjectKey, CategoryID) plus metadata breadcrumbs; remaining
        fields are filled in later from DataExtensionField records.
        """
        extensions = request('DataExtension',
                             FuelSDK.ET_DataExtension,
                             self.auth_stub,
                             props=['CustomerKey', 'Name'],
                             batch_size=int(self.config.get('batch_size',
                                                            2500)))

        catalog = {}

        for ext in extensions:
            ext_name = str(ext.Name)
            ext_key = str(ext.CustomerKey)

            schema = {
                'type': 'object',
                'properties': {
                    '_CustomObjectKey': {
                        'type': ['null', 'string'],
                        'description': ('Hidden auto-incrementing primary '
                                        'key for data extension rows.'),
                    },
                    'CategoryID': {
                        'type': ['null', 'integer'],
                        'description': ('Specifies the identifier of the '
                                        'folder. (Taken from the parent '
                                        'data extension.)')
                    }
                }
            }

            metadata = [
                {'breadcrumb': (),
                 'metadata': {'inclusion': 'available'}},
                {'breadcrumb': ('properties', '_CustomObjectKey'),
                 'metadata': {'inclusion': 'available'}},
                {'breadcrumb': ('properties', 'CategoryID'),
                 'metadata': {'inclusion': 'available'}},
            ]

            catalog[ext_key] = {
                'tap_stream_id': 'data_extension.{}'.format(ext_key),
                'stream': 'data_extension.{}'.format(ext_name),
                'key_properties': ['_CustomObjectKey'],
                'schema': schema,
                'metadata': metadata,
            }

        return catalog
Ejemplo n.º 13
0
    def pull_subscribers_batch(self, subscriber_keys):
        """Fetch Subscriber records for the given keys and emit them.

        An empty key list is a no-op.
        """
        # Robustness: an empty key list would issue a pointless (and likely
        # invalid) 'IN' filter request; skip it entirely.
        if not subscriber_keys:
            return

        table = self.__class__.TABLE
        stream = request(
            'Subscriber', FuelSDK.ET_Subscriber, self.auth_stub, {
                'Property': 'SubscriberKey',
                'SimpleOperator': 'IN',
                'Value': subscriber_keys
            })

        for subscriber in stream:
            subscriber = self.filter_keys_and_parse(subscriber)

            singer.write_records(table, [subscriber])
Ejemplo n.º 14
0
    def sync_data(self):
        """Sync all Campaign records via the REST API with paging."""
        # use $pageSize and $page in the props for
        # this stream as it calls using REST API
        paging_props = {"$pageSize": self.batch_size, "$page": 1, "page": 1}

        cursor = request('Campaign',
                         FuelSDK.ET_Campaign,
                         self.auth_stub,
                         props=paging_props)

        catalog_copy = copy.deepcopy(self.catalog)

        for raw in cursor:
            record = self.filter_keys_and_parse(raw)
            self.write_records_with_transform(record, catalog_copy,
                                              self.TABLE)
Ejemplo n.º 15
0
    def sync_data(self):
        """Sync Send records plus optional ListSend/LinkSend children.

        INCREMENTAL mode filters by the CreatedDate bookmark; FULL_TABLE
        mode always re-reads from the configured start_date.
        """
        table = self.__class__.TABLE
        selector = FuelSDK.ET_Send
        list_sends_dao = ListSendDataAccessObject(self.config, self.state,
                                                  self.auth_stub,
                                                  self.listsend_catalog)
        link_sends_dao = LinkSendDataAccessObject(self.config, self.state,
                                                  self.auth_stub,
                                                  self.linksend_catalog)
        search_filter = None
        retrieve_all_since = get_last_record_value_for_table(
            self.state, table, self.config.get('start_date'))
        # FULL_TABLE ignores any bookmark and starts over from start_date.
        if self.REPLICATION_METHOD == "FULL_TABLE":
            retrieve_all_since = datetime.datetime.strptime(
                self.config.get('start_date'), "%Y-%m-%dT%H:%M:%SZ")

        if retrieve_all_since is not None:
            search_filter = {
                'Property': 'CreatedDate',
                'SimpleOperator': 'greaterThan',
                'Value': retrieve_all_since
            }

        stream = request('Send', selector, self.auth_stub, search_filter)

        if self.replicate_listsend:
            list_sends_dao.write_schema()

        if self.replicate_linksend:
            link_sends_dao.write_schema()

        for send in stream:
            send = self.filter_keys_and_parse(send)
            if self.replicate_listsend:
                list_sends_dao.sync_data_by_sendID(send.get('ID'))
            if self.replicate_linksend:
                link_sends_dao.sync_data_by_sendID(send.get('ID'))
            # Precedence: `and` binds tighter than `or`, so this writes when
            # (created after the bookmark date AND mode is INCREMENTAL) OR
            # mode is FULL_TABLE.
            # NOTE(review): if neither a bookmark nor start_date exists,
            # retrieve_all_since is None and .strftime raises here — confirm
            # start_date is always configured.
            if retrieve_all_since.strftime('%Y-%m-%d') < send.get(
                    'CreatedDate'
            )[:
              10] and self.REPLICATION_METHOD == 'INCREMENTAL' or self.REPLICATION_METHOD == 'FULL_TABLE':
                self.state = incorporate(self.state, table, 'CreatedDate',
                                         send.get('CreatedDate'))

                singer.write_records(table, [send])

        save_state(self.state)
Ejemplo n.º 16
0
    def sync_data(self):
        """Full-table sync of ListSend records.

        This endpoint is FULL_TABLE because 'ModifiedDate' is not
        retrievable, as discussed here:
        https://salesforce.stackexchange.com/questions/354332/not-getting-modifieddate-for-listsend-endpoint
        """
        table = self.__class__.TABLE

        stream = request('ListSend',
                         FuelSDK.ET_ListSend,
                         self.auth_stub,
                         batch_size=self.batch_size)

        catalog_copy = copy.deepcopy(self.catalog)

        for raw in stream:
            record = self.filter_keys_and_parse(raw)
            self.write_records_with_transform(record, catalog_copy, table)
Ejemplo n.º 17
0
    def _get_all_subscribers_list(self):
        """
        Find the 'All Subscribers' list via the SOAP API, and return it.
        """
        matches = list(request('List', FuelSDK.ET_List, self.auth_stub, {
            'Property': 'ListName',
            'SimpleOperator': 'equals',
            'Value': 'All Subscribers',
        }))

        # There must be exactly one such list; anything else is fatal.
        if len(matches) != 1:
            raise RuntimeError(
                'Found {} all subscriber lists, expected one!'.format(
                    len(matches)))

        return sudsobj_to_dict(matches[0])
Ejemplo n.º 18
0
    def _get_fields(self, extensions):
        """Enrich the extension catalog with field schemas and metadata.

        For every DataExtensionField: records primary keys under
        'key_properties', adds a JSON-schema entry under
        'schema.properties', and appends a metadata breadcrumb unless the
        field is already defaulted into the schema.
        """
        catalog = extensions.copy()

        fields = request(
            'DataExtensionField',
            FuelSDK.ET_DataExtension_Column,
            self.auth_stub)

        for raw_field in fields:
            parent_key = raw_field.DataExtension.CustomerKey
            field_dict = sudsobj_to_dict(raw_field)
            name = field_dict['Name']

            if field_dict.get('IsPrimaryKey'):
                catalog = _merge_in(
                    catalog,
                    [parent_key, 'key_properties'],
                    name)

            schema_entry = {
                'type': [
                    'null',
                    _convert_extension_datatype(
                        str(field_dict.get('FieldType')))
                ],
                'description': str(field_dict.get('Description')),
            }

            catalog = set_in(
                catalog,
                [parent_key, 'schema', 'properties', name],
                schema_entry)

            # These fields are defaulted into the schema, do not add to
            # metadata again.
            if name not in ('_CustomObjectKey', 'CategoryID'):
                catalog[parent_key]['metadata'].append({
                    'breadcrumb': ('properties', name),
                    'metadata': {'inclusion': 'available'}
                })

        return catalog
Ejemplo n.º 19
0
    def _get_extensions(self):
        """Discover DataExtension streams and build a catalog entry for each.

        Entries use the legacy schema style with inclusion and
        selected-by-default annotations inline.
        """
        extensions = request('DataExtension',
                             FuelSDK.ET_DataExtension,
                             self.auth_stub,
                             props=['CustomerKey', 'Name'])

        catalog = {}

        for ext in extensions:
            ext_name = str(ext.Name)
            ext_key = str(ext.CustomerKey)

            # Fields common to every data extension.
            properties = {
                '_CustomObjectKey': {
                    'type': 'string',
                    'description': ('Hidden auto-incrementing primary '
                                    'key for data extension rows.'),
                },
                'CategoryID': {
                    'type': 'integer',
                    'description': ('Specifies the identifier of the '
                                    'folder. (Taken from the parent '
                                    'data extension.)')
                }
            }

            catalog[ext_key] = {
                'tap_stream_id': 'data_extension.{}'.format(ext_key),
                'stream': 'data_extension.{}'.format(ext_name),
                'key_properties': ['_CustomObjectKey'],
                'schema': {
                    'type': 'object',
                    'inclusion': 'available',
                    'selected-by-default': False,
                    'properties': properties,
                },
            }

        return catalog
Ejemplo n.º 20
0
    def sync_data_by_sendID(self, sendId):
        """Fetch LinkSend records for the given SendID and emit them.

        A falsy sendId is a no-op.
        """
        if not sendId:
            # Fix: this message previously lived in an unreachable else
            # branch (this guard had already returned), so it never fired.
            LOGGER.info('No send id here, moving on')
            return

        table = self.__class__.TABLE
        _filter = {
            'Property': 'SendID',
            'SimpleOperator': 'equals',
            'Value': sendId
        }

        stream = request(self.__class__.TABLE, ET_LinkSend, self.auth_stub,
                         _filter)
        for link_send in stream:
            link_send = self.filter_keys_and_parse(link_send)
            singer.write_records(table, [link_send])
Ejemplo n.º 21
0
    def sync_data(self):
        """Sync all email event streams (sent/click/open/bounce/unsub).

        Each event type is paged through time windows from its own bookmark
        (or the configured start date) up to now; events without a
        SubscriberKey are skipped.
        """
        table = self.__class__.TABLE
        endpoints = {
            'sent': FuelSDK.ET_SentEvent,
            'click': FuelSDK.ET_ClickEvent,
            'open': FuelSDK.ET_OpenEvent,
            'bounce': FuelSDK.ET_BounceEvent,
            'unsub': FuelSDK.ET_UnsubEvent
        }

        for event_name, selector in endpoints.items():
            search_filter = None

            # pass config to return start date if not bookmark is found
            start = get_last_record_value_for_table(self.state, event_name, self.config)

            if start is None:
                raise RuntimeError('start_date not defined!')

            # Window size is configurable per event type; default 10 minutes.
            pagination_unit = self.config.get(
                'pagination__{}_interval_unit'.format(event_name), 'minutes')
            pagination_quantity = self.config.get(
                'pagination__{}_interval_quantity'.format(event_name), 10)

            unit = {pagination_unit: int(pagination_quantity)}

            end = increment_date(start, unit)

            while before_now(start):
                LOGGER.info("Fetching {} from {} to {}"
                            .format(event_name, start, end))

                search_filter = get_date_page('EventDate', start, unit)

                stream = request(event_name,
                                 selector,
                                 self.auth_stub,
                                 search_filter,
                                 batch_size=self.batch_size)

                catalog_copy = copy.deepcopy(self.catalog)

                for event in stream:
                    event = self.filter_keys_and_parse(event)

                    self.state = incorporate(self.state,
                                             event_name,
                                             'EventDate',
                                             event.get('EventDate'))

                    # Skip events that cannot be tied to a subscriber.
                    if event.get('SubscriberKey') is None:
                        LOGGER.info("SubscriberKey is NULL so ignoring {} record with SendID: {} and EventDate: {}"
                                    .format(event_name,
                                            event.get('SendID'),
                                            event.get('EventDate')))
                        continue

                    # NOTE: all event types write to the same table while
                    # state is kept per event_name.
                    self.write_records_with_transform(event, catalog_copy, table)

                # Mark the full window as processed even if it was empty.
                self.state = incorporate(self.state,
                                         event_name,
                                         'EventDate',
                                         start)

                save_state(self.state)

                start = end
                end = increment_date(start, unit)
Ejemplo n.º 22
0
    def sync_data(self):
        """Replicate rows for one data-extension stream.

        Uses ModifiedDate or JoinDate as the replication key when present
        in the catalog; otherwise performs a single full replication pass
        and returns without bookmarking.
        """
        tap_stream_id = self.catalog.get('tap_stream_id')
        table = self.catalog.get('stream')
        (_, customer_key) = tap_stream_id.split('.', 1)

        keys = self.get_catalog_keys()
        # CategoryID is taken from the parent extension, not from row data.
        keys.remove('CategoryID')

        replication_key = None

        start = get_last_record_value_for_table(self.state, table)

        if start is None:
            start = self.config.get('start_date')

        # Prefer JoinDate over ModifiedDate when both exist (last wins).
        for key in ['ModifiedDate', 'JoinDate']:
            if key in keys:
                replication_key = key

        pagination_unit = self.config.get(
            'pagination__data_extension_interval_unit', 'days')
        pagination_quantity = self.config.get(
            'pagination__data_extension_interval_quantity', 7)

        unit = {pagination_unit: int(pagination_quantity)}

        end = increment_date(start, unit)

        # Look up the parent extension to obtain its CategoryID.
        parent_result = None
        parent_extension = None
        parent_result = request('DataExtension',
                                FuelSDK.ET_DataExtension,
                                self.auth_stub,
                                search_filter={
                                    'Property': 'CustomerKey',
                                    'SimpleOperator': 'equals',
                                    'Value': customer_key,
                                },
                                props=['CustomerKey', 'CategoryID'])

        parent_extension = next(parent_result)
        parent_category_id = parent_extension.CategoryID

        # Without a replication key the body runs exactly once (full pass,
        # then return); otherwise it pages window-by-window until now.
        while before_now(start) or replication_key is None:
            self._replicate(customer_key,
                            keys,
                            parent_category_id,
                            table,
                            partial=(replication_key is not None),
                            start=start,
                            end=end,
                            unit=unit,
                            replication_key=replication_key)

            if replication_key is None:
                return

            self.state = incorporate(self.state, table, replication_key, start)

            save_state(self.state)

            start = end
            end = increment_date(start, unit)