Example #1
def sync_ad_groups(client, account_id, campaign_ids, selected_streams):
    ad_group_ids = []
    for campaign_id in campaign_ids:
        response = client.GetAdGroupsByCampaignId(CampaignId=campaign_id)
        response_dict = sobject_to_dict(response)

        if 'AdGroup' in response_dict:
            ad_groups = response_dict['AdGroup']

            if 'ad_groups' in selected_streams:
                LOGGER.info('Syncing AdGroups for Account: %s, Campaign: %s',
                            account_id, campaign_id)
                selected_fields = get_selected_fields(
                    selected_streams['ad_groups'])
                singer.write_schema('ad_groups',
                                    get_core_schema(client, 'AdGroup'), ['Id'])
                with metrics.record_counter('ad_groups') as counter:
                    singer.write_records(
                        'ad_groups',
                        filter_selected_fields_many(selected_fields,
                                                    ad_groups))
                    counter.increment(len(ad_groups))

            # Collect ids only when the campaign actually returned ad groups
            ad_group_ids += [ad_group['Id'] for ad_group in ad_groups]
    return ad_group_ids
Example #2
    def sync_data(self):
        table = self.__class__.TABLE
        selector = FuelSDK.ET_Email

        search_filter = None
        retrieve_all_since = get_last_record_value_for_table(self.state, table)

        if retrieve_all_since is not None:
            search_filter = {
                'Property': 'ModifiedDate',
                'SimpleOperator': 'greaterThan',
                'Value': retrieve_all_since
            }

        stream = request('Email', selector, self.auth_stub, search_filter)

        for email in stream:
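            # Parse the record and advance the ModifiedDate bookmark before writing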
            email = self.filter_keys_and_parse(email)

            self.state = incorporate(self.state, table, 'ModifiedDate',
                                     email.get('ModifiedDate'))

            singer.write_records(table, [email])

        save_state(self.state)
Example #3
def sync_table_file(config, s3_file, table_spec, schema):
    logger.info('Syncing file "{}".'.format(s3_file))

    bucket = config['bucket']
    table_name = table_spec['name']

    iterator = tap_s3_csv.format_handler.get_row_iterator(
        config, table_spec, s3_file)

    records_synced = 0

    for row in iterator:
        metadata = {
            '_s3_source_bucket': bucket,
            '_s3_source_file': s3_file,

            # rows are zero-indexed and line 1 is the header, hence +2
            '_s3_source_lineno': records_synced + 2
        }

        try:
            to_write = [{**conversion.convert_row(row, schema), **metadata}]
            singer.write_records(table_name, to_write)
        except BrokenPipeError as bpe:
            logger.error(
                f'Pipe to loader broke after {records_synced} records were written from {s3_file}: troubled line was {row}'
            )
            raise bpe

        records_synced += 1

    return records_synced
Example #4
    def sync_data(self):
        table = self.TABLE
        LOGGER.info('Syncing data for entity {}'.format(table))

        url = self.get_url(self.api_path)
        body = self.get_body()

        index = 0
        count = 5000
        while True:
            LOGGER.info('Syncing {} rows from index {}'.format(count, index))

            params = self.get_params(index, count)
            result = self.client.make_request(url,
                                              self.API_METHOD,
                                              params=params,
                                              body=body)

            data = self.get_stream_data(result)
            if len(data) == 0:
                break
            else:
                index += count

            with singer.metrics.record_counter(endpoint=table) as counter:
                for obj in data:
                    singer.write_records(table, [obj])

                    counter.increment()

        return self.state
Example #5
def do_sync(base, start_date, api_key):
    logger.info('Replicating exchange rate data from fixer.io starting from {}'.format(start_date))
    singer.write_schema('exchange_rate', schema, 'date')

    state = {'start_date': start_date}
    next_date = start_date
    
    try:
        while True:
            # Stop before requesting a date that is still in the future
            if datetime.strptime(next_date, DATE_FORMAT) > datetime.utcnow():
                break

            response = request(base_url + next_date, {'base': base, 'access_key': api_key})
            payload = response.json()

            singer.write_records('exchange_rate', [parse_response(payload)])
            state = {'start_date': next_date}
            next_date = (datetime.strptime(next_date, DATE_FORMAT) + timedelta(days=1)).strftime(DATE_FORMAT)

    except requests.exceptions.RequestException as e:
        logger.fatal('Error on ' + e.request.url +
                     '; received status ' + str(e.response.status_code) +
                     ': ' + e.response.text)
        singer.write_state(state)
        sys.exit(-1)

    singer.write_state(state)
    logger.info('Tap exiting normally')
Example #6
def sync(config, state, catalog):
    """ Sync data from tap source """
    # Loop over selected streams in catalog
    for stream in catalog.get_selected_streams(state):
        LOGGER.info("Syncing stream: " + stream.tap_stream_id)

        bookmark_column = stream.replication_key
        is_sorted = False

        singer.write_schema(
            stream_name=stream.tap_stream_id,
            schema=stream.schema.to_dict(),
            key_properties=stream.key_properties,
        )

        max_bookmark = None
        for row in tap_data(config=config, stream=stream):
            transformed = transform(row)
            singer.write_records(stream.tap_stream_id, [transformed])
            if bookmark_column:
                if is_sorted:
                    # update bookmark to latest value
                    singer.write_state(
                        {stream.tap_stream_id: row[bookmark_column]})
                else:
                    # if data unsorted, save max value until end of writes
                    # max() raises TypeError against the initial None, so compare explicitly
                    if max_bookmark is None or row[bookmark_column] > max_bookmark:
                        max_bookmark = row[bookmark_column]
        if bookmark_column and not is_sorted:
            singer.write_state({stream.tap_stream_id: max_bookmark})
    return
Example #7
    def write_metadados(self):
        schema = {
            'properties': {
                "id": {"type": "integer"},
                "nome": {"type": "string"},
                "URL": {"type": "string"},
                "pesquisa": {"type": "string"},
                "assunto": {"type": "string"},
                "periodicidade_frequencia": {"type": "string"},
                "periodicidade_inicio": {"type": "integer"},
                "periodicidade_fim": {"type": "integer"},
                "timestamp": {"type": "string", "format": "date-time"},
            }
        }
        singer.write_schema('metadados_agregados', schema, ['id'])
        singer.write_records('metadados_agregados', self.metadados)
Example #8
    def sync_data(self, return_ids=False):
        table = self.TABLE

        response = self.client.make_request(self.get_url(), 'GET')

        # In development, the fastest way to decrease iteration time is to
        # slice the following data down to something very small like 10.
        #
        # all_technicians = self.get_stream_data(response)[:10]
        all_technicians = self.get_stream_data(response)

        if not return_ids:
            with singer.metrics.record_counter(endpoint=table) as counter:
                for obj in all_technicians:
                    singer.write_records(table, [obj])

                    counter.increment()

        technician_ids = sorted(
            [technician.get('nodeid') for technician in all_technicians])

        if return_ids:
            return technician_ids

        for substream in self.substreams:
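            # Share state with the child stream, sync it, then pick the updated state back up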
            substream.state = self.state
            LOGGER.info("Syncing {}".format(substream.TABLE))
            substream.sync_data(parent_ids=technician_ids)
            self.state = substream.state
Example #9
    def sync_data(self):
        table = self.TABLE

        LOGGER.info('Syncing data for {}'.format(table))
        url = self.get_url()
        params = self.get_params()
        body = self.get_body()

        while True:
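            # Fetch the next page; an empty page means we have reached the end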
            response = self.client.make_request(url,
                                                self.API_METHOD,
                                                params=params,
                                                body=body)
            transformed = self.get_stream_data(response)

            with singer.metrics.record_counter(endpoint=table) as counter:
                singer.write_records(table, transformed)
                counter.increment(len(transformed))

            page_number = body['page_number']
            LOGGER.info('Synced page {} for {}'.format(page_number, table))

            if len(transformed) == 0:
                break

            body['page_number'] += 1
            self.save_state(transformed[-1])

        return self.state
Example #10
    def sync_data(self):
        table = self.__class__.TABLE
        selector = FuelSDK.ET_ContentArea

        search_filter = None
        retrieve_all_since = get_last_record_value_for_table(self.state, table)

        if retrieve_all_since is not None:
            search_filter = {
                'Property': 'ModifiedDate',
                'SimpleOperator': 'greaterThan',
                'Value': retrieve_all_since
            }

        stream = request('ContentAreaDataAccessObject', selector,
                         self.auth_stub, search_filter)

        for content_area in stream:
            content_area = self.filter_keys_and_parse(content_area)

            self.state = incorporate(self.state, table, 'ModifiedDate',
                                     content_area.get('ModifiedDate'))

            singer.write_records(table, [content_area])

        save_state(self.state)
Example #11
def sync(config, state, catalog):
    """ Sync data from tap source """
    # Loop over selected streams in catalog

    for stream in catalog.get_selected_streams(state):
        LOGGER.info('Syncing stream: %s', stream.tap_stream_id)

        bookmark_column = stream.replication_key
        is_sorted = True  # TODO: indicate whether data is sorted ascending on bookmark value

        singer.write_schema(stream_name=stream.tap_stream_id,
                            schema=stream.schema.to_dict(),
                            key_properties=stream.key_properties)

        max_bookmark = None
        for row in tap_data():
            # TODO: place type conversions or transformations here

            # write one or more rows to the stream:
            singer.write_records(stream.tap_stream_id, [row])
            if bookmark_column:
                if is_sorted:
                    # update bookmark to latest value
                    singer.write_state(
                        {stream.tap_stream_id: row[bookmark_column]})
                else:
                    # if data unsorted, save max value until end of writes
                    if max_bookmark is None or row[bookmark_column] > max_bookmark:
                        max_bookmark = row[bookmark_column]
        if bookmark_column and not is_sorted:
            singer.write_state({stream.tap_stream_id: max_bookmark})
    return
Example #12
def sync(config, state, catalog):
    """ Sync data from tap source """
    LOGGER = singer.get_logger()
    ref = get_ref()
    # Loop over selected streams in catalog
    for stream in catalog.get_selected_streams(state):
        LOGGER.info("Syncing stream: " + stream.tap_stream_id)

        bookmark_column = stream.replication_key
        singer.write_schema(stream_name=stream.tap_stream_id,
                            schema=stream.schema.to_dict(),
                            key_properties=stream.key_properties)
        
        s_date = singer.get_bookmark(state, stream.tap_stream_id, "startdate")
        e_date = singer.get_bookmark(state, stream.tap_stream_id, "lastrun")
        offset = (pd.to_datetime(e_date) - pd.to_datetime(s_date)).days
        
        if offset <= 0:
            offset = 1
        
        tokens = {"accessTokenKey": config["token_key"], "accessTokenSecret": config["token_secret"]}
        query = {"datefiltermode": config["datefiltermode"], "filterorder": config["filterorder"], "dateinterval": config["interval"], "dateintervaloffset": offset }
        
        # TODO: delete and replace this inline function with your own data retrieval process:
        response = make_request(config, stream, tokens, query)
        
        tap_data, state = append_data(response, state, config, stream, tokens, query)
        for row in tap_data:
            # write one or more rows to the stream:
            singer.write_records(stream.tap_stream_id, [row])
        new_offset = query["dateintervaloffset"] - config["interval"]
        new_date = (datetime.now() - timedelta(new_offset)).strftime("%Y-%m-%d")
        state = write_bookmark(state, stream.tap_stream_id, "startdate", new_date)
        state = write_bookmark(state, stream.tap_stream_id, "lastrun", datetime.now().strftime("%Y-%m-%d"))
    singer.write_state(state)
    return
Example #13
def get_incremental_pull_additional_properties(stream, endpoint, state,
                                               api_key, start_date):
    latest_event_time = get_starting_point_additional_properties(
        stream, state, start_date)
    with metrics.record_counter(stream['stream']) as counter:
        url = '{}{}/export'.format(endpoint, stream['tap_stream_id'])
        for response in get_all_additional_properties_using_next(
                stream['stream'], url, api_key, latest_event_time):
            events = response.json()
            if events:
                # latest_date = None
                for result in events.get('results', []):
                    result['id'] = events.get('id')
                    result['data'][0]['segment'] = result['segment']
                    result['data'][0]['metric'] = events['metric']
                    counter.increment(1)
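                    # Build a deterministic surrogate id from segment + date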
                    hash_object = hashlib.sha224(
                        (result['segment'] +
                         result['data'][0]['date']).encode('utf-8'))
                    result['data'][0]['id'] = hash_object.hexdigest()

                    singer.write_records(stream['stream'], result.get('data'))
                update_state(state, stream['stream'],
                             datetime.datetime.today().strftime(DATETIME_FMT))
                singer.write_state(state)

    return state
Example #14
def write_file(target_filename, table_spec, schema):
    LOGGER.info('Syncing file "{}".'.format(target_filename))
    target_uri = table_spec['path'] + '/' + target_filename
    iterator = tap_spreadsheets_anywhere.format_handler.get_row_iterator(
        table_spec, target_uri)
    records_synced = 0

    for row in iterator:
        metadata = {
            '_smart_source_bucket': table_spec['path'],
            '_smart_source_file': target_filename,
            # rows are zero-indexed and line 1 is the header, hence +2
            '_smart_source_lineno': records_synced + 2
        }

        try:
            to_write = [{**conversion.convert_row(row, schema), **metadata}]
            singer.write_records(table_spec['name'], to_write)
        except BrokenPipeError as bpe:
            LOGGER.error(
                f'Pipe to loader broke after {records_synced} records were written from {target_filename}: troubled '
                f'line was {to_write[0]}')
            raise bpe

        records_synced += 1

    return records_synced
Example #15
    def sync_records(self, request_config, end_date=None):
        table = self.TABLE
        raw_orders = self.client.fetch_orders(request_config)
        orders = self.get_stream_data(raw_orders)

        with singer.metrics.record_counter(endpoint=table) as counter:
            singer.write_records(table, orders)
            counter.increment(len(orders))

        if len(orders) > 0:
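            # Advance the bookmark to the last order's LastUpdateDate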
            state_key = 'LastUpdateDate'
            last_order = orders[-1]
            order_time = last_order[state_key]
            self.state = incorporate(self.state, self.TABLE, state_key,
                                     order_time)
            save_state(self.state)

            parsed = parse(order_time).date()
            if end_date is not None and parsed > end_date:
                LOGGER.info(
                    "Synced past the specified end_date ({}) - quitting".
                    format(parsed))
                return None, orders

        next_token = raw_orders.parsed.get('NextToken', {}).get('value')
        return next_token, orders
Example #16
def get_characters():
    marvel_limit = 100
    offset = 0

    # Format the time
    t = time.strftime("%Y%d%m%H%M%S")
    m = hashlib.md5()
    m.update("{}{}{}".format(t, CONFIG['private_key'], CONFIG['public_key']).encode("utf-8"))
    hash = m.hexdigest()

    # Now let's get Singer going
    schema = {
        'type': 'object',
        'properties': {
            'id': {'type': 'integer'},
            'name': {'type': 'string'},
            'modified': {'type': 'string', 'format': 'date-time'}
        }
    }

    singer.write_schema('characters', schema, 'id')

    # Call the API and get records until there aren't anymore
    while True:
        response = requests.get(
            'https://gateway.marvel.com:443/v1/public/characters'
            '?orderBy=modified&apikey={}&ts={}&hash={}&limit={}&offset={}'.format(
                CONFIG['public_key'], t, hash, marvel_limit, offset))

        body = response.json()['data']

        singer.write_records('characters', body['results'])

        offset = offset + marvel_limit

        if body['count'] < marvel_limit:
            break
Example #17
def sync_users(config, state):
    stream_id = 'users'
    api_key = config['api_key']
    sc = SlingClient(api_key)

    raw_users = sc.make_request('users')

    user_records = []
    for user in raw_users:
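        # Map the raw API payload onto snake_case record fields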
        record = {
            'id': str(user.get('id')) if user.get('id') else None,
            'type': user.get('type'),
            'name': user.get('name'),
            'last_name': user.get('lastname'),
            'avatar': user.get('avatar'),
            'email': user.get('email'),
            'timezone': user.get('timezone'),
            'hours_cap': user.get('hoursCap'),
            'active': user.get('active'),
            'deactivated_at': user.get('deactivatedAt'),
        }
        user_records.append(record)

    singer.write_records(stream_id, user_records)

    return state
Example #18
    def sync_data(self):
        table = self.TABLE
        page = 1

        LOGGER.info('Syncing data for entity {} (page={})'.format(table, page))

        url = "{}{}".format(self.client.base_url, self.api_path)

        while True:
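            # Fetch the next page until the API reports we are past the last one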
            params = self.get_params(page=page)
            body = self.get_body()

            result = self.client.make_request(url,
                                              self.API_METHOD,
                                              params=params,
                                              body=body)

            data = self.get_stream_data(result)

            with singer.metrics.record_counter(endpoint=table) as counter:
                for obj in data:
                    singer.write_records(table, [obj])

                    counter.increment()

            paging = result['paging']
            if page >= paging['totalPages']:
                break
            page += 1

        return self.state
Example #19
    def sync_data(self):
        table = self.__class__.TABLE
        selector = FuelSDK.ET_List

        search_filter = None
        retrieve_all_since = get_last_record_value_for_table(
            self.state, table, self.config.get('start_date'))

        if retrieve_all_since is not None:
            search_filter = {
                'Property': 'ModifiedDate',
                'SimpleOperator': 'greaterThan',
                'Value': retrieve_all_since
            }

        stream = request('List', selector, self.auth_stub, search_filter)

        for _list in stream:
            _list = self.filter_keys_and_parse(_list)

            self.state = incorporate(self.state, table, 'ModifiedDate',
                                     _list.get('ModifiedDate'))

            singer.write_records(table, [_list])

        save_state(self.state)
Example #20
    def sync_data(self):
        cursor = request('Campaign', FuelSDK.ET_Campaign, self.auth_stub)

        for campaign in cursor:
            campaign = self.filter_keys_and_parse(campaign)

            singer.write_records(self.__class__.TABLE, [campaign])
Example #21
    def pull_subscribers_batch(self, subscriber_keys):
        if not subscriber_keys:
            return

        table = self.__class__.TABLE
        _filter = {}

        if len(subscriber_keys) == 1:
            _filter = {
                'Property': 'SubscriberKey',
                'SimpleOperator': 'equals',
                'Value': subscriber_keys[0]
            }

        elif len(subscriber_keys) > 1:
            _filter = {
                'Property': 'SubscriberKey',
                'SimpleOperator': 'IN',
                'Value': subscriber_keys
            }
        else:
            LOGGER.info('Got empty set of subscriber keys, moving on')
            return

        stream = request('Subscriber', FuelSDK.ET_Subscriber, self.auth_stub,
                         _filter)

        for subscriber in stream:
            subscriber = self.filter_keys_and_parse(subscriber)
            subscriber = self.remove_sensitive_data(subscriber)
            singer.write_records(table, [subscriber])
Example #22
def sync(client, config, state, catalog):
    """ Sync data from tap source """
    # Loop over selected streams in catalog
    for stream in catalog.get_selected_streams(state):
        stream_id = stream.tap_stream_id
        logger.info("Syncing stream: " + stream_id)

        singer.write_schema(
            stream_name=stream_id,
            schema=stream.schema.to_dict(),
            key_properties=stream.key_properties,
        )

        yesterday = datetime.now() - timedelta(1)
        day = state.get(stream_id) or config.get('start_date')
        day = datetime.strptime(day, DATE_FORMAT) if day else yesterday

        while day <= yesterday:
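            # Request one daily report at a time, bookmarking after each day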
            tap_data = client.request_report(stream, day)
            singer.write_records(stream_id, tap_data)
            state[stream_id] = day.strftime(DATE_FORMAT)
            singer.write_state(state)
            day += timedelta(1)

    return
Example #23
def sync(config, state, catalog):
    """ Sync data from tap source """
    # Loop over selected streams in catalog
    for stream in catalog.get_selected_streams(state):
        LOGGER.info("Syncing stream: " + stream.tap_stream_id)

        bookmark_column = stream.replication_key
        is_sorted = True  # TODO: indicate whether data is sorted ascending on bookmark value

        singer.write_schema(
            stream_name=stream.tap_stream_id,
            schema=stream.schema.to_dict(),
            key_properties=stream.key_properties,
        )

        # TODO: delete and replace this inline function with your own data retrieval process:
        tap_data = lambda: [{"id": x, "name": f'row${x}'} for x in range(1000)]

        max_bookmark = None
        for row in tap_data():
            # TODO: place type conversions or transformations here

            # write one or more rows to the stream:
            singer.write_records(stream.tap_stream_id, [row])
            if bookmark_column:
                if is_sorted:
                    # update bookmark to latest value
                    singer.write_state(
                        {stream.tap_stream_id: row[bookmark_column]})
                else:
                    # if data unsorted, save max value until end of writes
                    if max_bookmark is None or row[bookmark_column] > max_bookmark:
                        max_bookmark = row[bookmark_column]
        if bookmark_column and not is_sorted:
            singer.write_state({stream.tap_stream_id: max_bookmark})
    return
Example #24
    def sync_paginated(self, url, params=None, async_session=None):
        table = self.TABLE
        _next = True
        page = 1

        all_resources = []
        transformer = singer.Transformer(singer.UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)
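        # Keep fetching pages until the API stops returning a "next" offset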
        while _next is not None:
            result = self.client.make_request(url, self.API_METHOD, params=params)
            _next = result.get('next')
            data = self.get_stream_data(result['data'], transformer)

            with singer.metrics.record_counter(endpoint=table) as counter:
                singer.write_records(
                    table,
                    data)
                counter.increment(len(data))
                all_resources.extend(data)

            if _next:
                params = params or {}
                params['offset'] = _next

            LOGGER.info('Synced page {} for {}'.format(page, self.TABLE))
            page += 1
        transformer.log_warning()
        return all_resources
Example #25
def get_all_commits(repo_path, state):
    if 'commits' in state and state['commits'] is not None:
        query_string = '?since={}'.format(state['commits'])
    else:
        query_string = ''

    latest_commit_time = None

    with metrics.record_counter('commits') as counter:
        for response in authed_get_all_pages(
                'commits', 'https://api.github.com/repos/{}/commits{}'.format(
                    repo_path, query_string)):
            commits = response.json()

            for commit in commits:
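                # Strip the nested author/committer user objects before writing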
                counter.increment()
                commit.pop('author', None)
                commit.pop('committer', None)

            singer.write_records('commits', commits)
            if commits and not latest_commit_time:
                latest_commit_time = commits[0]['commit']['committer']['date']

    state['commits'] = latest_commit_time
    return state
Example #26
def sync_accounts_stream(account_ids, catalog_item):
    selected_fields = get_selected_fields(catalog_item)
    accounts = []

    LOGGER.info('Initializing CustomerManagementService client - Loading WSDL')
    client = CustomServiceClient('CustomerManagementService')
    account_schema = get_core_schema(client, 'AdvertiserAccount')
    singer.write_schema('accounts', account_schema, ['Id'])

    for account_id in account_ids:
        # Loop over the multiple account_ids
        client = create_sdk_client('CustomerManagementService', account_id)
        # Get account data
        response = client.GetAccount(AccountId=account_id)
        accounts.append(sobject_to_dict(response))

    accounts_bookmark = singer.get_bookmark(STATE, 'accounts', 'last_record')
    if accounts_bookmark:
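        # Keep only accounts modified at or after the saved bookmark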
        accounts = list(
            filter(
                lambda x: x is not None and x['LastModifiedTime'] >=
                accounts_bookmark, accounts))

    max_accounts_last_modified = max([x['LastModifiedTime'] for x in accounts])

    with metrics.record_counter('accounts') as counter:
        # Write only selected fields
        singer.write_records(
            'accounts', filter_selected_fields_many(selected_fields, accounts))
        counter.increment(len(accounts))

    singer.write_bookmark(STATE, 'accounts', 'last_record',
                          max_accounts_last_modified)
    singer.write_state(STATE)
Example #27
def sync_table_file(config, s3_file, table_spec, schema):
    logger.info('Syncing file "{}".'.format(s3_file))

    bucket = config['bucket']
    table_name = table_spec['name']

    iterator = tap_s3_csv.format_handler.get_row_iterator(
        config, table_spec, s3_file)

    records_synced = 0

    for row in iterator:
        metadata = {
            '_s3_source_bucket': bucket,
            '_s3_source_file': s3_file,

            # rows are zero-indexed and line 1 is the header, hence +2
            '_s3_source_lineno': records_synced + 2
        }

        to_write = [{**conversion.convert_row(row, schema), **metadata}]
        singer.write_records(table_name, to_write)
        records_synced += 1

    return records_synced
Example #28
def sync(config, state, catalog):
    """ Sync data from tap source """
    geo_source = GeoSource(path=config['path'], config=config)

    for stream in catalog.get_selected_streams(state):
        # Fetch the layer from the geo source
        layer = geo_source.layers[stream.tap_stream_id]

        # Log some info about the stream
        LOGGER.info(
            f'Syncing stream: {stream.tap_stream_id} | Transformation: {str(layer.should_transform)}')

        singer.write_schema(
            stream_name=stream.tap_stream_id,
            schema=stream.schema.to_dict(),
            key_properties=stream.key_properties,
        )

        with record_counter(log_interval=10) as counter:
            for row in layer.features():
                # Write a row to the stream
                singer.write_records(stream.tap_stream_id, [row])

                # Log the records
                counter.increment()
Example #29
    def sync_data(self):
        table = self.TABLE
        LOGGER.info('Syncing data for entity {}'.format(table))

        domain = self.get_domain()

        url = (
            'https://{domain}{api_path}'.format(
                domain=domain,
                api_path=self.api_path))

        params = self.get_params()
        body = self.get_body()

        result = self.client.make_request(
            url, self.API_METHOD, params=params, body=body)
        data = self.get_stream_data(result)

        with singer.metrics.record_counter(endpoint=table) as counter:
            for obj in data:
                singer.write_records(
                    table,
                    [obj])

                counter.increment()
        return self.state
Example #30
    def sync_data(self):
        table = self.__class__.TABLE
        endpoints = {
            'sent': FuelSDK.ET_SentEvent,
            'click': FuelSDK.ET_ClickEvent,
            'open': FuelSDK.ET_OpenEvent,
            'bounce': FuelSDK.ET_BounceEvent,
            'unsub': FuelSDK.ET_UnsubEvent
        }

        for event_name, selector in endpoints.items():
            search_filter = None

            start = get_last_record_value_for_table(self.state, event_name)

            if start is None:
                start = self.config.get('start_date')

            if start is None:
                raise RuntimeError('start_date not defined!')

            pagination_unit = self.config.get(
                'pagination__{}_interval_unit'.format(event_name), 'minutes')
            pagination_quantity = self.config.get(
                'pagination__{}_interval_quantity'.format(event_name), 10)

            unit = {pagination_unit: int(pagination_quantity)}

            end = increment_date(start, unit)

            while before_now(start):
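                # Walk forward one pagination window at a time until we reach the present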
                LOGGER.info("Fetching {} from {} to {}"
                            .format(event_name, start, end))

                search_filter = get_date_page('EventDate', start, unit)

                stream = request(event_name,
                                 selector,
                                 self.auth_stub,
                                 search_filter)

                for event in stream:
                    event = self.filter_keys_and_parse(event)

                    self.state = incorporate(self.state,
                                             event_name,
                                             'EventDate',
                                             event.get('EventDate'))

                    singer.write_records(table, [event])

                self.state = incorporate(self.state,
                                         event_name,
                                         'EventDate',
                                         start)

                save_state(self.state)

                start = end
                end = increment_date(start, unit)
Example #31
    def sync_data_for_date(self, date, interval):
        LOGGER.info('Syncing data for {}'.format(date.isoformat()))
        table = self.TABLE

        updated_after = date
        updated_before = updated_after + interval
        cursor = None
        has_data = True
        page = 1

        while has_data:
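            # Page through results for this date window using the pagination cursor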
            url = 'https://{}.uservoice.com{}'.format(
                self.config.get('subdomain'),
                self.API_PATH)

            result = self.client.fetch_data(
                url, updated_after, updated_before, cursor,
                endpoint=table)

            cursor = result.get('pagination', {}).get('cursor')
            total_pages = result.get('pagination', {}).get('total_pages')
            data = self.get_stream_data(result)
            has_data = ((data is not None) and (len(data) > 0))

            if has_data:
                with singer.metrics.record_counter(endpoint=table) \
                     as counter:
                    for obj in data:
                        singer.write_records(
                            table,
                            [self.filter_keys(obj)])

                        counter.increment()

                        self.state = incorporate(self.state,
                                                 table,
                                                 'updated_at',
                                                 obj.get('updated_at'))

                if page == total_pages:
                    LOGGER.info('Reached end of stream, moving on.')
                    has_data = False

                elif cursor is None:
                    raise RuntimeError(('Found data, but there is no '
                                        'continuation cursor! (Expected '
                                        '{} pages, found {})').format(
                                            total_pages,
                                            page))

            else:
                LOGGER.info('No data returned, moving on.')

            page = page + 1

        self.state = incorporate(self.state,
                                 table,
                                 'updated_at',
                                 date.isoformat())

        save_state(self.state)