Example #1
def sync_endpoint(endpoint, path, date_fields=None):
    schema = load_schema(endpoint)
    bookmark_property = 'updated_at'

    singer.write_schema(endpoint,
                        schema, ["id"],
                        bookmark_properties=[bookmark_property])

    start = get_start(endpoint)

    url = get_url(endpoint)
    data = request(url)
    time_extracted = utils.now()

    with Transformer() as transformer:
        for row in data:
            item = row[path]
            item = transformer.transform(item, schema)

            append_times_to_dates(item, date_fields)

            if item[bookmark_property] >= start:
                singer.write_record(endpoint,
                                    item,
                                    time_extracted=time_extracted)

                utils.update_state(STATE, endpoint, item[bookmark_property])

    singer.write_state(STATE)
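These examples all lean on a few module-level helpers that the snippets themselves do not show: a STATE dict holding bookmarks, a CONFIG dict holding the tap configuration, and a get_start helper that falls back to the configured start date when no bookmark exists yet. A minimal sketch of that shared scaffolding, with the names CONFIG, STATE and get_start assumed from the calls above rather than taken from any one tap, might look like this:

CONFIG = {}  # populated from the tap's --config file
STATE = {}   # populated from the tap's --state file, when one is given

def get_start(key):
    # Return the saved bookmark for a stream, falling back to the
    # configured start_date the first time that stream is synced.
    if key not in STATE:
        STATE[key] = CONFIG['start_date']
    return STATE[key]

utils.update_state(STATE, key, value) then only moves a bookmark forward (it keeps the larger of the stored and new values), and singer.write_state(STATE) emits the accumulated dict as a STATE message.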
Example #2
def sync_project(pid):
    url = get_url("projects", pid)
    data = request(url).json()

    flatten_id(data, "owner")
    project = transform_row(data, RESOURCES["projects"]["schema"])

    state_key = "project_{}".format(project["id"])

    #pylint: disable=maybe-no-member
    last_activity_at = project.get('last_activity_at',
                                   project.get('created_at'))
    if not last_activity_at:
        raise Exception(
            #pylint: disable=line-too-long
            "There is no last_activity_at or created_at field on project {}. This usually means I don't have access to the project."
            .format(project['id']))

    if last_activity_at >= get_start(state_key):

        sync_branches(project)
        sync_commits(project)
        sync_issues(project)
        sync_milestones(project)
        sync_users(project)

        singer.write_record("projects", project)
        utils.update_state(STATE, state_key, last_activity_at)
        singer.write_state(STATE)
Example #3
def sync_transactions():
    schema = load_schema("transactions")
    singer.write_schema("transactions", schema, ["id"])

    now = datetime.datetime.utcnow()
    start = utils.strptime(get_start("transactions"))
    logger.info("transactions: Syncing from {}".format(start))

    while start < now:
        end = start + datetime.timedelta(days=1)
        if end > now:
            end = now

        data = braintree.Transaction.search(
            braintree.TransactionSearch.created_at.between(start, end))
        logger.info("transactions: Fetched {} records from {} - {}".format(
            data.maximum_size, start, end))

        for row in data:
            transformed = transform_row(row, schema)
            singer.write_record("transactions", transformed)

        utils.update_state(STATE, "transactions", utils.strftime(end))
        singer.write_state(STATE)
        start += datetime.timedelta(days=1)
Example #4
def sync_entity(entity, key_properties):
    start_date = get_start(entity)
    logger.info("{}: Starting sync from {}".format(entity, start_date))

    schema = load_schema(entity)
    singer.write_schema(entity, schema, key_properties)
    logger.info("{}: Sent schema".format(entity))

    logger.info("{}: Requesting export".format(entity))
    export_start = utils.strftime(
        datetime.datetime.utcnow().replace(tzinfo=pytz.utc))
    export_id = request_export(entity)

    logger.info("{}: Export ready".format(entity))

    rows = stream_export(entity, export_id)
    logger.info("{}: Got {} records".format(entity, len(rows)))

    for row in rows:
        transformed_row = transform_row(entity, row)
        singer.write_record(entity, transformed_row)

    utils.update_state(STATE, entity, export_start)
    singer.write_state(STATE)
    logger.info("{}: State synced to {}".format(entity, export_start))
Example #5
def sync_entity_chunked(entity_name, key_properties, path):
    schema = load_schema(entity_name)
    singer.write_schema(entity_name, schema, key_properties)

    start = get_start(entity_name)
    now_ts = int(datetime.datetime.utcnow().timestamp() * 1000)
    start_ts = int(utils.strptime(start).timestamp() * 1000)

    url = get_url(entity_name)
    while start_ts < now_ts:
        end_ts = start_ts + CHUNK_SIZES[entity_name]
        params = {
            'startTimestamp': start_ts,
            'endTimestamp': end_ts,
            'limit': 1000,
        }
        for row in gen_request(url, params, path, "hasMore", "offset",
                               "offset"):
            record = transform(row, schema)
            singer.write_record(entity_name, record)

        utils.update_state(STATE, entity_name,
                           datetime.datetime.utcfromtimestamp(end_ts / 1000))
        singer.write_state(STATE)
        start_ts = end_ts
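Several of the examples (#5, #6, #15 and #19) page through results with a gen_request helper whose arguments name the collection path, the "has more" flag and the offset keys, but the helper itself is never shown. A rough, self-contained sketch of such an offset-pagination generator follows; the use of plain requests is an assumption to keep it runnable, whereas the real taps route this through an authenticated, retrying request wrapper:

import requests

def gen_request(url, params, path, more_key, offset_keys, offset_targets):
    # Yield rows one page at a time, following the API's offset cursor
    # until the "has more" flag turns false.
    if isinstance(offset_keys, str):
        offset_keys = [offset_keys]
    if isinstance(offset_targets, str):
        offset_targets = [offset_targets]

    while True:
        data = requests.get(url, params=params).json()
        for row in data[path]:
            yield row
        if not data.get(more_key):
            break
        for source_key, target_param in zip(offset_keys, offset_targets):
            params[target_param] = data[source_key]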
Example #6
def sync_deals():
    last_sync = utils.strptime(get_start("deals"))
    days_since_sync = (datetime.datetime.utcnow() - last_sync).days
    if days_since_sync > 30:
        endpoint = "deals_all"
    else:
        endpoint = "deals_recent"

    schema = load_schema("deals")
    singer.write_schema("deals", schema, ["portalId", "dealId"])

    url = get_url(endpoint)
    params = {'count': 250}
    for i, row in enumerate(
            gen_request(url, params, "deals", "hasMore", "offset", "offset")):
        record = request(get_url("deals_detail", deal_id=row['dealId'])).json()
        record = transform(record, schema)

        modified_time = None
        if 'hs_lastmodifieddate' in record:
            modified_time = utils.strptime(
                record['hs_lastmodifieddate']['value'])
        elif 'createdate' in record:
            modified_time = utils.strptime(record['createdate']['value'])

        if not modified_time or modified_time >= last_sync:
            singer.write_record("deals", record)
            utils.update_state(STATE, "deals", modified_time)

        if i % 250 == 0:
            singer.write_state(STATE)
Example #7
def sync_type(type, endpoint, replicationKey, useValidationWindow):
    schema = load_schema(type)
    singer.write_schema(type, schema, [replicationKey])

    dateFrom = get_start(type)
    if useValidationWindow:
        dateFrom = dateFrom - timedelta(days=CONFIG['validation_window'])

    nextpageuri = get_url(f"{endpoint}.json", dateFrom)

    lastRow = None

    headers = {"Accept": "application/json"}
    auth = HTTPBasicAuth(CONFIG['account_sid'], CONFIG['auth_token'])

    while nextpageuri != "":
        req = requests.Request("GET",
                               url=f"{BASE_HOST}{nextpageuri}",
                               headers=headers,
                               auth=auth).prepare()
        resp = SESSION.send(req)
        resp.raise_for_status()

        json = resp.json()
        for row in json.get(endpoint):
            output = map_types(schema, row)
            lastRow = output
            singer.write_record(type, output)
        nextpageuri = json['@nextpageuri']

    if lastRow != None:
        utils.update_state(STATE, type, lastRow[replicationKey])
Example #8
def sync_entity(entity, primary_keys, date_keys=None, transform=None):
    schema = load_schema(entity)
    singer.write_schema(entity, schema, primary_keys)

    start_date = get_start(entity)
    for row in gen_request(entity):
        if transform:
            row = transform(row)

        if date_keys:
            # Rows can have various values for various date keys (See the calls to
            # `sync_entity` in `do_sync`), usually dates of creation and update.
            # But in some cases some keys may not be present.
            #
            # To handle this we:
            #
            # 1. Get _all_ the values for all the keys that are actually present in
            # the row (not every row has every key), and exclude missing ones.
            #
            # 2. Take the max of those values as the bookmark for that entity.
            #
            # A KeyError is raised if the row has none of the date keys.
            if not any(date_key in row for date_key in date_keys):
                raise KeyError('None of date keys found in the row')
            last_touched = max(row[date_key] for date_key in date_keys
                               if date_key in row)
            utils.update_state(STATE, entity, last_touched)
            if last_touched < start_date:
                continue

        row = transform_row(row, schema)

        singer.write_record(entity, row)

    singer.write_state(STATE)
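For illustration, a hypothetical call to the function above (the stream name and date keys here are invented, not taken from the source tap) bookmarks each row on whichever of the listed date keys is present and most recent:

# Hypothetical usage: bookmark "invoices" on the newer of created_at / updated_at.
sync_entity("invoices", ["id"], date_keys=["created_at", "updated_at"])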
Example #9
def sync_events():
    schema = load_schema("events")
    singer.write_schema("events", schema, [])

    for export_bundle in request_export_bundles():
        for event in download_events(export_bundle['Id']):
            transform_event(event)
            singer.write_record("events", event)
        stop_timestamp = datetime.datetime.utcfromtimestamp(
            export_bundle['Stop'])
        utils.update_state(STATE, "events", stop_timestamp)
        singer.write_state(STATE)
Example #10
def sync_reviews(headers, CONFIG, STATE):
    page = 0 # initialize at first page
    rec = 1 # initialize first record
    total_pages = 1 # initial total pages, which gets overwritten
    total_elements = 0 # initial total elements, which gets overwritten
    size = 10 # number of records per request
    to_timestamp = int(headers['X-Revinate-Porter-Timestamp'])
    last_update = to_timestamp  # init
    from_timestamp = 0  # initial value
    # set from_timestamp as NVL(STATE.last_update, CONFIG.start_date, now - 1 week)
    if 'last_update' not in STATE:
        if 'start_date' not in CONFIG:
            from_timestamp = to_timestamp - (60 * 60 * 24 * 7)  # looks back 1 week
        else:
            from_timestamp = int(time.mktime(datetime.datetime.strptime(CONFIG['start_date'], \
                '%Y-%m-%dT%H:%M:%SZ').timetuple()))
    else:
        from_timestamp = int(STATE['last_update'])
    updated_at_range = str(from_timestamp) + '..' + str(to_timestamp)
    # loop thru all pages
    while (page + 1) <= total_pages:
        if (rec + size) <= total_elements:
            rec_to = rec + size - 1
        else:
            rec_to = total_elements
        LOGGER.info('Page {} of {} Total Pages, Record {}-{} of {} Total Records'.format( \
            str(page + 1), str(total_pages), str(rec), str(rec_to), str(total_elements)))
        params = {
            'updatedAt': updated_at_range,
            'page': page,
            'size': size,
            'sort': 'updatedAt,ASC'
        }
        try:
            reviews_parsed = fetch_reviews(headers, params)
        except Exception as exception:
            LOGGER.exception(exception)
            break
        # loop thru all records on page
        for record in reviews_parsed['content']:
            parsed_review = parse_review(record)
            singer.write_record('reviews', parsed_review)
            last_update = record['updatedAt']
            rec = rec + 1
        page_json = reviews_parsed['page']
        total_pages = int(page_json.get('totalPages', 1))
        total_elements = int(page_json.get('totalElements', 0))
        page = page + 1
    # update STATE last_update
    utils.update_state(STATE, 'last_update', last_update)
    singer.write_state(STATE)
    LOGGER.info("State synced to last_update: {}".format(last_update))
    LOGGER.info("Done syncing reviews.")
Example #11
def sync_events():
    schema = load_schema("events")
    singer.write_schema("events", schema, [])

    for export_bundle in request_export_bundles():
        with metrics.record_counter("events") as counter:
            for event in download_events(export_bundle['Id']):
                transform_event(event)
                counter.increment()
                singer.write_record("events", event)
            stop_timestamp = datetime.datetime.utcfromtimestamp(export_bundle['Stop'])
            utils.update_state(STATE, "events", stop_timestamp)
            singer.write_state(STATE)
Example #12
def sync_owners():
    schema = load_schema("owners")
    singer.write_schema("owners", schema, ["portalId", "ownerId"])
    start = get_start("owners")

    data = request(get_url("owners")).json()
    for row in data:
        record = transform(row, schema)
        if record['updatedAt'] >= start:
            singer.write_record("owners", record)
            utils.update_state(STATE, "owners", record['updatedAt'])

    singer.write_state(STATE)
Example #13
def sync_keywords():
    schema = load_schema("keywords")
    singer.write_schema("keywords", schema, ["keyword_guid"])
    start = get_start("keywords")

    data = request(get_url("keywords")).json()
    for row in data['keywords']:
        record = transform(row, schema)
        if record['created_at'] >= start:
            singer.write_record("keywords", record)
            utils.update_state(STATE, "keywords", record['created_at'])

    singer.write_state(STATE)
Example #14
def sync_workflows():
    schema = load_schema("workflows")
    singer.write_schema("workflows", schema, ["id"])
    start = get_start("workflows")

    data = request(get_url("workflows")).json()
    for row in data['workflows']:
        record = transform(row, schema)
        if record['updatedAt'] >= start:
            singer.write_record("workflows", record)
            utils.update_state(STATE, "workflows", record['updatedAt'])

    singer.write_state(STATE)
Example #15
def sync_contacts():
    last_sync = utils.strptime(get_start("contacts"))
    days_since_sync = (datetime.datetime.utcnow() - last_sync).days
    if days_since_sync > 30:
        endpoint = "contacts_all"
        offset_keys = ['vid-offset']
        offset_targets = ['vidOffset']
    else:
        endpoint = "contacts_recent"
        offset_keys = ['vid-offset', 'time-offset']
        offset_targets = ['vidOffset', 'timeOffset']

    schema = load_schema("contacts")
    singer.write_schema("contacts", schema, ["canonical-vid"])

    url = get_url(endpoint)
    params = {
        'showListMemberships': True,
        'count': 100,
    }
    vids = []
    for row in gen_request(url, params, 'contacts', 'has-more', offset_keys,
                           offset_targets):
        modified_time = None
        if 'lastmodifieddate' in row['properties']:
            modified_time = utils.strptime(
                _transform_datetime(
                    row['properties']['lastmodifieddate']['value']))

        if not modified_time or modified_time >= last_sync:
            vids.append(row['vid'])

        if len(vids) == 100:
            data = request(get_url("contacts_detail"), params={
                'vid': vids
            }).json()
            for vid, record in data.items():
                record = transform(record, schema)
                singer.write_record("contacts", record)

                modified_time = None
                if 'lastmodifieddate' in record['properties']:
                    modified_time = record['properties']['lastmodifieddate'][
                        'value']
                    utils.update_state(STATE, "contacts", modified_time)

            vids = []

    singer.write_state(STATE)
Example #16
def sync_entity(entity):
    logger.info("Syncing {} from {}".format(entity, get_start(entity)))

    schema = load_schema(entity)
    singer.write_schema(entity, schema, ["id"])

    for i, row in enumerate(gen_request(entity)):
        transform_datetimes(row)
        singer.write_record(entity, row)
        utils.update_state(STATE, entity, row["updated_at"])

        # "end_users" is the only one that can be queried by updated_at
        # As such, the other streams require a full sync before writing bookmarks.
        if i % 50 == 49 and entity == "end_users":
            singer.write_state(STATE)

    singer.write_state(STATE)
Example #17
def sync_entity(entity, primary_keys, date_keys=None, transform=None):
    schema = load_schema(entity)
    singer.write_schema(entity, schema, primary_keys)

    start_date = get_start(entity)
    for row in gen_request(entity):
        if transform:
            row = transform(row)

        row = transform_row(row, schema)
        if date_keys:
            last_touched = max(row[date_key] for date_key in date_keys)
            utils.update_state(STATE, entity, last_touched)
            if last_touched < start_date:
                continue

        singer.write_record(entity, row)

    singer.write_state(STATE)
Example #18
def sync_activities(activity_type_id,
                    lead_fields,
                    date_fields,
                    leads_schema,
                    do_leads=False):
    global LEAD_IDS, LEAD_IDS_SYNCED

    state_key = 'activities_{}'.format(activity_type_id)
    start = get_start(state_key)
    data = request("v1/activities/pagingtoken.json", {'sinceDatetime': start})
    params = {
        'activityTypeIds': activity_type_id,
        'nextPageToken': data['nextPageToken'],
        'batchSize': LEADS_BATCH_SIZE,
    }

    for row in gen_request("v1/activities.json", params=params):
        # Stream in the activity and update the state.
        singer.write_record("activities", row)
        utils.update_state(STATE, state_key, row['activityDate'])

        if do_leads:
            # Add the lead id to the set of lead ids that need synced unless
            # already synced.
            lead_id = row['leadId']
            if lead_id not in LEAD_IDS_SYNCED:
                LEAD_IDS.add(lead_id)

            # If we have 300 or more lead ids (one page), sync those leads and mark
            # the ids as synced. Once the leads have been synced we can update the
            # state.
            if len(LEAD_IDS) >= LEADS_BATCH_SIZE:
                # Take the first 300 off the set and sync them.
                lead_ids = list(LEAD_IDS)[:LEADS_BATCH_SIZE]
                sync_leads(lead_ids, lead_fields, date_fields, leads_schema)

                # Remove the synced lead ids from the set to be synced and add them
                # to the set of synced ids.
                LEAD_IDS = LEAD_IDS.difference(lead_ids)
                LEAD_IDS_SYNCED = LEAD_IDS_SYNCED.union(lead_ids)

        # Update the state.
        singer.write_state(STATE)
Example #19
def sync_companies():
    last_sync = utils.strptime(get_start("companies"))
    days_since_sync = (datetime.datetime.utcnow() - last_sync).days
    if days_since_sync > 30:
        endpoint = "companies_all"
        path = "companies"
        more_key = "has-more"
        offset_keys = ["offset"]
        offset_targets = ["offset"]
    else:
        endpoint = "companies_recent"
        path = "results"
        more_key = "hasMore"
        offset_keys = ["offset"]
        offset_targets = ["offset"]

    schema = load_schema('companies')
    singer.write_schema("companies", schema, ["companyId"])

    url = get_url(endpoint)
    params = {'count': 250}
    for i, row in enumerate(
            gen_request(url, params, path, more_key, offset_keys,
                        offset_targets)):
        record = request(
            get_url("companies_detail", company_id=row['companyId'])).json()
        record = transform(record, schema)

        modified_time = None
        if 'hs_lastmodifieddate' in record:
            modified_time = utils.strptime(
                record['hs_lastmodifieddate']['value'])
        elif 'createdate' in record:
            modified_time = utils.strptime(record['createdate']['value'])

        if not modified_time or modified_time >= last_sync:
            singer.write_record("companies", record)
            utils.update_state(STATE, "companies", modified_time)

        if i % 250 == 0:
            singer.write_state(STATE)
Example #20
def sync_type(type, endpoint, replicationKey):
    schema = load_schema(type)
    singer.write_schema(type, schema, [replicationKey])

    url = get_url(endpoint, get_start(type).strftime("%Y-%m-%d"))

    req = requests.Request("GET", url=url).prepare()
    resp = SESSION.send(req)
    resp.raise_for_status()

    finalRow = None
    for row in resp.json():
        finalRow = row
        if row.get("date"):
            row["date"] = dateparser.parse(row["date"]).isoformat() + "Z";
        if row.get("rating"):
            row["rating"] = int(row["rating"])
        singer.write_record(type, row)

    if finalRow != None:
        utils.update_state(STATE, type, finalRow['date'])
Example #21
def sync_advertisers(client):
    schema = load_schema("merchants")
    singer.write_schema("merchants", schema, ["Id"])

    lastModified = get_start("merchants", False)

    finalRow = None

    resp = client.service.getMerchantList(sRelationship="joined")
    for x in resp.body.getMerchantListReturn:
        x = helpers.serialize_object(x)
        if x['aCommissionRanges'] != None:
            x['aCommissionRanges'] = x['aCommissionRanges']
        if x['aSectors'] != None:
            x['aSectors'] = x['aSectors']
        if lastModified == None or x['dDetailsModified'] > lastModified:
            singer.write_record("merchants", map_type(x))
            finalRow = x

    if finalRow != None:
        utils.update_state(STATE, "merchants", finalRow['dDetailsModified'])
Example #22
def sync_endpoint(catalog_entry, schema, mdata, date_fields = None):
    singer.write_schema(catalog_entry.tap_stream_id,
                        schema,
                        [PRIMARY_KEY],
                        bookmark_properties = [REPLICATION_KEY])

    start = get_start(catalog_entry.tap_stream_id)
    url = get_url(catalog_entry.tap_stream_id)
    data = request(url)[catalog_entry.tap_stream_id]
    time_extracted = utils.now()

    stream_version = get_stream_version(catalog_entry.tap_stream_id)
    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream,
        version=stream_version
    )

    for row in data:
        with Transformer() as transformer:
            rec = transformer.transform(row, schema, mdata)
            append_times_to_dates(rec, date_fields)

            try:
                updated_at = rec[REPLICATION_KEY]
            except KeyError:
                updated_at = start
            
            if updated_at >= start:
                new_record = singer.RecordMessage(
                    stream=catalog_entry.stream,
                    record=rec,
                    version=stream_version,
                    time_extracted=time_extracted)
                singer.write_message(new_record)
 
                utils.update_state(STATE, catalog_entry.tap_stream_id, updated_at)

    singer.write_state(STATE)
    singer.write_message(activate_version_message)
Example #23
def do_sync(guid, company_id, start_date):
    """Use the request function to get data and write the schema and response to singer"""
    schema = load_schema()
    LOGGER.info("---------- Writing Schema ----------")
    singer.write_schema("powerinbox_response", schema, "stripe")

    LOGGER.info("---------- Starting sync ----------")

    pickup_date = get_start_date("start_date")
    if (pickup_date):
        next_date = pickup_date
    else:
        next_date = start_date

    try:
        while next_date < utils.strftime(utils.now(), DATE_FORMAT):

            ext_url = ("{company_id}/{guid}/all_stripe/{date}.json"
                       .format(company_id=company_id, guid=guid, date=next_date))

            response = request(BASE_URL+ext_url)

            with metrics.record_counter(BASE_URL+ext_url) as counter:
                for record in response:
                    singer.write_records("powerinbox_response", [record])
                    utils.update_state(STATE, "start_date", next_date.encode("ascii", "ignore"))
                    counter.increment()

            next_date = utils.strftime((utils.strptime_to_utc(next_date)+
                                        timedelta(days=1)), DATE_FORMAT)

    except Exception as exc:
        LOGGER.critical(exc)
        singer.write_state(STATE)
        raise exc

    singer.write_state(STATE)
    LOGGER.info("---------- Completed sync ----------")
Example #24
def sync_transactions(client):
    schema = load_schema("transactions")
    singer.write_schema("transactions", schema, ["Id"])

    dateFrom = get_start("transactions") - timedelta(
        days=CONFIG['validation_window'])
    dateTo = datetime.now(timezone.utc)

    start = dateFrom
    offset = 0
    finalRow = None

    # handle batches by number of days and number of rows
    while start < dateTo:
        end = start + timedelta(days=MAX_DAYS)
        if (end > dateTo): end = dateTo
        resp = client.service.getTransactionList(dStartDate=start,
                                                 dEndDate=end,
                                                 iOffset=offset,
                                                 iLimit=BATCH_SIZE,
                                                 sDateType="transaction")
        if (resp.body.getTransactionListCountReturn.iRowsReturned > 0):
            for t in resp.body.getTransactionListReturn:
                t = helpers.serialize_object(t)
                if t['aTransactionParts'] != None:
                    t['aTransactionParts'] = t['aTransactionParts']
                singer.write_record("transactions", map_type(t))
                finalRow = t
        if (offset + resp.body.getTransactionListCountReturn.iRowsReturned
            ) < resp.body.getTransactionListCountReturn.iRowsAvailable:
            offset += resp.body.getTransactionListCountReturn.iRowsReturned
        else:
            start = end
            offset = 0

    if finalRow != None:
        utils.update_state(STATE, "transactions", finalRow['dTransactionDate'])
Example #25
def sync_endpoint(endpoint, schema, mdata, date_fields = None):
    singer.write_schema(endpoint,
                        schema,
                        [PRIMARY_KEY],
                        bookmark_properties = [REPLICATION_KEY])

    start = get_start(endpoint)
    url = get_url(endpoint)
    data = request(url)[endpoint]
    time_extracted = utils.now()

    for row in data:
        with Transformer() as transformer:
            rec = transformer.transform(row, schema, mdata)
            append_times_to_dates(rec, date_fields)

            updated_at = rec[REPLICATION_KEY]
            if updated_at >= start:
                singer.write_record(endpoint,
                                    rec,
                                    time_extracted = time_extracted)
                utils.update_state(STATE, endpoint, updated_at)

    singer.write_state(STATE)
Example #26
def sync_endpoint(schema_name,
                  endpoint=None,
                  path=None,
                  special_field_name=None,
                  special_field_value=None,
                  keys=None,
                  object_to_id=None,
                  parameter_for_updated=None):
    schema = load_schema(schema_name)
    bookmark_property = 'updated_at'
    LOGGER.info('Loading ' + schema_name)
    if keys is None:
        keys = ['id']
    singer.write_schema(schema_name,
                        schema,
                        keys,
                        bookmark_properties=[bookmark_property])

    start = get_start(schema_name)
    start_dt = datetime.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S.%fZ')
    updated_since = start_dt.strftime("%Y%m%dT%H%M%S")
    LOGGER.info('updated_since ' + updated_since)
    with Transformer() as transformer:
        url = get_url(endpoint or schema_name)
        url = endpoint or url
        if parameter_for_updated is not None:
            url = url + '?' + parameter_for_updated + '=' + updated_since
        response = request(url, None)
        LOGGER.info('URL :' + url)
        if schema_name == 'project_financials':
            response = [response]

        time_extracted = utils.now()

        for row in response:

            if special_field_name is not None:
                row[special_field_name] = special_field_value

            if object_to_id is not None:
                for key in object_to_id:
                    if row[key] is not None:
                        row[key + '_id'] = row[key]['id']
                    else:
                        row[key + '_id'] = None

            item = transformer.transform(row, schema)

            if not bookmark_property in item:
                item[bookmark_property] = \
                    datetime.datetime.now().strftime('%Y-%m-%d') \
                    + 'T00:00:00.00Z'

            if datetime.datetime.strptime(item[bookmark_property],
                                          '%Y-%m-%dT%H:%M:%S.%fZ') >= start_dt:
                singer.write_record(schema_name,
                                    item,
                                    time_extracted=time_extracted)

                utils.update_state(STATE, schema_name, item[bookmark_property])
    singer.write_state(STATE)
Example #27
def sync_rate_cards(  # pylint: disable=too-many-arguments
    schema_name,
    endpoint=None,
    path=None,
    special_field_name=None,
    special_field_value=None,
    date_fields=None,
    with_updated_since=True,
    for_each_handler=None,
    map_handler=None,
    object_to_id=None,
):
    schema = load_schema(schema_name)
    bookmark_property = 'updated_at'
    LOGGER.info('Loading ' + schema_name)
    singer.write_schema(schema_name,
                        schema, ['id'],
                        bookmark_properties=[bookmark_property])

    start = get_start(schema_name)

    with Transformer() as transformer:
        url = get_url(endpoint or schema_name)
        url = endpoint or url
        response = request(url, None)

        time_extracted = utils.now()

        for row in response:
            if map_handler is not None:
                row = map_handler(row)

            if object_to_id is not None:
                for key in object_to_id:
                    if row[key] is not None:
                        row[key + '_id'] = row[key]['id']
                    else:
                        row[key + '_id'] = None

            item = transformer.transform(row, schema)
            if not bookmark_property in item:
                item[bookmark_property] = \
                    datetime.datetime.now().strftime('%Y-%m-%d') \
                    + 'T00:00:00Z'

            # find expenses

            sync_endpoint(
                'rate_cards_rates',
                BASE_API_URL + 'rate_cards/' + str(row['id']) + '/rates',
                None,
                'rate_card_id',
                str(row['id']),
                ['rate_card_id', 'role'],
            )

            singer.write_record(schema_name,
                                item,
                                time_extracted=time_extracted)

            # take any additional actions required for the currently loaded endpoint

            utils.update_state(STATE, schema_name, item[bookmark_property])
    singer.write_state(STATE)
Example #28
def sync_project(  # pylint: disable=too-many-arguments
    schema_name,
    endpoint=None,
    path=None,
    special_field_name=None,
    special_field_value=None,
    date_fields=None,
    with_updated_since=True,
    for_each_handler=None,
    map_handler=None,
    object_to_id=None,
):
    schema = load_schema(schema_name)
    bookmark_property = 'updated_at'
    LOGGER.info('Loading ' + schema_name)
    singer.write_schema(schema_name, schema, ['id'])

    start = get_start(schema_name)

    with Transformer() as transformer:
        url = get_url(endpoint or schema_name)
        url = endpoint or url

        response = request(url, None)

        for row in response:
            for refs in row['external_refs']:
                if refs['name'] == 'harvest_project_id':
                    LOGGER.info('Loading in if' + str(refs))
                    row['connected_project'] = refs['value']
            item = transformer.transform(row, schema)
            time_extracted = utils.now()

            # find related
            sync_endpoint(
                'expense_items',
                BASE_API_URL + 'projects/' + str(row['id']) + '/expense_items',
                None, 'project_id', str(row['id']))
            sync_endpoint(
                'invoices',
                BASE_API_URL + 'projects/' + str(row['id']) + '/invoices',
                None, 'project_id', str(row['id']))
            sync_endpoint(
                'milestones',
                BASE_API_URL + 'projects/' + str(row['id']) + '/milestones',
                None, 'project_id', str(row['id']))
            sync_endpoint(
                'project_team',
                BASE_API_URL + 'projects/' + str(row['id']) + '/team',
                None,
                'project_id',
                str(row['id']),
                ['person_id', 'project_id'],
            )
            sync_endpoint(
                'sprints',
                BASE_API_URL + 'projects/' + str(row['id']) + '/sprints', None,
                'project_id', str(row['id']))
            sync_endpoint(
                'workflow_columns', BASE_API_URL + 'projects/' +
                str(row['id']) + '/workflow_columns', None, 'project_id',
                str(row['id']))
            sync_endpoint(
                'project_financials',
                BASE_API_URL + 'projects/' + str(row['id']) + '/financials',
                None,
                None,
                None,
                ['project_id'],
            )


            if bookmark_property in item and item[bookmark_property] \
                >= start:
                singer.write_record(schema_name,
                                    item,
                                    time_extracted=time_extracted)

                utils.update_state(STATE, schema_name, item[bookmark_property])
    singer.write_state(STATE)
Example #29
def sync_allocations(
    schema_name,
    endpoint=None,
    path=None,
    special_field_name=None,
    special_field_value=None,
    keys=None,
    object_to_id=None,
):
    schema = load_schema(schema_name)
    bookmark_property = 'updated_at'
    LOGGER.info('Loading ' + schema_name)
    if keys is None:
        keys = ['id']
    singer.write_schema(schema_name,
                        schema,
                        keys,
                        bookmark_properties=[bookmark_property])

    start = get_start(schema_name)
    weekDays = [
        'monday',
        'tuesday',
        'wednesday',
        'thursday',
        'friday',
        'saturday',
        'sunday',
    ]
    with Transformer() as transformer:
        url = get_url(endpoint or schema_name)
        url = endpoint or url

        response = request(url, None)

        time_extracted = utils.now()

        for row in response:

            # add here logic

            date = datetime.datetime.strptime(row['start_date'], '%Y-%m-%d')
            LOGGER.info("Project" + str(row['project']) + "-" +
                        str(row['person']))
            end_date = datetime.datetime.strptime(row['end_date'], '%Y-%m-%d')

            newRow = {}
            #LOGGER.info("ID:"  + str(row['id']))
            #LOGGER.info("Date :  "  + date.strftime('%Y%m%d'))

            while date <= end_date:
                #LOGGER.info('Date :  ' + str(date.weekday()) + 'weekday'
                #             + weekDays[date.weekday()])
                #LOGGER.info(row['project'])
                #LOGGER.info(row[weekDays[date.weekday()]])
                #LOGGER.info(str(date.strftime('%Y-%m-%d')))
                #if row['id'] = 72051:
                #    LOGGER.info(row['project'])
                #    LOGGER.info(row['person'])
                #    LOGGER.info(str(date.strftime('%Y-%m-%d')))
                #    LOGGER.info(str(end_date.strftime('%Y-%m-%d')))

                newRow['allocation'] = row[weekDays[date.weekday()]]
                if not newRow['allocation'] > 0:
                    date = date + timedelta(days=1)
                    continue
                newRow['project'] = row['project']
                newRow['non_project_time'] = row['non_project_time']
                newRow['connected_project'] = row['connected_project']
                newRow['person'] = row['person']
                newRow['project'] = row['project']
                newRow['date'] = date.strftime('%Y-%m-%d')
                newRow['notes'] = row['notes']
                newRow['created_by'] = row['created_by']
                newRow['updated_by'] = row['updated_by']
                newRow['created_at'] = row['created_at']
                newRow['updated_at'] = row['updated_at']
                newRow['id'] = str(row['id']) \
                    + str(date.strftime('%Y%m%d'))

                date = date + timedelta(days=1)

                item = transformer.transform(newRow, schema)

                if not bookmark_property in item:
                    item[bookmark_property] = \
                        datetime.datetime.now().strftime('%Y-%m-%d') \
                        + 'T00:00:00Z'

                if bookmark_property in item \
                    and item[bookmark_property] >= start:
                    singer.write_record(schema_name,
                                        item,
                                        time_extracted=time_extracted)

                    utils.update_state(STATE, schema_name,
                                       item[bookmark_property])
                else:
                    singer.write_record(schema_name,
                                        item,
                                        time_extracted=time_extracted)

                    # take any additional actions required for the currently loaded endpoint

                    utils.update_state(STATE, schema_name,
                                       item[bookmark_property])
        singer.write_state(STATE)
Example #30
def sync_in_app_events():

    schema = load_schema("raw_data/in_app_events")
    singer.write_schema("in_app_events", schema, [
        "event_time",
        "event_name",
        "appsflyer_id"
    ])

    # This order matters
    fieldnames = (
        "attributed_touch_type",
        "attributed_touch_time",
        "install_time",
        "event_time",
        "event_name",
        "event_value",
        "event_revenue",
        "event_revenue_currency",
        "event_revenue_usd",
        "event_source",
        "is_receipt_validated",
        "af_prt",
        "media_source",
        "af_channel",
        "af_keywords",
        "campaign",
        "af_c_id",
        "af_adset",
        "af_adset_id",
        "af_ad",
        "af_ad_id",
        "af_ad_type",
        "af_siteid",
        "af_sub_siteid",
        "af_sub1",
        "af_sub2",
        "af_sub3",
        "af_sub4",
        "af_sub5",
        "af_cost_model",
        "af_cost_value",
        "af_cost_currency",
        "contributor1_af_prt",
        "contributor1_media_source",
        "contributor1_campaign",
        "contributor1_touch_type",
        "contributor1_touch_time",
        "contributor2_af_prt",
        "contributor2_media_source",
        "contributor2_campaign",
        "contributor2_touch_type",
        "contributor2_touch_time",
        "contributor3_af_prt",
        "contributor3_media_source",
        "contributor3_campaign",
        "contributor3_touch_type",
        "contributor3_touch_time",
        "region",
        "country_code",
        "state",
        "city",
        "postal_code",
        "dma",
        "ip",
        "wifi",
        "operator",
        "carrier",
        "language",
        "appsflyer_id",
        "advertising_id",
        "idfa",
        "android_id",
        "customer_user_id",
        "imei",
        "idfv",
        "platform",
        "device_type",
        "os_version",
        "app_version",
        "sdk_version",
        "app_id",
        "app_name",
        "bundle_id",
        "is_retargeting",
        "retargeting_conversion_type",
        "af_attribution_lookback",
        "af_reengagement_window",
        "is_primary_attribution",
        "user_agent",
        "http_referrer",
        "original_url",
    )

    stop_time = datetime.datetime.now()
    from_datetime = get_start("in_app_events")
    to_datetime = get_stop(from_datetime, stop_time, 10)

    while from_datetime < stop_time:
        LOGGER.info("Syncing data from %s to %s", from_datetime, to_datetime)
        params = dict()
        params["from"] = from_datetime.strftime("%Y-%m-%d %H:%M")
        params["to"] = to_datetime.strftime("%Y-%m-%d %H:%M")
        params["api_token"] = CONFIG["api_token"]

        url = get_url("in_app_events", app_id=CONFIG["app_id"])
        request_data = request(url, params)

        csv_data = RequestToCsvAdapter(request_data)
        reader = csv.DictReader(csv_data, fieldnames)

        next(reader) # Skip the heading row

        bookmark = from_datetime
        for i, row in enumerate(reader):
            record = xform(row, schema)
            singer.write_record("in_app_events", record)
            # AppsFlyer returns records in order of most recent first.
            if utils.strptime(record["event_time"]) > bookmark:
                bookmark = utils.strptime(record["event_time"])

        # Write out state
        utils.update_state(STATE, "in_app_events", bookmark)
        singer.write_state(STATE)

        # Move the timings forward
        from_datetime = to_datetime
        to_datetime = get_stop(from_datetime, stop_time, 10)
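Example #30 advances its export window with a get_stop helper that is not shown. Inferring only from how it is called above, a plausible sketch (an assumption about the helper, not the tap's actual code) is:

import datetime

def get_stop(from_datetime, stop_time, days=10):
    # Move the end of the sync window forward by a fixed number of days,
    # but never past the overall stop time.
    return min(from_datetime + datetime.timedelta(days=days), stop_time)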
Example #31
def sync_transactions():
    schema = load_schema("transactions")

    singer.write_schema("transactions",
                        schema, ["id"],
                        bookmark_properties=['created_at'])

    latest_updated_at = to_utc(
        utils.strptime(STATE.get('latest_updated_at', DEFAULT_TIMESTAMP)))

    run_maximum_updated_at = latest_updated_at

    latest_disbursement_date = to_utc(
        utils.strptime(STATE.get('latest_disbursement_date',
                                 DEFAULT_TIMESTAMP)))

    run_maximum_disbursement_date = latest_disbursement_date

    latest_start_date = to_utc(utils.strptime(get_start("transactions")))

    period_start = latest_start_date - TRAILING_DAYS

    period_end = utils.now()

    logger.info("transactions: Syncing from {}".format(period_start))

    logger.info(
        "transactions: latest_updated_at from {}, disbursement_date from {}".
        format(latest_updated_at, latest_disbursement_date))

    logger.info(
        "transactions: latest_start_date from {}".format(latest_start_date))

    # increment through each day (20k results max from api)
    for start, end in daterange(period_start, period_end):

        end = min(end, period_end)

        data = braintree.Transaction.search(
            braintree.TransactionSearch.created_at.between(start, end))
        time_extracted = utils.now()

        logger.info("transactions: Fetched {} records from {} - {}".format(
            data.maximum_size, start, end))

        row_written_count = 0
        row_skipped_count = 0

        for row in data:
            # Ensure updated_at consistency
            if not getattr(row, 'updated_at'):
                row.updated_at = row.created_at

            transformed = transform_row(row, schema)
            updated_at = to_utc(row.updated_at)

            # if disbursement is successful, get disbursement date
            # set disbursement datetime to min if not found

            if row.disbursement_details is None:
                disbursement_date = datetime.min

            else:
                if row.disbursement_details.disbursement_date is None:
                    row.disbursement_details.disbursement_date = datetime.min

                disbursement_date = to_utc(
                    datetime.combine(
                        row.disbursement_details.disbursement_date,
                        datetime.min.time()))

            # Is this more recent than our past stored value of update_at?
            # Is this more recent than our past stored value of disbursement_date?
            # Use >= for updated_at due to non monotonic updated_at values
            # Use > for disbursement_date - confirming all transactions disbursed
            # at the same time
            # Update our high water mark for updated_at and disbursement_date
            # in this run
            if (updated_at >= latest_updated_at) or (disbursement_date >=
                                                     latest_disbursement_date):

                if updated_at > run_maximum_updated_at:
                    run_maximum_updated_at = updated_at

                if disbursement_date > run_maximum_disbursement_date:
                    run_maximum_disbursement_date = disbursement_date

                singer.write_record("transactions",
                                    transformed,
                                    time_extracted=time_extracted)
                row_written_count += 1

            else:

                row_skipped_count += 1

        logger.info("transactions: Written {} records from {} - {}".format(
            row_written_count, start, end))

        logger.info("transactions: Skipped {} records from {} - {}".format(
            row_skipped_count, start, end))

    # End day loop
    logger.info("transactions: Complete. Last updated record: {}".format(
        run_maximum_updated_at))

    logger.info("transactions: Complete. Last disbursement date: {}".format(
        run_maximum_disbursement_date))

    latest_updated_at = run_maximum_updated_at

    latest_disbursement_date = run_maximum_disbursement_date

    STATE['latest_updated_at'] = utils.strftime(latest_updated_at)

    STATE['latest_disbursement_date'] = utils.strftime(
        latest_disbursement_date)

    utils.update_state(STATE, "transactions", utils.strftime(end))

    singer.write_state(STATE)
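Example #31 walks its period with a daterange helper that yields day-sized slices between two datetimes. A possible sketch, again inferred from the usage above rather than taken from the tap (note that the loop itself clamps the end with min(end, period_end)):

import datetime

def daterange(start, end):
    # Yield consecutive (window_start, window_end) pairs, one day wide,
    # until the whole [start, end) interval is covered.
    current = start
    while current < end:
        yield current, current + datetime.timedelta(days=1)
        current += datetime.timedelta(days=1)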