Example #1
def __process_all_records_data_stream(req_state):
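    """Fetch all records for the stream in a single call, emit them through
    process_records, and advance the stream bookmark when the stream has a
    bookmark field and records were written. Returns the record count."""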
    max_bookmark_value = req_state.last_date

    record_count = 0
    records = ilevel.get_all_objects(req_state.stream_name, req_state.client)

    if len(records) == 0:
        return 0

    # Process records
    process_record_count = 0
    max_bookmark_value, process_record_count = process_records(
        result_records=records,
        req_state=req_state,
        deletion_flag=False,
        max_bookmark_value=max_bookmark_value)

    record_count = record_count + process_record_count

    # Data not sorted
    # Update the state with the max_bookmark_value for the stream after ALL records
    if req_state.bookmark_field and process_record_count > 0:
        singer_ops.write_bookmark(req_state.state, req_state.stream_name,
                                  max_bookmark_value)

    return record_count
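For context, here is a minimal sketch of how a routine like this is driven. The ReqState container and its field values are hypothetical stand-ins (the real tap builds its request state from config and saved state), and client is assumed to be an already-authenticated SOAP client:

from collections import namedtuple

# Hypothetical stand-in for the tap's request-state object; only the
# attributes read by __process_all_records_data_stream are included
ReqState = namedtuple(
    'ReqState',
    ['client', 'stream_name', 'state', 'bookmark_field', 'last_date'])

req_state = ReqState(
    client=client,  # assumption: an authenticated SOAP client
    stream_name='funds',
    state={'bookmarks': {}},
    bookmark_field='last_modified_date',
    last_date='2020-01-01')
record_count = __process_all_records_data_stream(req_state)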
Example #2
def __process_periodic_data_calcs(req_state,
                                  scenario_name='Actual',
                                  currency_code='USD'):
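    """Request calculated periodic data for every combination of entity,
    calculated data item, period type, and look-back offset, batching the
    iGetBatch calls in groups of batch_size requests. Returns the number of
    records emitted; the bookmark is rewound 365 days so a trailing year of
    calculated data is always re-synced."""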
    entity_types = ['assets']  # Currently: assets only (not funds)
    period_types = req_state.period_types.strip().replace(' ', '').split(',')
    batch_size = 10000
    start_dttm = datetime.strptime(req_state.last_date, '%Y-%m-%d')
    end_dttm = req_state.end_date
    max_bookmark_value = req_state.last_date

    # Init params_list and results
    i_get_params_list = req_state.client.factory.create(
        'ArrayOfBaseRequestParameters')
    results = []
    req_id = 1
    batch = 1
    update_count = 0

    # Base objects
    data_value_types = req_state.client.factory.create('DataValueTypes')

    # scenario_id for scenario_name
    scenarios = req_state.client.service.GetScenarios()
    scenario = [i for i in scenarios.NamedEntity if i.Name == scenario_name][0]
    scenario_id = scenario.Id

    # current_date
    date_types = req_state.client.factory.create('DateTypes')
    current_date = req_state.client.factory.create('Date')
    current_date.Type = date_types.Current

    # latest_date
    latest_date = req_state.client.factory.create('Date')
    latest_date.Type = date_types.Latest

    # Get all calc data items
    data_item_search_criteria = req_state.client.factory.create(
        'DataItemsSearchCriteria')
    data_item_search_criteria.GetGlobalDataItemsOnly = True  # Global Data Items ONLY
    data_items = req_state.client.service.GetDataItems(
        data_item_search_criteria)
    calc_data_items = [
        i for i in data_items.DataItemObjectEx if i.FormulaTypeIDsString
    ]
    calc_data_items_len = len(calc_data_items)
    last_calc_data_item = calc_data_items[-1]

    # entity_type loop
    for entity_type in entity_types:  # funds, assets
        LOGGER.info('entity_type = {}'.format(entity_type))
        # entity_ids for funds_or_assets
        if entity_type == 'funds':
            entities = req_state.client.service.GetFunds()
            entity_objs = entities.Fund
        else:  # assets
            entities = req_state.client.service.GetAssets()
            entity_objs = entities.Asset
        entity_objs_len = len(entity_objs)

        # calc_data_items loop
        cdi = 1
        for data_item in calc_data_items:
            data_item_id = data_item.Id
            data_item_name = data_item.Name
            LOGGER.info('data_item_name = {} ({})'.format(
                data_item_name, data_item_id))

            # data_value_type for data_item
            data_value_type_id = data_item.DataValueType
            data_value_type = data_value_types[data_value_type_id]

            # entity loop
            ent = 1
            for entity in entity_objs:
                entity_dict = ilevel.sobject_to_dict(entity)
                entity_id = entity_dict.get('Id')
                entity_name = entity_dict.get('Name')
                entity_initial_dttm = datetime.strptime(
                    entity_dict.get('InitialPeriod')[:10], '%Y-%m-%d')
                # Effective window start: the later of the stream start and
                # the entity's initial period
                start_dttm = max(d for d in (start_dttm, entity_initial_dttm)
                                 if d is not None)

                entity_path = ilevel.create_entity_path(req_state, [entity_id])

                # period_type loop
                last_period_type = period_types[-1]
                for period_type in period_types:
                    period, period_diff = ilevel.get_periods(
                        req_state, start_dttm, end_dttm, period_type)

                    # offset_period loop (0, -1, -2, ...) look-back
                    pd = 0
                    while pd <= period_diff + 1:
                        offset_period = copy.copy(period)
                        offset_period.IsOffset = True
                        offset_period.Quantity = int(-1 * pd)

                        i_get_params = req_state.client.factory.create(
                            'AssetAndFundGetRequestParameters')
                        i_get_params.RequestIdentifier = req_id
                        i_get_params.DataValueType = data_value_type
                        i_get_params.EntitiesPath = entity_path
                        i_get_params.DataItemId = data_item_id
                        i_get_params.ScenarioId = scenario_id
                        i_get_params.Period = period
                        i_get_params.Offset = offset_period
                        i_get_params.EndOfPeriod = latest_date
                        i_get_params.ReportedDate = current_date
                        i_get_params.CurrencyCode = currency_code

                        i_get_params_list.BaseRequestParameters.append(
                            i_get_params)

                        # run iGetBatch
                        end_of_batches = False
                        if (pd == (period_diff + 1)
                                and period_type == last_period_type
                                and ent == entity_objs_len
                                and cdi == calc_data_items_len
                                and entity_type == 'assets'):
                            end_of_batches = True
                            LOGGER.info('xxx END OF BATCHES xxx')
                        if (req_id % batch_size == 0) or end_of_batches:
                            LOGGER.info('xxx BATCH: {} xxx'.format(batch))
                            i_get_count = len(
                                i_get_params_list.BaseRequestParameters)
                            i_get_request = req_state.client.factory.create(
                                'DataServiceRequest')
                            i_get_request.IncludeStandardizedDataInfo = True
                            i_get_request.IncludeExcelFormula = True
                            i_get_request.ParametersList = i_get_params_list

                            metrics_string = (
                                'periodic_data_calculated, iGetBatch #{}: {} requests'
                                .format(batch, i_get_count))
                            with metrics.http_request_timer(metrics_string):
                                data_values = req_state.client.service.iGetBatch(
                                    i_get_request)

                            if isinstance(data_values, str):
                                # The service returned an error string; skip
                                # this batch, but advance the loop counters so
                                # the while loop still terminates
                                req_id = req_id + 1
                                pd = pd + 1
                                continue

                            try:
                                periodic_data_records = data_values.DataValue
                            except Exception as err:
                                LOGGER.error('{}'.format(err))
                                LOGGER.error('data_values dict = {}'.format(
                                    ilevel.sobject_to_dict(data_values)))
                                raise

                            for periodic_data_record in periodic_data_records:
                                # suds objects support `in` as a field-presence
                                # test, so skip error / no-data responses
                                if "Error" in periodic_data_record:
                                    continue

                                if "NoDataAvailable" in periodic_data_record:
                                    continue

                                periodic_data_record_dict = ilevel.sobject_to_dict(
                                    periodic_data_record)

                                transformed_record = transform_json(
                                    periodic_data_record_dict)

                                if 'value' in transformed_record:
                                    value = transformed_record.get('value')
                                    if value == 'No Data Available':
                                        continue
                                    value_string = str(value)
                                    if type(value) in (int, float):
                                        value_numeric = float(value)
                                    else:
                                        value_numeric = None
                                    sd_parameters = transformed_record.get(
                                        'sd_parameters', {})
                                    excel_formula = transformed_record.get(
                                        'excel_formula')
                                    currency_code = sd_parameters.get(
                                        'currency_code')
                                    data_item_id = sd_parameters.get(
                                        'data_item_id')
                                    data_value_type = sd_parameters.get(
                                        'data_value_type')
                                    detail_id = sd_parameters.get('detail_id')
                                    entity_id = next(
                                        iter(
                                            sd_parameters.get(
                                                'entities_path',
                                                {}).get('path',
                                                        {}).get('int', [])),
                                        None)
                                    scenario_id = sd_parameters.get(
                                        'scenario_id')
                                    period_type = sd_parameters.get(
                                        'period', {}).get('type')
                                    end_of_period_value = sd_parameters.get(
                                        'end_of_period', {}).get('value')
                                    reported_date_value = sd_parameters.get(
                                        'reported_date', {}).get('value')
                                    exchange_rate_type = sd_parameters.get(
                                        'exchange_rate', {}).get('type')
                                    request_id = sd_parameters.get(
                                        'request_identifier')
                                    standardized_data_id = sd_parameters.get(
                                        'standardized_data_id')

                                    dimensions = {
                                        'data_item_id': data_item_id,
                                        'entity_id': entity_id,
                                        'scenario_id': scenario_id,
                                        'period_type': period_type,
                                        'end_of_period_value':
                                        end_of_period_value,
                                        'currency_code': currency_code,
                                        'exchange_rate_type':
                                        exchange_rate_type,
                                        'data_value_type': data_value_type
                                    }
                                    # Hash the primary-key dimensions into an
                                    # md5 surrogate key
                                    hash_key = str(
                                        hash_data(
                                            json.dumps(dimensions,
                                                       sort_keys=True)))

                                    new_record = {
                                        'hash_key': hash_key,
                                        'excel_formula': excel_formula,
                                        'currency_code': currency_code,
                                        'data_item_id': data_item_id,
                                        'data_value_type': data_value_type,
                                        'detail_id': detail_id,
                                        'entity_id': entity_id,
                                        'scenario_id': scenario_id,
                                        'period_type': period_type,
                                        'end_of_period_value':
                                        end_of_period_value,
                                        'reported_date_value':
                                        reported_date_value,
                                        'exchange_rate_type':
                                        exchange_rate_type,
                                        'request_id': request_id,
                                        'standardized_data_id':
                                        standardized_data_id,
                                        'value': value,
                                        'value_string': value_string,
                                        'value_numeric': value_numeric
                                    }

                                    results.append(new_record)
                                # end periodic_data_record loop

                            # Process batch records
                            max_bookmark_value, process_record_count = process_records(
                                result_records=results,
                                req_state=req_state,
                                deletion_flag=False,
                                max_bookmark_value=max_bookmark_value)

                            update_count = update_count + process_record_count

                            # Init new params_list and results
                            i_get_params_list = req_state.client.factory.create(
                                'ArrayOfBaseRequestParameters')
                            results = []

                            batch = batch + 1
                            # end iGetBatch

                        req_id = req_id + 1
                        pd = pd + 1
                        # end offset_period loop

                    # end period_type loop

                ent = ent + 1
                # end entity_id loop

            cdi = cdi + 1
            # end calc_data_items loop

        # end entity_type loop

    # Update the state with the max_bookmark_value for the stream after ALL records
    # Always process past year of calculated data (Subtract 365 days from max_bookmark_value)
    max_bookmark_dttm = datetime.strptime(max_bookmark_value[:10],
                                          "%Y-%m-%d") - timedelta(days=365)
    max_bookmark_value = max_bookmark_dttm.strftime("%Y-%m-%d")
    singer_ops.write_bookmark(req_state.state, req_state.stream_name,
                              max_bookmark_value)

    return update_count
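The surrogate hash_key above serializes the primary-key dimensions with sort_keys=True so the digest is stable no matter how the dict was built. A minimal sketch of the idea, with a stand-in for the tap's hash_data helper (assumed here to be md5-based, per the comment in the code):

import hashlib
import json

def hash_data(serialized):
    # Stand-in for the tap's hash_data helper (assumed md5-based)
    return hashlib.md5(serialized.encode('utf-8')).hexdigest()

dimensions = {
    'data_item_id': 101,
    'entity_id': 202,
    'scenario_id': 1,
    'period_type': 'ReportingPeriod',
    'end_of_period_value': '2020-12-31',
    'currency_code': 'USD',
    'exchange_rate_type': None,
    'data_value_type': 'Currency',
}
# The same dimensions in any insertion order produce the same key
hash_key = str(hash_data(json.dumps(dimensions, sort_keys=True)))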
Example #3
def __process_standardized_data_stream(req_state):
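    """Sync standardized periodic data in bounded date windows (the API
    limits each call to roughly 30 days), translating each set of updated
    standardized-data ids into records one batch at a time. Returns the
    number of records emitted."""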
    max_bookmark_value = req_state.last_date
    update_count = 0

    # Split date windows: the API restricts each call to a 30-day window.
    date_chunks = ilevel.get_date_chunks(req_state.last_date,
                                         req_state.end_date, MAX_DATE_WINDOW)

    cur_start_date = None
    cur_end_date = None
    cur_date_criteria_length = len(date_chunks)
    cur_date_range_index = 0
    LOGGER.info('Preparing to process %s date chunks', len(date_chunks))
    while cur_date_range_index < cur_date_criteria_length:

        if cur_start_date is None:
            cur_start_date = date_chunks[0]
            cur_end_date = date_chunks[1]
            cur_date_range_index = 2
        else:
            cur_start_date = cur_end_date
            cur_end_date = date_chunks[cur_date_range_index]
            cur_date_range_index = cur_date_range_index + 1

        LOGGER.info(
            'periodic_data_standardized, {} - {}, Date Range: {} of {}'.format(
                cur_start_date, cur_end_date, cur_date_range_index,
                cur_date_criteria_length))

        # Get updated records based on the date range
        updated_object_id_sets = ilevel.get_standardized_data_id_chunks(
            cur_start_date, cur_end_date, req_state.client)
        if len(updated_object_id_sets) == 0:
            continue

        LOGGER.info(
            'periodic_data_standardized, {} - {}, Updated Sets: {}'.format(
                cur_start_date, cur_end_date, len(updated_object_id_sets)))

        # Translate standardized ids to objects
        batch = 1
        for id_set in updated_object_id_sets:
            processed_record_count = 0
            temp_max_bookmark_value, processed_record_count = process_iget_batch_for_standardized_id_set(
                id_set, req_state)
            temp_max_bookmark_value_dttm = datetime.strptime(
                temp_max_bookmark_value, "%Y-%m-%d")
            max_bookmark_value_dttm = datetime.strptime(
                max_bookmark_value, "%Y-%m-%d")
            if temp_max_bookmark_value_dttm > max_bookmark_value_dttm:
                max_bookmark_value = temp_max_bookmark_value

            LOGGER.info(
                'periodic_data_standardized, {} - {}, Batch #{}, Requests: {}, Results: {}'
                .format(cur_start_date, cur_end_date, batch, len(id_set),
                        processed_record_count))
            update_count = update_count + processed_record_count
            batch = batch + 1

        # Some reported_date_value bookmarks can be in the future; cap the
        # bookmark at the end of the current window
        max_bookmark_value_dttm = datetime.strptime(max_bookmark_value,
                                                    "%Y-%m-%d")
        if max_bookmark_value_dttm > cur_end_date:
            max_bookmark_value = cur_end_date.strftime("%Y-%m-%d")

        # Data not sorted
        # Update the state with the max_bookmark_value for the stream after ALL records
        if req_state.bookmark_field and processed_record_count > 0:
            singer_ops.write_bookmark(req_state.state, req_state.stream_name,
                                      max_bookmark_value)

    return update_count
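ilevel.get_date_chunks is not shown on this page. Judging by how the while loop consumes it (a flat list of boundary dates read pairwise), it behaves roughly like the hypothetical sketch below, which assumes an ISO-formatted start date string and a datetime end date:

from datetime import datetime, timedelta

def get_date_chunks(start_date_str, end_dttm, max_window_days):
    # Hypothetical sketch: build the flat list of window boundaries
    # [d0, d1, d2, ...] that the calling loop reads pairwise
    boundaries = [datetime.strptime(start_date_str[:10], '%Y-%m-%d')]
    while boundaries[-1] < end_dttm:
        boundaries.append(
            min(boundaries[-1] + timedelta(days=max_window_days), end_dttm))
    return boundaries

chunks = get_date_chunks('2020-01-01', datetime(2020, 3, 15), 30)
# -> boundaries for 2020-01-01, 2020-01-31, 2020-03-01, 2020-03-15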
Example #4
def __process_incremental_stream(req_state):
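    """Incrementally sync a stream by walking bounded date windows
    (MAX_DATE_WINDOW days), processing the updated and deleted object id
    sets for each window, and advancing the bookmark only when records
    were written. Returns the total record count."""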
    record_count = 0
    date_chunks = ilevel.get_date_chunks(req_state.last_date,
                                         req_state.end_date, MAX_DATE_WINDOW)
    max_bookmark_value_upd = req_state.last_date
    max_bookmark_value_del = req_state.last_date

    cur_start_date = None
    cur_end_date = None
    cur_date_criteria_length = len(date_chunks)
    cur_date_range_index = 0

    # Loop through date and id 'chunks' as appropriate, processing each window.
    while cur_date_range_index < cur_date_criteria_length:
        if cur_start_date is None:
            cur_start_date = date_chunks[0]
            cur_end_date = date_chunks[1]
            cur_date_range_index = 2
        else:
            cur_start_date = cur_end_date
            cur_end_date = date_chunks[cur_date_range_index]
            cur_date_range_index = cur_date_range_index + 1

        LOGGER.info('{}: Processing date range {} of {} total ({} - {})'.format(
            req_state.stream_name, cur_date_range_index,
            cur_date_criteria_length, cur_start_date, cur_end_date))

        # Retrieve updated entities for the date range and send them for processing
        updated_object_id_sets = ilevel.get_updated_object_id_sets(
            cur_start_date, cur_end_date, req_state.client,
            req_state.stream_name)

        update_bookmark = False
        if len(updated_object_id_sets) > 0:
            cur_id_set_index = 0
            for id_set in updated_object_id_sets:
                updated_record_count = 0
                LOGGER.info('{}: Processing id set {} of {} total sets'.format(
                    req_state.stream_name, cur_id_set_index + 1,
                    len(updated_object_id_sets)))

                # Process updated object stream id set
                max_bookmark_value_upd, updated_record_count = \
                    __process_updated_object_stream_id_set(
                        object_ids=list(id_set),
                        req_state=req_state,
                        max_bookmark_value=max_bookmark_value_upd)

                record_count = record_count + updated_record_count
                if updated_record_count > 0:
                    update_bookmark = True

                cur_id_set_index = cur_id_set_index + 1

        # Retrieve deleted entities for the date range and send them for processing
        deleted_object_id_sets = ilevel.get_deleted_object_id_sets(
            cur_start_date, cur_end_date, req_state.client,
            req_state.stream_name)

        if len(deleted_object_id_sets) > 0:
            cur_id_set_index = 0
            for id_set in deleted_object_id_sets:
                deleted_record_count = 0
                LOGGER.info(
                    '{}: Processing deleted id set {} of {} total sets'.format(
                        req_state.stream_name, cur_id_set_index + 1,
                        len(deleted_object_id_sets)))
                # Process deleted records
                max_bookmark_value_del, deleted_record_count = \
                    __process_deleted_object_stream_id_set(
                        object_ids=list(id_set),
                        req_state=req_state,
                        max_bookmark_value=max_bookmark_value_del)

                record_count = record_count + deleted_record_count
                if deleted_record_count > 0:
                    update_bookmark = True

                cur_id_set_index = cur_id_set_index + 1

        # Take the max bookmark across the update and delete passes (bounded
        # below by the configured start and last dates)
        max_bookmark_value = max(req_state.start_date, req_state.last_date,
                                 max_bookmark_value_upd, max_bookmark_value_del)
        # Cap the bookmark at the end of the current date window
        max_bookmark_value_dttm = datetime.strptime(max_bookmark_value,
                                                    "%Y-%m-%d")
        if max_bookmark_value_dttm > cur_end_date:
            max_bookmark_value = cur_end_date.strftime("%Y-%m-%d")

        # Data not sorted
        # Update the state with the max_bookmark_value for the stream after ALL records
        if update_bookmark:
            singer_ops.write_bookmark(req_state.state, req_state.stream_name,
                                      max_bookmark_value)

    return record_count
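One detail worth calling out: max() is applied directly to the 'YYYY-MM-DD' bookmark strings above. That is safe because ISO-8601 dates compare correctly as plain strings:

max('2020-01-15', '2019-12-31', '2020-02-01')  # -> '2020-02-01'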