def __process_periodic_data_calcs(req_state, scenario_name='Actual', currency_code='USD'):
    """Fetch calculated (formula-based) periodic data values from the iLevel
    SOAP API and publish them as Singer records.

    For every global "calc" data item and every asset (funds are currently
    excluded), one iGet request is queued per configured period type at
    offsets 0, -1, -2, ... spanning the bookmark-to-end date range.  Queued
    requests are flushed to ``iGetBatch`` whenever ``batch_size`` requests
    have accumulated, or when the final combination is reached; each flushed
    batch is transformed and written via ``process_records``.

    Args:
        req_state: per-stream request state (SOAP client, catalog, state,
            period_types, last_date, end_date, stream_name).
        scenario_name: name of the iLevel scenario whose values are pulled.
        currency_code: currency requested for each data value.

    Returns:
        int: total number of records published across all batches.
    """
    entity_types = ['assets']  # Currently: assets only (not funds)
    # Normalize the comma-separated period types config into a list
    period_types = req_state.period_types.strip().replace(' ', '').split(',')
    batch_size = 10000  # max queued requests per iGetBatch SOAP call
    start_dttm = datetime.strptime(req_state.last_date, '%Y-%m-%d')
    end_dttm = req_state.end_date
    max_bookmark_value = req_state.last_date

    # Init params_list and results
    i_get_params_list = req_state.client.factory.create(
        'ArrayOfBaseRequestParameters')
    results = []
    req_id = 1
    batch = 1
    update_count = 0

    # Base objects
    data_value_types = req_state.client.factory.create('DataValueTypes')

    # scenario_id for scenario_name
    # NOTE(review): raises IndexError if scenario_name does not exist.
    scenarios = req_state.client.service.GetScenarios()
    scenario = [i for i in scenarios.NamedEntity if i.Name == scenario_name][0]
    scenario_id = scenario.Id

    # current_date / latest_date sentinels (ReportedDate / EndOfPeriod)
    date_types = req_state.client.factory.create('DateTypes')
    current_date = req_state.client.factory.create('Date')
    current_date.Type = date_types.Current

    latest_date = req_state.client.factory.create('Date')
    latest_date.Type = date_types.Latest

    # Get all calc data items: global data items only, keeping those that
    # carry a formula (FormulaTypeIDsString is truthy)
    data_item_search_criteria = req_state.client.factory.create(
        'DataItemsSearchCriteria')
    data_item_search_criteria.GetGlobalDataItemsOnly = True
    data_items = req_state.client.service.GetDataItems(
        data_item_search_criteria)
    calc_data_items = [
        i for i in data_items.DataItemObjectEx if i.FormulaTypeIDsString
    ]
    calc_data_items_len = len(calc_data_items)
    last_calc_data_item = calc_data_items[-1]  # NOTE(review): unused

    # entity_type loop
    for entity_type in entity_types:
        LOGGER.info('entity_type = {}'.format(entity_type))

        # entity_objs for funds or assets
        if entity_type == 'funds':
            entities = req_state.client.service.GetFunds()
            entity_objs = entities.Fund
        else:  # assets
            entities = req_state.client.service.GetAssets()
            entity_objs = entities.Asset
        entity_objs_len = len(entity_objs)

        # calc_data_items loop; cdi is a 1-based counter used (with ent and
        # pd below) to detect the very last combination for the final flush
        cdi = 1
        for data_item in calc_data_items:
            data_item_id = data_item.Id
            data_item_name = data_item.Name
            LOGGER.info('data_item_name = {} ({})'.format(
                data_item_name, data_item_id))

            # data_value_type for data_item
            data_value_type_id = data_item.DataValueType
            data_value_type = data_value_types[data_value_type_id]

            # entity loop; ent is a 1-based counter (see cdi above)
            ent = 1
            for entity in entity_objs:
                entity_dict = ilevel.sobject_to_dict(entity)
                entity_id = entity_dict.get('Id')
                entity_name = entity_dict.get('Name')

                # Don't look back earlier than the entity's first period
                entity_initial_dttm = datetime.strptime(
                    entity_dict.get('InitialPeriod')[:10], '%Y-%m-%d')
                max_dttm = [start_dttm, entity_initial_dttm]
                # NOTE(review): this overwrites start_dttm for ALL subsequent
                # entities/data items (it can only move forward) -- confirm
                # the cross-entity clobbering is intended.
                start_dttm = max(i for i in max_dttm if i is not None)

                entity_path = ilevel.create_entity_path(req_state, [entity_id])

                # period_type loop
                last_period_type = period_types[-1]
                for period_type in period_types:
                    period, period_diff = ilevel.get_periods(
                        req_state, start_dttm, end_dttm, period_type)

                    # offset_period loop (0, -1, -2, ...) look-back
                    pd = 0
                    while pd <= period_diff + 1:
                        offset_period = copy.copy(period)
                        offset_period.IsOffset = True
                        offset_period.Quantity = int(-1 * pd)

                        # Queue one iGet request for this
                        # entity/data-item/period/offset combination
                        i_get_params = req_state.client.factory.create(
                            'AssetAndFundGetRequestParameters')
                        i_get_params.RequestIdentifier = req_id
                        i_get_params.DataValueType = data_value_type
                        i_get_params.EntitiesPath = entity_path
                        i_get_params.DataItemId = data_item_id
                        i_get_params.ScenarioId = scenario_id
                        i_get_params.Period = period
                        i_get_params.Offset = offset_period
                        i_get_params.EndOfPeriod = latest_date
                        i_get_params.ReportedDate = current_date
                        i_get_params.CurrencyCode = currency_code
                        i_get_params_list.BaseRequestParameters.append(
                            i_get_params)

                        # run iGetBatch when the batch fills up, or when the
                        # final combination of all loop counters is reached
                        end_of_batches = False
                        if (pd == (period_diff + 1)
                                and period_type == last_period_type
                                and ent == entity_objs_len
                                and cdi == calc_data_items_len
                                and entity_type == 'assets'):
                            end_of_batches = True
                            LOGGER.info('xxx END OF BATCHES xxx')

                        if (req_id % batch_size == 0) or end_of_batches:
                            LOGGER.info('xxx BATCH: {} xxx'.format(batch))
                            i_get_count = len(i_get_params_list)
                            i_get_request = req_state.client.factory.create(
                                'DataServiceRequest')
                            i_get_request.IncludeStandardizedDataInfo = True
                            i_get_request.IncludeExcelFormula = True
                            i_get_request.ParametersList = i_get_params_list

                            # pylint: disable=unused-variable
                            metrics_string = (
                                'periodic_data_calculated, iGetBatch #{}: {} requests'
                                .format(batch, i_get_count))
                            with metrics.http_request_timer(
                                    metrics_string) as timer:
                                data_values = req_state.client.service.iGetBatch(
                                    i_get_request)

                            # NOTE(review): a str response 'continue's WITHOUT
                            # incrementing req_id/pd, so the same (still
                            # growing) batch is re-sent on the next while
                            # iteration -- confirm this retry behavior (and
                            # its potential to loop forever) is intended.
                            if isinstance(data_values, str):
                                continue
                            try:
                                periodic_data_records = data_values.DataValue
                            except Exception as err:
                                LOGGER.error('{}'.format(err))
                                LOGGER.error('data_values dict = {}'.format(
                                    ilevel.sobject_to_dict(data_values)))
                                raise err

                            for periodic_data_record in periodic_data_records:
                                # Skip error / no-data marker records
                                if "Error" in periodic_data_record:
                                    continue
                                if "NoDataAvailable" in periodic_data_record:
                                    continue
                                periodic_data_record_dict = ilevel.sobject_to_dict(
                                    periodic_data_record)
                                transformed_record = transform_json(
                                    periodic_data_record_dict)
                                if 'value' in transformed_record:
                                    value = transformed_record.get('value')
                                    value_string = str(value)
                                    if type(value) in (int, float):
                                        value_numeric = float(value)
                                    else:
                                        value_numeric = None
                                    if value == 'No Data Available':
                                        continue
                                    sd_parameters = transformed_record.get(
                                        'sd_parameters', {})
                                    excel_formula = transformed_record.get(
                                        'excel_formula')
                                    # NOTE(review): the assignments below
                                    # clobber the loop variables currency_code,
                                    # data_item_id, data_value_type,
                                    # scenario_id and period_type that build
                                    # SUBSEQUENT requests -- confirm intended.
                                    currency_code = sd_parameters.get(
                                        'currency_code')
                                    data_item_id = sd_parameters.get(
                                        'data_item_id')
                                    data_value_type = sd_parameters.get(
                                        'data_value_type')
                                    detail_id = sd_parameters.get('detail_id')
                                    # First entity id on the path, or None
                                    entity_id = next(
                                        iter(
                                            sd_parameters.get(
                                                'entities_path', {}).get(
                                                    'path', {}).get('int', [])),
                                        None)
                                    scenario_id = sd_parameters.get(
                                        'scenario_id')
                                    period_type = sd_parameters.get(
                                        'period', {}).get('type')
                                    end_of_period_value = sd_parameters.get(
                                        'end_of_period', {}).get('value')
                                    reported_date_value = sd_parameters.get(
                                        'reported_date', {}).get('value')
                                    exchange_rate_type = sd_parameters.get(
                                        'exchange_rate', {}).get('type')
                                    request_id = sd_parameters.get(
                                        'request_identifier')
                                    standardized_data_id = sd_parameters.get(
                                        'standardized_data_id')

                                    # Primary key dimensions, create md5 hash key
                                    dimensions = {
                                        'data_item_id': data_item_id,
                                        'entity_id': entity_id,
                                        'scenario_id': scenario_id,
                                        'period_type': period_type,
                                        'end_of_period_value': end_of_period_value,
                                        'currency_code': currency_code,
                                        'exchange_rate_type': exchange_rate_type,
                                        'data_value_type': data_value_type
                                    }
                                    hash_key = str(
                                        hash_data(
                                            json.dumps(dimensions, sort_keys=True)))

                                    new_record = {
                                        'hash_key': hash_key,
                                        'excel_formula': excel_formula,
                                        'currency_code': currency_code,
                                        'data_item_id': data_item_id,
                                        'data_value_type': data_value_type,
                                        'detail_id': detail_id,
                                        'entity_id': entity_id,
                                        'scenario_id': scenario_id,
                                        'period_type': period_type,
                                        'end_of_period_value': end_of_period_value,
                                        'reported_date_value': reported_date_value,
                                        'exchange_rate_type': exchange_rate_type,
                                        'request_id': request_id,
                                        'standardized_data_id': standardized_data_id,
                                        'value': value,
                                        'value_string': value_string,
                                        'value_numeric': value_numeric
                                    }
                                    results.append(new_record)
                            # end for rec in periodic_data_records

                            # Process batch records
                            max_bookmark_value, process_record_count = process_records(
                                result_records=results,
                                req_state=req_state,
                                deletion_flag=False,
                                max_bookmark_value=max_bookmark_value)
                            update_count = update_count + process_record_count

                            # Init new params_list and results for next batch
                            i_get_params_list = req_state.client.factory.create(
                                'ArrayOfBaseRequestParameters')
                            results = []
                            batch = batch + 1
                        # end iGetBatch

                        req_id = req_id + 1
                        pd = pd + 1
                    # end offset_period loop
                # end period_type loop
                ent = ent + 1
            # end entity loop
            cdi = cdi + 1
        # end calc_data_items loop
    # end entity_type loop

    # Update the state with the max_bookmark_value for the stream after ALL
    # records.  Always process the past year of calculated data (subtract
    # 365 days from max_bookmark_value).
    max_bookmark_dttm = datetime.strptime(
        max_bookmark_value[:10], "%Y-%m-%d") - timedelta(days=365)
    max_bookmark_value = max_bookmark_dttm.strftime("%Y-%m-%d")
    singer_ops.write_bookmark(req_state.state, req_state.stream_name,
                              max_bookmark_value)

    return update_count
def perform_igetbatch_operation_for_standardized_id_set(id_set, req_state):
    # pylint: disable=too-many-statements
    """Request the data values behind a set of standardized data ids.

    Builds one ObjectId iGet request per id in ``id_set``, sends them in a
    single ``iGetBatch`` call, and flattens the response into a list of
    record dicts (one per entity on each value's entities path), each keyed
    by an md5 hash of its primary-key dimensions.

    Args:
        id_set: iterable of standardized data ids to fetch.
        req_state: per-stream request state holding the SOAP client.

    Returns:
        list: record dicts ready for publishing (empty on a string
        response or when no usable values come back).
    """
    factory = req_state.client.factory
    data_value_types = factory.create('DataValueTypes')

    # Sentinel dates: "current" reported date and "latest" end of period
    date_types = factory.create('DateTypes')
    current_date = factory.create('Date')
    current_date.Type = date_types.Current
    latest_date = factory.create('Date')
    latest_date.Type = date_types.Latest

    id_set_len = len(id_set)
    params_list = factory.create('ArrayOfBaseRequestParameters')

    # Queue one ObjectId request per standardized data id
    request_number = 1
    for standardized_id in id_set:
        request_number = request_number + 1
        params = factory.create('AssetAndFundGetRequestParameters')
        params.StandardizedDataId = standardized_id
        params.RequestIdentifier = request_number
        params.DataValueType = data_value_types.ObjectId
        params.EndOfPeriod = latest_date
        params.ReportedDate = current_date
        params_list.BaseRequestParameters.append(params)

    batch_request = factory.create('DataServiceRequest')
    batch_request.IncludeStandardizedDataInfo = True
    batch_request.IncludeExcelFormula = True
    batch_request.ParametersList = params_list

    # pylint: disable=unused-variable
    metrics_string = ('Standardized Data Item iGetBatch: {} requests'.format(id_set_len))
    with metrics.http_request_timer(metrics_string) as timer:
        data_values = req_state.client.service.iGetBatch(batch_request)

    # A plain string response carries no data values
    if isinstance(data_values, str):
        return []
    try:
        periodic_data_records = data_values.DataValue
    except Exception as err:
        LOGGER.error('%s', err)
        LOGGER.error('data_values dict = %s', sobject_to_dict(data_values))
        raise err

    results = []
    for record in periodic_data_records:
        # Skip error / no-data marker records
        if "Error" in record or "NoDataAvailable" in record:
            continue

        transformed = transform_json(sobject_to_dict(record))
        if 'value' not in transformed:
            continue

        value = transformed.get('value')
        if value == 'No Data Available':
            continue
        value_string = str(value)
        value_numeric = float(value) if type(value) in (int, float) else None

        # Pull the dimension fields off the standardized-data parameters
        sd_parameters = transformed.get('sd_parameters', {})
        excel_formula = transformed.get('excel_formula')
        currency_code = sd_parameters.get('currency_code')
        data_item_id = sd_parameters.get('data_item_id')
        data_value_type = sd_parameters.get('data_value_type')
        detail_id = sd_parameters.get('detail_id')
        scenario_id = sd_parameters.get('scenario_id')
        period_type = sd_parameters.get('period', {}).get('type')
        end_of_period_value = sd_parameters.get('end_of_period', {}).get('value')
        reported_date_value = sd_parameters.get('reported_date', {}).get('value')
        exchange_rate_type = sd_parameters.get('exchange_rate', {}).get('type')
        request_id = sd_parameters.get('request_identifier')
        standardized_data_id = sd_parameters.get('standardized_data_id')
        entity_ids = sd_parameters.get('entities_path', {}).get('path', {}).get('int', [])

        # Emit one record per entity on the value's entities path
        for entity_id in entity_ids:
            # Primary key dimensions, create md5 hash key
            dimensions = {
                'data_item_id': data_item_id,
                'entity_id': entity_id,
                'scenario_id': scenario_id,
                'period_type': period_type,
                'end_of_period_value': end_of_period_value,
                'currency_code': currency_code,
                'exchange_rate_type': exchange_rate_type,
                'data_value_type': data_value_type
            }
            hash_key = str(hash_data(json.dumps(dimensions, sort_keys=True)))
            results.append({
                'hash_key': hash_key,
                'excel_formula': excel_formula,
                'currency_code': currency_code,
                'data_item_id': data_item_id,
                'data_value_type': data_value_type,
                'detail_id': detail_id,
                'entity_id': entity_id,
                'scenario_id': scenario_id,
                'period_type': period_type,
                'end_of_period_value': end_of_period_value,
                'reported_date_value': reported_date_value,
                'exchange_rate_type': exchange_rate_type,
                'request_id': request_id,
                'standardized_data_id': standardized_data_id,
                'value': value,
                'value_string': value_string,
                'value_numeric': value_numeric
            })
    # end for record in periodic_data_records

    return results
def process_records(result_records, req_state, deletion_flag=None, max_bookmark_value=None):
    """Transform, validate, and publish a batch of records to Singer.

    Each record is run through transform_json, tagged with the deletion
    flag, validated against the stream's JSON schema, and written via
    singer write_record.  When the stream has a bookmark field, the
    high-water mark is advanced and only records on or after a 14-day
    lookback window behind req_state.last_date are emitted; streams
    without a bookmark field publish every record.

    Args:
        result_records: list of raw record dicts to publish (may be empty).
        req_state: per-stream request state (stream_name, bookmark_field,
            last_date, catalog).
        deletion_flag: value applied to each record via __set_deletion_flag.
        max_bookmark_value: current bookmark high-water mark as
            'YYYY-MM-DD', or None.

    Returns:
        tuple: (max_bookmark_value, number_of_records_written).

    Raises:
        Exception: re-raises any transform or schema-validation error
            after logging the offending record(s).
    """
    # Nothing to publish.  (`not result_records` already covers None and
    # the empty list -- the previous three-way check was redundant.)
    if not result_records:
        return max_bookmark_value, 0

    stream_name = req_state.stream_name
    bookmark_field = req_state.bookmark_field
    last_date = req_state.last_date
    LOGGER.info('{}: Preparing to publish {} records'.format(
        stream_name, len(result_records)))

    stream = req_state.catalog.get_stream(stream_name)
    schema = stream.schema.to_dict()
    stream_metadata = metadata.to_map(stream.metadata)

    # Transform records
    try:
        transformed_data = transform_json(result_records)
    except Exception as err:
        LOGGER.error(err)
        LOGGER.error('result_records = {}'.format(result_records))
        raise err

    # Lookback window, always process last 14 days.  Loop-invariant, so
    # computed once here instead of once per record.
    last_dttm = datetime.strptime(last_date, "%Y-%m-%d") - timedelta(days=14)

    with metrics.record_counter(req_state.stream_name) as counter:
        for record in transformed_data:
            # Add deletion flag to record
            __set_deletion_flag(record, deletion_flag)

            # Singer.io validate/transform vs. JSON schema
            with Transformer() as transformer:
                try:
                    transformed_record = transformer.transform(
                        record, schema, stream_metadata)
                except Exception as err:
                    LOGGER.error(err)
                    LOGGER.error('Error record: {}'.format(record))
                    raise err

            if bookmark_field and (bookmark_field in transformed_record):
                # Reset max_bookmark_value to new value if higher
                bookmark_dt = transformed_record[bookmark_field][:10]
                bookmark_dttm = datetime.strptime(bookmark_dt, "%Y-%m-%d")
                if max_bookmark_value:
                    max_bookmark_value_dttm = datetime.strptime(
                        max_bookmark_value, "%Y-%m-%d")
                    if bookmark_dttm > max_bookmark_value_dttm:
                        max_bookmark_value = bookmark_dt
                else:
                    max_bookmark_value = bookmark_dt

                # Keep only records whose bookmark is on or after the
                # lookback boundary
                if bookmark_dttm >= last_dttm:
                    singer_ops.write_record(
                        req_state.stream_name, transformed_record, utils.now())
                    counter.increment()
            else:
                # No bookmark field on this stream: always publish
                singer_ops.write_record(
                    req_state.stream_name, transformed_record, utils.now())
                counter.increment()

        LOGGER.info('{}: Published {} records, max_bookmark_value: {}'.format(
            stream_name, counter.value, max_bookmark_value))
    return max_bookmark_value, counter.value