def insert(**kwargs):
    """Build and execute an INSERT statement against ``table``.

    Each keyword argument becomes a column name and its value the inserted
    value.  String values are wrapped in double quotes with any embedded
    double quotes doubled (the original did no escaping, so a value
    containing ``"`` would break the statement); other values are rendered
    with ``str()``.

    NOTE(review): this still builds SQL by string concatenation, so it is
    only safe for trusted input — prefer parameterized queries if
    ``db.execute_sql_statement`` supports them.
    """
    def _render(value):
        # Quote string literals; double embedded quotes so they cannot
        # terminate the literal early.
        if isinstance(value, str):
            return '"' + value.replace('"', '""') + '"'
        return str(value)

    columns = ', '.join(kwargs.keys())
    values = ', '.join(_render(v) for v in kwargs.values())
    val = f"INSERT INTO {table} ({columns}) VALUES ({values})"
    log.debug(val)
    db.execute_sql_statement(val)
def wrapper(*args, **kwargs):
    """Invoke the wrapped service function ``func``.

    Any exception is logged (with traceback) and translated into a
    ``falcon.HTTPError`` (HTTP 400) for the API layer, chaining the
    original exception so the root cause is preserved.
    """
    try:
        log.debug(f"Calling service: {func.__name__}")
        return func(*args, **kwargs)
    except Exception as err:
        error = f'Error calling service {func.__name__}. Error: ' + str(err)
        # log.exception records the full traceback, not just the message
        log.exception(error)
        # chain with `from err` so the original cause survives re-raising
        raise falcon.HTTPError(falcon.HTTP_400, 'service error', error) from err
def delete(**kwargs):
    """Build and execute a DELETE statement against ``table``.

    Keyword arguments become equality conditions joined with AND.

    Fixes a bug in the original: for more than one keyword argument every
    condition emitted its own ``WHERE`` keyword, producing invalid SQL
    such as ``DELETE FROM t WHERE a = 1 AND  WHERE b = 2``.

    NOTE(review): SQL is built by string concatenation — only safe for
    trusted input; prefer parameterized queries if the db layer allows.
    """
    def _render(value):
        # Quote string literals, doubling embedded quotes.
        if isinstance(value, str):
            return '"' + value.replace('"', '""') + '"'
        return str(value)

    val = f"DELETE FROM {table}"
    if kwargs:
        conditions = ' AND '.join(
            f'{key} = {_render(value)}' for key, value in kwargs.items())
        val += ' WHERE ' + conditions
    log.debug(val)
    db.execute_sql_statement(val)
def import_survey_stream(data, run_id):
    """Parse a CSV byte stream and import the survey data for ``run_id``.

    The bytes are decoded as ISO-8859-1 using the pure-python CSV engine,
    then handed to ``_import_survey_data`` together with the run id.
    """
    survey_df: pandas.DataFrame = pandas.read_csv(
        io.BytesIO(data),
        encoding="ISO-8859-1",
        engine="python",
    )
    log.debug("Importing survey data")
    return _import_survey_data(survey_df, run_id)
def _apply_rule(func, rule, param, getval):
    """Apply ``rule`` to the value of ``param`` looked up via ``getval``.

    The value is fetched once and reused: the original called
    ``getval(param)`` twice (once for the debug log, once for the rule),
    duplicating any cost or side effects of the lookup.
    """
    value = getval(param)
    log.debug(
        f"Rule: function: {func.__name__}, rule: {rule.__name__}, param: {param} value: {value}"
    )
    return rule(value)
def do_ips_shift_weight_calculation(df_surveydata, df_shiftsdata, serial_number, shift_weight):
    """
    Author        : Richmond Rice / Nassir Mohammad
    Date          : May 2018
    Purpose       : Generates shift weights (design weights / initial weights)
                    for each type of IPS traffic.  Runs the shift factor and
                    crossings factor functions, then uses the data frames they
                    return to calculate the survey data and summary data sets.
    Parameters    : df_surveydata - the IPS survey records for the period.
                    df_shiftsdata - data set holding the number of possible
                                    shifts / total crossings by stratum.
                    serial_number - name of the record serial number column.
                    shift_weight  - name of the shift weight output column.
    Returns       : Data frames: (final_output_data, final_summary_data)
    Requirements  : logging
    Dependencies  : Function - calculate_ips_shift_factor()
                    Function - calculate_ips_crossing_factor()
    """
    # Calculate the Shift Factor for the given data sets
    df_totsampshifts, df_possshifts, df_surveydata_sf = calculate_ips_shift_factor(
        df_shiftsdata, df_surveydata)
    # Calculate the Crossings Factor for the given data sets
    df_totsampcrossings, df_surveydata_merge = calculate_ips_crossing_factor(
        df_shiftsdata, df_surveydata_sf)

    # The various column sets used for setting columns, sorting columns,
    # aggregating by, merging data frames.
    colset1 = SHIFTS_STRATA + [MIG_SI_COLUMN]
    colset2 = SHIFTS_STRATA
    colset3 = SHIFTS_SUB_STRATA
    colset4 = SHIFTS_STRATA + [
        MIG_SI_COLUMN, POSSIBLE_COUNT_COLUMN, SAMPLED_COUNT_COLUMN,
        MIN_WEIGHT_COLUMN, AVERAGE_WEIGHT_COLUMN, MAX_WEIGHT_COLUMN,
        COUNT_COLUMN, WEIGHT_SUM_COLUMN
    ]
    colset5 = [serial_number, shift_weight]

    # Make all column headers upper case
    df_surveydata_merge.columns = df_surveydata_merge.columns.str.upper()
    df_possshifts.columns = df_possshifts.columns.str.upper()
    df_totsampcrossings.columns = df_totsampcrossings.columns.str.upper()
    df_totsampshifts.columns = df_totsampshifts.columns.str.upper()

    # --------------------------------------------------------------------
    # Check for any missing shift factors by extracting incorrect values
    # --------------------------------------------------------------------
    df_shift_flag = df_surveydata_merge[df_surveydata_merge[FLAG_COLUMN] == 1]
    df_shift_flag = df_shift_flag[df_shift_flag[FACTOR_COLUMN].isnull()]

    # Collect data outside of specified threshold
    # (NOTE(review): this first threshold string is built unconditionally,
    # unlike the later checks which only build it when rows are present.)
    threshold_string = ""
    for index, record in df_shift_flag.iterrows():
        threshold_string += "___||___" \
            + df_shift_flag.columns[0] + " : " + str(record[0])
    if len(df_shift_flag) > 0:
        log.error('Case(s) contain no shift factor(s):' + threshold_string)
    else:
        # Default the factor to 1 for unflagged rows with no factor value
        df_surveydata_merge.loc[df_surveydata_merge[FACTOR_COLUMN].isnull() &
                                (df_surveydata_merge[FLAG_COLUMN] != 1),
                                FACTOR_COLUMN] = 1
        log.debug('Contains shift factor(s)')

    # --------------------------------------------------------------------
    # Check for missing crossings factor by extracting incorrect values
    # --------------------------------------------------------------------
    df_crossings_flag = df_surveydata_merge[
        df_surveydata_merge[CROSSING_FLAG_COLUMN] == 1]
    df_crossings_flag = df_crossings_flag[
        df_crossings_flag[CROSSING_FACTOR_COLUMN].isnull()]

    # Collect data outside of specified threshold
    if len(df_crossings_flag) > 0:
        threshold_string = ""
        for index, record in df_crossings_flag.iterrows():
            threshold_string += "___||___" \
                + df_crossings_flag.columns[0] + " : " + str(record[0])
        log.error('Case(s) contain no crossings factor(s):' + threshold_string)
    else:
        # NOTE(review): attribute access to CROSSINGS_FLAG_PV here — presumably
        # the same column as CROSSING_FLAG_COLUMN; confirm the constant's value.
        df_surveydata_merge.loc[
            df_surveydata_merge[CROSSING_FACTOR_COLUMN].isnull() &
            (df_surveydata_merge.CROSSINGS_FLAG_PV != 1),
            CROSSING_FACTOR_COLUMN] = 1
        log.debug('Contains crossings factor(s)')

    # --------------------------------------------------------------------
    # Check for invalid shift data by extracting incorrect values
    # --------------------------------------------------------------------
    df_invalid_shifts = df_surveydata_merge[
        df_surveydata_merge[FACTOR_COLUMN] < 0]
    df_possible_shifts = pd.merge(df_shift_flag,
                                  df_invalid_shifts,
                                  on=['SERIAL'],
                                  how='left')

    # Collect data outside of specified threshold
    if len(df_possible_shifts) > 0:
        threshold_string = ""
        for index, record in df_possible_shifts.iterrows():
            threshold_string += "___||___" \
                + df_possible_shifts.columns[0] + " : " + str(record[0])
        log.error('Case(s) has an invalid number of possible shifts'
                  + threshold_string)

    # Check for invalid crossings data by extracting incorrect values.
    df_invalid_crossings = df_surveydata_merge[
        df_surveydata_merge[CROSSING_FACTOR_COLUMN] < 0]
    df_possible_crossings = pd.merge(df_crossings_flag,
                                     df_invalid_crossings,
                                     on=['SERIAL'],
                                     how='left')

    # Collect data outside of specified threshold
    if len(df_possible_crossings) > 0:
        threshold_string = ""
        for index, record in df_possible_crossings.iterrows():
            threshold_string += "___||___" \
                + df_possible_crossings.columns[0] + " : " + str(record[0])
        log.error('Case(s) has an invalid number of total crossings'
                  + threshold_string)

    # Check for missing migration sampling intervals by extracting incorrect values.
    df_missing_migsi = df_surveydata_merge[
        df_surveydata_merge['MIGSI'].isnull()]

    # Collect data outside of specified threshold
    if len(df_missing_migsi) > 0:
        threshold_string = ""
        for index, record in df_missing_migsi.iterrows():
            threshold_string += "___||___" \
                + df_missing_migsi.columns[0] + " : " + str(record[0])
        log.error('Case(s) missing migration sampling interval'
                  + threshold_string)

    # --------------------------------------------------------------------
    # Calculate shift weight: PS - add round to match expected in test?
    # --------------------------------------------------------------------
    # shift weight = shift factor * crossings factor * migration sampling interval
    df_surveydata_merge[shift_weight] = df_surveydata_merge[
        FACTOR_COLUMN] * df_surveydata_merge[
            CROSSING_FACTOR_COLUMN] * df_surveydata_merge[MIG_SI_COLUMN]
    # df_surveydata_merge[shift_weight] = round(
    #     df_surveydata_merge[FACTOR_COLUMN] * df_surveydata_merge[CROSSING_FACTOR_COLUMN] * df_surveydata_merge[
    #         MIG_SI_COLUMN], 3)

    # --------------------------------------------------------------------
    # produce shift weight summary output
    # --------------------------------------------------------------------
    # Sort surveydata
    df_surveydata_merge_sorted = df_surveydata_merge.sort_values(colset1)

    # Group by the necessary columns and aggregate df_surveydata_merge shift weight
    # NOTE(review): dict-of-names aggregation on a SeriesGroupBy is deprecated
    # in newer pandas (removed in 1.0) — pins this code to an older pandas.
    df_surveydata_merge_sorted_grouped = \
        df_surveydata_merge_sorted.groupby(SHIFTS_STRATA
                                           + [MIG_SI_COLUMN])[shift_weight].agg({
            COUNT_COLUMN: 'count',
            WEIGHT_SUM_COLUMN: 'sum',
            MIN_WEIGHT_COLUMN: 'min',
            AVERAGE_WEIGHT_COLUMN: 'mean',
            MAX_WEIGHT_COLUMN: 'max'
        })

    # Flatten summary columns to single row after aggregation
    df_surveydata_merge_sorted_grouped = df_surveydata_merge_sorted_grouped.reset_index(
    )

    # PS: round column
    df_surveydata_merge_sorted_grouped[WEIGHT_SUM_COLUMN] = \
        df_surveydata_merge_sorted_grouped[WEIGHT_SUM_COLUMN].round(3)
    df_surveydata_merge_sorted_grouped[MIN_WEIGHT_COLUMN] = \
        df_surveydata_merge_sorted_grouped[MIN_WEIGHT_COLUMN].round(3)
    df_surveydata_merge_sorted_grouped[AVERAGE_WEIGHT_COLUMN] = \
        df_surveydata_merge_sorted_grouped[AVERAGE_WEIGHT_COLUMN].round(3)
    df_surveydata_merge_sorted_grouped[MAX_WEIGHT_COLUMN] = \
        df_surveydata_merge_sorted_grouped[MAX_WEIGHT_COLUMN].round(3)

    # --------------------------------------------------------------------
    # Merge possible shifts to summary
    # --------------------------------------------------------------------
    # Merge possible shifts to summary
    df_summary = pd.merge(df_surveydata_merge_sorted_grouped,
                          df_possshifts,
                          on=colset2,
                          how='outer')
    df_summary = df_summary.rename(
        columns={'NUMERATOR': POSSIBLE_COUNT_COLUMN})

    # Merge totsampcrossings to summary
    df_summary = pd.merge(df_summary,
                          df_totsampcrossings,
                          on=colset2,
                          how='outer')
    df_summary = df_summary.rename(
        columns={'DENOMINATOR': SAMPLED_COUNT_COLUMN})

    # Merge totsampshifts to summary
    df_summary = pd.merge(df_summary,
                          df_totsampshifts,
                          on=colset2,
                          how='outer')
    df_summary = df_summary.rename(columns={'DENOMINATOR': 'TEMP'})

    # Merge total sample crossings and total sample shifts to single column via addition
    df_summary[SAMPLED_COUNT_COLUMN] = df_summary[SAMPLED_COUNT_COLUMN].fillna(
        0) + df_summary.TEMP.fillna(0)

    # NOTE(review): positional axis argument to drop() is deprecated in newer
    # pandas; equivalent to axis=1 (drop the column).
    df_summary = df_summary.drop(['TEMP'], 1)

    # Sort summaries
    df_summary_2 = df_summary.sort_values(colset2)

    # Re-index the data frames
    df_summary_2.index = range(df_summary_2.shape[0])

    # --------------------------------------------------------------------
    # Produce summary high
    # --------------------------------------------------------------------
    # Sort survey data
    df_surveydata_merge_3 = df_surveydata_merge.sort_values(colset3)

    # Group by the necessary columns and aggregate df_surveydata_merge shift weight
    # (same deprecated dict-aggregation pattern as above)
    df_summary_high = df_surveydata_merge_3.groupby(colset3)[shift_weight].agg(
        {
            COUNT_COLUMN: 'count',
            WEIGHT_SUM_COLUMN: 'sum',
            MIN_WEIGHT_COLUMN: 'min',
            AVERAGE_WEIGHT_COLUMN: 'mean',
            MAX_WEIGHT_COLUMN: 'max'
        })

    # Flatten summary high columns to single row after aggregation
    df_summary_high = df_summary_high.reset_index()

    # PS: round column
    df_summary_high[COUNT_COLUMN] = df_summary_high[COUNT_COLUMN].round(3)
    df_summary_high[AVERAGE_WEIGHT_COLUMN] = df_summary_high[
        AVERAGE_WEIGHT_COLUMN].round(3)
    df_summary_high[MIN_WEIGHT_COLUMN] = df_summary_high[
        MIN_WEIGHT_COLUMN].round(3)
    # NOTE(review): AVERAGE_WEIGHT_COLUMN is rounded twice here — harmless but
    # redundant.
    df_summary_high[AVERAGE_WEIGHT_COLUMN] = df_summary_high[
        AVERAGE_WEIGHT_COLUMN].round(3)
    df_summary_high[MAX_WEIGHT_COLUMN] = df_summary_high[
        MAX_WEIGHT_COLUMN].round(3)

    # Append total sample crossings and total sample shifts
    # NOTE(review): DataFrame.append is deprecated (removed in pandas 2.0);
    # newer code would use pd.concat.
    df_totsampshifts_appended = df_totsampshifts.append(df_totsampcrossings)

    # Re-index the data frame
    df_totsampshifts_appended.index = range(df_totsampshifts_appended.shape[0])

    # Sort total sample shifts
    df_totsampshifts_1 = df_totsampshifts_appended.sort_values(colset3)

    # Group by the necessary columns and aggregate df_totsampshifts shift weight
    df_summary_high_sampled = df_totsampshifts_1.groupby(
        colset3)['DENOMINATOR'].agg([(SAMPLED_COUNT_COLUMN, 'sum')])

    # Flatten summary high sampled columns to single row after aggregation
    df_summary_high_sampled = df_summary_high_sampled.reset_index()

    # Left merge summary high with summary high sampled
    df_summary_high_1 = pd.merge(df_summary_high,
                                 df_summary_high_sampled,
                                 on=SHIFTS_SUB_STRATA,
                                 how='left')

    # Append summary and summary high
    df_summary_3 = pd.concat([df_summary_high_1, df_summary_2])

    # Set summary columns
    df_summary_4 = df_summary_3[colset4]
    # 'mergesort' is used because it is stable, preserving prior ordering
    # within equal keys
    df_summary_5 = df_summary_4.sort_values([SUMMARY_KEY_COLUMN],
                                            ascending=True,
                                            kind='mergesort')
    df_summary_5.index = range(df_summary_5.shape[0])

    # replace 0 with nan to match SAS
    df_summary_5[SAMPLED_COUNT_COLUMN].replace(0, np.nan, inplace=True)

    # Set surveydata columns
    df_surveydata_merge_output = df_surveydata_merge_3[colset5]
    df_surveydata_merge_output_2 = df_surveydata_merge_output.sort_values(
        ['SERIAL'])

    # re-index the dataframe
    df_surveydata_merge_output_2.index = range(
        df_surveydata_merge_output_2.shape[0])

    final_output_data = df_surveydata_merge_output_2
    final_summary_data = df_summary_5

    # Create shift weight threshold data sets
    df_min_sw_check = df_summary_2[
        df_summary_2[SAMPLED_COUNT_COLUMN].notnull()
        & (df_summary_2[MIN_WEIGHT_COLUMN] < int(MINIMUM_WEIGHT_THRESHOLD))]
    df_max_sw_check = df_summary_2[
        df_summary_2[SAMPLED_COUNT_COLUMN].notnull()
        & (df_summary_2[MAX_WEIGHT_COLUMN] > int(MAXIMUM_WEIGHT_THRESHOLD))]

    # Merge shift weight threshold data sets
    df_sw_thresholds_check = pd.merge(df_min_sw_check,
                                      df_max_sw_check,
                                      on=colset1,
                                      how='outer')

    # Collect data outside of specified threshold
    if len(df_sw_thresholds_check) > 0:
        threshold_string = ""
        for index, record in df_sw_thresholds_check.iterrows():
            threshold_string += "___||___" \
                + df_sw_thresholds_check.columns[0] + " : " + str(record[0]) + " | " \
                + df_sw_thresholds_check.columns[1] + " : " + str(record[1]) + " | " \
                + df_sw_thresholds_check.columns[2] + " : " + str(record[2]) + " | " \
                + df_sw_thresholds_check.columns[3] + " : " + str(record[3])
        log.warning('Shift weight outside thresholds for: ' + threshold_string)

    return final_output_data, final_summary_data