# Example 1
    def insert(**kwargs):
        """Build and execute an INSERT statement on `table`.

        Each keyword argument maps a column name to its value. String
        values are wrapped in double quotes; every other value is
        rendered with str() — the previous code crashed with a
        TypeError concatenating non-string values whenever more than
        one column was supplied (the single-column branch used an
        f-string and silently behaved differently).

        NOTE(security): values are interpolated directly into the SQL
        text, so this is open to SQL injection for untrusted input —
        prefer parameterized statements if the backend supports them.
        """
        def render(value):
            # Quote strings for SQL; stringify everything else.
            return '"' + value + '"' if isinstance(value, str) else str(value)

        columns = ', '.join(kwargs)
        values = ', '.join(render(v) for v in kwargs.values())
        val = f"INSERT INTO {table} ({columns}) VALUES ({values})"

        log.debug(val)
        db.execute_sql_statement(val)
# Example 2
    def wrapper(*args, **kwargs):
        """Invoke the wrapped service, translating failures into HTTP 400s.

        Logs the call, delegates to ``func``, and on any exception logs
        the error message and raises a ``falcon.HTTPError`` carrying it.
        """
        try:
            log.debug(f"Calling service: {func.__name__}")
            return func(*args, **kwargs)
        except Exception as err:
            message = f'Error calling service {func.__name__}. Error: {err}'
            log.error(message)
            raise falcon.HTTPError(falcon.HTTP_400, 'service error', message)
# Example 3
    def delete(**kwargs):
        """Build and execute a DELETE statement on `table`.

        Keyword arguments become equality conditions joined with AND.
        String values are double-quoted; other values are rendered with
        str(). Fixes the original bug where every extra condition
        emitted its own ``WHERE`` keyword (``... WHERE a = 1 AND
        WHERE b = 2``), producing invalid SQL for 2+ conditions.

        NOTE(security): values are interpolated directly into the SQL
        text — vulnerable to SQL injection for untrusted input; prefer
        parameterized statements if the backend supports them.
        """
        def render(value):
            # Quote strings for SQL; stringify everything else.
            return '"' + value + '"' if isinstance(value, str) else str(value)

        val = f"DELETE FROM {table}"
        if kwargs:
            conditions = ' AND '.join(
                f'{key} = {render(value)}' for key, value in kwargs.items())
            val += f' WHERE {conditions}'

        log.debug(val)

        db.execute_sql_statement(val)
# Example 4
def import_survey_stream(data, run_id):
    """Parse raw survey CSV bytes and hand the frame to the importer.

    :param data: raw CSV content as bytes (decoded as ISO-8859-1).
    :param run_id: identifier of the run this survey data belongs to.
    :returns: the result of ``_import_survey_data`` for the parsed frame.
    """
    survey_df: pandas.DataFrame = pandas.read_csv(
        io.BytesIO(data), encoding="ISO-8859-1", engine="python")
    log.debug("Importing survey data")
    return _import_survey_data(survey_df, run_id)
# Example 5
def _apply_rule(func, rule, param, getval):
    """Debug-log and evaluate ``rule`` against the value of ``param``.

    ``getval`` fetches the parameter's current value; the rule's result
    is returned unchanged.
    """
    message = (
        f"Rule: function: {func.__name__}, rule: {rule.__name__}, "
        f"param: {param} value: {getval(param)}"
    )
    log.debug(message)
    return rule(getval(param))
def do_ips_shift_weight_calculation(df_surveydata, df_shiftsdata,
                                    serial_number, shift_weight):
    """
    Author       :  Richmond Rice / Nassir Mohammad
    Date         :  May 2018
    Purpose      :  Generates shift weights (design weights/initial weights) for each type
                    of IPS traffic.  Runs the shift factor and crossings factor functions.
                    Uses the data frames they return to calculate the surveydata and summary data sets.
    Parameters   :  df_surveydata = the IPS survey records for the period.
                    df_shiftsdata = SAS data set holding # of possible shifts / total crossings by stratum
                    serial_number = Variable holding the record serial number
                    shift_weight = Variable holding the name of the shift weight field
    Returns      :  Data frames: (final_output_data, final_summary_data)
    Requirements :  logging
    Dependencies :  Function - calculate_ips_shift_factor()
                    Function - calculate_ips_crossing_factor()
    """

    def _format_threshold_rows(df, num_cols=1):
        # Render the first `num_cols` values of every row in the legacy
        # "___||___col : value | col : value" report format used by the
        # threshold log messages below. record.iloc[i] is positional
        # (record[i] relied on deprecated positional fallback).
        parts = []
        for _, record in df.iterrows():
            fields = " | ".join(df.columns[i] + " : " + str(record.iloc[i])
                                for i in range(num_cols))
            parts.append("___||___" + fields)
        return "".join(parts)

    # Calculate the Shift Factor for the given data sets
    df_totsampshifts, df_possshifts, df_surveydata_sf = calculate_ips_shift_factor(
        df_shiftsdata, df_surveydata)
    # Calculate the Crossings Factor for the given data sets
    df_totsampcrossings, df_surveydata_merge = calculate_ips_crossing_factor(
        df_shiftsdata, df_surveydata_sf)

    # The various column sets used for setting columns, sorting columns,
    # aggregating by, merging data frames.
    colset1 = SHIFTS_STRATA + [MIG_SI_COLUMN]

    colset2 = SHIFTS_STRATA

    colset3 = SHIFTS_SUB_STRATA

    colset4 = SHIFTS_STRATA + [
        MIG_SI_COLUMN, POSSIBLE_COUNT_COLUMN, SAMPLED_COUNT_COLUMN,
        MIN_WEIGHT_COLUMN, AVERAGE_WEIGHT_COLUMN, MAX_WEIGHT_COLUMN,
        COUNT_COLUMN, WEIGHT_SUM_COLUMN
    ]

    colset5 = [serial_number, shift_weight]

    # Make all column headers upper case
    df_surveydata_merge.columns = df_surveydata_merge.columns.str.upper()
    df_possshifts.columns = df_possshifts.columns.str.upper()
    df_totsampcrossings.columns = df_totsampcrossings.columns.str.upper()
    df_totsampshifts.columns = df_totsampshifts.columns.str.upper()

    # --------------------------------------------------------------------
    # Check for any missing shift factors by extracting incorrect values
    # --------------------------------------------------------------------
    df_shift_flag = df_surveydata_merge[df_surveydata_merge[FLAG_COLUMN] == 1]
    df_shift_flag = df_shift_flag[df_shift_flag[FACTOR_COLUMN].isnull()]

    if len(df_shift_flag) > 0:
        # Only build the (potentially large) report string when it is
        # actually logged — matches the sibling checks below.
        log.error('Case(s) contain no shift factor(s):' +
                  _format_threshold_rows(df_shift_flag))
    else:
        # Default unflagged rows with a missing factor to 1.
        df_surveydata_merge.loc[df_surveydata_merge[FACTOR_COLUMN].isnull() &
                                (df_surveydata_merge[FLAG_COLUMN] != 1),
                                FACTOR_COLUMN] = 1
        log.debug('Contains shift factor(s)')

    # --------------------------------------------------------------------
    # Check for missing crossings factor by extracting incorrect values
    # --------------------------------------------------------------------
    df_crossings_flag = df_surveydata_merge[
        df_surveydata_merge[CROSSING_FLAG_COLUMN] == 1]
    df_crossings_flag = df_crossings_flag[
        df_crossings_flag[CROSSING_FACTOR_COLUMN].isnull()]

    if len(df_crossings_flag) > 0:
        log.error('Case(s) contain no crossings factor(s):' +
                  _format_threshold_rows(df_crossings_flag))
    else:
        # NOTE(review): the filter above uses CROSSING_FLAG_COLUMN but this
        # line hard-codes the CROSSINGS_FLAG_PV attribute — confirm the
        # constant equals 'CROSSINGS_FLAG_PV' and unify if so.
        df_surveydata_merge.loc[
            df_surveydata_merge[CROSSING_FACTOR_COLUMN].isnull() &
            (df_surveydata_merge.CROSSINGS_FLAG_PV != 1),
            CROSSING_FACTOR_COLUMN] = 1
        log.debug('Contains crossings factor(s)')

    # --------------------------------------------------------------------
    # Check for invalid shift data by extracting incorrect values
    # --------------------------------------------------------------------
    df_invalid_shifts = df_surveydata_merge[
        df_surveydata_merge[FACTOR_COLUMN] < 0]

    df_possible_shifts = pd.merge(df_shift_flag,
                                  df_invalid_shifts,
                                  on=['SERIAL'],
                                  how='left')

    # Collect data outside of specified threshold
    if len(df_possible_shifts) > 0:
        log.error('Case(s) has an invalid number of possible shifts' +
                  _format_threshold_rows(df_possible_shifts))

    # Check for invalid crossings data by extracting incorrect values.
    df_invalid_crossings = df_surveydata_merge[
        df_surveydata_merge[CROSSING_FACTOR_COLUMN] < 0]

    df_possible_crossings = pd.merge(df_crossings_flag,
                                     df_invalid_crossings,
                                     on=['SERIAL'],
                                     how='left')

    # Collect data outside of specified threshold
    if len(df_possible_crossings) > 0:
        log.error('Case(s) has an invalid number of total crossings' +
                  _format_threshold_rows(df_possible_crossings))

    # Check for missing migration sampling intervals by extracting incorrect values.
    df_missing_migsi = df_surveydata_merge[
        df_surveydata_merge['MIGSI'].isnull()]

    # Collect data outside of specified threshold
    if len(df_missing_migsi) > 0:
        log.error('Case(s) missing migration sampling interval' +
                  _format_threshold_rows(df_missing_migsi))

    # --------------------------------------------------------------------
    # Calculate shift weight: PS - add round to match expected in test?
    # --------------------------------------------------------------------

    # shift weight = shift factor * crossings factor * migration SI
    df_surveydata_merge[shift_weight] = df_surveydata_merge[
        FACTOR_COLUMN] * df_surveydata_merge[
            CROSSING_FACTOR_COLUMN] * df_surveydata_merge[MIG_SI_COLUMN]

    # --------------------------------------------------------------------
    # produce shift weight summary output
    # --------------------------------------------------------------------

    # Sort surveydata
    df_surveydata_merge_sorted = df_surveydata_merge.sort_values(colset1)

    # Group by the necessary columns and aggregate the shift weight.
    # Uses list-agg + rename instead of the dict-of-renaming agg, which
    # raises SpecificationError on pandas >= 1.0; the column order is
    # unchanged (count, sum, min, mean, max).
    df_surveydata_merge_sorted_grouped = \
        df_surveydata_merge_sorted.groupby(colset1)[shift_weight].agg(
            ['count', 'sum', 'min', 'mean', 'max']).rename(columns={
                'count': COUNT_COLUMN,
                'sum': WEIGHT_SUM_COLUMN,
                'min': MIN_WEIGHT_COLUMN,
                'mean': AVERAGE_WEIGHT_COLUMN,
                'max': MAX_WEIGHT_COLUMN
            })

    # Flatten summary columns to single row after aggregation
    df_surveydata_merge_sorted_grouped = \
        df_surveydata_merge_sorted_grouped.reset_index()

    # PS: round the weight summary columns to 3 d.p.
    for col in (WEIGHT_SUM_COLUMN, MIN_WEIGHT_COLUMN,
                AVERAGE_WEIGHT_COLUMN, MAX_WEIGHT_COLUMN):
        df_surveydata_merge_sorted_grouped[col] = \
            df_surveydata_merge_sorted_grouped[col].round(3)

    # --------------------------------------------------------------------
    # Merge possible shifts to summary
    # --------------------------------------------------------------------

    # Merge possible shifts to summary
    df_summary = pd.merge(df_surveydata_merge_sorted_grouped,
                          df_possshifts,
                          on=colset2,
                          how='outer')
    df_summary = df_summary.rename(
        columns={'NUMERATOR': POSSIBLE_COUNT_COLUMN})

    # Merge totsampcrossings to summary
    df_summary = pd.merge(df_summary,
                          df_totsampcrossings,
                          on=colset2,
                          how='outer')
    df_summary = df_summary.rename(
        columns={'DENOMINATOR': SAMPLED_COUNT_COLUMN})

    # Merge totsampshifts to summary
    df_summary = pd.merge(df_summary,
                          df_totsampshifts,
                          on=colset2,
                          how='outer')
    df_summary = df_summary.rename(columns={'DENOMINATOR': 'TEMP'})

    # Merge total sample crossings and total sample shifts to single column via addition
    df_summary[SAMPLED_COUNT_COLUMN] = df_summary[SAMPLED_COUNT_COLUMN].fillna(
        0) + df_summary.TEMP.fillna(0)

    # drop(columns=...) — the positional-axis form drop([...], 1) was
    # removed in pandas 2.0.
    df_summary = df_summary.drop(columns=['TEMP'])

    # Sort summaries
    df_summary_2 = df_summary.sort_values(colset2)

    # Re-index the data frames
    df_summary_2.index = range(df_summary_2.shape[0])

    # --------------------------------------------------------------------
    # Produce summary high
    # --------------------------------------------------------------------

    # Sort survey data
    df_surveydata_merge_3 = df_surveydata_merge.sort_values(colset3)

    # Group by the necessary columns and aggregate the shift weight
    # (same list-agg + rename pattern as above).
    df_summary_high = df_surveydata_merge_3.groupby(colset3)[shift_weight].agg(
        ['count', 'sum', 'min', 'mean', 'max']).rename(columns={
            'count': COUNT_COLUMN,
            'sum': WEIGHT_SUM_COLUMN,
            'min': MIN_WEIGHT_COLUMN,
            'mean': AVERAGE_WEIGHT_COLUMN,
            'max': MAX_WEIGHT_COLUMN
        })

    # Flatten summary high columns to single row after aggregation
    df_summary_high = df_summary_high.reset_index()

    # PS: round columns to 3 d.p. (the original rounded
    # AVERAGE_WEIGHT_COLUMN twice — rounding is idempotent, so a single
    # pass is identical).
    for col in (COUNT_COLUMN, AVERAGE_WEIGHT_COLUMN,
                MIN_WEIGHT_COLUMN, MAX_WEIGHT_COLUMN):
        df_summary_high[col] = df_summary_high[col].round(3)

    # Append total sample crossings and total sample shifts.
    # DataFrame.append was removed in pandas 2.0 — pd.concat is the
    # documented equivalent.
    df_totsampshifts_appended = pd.concat(
        [df_totsampshifts, df_totsampcrossings])

    # Re-index the data frame
    df_totsampshifts_appended.index = range(df_totsampshifts_appended.shape[0])

    # Sort total sample shifts
    df_totsampshifts_1 = df_totsampshifts_appended.sort_values(colset3)

    # Group by the necessary columns and aggregate df_totsampshifts shift weight
    df_summary_high_sampled = df_totsampshifts_1.groupby(
        colset3)['DENOMINATOR'].agg([(SAMPLED_COUNT_COLUMN, 'sum')])

    # Flatten summary high sampled columns to single row after aggregation
    df_summary_high_sampled = df_summary_high_sampled.reset_index()

    # Left merge summary high with summary high sampled
    df_summary_high_1 = pd.merge(df_summary_high,
                                 df_summary_high_sampled,
                                 on=SHIFTS_SUB_STRATA,
                                 how='left')

    # Append summary and summary high
    df_summary_3 = pd.concat([df_summary_high_1, df_summary_2])

    # Set summary columns; mergesort keeps the sort stable to match SAS.
    df_summary_4 = df_summary_3[colset4]
    df_summary_5 = df_summary_4.sort_values([SUMMARY_KEY_COLUMN],
                                            ascending=True,
                                            kind='mergesort')
    df_summary_5.index = range(df_summary_5.shape[0])

    # replace 0 with nan to match SAS
    df_summary_5[SAMPLED_COUNT_COLUMN].replace(0, np.nan, inplace=True)

    # Set surveydata columns
    df_surveydata_merge_output = df_surveydata_merge_3[colset5]
    df_surveydata_merge_output_2 = df_surveydata_merge_output.sort_values(
        ['SERIAL'])

    # re-index the dataframe
    df_surveydata_merge_output_2.index = range(
        df_surveydata_merge_output_2.shape[0])

    final_output_data = df_surveydata_merge_output_2
    final_summary_data = df_summary_5

    # Create shift weight threshold data sets
    df_min_sw_check = df_summary_2[
        df_summary_2[SAMPLED_COUNT_COLUMN].notnull()
        & (df_summary_2[MIN_WEIGHT_COLUMN] < int(MINIMUM_WEIGHT_THRESHOLD))]
    df_max_sw_check = df_summary_2[
        df_summary_2[SAMPLED_COUNT_COLUMN].notnull()
        & (df_summary_2[MAX_WEIGHT_COLUMN] > int(MAXIMUM_WEIGHT_THRESHOLD))]

    # Merge shift weight threshold data sets
    df_sw_thresholds_check = pd.merge(df_min_sw_check,
                                      df_max_sw_check,
                                      on=colset1,
                                      how='outer')

    # Collect data outside of specified threshold (first four columns per row)
    if len(df_sw_thresholds_check) > 0:
        log.warning('Shift weight outside thresholds for: ' +
                    _format_threshold_rows(df_sw_thresholds_check, num_cols=4))

    return final_output_data, final_summary_data