Example #1
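The handlers below share a common import block that the snippets elide. A plausible reconstruction, assuming the es_aws_functions helper library referenced throughout and an imputation_functions module supplying produce_columns (RuntimeSchema and EnvironmentSchema are marshmallow schemas assumed to be defined alongside each handler; see the sketch after Example #2):

# Assumed imports (not part of the original snippets).
import json
import os

import boto3
import pandas as pd
from es_aws_functions import aws_functions, exception_classes, general_functions

# Assumption: produce_columns lives in a shared imputation helpers module,
# imported both directly and under the imp_func alias used by some handlers.
import imputation_functions as imp_func
from imputation_functions import produce_columns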
def lambda_handler(event, context):
    """
    Returns the input JSON data with new IQRS columns and their calculated values.
    :param event: JSON payload whose RuntimeVariables contain: data,
                  questions_list, distinct_values. Type: JSON.
    :param context: N/A.
    :return: {"success": True/False, "data"/"error": JSON string / error message}
    """
    current_module = "IQRS - Method"
    error_message = ""

    # Define run_id outside of try block
    run_id = 0

    # Set up variables for status message
    bpm_queue_url = None

    try:
        # Retrieve run_id before input validation because it is used
        # in exception handling.
        run_id = event["RuntimeVariables"]["run_id"]

        runtime_variables = RuntimeSchema().load(event["RuntimeVariables"])

        # Runtime Variables
        bpm_queue_url = runtime_variables["bpm_queue_url"]
        distinct_values = runtime_variables["distinct_values"]
        environment = runtime_variables["environment"]
        input_data = pd.DataFrame(runtime_variables["data"])
        questions_list = runtime_variables["questions_list"]
        survey = runtime_variables["survey"]

    except Exception as e:
        error_message = general_functions.handle_exception(e, current_module, run_id,
                                                           context=context)
        return {"success": False, "error": error_message}

    try:
        logger = general_functions.get_logger(survey, current_module, environment,
                                              run_id)
    except Exception as e:
        error_message = general_functions.handle_exception(e, current_module,
                                                           run_id, context=context)
        return {"success": False, "error": error_message}

    try:
        logger.info("Started - retrieved configuration variables.")

        movement_columns = produce_columns("movement_", questions_list)
        iqrs_columns = produce_columns("iqrs_", questions_list)

        iqrs_df = calc_iqrs(
            input_data,
            movement_columns,
            iqrs_columns,
            distinct_values
        )

        logger.info("Successfully finished calculations of IQRS.")

        json_out = iqrs_df.to_json(orient="records")
        final_output = {"data": json_out}

    except Exception as e:
        error_message = general_functions.handle_exception(e, current_module,
                                                           run_id, context=context,
                                                           bpm_queue_url=bpm_queue_url)
    finally:
        if len(error_message) > 0:
            logger.error(error_message)
            return {"success": False, "error": error_message}

    logger.info("Successfully completed module: " + current_module)
    final_output["success"] = True
    return final_output
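
produce_columns appears in every example with up to three extra arguments. A minimal sketch consistent with its call sites, not the authoritative implementation:

# Sketch of produce_columns, inferred from how it is called in these
# handlers: prefix (and optionally suffix) each question name, then pass
# any additional column names through unchanged.
def produce_columns(prefix, columns, additional=None, suffix=""):
    return [prefix + col + suffix for col in columns] + (additional or [])


# Illustrative usage (question and identifier names are hypothetical):
produce_columns("prev_", ["q601", "q602"], ["responder_id"])
# -> ["prev_q601", "prev_q602", "responder_id"]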
Example #2
def lambda_handler(event, context):
    """
    The wrangler is responsible for preparing the data so the IQRS method can be applied.
    :param event: Contains all the variables which are required for the specific run.
    :param context: N/A
    :return: {"success": True} - Type: JSON (errors raise LambdaFailure)
    """
    current_module = "Imputation IQRS - Wrangler."
    error_message = ""

    # Define run_id outside of try block
    run_id = 0

    # Set up variables for status message
    bpm_queue_url = None

    try:
        # Retrieve run_id before input validation because it is used
        # in exception handling.
        run_id = event["RuntimeVariables"]["run_id"]

        # Set up clients
        lambda_client = boto3.client("lambda", region_name="eu-west-2")

        environment_variables = EnvironmentSchema().load(os.environ)

        runtime_variables = RuntimeSchema().load(event["RuntimeVariables"])

        # Environment Variables
        bucket_name = environment_variables["bucket_name"]
        method_name = environment_variables["method_name"]
        run_environment = environment_variables["run_environment"]

        # Runtime Variables
        bpm_queue_url = runtime_variables["bpm_queue_url"]
        distinct_values = runtime_variables["distinct_values"]
        environment = runtime_variables["environment"]
        in_file_name = runtime_variables["in_file_name"]
        out_file_name = runtime_variables["out_file_name"]
        questions_list = runtime_variables["questions_list"]
        sns_topic_arn = runtime_variables["sns_topic_arn"]
        survey = runtime_variables["survey"]

    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        raise exception_classes.LambdaFailure(error_message)

    try:
        logger = general_functions.get_logger(survey, current_module,
                                              environment, run_id)
    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        raise exception_classes.LambdaFailure(error_message)

    try:
        logger.info("Started - retrieved configuration variables.")

        data = aws_functions.read_dataframe_from_s3(bucket_name, in_file_name)
        logger.info("Successfully retrieved data.")
        iqrs_columns = imp_func.produce_columns("iqrs_", questions_list)
        for col in iqrs_columns:
            data[col] = 0

        logger.info("IQRS columns successfully added")

        data_json = data.to_json(orient="records")

        logger.info("Dataframe converted to JSON")

        payload = {
            "RuntimeVariables": {
                "bpm_queue_url": bpm_queue_url,
                "data": json.loads(data_json),
                "distinct_values": distinct_values,
                "environment": environment,
                "questions_list": questions_list,
                "run_id": run_id,
                "survey": survey
            }
        }

        wrangled_data = lambda_client.invoke(FunctionName=method_name,
                                             Payload=json.dumps(payload))
        logger.info("Successfully invoked method.")

        json_response = json.loads(
            wrangled_data.get("Payload").read().decode("UTF-8"))
        logger.info("JSON extracted from method response.")

        if not json_response["success"]:
            raise exception_classes.MethodFailure(json_response["error"])

        aws_functions.save_to_s3(bucket_name, out_file_name,
                                 json_response["data"])
        logger.info("Successfully sent data to s3.")

        if run_environment != "development":
            logger.info(aws_functions.delete_data(bucket_name, in_file_name))
            logger.info("Successfully deleted input data from s3.")

        aws_functions.send_sns_message(sns_topic_arn, "Imputation - IQRs.")
        logger.info("Successfully sent message to sns.")

    except Exception as e:
        error_message = general_functions.handle_exception(
            e,
            current_module,
            run_id,
            context=context,
            bpm_queue_url=bpm_queue_url)
    finally:
        if len(error_message) > 0:
            logger.error(error_message)
            raise exception_classes.LambdaFailure(error_message)

    logger.info("Successfully completed module: " + current_module)

    return {"success": True}
def lambda_handler(event, context):
    """
    This wrangler is used to prepare data for the apply factors statistical method.
    The method requires a column per question to store the factors.
    :param event: Contains all the variables which are required for the specific run.
    :param context: N/A
    :return: {"success": True} - Type: JSON (errors raise LambdaFailure)
    """
    current_module = "Imputation Apply Factors - Wrangler."
    error_message = ""

    # Define run_id outside of try block
    run_id = 0

    # Set up variables for status message
    bpm_queue_url = None
    current_step_num = 4

    try:
        # Retrieve run_id before input validation because it is used
        # in exception handling.
        run_id = event["RuntimeVariables"]["run_id"]

        # Set up clients
        lambda_client = boto3.client("lambda", region_name="eu-west-2")

        environment_variables = EnvironmentSchema().load(os.environ)

        runtime_variables = RuntimeSchema().load(event["RuntimeVariables"])

        # Environment Variables
        bucket_name = environment_variables["bucket_name"]
        method_name = environment_variables["method_name"]
        response_type = environment_variables["response_type"]
        run_environment = environment_variables["run_environment"]

        # Runtime Variables
        bpm_queue_url = runtime_variables["bpm_queue_url"]
        current_data = runtime_variables["current_data"]
        distinct_values = runtime_variables["distinct_values"]
        environment = runtime_variables["environment"]
        factors_parameters = runtime_variables["factors_parameters"][
            "RuntimeVariables"]
        in_file_name = runtime_variables["in_file_name"]
        out_file_name = runtime_variables["out_file_name"]
        previous_data = runtime_variables["previous_data"]
        questions_list = runtime_variables["questions_list"]
        reference = runtime_variables["unique_identifier"][0]
        region_column = factors_parameters["region_column"]
        regionless_code = factors_parameters["regionless_code"]
        sns_topic_arn = runtime_variables["sns_topic_arn"]
        sum_columns = runtime_variables["sum_columns"]
        survey = runtime_variables["survey"]
        total_steps = runtime_variables["total_steps"]

    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        raise exception_classes.LambdaFailure(error_message)

    try:
        logger = general_functions.get_logger(survey, current_module,
                                              environment, run_id)
    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        raise exception_classes.LambdaFailure(error_message)

    try:
        logger.info("Started - retrieved configuration variables.")

        # Get factors data from calculate_factors
        factors_dataframe = aws_functions.read_dataframe_from_s3(
            bucket_name, in_file_name)
        logger.info("Successfully retrieved factors data from s3")

        # Get data from module that preceded imputation
        input_data = aws_functions.read_dataframe_from_s3(
            bucket_name, current_data)

        # Split out non responder data from input
        non_responder_dataframe = input_data[input_data[response_type] == 1]
        logger.info("Successfully retrieved raw-input data from s3")

        # Read in previous period data for current period non-responders
        prev_period_data = aws_functions.read_dataframe_from_s3(
            bucket_name, previous_data)
        logger.info("Successfully retrieved previous period data from s3")
        # Filter so we only have those that responded in prev
        prev_period_data = prev_period_data[
            prev_period_data[response_type] == 2]

        prev_questions_list = produce_columns("prev_", questions_list,
                                              [reference])

        for question in questions_list:
            prev_period_data = prev_period_data.rename(
                index=str, columns={question: "prev_" + question})
        logger.info("Successfully renamed previous period data")

        non_responder_dataframe_with_prev = pd.merge(
            non_responder_dataframe,
            prev_period_data[prev_questions_list],
            on=reference,
        )
        logger.info(
            "Successfully merged previous period data with non-responder df")

        # Merge the factors onto the non responders
        non_responders_with_factors = pd.merge(
            non_responder_dataframe_with_prev,
            factors_dataframe[produce_columns("imputation_factor_",
                                              questions_list,
                                              distinct_values)],
            on=distinct_values,
            how="inner",
        )
        logger.info("Successfully merged non-responders with factors")

        # Collects all rows where an imputation factor doesn't exist.
        dropped_rows = non_responder_dataframe_with_prev[
            ~non_responder_dataframe_with_prev[reference].
            isin(non_responders_with_factors[reference])].dropna()

        if len(dropped_rows) > 0:
            # Note: list.remove mutates distinct_values through this alias,
            # so the regionless factor column selection below excludes
            # region_column as well.
            merge_values = distinct_values
            merge_values.remove(region_column)

            # Collect the GB region imputation factors if they exist.
            # (.copy() avoids pandas' SettingWithCopyWarning when Temp_Key
            # is added below.)
            regionless_factors = factors_dataframe[
                produce_columns("imputation_factor_",
                                questions_list,
                                distinct_values)
            ][factors_dataframe[region_column] == regionless_code].copy()

            if len(merge_values) != 0:
                # Basic merge where we have values to merge on.
                dropped_rows_with_factors = \
                    pd.merge(dropped_rows, regionless_factors,
                             on=merge_values, how="inner")
            else:
                # Add a column to both dataframes to use for the merge.
                dropped_rows["Temp_Key"] = 0
                regionless_factors["Temp_Key"] = 0

                dropped_rows_with_factors = \
                    pd.merge(dropped_rows, regionless_factors,
                             on="Temp_Key", how="inner")

                dropped_rows_with_factors = dropped_rows_with_factors.drop(
                    "Temp_Key", axis=1)

            non_responders_with_factors = \
                pd.concat([non_responders_with_factors, dropped_rows_with_factors])
            logger.info("Successfully merged missing rows with non_responders")

        payload = {
            "RuntimeVariables": {
                "bpm_queue_url": bpm_queue_url,
                "data": json.loads(
                    non_responders_with_factors.to_json(orient="records")),
                "environment": environment,
                "questions_list": questions_list,
                "run_id": run_id,
                "sum_columns": sum_columns,
                "survey": survey
            }
        }

        # Non responder data should now contain all previous values
        #   and the imputation columns
        imputed_data = lambda_client.invoke(
            FunctionName=method_name,
            Payload=json.dumps(payload),
        )
        logger.info("Successfully invoked method.")

        json_response = json.loads(
            imputed_data.get("Payload").read().decode("UTF-8"))
        logger.info("JSON extracted from method response.")

        if not json_response["success"]:
            raise exception_classes.MethodFailure(json_response["error"])

        imputed_non_responders = pd.read_json(json_response["data"],
                                              dtype=False)

        # Retrieve current responders from input data.
        current_responders = input_data[input_data[response_type] == 2]

        # Join the datasets together.
        final_imputed = pd.concat([current_responders, imputed_non_responders])
        logger.info("Successfully joined imputed data with responder data.")

        # Create a list of factor columns to drop.
        cols_to_drop = produce_columns(
            "imputation_factor_", questions_list,
            produce_columns("prev_", questions_list))

        filtered_data = final_imputed.drop(cols_to_drop, axis=1)

        message = filtered_data.to_json(orient="records")

        aws_functions.save_to_s3(bucket_name, out_file_name, message)
        logger.info("Successfully sent data to s3.")

        if run_environment != "development":
            logger.info(aws_functions.delete_data(bucket_name, current_data))
            logger.info(aws_functions.delete_data(bucket_name, previous_data))
            logger.info(aws_functions.delete_data(bucket_name, in_file_name))
            logger.info("Successfully deleted input data.")

        aws_functions.send_sns_message(sns_topic_arn,
                                       "Imputation - Apply Factors.")
        logger.info("Successfully sent message to sns.")

    except Exception as e:
        error_message = general_functions.handle_exception(
            e,
            current_module,
            run_id,
            context=context,
            bpm_queue_url=bpm_queue_url)
    finally:
        if len(error_message) > 0:
            logger.error(error_message)
            raise exception_classes.LambdaFailure(error_message)

    logger.info("Successfully completed module: " + current_module)

    # Send end status to BPM.
    status = "DONE"
    aws_functions.send_bpm_status(bpm_queue_url, current_module, status,
                                  run_id, current_step_num, total_steps)

    return {"success": True}
Example #4
def lambda_handler(event, context):
    """
    Prepares the data for the calculate imputation factors method by adding
    the columns the method requires, then invokes it.
    :param event: Contains all the variables which are required for the specific run.
    :param context: lambda context
    :return: {"success": True} - Type: JSON (errors raise LambdaFailure)
    """
    current_module = "Imputation Calculate Factors - Wrangler."
    error_message = ""

    # Define run_id outside of try block
    run_id = 0

    # Set up variables for status message
    bpm_queue_url = None

    try:
        # Retrieve run_id before input validation because it is used
        # in exception handling.
        run_id = event["RuntimeVariables"]["run_id"]

        # Set up clients
        lambda_client = boto3.client("lambda", region_name="eu-west-2")

        environment_variables = EnvironmentSchema().load(os.environ)

        runtime_variables = RuntimeSchema().load(event["RuntimeVariables"])

        # Environment Variables
        bucket_name = environment_variables["bucket_name"]
        method_name = environment_variables["method_name"]
        run_environment = environment_variables["run_environment"]

        # Runtime Variables
        bpm_queue_url = runtime_variables["bpm_queue_url"]
        distinct_values = runtime_variables["distinct_values"]
        environment = runtime_variables["environment"]
        factors_parameters = runtime_variables["factors_parameters"]
        in_file_name = runtime_variables["in_file_name"]
        out_file_name = runtime_variables["out_file_name"]
        period_column = runtime_variables["period_column"]
        questions_list = runtime_variables["questions_list"]
        sns_topic_arn = runtime_variables["sns_topic_arn"]
        survey = runtime_variables["survey"]

    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        raise exception_classes.LambdaFailure(error_message)

    try:
        logger = general_functions.get_logger(survey, current_module,
                                              environment, run_id)
    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        raise exception_classes.LambdaFailure(error_message)

    try:
        logger.info("Started - retrieved configuration variables.")

        data = aws_functions.read_dataframe_from_s3(bucket_name, in_file_name)

        logger.info("Successfully retrieved data")

        factor_columns = imp_func.produce_columns("imputation_factor_",
                                                  questions_list)

        # create df columns needed for method
        for factor in factor_columns:
            data[factor] = 0

        payload = {
            "RuntimeVariables": {
                "bpm_queue_url": bpm_queue_url,
                "data": json.loads(data.to_json(orient="records")),
                "environment": environment,
                "questions_list": questions_list,
                "distinct_values": distinct_values,
                "factors_parameters": factors_parameters,
                "run_id": run_id,
                "survey": survey
            }
        }

        # invoke the method to calculate the factors
        calculate_factors = lambda_client.invoke(FunctionName=method_name,
                                                 Payload=json.dumps(payload))
        logger.info("Successfully invoked method.")

        json_response = json.loads(
            calculate_factors.get("Payload").read().decode("UTF-8"))
        logger.info("JSON extracted from method response.")

        if not json_response["success"]:
            raise exception_classes.MethodFailure(json_response["error"])

        output_df = pd.read_json(json_response["data"], dtype=False)
        distinct_values.append(period_column)
        columns_to_keep = imp_func.produce_columns("imputation_factor_",
                                                   questions_list,
                                                   distinct_values)

        final_df = output_df[columns_to_keep].drop_duplicates().to_json(
            orient="records")
        aws_functions.save_to_s3(bucket_name, out_file_name, final_df)
        logger.info("Successfully sent data to s3.")

        if run_environment != "development":
            logger.info(aws_functions.delete_data(bucket_name, in_file_name))
            logger.info("Successfully deleted input data.")

        aws_functions.send_sns_message(sns_topic_arn,
                                       "Imputation - Calculate Factors.")
        logger.info("Successfully sent message to sns.")

    except Exception as e:
        error_message = general_functions.handle_exception(
            e,
            current_module,
            run_id,
            context=context,
            bpm_queue_url=bpm_queue_url)
    finally:
        if len(error_message) > 0:
            logger.error(error_message)
            raise exception_classes.LambdaFailure(error_message)

    logger.info("Successfully completed module: " + current_module)

    return {"success": True}
Example #5
def lambda_handler(event, context):
    """
    Generates an aggregated DataFrame containing the mean value for
    each of the period on period percentage movements, grouped by
    region and strata.
    :param event: JSON payload whose RuntimeVariables contain: data,
                  distinct_values, questions_list. Type: JSON.
    :param context: Context object
    :return: Success - {"success": True/False, "data"/"error": "JSON String"/"Message"}
    """
    current_module = "Means - Method"
    error_message = ""

    # Define run_id outside of try block
    run_id = 0

    # Set up variables for status message
    bpm_queue_url = None

    try:
        # Retrieve run_id before input validation because it is used
        # in exception handling.
        run_id = event["RuntimeVariables"]["run_id"]

        runtime_variables = RuntimeSchema().load(event["RuntimeVariables"])

        # Runtime Variables
        bpm_queue_url = runtime_variables["bpm_queue_url"]
        distinct_values = runtime_variables["distinct_values"]
        environment = runtime_variables["environment"]
        json_data = runtime_variables["data"]
        questions_list = runtime_variables["questions_list"]
        survey = runtime_variables["survey"]

    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        return {"success": False, "error": error_message}

    try:
        logger = general_functions.get_logger(survey, current_module,
                                              environment, run_id)
    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        return {"success": False, "error": error_message}

    try:
        logger.info("Started - retrieved configuration variables.")

        movement_columns = imp_func.produce_columns("movement_",
                                                    questions_list)

        df = pd.DataFrame(json_data)

        logger.info("Successfully retrieved data from event.")

        workingdf = df[movement_columns + distinct_values]

        counts = workingdf.groupby(distinct_values).count()
        # Rename columns to fit naming standards
        for column in movement_columns:
            counts.rename(
                columns={column: column + "_count"},
                inplace=True,
            )

        # Create DataFrame which sums the movements grouped by region and strata
        sums = workingdf.groupby(distinct_values).sum()

        # Rename columns to fit naming standards
        for column in movement_columns:
            sums.rename(
                columns={column: column + "_sum"},
                inplace=True,
            )

        counts = counts.reset_index(level=distinct_values)
        sums = sums.reset_index(level=distinct_values)
        moves = sums.merge(counts, on=distinct_values, how="left")

        # Join the movement sums and counts back onto the DataFrame
        # by region & strata
        df = pd.merge(df, moves, on=distinct_values, how="left")

        for question in questions_list:
            df["mean_" + question] = df.apply(
                lambda x: x["movement_" + question + "_sum"] / x[
                    "movement_" + question + "_count"]
                if x["movement_" + question + "_count"] > 0 else 0,
                axis=1,
            )

        logger.info("Successfully finished calculations of means.")

        final_output = {"data": df.to_json(orient="records")}

    except Exception as e:
        error_message = general_functions.handle_exception(
            e,
            current_module,
            run_id,
            context=context,
            bpm_queue_url=bpm_queue_url)
    finally:
        if len(error_message) > 0:
            logger.error(error_message)
            return {"success": False, "error": error_message}

    logger.info("Successfully completed module: " + current_module)
    final_output["success"] = True
    return final_output
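
The row-wise apply above divides each grouped sum by the matching grouped count, which is exactly a grouped mean. A self-contained vectorized sketch that should yield the same mean_ columns, assuming numeric movement columns (names here are illustrative):

import pandas as pd

distinct_values = ["region", "strata"]
questions_list = ["q601"]
df = pd.DataFrame({
    "region": [1, 1, 2],
    "strata": ["A", "A", "B"],
    "movement_q601": [0.1, 0.3, None],
})

for question in questions_list:
    df["mean_" + question] = (
        df.groupby(distinct_values)["movement_" + question]
        .transform("mean")  # NaN-skipping sum/count per group
        .fillna(0)          # match the handler's zero for empty groups
    )
# mean_q601 -> [0.2, 0.2, 0.0]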
Example #6
def lambda_handler(event, context):
    """
    Calculates imputation factor for each question, in each aggregated group.
    :param event: JSON payload that contains: factors_parameters, data,
        distinct_values, questions_list - Type: JSON.
    :param context: lambda context
    :return: Success - {"success": True/False, "data"/"error": "JSON String"/"Message"}
    """
    current_module = "Calculate Factors - Method"
    error_message = ""

    # Define run_id outside of try block
    run_id = 0

    # Set up variables for status message
    bpm_queue_url = None

    try:
        # Retrieve run_id before input validation because it is used
        # in exception handling.
        run_id = event["RuntimeVariables"]["run_id"]

        runtime_variables = RuntimeSchema().load(event["RuntimeVariables"])

        # Pick Correct Schema
        factors_parameters = runtime_variables["factors_parameters"][
            "RuntimeVariables"]
        factors_type = factors_parameters["factors_type"]
        factors_name = "".join(word.title()
                               for word in factors_type.split("_"))
        factors_schema = getattr(imp_func, factors_name + "Schema")

        factors = factors_schema().load(factors_parameters)

        # Runtime Variables
        bpm_queue_url = runtime_variables["bpm_queue_url"]
        df = pd.DataFrame(runtime_variables["data"])
        distinct_values = runtime_variables["distinct_values"]
        environment = runtime_variables["environment"]
        questions_list = runtime_variables["questions_list"]
        survey = runtime_variables["survey"]

    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        return {"success": False, "error": error_message}

    try:
        logger = general_functions.get_logger(survey, current_module,
                                              environment, run_id)
    except Exception as e:
        error_message = general_functions.handle_exception(e,
                                                           current_module,
                                                           run_id,
                                                           context=context)
        return {"success": False, "error": error_message}

    try:
        logger.info("Started - retrieved configuration variables.")

        # Get relative calculation function
        calculation = getattr(imp_func, factors_type)

        # Pass the distinct values to the factors function in its parameters
        factors["distinct_values"] = distinct_values

        # Some surveys need the regional mean; extract the related values
        # ahead of time
        if "regional_mean" in factors:
            region_column = factors["region_column"]
            regional_mean = factors["regional_mean"]
            regionless_code = factors["regionless_code"]
            survey_column = factors["survey_column"]

            # split to get only regionless data
            gb_rows = df.loc[df[region_column] == regionless_code]

            # produce column names
            means_columns = imp_func.produce_columns("mean_", questions_list)
            counts_columns = imp_func.produce_columns(
                "movement_", questions_list, suffix="_count")
            gb_columns = (means_columns + counts_columns +
                          distinct_values + [survey_column])

            factor_columns = imp_func.produce_columns(
                "imputation_factor_", questions_list,
                distinct_values + [survey_column])

            # select only gb columns and then drop duplicates, leaving one row per strata
            gb_rows = gb_rows[gb_columns].drop_duplicates()
            factors[regional_mean] = ""

            # calculate gb factors ahead of time
            gb_rows = gb_rows.apply(
                lambda x: calculation(x, questions_list, **factors), axis=1)

            # reduce gb_rows to distinct_values, survey, and the factors
            gb_factors = gb_rows[factor_columns]

            # add gb_factors to factors parameters to send to calculation
            factors[regional_mean] = gb_factors

        factors_dataframe = df.apply(
            lambda x: calculation(x, questions_list, **factors), axis=1)
        logger.info("Successfully calculated factors for "
                    + str(questions_list))

        final_output = {"data": factors_dataframe.to_json(orient="records")}

    except Exception as e:
        error_message = general_functions.handle_exception(
            e,
            current_module,
            run_id,
            context=context,
            bpm_queue_url=bpm_queue_url)
    finally:
        if len(error_message) > 0:
            logger.error(error_message)
            return {"success": False, "error": error_message}

    logger.info("Successfully completed module: " + current_module)
    final_output["success"] = True
    return final_output
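
The schema lookup converts the snake_case factors_type into CamelCase and resolves both the schema class and the calculation function from imp_func at runtime. A self-contained illustration (the factors_type value is hypothetical):

factors_type = "calculate_factors"  # hypothetical value
factors_name = "".join(word.title() for word in factors_type.split("_"))

print(factors_name)             # CalculateFactors
print(factors_name + "Schema")  # CalculateFactorsSchema, the getattr target
# The calculation function itself is resolved under the original
# snake_case name: getattr(imp_func, factors_type).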