Esempio n. 1
0
 def __init__(self, data_frame, df_helper, df_context, meta_parser):
     """Keep the analysis inputs and run the Price_Range/Source test.

     The four arguments are stored on the instance unchanged. A ChiSquare
     object is then built from them, and the value returned by its
     ``test_dimension('Price_Range', 'Source')`` call is stored on
     ``self.test_dimension``.
     """
     self.data_frame = data_frame
     self.df_helper = df_helper
     self.df_context = df_context
     self.meta_parser = meta_parser

     chi_square = ChiSquare(self.data_frame, self.df_helper,
                            self.df_context, self.meta_parser)
     self.test_dimension = chi_square.test_dimension('Price_Range',
                                                     'Source')
Esempio n. 2
0
 def Run(self):
     """Run the chi-square test on the result column and build narratives.

     ``ChiSquare.test_all`` is called with the context's result column as
     the only dimension column. When the returned object reports a
     non-empty result, a ``ChiSquareNarratives`` object is built from it
     and converted to a dict with ``CommonUtils.as_dict``.
     """
     chi_square = ChiSquare(self._data_frame, self._dataframe_helper,
                            self._dataframe_context, self._metaParser)
     target_columns = (self._dataframe_context.get_result_column(), )
     df_chisquare_obj = chi_square.test_all(
         dimension_columns=target_columns)

     # Narratives are only generated when the test produced something.
     if df_chisquare_obj.get_result() != {}:
         narratives_obj = ChiSquareNarratives(
             self._dataframe_helper, df_chisquare_obj, self._spark,
             self._dataframe_context, self._data_frame,
             self._story_narrative, self._result_setter)
         chisquare_narratives = CommonUtils.as_dict(narratives_obj)
Esempio n. 3
0
class TestChiSquare(unittest.TestCase):
    """Chi-square checks for the Price_Range / Source dimension pair."""

    def __init__(self, data_frame, df_helper, df_context, meta_parser,
                 methodName='run_chisquare_test'):
        """Store the inputs and pre-compute the dimension test result.

        Args:
            data_frame: data frame handed to ``ChiSquare``.
            df_helper: dataframe helper handed to ``ChiSquare``.
            df_context: dataframe context handed to ``ChiSquare``.
            meta_parser: metadata parser handed to ``ChiSquare``.
            methodName: name of the test method this instance runs; it is
                forwarded to ``unittest.TestCase.__init__``. Optional, so
                existing four-argument callers keep working.
        """
        # The base initialiser must run: it sets up the state that the
        # assert* helpers and the test runner expect on a TestCase.
        super(TestChiSquare, self).__init__(methodName)
        self.data_frame = data_frame
        self.df_helper = df_helper
        self.df_context = df_context
        self.meta_parser = meta_parser
        self.test_dimension = ChiSquare(self.data_frame, self.df_helper,
                                        self.df_context,
                                        self.meta_parser).test_dimension(
                                            'Price_Range', 'Source')

    def test_upper(self):
        """Sanity check that the test machinery itself works."""
        self.assertEqual('foo'.upper(), 'FOO')

    def setup(self):
        """Placeholder that does nothing.

        NOTE(review): unittest only calls ``setUp`` (capital U)
        automatically, so this method is not used as a fixture hook.
        """
        pass

    def run_chisquare_test(self):
        """Compare the pre-computed p-value with ``Chi_exp_op['dim_pval']``.

        The name has no ``test`` prefix, so default test discovery does not
        collect it; it runs when the instance is created with this method
        name (the constructor's default) and then executed.
        """
        # TestCase for test_dimension function
        self.assertEqual(self.test_dimension.get_pvalue(),
                         Chi_exp_op['dim_pval'])
Esempio n. 4
0
 def test_chisquare_dimension(self):
     """Check test_dimension('Price_Range', 'Source') against exp_values.

     The p-value, effect size, statistic and v-value are each compared
     with the expected figure to five decimal places.
     """
     pair_key = 'Price_Range-Source'
     chi_square = ChiSquare(self.data_frame, self.df_helper,
                            self.df_context, self.meta_parser)
     outcome = chi_square.test_dimension('Price_Range', 'Source')

     # (getter on the result object, section of exp_values), in the
     # order the values are checked.
     checks = (
         ('get_pvalue', 'pval'),
         ('get_effect_size', 'effect_size'),
         ('get_stat', 'stats'),
         ('get_v_value', 'v_value'),
     )
     for getter_name, section in checks:
         actual = getattr(outcome, getter_name)()
         self.assertAlmostEqual(actual,
                                exp_values[section][pair_key],
                                places=5)
Esempio n. 5
0
 def test_chisquare_measure(self):
     """Check test_measures('Price_Range', 'Marketing_Cost') results.

     The p-value, effect size, statistic and v-value are each compared
     with the matching entry of exp_values to five decimal places.
     """
     pair_key = 'Price_Range-Marketing_Cost'
     chi_square = ChiSquare(self.data_frame, self.df_helper,
                            self.df_context, self.meta_parser)
     outcome = chi_square.test_measures('Price_Range', 'Marketing_Cost')

     # (getter on the result object, section of exp_values), in the
     # order the values are checked.
     checks = (
         ('get_pvalue', 'pval'),
         ('get_effect_size', 'effect_size'),
         ('get_stat', 'stats'),
         ('get_v_value', 'v_value'),
     )
     for getter_name, section in checks:
         actual = getattr(outcome, getter_name)()
         self.assertAlmostEqual(actual,
                                exp_values[section][pair_key],
                                places=5)
    def Predict(self):
        """Score the input data with the trained model and analyse the output.

        Steps, in order:
          1. Load the saved pipeline and classifier, then transform
             ``self._data_frame`` with both.
          2. Replace each prediction with the value found for it in the
             string-indexer mapping read from the pipeline path, stored in
             the result column.
          3. Write the scored rows to ``<score path>/ScoredData/data.csv``
             and the score summary to ``<score path>/Summary/summary.json``.
          4. Run the frequency and chi-square analyses on the scored data
             and write their results and narratives as JSON. Each analysis
             is best effort: a failure is printed and does not stop the
             method.

        Raises:
            ValueError: if ``self._classifier`` is neither "OneVsRest"
                nor "lr".
        """
        SQLctx = SQLContext(sparkContext=self._spark.sparkContext,
                            sparkSession=self._spark)
        level_counts_train = self._dataframe_context.get_level_count_dict()
        categorical_columns = self._dataframe_helper.get_string_columns()
        numerical_columns = self._dataframe_helper.get_numeric_columns()
        time_dimension_columns = self._dataframe_helper.get_timestamp_columns()
        result_column = self._dataframe_context.get_result_column()
        # The result column is the prediction target, not an input feature.
        categorical_columns = [
            x for x in categorical_columns if x != result_column
        ]
        feature_columns = (categorical_columns + time_dimension_columns +
                           numerical_columns)

        level_counts_score = CommonUtils.get_level_count_dict(
            self._data_frame,
            categorical_columns,
            self._dataframe_context.get_column_separator(),
            output_type="dict",
            dataType="spark")
        # True only when every training column is present in the scoring
        # data with an identical level count.
        # NOTE(review): dataSanity is computed but never read in this method.
        dataSanity = all(
            key in level_counts_score
            and level_counts_train[key] == level_counts_score[key]
            for key in level_counts_train)

        score_path = self._dataframe_context.get_score_path()
        score_data_path = score_path + "/ScoredData/data.csv"
        score_summary_path = score_path + "/Summary/summary.json"
        trained_model_path = self._dataframe_context.get_model_path()
        if trained_model_path.endswith(".pkl"):
            # Swap the pickle file name for the sibling "model" entry.
            trained_model_path = "/".join(
                trained_model_path.split("/")[:-1]) + "/model"
        pipeline_path = "/".join(
            trained_model_path.split("/")[:-1]) + "/pipeline"

        pipelineModel = MLUtils.load_pipeline(pipeline_path)
        if self._classifier == "OneVsRest":
            trained_model = MLUtils.load_one_vs_rest_model(trained_model_path)
        elif self._classifier == "lr":
            trained_model = MLUtils.load_logistic_model(trained_model_path)
        else:
            # Fail with a clear message instead of an unbound-variable
            # error on the transform call below.
            raise ValueError(
                "Unsupported classifier: %r" % (self._classifier, ))

        df = self._data_frame
        indexed = pipelineModel.transform(df)
        transformed = trained_model.transform(indexed)
        label_indexer_dict = MLUtils.read_string_indexer_mapping(
            pipeline_path, SQLctx)
        prediction_to_levels = udf(lambda x: label_indexer_dict[x],
                                   StringType())
        transformed = transformed.withColumn(
            result_column, prediction_to_levels(transformed.prediction))

        output_columns = feature_columns + [result_column]
        if "probability" in transformed.columns:
            probability_dataframe = transformed.select(
                [result_column, "probability"]).toPandas()
            probability_dataframe = probability_dataframe.rename(
                index=str, columns={result_column: "predicted_class"})
            # Largest entry of each row's probability value.
            probability_dataframe[
                "predicted_probability"] = probability_dataframe[
                    "probability"].apply(lambda x: max(x))
            self._score_summary[
                "prediction_split"] = MLUtils.calculate_scored_probability_stats(
                    probability_dataframe)
            output_columns = output_columns + ["probability"]
        else:
            self._score_summary["prediction_split"] = []
        self._score_summary["result_column"] = result_column
        scored_dataframe = transformed.select(output_columns).toPandas()

        if score_data_path.startswith("file"):
            # Drops the first 7 characters -- presumably a "file://"
            # scheme prefix; TODO confirm against callers.
            score_data_path = score_data_path[7:]
        scored_dataframe.to_csv(score_data_path, header=True, index=False)
        CommonUtils.write_to_file(
            score_summary_path,
            json.dumps({"scoreSummary": self._score_summary}))

        # Single-argument print(...) gives the same output under the
        # Python 2 print statement and the Python 3 print function.
        print("STARTING DIMENSION ANALYSIS ...")
        columns_to_keep = []
        columns_to_drop = []
        considercolumnstype = self._dataframe_context.get_score_consider_columns_type(
        )
        considercolumns = self._dataframe_context.get_score_consider_columns()
        if considercolumnstype is not None and considercolumns is not None:
            if considercolumnstype == ["excluding"]:
                columns_to_drop = considercolumns
            elif considercolumnstype == ["including"]:
                columns_to_keep = considercolumns
        if len(columns_to_keep) > 0:
            # An "including" list wins: drop every other input column.
            columns_to_drop = list(set(df.columns) - set(columns_to_keep))
        scored_df = transformed.select(feature_columns + [result_column])

        spark_scored_df = SQLctx.createDataFrame(scored_df.toPandas())
        columns_to_drop = [
            x for x in columns_to_drop if x in spark_scored_df.columns
        ]
        modified_df = spark_scored_df.select(
            [x for x in spark_scored_df.columns if x not in columns_to_drop])
        df_helper = DataFrameHelper(modified_df, self._dataframe_context)
        df_helper.set_params()
        df = df_helper.get_data_frame()

        try:
            fs = time.time()
            narratives_file = score_path + "/narratives/FreqDimension/data.json"
            result_file = score_path + "/results/FreqDimension/data.json"
            df_freq_dimension_obj = FreqDimensions(
                spark_scored_df, df_helper, self._dataframe_context).test_all(
                    dimension_columns=[result_column])
            df_freq_dimension_result = CommonUtils.as_dict(
                df_freq_dimension_obj)
            CommonUtils.write_to_file(result_file,
                                      json.dumps(df_freq_dimension_result))
            narratives_obj = DimensionColumnNarrative(result_column, df_helper,
                                                      self._dataframe_context,
                                                      df_freq_dimension_obj)
            narratives = CommonUtils.as_dict(narratives_obj)
            CommonUtils.write_to_file(narratives_file, json.dumps(narratives))
            print("Frequency Analysis Done in  %s  seconds." %
                  (time.time() - fs))
        except Exception as err:
            # Best effort: report the failure and carry on.
            print("Frequency Analysis Failed ")
            print("Reason: %r" % (err, ))

        try:
            fs = time.time()
            narratives_file = score_path + "/narratives/ChiSquare/data.json"
            result_file = score_path + "/results/ChiSquare/data.json"
            df_chisquare_obj = ChiSquare(df, df_helper,
                                         self._dataframe_context).test_all(
                                             dimension_columns=[result_column])
            df_chisquare_result = CommonUtils.as_dict(df_chisquare_obj)
            CommonUtils.write_to_file(result_file,
                                      json.dumps(df_chisquare_result))
            chisquare_narratives = CommonUtils.as_dict(
                ChiSquareNarratives(df_helper, df_chisquare_obj,
                                    self._dataframe_context, df))
            CommonUtils.write_to_file(narratives_file,
                                      json.dumps(chisquare_narratives))
            print("ChiSquare Analysis Done in  %s  seconds." %
                  (time.time() - fs))
        except Exception as err:
            # Best effort: report the failure and carry on.
            print("ChiSquare Analysis Failed ")
            print("Reason: %r" % (err, ))
Esempio n. 7
0
    def setUp(self):
        """Build the Spark session, dataframe context and chi-square fixtures.

        Reads the "chisquare" test configuration, configures a
        ``ContextSetter`` from it, loads the dataset and its metadata, and
        stores on ``self`` the objects the tests use, including the result
        of ``ChiSquare.test_all`` for the context's result column.
        """
        APP_NAME = "test"
        spark = CommonUtils.get_spark_session(app_name=APP_NAME,
                                              hive_environment=False)
        spark.sparkContext.setLogLevel("ERROR")

        configJson = get_test_configs("testCase", testFor="chisquare")

        config = configJson["config"]
        jobConfig = configJson["job_config"]
        jobType = jobConfig["job_type"]
        jobName = jobConfig["job_name"]
        jobURL = jobConfig["job_url"]
        messageURL = jobConfig["message_url"]
        # Optional job settings: None when the key is absent.
        errorURL = jobConfig.get("error_reporting_url")
        appid = jobConfig.get("app_id")
        debugMode = True
        LOGGER = {}

        configJsonObj = configparser.ParserConfig(config)
        configJsonObj.set_json_params()

        dataframe_context = ContextSetter(configJsonObj)
        # jobType should be set before set_params call of dataframe_context
        dataframe_context.set_job_type(jobType)
        dataframe_context.set_params()
        dataframe_context.set_message_url(messageURL)
        dataframe_context.set_app_id(appid)
        dataframe_context.set_debug_mode(debugMode)
        dataframe_context.set_job_url(jobURL)
        dataframe_context.set_app_name(APP_NAME)
        dataframe_context.set_error_url(errorURL)
        dataframe_context.set_logger(LOGGER)
        dataframe_context.set_xml_url(jobConfig["xml_url"])
        dataframe_context.set_job_name(jobName)
        # NOTE(review): this passes the literal string "debugMode", not the
        # debugMode variable -- confirm that is intended.
        dataframe_context.set_environment("debugMode")
        dataframe_context.set_message_ignore(True)
        dataframe_context.set_analysis_name("Descriptive analysis")

        df = MasterHelper.load_dataset(spark, dataframe_context)
        metaParserInstance = MasterHelper.get_metadata(df, spark,
                                                       dataframe_context, None)
        df, df_helper = MasterHelper.set_dataframe_helper(
            df, dataframe_context, metaParserInstance)
        targetVal = dataframe_context.get_result_column()

        self.result_setter = ResultSetter(dataframe_context)
        self.story_narrative = NarrativesTree()
        self.story_narrative.set_name(
            "{} Performance Report".format(targetVal))
        self.data_frame = df
        self.df_helper = df_helper
        self.df_context = dataframe_context
        self.meta_parser = metaParserInstance
        self.base_dir = "/chisquare/"
        self.significant_variables = [
            'Buyer_Gender', 'Sales', 'Discount_Range', 'Shipping_Cost',
            'Last_Transaction', 'Marketing_Cost'
        ]
        # Fixed list of measure columns used by the tests.
        self.measure_columns = [
            'Tenure_in_Days', 'Sales', 'Marketing_Cost', 'Shipping_Cost',
            'Last_Transaction'
        ]
        self.df_chisquare_obj = ChiSquare(
            self.data_frame, self.df_helper, self.df_context,
            self.meta_parser).test_all(
                dimension_columns=(self.df_context.get_result_column(), ))
        self.df_chisquare_result = self.df_chisquare_obj.get_result()
        self.num_analysed_variables = 11
Esempio n. 8
0
class TestChiSquare(unittest.TestCase):
    """Tests for ChiSquare results and ChiSquareAnalysis narratives.

    Expected figures come from the module-level ``exp_values`` and
    ``exp_data_dict`` objects.
    """

    # (section of exp_values, getter name on a chi-square result), in the
    # order the values are checked.
    _METRIC_GETTERS = (
        ('pval', 'get_pvalue'),
        ('effect_size', 'get_effect_size'),
        ('stats', 'get_stat'),
        ('v_value', 'get_v_value'),
    )

    # Columns checked against 'Price_Range' by test_chisquare_all.
    _ALL_COLUMNS = (
        'Deal_Type', 'Discount_Range', 'Source', 'Platform', 'Buyer_Age',
        'Buyer_Gender', 'Tenure_in_Days', 'Sales', 'Marketing_Cost',
        'Shipping_Cost', 'Last_Transaction',
    )

    # Expected-value keys that do not follow the 'Price_Range-<column>'
    # pattern (this one is spelled with a hyphen).
    _EXPECTED_KEY_OVERRIDES = {'Buyer_Gender': 'Price_Range-Buyer-Gender'}

    def setUp(self):
        """Build the Spark session, dataframe context and chi-square fixtures.

        Reads the "chisquare" test configuration, configures a
        ``ContextSetter`` from it, loads the dataset and its metadata, and
        stores on ``self`` the objects the tests use, including the result
        of ``ChiSquare.test_all`` for the context's result column.
        """
        APP_NAME = "test"
        spark = CommonUtils.get_spark_session(app_name=APP_NAME,
                                              hive_environment=False)
        spark.sparkContext.setLogLevel("ERROR")

        configJson = get_test_configs("testCase", testFor="chisquare")

        config = configJson["config"]
        jobConfig = configJson["job_config"]
        jobType = jobConfig["job_type"]
        jobName = jobConfig["job_name"]
        jobURL = jobConfig["job_url"]
        messageURL = jobConfig["message_url"]
        # Optional job settings: None when the key is absent.
        errorURL = jobConfig.get("error_reporting_url")
        appid = jobConfig.get("app_id")
        debugMode = True
        LOGGER = {}

        configJsonObj = configparser.ParserConfig(config)
        configJsonObj.set_json_params()

        dataframe_context = ContextSetter(configJsonObj)
        # jobType should be set before set_params call of dataframe_context
        dataframe_context.set_job_type(jobType)
        dataframe_context.set_params()
        dataframe_context.set_message_url(messageURL)
        dataframe_context.set_app_id(appid)
        dataframe_context.set_debug_mode(debugMode)
        dataframe_context.set_job_url(jobURL)
        dataframe_context.set_app_name(APP_NAME)
        dataframe_context.set_error_url(errorURL)
        dataframe_context.set_logger(LOGGER)
        dataframe_context.set_xml_url(jobConfig["xml_url"])
        dataframe_context.set_job_name(jobName)
        # NOTE(review): this passes the literal string "debugMode", not the
        # debugMode variable -- confirm that is intended.
        dataframe_context.set_environment("debugMode")
        dataframe_context.set_message_ignore(True)
        dataframe_context.set_analysis_name("Descriptive analysis")

        df = MasterHelper.load_dataset(spark, dataframe_context)
        metaParserInstance = MasterHelper.get_metadata(df, spark,
                                                       dataframe_context, None)
        df, df_helper = MasterHelper.set_dataframe_helper(
            df, dataframe_context, metaParserInstance)
        targetVal = dataframe_context.get_result_column()

        self.result_setter = ResultSetter(dataframe_context)
        self.story_narrative = NarrativesTree()
        self.story_narrative.set_name(
            "{} Performance Report".format(targetVal))
        self.data_frame = df
        self.df_helper = df_helper
        self.df_context = dataframe_context
        self.meta_parser = metaParserInstance
        self.base_dir = "/chisquare/"
        self.significant_variables = [
            'Buyer_Gender', 'Sales', 'Discount_Range', 'Shipping_Cost',
            'Last_Transaction', 'Marketing_Cost'
        ]
        # Fixed list of measure columns used by the tests.
        self.measure_columns = [
            'Tenure_in_Days', 'Sales', 'Marketing_Cost', 'Shipping_Cost',
            'Last_Transaction'
        ]
        self.df_chisquare_obj = ChiSquare(
            self.data_frame, self.df_helper, self.df_context,
            self.meta_parser).test_all(
                dimension_columns=(self.df_context.get_result_column(), ))
        self.df_chisquare_result = self.df_chisquare_obj.get_result()
        self.num_analysed_variables = 11

    def _assert_metrics(self, result, expected_key, places=None):
        """Compare every metric of ``result`` with its ``exp_values`` entry.

        Args:
            result: object exposing the getters named in _METRIC_GETTERS.
            expected_key: key looked up in each section of ``exp_values``.
            places: decimal places passed to ``assertAlmostEqual``; None
                keeps that method's default.
        """
        for section, getter_name in self._METRIC_GETTERS:
            actual = getattr(result, getter_name)()
            expected = exp_values[section][expected_key]
            self.assertAlmostEqual(
                actual, expected, places=places,
                msg='%s mismatch for %s: %r != %r' %
                (section, expected_key, actual, expected))

    def test_chisquare_dimension(self):
        """test_dimension('Price_Range', 'Source') matches to 5 places."""
        test_dimension = ChiSquare(self.data_frame, self.df_helper,
                                   self.df_context,
                                   self.meta_parser).test_dimension(
                                       'Price_Range', 'Source')
        self._assert_metrics(test_dimension, 'Price_Range-Source', places=5)

    def test_chisquare_measure(self):
        """test_measures('Price_Range', 'Marketing_Cost') matches to 5 places."""
        test_measures = ChiSquare(self.data_frame, self.df_helper,
                                  self.df_context,
                                  self.meta_parser).test_measures(
                                      'Price_Range', 'Marketing_Cost')
        self._assert_metrics(test_measures, 'Price_Range-Marketing_Cost',
                             places=5)

    def test_chisquare_all(self):
        """Every metric of every analysed column matches ``exp_values``.

        Checks run metric by metric (p-value, effect size, statistic,
        v-value) and, within a metric, column by column, using the default
        precision of ``assertAlmostEqual``.
        """
        for section, getter_name in self._METRIC_GETTERS:
            for column in self._ALL_COLUMNS:
                expected_key = self._EXPECTED_KEY_OVERRIDES.get(
                    column, 'Price_Range-' + column)
                result = self.df_chisquare_obj.get_chisquare_result(
                    'Price_Range', column)
                actual = getattr(result, getter_name)()
                expected = exp_values[section][expected_key]
                self.assertAlmostEqual(
                    actual, expected,
                    msg='%s mismatch for %s: %r != %r' %
                    (section, expected_key, actual, expected))

    def test_chisquare_analysis(self):
        """Narratives for Price_Range vs Buyer_Gender have the expected shape."""
        target_chisquare_result = self.df_chisquare_result['Price_Range']
        chisquare_result = self.df_chisquare_obj.get_chisquare_result(
            'Price_Range', 'Buyer_Gender')
        out = ChiSquareAnalysis(
            self.df_context, self.df_helper, chisquare_result, 'Price_Range',
            'Buyer_Gender', self.significant_variables,
            self.num_analysed_variables, self.data_frame, self.measure_columns,
            self.base_dir, None,
            target_chisquare_result)._generate_narratives()

        self.assertEqual(out['data_dict'], exp_data_dict)
        # NOTE(review): the previous assertions compared each target_dict
        # entry with itself, which only proved the key exists. That check
        # is now explicit; compare against real expected values once a
        # fixture for them is available.
        for level in ('11 to 50', '101 to 500', '0 to 10'):
            self.assertIn(level, out['target_dict'])