def __init__(self, data_frame, df_helper, df_context, meta_parser):
    """Keep the analysis inputs and precompute one chi-square result.

    The four arguments are stored on the instance unchanged.  They are
    then passed to ``ChiSquare`` and ``test_dimension`` is run for the
    'Price_Range' / 'Source' pair; whatever that call returns is kept as
    ``self.test_dimension``.
    """
    self.data_frame, self.df_helper = data_frame, df_helper
    self.df_context, self.meta_parser = df_context, meta_parser

    chi_square = ChiSquare(self.data_frame, self.df_helper,
                           self.df_context, self.meta_parser)
    self.test_dimension = chi_square.test_dimension('Price_Range', 'Source')
def Run(self):
    """Run the chi-square tests for the result column and build narratives.

    ``ChiSquare.test_all`` is called with the configured result column as
    the only dimension column.  ``ChiSquareNarratives`` is constructed
    (and converted with ``CommonUtils.as_dict``) only when the returned
    object reports a result different from an empty dict.
    """
    chi_square = ChiSquare(self._data_frame, self._dataframe_helper,
                           self._dataframe_context, self._metaParser)
    target_columns = (self._dataframe_context.get_result_column(), )
    df_chisquare_obj = chi_square.test_all(dimension_columns=target_columns)

    if df_chisquare_obj.get_result() != {}:
        narratives = ChiSquareNarratives(
            self._dataframe_helper,
            df_chisquare_obj,
            self._spark,
            self._dataframe_context,
            self._data_frame,
            self._story_narrative,
            self._result_setter,
        )
        chisquare_narratives = CommonUtils.as_dict(narratives)
class TestChiSquare(unittest.TestCase):
    """Checks for ``ChiSquare.test_dimension`` on 'Price_Range' vs 'Source'.

    Instances are built directly from the objects the analysis needs
    (see ``__init__``) instead of being created by a unittest loader.
    """

    def __init__(self, data_frame, df_helper, df_context, meta_parser):
        """Store the inputs and compute the chi-square result under test.

        Args:
            data_frame: data frame handed to ``ChiSquare``.
            df_helper: helper object handed to ``ChiSquare``.
            df_context: context object handed to ``ChiSquare``.
            meta_parser: metadata parser handed to ``ChiSquare``.
        """
        # TestCase.__init__ creates the internal state the assert* methods
        # depend on (for example the type-specific equality helpers).
        # Without this call assertEqual raises AttributeError as soon as
        # both operands have the same type.
        super(TestChiSquare, self).__init__()

        self.data_frame = data_frame
        self.df_helper = df_helper
        self.df_context = df_context
        self.meta_parser = meta_parser
        self.test_dimension = ChiSquare(
            self.data_frame, self.df_helper, self.df_context,
            self.meta_parser).test_dimension('Price_Range', 'Source')

    def test_upper(self):
        """Sanity check that the assertion machinery itself works."""
        self.assertEqual('foo'.upper(), 'FOO')

    def setup(self):
        """Do nothing.

        NOTE(review): the unittest fixture hook is spelled ``setUp``; this
        lowercase method is never invoked by the framework.
        """
        pass

    def run_chisquare_test(self):
        """Compare the computed p-value with ``Chi_exp_op['dim_pval']``.

        The name does not start with ``test``, so this check has to be
        called explicitly; unittest discovery will not pick it up.
        """
        # TestCase for test_dimension function
        self.assertEqual(self.test_dimension.get_pvalue(),
                         Chi_exp_op['dim_pval'])
def test_chisquare_dimension(self):
    """Check ``ChiSquare.test_dimension`` for 'Price_Range' vs 'Source'.

    The p-value, effect size, statistic and v-value of the result are each
    compared with the matching ``exp_values`` entry to 5 decimal places.
    """
    pair_key = 'Price_Range-Source'
    outcome = ChiSquare(self.data_frame, self.df_helper, self.df_context,
                        self.meta_parser).test_dimension('Price_Range',
                                                         'Source')

    # (getter on the result object, section of exp_values), in check order.
    checks = (
        ('get_pvalue', 'pval'),
        ('get_effect_size', 'effect_size'),
        ('get_stat', 'stats'),
        ('get_v_value', 'v_value'),
    )
    for getter_name, section in checks:
        actual = getattr(outcome, getter_name)()
        self.assertAlmostEqual(actual, exp_values[section][pair_key],
                               places=5)
def test_chisquare_measure(self):
    """Check ``ChiSquare.test_measures`` for 'Price_Range' vs 'Marketing_Cost'.

    The p-value, effect size, statistic and v-value of the result are each
    compared with the matching ``exp_values`` entry to 5 decimal places.
    """
    pair_key = 'Price_Range-Marketing_Cost'
    outcome = ChiSquare(self.data_frame, self.df_helper, self.df_context,
                        self.meta_parser).test_measures('Price_Range',
                                                        'Marketing_Cost')

    # (getter on the result object, section of exp_values), in check order.
    checks = (
        ('get_pvalue', 'pval'),
        ('get_effect_size', 'effect_size'),
        ('get_stat', 'stats'),
        ('get_v_value', 'v_value'),
    )
    for getter_name, section in checks:
        actual = getattr(outcome, getter_name)()
        self.assertAlmostEqual(actual, exp_values[section][pair_key],
                               places=5)
def Predict(self):
    """Score the current data frame with a saved model and analyse the output.

    Steps, in order:

    1. Load the fitted pipeline and the trained classifier ("OneVsRest" or
       "lr") from locations derived from the configured model path,
       transform ``self._data_frame`` with both, and fill the result column
       by looking each prediction up in the mapping returned by
       ``MLUtils.read_string_indexer_mapping``.
    2. Write the scored rows to ``<score path>/ScoredData/data.csv`` and
       the score summary to ``<score path>/Summary/summary.json``.
    3. Run the frequency and chi-square analyses on the scored data and
       write their results and narratives below the score path.  Each
       analysis is best-effort: a failure is reported (message on stdout,
       traceback on stderr) and does not abort the method.

    Raises:
        ValueError: if ``self._classifier`` is neither "OneVsRest" nor "lr".
    """
    # Local import: only needed to report failures of the optional analyses.
    import traceback

    SQLctx = SQLContext(sparkContext=self._spark.sparkContext,
                        sparkSession=self._spark)

    level_counts_train = self._dataframe_context.get_level_count_dict()
    categorical_columns = self._dataframe_helper.get_string_columns()
    numerical_columns = self._dataframe_helper.get_numeric_columns()
    time_dimension_columns = self._dataframe_helper.get_timestamp_columns()
    result_column = self._dataframe_context.get_result_column()
    categorical_columns = [
        x for x in categorical_columns if x != result_column
    ]

    # Compare the level counts seen at training time with those of the data
    # being scored.
    # NOTE(review): dataSanity is computed but not consulted anywhere in
    # this method.
    level_counts_score = CommonUtils.get_level_count_dict(
        self._data_frame,
        categorical_columns,
        self._dataframe_context.get_column_separator(),
        output_type="dict",
        dataType="spark")
    dataSanity = True
    for key in level_counts_train:
        if (key not in level_counts_score
                or level_counts_train[key] != level_counts_score[key]):
            dataSanity = False

    score_path = self._dataframe_context.get_score_path()
    score_data_path = score_path + "/ScoredData/data.csv"
    score_summary_path = score_path + "/Summary/summary.json"

    # The model and the pipeline are stored side by side in one directory.
    trained_model_path = self._dataframe_context.get_model_path()
    if trained_model_path.endswith(".pkl"):
        trained_model_path = "/".join(
            trained_model_path.split("/")[:-1]) + "/model"
    pipeline_path = "/".join(trained_model_path.split("/")[:-1]) + "/pipeline"

    pipelineModel = MLUtils.load_pipeline(pipeline_path)
    if self._classifier == "OneVsRest":
        trained_model = MLUtils.load_one_vs_rest_model(trained_model_path)
    elif self._classifier == "lr":
        trained_model = MLUtils.load_logistic_model(trained_model_path)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on trained_model.
        raise ValueError("Unsupported classifier: %r" % (self._classifier, ))

    df = self._data_frame
    indexed = pipelineModel.transform(df)
    transformed = trained_model.transform(indexed)
    label_indexer_dict = MLUtils.read_string_indexer_mapping(
        pipeline_path, SQLctx)
    prediction_to_levels = udf(lambda x: label_indexer_dict[x], StringType())
    transformed = transformed.withColumn(
        result_column, prediction_to_levels(transformed.prediction))

    feature_columns = (categorical_columns + time_dimension_columns +
                       numerical_columns)
    if "probability" in transformed.columns:
        probability_dataframe = transformed.select(
            [result_column, "probability"]).toPandas()
        probability_dataframe = probability_dataframe.rename(
            index=str, columns={result_column: "predicted_class"})
        probability_dataframe["predicted_probability"] = (
            probability_dataframe["probability"].apply(lambda x: max(x)))
        self._score_summary["prediction_split"] = (
            MLUtils.calculate_scored_probability_stats(probability_dataframe))
        scored_dataframe = transformed.select(
            feature_columns + [result_column, "probability"]).toPandas()
    else:
        self._score_summary["prediction_split"] = []
        scored_dataframe = transformed.select(
            feature_columns + [result_column]).toPandas()
    self._score_summary["result_column"] = result_column

    # Drops the first 7 characters when the path starts with "file"
    # (presumably a "file://" prefix -- TODO confirm).
    if score_data_path.startswith("file"):
        score_data_path = score_data_path[7:]
    scored_dataframe.to_csv(score_data_path, header=True, index=False)
    CommonUtils.write_to_file(
        score_summary_path, json.dumps({"scoreSummary": self._score_summary}))

    print("STARTING DIMENSION ANALYSIS ...")
    columns_to_keep = []
    columns_to_drop = []
    considercolumnstype = (
        self._dataframe_context.get_score_consider_columns_type())
    considercolumns = self._dataframe_context.get_score_consider_columns()
    if considercolumnstype is not None and considercolumns is not None:
        if considercolumnstype == ["excluding"]:
            columns_to_drop = considercolumns
        elif considercolumnstype == ["including"]:
            columns_to_keep = considercolumns
    if len(columns_to_keep) > 0:
        # "including" mode: drop everything of the input frame not listed.
        columns_to_drop = list(set(df.columns) - set(columns_to_keep))

    scored_df = transformed.select(feature_columns + [result_column])
    spark_scored_df = SQLctx.createDataFrame(scored_df.toPandas())
    columns_to_drop = [
        x for x in columns_to_drop if x in spark_scored_df.columns
    ]
    modified_df = spark_scored_df.select(
        [x for x in spark_scored_df.columns if x not in columns_to_drop])
    df_helper = DataFrameHelper(modified_df, self._dataframe_context)
    df_helper.set_params()
    df = df_helper.get_data_frame()

    try:
        fs = time.time()
        narratives_file = score_path + "/narratives/FreqDimension/data.json"
        result_file = score_path + "/results/FreqDimension/data.json"
        df_freq_dimension_obj = FreqDimensions(
            spark_scored_df, df_helper, self._dataframe_context).test_all(
                dimension_columns=[result_column])
        df_freq_dimension_result = CommonUtils.as_dict(df_freq_dimension_obj)
        CommonUtils.write_to_file(result_file,
                                  json.dumps(df_freq_dimension_result))
        narratives_obj = DimensionColumnNarrative(result_column, df_helper,
                                                  self._dataframe_context,
                                                  df_freq_dimension_obj)
        narratives = CommonUtils.as_dict(narratives_obj)
        CommonUtils.write_to_file(narratives_file, json.dumps(narratives))
        # Single-argument print: same text as the former Python 2 print
        # statement, and also valid on Python 3.
        print("Frequency Analysis Done in  %s  seconds." % (time.time() - fs))
    except Exception:
        # Best-effort step: keep going, but do not hide why it failed.
        print("Frequency Analysis Failed ")
        traceback.print_exc()

    try:
        fs = time.time()
        narratives_file = score_path + "/narratives/ChiSquare/data.json"
        result_file = score_path + "/results/ChiSquare/data.json"
        df_chisquare_obj = ChiSquare(
            df, df_helper, self._dataframe_context).test_all(
                dimension_columns=[result_column])
        df_chisquare_result = CommonUtils.as_dict(df_chisquare_obj)
        CommonUtils.write_to_file(result_file,
                                  json.dumps(df_chisquare_result))
        chisquare_narratives = CommonUtils.as_dict(
            ChiSquareNarratives(df_helper, df_chisquare_obj,
                                self._dataframe_context, df))
        CommonUtils.write_to_file(narratives_file,
                                  json.dumps(chisquare_narratives))
        print("ChiSquare Analysis Done in  %s  seconds." % (time.time() - fs))
    except Exception:
        # Best-effort step: keep going, but do not hide why it failed.
        print("ChiSquare Analysis Failed ")
        traceback.print_exc()
def setUp(self):
    """Build the Spark session, context and chi-square results for a test.

    Reads the "chisquare" test configuration, configures a
    ``ContextSetter`` from it, loads the dataset and its metadata, and
    runs ``ChiSquare.test_all`` for the configured result column.  The
    objects later tests need are stored on ``self``.

    Raises:
        KeyError: if a mandatory key is missing from the test configuration.
    """
    APP_NAME = "test"
    spark = CommonUtils.get_spark_session(app_name=APP_NAME,
                                          hive_environment=False)
    spark.sparkContext.setLogLevel("ERROR")

    configJson = get_test_configs("testCase", testFor="chisquare")
    config = configJson["config"]
    jobConfig = configJson["job_config"]
    jobType = jobConfig["job_type"]
    jobName = jobConfig["job_name"]
    jobURL = jobConfig["job_url"]
    messageURL = jobConfig["message_url"]
    # Optional keys: default to None when absent (replaces a bare except
    # that also hid unrelated errors).
    errorURL = jobConfig.get("error_reporting_url")
    appid = jobConfig.get("app_id")
    debugMode = True
    LOGGER = {}

    configJsonObj = configparser.ParserConfig(config)
    configJsonObj.set_json_params()

    dataframe_context = ContextSetter(configJsonObj)
    # jobType should be set before set_params call of dataframe_context
    dataframe_context.set_job_type(jobType)
    dataframe_context.set_params()
    dataframe_context.set_message_url(messageURL)
    dataframe_context.set_app_id(appid)
    dataframe_context.set_debug_mode(debugMode)
    dataframe_context.set_job_url(jobURL)
    dataframe_context.set_app_name(APP_NAME)
    dataframe_context.set_error_url(errorURL)
    dataframe_context.set_logger(LOGGER)
    dataframe_context.set_xml_url(jobConfig["xml_url"])
    dataframe_context.set_job_name(jobName)
    dataframe_context.set_environment("debugMode")
    dataframe_context.set_message_ignore(True)
    dataframe_context.set_analysis_name("Descriptive analysis")

    df = MasterHelper.load_dataset(spark, dataframe_context)
    metaParserInstance = MasterHelper.get_metadata(df, spark,
                                                   dataframe_context, None)
    df, df_helper = MasterHelper.set_dataframe_helper(
        df, dataframe_context, metaParserInstance)
    targetVal = dataframe_context.get_result_column()

    self.result_setter = ResultSetter(dataframe_context)
    self.story_narrative = NarrativesTree()
    self.story_narrative.set_name("{} Performance Report".format(targetVal))

    self.data_frame = df
    self.df_helper = df_helper
    self.df_context = dataframe_context
    self.meta_parser = metaParserInstance
    self.base_dir = "/chisquare/"
    self.significant_variables = [
        'Buyer_Gender', 'Sales', 'Discount_Range', 'Shipping_Cost',
        'Last_Transaction', 'Marketing_Cost'
    ]
    # Fixed list used by the tests (the helper-derived value that used to
    # be assigned first was overwritten immediately).
    self.measure_columns = [
        'Tenure_in_Days', 'Sales', 'Marketing_Cost', 'Shipping_Cost',
        'Last_Transaction'
    ]
    self.df_chisquare_obj = ChiSquare(
        self.data_frame, self.df_helper, self.df_context,
        self.meta_parser).test_all(
            dimension_columns=(self.df_context.get_result_column(), ))
    self.df_chisquare_result = self.df_chisquare_obj.get_result()
    self.num_analysed_variables = 11
class TestChiSquare(unittest.TestCase):
    """Tests for ``ChiSquare`` and ``ChiSquareAnalysis`` on 'Price_Range'.

    Expected numbers come from the module-level ``exp_values`` and
    ``exp_data_dict`` objects.
    """

    # (section of exp_values, getter on a chi-square result), in check order.
    _METRIC_GETTERS = (
        ('pval', 'get_pvalue'),
        ('effect_size', 'get_effect_size'),
        ('stats', 'get_stat'),
        ('v_value', 'get_v_value'),
    )

    # (column tested against 'Price_Range', key inside exp_values[section]).
    # NOTE(review): the Buyer_Gender key is spelled with a hyphen
    # ('Buyer-Gender'), unlike the other keys; kept as found -- confirm it
    # matches exp_values.
    _ALL_COLUMNS = (
        ('Deal_Type', 'Price_Range-Deal_Type'),
        ('Discount_Range', 'Price_Range-Discount_Range'),
        ('Source', 'Price_Range-Source'),
        ('Platform', 'Price_Range-Platform'),
        ('Buyer_Age', 'Price_Range-Buyer_Age'),
        ('Buyer_Gender', 'Price_Range-Buyer-Gender'),
        ('Tenure_in_Days', 'Price_Range-Tenure_in_Days'),
        ('Sales', 'Price_Range-Sales'),
        ('Marketing_Cost', 'Price_Range-Marketing_Cost'),
        ('Shipping_Cost', 'Price_Range-Shipping_Cost'),
        ('Last_Transaction', 'Price_Range-Last_Transaction'),
    )

    def setUp(self):
        """Build the Spark session, context and chi-square results for a test.

        Reads the "chisquare" test configuration, configures a
        ``ContextSetter`` from it, loads the dataset and its metadata, and
        runs ``ChiSquare.test_all`` for the configured result column.  The
        objects the tests need are stored on ``self``.

        Raises:
            KeyError: if a mandatory key is missing from the configuration.
        """
        APP_NAME = "test"
        spark = CommonUtils.get_spark_session(app_name=APP_NAME,
                                              hive_environment=False)
        spark.sparkContext.setLogLevel("ERROR")

        configJson = get_test_configs("testCase", testFor="chisquare")
        config = configJson["config"]
        jobConfig = configJson["job_config"]
        jobType = jobConfig["job_type"]
        jobName = jobConfig["job_name"]
        jobURL = jobConfig["job_url"]
        messageURL = jobConfig["message_url"]
        # Optional keys: default to None when absent (replaces a bare
        # except that also hid unrelated errors).
        errorURL = jobConfig.get("error_reporting_url")
        appid = jobConfig.get("app_id")
        debugMode = True
        LOGGER = {}

        configJsonObj = configparser.ParserConfig(config)
        configJsonObj.set_json_params()

        dataframe_context = ContextSetter(configJsonObj)
        # jobType should be set before set_params call of dataframe_context
        dataframe_context.set_job_type(jobType)
        dataframe_context.set_params()
        dataframe_context.set_message_url(messageURL)
        dataframe_context.set_app_id(appid)
        dataframe_context.set_debug_mode(debugMode)
        dataframe_context.set_job_url(jobURL)
        dataframe_context.set_app_name(APP_NAME)
        dataframe_context.set_error_url(errorURL)
        dataframe_context.set_logger(LOGGER)
        dataframe_context.set_xml_url(jobConfig["xml_url"])
        dataframe_context.set_job_name(jobName)
        dataframe_context.set_environment("debugMode")
        dataframe_context.set_message_ignore(True)
        dataframe_context.set_analysis_name("Descriptive analysis")

        df = MasterHelper.load_dataset(spark, dataframe_context)
        metaParserInstance = MasterHelper.get_metadata(
            df, spark, dataframe_context, None)
        df, df_helper = MasterHelper.set_dataframe_helper(
            df, dataframe_context, metaParserInstance)
        targetVal = dataframe_context.get_result_column()

        self.result_setter = ResultSetter(dataframe_context)
        self.story_narrative = NarrativesTree()
        self.story_narrative.set_name(
            "{} Performance Report".format(targetVal))

        self.data_frame = df
        self.df_helper = df_helper
        self.df_context = dataframe_context
        self.meta_parser = metaParserInstance
        self.base_dir = "/chisquare/"
        self.significant_variables = [
            'Buyer_Gender', 'Sales', 'Discount_Range', 'Shipping_Cost',
            'Last_Transaction', 'Marketing_Cost'
        ]
        # Fixed list used by the tests (the helper-derived value that used
        # to be assigned first was overwritten immediately).
        self.measure_columns = [
            'Tenure_in_Days', 'Sales', 'Marketing_Cost', 'Shipping_Cost',
            'Last_Transaction'
        ]
        self.df_chisquare_obj = ChiSquare(
            self.data_frame, self.df_helper, self.df_context,
            self.meta_parser).test_all(
                dimension_columns=(self.df_context.get_result_column(), ))
        self.df_chisquare_result = self.df_chisquare_obj.get_result()
        self.num_analysed_variables = 11

    def test_chisquare_dimension(self):
        """``test_dimension`` for 'Source' matches exp_values to 5 places."""
        test_dimension = ChiSquare(
            self.data_frame, self.df_helper, self.df_context,
            self.meta_parser).test_dimension('Price_Range', 'Source')
        for section, getter in self._METRIC_GETTERS:
            self.assertAlmostEqual(
                getattr(test_dimension, getter)(),
                exp_values[section]['Price_Range-Source'],
                places=5)

    def test_chisquare_measure(self):
        """``test_measures`` for 'Marketing_Cost' matches exp_values to 5 places."""
        test_measures = ChiSquare(
            self.data_frame, self.df_helper, self.df_context,
            self.meta_parser).test_measures('Price_Range', 'Marketing_Cost')
        for section, getter in self._METRIC_GETTERS:
            self.assertAlmostEqual(
                getattr(test_measures, getter)(),
                exp_values[section]['Price_Range-Marketing_Cost'],
                places=5)

    def test_chisquare_all(self):
        """Every metric of every column from ``test_all`` matches exp_values.

        Checks run metric by metric (p-value, effect size, statistic,
        v-value) and, within a metric, column by column, using the default
        precision of ``assertAlmostEqual``.
        """
        for section, getter in self._METRIC_GETTERS:
            for column, expected_key in self._ALL_COLUMNS:
                result = self.df_chisquare_obj.get_chisquare_result(
                    'Price_Range', column)
                self.assertAlmostEqual(
                    getattr(result, getter)(),
                    exp_values[section][expected_key],
                    msg='%s mismatch for %s' % (section, column))

    def test_chisquare_analysis(self):
        """Narratives generated for 'Buyer_Gender' match the expected data."""
        target_chisquare_result = self.df_chisquare_result['Price_Range']
        chisquare_result = self.df_chisquare_obj.get_chisquare_result(
            'Price_Range', 'Buyer_Gender')
        out = ChiSquareAnalysis(
            self.df_context, self.df_helper, chisquare_result, 'Price_Range',
            'Buyer_Gender', self.significant_variables,
            self.num_analysed_variables, self.data_frame,
            self.measure_columns, self.base_dir, None,
            target_chisquare_result)._generate_narratives()

        self.assertEqual(out['data_dict'], exp_data_dict)
        # The previous assertions compared each target_dict entry with
        # itself, which can only verify that the key exists; state that
        # check explicitly.
        # TODO: compare these entries against expected values once they
        # are available.
        for level in ('11 to 50', '101 to 500', '0 to 10'):
            self.assertIn(level, out['target_dict'])