def test_add_training_analysis(self):
    ### Builds a report whose detail section contains a level-2 header
    ### followed by a learning-curve component read from the sample JSON.
    ### The loaded values are printed for inspection; nothing is asserted.
    self.name = "training-analysis"
    self.report_name = "Training Analysis Report"
    self.tag_number = 1

    ## Report under test
    self.report = Report(name=self.report_name)
    detail = self.report.detail

    ### Level-2 header introducing the training part
    detail.add_header_level_2(text='Deep Learning Training')

    ### Load the learning-curve sample
    curve_file = prepare_input_path(
        data_path='sample_data/learning_curve.json')
    with open(curve_file, 'r') as fp:
        curve = json.load(fp)

    ### Dump what was loaded (history is truncated to its first two keys)
    print('best_idx:', curve['best_idx'])
    history_preview = {}
    for key in list(curve['history'].keys())[:2]:
        history_preview[key] = curve['history'][key]
    print('history [first 2 samples]:', history_preview)
    print('benchmark_metric:', curve['benchmark_metric'])
    print('benchmark_threshold:', curve['benchmark_threshold'])
    print('training_params:', curve['training_params'])

    ### Hand every field of the sample over to the learning-curve component
    detail.add_learning_curve(
        history=curve['history'],
        best_idx=curve['best_idx'],
        benchmark_metric=curve['benchmark_metric'],
        benchmark_threshold=curve['benchmark_threshold'],
        training_params=curve['training_params'],
    )
def test_add_overview(self):
    ### Fills the overview section with a page break, a title, a paragraph,
    ### four summary components, and checks the resulting component count.
    self.name = "overview"
    self.report_name = "Overview Report"
    self.tag_number = 1

    ## Report under test
    self.report = Report(name=self.report_name)
    overview = self.report.overview

    ### Cover section
    overview.add_new_page()
    overview.add_section_title(text="Overview")
    overview.add_paragraph(text="This is summary Info")

    ### Model information as (label, value) pairs
    overview.add_model_info_summary(model_info=[
        ('Model ID', '12345678'),
        ('Model Version', 'v6'),
        ('Scenario ID', '111222333444555'),
        ('Notes', 'This model is created as a beta version.'),
    ])

    ### Timing as (stage, value) pairs
    overview.add_training_timing(timing=[
        ('Data Preprocessing', 1000),
        ('Feature Engineering', 10000),
        ('Training', 200200),
        ('Evaluation', 30303),
    ])

    ### Data-set sizes as (split, count) pairs
    overview.add_data_set_summary(data_summary=[
        ('training', 10000),
        ('validation', 2000),
        ('testing', 1000),
    ])

    ### Evaluation summary comes from the sample JSON
    summary_file = prepare_input_path(
        data_path='sample_data/evaluation_result_summary.json')
    with open(summary_file, 'r') as fp:
        evaluation_summary = json.load(fp)
    print(evaluation_summary)
    overview.add_evaluation_result_summary(
        evaluation_result=evaluation_summary)

    print(self.report.overview.contents)
    ### Seven components were added above; the expected length is 8
    ### because the overview starts with a default new-page component.
    self.assertEqual(len(self.report.overview.contents), 8)
def test_add_feature_analysis(self):
    ### Builds two detail pages: a feature-importance page driven by a
    ### pickled sample model, and a performance page driven by the
    ### hyperparameter-tuning sample. The number of components in the
    ### detail section is asserted after each group of additions.
    self.name = "feature-analysis"
    self.report_name = "Feature Analysis Report"
    self.page_number = 4

    ## Report under test
    self.report = Report(name=self.report_name)
    detail = self.report.detail

    ### ---- Feature Analysis page ----
    detail.add_new_page()
    detail.add_section_title("Example for Feature Analysis ")
    detail.add_header_level_1(text='Feature Analysis')
    self.assertEqual(len(detail.contents), 3)

    detail.add_header_level_2(text='Feature Importance')
    self.assertEqual(len(detail.contents), 4)

    ### Load the sample model and its training data
    ### NOTE(review): read_pickle unpickles the file; presumably fine for a
    ### trusted fixture bundled with the tests - confirm.
    model_file = Path(prepare_input_path(data_path='sample_data/model.pkl'))
    trained_model = pd.read_pickle(str(model_file))
    train_file = Path(
        prepare_input_path(data_path='sample_data/train_data.csv'))
    train_frame = pd.read_csv(str(train_file))

    ### The csv carries a header row, so its columns are the feature names
    interpreter = FeatureInterpreter(feature_names=train_frame.columns)
    ranking = interpreter.get_feature_importance_ranking(
        trained_model=trained_model, train_x=train_frame, method='default')
    detail.add_feature_importance(importance_ranking=ranking,
                                  importance_threshold=0.005)
    self.assertEqual(len(detail.contents), 5)

    ### ---- Performance Analysis page ----
    detail.add_new_page()
    detail.add_section_title("Example for Performance Analysis ")
    detail.add_paragraph(
        text='this is dummy model, no trained with titanic dataset')
    detail.add_header_level_1(text='Performance Analysis')
    detail.add_header_level_2(text='Hyperparameter Tuning')
    self.assertEqual(len(detail.contents), 10)

    ### Load the hyperparameter-tuning sample
    tuning_file = prepare_input_path(
        data_path='sample_data/hyperparameter_tuning.json')
    with open(tuning_file, 'r') as fp:
        tuning = json.load(fp)

    ### Print each field and pin the values the sample is expected to hold
    print('search_space:', tuning['search_space'])
    print('best_idx:', tuning['best_idx'])
    self.assertEqual(tuning['best_idx'], '3')

    history_preview = {}
    for key in list(tuning['history'].keys())[:2]:
        history_preview[key] = tuning['history'][key]
    print('history [first 2 samples]:', history_preview)

    print('benchmark_metric:', tuning['benchmark_metric'])
    self.assertEqual(tuning['benchmark_metric'], 'accuracy')
    print('benchmark_threshold:', tuning['benchmark_threshold'])
    self.assertEqual(tuning['benchmark_threshold'], 0.8)
    print('non_hyperopt_score:', tuning['non_hyperopt_score'])

    detail.add_hyperparameter_tuning(
        history=tuning['history'],
        best_idx=tuning['best_idx'],
        search_space=tuning['search_space'],
        benchmark_metric=tuning['benchmark_metric'],
        benchmark_threshold=tuning['benchmark_threshold'],
        non_hyperopt_score=tuning['non_hyperopt_score'],
    )

    print(self.report.detail.contents)
    self.assertEqual(len(self.report.detail.contents), 11)
def test_add_data_analysis(self):
    ### Walks through the data-analysis part of a report using the titanic
    ### sample: label distribution, field attributes, missing values and
    ### per-type field distributions. The size of the detail section is
    ### asserted after each step.
    self.name = "data-analysis"
    self.report_name = "Data Analysis Report"
    self.page_number = 10

    ## Report under test
    self.report = Report(name=self.report_name)
    detail = self.report.detail

    ### Section title and level-1 header
    detail.add_section_title("Example for Data Analysis ")
    detail.add_header_level_1(text='Data Analysis')
    self.assertEqual(len(detail.contents), 2)

    ### Load the sample data
    csv_file = prepare_input_path(data_path='sample_data/titanic.csv')
    data = pd.read_csv(csv_file)

    ### Attach a dummy birthday column to demonstrate datetime presentation
    def _random_birthday():
        # randint's `high` bound is exclusive, so this draws years
        # 1960-1978, months 1-11 and days 1-27.
        year = np.random.randint(low=1960, high=1979)
        month = np.random.randint(low=1, high=12)
        day = np.random.randint(low=1, high=28)
        return "%s" % (10000 * year + 100 * month + day)

    data['Birthday'] = [_random_birthday() for _ in range(len(data))]

    label_column = 'Survived'

    ### Derive column types - scenario where no metadata is provided
    column_types = DataUtil.get_column_types(data=data,
                                             threshold=0.3,
                                             label=label_column)
    feature, valid_feature_names, valid_feature_types, meta = column_types

    # -- Cast every non-numeric feature (and the label) to string --
    to_string = []
    for col_name, col_type in zip(valid_feature_names, valid_feature_types):
        if col_type != DATATYPE.NUMBER:
            to_string.append(col_name)
    if label_column is not None:
        to_string.append(label_column)
    DataUtil.cast_type_to_string(data=data, feature_names=to_string)

    ### Label distribution
    detail.add_header_level_2(text='Data Class (Label) Distribution')
    label_distributions = DataUtil.get_label_distribution(
        data=data, label=label_column)
    detail.add_data_set_distribution(label_distributions)
    self.assertEqual(len(detail.contents), 4)

    ### Per-field statistics, consumed by the distribution steps below
    stats = DataUtil.get_data_statistics(
        data=data,
        feature_names=valid_feature_names,
        feature_types=valid_feature_types,
        label=label_column)

    ### Field attributes
    detail.add_header_level_2(text='Data Field Attribute')
    detail.add_data_attributes(meta)
    self.assertEqual(len(detail.contents), 6)

    ### Missing-value check
    detail.add_header_level_2(text='Data Missing Value Check')
    missing_count, total_count = DataUtil.get_missing_value_count(
        data=data,
        feature_names=valid_feature_names,
        feature_types=valid_feature_types)
    print(missing_count)
    self.assertEqual(missing_count['Age'], 177)
    self.assertEqual(missing_count['Embarked'], 2)
    print(total_count)
    self.assertEqual(total_count['Name'], 891)
    self.assertEqual(total_count['Embarked'], 891)
    self.assertEqual(len(detail.contents), 7)

    detail.add_data_missing_value(missing_count=dict(missing_count),
                                  total_count=total_count)
    self.assertEqual(len(detail.contents), 8)

    ### Field distribution intro
    detail.add_header_level_2(text='Data Field Distribution')
    detail.add_paragraph(
        text=
        'This section displays distribution for categorical fields, numerical fields and text fields.'
    )
    self.assertEqual(len(detail.contents), 10)

    ### Categorical fields: one component per field, labelled stats only
    detail.add_header_level_3(text='Categorical Field Distribution')
    categorical_fields = feature[DATATYPE.CATEGORY]
    print(categorical_fields)
    self.assertEqual(len(categorical_fields), 5)
    for field_name in categorical_fields:
        labelled_stats, _ = stats[field_name]
        detail.add_categorical_field_distribution(
            field_name=field_name, field_distribution=labelled_stats)
    self.assertEqual(len(detail.contents), 16)

    ### Numerical fields
    detail.add_header_level_3(text='Numerical Field Distribution')
    numeric_fields = feature[DATATYPE.NUMBER]
    print(numeric_fields)
    self.assertEqual(len(numeric_fields), 2)
    for field_name in numeric_fields:
        labelled_stats, _ = stats[field_name]
        detail.add_numeric_field_distribution(
            field_name=field_name, field_distribution=labelled_stats)
    self.assertEqual(len(detail.contents), 19)

    ### Free-text fields
    detail.add_header_level_3(text='Text Field Distribution')
    text_fields = feature[DATATYPE.FREETEXT]
    print(text_fields)
    self.assertEqual(len(text_fields), 1)
    for field_name in text_fields:
        labelled_stats, _ = stats[field_name]
        detail.add_text_field_distribution(
            field_name=field_name, field_distribution=labelled_stats)
    self.assertEqual(len(detail.contents), 21)

    ### Datetime fields
    detail.add_header_level_3(text='Datetime Field Distribution')
    datetime_fields = feature[DATATYPE.DATETIME]
    print(datetime_fields)
    self.assertEqual(len(datetime_fields), 1)
    for field_name in datetime_fields:
        labelled_stats, _ = stats[field_name]
        detail.add_datetime_field_distribution(
            field_name=field_name, field_distribution=labelled_stats)

    print(self.report.detail.contents)
    self.assertEqual(len(self.report.detail.contents), 23)