def test_json_generate_report(self):
    """ Test report rendering with json config file """
    start_time = datetime.now().replace(microsecond=0)
    controller = Controller(config=Configuration(self.json))
    controller.render()
    end_time = datetime.now().replace(microsecond=0)

    # -- PDF Report --
    output = "%s/%s.pdf" % (self.out_path, self.json_writer_pdf_name)
    print("JSON-PDF report generated %s:" % output)
    with open(output, 'rb') as f:
        pdf = PdfFileReader(f)
        info = pdf.getDocumentInfo()
        number_of_pages = pdf.getNumPages()
        print(info)
        self.assertEqual(info['/Title'], self.json_report_name)
        # print(info['/CreationDate'])
        report_time = datetime.strptime(info['/CreationDate'],
                                        'D:%Y%m%d%H%M%S')
        self.assertTrue(time_in_range(start_time, end_time, report_time))
        print(number_of_pages)
        self.assertEqual(number_of_pages, self.json_writer_pdf_page_number)
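# NOTE: the timestamp assertions in these tests rely on a `time_in_range`
# helper defined elsewhere in the test module. A minimal sketch of what it is
# assumed to check (an assumption, not necessarily the real implementation):
def time_in_range(start, end, current):
    """Return True if `current` falls within [start, end], inclusive."""
    return start <= current <= end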
def test_json_generate_report(self):
    """ Test report rendering with json config file """
    # Set seed for reproducibility
    np.random.seed(123456)

    # Load the dataset and prepare training and test sets
    train_file = prepare_input_path(
        working_path='sample_input/housing_price/train.csv')
    data = pd.read_csv(train_file)
    data.dropna(axis=0, subset=['SalePrice'], inplace=True)
    y = data.SalePrice
    X = data.drop(['SalePrice', 'Id'],
                  axis=1).select_dtypes(exclude=['object'])
    train_X, test_X, train_y, test_y = train_test_split(X.values, y.values,
                                                        test_size=0.25)

    my_imputer = SimpleImputer()
    train_X = my_imputer.fit_transform(train_X)
    test_X = my_imputer.transform(test_X)

    my_model = GradientBoostingRegressor(n_estimators=1000, max_depth=5,
                                         learning_rate=0.1, subsample=0.7,
                                         random_state=42)
    hist = my_model.fit(train_X, train_y)
    train_X_df = pd.DataFrame(data=train_X, columns=X.columns.tolist())

    # Names picked up by Configuration via locals()
    clf = my_model
    clf_fn = my_model.predict
    y_train = []
    feature_names = X.columns.tolist()
    target_names_list = ['SalePrice']

    start_time = datetime.now().replace(microsecond=0)
    controller = Controller(config=Configuration(self.json, locals()))
    controller.render()
    end_time = datetime.now().replace(microsecond=0)

    # -- PDF Report --
    output = "%s/%s.pdf" % (self.out_path, self.json_writer_pdf_name)
    print("JSON-PDF report generated %s:" % output)
    with open(output, 'rb') as f:
        pdf = PdfFileReader(f)
        info = pdf.getDocumentInfo()
        number_of_pages = pdf.getNumPages()
        print(info)
        self.assertEqual(info['/Title'], self.json_report_name)
        # print(info['/CreationDate'])
        report_time = datetime.strptime(info['/CreationDate'],
                                        'D:%Y%m%d%H%M%S')
        print("{} {} {}".format(start_time, report_time, end_time))
        self.assertTrue(time_in_range(start_time, end_time, report_time))
        print(number_of_pages)
        self.assertEqual(number_of_pages, self.json_writer_pdf_page_number)
def test_json_generate_report(self):
    """ Test report rendering with json config file """
    start_time = datetime.now().replace(microsecond=0)
    controller = Controller(config=Configuration(self.json))
    controller.render()
    end_time = datetime.now().replace(microsecond=0)

    # -- PDF Report --
    output = "%s/%s.pdf" % (self.out_path, self.json_writer_pdf_name)
    print("JSON-PDF report generated %s:" % output)
    with open(output, 'rb') as f:
        pdf = PdfFileReader(f)
        info = pdf.getDocumentInfo()
        number_of_pages = pdf.getNumPages()
        print(info)
        self.assertEqual(info['/Title'], self.json_report_name)
        # print(info['/CreationDate'])
        report_time = datetime.strptime(info['/CreationDate'],
                                        'D:%Y%m%d%H%M%S')
        print("{} {} {}".format(start_time, report_time, end_time))
        self.assertTrue(time_in_range(start_time, end_time, report_time))
        print(number_of_pages)
        self.assertEqual(number_of_pages, self.json_writer_pdf_page_number)

    # -- HTML Report --
    output = "%s/%s.html" % (self.out_path, self.json_writer_html_name)
    print("JSON-HTML report generated %s:" % output)
    with open(output) as f:
        read_data = f.read()
        index = read_data.find(self.json_report_name)
        # -- the header starts at index 1279 --
        self.assertEqual(index, 1279)
        index = read_data.find('created on')
        create_date = read_data[index + 11:index + 30]
        print(create_date)
        report_time = datetime.strptime(create_date, '%Y-%m-%d %H:%M:%S')
        print("{} {} {}".format(start_time, report_time, end_time))
        self.assertTrue(time_in_range(start_time, end_time, report_time))
        number_of_tags = read_data.count('class="tab_contents"')
        # print(number_of_tags)
        self.assertEqual(number_of_tags, self.json_writer_html_tag_number)
def test_yml_load_call(self):
    """ Test yaml config file loading using __call__ """
    call = Configuration()
    conf = call(config=self.basic_yaml)

    self.assertIn(Constant.NAME.value, conf, msg="Section Title missing")
    name = conf[Constant.NAME.value]
    self.assertEqual(name, 'Sample Report', msg="Report name mis-match")

    self.assertIn(Constant.ENABLE_CONTENT_TABLE.value, conf,
                  msg="Content Table Flag missing")
    enable_content_table = conf[Constant.ENABLE_CONTENT_TABLE.value]
    self.assertTrue(enable_content_table, msg="Content Table Flag mis-match")

    self.assertIn(Constant.CONTENT_LIST.value, conf,
                  msg="Section Content missing")
    contents = conf[Constant.CONTENT_LIST.value]
    self.assertEqual(len(contents), 2)
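# For reference, a minimal `basic_yaml` satisfying the assertions in
# test_yml_load_call might look like the sketch below. The key names are
# hypothetical -- they must match whatever strings Constant.NAME,
# Constant.ENABLE_CONTENT_TABLE and Constant.CONTENT_LIST actually resolve to:
#
#   name: Sample Report
#   enable_content_table: true
#   contents:
#     - title: Section One
#     - title: Section Two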
def test_json_generate_report(self):
    """ Test report rendering with json config file """
    # Load the dataset and prepare training and test sets
    train_file = prepare_input_path(
        working_path='sample_input/housing_price_halfmil/train.csv')
    df_data = pd.read_csv(train_file, header=0, nrows=300)

    # Get predictor and target
    X = df_data.drop("Prices", axis=1).fillna(value=0)
    y = df_data["Prices"].fillna(value=0)
    train_X, test_X, train_y, test_y = train_test_split(X.values, y.values,
                                                        test_size=0.25)

    # Train a Lasso regression per alpha and keep the best model by R2
    from sklearn.linear_model import Lasso
    alpha_list = [0.01, 0.1, 1, 2, 5, 10]
    model_list = []
    r2_list = []
    for alpha in alpha_list:
        lm = Lasso(alpha)
        lm.fit(train_X, train_y)
        model_list.append(lm)
        # model quality
        y_pred = lm.predict(test_X)
        r2 = lm.score(test_X, test_y)
        r2_list.append(r2)
        print('Alpha: %s. R2: %s' % (alpha, r2))

    index = r2_list.index(max(r2_list))
    lm = model_list[index]

    # Names picked up by Configuration via locals()
    feature_names = X.columns.tolist()
    clf = lm
    clf_fn = lm.predict

    print('Subsetting training data to %s to speed up.' % self.limit_size)
    train_X = train_X[:self.limit_size]

    start_time = datetime.now().replace(microsecond=0)
    controller = Controller(config=Configuration(self.json, locals()))
    controller.render()
    end_time = datetime.now().replace(microsecond=0)

    # -- PDF Report --
    output = "%s/%s.pdf" % (self.out_path, self.json_writer_pdf_name)
    print("JSON-PDF report generated %s:" % output)
    with open(output, 'rb') as f:
        pdf = PdfFileReader(f)
        info = pdf.getDocumentInfo()
        number_of_pages = pdf.getNumPages()
        print(info)
        self.assertEqual(info['/Title'], self.json_report_name)
        # print(info['/CreationDate'])
        report_time = datetime.strptime(info['/CreationDate'],
                                        'D:%Y%m%d%H%M%S')
        print("{} {} {}".format(start_time, report_time, end_time))
        self.assertTrue(time_in_range(start_time, end_time, report_time))
        print(number_of_pages)
        self.assertEqual(number_of_pages, self.json_writer_pdf_page_number)
def test_json_generate_report(self):
    """ Test report rendering with json config file """
    # Set seed for reproducibility
    np.random.seed(123456)

    # -- Train on a subset of categories --
    categories = [
        'rec.sport.baseball',
        'soc.religion.christian',
        'sci.med'
    ]
    raw_train = datasets.fetch_20newsgroups(
        data_home=prepare_input_path(working_path='sample_input/20news'),
        subset='train', categories=categories)
    print('Training dataset keys:', list(raw_train.keys()))
    print('Training class name:', raw_train.target_names)
    print('Training sample target:', raw_train.target[:10])

    raw_test = datasets.fetch_20newsgroups(subset='test',
                                           categories=categories)

    X_train = raw_train.data
    vectorizer = TfidfVectorizer()
    X_train_vec = vectorizer.fit_transform(X_train)
    y_train = raw_train.target

    X_test_vec = vectorizer.transform(raw_test.data)
    y_test = raw_test.target

    print('Training sample:', len(X_train))
    print('--------------------')
    print(X_train[0])
    print('--------------------')

    clf = MultinomialNB(alpha=0.1)
    clf.fit(X_train_vec, y_train)

    print('Subsetting training sample to %s to speed up.' % self.limit_size)
    X_train = X_train[:self.limit_size]

    print('Classifier score:', clf.score(X_test_vec, y_test))
    print('Classifier predict func:', clf.predict_proba)

    def predict_fn(instance):
        vec = vectorizer.transform(instance)
        return clf.predict_proba(vec)

    print('Testing sample prob:', predict_fn(raw_test.data[:10]))

    # -- Instantiate the explainer --
    explainer = ExplainerFactory.get_explainer(domain=xai.DOMAIN.TEXT)
    explainer.build_explainer(predict_fn)
    print('Testing sample explanation:',
          explainer.explain_instance(raw_test.data[0]))

    # Names picked up by Configuration via locals()
    feature_names = []
    clf_fn = predict_fn
    target_names_list = []

    start_time = datetime.now().replace(microsecond=0)
    controller = Controller(config=Configuration(self.json, locals()))
    controller.render()
    end_time = datetime.now().replace(microsecond=0)

    # -- PDF Report --
    output = "%s/%s.pdf" % (self.out_path, self.json_writer_pdf_name)
    print("JSON-PDF report generated %s:" % output)
    with open(output, 'rb') as f:
        pdf = PdfFileReader(f)
        info = pdf.getDocumentInfo()
        number_of_pages = pdf.getNumPages()
        print(info)
        self.assertEqual(info['/Title'], self.json_report_name)
        # print(info['/CreationDate'])
        report_time = datetime.strptime(info['/CreationDate'],
                                        'D:%Y%m%d%H%M%S')
        print("{} {} {}".format(start_time, report_time, end_time))
        self.assertTrue(time_in_range(start_time, end_time, report_time))
        print(number_of_pages)
        self.assertEqual(number_of_pages, self.json_writer_pdf_page_number)
def test_yml_generate_report(self):
    """ Test report rendering with yaml config file """
    controller = Controller(config=Configuration(self.basic_yaml))
    controller.render()
def test_json_generate_report(self):
    """ Test report rendering with json config file """
    controller = Controller(config=Configuration(self.basic_json))
    controller.render()
def test_json_generate_report(self):
    """ Test report rendering with json config file """
    # Set seed for reproducibility
    np.random.seed(123456)

    # Load the dataset and prepare training and test sets
    raw_data = datasets.load_breast_cancer()
    X, y = raw_data['data'], raw_data['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    feature_names = raw_data['feature_names']
    target_names_list = list(raw_data['target_names'])

    # Instantiate a classifier, train, and evaluate on test set
    clf = RandomForestClassifier()
    clf.fit(X_train, y_train)
    clf.score(X_test, y_test)
    clf_fn = clf.predict_proba

    start_time = datetime.now().replace(microsecond=0)
    controller = Controller(config=Configuration(self.json, locals()))
    controller.render()
    end_time = datetime.now().replace(microsecond=0)

    # -- PDF Report --
    output = "%s/%s.pdf" % (self.out_path, self.json_writer_pdf_name)
    print("JSON-PDF report generated %s:" % output)
    with open(output, 'rb') as f:
        pdf = PdfFileReader(f)
        info = pdf.getDocumentInfo()
        number_of_pages = pdf.getNumPages()
        print(info)
        self.assertEqual(info['/Title'], self.json_report_name)
        # print(info['/CreationDate'])
        report_time = datetime.strptime(info['/CreationDate'],
                                        'D:%Y%m%d%H%M%S')
        print("{} {} {}".format(start_time, report_time, end_time))
        self.assertTrue(time_in_range(start_time, end_time, report_time))
        print(number_of_pages)
        self.assertEqual(number_of_pages, self.json_writer_pdf_page_number)

    # -- HTML Report --
    output = "%s/%s.html" % (self.out_path, self.json_writer_html_name)
    print("JSON-HTML report generated %s:" % output)
    with open(output) as f:
        read_data = f.read()
        index = read_data.find(self.json_report_name)
        # -- the header starts at index 1279 --
        self.assertEqual(index, 1279)
        index = read_data.find('created on')
        create_date = read_data[index + 11:index + 30]
        print(create_date)
        report_time = datetime.strptime(create_date, '%Y-%m-%d %H:%M:%S')
        print("{} {} {}".format(start_time, report_time, end_time))
        self.assertTrue(time_in_range(start_time, end_time, report_time))
        number_of_tags = read_data.count('class="tab_contents"')
        # print(number_of_tags)
        self.assertEqual(number_of_tags, self.json_writer_html_tag_number)