def test_generate_report(self, mocked_request):
    # Generate a report from the mocked statistic payload and check its content.
    #
    # mocked_request: request mock on which STAT_URL is registered so that
    # generate_report receives the canned JSON instead of a live response.

    # Load the canned payload with a context manager so the handle is closed.
    with open(MOCKED_STATISTIC) as stat_file:
        response = json.load(stat_file)
    mocked_request.get(STAT_URL, json=response)

    try:
        generate_report(MOCKED_DISTRIBUTION, HTML_TEMPLATE, TEST_REPORT)
        with open(TEST_REPORT, 'r') as tr:
            content = tr.read()

        self.assertIn('Load Test Report', content)
        # assertTrue(count, 2) used 2 as the failure *message* and passed for
        # any non-zero count; the intent is an exact comparison.
        self.assertEqual(content.count('HTTPError'), 2)
    finally:
        # Always remove the generated file, even when an assertion fails, so
        # a stale report cannot leak into other tests that check for it.
        if os.path.isfile(TEST_REPORT):
            os.remove(TEST_REPORT)
def run(cfg=None):
    """Run the pipeline end to end: acquire, prepare, split, train, report.

    Args:
        cfg: Configuration value forwarded unchanged to ``data_acq``,
            ``data_prep`` and ``train``. Defaults to ``None``.

    Returns:
        None. The last step hands the trained result and the held-out split
        to ``generate_report``.
    """
    # Dataset acquisition followed directly by feature building.
    dataset = data_prep(data_acq(cfg), cfg)

    # Train/test split of the prepared data.
    X_train, X_test, y_train, y_test = split_data(dataset)

    # Hyperparameter optimization on the training portion only.
    fitted = train(X_train, y_train, cfg)

    # Evaluation report on the held-out portion.
    generate_report(fitted, X_test, y_test)
def test_stat_url_inaccessible(self, mocked_request):
    # When STAT_URL answers with status 500, no report file may be produced.
    mocked_request.get(STAT_URL, status_code=500)

    generate_report(MOCKED_DISTRIBUTION, HTML_TEMPLATE, TEST_REPORT)

    report_exists = os.path.isfile(TEST_REPORT)
    self.assertFalse(report_exists)
#!/usr/bin/env python
# coding=utf-8
from src.query import querysql
from src.report import generate_report

if __name__ == '__main__':
    # The same pair of date-like strings is passed, unchanged, as the second
    # and third arguments for each platform, in the order ANDROID then IOS.
    window = ("20190305", "20190312")
    for platform in ("ANDROID", "IOS"):
        generate_report(platform, *window)
# NOTE(review): `i` is not defined anywhere in this excerpt; the statement
# below is presumably the tail of a row-copy loop that starts above it --
# confirm its indentation against the full file.
new_data['Price'][i] = data['Price'][i]

# %% create date features
add_datepart(new_data, 'Date')
new_data.drop('Elapsed', axis=1, inplace=True)

# %% distinguishing mondays and fridays from other dates
# Flag rows whose 'Dayofweek' is 0 or 4. The former per-row loop assigned the
# scalar to the *entire* column on every iteration, so every row ended up
# carrying the flag of the last row. A vectorised membership test sets each
# row from its own value (and rows with a missing value get 0).
new_data['mon_fri'] = new_data['Dayofweek'].isin([0, 4]).astype(int)

#%% generate report
profile = generate_report(new_data, 'Processed Safaricom Data Report')
profile.to_file(output_file='reports/Processed-Safaricom-Report.html')

# %% save processed data
print(new_data.head())
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only emitted an extra "None" line.
new_data.info()
print(new_data.shape)
new_data.to_csv('data/processed/Safaricom-Ltd(SCOM).csv', index=False)

# %% splitting data into train and test data at 80% train
threshold = get_threshold(new_data)
train = new_data[:threshold]
test = new_data[threshold:]

# %% save train and test data
print(train.shape)
import pandas as pd
import matplotlib.pyplot as plt
from src.report import generate_report

# Default size applied to matplotlib figures created after this point.
plt.rcParams['figure.figsize'] = (12, 8)

#%% create a dataframe
# The last two rows of the CSV display statistics rather than data, so they
# are dropped right after loading.
data = pd.read_csv('data/raw/Safaricom-Ltd(SCOM).csv').iloc[:-2]
for summary in (data.tail(), data.shape, data.describe()):
    print(summary)

#%% generate report
profile = generate_report(data, 'Raw Safaricom Data Report')
profile.to_file(output_file='reports/Raw-Safaricom-Report.html')

#%% show number of missing data per column
null_counts = data.isna().sum()
print('Number of null values in each column:\n', null_counts)

#%% show datatypes in dataset
dtype_frequency = data.dtypes.value_counts()
print('Data types and their frequency:\n', dtype_frequency)

# show datatypes for each column
data_dtypes = data.dtypes.to_frame(name='dtypes')
print(data_dtypes)

#%% convert incorrectly mapped columns to float
cols = ['Price', 'Open', 'High', 'Low']