def test_generate_report(self, mocked_request):
    """generate_report should render the HTML report from mocked statistics."""
    # Load the canned statistics payload; the original leaked the file
    # handle by calling json.load(open(...)) without closing it.
    with open(MOCKED_STATISTIC) as stats_file:
        response = json.load(stats_file)
    mocked_request.get(STAT_URL, json=response)
    generate_report(MOCKED_DISTRIBUTION, HTML_TEMPLATE, TEST_REPORT)
    with open(TEST_REPORT, 'r') as tr:
        content = tr.read()
        self.assertTrue('Load Test Report' in content)
        # BUG FIX: assertTrue(x, 2) treats 2 as the failure message and only
        # checks truthiness of x; assertEqual actually verifies the count.
        self.assertEqual(content.count('HTTPError'), 2)
    os.remove(TEST_REPORT)
Example #2
0
 def test_generate_report(self, mocked_request):
     """generate_report should render the HTML report from mocked statistics."""
     # Load the canned statistics payload; the original leaked the file
     # handle by calling json.load(open(...)) without closing it.
     with open(MOCKED_STATISTIC) as stats_file:
         response = json.load(stats_file)
     mocked_request.get(STAT_URL, json=response)
     generate_report(MOCKED_DISTRIBUTION, HTML_TEMPLATE, TEST_REPORT)
     with open(TEST_REPORT, 'r') as tr:
         content = tr.read()
         self.assertTrue('Load Test Report' in content)
         # BUG FIX: assertTrue(x, 2) treats 2 as the failure message and only
         # checks truthiness of x; assertEqual actually verifies the count.
         self.assertEqual(content.count('HTTPError'), 2)
     os.remove(TEST_REPORT)
Example #3
0
def run(cfg=None):
    """Execute the full pipeline: acquire data, build features, split,
    train with hyperparameter optimization, and report evaluation metrics.

    cfg: optional configuration object passed through to each stage.
    """
    raw_data = data_acq(cfg)
    features = data_prep(raw_data, cfg)
    X_train, X_test, y_train, y_test = split_data(features)
    model = train(X_train, y_train, cfg)
    generate_report(model, X_test, y_test)
 def test_stat_url_inaccessible(self, mocked_request):
     """No report file should be written when the stats endpoint errors."""
     mocked_request.get(STAT_URL, status_code=500)
     generate_report(MOCKED_DISTRIBUTION, HTML_TEMPLATE, TEST_REPORT)
     report_written = os.path.isfile(TEST_REPORT)
     self.assertFalse(report_written)
Example #5
0
#!/usr/bin/env python
# coding=utf-8

from src.query import querysql
from src.report import generate_report

if __name__ == '__main__':
    # Generate the report for each supported platform over the same week.
    for platform in ("ANDROID", "IOS"):
        generate_report(platform, "20190305", "20190312")
Example #6
0
 def test_stat_url_inaccessible(self, mocked_request):
     """No report file should be written when the stats endpoint errors."""
     mocked_request.get(STAT_URL, status_code=500)
     generate_report(MOCKED_DISTRIBUTION, HTML_TEMPLATE, TEST_REPORT)
     report_written = os.path.isfile(TEST_REPORT)
     self.assertFalse(report_written)
Example #7
0
    new_data['Price'][i] = data['Price'][i]

# %% create date features
# Expand 'Date' into component date-part columns via add_datepart, then
# drop the 'Elapsed' column it adds (presumably an epoch timestamp that
# would leak ordering into the features — TODO confirm add_datepart output).
add_datepart(new_data, 'Date')
new_data.drop('Elapsed', axis=1, inplace=True)

# %% distinguishing mondays and fridays from other dates
new_data['mon_fri'] = 0
for i in range(0, len(new_data)):
    if new_data['Dayofweek'][i] == 0 or new_data['Dayofweek'][i] == 4:
        new_data['mon_fri'] = 1
    else:
        new_data['mon_fri'] = 0

#%% generate report
# Build a profiling report of the processed frame and export it as HTML.
profile = generate_report(new_data, 'Processed Safaricom Data Report')
profile.to_file(output_file='reports/Processed-Safaricom-Report.html')

# %% save processed data
# Quick sanity check of the processed frame before persisting it.
print(new_data.head())
print(new_data.info())
print(new_data.shape)
new_data.to_csv('data/processed/Safaricom-Ltd(SCOM).csv', index=False)

# %% splitting data into train and test data at 80% train
# NOTE(review): get_threshold is presumed to return the 80% row index —
# confirm against its definition. Split is positional (time-ordered),
# not shuffled.
threshold = get_threshold(new_data)
train = new_data[:threshold]
test = new_data[threshold:]

# %% save train and test data
print(train.shape)
Example #8
0
import pandas as pd
import matplotlib.pyplot as plt

from src.report import generate_report

# Default figure size for all matplotlib plots in this script.
plt.rcParams['figure.figsize'] = (12, 8)

#%% create a dataframe
data = pd.read_csv('data/raw/Safaricom-Ltd(SCOM).csv')
data = data[:-2]  # remove rows displaying statistics
print(data.tail())
print(data.shape)
print(data.describe())

#%% generate report
# Build a profiling report of the raw frame and export it as HTML.
profile = generate_report(data, 'Raw Safaricom Data Report')
profile.to_file(output_file='reports/Raw-Safaricom-Report.html')

#%% show number of missing data per column
null_counts = data.isnull().sum()
print('Number of null values in each column:\n', null_counts)

#%% show datatypes in dataset
print('Data types and their frequency:\n', data.dtypes.value_counts())

# show datatypes for each column
data_dtypes = pd.DataFrame(data.dtypes, columns=['dtypes'])
print(data_dtypes)

#%% convert incorrectly mapped columns to float
# Columns read as strings by read_csv; the conversion itself happens
# beyond this excerpt.
cols = ['Price', 'Open', 'High', 'Low']