class TestDataHandler(unittest.TestCase):
    """Structural checks on the mapping returned by ``DataHandler.get_data``."""

    def setUp(self):
        """Build the ``DataHandler`` under test.

        The fixture is created here rather than in ``__init__`` so that it is
        constructed when the test runs (not when the test case object is
        loaded) and so that a construction failure is reported against the
        test itself.
        """
        self.dh = DataHandler(data_path=root.joinpath("data"))

    def test_get_data(self):
        """``get_data`` returns ``{str: {str: pandas.DataFrame}}``."""
        all_data = self.dh.get_data()
        self.assertIsInstance(all_data, dict)

        for proj, datasets in all_data.items():
            # Outer level: project name -> mapping of that project's datasets.
            self.assertIsInstance(proj, str)
            self.assertIsInstance(datasets, dict)

            for key, value in datasets.items():
                # Inner level: dataset key -> DataFrame.
                self.assertIsInstance(key, str)
                self.assertIsInstance(value, pd.DataFrame)
def bug_count(self):
    """Forecast the next value of every project's series and save the results.

    For each project returned by ``DataHandler.get_data`` the data is walked
    with ``self.moving_window(data, frame=24)``.  For every ``(train, test)``
    pair one row is recorded:

    * ``Date``   - first index entry of ``test`` formatted as ``YYYY-MM-DD``
    * ``Actual`` - first value of ``test``
    * ``ARIMA``  - ``int(abs(...))`` of the ARIMA(4, 1, 4) forecast fitted on
      ``train`` with ``freq='W-MON'``; if ARIMA fitting or forecasting fails,
      a linear-regression extrapolation of ``train`` is stored instead
    * ``NAIVE``  - the ``Actual`` value of the previous window (``0`` for the
      first window of a project)

    The per-project table is written to ``<root>/results/<project>.csv``.

    Returns:
        dict: project name -> results ``DataFrame`` indexed by ``Date``.
        (Previously the returned dict was never filled in and was always
        empty.)
    """
    col_name = ['Date', 'Actual', 'ARIMA', 'NAIVE']
    p, d, q = 4, 1, 4  # fixed ARIMA order used for every window

    data_handler = DataHandler(data_path=root.joinpath('data'))
    files = data_handler.get_data()
    all_results = dict()

    for proj, data in files.items():
        results = []
        actual = 0
        for train, test in self.moving_window(data, frame=24):
            # Advance the bookkeeping exactly once per window, *before* any
            # model is tried.  Doing it inside both the ARIMA path and the
            # fallback path could shift it twice when ARIMA failed late,
            # which made the naive forecast equal to the current actual.
            forecast_naive = actual
            actual = test.values.ravel()[0]
            date = test.index.strftime("%Y-%m-%d").values[0]

            try:
                arima = ARIMA(train, order=(p, d, q), freq='W-MON')
                arima_fit = arima.fit(disp=0)
                forecast_arima = int(abs(arima_fit.forecast()[0]))
            except Exception:
                # Deliberate best-effort fallback: when ARIMA cannot be
                # fitted or forecast for this window, extrapolate a straight
                # line through the training values instead.  ``Exception``
                # (not a bare ``except``) keeps Ctrl-C / SystemExit working.
                X = np.arange(len(train.values) + 1).reshape(-1, 1)
                y = train.values
                model = LinearRegression()
                model.fit(X[:-1], y)
                # X[-1] is the first time step after the training data.
                forecast_arima = int(
                    abs(model.predict(X[-1].reshape(1, -1))[0]))

            results.append([date, actual, forecast_arima, forecast_naive])

        results = pd.DataFrame(results, columns=col_name).set_index('Date')
        results.to_csv(root.joinpath('results', proj + ".csv"))
        all_results[proj] = results

    return all_results
# Make the parent of ``root`` importable.  ``root`` exposes ``.parent`` and is
# presumably a ``pathlib.Path`` (confirm where it is defined).  ``sys.path``
# must only hold strings: a path object never compares equal to the existing
# string entries, and non-string entries are ignored by the import system, so
# convert to ``str`` for both the membership test and the append.
root = root.parent
if str(root) not in sys.path:
    sys.path.append(str(root))

from metrics.abcd import ABCD
from data.data_handler import DataHandler
from prediction.model import PredictionModel

import warnings
warnings.filterwarnings("ignore")

if __name__ == "__main__":
    dh = DataHandler()
    data = dh.get_data(top_k=1)

    # Keep the last value of the returned mapping as the working frame.
    for val in data.values():
        data = val

    # Every column but the last is used as a feature; the last is the target.
    X = data[data.columns[:-1]]
    y = data[data.columns[-1]]

    # Alternative feature selection, currently disabled:
    # lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(X, y)
    # model = SelectFromModel(lsvc, prefit=True)
    # X = model.transform(X)

    # Project the features onto three principal components.
    pca = PCA(n_components=3)
    pca.fit(X)
    X = pca.transform(X)

    colors = ['navy', 'darkorange']

    for X_transformed, title in [(X, "PCA")]:
        plt.figure(figsize=(8, 8))
# Make the parent of ``root`` importable.  ``root`` exposes ``.parent`` and is
# presumably a ``pathlib.Path`` (confirm where it is defined).  ``sys.path``
# must only hold strings: a path object never compares equal to the existing
# string entries, and non-string entries are ignored by the import system, so
# convert to ``str`` for both the membership test and the append.
root = root.parent
if str(root) not in sys.path:
    sys.path.append(str(root))

from metrics.abcd import ABCD
from data.data_handler import DataHandler
from prediction.model import PredictionModel

import warnings
warnings.filterwarnings("ignore")

if __name__ == "__main__":
    dh = DataHandler()
    mdl = PredictionModel()
    data = dh.get_data(top_k=25)

    # Create a table that can be pretty-printed
    # -----------------------------------------
    results = PrettyTable()
    results.field_names = ["Project", "   G", "  Pd", "  Pf", "  F1",
                           " Prec", " IFA", "PCI20"]

    # Align data: project names to the left, metric columns to the right
    # -------------------------------------------------------------------
    results.align["Project"] = "l"
    for column in ("   G", "  Pd", "  Pf", "  F1", " IFA"):
        results.align[column] = "r"
from pathlib import Path root = Path(os.path.abspath(os.path.join(os.getcwd().split("src")[0], 'src'))) if root not in sys.path: sys.path.append(str(root)) from metrics.abcd import ABCD from data.data_handler import DataHandler from prediction.model import PredictionModel if __name__ == "__main__": dh = DataHandler() mdl = PredictionModel() data = dh.get_data() "Create a Table than can pretty printed" results = PrettyTable() results.field_names = ["Train", "Test ", " Pd", " Pf", " F1"] "Align Data" results.align["Train"] = "l" results.align["Test "] = "l" results.align[" Pd"] = "r" results.align[" Pf"] = "r" results.align[" F1"] = "r" for proj, dataset in data.items(): dataset_keys = sorted(dataset.keys()) for trn, tst in zip(dataset_keys[:-1], dataset_keys[1:]):