def R2_adj(y_true, y_pred, dim1):
    """Compute ``r2_adj`` on the rows where target and prediction are finite.

    Both inputs are flattened into two columns and cast to float32; a row is
    kept only if neither value is NaN or +/-inf after that cast.

    Args:
        y_true: numpy array of observed values.
        y_pred: numpy array of predicted values with the same number of
            elements as ``y_true``.
        dim1: forwarded unchanged to ``r2_adj`` as ``dim1`` (presumably the
            number of regressors -- confirm against ``r2_adj``).

    Returns:
        ``numpy.nan`` when no finite rows remain, otherwise the value returned
        by ``r2_adj`` for the filtered sample.
    """
    paired = numpy.concatenate(
        [y_true.reshape(-1, 1), y_pred.reshape(-1, 1)], axis=1)
    # The float32 cast is kept from the original: values that overflow
    # float32 become inf and are therefore dropped as well.
    paired = numpy.array(paired, dtype=numpy.float32)
    valid_rows = numpy.isfinite(paired).all(axis=1)

    y_true_, y_pred_ = y_true[valid_rows], y_pred[valid_rows]
    n_valid = y_true_.shape[0]
    if n_valid == 0:
        return numpy.nan

    n_dropped = y_true.shape[0] - n_valid
    if n_dropped:
        print(
            'R2_adj: the sample contains NaNs, they were dropped\tN of dropped NaNs: {0}'
            .format(n_dropped))

    # dim0 must describe the sample that is actually scored, i.e. the rows
    # left after filtering, not the size of the raw input.
    return r2_adj(y_true=y_true_, y_pred=y_pred_, dim0=n_valid, dim1=dim1)
def score(self, measure, dim0_mask=None, ts_report=False):
    """Score the model's predictions against the stored targets.

    Args:
        measure: one of ``'r2_adj'``, ``'mae'`` or ``'r2'``.
        dim0_mask: optional mask passed to ``self.predict`` and temporarily
            installed as ``self.data.mask.d0`` while the data is read.
        ts_report: when true, also return diagnostics of the residuals.

    Returns:
        The requested measure, or the tuple
        ``(measure, stationarity_score, skewness)`` when ``ts_report`` is set.
        ``stationarity_score`` is element ``[1]`` of the ``adfuller`` result.

    Raises:
        Exception: if ``measure`` is not one of the supported names.
    """
    Y_hat = self.predict(dim0_mask)

    if dim0_mask is None:
        X, Y = self.data.values
    else:
        old_d0 = self.data.mask.d0
        self.data.mask.d0 = dim0_mask
        try:
            X, Y = self.data.values
        finally:
            # Always put the previous mask back, even if reading the data
            # fails, so the object is not left in the temporary state.
            self.data.mask.d0 = old_d0

    if measure == 'r2_adj':
        measured = r2_adj(Y, Y_hat, X.shape[0], X.shape[1])
    elif measure == 'mae':
        measured = mean_absolute_error(Y, Y_hat)
    elif measure == 'r2':
        measured = r2_score(Y, Y_hat)
    else:
        raise Exception("Not yet!")

    if not ts_report:
        return measured

    # Flatten both sides: subtracting an (n, 1) prediction from an (n,)
    # target would broadcast to an (n, n) matrix instead of n residuals.
    errors = Y.ravel() - numpy.ravel(Y_hat)
    # NOTE(review): if this is statsmodels' adfuller, newer releases spell the
    # no-constant option 'n' instead of 'nc' -- confirm the pinned version.
    stationarity_score = adfuller(errors, regression='nc')[1]
    skewness = stats.skew(errors)
    return measured, stationarity_score, skewness
def insane(das_model, data, mask_thresh, multiple_model_args, tsi_names,
           y_names, removes, test_rate=0.2, n_folds=1):
    """Search model parameters across a fixed list of column transforms.

    For every transform configuration, every parameter set and every training
    fold, a fresh ``das_model(**params)`` is fitted on the transformed fold
    (rows containing NaN after the transform are skipped) and scored with
    ``r2_adj`` on the current fold, on the next fold (the test set for the
    last fold) and on the test set.

    Args:
        das_model: callable building a model exposing ``fit`` and ``predict``.
        data, y_names, removes, test_rate, n_folds: forwarded to ``load_data``.
        mask_thresh: column index splitting the features; columns before it
            get the first transform of a configuration, the rest the second.
        multiple_model_args: sequence of keyword-argument dicts for
            ``das_model``.
        tsi_names: not used by this function.

    Returns:
        ``pandas.DataFrame`` with one row per (transform, params, fold). The
        column layout, including the repeated ``'d1'`` column, is kept as the
        original header declared it.

    Raises:
        Exception: ``'Ded Inside'`` when a transformed fold has no NaN-free
            row left (after printing a diagnostic dump).
    """
    columns = [
        'Np', 'Nf', 'Ns', 'R2_adj_cur_fold', 'R2_adj_nxt_fold', 'R2_adj_test',
        'smoother', 'd1', 'params', 'd1', 'X_adj_'
    ]
    report = pandas.DataFrame(columns=columns)

    X_train, Y_train, X_test, Y_test, X_ = load_data(
        data, y_names, removes, test_rate, n_folds)

    g_mask = numpy.ones(shape=(X_train[0].shape[1]), dtype=bool)
    quotes_mask, news_mask = g_mask.copy(), g_mask.copy()
    quotes_mask[mask_thresh:] = False
    news_mask[:mask_thresh] = False
    target_mask = numpy.array([True])

    # (transform for the columns before mask_thresh, transform for the rest);
    # the target always uses the first transform of the pair.
    transform_pairs = [
        ('LnPct', 'Nothing'),
        ('TanhLnPct', 'Nothing'),
        ('LnPct', 'LnPct'),
        ('TanhLnPct', 'TanhLnPct'),
        ('Whiten', 'Nothing'),
        ('TanhWhiten', 'Nothing'),
        ('Whiten', 'Whiten'),
        ('TanhWhiten', 'TanhWhiten'),
    ]
    # NOTE(review): if Insane hashes/compares by name, the pairs with two
    # identical names collapse into a single dict entry -- confirm.
    maskeds_X = [{
        Insane(my_name=first): quotes_mask,
        Insane(my_name=second): news_mask
    } for first, second in transform_pairs]
    maskeds_Y = [{
        Insane(my_name=first): target_mask
    } for first, _ in transform_pairs]
    maskeds_coded = [
        'LnPct_No', 'TanhLnPct_No', 'LnPct_LnPct', 'TanhLnPct_TanhLnPct',
        'Whiten_No', 'TanhWhiten_No', 'Whiten_Whiten', 'TanhWhiten_TanhWhiten'
    ]

    verbose_step = 10
    n_iters = len(maskeds_X) * len(multiple_model_args) * n_folds
    print('N of expected iters = {0}'.format(n_iters))
    print('Started search: {0}'.format(datetime.datetime.now().isoformat()))

    for fold in X_train:
        print('here go those trainers')
        print(fold.shape)
    print('say hello')

    # Test rows without NaN in the raw features; independent of the loops.
    test_rows = ~pandas.isna(X_test).any(axis=1)
    X_test_clean = X_test[test_rows, :]
    Y_test_clean = Y_test[test_rows, :]

    rows = []
    it = 0
    for s in range(len(maskeds_X)):
        smoother_X_train = []
        smoother_Y_train = []
        for j in range(len(X_train)):
            # The configuration is selected by the transform index ``s``;
            # indexing by the fold ``j`` made every pass of the outer loop
            # reuse the same transform and overran the list for > 8 folds.
            smt_X = Compakt(maskeds_X[s], maskeds_coded[s])
            smt_X.fit(array=X_train[j])
            smoother_X_train.append(smt_X)
            smt_Y = Compakt(maskeds_Y[s], maskeds_coded[s])
            smt_Y.fit(array=Y_train[j])
            smoother_Y_train.append(smt_Y)

        X_train_ = [
            smoother_X_train[z].forward(array=X_train[z])
            for z in range(len(X_train))
        ]
        Y_train_ = [
            smoother_Y_train[z].forward(array=Y_train[z])
            for z in range(len(Y_train))
        ]
        # Per fold: rows whose transformed features contain no NaN.
        train_rows = [~pandas.isna(x).any(axis=1) for x in X_train_]

        for i, params in enumerate(multiple_model_args):
            for j in range(len(X_train_)):
                model_ = das_model(**params)
                rows_j = train_rows[j]
                X_fit = X_train_[j][rows_j, :]

                if X_fit.shape[0] == 0:
                    # Nothing to fit on: dump the data for inspection first.
                    for lo in range(0, 100, 20):
                        print(X_train_[j][:, lo:lo + 20])
                    print('------------------')
                    print(X_train[j][:, 0:20])
                    print(smoother_X_train[j].forward(X_train[j])[:, 0:20])
                    print(smoother_X_train[j].say_my_name())
                    raise Exception('Ded Inside')

                model_.fit(X_fit, Y_train_[j][rows_j, :].ravel())

                # NOTE(review): unlike the other predictions this one is not
                # reshaped to a column before ``backward`` -- kept as it was.
                Y_hat_train = smoother_Y_train[j].backward(
                    array=model_.predict(X_fit))
                Y_hat_test = smoother_Y_train[j].backward(
                    array=model_.predict(smoother_X_train[j].forward(
                        array=X_test_clean)).reshape(-1, 1))

                # Targets use the same row filter as the predictions, and the
                # sample size is that of the scored sample.
                test_score = r2_adj(Y_test_clean, Y_hat_test,
                                    X_test_clean.shape[0], X_test.shape[1])

                if j < (len(X_train_) - 1):
                    rows_next = train_rows[j + 1]
                    # Feed the raw next fold: X_train_[j + 1] has already been
                    # transformed and would be transformed a second time here.
                    X_next = X_train[j + 1][rows_next, :]
                    Y_hat_ded = smoother_Y_train[j].backward(
                        array=model_.predict(smoother_X_train[j].forward(
                            array=X_next)).reshape(-1, 1))
                    nxt_folded = r2_adj(Y_train[j + 1][rows_next, :],
                                        Y_hat_ded, X_next.shape[0],
                                        X_train_[j + 1].shape[1])
                else:
                    nxt_folded = test_score

                rows.append({
                    'Np': i,
                    'Nf': j,
                    'Ns': s,
                    'R2_adj_cur_fold': r2_adj(Y_train[j][rows_j, :],
                                              Y_hat_train, X_fit.shape[0],
                                              X_train_[j].shape[1]),
                    'R2_adj_nxt_fold': nxt_folded,
                    'R2_adj_test': test_score,
                    'smoother': smoother_X_train[j].say_my_name(),
                    'd1': X_train_[j].shape[1],
                    'params': params,
                    'X_adj_': X_
                })

                if it % verbose_step == 0:
                    print('{0} / {1}'.format(it, n_iters))
                it += 1

    if rows:
        # Build the frame once; DataFrame.append is quadratic in a loop and
        # was removed in pandas 2.0. Selecting by ``columns`` keeps the
        # declared layout, including the repeated 'd1'.
        report = pandas.DataFrame(rows)[columns]

    print('{0} / {0}'.format(n_iters))
    print('Finished search: {0}'.format(datetime.datetime.now().isoformat()))
    return report
def goded(das_model, data, multiple_model_args, tsi_names, y_names, removes,
          test_rate=0.2, n_folds=1):
    """Search model parameters across the ``Distributed`` smoothers.

    For every smoother configuration, every parameter set and every training
    fold, a fresh ``das_model(**params)`` is fitted on the smoothed fold and
    scored with ``r2_adj`` on the current fold, on the next fold (the test set
    for the last fold) and on the test set. Predictions are mapped back with
    the fold's target smoother before scoring against the raw targets.

    Args:
        das_model: callable building a model exposing ``fit`` and ``predict``.
        data, y_names, removes, test_rate, n_folds: forwarded to ``load_data``.
        multiple_model_args: sequence of keyword-argument dicts for
            ``das_model``.
        tsi_names: not used by this function.

    Returns:
        ``pandas.DataFrame`` with one row per (smoother, params, fold). The
        column layout, including the repeated ``'d1'`` column, is kept as the
        original header declared it.
    """
    columns = [
        'Np', 'Nf', 'Ns', 'R2_adj_cur_fold', 'R2_adj_nxt_fold', 'R2_adj_test',
        'smoother', 'd1', 'params', 'd1', 'X_adj_'
    ]
    report = pandas.DataFrame(columns=columns)

    X_train, Y_train, X_test, Y_test, X_ = load_data(
        data, y_names, removes, test_rate, n_folds)

    # A fourth configuration ('Uniform') is disabled in the original source.
    smoothers = [Distributed, Distributed, Distributed]
    smoothers_args = [
        {'my_name': 'Nothing'},
        {'my_name': 'Simple'},
        {'my_name': 'Normal'},
    ]

    verbose_step = 10
    n_iters = len(smoothers) * len(multiple_model_args) * n_folds
    print('N of expected iters = {0}'.format(n_iters))
    print('Started search: {0}'.format(datetime.datetime.now().isoformat()))

    rows = []
    it = 0
    for s in range(len(smoothers)):
        smoother_X_train = []
        smoother_Y_train = []
        for j in range(len(X_train)):
            smt_X = smoothers[s](**smoothers_args[s])
            smt_X.fit(array=X_train[j])
            smoother_X_train.append(smt_X)
            smt_Y = smoothers[s](**smoothers_args[s])
            smt_Y.fit(array=Y_train[j])
            smoother_Y_train.append(smt_Y)

        X_train_ = [
            smoother_X_train[z].forward(array=X_train[z])
            for z in range(len(X_train))
        ]
        Y_train_ = [
            smoother_Y_train[z].forward(array=Y_train[z])
            for z in range(len(Y_train))
        ]

        for i, params in enumerate(multiple_model_args):
            for j in range(len(X_train_)):
                x_smoother = smoother_X_train[j]
                y_smoother = smoother_Y_train[j]
                n_features = X_train_[j].shape[1]

                model_ = das_model(**params)
                model_.fit(X_train_[j], Y_train_[j].ravel())

                Y_hat_train = y_smoother.backward(
                    array=model_.predict(X_train_[j]).reshape(-1, 1))
                Y_hat_test = y_smoother.backward(
                    array=model_.predict(
                        x_smoother.forward(array=X_test)).reshape(-1, 1))

                # The sample size handed to r2_adj is that of the sample being
                # scored, not of the fold the model was trained on.
                test_score = r2_adj(Y_test, Y_hat_test, X_test.shape[0],
                                    n_features)

                if j < (len(X_train_) - 1):
                    # Feed the raw next fold: X_train_[j + 1] has already been
                    # smoothed and would be transformed a second time here.
                    Y_hat_ded = y_smoother.backward(
                        array=model_.predict(x_smoother.forward(
                            array=X_train[j + 1])).reshape(-1, 1))
                    nxt_folded = r2_adj(Y_train[j + 1], Y_hat_ded,
                                        X_train[j + 1].shape[0], n_features)
                else:
                    nxt_folded = test_score

                rows.append({
                    'Np': i,
                    'Nf': j,
                    'Ns': s,
                    'R2_adj_cur_fold': r2_adj(Y_train[j], Y_hat_train,
                                              X_train_[j].shape[0],
                                              n_features),
                    'R2_adj_nxt_fold': nxt_folded,
                    'R2_adj_test': test_score,
                    'smoother': x_smoother.say_my_name(),
                    'd1': n_features,
                    'params': params,
                    'X_adj_': X_
                })

                if it % verbose_step == 0:
                    print('{0} / {1}'.format(it, n_iters))
                it += 1

    if rows:
        # Build the frame once; DataFrame.append is quadratic in a loop and
        # was removed in pandas 2.0. Selecting by ``columns`` keeps the
        # declared layout, including the repeated 'd1'.
        report = pandas.DataFrame(rows)[columns]

    print('{0} / {0}'.format(n_iters))
    print('Finished search: {0}'.format(datetime.datetime.now().isoformat()))
    return report