def fit_serveral_genes(series, fitter, loo, filename, b_show):
    """Plot `series` (with a fit when a fitter is given) and save the figure.

    Args:
        series: object exposing `ages` and `expression`; also handed to
            `plot_series`.
        fitter: object exposing `fit(...)` and `shape`, or None to plot the
            series without fitting anything.
        loo: forwarded to `fitter.fit` as its `loo` keyword argument.
        filename: path the figure is saved to. When None, `ensure_dir` is
            called on `results_dir()` and 'fits.png' inside it is used.
        b_show: when truthy, `plt.show(block=True)` is called after saving.
    """
    if fitter is not None:
        theta, L, LOO_predictions, _ = fitter.fit(
            series.ages, series.expression, loo=loo
        )
        # Parenthesised single-argument print works on both Python 2 and 3.
        print('L = {}'.format(L))
        fig = plot_series(series, fitter.shape, theta, LOO_predictions)
    else:
        fig = plot_series(series)

    if filename is None:
        ensure_dir(results_dir())
        filename = join(results_dir(), 'fits.png')

    print('Saving figure to {}'.format(filename))
    save_figure(fig, filename)

    if b_show:
        plt.show(block=True)
def fit_serveral_genes(series, fitter, loo, filename, b_show):
    """Optionally fit `series`, plot it, save the plot and optionally show it.

    Args:
        series: object with `ages` and `expression` attributes; passed on to
            `plot_series`.
        fitter: None to plot the bare series, otherwise an object providing
            `fit(...)` and `shape`.
        loo: passed through as the `loo` keyword of `fitter.fit`.
        filename: where to save the figure. If None, `ensure_dir` is invoked
            on `results_dir()` and the file 'fits.png' under it is used.
        b_show: if truthy, `plt.show(block=True)` runs once the figure has
            been saved.
    """
    if fitter is None:
        fig = plot_series(series)
    else:
        theta, L, LOO_predictions, _ = fitter.fit(
            series.ages, series.expression, loo=loo
        )
        # print(...) with one argument is valid and equivalent on Python 2
        # and Python 3, unlike the bare print statement.
        print('L = {}'.format(L))
        fig = plot_series(series, fitter.shape, theta, LOO_predictions)

    if filename is None:
        ensure_dir(results_dir())
        filename = join(results_dir(), 'fits.png')

    print('Saving figure to {}'.format(filename))
    save_figure(fig, filename)

    if b_show:
        plt.show(block=True)
def transform(self, X):
    """Return `self._gauss_basis` evaluated on X with a new trailing axis.

    NOTE(review): takes `self` and reads `self._centres` / `self._width`, so
    this is presumably a method of a class whose header is outside this
    view -- confirm the enclosing indentation when applying.
    """
    expanded = X[:, :, np.newaxis]
    return self._gauss_basis(expanded, self._centres, self._width, axis=1)


def linear(train, test, t=132):
    """Fit a GaussianFeatures(40) + LinearRegression pipeline and predict.

    Args:
        train: object with a `.values` attribute used as regression targets;
            also passed to `sklearn_formatting`.
        test: object with a `.values` attribute compared against the
            predictions; also passed to `sklearn_formatting`.
        t: not used by this function.

    Returns:
        Tuple `(y_fit, y_pred, rmse)`: predictions on the training inputs,
        predictions on the test inputs, and the value returned by
        `error(test.values, y_pred)`.
    """
    features_train, features_test = sklearn_formatting(train, test)

    pipeline = make_pipeline(GaussianFeatures(40), LinearRegression())
    pipeline.fit(features_train, train.values)

    in_sample = pipeline.predict(features_train)
    # predict a cycle
    forecast = pipeline.predict(features_test)

    return in_sample, forecast, error(test.values, forecast)


if __name__ == "__main__":
    # Script entry: load the data, run the linear model, plot the result.
    df_train, df_test = get_data()
    lin_y, lin_y_pred, lin_rmse = linear(df_train, df_test)
    plot_series(df_train, df_test, lin_y, lin_y_pred)
def arima_fn(train, test, order=(10, 1, 9)):
    """Fit an ARIMA model on `train` and predict over the length of `test`.

    Args:
        train: object with `.values` and `.index`; its values are fitted.
        test: object with `.values`; its length sets the prediction span.
        order: order tuple handed to `ARIMA`.

    Returns:
        Tuple `(y_fit, y_pred, rmse)`: the cumulative sum of the fitted
        values (indexed by `train.index[1:]`), the cumulative sum of the
        predictions, and the value returned by `error(test.values, y_pred)`.
    """
    # fit ARIMA with calculated order from above
    fitted_model = ARIMA(train.values, order=order).fit(disp=-1)

    fitted_series = pd.Series(
        fitted_model.fittedvalues,
        index=train.index[1:],
        copy=True,
    )
    in_sample = fitted_series.cumsum()

    # predict a cycle
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    forecast = fitted_model.predict(start=first_step, end=last_step).cumsum()

    return in_sample, forecast, error(test.values, forecast)


if __name__ == "__main__":
    # Script entry: load data, apply `stationary` to both splits, fit, plot.
    df_train, df_test = get_data()
    df_train = stationary(df_train)
    df_test = stationary(df_test)
    arima_y, arima_y_pred, arima_rmse = arima_fn(df_train, df_test)
    # The first training row is dropped for plotting, matching the
    # `train.index[1:]` index used for the fitted values.
    plot_series(df_train.iloc[1:], df_test, arima_y, arima_y_pred)
# NOTE(review): fragment -- `fits`, `fitter`, `series`, `x` and `y` are
# defined outside this view; the enclosing indentation is not visible.

# Fit every column of y on its own (one per entry in series.gene_names).
for i, g in enumerate(series.gene_names):
    # Parenthesised single-argument print works on both Python 2 and 3.
    print('Fitting series {}...'.format(i + 1))
    theta, sigma, LOO_predictions, _ = fitter.fit(x, y[:, i], loo=True)
    fit = Bunch(
        theta=theta,
        LOO_predictions=LOO_predictions,
    )
    fits.append(fit)

# Fit all columns together and keep the last element of the returned levels.
print('Fitting with correlations...')
levels = fitter.fit_multi(x, y, loo=True, n_iterations=2)
res = levels[-1]
print('Theta:')
for ti in res.theta:
    print(' {}'.format(ti))
print('Sigma:')
print(res.sigma)
plot_series(series, fitter.shape, res.theta, res.LOO_predictions)

# Score the per-column fit against the joint fit, column by column.
R2_pairs = []
for i, g in enumerate(series.gene_names):
    y_real = y[:, i]
    y_basic = fits[i].LOO_predictions
    y_multi_gene = res.LOO_predictions[:, i]
    # no NANs in the generated data, so no need to handle the original_inds mess
    basic_R2 = loo_score(y_real, y_basic)
    multi_gene_R2 = loo_score(y_real, y_multi_gene)
    R2_pairs.append((basic_R2, multi_gene_R2))

plot_comparison_scatter(R2_pairs, series.region_name)
print('R2_pairs = {}'.format(R2_pairs))
# NOTE(review): fragment -- `fitter`, `series`, `x` and `y` are defined
# outside this view; the enclosing indentation is not visible.

# Independent fit for each column of y (one per entry in series.gene_names).
fits = []
for i, g in enumerate(series.gene_names):
    # print(...) with a single argument is equivalent on Python 2 and 3,
    # unlike the bare print statement.
    print('Fitting series {}...'.format(i + 1))
    theta, sigma, LOO_predictions, _ = fitter.fit(x, y[:, i], loo=True)
    fit = Bunch(
        theta=theta,
        LOO_predictions=LOO_predictions,
    )
    fits.append(fit)

# Joint fit over all columns; only the last returned level is used below.
print('Fitting with correlations...')
levels = fitter.fit_multi(x, y, loo=True, n_iterations=2)
res = levels[-1]
print('Theta:')
for ti in res.theta:
    print(' {}'.format(ti))
print('Sigma:')
print(res.sigma)
plot_series(series, fitter.shape, res.theta, res.LOO_predictions)

# Pair up the score of the independent fit and the joint fit per column.
R2_pairs = []
for i, g in enumerate(series.gene_names):
    y_real = y[:, i]
    y_basic = fits[i].LOO_predictions
    y_multi_gene = res.LOO_predictions[:, i]
    # no NANs in the generated data, so no need to handle the original_inds mess
    basic_R2 = loo_score(y_real, y_basic)
    multi_gene_R2 = loo_score(y_real, y_multi_gene)
    R2_pairs.append((basic_R2, multi_gene_R2))

plot_comparison_scatter(R2_pairs, series.region_name)
print('R2_pairs = {}'.format(R2_pairs))
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ExpSineSquared
from plots import plot_series
from sunspots import error, get_data, sklearn_formatting


def gp(train, test, t=132):
    """Fit a Gaussian-process regressor on `train` and predict on `test`.

    The kernel is the sum of the constant `2**2`, two `ExpSineSquared`
    kernels and a `WhiteKernel`, added left to right.

    Args:
        train: object with a `.values` attribute used as regression targets;
            also passed to `sklearn_formatting`.
        test: object with a `.values` attribute compared against the
            predictions; also passed to `sklearn_formatting`.
        t: not used by this function.

    Returns:
        Tuple `(y_fit, y_pred, rmse)`: predictions on the training inputs,
        predictions on the test inputs, and the value returned by
        `error(test.values, y_pred)`.
    """
    features_train, features_test = sklearn_formatting(train, test)

    kernel = (
        2**2
        + ExpSineSquared(1, 60000.0)
        + ExpSineSquared(2, 120000.0)
        + WhiteKernel(2.5)
    )
    regressor = GaussianProcessRegressor(kernel=kernel)
    regressor.fit(features_train, train.values)

    in_sample = regressor.predict(features_train, return_std=False)
    # predict a cycle
    forecast = regressor.predict(features_test, return_std=False)

    return in_sample, forecast, error(test.values, forecast)


if __name__ == "__main__":
    # Script entry: load the data, run the GP model, plot the result.
    df_train, df_test = get_data()
    gauss_y, gauss_y_pred, gauss_rmse = gp(df_train, df_test)
    plot_series(df_train, df_test, gauss_y, gauss_y_pred)