def generate_params_file(self):
    """Dump this object's parameters to the experiment's ``params.yml``.

    Every entry of ``self.__dict__`` except ``data``, ``t`` and ``t0`` is
    written.  If a ``params.yml`` already exists for
    ``self.experiment_name``, its ``experiments`` entry is appended (as one
    single element) to the current ``experiments`` value before the file is
    overwritten.

    Raises:
        KeyError: if the instance has no ``experiments`` attribute, or the
            existing file has no ``experiments`` key, while a previous
            ``params.yml`` exists.
    """
    # Resolve the target once; it is used for the existence check, the read
    # and the write.
    params_path = config.get_filename('params.yml', self.experiment_name)
    params2save = {k: v for k, v in self.__dict__.items() if k not in ['data', 't', 't0']}

    # get previous experiments
    if os.path.exists(params_path):
        with open(params_path, 'r') as infile:
            # An explicit Loader is required by PyYAML >= 6 (calling
            # yaml.load without one raises TypeError) and silences the
            # warning on 5.x.  yaml.Loader keeps the historical behaviour of
            # rebuilding whatever yaml.dump wrote below, including Python
            # object tags.
            # SECURITY: yaml.Loader can construct arbitrary objects; this is
            # only acceptable because the file is expected to be produced by
            # this same method.  Do not point it at untrusted files.
            previous = yaml.load(infile, Loader=yaml.Loader)
        # NOTE(review): the dict comprehension copies references, so this
        # append acts on the very object stored in self.__dict__['experiments'],
        # and it nests the previous value as one element rather than
        # extending -- confirm both are intended.
        params2save['experiments'].append(previous['experiments'])

    with open(params_path, 'w') as outfile:
        yaml.dump(params2save, outfile, default_flow_style=False)
def explore(self, x_operator_func, y_operator_func, rational=False):
    """Fit, plot and predict for every country/period and variable set.

    For each ``(df, country, period)`` produced by
    ``self.get_country_data()`` and for each candidate variable (first all
    variables together, then each one on its own) this method:

    1. builds a ``DataManager`` with the X/y operators returned by the given
       factories and fits it through ``self.fit_eqdifff``;
    2. accumulates the fitted coefficients, tagged with country and period,
       in a ``stats`` frame;
    3. saves a coefficient plot and a fitted-vs-real plot as PNG files;
    4. computes predictions through ``self.optimize_predictions`` and stores
       everything under ``self.info[country][base_name]``.

    Finally the accumulated coefficients are written to CSV and
    ``self.plot_results()`` is called.

    Args:
        x_operator_func: callable invoked as ``x_operator_func(rational=rational)``;
            its result is passed to ``DataManager.set_X_operator``.
        y_operator_func: callable invoked with no arguments; its result is
            passed to ``DataManager.set_y_operator``.
        rational: forwarded to ``x_operator_func`` and to
            ``self.optimize_predictions``.
    """
    subfolders = [self.type_of_experiment]
    # Coefficient table grown with one pd.concat per fitted model.
    stats = pd.DataFrame([])
    for df, country, period in self.get_country_data():
        print('\n\n========== ========== ========== ==========')
        print('Exploring {}'.format(country))
        if country not in self.info.keys():
            self.info[country] = {}
        self.set_underlying_model(df)
        # First element is the full collection returned by get_variables(),
        # followed by each of its items individually.
        for variable in [self.get_variables()] + [variable for variable in self.get_variables()]:
            variable = Field(variable)
            base_name = str(variable)
            # Skip variable sets that were not requested, unless 'all' is accepted.
            if 'all' not in self.accepted_variables and base_name not in self.accepted_variables:
                continue
            print('\nVariable {}'.format(base_name))
            if base_name not in self.info[country].keys():
                self.info[country][base_name] = []

            # ---------- fit eqdiff ----------
            data_manager = DataManager()
            data_manager.add_variables(variable)
            # data_manager.add_regressors(self.get_regressors())
            data_manager.set_domain()
            data_manager.set_X_operator(x_operator_func(rational=rational))
            data_manager.set_y_operator(y_operator_func())
            pde_finder = self.fit_eqdifff(data_manager)
            # Prepend country / period label / period end date columns to the
            # coefficient rows and stack them under the rows gathered so far.
            stats = pd.concat([stats,
                               pd.concat([pd.DataFrame([[country, period.label, period.fecha]],
                                                       index=pde_finder.coefs_.index,
                                                       columns=['country', 'medidas', 'fecha_final']),
                                          pde_finder.coefs_], axis=1)], axis=0, sort=True)

            # ---------- plot ----------
            with savefig('{}_{}_coeficients.png'.format(base_name, country), self.experiment_name,
                         subfolders=subfolders, format='png'):
                self.plot_coefficients(pde_finder)
                plt.xscale('log')
            with savefig('{}_{}_fitvsreal.png'.format(base_name, country), self.experiment_name,
                         subfolders=subfolders, format='png'):
                self.plot_fitted_and_real(pde_finder, data_manager, col="blue", subinit=None, sublen=None)

            # --------- predictions ---------
            predictions_temp = self.optimize_predictions(pde_finder, variable, x_operator_func, y_operator_func,
                                                         data_manager, period, rational)
            # Keep everything plot_results() reads later for this fit.
            self.info[country][base_name].append({'coefs': pde_finder.coefs_, 'period': period,
                                                  'data_real': data_manager.field,
                                                  'predictions': predictions_temp, 'data_raw': df})

    # NOTE(review): the file name uses base_name, i.e. whichever variable set
    # the loops visited last (and it is undefined if no iteration ran) --
    # confirm this statement is meant to sit after the loops.
    stats.to_csv(config.get_filename(filename='{}_coefs.csv'.format(base_name),
                                     experiment=self.experiment_name,
                                     subfolders=[self.type_of_experiment]))
    self.plot_results()
def load_csv(filename, experiment, subfolders=[], astypes={}, verbose=False):
    """Read ``<get_filename(...)>.csv`` into a DataFrame, if it exists.

    The path is built by ``get_filename(filename, experiment, subfolders)``
    with ``'.csv'`` appended.  The first CSV column becomes the index and
    ``convert_type`` is applied to the frame through ``DataFrame.apply``.

    Args:
        filename: name handed to ``get_filename`` (without the ``.csv`` suffix).
        experiment: experiment identifier handed to ``get_filename``.
        subfolders: sub-folder list handed to ``get_filename``.
        astypes: accepted but not used by this function.
        verbose: when true, print the path being loaded.

    Returns:
        The loaded DataFrame, or ``None`` when the file does not exist.
    """
    base_path = get_filename(filename, experiment, subfolders)
    csv_path = base_path + '.csv'
    if verbose:
        print('Loading in: {}'.format(csv_path))

    # Missing file is reported with None rather than an exception.
    if not os.path.exists(csv_path):
        return None

    frame = pd.read_csv(csv_path, index_col=0)
    return frame.apply(convert_type)
def savefig(figname, experiment, subfolders=[], verbose=False, format='eps'):
    """Yield once, then save and close the current pyplot figure.

    This is a generator with a single ``yield``: the caller draws its figure
    while the generator is suspended, and on resumption the current figure is
    written to ``get_filename(figname, experiment, subfolders)`` at 500 dpi.

    NOTE(review): no ``contextlib.contextmanager`` decorator is part of this
    block; using the function in a ``with`` statement requires it to be
    applied -- confirm where that happens.

    Args:
        figname: file name handed to ``get_filename``.
        experiment: experiment identifier handed to ``get_filename``.
        subfolders: sub-folder list handed to ``get_filename``.
        verbose: when true, print the destination path before saving.
        format: image format forwarded to ``plt.savefig``.
    """
    try:
        yield
        # Only reached when the drawing code finished without raising, so a
        # half-drawn figure is never written to disk.
        filename = get_filename(figname, experiment, subfolders)
        if verbose:
            print('Saving in: {}'.format(filename))
        plt.savefig(filename, dpi=500, format=format)
    finally:
        # Always release the figure, including when the drawing code or the
        # save itself raised; otherwise it stays open as pyplot's current
        # figure and later plots would draw onto it.
        plt.close()
def save(data, path, experiment, subfolders=[]):
    """Yield once, then persist ``data`` as CSV or pickle.

    This is a generator with a single ``yield``; the write happens when the
    generator is resumed.  The destination is
    ``get_filename(path, experiment, subfolders)`` and the format is chosen
    from the text after the last ``'.'`` in that path:

    * ``csv``    -> ``data.to_csv(filename)``
    * ``pickle`` -> ``pickle.dump(data, file)``

    Any other extension results in nothing being written.

    Args:
        data: object to store; must provide ``to_csv`` for the CSV case.
        path: file name handed to ``get_filename``.
        experiment: experiment identifier handed to ``get_filename``.
        subfolders: sub-folder list handed to ``get_filename``.
    """
    yield
    filename = get_filename(path, experiment, subfolders)
    print('Saving in: {}'.format(filename))
    # str.split must be *called*; the previous ``filename.split['.']``
    # subscripted the method object and raised TypeError on every use.
    extension = filename.split('.')[-1]
    if extension == 'csv':
        data.to_csv(filename)
    elif extension == 'pickle':
        with open(filename, 'wb') as f:
            pickle.dump(data, f)
def load(path, experiment, subfolders=[]):
    """Load a CSV or pickle file belonging to an experiment.

    The file is located with ``get_filename(path, experiment, subfolders)``
    and the format is chosen from the text after the last ``'.'`` in that
    path.

    Args:
        path: file name handed to ``get_filename``.
        experiment: experiment identifier handed to ``get_filename``.
        subfolders: sub-folder list handed to ``get_filename``.

    Returns:
        The result of ``pd.read_csv`` for ``.csv`` files, the unpickled
        object for ``.pickle`` files, or ``False`` when the file does not
        exist.

    Raises:
        Exception: if the file exists but is neither ``.csv`` nor ``.pickle``.
    """
    filename = get_filename(path, experiment, subfolders)
    if not os.path.exists(filename):
        # Callers get False (not None / an exception) for a missing file.
        return False

    print('loading ', filename)
    # str.split must be *called*; the previous ``filename.split['.']``
    # subscripted the method object and raised TypeError for every existing
    # file.
    extension = filename.split('.')[-1]
    if extension == 'csv':
        return pd.read_csv(filename)
    if extension == 'pickle':
        with open(filename, 'rb') as f:
            # SECURITY: pickle.load executes code embedded in the file; only
            # use this on files written by this project.
            return pickle.load(f)
    raise Exception(filename, 'is not pickle or csv')
def plot_results(self):
    """Plot real data, training data and predictions stored in ``self.info``.

    ``self.info`` is walked as ``country -> variable-set -> list of info
    dicts``.  For every (country, variable-set) pair one figure is produced
    with one subplot per variable and saved through ``savefig`` as
    ``'<country>_predict_<names>.png'`` under ``self.type_of_experiment``.

    Side effects besides the figures:

    * for every info dict and variable a CSV of the predictions is written
      through ``config.get_filename``;
    * ``info['predictions']`` is modified in place: a ``'real'`` column is
      added/overwritten (``tosave`` is the same object, not a copy).
    """
    for country, vars_info in self.info.items():
        for _, list_info in vars_info.items():
            # ------ original data ------
            # Rows of the full dataset for this country, ordered by date.
            country_data = self.df_data.loc[self.df_data['Countries and territories'] == country, :]
            country_data = country_data.sort_values(by='DateRep')
            if self.cumulative:
                country_data['Deaths'] = country_data['Deaths'].cumsum()
                country_data['Cases'] = country_data['Cases'].cumsum()

            # Column names are recovered by dropping the last three characters
            # of each variable's string form, then looked up in
            # english2spanish_dict for the labels.
            original_var_names = [str(var)[:-3] for var in list_info[0]['data_real'].data]
            var_names = [english2spanish_dict[v] for v in original_var_names]
            # label -> artist; a dict so that repeated labels appear once in the legend.
            lines = {}
            with savefig('{}_predict_{}.png'.format(country, '_'.join(var_names)), self.experiment_name,
                         subfolders=[self.type_of_experiment], format='png'):
                nvars = len(original_var_names)
                fig, ax = plt.subplots(ncols=nvars, nrows=1, figsize=(8 * nvars, 8))
                # plt.subplots returns a bare Axes for a single subplot; wrap it
                # so the zip below works uniformly.
                if nvars == 1:
                    ax = [ax]
                for i, (temp_ax, original_var_name, var_name) in enumerate(zip(ax, original_var_names, var_names)):
                    temp_ax.set_title(english2spanish_dict[country] + ' ' + var_name.lower())
                    temp_ax.set_xlabel('Time')
                    temp_ax.set_ylabel(var_name)

                    # ------------------ plot real ------------------
                    lab = 'Data real {}'.format(var_name.lower())
                    lines[lab], = temp_ax.plot(country_data['DateRep'], country_data[original_var_name], '.-',
                                               c='tab:green', label=lab)

                    # xmin = np.Inf
                    # Accumulators shared by all info dicts of this variable.
                    real = []
                    t_real = []
                    ymax = 0
                    for info in list_info:
                        var = info['data_real'].data[i]
                        # Upper y-limit: twice the largest training value seen so far.
                        ymax = np.max((ymax, var.data.max() * 2))
                        # xmin = np.min((xmin, min([d.data.min() for d in info['data_real'].data])))
                        # plt.xlim(left=xmin)
                        temp_ax.set_ylim((0, ymax))

                        # ------------------ plot real ------------------
                        lab = 'Data real {} for train'.format(var_name.lower())
                        real2use = var.data.tolist()
                        real += real2use
                        dt = var.domain.step_width['t']
                        t_real += np.arange(var.domain.lower_limits['t'], var.domain.upper_limits['t'] + dt,
                                            dt).tolist()
                        # NOTE(review): t_real accumulates over all info dicts while
                        # real2use holds only the current one -- verify both have the
                        # same length when list_info has more than one entry.
                        t_real2use = info['data_raw'].loc[info['data_raw']['total_days'].isin(t_real), 'DateRep']
                        lines[lab], = temp_ax.plot(t_real2use, real2use, '.-', c='k', label=lab)

                        # ------------------ plot prediction ------------------
                        lab = 'Predictcion {}'.format(info['period'].label.lower())
                        # Earliest date whose 'total_days' appears in the prediction
                        # index; predictions are then plotted on consecutive days.
                        tmin = info['data_raw'].loc[
                            info['data_raw']['total_days'].isin(
                                info['predictions'].index), 'DateRep'].values.min()
                        t = pd.date_range(tmin, periods=info['predictions'].shape[0], freq='D')
                        lines[lab], = temp_ax.plot(t, info['predictions'].loc[:, str(var)].values, '-', label=lab)

                        # Same object as info['predictions']: the 'real' column is
                        # written into the stored frame.
                        tosave = info['predictions']
                        tosave['real'] = np.nan
                        # Fill 'real' only on prediction dates that also have
                        # training data.
                        tosave.loc[[True if j in t_real2use.tolist() else False for j in t], 'real'] \
                            = [r for j, r in zip(t_real2use, real) if j in t]
                        tosave.to_csv(
                            config.get_filename(
                                filename='{}_predictions_{}_{}.csv'.format('_'.join(var_names),
                                                                           info['period'].label.lower(),
                                                                           var_name),
                                experiment=self.experiment_name,
                                subfolders=[self.type_of_experiment]))

                        # Vertical marker at the period's date.  Any failure here is
                        # silently ignored by the bare except.
                        # NOTE(review): matplotlib documents axvline's ymin/ymax as
                        # axes fractions; ymax / 2 here is in data units -- confirm.
                        try:
                            lab = 'Fin {}'.format(info['period'].label.lower())
                            lines[lab] = temp_ax.axvline(pd.to_datetime(info['period'].fecha, dayfirst=True), c='r',
                                                         linestyle='-.', ymin=0, ymax=ymax / 2, label=lab)
                        except:
                            pass

                    temp_ax.grid(axis='x', color='gray', linestyle='-.', linewidth=1, alpha=0.65)
                    temp_ax.legend(list(lines.values()), list(lines.keys()))
                plt.tight_layout()
def save_csv(data, filename, experiment, subfolders=[], verbose=False):
    """Write ``data`` to ``<get_filename(...)>.csv``.

    Args:
        data: object exposing ``to_csv(path)``.
        filename: name handed to ``get_filename`` (without the ``.csv`` suffix).
        experiment: experiment identifier handed to ``get_filename``.
        subfolders: sub-folder list handed to ``get_filename``.
        verbose: when true, print the destination path before writing.
    """
    base_path = get_filename(filename, experiment, subfolders)
    csv_path = base_path + '.csv'
    if verbose:
        message = 'Saving in: {}'.format(csv_path)
        print(message)
    data.to_csv(csv_path)