Esempio n. 1
0
    def generate_params_file(self):
        """Dump this object's attributes to the experiment's ``params.yml``.

        Every instance attribute except ``data``, ``t`` and ``t0`` is written.
        If a ``params.yml`` already exists for ``self.experiment_name``, its
        ``experiments`` entry is appended (as a single element) to the current
        ``experiments`` list before the file is overwritten.

        Raises:
            KeyError: if an existing file is found and either this object or
                the file has no ``experiments`` entry.
        """
        params2save = {k: v for k, v in self.__dict__.items() if k not in ('data', 't', 't0')}
        # Resolve the target once; it is needed for the existence check, the
        # read and the write.
        params_path = config.get_filename('params.yml', self.experiment_name)

        # get previous experiments
        if os.path.exists(params_path):
            # yaml.load() requires an explicit Loader on PyYAML >= 6. FullLoader
            # is what PyYAML 5.x selected by default; releases older than 5.1
            # only provide Loader, hence the fallback.
            # NOTE: neither loader is meant for untrusted files.
            loader = getattr(yaml, 'FullLoader', yaml.Loader)
            with open(params_path, 'r') as infile:
                previous = yaml.load(infile, Loader=loader)
            # NOTE(review): append() nests the previous list inside the current
            # one, so the nesting grows by one level per run - confirm whether
            # extend() was intended.
            # NOTE(review): params2save['experiments'] is the very object held
            # in self.__dict__, so this also mutates the instance attribute.
            params2save['experiments'].append(previous['experiments'])

        with open(params_path, 'w') as outfile:
            yaml.dump(params2save, outfile, default_flow_style=False)
Esempio n. 2
0
    def explore(self, x_operator_func, y_operator_func, rational=False):
        """Fit, plot and predict for every country/period and variable selection.

        For each ``(df, country, period)`` produced by
        ``self.get_country_data()`` the underlying model is set from ``df``.
        The complete result of ``self.get_variables()`` is then processed
        first, followed by each of its elements on its own. A selection is
        skipped unless ``self.accepted_variables`` contains ``'all'`` or the
        selection's name.

        For each processed selection the method fits through
        ``self.fit_eqdifff``, saves a coefficient figure and a fit-vs-real
        figure, computes predictions with ``self.optimize_predictions``,
        appends the outcome to ``self.info[country][name]``, writes the
        coefficient table accumulated so far to ``<name>_coefs.csv`` and calls
        ``self.plot_results()``.

        :param x_operator_func: called as ``x_operator_func(rational=rational)``;
            the result is handed to ``DataManager.set_X_operator``.
        :param y_operator_func: called without arguments; the result is handed
            to ``DataManager.set_y_operator``.
        :param rational: forwarded to ``x_operator_func`` and to
            ``self.optimize_predictions``.
        """
        subfolders = [self.type_of_experiment]
        stats = pd.DataFrame([])
        for df, country, period in self.get_country_data():
            print('\n\n========== ========== ========== ==========')
            print('Exploring {}'.format(country))
            if country not in self.info:
                self.info[country] = {}

            self.set_underlying_model(df)

            # The whole variable collection goes first, then every variable alone.
            selections = [self.get_variables()] + list(self.get_variables())
            for selection in selections:
                variable = Field(selection)
                base_name = str(variable)
                accepted = self.accepted_variables
                if not ('all' in accepted or base_name in accepted):
                    continue
                print('\nVariable {}'.format(base_name))

                if base_name not in self.info[country]:
                    self.info[country][base_name] = []

                # ---------- fit eqdiff ----------
                data_manager = DataManager()
                data_manager.add_variables(variable)
                # data_manager.add_regressors(self.get_regressors())
                data_manager.set_domain()

                data_manager.set_X_operator(x_operator_func(rational=rational))
                data_manager.set_y_operator(y_operator_func())
                pde_finder = self.fit_eqdifff(data_manager)

                # Tag the fitted coefficients with country/period columns and
                # stack them under the rows gathered so far.
                coefs = pde_finder.coefs_
                period_tags = pd.DataFrame([[country, period.label, period.fecha]],
                                           index=coefs.index,
                                           columns=['country', 'medidas', 'fecha_final'])
                tagged_coefs = pd.concat([period_tags, coefs], axis=1)
                stats = pd.concat([stats, tagged_coefs], axis=0, sort=True)

                # ---------- plot ----------
                coefficients_figure = '{}_{}_coeficients.png'.format(base_name, country)
                with savefig(coefficients_figure, self.experiment_name,
                             subfolders=subfolders, format='png'):
                    self.plot_coefficients(pde_finder)
                    plt.xscale('log')

                fit_figure = '{}_{}_fitvsreal.png'.format(base_name, country)
                with savefig(fit_figure, self.experiment_name,
                             subfolders=subfolders, format='png'):
                    self.plot_fitted_and_real(pde_finder, data_manager, col="blue", subinit=None, sublen=None)

                # --------- predictions ---------
                predictions_temp = self.optimize_predictions(pde_finder, variable, x_operator_func, y_operator_func,
                                                             data_manager, period, rational)

                result_entry = {
                    'coefs': pde_finder.coefs_,
                    'period': period,
                    'data_real': data_manager.field,
                    'predictions': predictions_temp,
                    'data_raw': df,
                }
                self.info[country][base_name].append(result_entry)

                coefs_csv = config.get_filename(filename='{}_coefs.csv'.format(base_name),
                                                experiment=self.experiment_name,
                                                subfolders=[self.type_of_experiment])
                stats.to_csv(coefs_csv)
                self.plot_results()
Esempio n. 3
0
def load_csv(filename, experiment, subfolders=[], astypes={}, verbose=False):
    """Load ``<resolved name>.csv`` as a DataFrame, or return ``None`` if missing.

    The path is ``get_filename(filename, experiment, subfolders)`` with
    ``'.csv'`` appended. The file is read with its first column as the index
    and ``convert_type`` is applied to the frame.

    :param filename: name passed to ``get_filename`` (without ``.csv``).
    :param experiment: experiment identifier passed to ``get_filename``.
    :param subfolders: sub-folder list passed to ``get_filename``.
    :param astypes: accepted but not used by this function.
    :param verbose: when true, print the path before the existence check.
    :return: the converted DataFrame, or ``None`` when the file does not exist.
    """
    csv_path = get_filename(filename, experiment, subfolders) + '.csv'
    if verbose:
        print('Loading in: {}'.format(csv_path))
    if not os.path.exists(csv_path):
        return None
    frame = pd.read_csv(csv_path, index_col=0)
    return frame.apply(convert_type)
Esempio n. 4
0
def savefig(figname, experiment, subfolders=[], verbose=False, format='eps'):
    """Yield once, then save the current matplotlib figure and close it.

    Callers in this file drive this generator through a ``with`` statement
    (presumably via a context-manager wrapper applied outside this view), so
    the figure is drawn inside the ``with`` body and written on exit.

    The figure is now closed on every path. If the drawing code raises, the
    half-built figure is closed without being saved and the exception
    propagates. If resolving the path or saving fails, the figure is still
    closed.

    :param figname: file name passed to ``get_filename``.
    :param experiment: experiment identifier passed to ``get_filename``.
    :param subfolders: sub-folder list passed to ``get_filename``.
    :param verbose: when true, print the destination before saving.
    :param format: output format handed to ``plt.savefig``.
    """
    try:
        yield
    except Exception:
        # Drawing failed: do not write a partial figure, but do not leave it
        # open either, or later plt.* calls could draw onto it.
        plt.close()
        raise

    try:
        filename = get_filename(figname, experiment, subfolders)
        if verbose:
            print('Saving in: {}'.format(filename))
        plt.savefig(filename, dpi=500, format=format)
    finally:
        plt.close()
Esempio n. 5
0
def save(data, path, experiment, subfolders=[]):
    """Yield once, then write ``data`` to a ``.csv`` or ``.pickle`` file.

    The destination is ``get_filename(path, experiment, subfolders)`` and its
    extension selects the writer: ``csv`` uses ``data.to_csv``, ``pickle``
    uses ``pickle.dump``.

    The original subscripted the ``str.split`` method (``split['.']``), which
    raised ``TypeError`` on every call. The extension is now parsed properly.

    :param data: object to persist; it must provide ``to_csv`` for csv targets.
    :param path: file name passed to ``get_filename``.
    :param experiment: experiment identifier passed to ``get_filename``.
    :param subfolders: sub-folder list passed to ``get_filename``.
    :raises ValueError: if the resolved file name ends in neither ``.csv`` nor
        ``.pickle``, so that nothing is silently left unsaved.
    """
    yield
    filename = get_filename(path, experiment, subfolders)
    print('Saving in: {}'.format(filename))

    # Text after the last dot, exactly what split('.')[-1] would give.
    extension = filename.rsplit('.', 1)[-1]
    if extension == 'csv':
        data.to_csv(filename)
    elif extension == 'pickle':
        with open(filename, 'wb') as f:
            pickle.dump(data, f)
    else:
        raise ValueError('{} is not pickle or csv'.format(filename))
Esempio n. 6
0
def load(path, experiment, subfolders=[]):
    """Load a ``.csv`` or ``.pickle`` file, or return ``False`` if it is missing.

    The path is ``get_filename(path, experiment, subfolders)`` and its
    extension selects the reader: ``csv`` uses ``pd.read_csv``, ``pickle``
    uses ``pickle.load``.

    The original subscripted the ``str.split`` method (``split['.']``), which
    raised ``TypeError`` for every existing file. The extension is now parsed
    properly.

    :param path: file name passed to ``get_filename``.
    :param experiment: experiment identifier passed to ``get_filename``.
    :param subfolders: sub-folder list passed to ``get_filename``.
    :return: the loaded object, or ``False`` when the file does not exist.
    :raises Exception: if the file exists but is neither ``.csv`` nor ``.pickle``.
    """
    filename = get_filename(path, experiment, subfolders)
    if not os.path.exists(filename):
        return False

    print('loading ', filename)
    # Text after the last dot, exactly what split('.')[-1] would give.
    extension = filename.rsplit('.', 1)[-1]
    if extension == 'csv':
        return pd.read_csv(filename)
    if extension == 'pickle':
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(filename, 'rb') as f:
            return pickle.load(f)
    raise Exception(filename, 'is not pickle or csv')
Esempio n. 7
0
    def plot_results(self):
        """Plot real data, training data and predictions for every stored result.

        Iterates ``self.info`` (country -> variable name -> list of result
        entries). For each list of entries it:

        * selects the country's rows from ``self.df_data``, sorted by
          ``DateRep``, with ``Deaths``/``Cases`` accumulated when
          ``self.cumulative`` is set;
        * creates one subplot per variable found in the first entry's
          ``data_real`` and draws the full real series, each entry's training
          data, each entry's predictions and a vertical marker at the entry's
          period date;
        * writes each entry's predictions, plus a ``real`` column, to CSV;
        * saves the figure as ``<country>_predict_<variables>.png``.

        A failure while drawing the period marker is reported and skipped so
        the rest of the figure is still produced.
        """
        for country, vars_info in self.info.items():
            for list_info in vars_info.values():
                # ------ original data ------
                country_data = self.df_data.loc[self.df_data['Countries and territories'] == country, :]
                country_data = country_data.sort_values(by='DateRep')
                if self.cumulative:
                    country_data['Deaths'] = country_data['Deaths'].cumsum()
                    country_data['Cases'] = country_data['Cases'].cumsum()

                # Variable names are the string form of each field minus its
                # last three characters.
                original_var_names = [str(var)[:-3] for var in list_info[0]['data_real'].data]
                var_names = [english2spanish_dict[v] for v in original_var_names]
                # label -> artist; shared by all subplots of this figure.
                lines = {}
                with savefig('{}_predict_{}.png'.format(country, '_'.join(var_names)), self.experiment_name,
                             subfolders=[self.type_of_experiment], format='png'):
                    nvars = len(original_var_names)
                    fig, ax = plt.subplots(ncols=nvars, nrows=1, figsize=(8 * nvars, 8))
                    if nvars == 1:
                        # plt.subplots returns a bare Axes for a 1x1 grid.
                        ax = [ax]

                    for i, (temp_ax, original_var_name, var_name) in enumerate(zip(ax, original_var_names, var_names)):
                        temp_ax.set_title(english2spanish_dict[country] + ' ' + var_name.lower())
                        temp_ax.set_xlabel('Time')
                        temp_ax.set_ylabel(var_name)

                        # ------------------ plot real ------------------
                        lab = 'Data real {}'.format(var_name.lower())
                        lines[lab], = temp_ax.plot(country_data['DateRep'], country_data[original_var_name], '.-',
                                               c='tab:green', label=lab)

                        # xmin = np.Inf
                        real = []
                        t_real = []
                        ymax = 0
                        for info in list_info:
                            var = info['data_real'].data[i]
                            # Leave headroom: twice the largest training value seen so far.
                            ymax = np.max((ymax, var.data.max() * 2))
                            # xmin = np.min((xmin, min([d.data.min() for d in info['data_real'].data])))

                            # plt.xlim(left=xmin)
                            temp_ax.set_ylim((0, ymax))

                            # ------------------ plot real ------------------
                            lab = 'Data real {} for train'.format(var_name.lower())
                            real2use = var.data.tolist()
                            real += real2use
                            dt = var.domain.step_width['t']
                            t_real += np.arange(var.domain.lower_limits['t'], var.domain.upper_limits['t'] + dt, dt).tolist()
                            t_real2use = info['data_raw'].loc[info['data_raw']['total_days'].isin(t_real), 'DateRep']
                            lines[lab], = temp_ax.plot(t_real2use, real2use, '.-', c='k', label=lab)

                            # ------------------ plot prediction ------------------
                            lab = 'Predictcion {}'.format(info['period'].label.lower())
                            tmin = info['data_raw'].loc[
                                info['data_raw']['total_days'].isin(
                                    info['predictions'].index), 'DateRep'].values.min()
                            t = pd.date_range(tmin, periods=info['predictions'].shape[0], freq='D')

                            lines[lab], = temp_ax.plot(t,
                                                   info['predictions'].loc[:, str(var)].values, '-',
                                                   label=lab)

                            # NOTE(review): tosave is the stored predictions
                            # frame itself, so the 'real' column is added to
                            # self.info in place - confirm this is intended.
                            tosave = info['predictions']
                            tosave['real'] = np.nan

                            # NOTE(review): `real` accumulates over all entries
                            # while `t_real2use` only covers the current one;
                            # verify the pairing for entries after the first.
                            # Build the date list once, not once per element of t.
                            train_dates = t_real2use.tolist()
                            tosave.loc[[j in train_dates for j in t], 'real'] \
                                = [r for j, r in zip(t_real2use, real) if j in t]
                            tosave.to_csv(
                                config.get_filename(
                                    filename='{}_predictions_{}_{}.csv'.format('_'.join(var_names), info['period'].label.lower(), var_name),
                                    experiment=self.experiment_name,
                                    subfolders=[self.type_of_experiment]))

                            # Best effort: a period without a usable date must
                            # not abort the figure. Only ordinary errors are
                            # caught, so Ctrl-C and SystemExit still propagate.
                            try:
                                lab = 'Fin {}'.format(info['period'].label.lower())
                                # NOTE(review): axvline's ymin/ymax are axes
                                # fractions, while ymax / 2 is a data value -
                                # confirm the intended marker height.
                                lines[lab] = temp_ax.axvline(pd.to_datetime(info['period'].fecha, dayfirst=True), c='r',
                                                         linestyle='-.', ymin=0,
                                                         ymax=ymax / 2,
                                                         label=lab)
                            except Exception as err:
                                print('Could not draw end-of-period marker for {}: {}'.format(country, err))

                        temp_ax.grid(axis='x', color='gray', linestyle='-.', linewidth=1, alpha=0.65)
                        temp_ax.legend(list(lines.values()), list(lines.keys()))

                    plt.tight_layout()
Esempio n. 8
0
def save_csv(data, filename, experiment, subfolders=[], verbose=False):
    """Write ``data`` to ``<resolved name>.csv`` through ``data.to_csv``.

    The destination is ``get_filename(filename, experiment, subfolders)`` with
    ``'.csv'`` appended.

    :param data: object exposing ``to_csv`` (e.g. a DataFrame).
    :param filename: name passed to ``get_filename`` (without ``.csv``).
    :param experiment: experiment identifier passed to ``get_filename``.
    :param subfolders: sub-folder list passed to ``get_filename``.
    :param verbose: when true, print the destination before writing.
    """
    csv_path = get_filename(filename, experiment, subfolders) + '.csv'
    if verbose:
        announcement = 'Saving in: {}'.format(csv_path)
        print(announcement)
    data.to_csv(csv_path)