コード例 #1
0
 def fit(self, data: tf.data.Dataset, epochs=1, steps_per_epoch=1,
         validation_data=None, validation_steps=1,
         **flow_kwargs):
     """Train for ``epochs`` epochs, optionally evaluating after each one.

     Every epoch takes ``steps_per_epoch`` batches from ``data`` and feeds
     them to ``self.train_batch``. When ``validation_data`` is given, the
     epoch then takes ``validation_steps`` batches from it and feeds them to
     ``self.eval_batch``. Running metrics are displayed on tqdm progress bars.

     Args:
         data: dataset yielding ``(x, y)`` training batches.
         epochs: number of epochs to run.
         steps_per_epoch: number of training batches taken per epoch.
         validation_data: optional dataset yielding ``(x, y)`` batches.
         validation_steps: number of validation batches taken per epoch.
         **flow_kwargs: forwarded unchanged to ``train_batch`` and
             ``eval_batch``.

     Returns:
         The validation metrics dict that ``utils.update_metrics`` was fed
         with across all epochs; it stays empty when ``validation_data`` is
         None.
     """
     # tf.data transformations return a new dataset rather than mutating in
     # place, so the repeated dataset has to be rebound to take effect.
     data = data.repeat(epochs)
     if validation_data is not None:
         validation_data = validation_data.repeat(epochs)
     # Validation history is shared by all epochs; training history is reset
     # at the start of each epoch.
     test_hist = dict()
     for epoch in range(epochs):
         train_hist = dict()
         with tqdm(total=steps_per_epoch, desc=f'train, epoch {epoch+1}/{epochs}') as prog:
             for x, y in data.take(steps_per_epoch):
                 loss, nll = self.train_batch(x, y, **flow_kwargs)
                 utils.update_metrics(train_hist, loss=loss.numpy(), nll=nll.numpy())
                 prog.update(1)
                 prog.set_postfix(utils.get_metrics(train_hist))
         if validation_data is None:
             # Decide before opening the progress bar so that no empty
             # 'test' bar is drawn when there is nothing to evaluate.
             continue
         with tqdm(total=validation_steps, desc=f'test, epoch {epoch+1}/{epochs}') as prog:
             for x, y in validation_data.take(validation_steps):
                 nll = self.eval_batch(x, y, **flow_kwargs)
                 utils.update_metrics(test_hist, nll=nll.numpy())
                 prog.update(1)
                 prog.set_postfix(utils.get_metrics(test_hist))
     return test_hist
コード例 #2
0
 def train(self, train_data: tf.data.Dataset, steps_per_epoch, num_epochs=1,
           lam=1.0, lam_decay=0.0, alpha=0.0, **flow_kwargs):
     """Alternate discriminator and generator updates over the training data.

     For each batch the discriminators are updated first
     (``train_discriminators_on_batch``), then the generators
     (``train_generators_on_batch``). After every epoch ``lam`` is reduced by
     ``lam_decay``.

     Args:
         train_data: dataset yielding ``(x, y)`` batches.
         steps_per_epoch: number of batches taken per epoch.
         num_epochs: number of epochs to run.
         lam: initial value of the ``lam`` weight passed to the generator
             step.
         lam_decay: amount subtracted from ``lam`` after each epoch.
         alpha: forwarded to the generator step as ``alpha``.
         **flow_kwargs: accepted for interface compatibility; not used here.

     Returns:
         The metrics dict populated through ``utils.update_metrics``.
     """
     train_gen_data = train_data.take(steps_per_epoch).repeat(num_epochs)
     hist = dict()
     # Held in a tf.Variable so it can be decayed in place between epochs.
     lam = tf.Variable(lam, dtype=tf.float32)
     with tqdm(total=steps_per_epoch*num_epochs, desc='train') as prog:
         for _ in range(num_epochs):
             for x, y in train_gen_data.take(steps_per_epoch):
                 # Discriminator update.
                 dx_loss, dy_loss = self.train_discriminators_on_batch(x, y)
                 # Generator update.
                 g_obj, nll_x, nll_y, gx_loss, gy_loss, gx_aux, gy_aux = self.train_generators_on_batch(
                     x, y, alpha=alpha, lam=utils.var(lam))
                 # Record each loss under its own name: gy_loss is the
                 # generator loss, and the discriminator losses get their own
                 # keys (matching what evaluate() records).
                 utils.update_metrics(hist,
                                      g_obj=g_obj.numpy(),
                                      gx_loss=gx_loss.numpy(),
                                      gy_loss=gy_loss.numpy(),
                                      dx_loss=dx_loss.numpy(),
                                      dy_loss=dy_loss.numpy(),
                                      nll_x=nll_x.numpy(),
                                      nll_y=nll_y.numpy())
                 prog.update(1)
                 prog.set_postfix(utils.get_metrics(hist))
             lam.assign_sub(lam_decay)
     return hist
コード例 #3
0
 def evaluate(self, validation_data: tf.data.Dataset, validation_steps, **flow_kwargs):
     """Evaluate discriminators and generators on a validation dataset.

     Takes ``validation_steps`` batches from ``validation_data``, runs
     ``eval_discriminators_on_batch`` and ``eval_generators_on_batch`` on each
     one, and records the resulting values through ``utils.update_metrics``
     while showing progress on a tqdm bar.

     Args:
         validation_data: dataset yielding ``(x, y)`` batches.
         validation_steps: number of batches to evaluate.
         **flow_kwargs: accepted for interface compatibility; not used here.

     Returns:
         The metrics dict populated through ``utils.update_metrics``.
     """
     batches = validation_data.take(validation_steps)
     with tqdm(total=validation_steps, desc='eval') as progress:
         metrics = {}
         for x, y in batches:
             # Discriminator evaluation for this batch.
             dx_loss, dy_loss = self.eval_discriminators_on_batch(x, y)
             # Generator evaluation for this batch.
             nll_x, nll_y, gx_loss, gy_loss, gx_aux, gy_aux = self.eval_generators_on_batch(x, y)
             batch_tensors = {
                 'nll_x': nll_x,
                 'nll_y': nll_y,
                 'gx_loss': gx_loss,
                 'gy_loss': gy_loss,
                 'dx_loss': dx_loss,
                 'dy_loss': dy_loss,
                 'gx_aux': gx_aux,
                 'gy_aux': gy_aux,
             }
             # Convert every tensor to a host value before recording it.
             batch_values = {key: tensor.numpy() for key, tensor in batch_tensors.items()}
             utils.update_metrics(metrics, **batch_values)
             progress.update(1)
             progress.set_postfix(utils.get_metrics(metrics))
     return metrics
コード例 #4
0
def _log_current_figure(path, artifact_path='figures'):
    """Save the current matplotlib figure to ``path``, log it to MLflow, then close it."""
    plt.savefig(path)
    mlflow.log_artifact(path, artifact_path)
    # Close the figure that was just saved so repeated validation rounds do
    # not accumulate open figures.
    plt.close()


def fit_bmd_maxt(fold, i, epochs, lr, batch_size, buffer_size, validate_freq):
    """Train a BMD model on one max-temperature fold, validating periodically.

    Training runs in rounds of ``validate_freq`` epochs. After each round the
    encoder is checkpointed and logged to MLflow, predictions are generated
    for the whole test dataset, and sample plots, evaluation metrics and
    diagnostic figures are written under ``/tmp`` and logged as artifacts.

    Args:
        fold: fold data handed to ``maxt.preprocess_fold_maxt``.
        i: zero-based fold index; logged to MLflow as ``i + 1``.
        epochs: total number of training epochs.
        lr: learning rate for the Adam optimizer.
        batch_size: batch size for the train and test datasets.
        buffer_size: ``buffer_size`` passed to ``data_fold.train_dataset``.
        validate_freq: number of epochs between validation rounds.

    Returns:
        None. Results are reported through MLflow.
    """
    mlflow.log_param('fold', i + 1)
    # The unit conversion adds 273.15 (presumably Celsius -> Kelvin; confirm
    # against tdex.indices).
    indices = tdex.indices('Time', convert_units_fn=lambda x: x + 273.15)
    data_fold = maxt.preprocess_fold_maxt(fold)
    train_lo, train_hi = data_fold.train
    test_lo, test_hi = data_fold.test
    N_train, N_test = train_lo.Time.size, test_lo.Time.size
    (ht_lr, wt_lr), (ht_hi, wt_hi) = train_lo.shape[1:3], train_hi.shape[1:3]
    _, monthly_means_hi = data_fold.monthly_means
    train_ds = data_fold.train_dataset(batch_size=batch_size,
                                       buffer_size=buffer_size,
                                       mode='supervised')
    test_ds = data_fold.test_dataset(batch_size=batch_size,
                                     buffer_size=N_test,
                                     mode='test')
    scale = wt_hi // wt_lr
    encoder = create_bmd_cnn10(ht_lr, wt_lr, scale=scale, c_out=2)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    model = VariationalModel(encoder,
                             normal(),
                             optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                             output_shape=(None, ht_hi, wt_hi, 1))
    ckpt_dir = '/tmp/bmd-final'
    # Tolerate a directory left behind by an earlier, interrupted run.
    os.makedirs(ckpt_dir, exist_ok=True)
    try:
        for start in range(0, epochs, validate_freq):
            # Shorten the last round so the total never exceeds `epochs`
            # when `epochs` is not a multiple of `validate_freq`.
            round_epochs = min(validate_freq, epochs - start)
            hist = model.fit(train_ds,
                             epochs=round_epochs,
                             steps_per_epoch=N_train // batch_size,
                             validation_data=test_ds,
                             validation_steps=N_test // batch_size)
            mlflow.log_metrics(get_metrics(hist))
            # Number of epochs completed so far; used to label all artifacts.
            j = start + round_epochs
            mlflow.log_metric('epoch', j)

            ckpt_path = f'{ckpt_dir}/bmd-epoch{j}.h5'
            encoder.save(ckpt_path)
            mlflow.log_artifact(ckpt_path, artifact_path='model/')

            # Collect inputs, targets, predicted means and samples for the
            # whole test dataset.
            x_true = []
            y_true = []
            y_mean = []
            y_samples = []
            for x, y in test_ds:
                x_true.append(x)
                y_true.append(y)
                y_mean.append(model.mean(x))
                y_samples.append(model.sample(x))
            x_true = tf.concat(x_true, axis=0)
            y_true = tf.concat(y_true, axis=0)
            y_mean = tf.concat(y_mean, axis=0)
            y_samples = tf.concat(y_samples, axis=0)

            bmd_plot(x_true, y_true, y_mean, y_samples,
                     (test_lo.lat, test_lo.lon), (test_hi.lat, test_hi.lon))
            _log_current_figure(f'/tmp/samples-epoch{j}.png')

            metrics = maxt.eval_metrics(indices, y_true, y_mean, test_hi.coords,
                                        monthly_means_hi)
            np.savez(f'/tmp/metrics-epoch{j}.npz', **metrics)
            mlflow.log_artifact(f'/tmp/metrics-epoch{j}.npz', 'data')
            avg_metrics = {k: float(np.mean(v)) for k, v in metrics.items()}
            mlflow.log_metrics(avg_metrics)

            # Diagnostic plots derived from the evaluation metrics.
            maxt.plot_indices(metrics)
            _log_current_figure(f'/tmp/indices-epoch{j}.png')
            maxt.plot_error_maps(metrics, test_hi.lat, test_hi.lon)
            _log_current_figure(f'/tmp/error-maps-epoch{j}.png')
    finally:
        # Always remove the checkpoint directory, even if a round failed;
        # best-effort so cleanup cannot mask the original exception.
        shutil.rmtree(ckpt_dir, ignore_errors=True)