import os
import shutil

import matplotlib.pyplot as plt
import mlflow
import numpy as np
import tensorflow as tf
from tqdm import tqdm

import utils
# Also assumed from the surrounding project (not shown in this section):
# maxt, tdex, create_bmd_cnn10, VariationalModel, normal, bmd_plot, get_metrics.


def fit(self, data: tf.data.Dataset, epochs=1, steps_per_epoch=1,
        validation_data=None, validation_steps=1, **flow_kwargs):
    # Dataset.repeat returns a new dataset; the result must be reassigned.
    data = data.repeat(epochs)
    if validation_data is not None:
        validation_data = validation_data.repeat(epochs)
    test_hist = dict()
    for epoch in range(epochs):
        train_hist = dict()
        with tqdm(total=steps_per_epoch, desc=f'train, epoch {epoch+1}/{epochs}') as prog:
            for x, y in data.take(steps_per_epoch):
                loss, nll = self.train_batch(x, y, **flow_kwargs)
                utils.update_metrics(train_hist, loss=loss.numpy(), nll=nll.numpy())
                prog.update(1)
                prog.set_postfix(utils.get_metrics(train_hist))
        # skip the validation pass entirely if no validation data was given
        if validation_data is None:
            continue
        with tqdm(total=validation_steps, desc=f'test, epoch {epoch+1}/{epochs}') as prog:
            for x, y in validation_data.take(validation_steps):
                nll = self.eval_batch(x, y, **flow_kwargs)
                utils.update_metrics(test_hist, nll=nll.numpy())
                prog.update(1)
                prog.set_postfix(utils.get_metrics(test_hist))
    return test_hist
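
# The `utils` metric helpers used throughout this section are not shown here.
# A minimal sketch of their assumed behavior: `update_metrics` appends each
# per-batch value to a running list, and `get_metrics` reduces each list to
# its mean for progress display and logging. The real implementations may differ.
def update_metrics(hist: dict, **metrics):
    for name, value in metrics.items():
        hist.setdefault(name, []).append(value)

def get_metrics(hist: dict) -> dict:
    return {name: float(np.mean(values)) for name, values in hist.items()}
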
def train(self, train_data: tf.data.Dataset, steps_per_epoch, num_epochs=1,
          lam=1.0, lam_decay=0.0, alpha=0.0, **flow_kwargs):
    train_gen_data = train_data.take(steps_per_epoch).repeat(num_epochs)
    with tqdm(total=steps_per_epoch*num_epochs, desc='train') as prog:
        hist = dict()
        lam = tf.Variable(lam, dtype=tf.float32)
        for _ in range(num_epochs):
            for x, y in train_gen_data.take(steps_per_epoch):
                # train discriminators
                dx_loss, dy_loss = self.train_discriminators_on_batch(x, y)
                # train generators
                g_obj, nll_x, nll_y, gx_loss, gy_loss, gx_aux, gy_aux = \
                    self.train_generators_on_batch(x, y, alpha=alpha, lam=utils.var(lam))
                utils.update_metrics(hist, g_obj=g_obj.numpy(),
                                     gx_loss=gx_loss.numpy(), gy_loss=gy_loss.numpy(),
                                     dx_loss=dx_loss.numpy(), dy_loss=dy_loss.numpy(),
                                     nll_x=nll_x.numpy(), nll_y=nll_y.numpy())
                prog.update(1)
                prog.set_postfix(utils.get_metrics(hist))
            # linearly decay the adversarial weight after each epoch
            lam.assign_sub(lam_decay)
    return hist
def evaluate(self, validation_data: tf.data.Dataset, validation_steps, **flow_kwargs):
    validation_data = validation_data.take(validation_steps)
    with tqdm(total=validation_steps, desc='eval') as prog:
        hist = dict()
        for x, y in validation_data:
            # evaluate discriminators
            dx_loss, dy_loss = self.eval_discriminators_on_batch(x, y)
            # evaluate generators
            nll_x, nll_y, gx_loss, gy_loss, gx_aux, gy_aux = self.eval_generators_on_batch(x, y)
            utils.update_metrics(hist, nll_x=nll_x.numpy(), nll_y=nll_y.numpy(),
                                 gx_loss=gx_loss.numpy(), gy_loss=gy_loss.numpy(),
                                 dx_loss=dx_loss.numpy(), dy_loss=dy_loss.numpy(),
                                 gx_aux=gx_aux.numpy(), gy_aux=gy_aux.numpy())
            prog.update(1)
            prog.set_postfix(utils.get_metrics(hist))
    return hist
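
# Hypothetical driver (sketch): ties `train` and `evaluate` together for a model
# instance exposing both methods above. All names here (`run_experiment`, the
# model/dataset arguments) are illustrative assumptions, not part of the actual API.
def run_experiment(model, train_ds, test_ds, steps_per_epoch, num_epochs,
                   validation_steps, lam=1.0, lam_decay=0.0, alpha=0.0):
    train_hist = model.train(train_ds, steps_per_epoch, num_epochs=num_epochs,
                             lam=lam, lam_decay=lam_decay, alpha=alpha)
    eval_hist = model.evaluate(test_ds, validation_steps)
    return train_hist, eval_hist
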
def fit_bmd_maxt(fold, i, epochs, lr, batch_size, buffer_size, validate_freq):
    mlflow.log_param('fold', i + 1)
    indices = tdex.indices('Time', convert_units_fn=lambda x: x + 273.15)
    data_fold = maxt.preprocess_fold_maxt(fold)
    train_lo, train_hi = data_fold.train
    test_lo, test_hi = data_fold.test
    N_train, N_test = train_lo.Time.size, test_lo.Time.size
    (ht_lr, wt_lr), (ht_hi, wt_hi) = train_lo.shape[1:3], train_hi.shape[1:3]
    monthly_means_lo, monthly_means_hi = data_fold.monthly_means
    train_ds = data_fold.train_dataset(batch_size=batch_size, buffer_size=buffer_size, mode='supervised')
    test_ds = data_fold.test_dataset(batch_size=batch_size, buffer_size=N_test, mode='test')
    scale = wt_hi // wt_lr
    encoder = create_bmd_cnn10(ht_lr, wt_lr, scale=scale, c_out=2)
    model = VariationalModel(encoder, normal(),
                             optimizer=tf.keras.optimizers.Adam(lr=lr),
                             output_shape=(None, ht_hi, wt_hi, 1))
    ckpt_dir = '/tmp/bmd-final'
    os.makedirs(ckpt_dir, exist_ok=True)
    # train in chunks of validate_freq epochs, validating and checkpointing after each chunk
    for j in range(0, epochs, validate_freq):
        hist = model.fit(train_ds, epochs=validate_freq, steps_per_epoch=N_train // batch_size,
                         validation_data=test_ds, validation_steps=N_test // batch_size)
        mlflow.log_metrics(get_metrics(hist))
        epoch = j + validate_freq
        mlflow.log_metric('epoch', epoch)
        encoder.save(f'{ckpt_dir}/bmd-epoch{epoch}.h5')
        mlflow.log_artifact(f'{ckpt_dir}/bmd-epoch{epoch}.h5', artifact_path='model')
    # collect test inputs, targets, and model outputs for final evaluation
    x_true, y_true, y_mean, y_samples = [], [], [], []
    for x, y in test_ds:
        x_true.append(x)
        y_true.append(y)
        y_mean.append(model.mean(x))
        y_samples.append(model.sample(x))
    x_true = tf.concat(x_true, axis=0)
    y_true = tf.concat(y_true, axis=0)
    y_mean = tf.concat(y_mean, axis=0)
    y_samples = tf.concat(y_samples, axis=0)
    fig = bmd_plot(x_true, y_true, y_mean, y_samples,
                   (test_lo.lat, test_lo.lon), (test_hi.lat, test_hi.lon))
    plt.savefig(f'/tmp/samples-epoch{epoch}.png')
    mlflow.log_artifact(f'/tmp/samples-epoch{epoch}.png', 'figures')
    metrics = maxt.eval_metrics(indices, y_true, y_mean, test_hi.coords, monthly_means_hi)
    np.savez(f'/tmp/metrics-epoch{epoch}.npz', **metrics)
    mlflow.log_artifact(f'/tmp/metrics-epoch{epoch}.npz', 'data')
    avg_metrics = {k: float(np.mean(v)) for k, v in metrics.items()}
    mlflow.log_metrics(avg_metrics)
    # create plots
    fig = maxt.plot_indices(metrics)
    plt.savefig(f'/tmp/indices-epoch{epoch}.png')
    mlflow.log_artifact(f'/tmp/indices-epoch{epoch}.png', 'figures')
    fig = maxt.plot_error_maps(metrics, test_hi.lat, test_hi.lon)
    plt.savefig(f'/tmp/error-maps-epoch{epoch}.png')
    mlflow.log_artifact(f'/tmp/error-maps-epoch{epoch}.png', 'figures')
    shutil.rmtree(ckpt_dir)
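
# Example entry point (sketch): one MLflow run per cross-validation fold.
# `load_maxt_folds` is hypothetical (the actual fold source is not shown in
# this section), and the hyperparameter values are illustrative only.
if __name__ == '__main__':
    for i, fold in enumerate(load_maxt_folds()):
        with mlflow.start_run(run_name=f'bmd-maxt-fold{i+1}'):
            fit_bmd_maxt(fold, i, epochs=50, lr=1e-3, batch_size=16,
                         buffer_size=2400, validate_freq=10)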