def view_fixed(identifier, epoch, n_samples=6, dim=None):
    """Visualise what the model generates when the latent value at time 0
    is held fixed at every subsequent time step.

    Args:
        identifier: experiment name; settings are read from the JSON file
            ./experiments/settings/<identifier>.txt.
        epoch: epoch of the trained model to sample from.
        n_samples: number of latent sequences / generated samples.
        dim: if None, fix all latent dimensions across time; otherwise fix
            only latent dimension ``dim`` and leave the rest free.

    Returns:
        True on completion; the sample plot is saved as a side effect.
    """
    # fix: the original leaked the settings file handle via
    # json.load(open(...)); a context manager guarantees it is closed
    with open('./experiments/settings/' + identifier + '.txt', 'r') as f:
        settings = json.load(f)
    Z_samples = model.sample_Z(n_samples, settings['seq_length'],
                               settings['latent_dim'], settings['use_time'])
    # propagate forward the value at time 0 (which time doesn't matter)
    for i in range(1, settings['seq_length']):
        if dim is None:
            Z_samples[:, i, :] = Z_samples[:, 0, :]
        else:
            Z_samples[:, i, dim] = Z_samples[:, 0, dim]
    # generate from the fixed latent codes and visualise
    samples = model.sample_trained_model(settings, epoch, n_samples, Z_samples)
    plotting.save_plot_sample(samples, epoch, identifier + '_fixed', n_samples)
    return True
# NOTE(review): fragment of a GAN training loop — the enclosing function and
# epoch/batch loops start before this chunk, so the relative indentation of
# the trailing statements is reconstructed and should be confirmed against
# the full file.
gen_opt.step()
# running average of the generator loss for this reporting window
gen_loss_meter.add(gen_loss.item())

## Visualization code ##
# every display_step batches (skipping step 0): report, record and reset the
# averaged losses, and save plots of the current fake and real batches
if cur_step % display_step == 0 and cur_step > 0:
    print(
        f"Step {cur_step}: Generator loss: {gen_loss_meter.value()[0]}, "
        f"discriminator loss: {crit_loss_meter.value()[0]}")
    gen_loss_history.append(gen_loss_meter.value()[0])
    crit_loss_history.append(crit_loss_meter.value()[0])
    # Reset average meters
    gen_loss_meter.reset()
    crit_loss_meter.reset()
    save_plot_sample(fake, f"Fakes at Step {cur_step}", plot_dir,
                     f"fake_step_{cur_step}", n_samples=n_plot_samples,
                     ncol=3)
    save_plot_sample(real, f"Reals at Step {cur_step}", plot_dir,
                     f"real_step_{cur_step}", n_samples=n_plot_samples,
                     ncol=3)
cur_step += 1
# periodic checkpointing of both networks and their optimisers
# (presumably at epoch level in the full file — TODO confirm)
if epoch % epochs_per_save == 0:
    save_ckpt(epoch, gen, 'generator', gen_opt, ckpt_dir, device)
    save_ckpt(epoch, crit, 'critic', crit_opt, ckpt_dir, device)
time_elapsed = time.time() - start
def get_reconstruction_errors(identifier, epoch, g_tolerance=0.05,
                              max_samples=1000, rerun=False, tstr=False):
    """Compute (or load cached) per-sample reconstruction errors for an
    experiment's train, test and generated data, compare the error
    distributions with two-sample Kolmogorov-Smirnov tests, and plot the
    easiest/hardest training samples.

    Args:
        identifier: experiment name; settings, data and outputs live under
            ./experiments/.
        epoch: epoch of the trained model to evaluate.
        g_tolerance: tolerance forwarded to error_per_sample; also part of
            the cache filename.
        max_samples: cap on how many train/test samples are evaluated.
        rerun: if True, ignore any cached errors and recompute them.
        tstr: if True, take the "generated" data (and labels) from this
            experiment's TSTR dump instead of sampling from the model.

    Returns:
        True on completion; errors and plots are saved as side effects.
    """
    # fix: use a context manager so the settings file handle is closed
    with open('./experiments/settings/' + identifier + '.txt', 'r') as f:
        settings = json.load(f)
    # an experiment may share its dataset with another via data_load_from
    if settings['data_load_from']:
        data_path = ('./experiments/data/' + settings['data_load_from'] +
                     '.data.npy')
    else:
        data_path = './experiments/data/' + identifier + '.data.npy'
    data_dict = np.load(data_path).item()
    samples = data_dict['samples']
    train = samples['train']
    vali = samples['vali']
    test = samples['test']
    labels = data_dict['labels']
    train_labels, test_labels, synth_labels, vali_labels = (None, None,
                                                            None, None)
    # cache file for the computed errors (path was previously duplicated)
    errors_path = ('./experiments/eval/' + identifier + '_' + str(epoch) +
                   '_' + str(g_tolerance) + '.reconstruction_errors.npy')
    try:
        if rerun:
            # force recomputation by jumping straight to the except branch
            raise FileNotFoundError
        errors = np.load(errors_path).item()
        train_errors = errors['train']
        test_errors = errors['test']
        generated_errors = errors['generated']
        noisy_errors = errors['noisy']
        print('Loaded precomputed errors')
    except FileNotFoundError:
        if tstr:
            # use the synthetic data generated for the TSTR evaluation
            synth_data = np.load('./experiments/tstr/' + identifier + '_' +
                                 str(epoch) + '.data.npy').item()
            generated = synth_data['samples']
            synth_labels = synth_data['labels']
            train_labels = labels['train']
            test_labels = labels['test']
            vali_labels = labels['vali']
        else:
            # generate new "easy" samples from the model distribution
            n_eval = 500
            generated = model.sample_trained_model(settings, epoch, n_eval)
            # TODO: generate "hard" random samples not from the train/test
            # distribution — use original validation examples, add noise etc.
            # random_samples = np.random.normal(size=generated.shape)
            # random_samples -= np.mean(random_samples, axis=0)
            # random_samples += np.mean(vali, axis=0)
            # random_samples /= np.std(random_samples, axis=0)
            # random_samples *= np.std(vali, axis=0)
        # get all the errors, sub-sampling large splits down to max_samples
        print('Getting reconstruction errors on train set')
        if train.shape[0] > max_samples:
            index_subset = np.random.permutation(train.shape[0])[:max_samples]
            train = train[index_subset]
            if train_labels is not None:
                train_labels = train_labels[index_subset]
        train_errors = error_per_sample(identifier, epoch, train, n_rep=5,
                                        g_tolerance=g_tolerance,
                                        C_samples=train_labels)
        print('Getting reconstruction errors on test set')
        if test.shape[0] > max_samples:
            index_subset = np.random.permutation(test.shape[0])[:max_samples]
            test = test[index_subset]
            if test_labels is not None:
                test_labels = test_labels[index_subset]
        test_errors = error_per_sample(identifier, epoch, test, n_rep=5,
                                       g_tolerance=g_tolerance,
                                       C_samples=test_labels)
        print('Getting reconstruction errors on generated set')
        generated_errors = error_per_sample(identifier, epoch, generated,
                                            n_rep=5, g_tolerance=g_tolerance,
                                            C_samples=synth_labels)
        # noisy-sample evaluation is currently disabled
        # alpha = 0.5
        # noisy_samples = alpha*vali + (1-alpha)*np.random.permutation(vali)
        # noisy_errors = error_per_sample(identifier, epoch, noisy_samples,
        #                                 n_rep=5, g_tolerance=g_tolerance,
        #                                 C_samples=vali_labels)
        noisy_errors = None
        # cache the errors so later calls can skip recomputation
        errors = {
            'train': train_errors,
            'test': test_errors,
            'generated': generated_errors,
            'noisy': noisy_errors
        }
        np.save(errors_path, errors)
    # two-sample Kolmogorov-Smirnov tests for equality of the error
    # distributions. fix: the recompute branch previously duplicated these
    # tests and contained a stray pdb.set_trace() debugger breakpoint; both
    # removed — the tests below run once for either path.
    D_test, p_test = ks_2samp(train_errors, test_errors)
    print('KS statistic and p-value for train v. test errors:', D_test,
          p_test)
    D_gen, p_gen = ks_2samp(generated_errors, train_errors)
    print('KS statistic and p-value for train v. gen errors:', D_gen, p_gen)
    D_gentest, p_gentest = ks_2samp(generated_errors, test_errors)
    print('KS statistic and p-value for gen v. test errors:', D_gentest,
          p_gentest)
    # visualise distribution of errors for train and test
    plotting.reconstruction_errors(
        identifier + '_' + str(epoch) + '_' + str(g_tolerance), train_errors,
        test_errors, generated_errors, noisy_errors)
    # visualise the "hardest" and "easiest" samples from train
    ranking_train = np.argsort(train_errors)
    easiest_train = ranking_train[:6]
    hardest_train = ranking_train[-6:]
    plotting.save_plot_sample(train[easiest_train], epoch,
                              identifier + '_easytrain', n_samples=6,
                              num_epochs=None, ncol=2)
    plotting.save_plot_sample(train[hardest_train], epoch,
                              identifier + '_hardtrain', n_samples=6,
                              num_epochs=None, ncol=2)
    return True
# NOTE(review): fragment — the first line is the tail of a plotting call and
# the else/elif belong to an if chain whose opening conditions lie before
# this chunk; indentation is reconstructed and should be confirmed against
# the full file.
        labels=labs)
    else:
        plotting.save_mnist_plot_sample(samps, 0, identifier + '_real',
                                        n_samples=6, labels=labs)
elif 'eICU' in data:
    # eICU time series get their own downsampled patient visualisation
    plotting.vis_eICU_patients_downsampled(vis_real, resample_rate_in_min,
                                           identifier=identifier + '_real',
                                           idx=0)
else:
    plotting.save_plot_sample(vis_real, 0, identifier + '_real', n_samples=6,
                              num_epochs=num_epochs)

# open the training trace file and write its header row; one metrics row per
# epoch is appended during training (NOTE(review): handle is not closed here —
# presumably closed later in the file; verify)
trace = open('./experiments/traces/' + identifier + '.trace.txt', 'w')
trace.write('epoch time D_loss G_loss mmd2 that ll real_ll\n')

# --- train --- #
# subset of the experiment settings actually consumed by the training loop
train_vars = [
    'batch_size', 'D_rounds', 'G_rounds', 'use_time', 'seq_length',
    'latent_dim', 'num_generated_features', 'cond_dim', 'max_val',
    'WGAN_clip', 'one_hot'
]
train_settings = dict((k, settings[k]) for k in train_vars)
t0 = time()
# sigma_opt_thresh = 0.001 # sigma_opt_vars = [var for var in tf.global_variables() if 'SIGMA_optimizer' in var.name] # --- run the program --- # config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) # sess = tf.Session() sess.run(tf.global_variables_initializer()) # # -- plot the real samples -- # vis_real_indices = np.random.choice(len(samples), size=16) vis_real = np.float32(samples[vis_real_indices, :, :]) plotting.save_plot_sample(vis_real, 0, identifier + '_real', n_samples=16, num_epochs=num_epochs) #随机选择了16个样本 plotting.save_samples_real(vis_real, identifier) # --- train --- # train_vars = [ 'batch_size', 'D_rounds', 'G_rounds', 'use_time', 'seq_length', 'latent_dim' ] train_settings = dict((k, settings[k]) for k in train_vars) train_settings['num_signals'] = num_variables t0 = time() MMD = np.zeros([ num_epochs,