Beispiel #1
0
def view_fixed(identifier, epoch, n_samples=6, dim=None):
    """Visualise what the generator produces when its latent input is held
    constant across all time steps.

    Samples latent trajectories, copies the value at time step 0 into every
    later step (all latent dimensions, or only ``dim`` if given), generates
    samples from the trained model at ``epoch`` and saves a plot.

    Args:
        identifier: experiment name; settings are read from
            ./experiments/settings/<identifier>.txt (JSON).
        epoch: training epoch of the saved model to sample from.
        n_samples: number of samples to generate and plot.
        dim: if not None, only this latent dimension is held fixed;
            otherwise the whole latent vector is.

    Returns:
        True on completion.
    """
    # use a context manager so the settings file handle is closed promptly
    # (the original json.load(open(...)) leaked the handle)
    with open('./experiments/settings/' + identifier + '.txt', 'r') as f:
        settings = json.load(f)
    Z_samples = model.sample_Z(n_samples, settings['seq_length'],
                               settings['latent_dim'], settings['use_time'])
    # now, propagate forward the value at time 0 (which time doesn't matter)
    for i in range(1, settings['seq_length']):
        if dim is None:
            Z_samples[:, i, :] = Z_samples[:, 0, :]
        else:
            Z_samples[:, i, dim] = Z_samples[:, 0, dim]
    # now generate
    samples = model.sample_trained_model(settings, epoch, n_samples, Z_samples)
    # now visualise
    plotting.save_plot_sample(samples, epoch, identifier + '_fixed', n_samples)
    return True
Beispiel #2
0
        gen_opt.step()
        # track a running average of the generator loss for reporting
        gen_loss_meter.add(gen_loss.item())

        ## Visualization code ##
        # Every `display_step` steps (skipping step 0, where the meters hold
        # no full window), report the averaged losses, record them for later
        # plotting, reset the meters, and save sample grids.
        if cur_step % display_step == 0 and cur_step > 0:
            print(
                f"Step {cur_step}: Generator loss: {gen_loss_meter.value()[0]}, "
                f"discriminator loss: {crit_loss_meter.value()[0]}")
            gen_loss_history.append(gen_loss_meter.value()[0])
            crit_loss_history.append(crit_loss_meter.value()[0])
            # Reset average meters
            gen_loss_meter.reset()
            crit_loss_meter.reset()
            # 3-column grid of generated samples at this step
            save_plot_sample(fake,
                             f"Fakes at Step {cur_step}",
                             plot_dir,
                             f"fake_step_{cur_step}",
                             n_samples=n_plot_samples,
                             ncol=3)
            # matching grid of real samples for visual comparison
            save_plot_sample(real,
                             f"Reals at Step {cur_step}",
                             plot_dir,
                             f"real_step_{cur_step}",
                             n_samples=n_plot_samples,
                             ncol=3)

        cur_step += 1

    # Periodically checkpoint both networks together with their optimizers.
    if epoch % epochs_per_save == 0:
        save_ckpt(epoch, gen, 'generator', gen_opt, ckpt_dir, device)
        save_ckpt(epoch, crit, 'critic', crit_opt, ckpt_dir, device)
        time_elapsed = time.time() - start
Beispiel #3
0
def get_reconstruction_errors(identifier,
                              epoch,
                              g_tolerance=0.05,
                              max_samples=1000,
                              rerun=False,
                              tstr=False):
    """
    Get the reconstruction error of every point in the training set of a given
    experiment.

    Loads cached per-sample reconstruction errors if available (unless
    ``rerun``); otherwise computes them for the train, test and generated
    sets via error_per_sample and caches the result. Then runs two-sample
    Kolmogorov-Smirnov tests between the error distributions, plots their
    histograms, and saves the "easiest" and "hardest" training samples.

    Args:
        identifier: experiment name; settings are read from
            ./experiments/settings/<identifier>.txt (JSON).
        epoch: epoch of the trained model to evaluate.
        g_tolerance: generator tolerance passed to error_per_sample; also
            part of the cache file name.
        max_samples: cap on how many train/test samples are evaluated.
        rerun: if True, ignore any cached errors and recompute them.
        tstr: if True, load pre-generated synthetic data (TSTR experiment)
            instead of sampling new data from the model.

    Returns:
        True on completion.
    """
    # context manager so the settings file handle is closed promptly
    with open('./experiments/settings/' + identifier + '.txt', 'r') as f:
        settings = json.load(f)
    # allow_pickle=True: these .npy files store pickled dicts, which NumPy
    # >= 1.16.3 refuses to load by default
    if settings['data_load_from']:
        data_dict = np.load('./experiments/data/' +
                            settings['data_load_from'] + '.data.npy',
                            allow_pickle=True).item()
    else:
        data_dict = np.load('./experiments/data/' + identifier + '.data.npy',
                            allow_pickle=True).item()
    samples = data_dict['samples']
    train = samples['train']
    vali = samples['vali']
    test = samples['test']
    labels = data_dict['labels']
    train_labels, test_labels, synth_labels, vali_labels = None, None, None, None
    try:
        if rerun:
            # force recomputation by skipping straight to the except branch
            raise FileNotFoundError
        errors = np.load('./experiments/eval/' + identifier + '_' +
                         str(epoch) + '_' + str(g_tolerance) +
                         '.reconstruction_errors.npy',
                         allow_pickle=True).item()
        train_errors = errors['train']
        test_errors = errors['test']
        generated_errors = errors['generated']
        noisy_errors = errors['noisy']
        print('Loaded precomputed errors')
    except FileNotFoundError:
        if tstr:
            # reuse the synthetic data generated for the TSTR experiment
            synth_data = np.load('./experiments/tstr/' + identifier + '_' +
                                 str(epoch) + '.data.npy',
                                 allow_pickle=True).item()
            generated = synth_data['samples']
            synth_labels = synth_data['labels']
            train_labels = labels['train']
            test_labels = labels['test']
            vali_labels = labels['vali']
        else:
            # generate new data
            n_eval = 500
            # generate "easy" samples from the distribution
            generated = model.sample_trained_model(settings, epoch, n_eval)
            # generate "hard' random samples, not from train/test distribution
            # TODO: use original validation examples, add noise etc.
        ##    random_samples = np.random.normal(size=generated.shape)
        #    random_samples -= np.mean(random_samples, axis=0)
        #    random_samples += np.mean(vali, axis=0)
        #    random_samples /= np.std(random_samples, axis=0)
        #    random_samples *= np.std(vali, axis=0)

        # get all the errors
        print('Getting reconstruction errors on train set')
        if train.shape[0] > max_samples:
            # evaluate only a random subset to bound the cost
            index_subset = np.random.permutation(train.shape[0])[:max_samples]
            train = train[index_subset]
            if train_labels is not None:
                train_labels = train_labels[index_subset]
        train_errors = error_per_sample(identifier,
                                        epoch,
                                        train,
                                        n_rep=5,
                                        g_tolerance=g_tolerance,
                                        C_samples=train_labels)
        print('Getting reconstruction errors on test set')
        if test.shape[0] > max_samples:
            index_subset = np.random.permutation(test.shape[0])[:max_samples]
            test = test[index_subset]
            if test_labels is not None:
                test_labels = test_labels[index_subset]
        test_errors = error_per_sample(identifier,
                                       epoch,
                                       test,
                                       n_rep=5,
                                       g_tolerance=g_tolerance,
                                       C_samples=test_labels)
        print('Getting reconstruction errors on generated set')
        generated_errors = error_per_sample(identifier,
                                            epoch,
                                            generated,
                                            n_rep=5,
                                            g_tolerance=g_tolerance,
                                            C_samples=synth_labels)
        # NOTE: a leftover pdb.set_trace() breakpoint and in-branch KS tests
        # were removed here; the same KS tests run unconditionally below,
        # after the cached and recomputed code paths merge.
        #        print('Getting reconstruction errors on noisy set')
        #        alpha = 0.5
        #        noisy_samples = alpha*vali + (1-alpha)*np.random.permutation(vali)
        #        noisy_errors = error_per_sample(identifier, epoch, noisy_samples, n_rep=5, g_tolerance=g_tolerance, C_samples=vali_labels)
        noisy_errors = None
        # save!
        errors = {
            'train': train_errors,
            'test': test_errors,
            'generated': generated_errors,
            'noisy': noisy_errors
        }
        np.save(
            './experiments/eval/' + identifier + '_' + str(epoch) + '_' +
            str(g_tolerance) + '.reconstruction_errors.npy', errors)
    # do two-sample Kolomogorov-Smirnov test for equality
    D_test, p_test = ks_2samp(train_errors, test_errors)
    print('KS statistic and p-value for train v. test errors:', D_test, p_test)
    D_gen, p_gen = ks_2samp(generated_errors, train_errors)
    print('KS statistic and p-value for train v. gen errors:', D_gen, p_gen)
    D_gentest, p_gentest = ks_2samp(generated_errors, test_errors)
    print('KS statistic and p-value for gen v. test errors:', D_gentest,
          p_gentest)
    # visualise distribution of errors for train and test
    plotting.reconstruction_errors(
        identifier + '_' + str(epoch) + '_' + str(g_tolerance), train_errors,
        test_errors, generated_errors, noisy_errors)
    # visualise the "hardest" and "easiest" samples from train
    ranking_train = np.argsort(train_errors)
    easiest_train = ranking_train[:6]
    hardest_train = ranking_train[-6:]
    plotting.save_plot_sample(train[easiest_train],
                              epoch,
                              identifier + '_easytrain',
                              n_samples=6,
                              num_epochs=None,
                              ncol=2)
    plotting.save_plot_sample(train[hardest_train],
                              epoch,
                              identifier + '_hardtrain',
                              n_samples=6,
                              num_epochs=None,
                              ncol=2)
    # visualise the "hardest" and "easiest" samples from random
    #    ranking_random = np.argsort(noisy_errors)
    #    easiest_random = ranking_random[:6]
    #    hardest_random = ranking_random[-6:]
    #    plotting.save_plot_sample(random_samples[easiest_random], epoch, identifier + '_easyrandom', n_samples=6, num_epochs=None, ncol=2)
    #    plotting.save_plot_sample(random_samples[hardest_random], epoch, identifier + '_hardrandom', n_samples=6, num_epochs=None, ncol=2)
    return True
Beispiel #4
0
                                        labels=labs)
    else:
        plotting.save_mnist_plot_sample(samps,
                                        0,
                                        identifier + '_real',
                                        n_samples=6,
                                        labels=labs)
elif 'eICU' in data:
    # eICU data: plot downsampled real patient time series instead
    plotting.vis_eICU_patients_downsampled(vis_real,
                                           resample_rate_in_min,
                                           identifier=identifier + '_real',
                                           idx=0)
else:
    # generic time-series data: save a grid of real samples (tagged epoch 0)
    plotting.save_plot_sample(vis_real,
                              0,
                              identifier + '_real',
                              n_samples=6,
                              num_epochs=num_epochs)

# Trace file records one whitespace-separated row of metrics per epoch.
trace = open('./experiments/traces/' + identifier + '.trace.txt', 'w')
trace.write('epoch time D_loss G_loss mmd2 that ll real_ll\n')

# --- train --- #
# Subset of the experiment settings the training loop needs.
train_vars = [
    'batch_size', 'D_rounds', 'G_rounds', 'use_time', 'seq_length',
    'latent_dim', 'num_generated_features', 'cond_dim', 'max_val', 'WGAN_clip',
    'one_hot'
]
train_settings = dict((k, settings[k]) for k in train_vars)

# Wall-clock start time for the training loop.
t0 = time()
Beispiel #5
0
# sigma_opt_thresh = 0.001
# sigma_opt_vars = [var for var in tf.global_variables() if 'SIGMA_optimizer' in var.name]

# --- run the program --- #
# Let GPU memory grow on demand instead of grabbing it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# sess = tf.Session()
sess.run(tf.global_variables_initializer())

# # -- plot the real samples -- #
# Pick 16 real samples at random and save a reference plot of them.
vis_real_indices = np.random.choice(len(samples), size=16)
vis_real = np.float32(samples[vis_real_indices, :, :])
plotting.save_plot_sample(vis_real,
                          0,
                          identifier + '_real',
                          n_samples=16,
                          num_epochs=num_epochs)  # randomly selected 16 samples
plotting.save_samples_real(vis_real, identifier)

# --- train --- #
# Subset of the experiment settings the training loop needs, plus the
# number of signals in the data.
train_vars = [
    'batch_size', 'D_rounds', 'G_rounds', 'use_time', 'seq_length',
    'latent_dim'
]
train_settings = dict((k, settings[k]) for k in train_vars)
train_settings['num_signals'] = num_variables

# Wall-clock start time for the training loop.
t0 = time()
MMD = np.zeros([
    num_epochs,