def main(args: Args):
    train_image = Path(args.train_image)
    train_label = Path(args.train_label)
    test_image = Path(args.test_image)
    test_label = Path(args.test_label)

    data = Mnist(32, 0.9, train_image, train_label, test_image, test_label)
    model = MnistEncoder(28, 64, 3)

    # https://pytorch-lightning.readthedocs.io/en/latest/api/pytorch_lightning.loggers.mlflow.html
    mlflow_logger = MLFlowLogger()
    trainer = pl.Trainer(max_epochs=1, logger=mlflow_logger)
    trainer.fit(model, train_dataloader=data)
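# A minimal sketch of the Args container assumed by main() above. Only the four
# path fields come from the snippet; the argparse wrapper and flag names are
# hypothetical and shown purely for illustration.
import argparse
from dataclasses import dataclass


@dataclass
class Args:
    train_image: str
    train_label: str
    test_image: str
    test_label: str


def parse_args() -> Args:
    # Hypothetical CLI entry point for the training script.
    parser = argparse.ArgumentParser()
    parser.add_argument("--train-image", required=True)
    parser.add_argument("--train-label", required=True)
    parser.add_argument("--test-image", required=True)
    parser.add_argument("--test-label", required=True)
    ns = parser.parse_args()
    return Args(ns.train_image, ns.train_label, ns.test_image, ns.test_label)


if __name__ == "__main__":
    main(parse_args())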
def dataset_iterator(args):
    if args.dataset == 'mnist':
        train_gen, dev_gen, test_gen = Mnist.load(args.batch_size, args.batch_size)
    elif args.dataset == 'cifar10':
        data_dir = '../../../images/cifar-10-batches-py/'
        train_gen, dev_gen = Cifar10.load(args.batch_size, data_dir)
        test_gen = None
    elif args.dataset == 'imagenet':
        data_dir = '../../../images/imagenet12/imagenet_val_png/'
        train_gen, dev_gen = Imagenet.load(args.batch_size, data_dir)
        test_gen = None
    elif args.dataset == 'raise':
        data_dir = '../../../images/raise/'
        train_gen, dev_gen = Raise.load(args.batch_size, data_dir)
        test_gen = None
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    return (train_gen, dev_gen, test_gen)
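# Minimal usage sketch for dataset_iterator(). Only the two fields the function
# reads (dataset, batch_size) are taken from the snippet; the Namespace values
# are illustrative assumptions.
from argparse import Namespace

args = Namespace(dataset='mnist', batch_size=64)
train_gen, dev_gen, test_gen = dataset_iterator(args)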
def main(): print("Local rank: ", hvd.local_rank(), hvd.size()) logdir = osp.join(FLAGS.logdir, FLAGS.exp) if hvd.rank() == 0: if not osp.exists(logdir): os.makedirs(logdir) logger = TensorBoardOutputFormat(logdir) else: logger = None LABEL = None print("Loading data...") if FLAGS.dataset == 'cifar10': dataset = Cifar10(augment=FLAGS.augment, rescale=FLAGS.rescale) test_dataset = Cifar10(train=False, rescale=FLAGS.rescale) channel_num = 3 X_NOISE = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32) X = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32) LABEL = tf.placeholder(shape=(None, 10), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 10), dtype=tf.float32) if FLAGS.large_model: model = ResNet32Large(num_channels=channel_num, num_filters=128, train=True) elif FLAGS.larger_model: model = ResNet32Larger(num_channels=channel_num, num_filters=128) elif FLAGS.wider_model: model = ResNet32Wider(num_channels=channel_num, num_filters=192) else: model = ResNet32(num_channels=channel_num, num_filters=128) elif FLAGS.dataset == 'imagenet': dataset = Imagenet(train=True) test_dataset = Imagenet(train=False) channel_num = 3 X_NOISE = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32) X = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32) LABEL = tf.placeholder(shape=(None, 1000), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 1000), dtype=tf.float32) model = ResNet32Wider(num_channels=channel_num, num_filters=256) elif FLAGS.dataset == 'imagenetfull': channel_num = 3 X_NOISE = tf.placeholder(shape=(None, 128, 128, 3), dtype=tf.float32) X = tf.placeholder(shape=(None, 128, 128, 3), dtype=tf.float32) LABEL = tf.placeholder(shape=(None, 1000), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 1000), dtype=tf.float32) model = ResNet128(num_channels=channel_num, num_filters=64) elif FLAGS.dataset == 'mnist': dataset = Mnist(rescale=FLAGS.rescale) test_dataset = dataset channel_num = 1 X_NOISE = tf.placeholder(shape=(None, 28, 28), dtype=tf.float32) X = tf.placeholder(shape=(None, 28, 28), dtype=tf.float32) LABEL = tf.placeholder(shape=(None, 10), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 10), dtype=tf.float32) model = MnistNet(num_channels=channel_num, num_filters=FLAGS.num_filters) elif FLAGS.dataset == 'dsprites': dataset = DSprites(cond_shape=FLAGS.cond_shape, cond_size=FLAGS.cond_size, cond_pos=FLAGS.cond_pos, cond_rot=FLAGS.cond_rot) test_dataset = dataset channel_num = 1 X_NOISE = tf.placeholder(shape=(None, 64, 64), dtype=tf.float32) X = tf.placeholder(shape=(None, 64, 64), dtype=tf.float32) if FLAGS.dpos_only: LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32) elif FLAGS.dsize_only: LABEL = tf.placeholder(shape=(None, 1), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 1), dtype=tf.float32) elif FLAGS.drot_only: LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32) elif FLAGS.cond_size: LABEL = tf.placeholder(shape=(None, 1), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 1), dtype=tf.float32) elif FLAGS.cond_shape: LABEL = tf.placeholder(shape=(None, 3), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 3), dtype=tf.float32) elif FLAGS.cond_pos: LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32) elif FLAGS.cond_rot: LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32) LABEL_POS = 
tf.placeholder(shape=(None, 2), dtype=tf.float32) else: LABEL = tf.placeholder(shape=(None, 3), dtype=tf.float32) LABEL_POS = tf.placeholder(shape=(None, 3), dtype=tf.float32) model = DspritesNet(num_channels=channel_num, num_filters=FLAGS.num_filters, cond_size=FLAGS.cond_size, cond_shape=FLAGS.cond_shape, cond_pos=FLAGS.cond_pos, cond_rot=FLAGS.cond_rot) print("Done loading...") if FLAGS.dataset == "imagenetfull": # In the case of full imagenet, use custom_tensorflow dataloader data_loader = TFImagenetLoader('train', FLAGS.batch_size, hvd.rank(), hvd.size(), rescale=FLAGS.rescale) else: data_loader = DataLoader(dataset, batch_size=FLAGS.batch_size, num_workers=FLAGS.data_workers, drop_last=True, shuffle=True) batch_size = FLAGS.batch_size weights = [model.construct_weights('context_0')] Y = tf.placeholder(shape=(None), dtype=tf.int32) # Varibles to run in training X_SPLIT = tf.split(X, FLAGS.num_gpus) X_NOISE_SPLIT = tf.split(X_NOISE, FLAGS.num_gpus) LABEL_SPLIT = tf.split(LABEL, FLAGS.num_gpus) LABEL_POS_SPLIT = tf.split(LABEL_POS, FLAGS.num_gpus) LABEL_SPLIT_INIT = list(LABEL_SPLIT) tower_grads = [] tower_gen_grads = [] x_mod_list = [] optimizer = AdamOptimizer(FLAGS.lr, beta1=0.0, beta2=0.999) optimizer = hvd.DistributedOptimizer(optimizer) for j in range(FLAGS.num_gpus): if FLAGS.model_cclass: ind_batch_size = FLAGS.batch_size // FLAGS.num_gpus label_tensor = tf.Variable(tf.convert_to_tensor(np.reshape( np.tile(np.eye(10), (FLAGS.batch_size, 1, 1)), (FLAGS.batch_size * 10, 10)), dtype=tf.float32), trainable=False, dtype=tf.float32) x_split = tf.tile( tf.reshape(X_SPLIT[j], (ind_batch_size, 1, 32, 32, 3)), (1, 10, 1, 1, 1)) x_split = tf.reshape(x_split, (ind_batch_size * 10, 32, 32, 3)) energy_pos = model.forward(x_split, weights[0], label=label_tensor, stop_at_grad=False) energy_pos_full = tf.reshape(energy_pos, (ind_batch_size, 10)) energy_partition_est = tf.reduce_logsumexp(energy_pos_full, axis=1, keepdims=True) uniform = tf.random_uniform(tf.shape(energy_pos_full)) label_tensor = tf.argmax(-energy_pos_full - tf.log(-tf.log(uniform)) - energy_partition_est, axis=1) label = tf.one_hot(label_tensor, 10, dtype=tf.float32) label = tf.Print(label, [label_tensor, energy_pos_full]) LABEL_SPLIT[j] = label energy_pos = tf.concat(energy_pos, axis=0) else: energy_pos = [ model.forward(X_SPLIT[j], weights[0], label=LABEL_POS_SPLIT[j], stop_at_grad=False) ] energy_pos = tf.concat(energy_pos, axis=0) print("Building graph...") x_mod = x_orig = X_NOISE_SPLIT[j] x_grads = [] energy_negs = [] loss_energys = [] energy_negs.extend([ model.forward(tf.stop_gradient(x_mod), weights[0], label=LABEL_SPLIT[j], stop_at_grad=False, reuse=True) ]) eps_begin = tf.zeros(1) steps = tf.constant(0) c = lambda i, x: tf.less(i, FLAGS.num_steps) def langevin_step(counter, x_mod): x_mod = x_mod + tf.random_normal( tf.shape(x_mod), mean=0.0, stddev=0.005 * FLAGS.rescale * FLAGS.noise_scale) energy_noise = energy_start = tf.concat([ model.forward(x_mod, weights[0], label=LABEL_SPLIT[j], reuse=True, stop_at_grad=False, stop_batch=True) ], axis=0) x_grad, label_grad = tf.gradients(FLAGS.temperature * energy_noise, [x_mod, LABEL_SPLIT[j]]) energy_noise_old = energy_noise lr = FLAGS.step_lr if FLAGS.proj_norm != 0.0: if FLAGS.proj_norm_type == 'l2': x_grad = tf.clip_by_norm(x_grad, FLAGS.proj_norm) elif FLAGS.proj_norm_type == 'li': x_grad = tf.clip_by_value(x_grad, -FLAGS.proj_norm, FLAGS.proj_norm) else: print("Other types of projection are not supported!!!") assert False # Clip gradient norm for now if FLAGS.hmc: # Step 
size should be tuned to get around 65% acceptance def energy(x): return FLAGS.temperature * \ model.forward(x, weights[0], label=LABEL_SPLIT[j], reuse=True) x_last = hmc(x_mod, 15., 10, energy) else: x_last = x_mod - (lr) * x_grad x_mod = x_last x_mod = tf.clip_by_value(x_mod, 0, FLAGS.rescale) counter = counter + 1 return counter, x_mod steps, x_mod = tf.while_loop(c, langevin_step, (steps, x_mod)) energy_eval = model.forward(x_mod, weights[0], label=LABEL_SPLIT[j], stop_at_grad=False, reuse=True) x_grad = tf.gradients(FLAGS.temperature * energy_eval, [x_mod])[0] x_grads.append(x_grad) energy_negs.append( model.forward(tf.stop_gradient(x_mod), weights[0], label=LABEL_SPLIT[j], stop_at_grad=False, reuse=True)) test_x_mod = x_mod temp = FLAGS.temperature energy_neg = energy_negs[-1] x_off = tf.reduce_mean( tf.abs(x_mod[:tf.shape(X_SPLIT[j])[0]] - X_SPLIT[j])) loss_energy = model.forward(x_mod, weights[0], reuse=True, label=LABEL, stop_grad=True) print("Finished processing loop construction ...") target_vars = {} if FLAGS.cclass or FLAGS.model_cclass: label_sum = tf.reduce_sum(LABEL_SPLIT[0], axis=0) label_prob = label_sum / tf.reduce_sum(label_sum) label_ent = -tf.reduce_sum( label_prob * tf.math.log(label_prob + 1e-7)) else: label_ent = tf.zeros(1) target_vars['label_ent'] = label_ent if FLAGS.train: if FLAGS.objective == 'logsumexp': pos_term = temp * energy_pos energy_neg_reduced = (energy_neg - tf.reduce_min(energy_neg)) coeff = tf.stop_gradient(tf.exp(-temp * energy_neg_reduced)) norm_constant = tf.stop_gradient(tf.reduce_sum(coeff)) + 1e-4 pos_loss = tf.reduce_mean(temp * energy_pos) neg_loss = coeff * (-1 * temp * energy_neg) / norm_constant loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss)) elif FLAGS.objective == 'cd': pos_loss = tf.reduce_mean(temp * energy_pos) neg_loss = -tf.reduce_mean(temp * energy_neg) loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss)) elif FLAGS.objective == 'softplus': loss_ml = FLAGS.ml_coeff * \ tf.nn.softplus(temp * (energy_pos - energy_neg)) loss_total = tf.reduce_mean(loss_ml) if not FLAGS.zero_kl: loss_total = loss_total + tf.reduce_mean(loss_energy) loss_total = loss_total + \ FLAGS.l2_coeff * (tf.reduce_mean(tf.square(energy_pos)) + tf.reduce_mean(tf.square((energy_neg)))) print("Started gradient computation...") gvs = optimizer.compute_gradients(loss_total) gvs = [(k, v) for (k, v) in gvs if k is not None] print("Applying gradients...") tower_grads.append(gvs) print("Finished applying gradients.") target_vars['loss_ml'] = loss_ml target_vars['total_loss'] = loss_total target_vars['loss_energy'] = loss_energy target_vars['weights'] = weights target_vars['gvs'] = gvs target_vars['X'] = X target_vars['Y'] = Y target_vars['LABEL'] = LABEL target_vars['LABEL_POS'] = LABEL_POS target_vars['X_NOISE'] = X_NOISE target_vars['energy_pos'] = energy_pos target_vars['energy_start'] = energy_negs[0] if len(x_grads) >= 1: target_vars['x_grad'] = x_grads[-1] target_vars['x_grad_first'] = x_grads[0] else: target_vars['x_grad'] = tf.zeros(1) target_vars['x_grad_first'] = tf.zeros(1) target_vars['x_mod'] = x_mod target_vars['x_off'] = x_off target_vars['temp'] = temp target_vars['energy_neg'] = energy_neg target_vars['test_x_mod'] = test_x_mod target_vars['eps_begin'] = eps_begin if FLAGS.train: grads = average_gradients(tower_grads) train_op = optimizer.apply_gradients(grads) target_vars['train_op'] = train_op config = tf.ConfigProto() if hvd.size() > 1: config.gpu_options.visible_device_list = str(hvd.local_rank()) sess = 
tf.Session(config=config) saver = loader = tf.train.Saver(max_to_keep=30, keep_checkpoint_every_n_hours=6) total_parameters = 0 for variable in tf.trainable_variables(): # shape is an array of tf.Dimension shape = variable.get_shape() variable_parameters = 1 for dim in shape: variable_parameters *= dim.value total_parameters += variable_parameters print("Model has a total of {} parameters".format(total_parameters)) sess.run(tf.global_variables_initializer()) resume_itr = 0 if (FLAGS.resume_iter != -1 or not FLAGS.train) and hvd.rank() == 0: model_file = osp.join(logdir, 'model_{}'.format(FLAGS.resume_iter)) resume_itr = FLAGS.resume_iter # saver.restore(sess, model_file) optimistic_restore(sess, model_file) sess.run(hvd.broadcast_global_variables(0)) print("Initializing variables...") print("Start broadcast") print("End broadcast") if FLAGS.train: print("Training phase") train(target_vars, saver, sess, logger, data_loader, resume_itr, logdir) print("Testing phase") test(target_vars, saver, sess, logger, data_loader)
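# The langevin_step body above does one step of clipped Langevin dynamics on the
# energy: add Gaussian noise, take a gradient step on the energy, clip to the
# valid range. A minimal self-contained NumPy sketch of that update follows; the
# quadratic energy, step size, and noise scale are illustrative assumptions, not
# the model's values.
import numpy as np

rng = np.random.default_rng(0)


def toy_energy(x):
    # Toy stand-in for model.forward: a quadratic bowl centred at 0.5.
    return 0.5 * np.sum((x - 0.5) ** 2)


def toy_energy_grad(x):
    return x - 0.5


def toy_langevin_step(x, step_lr=0.1, noise_scale=0.005, rescale=1.0):
    # Same structure as the TF while-loop body: noise, gradient step, clip.
    x = x + noise_scale * rescale * rng.standard_normal(x.shape)
    x = x - step_lr * toy_energy_grad(x)
    return np.clip(x, 0.0, rescale)


x = rng.uniform(0.0, 1.0, size=(4, 32, 32, 3))  # random "image" initialisation
for _ in range(60):
    x = toy_langevin_step(x)
print(toy_energy(x))  # energy decreases as the samples settle near the minimum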
def main():
    # Initialize dataset
    if FLAGS.dataset == 'cifar10':
        dataset = Cifar10(train=False, rescale=FLAGS.rescale)
        channel_num = 3
        dim_input = 32 * 32 * 3
    elif FLAGS.dataset == 'imagenet':
        dataset = ImagenetClass()
        channel_num = 3
        dim_input = 64 * 64 * 3
    elif FLAGS.dataset == 'mnist':
        dataset = Mnist(train=False, rescale=FLAGS.rescale)
        channel_num = 1
        dim_input = 28 * 28 * 1
    elif FLAGS.dataset == 'dsprites':
        dataset = DSprites()
        channel_num = 1
        dim_input = 64 * 64 * 1
    elif FLAGS.dataset == '2d' or FLAGS.dataset == 'gauss':
        dataset = Box2D()
        dim_output = 1

    data_loader = DataLoader(
        dataset,
        batch_size=FLAGS.batch_size,
        num_workers=FLAGS.data_workers,
        drop_last=False,
        shuffle=True)

    if FLAGS.dataset == 'mnist':
        model = MnistNet(num_channels=channel_num)
    elif FLAGS.dataset == 'cifar10':
        if FLAGS.large_model:
            model = ResNet32Large(num_filters=128)
        elif FLAGS.wider_model:
            model = ResNet32Wider(num_filters=192)
        else:
            model = ResNet32(num_channels=channel_num, num_filters=128)
    elif FLAGS.dataset == 'dsprites':
        model = DspritesNet(num_channels=channel_num, num_filters=FLAGS.num_filters)

    weights = model.construct_weights('context_{}'.format(0))

    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    saver = loader = tf.train.Saver(max_to_keep=10)

    sess.run(tf.global_variables_initializer())
    logdir = osp.join(FLAGS.logdir, FLAGS.exp)
    model_file = osp.join(logdir, 'model_{}'.format(FLAGS.resume_iter))
    resume_itr = FLAGS.resume_iter

    if FLAGS.resume_iter != "-1":
        optimistic_restore(sess, model_file)
    else:
        print("WARNING, YOU ARE NOT LOADING A SAVE FILE")
        # saver.restore(sess, model_file)

    chain_weights, a_prev, a_new, x, x_init, approx_lr = ancestral_sample(
        model, weights, FLAGS.batch_size, temp=FLAGS.temperature)
    print("Finished constructing ancestral sample ...................")

    if FLAGS.dataset != "gauss":
        comb_weights_cum = []
        batch_size = tf.shape(x_init)[0]
        label_tiled = tf.tile(label_default, (batch_size, 1))
        e_compute = -FLAGS.temperature * model.forward(
            x_init, weights, label=label_tiled)
        e_pos_list = []

        for data_corrupt, data, label_gt in tqdm(data_loader):
            e_pos = sess.run([e_compute], {x_init: data})[0]
            e_pos_list.extend(list(e_pos))

        print(len(e_pos_list))
        print("Positive sample probability ",
              np.mean(e_pos_list), np.std(e_pos_list))

    if FLAGS.dataset == "2d":
        alr = 0.0045
    elif FLAGS.dataset == "gauss":
        alr = 0.0085
    elif FLAGS.dataset == "mnist":
        alr = 0.0065
        # 90 alr = 0.0035
    else:
        # alr = 0.0125
        if FLAGS.rescale == 8:
            alr = 0.0085
        else:
            alr = 0.0045

    # for i in range(1):
    tot_weight = 0
    for j in tqdm(range(1, FLAGS.pdist + 1)):
        if j == 1:
            if FLAGS.dataset == "cifar10":
                x_curr = np.random.uniform(
                    0, FLAGS.rescale, size=(FLAGS.batch_size, 32, 32, 3))
            elif FLAGS.dataset == "gauss":
                x_curr = np.random.uniform(
                    0, FLAGS.rescale, size=(FLAGS.batch_size, FLAGS.gauss_dim))
            elif FLAGS.dataset == "mnist":
                x_curr = np.random.uniform(
                    0, FLAGS.rescale, size=(FLAGS.batch_size, 28, 28))
            else:
                x_curr = np.random.uniform(
                    0, FLAGS.rescale, size=(FLAGS.batch_size, 2))

        alpha_prev = (j - 1) / FLAGS.pdist
        alpha_new = j / FLAGS.pdist
        cweight, x_curr = sess.run(
            [chain_weights, x],
            {a_prev: alpha_prev,
             a_new: alpha_new,
             x_init: x_curr,
             approx_lr: alr * (5 ** (2.5 * -alpha_prev))})
        tot_weight = tot_weight + cweight

    print("Total values of lower value based off forward sampling",
          np.mean(tot_weight), np.std(tot_weight))

    tot_weight = 0

    for j in tqdm(range(FLAGS.pdist, 0, -1)):
        alpha_new = (j - 1) / FLAGS.pdist
        alpha_prev = j / FLAGS.pdist
        cweight, x_curr = sess.run(
            [chain_weights, x],
            {a_prev: alpha_prev,
             a_new: alpha_new,
             x_init: x_curr,
             approx_lr: alr * (5 ** (2.5 * -alpha_prev))})
        tot_weight = tot_weight - cweight

    print("Total values of upper value based off backward sampling",
          np.mean(tot_weight), np.std(tot_weight))
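# The two loops above accumulate chain weights while the annealing parameter
# moves from 0 to 1 (and back) in FLAGS.pdist steps, giving stochastic lower and
# upper bounds on the log partition function. For reference, a minimal
# self-contained NumPy sketch of the same forward annealed-importance-sampling
# bookkeeping on a toy 1-D Gaussian energy; the target energy, prior, Metropolis
# transition, and all constants here are illustrative assumptions.
import numpy as np

rng = np.random.default_rng(0)


def neg_energy_target(x):
    # Unnormalized target log-density: Gaussian centred at 3 (same variance as
    # the prior, so the true log-ratio of partition functions is 0).
    return -0.5 * (x - 3.0) ** 2


def log_prior(x):
    return -0.5 * x ** 2


n_chains, n_steps, step_size = 1000, 200, 0.25
x = rng.standard_normal(n_chains)   # samples from the prior (alpha = 0)
log_w = np.zeros(n_chains)          # accumulated log importance weights

for j in range(1, n_steps + 1):
    a_prev, a_new = (j - 1) / n_steps, j / n_steps
    # Incremental weight for moving the annealing parameter from a_prev to a_new.
    log_w += (a_new - a_prev) * (neg_energy_target(x) - log_prior(x))
    # One Metropolis transition that leaves the intermediate distribution invariant.
    log_p = lambda z: (1 - a_new) * log_prior(z) + a_new * neg_energy_target(z)
    prop = x + step_size * rng.standard_normal(n_chains)
    accept = np.log(rng.uniform(size=n_chains)) < log_p(prop) - log_p(x)
    x = np.where(accept, prop, x)

# Stochastic lower bound on log(Z_target / Z_prior); close to 0 for this toy case.
print(np.log(np.mean(np.exp(log_w - log_w.max()))) + log_w.max())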
def main():
    log_file = make_file_name()
    print args

    def evaluate(x, y, xu, yu, eval_tensors, iw=1):
        if iw == 1:
            xs, ys, xus, yus = [x], [y], [xu], [yu]
        else:
            batches = 2000
            xs, ys = list(tb.nputils.split(x, batches)), list(
                tb.nputils.split(y, batches))
            xus, yus = list(tb.nputils.split(xu, batches)), list(
                tb.nputils.split(yu, batches))

        values = []
        for x, y, xu, yu in zip(xs, ys, xus, yus):
            feed_dict = {
                T.x: x,
                T.xu: xu,
                T.y: y,
                T.yu: yu,
                T.phase: 0,
                T.u: u,
                T.iw: iw
            }
            v = T.sess.run(eval_tensors, feed_dict)
            values += [v]

        values = [np.mean(v).astype(v[0].dtype) for v in zip(*values)]
        return values

    def train(T_train_step, T_loss, data, iterep, n_epochs):
        for i in xrange(iterep * n_epochs):
            x, y, xu, yu = data.next_batch(args.bs)
            feed_dict = {
                T.x: x,
                T.xu: xu,
                T.y: y,
                T.yu: yu,
                T.phase: 1,
                T.u: u,
                T.iw: 1
            }
            _, loss = T.sess.run([T_train_step, T_loss], feed_dict)
            message = "loss: {:.2e}".format(loss)
            end_epoch, epoch = tb.utils.progbar(i, iterep, message, bar_length=5)

            if np.isnan(loss):
                print "NaN detected"
                quit()

            if end_epoch:
                iw = 100 if epoch % args.n_checks == 0 else 1
                tr_values = evaluate(data.x_label, data.y_label,
                                     data.x_train, data.y_train,
                                     writer.tensors, iw=1)
                va_values = evaluate(data.x_valid, data.y_valid,
                                     data.x_valid, data.y_valid,
                                     writer.tensors[:-1], iw=iw)
                te_values = evaluate(data.x_test, data.y_test,
                                     data.x_test, data.y_test,
                                     writer.tensors[:-1], iw=iw)
                values = tr_values + va_values + te_values + [epoch]
                writer.write(values=values)

    def make_writer():
        # Make log file
        writer = tb.FileWriter(log_file, args=args, pipe_to_sys=True,
                               overwrite=args.run >= 999)
        # Train log
        writer.add_var('train_iw', '{:4d}', T.iw)
        for v in ['bcde', 'bjde_x', 'bjde_xy', 'bjde_xu', 'bjde_yu', 'loss']:
            writer.add_var('train_{:s}'.format(v), '{:8.3f}', T[v])
        writer.add_var('l2_loss', '{:9.2e}', T.l2)
        # Validation log
        writer.add_var('valid_iw', '{:4d}')
        for v in ['bcde', 'bcde_x', 'bjde_xy', 'bjde_xu', 'bjde_yu', 'loss']:
            writer.add_var('valid_{:s}'.format(v), '{:8.3f}')
        # Test log
        writer.add_var('test_iw', '{:4d}')
        for v in ['bcde', 'bcde_x', 'bjde_xy', 'bjde_xu', 'bjde_yu', 'loss']:
            writer.add_var('test_{:s}'.format(v), '{:8.3f}')
        # Extra info
        writer.add_var('epoch', '{:>8d}')
        writer.initialize()
        return writer

    ###############
    # Build model #
    ###############
    tf.reset_default_graph()
    T = tb.utils.TensorDict(dict(
        bcde=constant(0),
        bjde_x=constant(0),
        bjde_xu=constant(0),
        bjde_yu=constant(0),
        bjde_xy=constant(0),
        l2=constant(0),
        loss=constant(0)))

    T.xu = placeholder((None, args.x_size), name='xu')
    T.yu = placeholder((None, args.y_size), name='yu')
    T.x = placeholder((None, args.x_size), name='x')
    T.y = placeholder((None, args.y_size), name='y')
    T.iw = placeholder(None, 'int32', name='iw') * 1  # hack for pholder eval
    T.u = placeholder(None, name='u')
    T.phase = placeholder(None, tf.bool, name='phase')

    if args.model == 'conditional':
        conditional(T)
    elif args.model in {'hybrid', 'hybrid_factored'}:
        hybrid(T)
    elif args.model == 'pretrained':
        pretrained(T)

    T.sess = tf.Session()
    T.sess.run(tf.global_variables_initializer())

    # Push all labeled data into unlabeled data set as well if using pretraining
    mnist = Mnist(args.n_label, args.seed, args.task, shift=args.shift,
                  duplicate='pretrain' in args.model, binarize=True)

    # Define remaining optimization hyperparameters
    if args.model == 'conditional':
        iterep = args.n_label / args.bs
        u = 1
    elif args.model in {'hybrid', 'hybrid_factored'}:
        iterep = args.n_total / args.bs
        u = 1 - args.n_label / float(args.n_total)
    elif args.model == 'pretrained':
        pretrain_iterep = args.n_total / args.bs
        iterep = args.n_label / args.bs
        u = 1

    # Sanity checks and creation of logger
    print "Data/Task statistics"
    print "Task:", args.task
    print "Data shapes of (x, y) for Labeled/Train/Valid/Test sets"
    print(mnist.x_label.shape, mnist.y_label.shape)
    print(mnist.x_train.shape, mnist.y_train.shape)
    print(mnist.x_valid.shape, mnist.y_valid.shape)
    print(mnist.x_test.shape, mnist.y_test.shape)
    writer = make_writer()

    ###############
    # Train model #
    ###############
    if 'pretrained' in args.model:
        print "Pretrain epochs, iterep", args.n_pretrain_epochs, pretrain_iterep
        train(T.pre_train_step, T.pre_loss, mnist, pretrain_iterep,
              args.n_pretrain_epochs)

    if 'hybrid' in args.model:
        print "Hybrid weighting on x_train and x_label:", (u, 1 - u)

    print "Epochs, Iterep", args.n_epochs, iterep
    train(T.train_step, T.loss, mnist, iterep, args.n_epochs)
    phase = placeholder((), tf.bool, name='phase')
))

exec "from {0:s} import {0:s}".format(args.model)
exec "T = {:s}(T)".format(args.model)
T.sess.run(tf.global_variables_initializer())

if args.model != 'classifier':
    path = tf.train.latest_checkpoint('save')
    restorer = tf.train.Saver(tf.get_collection('trainable_variables', 'enc'))
    restorer.restore(T.sess, path)

#############
# Load data #
#############
mnist = Mnist(size=32)
svhn = Svhn(size=32)

#########
# Train #
#########
bs = 100
iterep = 600
n_epoch = 5000 if args.model != 'classifier' else 17
epoch = 0
feed_dict = {T.phase: 1}
saver = tf.train.Saver()

print "Batch size:", bs
print "Iterep:", iterep
print "Total iterations:", n_epoch * iterep
q_net['z'].mu = Sequential([Dense(50, input_dim=256)])
q_net['z'].var = Sequential([Dense(50, input_dim=256),
                             Activation('softplus')])
p_net['x'].net = Sequential([Dense(256, input_dim=60),
                             Activation('relu'),
                             Dense(256),
                             Activation('relu'),
                             Dense(784),
                             Activation('sigmoid')])

vae = VAE(u_net=u_net, q_net=q_net, p_net=p_net)
vae.compile('adam', loss_weights=[1.0, 1.0, 1.0])

dataloader = Mnist(nb_data=100, batchsize=100)
losslog = LossLog()
nll = NegativeLogLikelihood(dataloader, n_samples=1, run_every=1,
                            run_training=True, run_validation=True,
                            display_epoch=True, end_line=True)

vae.fit(dataloader,
        nb_epoch=1000,
        iter_per_epoch=600,
        callbacks=[losslog, nll],
        verbose=1)
def main_single(gpu, FLAGS):
    if FLAGS.slurm:
        init_distributed_mode(FLAGS)

    os.environ['MASTER_ADDR'] = FLAGS.master_addr
    os.environ['MASTER_PORT'] = FLAGS.port

    rank_idx = FLAGS.node_rank * FLAGS.gpus + gpu
    world_size = FLAGS.nodes * FLAGS.gpus
    print("Values of args: ", FLAGS)

    if world_size > 1:
        if FLAGS.slurm:
            dist.init_process_group(
                backend='nccl',
                init_method='env://',
                world_size=world_size,
                rank=rank_idx)
        else:
            dist.init_process_group(
                backend='nccl',
                init_method='tcp://localhost:1700',
                world_size=world_size,
                rank=rank_idx)

    if FLAGS.dataset == "cifar10":
        train_dataset = Cifar10(FLAGS)
        valid_dataset = Cifar10(FLAGS, train=False, augment=False)
        test_dataset = Cifar10(FLAGS, train=False, augment=False)
    elif FLAGS.dataset == "stl":
        train_dataset = STLDataset(FLAGS)
        valid_dataset = STLDataset(FLAGS, train=False)
        test_dataset = STLDataset(FLAGS, train=False)
    elif FLAGS.dataset == "object":
        train_dataset = ObjectDataset(FLAGS.cond_idx)
        valid_dataset = ObjectDataset(FLAGS.cond_idx)
        test_dataset = ObjectDataset(FLAGS.cond_idx)
    elif FLAGS.dataset == "imagenet":
        train_dataset = ImageNet()
        valid_dataset = ImageNet()
        test_dataset = ImageNet()
    elif FLAGS.dataset == "mnist":
        train_dataset = Mnist(train=True)
        valid_dataset = Mnist(train=False)
        test_dataset = Mnist(train=False)
    elif FLAGS.dataset == "celeba":
        train_dataset = CelebAHQ(cond_idx=FLAGS.cond_idx)
        valid_dataset = CelebAHQ(cond_idx=FLAGS.cond_idx)
        test_dataset = CelebAHQ(cond_idx=FLAGS.cond_idx)
    elif FLAGS.dataset == "lsun":
        train_dataset = LSUNBed(cond_idx=FLAGS.cond_idx)
        valid_dataset = LSUNBed(cond_idx=FLAGS.cond_idx)
        test_dataset = LSUNBed(cond_idx=FLAGS.cond_idx)
    else:
        assert False

    train_dataloader = DataLoader(
        train_dataset,
        num_workers=FLAGS.data_workers,
        batch_size=FLAGS.batch_size,
        shuffle=True,
        drop_last=True)
    valid_dataloader = DataLoader(
        valid_dataset,
        num_workers=FLAGS.data_workers,
        batch_size=FLAGS.batch_size,
        shuffle=True,
        drop_last=True)
    test_dataloader = DataLoader(
        test_dataset,
        num_workers=FLAGS.data_workers,
        batch_size=FLAGS.batch_size,
        shuffle=True,
        drop_last=True)

    FLAGS_OLD = FLAGS

    logdir = osp.join(FLAGS.logdir, FLAGS.exp)
    best_inception = 0.0

    if FLAGS.resume_iter != 0:
        model_path = osp.join(logdir, "model_{}.pth".format(FLAGS.resume_iter))
        checkpoint = torch.load(model_path)
        best_inception = checkpoint['best_inception']
        FLAGS = checkpoint['FLAGS']

        FLAGS.resume_iter = FLAGS_OLD.resume_iter
        FLAGS.nodes = FLAGS_OLD.nodes
        FLAGS.gpus = FLAGS_OLD.gpus
        FLAGS.node_rank = FLAGS_OLD.node_rank
        FLAGS.master_addr = FLAGS_OLD.master_addr
        FLAGS.train = FLAGS_OLD.train
        FLAGS.num_steps = FLAGS_OLD.num_steps
        FLAGS.step_lr = FLAGS_OLD.step_lr
        FLAGS.batch_size = FLAGS_OLD.batch_size
        FLAGS.ensembles = FLAGS_OLD.ensembles
        FLAGS.kl_coeff = FLAGS_OLD.kl_coeff
        FLAGS.repel_im = FLAGS_OLD.repel_im
        FLAGS.save_interval = FLAGS_OLD.save_interval

        for key in dir(FLAGS):
            if "__" not in key:
                FLAGS_OLD[key] = getattr(FLAGS, key)

        FLAGS = FLAGS_OLD

    if FLAGS.dataset == "cifar10":
        model_fn = ResNetModel
    elif FLAGS.dataset == "stl":
        model_fn = ResNetModel
    elif FLAGS.dataset == "object":
        model_fn = CelebAModel
    elif FLAGS.dataset == "mnist":
        model_fn = MNISTModel
    elif FLAGS.dataset == "celeba":
        model_fn = CelebAModel
    elif FLAGS.dataset == "lsun":
        model_fn = CelebAModel
    elif FLAGS.dataset == "imagenet":
        model_fn = ImagenetModel
    else:
        assert False

    models = [model_fn(FLAGS).train() for i in range(FLAGS.ensembles)]
    models_ema = [model_fn(FLAGS).train() for i in range(FLAGS.ensembles)]

    torch.cuda.set_device(gpu)

    if FLAGS.cuda:
        models = [model.cuda(gpu) for model in models]
        models_ema = [model_ema.cuda(gpu) for model_ema in models_ema]

    if FLAGS.gpus > 1:
        sync_model(models)

    parameters = []
    for model in models:
        parameters.extend(list(model.parameters()))

    optimizer = Adam(parameters, lr=FLAGS.lr, betas=(0.0, 0.9), eps=1e-8)
    ema_model(models, models_ema, mu=0.0)

    logger = TensorBoardOutputFormat(logdir)

    it = FLAGS.resume_iter

    if not osp.exists(logdir):
        os.makedirs(logdir)

    checkpoint = None
    if FLAGS.resume_iter != 0:
        model_path = osp.join(logdir, "model_{}.pth".format(FLAGS.resume_iter))
        checkpoint = torch.load(model_path)
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

        for i, (model, model_ema) in enumerate(zip(models, models_ema)):
            model.load_state_dict(checkpoint['model_state_dict_{}'.format(i)])
            model_ema.load_state_dict(checkpoint['ema_model_state_dict_{}'.format(i)])

    print("New Values of args: ", FLAGS)

    pytorch_total_params = sum(
        [p.numel() for p in model.parameters() if p.requires_grad])
    print("Number of parameters for models", pytorch_total_params)

    train(models, models_ema, optimizer, logger, train_dataloader,
          FLAGS.resume_iter, logdir, FLAGS, gpu, best_inception)
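# main_single takes (gpu, FLAGS), the signature torch.multiprocessing.spawn
# passes to its target, so a plausible single-node launcher looks like the
# sketch below. The wrapper itself is an assumption and not part of the snippet
# above.
import torch.multiprocessing as mp


def main():
    # Hypothetical launcher: one worker process per local GPU.
    if FLAGS.gpus > 1:
        mp.spawn(main_single, nprocs=FLAGS.gpus, args=(FLAGS,))
    else:
        main_single(0, FLAGS)


if __name__ == "__main__":
    main()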
def test():
    from data import Mnist

    mnist = Mnist("./cache/")
    model = train(mnist.train_images, mnist.train_labels)
    confmat = get_confusion_matrix(model, mnist.test_images, mnist.test_labels)
    print(confmat.matrix_str())
kl_z = -log_norm(z, *z_prior) + log_norm(z, *z_post)
u_loss = tf.transpose(tf.reshape(rec_x + rec_y + kl_z, (10, -1)))

qy = tf.nn.softmax(y_logits)
ln_qy = tf.nn.log_softmax(y_logits)
u_loss = tf.reduce_mean(
    tf.reduce_sum(u_loss * qy + qy * ln_qy, axis=-1))

wl = placeholder(None, name='wl')
wu = placeholder(None, name='wu')
wa = placeholder(None, name='wa')

with tf.name_scope('loss'):
    y = placeholder((None, 10), name='y')
    loss = wl * l_loss + wu * u_loss + wa * a_loss
    train_step = tf.train.AdamOptimizer().minimize(loss)

mnist = Mnist(100, 0, binarize=False, duplicate=False)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

wu = 1.0
wl = 1.0
wa = 1.0

writer.add_var('train_acc', '{:8.3f}', acc)
writer.add_var('train_a_loss', '{:8.3f}', a_loss)
writer.add_var('train_l_loss', '{:8.3f}', l_loss)
writer.add_var('train_u_loss', '{:8.3f}', u_loss)
writer.add_var('test_acc', '{:8.3f}')
writer.add_var('test_a_loss', '{:8.3f}')
writer.add_var('test_l_loss', '{:8.3f}')
writer.add_var('test_u_loss', '{:8.3f}')
writer.add_var('epoch', '{:>8d}')
def compute_inception(model):
    size = FLAGS.im_number
    num_steps = size // 1000

    images = []
    test_ims = []

    if FLAGS.dataset == "cifar10":
        test_dataset = Cifar10(FLAGS)
    elif FLAGS.dataset == "celeba":
        test_dataset = CelebAHQ()
    elif FLAGS.dataset == "mnist":
        test_dataset = Mnist(train=True)

    test_dataloader = DataLoader(
        test_dataset,
        batch_size=FLAGS.batch_size,
        num_workers=4,
        shuffle=True,
        drop_last=False)

    if FLAGS.dataset == "cifar10":
        for data_corrupt, data, label_gt in tqdm(test_dataloader):
            data = data.numpy()
            test_ims.extend(list(rescale_im(data)))

            if len(test_ims) > 10000:
                break
    elif FLAGS.dataset == "mnist":
        for data_corrupt, data, label_gt in tqdm(test_dataloader):
            data = data.numpy()
            test_ims.extend(list(np.tile(rescale_im(data), (1, 1, 3))))

            if len(test_ims) > 10000:
                break

    test_ims = test_ims[:10000]

    classes = 10

    print(FLAGS.batch_size)
    data_buffer = None

    for j in range(num_steps):
        itr = int(1000 / 500 * FLAGS.repeat_scale)

        if data_buffer is None:
            data_buffer = InceptionReplayBuffer(1000)

        curr_index = 0

        identity = np.eye(classes)

        if FLAGS.dataset == "celeba":
            n = 128
            c = 3
        elif FLAGS.dataset == "mnist":
            n = 28
            c = 1
        else:
            n = 32
            c = 3

        for i in tqdm(range(itr)):
            noise_scale = [1]
            if len(data_buffer) < 1000:
                x_init = np.random.uniform(0, 1, (FLAGS.batch_size, c, n, n))
                label = np.random.randint(0, classes, (FLAGS.batch_size))

                x_init = torch.Tensor(x_init).cuda()
                label = identity[label]
                label = torch.Tensor(label).cuda()

                x_new, _ = gen_image(label, FLAGS, model, x_init, FLAGS.num_steps)
                x_new = x_new.detach().cpu().numpy()
                label = label.detach().cpu().numpy()
                data_buffer.add(x_new, label)
            else:
                if i < itr - FLAGS.nomix:
                    (x_init, label), idx = data_buffer.sample(
                        FLAGS.batch_size, transform=FLAGS.transform)
                else:
                    if FLAGS.dataset == "celeba":
                        n = 20
                    else:
                        n = 2

                    ix = i % n
                    # for i in range(n):
                    start_idx = (1000 // n) * ix
                    end_idx = (1000 // n) * (ix + 1)
                    (x_init, label) = data_buffer._encode_sample(
                        list(range(start_idx, end_idx)), transform=False)
                    idx = list(range(start_idx, end_idx))

                x_init = torch.Tensor(x_init).cuda()
                label = torch.Tensor(label).cuda()
                x_new, energy = gen_image(label, FLAGS, model, x_init,
                                          FLAGS.num_steps)
                energy = energy.cpu().detach().numpy()
                x_new = x_new.cpu().detach().numpy()
                label = label.cpu().detach().numpy()
                data_buffer.set_elms(idx, x_new, label)

                if FLAGS.im_number != 50000:
                    print(np.mean(energy), np.std(energy))

            curr_index += 1

        ims = np.array(data_buffer._storage[:1000])
        ims = rescale_im(ims).transpose((0, 2, 3, 1))

        if FLAGS.dataset == "mnist":
            ims = np.tile(ims, (1, 1, 1, 3))

        images.extend(list(ims))

    random.shuffle(images)
    saveim = osp.join('sandbox_cachedir', FLAGS.exp, "test{}.png".format(FLAGS.idx))

    if FLAGS.dataset == "cifar10":
        rix = np.random.permutation(1000)[:100]
        ims = ims[rix]
        im_panel = ims.reshape((10, 10, 32, 32, 3)).transpose(
            (0, 2, 1, 3, 4)).reshape((320, 320, 3))
        imsave(saveim, im_panel)
        print("Saved image!!!!")

        splits = max(1, len(images) // 5000)
        score, std = get_inception_score(images, splits=splits)
        print("Inception score of {} with std of {}".format(score, std))

        # FID score
        n = min(len(images), len(test_ims))
        fid = get_fid_score(images, test_ims)
        print("FID of score {}".format(fid))
    elif FLAGS.dataset == "mnist":
        # ims = ims[:100]
        # im_panel = ims.reshape((10, 10, 32, 32, 3)).transpose((0, 2, 1, 3, 4)).reshape((320, 320, 3))
        # imsave(saveim, im_panel)

        ims = ims[:100]
        im_panel = ims.reshape((10, 10, 28, 28, 3)).transpose(
            (0, 2, 1, 3, 4)).reshape((280, 280, 3))
        imsave(saveim, im_panel)
        print("Saved image!!!!")

        splits = max(1, len(images) // 5000)
        # score, std = get_inception_score(images, splits=splits)
        # print("Inception score of {} with std of {}".format(score, std))

        # FID score
        n = min(len(images), len(test_ims))
        fid = get_fid_score(images, test_ims)
        print("FID of score {}".format(fid))
    elif FLAGS.dataset == "celeba":
        ims = ims[:25]
        im_panel = ims.reshape((5, 5, 128, 128, 3)).transpose(
            (0, 2, 1, 3, 4)).reshape((5 * 128, 5 * 128, 3))
        imsave(saveim, im_panel)