Example #1
File: utils.py Project: RuiShu/bcde
def build_optimizer(loss, update_ops=[], scope=None, reuse=None):
    with tf.variable_scope(scope, 'gradients', reuse=reuse):
        print "Building optimizer"
        optimizer = AdamaxOptimizer(args.lr) if args.adamax else AdamOptimizer(
            args.lr)
        # max clip and max norm hyperparameters from Sonderby's LVAE code
        clipped, grad_norm = clip_gradients(optimizer,
                                            loss,
                                            max_clip=0.9,
                                            max_norm=4)
        with tf.control_dependencies(update_ops):
            train_step = optimizer.apply_gradients(clipped)
    return train_step
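
Note: clip_gradients is defined elsewhere in RuiShu/bcde. A minimal sketch of what such a helper plausibly does, assuming per-value clipping at max_clip followed by global-norm rescaling at max_norm (the exact behavior in the repo may differ):

import tensorflow as tf

def clip_gradients(optimizer, loss, max_clip=0.9, max_norm=4):
    # Raw (gradient, variable) pairs for the loss.
    grads_and_vars = optimizer.compute_gradients(loss)
    grads = [tf.clip_by_value(g, -max_clip, max_clip)
             for g, v in grads_and_vars if g is not None]
    variables = [v for g, v in grads_and_vars if g is not None]
    # Jointly rescale so the global norm does not exceed max_norm.
    grads, grad_norm = tf.clip_by_global_norm(grads, max_norm)
    return list(zip(grads, variables)), grad_norm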
Example #2
    def add_training_op(self):
        """Sets up the training Ops.

        Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this
        function is what must be passed to the `sess.run()` call to cause the model to train. For more 
        information, see:

        https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer
        
        Examples: https://github.com/pbhatnagar3/cs224s-tensorflow-tutorial/blob/master/tensorflow%20MNIST.ipynb
        https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/multilayer_perceptron.ipynb
        """

        # logits [16, 50, 2] -> grabbing last timestep to compare logits [16, 2] against targets [16]
        logits_shape = tf.shape(self.logits)
        reshaped_logits = tf.slice(self.logits, [0, logits_shape[1] - 1, 0],
                                   [-1, 1, -1])
        reshaped_logits = tf.reshape(reshaped_logits,
                                     shape=[logits_shape[0], logits_shape[2]])
        # reshaped_logits = tf.reshape(self.logits, shape=[logits_shape[0], logits_shape[1]*logits_shape[2]])

        self.cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=reshaped_logits, labels=self.targets_placeholder))
        optimizer = AdamaxOptimizer(Config.lr).minimize(self.cost)

        # TODO: IS LOGITS[0] LAUGHTER OR LOGITS[1]????

        self.pred = tf.argmax(reshaped_logits, 1)
        correct_pred = tf.equal(self.pred,
                                tf.cast(self.targets_placeholder, tf.int64))
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        self.optimizer = optimizer
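
Every example on this page relies on a user-defined AdamaxOptimizer rather than a built-in TensorFlow class. The underlying Adamax update rule (Kingma & Ba, 2015) replaces Adam's second-moment estimate with a running infinity norm; a NumPy restatement for reference (illustrative only, not the class used above):

import numpy as np

def adamax_step(theta, grad, m, u, t, lr=0.002, beta1=0.9, beta2=0.999,
                eps=1e-8):
    m = beta1 * m + (1.0 - beta1) * grad     # first-moment estimate
    u = np.maximum(beta2 * u, np.abs(grad))  # infinity-norm estimate
    # Only the first moment needs bias correction (t is the step count, >= 1).
    theta = theta - (lr / (1.0 - beta1 ** t)) * m / (u + eps)
    return theta, m, u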
Example #3
    def build_magan(self):
        # Generator
        self.g = self.generator(self.z)

        # Discriminator
        _, d_real = self.discriminator(self.x)
        _, d_fake = self.discriminator(self.g, reuse=True)

        self.d_real_loss = t.mse_loss(self.x, d_real, self.batch_size)
        self.d_fake_loss = t.mse_loss(self.g, d_fake, self.batch_size)
        self.d_loss = self.d_real_loss + tf.maximum(0.,
                                                    self.m - self.d_fake_loss)
        self.g_loss = self.d_fake_loss

        # Summary
        tf.summary.scalar("loss/d_loss", self.d_loss)
        tf.summary.scalar("loss/d_real_loss", self.d_real_loss)
        tf.summary.scalar("loss/d_fake_loss", self.d_fake_loss)
        tf.summary.scalar("loss/g_loss", self.g_loss)

        # Optimizer
        t_vars = tf.trainable_variables()
        d_params = [v for v in t_vars if v.name.startswith('d')]
        g_params = [v for v in t_vars if v.name.startswith('g')]

        self.d_op = AdamaxOptimizer(learning_rate=self.lr,
                                    beta1=self.beta1).minimize(
                                        self.d_loss, var_list=d_params)
        self.g_op = AdamaxOptimizer(learning_rate=self.lr,
                                    beta1=self.beta1).minimize(
                                        self.g_loss, var_list=g_params)

        # Merge summary
        self.merged = tf.summary.merge_all()

        # Model saver
        self.saver = tf.train.Saver(max_to_keep=1)
        self.writer = tf.summary.FileWriter('./model/', self.s.graph)
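
t.mse_loss here comes from the project's own ops module; a plausible stand-in, assuming a batch-averaged squared reconstruction error as is common for energy-based discriminators (the exact scaling used in the repo is an assumption):

import tensorflow as tf

def mse_loss(data, reconstruction, batch_size):
    # Sum of squared differences, averaged over the batch.
    return tf.reduce_sum(tf.square(data - reconstruction)) / batch_size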
Example #4
    def createGraph(self):
        """Creates graph for training"""
        self.base_cost = 0.0
        self.accuracy = 0
        num_sizes = len(self.bins)
        self.cost_list = []
        sum_weight = 0
        self.bin_losses = []
        saturation_loss = []

        # Create all bins and calculate losses for them

        with vs.variable_scope("var_lengths"):
            for seqLength, itemCount, ind in zip(self.bins, self.count_list,
                                                 range(num_sizes)):
                x_in = tf.placeholder("int32", [itemCount, seqLength])
                y_in = tf.placeholder("int64", [itemCount, seqLength])
                self.x_input.append(x_in)
                self.y_input.append(y_in)
                self.saturation_costs = []
                c, a, _, _, perItemCost, _ = self.createLoss(
                    x_in, y_in, seqLength)

                weight = 1.0  #/seqLength
                sat_cost = tf.add_n(self.saturation_costs) / (
                    (seqLength**2) * itemCount)
                saturation_loss.append(sat_cost * weight)
                self.bin_losses.append(perItemCost)
                self.base_cost += c * weight
                sum_weight += weight
                self.accuracy += a
                self.cost_list.append(c)
                tf.get_variable_scope().reuse_variables()

        # calculate the total loss
        self.base_cost /= sum_weight
        self.accuracy /= num_sizes

        self.sat_loss = tf.reduce_sum(
            tf.stack(saturation_loss)) * self.saturation_weight / sum_weight
        cost = self.base_cost + self.sat_loss

        # add gradient noise proportional to learning rate
        tvars = tf.trainable_variables()
        grads_0 = tf.gradients(cost, tvars)

        grads = []
        for grad in grads_0:
            grad1 = grad + tf.truncated_normal(
                tf.shape(grad)) * self.learning_rate * 1e-4
            grads.append(grad1)

        # optimizer
        optimizer = AdamaxOptimizer(self.learning_rate,
                                    beta1=0.9,
                                    beta2=1.0 - self.beta2_rate,
                                    epsilon=1e-8)
        self.optimizer = optimizer.apply_gradients(
            zip(grads, tvars), global_step=self.global_step)

        # some values for printout: the first-moment ("m") slots kept by Adamax
        max_vals = []

        for var in tvars:
            varV = optimizer.get_slot(var, "m")
            max_vals.append(varV)

        self.gnorm = tf.global_norm(max_vals)
        self.cost_list = tf.stack(self.cost_list)
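
The loop over grads_0 above perturbs each gradient with noise whose scale shrinks with the learning rate, in the spirit of the gradient-noise regularization of Neelakantan et al. A NumPy restatement of the per-gradient step (a plain normal is used here where the graph uses tf.truncated_normal):

import numpy as np

def noisy_gradient(grad, learning_rate, scale=1e-4, rng=None):
    # Noise standard deviation is proportional to the learning rate.
    rng = rng or np.random.default_rng()
    return grad + rng.standard_normal(grad.shape) * learning_rate * scale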
Example #5
    def __init__(self, encoder, decoder, embeddings, vocab, rev_vocab):
        """
        Initializes your System

        :param encoder: an encoder that you constructed in train.py
        :param decoder: a decoder that you constructed in train.py
        :param args: pass in more arguments as needed
        """

        self.tuple_to_ind = {}
        self.ind_to_tuple = {}
        tmp = []
        for i in range(0, Config.paraLen):
            for j in range(i, Config.paraLen):
                if (j - i) >= Config.max_length: continue
                tmp.append((i, j))
        for i in range(len(tmp)):
            self.tuple_to_ind[tmp[i]] = i
            self.ind_to_tuple[i] = tmp[i]

        self.vocab = vocab
        self.rev_vocab = rev_vocab

        # Precompute candidate answer-span (start, end) tables used by the loss

        startTuples = tf.constant(
            np.array(
                [[0, i] for i in range(Config.paraLen)
                 for j in range(Config.paraLen - i if i >= Config.paraLen -
                                Config.max_length else Config.max_length)]))
        endTuples = tf.constant(
            np.array(
                [[j, j + i] for i in range(Config.paraLen)
                 for j in range(Config.paraLen - i if i >= Config.paraLen -
                                Config.max_length else Config.max_length)]))

        self.startTuples = tf.cast(startTuples, tf.int32)
        self.endTuples = tf.cast(endTuples, tf.int32)

        # ==== set up placeholder tokens ========
        self.paragraph_placeholder = tf.placeholder(tf.int32,
                                                    [None, Config.paraLen])
        self.para_lens = tf.placeholder(tf.int32, [None])
        self.q_placeholder = tf.placeholder(tf.int32, [None, Config.qLen])
        self.q_lens = tf.placeholder(tf.int32, [None])
        self.labels_placeholder = tf.placeholder(
            tf.int32, [None, Config.labels_one_hot_size])

        # ==== assemble pieces ====
        with tf.variable_scope(
                "qa", initializer=tf.uniform_unit_scaling_initializer(1.0)):
            self.encoder = encoder
            self.decoder = decoder
            self.embeddings = embeddings
            self.setup_embeddings()
            self.setup_system()
            self.setup_loss()

        # ==== set up training/updating procedure ====
        optimizer = AdamaxOptimizer()
        #optimizer = tf.train.AdamOptimizer(Config.lr)
        #optimizer = tf.train.GradientDescentOptimizer(Config.lr)
        train_op = optimizer.minimize(self.loss)
        self.train_op = train_op
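
The tuple_to_ind / ind_to_tuple tables enumerate every candidate (start, end) answer span shorter than Config.max_length. With toy stand-ins for the Config values the enumeration looks like this:

paraLen, max_length = 5, 3  # stand-ins for Config.paraLen / Config.max_length
spans = [(i, j) for i in range(paraLen)
         for j in range(i, paraLen) if (j - i) < max_length]
print(spans)
# [(0, 0), (0, 1), (0, 2), (1, 1), (1, 2), (1, 3),
#  (2, 2), (2, 3), (2, 4), (3, 3), (3, 4), (4, 4)]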
Example #6
File: main.py Project: zizai/hvae-nips
def run(args):

    print('\nSettings: \n', args, '\n')

    args.model_signature = str(dt.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    ########## Find GPUs
    (gpu_config, n_gpu_used) = set_gpus(args.n_gpu)

    ########## Data, model, and optimizer setup
    mnist = MNIST(args)

    x = tf.placeholder(tf.float32, [None, 28, 28, 1])

    if args.model == 'hvae':
        if not args.K:
            raise ValueError('Must set number of flow steps when using HVAE')
        elif not args.temp_method:
            raise ValueError('Must set tempering method when using HVAE')
        model = HVAE(args, mnist.avg_logit)
    elif args.model == 'cnn':
        model = VAE(args, mnist.avg_logit)
    else:
        raise ValueError('Invalid model choice')

    elbo = model.get_elbo(x, args)
    nll = model.get_nll(x, args)

    optimizer = AdamaxOptimizer(learning_rate=args.learn_rate,
                                eps=args.adamax_eps)
    opt_step = optimizer.minimize(-elbo)

    ########## Tensorflow and saver setup
    sess = tf.Session(config=gpu_config)
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    savepath = os.path.join(args.checkpoint_dir, args.model_signature,
                            'model.ckpt')

    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    ########## Test that GPU memory is sufficient
    if n_gpu_used > 0:
        try:
            x_test = mnist.next_test_batch()
            (t_e, t_n) = sess.run((elbo, nll), {x: x_test})
            mnist.batch_idx_test = 0  # Reset batch counter if it works
        except Exception:
            raise MemoryError("""
                Likely insufficient GPU memory
                Reduce test batch by lowering the -tbs parameter
                """)

    ########## Training Loop

    train_elbo_hist = []
    val_elbo_hist = []

    # For early stopping
    best_elbo = -np.inf
    es_epochs = 0
    epoch = 0

    train_times = []

    for epoch in range(1, args.epochs + 1):

        t0 = time.time()
        train_elbo = train(epoch, mnist, opt_step, elbo, x, args, sess)
        train_elbo_hist.append(train_elbo)
        train_times.append(time.time() - t0)
        print('One epoch took {:.2f} seconds'.format(time.time() - t0))

        val_elbo = validate(mnist, elbo, x, sess)
        val_elbo_hist.append(val_elbo)

        if val_elbo > best_elbo:

            # Save the model that currently generalizes best
            es_epochs = 0
            best_elbo = val_elbo
            saver.save(sess, savepath)
            best_model_epoch = epoch

        elif args.early_stopping_epochs > 0:

            es_epochs += 1

            if es_epochs >= args.early_stopping_epochs:
                print('***** STOPPING EARLY ON EPOCH {} of {} *****'.format(
                    epoch, args.epochs))
                break

        print('--> Early stopping: {}/{} (Best ELBO: {:.4f})'.format(
            es_epochs, args.early_stopping_epochs, best_elbo))
        print('\t Current val ELBO: {:.4f}\n'.format(val_elbo))

        if np.isnan(val_elbo):
            raise ValueError('NaN encountered!')

    train_times = np.array(train_times)
    mean_time = np.mean(train_times)
    std_time = np.std(train_times)
    print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
        mean_time, std_time))

    ########## Evaluation

    # Restore the best-performing model
    saver.restore(sess, savepath)

    test_elbos = np.zeros(args.n_nll_runs)
    test_nlls = np.zeros(args.n_nll_runs)

    for i in range(args.n_nll_runs):

        print('\n---- Test run {} of {} ----\n'.format(i + 1, args.n_nll_runs))
        (test_elbos[i], test_nlls[i]) = evaluate(mnist, elbo, nll, x, args,
                                                 sess)

    mean_elbo = np.mean(test_elbos)
    std_elbo = np.std(test_elbos)

    mean_nll = np.mean(test_nlls)
    std_nll = np.std(test_nlls)

    print('\nTest ELBO: {:.2f} +/- {:.2f}'.format(mean_elbo, std_elbo))
    print('Test NLL: {:.2f} +/- {:.2f}'.format(mean_nll, std_nll))

    ########## Logging, Saving, and Plotting

    with open(args.logfile, 'a') as ff:
        print('----------------- Test ID {} -----------------'.format(
            args.model_signature),
              file=ff)
        print(args, file=ff)
        print('Stopped after {} epochs'.format(epoch), file=ff)
        print('Best model from epoch {}'.format(best_model_epoch), file=ff)
        print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
            mean_time, std_time),
              file=ff)

        print('FINAL VALIDATION ELBO: {:.2f}'.format(val_elbo_hist[-1]),
              file=ff)
        print('Test ELBO: {:.2f} +/- {:.2f}'.format(mean_elbo, std_elbo),
              file=ff)
        print('Test NLL: {:.2f} +/- {:.2f}\n'.format(mean_nll, std_nll),
              file=ff)

    if not os.path.exists(args.pickle_dir):
        os.makedirs(args.pickle_dir)

    train_dict = {
        'train_elbo': train_elbo_hist,
        'val_elbo': val_elbo_hist,
        'args': args
    }
    pickle.dump(
        train_dict,
        open(os.path.join(args.pickle_dir, args.model_signature + '.p'), 'wb'))

    if not os.path.exists(args.plot_dir):
        os.makedirs(args.plot_dir)

    tf_gen_samples = model.get_samples(args)
    np_gen_samples = sess.run(tf_gen_samples)
    plot_digit_samples(np_gen_samples, args)

    plot_training_curve(train_elbo_hist, val_elbo_hist, args)

    ########## Email notification upon test completion

    try:

        msg_text = """Test completed for ID {0}.

        Parameters: {1}

        Test ELBO: {2:.2f} +/- {3:.2f}
        Test NLL: {4:.2f} +/- {5:.2f} """.format(args.model_signature, args,
                                                 mean_elbo, std_elbo, mean_nll,
                                                 std_nll)

        msg = MIMEText(msg_text)
        msg['Subject'] = 'Test ID {0} Complete'.format(args.model_signature)
        msg['To'] = args.receiver
        msg['From'] = args.sender

        s = smtplib.SMTP('localhost')
        s.sendmail(args.sender, [args.receiver], msg.as_string())
        s.quit()

    except Exception:

        print('Unable to send email from sender {0} to receiver {1}'.format(
            args.sender, args.receiver))
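
The patience bookkeeping in the training loop (es_epochs counted against args.early_stopping_epochs) is the standard early-stopping pattern. The same logic, extracted into a small helper for clarity (an illustrative sketch, not code from the repo):

import numpy as np

class EarlyStopper:
    """Stop after `patience` consecutive epochs without improvement."""

    def __init__(self, patience):
        self.patience = patience
        self.best = -np.inf
        self.bad_epochs = 0

    def update(self, metric):
        """Record a validation metric; return True when training should stop."""
        if metric > self.best:
            self.best = metric
            self.bad_epochs = 0
            return False
        self.bad_epochs += 1
        return 0 < self.patience <= self.bad_epochs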