def train_epochs(self, start_epoch=1):
        start_time = time.time()
        self.megabatch = []
        self.ep_loss = 0
        self.curr_idx = 0

        self.eval()
        evaluate_sts(self, self.args)
        self.train()

        try:
            for ep in range(start_epoch, self.args.epochs + 1):
                self.mb = utils.get_minibatches_idx(len(self.data),
                                                    self.args.batchsize,
                                                    shuffle=True)
                self.curr_idx = 0
                self.ep_loss = 0
                self.megabatch = []
                counter = 0

                while True:
                    cost = pairing.compute_loss_one_batch(self)
                    if cost is None:
                        break

                    self.ep_loss += cost.item()
                    counter += 1
                    print("Epoch {0}, Counter {1}/{2}".format(
                        ep, counter, len(self.mb)))
                    if self.save_interval > 0 and counter % self.save_interval == 0:
                        self.eval()
                        evaluate_sts(self, self.args)
                        self.train()
                        self.save_params(ep, counter=counter)

                    self.optimizer.zero_grad()
                    cost.backward()
                    torch.nn.utils.clip_grad_norm_(self.parameters(),
                                                   self.args.grad_clip)
                    self.optimizer.step()

                self.eval()
                evaluate_sts(self, self.args)
                self.train()

                if self.args.save_every_epoch:
                    self.save_params(ep)

                print('Epoch {0}\tCost: {1}'.format(ep, self.ep_loss / counter))

        except KeyboardInterrupt:
            print("Training Interrupted")

        if self.args.save_final:
            self.save_params(ep)

        end_time = time.time()
        print("Total Time:", (end_time - start_time))
Example 2
    def fit(self, X, y, sess):
        max_epochs = 20

        # Split into training and validation sets
        X_train, X_val, y_train, y_val = train_test_split(X,
                                                          y,
                                                          test_size=0.33,
                                                          random_state=42)

        for epoch in range(max_epochs):
            start = time.time()
            train_indices = get_minibatches_idx(len(X_train),
                                                batch_size,  # assumed module-level constant
                                                shuffle=True)
            print("\nEpoch %d" % (epoch + 1))

            train_accs = []
            for c, it in enumerate(train_indices):
                batch_train_x = [X_train[i] for i in it]
                batch_train_y = [y_train[i] for i in it]
                feed_dict = {
                    self.x: batch_train_x,
                    self.y: batch_train_y,
                    self.deterministic: False
                }
                _, acc = sess.run([self.train_step, self.accuracy], feed_dict)
                train_accs.append(acc)
                #print(c,len(train_indices),acc)
            print("Training accuracy: %.3f" % np.mean(train_accs))
            val_pred = self.predict(X_val, sess)
            val_true = np.argmax(y_val, axis=1)
            val_acc = np.mean(np.equal(val_pred, val_true))
            print("Val accuracy: %.3f" % val_acc)
            print("Time taken: %.3fs" % (time.time() - start))
        return
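Example 3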
def simple_test_batch(testloader, model, config, hidden=False):
    model.eval()
    total = 0.0
    correct = 0.0
    pred_np = []
    hidden_vectors = []
    minibatches_idx = get_minibatches_idx(
        len(testloader),
        minibatch_size=config['simple_test_batch_size'],
        shuffle=False)
    for minibatch in minibatches_idx:
        inputs = torch.Tensor(
            np.array([list(testloader[x][0].cpu().numpy())
                      for x in minibatch]))
        targets = torch.Tensor(
            np.array([list(testloader[x][1].cpu().numpy())
                      for x in minibatch]))
        inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(
            targets.cuda()).squeeze()
        outputs = model(inputs)
        if hidden:
            hiddens = get_hidden(model, inputs)
            hidden_vectors.extend(list(hiddens.cpu().data.numpy()))
        pred_np.extend(list(outputs.cpu().data.numpy()))
        predicted = (outputs >= 0.5).long().squeeze()
        total += targets.size(0)
        correct += predicted.eq(targets.long()).sum().item()
    test_accuracy = correct / total
    pred_np = np.array(pred_np)
    if hidden:
        return test_accuracy, pred_np, np.array(hidden_vectors)
    return test_accuracy, pred_np
Example 4
def simple_train_batch(trainloader, model, loss_function, optimizer, config):
    model.train()
    for epoch in range(config['epoch_num']):
        if epoch == int(config['epoch_num'] / 3):
            for g in optimizer.param_groups:
                g['lr'] = config['lr'] / 10
            print('divide current learning rate by 10')
        elif epoch == int(config['epoch_num'] * 2 / 3):
            for g in optimizer.param_groups:
                g['lr'] = config['lr'] / 100
            print('divide current learning rate by 10')
        total_loss = 0
        minibatches_idx = get_minibatches_idx(
            len(trainloader),
            minibatch_size=config['simple_train_batch_size'],
            shuffle=True)
        for minibatch in minibatches_idx:
            inputs = torch.Tensor(
                np.array([
                    list(trainloader[x][0].cpu().numpy()) for x in minibatch
                ]))
            targets = torch.Tensor(
                np.array([
                    list(trainloader[x][1].cpu().numpy()) for x in minibatch
                ]))
            inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(
                targets.long().cuda()).squeeze()
            optimizer.zero_grad()
            outputs = model(inputs).squeeze()
            loss = loss_function(outputs, targets)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        print('epoch:', epoch, 'loss:', total_loss)
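The manual decay above (divide the learning rate by 10 at one third and again at two thirds of training) matches PyTorch's built-in MultiStepLR; a minimal equivalent sketch, assuming the same config dict and optimizer:

from torch.optim.lr_scheduler import MultiStepLR

epochs = config['epoch_num']
# Multiply the learning rate by 0.1 at 1/3 and 2/3 of training.
scheduler = MultiStepLR(optimizer,
                        milestones=[epochs // 3, 2 * epochs // 3],
                        gamma=0.1)
for epoch in range(epochs):
    ...  # the minibatch loop from the example above goes here
    scheduler.step()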
Example 5
    def do_eval(sess, train_q, train_a, train_lab):
        train_correct = 0.0
        # number_examples = len(train_q)
        # print("valid examples:", number_examples)
        eval_loss, eval_accc, eval_counter = 0.0, 0.0, 0
        eval_true_positive, eval_false_positive, eval_true_negative, eval_false_negative = 0, 0, 0, 0
        # batch_size = 1
        weights_label = {}  # weight_label[label_index]=(number,correct)
        weights = np.ones((opt.batch_size))
        kf_train = get_minibatches_idx(len(train_q),
                                       opt.batch_size,
                                       shuffle=True)
        for _, train_index in kf_train:
            train_sents_1 = [train_q[t] for t in train_index]
            train_sents_2 = [train_a[t] for t in train_index]
            train_labels = [train_lab[t] for t in train_index]
            train_labels_array = np.array(train_labels)
            # print("train_labels", train_labels.shape)
            # train_labels = train_labels.reshape((len(train_labels), opt.category))
            train_labels = np.eye(opt.category)[train_labels_array]
            x_train_batch_1, x_train_mask_1 = prepare_data_for_emb(
                train_sents_1, opt)
            x_train_batch_2, x_train_mask_2 = prepare_data_for_emb(
                train_sents_2, opt)

            curr_eval_loss, curr_accc, logits = sess.run(
                [loss_, accuracy_, logits_],
                feed_dict={
                    x_1_: x_train_batch_1,
                    x_2_: x_train_batch_2,
                    x_mask_1_: x_train_mask_1,
                    x_mask_2_: x_train_mask_2,
                    y_: train_labels,
                    opt.weights_label: weights,
                    keep_prob: 1.0
                })
            true_positive, false_positive, true_negative, false_negative = compute_confuse_matrix(
                logits, train_labels
            )  # logits:[batch_size,label_size]-->logits[0]:[label_size]
            # write_predict_error_to_file(start,file_object,logits[0], evalY[start:end][0],vocabulary_index2word,evalX1[start:end],evalX2[start:end])
            eval_loss, eval_accc, eval_counter = eval_loss + curr_eval_loss, eval_accc + curr_accc, eval_counter + 1  # accumulate loss and accuracy here; normalize at the end
            weights_label = compute_labels_weights(
                weights_label, logits, train_labels_array
            )  # compute_labels_weights(weights_label,logits,labels)
            eval_true_positive += true_positive
            eval_false_positive += false_positive
            eval_true_negative += true_negative
            eval_false_negative += false_negative
            # weights_label = compute_labels_weights(weights_label, logits, evalY[start:end]) #compute_labels_weights(weights_label,logits,labels)
        print("true_positive:", eval_true_positive, ";false_positive:",
              eval_false_positive, ";true_negative:", eval_true_negative,
              ";false_negative:", eval_false_negative)
        denom_p = float(eval_true_positive + eval_false_positive)
        denom_r = float(eval_true_positive + eval_false_negative)
        p = eval_true_positive / denom_p if denom_p else 0.0
        r = eval_true_positive / denom_r if denom_r else 0.0
        f1_score = (2 * p * r) / (p + r) if (p + r) else 0.0
        print("eval_counter:", eval_counter, ";eval_acc:", eval_accc)
        return eval_loss / float(eval_counter), eval_accc / float(
            eval_counter), f1_score, p, r, weights_label
Example 6
    def train(self, params, train, dev, test):
        start_time = time.time()
        counter = 0
        try:
            for eidx in xrange(params.epochs):

                kf = utils.get_minibatches_idx(len(train), params.batchsize, shuffle=True)

                uidx = 0
                for _, train_index in kf:

                    uidx += 1

                    batch = [train[t] for t in train_index]
                    vocab = self.get_word_arr(batch)
                    y = self.get_y(batch)
                    x, xmask = self.prepare_data(self.populate_embeddings_words(batch, vocab))
                    idxs = self.get_idxs(xmask)

                    if params.nntype == "charlstm" or params.nntype == "charcnn":
                        char_indices = self.populate_embeddings_characters(vocab)
                    if params.nntype == "charagram":
                        char_hash = self.populate_embeddings_characters_charagram(vocab)

                    if params.nntype == "charlstm":
                        c, cmask = self.prepare_data(char_indices)
                    if params.nntype == "charcnn":
                        c = self.prepare_data_conv(char_indices)

                    if params.nntype == "charlstm":
                        cost = self.train_function(c, cmask, x, xmask, idxs, y)
                    if params.nntype == "charcnn":
                        cost = self.train_function(c, x, xmask, idxs, y)
                    if params.nntype == "charagram":
                        cost = self.train_function(char_hash, x, xmask, idxs, y)

                    if np.isnan(cost) or np.isinf(cost):
                        print 'NaN or Inf detected in cost'

                    #print 'Epoch ', (eidx+1), 'Update ', (uidx+1), 'Cost ', cost

                if(params.save):
                    counter += 1
                    utils.save_params(self, params.outfile+str(counter)+'.pickle')

                if(params.evaluate):
                    devscore = self.evaluate(dev, params)
                    testscore = self.evaluate(test, params)
                    trainscore = self.evaluate(train, params)
                    print "accuracy: ", devscore, testscore, trainscore

                print 'Epoch ', (eidx+1), 'Cost ', cost

        except KeyboardInterrupt:
            print "Training interrupted"

        end_time = time.time()
        print "total time:", (end_time - start_time)
def run_model(opt, X):
    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:' +
                  str(params['Wemb'].shape) + ' opt: ' + str(
                      (opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:0'):
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        if opt.plot_type == 'ae':
            x_lat_ = ae(x_, opt)
        elif opt.plot_type == 'vae' or opt.plot_type == 'cyc':
            mu_, z_ = vae(x_, opt)
            x_lat_ = z_ if opt.use_z else mu_

    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.3
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                t_vars = tf.trainable_variables()
                #print([var.name[:-2] for var in t_vars])
                loader = restore_from_save(t_vars, sess, opt)

            except Exception as e:
                print(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        X_emb = np.zeros([len(X), opt.z_dim], dtype='float32')
        kf = get_minibatches_idx(len(X), opt.batch_size)
        t = 0
        for _, index in kf:
            sents_b = [X[i] for i in index]
            x_b = prepare_data_for_cnn(sents_b, opt)
            x_lat = np.squeeze(sess.run(x_lat_, feed_dict={x_: x_b}))
            X_emb[t * opt.batch_size:(t + 1) * opt.batch_size] = x_lat
            if (t + 1) % 10 == 0:
                print('%d / %d' % (t + 1, len(kf)))
            t += 1

    return X_emb
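One caveat on the loop above: x_ is declared with a fixed batch dimension of opt.batch_size, so the code implicitly assumes len(X) is a multiple of the batch size. A hedged guard that simply drops the final partial minibatch:

kf = [(i, idx) for i, idx in kf if len(idx) == opt.batch_size]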
Example 8
def train(model, data, words, params):
    start_time = time.time()

    counter = 0
    try:
        for eidx in xrange(params.epochs):

            kf = utils.get_minibatches_idx(len(data), params.batchsize, shuffle=True)
            uidx = 0
            for _, train_index in kf:

                uidx += 1

                batch = [data[t] for t in train_index]
                for i in batch:
                    i[0].populate_embeddings(words)
                    i[1].populate_embeddings(words)

                (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask) = getpairs(model, batch, params)

                cost = model.train_function(g1x, g2x, p1x, p2x, g1mask, g2mask, p1mask, p2mask)

                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN or Inf detected in cost'

                if (utils.checkIfQuarter(uidx, len(kf))):
                    if (params.save):
                        counter += 1
                        utils.saveParams(model, params.outfile + str(counter) + '.pickle')
                    if (params.evaluate):
                        evaluate_all(model, words)
                        sys.stdout.flush()

                #undo batch to save RAM
                for i in batch:
                    i[0].representation = None
                    i[1].representation = None
                    i[0].unpopulate_embeddings()
                    i[1].unpopulate_embeddings()

                #print 'Epoch ', (eidx+1), 'Update ', (uidx+1), 'Cost ', cost

            if (params.save):
                counter += 1
                utils.saveParams(model, params.outfile + str(counter) + '.pickle')

            if (params.evaluate):
                evaluate_all(model, words)

            print 'Epoch ', (eidx + 1), 'Cost ', cost

    except KeyboardInterrupt:
        print "Training interupted"

    end_time = time.time()
    print "total time:", (end_time - start_time)
Example 9
    def predict(self, X, sess):
        indices = get_minibatches_idx(len(X), batch_size, shuffle=False)  # batch_size: assumed module-level constant
        pred = []
        for i in indices:
            batch_x = [X[j] for j in i]
            feed_dict = {self.x: batch_x, self.deterministic: True}
            pred_batch = sess.run(self.pred, feed_dict)
            pred.append(pred_batch)
        pred = np.concatenate(pred, axis=0)
        pred = np.argmax(pred, axis=1)
        pred = np.reshape(pred, -1)
        return pred
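Example 10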
def get_features(trainloader, model, config):
    total_features = []
    total_labels = []
    minibatches_idx = get_minibatches_idx(
        len(trainloader),
        minibatch_size=config['simple_test_batch_size'],
        shuffle=False)
    for minibatch in minibatches_idx:
        inputs = torch.Tensor(
            np.array(
                [list(trainloader[x][0].cpu().numpy()) for x in minibatch]))
        targets = torch.Tensor(
            np.array(
                [list(trainloader[x][1].cpu().numpy()) for x in minibatch]))
        inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(
            targets.cuda()).squeeze()
        features = model.get_features(inputs)
        total_features.extend(features.cpu().data.numpy().tolist())
        total_labels.extend(targets.cpu().data.numpy().tolist())
    total_features = np.array(total_features)
    total_labels = np.array(total_labels)
    print('total features', total_features.shape)
    print('total labels', total_labels.shape)
    avg_feature = np.mean(total_features, axis=0)
    # print('avg feature', np.linalg.norm(avg_feature))
    centralized_features = total_features - avg_feature
    feature_norm = np.square(np.linalg.norm(centralized_features, axis=1))
    class_features = []
    feature_norm_list = []
    for i in range(10):
        mask_index = (total_labels == i)
        mask_index = mask_index.reshape(len(mask_index), 1)
        # print('mask index', mask_index)
        if config['R'] == 'inf' and i == config['t1']:
            break
        class_features.append(
            np.sum(total_features * mask_index, axis=0) /
            np.sum(mask_index.reshape(-1)))
        feature_norm_list.append(
            np.sum(feature_norm * mask_index.reshape(-1)) /
            np.sum(mask_index.reshape(-1)))

    class_features = np.array(class_features)
    # print('original class features', class_features)
    class_features = np.array(class_features) - avg_feature
    # print('centralized class features', class_features)
    print('feature norm list', feature_norm_list)
    print('avg square feature norm', np.mean(feature_norm_list))
    return class_features
Example 11
    def run_epoch(sess, epoch, mode, print_freq=-1, train_writer=None):
        fetches_ = {
            'loss': loss_,
            'accuracy': accuracy_
        }

        if mode == 'train':
            x, y, is_train = train, train_lab, 1
            fetches_['train_op'] = train_op_
            fetches_['summary'] = merged
        elif mode == 'val':
            assert(print_freq == -1)
            x, y, is_train = val, val_lab, None
        elif mode == 'test':
            assert(print_freq == -1)
            x, y, is_train = test, test_lab, None

        correct, acc_loss, acc_n = 0.0, 0.0, 0.0
        local_t = 0
        global_t = epoch * epoch_t  # epoch_t: presumably batches per epoch, defined in enclosing scope; only used in train mode
        start_time = time.time()
        kf = get_minibatches_idx(len(x), opt.batch_size, shuffle=True)

        for _, index in kf:
            local_t += 1
            global_t += 1

            sents_b = [x[i] for i in index]
            sents_b_n = add_noise(sents_b, opt)
            y_b = [y[i] for i in index]
            y_b = np.array(y_b)
            y_b = y_b.reshape((len(y_b), 1))
            x_b = prepare_data_for_cnn(sents_b_n, opt) # Batch L
            feed_t = {x_: x_b, y_: y_b, is_train_: is_train}
            fetches = sess.run(fetches_, feed_dict=feed_t)

            batch_size = len(index)
            acc_n += batch_size
            acc_loss += fetches['loss']*batch_size
            correct += fetches['accuracy']*batch_size
            if print_freq > 0 and local_t % print_freq == 0:
                print("%s Iter %d: loss %.4f, acc %.4f, time %.1fs" %
                    (mode, local_t, acc_loss/acc_n, correct/acc_n, time.time()-start_time))
            if mode == 'train' and train_writer is not None:
                train_writer.add_summary(fetches['summary'], global_t)

        print("%s Epoch %d: loss %.4f, acc %.4f, time %.1fs" %
            (mode, epoch, acc_loss/acc_n, correct/acc_n, time.time()-start_time))
        return acc_loss/acc_n, correct/acc_n
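Example 12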
    def reset(self):
        """
        Resets the state of the environment, returning an initial observation.
        Outputs
        -------
        observation : the initial observation of the space. (Initial reward is assumed to be 0.)
        """
        t0 = time()
        # for now let's get one sample with all.
        kf = utils.get_minibatches_idx(len(self.qi),
                                       len(self.qi),
                                       shuffle=True)
        _, train_index = kf[0]  # iterate if len(kf) > 1: for _, train_index in kf
        print "kf", kf, len(self.qi)
        print("Got minibatch index {}".format(time() - t0))

        qi, qi_i, qi_lst, D_gt_id, D_gt_url = self.get_samples(
            self.qi,
            self.dt,
            self.vocab,
            train_index,
            self.search.engine,
            max_words_input=self.search.max_words_input)

        current_queries = qi_lst
        self.current_queries = qi_lst
        self.D_gt_id = D_gt_id
        print('current queries are', current_queries)
        n_iterations = 1  # number of query reformulation iterations.
        ones = np.ones((len(current_queries), self.search.max_words_input))
        reformulated_query = ones  # fallback so it is always defined
        if 0 < n_iterations < self.search.q_0_fixed_until:
            # select everything from the original query in the first iteration.
            reformulated_query = np.concatenate([ones, ones], axis=1)

        print 'reformulated_query', reformulated_query.shape
        # reformulated_query is our action!!!

        actions = reformulated_query
        state, reward, done = self.execute(actions)

        print "state", state
        print "actions", actions
        print "rew", reward
        return state
Example 13
    def train(self, X, training_epochs=10):
        print("\nStarting training")

        for epoch in range(training_epochs):
            avg_cost = 0.0
            train_indices = get_minibatches_idx(len(X),
                                                batch_size,
                                                shuffle=True)

            for it in train_indices:
                batch_x = [X[i] for i in it]
                _, cost = self.sess.run((self.train_step, self.cost),
                                        feed_dict={self.x: batch_x})

                # batch_size and n_samples are assumed module-level constants
                avg_cost += cost / n_samples * batch_size

            print("Epoch:", '%d' % (epoch + 1), "cost=",
                  "{:.3f}".format(avg_cost))
Example 14
def predict(test_inputs, model, config):
    # test inputs: (T, D)
    model.eval()
    pred_np = []
    minibatches_idx = get_minibatches_idx(
        len(test_inputs),
        minibatch_size=config['test_batch_size'],
        shuffle=False)
    for minibatch in minibatches_idx:
        inputs = torch.Tensor(np.array([test_inputs[x] for x in minibatch]))
        inputs = Variable(inputs.cuda().squeeze())
        # (B, D)
        outputs = model(inputs)
        # np.exp implies the model presumably returns log-probabilities
        pred_np.extend(list(np.exp(outputs.cpu().data.numpy())))
        # (B, C)
    pred_np = np.array(pred_np)
    # (T, C)
    return pred_np
Example 15
def simple_train_batch(trainloader, model, loss_function, optimizer, config):
    model.train()
    for epoch in range(config['epoch_num']):
        if epoch % max(1, config['epoch_num'] // 10) == 0:
            print('current epoch: ', epoch)
        total_loss = 0
        minibatches_idx = get_minibatches_idx(
            len(trainloader),
            minibatch_size=config['simple_train_batch_size'],
            shuffle=True)
        # model.train()
        # BCE, (100, 1, 1) doesn't matter
        # MSE, (100, 1, 1) matter
        for minibatch in minibatches_idx:
            inputs = torch.Tensor(
                np.array([
                    list(trainloader[x][0].cpu().numpy()) for x in minibatch
                ]))
            targets = torch.Tensor(
                np.array([
                    list(trainloader[x][1].cpu().numpy()) for x in minibatch
                ]))
            inputs, targets = Variable(inputs.cuda()).squeeze(), Variable(
                targets.float().cuda()).squeeze()
            # inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(targets.float().cuda()).squeeze(1)
            # print('inputs', inputs.size())
            # print('targets', targets.size())
            if config['model'] == 'CNN_MNIST':
                inputs = inputs.unsqueeze(1)
            optimizer.zero_grad()
            # outputs = model(inputs)
            outputs = model(inputs).squeeze()
            loss = loss_function(outputs, targets)
            # print('outputs', outputs.size())
            # print('loss', loss)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        if epoch % max(1, config['epoch_num'] // 10) == 0:
            print('loss', total_loss)
Example 16
    def evaluate(self, data, params):

        kf = utils.get_minibatches_idx(len(data), 100, shuffle=False)

        preds = []
        for _, train_index in kf:
            batch = [data[t] for t in train_index]
            vocab = self.get_word_arr(batch)
            x, xmask = self.prepare_data(
                self.populate_embeddings_words(batch, vocab))
            idxs = self.get_idxs(xmask)

            if params.nntype == "charlstm" or params.nntype == "charcnn":
                char_indices = self.populate_embeddings_characters(vocab)
            if params.nntype == "charagram":
                char_hash = self.populate_embeddings_characters_charagram(
                    vocab)

            if params.nntype == "charlstm":
                c, cmask = self.prepare_data(char_indices)
            if params.nntype == "charcnn":
                c = self.prepare_data_conv(char_indices)

            if params.nntype == "charlstm":
                temp = self.scoring_function(c, cmask, x, xmask, idxs)
            if params.nntype == "charcnn":
                temp = self.scoring_function(c, x, xmask, idxs)
            if params.nntype == "charagram":
                temp = self.scoring_function(char_hash, x, xmask, idxs)
            preds.extend(temp)

        ys = []
        for i in data:
            for j in i[1]:
                ys.append(self.tags[j])

        # relies on shuffle=False above so preds stay aligned with ys
        return accuracy_score(ys, preds)
Example 17
def train(train_data, dev_data, model, loss_function, optimizer, ner_to_ix,
          config):
    total_loss_list = []
    for epoch in range(config['epoch_num']):
        model.train()
        print('current epoch: ', epoch, end='\r\n')
        total_loss = 0
        minibatches_idx = get_minibatches_idx(
            len(train_data['inputs']),
            minibatch_size=config['train_batch_size'],
            shuffle=True)
        for minibatch in minibatches_idx:
            inputs = torch.Tensor(
                np.array([train_data['inputs'][x] for x in minibatch]))
            targets = torch.Tensor(
                np.array([train_data['labels'][x] for x in minibatch]))
            confidences = torch.Tensor(
                np.array([train_data['confidences'][x] for x in minibatch]))
            inputs, targets = Variable(inputs.cuda()).squeeze(), Variable(
                targets.cuda()).squeeze().long()
            confidences = Variable(confidences.cuda(),
                                   requires_grad=False).squeeze()
            # inputs: (B, d), targets: B, confidences: B
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, targets)
            # print('loss', loss)
            loss = torch.sum(loss * confidences)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        total_loss_list.append(total_loss)
        print('train loss', total_loss)
        # train_accuracy = evaluate(train_data, model, ner_to_ix, config)
        # print('train accuracy', train_accuracy)
        dev_accuracy = evaluate(dev_data, model, ner_to_ix, config)
        print('dev accuracy', dev_accuracy)
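The per-sample weighting above (loss * confidences) only works if loss_function returns unreduced, per-example losses; a minimal compatible setup, assuming cross-entropy is the intended criterion:

import torch.nn as nn

# reduction='none' keeps one loss value per sample so each can be
# scaled by its confidence weight before the torch.sum above.
loss_function = nn.CrossEntropyLoss(reduction='none')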
Example 18
def train_batch_autoencoder(trainloader, model, loss_function, optimizer,
                            config):
    model.train()
    for epoch in range(config['epoch_num']):
        if epoch % max(1, config['epoch_num'] // 10) == 0:
            print('current epoch: ', epoch)
        total_loss = 0
        minibatches_idx = get_minibatches_idx(
            len(trainloader),
            minibatch_size=config['simple_train_batch_size'],
            shuffle=True)
        for minibatch in minibatches_idx:
            inputs = torch.Tensor(
                np.array([
                    list(trainloader[x][0].cpu().numpy()) for x in minibatch
                ]))
            targets = torch.Tensor(
                np.array([
                    list(trainloader[x][1].cpu().numpy()) for x in minibatch
                ]))
            inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(
                targets.float().cuda()).squeeze(1)
            # print(inputs.size())
            optimizer.zero_grad()
            outputs = model(inputs)
            # print(outputs.size())
            loss = loss_function(outputs, inputs)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        if epoch % max(1, config['epoch_num'] // 10) == 0:
            pic = to_img(outputs.cpu().data)
            save_image(
                pic,
                '/path/to/experiments/dir/figures/autoencoders/image_{}.png'.
                format(epoch))
            print('loss', total_loss)
Example 19
def simple_test_batch(testloader, model, config):
    model.eval()
    total = 0.0
    correct = 0.0
    minibatches_idx = get_minibatches_idx(
        len(testloader),
        minibatch_size=config['simple_test_batch_size'],
        shuffle=False)
    y_true = []
    y_pred = []
    for minibatch in minibatches_idx:
        inputs = torch.Tensor(
            np.array([list(testloader[x][0].cpu().numpy())
                      for x in minibatch]))
        targets = torch.Tensor(
            np.array([list(testloader[x][1].cpu().numpy())
                      for x in minibatch]))
        inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(
            targets.cuda()).squeeze()
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.long()).sum().item()
        y_true.extend(targets.cpu().data.numpy().tolist())
        y_pred.extend(predicted.cpu().data.numpy().tolist())
    test_accuracy = correct / total
    test_confusion_matrix = confusion_matrix(y_true, y_pred)
    t1 = config['t1']
    big_class_acc = (np.sum([test_confusion_matrix[i, i] for i in range(t1)])
                     / np.sum(test_confusion_matrix[:t1]))
    if t1 == 10:
        small_class_acc = None
    else:
        small_class_acc = (np.sum([test_confusion_matrix[i, i] for i in range(t1, 10)])
                           / np.sum(test_confusion_matrix[t1:]))
    return test_accuracy, big_class_acc, small_class_acc, test_confusion_matrix
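The grouped accuracies above can also be read straight off the confusion matrix; a minimal equivalent sketch, assuming the same 10-class setup and t1 split (the helper name is ours):

import numpy as np

def grouped_accuracies(cm, t1, n_classes=10):
    # cm[i, j] counts samples of true class i predicted as class j, so
    # row sums are per-class totals and the diagonal holds the hits.
    cm = np.asarray(cm, dtype=float)
    per_class = cm.diagonal() / cm.sum(axis=1)  # per-class recall
    big = np.trace(cm[:t1, :t1]) / cm[:t1].sum()
    small = np.trace(cm[t1:, t1:]) / cm[t1:].sum() if t1 < n_classes else None
    return big, small, per_class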
Example 20
def main():
    # global n_words
    # Prepare training and testing data
    loadpath = "./data/yahoo.p"
    x = cPickle.load(open(loadpath, "rb"))
    train, val, test = x[0], x[1], x[2]
    train_lab, val_lab, test_lab = x[3], x[4], x[5]
    wordtoix, ixtoword = x[6], x[7]

    train_lab = np.array(train_lab, dtype='float32')
    val_lab = np.array(val_lab, dtype='float32')
    test_lab = np.array(test_lab, dtype='float32')

    opt = Options()
    opt.n_words = len(ixtoword)

    del x

    print(dict(opt))
    print('Total words: %d' % opt.n_words)

    if opt.part_data:
        np.random.seed(123)
        train_ind = np.random.choice(len(train),
                                     int(len(train) * opt.portion),
                                     replace=False)
        train = [train[t] for t in train_ind]
        train_lab = [train_lab[t] for t in train_ind]

    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:' +
                  str(params['Wemb'].shape) + ' opt: ' +
                  str((opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:1'):
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen])
        x_mask_ = tf.placeholder(tf.float32,
                                 shape=[opt.batch_size, opt.maxlen])
        keep_prob = tf.placeholder(tf.float32)
        y_ = tf.placeholder(tf.float32, shape=[opt.batch_size, 10])
        accuracy_, loss_, train_op, W_emb_ = emb_classifier(
            x_, x_mask_, y_, keep_prob, opt)
        # merged = tf.summary.merge_all()

    uidx = 0
    max_val_accuracy = 0.
    max_test_accuracy = 0.
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:

                t_vars = tf.trainable_variables()
                # print([var.name[:-2] for var in t_vars])
                save_keys = tensors_key_in_file(opt.save_path)
                # print(save_keys.keys())
                ss = set([var.name for var in t_vars]) & set(
                    [s + ":0" for s in save_keys.keys()])
                cc = {var.name: var for var in t_vars}
                # only restore variables with correct shape
                ss_right_shape = set(
                    [s for s in ss if cc[s].get_shape() == save_keys[s[:-2]]])

                loader = tf.train.Saver(var_list=[
                    var for var in t_vars if var.name in ss_right_shape
                ])
                loader.restore(sess, opt.save_path)

                print("Loading variables from '%s'." % opt.save_path)
                print("Loaded variables:" + str(ss))

            except Exception as e:
                print(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        try:
            for epoch in range(opt.max_epochs):
                print("Starting epoch %d" % epoch)
                kf = get_minibatches_idx(len(train),
                                         opt.batch_size,
                                         shuffle=True)
                for _, train_index in kf:
                    uidx += 1
                    sents = [train[t] for t in train_index]
                    x_labels = [train_lab[t] for t in train_index]
                    x_labels = np.array(x_labels)
                    x_labels = x_labels.reshape((len(x_labels), 10))

                    x_batch, x_batch_mask = prepare_data_for_emb(sents, opt)

                    _, loss = sess.run(
                        [train_op, loss_],
                        feed_dict={
                            x_: x_batch,
                            x_mask_: x_batch_mask,
                            y_: x_labels,
                            keep_prob: opt.drop_rate
                        })

                    if uidx % opt.valid_freq == 0:
                        train_correct = 0.0
                        kf_train = get_minibatches_idx(500,
                                                       opt.batch_size,
                                                       shuffle=True)
                        for _, train_index in kf_train:
                            train_sents = [train[t] for t in train_index]
                            train_labels = [train_lab[t] for t in train_index]
                            train_labels = np.array(train_labels)
                            train_labels = train_labels.reshape(
                                (len(train_labels), 10))
                            x_train_batch, x_train_batch_mask = prepare_data_for_emb(
                                train_sents, opt)  # Batch L

                            train_accuracy = sess.run(accuracy_,
                                                      feed_dict={
                                                          x_: x_train_batch,
                                                          x_mask_:
                                                          x_train_batch_mask,
                                                          y_: train_labels,
                                                          keep_prob: 1.0
                                                      })

                            train_correct += train_accuracy * len(train_index)

                        train_accuracy = train_correct / 500

                        print("Iteration %d: Training loss %f " % (uidx, loss))
                        print("Train accuracy %f " % train_accuracy)

                        val_correct = 0.0
                        kf_val = get_minibatches_idx(20000,
                                                     opt.batch_size,
                                                     shuffle=True)
                        for _, val_index in kf_val:
                            val_sents = [val[t] for t in val_index]
                            val_labels = [val_lab[t] for t in val_index]
                            val_labels = np.array(val_labels)
                            val_labels = val_labels.reshape(
                                (len(val_labels), 10))
                            x_val_batch, x_val_batch_mask = prepare_data_for_emb(
                                val_sents, opt)

                            val_accuracy = sess.run(accuracy_,
                                                    feed_dict={
                                                        x_: x_val_batch,
                                                        x_mask_:
                                                        x_val_batch_mask,
                                                        y_: val_labels,
                                                        keep_prob: 1.0
                                                    })

                            val_correct += val_accuracy * len(val_index)

                        val_accuracy = val_correct / 20000
                        print("Validation accuracy %f " % val_accuracy)

                        if val_accuracy > max_val_accuracy:
                            max_val_accuracy = val_accuracy

                            test_correct = 0.0
                            kf_test = get_minibatches_idx(len(test),
                                                          opt.batch_size,
                                                          shuffle=True)
                            for _, test_index in kf_test:
                                test_sents = [test[t] for t in test_index]
                                test_labels = [test_lab[t] for t in test_index]
                                test_labels = np.array(test_labels)
                                test_labels = test_labels.reshape(
                                    (len(test_labels), 10))
                                x_test_batch, x_test_batch_mask = prepare_data_for_emb(
                                    test_sents, opt)

                                test_accuracy = sess.run(accuracy_,
                                                         feed_dict={
                                                             x_: x_test_batch,
                                                             x_mask_:
                                                             x_test_batch_mask,
                                                             y_: test_labels,
                                                             keep_prob: 1.0
                                                         })

                                test_correct += test_accuracy * len(test_index)

                            test_accuracy = test_correct / len(test)

                            print("Test accuracy %f " % test_accuracy)

                            max_test_accuracy = test_accuracy

                print("Epoch %d: Max Test accuracy %f" %
                      (epoch, max_test_accuracy))

                emb = sess.run(W_emb_, feed_dict={x_: x_test_batch})

                cPickle.dump([emb], open("yahoo_emb_max_300.p", "wb"))

            print("Max Test accuracy %f " % max_test_accuracy)

        except KeyboardInterrupt:
            print('Training interrupted')
            print("Max Test accuracy %f " % max_test_accuracy)
Example 21
def run_model(opt, train, val, ixtoword):
    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:'+ str(params['Wemb'].shape) + ' opt: ' + str((opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:0'):
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        x_org_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        is_train_ = tf.placeholder(tf.bool, name='is_train_')
        res_, g_loss_, d_loss_, gen_op, dis_op = textGAN(x_, opt)
        merged = tf.summary.merge_all()
        # opt.is_train = False
        # res_val_, loss_val_, _ = auto_encoder(x_, x_org_, opt)
        # merged_val = tf.summary.merge_all()

    #tensorboard --logdir=run1:/tmp/tensorflow/ --port 6006
    #writer = tf.train.SummaryWriter(opt.log_path, graph=tf.get_default_graph())


    uidx = 0
    config = tf.ConfigProto(log_device_placement = False, allow_soft_placement=True,
                            graph_options=tf.GraphOptions(build_cost_model=1))
    #config = tf.ConfigProto(device_count={'GPU':0})
    config.gpu_options.allow_growth = True
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    run_metadata = tf.RunMetadata()

    with tf.Session(config = config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                #pdb.set_trace()

                t_vars = tf.trainable_variables()
                #print([var.name[:-2] for var in t_vars])
                loader = restore_from_save(t_vars, sess, opt)

            except Exception as e:
                print(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        for epoch in range(opt.max_epochs):
            print("\nStarting epoch %d\n" % epoch)
            # if epoch >= 10:
            #     print("Relax embedding ")
            #     opt.fix_emb = False
            #     opt.batch_size = 2
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                print "\rIter: %d" % uidx,
                uidx += 1
                sents = [train[t] for t in train_index]

                sents_permutated = add_noise(sents, opt)

                #sents[0] = np.random.permutation(sents[0])
                x_batch = prepare_data_for_cnn(sents_permutated, opt) # Batch L
                if x_batch.shape[0] == opt.batch_size:
                    d_loss = 0
                    g_loss = 0
                    if profile:
                        if uidx % opt.dis_steps == 0:
                            _, d_loss = sess.run([dis_op, d_loss_], feed_dict={x_: x_batch},options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),run_metadata=run_metadata)
                        if uidx % opt.gen_steps == 0:
                            _, g_loss = sess.run([gen_op, g_loss_], feed_dict={x_: x_batch},options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),run_metadata=run_metadata)
                    else:
                        if uidx % opt.dis_steps == 0:
                            _, d_loss = sess.run([dis_op, d_loss_], feed_dict={x_: x_batch})
                        if uidx % opt.gen_steps == 0:
                            _, g_loss = sess.run([gen_op, g_loss_], feed_dict={x_: x_batch})

                if uidx % opt.valid_freq == 0:
                    is_train = True
                    # print('Valid Size:', len(val))
                    valid_index = np.random.choice(len(val), opt.batch_size)

                    val_sents = [val[t] for t in valid_index]

                    val_sents_permutated = add_noise(val_sents, opt)

                    x_val_batch = prepare_data_for_cnn(val_sents_permutated, opt)

                    d_loss_val = sess.run(d_loss_, feed_dict={x_: x_val_batch})
                    g_loss_val = sess.run(g_loss_, feed_dict={x_: x_val_batch})


                    res = sess.run(res_, feed_dict={x_: x_val_batch})
                    print("Validation d_loss %f, g_loss %f  mean_dist %f" % (d_loss_val, g_loss_val, res['mean_dist']))
                    print "Sent:" + u' '.join([ixtoword[x] for x in res['syn_sent'][0] if x != 0]).encode('utf-8').strip()
                    print("MMD loss %f, GAN loss %f" % (res['mmd'], res['gan']))
                    np.savetxt('./text/rec_val_words.txt', res['syn_sent'], fmt='%i', delimiter=' ')
                    if opt.discrimination:
                        print ("Real Prob %f Fake Prob %f" % (res['prob_r'], res['prob_f']))


                    val_set = [prepare_for_bleu(s) for s in val_sents]
                    [bleu2s, bleu3s, bleu4s] = cal_BLEU([prepare_for_bleu(s) for s in res['syn_sent']], {0: val_set})
                    print 'Val BLEU (2,3,4): ' + ' '.join([str(round(it, 3)) for it in (bleu2s, bleu3s, bleu4s)])

                    summary = sess.run(merged, feed_dict={x_: x_val_batch})
                    test_writer.add_summary(summary, uidx)

                if uidx % opt.print_freq == 0:
                    #pdb.set_trace()
                    res = sess.run(res_, feed_dict={x_: x_batch})
                    median_dis = np.sqrt(np.median([((x-y)**2).sum() for x in res['real_f'] for y in res['real_f']]))
                    print("Iteration %d: d_loss %f, g_loss %f, mean_dist %f, realdist median %f" % (uidx, d_loss, g_loss, res['mean_dist'], median_dis))
                    np.savetxt('./text/rec_train_words.txt', res['syn_sent'], fmt='%i', delimiter=' ')
                    print "Sent:" + u' '.join([ixtoword[x] for x in res['syn_sent'][0] if x != 0]).encode('utf-8').strip()

                    summary = sess.run(merged, feed_dict={x_: x_batch})
                    train_writer.add_summary(summary, uidx)
                    # print res['x_rec'][0][0]
                    # print res['x_emb'][0][0]
                    if profile:
                        tf.contrib.tfprof.model_analyzer.print_model_analysis(
                        tf.get_default_graph(),
                        run_meta=run_metadata,
                        tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY)

            saver.save(sess, opt.save_path, global_step=epoch)
Example 22
def run_model(opt, train, val, ixtoword):

    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:' +
                  str(params['Wemb'].shape) + ' opt: ' +
                  str((opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:1'):
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        x_org_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        # is_train_ = tf.placeholder(tf.bool, name='is_train_')
        res_, g_loss_, d_loss_, gen_op, dis_op = textGAN(x_, x_org_, opt)
        merged = tf.summary.merge_all()
        # opt.is_train = False
        # res_val_, loss_val_, _ = auto_encoder(x_, x_org_, opt)
        # merged_val = tf.summary.merge_all()

    #tensorboard --logdir=run1:/tmp/tensorflow/ --port 6006
    #writer = tf.train.SummaryWriter(opt.log_path, graph=tf.get_default_graph())

    uidx = 0
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True,
                            graph_options=tf.GraphOptions(build_cost_model=1))
    #config = tf.ConfigProto(device_count={'GPU':0})
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    run_metadata = tf.RunMetadata()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                #pdb.set_trace()

                t_vars = tf.trainable_variables()
                #print([var.name[:-2] for var in t_vars])
                loader = restore_from_save(t_vars, sess, opt)
                print('\nloaded successfully\n')

            except Exception as e:
                print(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        # for i in range(34):
        #     valid_index = np.random.choice(
        #         len(val), opt.batch_size)
        #     val_sents = [val[t] for t in valid_index]
        #     val_sents_permutated = add_noise(val_sents, opt)
        #     x_val_batch = prepare_data_for_cnn(
        #         val_sents_permutated, opt)
        #     x_val_batch_org = prepare_data_for_rnn(val_sents, opt)
        #     res = sess.run(res_, feed_dict={
        #                     x_: x_val_batch, x_org_: x_val_batch_org})
        #     if i == 0:
        #         valid_text = res['syn_sent']
        #     else:
        #         valid_text = np.concatenate(
        #             (valid_text, res['syn_sent']), 0)

        # np.savetxt('./text_news/vae_words.txt', valid_text, fmt='%i', delimiter=' ')
        # pdb.set_trace()

        for epoch in range(opt.max_epochs):
            print("Starting epoch %d" % epoch)
            # if epoch >= 10:
            #     print("Relax embedding ")
            #     opt.fix_emb = False
            #     opt.batch_size = 2
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                sents = [train[t] for t in train_index]

                sents_permutated = add_noise(sents, opt)

                #sents[0] = np.random.permutation(sents[0])
                x_batch = prepare_data_for_cnn(sents_permutated,
                                               opt)  # Batch L
                x_batch_org = prepare_data_for_rnn(sents, opt)
                d_loss = 0
                g_loss = 0
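                # Alternating GAN updates: dis_op runs every opt.dis_steps
                # iterations and gen_op every opt.gen_steps iterations; the
                # `profile` branch additionally records a full execution
                # trace into run_metadata.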
                if profile:
                    if uidx % opt.dis_steps == 0:
                        _, d_loss = sess.run(
                            [dis_op, d_loss_],
                            feed_dict={
                                x_: x_batch,
                                x_org_: x_batch_org
                            },
                            options=tf.RunOptions(
                                trace_level=tf.RunOptions.FULL_TRACE),
                            run_metadata=run_metadata)
                    if uidx % opt.gen_steps == 0:
                        _, g_loss = sess.run(
                            [gen_op, g_loss_],
                            feed_dict={
                                x_: x_batch,
                                x_org_: x_batch_org
                            },
                            options=tf.RunOptions(
                                trace_level=tf.RunOptions.FULL_TRACE),
                            run_metadata=run_metadata)
                else:
                    if uidx % opt.dis_steps == 0:
                        _, d_loss = sess.run([dis_op, d_loss_],
                                             feed_dict={
                                                 x_: x_batch,
                                                 x_org_: x_batch_org
                                             })
                    if uidx % opt.gen_steps == 0:
                        _, g_loss = sess.run([gen_op, g_loss_],
                                             feed_dict={
                                                 x_: x_batch,
                                                 x_org_: x_batch_org
                                             })
                ''' validation '''
                if uidx % opt.valid_freq == 0:

                    valid_index = np.random.choice(len(val), opt.batch_size)
                    val_sents = [val[t] for t in valid_index]

                    val_sents_permutated = add_noise(val_sents, opt)

                    x_val_batch = prepare_data_for_cnn(val_sents_permutated,
                                                       opt)
                    x_val_batch_org = prepare_data_for_rnn(val_sents, opt)

                    d_loss_val = sess.run(d_loss_,
                                          feed_dict={
                                              x_: x_val_batch,
                                              x_org_: x_val_batch_org
                                          })
                    g_loss_val = sess.run(g_loss_,
                                          feed_dict={
                                              x_: x_val_batch,
                                              x_org_: x_val_batch_org
                                          })

                    res = sess.run(res_,
                                   feed_dict={
                                       x_: x_val_batch,
                                       x_org_: x_val_batch_org
                                   })
                    print("Validation d_loss %f, g_loss %f  mean_dist %f" %
                          (d_loss_val, g_loss_val, res['mean_dist']))
                    print("Sent:" + u' '.join([
                        ixtoword[x] for x in res['syn_sent'][0] if x != 0
                    ]))  #.encode('utf-8', 'ignore').decode("utf8").strip())
                    print("MMD loss %f, GAN loss %f" %
                          (res['mmd'], res['gan']))
                    # np.savetxt('./text_arxiv/syn_val_words.txt', res['syn_sent'], fmt='%i', delimiter=' ')
                    if opt.discrimination:
                        print("Real Prob %f Fake Prob %f" %
                              (res['prob_r'], res['prob_f']))

                    for i in range(4):
                        valid_index = np.random.choice(len(val),
                                                       opt.batch_size)
                        val_sents = [val[t] for t in valid_index]
                        val_sents_permutated = add_noise(val_sents, opt)
                        x_val_batch = prepare_data_for_cnn(
                            val_sents_permutated, opt)
                        x_val_batch_org = prepare_data_for_rnn(val_sents, opt)
                        res = sess.run(res_,
                                       feed_dict={
                                           x_: x_val_batch,
                                           x_org_: x_val_batch_org
                                       })
                        if i == 0:
                            valid_text = res['syn_sent']
                        else:
                            valid_text = np.concatenate(
                                (valid_text, res['syn_sent']), 0)

                    np.savetxt('./text_news/syn_val_words.txt',
                               valid_text,
                               fmt='%i',
                               delimiter=' ')

                    val_set = [prepare_for_bleu(s) for s in val_sents]
                    [bleu2s, bleu3s, bleu4s] = cal_BLEU(
                        [prepare_for_bleu(s) for s in res['syn_sent']],
                        {0: val_set})
                    print('Val BLEU (2,3,4): ' + ' '.join(
                        [str(round(it, 3))
                         for it in (bleu2s, bleu3s, bleu4s)]))
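                    # Log the current validation summaries to TensorBoard
                    # under the test writer, keyed by iteration uidx.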

                    summary = sess.run(merged,
                                       feed_dict={
                                           x_: x_val_batch,
                                           x_org_: x_val_batch_org
                                       })
                    test_writer.add_summary(summary, uidx)
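
The training loops in these examples all draw batches through get_minibatches_idx. The helper itself is not shown; below is a minimal sketch of the behavior its call sites imply (shuffled index chunks returned as (batch_number, indices) pairs). This is an assumption for illustration, not the verbatim helper.

import numpy as np

def get_minibatches_idx(n, minibatch_size, shuffle=False):
    # Split range(n) into consecutive chunks of at most minibatch_size,
    # optionally shuffling example order first.
    idx_list = np.arange(n, dtype="int32")
    if shuffle:
        np.random.shuffle(idx_list)
    minibatches = [idx_list[s:s + minibatch_size]
                   for s in range(0, n, minibatch_size)]
    return list(zip(range(len(minibatches)), minibatches))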
Example 25
def main(opt):
    # global n_words
    # Prepare training and testing data
    
    
    data_path = opt.data_dir + "/" + opt.data_name
    print('loading '+data_path)
    x = cPickle.load(open(data_path, "rb"))
    train, val, test = x[0], x[1], x[2]
    wordtoix, ixtoword = x[3], x[4]


    opt.n_words = len(ixtoword)
    print(datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y"))
    print(dict(opt))
    print('Total words: %d' % opt.n_words)

    with tf.device('/gpu:1'):
        x_1_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        x_2_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        y_ = tf.placeholder(tf.float32, shape=[opt.batch_size,])
        l_temp_ = tf.placeholder(tf.float32, shape=[])
        res_, loss_ ,train_op = cons_disc(x_1_, x_2_, y_, opt, l_temp_)
        merged = tf.summary.merge_all()

    

    uidx = 0
    
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer()) # feed_dict={x_: np.zeros([opt.batch_size, opt.sent_len]), x_org_: np.zeros([opt.batch_size, opt.sent_len])}

        if opt.restore:
            print('-'*20)
            print("Loading variables from '%s'." % opt.load_path)
            try:
                #pdb.set_trace()
                t_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)  # tf.trainable_variables()
                # print([var.name[:-2] for var in t_vars])
                save_keys = tensors_key_in_file(opt.load_path)
                ss = [var for var in t_vars if var.name[:-2] in save_keys.keys()]
                ss = [var.name for var in ss if var.get_shape() == save_keys[var.name[:-2]]]
                loader = tf.train.Saver(var_list=[var for var in t_vars if var.name in ss])
                loader.restore(sess, opt.load_path)
                print("Loaded variables:"+str(ss))
                print('-'*20)

            except Exception as e:
                print('Error: ' + str(e))
                # restoring failed; abort rather than silently fall back
                # to random initialization
                exit()

        # train
        # to skip training, set max_epochs = 0

        for epoch in range(opt.max_epochs):
            print("Starting epoch %d" % epoch)
            opt.l_temp = min(opt.l_temp * opt.l_temp_factor, opt.l_temp_max)
            print("Annealing temperature " + str(opt.l_temp))
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                sents = [train[t] for t in train_index]
                indice = [rand_pair(opt.task, opt.data_name) for _ in range(opt.batch_size)]
                if opt.task == 'L':
                    x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                    x_2 = [sents[i][idx[1]] for i, idx in enumerate(indice)]
                    y_batch = [(i1 - i2) % 2 == 0 for i1, i2 in indice]
                elif opt.task == 'C':
                    # shuffle the first half of the batch so roughly half the
                    # pairs become mismatched negatives
                    batch_indice = np.concatenate([np.random.permutation(opt.batch_size // 2),
                                                   np.arange(opt.batch_size // 2, opt.batch_size)])
                    y_batch = (np.arange(opt.batch_size) == batch_indice)
                    rn = np.random.choice(7, size=opt.batch_size)  # unused
                    x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                    x_2 = [sents[batch_indice[i]][idx[1]] for i, idx in enumerate(indice)]
                else:  # G
                    batch_indice = np.concatenate([np.random.permutation(opt.batch_size // 2),
                                                   np.arange(opt.batch_size // 2, opt.batch_size)])
                    y_batch = (np.arange(opt.batch_size) == batch_indice)
                    x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                    x_2 = [sents[batch_indice[i]][idx[1]] for i, idx in enumerate(indice)]
                x_1_batch = prepare_data_for_cnn(x_1, opt)  # Batch L
                x_2_batch = prepare_data_for_cnn(x_2, opt)  # Batch L

                feed = {x_1_: x_1_batch, x_2_: x_2_batch, y_:np.float32(y_batch),l_temp_:opt.l_temp}
                _, loss = sess.run([train_op, loss_], feed_dict=feed)



                if uidx % opt.print_freq == 1:
                    print("Iteration %d: loss %f " % (uidx, loss))
                    res = sess.run(res_, feed_dict=feed)
                    if opt.verbose:
                        print("logits:" + str(res['logits']))
                        print("H1:" + str(res['H_1'][0]))
                        print("H2:" + str(res['H_2'][0]))
                    # print("H2:" + str(res['H_1'][0]*res['H_2'][0]-0.5))
                    acc = np.sum(np.equal(res['y_pred'], y_batch)) / float(opt.batch_size)
                    print("Accuracy: %f" % (acc))
                    print("y_mean: %f" % (np.mean(y_batch)))
                    print("corr:" + str(res['corr']))

                    sys.stdout.flush()
                    summary = sess.run(merged, feed_dict=feed)
                    train_writer.add_summary(summary, uidx)

                if uidx % opt.valid_freq == 1:
                    acc, loss_val, y_mean, corr = 0, 0, 0, 0
                    indice = [rand_pair(opt.task, opt.data_name) for _ in range(opt.batch_size)]
                    for i in range(100):
                        valid_index = np.random.choice(len(test), opt.batch_size)
                        sents = [test[t] for t in valid_index]
                        if opt.task == 'L':
                            x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                            x_2 = [sents[i][idx[1]] for i, idx in enumerate(indice)]
                            y_batch = [(i1 - i2) % 2 == 0 for i1, i2 in indice]
                        elif opt.task == 'C':
                            batch_indice = np.concatenate([np.random.permutation(opt.batch_size // 2),
                                                           np.arange(opt.batch_size // 2, opt.batch_size)])
                            y_batch = (np.arange(opt.batch_size) == batch_indice)
                            rn = np.random.choice(7, size=opt.batch_size)  # unused
                            x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                            x_2 = [sents[batch_indice[i]][idx[1]] for i, idx in enumerate(indice)]
                        else:  # G
                            batch_indice = np.concatenate([np.random.permutation(opt.batch_size // 2),
                                                           np.arange(opt.batch_size // 2, opt.batch_size)])
                            y_batch = (np.arange(opt.batch_size) == batch_indice)
                            x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                            x_2 = [sents[batch_indice[i]][idx[1]] for i, idx in enumerate(indice)]

                        x_1_batch = prepare_data_for_cnn(x_1, opt)  # Batch L
                        x_2_batch = prepare_data_for_cnn(x_2, opt)  # Batch L

                        feed = {x_1_: x_1_batch, x_2_: x_2_batch, y_:np.float32(y_batch),l_temp_:opt.l_temp}
                        loss_val += sess.run(loss_, feed_dict=feed)
                        res = sess.run(res_, feed_dict=feed)
                        acc += np.sum(np.equal(res['y_pred'], y_batch)) / float(opt.batch_size)
                        y_mean += np.mean(y_batch)
                        corr += res['corr']

                    loss_val = loss_val / 100.0
                    acc = acc / 100.0
                    y_mean = y_mean / 100.0
                    corr = corr / 100.0
                    print("Validation loss %.4f " % (loss_val))
                    print("Validation accuracy: %.4f" % (acc))
                    print("Validation y_mean: %.4f" % (y_mean))
                    print("Validation corr: %.4f" % (corr))
                    print("")
                    sys.stdout.flush()
                    
                    summary = sess.run(merged, feed_dict=feed)
                    test_writer.add_summary(summary, uidx)

            saver.save(sess, opt.save_path, global_step=epoch)


        # test

        if opt.test:
            print('Testing....')
            iter_num = int(np.floor(len(test) / opt.batch_size)) + 1
            for i in range(iter_num):
                if i%100 == 0:
                    print('Iter %i/%i'%(i, iter_num))
                test_index = range(i*opt.batch_size, (i+1)*opt.batch_size)
                test_sents = [test[t%len(test)] for t in test_index]
                indice = [(0,1),(2,3),(4,5),(6,7)]
                for idx in indice:
                    x_1 = [test_sents[i][idx[0]] for i in range(opt.batch_size)]
                    x_2 = [test_sents[i][idx[1]] for i in range(opt.batch_size)]
                    y_batch = [True for i in range(opt.batch_size)]
                    x_1_batch = prepare_data_for_cnn(x_1, opt)  # Batch L
                    x_2_batch = prepare_data_for_cnn(x_2, opt)  # Batch L

                    feed = {x_1_: x_1_batch, x_2_: x_2_batch, y_:np.float32(y_batch), l_temp_:opt.l_temp}
                    res = sess.run(res_, feed_dict=feed)
                    for d in range(opt.batch_size):
                        with open(opt.log_path + '.feature.txt', "a") as myfile:
                            myfile.write(str(test_index[d]) + "\t" + str(idx[0]) + "\t" +
                                         " ".join([ixtoword[x] for x in x_1_batch[d] if x != 0]) + "\t" +
                                         " ".join(map(str, res['H_1'][d])) + "\n")
                            myfile.write(str(test_index[d]) + "\t" + str(idx[1]) + "\t" +
                                         " ".join([ixtoword[x] for x in x_2_batch[d] if x != 0]) + "\t" +
                                         " ".join(map(str, res['H_2'][d])) + "\n")
Example 26
    def run_epoch(sess,
                  epoch,
                  mode,
                  print_freq=-1,
                  display_sent=-1,
                  train_writer=None):
        fetches_ = {'loss': loss_}
        if mode == 'train':
            x, is_train = train, 1
            fetches_['train_op'] = train_op_
            fetches_['summary'] = merged
        elif mode == 'val':
            assert (print_freq == -1)
            x, is_train = val, None
        elif mode == 'test':
            assert (print_freq == -1)
            x, is_train = test, None

        acc_loss, acc_n = 0.0, 0.0
        local_t = 0
        global_t = epoch * epoch_t  # only used in train mode
        start_time = time.time()
        kf = get_minibatches_idx(len(x), opt.batch_size, shuffle=True)

        for _, index in kf:
            local_t += 1
            global_t += 1

            sents_b = [x[i] for i in index]
            sents_b_n = add_noise(sents_b, opt)
            x_b_org = prepare_data_for_rnn(sents_b, opt)  # Batch L
            x_b = prepare_data_for_cnn(sents_b_n, opt)  # Batch L
            feed_t = {x_: x_b, x_org_: x_b_org, is_train_: is_train}
            fetches = sess.run(fetches_, feed_dict=feed_t)

            batch_size = len(index)
            acc_n += batch_size
            acc_loss += fetches['loss'] * batch_size
            if print_freq > 0 and local_t % print_freq == 0:
                print("%s Iter %d: loss %.4f, time %.1fs" %
                      (mode, local_t, acc_loss / acc_n,
                       time.time() - start_time))
                sys.stdout.flush()
            if mode == 'train' and train_writer is not None:
                train_writer.add_summary(fetches['summary'], global_t)

        if display_sent > 0:
            index_d = np.random.choice(len(x), opt.batch_size, replace=False)
            sents_d = [x[i] for i in index_d]
            sents_d_n = add_noise(sents_d, opt)
            x_d_org = prepare_data_for_rnn(sents_d, opt)  # Batch L
            x_d = prepare_data_for_cnn(sents_d_n, opt)  # Batch L
            res = sess.run(res_,
                           feed_dict={
                               x_: x_d,
                               x_org_: x_d_org,
                               is_train_: is_train
                           })
            for i in range(display_sent):
                print(
                    "%s Org: " % mode + " ".join([
                        ixtoword[ix]
                        for ix in sents_d[i] if ix != 0 and ix != 2
                    ]))
                if mode == 'train':
                    print(
                        "%s Rec(feedy): " % mode + " ".join([
                            ixtoword[ix] for ix in res['rec_sents_feed_y'][i]
                            if ix != 0 and ix != 2
                        ]))
                print(
                    "%s Rec: " % mode + " ".join([
                        ixtoword[ix]
                        for ix in res['rec_sents'][i] if ix != 0 and ix != 2
                    ]))

        print("%s Epoch %d: loss %.4f, time %.1fs" %
              (mode, epoch, acc_loss / acc_n, time.time() - start_time))
        return acc_loss / acc_n
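
run_epoch feeds prepare_data_for_cnn / prepare_data_for_rnn outputs as [batch, sent_len] index matrices. A minimal sketch of the zero-padding these calls imply, assuming index 0 is the pad token (the real helpers may also insert start/end markers):

import numpy as np

def prepare_data_for_cnn(seqs, opt):
    # Truncate or zero-pad every index sequence to opt.sent_len.
    x = np.zeros((len(seqs), opt.sent_len), dtype="int32")
    for i, s in enumerate(seqs):
        s = list(s)[:opt.sent_len]
        x[i, :len(s)] = s
    return x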
Example 27
def main():
    # Prepare training and testing data
    opt = Options()
    # load data
    loadpath = "./data/mimic3.p"
    embpath = "mimic3_emb.p"
    opt.num_class = 50

    x = cPickle.load(open(loadpath, "rb"))
    train, train_text, train_lab = x[0], x[1], x[2]
    val, val_text, val_lab = x[3], x[4], x[5]
    test, test_text, test_lab = x[6], x[7], x[8]
    wordtoix, ixtoword = x[10], x[9]
    del x
    print("load data finished")

    train_lab = np.array(train_lab, dtype='float32')
    val_lab = np.array(val_lab, dtype='float32')
    test_lab = np.array(test_lab, dtype='float32')
    opt.n_words = len(ixtoword)
    if opt.part_data:
        #np.random.seed(123)
        train_ind = np.random.choice(len(train),
                                     int(len(train) * opt.portion),
                                     replace=False)
        train = [train[t] for t in train_ind]
        train_lab = [train_lab[t] for t in train_ind]

    os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.GPUID)

    print(dict(opt))
    print('Total words: %d' % opt.n_words)

    try:
        opt.W_emb = np.array(cPickle.load(open(embpath, 'rb')),
                             dtype='float32')
        opt.W_class_emb = load_class_embedding(wordtoix, opt)
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:1'):
        x_ = tf.placeholder(tf.int32,
                            shape=[opt.batch_size, opt.maxlen],
                            name='x_')
        x_mask_ = tf.placeholder(tf.float32,
                                 shape=[opt.batch_size, opt.maxlen],
                                 name='x_mask_')
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        y_ = tf.placeholder(tf.float32,
                            shape=[opt.batch_size, opt.num_class],
                            name='y_')
        class_penalty_ = tf.placeholder(tf.float32, shape=())
        accuracy_, loss_, train_op, W_norm_, global_step, logits_, prob_ = emb_classifier(
            x_, x_mask_, y_, keep_prob, opt, class_penalty_)
    uidx = 0
    max_val_accuracy = 0.
    max_test_accuracy = 0.
    max_val_auc_mean = 0.
    max_test_auc_mean = 0.

    config = tf.ConfigProto(
        log_device_placement=False,
        allow_soft_placement=True,
    )
    config.gpu_options.allow_growth = True
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                t_vars = tf.trainable_variables()
                save_keys = tensors_key_in_file(opt.save_path)
                ss = set([var.name for var in t_vars]) & set(
                    [s + ":0" for s in save_keys.keys()])
                cc = {var.name: var for var in t_vars}
                # only restore variables with correct shape
                ss_right_shape = set(
                    [s for s in ss if cc[s].get_shape() == save_keys[s[:-2]]])

                loader = tf.train.Saver(var_list=[
                    var for var in t_vars if var.name in ss_right_shape
                ])
                loader.restore(sess, opt.save_path)

                print("Loading variables from '%s'." % opt.save_path)
                print("Loaded variables:" + str(ss))

            except Exception:
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        try:
            for epoch in range(opt.max_epochs):
                print("Starting epoch %d" % epoch)
                kf = get_minibatches_idx(len(train),
                                         opt.batch_size,
                                         shuffle=True)
                for _, train_index in kf:
                    uidx += 1
                    sents = [train[t] for t in train_index]
                    x_labels = [train_lab[t] for t in train_index]
                    x_labels = np.array(x_labels)
                    x_labels = x_labels.reshape((len(x_labels), opt.num_class))

                    x_batch, x_batch_mask = prepare_data_for_emb(sents, opt)
                    _, loss, step = sess.run(
                        [train_op, loss_, global_step],
                        feed_dict={
                            x_: x_batch,
                            x_mask_: x_batch_mask,
                            y_: x_labels,
                            keep_prob: opt.dropout,
                            class_penalty_: opt.class_penalty
                        })

                    if uidx % opt.valid_freq == 0:
                        train_correct = 0.0
                        # evaluate accuracy on a sample of 500 training examples
                        kf_train = get_minibatches_idx(500,
                                                       opt.batch_size,
                                                       shuffle=True)
                        for _, train_index in kf_train:
                            train_sents = [train[t] for t in train_index]
                            train_labels = [train_lab[t] for t in train_index]
                            train_labels = np.array(train_labels)
                            train_labels = train_labels.reshape(
                                (len(train_labels), opt.num_class))
                            x_train_batch, x_train_batch_mask = prepare_data_for_emb(
                                train_sents, opt)
                            train_accuracy = sess.run(accuracy_,
                                                      feed_dict={
                                                          x_: x_train_batch,
                                                          x_mask_:
                                                          x_train_batch_mask,
                                                          y_: train_labels,
                                                          keep_prob: 1.0,
                                                          class_penalty_: 0.0
                                                      })

                            train_correct += train_accuracy * len(train_index)

                        train_accuracy = train_correct / 500

                        print("Iteration %d: Training loss %f " % (uidx, loss))
                        print("Train accuracy %f " % train_accuracy)

                        val_correct = 0.0
                        val_y = []
                        val_logits_list = []
                        val_prob_list = []
                        val_true_list = []

                        kf_val = get_minibatches_idx(len(val),
                                                     opt.batch_size,
                                                     shuffle=True)
                        for _, val_index in kf_val:
                            val_sents = [val[t] for t in val_index]
                            val_labels = [val_lab[t] for t in val_index]
                            val_labels = np.array(val_labels)
                            val_labels = val_labels.reshape(
                                (len(val_labels), opt.num_class))
                            x_val_batch, x_val_batch_mask = prepare_data_for_emb(
                                val_sents, opt)
                            val_accuracy, val_logits, val_probs = sess.run(
                                [accuracy_, logits_, prob_],
                                feed_dict={
                                    x_: x_val_batch,
                                    x_mask_: x_val_batch_mask,
                                    y_: val_labels,
                                    keep_prob: 1.0,
                                    class_penalty_: 0.0
                                })

                            val_correct += val_accuracy * len(val_index)
                            val_y += np.argmax(val_labels, axis=1).tolist()
                            val_logits_list += val_logits.tolist()
                            val_prob_list += val_probs.tolist()
                            val_true_list += val_labels.tolist()

                        val_accuracy = val_correct / len(val)
                        val_logits_array = np.asarray(val_logits_list)
                        val_prob_array = np.asarray(val_prob_list)
                        val_true_array = np.asarray(val_true_list)
                        val_auc_list = []
                        val_auc_micro = roc_auc_score(y_true=val_true_array,
                                                      y_score=val_logits_array,
                                                      average='micro')
                        val_auc_macro = roc_auc_score(y_true=val_true_array,
                                                      y_score=val_logits_array,
                                                      average='macro')
                        for i in range(opt.num_class):
                            if np.any(val_true_array[:, i] > 0):
                                val_auc = roc_auc_score(
                                    y_true=val_true_array[:, i],
                                    y_score=val_logits_array[:, i],
                                )
                                val_auc_list.append(val_auc)
                        val_auc_mean = np.mean(val_auc_list)

                        # print("Validation accuracy %f " % val_accuracy)
                        print("val auc macro %f micro %f " %
                              (val_auc_macro, val_auc_micro))

                        if True:
                            test_correct = 0.0
                            test_y = []
                            test_logits_list = []
                            test_prob_list = []
                            test_true_list = []

                            kf_test = get_minibatches_idx(len(test),
                                                          opt.batch_size,
                                                          shuffle=True)
                            for _, test_index in kf_test:
                                test_sents = [test[t] for t in test_index]
                                test_labels = [test_lab[t] for t in test_index]
                                test_labels = np.array(test_labels)
                                test_labels = test_labels.reshape(
                                    (len(test_labels), opt.num_class))
                                x_test_batch, x_test_batch_mask = prepare_data_for_emb(
                                    test_sents, opt)

                                test_accuracy, test_logits, test_probs = sess.run(
                                    [accuracy_, logits_, prob_],
                                    feed_dict={
                                        x_: x_test_batch,
                                        x_mask_: x_test_batch_mask,
                                        y_: test_labels,
                                        keep_prob: 1.0,
                                        class_penalty_: 0.0
                                    })

                                test_correct += test_accuracy * len(test_index)
                                test_y += np.argmax(test_labels,
                                                    axis=1).tolist()
                                test_logits_list += test_logits.tolist()
                                test_prob_list += test_probs.tolist()
                                test_true_list += test_labels.tolist()
                            test_accuracy = test_correct / len(test)
                            test_logits_array = np.asarray(test_logits_list)
                            test_prob_array = np.asarray(test_prob_list)
                            test_true_array = np.asarray(test_true_list)
                            test_auc_list = []
                            test_auc_micro = roc_auc_score(
                                y_true=test_true_array,
                                y_score=test_logits_array,
                                average='micro')
                            test_auc_macro = roc_auc_score(
                                y_true=test_true_array,
                                y_score=test_logits_array,
                                average='macro')

                            test_f1_micro = micro_f1(
                                test_prob_array.ravel() > 0.5,
                                test_true_array.ravel(),
                            )
                            test_f1_macro = macro_f1(
                                test_prob_array > 0.5,
                                test_true_array,
                            )
                            test_p5 = precision_at_k(test_logits_array,
                                                     test_true_array, 5)

                            for i in range(opt.num_class):
                                if np.any(test_true_array[:, i] > 0):
                                    test_auc = roc_auc_score(
                                        y_true=test_true_array[:, i],
                                        y_score=test_logits_array[:, i],
                                    )
                                    test_auc_list.append(test_auc)

                            test_auc_mean = np.mean(test_auc_list)
                            print("Test auc macro %f micro %f " %
                                  (test_auc_macro, test_auc_micro))
                            print("Test f1 macro %f micro %f " %
                                  (test_f1_macro, test_f1_micro))
                            print("P5 %f" % test_p5)
                            # max_test_accuracy = test_accuracy
                            max_test_auc_mean = test_auc_mean
                            # print("Test accuracy %f " % test_accuracy)
                            # max_test_accuracy = test_accuracy

                # print("Epoch %d: Max Test accuracy %f" % (epoch, max_test_accuracy))
                print("Epoch %d: Max Test auc %f" % (epoch, max_test_auc_mean))
                saver.save(sess, opt.save_path, global_step=epoch)
            print("Max Test accuracy %f " % max_test_accuracy)

        except KeyboardInterrupt:
            print('Training interrupted')
            print("Max Test auc %f " % max_test_auc_mean)
Example 28
def train(model, train_triples, valid_triples, test_triples, sr_index, params):
    rng = np.random
    n_entities, n_relations = model.n_entities, model.n_relations

    train_fn = model.train_fn(num_neg=params[NUM_NEG], lrate=params[LEARNING_RATE], marge=params[MARGE])
    ranks_fn = model.ranks_fn()
    scores_fn = model.scores_fn()

    uidx = 1
    best_p = None
    history_valid_hits = []
    history_test_hits = []
    history_epoch_times = []
    bins = [1, 11, 21, 31, 51, 101, 1001, 10001, 20000]
    print("Training on {:d} triples".format(len(train_triples)))
    num_batches = int(math.ceil(len(train_triples) / params[BATCH_SIZE]))
    print("Batch size = {:d}, Number of batches = {:d}".format(params[BATCH_SIZE], num_batches))
    print("The eval is being printed with number of items the bins -> %s" % bins)
    try:
        # We iterate over epochs:
        train_start_time = time.time()
        for epoch in range(params[NUM_EPOCHS]):
            # In each epoch, we do a full pass over the training data:
            epoch_start_time = time.time()
            for _, train_index in utils.get_minibatches_idx(len(train_triples), params[BATCH_SIZE], False):
                # Normalize the entity embeddings
                if params[IS_NORMALIZED]:
                    model.normalize()

                tmb = train_triples[train_index]

                # generating negative examples by replacing the left entity
                tmbln_list = [rng.randint(0, n_entities, tmb.shape[0]).astype(dtype=tmb.dtype) for _ in range(params[NUM_NEG])]

                # generating negative examples by replacing the right entity
                tmbrn_list = [rng.randint(0, n_entities, tmb.shape[0]).astype(dtype=tmb.dtype) for _ in range(params[NUM_NEG])]

                cost = train_fn(*([tmb] + tmbln_list + tmbrn_list))[0]

                if np.isnan(cost) or np.isinf(cost):
                    print('bad cost detected! Cost is ' + str(cost))
                    return get_best_metric(history_valid_hits)

                if uidx % params[DISP_FREQ] == 0:
                    print('Epoch ', epoch, 'Iter', uidx, 'Cost ', cost)

                if uidx % params[VALID_FREQ] == 0:
                    print('Epoch ', epoch, 'Iter', uidx, 'Cost ', cost)

                    # print("Epoch {} of {} uidx {} took {:.3f}s".format(
                    #     epoch + 1, params[NUM_EPOCHS], uidx, time.time() - start_time))
                    if len(history_epoch_times) > 0:
                        print ("  Average epoch time - {:.3f}s".format(np.mean(history_epoch_times)))
                    print("  Time since start - {:.3f}s".format(time.time() - train_start_time))

                    print("  Train Minibatch Metrics")
                    train_hits10 = get_batch_metrics(tmb, sr_index, scores_fn, False)
                    print('')
                    print("  Validation data Metrics")
                    valid_hits10 = get_batch_metrics(valid_triples, sr_index, scores_fn, True)
                    print('')
                    print("  Test data Metrics")
                    test_hits10 = get_batch_metrics(test_triples, sr_index, scores_fn, True)

                    if (best_p is None) or (len(history_valid_hits) > 0 and valid_hits10 >= np.max(history_valid_hits)):
                        print("found best params yet")
                        best_p = utils.get_params(model)

                    history_valid_hits.append(valid_hits10)
                    history_test_hits.append(test_hits10)

                if uidx % params[SAVE_FREQ] == 0:
                    if best_p is None:
                        all_params = utils.get_params(model)
                    else:
                        all_params = best_p

                    utils.save(params[SAVETO_FILE], all_params)

                uidx += 1

            history_epoch_times.append(time.time() - epoch_start_time)

    except KeyboardInterrupt:
        print("training interrupted")

    return model, get_best_metric(history_valid_hits), train_fn, scores_fn
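
train_fn above consumes one positive batch plus NUM_NEG left- and right-corrupted batches and returns a scalar cost. The usual objective for this setup is a margin ranking loss; a NumPy sketch of such a cost, shown only to make the MARGE parameter concrete (an assumption about train_fn's internals):

import numpy as np

def margin_ranking_loss(pos_scores, neg_scores, marge):
    # Hinge loss: each corrupted triple should score at least `marge`
    # below its positive counterpart.
    return float(np.maximum(0.0, marge + neg_scores - pos_scores).mean())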
Example 29
def main():
    loadpath = "./data/snli.p"
    x = cPickle.load(open(loadpath, "rb"))

    train, val, test = x[0], x[1], x[2]
    wordtoix, ixtoword = x[4], x[5]

    train_q, train_a, train_lab = train[0], train[1], train[2]
    val_q, val_a, val_lab = val[0], val[1], val[2]
    test_q, test_a, test_lab = test[0], test[1], test[2]

    train_lab = np.array(train_lab, dtype='float32')
    val_lab = np.array(val_lab, dtype='float32')
    test_lab = np.array(test_lab, dtype='float32')

    opt = Options()
    opt.n_words = len(ixtoword)

    del x

    print(dict(opt))
    print('Total words: %d' % opt.n_words)

    if opt.part_data:
        np.random.seed(123)
        train_ind = np.random.choice(len(train_q), int(len(train_q)*opt.portion), replace=False)
        train_q = [train_q[t] for t in train_ind]
        train_a = [train_a[t] for t in train_ind]
        train_lab = [train_lab[t] for t in train_ind]

    try:
        params = np.load('./data/snli_emb.p', allow_pickle=True)
        if params[0].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            #pdb.set_trace()
            opt.W_emb = np.array(params[0], dtype='float32')
        else:
            print('Emb Dimension mismatch: snli_emb.p:' + str(params[0].shape) + ' opt: ' + str(
                (opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:1'):
        x_1_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen])
        x_2_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen])
        x_mask_1_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.maxlen])
        x_mask_2_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.maxlen])
        y_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.category])
        keep_prob = tf.placeholder(tf.float32)
        accuracy_, loss_, train_op_, W_emb_ = auto_encoder(x_1_, x_2_, x_mask_1_, x_mask_2_, y_, keep_prob, opt)
        merged = tf.summary.merge_all()

    uidx = 0
    max_val_accuracy = 0.
    max_test_accuracy = 0.
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                #pdb.set_trace()
                t_vars = tf.trainable_variables()
                # print([var.name[:-2] for var in t_vars])
                save_keys = tensors_key_in_file(opt.save_path)

                # pdb.set_trace()
                # print(save_keys.keys())
                ss = set([var.name for var in t_vars]) & set([s + ":0" for s in save_keys.keys()])
                cc = {var.name: var for var in t_vars}
                #pdb.set_trace()

                # only restore variables with correct shape
                ss_right_shape = set([s for s in ss if cc[s].get_shape() == save_keys[s[:-2]]])

                loader = tf.train.Saver(var_list=[var for var in t_vars if var.name in ss_right_shape])
                loader.restore(sess, opt.save_path)

                print("Loading variables from '%s'." % opt.save_path)
                print("Loaded variables:" + str(ss))

            except Exception:
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        try:
            for epoch in range(opt.max_epochs):
                print("Starting epoch %d" % epoch)
                kf = get_minibatches_idx(len(train_q), opt.batch_size, shuffle=True)
                for _, train_index in kf:

                    uidx += 1
                    sents_1 = [train_q[t] for t in train_index]
                    sents_2 = [train_a[t] for t in train_index]
                    x_labels = [train_lab[t] for t in train_index]
                    x_labels = np.array(x_labels)
                    x_labels = x_labels.reshape((len(x_labels), opt.category))

                    x_batch_1, x_batch_mask_1 = prepare_data_for_emb(sents_1, opt)
                    x_batch_2, x_batch_mask_2 = prepare_data_for_emb(sents_2, opt)

                    _, loss = sess.run([train_op_, loss_], feed_dict={x_1_: x_batch_1, x_2_: x_batch_2,
                                       x_mask_1_: x_batch_mask_1, x_mask_2_: x_batch_mask_2, y_: x_labels, keep_prob: opt.dropout_ratio})

                    if uidx % opt.valid_freq == 0:

                        train_correct = 0.0
                        # evaluate accuracy on a fixed sample of 3070 training pairs
                        kf_train = get_minibatches_idx(3070, opt.batch_size, shuffle=True)
                        for _, train_index in kf_train:
                            train_sents_1 = [train_q[t] for t in train_index]
                            train_sents_2 = [train_a[t] for t in train_index]
                            train_labels = [train_lab[t] for t in train_index]
                            train_labels = np.array(train_labels)
                            train_labels = train_labels.reshape((len(train_labels), opt.category))
                            x_train_batch_1, x_train_mask_1 = prepare_data_for_emb(train_sents_1, opt)
                            x_train_batch_2, x_train_mask_2 = prepare_data_for_emb(train_sents_2, opt)

                            train_accuracy = sess.run(accuracy_,
                                                      feed_dict={x_1_: x_train_batch_1, x_2_: x_train_batch_2, x_mask_1_: x_train_mask_1, x_mask_2_: x_train_mask_2,
                                                                 y_: train_labels, keep_prob: 1.0})

                            train_correct += train_accuracy * len(train_index)

                        train_accuracy = train_correct / 3070

                        # print("Iteration %d: Training loss %f, dis loss %f, rec loss %f" % (uidx,
                        #                                                                     loss, dis_loss, rec_loss))
                        print("Train accuracy %f " % train_accuracy)

                        val_correct = 0.0
                        is_train = True
                        kf_val = get_minibatches_idx(len(val_q), opt.batch_size, shuffle=True)
                        for _, val_index in kf_val:
                            val_sents_1 = [val_q[t] for t in val_index]
                            val_sents_2 = [val_a[t] for t in val_index]
                            val_labels = [val_lab[t] for t in val_index]
                            val_labels = np.array(val_labels)
                            val_labels = val_labels.reshape((len(val_labels), opt.category))
                            x_val_batch_1, x_val_mask_1 = prepare_data_for_emb(val_sents_1, opt)
                            x_val_batch_2, x_val_mask_2 = prepare_data_for_emb(val_sents_2, opt)

                            val_accuracy = sess.run(accuracy_, feed_dict={x_1_: x_val_batch_1, x_2_: x_val_batch_2,
                                                                          x_mask_1_: x_val_mask_1, x_mask_2_: x_val_mask_2, y_: val_labels, keep_prob: 1.0})

                            val_correct += val_accuracy * len(val_index)

                        val_accuracy = val_correct / len(val_q)

                        print("Validation accuracy %f " % val_accuracy)

                        if val_accuracy > max_val_accuracy:
                            max_val_accuracy = val_accuracy

                            test_correct = 0.0
                            kf_test = get_minibatches_idx(len(test_q), opt.batch_size, shuffle=True)
                            for _, test_index in kf_test:
                                test_sents_1 = [test_q[t] for t in test_index]
                                test_sents_2 = [test_a[t] for t in test_index]
                                test_labels = [test_lab[t] for t in test_index]
                                test_labels = np.array(test_labels)
                                test_labels = test_labels.reshape((len(test_labels), opt.category))
                                x_test_batch_1, x_test_mask_1 = prepare_data_for_emb(test_sents_1, opt)
                                x_test_batch_2, x_test_mask_2 = prepare_data_for_emb(test_sents_2, opt)

                                test_accuracy = sess.run(accuracy_, feed_dict={x_1_: x_test_batch_1, x_2_: x_test_batch_2,
                                                                               x_mask_1_: x_test_mask_1, x_mask_2_: x_test_mask_2,
                                                                               y_: test_labels, keep_prob: 1.0})

                                test_correct += test_accuracy * len(test_index)

                            test_accuracy = test_correct / len(test_q)

                            print("Test accuracy %f " % test_accuracy)

                            max_test_accuracy = test_accuracy

                print("Epoch %d: Max Test accuracy %f" % (epoch, max_test_accuracy))

            print("Max Test accuracy %f " % max_test_accuracy)

        except KeyboardInterrupt:
            print('Training interrupted')
            print("Max Test accuracy %f " % max_test_accuracy)
Example 30
def main():
    # Prepare training and testing data
    opt = Options()
    # load data
    if opt.dataset == 'Tweet':
        loadpath = "./data/langdetect_tweet0.7.p"
        embpath = "./data/langdetect_tweet_emb.p"
        opt.num_class = 4
        opt.class_name = ['apple', 'google', 'microsoft', 'twitter']
    elif opt.dataset == 'N20short':
        loadpath = "./data/N20short.p"
        embpath = "./data/N20short_emb.p"
        opt.class_name = [
            'rec.autos', 'talk.politics.misc', 'sci.electronics',
            'comp.sys.ibm.pc.hardware', 'talk.politics.guns', 'sci.med',
            'rec.motorcycles', 'soc.religion.christian',
            'comp.sys.mac.hardware', 'comp.graphics', 'sci.space',
            'alt.atheism', 'rec.sport.baseball', 'comp.windows.x',
            'talk.religion.misc', 'comp.os.ms-windows.misc', 'misc.forsale',
            'talk.politics.mideast', 'sci.crypt', 'rec.sport.hockey'
        ]
        opt.num_class = len(opt.class_name)
    elif opt.dataset == 'agnews':
        loadpath = "./data/ag_news.p"
        embpath = "./data/ag_news_glove.p"
        opt.num_class = 4
        opt.class_name = ['World', 'Sports', 'Business', 'Science']
    elif opt.dataset == 'dbpedia':
        loadpath = "./data/dbpedia.p"
        embpath = "./data/dbpedia_glove.p"
        opt.num_class = 14
        opt.class_name = [
            'Company',
            'Educational Institution',
            'Artist',
            'Athlete',
            'Office Holder',
            'Mean Of Transportation',
            'Building',
            'Natural Place',
            'Village',
            'Animal',
            'Plant',
            'Album',
            'Film',
            'Written Work',
        ]
    elif opt.dataset == 'yelp_full':
        loadpath = "./data/yelp_full.p"
        embpath = "./data/yelp_full_glove.p"
        opt.num_class = 5
        opt.class_name = ['worst', 'bad', 'middle', 'good', 'best']
    x = cPickle.load(open(loadpath, "rb"), encoding='iso-8859-1')
    train, val, test = x[0], x[1], x[2]
    print(len(val))
    train_lab, val_lab, test_lab = x[3], x[4], x[5]
    wordtoix, ixtoword = x[6], x[7]
    del x
    print("len of train,val,test:", len(train), len(val), len(test))
    print("load data finished")

    train_lab = np.array(train_lab, dtype='float32')
    val_lab = np.array(val_lab, dtype='float32')
    test_lab = np.array(test_lab, dtype='float32')
    opt.n_words = len(ixtoword)
    if opt.part_data:
        #np.random.seed(123)
        train_ind = np.random.choice(len(train),
                                     int(len(train) * opt.portion),
                                     replace=False)
        train = [train[t] for t in train_ind]
        train_lab = [train_lab[t] for t in train_ind]

    os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.GPUID)

    print(dict(opt))
    print('Total words: %d' % opt.n_words)

    try:
        opt.W_emb = np.array(cPickle.load(open(embpath, 'rb'),
                                          encoding='iso-8859-1'),
                             dtype='float32')
        opt.W_class_emb = load_class_embedding(wordtoix, opt)
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/cpu:0'):
        x_ = tf.placeholder(tf.int32,
                            shape=[opt.batch_size, opt.maxlen],
                            name='x_')
        x_mask_ = tf.placeholder(tf.float32,
                                 shape=[opt.batch_size, opt.maxlen],
                                 name='x_mask_')
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        y_ = tf.placeholder(tf.float32,
                            shape=[opt.batch_size, opt.num_class],
                            name='y_')
        class_penalty_ = tf.placeholder(tf.float32, shape=())
        accuracy_, loss_, train_op, W_norm_, global_step, prob_ = emb_classifier(
            x_, x_mask_, y_, keep_prob, opt, class_penalty_)
    uidx = 0
    max_val_accuracy = 0.
    max_test_accuracy = 0.

    config = tf.ConfigProto(
        log_device_placement=False,
        allow_soft_placement=True,
    )
    config.gpu_options.allow_growth = True
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                t_vars = tf.trainable_variables()
                save_keys = tensors_key_in_file(opt.save_path)
                ss = set([var.name for var in t_vars]) & set(
                    [s + ":0" for s in save_keys.keys()])
                cc = {var.name: var for var in t_vars}
                # only restore variables with correct shape
                ss_right_shape = set(
                    [s for s in ss if cc[s].get_shape() == save_keys[s[:-2]]])

                loader = tf.train.Saver(var_list=[
                    var for var in t_vars if var.name in ss_right_shape
                ])
                loader.restore(sess, opt.save_path)

                print("Loading variables from '%s'." % opt.save_path)
                print("Loaded variables:" + str(ss))

            except Exception:
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        try:
            for epoch in range(opt.max_epochs):
                print("Starting epoch %d" % epoch)
                kf = get_minibatches_idx(len(train),
                                         opt.batch_size,
                                         shuffle=True)
                for _, train_index in kf:
                    uidx += 1
                    sents = [train[t] for t in train_index]
                    x_labels = [train_lab[t] for t in train_index]
                    # print(x_labels)
                    x_labels = np.array(x_labels)
                    x_labels = x_labels.reshape((len(x_labels), opt.num_class))
                    # print(x_labels)
                    # exit()
                    x_batch, x_batch_mask = prepare_data_for_emb(sents, opt)
                    _, loss, step = sess.run(
                        [train_op, loss_, global_step],
                        feed_dict={
                            x_: x_batch,
                            x_mask_: x_batch_mask,
                            y_: x_labels,
                            keep_prob: opt.dropout,
                            class_penalty_: opt.class_penalty
                        })

                    if uidx % opt.valid_freq == 0:
                        train_correct = 0.0
                        # evaluate accuracy on a sample of 500 training examples
                        kf_train = get_minibatches_idx(500,
                                                       opt.batch_size,
                                                       shuffle=True)
                        for _, train_index in kf_train:
                            train_sents = [train[t] for t in train_index]
                            train_labels = [train_lab[t] for t in train_index]
                            train_labels = np.array(train_labels)
                            train_labels = train_labels.reshape(
                                (len(train_labels), opt.num_class))
                            x_train_batch, x_train_batch_mask = prepare_data_for_emb(
                                train_sents, opt)
                            train_accuracy = sess.run(accuracy_,
                                                      feed_dict={
                                                          x_: x_train_batch,
                                                          x_mask_:
                                                          x_train_batch_mask,
                                                          y_: train_labels,
                                                          keep_prob: 1.0,
                                                          class_penalty_: 0.0
                                                      })

                            train_correct += train_accuracy * len(train_index)

                        train_accuracy = train_correct / 500

                        print("Iteration %d: Training loss %f " % (uidx, loss))
                        print("Train accuracy %f " % train_accuracy)

                        val_correct = 0.0
                        kf_val = get_minibatches_idx(len(val),
                                                     opt.batch_size,
                                                     shuffle=True)
                        for _, val_index in kf_val:
                            val_sents = [val[t] for t in val_index]
                            val_labels = [val_lab[t] for t in val_index]
                            val_labels = np.array(val_labels)
                            val_labels = val_labels.reshape(
                                (len(val_labels), opt.num_class))
                            x_val_batch, x_val_batch_mask = prepare_data_for_emb(
                                val_sents, opt)

                            val_accuracy = sess.run(accuracy_,
                                                    feed_dict={
                                                        x_: x_val_batch,
                                                        x_mask_:
                                                        x_val_batch_mask,
                                                        y_: val_labels,
                                                        keep_prob: 1.0,
                                                        class_penalty_: 0.0
                                                    })
                            val_correct += val_accuracy * len(val_index)

                        val_accuracy = val_correct / len(val)
                        print("Validation accuracy %f " % val_accuracy)

                        if val_accuracy > max_val_accuracy:
                            max_val_accuracy = val_accuracy

                            # test_correct = 0.0
                            #
                            # kf_test = get_minibatches_idx(len(test), opt.batch_size, shuffle=True)
                            # for _, test_index in kf_test:
                            #     test_sents = [test[t] for t in test_index]
                            #     test_labels = [test_lab[t] for t in test_index]
                            #     test_labels = np.array(test_labels)
                            #     test_labels = test_labels.reshape((len(test_labels), opt.num_class))
                            #     x_test_batch, x_test_batch_mask = prepare_data_for_emb(test_sents, opt)
                            #
                            #     test_accuracy,predict_prob = sess.run([accuracy_,prob_],feed_dict={x_: x_test_batch, x_mask_: x_test_batch_mask,y_: test_labels, keep_prob: 1.0, class_penalty_: 0.0})
                            #     print(predict_prob)
                            #     test_correct += test_accuracy * len(test_index)
                            #
                            # test_accuracy = test_correct / len(test)
                            # print("Test accuracy %f " % test_accuracy)
                            # max_test_accuracy = test_accuracy

                # print("Epoch %d: Max Test accuracy %f" % (epoch, max_test_accuracy))
                saver.save(sess, opt.save_path, global_step=epoch)
                saver.save(sess, "save_model/model.ckpt")
            # print("Max Test accuracy %f " % max_test_accuracy)

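            # After training: run over the test set once and collect each
            # document's top-n label probabilities.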
            test_correct = 0.0

            kf_test = get_minibatches_idx(len(test),
                                          opt.batch_size,
                                          shuffle=False)
            for _, test_index in kf_test:
                test_sents = [test[t] for t in test_index]
                test_labels = [test_lab[t] for t in test_index]
                test_labels = np.array(test_labels)
                test_labels = test_labels.reshape(
                    (len(test_labels), opt.num_class))
                x_test_batch, x_test_batch_mask = prepare_data_for_emb(
                    test_sents, opt)

                test_accuracy, predict_prob = sess.run(
                    [accuracy_, prob_],
                    feed_dict={
                        x_: x_test_batch,
                        x_mask_: x_test_batch_mask,
                        y_: test_labels,
                        keep_prob: 1.0,
                        class_penalty_: 0.0
                    })

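                # Keep the probabilities of the opt.topnlabel most likely
                # classes for each document (zeros elsewhere).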
                for prob in predict_prob:
                    topnlabel_onedoc = [0] * opt.num_class
                    for _ in range(opt.topnlabel):
                        index_label = int(np.argmax(prob))
                        topnlabel_onedoc[index_label] = prob[index_label]
                        prob[index_label] = -1  # mask out so the next pass finds the runner-up
                    topnlabel_docwithoutlabel.append(topnlabel_onedoc)
                test_correct += test_accuracy * len(test_index)
            print(topnlabel_docwithoutlabel)
            test_accuracy = test_correct / len(test)
            print("Predict accuracy %f " % test_accuracy)

            max_test_accuracy = test_accuracy

            # Write the top-n label probabilities, one document per line.
            filename = 'test'
            with open(filename, 'w') as fout:
                fout.write(str(len(test)))
                fout.write('\n')
                # print(wordtoix.get('close'))
                # exit()
                for topic_prob in topnlabel_docwithoutlabel:
                    print(topic_prob)
                    for prob_each_label in topic_prob:
                        fout.write(str(prob_each_label))
                        fout.write(" ")
                    fout.write('\n')

        except KeyboardInterrupt:
            print('Training interrupted')
            print("Max Test accuracy %f " % max_test_accuracy)
Esempio n. 31
0
def run_model(opt, train, val, ixtoword):

    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:' +
                  str(params['Wemb'].shape) + ' opt: ' + str(
                      (opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:1'):
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        x_org_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        is_train_ = tf.placeholder(tf.bool, name='is_train_')
        res_, loss_, train_op = auto_encoder(x_, x_org_, is_train_, opt)
        merged = tf.summary.merge_all()
        # opt.is_train = False
        # res_val_, loss_val_, _ = auto_encoder(x_, x_org_, opt)
        # merged_val = tf.summary.merge_all()

    #tensorboard --logdir=run1:/tmp/tensorflow/ --port 6006
    #writer = tf.train.SummaryWriter(opt.log_path, graph=tf.get_default_graph())

    uidx = 0
    config = tf.ConfigProto(log_device_placement=False,
                            allow_soft_placement=True,
                            graph_options=tf.GraphOptions(build_cost_model=1))
    #config = tf.ConfigProto(device_count={'GPU':0})
    # config.gpu_options.per_process_gpu_memory_fraction = 0.8
    config.gpu_options.allow_growth = True
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    run_metadata = tf.RunMetadata()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train',
                                             sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                t_vars = tf.trainable_variables()
                #print([var.name[:-2] for var in t_vars])
                loader = restore_from_save(t_vars, sess, opt)
                print('Load pretrain successfully')

            except Exception as e:
                print(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        for epoch in range(opt.max_epochs):
            print("Starting epoch %d" % epoch)
            # if epoch >= 10:
            #     print("Relax embedding ")
            #     opt.fix_emb = False
            #     opt.batch_size = 2
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                sents = [train[t] for t in train_index]

                sents_permutated = add_noise(sents, opt)

                #sents[0] = np.random.permutation(sents[0])

                if opt.model != 'rnn_rnn' and opt.model != 'cnn_rnn':
                    x_batch_org = prepare_data_for_cnn(sents, opt)  # Batch L
                else:
                    x_batch_org = prepare_data_for_rnn(sents, opt)  # Batch L

                if opt.model != 'rnn_rnn':
                    x_batch = prepare_data_for_cnn(sents_permutated,
                                                   opt)  # Batch L
                else:
                    x_batch = prepare_data_for_rnn(sents_permutated,
                                                   opt,
                                                   is_add_GO=False)  # Batch L

                if profile:
                    _, loss = sess.run(
                        [train_op, loss_],
                        feed_dict={
                            x_: x_batch,
                            x_org_: x_batch_org,
                            is_train_: 1
                        },
                        options=tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE),
                        run_metadata=run_metadata)
                else:
                    _, loss = sess.run([train_op, loss_],
                                       feed_dict={
                                           x_: x_batch,
                                           x_org_: x_batch_org,
                                           is_train_: 1
                                       })

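                # Periodic validation: reconstruct a random held-out batch,
                # then log the loss, the decoded sentences, and a summary.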
                if uidx % opt.valid_freq == 0:
                    is_train = False  # run the graph in evaluation mode
                    valid_index = np.random.choice(len(val), opt.batch_size)
                    val_sents = [val[t] for t in valid_index]

                    val_sents_permutated = add_noise(val_sents, opt)

                    if opt.model != 'rnn_rnn' and opt.model != 'cnn_rnn':
                        x_val_batch_org = prepare_data_for_cnn(val_sents, opt)
                    else:
                        x_val_batch_org = prepare_data_for_rnn(val_sents, opt)

                    if opt.model != 'rnn_rnn':
                        x_val_batch = prepare_data_for_cnn(
                            val_sents_permutated, opt)
                    else:
                        x_val_batch = prepare_data_for_rnn(
                            val_sents_permutated, opt, is_add_GO=False)

                    loss_val = sess.run(loss_,
                                        feed_dict={
                                            x_: x_val_batch,
                                            x_org_: x_val_batch_org,
                                            is_train_: is_train
                                        })
                    print("Validation loss %f " % (loss_val))
                    res = sess.run(res_,
                                   feed_dict={
                                       x_: x_val_batch,
                                       x_org_: x_val_batch_org,
                                       is_train_: is_train
                                   })
                    np.savetxt(opt.save_txt + '/rec_val_words.txt',
                               res['rec_sents'],
                               fmt='%i',
                               delimiter=' ')
                    try:
                        print("Orig:" + u' '.join([
                            ixtoword[x]
                            for x in x_val_batch_org[0] if x != 0 and x != 1
                        ]))  #.encode('utf-8', 'ignore').strip()
                        print("Sent:" + u' '.join([
                            ixtoword[x] for x in res['rec_sents'][0] if x != 0
                        ]))  #.encode('utf-8', 'ignore').strip()
                    except Exception:
                        pass  # skip printing if an index is missing or the text can't be encoded
                    if opt.discrimination:
                        print("Real Prob %f Fake Prob %f" %
                              (res['prob_r'], res['prob_f']))

                    summary = sess.run(merged,
                                       feed_dict={
                                           x_: x_val_batch,
                                           x_org_: x_val_batch_org,
                                           is_train_: is_train
                                       })
                    test_writer.add_summary(summary, uidx)
                    is_train = True

                if uidx % opt.print_freq == 1:
                    #pdb.set_trace()
                    print("Iteration %d: loss %f " % (uidx, loss))
                    res = sess.run(res_,
                                   feed_dict={
                                       x_: x_batch,
                                       x_org_: x_batch_org,
                                       is_train_: 1
                                   })
                    np.savetxt(opt.save_txt + '/rec_train_words.txt',
                               res['rec_sents'],
                               fmt='%i',
                               delimiter=' ')
                    try:
                        print("Orig:" + u' '.join([
                            ixtoword[x]
                            for x in x_batch_org[0] if x != 0 and x != 1
                        ]))  #.encode('utf-8').strip()
                        print("Sent:" + u' '.join([
                            ixtoword[x] for x in res['rec_sents'][0] if x != 0
                        ]))  #.encode('utf-8').strip()
                    except Exception:
                        pass  # skip printing if an index is missing or the text can't be encoded
                    summary = sess.run(merged,
                                       feed_dict={
                                           x_: x_batch,
                                           x_org_: x_batch_org,
                                           is_train_: 1
                                       })
                    train_writer.add_summary(summary, uidx)
                    # print res['x_rec'][0][0]
                    # print res['x_emb'][0][0]
                    if profile:
                        tf.contrib.tfprof.model_analyzer.print_model_analysis(
                            tf.get_default_graph(),
                            run_meta=run_metadata,
                            tfprof_options=tf.contrib.tfprof.model_analyzer.
                            PRINT_ALL_TIMING_MEMORY)

            saver.save(sess, opt.save_path, global_step=epoch)
Esempio n. 32
0
def train_classifier(train,
                     valid,
                     test,
                     W,
                     n_p=10,
                     n_words=10000,
                     n_x=300,
                     n_h=200,
                     patience=10,
                     max_epochs=50,
                     lrate=0.001,
                     n_train=10000,
                     optimizer='RMSprop',
                     batch_size=50,
                     valid_batch_size=50,
                     dispFreq=10,
                     validFreq=100,
                     saveFreq=500,
                     eps=1e-3):
    """ train, valid, test : datasets
        W : the word embedding initialization
        n_words : vocabulary size
        n_x : word embedding dimension
        n_h : LSTM/GRU number of hidden units 
        n_z : latent embedding sapce for a sentence 
        patience : Number of epoch to wait before early stop if no progress
        max_epochs : The maximum number of epoch to run
        lrate : learning rate
        optimizer : methods to do optimization
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of update.
    """

    options = {}
    options['n_p'] = n_p
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['patience'] = patience
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['optimizer'] = optimizer
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq

    #if config.method in ['SVGD', 'SVGD_KFAC']: patience = 5

    logger.info('Model options {}'.format(options))

    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')

    assert np.min(train[1]) == 0 and np.max(train[1]) == 1
    n_y = np.max(train[1]) + 1
    options['n_y'] = n_y

    params = init_params(options, W)
    tparams = init_tparams(params)

    (use_noise, x, mask, y, f_pred_prob, f_pred, cost,
     cache) = build_model(tparams, options)

    lr_theano = tensor.scalar(name='lr')
    ntrain_theano = tensor.scalar(name='ntrain')

    if config.method == 'pSGLD':
        f_grad_shared, f_update = pSGLD(tparams, cost, [x, mask, y],
                                        ntrain_theano, lr_theano)
    elif config.method == 'SGLD':
        f_grad_shared, f_update = SGLD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano)
    elif config.method == 'RMSprop':
        f_grad_shared, f_update = RMSprop(tparams, cost, [x, mask, y],
                                          lr_theano)
    elif config.method == 'SVGD':
        f_grad_shared, f_update = SVGD(tparams,
                                       cost, [x, mask, y],
                                       ntrain_theano,
                                       lr_theano,
                                       kfac=False)
    elif config.method == 'SVGD_KFAC':
        f_grad_shared, f_update = SVGD(tparams,
                                       cost, [x, mask, y],
                                       ntrain_theano,
                                       lr_theano,
                                       kfac=True,
                                       average=True,
                                       cache=cache,
                                       eps=eps,
                                       n_p=n_p)
    elif config.method == 'MIXTURE_KFAC':
        f_grad_shared, f_update = SVGD(tparams,
                                       cost, [x, mask, y],
                                       ntrain_theano,
                                       lr_theano,
                                       kfac=True,
                                       average=False,
                                       cache=cache,
                                       eps=eps,
                                       n_p=n_p)

    #print 'Training model...'
    logger.info('Training model...')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    estop = False  # early stop
    history_errs = []
    best_train_err, best_valid_err, best_test_err = 0., 0., 0.
    bad_counter = 0
    uidx = 0  # the number of update done
    start_time = time.time()

    n_average = 0
    train_probs = np.zeros((len(train[0]), n_y))
    valid_probs = np.zeros((len(valid[0]), n_y))
    test_probs = np.zeros((len(test[0]), n_y))

    try:
        for eidx in xrange(max_epochs):
            # Diagnostic: pairwise squared distances between the Wy particles.
            print(tparams.keys())
            from optimizers import sqr_dist
            ##['Wemb', 'lstm_encoder_W', 'lstm_encoder_U', 'lstm_encoder_rev_W', 'lstm_encoder_rev_U', 'Wy']
            tv = tensor.flatten(tparams['Wy'], 2)
            ftv = theano.function([], sqr_dist(tv, tv))
            otv = ftv()
            print(np.min(otv), np.max(otv), np.mean(otv), np.median(otv),
                  np.sum(otv**2) / n_p)

            n_samples = 0
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                #use_noise.set_value(0.5)
                use_noise.set_value(config.dropout)

                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)
                if config.method == 'RMSprop':
                    f_update(lrate)
                elif config.method in ['SVGD', 'pSGLD', 'SGLD']:
                    f_update(lrate, n_train)
                elif config.method in ['SVGD_KFAC', 'MIXTURE_KFAC']:
                    f_update(lrate, n_train, x, mask, y)

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN detected')
                    estop = True
                    break

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(
                        eidx, uidx, cost))

                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                    saveto = 'results/%s.npz' % save_prefix
                    np.savez(saveto, history_errs=history_errs)

                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)

                    if eidx < 1:
                        train_err = pred_error(f_pred, prepare_data, train, kf)
                        valid_err = pred_error(f_pred, prepare_data, valid,
                                               kf_valid)
                        test_err = pred_error(f_pred, prepare_data, test,
                                              kf_test)
                        history_errs.append([valid_err, test_err, train_err])
                    else:
                        train_probs_curr = pred_probs(f_pred_prob,
                                                      prepare_data, train, kf,
                                                      options)
                        valid_probs_curr = pred_probs(f_pred_prob,
                                                      prepare_data, valid,
                                                      kf_valid, options)
                        test_probs_curr = pred_probs(f_pred_prob, prepare_data,
                                                     test, kf_test, options)
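                        # Running average of predictive probabilities across
                        # validation checkpoints (ensemble/posterior averaging).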
                        train_probs = (n_average * train_probs +
                                       train_probs_curr) / (n_average + 1)
                        valid_probs = (n_average * valid_probs +
                                       valid_probs_curr) / (n_average + 1)
                        test_probs = (n_average * test_probs +
                                      test_probs_curr) / (n_average + 1)
                        n_average += 1

                        train_pred = train_probs.argmax(axis=1)
                        valid_pred = valid_probs.argmax(axis=1)
                        test_pred = test_probs.argmax(axis=1)

                        train_err = (train_pred == np.array(train[1])).sum()
                        train_err = 1. - numpy_floatX(train_err) / len(
                            train[0])

                        valid_err = (valid_pred == np.array(valid[1])).sum()
                        valid_err = 1. - numpy_floatX(valid_err) / len(
                            valid[0])

                        test_err = (test_pred == np.array(test[1])).sum()
                        test_err = 1. - numpy_floatX(test_err) / len(test[0])
                        history_errs.append([valid_err, test_err, train_err])

                    if (uidx == 0 or
                            valid_err <= np.array(history_errs)[:, 0].min()):

                        best_train_err = train_err
                        best_valid_err = valid_err
                        best_test_err = test_err
                        bad_counter = 0

                    logger.info('Train {} Valid {} Test {}'.format(
                        train_err, valid_err, test_err))

                    if (len(history_errs) > patience and valid_err >=
                            np.array(history_errs)[:-patience, 0].min()):
                        #valid_err >= np.array(history_errs)[:-patience,0].mean()):
                        bad_counter += 1
                        if bad_counter > patience:

                            logger.info('Early Stop!')
                            estop = True
                            break

            logger.info('Seen {} samples'.format(n_samples))

            if estop:
                break

    except KeyboardInterrupt:

        logger.info('Training interrupted')

    end_time = time.time()
    logger.info('Train {} Valid {} Test {}'.format(best_train_err,
                                                   best_valid_err,
                                                   best_test_err))

    saveto = 'results/%s.npz' % save_prefix
    np.savez(saveto,
             train_err=best_train_err,
             valid_err=best_valid_err,
             test_err=best_test_err,
             history_errs=history_errs)

    logger.info('The code ran for {} epochs, with {} sec/epoch'.format(
        eidx + 1, (end_time - start_time) / (1. * (eidx + 1))))

    #print >> sys.stderr, ('Training took %.1fs' %
    #                      (end_time - start_time))
    return best_train_err, best_valid_err, best_test_err
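
A minimal, hypothetical invocation sketch for train_classifier: the toy data generator and the embedding stand-in below are illustrations only (not part of the original code), and the module-level config, logger, and save_prefix globals that the function references are assumed to be set up elsewhere:

import numpy as np

rng = np.random.RandomState(0)

def toy_split(n, n_words=100):
    # Random (list of word-index lists, list of 0/1 labels), matching the
    # data layout the docstring describes.
    sents = [list(rng.randint(1, n_words, size=rng.randint(5, 20)))
             for _ in range(n)]
    labels = list(rng.randint(0, 2, size=n))
    return sents, labels

W = rng.randn(100, 300).astype('float32')  # stand-in for a pretrained embedding
best_errs = train_classifier(toy_split(500), toy_split(100), toy_split(100), W,
                             n_words=100, max_epochs=1)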
Esempio n. 33
0
train = transform(train, w2i)
dev = transform(dev, w2i)
test = transform(test, w2i)


def evaluate(model, dev, params):
    _, g1x, g1mask, g2x, g2mask = utils.get_prepare_data(dev, params.nout)
    golds = [score for sa, sb, score in dev]
    scores = model.scoring_function(g1x, g2x, g1mask, g2mask)
    preds = np.squeeze(scores)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]


for epoch in range(300):
    process_bar = pyprind.ProgPercent(len(train))
    kf = utils.get_minibatches_idx(len(train), params.batchsize, shuffle=True)
    uidx = 0
    for _, train_index in kf:
        uidx += 1
        batch = [train[t] for t in train_index]

        scores, g1x, g1mask, g2x, g2mask = utils.get_prepare_data(
            batch, params.nout)

        # print scores[:2], g1x[:2], g1mask[:2], g2x[:2], g2mask[:2]
        cost = model.train_function(scores, g1x, g2x, g1mask, g2mask)

        if np.isnan(cost) or np.isinf(cost):
            print('NaN detected')

    print('Epoch', epoch + 1, 'Update', uidx + 1, 'Cost', cost)
Esempio n. 34
0
def main(gpu, mem_frac, batch_size, alpha, gamma, omega, euler_ord, max_len,
         optimizer, keep_prob, learning_rate, margin, norm_type, balancing):
    # ========================================================================== #
    # =============================== Parameters =============================== #
    # ========================================================================== #
    params = {
        'gpu': gpu,
        'mem_frac': mem_frac,
        'batch_size': batch_size,
        'alpha': alpha,  # threshold for euler angle
        'gamma': gamma,  # weight factor for twist loss
        'omega': omega,  # weight factor for smooth loss
        'euler_ord': euler_ord,
        'max_len': max_len,
        'optimizer': optimizer,
        'keep_prob': keep_prob,
        'learning_rate': learning_rate,
        'margin': margin,
        'norm_type': norm_type,
        'balancing': balancing
    }

    prefix = "pmnet"
    for k, v in params.items():
        if (k != 'gpu' and k != 'mem_frac' and k != 'euler_ord'):
            prefix += "_" + k + "=" + str(v)
    # ========================================================================== #
    # =============================== Load Data ================================ #
    # ========================================================================== #
    data_path = "../datasets/train/"
    stats_path = "../data/"

    # Mixamo joint configuration
    parents = np.array([
        -1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 0, 10, 11, 12, 3, 14, 15, 16, 3, 18, 19,
        20
    ])

    all_local = []
    all_global = []
    all_skel = []
    all_names = []
    t_skel = []

    folders = [
        f for f in listdir(data_path) if not f.startswith(".")
        and not f.endswith("py") and not f.endswith(".npz")
    ]
    for folder_name in folders:
        files = [
            f for f in listdir(data_path + folder_name)
            if not f.startswith(".") and f.endswith("_seq.npy")
        ]
        for cfile in files:
            file_name = cfile[:-8]
            # Real joint positions
            positions = np.load(data_path + folder_name + "/" + file_name +
                                "_skel.npy")

            # Processed sequence (the last 4 columns are apparently unused dummy values)
            sequence = np.load(data_path + folder_name + "/" + file_name +
                               "_seq.npy")

            # Processed global positions (#frames, 4)
            offset = sequence[:, -8:-4]

            # Processed local positions (#frames, #joints, 3)
            sequence = np.reshape(sequence[:, :-8], [sequence.shape[0], -1, 3])
            positions[:, 0, :] = sequence[:, 0, :]  # root joint

            all_local.append(sequence)
            all_global.append(offset)
            all_skel.append(positions)
            all_names.append(folder_name)

    # Joint positions before processed
    train_skel = all_skel

    # After processed, relative position
    train_local = all_local
    train_global = all_global

    # T-pose (real position)
    for tt in train_skel:
        t_skel.append(tt[0:1])

    # Total training samples
    all_frames = np.concatenate(train_local)
    ntotal_samples = all_frames.shape[0]
    ntotal_sequences = len(train_local)
    print("Number of sequences: " + str(ntotal_sequences))

    # ========================================================================== #
    # ============================= Data Normalize ============================= #
    # ========================================================================== #
    # Calculate total mean and std
    allframes_n_skel = np.concatenate(train_local + t_skel)
    local_mean = allframes_n_skel.mean(axis=0)[None, :]
    global_mean = np.concatenate(train_global).mean(axis=0)[None, :]
    local_std = allframes_n_skel.std(axis=0)[None, :]
    global_std = np.concatenate(train_global).std(axis=0)[None, :]

    # Save the data stats
    np.save(stats_path + "mixamo_local_motion_mean.npy", local_mean)
    np.save(stats_path + "mixamo_local_motion_std.npy", local_std)
    np.save(stats_path + "mixamo_global_motion_mean.npy", global_mean)
    np.save(stats_path + "mixamo_global_motion_std.npy", global_std)

    # Normalize the data (per-dimension standardization)
    n_joints = all_local[0].shape[-2]
    local_std[local_std == 0] = 1

    for i in xrange(len(train_local)):
        train_local[i] = (train_local[i] - local_mean) / local_std
        train_global[i] = (train_global[i] - global_mean) / global_std
        train_skel[i] = (train_skel[i] - local_mean) / local_std

    # ========================================================================== #
    # =============================== Load Model =============================== #
    # ========================================================================== #
    models_dir = "../data/models/" + prefix
    logs_dir = "../data/logs/" + prefix

    if not exists(models_dir):
        makedirs(models_dir)

    if not exists(logs_dir):
        makedirs(logs_dir)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_frac)

    with tf.device("/gpu:%d" % gpu):
        net = pmnet_model(batch_size, alpha, gamma, omega, euler_ord, n_joints,
                          max_len, parents, keep_prob, learning_rate,
                          optimizer, local_mean, local_std, global_mean,
                          global_std, logs_dir, margin, norm_type, balancing)

    # ========================================================================== #
    # ================================ Training ================================ #
    # ========================================================================== #
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False,
                                          gpu_options=gpu_options)) as sess:

        sess.run(tf.global_variables_initializer())

        loaded, model_name = net.load(sess, models_dir)
        if loaded:
            print("[*] Load SUCCESSFUL")
            iteration = int(model_name.split("-")[-1])
        else:
            print("[!] Starting from scratch ...")
            iteration = 0

        net.saver = tf.train.Saver(max_to_keep=10)

        max_iter = 15000

        while iteration < max_iter:
            mini_batches = get_minibatches_idx(len(train_local),
                                               batch_size,
                                               shuffle=True)

            for _, batch_idxs in mini_batches:
                start_time = time.time()
                if len(batch_idxs) == batch_size:

                    steps = np.repeat(max_len, batch_size)

                    localA_batch = []
                    globalA_batch = []
                    skelA_batch = []
                    localB_batch = []
                    globalB_batch = []
                    skelB_batch = []
                    mask_batch = np.zeros((batch_size, max_len),
                                          dtype="float32")
                    aeReg_batch = np.zeros((batch_size, 1), dtype="float32")

                    inp_height_batch = np.zeros((batch_size, 1),
                                                dtype="float32")
                    tgt_height_batch = np.zeros((batch_size, 1),
                                                dtype="float32")

                    # Make minibatch
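                    # Each slot gets a random max_len crop of clip A
                    # (zero-padded if the clip is shorter) plus a randomly
                    # drawn target skeleton B.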
                    for bb in xrange(batch_size):
                        low = 0
                        high = train_local[batch_idxs[bb]].shape[0] - max_len
                        if low >= high:
                            stidx = 0
                        else:
                            stidx = np.random.randint(low=low, high=high)

                        clocalA = train_local[batch_idxs[bb]][stidx:(stidx +
                                                                     max_len)]
                        mask_batch[
                            bb, :np.min([max_len, clocalA.shape[0]])] = 1.0

                        if clocalA.shape[0] < max_len:
                            clocalA = np.concatenate(
                                (clocalA,
                                 np.zeros((max_len - clocalA.shape[0],
                                           n_joints, 3))))

                        cglobalA = train_global[batch_idxs[bb]][stidx:(
                            stidx + max_len)]
                        if cglobalA.shape[0] < max_len:
                            cglobalA = np.concatenate(
                                (cglobalA,
                                 np.zeros((max_len - cglobalA.shape[0],
                                           n_joints, 3))))

                        cskelA = train_skel[batch_idxs[bb]][stidx:(stidx +
                                                                   max_len)]
                        if cskelA.shape[0] < max_len:
                            cskelA = np.concatenate(
                                (cskelA,
                                 np.zeros((max_len - cskelA.shape[0], n_joints,
                                           3))))

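                        # Draw a random target skeleton B for retargeting.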
                        rnd_idx = np.random.randint(len(train_local))

                        cskelB = train_skel[rnd_idx][0:max_len]
                        if cskelB.shape[0] < max_len:
                            cskelB = np.concatenate(
                                (cskelB,
                                 np.zeros((max_len - cskelB.shape[0], n_joints,
                                           3))))

                        joints_a = cskelA[0].copy()
                        joints_a = joints_a[None]
                        joints_a = (joints_a * local_std) + local_mean
                        height_a = get_height_from_skel(joints_a[0])
                        height_a = height_a / 100

                        joints_b = cskelB[0].copy()
                        joints_b = joints_b[None]
                        joints_b = (joints_b * local_std + local_mean)
                        height_b = get_height_from_skel(joints_b[0])
                        height_b = height_b / 100

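                        # With probability 0.5 the target is A's own skeleton,
                        # turning this pair into a reconstruction (autoencoder
                        # regularization) example.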
                        aeReg_on = np.random.binomial(1, p=0.5)
                        if aeReg_on:
                            cskelB = cskelA.copy()
                            aeReg_batch[bb, 0] = 1

                            inp_height_batch[bb, 0] = height_a
                            tgt_height_batch[bb, 0] = height_a
                        else:
                            aeReg_batch[bb, 0] = 0

                            inp_height_batch[bb, 0] = height_a
                            tgt_height_batch[bb, 0] = height_b

                        localA_batch.append(clocalA)
                        globalA_batch.append(cglobalA)
                        skelA_batch.append(cskelA)
                        localB_batch.append(clocalA)
                        globalB_batch.append(cglobalA)
                        skelB_batch.append(cskelB)

                    localA_batch = np.array(localA_batch).reshape(
                        (batch_size, max_len, -1))
                    globalA_batch = np.array(globalA_batch).reshape(
                        (batch_size, max_len, -1))
                    seqA_batch = np.concatenate((localA_batch, globalA_batch),
                                                axis=-1)
                    skelA_batch = np.array(skelA_batch).reshape(
                        (batch_size, max_len, -1))

                    localB_batch = np.array(localB_batch).reshape(
                        (batch_size, max_len, -1))
                    globalB_batch = np.array(globalB_batch).reshape(
                        (batch_size, max_len, -1))
                    seqB_batch = np.concatenate((localB_batch, globalB_batch),
                                                axis=-1)
                    skelB_batch = np.array(skelB_batch).reshape(
                        (batch_size, max_len, -1))

                    mid_time = time.time()

                    mf, mr, mg, shape, base = net.train(
                        sess, seqA_batch, skelA_batch, seqB_batch, skelB_batch,
                        mask_batch, aeReg_batch, inp_height_batch,
                        tgt_height_batch, iteration)

                    print("step=%d/%d, time=%.2f+%.2f" %
                          (iteration, max_iter, mid_time - start_time,
                           time.time() - mid_time))

                    if np.isnan(mg) or np.isinf(mg):
                        return

                    if iteration >= 1000 and iteration % 5000 == 0:
                        net.save(sess, models_dir, iteration)

                    iteration = iteration + 1

        net.save(sess, models_dir, iteration)
Esempio n. 35
0
def main(gpu, batch_size, alpha, beta, gamma, omega, margin, d_arch, d_rand,
         euler_ord, max_steps, min_steps, num_layer, gru_units, optim,
         norm_type, mem_frac, keep_prob, learning_rate):

    prefix = "Online_Retargeting_Mixamo_Cycle_Adv"

    for kk, vv in locals().iteritems():
        if (kk != "prefix" and kk != "mem_frac" and kk != "batch_size"
                and kk != "min_steps" and kk != "max_steps" and kk != "gpu"):
            prefix += "_" + kk + "=" + str(vv)

    layers_units = []
    for i in range(num_layer):
        layers_units.append(gru_units)

    data_path = "./datasets/train/"
    alllocal = []
    allglobal = []
    allskel = []
    allnames = []

    folders = [
        f for f in listdir(data_path) if not f.startswith(".")
        and not f.endswith("py") and not f.endswith(".npz")
    ]
    for folder in folders:
        files = [
            f for f in listdir(data_path + folder)
            if not f.startswith(".") and f.endswith("_seq.npy")
        ]
        for cfile in files:
            positions = np.load(data_path + folder + "/" + cfile[:-8] +
                                "_skel.npy")
            if positions.shape[0] >= min_steps:
                sequence = np.load(data_path + folder + "/" + cfile[:-8] +
                                   "_seq.npy")
                offset = sequence[:, -8:-4]
                sequence = np.reshape(sequence[:, :-8],
                                      [sequence.shape[0], -1, 3])
                positions[:, 0, :] = sequence[:, 0, :]
                alllocal.append(sequence)
                allglobal.append(offset)
                allskel.append(positions)
                allnames.append(folder)

    trainlocal = alllocal
    trainskel = allskel
    trainglobal = allglobal

    print("Number of examples: " + str(len(trainlocal)))
    tskel = []
    for tt in trainskel:
        tskel.append(tt[0:1])
    allframes_n_skel = np.concatenate(trainlocal + tskel)
    min_root = allframes_n_skel[:, 0:1].min(axis=0)
    max_root = allframes_n_skel[:, 0:1].max(axis=0)
    local_mean = allframes_n_skel.mean(axis=0)[None, :]
    global_mean = np.concatenate(trainglobal).mean(axis=0)[None, :]
    local_std = allframes_n_skel.std(axis=0)[None, :]
    global_std = np.concatenate(trainglobal).std(axis=0)[None, :]

    np.save(data_path[:-6] + "mixamo_local_motion_mean.npy", local_mean)
    np.save(data_path[:-6] + "mixamo_local_motion_std.npy", local_std)
    local_std[local_std == 0] = 1
    np.save(data_path[:-6] + "mixamo_global_motion_mean.npy", global_mean)
    np.save(data_path[:-6] + "mixamo_global_motion_std.npy", global_std)

    n_joints = alllocal[0].shape[-2]

    for i in xrange(len(trainlocal)):
        trainlocal[i] = (trainlocal[i] - local_mean) / local_std
        trainglobal[i] = (trainglobal[i] - global_mean) / global_std
        trainskel[i] = (trainskel[i] - local_mean) / local_std

    models_dir = "./models/" + prefix
    logs_dir = "./logs/" + prefix

    parents = np.array([
        -1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 0, 10, 11, 12, 3, 14, 15, 16, 3, 18, 19,
        20
    ])
    with tf.device("/gpu:%d" % gpu):
        gru = EncoderDecoderGRU(batch_size, alpha, beta, gamma, omega,
                                euler_ord, n_joints, layers_units, max_steps,
                                local_mean, local_std, global_mean, global_std,
                                parents, keep_prob, logs_dir, learning_rate,
                                optim, margin, d_arch, d_rand, norm_type)

    if not exists(models_dir):
        makedirs(models_dir)

    if not exists(logs_dir):
        makedirs(logs_dir)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_frac)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False,
                                          gpu_options=gpu_options)) as sess:

        sess.run(tf.global_variables_initializer())

        loaded, model_name = gru.load(sess, models_dir)
        if loaded:
            print("[*] Load SUCCESSFUL")
            step = int(model_name.split("-")[-1])
        else:
            print("[!] Starting from scratch ...")
            step = 0

        total_steps = 50000
        gru.saver = tf.train.Saver(max_to_keep=10)
        while step < total_steps:
            mini_batches = get_minibatches_idx(len(trainlocal),
                                               batch_size,
                                               shuffle=True)
            for _, batchidx in mini_batches:
                start_time = time.time()
                if len(batchidx) == batch_size:

                    if min_steps >= max_steps:
                        steps = np.repeat(max_steps, batch_size)
                    else:
                        steps = np.random.randint(low=min_steps,
                                                  high=max_steps + 1,
                                                  size=(batch_size, ))

                    realLocal_batch = []
                    realSkel_batch = []
                    realGlobal_batch = []
                    localA_batch = []
                    globalA_batch = []
                    skelA_batch = []
                    localB_batch = []
                    globalB_batch = []
                    skelB_batch = []
                    aeReg_batch = np.zeros((batch_size, 1), dtype="float32")
                    mask_batch = np.zeros((batch_size, max_steps),
                                          dtype="float32")

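                    # Each slot: a random crop of clip A, a random target
                    # skeleton B, and a real clip of B's character for the
                    # discriminator.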
                    for b in xrange(batch_size):
                        low = 0
                        high = trainlocal[batchidx[b]].shape[0] - max_steps
                        if low >= high:
                            stidx = 0
                        else:
                            stidx = np.random.randint(low=low, high=high)

                        clocalA = trainlocal[batchidx[b]][stidx:stidx +
                                                          max_steps]
                        mask_batch[
                            b, :np.min([steps[b], clocalA.shape[0]])] = 1.0
                        if clocalA.shape[0] < max_steps:
                            clocalA = np.concatenate(
                                (clocalA,
                                 np.zeros((max_steps - clocalA.shape[0],
                                           n_joints, 3))))

                        cglobalA = trainglobal[batchidx[b]][stidx:stidx +
                                                            max_steps]
                        if cglobalA.shape[0] < max_steps:
                            cglobalA = np.concatenate(
                                (cglobalA,
                                 np.zeros((max_steps - cglobalA.shape[0], 4))))

                        cskelA = trainskel[batchidx[b]][stidx:stidx +
                                                        max_steps]
                        if cskelA.shape[0] < max_steps:
                            cskelA = np.concatenate(
                                (cskelA,
                                 np.zeros((max_steps - cskelA.shape[0],
                                           n_joints, 3))))

                        rnd_idx = np.random.randint(len(trainlocal))
                        cskelB = trainskel[rnd_idx][stidx:stidx + max_steps]
                        if cskelB.shape[0] < max_steps:
                            cskelB = np.concatenate(
                                (cskelB,
                                 np.zeros((max_steps - cskelB.shape[0],
                                           n_joints, 3))))

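                        # Pick a real clip from the same character as the
                        # target skeleton (re-sample until the names match).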
                        tgtname = allnames[rnd_idx]
                        rnd_idx = np.random.randint(len(trainlocal))
                        while tgtname != allnames[rnd_idx]:
                            rnd_idx = np.random.randint(len(trainlocal))

                        low = 0
                        high = trainlocal[rnd_idx].shape[0] - max_steps
                        if low >= high:
                            stidx = 0
                        else:
                            stidx = np.random.randint(low=low, high=high)

                        crealLocal = trainlocal[rnd_idx][stidx:stidx +
                                                         max_steps]
                        crealGlobal = trainglobal[rnd_idx][stidx:stidx +
                                                           max_steps]
                        crealSkel = trainskel[rnd_idx][stidx:stidx + max_steps]

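                        # With probability 0.2 the target is A's own skeleton
                        # (autoencoder regularization).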
                        regon = np.random.binomial(1, p=0.2)
                        if regon:
                            cskelB = cskelA.copy()
                            aeReg_batch[b, 0] = 1
                        else:
                            aeReg_batch[b, 0] = 0

                        localA_batch.append(clocalA)
                        globalA_batch.append(cglobalA)
                        skelA_batch.append(cskelA)
                        localB_batch.append(clocalA)
                        globalB_batch.append(cglobalA)
                        skelB_batch.append(cskelB)
                        realLocal_batch.append(crealLocal)
                        realGlobal_batch.append(crealGlobal)
                        realSkel_batch.append(crealSkel)

                    localA_batch = np.array(localA_batch).reshape(
                        (batch_size, max_steps, -1))
                    globalA_batch = np.array(globalA_batch).reshape(
                        (batch_size, max_steps, -1))
                    seqA_batch = np.concatenate((localA_batch, globalA_batch),
                                                axis=-1)
                    skelA_batch = np.array(skelA_batch).reshape(
                        (batch_size, max_steps, -1))

                    localB_batch = np.array(localB_batch).reshape(
                        (batch_size, max_steps, -1))
                    globalB_batch = np.array(globalB_batch).reshape(
                        (batch_size, max_steps, -1))
                    seqB_batch = np.concatenate((localB_batch, globalB_batch),
                                                axis=-1)
                    skelB_batch = np.array(skelB_batch).reshape(
                        (batch_size, max_steps, -1))

                    realLocal_batch = np.array(realLocal_batch).reshape(
                        (batch_size, max_steps, -1))
                    realGlobal_batch = np.array(realGlobal_batch).reshape(
                        (batch_size, max_steps, -1))
                    realSeq_batch = np.concatenate(
                        (realLocal_batch, realGlobal_batch), axis=-1)
                    realSkel_batch = np.array(realSkel_batch).reshape(
                        (batch_size, max_steps, -1))

                    mid_time = time.time()

                    dlf, dlr, gl, lc = gru.train(sess, realSeq_batch,
                                                 realSkel_batch, seqA_batch,
                                                 skelA_batch, seqB_batch,
                                                 skelB_batch, aeReg_batch,
                                                 mask_batch, step)

                    print(
                        "step=%d/%d,  g_loss=%.5f, d_loss=%.5f, cyc_loss=%.5f, "
                        "time=%.2f+%.2f" %
                        (step, total_steps, gl, dlf + dlr, lc,
                         mid_time - start_time, time.time() - mid_time))

                    if np.isnan(gl) or np.isinf(gl):
                        return

                    if step >= 1000 and step % 1000 == 0:
                        gru.save(sess, models_dir, step)

                    step = step + 1

        gru.save(sess, models_dir, step)
Esempio n. 36
0
def main(options):
    C.update(options)

    model = ResnetModel()

    x_train, y_train, x_validate, y_validate, x_test, y_test, train_size, validate_size, test_size = \
        pre_process_CIFAR10_data()

    learning_rate = 0.1

    for epoch in range(C['num_epoch']):
        start_time = time.time()

        kf = get_minibatches_idx(train_size, C['batch_size'], shuffle=True)

        train_loss = 0.0
        train_batches = 0
        for _, train_index in kf:
            inputs = x_train[train_index]
            targets = y_train[train_index]

            inputs, targets = prepare_CIFAR10_data(inputs, targets)

            loss = model.f_grad_shared(inputs, targets)
            model.f_update(learning_rate)

            train_loss += loss
            train_batches += 1

        kf_valid = get_minibatches_idx(validate_size, C['valid_batch_size'], shuffle=False)
        valid_loss = 0.0
        valid_accuracy = 0.0
        valid_batches = 0
        for _, valid_index in kf_valid:
            inputs = x_validate[valid_index]
            targets = y_validate[valid_index]

            inputs, targets = prepare_CIFAR10_data(inputs, targets)

            loss, accuracy = model.f_validate(inputs, targets)

            valid_loss += loss
            valid_accuracy += accuracy
            valid_batches += 1

        print(
            '''\
Epoch {} of {} took {:.3f}s
    training loss:        {:.6f}
    validation loss:      {:.6f}
    validation accuracy:  {:.2f} %'''.format(
                epoch, C['num_epoch'], time.time() - start_time,
                train_loss / train_batches,
                valid_loss / valid_batches,
                valid_accuracy / valid_batches * 100.0,
            )
        )

        if epoch + 1 == 41 or epoch + 1 == 61:  # step decay at epochs 41 and 61 (1-indexed)
            learning_rate *= 0.1
            print('Decayed learning rate to', learning_rate)

        print('Saving model...', end='')
        np.savez('cifar10_deep_residual_model.npz', *lasagne.layers.get_all_param_values(model.network))
        print('Done')