Example #1
def main():
    if args.train:
        for t in range(model.checkpoint, args.num_epochs):
            if t + 1 <= args.num_epochs_all_nodes:
                train(t + 1, get_batches(data_train_all_nodes,
                                         args.batch_size), 'train')
            else:
                train(t + 1, get_batches(data_train, args.batch_size), 'train')
            train(t + 1, dev_batches, 'dev')
            train(t + 1, test_batches, 'test')
    elif args.oracle:
        oracle(args, model, ptb, data_test, 'test')
    else:
        if args.robust:
            for i in range(args.num_epochs):
                eps_scheduler.step_epoch(verbose=False)
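            # (The loop above steps the eps scheduler through all training
            # epochs so that eps is at its final value before verification.)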
            res = []
            for i in range(1, args.budget + 1):
                logger.info('budget {}'.format(i))
                ptb.budget = i
                acc_rob = train(None, test_batches, 'test')
                res.append(acc_rob)
            logger.info('Verification results:')
            for i in range(len(res)):
                logger.info('budget {} acc_rob {:.3f}'.format(i + 1, res[i]))
            logger.info(res)
        else:
            train(None, test_batches, 'test')
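All of these examples call a repository-specific get_batches helper; it is not a shared library function, and its exact behavior differs between projects. As a minimal sketch only, assuming the simplest signature used above (get_batches(data, batch_size)) and not the code of any particular repository, such a helper usually just shuffles the examples and splits them into fixed-size chunks:

import random

def get_batches(data, batch_size, shuffle=True):
    # Hypothetical minimal helper: return `data` split into consecutive
    # chunks of at most `batch_size` examples.
    data = list(data)
    if shuffle:
        random.shuffle(data)
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]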
Example #2
def train(epoch):
    model.train()
    # Load data for an epoch.
    train_batches = get_batches(data_train, args.batch_size)
    for a in avg:
        a.reset()

    eps_inc_per_step = 1.0 / (args.num_epochs_warmup * len(train_batches))

    for i, batch in enumerate(train_batches):
        # We increase eps linearly every batch.
        eps = args.eps * min(
            eps_inc_per_step * ((epoch - 1) * len(train_batches) + i + 1), 1.0)
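        # For example (hypothetical numbers): with num_epochs_warmup=10 and 100
        # batches per epoch, eps_inc_per_step is 0.001, so eps ramps up linearly
        # and reaches its full value args.eps after the first 10 epochs.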
        # Call the main training loop.
        acc, acc_robust, loss = res = step(model,
                                           ptb,
                                           batch,
                                           eps=eps,
                                           train=True)
        # Optimize the loss.
        torch.nn.utils.clip_grad_norm_(model.core.parameters(), 5.0)
        optimizer.step()
        optimizer.zero_grad()
        # Print training statistics.
        for k in range(3):
            avg[k].update(res[k], len(batch))
        if (i + 1) % args.log_interval == 0:
            logger.info(
                "Epoch {}, training step {}/{}: acc {:.3f}, robust acc {:.3f}, loss {:.3f}, eps {:.3f}"
                .format(epoch, i + 1, len(train_batches), avg_acc.avg,
                        avg_acc_robust.avg, avg_loss.avg, eps))
    model.save(epoch)
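The avg meters used above (avg_acc, avg_acc_robust, avg_loss) are defined elsewhere in the script; only their reset(), update(value, count) and .avg interface is visible here. A minimal sketch of such a running-average meter, under that assumption, could be:

class AverageMeter:
    # Hypothetical running weighted average of a scalar statistic.
    def __init__(self):
        self.reset()

    def reset(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        self.sum += float(value) * n
        self.count += n
        self.avg = self.sum / self.count

avg_acc, avg_acc_robust, avg_loss = AverageMeter(), AverageMeter(), AverageMeter()
avg = (avg_acc, avg_acc_robust, avg_loss)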
def main(_):

    data_path = 'data/new-dataset-cornell-length10-filter1-vocabSize40000.pkl'
    word2id, id2word, trainingSamples = load_dataset(data_path)
    hparam = Config()

    with tf.Session() as sess:

        model = Seq2SeqModel(hparam, word2id)
        ckpt = tf.train.get_checkpoint_state(hparam.save_path)

        if FLAGS.resume and ckpt and tf.train.checkpoint_exists(
                ckpt.model_checkpoint_path):
            print("Restoring model parameters from %s." %
                  ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(model.init)

        train_writer = tf.summary.FileWriter(hparam.save_path,
                                             graph=sess.graph)

        for epoch in range(hparam.num_epoch):
            print("Starting Epoch {}/{}:".format(epoch, hparam.num_epoch))

            batches = get_batches(trainingSamples, hparam.batch_size)
            total_loss = 0.0
            total_count = 0

            for nextBatch in tqdm(batches, desc="training"):

                outputs = model.train_session(sess, nextBatch)

                loss = outputs["loss"]
                summary = outputs["summary"]
                step = outputs["step"]
                train_writer.add_summary(summary, step)
                total_loss += loss
                total_count += 1

                if step % hparam.display_per_step == 0:

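                    # Per-word perplexity is exp(mean loss); report inf instead
                    # once the running mean loss exceeds 300.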
                    perplexity = math.exp(
                        float(total_loss / total_count)
                    ) if total_loss / total_count < 300 else float('inf')
                    tqdm.write(
                        " Step %d | Per-word Loss %.4f | Perplexity %.4f" %
                        (step, total_loss / total_count, perplexity))

                    checkpoint_path = os.path.join(hparam.save_path,
                                                   hparam.model_name)
                    model.saver.save(sess, checkpoint_path)

            tqdm.write("\n")
            tqdm.write(" Epoch %d | Per-word Loss %.4f | Perplexity %.4f" %
                       (epoch, total_loss / total_count, perplexity))
            tqdm.write("\n")
    def fit(self, train_data, val_data):

        train_data_len = len(train_data)

        saver = tf.train.Saver(tf.global_variables())

        self.sess = tf.Session()
        self.sess.run(self.init_op)
        self.add_summary(self.sess)

        (X_val_batch, Y_val_batch, X_val_batch_lens, Y_val_batch_lens) = \
            next(get_batches(val_data, self.batch_size, self.vocab, self.tag2label))

        for epoch in range(1, self.epoch_num + 1):

            for local_step, (X_train_batch, Y_train_batch, X_train_batch_lens, Y_train_batch_lens) in enumerate(
                    get_batches(train_data, self.batch_size, self.vocab, self.tag2label)):
                _, loss, summary, step_num = self.sess.run(
                    [self.train_op, self.loss, self.merged, self.global_step],
                    {self.word_ids: X_train_batch,
                     self.labels: Y_train_batch,
                     self.sequence_lengths: X_train_batch_lens,
                     self.dropout_pl: self.dropout_keep_prod})
                if local_step % self.display_step == 0:
                    val_loss = self.sess.run(self.loss,
                                             {self.word_ids: X_val_batch,
                                              self.labels: Y_val_batch,
                                              self.sequence_lengths: X_val_batch_lens,
                                              self.dropout_pl: self.dropout_keep_prod})
                    print("Epoch %d/%d | Batch %d/%d | train_loss: %.3f | val_loss: %.3f"
                          % (epoch, self.epoch_num, local_step, len(train_data) // self.batch_size, loss, val_loss))

                self.file_writer.add_summary(summary, step_num)

                if self.n_step_to_save and step_num % self.n_step_to_save == 0 and step_num != 0:  # every n step
                    saver.save(self.sess, self.model_path, global_step=step_num)
                    print("Model Saved... at time step " + str(step_num))

        saver.save(self.sess, self.model_path)
        print("Model Saved.")
        self.sess.close()
Example #5
def oracle(args, model, ptb, data, type):
    logger.info('Running oracle for {}'.format(type))
    model.eval()
    assert (isinstance(ptb, PerturbationSynonym))
    cnt_cor = 0
    word_embeddings = model.word_embeddings.weight
    vocab = model.vocab
    for t, example in enumerate(data):
        embeddings, mask, tokens, label_ids = model.get_input([example])
        candidates = example['candidates']
        if tokens[0][0] == '[CLS]':
            candidates = [[]] + candidates + [[]]
        embeddings_all = []

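        # dfs enumerates every combination of at most `budget` synonym
        # substitutions and collects the resulting embedding sequences in
        # embeddings_all for batched prediction below.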
        def dfs(tokens, embeddings, budget, index):
            if index == len(tokens):
                embeddings_all.append(embeddings.cpu())
                return
            dfs(tokens, embeddings, budget, index + 1)
            if budget > 0 and tokens[index] != '[UNK]' and len(candidates[index]) > 0\
                    and tokens[index] == candidates[index][0]:
                for w in candidates[index][1:]:
                    if w in vocab:
                        _embeddings = torch.cat([
                            embeddings[:index],
                            word_embeddings[vocab[w]].unsqueeze(0),
                            embeddings[index + 1:]
                        ],
                                                dim=0)
                        dfs(tokens, _embeddings, budget - 1, index + 1)

        dfs(tokens[0], embeddings[0], ptb.budget, 0)
        cor = True
        for embeddings in get_batches(embeddings_all, args.oracle_batch_size):
            embeddings_tensor = torch.cat(embeddings).cuda().reshape(
                len(embeddings), *embeddings[0].shape)
            logits = model.model_from_embeddings(embeddings_tensor, mask)
            for pred in list(torch.argmax(logits, dim=1)):
                if pred != example['label']:
                    cor = False
            if not cor: break
        cnt_cor += cor

        if (t + 1) % args.log_interval == 0:
            logger.info('{} {}/{}: oracle robust acc {:.3f}'.format(
                type, t + 1, len(data), cnt_cor * 1. / (t + 1)))
    logger.info('{}: oracle robust acc {:.3f}'.format(type,
                                                      cnt_cor * 1. / (t + 1)))
Example #6
infer_model = InferenceModel(vocab_size, embedding_size, num_units,
                             num_layers, max_target_sequence_length,
                             infer_batch_size, beam_size, segment_to_int,
                             infer_mode)

checkpoints_path = "model2/checkpoints"

#train_sess.run(initializer)
infer_batch = get_infer_batches(source_inputs, infer_batch_size,
                                vocab_to_int['<PAD>'])
print(infer_batch)

for i in range(epochs):
    for batch_i, batch in enumerate(
            get_batches(source_inputs, target_inputs, target_outputs,
                        batch_size, vocab_to_int['<PAD>'],
                        vocab_to_int['<PAD>'])):
        if batch_i <= 30000:
            current_loss = train_model.train(train_sess, batch)
            print('Epoch %d Batch %d/%d - Training Loss: %f' %
                  (i + 1, batch_i + 1,
                   (len(source_inputs) - 1) // batch_size + 1, current_loss))
            if (batch_i + 1) % infer_step == 0:
                print("in")
                checkpoint_path = train_model.saver.save(train_sess,
                                                         checkpoints_path,
                                                         global_step=(i * 100 +
                                                                      batch_i))
                print("out")
                infer_model.saver.restore(infer_sess, checkpoint_path)
                current_predict = infer_model.infer(infer_sess, infer_batch)
Example #7
def main(args):
    # parse args
    args = parse_args(args)

    # prepare data
    if args['prep_data']:
        print('\n>> Preparing Data\n')
        prepare_data(args)
        sys.exit()

    # ELSE
    # read data and metadata from pickled files
    with open(P_DATA_DIR + 'metadata.pkl', 'rb') as f:
        metadata = pkl.load(f)
    with open(P_DATA_DIR + 'data.pkl', 'rb') as f:
        data_ = pkl.load(f)

    # read content of data and metadata
    candidates = data_['candidates']
    candid2idx, idx2candid = metadata['candid2idx'], metadata['idx2candid']

    # get train/test/val data
    train, test, val = data_['train'], data_['test'], data_['val']

    # gather more information from metadata
    sentence_size = metadata['sentence_size']
    w2idx = metadata['w2idx']  # is a list
    idx2w = metadata['idx2w']
    memory_size = metadata['memory_size']
    vocab_size = metadata['vocab_size']
    n_cand = metadata['n_cand']
    candidate_sentence_size = metadata['candidate_sentence_size']
    # embeddings = metadata['embeddings']

    # vectorize candidates
    candidates_vec = data_utils.vectorize_candidates(candidates, w2idx,
                                                     candidate_sentence_size)

    print('---- memory config ----')
    print('embedding size:', EMBEDDING_SIZE)
    print('batch_size:', BATCH_SIZE)
    print('memory_size:', memory_size)
    print('vocab_size:', vocab_size)
    print('candidate_size:', n_cand)
    print('candidate_sentence_size:', candidate_sentence_size)
    print('hops:', HOPS)
    print('---- end ----')
    ###
    # create model
    # model = model['memn2n'](  # why?
    model = memn2n.MemN2NDialog(batch_size=BATCH_SIZE,
                                vocab_size=vocab_size,
                                candidates_size=n_cand,
                                sentence_size=sentence_size,
                                embedding_size=EMBEDDING_SIZE,
                                candidates_vec=candidates_vec,
                                hops=HOPS)

    # model = memn2n2.MemN2NDialog(
    #     batch_size=BATCH_SIZE,
    #     vocab_size=vocab_size,
    #     candidates_size=n_cand,
    #     sentence_size=sentence_size,
    #     embedding_size=EMBEDDING_SIZE,
    #     candidates_vec=candidates_vec,
    #     embeddings=embeddings,
    #     hops=HOPS
    # )

    # gather data in batches
    train, val, test, batches = data_utils.get_batches(train,
                                                       val,
                                                       test,
                                                       metadata,
                                                       batch_size=BATCH_SIZE)

    # for t in train['q']:
    #     print(recover_sentence(t, idx2w))

    if args['train']:
        # training starts here
        epochs = args['epochs']
        eval_interval = args['eval_interval']

        # restore from checkpoint
        _check_restore_parameters(model.get_sess(), model.saver, CKPT_DIR)
        #
        # training and evaluation loop
        print('\n>> Training started!\n')
        # write log to file
        log_handle = open(dir_path + '/../../logs/' + args['log_file'], 'w')
        cost_total = 0.
        best_cost = 100
        # best_validation_accuracy = 0.
        lowest_val_acc = 0.8
        total_begin = time.clock()
        begin = time.clock()
        for i in range(epochs + 1):

            for start, end in batches:
                s = train['s'][start:end]
                q = train['q'][start:end]
                # print(len(q))
                a = train['a'][start:end]
                if config.MULTILABEL >= 1:
                    # convert to one hot
                    one_hot = np.zeros((end - start, n_cand))
                    for aa in range(end - start):
                        for index in a[aa]:
                            one_hot[aa][index] = 1
                    a = one_hot
                cost_total += model.batch_fit(s, q, a)
            if config.MULTILABEL >= 1:
                if i % 1 == 0 and i:
                    print('stage...', i, cost_total)
                    if cost_total < best_cost:
                        print('saving model...', i, '++',
                              str(best_cost) + '-->' + str(cost_total))
                        best_cost = cost_total
                        model.saver.save(model.get_sess(),
                                         CKPT_DIR + '/memn2n_model.ckpt',
                                         global_step=i)
            else:
                if i % 1 == 0 and i:
                    print('stage...', i)
                    if i % eval_interval == 0 and i:
                        train_preds = batch_predict(model,
                                                    train['s'],
                                                    train['q'],
                                                    len(train['s']),
                                                    batch_size=BATCH_SIZE)
                        for error in range(len(train['q'])):
                            if train_preds[error] != train['a'][error]:
                                print_out = recover(error, train['s'],
                                                    train['q'],
                                                    train_preds[error],
                                                    train['a'][error], idx2w,
                                                    idx2candid)
                                print(print_out)
                                # print(recover_sentence(train['q'][i], idx2w),
                                #       recover_cls(train_preds[i], idx2candid),
                                #       recover_cls(train['a'][i], idx2candid))
                        val_preds = batch_predict(model,
                                                  val['s'],
                                                  val['q'],
                                                  len(val['s']),
                                                  batch_size=BATCH_SIZE)
                        train_acc = metrics.accuracy_score(
                            np.array(train_preds), train['a'])
                        val_acc = metrics.accuracy_score(val_preds, val['a'])
                        end = time.clock()
                        print('Epoch[{}] : <ACCURACY>\n\t'
                              'training : {}\n\t'
                              'validation : {}\n\t'
                              'current_best_accuracy: {}'.format(
                                  i, train_acc, val_acc, lowest_val_acc))
                        print('time:{}'.format(end - begin))
                        # log_handle.write('{} {} {} {}\n'.format(i, train_acc, val_acc,
                        #                                         cost_total / (eval_interval * len(batches))))
                        cost_total = 0.  # empty cost
                        begin = end
                        #
                        # save the best model, to disk
                        # if val_acc > best_validation_accuracy:
                        # best_validation_accuracy = val_acc
                        if train_acc > lowest_val_acc:
                            print('saving model...', train_acc, lowest_val_acc)
                            lowest_val_acc = train_acc
                            model.saver.save(model.get_sess(),
                                             CKPT_DIR + '/memn2n_model.ckpt',
                                             global_step=i)
        # close file
        total_end = time.clock()
        print('Total time: {} minutes.'.format((total_end - total_begin) / 60))
        log_handle.close()

    else:  # inference
        ###
        # restore checkpoint
        # ckpt = tf.train.get_checkpoint_state(CKPT_DIR)
        # if ckpt and ckpt.model_checkpoint_path:
        #     print('\n>> restoring checkpoint from', ckpt.model_checkpoint_path)
        #     model.saver.restore(model.get_sess(), ckpt.model_checkpoint_path)
        # # base(model, idx2candid, w2idx, sentence_size, BATCH_SIZE, n_cand, memory_size)
        #
        # # create an base session instance
        # isess = InteractiveSession(
        #     model, idx2candid, w2idx, n_cand, memory_size)
        #
        # if args['infer']:
        #     query = ''
        #     while query != 'exit':
        #         query = input('>> ')
        #         print('>> ' + isess.reply(query))
        # elif args['ui']:
        #     return isess
        pass
Example #8
def main(_):
    with tf.Session() as sess:
        cells = get_lstm_cells(num_hidden, keep_prob)
        init_states = cells.zero_state(batch_size, tf.float32)

        outputs, final_states = rnn(rnn_inputs, cells, num_hidden[-1],
                                    num_steps, num_class, init_states)

        predicts = tf.argmax(outputs, -1, name='predict_op')
        softmax_out = tf.nn.softmax(outputs, name='softmax_op')
        top_k = tf.nn.top_k(softmax_out, k=k, sorted=False, name='top_k_op')
        with tf.variable_scope('train'):
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=labels, logits=outputs),
                                  name='loss_op')

            global_step = tf.Variable(0,
                                      name='global_step',
                                      trainable=False,
                                      collections=[
                                          tf.GraphKeys.GLOBAL_VARIABLES,
                                          tf.GraphKeys.GLOBAL_STEP
                                      ])

            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
            train_op = optimizer.minimize(loss,
                                          global_step=global_step,
                                          name='train_op')

            arg_labels = tf.argmax(labels, -1)
            acc = tf.reduce_mean(tf.cast(tf.equal(predicts, arg_labels),
                                         tf.float32),
                                 name='acc_op')

        sess.run(tf.global_variables_initializer())
        global_step_tensor = sess.graph.get_tensor_by_name(
            'train/global_step:0')
        train_op = sess.graph.get_operation_by_name('train/train_op')
        acc_op = sess.graph.get_tensor_by_name('train/acc_op:0')
        loss_tensor = sess.graph.get_tensor_by_name('train/loss_op:0')

        print('Start training ...')
        loss_history = []
        acc_history = []
        batch_num = 30
        a = datetime.now().replace(microsecond=0)

        for i in range(epochs):
            total_loss = 0
            total_acc = 0
            count = 0
            current_states = sess.run(init_states,
                                      feed_dict={batch_size: batch_num})
            for x, y in get_batches(train_encode, batch_num, num_steps):
                _, loss_value, acc_value, current_states = sess.run(
                    [train_op, loss_tensor, acc_op, final_states],
                    feed_dict={
                        X: x,
                        Y: y,
                        init_states: current_states,
                        keep_prob: 1
                    })
                total_loss += loss_value
                total_acc += acc_value
                count += 1
            total_loss /= count
            total_acc /= count

            valid_acc = 0
            count = 0
            current_states = sess.run(init_states,
                                      feed_dict={batch_size: batch_num})
            for x, y in get_batches(valid_encode, batch_num, num_steps):
                acc_value, current_states = sess.run([acc_op, final_states],
                                                     feed_dict={
                                                         X:
                                                         x,
                                                         Y:
                                                         y,
                                                         init_states:
                                                         current_states
                                                     })
                valid_acc += acc_value
                count += 1
            valid_acc /= count
            print("Epochs: {}, loss: {:.4f}, acc: {:.4f}, val_acc: {:.4f}".
                  format(i + 1, total_loss, total_acc, valid_acc))
            loss_history.append(total_loss)
            acc_history.append([total_acc, valid_acc])

        plt.plot(loss_history)
        plt.xlabel("epochs")
        plt.ylabel("BPC")
        plt.title("Training curve")
        plt.savefig("Training curve.png", dpi=100)

        plt.gcf().clear()

        acc_history = np.array(acc_history).T
        err_history = 1 - acc_history
        plt.plot(err_history[0], label='training error')
        plt.plot(err_history[1], label='validation error')
        plt.xlabel("epochs")
        plt.ylabel("Error rate")
        plt.title("Training error")
        plt.legend()
        plt.savefig("Training error.png", dpi=100)

        # Generate 500 characters from the character-level model
        seed = 'Asuka'
        seed_encode = np.array([vocab_to_int[c] for c in list(seed)])
        seed_encode = np.concatenate((seed_encode, np.zeros(num_steps - 5)))
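        # 'Asuka' is 5 characters, so the input window is zero-padded out to
        # num_steps and generation starts at index 4; each step samples the next
        # character from the renormalized top-k probabilities (top-k sampling).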
        current_states = sess.run(init_states, feed_dict={batch_size: 1})
        index = 4
        for i in range(500):
            if index == num_steps - 1:
                candidates, current_states = sess.run([top_k, final_states],
                                                      feed_dict={
                                                          X:
                                                          seed_encode[None, :],
                                                          init_states:
                                                          current_states
                                                      })
                p = candidates.values[0, index]
                p /= p.sum()
                rand_idx = np.random.choice(k, p=p)
                seed_encode = np.append(candidates.indices[0, index, rand_idx],
                                        np.zeros(num_steps - 1))
            else:
                candidates = sess.run(top_k,
                                      feed_dict={
                                          X: seed_encode[None, :],
                                          init_states: current_states
                                      })
                p = candidates.values[0, index]
                p /= p.sum()
                rand_idx = np.random.choice(k, p=p)
                seed_encode[index + 1] = candidates.indices[0, index, rand_idx]

            seed += int_to_vocab[candidates.indices[0, index, rand_idx]]
            index = (index + 1) % num_steps
        print(seed)
        b = datetime.now().replace(microsecond=0)
        print("Time cost:", b - a)
Example #9
torch.cuda.manual_seed_all(args.seed)

dummy_embeddings = torch.zeros(1,
                               args.max_sent_length,
                               args.embedding_size,
                               device=args.device)
dummy_labels = torch.zeros(1, dtype=torch.long, device=args.device)
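# (The zero-valued dummy tensors only fix the input shapes used when the model
# is wrapped into a BoundedModule below.)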

if args.model == 'transformer':
    dummy_mask = torch.zeros(1, 1, 1, args.max_sent_length, device=args.device)
    model = Transformer(args, data_train)
elif args.model == 'lstm':
    dummy_mask = torch.zeros(1, args.max_sent_length, device=args.device)
    model = LSTM(args, data_train)

dev_batches = get_batches(data_dev, args.batch_size)
test_batches = get_batches(data_test, args.batch_size)

ptb = PerturbationSynonym(budget=args.budget)
dummy_embeddings = BoundedTensor(dummy_embeddings, ptb)
model_ori = model.model_from_embeddings
bound_opts = {'relu': args.bound_opts_relu, 'exp': 'no-max-input'}
if isinstance(model_ori, BoundedModule):
    model_bound = model_ori
else:
    model_bound = BoundedModule(model_ori, (dummy_embeddings, dummy_mask),
                                bound_opts=bound_opts,
                                device=args.device)
model.model_from_embeddings = model_bound
if args.loss_fusion:
    bound_opts['loss_fusion'] = True
def train_model(model):

    train_num_batches = int(len(model.X_train) / model.config.batch_size)
    train_loss_history = np.zeros(
        (model.config.max_epochs,
         train_num_batches))  ## Store each batch separately.

    model.config.val_batchsize = model.config.batch_size  ## Can be anything, typically greater than train batch size.
    val_num_batches = int(len(model.X_val) / model.config.val_batchsize)
    val_loss_history = np.zeros((model.config.max_epochs, val_num_batches))

    train_acc_history = np.zeros(
        (model.config.max_epochs,
         model.config.label_size))  ## Store each class separately
    val_acc_history = np.zeros_like(train_acc_history)

    best_val_acc = 0
    best_epoch = 0

    if not os.path.exists("./weights"):
        os.makedirs("./weights")

    with tf.Session() as sess:

        init = tf.global_variables_initializer()
        sess.run(init)

        for epoch in range(model.config.max_epochs):
            print('Epoch: ', epoch)
            X_train, seq_len_train, y_train = get_batches(
                model.X_train, model.y_train, model.config.batch_size)
            epoch_train_loss, epoch_train_acc = run_epoch(
                sess, model, zip(X_train, seq_len_train, y_train))
            print()
            print(
                "Train Loss: {:.4f} \t Train Accuracy: {} \t Mean AUC: {:.5f}".
                format(np.mean(epoch_train_loss), epoch_train_acc,
                       np.mean(epoch_train_acc)))

            X_val, seq_len_val, y_val = get_batches(model.X_val, model.y_val,
                                                    model.config.val_batchsize)
            epoch_val_loss, epoch_val_acc = run_epoch(sess,
                                                      model,
                                                      zip(
                                                          X_val, seq_len_val,
                                                          y_val),
                                                      val=True)
            print("Val Loss: {:.4f} \t Val Accuracy: {} \t Mean AUC: {:.5f}".
                  format(np.mean(epoch_val_loss), epoch_val_acc,
                         np.mean(epoch_val_acc)))
            print()

            train_acc_history[epoch, :] = epoch_train_acc
            val_acc_history[epoch, :] = epoch_val_acc
            train_loss_history[epoch, :] = np.array(epoch_train_loss)
            val_loss_history[epoch, :] = np.array(epoch_val_loss)

            val_loss = np.mean(epoch_val_loss)

            if np.mean(epoch_val_acc) > best_val_acc:
                best_val_loss = val_loss
                best_epoch = epoch
                best_val_acc = np.mean(epoch_val_acc)
                saver = tf.train.Saver()
                saver.save(sess, './weights/%s' % model.config.model_name)

            if epoch - best_epoch > model.config.early_stopping:  ## Stop on no improvement
                print('Stopping due to early stopping')
                break

            if epoch - best_epoch > model.config.anneal_threshold:  ## Anneal lr on no improvement in val loss
                model.config.lr *= model.config.annealing_factor
                print("Annealing learning rate to {}".format(model.config.lr))

    print('Best Validation Accuracy is {}'.format(best_val_acc))
def test_model(test=False):

    config = Config()
    model = Model(config, 'train.csv', debug=False)

    start_time = time.time()

    train_model(model)  ## Save the weights and model

    print()
    print("#" * 20)
    print('Completed Training')
    print('Training Time:{} minutes'.format((time.time() - start_time) / 60))

    if not test:
        return

    test_data = pd.read_csv('test.csv')
    X_test = test_data['comment_text'].values
    test_idx = test_data.iloc[:, 0].values

    print("Generating test results ...")

    model.config.batch_size = 59

    with tf.Session() as sess:
        saver = tf.train.import_meta_graph('./weights/%s.meta' %
                                           model.config.model_name)
        saver.restore(sess, './weights/%s' % model.config.model_name)

        X_test, test_seq_length = get_batches(
            X=X_test,
            y=None,
            batch_size=model.config.batch_size,
            shuffle=False)
        e_pred = []
        for X, seq in zip(X_test, test_seq_length):  ## Run test in batches
            feed = model.build_feeddict(X, seq, val=True)
            p = sess.run(model.pred, feed_dict=feed)
            e_pred.append(p)

    prediction = np.concatenate(e_pred, axis=0)
    assert (len(test_idx) == len(prediction))

    ## Code to write the output submissions to a file

    submit_df = pd.DataFrame({
        'id': test_idx,
        'toxic': prediction[:, 0],
        'severe_toxic': prediction[:, 1],
        'obscene': prediction[:, 2],
        'threat': prediction[:, 3],
        'insult': prediction[:, 4],
        'identity_hate': prediction[:, 5]
    })
    submit_df.to_csv('submission.csv',
                     index=False,
                     columns=[
                         'id', 'toxic', 'severe_toxic', 'obscene', 'threat',
                         'insult', 'identity_hate'
                     ])
def train():

    if not tf.gfile.Exists(config.PREPROCESS_DATA):
        print('Preprocessed data file not found; regenerating it and saving to: {}'.format(config.PREPROCESS_DATA))
        data_utils.preprocess_and_save_data(config.source_path,
                                            config.target_path)

    (source_int_text,
     target_int_text), (source_vocab_to_int,
                        target_vocab_to_int), _ = data_utils.load_preprocess()

    print("训练数据加载成功")

    train_graph = tf.Graph()
    with train_graph.as_default():

        model = Seq2Seq_Model(
            num_units=config.num_units,
            # keep_prob,
            batch_size=config.batch_size,
            source_vocab_size=len(source_vocab_to_int),
            target_vocab_size=len(target_vocab_to_int),
            encoding_embedding_size=config.encoding_embedding_size,
            decoding_embedding_size=config.decoding_embedding_size,
            target_vocab_to_int=target_vocab_to_int,
            mode='train')

        model.build_model()

        # Split data to training and validation sets
        batch_size = config.batch_size
        train_source = source_int_text[batch_size:]
        train_target = target_int_text[batch_size:]
        valid_source = source_int_text[:batch_size]
        valid_target = target_int_text[:batch_size]
        (valid_sources_batch, valid_targets_batch, valid_sources_lengths,
         valid_targets_lengths) = next(
             data_utils.get_batches(valid_source, valid_target, batch_size,
                                    source_vocab_to_int['<PAD>'],
                                    target_vocab_to_int['<PAD>']))

        with tf.Session(
                graph=train_graph,
                config=tf.ConfigProto(device_count={'GPU': 0})) as sess:

            sess.run(tf.global_variables_initializer())

            for epoch_i in range(1, config.num_epochs + 1):
                for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in \
                    enumerate(data_utils.get_batches(train_source, train_target, config.batch_size,
                                source_vocab_to_int['<PAD>'],
                                target_vocab_to_int['<PAD>'])):

                    train_loss = model.train(sess, source_batch, target_batch,
                                             sources_lengths, targets_lengths,
                                             config.learning_rate)

                    if batch_i % config.display_step == 0 and batch_i > 0:
                        valid_loss = model.eval(
                            sess,
                            valid_sources_batch,
                            valid_targets_batch,
                            valid_sources_lengths,
                            valid_targets_lengths,
                        )

                        print(
                            'Epoch {:>3} Batch {:>4}/{} - Loss: {:>6.4f}, Valid Loss: {:6.4f}'
                            .format(epoch_i, batch_i,
                                    len(source_int_text) // batch_size,
                                    train_loss, valid_loss))


            #  Train Accuracy: {:>6.4f}, Validation Accuracy: {:>6.4f},  | , train_acc, valid_acc

            # Save Model
            saver = tf.train.Saver()
            saver.save(sess, config.save_path)
            print('Model Trained and Saved')
    def verify(self, example):
        start_time = time.time()

        embeddings, tokens = self.target.get_embeddings([example])
        length = embeddings.shape[1]
        tokens = tokens[0]

        self.logger.write("tokens:", " ".join(tokens))
        self.logger.write("length:", length)
        self.logger.write("label:", example["label"])

        self.std = self.target.step([example], infer_grad=True)[-1]

        result = {
            "tokens": tokens,
            "label": float(example["label"]),
            "bounds": []
        }

        cnt = 0
        sum_eps, min_eps = 0, 1e30

        assert (self.perturbed_words == 1)
        # [CLS] and [SEP] cannot be perturbed
        for i in range(1, length - 1):
            # skip OOV
            if tokens[i][0] == "#" or tokens[i + 1][0] == "#":
                continue

            candidates = []
            for w in self.words:
                _tokens = copy.deepcopy(tokens)
                _tokens[i] = w
                sent = ""
                for _w in _tokens[1:-1]:
                    if _w[0] == "#":
                        sent += _w[2:] + " "
                    else:
                        sent += _w + " "
                candidates.append({
                    "sent_a": sent.split(),
                    "label": example["label"]
                })

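            # epsilon: smallest embedding-space L_p distance (at position i) of a
            # substitution that flips the prediction; epsilon_max: the largest
            # distance observed, used as a fallback when no substitution flips it.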
            epsilon = 1e10
            epsilon_max = 0
            for batch in get_batches(candidates, self.batch_size):
                r = self.target.step(batch)[-1]
                dist = torch.norm(r["embedding_output"][:, i] -
                                  embeddings[0][i].unsqueeze(0),
                                  p=self.p,
                                  dim=-1)
                for j in range(len(batch)):
                    if r["pred_labels"][j] != example["label"]:
                        epsilon = min(epsilon, float(dist[j]))
                    epsilon_max = max(epsilon_max, float(dist[j]))
            epsilon = min(epsilon, epsilon_max)

            epsilon_normalized = epsilon / torch.norm(embeddings[0, i],
                                                      p=self.p)

            self.logger.write("Position %d: %s %.5f %.5f" %
                              (i, tokens[i], epsilon, epsilon_normalized))

            result["bounds"].append({
                "position": i,
                "eps": float(epsilon),
                "eps_normalized": float(epsilon_normalized)
            })

            cnt += 1
            sum_eps += epsilon
            min_eps = min(min_eps, epsilon)

        result["time"] = time.time() - start_time

        self.logger.write("Time elapsed", result["time"])

        return result, sum_eps / cnt, min_eps
args = update_arguments(args)
set_seeds(args.seed)
data_train, data_valid, data_test, _, _ = load_data(args)
set_seeds(args.seed)

import tensorflow as tf
config = tf.ConfigProto(device_count={'GPU': 0})
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

with sess.as_default():
    target = Transformer(args, data_train)

    random.shuffle(data_valid)
    random.shuffle(data_test)
    valid_batches = get_batches(data_valid, args.batch_size)
    test_batches = get_batches(data_test, args.batch_size)
    print("Dataset sizes: %d/%d/%d" %
          (len(data_train), len(data_valid), len(data_test)))

    summary_names = ["loss", "accuracy"]
    summary_num_pre = 2

    logger = Logger(sess, args, summary_names, 1)

    print("\n")

    if args.train:
        while logger.epoch.eval() <= args.num_epoches:
            random.shuffle(data_train)
            train_batches = get_batches(data_train, args.batch_size)
Example #15
def main(unused_argv):
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.set_verbosity(tf.logging.INFO)  # choose what level of logging you want
    if FLAGS.mode == 'rl_train':
        tf.logging.info('Starting model in %s mode...',
                        FLAGS.mode + '_' + FLAGS.reward_type)
    else:
        tf.logging.info('Starting model in %s mode...', FLAGS.mode)
    # If in decode mode, set batch_size = beam_size
    # Reason: in decode mode, we decode one example at a time.
    # On each step, we have beam_size-many hypotheses in the beam, so we need to make a batch of these hypotheses.
    if FLAGS.mode == 'beam_search_decode':
        FLAGS.batch_size = FLAGS.beam_size

    train_data, valid_data, test_data = prepare_dataset(FLAGS.data_path)
    print('TrainData Size:', len(train_data))
    print('ValidData Size:', len(valid_data))
    print('TestData Size:', len(test_data))

    print("Building vocabulary ..... ")
    word2id, id2word, _, max_ending_len, min_ending_len = creat_vocab(
        train_data, FLAGS.word_vocab_size)
    print("Finished building vocabulary!")
    word_vocab_size = len(word2id.keys())

    # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
    hparam_list = [
        'mode', 'loss_rate_of_sem', 'loss_rate_of_mle', 'word_vocab_size',
        'use_mixed_loss', 'lr', 'train_keep_prob', 'rl_loss_scale_factor',
        'rand_unif_init_mag', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'coverage', 'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps_dict['max_dec_steps'] = max_ending_len
    hps_dict['min_ending_len'] = min_ending_len
    if FLAGS.word_vocab_size is None:
        hps_dict['word_vocab_size'] = word_vocab_size
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # create minibatches of data
    train_batches = get_batches(len(train_data), FLAGS.batch_size)
    valid_batches = get_batches(len(valid_data), FLAGS.batch_size)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode == 'seq2seq_train':
        train_dir = os.path.join(FLAGS.exp_name, "train_seq2seq")
        if not os.path.exists(train_dir): os.makedirs(train_dir)
        with tf.Graph().as_default():
            initializer = tf.random_uniform_initializer(
                -hps.rand_unif_init_mag, hps.rand_unif_init_mag)
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m_train = SCST_RLModel(is_training=True, hps=hps)
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                m_valid = SCST_RLModel(is_training=False, hps=hps)
            if FLAGS.convert_to_coverage_model:
                assert FLAGS.coverage, "To convert your non-coverage model to a coverage model, run with convert_to_coverage_model=True and coverage=True"
                convert_to_coverage_model()
            sv = tf.train.Supervisor(logdir=train_dir,
                                     save_model_secs=FLAGS.save_model_secs)

            sess_context_manager = sv.managed_session(config=util.get_config())
            tf.logging.info("Created session.")
            try:
                run_seq2seq_training(
                    m_train, m_valid, train_data, train_batches, valid_data,
                    valid_batches, word2id, max_ending_len, sv,
                    sess_context_manager
                )  # this is an infinite loop until interrupted
            except KeyboardInterrupt:
                tf.logging.info(
                    "Caught keyboard interrupt on worker. Stopping supervisor..."
                )
                sv.stop()

    elif hps.mode == 'rl_train':
        train_dir = os.path.join(
            FLAGS.exp_name, "train_rl" + '_' + FLAGS.reward_type + 'mu_' +
            str(FLAGS.rl_loss_scale_factor))
        if not os.path.exists(train_dir): os.makedirs(train_dir)
        with tf.Graph().as_default():
            initializer = tf.random_uniform_initializer(
                -hps.rand_unif_init_mag, hps.rand_unif_init_mag)
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m_train = SCST_RLModel(is_training=True, hps=hps)
            with tf.variable_scope("Model",
                                   reuse=True,
                                   initializer=initializer):
                m_valid = SCST_RLModel(is_training=False, hps=hps)

            # define load_pretrain function for restoring best seq2seq model from eval_dir
            ckpt_dir = 'eval_seq2seq'
            latest_filename = "checkpoint_best" if ckpt_dir == "eval_seq2seq" else None
            ckpt_dir = os.path.join(FLAGS.exp_name, ckpt_dir)
            ckpt_state = tf.train.get_checkpoint_state(
                ckpt_dir, latest_filename=latest_filename)
            print("loading pre_trained seq2seq model from %s",
                  ckpt_state.model_checkpoint_path)
            saver = tf.train.Saver()

            def load_pretrain(sess):
                return saver.restore(sess, ckpt_state.model_checkpoint_path)

            sv = tf.train.Supervisor(logdir=train_dir,
                                     saver=saver,
                                     save_model_secs=FLAGS.save_model_secs,
                                     init_fn=load_pretrain)
            sess_context_manager = sv.managed_session(config=util.get_config())
            tf.logging.info("Created session.")
            try:
                run_rl_training(m_train, m_valid, train_data, train_batches,
                                valid_data, valid_batches, word2id,
                                max_ending_len, sv, sess_context_manager
                                )  # this is an infinite loop until interrupted
            except KeyboardInterrupt:
                tf.logging.info(
                    "Caught keyboard interrupt on worker. Stopping supervisor..."
                )
                sv.stop()

    elif hps.mode == 'beam_search_decode':
        # This will be the hyperparameters for the decoder model
        decode_model_hps = hps._replace(max_dec_steps=1)
        # The model is configured with max_dec_steps=1 because we only ever run
        # one step of the decoder at a time (to do beam search). Note that the
        # batcher is initialized with max_dec_steps equal to e.g. 100 because
        # the batches need to contain the full summaries.
        test_examples_list = prepare_data_for_beam_seach_decode(
            test_data, FLAGS.batch_size, word2id, max_plot_len, max_ending_len,
            FLAGS.pointer_gen)
        with tf.Graph().as_default():
            initializer = tf.random_uniform_initializer(
                -hps.rand_unif_init_mag, hps.rand_unif_init_mag)
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                model_test = SCST_RLModel(is_training=False,
                                          hps=decode_model_hps)
                run_beam_search_decode(model_test,
                                       test_examples_list,
                                       id2word,
                                       data='test_data',
                                       ckpt_dir=FLAGS.decode_ckpt_dir)
    else:
        raise ValueError(
            "The 'mode' flag must be one of seq2seq_train/rl_train/beam_search_decode"
        )
Example #16
    sess.run(tf.global_variables_initializer())

    train_current_step = 0
    dev_current_step = 0
    train_writer = tf.summary.FileWriter(args.summary_dir + 'train',
                                         graph=sess.graph)
    dev_writer = tf.summary.FileWriter(args.summary_dir + 'dev')
    train_batch = Batch()
    dev_batch = Batch()
    for e in range(args.numEpochs):
        dev_loss_sum = 0
        print("----- Epoch {}/{} -----".format(e + 1, args.numEpochs))
        for batch_i, (train_source_batch, train_target_batch,
                      train_source_length, train_target_length) in enumerate(
                          get_batches(source_data_train, target_data_train,
                                      args.batch_size,
                                      source_word_to_idx['<PAD>'])):

            train_batch.encoder_inputs = train_source_batch
            train_batch.decoder_targets = train_target_batch
            train_batch.encoder_inputs_length = train_source_length
            train_batch.decoder_targets_length = train_target_length

            # Tqdm is a fast, extensible Python progress bar: it adds a progress
            # indicator to long loops simply by wrapping any iterator with tqdm(iterator).
            # for nextBatch in tqdm(batches, desc="Training"):
            train_loss, train_summary = model.train(sess, train_batch)

            train_current_step += 1
            # Save a checkpoint every steps_per_checkpoint steps
            if train_current_step % args.steps_per_checkpoint == 0: