Example #1
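All of the training snippets below target TensorFlow 1.x and rely on a project-local data_helper module plus the model classes they instantiate. They are shown without their import section; a minimal import block (a sketch, not part of the original source, and the model module paths are assumptions) would be:

import datetime
import json
import os
import time

import tensorflow as tf

import data_helper                    # project-local preprocessing helpers used by every snippet
from fast_text import FastText        # assumed module names for the model classes
from text_rcnn import TextRCNN
from bilstm_crf import BilstmCrf
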
def train(config):
    print('parameters: ')
    print(json.dumps(config, indent=4, ensure_ascii=False))

    # load data
    print('load data .....')
    X, y = data_helper.process_data(config)

    # make vocab
    print('make vocab .....')
    word_to_index, label_to_index = data_helper.generate_vocab(X, y, config)

    # padding data
    print('padding data .....')
    input_x, input_y = data_helper.padding(X, y, config, word_to_index, label_to_index)

    # split data
    print('split data .....')
    x_train, y_train, x_test, y_test, x_dev, y_dev = data_helper.split_data(input_x, input_y, config)

    print('length train: {}'.format(len(x_train)))
    print('length test: {}'.format(len(x_test)))
    print('length dev: {}'.format(len(x_dev)))
    print('training .....')
    with tf.Graph().as_default():
        sess_config = tf.ConfigProto(
            allow_soft_placement=config['allow_soft_placement'],
            log_device_placement=config['log_device_placement']
        )
        with tf.Session(config=sess_config) as sess:
            fast_text = FastText(config)

            # training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(config['learning_rate'])
            grads_and_vars = optimizer.compute_gradients(fast_text.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram('{}/grad/hist'.format(v.name), g)
                    sparsity_summary = tf.summary.scalar('{}/grad/sparsity'.format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # output dir for models and summaries
            timestamp = str(int(time.time()))
            outdir = os.path.abspath(os.path.join(os.path.curdir, 'runs', timestamp))
            print('writing to {}'.format(outdir))

            # summary for loss and accuracy
            loss_summary = tf.summary.scalar('loss', fast_text.loss)
            acc_summary = tf.summary.scalar('accuracy', fast_text.accuracy)

            # train summary
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(outdir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # dev summary
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(outdir, 'summaries', 'dev')
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # checkpoint directory
            checkpoint_dir = os.path.abspath(os.path.join(outdir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model.bin')

            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=config['num_checkpoints'])

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    fast_text.input_x: x_batch,
                    fast_text.input_y: y_batch,
                    # assumption: the model exposes a dropout placeholder (dev/test below feed it with 1.0)
                    fast_text.dropout_keep_prob: config['dropout_keep_prob'],
                }

                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, fast_text.loss, fast_text.accuracy],
                    feed_dict=feed_dict
                )

                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                feed_dic = {
                    fast_text.input_x: x_batch,
                    fast_text.input_y: y_batch,
                    fast_text.dropout_keep_prob: 1.0
                }

                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, fast_text.loss, fast_text.accuracy],
                    feed_dict=feed_dic
                )

                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # generate batches
            batches = data_helper.generate_batchs(x_train, y_train, config)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config['evaluate_every'] == 0:
                    print('Evaluation:')
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)

                if current_step % config['checkpoint_every'] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print('save model checkpoint to {}'.format(path))

            # test accuracy
            test_accuracy = sess.run(fast_text.accuracy, feed_dict={
                fast_text.input_x: x_test, fast_text.input_y: y_test, fast_text.dropout_keep_prob: 1.0})
            print('Test dataset accuracy: {}'.format(test_accuracy))
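The config dict passed into train() above has to provide at least the keys this snippet reads; a hypothetical minimal example with placeholder values (the real project will need more entries for data_helper and FastText, e.g. data paths and vocabulary sizes):

config = {
    'allow_soft_placement': True,    # fall back to CPU when an op has no GPU kernel
    'log_device_placement': False,   # do not log which device each op runs on
    'learning_rate': 1e-3,
    'dropout_keep_prob': 0.5,        # keep probability during training; dev/test feed 1.0
    'num_checkpoints': 5,            # max checkpoints kept by the Saver
    'evaluate_every': 100,           # run dev_step every N training steps
    'checkpoint_every': 100,         # save a checkpoint every N training steps
}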
Example #2
def train(config):
    print('parameters:')
    print(config)

    # load data
    print('load data')
    X, y = data_helper.process_data(config)  # X = [[seq1], [seq2], ...], y = [label1, label2, ...]

    # make vocab
    print('make vocab...')
    word2index, label2index = data_helper.generate_vocab(X, y, config)

    # padding data
    print('padding data')
    input_x, input_y = data_helper.padding(X, y, config, word2index,
                                           label2index)

    # split data
    print('split data...')
    x_train, y_train, x_test, y_test, x_dev, y_dev = data_helper.split_data(
        input_x, input_y, config)

    print('length train: {}'.format(len(x_train)))
    print('length test: {}'.format(len(x_test)))
    print('length dev: {}'.format(len(x_dev)))

    print('training...')

    with tf.Graph().as_default():
        sess_config = tf.ConfigProto(
            allow_soft_placement=config['allow_soft_placement'],
            log_device_placement=config['log_device_placement'])
        with tf.Session(config=sess_config) as sess:
            rcnn = TextRCNN(config)

            # training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            train_op = tf.train.AdamOptimizer(config['learning_rate']).minimize(
                rcnn.loss, global_step=global_step)

            # output dir for models
            timestamp = str(int(time.time()))
            outdir = os.path.abspath(
                os.path.join(os.path.curdir, 'runs', timestamp))
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            print('writing to {}'.format(outdir))

            # checkpoint directory
            checkpoint_dir = os.path.abspath(os.path.join(outdir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')

            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=config['num_checkpoints'])

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    rcnn.input_x: x_batch,
                    rcnn.input_y: y_batch,
                    rcnn.dropout_keep_prob: config['dropout_keep_prob']
                }

                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, rcnn.loss, rcnn.accuracy],
                    feed_dict=feed_dict)

                time_str = datetime.datetime.now().isoformat()
                print('{}: step {}, loss {}, acc {}'.format(
                    time_str, step, loss, accuracy))

            def dev_step(x_batch, y_batch):
                feed_dict = {
                    rcnn.input_x: x_batch,
                    rcnn.input_y: y_batch,
                    rcnn.dropout_keep_prob: 1.0
                }

                step, loss, accuracy = sess.run(
                    [global_step, rcnn.loss, rcnn.accuracy], feed_dict=feed_dict)

                time_str = datetime.datetime.now().isoformat()
                print('{}: step {}, loss {}, acc {}'.format(
                    time_str, step, loss, accuracy))

            # generate batches
            batches = data_helper.generate_batchs(x_train, y_train, config)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config['evaluate_every'] == 0:
                    print('Evaluation:')
                    dev_step(x_dev, y_dev)

                if current_step % config['checkpoint_every'] == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print('save model checkpoint to {}'.format(path))

            # test accuracy
            test_accuracy = sess.run(
                rcnn.accuracy,
                feed_dict={
                    rcnn.input_x: x_test,
                    rcnn.input_y: y_test,
                    rcnn.dropout_keep_prob: 1.0
                })
            print('Test dataset accuracy: {}'.format(test_accuracy))
Example #3
def train():
    X_train, y_train, all_words = data_helper.preprocess_data(
        './mini_data/train.txt')
    word_to_idx, idx_to_word = data_helper.generator_vocab(
        X_train, './mini_data')
    X_train_digit = data_helper.padding(X_train, word_to_idx)

    with tf.Graph().as_default():
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            fasttext = FastText(seq_length=config["seq_lenght"],
                                num_class=config["num_class"],
                                vocab_size=config["vocab_size"],
                                embedding_size=config["embedding_size"])

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=config["learning_rate"])
            train_op = optimizer.minimize(fasttext.loss,
                                          global_step=global_step)

            loss_summary = tf.summary.scalar('loss', fasttext.loss)
            acc_summary = tf.summary.scalar('precision', fasttext.precision)

            time_stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
            out_dir = os.path.join("runs", time_stamp)

            # train summary
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, 'summary', 'train')
            train_summary_write = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # dev summary
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, 'summary', 'dev')
            dev_summary_write = tf.summary.FileWriter(dev_summary_dir,
                                                      sess.graph)

            # checkpoint
            checkpoint_dir = os.path.join(out_dir, 'model')
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')

            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=config["max_to_keep"])

            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                feed_dict = {
                    fasttext.input_x: x_batch,
                    fasttext.input_y: y_batch
                }

                _, step, summaries, loss = sess.run(
                    [train_op, global_step, train_summary_op, fasttext.loss],
                    feed_dict=feed_dict)
                train_summary_write.add_summary(summaries, global_step=step)

                print("train_step: {}, loss: {}".format(step, loss))

            def dev_step(x_batch, y_batch, write=None):
                feed_dict = {
                    fasttext.input_x: x_batch,
                    fasttext.input_y: y_batch,
                    fasttext.dropout_keep_prob: 1.0
                }

                step, summaries, loss = sess.run(
                    [global_step, dev_summary_op, fasttext.loss],
                    feed_dict=feed_dict)

                print("dev_step: {}, loss: {}".format(step, loss))

                if write:
                    write.add_summary(summaries, step)

            # generate batches
            batches = data_helper.generate_batchs(X_train_digit, y_train)
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config["evaluate_every"] == 0:
                    dev_step(x_dev, y_dev, write=dev_summary_write)

                if current_step % config["checkpoint_every"] == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print('save model checkpoint to {}'.format(path))

            # test
            feed_dict = {
                fasttext.input_x: x_test,
                fasttext.input_y: y_test,
                fasttext.dropout_keep_prob: 1.0
            }
            test_precision, test_recall = sess.run(
                [fasttext.precision, fasttext.recall], feed_dict=feed_dict)
            print('test_precision: {}, test_recall: {}'.format(
                test_precision, test_recall))
Example #4
    def predict(self, list_str):
        input_x = padding(list_str, None, config, self.word_to_index, None)
        feed_dict = {self.input_x: input_x, self.dropout_keep_prob: 1.0}
        predictions = self.sess.run(self.predictions, feed_dict=feed_dict)
        return [self.index_to_label[str(idx)] for idx in predictions]
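A hedged usage sketch for the predict method above, assuming clf is whatever classifier object exposes it together with a live session, word_to_index and index_to_label (the variable name and the input strings are made up for illustration):

labels = clf.predict(['some input text', 'another input text'])
print(labels)   # a list of label strings, one per input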
Example #5
def train(config):
    learning_rate = config['learning_rate']
    clip_grad = config['clip_grad']
    max_model_keep = config['max_model_keep']

    print('parameters: ')
    print(json.dumps(config, indent=4, ensure_ascii=False))

    # load data
    print('load data .....')
    X, y = data_helper.process_data(config)

    # make vocab
    print('make vocab .....')
    word_to_index, label_to_index = data_helper.generate_vocab(X, y, config)
    config['num_tags'] = len(label_to_index)

    # padding data
    print('padding data .....')
    input_x, input_y, sequence_lengths = data_helper.padding(X, y, word_to_index, label_to_index)

    # split data
    print('split data .....')
    x_train, y_train, sequences_length_train, x_test, y_test, sequence_length_test, x_dev, y_dev, sequence_length_dev = \
        data_helper.split_data(input_x, input_y, sequence_lengths, config)

    print('length train: {}'.format(len(x_train)))
    print('length test: {}'.format(len(x_test)))
    print('length dev: {}'.format(len(x_dev)))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            bilstm_crf = BilstmCrf(config)

            # training_procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate)

            # apply gradient clipping to avoid gradient explosion
            grads_and_vars = optimizer.compute_gradients(bilstm_crf.loss)
            grads_and_vars_clip = [[tf.clip_by_value(g, -clip_grad, clip_grad), v]
                                   for g, v in grads_and_vars if g is not None]
            train_op = optimizer.apply_gradients(grads_and_vars_clip, global_step=global_step)

            # output dir for models and summaries
            timestamp = str(int(time.time()))
            outdir = os.path.abspath(os.path.join(os.path.curdir, 'runs', timestamp))
            print('writing to {} !!!'.format(outdir))

            # summary of loss
            tf.summary.scalar('loss', bilstm_crf.loss)

            # train summary
            train_summary_op = tf.summary.merge_all()
            train_summary_dir = os.path.join(outdir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # dev summary
            dev_summary_op = tf.summary.merge_all()
            dev_summary_dir = os.path.join(outdir, 'summaries', 'dev')
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # checkpoint dir
            checkpoint_dir = os.path.abspath(os.path.join(outdir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')

            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=max_model_keep)

            sess.run(tf.global_variables_initializer())

            def viterbi_decoder(logits, seq_len_list, transition_params):
                label_list = []
                for logit, seq_len in zip(logits, seq_len_list):
                    viterbi_seq, _ = tf.contrib.crf.viterbi_decode(logit[:seq_len], transition_params)
                    label_list.append(viterbi_seq)
                return label_list

            def train_step(x_batch, y_batch, sequence_lengths):
                feed_dict = {
                    bilstm_crf.input_x: x_batch,
                    bilstm_crf.input_y: y_batch,
                    bilstm_crf.sequence_length: sequence_lengths,
                    bilstm_crf.dropout_keep_prob: config['dropout_keep_prob']
                }

                _, step, summaries, loss, transition_params, logits = sess.run(
                    [train_op, global_step, train_summary_op, bilstm_crf.loss,
                     bilstm_crf.transition_params, bilstm_crf.logits],
                    feed_dict=feed_dict
                )

                label_list = viterbi_decoder(logits, sequence_lengths, transition_params)

                acc, recall, f1 = data_helper.measure(y_batch, label_list, sequence_lengths)

                time_str = datetime.datetime.now().isoformat()
                print("training: {}: step {}, loss {:g}, acc {:.2f} recall {:.2f} f1 {:.2f}".format
                      (time_str, step, loss, acc, recall, f1))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, sequence_lengths, writer=None):
                feed_dic = {
                    bilstm_crf.input_x: x_batch,
                    bilstm_crf.input_y: y_batch,
                    bilstm_crf.sequence_length: sequence_lengths,
                    bilstm_crf.dropout_keep_prob: 1.0
                }

                step, summaries, loss, transition_params, logits = sess.run(
                    [global_step, dev_summary_op, bilstm_crf.loss, bilstm_crf.transition_params, bilstm_crf.logits],
                    feed_dict=feed_dic
                )

                label_list = viterbi_decoder(logits, sequence_lengths, transition_params)

                acc, recall, f1 = data_helper.measure(y_batch, label_list, sequence_lengths)

                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, f1 {:.2f}".format(time_str, step, loss, f1))
                if writer:
                    writer.add_summary(summaries, step)

            # generate batches
            batches = data_helper.generate_batchs(x_train, y_train, sequences_length_train, config)
            for batch in batches:
                x_batch, y_batch, sequence_length_batch = zip(*batch)
                train_step(x_batch, y_batch, sequence_length_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % config['evaluate_every'] == 0:
                    print('Evaluation:')
                    dev_step(x_dev, y_dev, sequence_length_dev, writer=dev_summary_writer)

                if current_step % config['checkpoint_every'] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print('save model checkpoint to {}'.format(path))
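None of the snippets shows how a saved checkpoint is loaded back for inference; a minimal restore sketch, assuming the same graph-building code as Example #5 and a checkpoints directory produced by one of its runs (checkpoint_dir is a placeholder path):

with tf.Graph().as_default():
    with tf.Session() as sess:
        bilstm_crf = BilstmCrf(config)                       # rebuild the exact same graph
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(checkpoint_dir)    # e.g. 'runs/<timestamp>/checkpoints'
        saver.restore(sess, ckpt)                            # load the trained weights
        # sess can now be used just like in dev_step or the test evaluation above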