Example #1
def __init__(self, _num_words, _num_class, _vocab_file, _language):
    self._graph = tf.Graph()
    with self._graph.as_default():
        self._model = TextCNN(sequenceLength=_num_words,
                              numClasses=_num_class,
                              vocabSize=10000,
                              embeddingSize=options.embedding_dim,
                              kernelSizes=list(
                                  map(int,
                                      options.kernel_sizes.split(","))),
                              numKernels=options.num_kernels,
                              l2RegLambda=options.l2_reg_lambda)
    self._sess = tf.Session(graph=self._graph)
    self._vocab_file = _vocab_file
    self._num_words = _num_words
    self._num_class = _num_class
    self._language = _language
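
The wrapper above builds a dedicated graph and session but never restores trained weights into them in this snippet. Below is a minimal sketch of how a checkpoint could be loaded into that session; the load method and the checkpoint handling are assumptions, not part of the original class:

def load(self, checkpoint_dir):
    # Hypothetical helper, not shown in the original example: restore the
    # latest checkpoint from checkpoint_dir into this instance's session.
    with self._graph.as_default():
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(self._sess, tf.train.latest_checkpoint(checkpoint_dir))
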
Example #2
  xEVAL, yEVAL = open_data.open_data_and_labels(
      options.dev_data, options.vocab_file, options.num_words,
      options.language)

  np.random.seed(10)
  shuffle_indices = np.random.permutation(np.arange(len(train_label)))
  xTrain = train_data[shuffle_indices]
  yTrain = train_label[shuffle_indices]

  del train_data, train_label

  sess = tf.Session()
  with sess.as_default():
    cnn = TextCNN(
      sequenceLength=options.num_words,
      numClasses=options.num_class,
      vocabSize=10000,
      embeddingSize=options.embedding_dim,
      kernelSizes=list(map(int, options.kernel_sizes.split(","))),
      numKernels=options.num_kernels,
      l2RegLambda=options.l2_reg_lambda
    )

    global_step = tf.Variable(0, name='globalStep', trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-3)
    grads_and_vars = optimizer.compute_gradients(cnn.loss)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    sess.run(tf.global_variables_initializer())
    _saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)

    def trainStep(xBatch, yBatch):
      feed_dict = {cnn.inputX: xBatch, cnn.inputY: yBatch,
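                   # The listing is truncated mid-statement here; the completion
                   # below is a plausible reconstruction. cnn.dropoutKeepProb and
                   # options.dropout_keep_prob are assumed names patterned on the
                   # code above, not confirmed by the original example.
                   cnn.dropoutKeepProb: options.dropout_keep_prob}
      _, step, loss = sess.run([train_op, global_step, cnn.loss], feed_dict)
      print("step {}, loss {:g}".format(step, loss))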
Example #3
# Split train/test set
dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]

# Training
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int,
                                            FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
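                # The listing is truncated inside this loop; the body below is a
                # common completion (summary names are illustrative), recording a
                # histogram and a sparsity scalar for every gradient.
                grad_hist = tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g)
                grad_sparsity = tf.summary.scalar(
                    "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist)
                grad_summaries.append(grad_sparsity)
        grad_summaries_merged = tf.summary.merge(grad_summaries)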

##### Training  ###################################

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=allow_soft_placement,
        log_device_placement=log_device_placement)

    sess = tf.Session(config=session_conf)

    with sess.as_default():
        cnn = TextCNN(
            sequence_length=trainX.shape[1],
            num_classes=trainY.shape[1],
            vocab_size=vocab_size,
            embedding_size=embedding_dim,
            filter_sizes=filter_sizes,
            num_filters=num_filters,
            l2_reg_lambda=L2_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)

        optimizer = tf.train.AdamOptimizer(0.001)  # learning rate = 0.001
        grads_and_vars = optimizer.compute_gradients(cnn.loss)  # list of (gradient, variable) pairs

        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)  # applies the updates and increments global_step
        # Equivalent one-step form: train_op = optimizer.minimize(cnn.loss, global_step=global_step)
        # Keep track of gradient values and sparsity (optional)
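
As the comments above note, compute_gradients plus apply_gradients is equivalent to optimizer.minimize but exposes the gradients in between. A common reason for the two-step form is to transform them first, for example by global-norm clipping; a minimal sketch of that variation (the clip value 5.0 is illustrative, not part of the original example):

        # Variation, not in the original: clip gradients before applying them.
        grads, variables = zip(*grads_and_vars)
        clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=5.0)
        train_op = optimizer.apply_gradients(list(zip(clipped_grads, variables)),
                                             global_step=global_step)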

x_train, x_dev = x_data[:validdata_size], x_data[validdata_size:]
y_train, y_dev = y_data[:validdata_size], y_data[validdata_size:]

# print "train data size is {:d} , and train lable is {:d}".format( len( x_train ) , len( y_train ) )
# print "valid data size is {:d} , and valid lable is {:d}".format( len( x_dev ) , len( y_dev ) )
"""
# Training
#=================================================
"""

with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        cnn = TextCNN(sequence_length=FLAGS.sequence_length,
                      num_classes=y_train.shape[1],
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)

    # Define the training procedure
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-3)
    grads_and_var = optimizer.compute_gradients(cnn.loss)
    train_op = optimizer.apply_gradients(grads_and_var,
                                         global_step=global_step)

    # Output directory for models and summaries
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print "Writing to {} \n".format(out_dir)
def train(x, y, pretrained_embedding_filter):

    # Split data into development set and training set
    x_train, y_train, x_dev, y_dev = split_data(x, y, FLAGS.devset_percentage)

    # Training
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sentence_len=x_train.shape[1],
                          vocab_size=pretrained_embedding_filter.shape[0],
                          embedding_size=pretrained_embedding_filter.shape[1],
                          static_embedding_filter=pretrained_embedding_filter,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          num_classes=y_train.shape[1],
                          l2_reg_lambda=FLAGS.l2_reg_lambda)
            # Define training procedure
            global_step = tf.Variable(tf.constant(0),
                                      name="global_step",
                                      trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
            train_op = optimizer.minimize(cnn.loss, global_step=global_step)

            # Output directory for model
            timestamp = str(time.time())
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))

            # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it.
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """
                A single training step
                """
                feed_dict = {
                    cnn.inputs: x_batch,
                    cnn.labels: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))

            def dev_step(x_batch, y_batch):
                """
                A single development step
                """
                feed_dict = {
                    cnn.inputs: x_batch,
                    cnn.labels: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, loss, accuracy = sess.run(
                    [global_step, cnn.loss, cnn.accuracy], feed_dict)

                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                return loss, accuracy

            # Mini-batch training loop with early stopping
            best_loss = 0.0
            final_accuracy = 0.0
            patience = 0
            should_stop = False
            num_batches = int((len(y_train) - 1) / FLAGS.batch_size + 1)
            start_training_time = datetime.datetime.now().isoformat()
            for epoch in range(FLAGS.num_epochs):
                if should_stop:
                    break

                shuffled_indices = np.random.permutation(
                    np.arange(len(y_train)))
                x_shuffled = x_train[shuffled_indices]
                y_shuffled = y_train[shuffled_indices]

                for batch in range(num_batches):
                    start_index = batch * FLAGS.batch_size
                    end_index = min(start_index + FLAGS.batch_size,
                                    len(y_train))
                    train_step(x_shuffled[start_index:end_index],
                               y_shuffled[start_index:end_index])
                    current_step = tf.train.global_step(sess, global_step)

                    if current_step % FLAGS.evaluate_every == 0:
                        print("Evaluation...")
                        loss_value, accuracy_value = dev_step(x_dev, y_dev)
                        print("")

                        if current_step == FLAGS.evaluate_every or loss_value < best_loss:
                            patience = 0  # reset patience after an improvement
                            print(best_loss, loss_value)
                            best_loss = loss_value
                            final_accuracy = accuracy_value
                            path = saver.save(sess,
                                              checkpoint_prefix,
                                              global_step=current_step)
                            print(
                                "Saved model checkpoint to {}\n".format(path))
                        else:
                            patience += 1
                            if patience > FLAGS.patience_threshold:
                                should_stop = True
                                print("Early stopping after {} step".format(
                                    current_step))
                                break

            print("Accuracy: {}, Loss: {}".format(final_accuracy, best_loss))
            print("Training Completed!")
            end_training_time = datetime.datetime.now().isoformat()
            print("Started training: {}\nCompleted Training: {}".format(
                start_training_time, end_training_time))
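
A hypothetical smoke-test invocation of train with random data; the shapes, the random inputs, and the assumption that the script's FLAGS and split_data helper are already defined are illustrative, not part of the original:

if __name__ == "__main__":
    # Illustrative only: random token ids, one-hot labels, random embeddings.
    vocab_size, seq_len, num_classes = 400, 56, 2
    x = np.random.randint(0, vocab_size, size=(64, seq_len))
    y = np.eye(num_classes)[np.random.randint(0, num_classes, size=64)]
    embedding = np.random.uniform(-1.0, 1.0,
                                  size=(vocab_size, 128)).astype(np.float32)
    train(x, y, embedding)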