def vdcnn_model(X_train, y_train, X_test, y_test, sequence_max_length):

    model = VDCNN(num_classes=y_train.shape[1],
                  sequence_length=sequence_max_length)

    loss = ('binary_crossentropy' if y_train.shape[1] == 1
            else 'categorical_crossentropy')
    model.compile(loss=loss,
                  optimizer=SGD(lr=0.0001, momentum=0.9),
                  metrics=['accuracy'])

    model.fit(X_train,
              y_train,
              validation_data=(X_test, y_test),
              epochs=50,
              batch_size=128)

    scores = model.evaluate(X_test, y_test)
    accuracy = scores[1] * 100
    y_pred = model.predict(X_test)

    return accuracy, y_pred
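A minimal calling sketch for the helper above, assuming integer-encoded character sequences and one-hot labels; the vocabulary size, sequence length, and class count below are placeholders, not values from the original:

# Hypothetical usage of vdcnn_model(); all shapes and constants are assumed.
import numpy as np
from tensorflow.keras.utils import to_categorical

sequence_max_length = 1014
X_train = np.random.randint(0, 69, size=(32, sequence_max_length))
X_test = np.random.randint(0, 69, size=(8, sequence_max_length))
y_train = to_categorical(np.random.randint(0, 4, size=32), num_classes=4)
y_test = to_categorical(np.random.randint(0, 4, size=8), num_classes=4)

accuracy, y_pred = vdcnn_model(X_train, y_train, X_test, y_test, sequence_max_length)
print("test accuracy: %.2f%%" % accuracy)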
    def __init__(self, task='ag'):
        sequence_max_length = 200
        downsampling_type = 'maxpool'
        depth = 9
        num_layers = 4
        use_he_uniform = True
        optional_shortcut = True

        current_path = os.path.dirname(os.path.realpath(__file__))
        model_root = opj(current_path, 'pretrained_models')

        model_path = {
            'ag': opj(model_root, 'ag', 'model-step92000.ckpt'),
            'yelp': opj(model_root, 'yelp', 'model-step282000.ckpt'),
            'ag-cam': opj(model_root, 'ag-cam', 'model-step10000.ckpt'),
            'yelp-cam': opj(model_root, 'yelp-cam', 'model-step54000.ckpt'),
            'dbpedia': opj(model_root, 'dbpedia', 'model-step40000.ckpt')
        }

        num_classes = {
            'ag': 4,
            'yelp': 2,
            'ag-cam': 4,
            'yelp-cam': 2,
            'dbpedia': 14
        }

        self.cnn = VDCNN(num_classes=num_classes[task],
                         depth=depth,
                         sequence_max_length=sequence_max_length,
                         use_he_uniform=use_he_uniform,
                         optional_shortcut=optional_shortcut)

        self.data_helper = data_helper(sequence_max_length=sequence_max_length)

        self.sess = tf.Session()
        saver = tf.train.Saver()
        saver.restore(self.sess, model_path[task])

        self.features = {}
        for layer_index in range(num_layers):
            if layer_index == 0:
                layer_tensor_name = 'add:0'
            else:
                layer_tensor_name = 'add_%d:0' % (layer_index * 2)

            layer_name = 'conv_%d' % layer_index
            layer_tensor = tf.get_default_graph().get_tensor_by_name(
                layer_tensor_name)

            self.features[layer_name] = layer_tensor
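A sketch of how the cached layer tensors might be queried for a single text; the char2vec helper on data_helper and the cnn.input_x / cnn.is_training names are assumptions (they do not appear in the snippet above), and numpy is assumed to be imported as np:

    def get_features(self, text):
        # Hypothetical companion method; data_helper.char2vec and
        # cnn.input_x / cnn.is_training are assumed names.
        x = np.expand_dims(self.data_helper.char2vec(text), axis=0)
        feed_dict = {self.cnn.input_x: x, self.cnn.is_training: False}
        return {name: self.sess.run(tensor, feed_dict)
                for name, tensor in self.features.items()}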
Example 3
def predict_label_vdcnn(model_file,
                        unlabeled_file,
                        resulting_labels,
                        num_classes,
                        classes_weights=tf.constant([1.0, 1.0])):
    tf.reset_default_graph()
    # print("Loading data...")
    data_helper_cl = DataHelper(sequence_max_length=FLAGS.sequence_max_length)
    unlabeled_data = data_helper_cl.load_unlabeled_data(unlabeled_file)
    # print("Loading data succees...")

    # ConvNet
    sess = tf.Session()
    cnn = VDCNN(num_classes=num_classes,
                depth=FLAGS.depth,
                sequence_max_length=FLAGS.sequence_max_length,
                downsampling_type=FLAGS.downsampling_type,
                use_he_uniform=FLAGS.use_he_uniform,
                optional_shortcut=FLAGS.optional_shortcut,
                keep_prob=FLAGS.keep_prob,
                classes_weights=classes_weights)

    def prediction_step(x_batch):
        feed_dict = {cnn.input_x: x_batch, cnn.is_training: False}
        softmax_output = sess.run([cnn.softmax_output], feed_dict)
        return softmax_output

    saver = tf.train.Saver()
    saver.restore(sess, model_file)

    # Labels prediction
    unlabeled_batches = data_helper_cl.batch_iter(unlabeled_data,
                                                  FLAGS.batch_size,
                                                  1,
                                                  shuffle=False)
    for unlabeled_batch in unlabeled_batches:
        softmax_output = prediction_step(unlabeled_batch)
        with open(resulting_labels, 'a') as f:
            for row in softmax_output[0]:
                # Write each softmax row as one comma-separated line.
                f.write(",".join(str(item) for item in row) + "\n")
if depth == 9:
    depth = [1, 1, 1, 1]
elif depth == 17:
    depth = [2, 2, 2, 2]
elif depth == 29:
    depth = [5, 5, 2, 2]
else:
    print('depth must be one of 9, 17, or 29')
    sys.exit()

print('VDCNN setting: emb_dim={}, n_out={}, depth={}'.format(
    len(char2id) + 1, kind,
    sum(depth) * 2 + 1))

gpu_id = args.gpu

model = VDCNN(len(char2id) + 1, kind, depth)
if gpu_id >= 0:
    model.to_gpu(gpu_id)

print(mode, train_x.shape, train_y.shape, test_x.shape, test_y.shape)

train = datasets.TupleDataset(train_x, train_y)
test = datasets.TupleDataset(test_x, test_y)

batch_size = 128

train_iter = iterators.SerialIterator(train, batch_size)
test_iter = iterators.SerialIterator(test, batch_size, False, False)

epoch_size = 5000
max_epoch = 15
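The snippet stops after the iterators; a minimal Chainer continuation, assuming the usual L.Classifier wrapper and an Adam optimizer (neither appears in the original), might look like this:

# Assumed continuation: wrap the model, build an updater/trainer, and run.
import chainer.links as L
from chainer import optimizers, training
from chainer.training import extensions

classifier = L.Classifier(model)          # adds softmax cross-entropy loss and accuracy
optimizer = optimizers.Adam()
optimizer.setup(classifier)

updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='result')
trainer.extend(extensions.Evaluator(test_iter, classifier, device=gpu_id))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'main/accuracy',
     'validation/main/loss', 'validation/main/accuracy']))
trainer.run()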
Example 5
# Data Preparation
# Load data
print("Loading data...")
data_helper = data_helper(sequence_max_length=FLAGS.sequence_max_length,
                          use_title=FLAGS.use_title)
train_data, train_label, train_texts, test_data, test_label, test_texts = data_helper.load_dataset(
    FLAGS.database_path)
num_batches_per_epoch = int((len(train_data) - 1) / FLAGS.batch_size) + 1
print("Loading data succees...")

# ConvNet
acc_list = [0]
sess = tf.Session()
cnn = VDCNN(num_classes=train_label.shape[1],
            depth=FLAGS.depth,
            sequence_max_length=FLAGS.sequence_max_length,
            downsampling_type=FLAGS.downsampling_type,
            use_he_uniform=FLAGS.use_he_uniform,
            optional_shortcut=FLAGS.optional_shortcut)

# Optimizer and LR Decay
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    global_step = tf.Variable(0, name="global_step", trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               FLAGS.num_epochs *
                                               num_batches_per_epoch,
                                               0.95,
                                               staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    gradients, variables = zip(*optimizer.compute_gradients(cnn.loss))
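    # The snippet is cut off here; the fuller training scripts in this collection
    # typically continue by clipping the gradients and applying them (assumed):
    gradients, _ = tf.clip_by_global_norm(gradients, 7.0)
    train_op = optimizer.apply_gradients(zip(gradients, variables),
                                         global_step=global_step)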
Example 6
# Training
# ==================================================

# ----------------- Graph construction phase -------------------------------



# Input data.
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = VDCNN()

        # Ensure the update_ops run before the train step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Initialize all variables
        print("START %s" % datetime.datetime.now())
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()
def train():
	# Data Preparation
	# Load data
	print("Loading data...")
	x, y = data_loader.read_data(FLAGS.pos_data, FLAGS.neg_data,
												  FLAGS.max_sequence_length)
	print("Data Size:", len(y))
	np.random.seed(10)
	shuffle_indices = np.random.permutation(np.arange(len(y)))
	x_shuffled = x[shuffle_indices]
	y_shuffled = y[shuffle_indices]

	dev_sample_index = -1 * int(FLAGS.dev_percentage * float(len(y)))
	x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
	y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]

	del x, y, x_shuffled, y_shuffled
	print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

	num_batches_per_epoch = int((len(x_train) - 1) / FLAGS.batch_size) + 1
	print("Loading data succees...")

	# ConvNet
	acc_list = [0]

	session_conf = tf.ConfigProto(
		allow_soft_placement=FLAGS.allow_soft_placement,
		log_device_placement=FLAGS.log_device_placement)
	session_conf.gpu_options.allow_growth = True
	# session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45
	sess = tf.Session(config=session_conf)

	cnn = VDCNN(num_classes=y_train.shape[1],
		num_quantized_chars=FLAGS.vocab_size,
		depth=FLAGS.depth,
		sequence_max_length=FLAGS.max_sequence_length,
		downsampling_type=FLAGS.downsampling_type,
		use_he_uniform=FLAGS.use_he_uniform,
		optional_shortcut=FLAGS.optional_shortcut)

	# Optimizer and LR Decay
	update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
	with tf.control_dependencies(update_ops):
		global_step = tf.Variable(0, name="global_step", trainable=False)
		learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, FLAGS.num_epochs*num_batches_per_epoch, 0.95, staircase=True)
		optimizer = tf.train.AdamOptimizer(learning_rate)
		gradients, variables = zip(*optimizer.compute_gradients(cnn.loss))
		gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
		train_op = optimizer.apply_gradients(zip(gradients, variables), global_step=global_step)

	###
	# Output directory for models and summaries
	timestamp = str(int(time.time()))
	out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
	print("Writing to {}\n".format(out_dir))

	# Summaries for loss and accuracy
	loss_summary = tf.summary.scalar("loss", cnn.loss)
	acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

	# Train Summaries
	train_summary_op = tf.summary.merge([loss_summary, acc_summary])
	train_summary_dir = os.path.join(out_dir, "summaries", "train")
	train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

	# Dev summaries
	dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
	dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
	dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

	# Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
	checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
	checkpoint_prefix = os.path.join(checkpoint_dir, "model")
	if not os.path.exists(checkpoint_dir):
		os.makedirs(checkpoint_dir)
	saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

	# Initialize Graph
	sess.run(tf.global_variables_initializer())

	# sess = tfdbg.LocalCLIDebugWrapperSession(sess)  # wrap the session with the tfdbg debugger
	# sess.add_tensor_filter("has_inf_or_nan", tfdbg.has_inf_or_nan)  # add a filter that flags inf/NaN tensors

	# Train Step and Test Step
	def train_step(x_batch, y_batch):
		"""
		A single training step
		"""
		feed_dict = {cnn.input_x: x_batch,
					 cnn.input_y: y_batch,
					 cnn.is_training: True}
		_, step, summaries, loss, accuracy = sess.run([train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy], feed_dict)
		train_summary_writer.add_summary(summaries, step)
		time_str = datetime.datetime.now().isoformat()
		print("{}: Step {}, Epoch {}, Loss {:g}, Acc {:g}".format(time_str, step, int(step//num_batches_per_epoch)+1, loss, accuracy))
		#if step%FLAGS.evaluate_every == 0 and FLAGS.enable_tensorboard:
		#	summaries = sess.run(train_summary_op, feed_dict)
		#	train_summary_writer.add_summary(summaries, global_step=step)

	def test_step(x_batch, y_batch):
		"""
		Evaluates model on a dev set
		"""
		feed_dict = {cnn.input_x: x_batch,
					 cnn.input_y: y_batch,
					 cnn.is_training: False}
		summaries_dev, loss, preds, step = sess.run([dev_summary_op, cnn.loss, cnn.predictions, global_step], feed_dict)
		dev_summary_writer.add_summary(summaries_dev, step)
		time_str = datetime.datetime.now().isoformat()
		return preds, loss

	# Generate batches
	# train_batches = data_helper.batch_iter(list(zip(train_data, train_label)), FLAGS.batch_size, FLAGS.num_epochs)

	batches = data_loader.batch_iter(list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)

	# Training loop. For each batch...
	for train_batch in batches:
		x_batch, y_batch = zip(*train_batch)
		train_step(x_batch, y_batch)
		current_step = tf.train.global_step(sess, global_step)
		# Testing loop
		if current_step % FLAGS.evaluate_every == 0:
			print("\nEvaluation:")
			i = 0
			index = 0
			sum_loss = 0
			test_batches = data_loader.batch_iter(list(zip(x_dev, y_dev)), FLAGS.batch_size, 1, shuffle=False)
			y_preds = np.ones(shape=len(y_dev), dtype=int)
			for test_batch in test_batches:
				x_test_batch, y_test_batch = zip(*test_batch)
				preds, test_loss = test_step(x_test_batch, y_test_batch)
				sum_loss += test_loss
				res = np.absolute(preds - np.argmax(y_test_batch, axis=1))
				y_preds[index:index+len(res)] = res
				i += 1
				index += len(res)

			time_str = datetime.datetime.now().isoformat()
			acc = np.count_nonzero(y_preds==0)/len(y_preds)
			acc_list.append(acc)
			print("{}: Evaluation Summary, Loss {:g}, Acc {:g}".format(time_str, sum_loss/i, acc))
			print("{}: Current Max Acc {:g} in Iteration {}".format(time_str, max(acc_list), int(acc_list.index(max(acc_list))*FLAGS.evaluate_every)))

		if current_step % FLAGS.checkpoint_every == 0:
			path = saver.save(sess, checkpoint_prefix, global_step=current_step)
			print("Saved model checkpoint to {}\n".format(path))
Example 8
def train_vdcnn(dataset_path,
                model_file,
                classes_weights=tf.constant([1.0, 1.0])):
    tf.reset_default_graph()
    max_accuracy = 0
    restore = True
    results_output = dataset_path + 'vdcnn_res.txt'

    # print("Loading data...")
    data_helper = DataHelper(sequence_max_length=FLAGS.sequence_max_length)
    train_data, train_label, test_data, test_label = data_helper.load_dataset(
        dataset_path)
    num_batches_per_epoch = int((len(train_data) - 1) / FLAGS.batch_size) + 1
    # print("Loading data succees...")

    # ConvNet
    acc_list = [0]
    sess = tf.Session()
    cnn = VDCNN(num_classes=train_label.shape[1],
                depth=FLAGS.depth,
                sequence_max_length=FLAGS.sequence_max_length,
                downsampling_type=FLAGS.downsampling_type,
                use_he_uniform=FLAGS.use_he_uniform,
                optional_shortcut=FLAGS.optional_shortcut,
                keep_prob=FLAGS.keep_prob,
                classes_weights=classes_weights)

    # Optimizer and LR Decay
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                   global_step,
                                                   FLAGS.num_epochs *
                                                   num_batches_per_epoch,
                                                   0.95,
                                                   staircase=True)
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        gradients, variables = zip(*optimizer.compute_gradients(cnn.loss))
        gradients, _ = tf.clip_by_global_norm(gradients, 7.0)
        train_op = optimizer.apply_gradients(zip(gradients, variables),
                                             global_step=global_step)

    if restore:
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, os.path.join(os.getcwd(), model_file))
    else:
        sess.run(tf.global_variables_initializer())

    with open(results_output, 'a') as f:
        f.write(
            "================================\n  New round of training \n================================\n"
        )
        f.write(
            str(FLAGS.optional_shortcut) + ' ' + str(FLAGS.keep_prob) + '\n')

    # Train Step and Test Step
    def train_step(x_batch, y_batch):
        """
        A single training step
        """
        feed_dict = {
            cnn.input_x: x_batch,
            cnn.input_y: y_batch,
            cnn.is_training: True
        }
        _, step, loss, accuracy = sess.run(
            [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)
        time_str = datetime.datetime.now().isoformat()
        with open(results_output, 'a') as f:
            f.write("{}: Step {}, Epoch {}, Loss {:g}, Acc {:g}\n".format(
                time_str, step,
                int(step // num_batches_per_epoch) + 1, loss, accuracy))

    def test_step(x_batch, y_batch):
        """
        Evaluates model on a dev set
        """
        feed_dict = {
            cnn.input_x: x_batch,
            cnn.input_y: y_batch,
            cnn.is_training: False
        }
        loss, preds = sess.run([cnn.loss, cnn.predictions], feed_dict)
        return preds, loss

    # Generate batches
    train_batches = data_helper.batch_iter(list(zip(train_data, train_label)),
                                           FLAGS.batch_size, FLAGS.num_epochs)

    # Training loop. For each batch...
    for train_batch in train_batches:
        x_batch, y_batch = zip(*train_batch)
        train_step(x_batch, y_batch)
        current_step = tf.train.global_step(sess, global_step)

        # Testing loop
        if current_step % FLAGS.evaluate_every == 0:
            with open(results_output, 'a') as f:
                f.write("\nEvaluation:\n")
            i = 0
            index = 0
            sum_loss = 0
            test_batches = data_helper.batch_iter(list(
                zip(test_data, test_label)),
                                                  FLAGS.batch_size,
                                                  1,
                                                  shuffle=False)
            y_preds = np.ones(shape=len(test_label), dtype=int)
            for test_batch in test_batches:
                x_test_batch, y_test_batch = zip(*test_batch)
                preds, test_loss = test_step(x_test_batch, y_test_batch)
                sum_loss += test_loss
                res = np.absolute(preds - np.argmax(y_test_batch, axis=1))
                y_preds[index:index + len(res)] = res
                i += 1
                index += len(res)
            time_str = datetime.datetime.now().isoformat()
            acc = np.count_nonzero(y_preds == 0) / len(y_preds)
            acc_list.append(acc)
            with open(results_output, 'a') as f:
                if acc > max_accuracy:
                    max_accuracy = acc
                    saver = tf.train.Saver(tf.global_variables())
                    saver.save(sess, model_file)
                    f.write("New best model is at step " + str(current_step) +
                            "\n")
                f.write("{}: Evaluation Summary, Loss {:g}, Acc {:g}\n".format(
                    time_str, sum_loss / i, acc))
                f.write("{}: Current Max Acc {:g} in Iteration {}\n".format(
                    time_str, max(acc_list),
                    int(acc_list.index(max(acc_list)) * FLAGS.evaluate_every)))
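A hedged invocation of train_vdcnn; the dataset path, checkpoint name, and class-weight ratio are placeholders. Note that restore is hard-coded to True above, so model_file must already point at an existing checkpoint:

train_vdcnn(dataset_path='data/my_corpus/',                   # placeholder path
            model_file='checkpoints/vdcnn_best.ckpt',         # must already exist (restore=True)
            classes_weights=tf.constant([1.0, 3.0]))          # assumed up-weighting of the rarer class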
Example 9
# Data Preparation
# Load data
print("Loading data...")
data_helper = data_helper(sequence_max_length=FLAGS.sequence_max_length)
train_data, train_label, test_data, test_label = data_helper.load_dataset(
    FLAGS.database_path)
num_batches_per_epoch = int((len(train_data) - 1) / FLAGS.batch_size) + 1
print("Loading data succees...")

# ConvNet
acc_list = [0]
sess = tf.Session()
cnn = VDCNN(num_classes=train_label.shape[1],
            sequence_max_length=FLAGS.sequence_max_length,
            downsampling_type=FLAGS.downsampling_type,
            weight_decay=FLAGS.weight_decay,
            use_he_uniform=FLAGS.use_he_uniform,
            use_bias=FLAGS.use_bias,
            num_filters=list(map(int, FLAGS.num_filters.split(","))),
            num_layers=list(map(int, FLAGS.num_layers.split(","))))

# Optimizer and LR Decay
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    global_step = tf.Variable(0, name="global_step", trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               FLAGS.num_epochs *
                                               num_batches_per_epoch,
                                               0.95,
                                               staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
Example 10
File: train.py Project: evu/VDCNN
def train(x_train, y_train, x_test, y_test):

    session_ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    log_dir = str(pathlib.Path(FLAGS.train_log_dir) / session_ts)

    if FLAGS.dataset_type == "embeddings":
        embedding_input = True
        embedding_dim = x_train.shape[-1]
    else:
        embedding_input = False
        embedding_dim = 16

    # Build model
    model = VDCNN(
        num_classes=y_train.shape[1],
        depth=FLAGS.depth,
        sequence_length=FLAGS.sequence_length,
        shortcut=FLAGS.shortcut,
        pool_type=FLAGS.pool_type,
        sort=FLAGS.sort,
        use_bias=FLAGS.use_bias,
        embedding_input=embedding_input,
        embedding_dim=embedding_dim,
    )

    model.compile(
        optimizer=tf.keras.optimizers.SGD(lr=FLAGS.lr, momentum=0.9),
        loss="categorical_crossentropy",
        metrics=["acc"],
    )

    # Save model architecture
    model_json = model.to_json()
    with open("vdcnn_model.json", "w") as json_file:
        json_file.write(model_json)
    time_str = datetime.datetime.now().isoformat()
    print("{}: Model saved as json.".format(time_str))
    print("")

    # Trainer
    # Tensorboard and extra callback to support steps history
    tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                 histogram_freq=50,
                                                 write_graph=True,
                                                 write_images=True)
    checkpointer = tf.keras.callbacks.ModelCheckpoint(
        filepath="./checkpoints/vdcnn_weights_val_acc_{val_acc:.4f}.h5",
        save_freq="epoch",
        verbose=1,
        save_best_only=True,
        mode="max",
        monitor="val_acc",
    )
    loss_history = custom_callbacks.LossHistory(model,
                                                tensorboard,
                                                logdir=log_dir)
    evaluate_step = custom_callbacks.EvaluateStep(
        model,
        checkpointer,
        tensorboard,
        FLAGS.evaluate_every,
        FLAGS.batch_size,
        x_test,
        y_test,
        log_dir,
    )

    # Fit model
    model.fit(
        x_train,
        y_train,
        batch_size=FLAGS.batch_size,
        epochs=FLAGS.num_epochs,
        validation_data=(x_test, y_test),
        verbose=1,
        callbacks=[
            checkpointer,
            tensorboard,
            # loss_history,
            evaluate_step,
        ],
    )
    print("-" * 30)
    time_str = datetime.datetime.now().isoformat()
    print("{}: Done training.".format(time_str))

    tf.keras.backend.clear_session()
    print("-" * 30)
    print()
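Since the architecture is written to vdcnn_model.json and the best weights land in ./checkpoints/, a matching (assumed) reload for inference could look like this; the weights filename is a placeholder, and custom_objects may be needed if VDCNN registers custom layers:

# Assumed reload path; the filename pattern mirrors the checkpointer above.
with open("vdcnn_model.json") as json_file:
    model = tf.keras.models.model_from_json(json_file.read())
model.load_weights("./checkpoints/vdcnn_weights_val_acc_0.9123.h5")  # placeholder
probs = model.predict(x_test, batch_size=FLAGS.batch_size)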