Example #1
# Assumed imports: `shuffle` matches sklearn.utils.shuffle (it shuffles several
# arrays in unison and accepts random_state); View and DatasetSplit are
# project-local helpers.
from sklearn.utils import shuffle


class DataLoader(object):
    def __init__(self):
        self.view = View()
        self.train = DatasetSplit()
        self.val = DatasetSplit()

    def load_data(self, train_path, val_path, delimiter, shuffle_data=False):
        self.view.print_to_screen('Loading data...')

        self.train.load_data(train_path, delimiter)
        train_texts = self.train.get_texts()
        train_labels = self.train.get_labels()
        train_images = self.train.get_images()

        self.val.load_data(val_path, delimiter)
        val_texts = self.val.get_texts()
        val_labels = self.val.get_labels()
        val_images = self.val.get_images()

        self.view.print_to_screen('Train/Dev split: {:d}/{:d}'.format(
            len(train_texts), len(val_texts)))

        if shuffle_data:
            # shuffle texts, labels and images in unison so the three arrays
            # stay aligned
            train_texts, train_labels, train_images = shuffle(train_texts,
                                                              train_labels,
                                                              train_images,
                                                              random_state=10)

        self.set_training_data(train_texts, train_labels, train_images)
        self.set_val_data(val_texts, val_labels, val_images)

    def set_val_data(self, val_texts, val_labels, val_images):
        self.val.set_texts(val_texts)
        self.val.set_labels(val_labels)
        self.val.set_images(val_images)

    def set_training_data(self, train_texts, train_labels, train_images):
        self.train.set_texts(train_texts)
        self.train.set_labels(train_labels)
        self.train.set_images(train_images)

    def get_training_data(self):
        return self.train

    def get_val_data(self):
        return self.val
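
A minimal usage sketch for DataLoader; the file paths and tab delimiter are
hypothetical, not taken from the project:

loader = DataLoader()
loader.load_data('data/train.tsv', 'data/val.tsv', '\t', shuffle_data=True)
train_split = loader.get_training_data()  # DatasetSplit of texts/labels/images
val_split = loader.get_val_data()
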
Example #2
# Assumed imports for this example (TF 1.x-style API); View, ImageManipulator,
# CustomIterator, ModelTensor and FeedDictCreator are project-local helpers.
import os
from threading import Thread

import cv2
import numpy as np
import tensorflow as tf


class EncodingExtractor(object):

    def __init__(self, train_dataset, val_dataset, root_dir, model_dir):
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.root_dir = root_dir
        self.model_dir = model_dir
        self.view = View()

    def extract(self, extraction_parameters):
        checkpoint_dir = os.path.abspath(os.path.join(self.model_dir, 'checkpoints'))
        checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
        batch_size = extraction_parameters.get_batch_size()

        image_resizer = ImageManipulator(extraction_parameters.get_output_image_width())

        val_length = len(self.val_dataset.get_texts())
        no_of_val_batches = -(-val_length // batch_size)  # ceil division

        graph = tf.Graph()
        with graph.as_default():
            sess = tf.Session()
            with sess.as_default():
                self.view.print_to_screen('Loading latest checkpoint: {}'.format(checkpoint_file))
                saver = tf.train.import_meta_graph('{}.meta'.format(checkpoint_file))
                saver.restore(sess, checkpoint_file)

                train_iterator, train_next_element = CustomIterator.create_iterator(self.train_dataset, batch_size)
                val_iterator, val_next_element = CustomIterator.create_iterator(self.val_dataset, batch_size)

                sess.run(train_iterator.initializer)
                sess.run(val_iterator.initializer)

                # Get the placeholders from the graph by name
                input_x = graph.get_operation_by_name('input_x').outputs[0]
                dropout_keep_prob = graph.get_operation_by_name('dropout_keep_prob').outputs[0]
                input_mask = graph.get_operation_by_name('input_mask').outputs[0]

                # Tensors we want to evaluate
                input_y = graph.get_operation_by_name('input_y').outputs[0]
                accuracy = graph.get_operation_by_name('accuracy/accuracy').outputs[0]
                sum_tensor = graph.get_operation_by_name('sum').outputs[0]

                input_tensor = ModelTensor(input_x, input_y, input_mask, dropout_keep_prob)

                correct = 0
                for _ in range(no_of_val_batches):
                    val_batch = sess.run(val_next_element)
                    path_list = [el.decode('UTF-8') for el in val_batch[2]]

                    test_images_batch = image_resizer.preprocess_images(val_batch[2])
                    feed_dict = FeedDictCreator.create_feed_dict(input_tensor, val_batch, test_images_batch, 1)

                    acc, img_sum = sess.run([accuracy, sum_tensor], feed_dict)

                    # weight by the actual batch length so a short final batch
                    # is not overcounted
                    correct += acc * len(path_list)
                    thread = Thread(target=self.embedding_to_image,
                                    args=(self.root_dir, img_sum, path_list, extraction_parameters))
                    thread.start()

                self.compute_and_print_accuracy(correct, val_length, 'Test')

                train_length = len(self.train_dataset.get_texts())
                no_of_train_batches = -(-train_length // batch_size)  # ceil division

                correct = 0
                for _ in range(no_of_train_batches):
                    train_batch = sess.run(train_next_element)
                    path_list = [el.decode('UTF-8') for el in train_batch[2]]

                    train_images_batch = image_resizer.preprocess_images(train_batch[2])
                    feed_dict = FeedDictCreator.create_feed_dict(input_tensor, train_batch, train_images_batch, 1)

                    acc, img_sum = sess.run([accuracy, sum_tensor], feed_dict)

                    correct += acc * len(path_list)
                    thread = Thread(target=self.embedding_to_image,
                                    args=(self.root_dir, img_sum, path_list, extraction_parameters))
                    thread.start()

                self.compute_and_print_accuracy(correct, train_length, 'Train')

    def embedding_to_image(self, root_dir, img_sum, image_paths, extraction_parameters):
        x = extraction_parameters.get_separator_size()
        y = extraction_parameters.get_separator_size()
        encoding_height = extraction_parameters.get_encoding_height()
        superpixels_per_row = extraction_parameters.get_superpixel_per_row()
        superpixel_w = extraction_parameters.get_superpixel_w()
        output_image_width = extraction_parameters.get_output_image_width()
        superpixel_h = extraction_parameters.get_superpixel_h()

        for image, path in zip(img_sum, image_paths):
            # mirror the last three path components under root_dir, saving as PNG
            dir_names = path.split('/')[-3:]
            full_path = os.path.join(root_dir, dir_names[0], dir_names[1],
                                     dir_names[2].replace('.jpg', '.png'))

            img = cv2.imread(path, cv2.IMREAD_COLOR)
            img = cv2.resize(img, (extraction_parameters.get_image_w(), extraction_parameters.get_image_h()))

            assert extraction_parameters.get_separator_size() + superpixels_per_row * superpixel_w \
                   <= extraction_parameters.get_image_w(), 'the image width is smaller than the visual word width'

            text_encoding_crop = image[0:encoding_height, 0:output_image_width, :]
            word_features = np.reshape(text_encoding_crop,
                                       (output_image_width * encoding_height * 3))  # C-like index ordering

            sp_i = 0  # superpixel index

            # write the embedding
            for row in range(y, int(y + extraction_parameters.get_superpixel_per_col() * superpixel_h),
                             superpixel_h):
                for col in range(x, int(x + superpixels_per_row * superpixel_w), superpixel_w):
                    # take the next three feature values as one BGR colour
                    ptl = sp_i * 3
                    ptr = (sp_i + 1) * 3
                    bgr = word_features[ptl:ptr]
                    if len(bgr) == 0:
                        break
                    elif len(bgr) < 3:
                        c = bgr.copy()
                        c.resize(1, 3)
                        bgr = c

                    # fill one superpixel block: superpixel_h rows by superpixel_w columns
                    for srow in range(row, row + superpixel_h):
                        for scol in range(col, col + superpixel_w):
                            img[srow, scol] = bgr * 255

                    sp_i += 1

            cv2.imwrite(full_path, img)

    def compute_and_print_accuracy(self, correct, split_dataset_length, phase):
        accuracy = correct / split_dataset_length
        self.view.print_to_screen(
            '{} accuracy: {} / {} = {}'.format(phase, int(correct), split_dataset_length, accuracy))
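
A hedged usage sketch for EncodingExtractor; the directories are hypothetical,
and extraction_parameters stands for the project-local settings object whose
getters are called above:

loader = DataLoader()
loader.load_data('data/train.tsv', 'data/val.tsv', '\t')
extractor = EncodingExtractor(loader.get_training_data(),
                              loader.get_val_data(),
                              root_dir='encodings',
                              model_dir='runs/textimg')
extractor.extract(extraction_parameters)
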
Example #3
# Assumed imports for this example (TF 1.x-style API); the remaining names
# (View, TextImgCNN, ImageManipulator, CustomIterator, ModelTensor,
# FeedDictCreator, FileLogger, Patience, Accuracy, TrainingResult,
# PartialResult) are project-local helpers.
import os

import tensorflow as tf


class ModelTrainer(object):
    def __init__(self, train_dataset, val_dataset):
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.view = View()

    def train(self, training_params, model_params):

        patience = Patience(model_params.get_patience())
        best_accuracy = Accuracy(0)

        output_width = training_params.get_output_image_width()

        image_resizer = ImageManipulator(output_width)

        with tf.Graph().as_default():
            sess = tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, log_device_placement=False))
            with sess.as_default():

                cnn = TextImgCNN(
                    sequence_length=self.train_dataset.get_texts().shape[1],
                    num_classes=self.train_dataset.get_labels().shape[1],
                    vocab_size=training_params.get_no_of_words_to_keep(),
                    embedding_size=training_params.get_embedding_dim(),
                    filter_sizes=list(
                        map(int,
                            training_params.get_filter_sizes().split(','))),
                    num_filters=training_params.get_num_filters(),
                    output_image_width=output_width,
                    encoding_height=training_params.get_encoding_height(),
                    l2_reg_lambda=0.0)

                train_iterator, next_train_batch = CustomIterator.create_iterator(
                    self.train_dataset, training_params.get_batch_size())
                test_iterator, next_test_element = CustomIterator.create_iterator(
                    self.val_dataset, training_params.get_batch_size())

                global_step = tf.Variable(0,
                                          name='global_step',
                                          trainable=False)
                optimizer = tf.train.AdamOptimizer(1e-3)
                grads_and_vars = optimizer.compute_gradients(cnn.loss)
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)

                # Keep track of gradient values and sparsity (optional)
                grad_summaries = []
                for g, v in grads_and_vars:
                    if g is not None:
                        grad_hist_summary = tf.summary.histogram(
                            '{}/grad/hist'.format(v.name), g)
                        sparsity_summary = tf.summary.scalar(
                            '{}/grad/sparsity'.format(v.name),
                            tf.nn.zero_fraction(g))
                        grad_summaries.append(grad_hist_summary)
                        grad_summaries.append(sparsity_summary)
                grad_summaries_merged = tf.summary.merge(grad_summaries)

                out_dir = model_params.get_model_directory()

                self.view.print_to_screen('Writing to {}\n'.format(out_dir))

                # Summaries for loss and test_accuracy
                loss_summary = tf.summary.scalar('loss', cnn.loss)
                acc_summary = tf.summary.scalar('test_accuracy', cnn.accuracy)

                # Train Summaries
                train_summary_op = tf.summary.merge(
                    [loss_summary, acc_summary, grad_summaries_merged])
                train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
                train_summary_writer = tf.summary.FileWriter(
                    train_summary_dir, sess.graph)

                # Dev summaries
                dev_summary_op = tf.summary.merge([loss_summary, acc_summary])

                saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

                file_logger = FileLogger(os.path.join(out_dir, 'result.txt'))
                file_logger.write_header(out_dir)

                sess.run(tf.global_variables_initializer())

                train_length = len(self.train_dataset.get_texts())

                # ceil division over the batch size
                no_of_training_batches = -(-train_length //
                                           training_params.get_batch_size())

                input_tensor = ModelTensor(cnn.input_x, cnn.input_y,
                                           cnn.input_mask,
                                           cnn.dropout_keep_prob)

                for epoch in range(model_params.get_no_of_epochs()):
                    sess.run(train_iterator.initializer)

                    for _ in range(no_of_training_batches):
                        train_batch = sess.run(next_train_batch)

                        train_images_batch = image_resizer.preprocess_images(
                            train_batch[2])

                        feed_dict = FeedDictCreator.create_feed_dict(
                            input_tensor, train_batch, train_images_batch,
                            training_params.get_dropout_keep_probability())

                        _, step, summaries, loss, accuracy = sess.run([
                            train_op, global_step, train_summary_op, cnn.loss,
                            cnn.accuracy
                        ], feed_dict)

                        train_summary_writer.add_summary(summaries, step)
                        current_step = tf.train.global_step(sess, global_step)

                        training_result = TrainingResult(step, loss, accuracy)

                        self.view.print_to_screen(str(training_result))

                        if current_step % model_params.evaluate_every == 0:
                            self.view.print_to_screen('Evaluation:')

                            val_length = len(self.val_dataset.get_texts())
                            # ceil division over the batch size
                            no_of_val_batches = -(
                                -val_length //
                                training_params.get_batch_size())

                            sess.run(test_iterator.initializer)
                            correct = 0

                            for _ in range(no_of_val_batches):
                                test_batch = sess.run(next_test_element)
                                test_images_batch = image_resizer.preprocess_images(
                                    test_batch[2])

                                feed_dict = FeedDictCreator.create_feed_dict(
                                    input_tensor, test_batch,
                                    test_images_batch, 1)

                                step, summaries, loss, accuracy = sess.run([
                                    global_step, dev_summary_op, cnn.loss,
                                    cnn.accuracy
                                ], feed_dict)

                                correct += accuracy * len(test_images_batch)

                            test_accuracy = Accuracy(correct / val_length)

                            partial_result = PartialResult(
                                epoch, current_step, test_accuracy,
                                best_accuracy, patience)
                            self.view.print_to_screen(str(partial_result))
                            file_logger.write_partial_result_to_file(
                                partial_result)

                            if test_accuracy > best_accuracy:
                                best_accuracy.set_value(
                                    test_accuracy.get_value())
                                patience.reset_patience()
                                path = self.store_model(
                                    model_params, current_step, sess, saver)
                                self.view.print_to_screen(
                                    'Saved model checkpoint to {}\n'.format(
                                        path))
                            else:
                                patience.decrement_patience()

                        if patience.is_zero():
                            return

    @staticmethod
    def store_model(model_params, current_step, sess, saver):
        checkpoint_dir = os.path.abspath(
            os.path.join(model_params.get_model_directory(), 'checkpoints'))
        checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
        return saver.save(sess, checkpoint_prefix, global_step=current_step)
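
A hedged usage sketch for ModelTrainer; training_params and model_params stand
for the project-local settings objects implied by the getters above:

loader = DataLoader()
loader.load_data('data/train.tsv', 'data/val.tsv', '\t', shuffle_data=True)
trainer = ModelTrainer(loader.get_training_data(), loader.get_val_data())
trainer.train(training_params, model_params)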