Esempio n. 1
0
def recognize(image_path: str, weights_path: str, files_limit=4):
    """Run CRNN text recognition on a set of images and print the results.

    Images are obtained from ``load_images(image_path, files_limit)``, copied
    into a zero-filled fixed-size batch, and decoded with CTC beam search
    after the checkpoint at ``weights_path`` has been restored. One line per
    loaded filename is printed, plus the wall-clock time of the inference
    call.

    Args:
        image_path: Forwarded unchanged to ``load_images``.
        weights_path: Checkpoint path handed to ``tf.train.Saver.restore``.
        files_limit: Forwarded unchanged to ``load_images``.
            NOTE(review): presumably the maximum number of images loaded; the
            batch buffer only has room for 32 -- confirm against
            ``load_images``.
    """
    # The graph is built for a fixed batch; unused slots stay zero.
    batch_size = 32

    decoder = TextFeatureIO().reader
    images, filenames = load_images(image_path, files_limit)
    images = np.squeeze(images)
    padded_images = np.zeros([batch_size, 32, 100, 3])
    padded_images[:images.shape[0], :, :, :] = images
    tf.reset_default_graph()

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[batch_size, 32, 100, 3],
                               name='input')

    images_sh = tf.cast(x=inputdata, dtype=tf.float32)

    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)
    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               25 * np.ones(batch_size),
                                               merge_repeated=False)

    # config tf saver
    saver = tf.train.Saver()

    # Using the session as a context manager installs it as the default
    # session and closes it on exit, even if restore or inference raises.
    with tf.Session() as sess:
        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)
        print("Predict...")
        start_time = time()
        predictions = sess.run(decoded, feed_dict={inputdata: padded_images})
        end_time = time()
        print("Prediction time: {}".format(end_time - start_time))
        preds_res = decoder.sparse_tensor_to_str(predictions[0])

        # Only the real files are reported; padding slots are ignored.
        for i, fname in enumerate(filenames):
            print("{}: {}".format(fname, preds_res[i]))
Esempio n. 2
0
def save_model(weights_path: str, output_path: str):
    """Rebuild the inference graph, restore its weights and export it.

    The graph takes a ``[BATCH_SIZE, 32, 100, 3]`` float placeholder named
    ``'input'`` and exposes a single tensor named ``'output'``: the flattened
    indices of the best CTC beam-search path followed by its values, both
    cast to int32 and concatenated along axis 0.

    Args:
        weights_path: Checkpoint path handed to ``tf.train.Saver.restore``.
        output_path: Forwarded to ``save_graph`` together with the session.
    """
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[BATCH_SIZE, 32, 100, 3],
                               name='input')

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)
    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 *
                                               np.ones(BATCH_SIZE),
                                               merge_repeated=False)

    # Cast the best path once and read both components from the result.
    best_path = tf.to_int32(decodes[0])
    sparse_tensor_values = best_path.values
    sparse_tensor_indices = best_path.indices
    flattened_indices = tf.to_int32(tf.reshape(sparse_tensor_indices, [-1]))

    # Never read from Python, but the op must exist in the graph under the
    # name 'output' so it is part of what gets exported.
    tf.concat([flattened_indices, sparse_tensor_values], 0, name='output')

    saver = tf.train.Saver()

    # The context manager makes the session the default one and guarantees it
    # is closed afterwards, including when restore or export fails.
    with tf.Session() as sess:
        saver.restore(sess=sess, save_path=weights_path)
        save_graph(sess, output_path)
Esempio n. 3
0
    def run(self):
        """Evaluate the restored CRNN model on the data from ``load_data``.

        Builds the test-phase network and a CTC beam-search decoder, restores
        the weights stored at ``self._weights_path``, starts the queue
        runners and delegates the evaluation itself to ``self.test``.

        Returns:
            A ``(accuracy, distance, avg_time)`` tuple. ``accuracy`` and
            ``distance`` are whatever ``self.test`` returns; ``avg_time`` is
            the mean of ``self._recognition_time``, which is reset to an
            empty list at the start of this call.
            NOTE(review): presumably ``self.test`` appends the per-batch
            timings -- confirm.
        """
        self._recognition_time = []
        images_sh, labels_sh, imagenames_sh = self.load_data()
        images_sh = tf.cast(x=images_sh, dtype=tf.float32)

        net = CRNN(phase='Test',
                   hidden_nums=256,
                   seq_length=25,
                   num_classes=37)
        with tf.variable_scope('shadow'):
            net_out = net.build(inputdata=images_sh)
        decoded, _ = tf.nn.ctc_beam_search_decoder(
            net_out,
            25 * np.ones(self._batch_size),
            merge_repeated=self._merge_repeated)
        sess_config = self.config_tf_session()

        # config tf saver
        saver = tf.train.Saver()

        # The session context manager closes the session on every exit path.
        with tf.Session(config=sess_config) as sess:
            # restore the model weights
            saver.restore(sess=sess, save_path=self._weights_path)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                self._log.info('Start predicting ...')
                accuracy, distance = self.test(decoded, imagenames_sh,
                                               images_sh, labels_sh, sess)
            finally:
                # Always stop and join the input threads, otherwise a failure
                # in test() would leave them running.
                coord.request_stop()
                coord.join(threads=threads)
        avg_time = np.mean(self._recognition_time)
        return accuracy, distance, avg_time
Esempio n. 4
0
 def _build_net_model(self, input_data):
     """Build the training-phase CRNN under the 'shadow' variable scope.

     Logs a message, instantiates ``CRNN`` in ``'Train'`` phase and returns
     the result of calling its ``build`` method on ``input_data``.
     """
     self._log.info('Build net model...')
     model = CRNN(phase='Train', hidden_nums=256, seq_length=25,
                  num_classes=37)
     # reuse=False: the variables are created here, not shared.
     with tf.variable_scope('shadow', reuse=False):
         return model.build(inputdata=input_data)
Esempio n. 5
0
def recognize(image_path: str,
              weights_path: str,
              config: GlobalConfig,
              is_vis=True):
    """Recognize the text in a single image and log the prediction.

    The image returned by ``load_and_resize_image(image_path)`` is fed to a
    test-phase CRNN with a ``[1, 32, 100, 3]`` input, decoded with CTC beam
    search, and the first decoded string is logged together with the file
    name.

    Args:
        image_path: Path of the image to recognize.
        weights_path: Checkpoint path handed to ``tf.train.Saver.restore``.
        config: Supplies the GPU settings through ``get_gpu_config()``
            (``memory_fraction`` and ``is_tf_growth_allowed()``).
        is_vis: When true, the image is also displayed with matplotlib.
    """
    logger = LogFactory.get_logger()
    image = load_and_resize_image(image_path)

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, 32, 100, 3],
                               name='input')

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(1),
                                               merge_repeated=False)

    decoder = TextFeatureIO()

    # config tf session
    gpu_config = config.get_gpu_config()
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = (
        gpu_config.memory_fraction)
    sess_config.gpu_options.allow_growth = gpu_config.is_tf_growth_allowed()

    # config tf saver
    saver = tf.train.Saver()

    # The context manager installs the session as default and closes it on
    # exit, so it is released even if restore, inference or plotting raises.
    with tf.Session(config=sess_config) as sess:
        saver.restore(sess=sess, save_path=weights_path)
        preds = sess.run(decodes, feed_dict={inputdata: image})
        preds = decoder.writer.sparse_tensor_to_str(preds[0])
        logger.info('Predict image {:s} label {:s}'.format(
            ops.split(image_path)[1], preds[0]))

        if is_vis:
            plt.figure('CRNN Model Demo')
            # OpenCV loads BGR; reorder the channels to RGB for matplotlib.
            plt.imshow(
                cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
            plt.show()
Esempio n. 6
0
def recognize(image_path: str,
              weights_path: str,
              output_file: str,
              files_limit=32):
    """Recognize text in every file of a folder and append results to a CSV.

    All regular files directly inside ``image_path`` are processed in chunks
    of ``BATCH_SIZE``. For each image one line is appended to
    ``output_file``: the file's base name followed by
    ``NUMBER_OF_PREDICTIONS`` pairs of ``,<decoded text>,<probability>``,
    where the probabilities are a softmax over the beam-search log
    probabilities of that image.

    Args:
        image_path: Directory whose files are used as input images.
        weights_path: Checkpoint path handed to ``tf.train.Saver.restore``.
        output_file: File opened in append mode for the result lines.
        files_limit: Forwarded unchanged to ``load_images``.
    """
    decoder = TextFeatureIO().reader
    # Read all the files in the images folder
    files = [
        join(image_path, f) for f in listdir(image_path)
        if isfile(join(image_path, f))
    ]
    tf.reset_default_graph()
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[BATCH_SIZE, 32, 100, 3],
                               name='input')
    images_sh = tf.cast(x=inputdata, dtype=tf.float32)
    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)
    # top_paths=NUMBER_OF_PREDICTIONS is the number of words to predict
    decoded, log_probabilities = tf.nn.ctc_beam_search_decoder(
        net_out,
        25 * np.ones(BATCH_SIZE),
        merge_repeated=False,
        top_paths=NUMBER_OF_PREDICTIONS)

    # config tf saver
    saver = tf.train.Saver()

    # The context manager installs the session as default and closes it on
    # exit, including when restore or inference raises.
    with tf.Session() as sess:
        # restore the model weights
        print("Restoring trained model")
        saver.restore(sess=sess, save_path=weights_path)
        print("Predicting {} images in chunks of {}".format(
            len(files), BATCH_SIZE))
        starting_time = time()
        # Defined up front so the summary below also works when the folder
        # holds no files and the loop body never runs.
        end_time = starting_time

        # Run inference in groups of BATCH_SIZE images
        # Run it with all the files from the provided folder
        for group in chunker(files, BATCH_SIZE):
            start_time = time()
            images, filenames = load_images(group, files_limit)
            images = np.squeeze(images)
            # Zero-pad a short final chunk up to the fixed batch size.
            padded_images = np.zeros([BATCH_SIZE, 32, 100, 3])
            padded_images[:images.shape[0], :, :, :] = images

            predictions, probs = sess.run([decoded, log_probabilities],
                                          feed_dict={inputdata: padded_images})

            # Decode each top path once per batch rather than once per image.
            decoded_paths = [
                decoder.sparse_tensor_to_str(predictions[x])
                for x in range(NUMBER_OF_PREDICTIONS)
            ]

            # One open per batch; append mode keeps earlier results.
            with open(output_file, 'a') as f:
                for i, fname in enumerate(filenames):
                    # log_probabilities is recomputed for softmax probs
                    exp_probs = np.exp(probs[i, :])
                    e_x = exp_probs / np.sum(exp_probs)

                    # build the N predictions for this image
                    result = basename(fname) + ''.join(
                        ',{:s},{:f}'.format(decoded_paths[x][i], e_x[x])
                        for x in range(NUMBER_OF_PREDICTIONS))
                    f.write(result)
                    f.write('\n')
            end_time = time()
            print("Prediction time for {} images: {}".format(
                BATCH_SIZE, end_time - start_time))

        print("Total prediction time: {}".format(end_time - starting_time))
        print("Predictions saved in file {}".format(output_file))