Example #1
def recognize(image_path, weights_path, is_vis=True):
    """

    :param image_path:
    :param weights_path:
    :param is_vis:
    :return:
    """
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (100, 32))
    image = np.expand_dims(image, axis=0).astype(np.float32)

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, 32, 100, 3],
                               name='input')

    net = crnn_model.ShadowNet(phase='Test',
                               hidden_nums=256,
                               layers_nums=2,
                               seq_length=25,
                               num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(1),
                                               merge_repeated=False)

    decoder = data_utils.TextFeatureIO()

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        preds = sess.run(decodes, feed_dict={inputdata: image})

        preds = decoder.writer.sparse_tensor_to_str(preds[0])

        logger.info('Predict image {:s} label {:s}'.format(
            ops.split(image_path)[1], preds[0]))

        if is_vis:
            plt.figure('CRNN Model Demo')
            plt.imshow(
                cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
            plt.show()

        sess.close()

    return
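
Usage is a single call; a minimal sketch, assuming hypothetical checkpoint and image paths:

if __name__ == '__main__':
    # Both paths below are placeholders; point them at your own files.
    recognize(image_path='data/test_images/test_01.jpg',
              weights_path='model/shadownet/shadownet.ckpt',
              is_vis=True)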
Example #2
def write_features(dataset_dir, save_dir):
    """

    :param dataset_dir:
    :param save_dir:
    :return:
    """
    if not ops.exists(save_dir):
        os.makedirs(save_dir)

    print('Initialize the dataset provider ......')
    provider = data_provider.TextDataProvider(dataset_dir=dataset_dir, annotation_name='sample.txt',
                                              validation_set=True, validation_split=0.15, shuffle=None,
                                              normalization=None)
    print('Dataset provider initialization complete')

    feature_io = data_utils.TextFeatureIO()

    # write train tfrecords
    print('Start writing training tf records')

    train_images = provider.train.images
    train_images = [bytes(list(np.reshape(tmp, [100 * 32 * 3]))) for tmp in train_images]
    train_labels = provider.train.labels
    train_imagenames = provider.train.imagenames

    train_tfrecord_path = ops.join(save_dir, 'train_feature.tfrecords')
    feature_io.writer.write_features(tfrecords_path=train_tfrecord_path, labels=train_labels, images=train_images,
                                     imagenames=train_imagenames)

    # write test tfrecords
    print('Start writing testing tf records')

    test_images = provider.test.images
    test_images = [bytes(list(np.reshape(tmp, [100 * 32 * 3]))) for tmp in test_images]
    test_labels = provider.test.labels
    test_imagenames = provider.test.imagenames

    test_tfrecord_path = ops.join(save_dir, 'test_feature.tfrecords')
    feature_io.writer.write_features(tfrecords_path=test_tfrecord_path, labels=test_labels, images=test_images,
                                     imagenames=test_imagenames)

    # write val tfrecords
    print('Start writing validation tf records')

    val_images = provider.validation.images
    val_images = [bytes(list(np.reshape(tmp, [100 * 32 * 3]))) for tmp in val_images]
    val_labels = provider.validation.labels
    val_imagenames = provider.validation.imagenames

    val_tfrecord_path = ops.join(save_dir, 'validation_feature.tfrecords')
    feature_io.writer.write_features(tfrecords_path=val_tfrecord_path, labels=val_labels, images=val_images,
                                     imagenames=val_imagenames)

    return
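
A hedged invocation sketch, assuming dataset_dir contains sample.txt plus the images it references:

# Hypothetical paths.
write_features(dataset_dir='data/dataset', save_dir='data/tfrecords')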
Example #3
def getRecognize():
    #该函数用于从ckpt文件中恢复网络,并返回一个可以识别图像内容的recognize函数
    with tf.Graph().as_default() as net2_graph:
        inputdata = tf.placeholder(dtype=tf.float32, shape=[1, 32, 100, 3], name='input')

        net = crnn_model.ShadowNet(phase='Test', hidden_nums=256, layers_nums=2, seq_length=25, num_classes=20)

        with tf.variable_scope('shadow'):
            net_out = net.build_shadownet(inputdata=inputdata)

        decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out, sequence_length=25*np.ones(1), merge_repeated=False)
        decoder = data_utils.TextFeatureIO()
        
        saver2 = tf.train.Saver()

    sess2 = tf.Session(graph=net2_graph)   
    saver2.restore(sess=sess2, save_path=FLAGS.crnnWeightsPath)

    def recognize(img, rst):
        '''
            Recognize text at the locations produced by the upstream detection network and mark it on the image
            Inputs:
                img: the original image to recognize
                rst: dict containing the RBOX coordinates
            Returns:
                img: the image annotated with the recognition results
        '''
        for t in rst['text_lines']:
            # Draw the RBOX polygon first
            d = np.array([t['x0'], t['y0'], t['x1'], t['y1'], t['x2'],
                        t['y2'], t['x3'], t['y3']], dtype='int32')
            d = d.reshape(-1, 2)  # group the flat coordinates into pairs, forming the four corner points
            cv2.polylines(img, [d], isClosed=True, color=(255, 255, 0))
            
            # Crop the RBOX contents and feed them to the recognizer
            x0 = int(min(t['x0'], t['x1'], t['x2'], t['x3']))
            x1 = int(max(t['x0'], t['x1'], t['x2'], t['x3']))
            y0 = int(min(t['y0'], t['y1'], t['y2'], t['y3']))
            y1 = int(max(t['y0'], t['y1'], t['y2'], t['y3']))
            offset = (x1-x0)//10  # the detector often underestimates the RBOX width, so pad it horizontally
            image = img[y0:y1, max(0, x0-offset):x1+offset]
            image = cv2.resize(image, (100, 32))
            image = np.expand_dims(image, axis=0).astype(np.float32)
            preds = sess2.run(decodes, feed_dict={inputdata: image})
            preds = decoder.writer.sparse_tensor_to_str(preds[0])

            # Judge the recognition result and, if wrong, annotate it on the image
            if not judge(str(preds[0])):
                img = cv2.putText(img, preds[0], (x1, y1), cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 0, 0), True)
                img = cv2.line(img, (x0, y0), (x1, y1), (0, 0, 255), thickness=4)

        return img

    return recognize
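
The returned closure keeps sess2 alive, so the checkpoint is restored once and reused across calls. A hedged usage sketch; the rst layout follows the 'text_lines' / x0..y3 keys the function reads, with placeholder coordinates:

recognize = getRecognize()
img = cv2.imread('demo.jpg')  # hypothetical input image
rst = {'text_lines': [{'x0': 10, 'y0': 5, 'x1': 120, 'y1': 5,
                       'x2': 120, 'y2': 40, 'x3': 10, 'y3': 40}]}
annotated = recognize(img, rst)
cv2.imwrite('demo_annotated.jpg', annotated)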
Example #4
def execute(self, data, batch_size):
    sess = self.output['sess']
    x = self.output['x']
    y_ = self.output['y_']
    decoder = data_utils.TextFeatureIO()
    ret = []
    for i in range(batch_size):
        image = Image.open(data[i])
        image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        image = cv2.resize(image, (config.cfg.TRAIN.width, 32))
        image = np.expand_dims(image, axis=0).astype(np.float32)
        preds = sess.run(y_, feed_dict={x: image})
        preds = decoder.writer.sparse_tensor_to_str(preds[0])[0] + '\n'
        ret.append(preds)
    return ret
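
execute assumes self.output was populated elsewhere with a live session ('sess') and the input/decode tensors ('x', 'y_'). A hedged call sketch, with model standing in for such an instance:

results = model.execute(data=['img_0.png', 'img_1.png'], batch_size=2)  # hypothetical image paths
for line in results:
    print(line, end='')  # each entry already ends with '\n'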
Example #5
File: main1.py Project: JD07/ArtQunProject
def getRecognize():
    # Restore the network from a ckpt file and return a recognize function for reading image content
    with tf.Graph().as_default() as net2_graph:
        inputdata = tf.placeholder(dtype=tf.float32,
                                   shape=[1, 32, 100, 3],
                                   name='input')

        net = crnn_model.ShadowNet(phase='Test',
                                   hidden_nums=256,
                                   layers_nums=2,
                                   seq_length=25,
                                   num_classes=19)

        with tf.variable_scope('shadow'):
            net_out = net.build_shadownet(inputdata=inputdata)

        decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                                   sequence_length=25 *
                                                   np.ones(1),
                                                   merge_repeated=False)
        decoder = data_utils.TextFeatureIO()

        saver2 = tf.train.Saver()

    sess2 = tf.Session(graph=net2_graph)
    saver2.restore(sess=sess2, save_path=FLAGS.crnnWeightsPath)

    def recognize(path):
        '''
            Run CRNN recognition on every image under the given path and print the results to the console
            Inputs:
                path: path to the dataset directory
            Returns:
        '''
        imageList = getfilelist(path)
        for imagePath in imageList:
            image = cv2.imread(imagePath, cv2.IMREAD_COLOR)
            image = cv2.resize(image, (100, 32))
            image = np.expand_dims(image, axis=0).astype(np.float32)
            preds = sess2.run(decodes, feed_dict={inputdata: image})
            preds = decoder.writer.sparse_tensor_to_str(preds[0])
            print('Predict image {:s} label {:s}'.format(
                os.path.split(imagePath)[1], preds[0]))

    return recognize
Example #6
def test_shadownet(dataset_dir, weights_path, is_vis=False, is_recursive=True):
    """

    :param dataset_dir:
    :param weights_path:
    :param is_vis:
    :param is_recursive:
    :return:
    """
    # Initialize the record decoder
    decoder = data_utils.TextFeatureIO().reader
    images_t, labels_t, imagenames_t = decoder.read_features(ops.join(
        dataset_dir, 'test_feature.tfrecords'),
                                                             num_epochs=None)
    if not is_recursive:
        images_sh, labels_sh, imagenames_sh = tf.train.shuffle_batch(
            tensors=[images_t, labels_t, imagenames_t],
            batch_size=32,
            capacity=1000 + 32 * 2,
            min_after_dequeue=2,
            num_threads=4)
    else:
        images_sh, labels_sh, imagenames_sh = tf.train.batch(
            tensors=[images_t, labels_t, imagenames_t],
            batch_size=32,
            capacity=1000 + 32 * 2,
            num_threads=4)

    images_sh = tf.cast(x=images_sh, dtype=tf.float32)

    # build shadownet
    net = crnn_model.ShadowNet(phase='Test',
                               hidden_nums=256,
                               layers_nums=2,
                               seq_length=25,
                               num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=images_sh)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               25 * np.ones(32),
                                               merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    test_sample_count = 0
    for record in tf.python_io.tf_record_iterator(
            ops.join(dataset_dir, 'test_feature.tfrecords')):
        test_sample_count += 1
    loops_nums = int(math.ceil(test_sample_count / 32))
    # loops_nums = 100

    with sess.as_default():

        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        print('Start predicting ......')
        if not is_recursive:
            predictions, images, labels, imagenames = sess.run(
                [decoded, images_sh, labels_sh, imagenames_sh])
            imagenames = np.reshape(imagenames, newshape=imagenames.shape[0])
            imagenames = [tmp.decode('utf-8') for tmp in imagenames]
            preds_res = decoder.sparse_tensor_to_str(predictions[0])
            gt_res = decoder.sparse_tensor_to_str(labels)

            accuracy = []

            for index, gt_label in enumerate(gt_res):
                pred = preds_res[index]
                total_count = len(gt_label)
                correct_count = 0
                try:
                    for i, tmp in enumerate(gt_label):
                        if tmp == pred[i]:
                            correct_count += 1
                except IndexError:
                    continue
                finally:
                    try:
                        accuracy.append(correct_count / total_count)
                    except ZeroDivisionError:
                        if len(pred) == 0:
                            accuracy.append(1)
                        else:
                            accuracy.append(0)

            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
            print('Mean test accuracy is {:5f}'.format(accuracy))

            for index, image in enumerate(images):
                print(
                    'Predict {:s} image with gt label: {:s} **** predict label: {:s}'
                    .format(imagenames[index], gt_res[index],
                            preds_res[index]))
                if is_vis:
                    plt.imshow(image[:, :, (2, 1, 0)])
                    plt.show()
        else:
            accuracy = []
            for epoch in range(loops_nums):
                predictions, images, labels, imagenames = sess.run(
                    [decoded, images_sh, labels_sh, imagenames_sh])
                imagenames = np.reshape(imagenames,
                                        newshape=imagenames.shape[0])
                imagenames = [tmp.decode('utf-8') for tmp in imagenames]
                preds_res = decoder.sparse_tensor_to_str(predictions[0])
                gt_res = decoder.sparse_tensor_to_str(labels)

                for index, gt_label in enumerate(gt_res):
                    pred = preds_res[index]
                    total_count = len(gt_label)
                    correct_count = 0
                    try:
                        for i, tmp in enumerate(gt_label):
                            if tmp == pred[i]:
                                correct_count += 1
                    except IndexError:
                        continue
                    finally:
                        try:
                            accuracy.append(correct_count / total_count)
                        except ZeroDivisionError:
                            if len(pred) == 0:
                                accuracy.append(1)
                            else:
                                accuracy.append(0)

                for index, image in enumerate(images):
                    print(
                        'Predict {:s} image with gt label: {:s} **** predict label: {:s}'
                        .format(imagenames[index], gt_res[index],
                                preds_res[index]))
                    # if is_vis:
                    #     plt.imshow(image[:, :, (2, 1, 0)])
                    #     plt.show()

            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
            print('Test accuracy is {:5f}'.format(accuracy))

        coord.request_stop()
        coord.join(threads=threads)

    sess.close()
    return
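
A hedged invocation sketch with placeholder paths; is_recursive=True walks the whole test set in fixed order, while False scores a single shuffled batch:

test_shadownet(dataset_dir='data/tfrecords',
               weights_path='model/shadownet/shadownet.ckpt',  # hypothetical checkpoint
               is_vis=False,
               is_recursive=True)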
Example #7
def write_features(dataset_dir, save_dir, batch_size):
    """

    :param dataset_dir:
    :param save_dir:
    :param batch_size:
    :return:
    """
    if not ops.exists(save_dir):
        os.makedirs(save_dir)

    print('Initialize the dataset provider ......')
    provider = data_provider.TextDataProvider(dataset_dir=dataset_dir, annotation_name='sample.txt',
                                              validation_set=True, validation_split=0.05, shuffle='every_epoch',
                                              normalization=None)
    print('Dataset provider initialization complete')

    feature_io = data_utils.TextFeatureIO()

    # write train tfrecords
    print('Start writing training tf records')

    train_images_nums = provider.train.num_examples
    epoch_nums = int(math.ceil(train_images_nums / batch_size))
    for loop in tqdm.tqdm(range(epoch_nums)):
        train_images, train_labels, train_imagenames = provider.train.next_batch(batch_size=batch_size)
        train_images = [cv2.resize(tmp, (width, 32)) for tmp in train_images]
        train_images = [bytes(list(np.reshape(tmp, [width * 32 * 3]))) for tmp in train_images]

        if loop*batch_size+batch_size > train_images_nums:
            train_tfrecord_path = ops.join(save_dir, 'train_feature_{:d}_{:d}.tfrecords'.format(
                loop * batch_size, train_images_nums))
        else:
            train_tfrecord_path = ops.join(save_dir, 'train_feature_{:d}_{:d}.tfrecords'.format(
                loop*batch_size, loop*batch_size+batch_size))
        feature_io.writer.write_features(tfrecords_path=train_tfrecord_path, labels=train_labels, images=train_images,
                                         imagenames=train_imagenames)

    # write test tfrecords
    print('Start writing testing tf records')

    test_images_nums = provider.test.num_examples
    epoch_nums = int(math.ceil(test_images_nums / batch_size))
    for loop in tqdm.tqdm(range(epoch_nums)):
        test_images, test_labels, test_imagenames = provider.test.next_batch(batch_size=batch_size)
        test_images = [cv2.resize(tmp, (width, 32)) for tmp in test_images]  # cv2.resize size is (width, height)
        test_images = [bytes(list(np.reshape(tmp, [width * 32 * 3]))) for tmp in test_images]

        if loop * batch_size + batch_size > test_images_nums:
            test_tfrecord_path = ops.join(save_dir, 'test_feature_{:d}_{:d}.tfrecords'.format(
                loop*batch_size, test_images_nums))
        else:
            test_tfrecord_path = ops.join(save_dir, 'test_feature_{:d}_{:d}.tfrecords'.format(
                loop * batch_size, loop * batch_size + batch_size))
        feature_io.writer.write_features(tfrecords_path=test_tfrecord_path, labels=test_labels, images=test_images,
                                         imagenames=test_imagenames)

    # write val tfrecords
    print('Start writing validation tf records')

    val_image_nums = provider.validation.num_examples
    epoch_nums = int(math.ceil(val_image_nums / batch_size))
    for loop in tqdm.tqdm(range(epoch_nums)):
        val_images, val_labels, val_imagenames = provider.validation.next_batch(batch_size=batch_size)
        val_images = [cv2.resize(tmp, (width, 32)) for tmp in val_images]  # cv2.resize size is (width, height)
        val_images = [bytes(list(np.reshape(tmp, [width * 32 * 3]))) for tmp in val_images]

        if loop*batch_size+batch_size > val_image_nums:
            val_tfrecord_path = ops.join(save_dir, 'validation_feature_{:d}_{:d}.tfrecords'.format(
                loop*batch_size, val_image_nums))
        else:
            val_tfrecord_path = ops.join(save_dir, 'validation_feature_{:d}_{:d}.tfrecords'.format(
                loop * batch_size, loop*batch_size+batch_size))
        feature_io.writer.write_features(tfrecords_path=val_tfrecord_path, labels=val_labels, images=val_images,
                                         imagenames=val_imagenames)

    return
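
Note that width is a module-level constant not shown in this snippet; a hedged usage sketch under the assumption that it is defined alongside the function:

width = 100  # assumed module-level constant read by write_features above
write_features(dataset_dir='data/dataset', save_dir='data/tfrecords', batch_size=1000)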
Example #8
def train_shadownet(dataset_dir, weights_path=None):
    """

    :param dataset_dir:
    :param weights_path:
    :return:
    """
    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO().reader
    images, labels, imagenames = decoder.read_features(ops.join(
        dataset_dir, 'train_feature.tfrecords'),
                                                       num_epochs=None)
    inputdata, input_labels, input_imagenames = tf.train.shuffle_batch(
        tensors=[images, labels, imagenames],
        batch_size=32,
        capacity=1000 + 2 * 32,
        min_after_dequeue=100,
        num_threads=1)

    inputdata = tf.cast(x=inputdata, dtype=tf.float32)

    # initialize the net model
    shadownet = crnn_model.ShadowNet(phase='Train',
                                     hidden_nums=256,
                                     layers_nums=2,
                                     seq_length=25,
                                     num_classes=37)

    with tf.variable_scope('shadow', reuse=False):
        net_out = shadownet.build_shadownet(inputdata=inputdata)

    cost = tf.reduce_mean(
        tf.nn.ctc_loss(labels=input_labels,
                       inputs=net_out,
                       sequence_length=25 * np.ones(32)))

    decoded, log_prob = tf.nn.ctc_beam_search_decoder(net_out,
                                                      25 * np.ones(32),
                                                      merge_repeated=False)

    sequence_dist = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = config.cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               config.cfg.TRAIN.LR_DECAY_STEPS,
                                               config.cfg.TRAIN.LR_DECAY_RATE,
                                               staircase=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=learning_rate).minimize(loss=cost,
                                                  global_step=global_step)

    # Set tf summary
    tboard_save_path = 'tboard/shadownet'
    if not ops.exists(tboard_save_path):
        os.makedirs(tboard_save_path)
    tf.summary.scalar(name='Cost', tensor=cost)
    tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
    tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    model_save_dir = 'model/shadownet'
    if not ops.exists(model_save_dir):
        os.makedirs(model_save_dir)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                     time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(model_save_dir, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(tboard_save_path)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = config.cfg.TRAIN.EPOCHS

    with sess.as_default():
        if weights_path is None:
            logger.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            logger.info('Restore model from {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for epoch in range(train_epochs):
            _, c, seq_distance, preds, gt_labels, summary = sess.run([
                optimizer, cost, sequence_dist, decoded, input_labels,
                merge_summary_op
            ])

            # calculate the precision
            preds = decoder.sparse_tensor_to_str(preds[0])
            gt_labels = decoder.sparse_tensor_to_str(gt_labels)

            accuracy = []

            for index, gt_label in enumerate(gt_labels):
                pred = preds[index]
                total_count = len(gt_label)
                correct_count = 0
                try:
                    for i, tmp in enumerate(gt_label):
                        if tmp == pred[i]:
                            correct_count += 1
                except IndexError:
                    continue
                finally:
                    try:
                        accuracy.append(correct_count / total_count)
                    except ZeroDivisionError:
                        if len(pred) == 0:
                            accuracy.append(1)
                        else:
                            accuracy.append(0)
            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
            #
            if epoch % config.cfg.TRAIN.DISPLAY_STEP == 0:
                logger.info(
                    'Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'
                    .format(epoch + 1, c, seq_distance, accuracy))

            summary_writer.add_summary(summary=summary, global_step=epoch)
            saver.save(sess=sess, save_path=model_save_path, global_step=epoch)

        coord.request_stop()
        coord.join(threads=threads)

    sess.close()

    return
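
The per-character accuracy loop above recurs almost verbatim across these examples; a minimal standalone sketch of the same logic (empty ground truth scores 1.0 only when the prediction is also empty):

def per_char_accuracy(gt_labels, preds):
    # Fraction of ground-truth characters matched position-by-position,
    # averaged over the batch; mirrors the inline try/except/finally loop.
    scores = []
    for gt, pred in zip(gt_labels, preds):
        if len(gt) == 0:
            scores.append(1.0 if len(pred) == 0 else 0.0)
            continue
        correct = sum(1 for i, ch in enumerate(gt)
                      if i < len(pred) and ch == pred[i])
        scores.append(correct / len(gt))
    return float(np.mean(scores))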
Example #9
def train_shadownet(cfg: EasyDict,
                    weights_path: str = None,
                    decode: bool = False,
                    num_threads: int = 4):
    """
    :param cfg: configuration EasyDict (e.g. global_config.config.cfg)
    :param weights_path: Path to stored weights
    :param decode: Whether to perform CTC decoding to report progress during training
    :param num_threads: Number of threads to use in tf.train.shuffle_batch
    """
    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO(
        char_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'char_dict.json'),
        ord_map_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR,
                                   'ord_map.json')).reader
    images, labels, imagenames = decoder.read_features(
        ops.join(cfg.PATH.TFRECORDS_DIR, 'train_feature.tfrecords'),
        num_epochs=None,
        input_size=cfg.ARCH.INPUT_SIZE,
        input_channels=cfg.ARCH.INPUT_CHANNELS)
    inputdata, input_labels, input_imagenames = tf.train.shuffle_batch(
        tensors=[images, labels, imagenames],
        batch_size=cfg.TRAIN.BATCH_SIZE,
        capacity=1000 + 2 * cfg.TRAIN.BATCH_SIZE,
        min_after_dequeue=100,
        num_threads=num_threads)

    inputdata = tf.cast(x=inputdata, dtype=tf.float32)

    # initialise the net model
    shadownet = crnn_model.ShadowNet(phase='Train',
                                     hidden_nums=cfg.ARCH.HIDDEN_UNITS,
                                     layers_nums=cfg.ARCH.HIDDEN_LAYERS,
                                     num_classes=len(decoder.char_dict) + 1)

    with tf.variable_scope('shadow', reuse=False):
        net_out = shadownet.build_shadownet(inputdata=inputdata)

    cost = tf.reduce_mean(
        tf.nn.ctc_loss(labels=input_labels,
                       inputs=net_out,
                       sequence_length=cfg.ARCH.SEQ_LENGTH *
                       np.ones(cfg.TRAIN.BATCH_SIZE)))

    decoded, log_prob = tf.nn.ctc_beam_search_decoder(
        net_out,
        cfg.ARCH.SEQ_LENGTH * np.ones(cfg.TRAIN.BATCH_SIZE),
        merge_repeated=False)

    sequence_dist = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               cfg.TRAIN.LR_DECAY_STEPS,
                                               cfg.TRAIN.LR_DECAY_RATE,
                                               staircase=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=learning_rate).minimize(loss=cost,
                                                  global_step=global_step)

    # Set tf summary
    os.makedirs(cfg.PATH.TBOARD_SAVE_DIR, exist_ok=True)
    tf.summary.scalar(name='Cost', tensor=cost)
    tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
    tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    os.makedirs(cfg.PATH.MODEL_SAVE_DIR, exist_ok=True)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                     time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(cfg.PATH.MODEL_SAVE_DIR, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = cfg.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(cfg.PATH.TBOARD_SAVE_DIR)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = cfg.TRAIN.EPOCHS

    with sess.as_default():
        if weights_path is None:
            logger.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            logger.info('Restore model from {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for epoch in range(train_epochs):
            if decode:
                _, c, seq_distance, predictions, labels, summary = sess.run([
                    optimizer, cost, sequence_dist, decoded, input_labels,
                    merge_summary_op
                ])

                labels = decoder.sparse_tensor_to_str(labels)
                predictions = decoder.sparse_tensor_to_str(predictions[0])
                accuracy = compute_accuracy(labels, predictions)

                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info(
                        'Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'
                        .format(epoch + 1, c, seq_distance, accuracy))

            else:
                _, c, summary = sess.run([optimizer, cost, merge_summary_op])
                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info('Epoch: {:d} cost= {:9f}'.format(epoch + 1, c))

            summary_writer.add_summary(summary=summary, global_step=epoch)
            saver.save(sess=sess, save_path=model_save_path, global_step=epoch)

        coord.request_stop()
        coord.join(threads=threads)
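
A hedged invocation sketch, assuming cfg follows the EasyDict layout named in the docstring (cfg.PATH.*, cfg.ARCH.*, cfg.TRAIN.*):

from config import global_config  # assumed project layout

train_shadownet(cfg=global_config.config.cfg, weights_path=None, decode=True, num_threads=4)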
Example #10
def recognize(image_path: str,
              weights_path: str,
              cfg: EasyDict,
              is_vis: bool = True,
              num_classes: int = 0):
    """

    :param image_path:
    :param weights_path: Path to stored weights
    :param cfg:
    :param is_vis:
    :param num_classes:
    """

    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, tuple(cfg.ARCH.INPUT_SIZE))
    image = np.expand_dims(image, axis=0).astype(np.float32)

    w, h = cfg.ARCH.INPUT_SIZE
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, h, w, cfg.ARCH.INPUT_CHANNELS],
                               name='input')

    codec = data_utils.TextFeatureIO(
        char_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'char_dict.json'),
        ord_map_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'ord_map.json'))

    num_classes = len(
        codec.reader.char_dict) + 1 if num_classes == 0 else num_classes

    net = crnn_model.ShadowNet(phase='Test',
                               hidden_nums=cfg.ARCH.HIDDEN_UNITS,
                               layers_nums=cfg.ARCH.HIDDEN_LAYERS,
                               num_classes=num_classes)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(
        inputs=net_out,
        sequence_length=cfg.ARCH.SEQ_LENGTH * np.ones(1),
        merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    with sess.as_default():

        saver.restore(sess=sess, save_path=weights_path)

        preds = sess.run(decodes, feed_dict={inputdata: image})

        preds = codec.writer.sparse_tensor_to_str(preds[0])

        logger.info('Predict image {:s} label {:s}'.format(
            ops.split(image_path)[1], preds[0]))

        if is_vis:
            plt.figure('CRNN Model Demo')
            plt.imshow(
                cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
            plt.show()

        sess.close()
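
A hedged call sketch with placeholder paths, again assuming the global_config layout from the docstring above:

recognize(image_path='data/test_images/test_01.jpg',  # hypothetical image
          weights_path='model/shadownet/shadownet.ckpt',
          cfg=global_config.config.cfg,
          is_vis=True)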
Example #11
def train_shadownet():
    """

    :param dataset_dir:
    :param weights_path:
    :return:
    """
    # input_tensor = tf.placeholder(dtype=tf.float32, shape=[config.cfg.TRAIN.BATCH_SIZE, 32, 100, 3],
    #                               name='input_tensor')

    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO().reader
    images, labels, imagenames = decoder.read_features(FLAGS.dataset_dir,
                                                       num_epochs=None,
                                                       flag='Train')
    # images_val, labels_val, imagenames_val = decoder.read_features(dataset_dir, num_epochs=None,
    #                                                                flag='Validation')
    inputdata, input_labels, input_imagenames = tf.train.shuffle_batch(
        tensors=[images, labels, imagenames],
        batch_size=config.cfg.TRAIN.BATCH_SIZE,
        capacity=1000 + 2 * config.cfg.TRAIN.BATCH_SIZE,
        min_after_dequeue=100,
        num_threads=1)

    # inputdata_val, input_labels_val, input_imagenames_val = tf.train.shuffle_batch(
    #     tensors=[images_val, labels_val, imagenames_val], batch_size=config.TRAIN.BATCH_SIZE,
    #     capacity=1000 + 2 * config.TRAIN.BATCH_SIZE,
    #     min_after_dequeue=100, num_threads=1)

    inputdata = tf.cast(x=inputdata, dtype=tf.float32)
    phase_tensor = tf.placeholder(dtype=tf.string, shape=None, name='phase')
    accuracy_tensor = tf.placeholder(dtype=tf.float32,
                                     shape=None,
                                     name='accuracy_tensor')

    # initialize the net model
    shadownet = crnn_model.ShadowNet(phase=phase_tensor,
                                     hidden_nums=256,
                                     layers_nums=2,
                                     seq_length=15,
                                     num_classes=config.cfg.TRAIN.CLASSES_NUMS,
                                     rnn_cell_type='lstm')

    with tf.variable_scope('shadow', reuse=False):
        net_out, tensor_dict = shadownet.build_shadownet(inputdata=inputdata)

    cost = tf.reduce_mean(
        tf.nn.ctc_loss(labels=input_labels,
                       inputs=net_out,
                       sequence_length=20 *
                       np.ones(config.cfg.TRAIN.BATCH_SIZE)))

    decoded, log_prob = tf.nn.ctc_beam_search_decoder(
        net_out,
        20 * np.ones(config.cfg.TRAIN.BATCH_SIZE),
        merge_repeated=False)

    sequence_dist = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = config.cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               config.cfg.TRAIN.LR_DECAY_STEPS,
                                               config.cfg.TRAIN.LR_DECAY_RATE,
                                               staircase=True)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=learning_rate).minimize(loss=cost,
                                                  global_step=global_step)
        # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9).minimize(
        #     loss=cost, global_step=global_step)

    # Set tf summary
    tboard_save_path = '/data/output/'
    if not ops.exists(tboard_save_path):
        os.makedirs(tboard_save_path)

    visualizor = tensorboard_vis_summary.CNNVisualizer()

    # Summaries for the training phase
    train_cost_scalar = tf.summary.scalar(name='train_cost', tensor=cost)
    train_accuracy_scalar = tf.summary.scalar(name='train_accuracy',
                                              tensor=accuracy_tensor)
    train_seq_scalar = tf.summary.scalar(name='train_seq_dist',
                                         tensor=sequence_dist)
    train_conv1_image = visualizor.merge_conv_image(
        feature_map=tensor_dict['conv1'], scope='conv1_image')
    train_conv2_image = visualizor.merge_conv_image(
        feature_map=tensor_dict['conv2'], scope='conv2_image')
    train_conv3_image = visualizor.merge_conv_image(
        feature_map=tensor_dict['conv3'], scope='conv3_image')
    train_conv7_image = visualizor.merge_conv_image(
        feature_map=tensor_dict['conv7'], scope='conv7_image')
    lr_scalar = tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)

    weights_tensor_dict = dict()
    for vv in tf.trainable_variables():
        if 'conv' in vv.name:
            weights_tensor_dict[vv.name[:-2]] = vv
    train_weights_hist_dict = visualizor.merge_weights_hist(
        weights_tensor_dict=weights_tensor_dict,
        scope='weights_histogram',
        is_merge=False)

    train_summary_merge_list = [
        train_cost_scalar, train_accuracy_scalar, train_seq_scalar, lr_scalar,
        train_conv1_image, train_conv2_image, train_conv3_image
    ]
    for _, weights_hist in train_weights_hist_dict.items():
        train_summary_merge_list.append(weights_hist)
    train_summary_op_merge = tf.summary.merge(inputs=train_summary_merge_list)

    # Summaries for the validation phase (currently disabled)
    # val_cost_scalar = tf.summary.scalar(name='val_cost', tensor=cost)
    # val_seq_scalar = tf.summary.scalar(name='val_seq_dist', tensor=sequence_dist)
    # val_accuracy_scalar = tf.summary.scalar(name='val_accuracy', tensor=accuracy_tensor)

    # test_summary_op_merge = tf.summary.merge(inputs=[val_cost_scalar, val_accuracy_scalar,
    #                                                  val_seq_scalar])

    # Set saver configuration
    restore_variable_list = [tmp.name for tmp in tf.trainable_variables()]
    saver = tf.train.Saver()
    model_save_dir = '/data/output'
    if not ops.exists(model_save_dir):
        os.makedirs(model_save_dir)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                     time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(model_save_dir, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH
    sess_config.gpu_options.allocator_type = 'BFC'

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(tboard_save_path)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = config.cfg.TRAIN.EPOCHS

    print('Global configuration is as follows:')
    pprint.pprint(config.cfg)

    with sess.as_default():

        if FLAGS.weights_path is None:
            logger.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            # logger.info('Restore model from last crnn check point{:s}'.format(weights_path))
            # init = tf.global_variables_initializer()
            # sess.run(init)
            # restore_saver = tf.train.Saver(var_list=restore_variable_list)
            # restore_saver.restore(sess=sess, save_path=weights_path)
            logger.info('Restore model from last crnn checkpoint {:s}'.format(
                FLAGS.weights_path))
            saver.restore(sess=sess, save_path=FLAGS.weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for epoch in range(train_epochs):
            _, c, seq_distance, preds, gt_labels = sess.run(
                [optimizer, cost, sequence_dist, decoded, input_labels],
                feed_dict={phase_tensor: 'train'})

            # calculate the precision
            preds = decoder.sparse_tensor_to_str(preds[0])
            gt_labels = decoder.sparse_tensor_to_str(gt_labels)

            accuracy = []

            for index, gt_label in enumerate(gt_labels):
                pred = preds[index]
                total_count = len(gt_label)
                correct_count = 0
                try:
                    for i, tmp in enumerate(gt_label):
                        if tmp == pred[i]:
                            correct_count += 1
                except IndexError:
                    continue
                finally:
                    try:
                        accuracy.append(correct_count / total_count)
                    except ZeroDivisionError:
                        if len(pred) == 0:
                            accuracy.append(1)
                        else:
                            accuracy.append(0)
            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)

            train_summary = sess.run(train_summary_op_merge,
                                     feed_dict={
                                         accuracy_tensor: accuracy,
                                         phase_tensor: 'train'
                                     })
            summary_writer.add_summary(summary=train_summary,
                                       global_step=epoch)

            if epoch % config.cfg.TRAIN.DISPLAY_STEP == 0:

                logger.info(
                    'Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'
                    .format(epoch + 1, c, seq_distance, accuracy))

            # if epoch % config.cfg.TRAIN.VAL_STEP == 0:
            #     inputdata_value = sess.run(inputdata_val)
            #     val_c, val_seq, val_preds, val_gt_labels = sess.run([
            #         cost, sequence_dist, decoded, input_labels_val],
            #         feed_dict={phase_tensor: 'test',
            #                    input_tensor: inputdata_value})
            #
            #     preds_val = decoder.sparse_tensor_to_str(val_preds[0])
            #     gt_labels_val = decoder.sparse_tensor_to_str(val_gt_labels)
            #
            #     accuracy_val = []
            #
            #     for index, gt_label in enumerate(gt_labels_val):
            #         pred = preds_val[index]
            #             total_count = len(gt_label)
            #         correct_count = 0
            #         try:
            #             for i, tmp in enumerate(gt_label):
            #                 if tmp == pred[i]:
            #                     correct_count += 1
            #         except IndexError:
            #             continue
            #         finally:
            #             try:
            #                 accuracy_val.append(correct_count / total_count)
            #             except ZeroDivisionError:
            #                 if len(pred) == 0:
            #                     accuracy_val.append(1)
            #                 else:
            #                     accuracy_val.append(0)
            #
            #     accuracy_val = np.mean(np.array(accuracy_val).astype(np.float32), axis=0)
            #
            #     test_summary = sess.run(test_summary_op_merge,
            #                             feed_dict={accuracy_tensor: accuracy_val,
            #                                        phase_tensor: 'test',
            #                                        input_tensor: inputdata_value})
            #     summary_writer.add_summary(summary=test_summary, global_step=epoch)
            #
            #     logger.info('Epoch: {:d} val_cost= {:9f} val_seq_distance= {:9f} val_accuracy= {:9f}'.format(
            #         epoch + 1, val_c, val_seq, accuracy_val))

            if epoch % 500 == 0:
                saver.save(sess=sess,
                           save_path=model_save_path,
                           global_step=epoch)

        coord.request_stop()
        coord.join(threads=threads)

    sess.close()

    return
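
This variant takes no arguments and reads its paths from tf.app.flags; a hedged sketch of flag definitions matching the FLAGS.dataset_dir and FLAGS.weights_path lookups above (defaults are placeholders):

tf.app.flags.DEFINE_string('dataset_dir', 'data/tfrecords', 'Directory with the train tfrecords')
tf.app.flags.DEFINE_string('weights_path', None, 'Checkpoint to restore, or None to train from scratch')
FLAGS = tf.app.flags.FLAGS

train_shadownet()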
Example #12
def test_shadownet(weights_path: str,
                   cfg: EasyDict,
                   visualize: bool,
                   process_all_data: bool = True,
                   num_threads: int = 4,
                   num_classes: int = 0):
    """

    :param tfrecords_dir: Directory with test_feature.tfrecords
    :param charset_dir: Path to char_dict.json and ord_map.json (generated with write_text_features.py)
    :param weights_path: Path to stored weights
    :param cfg: configuration EasyDict (e.g. global_config.config.cfg)
    :param visualize: whether to display the images
    :param process_all_data:
    :param num_threads: Number of threads for tf.train.(shuffle_)batch
    :param num_classes: Number of different characters in the dataset
    """
    decoder = data_utils.TextFeatureIO(
        char_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'char_dict.json'),
        ord_map_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR,
                                   'ord_map.json')).reader
    input_images, input_labels, input_image_names = decoder.read_features(
        cfg, cfg.TEST.BATCH_SIZE, num_threads, False)

    num_classes = len(
        decoder.char_dict) + 1 if num_classes == 0 else num_classes
    net = crnn_model.ShadowNet(phase='Test',
                               hidden_nums=cfg.ARCH.HIDDEN_UNITS,
                               layers_nums=cfg.ARCH.HIDDEN_LAYERS,
                               num_classes=num_classes)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=input_images)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               cfg.ARCH.SEQ_LENGTH *
                                               np.ones(cfg.TEST.BATCH_SIZE),
                                               merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    test_sample_count = sum(1 for _ in tf.python_io.tf_record_iterator(
        ops.join(cfg.PATH.TFRECORDS_DIR, 'test_feature.tfrecords')))
    num_iterations = int(math.ceil(test_sample_count / cfg.TEST.BATCH_SIZE)) if process_all_data \
        else 1

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        print('Start predicting...')

        accuracy = 0
        for epoch in range(num_iterations):
            predictions, images, labels, image_names = sess.run(
                [decoded, input_images, input_labels, input_image_names])
            image_names = np.reshape(image_names,
                                     newshape=image_names.shape[0])
            image_names = [tmp.decode('utf-8') for tmp in image_names]

            labels = decoder.sparse_tensor_to_str(labels)
            predictions = decoder.sparse_tensor_to_str(predictions[0])

            accuracy += compute_accuracy(labels, predictions, display=False)

            for index, image in enumerate(images):
                print(
                    'Predict {:s} image with gt label: {:s} **** predicted label: {:s}'
                    .format(image_names[index], labels[index],
                            predictions[index]))
                # avoid accidentally displaying for the whole dataset
                if visualize and not process_all_data:
                    plt.imshow(image[:, :, (2, 1, 0)])
                    plt.show()

        # We compute a mean of means, so we need the sample sizes to be constant
        # (BATCH_SIZE) for this to equal the actual mean
        accuracy /= num_iterations
        print('Mean test accuracy is {:5f}'.format(accuracy))
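
A hedged invocation sketch; with process_all_data=True the reported accuracy is a mean of per-batch means, which matches the true mean only when every batch has exactly cfg.TEST.BATCH_SIZE samples:

test_shadownet(weights_path='model/shadownet/shadownet.ckpt',  # hypothetical checkpoint
               cfg=global_config.config.cfg,
               visualize=False,
               process_all_data=True)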
Example #13
def main():

    codec = data_utils.TextFeatureIO(
        char_dict_path='app/Config/char_dict.json',
        ord_map_dict_path=r'app/Config/ord_map.json')

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for specified device and load extensions library if specified
    plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir)
    if args.cpu_extension and 'CPU' in args.device:
        plugin.add_cpu_extension(args.cpu_extension)
    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    # if plugin.device == "CPU":
    # supported_layers = plugin.get_supported_layers(net)
    # not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
    # if len(not_supported_layers) != 0:
    # log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
    # format(plugin.device, ', '.join(not_supported_layers)))
    # log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
    # "or --cpu_extension command line argument")
    # sys.exit(1)

    assert len(
        net.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(
        net.outputs) == 1, "Sample supports only single output topologies"

    log.info("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    net.batch_size = len(args.input)
    job_id = os.environ['PBS_JOBID'].split('.')[0]

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape
    images = np.ndarray(shape=(n, c, h, w))
    for i in range(n):
        image = cv2.imread(args.input[i])
        if image.shape[:-1] != (h, w):
            log.warning("Image {} is resized from {} to {}".format(
                args.input[i], image.shape[:-1], (h, w)))
            image = cv2.resize(image, (w, h))
        image = image.transpose(
            (2, 0, 1))  # Change data layout from HWC to CHW
        images[i] = image
    log.info("Batch size is {}".format(n))

    # Loading model to the plugin
    log.info("Loading model to the plugin")
    exec_net = plugin.load(network=net)
    del net

    # Start sync inference
    log.info("Starting inference ({} iterations)".format(args.number_iter))
    infer_time = []

    result_dir = os.path.join(args.output_dir, job_id)

    if not os.path.isdir(result_dir):
        print(result_dir)
        os.makedirs(result_dir, exist_ok=True)
    progress_file_path = os.path.join(result_dir, 'i_progress.txt')
    t0 = time()
    for i in range(args.number_iter):
        #t0 = time()
        res = exec_net.infer(inputs={input_blob: images})
        #infer_time.append((time()-t0)*1000)
        print(i, args.number_iter)
        progressUpdate(progress_file_path,
                       time() - t0, i + 1, args.number_iter)
    t1 = (time() - t0)
    log.info("Average running time of one iteration: {} ms".format(
        t1 * 1000 / args.number_iter))
    if args.perf_counts:
        perf_counts = exec_net.requests[0].get_perf_counts()
        log.info("Performance counters:")
        print("{:<70} {:<15} {:<15} {:<15} {:<10}".format(
            'name', 'layer_type', 'exec_type', 'status', 'real_time, us'))
        for layer, stats in perf_counts.items():
            print("{:<70} {:<15} {:<15} {:<15} {:<10}".format(
                layer, stats['layer_type'], stats['exec_type'],
                stats['status'], stats['real_time']))

    # Processing output blob
    log.info("Processing output blob")
    res = res[out_blob]

    preds = res.argmax(2)
    preds = preds.transpose(1, 0)
    preds = np.ascontiguousarray(preds, dtype=np.int8).view(dtype=np.int8)
    values = codec.writer.ordtochar(preds[0].tolist())
    values = [v for i, v in enumerate(values) if i == 0 or v != values[i - 1]]
    values = [x for x in values if x != ' ']
    res = ''.join(values)
    print("The result is : " + res)

    avg_time = round((t1 * 1000 / args.number_iter), 3)
    with open(os.path.join(args.output_dir, job_id, 'result.txt'), 'w') as f:
        f.write(res + "\nInference performed in " + str(avg_time) + " ms")

    stats = {}
    stats['time'] = str(round(t1, 1))
    stats['frames'] = str(args.number_iter * n)
    stats['fps'] = str(args.number_iter * n / t1)
    stats_file = result_dir + "/stats.json"
    with open(stats_file, 'w') as f:
        json.dump(stats, f)

    del exec_net
    del plugin
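
The post-processing at the end of main() is a greedy CTC decode: take the argmax class per time step, collapse consecutive repeats, then drop the blank symbol (rendered as a space by ordtochar). The same collapse in isolation, as a sketch:

def greedy_ctc_collapse(chars, blank=' '):
    # Collapse consecutive duplicates, then strip blanks, as in main() above.
    kept = [c for i, c in enumerate(chars) if i == 0 or c != chars[i - 1]]
    return ''.join(c for c in kept if c != blank)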
Example #14
def write_features(dataset_dir, save_dir, anno_name):
    """

    :param dataset_dir:
    :param save_dir:
    :return:
    """
    if not ops.exists(save_dir):
        os.makedirs(save_dir)

    print('Initialize the dataset provider ......')
    provider = data_provider.TextDataProvider(dataset_dir=dataset_dir,
                                              annotation_name=anno_name,
                                              validation_set=True,
                                              validation_split=0.001,
                                              shuffle=None,
                                              normalization=None)
    print('Dataset provider initialization complete')

    feature_io = data_utils.TextFeatureIO()

    # write train tfrecords
    print('Start writing training tf records')
    train_images_temp = provider.train.images
    train_image_widths = provider.train.image_widths
    train_images = []
    for index, image in enumerate(train_images_temp):
        train_images.append(
            bytes(list(np.reshape(image, [train_image_widths[index] * 32 * 3]))))
    print(len(train_images))
    train_labels = provider.train.labels
    train_imagenames = provider.train.imagenames
    train_tfrecord_path = ops.join(save_dir, anno_name[:-4] +
                                   '.tfrecords')  # 'train_feature.tfrecords'
    train_class_num = feature_io.writer.write_features(
        tfrecords_path=train_tfrecord_path,
        labels=train_labels,
        images=train_images,
        imagenames=train_imagenames,
        image_widths=train_image_widths)
    print('training class_num: ', train_class_num)

    # # write test tfrecords
    # print('Start writing testing tf records')
    # test_images_temp = provider.test.images
    # test_image_widths = provider.test.image_widths
    # test_images = []
    # for index, image in enumerate(test_images_temp):
    #     test_images.append(bytes(list(np.reshape(image, [train_image_width*32*3]))))
    # print(len(test_images))
    # test_labels = provider.test.labels
    # test_imagenames = provider.test.imagenames
    # test_tfrecord_path = ops.join(save_dir, 'test_feature.tfrecords')
    # test_class_num = feature_io.writer.write_features(
    #     tfrecords_path=test_tfrecord_path, labels=test_labels, images=test_images, imagenames=test_imagenames,
    #     image_widths=test_image_widths)
    # print('test num_class: ', test_class_num)

    # write val tfrecords
    # val_images_temp = provider.validation.images
    # val_image_widths = provider.validation.image_widths
    # val_images = []
    # for index, image in enumerate(val_images_temp):
    #     val_images.append(bytes(list(np.reshape(image, [train_image_width*32*3]))))
    # print(len(val_images))
    # val_labels = provider.validation.labels
    # val_imagenames = provider.validation.imagenames
    # val_tfrecord_path = ops.join(save_dir, 'validation_feature.tfrecords')
    # val_class_num = feature_io.writer.write_features(
    #     tfrecords_path=val_tfrecord_path, labels=val_labels, images=val_images, imagenames=val_imagenames,
    #     image_widths=val_image_widths)
    # print('val num_class: ', val_class_num)

    return
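
A hedged usage sketch; the output tfrecords file is named after the annotation file (anno_name minus its extension):

write_features(dataset_dir='data/dataset',
               save_dir='data/tfrecords',
               anno_name='sample.txt')  # hypothetical paths and file name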
Example #15
def test_shadownet(dataset_dir, weights_path, is_vis=True):
    """

    :param dataset_dir:
    :param weights_path:
    :param is_vis:
    :return:
    """
    # Initialize the record decoder
    decoder = data_utils.TextFeatureIO().reader
    images_t, labels_t, imagenames_t = decoder.read_features(
        ops.join(dataset_dir, 'test_feature.tfrecords'), num_epochs=None)
    images_sh, labels_sh, imagenames_sh = tf.train.shuffle_batch(
        tensors=[images_t, labels_t, imagenames_t],
        batch_size=32,
        capacity=1000 + 32 * 2,
        min_after_dequeue=2,
        num_threads=4)

    images_sh = tf.cast(x=images_sh, dtype=tf.float32)

    # build shadownet
    net = crnn_model.ShadowNet(phase='Test',
                               hidden_nums=256,
                               layers_nums=2,
                               seq_length=25,
                               num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=images_sh)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               25 * np.ones(32),
                                               merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    with sess.as_default():

        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        print('Start predicting ......')
        predictions, images, labels, imagenames = sess.run(
            [decoded, images_sh, labels_sh, imagenames_sh])
        imagenames = np.reshape(imagenames, newshape=imagenames.shape[0])
        imagenames = [tmp.decode('utf-8') for tmp in imagenames]
        preds_res = decoder.sparse_tensor_to_str(predictions[0])
        gt_res = decoder.sparse_tensor_to_str(labels)
        for index, image in enumerate(images):
            print(
                'Predict {:s} image with gt label: {:s} **** predict label: {:s}'
                .format(imagenames[index], gt_res[index], preds_res[index]))
            if is_vis:
                plt.imshow(image[:, :, (2, 1, 0)])
                plt.show()

        coord.request_stop()
        coord.join(threads=threads)

    sess.close()
    return
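A minimal usage sketch with placeholder paths; dataset_dir must contain the test_feature.tfrecords file read above, and weights_path points at a checkpoint produced by a prior training run:

if __name__ == '__main__':
    # Placeholder paths -- substitute your own records and checkpoint
    test_shadownet(dataset_dir='/data/tfrecords',
                   weights_path='model/shadownet/shadownet.ckpt-100',
                   is_vis=False)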
Example #16
def train_shadownet(cfg: EasyDict,
                    weights_path: str = None,
                    decode: bool = False,
                    num_threads: int = 4) -> np.ndarray:
    """
    :param cfg: configuration EasyDict (e.g. global_config.config.cfg)
    :param weights_path: Path to stored weights
    :param decode: Whether to perform CTC decoding to report progress during training
    :param num_threads: Number of threads to use in tf.train.shuffle_batch
    :return: History of values of the cost function
    """
    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO(
        char_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR, 'char_dict.json'),
        ord_map_dict_path=ops.join(cfg.PATH.CHAR_DICT_DIR,
                                   'ord_map.json')).reader

    input_images, input_labels, input_image_names = decoder.read_features(
        cfg, cfg.TRAIN.BATCH_SIZE, num_threads)

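    # num_classes: size of the character set plus one extra label reserved
    # for the CTC "blank" symbol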
    shadownet = crnn_model.ShadowNet(phase='Train',
                                     hidden_nums=cfg.ARCH.HIDDEN_UNITS,
                                     layers_nums=cfg.ARCH.HIDDEN_LAYERS,
                                     num_classes=len(decoder.char_dict) + 1)

    with tf.variable_scope('shadow', reuse=False):
        net_out = shadownet.build_shadownet(inputdata=input_images)

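    # tf.nn.ctc_loss takes time-major logits [max_time, batch, num_classes]
    # and a SparseTensor of labels; the per-example losses are averaged
    # into a single scalar training cost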
    cost = tf.reduce_mean(
        tf.nn.ctc_loss(labels=input_labels,
                       inputs=net_out,
                       sequence_length=cfg.ARCH.SEQ_LENGTH *
                       np.ones(cfg.TRAIN.BATCH_SIZE)))

    decoded, log_prob = tf.nn.ctc_beam_search_decoder(
        net_out,
        cfg.ARCH.SEQ_LENGTH * np.ones(cfg.TRAIN.BATCH_SIZE),
        merge_repeated=False)

    sequence_dist = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(
        starter_learning_rate,
        global_step,
        cfg.TRAIN.LR_DECAY_STEPS,
        cfg.TRAIN.LR_DECAY_RATE,
        staircase=cfg.TRAIN.LR_STAIRCASE)
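    # i.e. learning_rate = LEARNING_RATE * LR_DECAY_RATE ** (global_step / LR_DECAY_STEPS),
    # with the exponent floored to an integer when LR_STAIRCASE is True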
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=learning_rate).minimize(loss=cost,
                                                  global_step=global_step)

    # Set tf summary
    os.makedirs(cfg.PATH.TBOARD_SAVE_DIR, exist_ok=True)
    tf.summary.scalar(name='Cost', tensor=cost)
    tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
    if decode:
        tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    os.makedirs(cfg.PATH.MODEL_SAVE_DIR, exist_ok=True)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                     time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(cfg.PATH.MODEL_SAVE_DIR, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = cfg.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(cfg.PATH.TBOARD_SAVE_DIR)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = cfg.TRAIN.EPOCHS

    with sess.as_default():
        if weights_path is None:
            logger.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            logger.info('Restore model from {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)

        # Start the queue runners that feed tf.train.shuffle_batch; without
        # them the input queues stay empty and sess.run would block forever
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        patience_counter = 1
        cost_history = [np.inf]
        for epoch in range(train_epochs):
            if epoch > 1 and cfg.TRAIN.EARLY_STOPPING:
                # We always compare to the first point where cost didn't improve
                if cost_history[-1 - patience_counter] - cost_history[
                        -1] > cfg.TRAIN.PATIENCE_DELTA:
                    patience_counter = 1
                else:
                    patience_counter += 1
                if patience_counter > cfg.TRAIN.PATIENCE_EPOCHS:
                    logger.info(
                        "Cost didn't improve beyond {:f} for {:d} epochs, stopping early."
                        .format(cfg.TRAIN.PATIENCE_DELTA, patience_counter))
                    break
            if decode:
                _, c, seq_distance, predictions, labels, summary = sess.run([
                    optimizer, cost, sequence_dist, decoded, input_labels,
                    merge_summary_op
                ])

                labels = decoder.sparse_tensor_to_str(labels)
                predictions = decoder.sparse_tensor_to_str(predictions[0])
                accuracy = compute_accuracy(labels, predictions)

                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info(
                        'Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'
                        .format(epoch + 1, c, seq_distance, accuracy))

            else:
                _, c, summary = sess.run([optimizer, cost, merge_summary_op])
                if epoch % cfg.TRAIN.DISPLAY_STEP == 0:
                    logger.info('Epoch: {:d} cost= {:9f}'.format(epoch + 1, c))

            cost_history.append(c)
            summary_writer.add_summary(summary=summary, global_step=epoch)
            saver.save(sess=sess, save_path=model_save_path, global_step=epoch)

        coord.request_stop()
        coord.join(threads=threads)

        return np.array(cost_history[1:])  # Don't return the first np.inf
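A minimal invocation sketch. It assumes config.cfg is the same global_config EasyDict used elsewhere in these examples, as the docstring suggests; everything else (paths, hyperparameters) comes from that config:

if __name__ == '__main__':
    # config.cfg is the EasyDict the docstring refers to
    history = train_shadownet(cfg=config.cfg,
                              weights_path=None,  # train from scratch
                              decode=True,        # log seq distance/accuracy
                              num_threads=4)
    print('Final training cost: {:f}'.format(history[-1]))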