def __init__(self,
                 mean,
                 valid_target_len=float('inf'),
                 img_width_range=(12, 320),
                 word_len=30,
                 channel=3):

        img_height = 32

        # resulting bucket_specs: [(16, 32), (27, 32), (35, 32), (64, 32), (80, 32)]
        self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)),
                             (int(math.floor(108 / 4)), int(word_len + 2)),
                             (int(math.floor(140 / 4)), int(word_len + 2)),
                             (int(math.floor(256 / 4)), int(word_len + 2)),
                             (int(math.floor(img_width_range[1] / 4)),
                              int(word_len + 2))]

        self.bucket_min_width, self.bucket_max_width = img_width_range  # (12, 320)
        self.image_height = img_height
        self.valid_target_len = valid_target_len

        self.mean = mean
        self.channel = channel

        assert len(self.mean) == self.channel

        # one BucketData per integer width (0..bucket_max_width), each holding images of that width
        self.bucket_data = {
            i: BucketData()
            for i in range(self.bucket_max_width + 1)
        }
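
For context, a minimal sketch (not part of the original code) of how a loader like this might route an image to one of the buckets above; pick_bucket is a hypothetical helper, and the downsample factor of 4 is inferred from the / 4 in the specs:

import math

def pick_bucket(bucket_specs, img_width):
    """Return the index of the smallest bucket whose encoder length can
    hold an image of img_width pixels (hypothetical helper; assumes the
    CNN shrinks width by a factor of 4)."""
    encoder_len = int(math.floor(img_width / 4))
    for idx, (enc_len, _dec_len) in enumerate(bucket_specs):
        if encoder_len <= enc_len:
            return idx
    return None  # image is too wide for any bucket

specs = [(16, 32), (27, 32), (35, 32), (64, 32), (80, 32)]
print(pick_bucket(specs, 100))  # -> 1 (encoder length 25 fits in 27)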
Example #2
    def __init__(self,
                 data_root,
                 annotation_fn,
                 evaluate=False,
                 valid_target_len=float('inf'),
                 img_width_range=(100, 800),
                 word_len=60):
        """
        :param data_root:
        :param annotation_fn:
        :param lexicon_fn:
        :param img_width_range: only needed for training set
        :return:
        """
        print("DATA GEN")
        img_height = 32
        self.data_root = data_root
        if os.path.exists(annotation_fn):
            self.annotation_path = annotation_fn
        else:
            self.annotation_path = os.path.join(data_root, annotation_fn)
        # Fixed widths and text lengths; these must be changed for long handwritten lines.
        '''if evaluate:
            self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)), (int(math.floor(108 / 4)), int(word_len + 2)),
                                 (int(math.floor(140 / 4)), int(word_len + 2)), (int(math.floor(256 / 4)), int(word_len + 2)),
                                 (int(math.floor(img_width_range[1] / 4)), int(word_len + 2))]
        else:
            self.bucket_specs = [(int(64 / 4), 9 + 2), (int(108 / 4), 15 + 2),
                             (int(140 / 4), 17 + 2), (int(256 / 4), 20 + 2),
                             (int(math.ceil(img_width_range[1] / 4)), word_len + 2)]'''
        self.bucket_specs = [(int(100 / 4), int(word_len / 4) + 2),
                             (int(200 / 4), int(word_len / 4) + 2),
                             (int(300 / 4), int(word_len / 2) + 2),
                             (int(400 / 4), int(word_len / 2) + 2),
                             (int(500 / 4), word_len + 2),
                             (int(600 / 4), word_len + 2),
                             (int(700 / 4), word_len + 2),
                             (int(800 / 4), word_len + 2)]
        self.max_len = word_len + 2

        self.bucket_min_width, self.bucket_max_width = img_width_range
        self.image_height = img_height
        self.valid_target_len = valid_target_len

        self.bucket_data = {
            i: BucketData()
            for i in range(self.bucket_max_width + 1)
        }
        self.char2index = []
        with open(os.path.join(self.data_root, 'sample.txt'), "r") as ins:
            for line in ins:
                self.char2index.append(line.strip())
        self.char2index.append(' ')
        print(self.char2index)
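
Despite its name, char2index is built above as a plain list of characters (one per line of sample.txt, plus a trailing space). A minimal sketch of encoding a label by list position, assuming that is how it is consumed; encode_label is a hypothetical helper not shown in the source:

def encode_label(char2index, text):
    # Map each character to its position in the list; unknown
    # characters fall back to the trailing space entry.
    space_id = len(char2index) - 1
    return [char2index.index(c) if c in char2index else space_id
            for c in text]

chars = list("abc") + [' ']         # stand-in for sample.txt contents
print(encode_label(chars, "ab c"))  # -> [0, 1, 3, 2]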
Example #3
    def __init__(
            self,
            data_root,
            annotation_fn,
            evaluate=False,
            valid_target_len=float('inf'),
            img_width_range=(83, 2083),  # iamdb train set
            word_len=81):
        # img_width_range = (354,1990), # sgdb
        # word_len = 74):
        # img_width_range = (135,2358), # rimes
        # word_len = 100):
        """
        :param data_root:
        :param annotation_fn:
        :param lexicon_fn:
        :param img_width_range: only needed for training set
        :return:
        """

        img_height = 32
        self.data_root = data_root
        if os.path.exists(annotation_fn):
            self.annotation_path = annotation_fn
        else:
            self.annotation_path = os.path.join(data_root, annotation_fn)

        if evaluate:
            self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)),
                                 (int(math.floor(108 / 4)), int(word_len + 2)),
                                 (int(math.floor(140 / 4)), int(word_len + 2)),
                                 (int(math.floor(256 / 4)), int(word_len + 2)),
                                 (int(math.floor(img_width_range[1] / 4)),
                                  int(word_len + 2))]
        else:
            self.bucket_specs = [
                (int(64 / 4), 9 + 2), (int(108 / 4), 15 + 2),
                (int(140 / 4), 17 + 2), (int(256 / 4), 20 + 2),
                (int(math.ceil(img_width_range[1] / 4)), word_len + 2)
            ]

        self.bucket_min_width, self.bucket_max_width = img_width_range
        self.image_height = img_height
        self.valid_target_len = valid_target_len

        self.bucket_data = {
            i: BucketData()
            for i in range(self.bucket_max_width + 1)
        }
Example #4
    def __init__(
            self,
            data_root,
            annotation_fn,
            evaluate=False,
            valid_target_len=float('inf'),
            img_width_range=(170, 1016),  # iam training width range
            word_len=81):  # iam training max
        """
        :param data_root:
        :param annotation_fn:
        :param lexicon_fn:
        :param img_width_range: only needed for training set
        :return:
        """

        img_height = 32
        self.data_root = data_root
        if os.path.exists(annotation_fn):
            self.annotation_path = annotation_fn
        else:
            self.annotation_path = os.path.join(data_root, annotation_fn)

        if evaluate:
            self.bucket_specs = [
                (int(math.floor(img_width_range[0])),
                 int(95 + 2)),  # iam test max
                (int(math.ceil(img_width_range[1] / 5)), int(95 + 2)),
                (int(math.ceil(img_width_range[1] / 4)), int(95 + 2)),
                (int(math.ceil(img_width_range[1] / 3)), int(95 + 2)),
                (int(math.ceil(img_width_range[1] / 2)), int(95 + 2))
            ]
        else:
            self.bucket_specs = [
                (int(math.floor(img_width_range[0])), int(word_len + 2)),
                (int(math.ceil(img_width_range[1] / 5)), int(word_len + 2)),
                (int(math.ceil(img_width_range[1] / 4)), int(word_len + 2)),
                (int(math.ceil(img_width_range[1] / 3)), int(word_len + 2)),
                (int(math.ceil(img_width_range[1] / 2)), int(word_len + 2))
            ]

        self.bucket_min_width, self.bucket_max_width = img_width_range
        self.image_height = img_height
        self.valid_target_len = valid_target_len

        self.bucket_data = {
            i: BucketData()
            for i in range(self.bucket_max_width + 1)
        }
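
As a quick sanity check, computed from the defaults above (the + 2 presumably reserves room for start/end tokens), the evaluate branch yields these (encoder, decoder) sizes:

import math

lo, hi = 170, 1016   # iam training width range, as above
dec = 95 + 2         # iam test max target length, plus 2
specs = [(lo, dec)] + [(math.ceil(hi / d), dec) for d in (5, 4, 3, 2)]
print(specs)  # -> [(170, 97), (204, 97), (254, 97), (339, 97), (508, 97)]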
Example #5
    def __init__(self,
                 data_root, annotation_fn, train_sample_size=None,
                 evaluate=False,
                 valid_target_len=float('inf'),
                 img_width_range=(12, 320),
                 word_len=60):
        """
        :param data_root:
        :param annotation_fn:
        :param lexicon_fn:
        :param img_width_range: only needed for training set
        :return:
        """

        img_height = 32
        self.data_root = data_root
        if os.path.exists(annotation_fn):
            self.annotation_path = annotation_fn
        else:
            self.annotation_path = os.path.join(data_root, annotation_fn)

        if evaluate:
            self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)), (int(math.floor(108 / 4)), int(word_len + 2)),
                                 (int(math.floor(140 / 4)), int(word_len + 2)), (int(math.floor(256 / 4)), int(word_len + 2)),
                                 (int(math.floor(img_width_range[1] / 4)), int(word_len + 2)),
                                 (int(math.ceil(img_width_range[1] / 2)), word_len + 2)]
        else:
            self.bucket_specs = [(int(64 / 4), 9 + 2), (int(108 / 4), 15 + 2),
                             (int(140 / 4), 17 + 2), (int(256 / 4), 20 + 2),
                             (int(math.ceil(img_width_range[1] / 4)), word_len + 2),
                             (int(math.ceil(img_width_range[1] / 2)), word_len + 2)]

        self.bucket_min_width, self.bucket_max_width = img_width_range
        self.image_height = img_height
        self.valid_target_len = valid_target_len
        self.train_sample_size = train_sample_size

        self.bucket_data = {i: BucketData()
                            for i in range(self.bucket_max_width + 1)}

        self.OnLineDataGen = DataGenTextOnLine()
Example #6
    def __init__(self,
                 data_root, annotation_fn,
                 evaluate=False,
                 valid_target_len=float('inf'),
                 img_width_range=(39, 936),
                 word_len=50):
        """
        :param data_root:
        :param annotation_fn:
        :param lexicon_fn:
        :param img_width_range: only needed for training set
        :return:
        """

        img_height = 48
        self.data_root = data_root
        if os.path.exists(annotation_fn):
            self.annotation_path = annotation_fn  # abs path
        else:
            self.annotation_path = os.path.join(data_root, annotation_fn)  # relative path

        if evaluate:
            self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)), (int(math.floor(128 / 4)), int(word_len + 2)),
                                 (int(math.floor(256 / 4)), int(word_len + 2)), (int(math.floor(512 / 4)), int(word_len + 2)),
                                 (int(math.floor(img_width_range[1] / 4)), int(word_len + 2))]
        else:
            self.bucket_specs = [(int(64 / 4), 9 + 2), (int(128 / 4), 15 + 2),
                             (int(256 / 4), 17 + 2), (int(512 / 4), 25 + 2),
                             (int(math.ceil(img_width_range[1] / 4)), word_len + 2)]

        self.bucket_min_width, self.bucket_max_width = img_width_range
        self.image_height = img_height
        self.valid_target_len = valid_target_len

        self.bucket_data = {i: BucketData()
                            for i in range(self.bucket_max_width + 1)}
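
Unlike the other variants, this one uses img_height = 48 and a (39, 936) width range. A minimal sketch of the kind of preprocessing that would feed it; this resize helper is my assumption and is not shown in the source:

def resized_width(orig_w, orig_h, target_h=48, w_min=39, w_max=936):
    # Scale the width proportionally to the target height, then clamp
    # it into the bucket range.
    w = int(round(orig_w * target_h / orig_h))
    return max(w_min, min(w, w_max))

print(resized_width(1200, 96))  # -> 600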
Example #7
 def clear(self):
     self.bucket_data = {i: BucketData()
                         for i in range(self.bucket_max_width + 1)}
Example #8
    def __init__(self):
        img_width_range = cfg.img_width_range
        word_len = cfg.word_len
        self.batch_size = cfg.batch_size
        self.visualize = cfg.visualize
        gpu_device_id = '/gpu:' + str(cfg.gpu_id)
        if cfg.gpu_id == -1:
            gpu_device_id = '/cpu:0'
            print("Using CPU model!")
        with tf.device(gpu_device_id):
            self.img_data = tf.placeholder(tf.float32,
                                           shape=(None, 1, 32, None),
                                           name='img_data')
            self.zero_paddings = tf.placeholder(tf.float32,
                                                shape=(None, None, 512),
                                                name='zero_paddings')

        self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)),
                             (int(math.floor(108 / 4)), int(word_len + 2)),
                             (int(math.floor(140 / 4)), int(word_len + 2)),
                             (int(math.floor(256 / 4)), int(word_len + 2)),
                             (int(math.floor(img_width_range[1] / 4)),
                              int(word_len + 2))]
        buckets = self.buckets = self.bucket_specs

        self.decoder_inputs = []
        self.encoder_masks = []
        self.target_weights = []
        with tf.device(gpu_device_id):
            for i in range(int(buckets[-1][0] + 1)):
                self.encoder_masks.append(
                    tf.placeholder(tf.float32,
                                   shape=[None, 1],
                                   name="encoder_mask{0}".format(i)))
            for i in range(buckets[-1][1] + 1):
                self.decoder_inputs.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="decoder{0}".format(i)))
                self.target_weights.append(
                    tf.placeholder(tf.float32,
                                   shape=[None],
                                   name="weight{0}".format(i)))
        self.bucket_min_width, self.bucket_max_width = img_width_range
        self.image_height = cfg.img_height
        self.valid_target_len = cfg.valid_target_len
        self.forward_only = True

        self.bucket_data = {
            i: BucketData()
            for i in range(self.bucket_max_width + 1)
        }

        with tf.device(gpu_device_id):
            cnn_model = CNN(self.img_data, True)  # (not self.forward_only)
            self.conv_output = cnn_model.tf_output()
            self.concat_conv_output = tf.concat(
                axis=1, values=[self.conv_output, self.zero_paddings])

            self.perm_conv_output = tf.transpose(self.concat_conv_output,
                                                 perm=[1, 0, 2])

        with tf.device(gpu_device_id):
            self.attention_decoder_model = Seq2SeqModel(
                encoder_masks=self.encoder_masks,
                encoder_inputs_tensor=self.perm_conv_output,
                decoder_inputs=self.decoder_inputs,
                target_weights=self.target_weights,
                target_vocab_size=cfg.target_vocab_size,
                buckets=self.buckets,
                target_embedding_size=cfg.target_embedding_size,
                attn_num_layers=cfg.attn_num_layers,
                attn_num_hidden=cfg.attn_num_hidden,
                forward_only=self.forward_only,
                use_gru=cfg.use_gru)
        #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
        self.sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True))
        self.saver_all = tf.train.Saver(tf.global_variables())
        self.saver_all.restore(self.sess, cfg.ocr_model_path)
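
The number of per-step placeholders above is driven entirely by the last bucket. With hypothetical cfg values mirroring the first snippet on this page (img_width_range = (12, 320), word_len = 30), the loop bounds work out as follows:

import math

img_width_range = (12, 320)
word_len = 30
bucket_specs = [(int(math.floor(w / 4)), word_len + 2)
                for w in (64, 108, 140, 256, img_width_range[1])]
print(bucket_specs[-1][0] + 1)  # 81 encoder mask placeholders
print(bucket_specs[-1][1] + 1)  # 33 decoder input / weight placeholders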
Example #9
    def __init__(
            self,
            data_root,
            annotation_fn,
            evaluate=False,
            valid_target_len=float('inf'),
            img_width_range=(83, 2083),  # iamdb train set
            word_len=81):
        # img_width_range = (354,1990), # sgdb
        # word_len = 74):
        # img_width_range = (306,911), # pardb
        # word_len = 70):
        # img_width_range = (135,2358), # rimes
        # word_len = 100):
        # img_width_range = (175,1801), # gwdb
        # word_len = 87):
        """
        :param data_root:
        :param annotation_fn:
        :param lexicon_fn:
        :param img_width_range: only needed for training set
        :return:
        """

        img_height = 64
        self.data_root = data_root
        if os.path.exists(annotation_fn):
            self.annotation_path = annotation_fn
        else:
            self.annotation_path = os.path.join(data_root, annotation_fn)

        # evaluate and training use the same specs here: encoder widths
        # step through increasing fractions of the maximum image width.
        self.bucket_specs = [(int(math.ceil(img_width_range[0])),
                              int(math.ceil(img_width_range[1] / 8))),
                             (int(math.ceil(img_width_range[1] / 8)),
                              int(math.ceil(img_width_range[1] / 6))),
                             (int(math.ceil(img_width_range[1] / 6)),
                              int(math.ceil(img_width_range[1] / 4))),
                             (int(math.ceil(img_width_range[1] / 4)),
                              int(math.ceil(img_width_range[1] / 3))),
                             (int(math.ceil(img_width_range[1] / 3)),
                              int(math.ceil(img_width_range[1] / 2)))]

        self.bucket_min_width, self.bucket_max_width = img_width_range
        self.image_height = img_height
        self.valid_target_len = valid_target_len

        self.bucket_data = {
            i: BucketData()
            for i in range(self.bucket_max_width + 1)
        }
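
For reference, with the iamdb defaults the specs above evaluate to the following; note that here the second element of each pair is another width fraction rather than a word length:

import math

lo, hi = 83, 2083  # iamdb train set, as above
bounds = [lo] + [math.ceil(hi / d) for d in (8, 6, 4, 3, 2)]
print(list(zip(bounds[:-1], bounds[1:])))
# -> [(83, 261), (261, 348), (348, 521), (521, 695), (695, 1042)]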