def __init__(self, mean, valid_target_len=float('inf'),
             img_width_range=(12, 320), word_len=30, channel=3):
    img_height = 32
    # Encoder/decoder lengths per bucket; the encoder length is width // 4
    # because the CNN downsamples width by 4. With the defaults this yields
    # [(16, 32), (27, 32), (35, 32), (64, 32), (80, 32)].
    self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)),
                         (int(math.floor(108 / 4)), int(word_len + 2)),
                         (int(math.floor(140 / 4)), int(word_len + 2)),
                         (int(math.floor(256 / 4)), int(word_len + 2)),
                         (int(math.floor(img_width_range[1] / 4)), int(word_len + 2))]
    self.bucket_min_width, self.bucket_max_width = img_width_range  # (12, 320)
    self.image_height = img_height
    self.valid_target_len = valid_target_len
    self.mean = mean
    self.channel = channel
    assert len(self.mean) == self.channel
    # One bucket per integer width up to bucket_max_width; each holds
    # images of the matching width.
    self.bucket_data = {i: BucketData()
                        for i in range(self.bucket_max_width + 1)}
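# A minimal standalone sketch (not from the source) of how an image width maps
# to one of the bucket specs above: take the first bucket whose encoder length
# can hold floor(width / 4) CNN timesteps. The spec values below assume the
# defaults img_width_range=(12, 320) and word_len=30.
import math

def pick_bucket(width, bucket_specs):
    steps = int(math.floor(width / 4))  # CNN downsamples width by 4
    for enc_len, dec_len in bucket_specs:
        if steps <= enc_len:
            return (enc_len, dec_len)
    return None  # wider than the largest bucket

specs = [(16, 32), (27, 32), (35, 32), (64, 32), (80, 32)]
print(pick_bucket(100, specs))  # (27, 32): 100 // 4 = 25 fits the second bucket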
def __init__(self, data_root, annotation_fn, evaluate=False,
             valid_target_len=float('inf'), img_width_range=(100, 800),
             word_len=60):
    """
    :param data_root: dataset root directory
    :param annotation_fn: annotation file, absolute or relative to data_root
    :param img_width_range: only needed for the training set
    """
    print("DATA GEN")
    img_height = 32
    self.data_root = data_root
    if os.path.exists(annotation_fn):
        self.annotation_path = annotation_fn
    else:
        self.annotation_path = os.path.join(data_root, annotation_fn)
    # Fixed widths and text lengths; these have to be changed for long
    # handwritten lines.
    '''if evaluate:
        self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)),
                             (int(math.floor(108 / 4)), int(word_len + 2)),
                             (int(math.floor(140 / 4)), int(word_len + 2)),
                             (int(math.floor(256 / 4)), int(word_len + 2)),
                             (int(math.floor(img_width_range[1] / 4)), int(word_len + 2))]
    else:
        self.bucket_specs = [(int(64 / 4), 9 + 2), (int(108 / 4), 15 + 2),
                             (int(140 / 4), 17 + 2), (int(256 / 4), 20 + 2),
                             (int(math.ceil(img_width_range[1] / 4)), word_len + 2)]'''
    self.bucket_specs = [(int(100 / 4), int(word_len / 4) + 2),
                         (int(200 / 4), int(word_len / 4) + 2),
                         (int(300 / 4), int(word_len / 2) + 2),
                         (int(400 / 4), int(word_len / 2) + 2),
                         (int(500 / 4), word_len + 2),
                         (int(600 / 4), word_len + 2),
                         (int(700 / 4), word_len + 2),
                         (int(800 / 4), word_len + 2)]
    self.max_len = word_len + 2
    self.bucket_min_width, self.bucket_max_width = img_width_range
    self.image_height = img_height
    self.valid_target_len = valid_target_len
    self.bucket_data = {i: BucketData()
                        for i in range(self.bucket_max_width + 1)}
    # Character table: one character per line in sample.txt, plus a space.
    self.char2index = []
    with open(os.path.join(self.data_root, 'sample.txt'), "r") as ins:
        for line in ins:
            self.char2index.append(line.strip())
    self.char2index.append(' ')
    print(self.char2index)
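# A standalone sketch (hypothetical, not from the source) of the character
# table the constructor above builds. Despite its name, char2index is a list
# indexed by position (index -> char); a label string is encoded as positions
# in that list.
chars = ['a', 'b', 'c']          # stand-in for the lines of sample.txt
chars.append(' ')                # the constructor appends a space at the end
encoded = [chars.index(c) for c in 'ab c']
print(encoded)                   # [0, 1, 3, 2]
decoded = ''.join(chars[i] for i in encoded)
print(decoded)                   # 'ab c'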
def __init__(self, data_root, annotation_fn, evaluate=False,
             valid_target_len=float('inf'),
             img_width_range=(83, 2083),  # iamdb train set
             word_len=81):
    # img_width_range = (354, 1990)  # sgdb
    # word_len = 74
    # img_width_range = (135, 2358)  # rimes
    # word_len = 100
    """
    :param data_root: dataset root directory
    :param annotation_fn: annotation file, absolute or relative to data_root
    :param img_width_range: only needed for the training set
    """
    img_height = 32
    self.data_root = data_root
    if os.path.exists(annotation_fn):
        self.annotation_path = annotation_fn
    else:
        self.annotation_path = os.path.join(data_root, annotation_fn)
    if evaluate:
        self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)),
                             (int(math.floor(108 / 4)), int(word_len + 2)),
                             (int(math.floor(140 / 4)), int(word_len + 2)),
                             (int(math.floor(256 / 4)), int(word_len + 2)),
                             (int(math.floor(img_width_range[1] / 4)), int(word_len + 2))]
    else:
        self.bucket_specs = [(int(64 / 4), 9 + 2),
                             (int(108 / 4), 15 + 2),
                             (int(140 / 4), 17 + 2),
                             (int(256 / 4), 20 + 2),
                             (int(math.ceil(img_width_range[1] / 4)), word_len + 2)]
    self.bucket_min_width, self.bucket_max_width = img_width_range
    self.image_height = img_height
    self.valid_target_len = valid_target_len
    self.bucket_data = {i: BucketData()
                        for i in range(self.bucket_max_width + 1)}
def __init__(self, data_root, annotation_fn, evaluate=False,
             valid_target_len=float('inf'),
             img_width_range=(170, 1016),  # iam training width range
             word_len=81):  # iam training max
    """
    :param data_root: dataset root directory
    :param annotation_fn: annotation file, absolute or relative to data_root
    :param img_width_range: only needed for the training set
    """
    img_height = 32
    self.data_root = data_root
    if os.path.exists(annotation_fn):
        self.annotation_path = annotation_fn
    else:
        self.annotation_path = os.path.join(data_root, annotation_fn)
    if evaluate:
        self.bucket_specs = [
            (int(math.floor(img_width_range[0])), int(95 + 2)),  # 95 = iam test max
            (int(math.ceil(img_width_range[1] / 5)), int(95 + 2)),
            (int(math.ceil(img_width_range[1] / 4)), int(95 + 2)),
            (int(math.ceil(img_width_range[1] / 3)), int(95 + 2)),
            (int(math.ceil(img_width_range[1] / 2)), int(95 + 2))
        ]
    else:
        self.bucket_specs = [
            (int(math.floor(img_width_range[0])), int(word_len + 2)),
            (int(math.ceil(img_width_range[1] / 5)), int(word_len + 2)),
            (int(math.ceil(img_width_range[1] / 4)), int(word_len + 2)),
            (int(math.ceil(img_width_range[1] / 3)), int(word_len + 2)),
            (int(math.ceil(img_width_range[1] / 2)), int(word_len + 2))
        ]
    self.bucket_min_width, self.bucket_max_width = img_width_range
    self.image_height = img_height
    self.valid_target_len = valid_target_len
    self.bucket_data = {i: BucketData()
                        for i in range(self.bucket_max_width + 1)}
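# A quick worked example (standalone, values only) of the training bucket specs
# the branch above produces for img_width_range=(170, 1016) and word_len=81:
import math

lo, hi, word_len = 170, 1016, 81
specs = [(int(math.floor(lo)), word_len + 2)] + \
        [(int(math.ceil(hi / d)), word_len + 2) for d in (5, 4, 3, 2)]
print(specs)  # [(170, 83), (204, 83), (254, 83), (339, 83), (508, 83)]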
def __init__(self, data_root, annotation_fn, train_sample_size=None,
             evaluate=False, valid_target_len=float('inf'),
             img_width_range=(12, 320), word_len=60):
    """
    :param data_root: dataset root directory
    :param annotation_fn: annotation file, absolute or relative to data_root
    :param img_width_range: only needed for the training set
    """
    img_height = 32
    self.data_root = data_root
    if os.path.exists(annotation_fn):
        self.annotation_path = annotation_fn
    else:
        self.annotation_path = os.path.join(data_root, annotation_fn)
    if evaluate:
        self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)),
                             (int(math.floor(108 / 4)), int(word_len + 2)),
                             (int(math.floor(140 / 4)), int(word_len + 2)),
                             (int(math.floor(256 / 4)), int(word_len + 2)),
                             (int(math.floor(img_width_range[1] / 4)), int(word_len + 2)),
                             (int(math.ceil(img_width_range[1] / 2)), word_len + 2)]
    else:
        self.bucket_specs = [(int(64 / 4), 9 + 2),
                             (int(108 / 4), 15 + 2),
                             (int(140 / 4), 17 + 2),
                             (int(256 / 4), 20 + 2),
                             (int(math.ceil(img_width_range[1] / 4)), word_len + 2),
                             (int(math.ceil(img_width_range[1] / 2)), word_len + 2)]
    self.bucket_min_width, self.bucket_max_width = img_width_range
    self.image_height = img_height
    self.valid_target_len = valid_target_len
    self.train_sample_size = train_sample_size
    self.bucket_data = {i: BucketData()
                        for i in range(self.bucket_max_width + 1)}
    self.OnLineDataGen = DataGenTextOnLine()
def __init__(self, data_root, annotation_fn, evaluate=False,
             valid_target_len=float('inf'), img_width_range=(39, 936),
             word_len=50):
    """
    :param data_root: dataset root directory
    :param annotation_fn: annotation file, absolute or relative to data_root
    :param img_width_range: only needed for the training set
    """
    img_height = 48
    self.data_root = data_root
    if os.path.exists(annotation_fn):
        self.annotation_path = annotation_fn  # absolute path
    else:
        self.annotation_path = os.path.join(data_root, annotation_fn)  # relative path
    if evaluate:
        self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)),
                             (int(math.floor(128 / 4)), int(word_len + 2)),
                             (int(math.floor(256 / 4)), int(word_len + 2)),
                             (int(math.floor(512 / 4)), int(word_len + 2)),
                             (int(math.floor(img_width_range[1] / 4)), int(word_len + 2))]
    else:
        self.bucket_specs = [(int(64 / 4), 9 + 2),
                             (int(128 / 4), 15 + 2),
                             (int(256 / 4), 17 + 2),
                             (int(512 / 4), 25 + 2),
                             (int(math.ceil(img_width_range[1] / 4)), word_len + 2)]
    self.bucket_min_width, self.bucket_max_width = img_width_range
    self.image_height = img_height
    self.valid_target_len = valid_target_len
    self.bucket_data = {i: BucketData()
                        for i in range(self.bucket_max_width + 1)}
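# A plausible preprocessing sketch (an assumption, not shown in the source):
# resize an input image to the fixed height (48 here) while preserving aspect
# ratio, then clamp the resulting width to [bucket_min_width, bucket_max_width].
# The actual pipeline may differ.
from PIL import Image

def resize_to_height(path, img_height=48, min_w=39, max_w=936):
    img = Image.open(path).convert('L')  # grayscale
    w, h = img.size
    new_w = max(min_w, min(max_w, int(round(w * img_height / h))))
    return img.resize((new_w, img_height), Image.BILINEAR)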
def clear(self):
    # Drop all queued samples and start over with empty buckets.
    self.bucket_data = {i: BucketData()
                        for i in range(self.bucket_max_width + 1)}
def __init__(self):
    img_width_range = cfg.img_width_range
    word_len = cfg.word_len
    self.batch_size = cfg.batch_size
    self.visualize = cfg.visualize

    gpu_device_id = '/gpu:' + str(cfg.gpu_id)
    if cfg.gpu_id == -1:
        gpu_device_id = '/cpu:0'
        print("Using CPU model!")

    with tf.device(gpu_device_id):
        self.img_data = tf.placeholder(tf.float32,
                                       shape=(None, 1, 32, None),
                                       name='img_data')
        self.zero_paddings = tf.placeholder(tf.float32,
                                            shape=(None, None, 512),
                                            name='zero_paddings')

    self.bucket_specs = [(int(math.floor(64 / 4)), int(word_len + 2)),
                         (int(math.floor(108 / 4)), int(word_len + 2)),
                         (int(math.floor(140 / 4)), int(word_len + 2)),
                         (int(math.floor(256 / 4)), int(word_len + 2)),
                         (int(math.floor(img_width_range[1] / 4)), int(word_len + 2))]
    buckets = self.buckets = self.bucket_specs

    self.decoder_inputs = []
    self.encoder_masks = []
    self.target_weights = []
    with tf.device(gpu_device_id):
        # Allocate enough placeholders for the largest bucket; smaller
        # buckets reuse a prefix of them.
        for i in range(int(buckets[-1][0] + 1)):
            self.encoder_masks.append(
                tf.placeholder(tf.float32, shape=[None, 1],
                               name="encoder_mask{0}".format(i)))
        for i in range(buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(tf.float32, shape=[None],
                               name="weight{0}".format(i)))

    self.bucket_min_width, self.bucket_max_width = img_width_range
    self.image_height = cfg.img_height
    self.valid_target_len = cfg.valid_target_len
    self.forward_only = True
    self.bucket_data = {i: BucketData()
                        for i in range(self.bucket_max_width + 1)}

    with tf.device(gpu_device_id):
        cnn_model = CNN(self.img_data, True)  # (not self.forward_only)
        self.conv_output = cnn_model.tf_output()
        self.concat_conv_output = tf.concat(
            axis=1, values=[self.conv_output, self.zero_paddings])
        self.perm_conv_output = tf.transpose(self.concat_conv_output,
                                             perm=[1, 0, 2])

    with tf.device(gpu_device_id):
        self.attention_decoder_model = Seq2SeqModel(
            encoder_masks=self.encoder_masks,
            encoder_inputs_tensor=self.perm_conv_output,
            decoder_inputs=self.decoder_inputs,
            target_weights=self.target_weights,
            target_vocab_size=cfg.target_vocab_size,
            buckets=self.buckets,
            target_embedding_size=cfg.target_embedding_size,
            attn_num_layers=cfg.attn_num_layers,
            attn_num_hidden=cfg.attn_num_hidden,
            forward_only=self.forward_only,
            use_gru=cfg.use_gru)

    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    self.saver_all = tf.train.Saver(tf.global_variables())
    self.saver_all.restore(self.sess, cfg.ocr_model_path)
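# Why the loops above run to buckets[-1][...] + 1: the graph allocates enough
# encoder-mask / decoder placeholders for the largest bucket and reuses a
# prefix of them for smaller buckets. A standalone sketch of those counts,
# assuming (hypothetically) cfg.img_width_range = (12, 320) and cfg.word_len = 30:
import math

word_len, max_width = 30, 320
bucket_specs = [(int(math.floor(w / 4)), word_len + 2)
                for w in (64, 108, 140, 256, max_width)]
print(bucket_specs[-1][0] + 1)  # 81 encoder-mask placeholders
print(bucket_specs[-1][1] + 1)  # 33 decoder-input / target-weight placeholders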
def __init__(self, data_root, annotation_fn, evaluate=False,
             valid_target_len=float('inf'),
             img_width_range=(83, 2083),  # iamdb train set
             word_len=81):
    # img_width_range = (354, 1990)  # sgdb
    # word_len = 74
    # img_width_range = (306, 911)  # pardb
    # word_len = 70
    # img_width_range = (135, 2358)  # rimes
    # word_len = 100
    # img_width_range = (175, 1801)  # gwdb
    # word_len = 87
    """
    :param data_root: dataset root directory
    :param annotation_fn: annotation file, absolute or relative to data_root
    :param img_width_range: only needed for the training set
    """
    img_height = 64
    self.data_root = data_root
    if os.path.exists(annotation_fn):
        self.annotation_path = annotation_fn
    else:
        self.annotation_path = os.path.join(data_root, annotation_fn)
    # The original evaluate/train branches built identical specs, so they
    # are collapsed into one assignment. Unlike the other variants, both
    # tuple entries here are consecutive fractions of the maximum width.
    self.bucket_specs = [(int(math.ceil(img_width_range[0])),
                          int(math.ceil(img_width_range[1] / 8))),
                         (int(math.ceil(img_width_range[1] / 8)),
                          int(math.ceil(img_width_range[1] / 6))),
                         (int(math.ceil(img_width_range[1] / 6)),
                          int(math.ceil(img_width_range[1] / 4))),
                         (int(math.ceil(img_width_range[1] / 4)),
                          int(math.ceil(img_width_range[1] / 3))),
                         (int(math.ceil(img_width_range[1] / 3)),
                          int(math.ceil(img_width_range[1] / 2)))]
    self.bucket_min_width, self.bucket_max_width = img_width_range
    self.image_height = img_height
    self.valid_target_len = valid_target_len
    self.bucket_data = {i: BucketData()
                        for i in range(self.bucket_max_width + 1)}
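# A quick worked example (standalone) of what the collapsed assignment above
# yields for the iamdb range (83, 2083):
import math

lo, hi = 83, 2083
edges = [lo] + [math.ceil(hi / d) for d in (8, 6, 4, 3, 2)]
specs = list(zip(edges[:-1], edges[1:]))
print(specs)  # [(83, 261), (261, 348), (348, 521), (521, 695), (695, 1042)]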