def __load_data(self):
    """Collect one example per usable file in ``self.examples_path``.

    The part of each file name before the first underscore is taken as the
    label text. Files whose label text is longer than
    ``self.max_char_count`` are ignored.

    Returns:
        tuple: ``(examples, n)`` where ``examples`` is a list of
        ``(image_array, label_text, label_array)`` tuples and ``n`` is the
        number of entries in that list.
    """
    print('Loading data')
    loaded = []
    for file_name in os.listdir(self.examples_path):
        text = file_name.split('_')[0]
        if len(text) <= self.max_char_count:
            # resize_image hands back a pair; only its first item is stored.
            image, _ = resize_image(
                os.path.join(self.examples_path, file_name),
                self.max_image_width,
            )
            loaded.append((image, text, label_to_array(text)))
    return loaded, len(loaded)
def load_data(self):
    """Load every usable image in ``self.examples_path`` into memory.

    The part of each file name before the first underscore is the label
    text. Files whose label text is longer than ``self.max_char_count`` are
    skipped.

    Returns:
        tuple: ``(examples, n)`` where ``examples`` is a list of
        ``(img_arr, label_string, label_index_array)`` tuples and ``n`` is
        the list length.

    Raises:
        ValueError: if fewer examples than ``self.batch_size`` were loaded.
    """
    # TODO:: Change this for different format of data.
    print("Loading data")
    examples = []
    for f in os.listdir(self.examples_path):
        label = f.split("_")[0]
        if len(label) > self.max_char_count:
            continue
        # Image.open is lazy and keeps the file handle open; converting to an
        # array inside the ``with`` forces the read and then releases it.
        with Image.open(os.path.join(self.examples_path, f), mode="r") as img:
            pixels = np.array(img)
        arr, _ = resize_image(pixels, self.max_image_width)
        examples.append((arr, label, label_to_array(label, self.char_vector)))
    if len(examples) < self.batch_size:
        # Raising a bare string is itself a TypeError in Python 3, which hid
        # the real message; raise a proper exception instead.
        raise ValueError("Error: Data less than batch size")
    return examples, len(examples)
def __load_data(self):
    """Read every usable image in ``self.examples_path`` in shuffled order.

    The directory listing is shuffled in place ten times before loading.
    The part of each file name before the first underscore is the label
    text; files whose label text exceeds ``self.max_char_count`` are ignored.

    Returns:
        tuple: ``(examples, n)`` where ``examples`` is a list of
        ``(image_array, label_text, label_array)`` tuples and ``n`` is its
        length.
    """
    print('Loading data...')
    file_names = os.listdir(self.examples_path)
    # Same number of shuffles as before, so the global RNG is advanced
    # by exactly the same amount.
    for _ in range(10):
        random.shuffle(file_names)

    loaded = []
    for file_name in file_names:
        text = file_name.split('_')[0]
        if len(text) <= self.max_char_count:
            # read_image returns a pair; only its first item is stored.
            image, _ = read_image(os.path.join(self.examples_path, file_name))
            loaded.append(
                (image, text, label_to_array(text, self.char_vector))
            )
    print("Loaded!")
    return loaded, len(loaded)
def __load_data(self):
    """Load lower-cased examples from ``self.examples_path``.

    Directory entries are visited in listing order and the scan stops once
    the entry index exceeds 100000. The part of each file name before the
    first underscore is the label text; entries whose label text is longer
    than ``self.max_char_count`` are skipped. The number of loaded examples
    is printed at the end.

    Returns:
        tuple: ``(examples, n)`` where ``examples`` is a list of
        ``(image_array, lower_cased_label, label_array, label_array_2)``
        tuples and ``n`` is its length.
    """
    print('Loading data')
    loaded = []
    for position, file_name in enumerate(os.listdir(self.examples_path)):
        if position > 100000:
            break
        raw_text = file_name.split('_')[0]
        # The length filter looks at the label before it is lower-cased.
        if len(raw_text) <= self.max_char_count:
            image, _ = resize_image(
                os.path.join(self.examples_path, file_name),
                self.max_image_width,
            )
            text = raw_text.lower()
            loaded.append(
                (image, text, label_to_array(text), label_to_array_2(text))
            )
    # One example is appended per counted file, so the length is the count.
    print(len(loaded))
    return loaded, len(loaded)
def batch_generator(self, queue, max_queued=20):
    """Produce training batches forever and enqueue them in ``queue``.

    Each batch is built from ``self.batch_size`` samples drawn from a
    ``GeneratorFromDict`` and enqueued as ``(batch_y, batch_dt, batch_x)``:
    the label strings, the sparse-tuple encoding of the label arrays, and
    the images reshaped to ``(batch, self.max_image_width, 32, 1)``.

    Previously a batch was built first and silently dropped when the queue
    already held the limit, so the producer burned CPU on work that was
    thrown away. The capacity check now comes first and the producer sleeps
    briefly while the queue is full.

    Args:
        queue: object exposing ``qsize()`` and ``put()``.
        max_queued: number of queued batches at which production pauses.
            Defaults to 20, the limit that was previously hard-coded.

    This function never returns.
    """
    import time  # local import: only needed for the back-off below

    generator = GeneratorFromDict(language=self.language)
    while True:
        if queue.qsize() >= max_queued:
            # Consumer is behind; wait instead of building a batch that
            # would be discarded.
            time.sleep(0.01)
            continue

        batch = []
        while len(batch) < self.batch_size:
            img, lbl = generator.next()
            batch.append((
                resize_image(
                    np.array(img.convert("L")), self.max_image_width
                )[0],
                lbl,
                label_to_array(lbl, self.char_vector),
            ))

        raw_batch_x, raw_batch_y, raw_batch_la = zip(*batch)
        batch_y = np.reshape(np.array(raw_batch_y), (-1))
        batch_dt = sparse_tuple_from(
            np.reshape(np.array(raw_batch_la), (-1)))
        # Swap the two image axes before adding the trailing channel axis.
        raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)
        batch_x = np.reshape(
            np.array(raw_batch_x),
            (len(raw_batch_x), self.max_image_width, 32, 1))

        queue.put((batch_y, batch_dt, batch_x))
def load_data(self):
    """Read every usable grayscale image in ``self.examples_path``.

    The part of each file name before the first underscore is the label
    text; files whose label text is longer than ``self.max_char_count`` are
    ignored. Images are read with ``imread(..., mode="L")`` and passed
    through ``resize_image``.

    Returns:
        tuple: ``(examples, n)`` where ``examples`` is a list of
        ``(image_array, label_text, label_array)`` tuples and ``n`` is its
        length.
    """
    print("Loading data")
    loaded = []
    for file_name in os.listdir(self.examples_path):
        text = file_name.split("_")[0]
        if len(text) <= self.max_char_count:
            pixels = imread(
                os.path.join(self.examples_path, file_name), mode="L"
            )
            # resize_image returns a pair; only its first item is stored.
            image, _ = resize_image(pixels, self.max_image_width)
            loaded.append(
                (image, text, label_to_array(text, self.char_vector))
            )
    return loaded, len(loaded)
def __load_data(self):
    """Load every usable image in ``self.examples_path``.

    The part of each file name before the first underscore is the label
    text. Every loaded image is also written to ``blah.png``, overwriting
    the previous one.

    :return: ``(examples, n)`` where ``examples`` is a list of
        ``(image_array, label_text, label_array)`` tuples and ``n`` is its
        length.
    """
    print("Loading data")
    loaded = []
    for file_name in os.listdir(self.examples_path):
        text = file_name.split("_")[0]
        # Skip labels whose character count exceeds the maximum length.
        if len(text) <= self.max_char_count:
            image, _ = resize_image(
                os.path.join(self.examples_path, file_name),
                self.max_image_width,
            )
            loaded.append((image, text, label_to_array(text)))
            # NOTE(review): rewrites the same file for every image; this
            # looks like leftover debugging - confirm before removing.
            imsave("blah.png", image)
    return loaded, len(loaded)
def __iter__(self):
    """Yield fixed-size batches built from the files in ``self.examples_path``.

    The part of each file name before the first underscore is the label
    text. It is taken with ``split('_')[0]`` so that names containing no
    underscore, or more than one, no longer abort the iteration with a
    ``ValueError`` from tuple unpacking. Files whose label text is longer
    than ``self.max_char_count`` are skipped. Labels are lower-cased before
    encoding.

    Yields:
        tuple: ``(batch_y, batch_dt, batch_x)`` - the label strings, the
        sparse-tuple encoding of the label arrays, and the images reshaped
        to ``(batch, self.max_image_width, 32, 1)``. A batch is emitted each
        time ``self.batch_size`` examples have accumulated; a trailing
        partial batch is not emitted.
    """
    examples = []
    for f in os.listdir(self.examples_path):
        label = f.split('_')[0]
        if len(label) > self.max_char_count:
            continue
        arr, _ = resize_image(
            os.path.join(self.examples_path, f), self.max_image_width)
        label_lower = label.lower()
        examples.append((arr, label_lower, label_to_array(label_lower)))

        if len(examples) == self.batch_size:
            raw_batch_x, raw_batch_y, raw_batch_la = zip(*examples)
            batch_y = np.reshape(np.array(raw_batch_y), (-1))
            batch_dt = sparse_tuple_from(np.array(raw_batch_la))
            # Swap the two image axes before adding the channel axis.
            raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)
            batch_x = np.reshape(
                np.array(raw_batch_x),
                (len(raw_batch_x), self.max_image_width, 32, 1))
            yield (batch_y, batch_dt, batch_x)
            examples = []
def __load_train_data(self):
    """Load all training examples from the TFRecord files.

    Builds a TF1 queue-based input pipeline over
    ``./data/train/*.tfrecords`` (a single shuffled pass, ``num_epochs=1``),
    then pulls records one at a time in a fresh session until the queue
    raises ``tf.errors.OutOfRangeError``.

    Each record supplies a raw ``uint8`` image buffer, a label string and
    the image's ``row``/``col`` sizes. The image is reshaped to
    ``(row, col)`` and passed through ``resize_train_image``; underscores
    are removed from the label.

    Returns:
        tuple: ``(examples, n)`` where ``examples`` is a list of
        ``(image_array, label_text, label_array)`` tuples and ``n`` is its
        length.
    """
    print("loading train data")
    examples = []
    filename = './data/train/*.tfrecords'
    files = tf.train.match_filenames_once(filename)
    filename_queue = tf.train.string_input_producer(files,shuffle=True,num_epochs=1)
    reader = tf.TFRecordReader()
    _,serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([],tf.string),
                                           'img_raw': tf.FixedLenFeature([],tf.string),
                                           'row': tf.FixedLenFeature([],tf.int64),
                                           'col': tf.FixedLenFeature([],tf.int64)
                                       })
    image = tf.decode_raw(features['img_raw'],tf.uint8)
    img_label = features['label']
    # Strings in TF are stored as bytes; bytes.decode turns them back into
    # text (done after sess.run below).
    row = tf.cast(features['row'],tf.int64)
    col = tf.cast(features['col'],tf.int64)
    with tf.Session() as sess:
        # TF functions may create local variables internally, in addition to
        # our own global variables; all of them must be initialized before
        # anything is run.
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        images = []  # never used below
        i = 0
        try:
            # The loop only ends when sess.run raises OutOfRangeError.
            while True:
                # Incremented before the fetch, so after the final (failing)
                # fetch ``i`` is one more than the number of examples read.
                i = i + 1
                image1,label1,height,width = sess.run([image,img_label,row,col])
                label1 = bytes.decode(label1)  # convert from the binary representation
                image1 = np.reshape(np.array(image1),(height,width))
                arr, initial_len = resize_train_image(image1,self.max_image_width)
                strs = label1.split('_')
                # Re-joins the pieces without the underscores. In Python 3
                # the ``i`` inside the generator expression is local to it
                # and does not disturb the counter above.
                label_without_ = ''.join(strs[i] for i in range(len(strs)))
                #print(label_without_)
                # (original note: save to TensorBoard for inspection)
                examples.append(
                    (
                        arr,
                        label_without_,
                        label_to_array(label_without_)
                    )
                )
        except tf.errors.OutOfRangeError:
            print("done!")
        print(i)
        # Stop and wait for the queue-runner threads before the session closes.
        coord.request_stop()
        coord.join(threads)
    return examples,len(examples)
def __load_data(self):
    """Turn the files of ``self.examples_path`` into training examples.

    A file contributes an example when the text before the first underscore
    of its name (the label) has at most ``self.max_char_count`` characters.

    Returns:
        tuple: ``(examples, n)`` where ``examples`` is a list of
        ``(image_array, label_text, label_array)`` tuples and ``n`` is its
        length.
    """
    print('Loading data')
    folder = self.examples_path
    collected = []
    for entry in os.listdir(folder):
        label_text = entry.split('_')[0]
        if len(label_text) > self.max_char_count:
            continue
        # Only the first item of resize_image's pair is stored.
        resized = resize_image(os.path.join(folder, entry),
                               self.max_image_width)
        pixels, _ = resized
        encoded = label_to_array(label_text)
        collected.append((pixels, label_text, encoded))
    total = len(collected)
    return collected, total
def __generate_tfRecord_batch(self):
    """Assemble one batch of ``self.batch_size`` examples from the iterator.

    Variable-length batches are awkward to build inside TensorFlow, so
    elements are fetched one at a time and batched here in Python. The
    fetch used to run once, before the loop, and the loop then reused
    index 0 of that single result, so every batch was ``batch_size`` copies
    of the same sample. The fetch now runs once per loop iteration.

    Each fetched element is decoded: the label bytes become text and the
    image is reshaped to ``(height, width)`` and passed through
    ``resize_train_image``.

    Returns:
        list: ``(image_array, label_text, label_array)`` tuples.

    NOTE(review): assumes ``self.iterator`` yields one element per fetch
    (index 0 is read from each fetched value) - confirm against the dataset
    definition. Whatever error ``sess.run`` raises when the iterator runs
    out propagates to the caller.
    """
    examples = []
    images, labels, heights, widths = self.iterator.get_next()
    with tf.Session() as sess:
        for _ in range(self.batch_size):
            image_batch, label_batch, height_batch, width_batch = sess.run(
                [images, labels, heights, widths])
            image1 = image_batch[0]
            height = height_batch[0]
            width = width_batch[0]
            # Labels come back as bytes; convert them to text.
            label1 = bytes.decode(label_batch[0])
            image1 = np.reshape(np.array(image1), (height, width))
            arr, _ = resize_train_image(image1, self.max_image_width)
            examples.append((arr, label1, label_to_array(label1)))
    return examples
def train(self, iteration_count):
    """Run ``iteration_count`` passes over the training batches.

    For every batch the label strings are encoded with
    ``config.CHAR_VECTOR``, converted with ``sparse_tuple_from`` and fed to
    the optimizer, loss and decoder ops. The summed loss of each pass is
    printed.

    Args:
        iteration_count: number of passes over the training batches.

    Returns:
        None
    """
    with self.__session.as_default():
        print('Training')
        for epoch in range(iteration_count):
            epoch_loss = 0
            batches = self.__data_manager.get_next_train_batch()
            for batch_y, batch_sl, batch_x in batches:
                encoded = [
                    label_to_array(text, config.CHAR_VECTOR)
                    for text in batch_y
                ]
                sparse_targets = sparse_tuple_from(np.asarray(encoded))
                feed = {
                    self.__inputs: batch_x,
                    self.__seq_len: batch_sl,
                    self.__targets: sparse_targets,
                }
                # Only the loss is used; the other two fetches are ignored.
                _, loss_value, _ = self.__session.run(
                    [self.__optimizer, self.__loss, self.__decoded],
                    feed_dict=feed,
                )
                epoch_loss += loss_value
            print('[{}] Iteration loss: {}'.format(epoch, epoch_loss))
    return None
def train(self, iteration_count):
    """Train for ``iteration_count`` passes, checkpointing after each pass.

    Pass numbering continues from ``self.step``. For every batch the labels
    are encoded with ``config.CHAR_VECTOR`` and fed together with a fixed
    sequence length (``self.__max_char_count`` repeated
    ``self.__data_manager.batch_size`` times). On every tenth pass the first
    two labels of each batch are printed next to their decoded output. After
    each pass the session is saved, the summed loss is printed and
    ``self.step`` is incremented.

    Args:
        iteration_count: number of passes to run.

    Returns:
        None
    """
    with self.__session.as_default():
        print('Training')
        first_pass = self.step
        for epoch in range(first_pass, iteration_count + first_pass):
            epoch_loss = 0
            batches = self.__data_manager.get_next_train_batch()
            for batch_y, batch_sl, batch_x in batches:
                encoded = [
                    label_to_array(text, config.CHAR_VECTOR)
                    for text in batch_y
                ]
                sparse_targets = sparse_tuple_from(np.asarray(encoded))
                feed = {
                    self.__inputs: batch_x,
                    self.__seq_len:
                        [self.__max_char_count] * self.__data_manager.batch_size,
                    self.__targets: sparse_targets,
                }
                _, decoded, loss_value = self.__session.run(
                    [self.__optimizer, self.__decoded, self.__cost],
                    feed_dict=feed,
                )
                if epoch % 10 == 0:
                    # Show the first two samples of the batch.
                    for sample in (0, 1):
                        print(batch_y[sample])
                        print(ground_truth_to_word(decoded[sample]))
                epoch_loss += loss_value
            self.__saver.save(
                self.__session, self.__save_path, global_step=self.step)
            print('[{}] Iteration loss: {}'.format(self.step, epoch_loss))
            self.step += 1
    return None
def test(self):
    """Decode every test batch and report the accumulated edit distance.

    For each batch the decoder op is run on the inputs and sequence
    lengths, and the summed ``levenshtein`` result between the ground truth
    and the decoded output is added to a running total.

    The report line used to pass two values to a format string with a
    single placeholder, so the per-example error was computed but never
    shown, and that division crashed on an empty test set. Both values are
    now printed and the mean is reported as NaN when there are no examples.
    The sparse target encoding that was built per batch but never fed to
    the session has been removed.

    Returns:
        None
    """
    with self.__session.as_default():
        print('Testing')
        total_error = 0
        example_count = 0
        for batch_y, batch_sl, batch_x in self.__data_manager.get_next_test_batch():
            decoded = self.__session.run(
                [self.__decoded],
                feed_dict={
                    self.__inputs: batch_x,
                    self.__seq_len: batch_sl
                })
            example_count += len(batch_y)
            total_error += np.sum(
                levenshtein(ground_truth_to_word(batch_y),
                            ground_truth_to_word(decoded)))
        if example_count:
            mean_error = total_error / example_count
        else:
            mean_error = float('nan')
        print('Error on test set: {} (mean per example: {})'.format(
            total_error, mean_error))
    return None
def __load_data(self):
    """Load the examples listed in the label file ``self.examples_label_path``.

    Each line is split on ``"__"``: the first field is the image path and
    the second is the label. Lines whose label is longer than
    ``self.max_char_count`` or starts with ``'#'`` are skipped. Images are
    read in grayscale with OpenCV and passed through ``resize_image``.

    The dictionary used to be re-read from disk for every line, and
    ``dictionary_len`` was left unbound when no line reached that point,
    which made the function fail with ``UnboundLocalError``. It is now read
    once, before the loop.

    Returns:
        tuple: ``(examples, n, dictionary_len)`` where ``examples`` is a
        list of ``(image_array, label, label_array)`` tuples, ``n`` is its
        length and ``dictionary_len`` is the length reported by
        ``read_dictionary`` plus one.
    """
    print('Loading data')
    # Loop-invariant: the dictionary path does not change between lines.
    dictionary, _, dictionary_len = read_dictionary(self.dictionary_path)
    examples = []
    with open(self.examples_label_path, 'r') as f:  # Address of target_label.txt
        for line in f:
            fields = line.split("__")
            address = fields[0]
            # NOTE(review): nothing strips the line, so if the label is the
            # last field it still carries the trailing newline - confirm the
            # label-file format.
            label = fields[1]
            if len(label) > self.max_char_count:
                continue
            if label[0] == '#':
                continue
            img = cv2.imread(address, cv2.IMREAD_GRAYSCALE)
            arr, _ = resize_image(img, self.max_image_width)
            examples.append((arr, label, label_to_array(label, dictionary)))
    # Presumably reserves one extra class index (e.g. a CTC blank) - TODO confirm.
    dictionary_len = dictionary_len + 1
    return examples, len(examples), dictionary_len
def __load_data(self):
    """Load, clean up and shuffle the examples in ``self.examples_path``.

    Files whose name contains ``"("`` are deleted from disk and skipped.
    Files whose label (from ``self.get_label``) is longer than
    ``self.max_char_count`` are skipped. Every loaded image is also written
    to ``blah.png``, overwriting the previous one. The resulting list is
    shuffled in place before it is returned.

    Returns:
        tuple: ``(examples, n)`` where ``examples`` is a list of
        ``(image_array, file_name, label_array)`` tuples and ``n`` is its
        length.
    """
    print('Loading data from {}'.format(self.examples_path))
    loaded = []
    for file_name in os.listdir(self.examples_path):
        full_path = os.path.join(self.examples_path, file_name)
        if "(" in file_name:
            # NOTE(review): permanently deletes the file - confirm this
            # clean-up is wanted on every load.
            os.remove(full_path)
        elif len(self.get_label(file_name)) <= self.max_char_count:
            image, _ = resize_image(full_path, self.max_image_width)
            # The second item is the file name itself, not the label.
            loaded.append(
                (image, file_name,
                 label_to_array(self.get_label(file_name)))
            )
            # NOTE(review): rewrites the same file for every image; looks
            # like leftover debugging - confirm before removing.
            imsave('blah.png', image)
    shuffle(loaded)
    return loaded, len(loaded)
def main(args):
    """Resume training of the CRNN model from the latest checkpoint.

    All settings are hard-coded below; ``args`` is not read. The function

    1. loads the label dictionary and the list of image paths from
       ``image_path.txt`` (one path per line),
    2. builds the graph: CRNN features, a dense projection to
       ``config.NUM_CLASSES``, CTC loss, an Adadelta training op and a
       beam-search decoder,
    3. restores the latest checkpoint under ``../model/`` (returning early
       if there is none), and
    4. trains for ``iteration_count`` epochs, loading the images in chunks
       of ``batch_image`` paths and saving a checkpoint as it goes.
    """
    print('===========load dict===========')
    data_dict = loaddict()
    iteration_count = 1000
    batch_size = 64
    batch_image = 400000  # number of image paths loaded into memory at a time
    log_save_dir = "..//model//"
    restore = True
    # The training data
    print('==============load data=============')
    imagefiles = []
    with codecs.open("image_path.txt", 'r', encoding='utf-8') as file:
        line = file.readline()
        while line:
            imagefiles.append(line.strip())
            line = file.readline()
        # Redundant: the ``with`` block closes the file on exit anyway.
        file.close()
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, [batch_size, 32, None, 3],
                                name='inputs')
        # The CRNN
        crnn = CRNN(inputs)
        # Our target output
        targets = tf.sparse_placeholder(tf.int32, name='targets')
        # The length of the sequence
        seq_len = tf.placeholder(tf.int32, [None], name='seq_len')
        # Shape notes below are the original author's; they mention both 37
        # and 36 time steps - TODO confirm which is right.
        logits = tf.reshape(crnn, [-1, 512])  # (batchsize x 37) x 512
        W = tf.Variable(tf.truncated_normal([512, config.NUM_CLASSES],
                                            stddev=0.1,
                                            dtype=tf.float32),
                        name="W")
        b = tf.Variable(tf.constant(0.,
                                    shape=[config.NUM_CLASSES],
                                    dtype=tf.float32),
                        name="b")
        print(logits.get_shape())
        logits = tf.matmul(logits, W) + b
        print(logits.get_shape())
        logits = tf.reshape(logits,
                            [batch_size, -1, config.NUM_CLASSES
                             ])  # batch_size x 36 x NUM_CLASSES
        print(logits.get_shape())
        # Final layer, the output of the BLSTM
        logits = tf.transpose(logits,
                              (1, 0, 2))  # 36 x batch_size x NUM_CLASSES
        global_step = tf.Variable(0, trainable=False)
        # Loss and cost calculation
        loss = tf.nn.ctc_loss(targets, logits, seq_len)
        cost = tf.reduce_mean(loss)
        # Training step. Disabled alternatives left by the author:
        # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9).minimize(cost)
        # optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost,global_step=global_step)
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.001).minimize(
            loss=cost, global_step=global_step)
        # The decoded answer
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, seq_len)
        # The error rate (built here but not fetched in the loop below)
        seq_dis = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))
    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.allow_growth = True
    with tf.Session(graph=graph, config=config_gpu) as sess:
        # NOTE(review): the variable initializer is disabled, so variables
        # only receive values through saver.restore below; switching
        # ``restore`` off would presumably leave them uninitialized - confirm.
        # tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # Train
        if restore:
            print('=============load model============')
            ckpt = tf.train.get_checkpoint_state("../model/")
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("load success")
            else:
                # No checkpoint to resume from: give up without training.
                print("no such file")
                return
        print('============begin training=============')
        for it in range(0, iteration_count):
            i = 0  # batches processed in this epoch
            # ``iter`` shadows the built-in of the same name inside this loop.
            for iter in range(1 + (len(imagefiles) // batch_image)):
                imagepath = imagefiles[iter * batch_image:(iter + 1) *
                                       batch_image]
                train_data = load_data_big(imagepath)
                # Only full batches are used; a trailing partial batch is
                # dropped. ``b`` rebinds the name of the bias variable above,
                # which is harmless because the graph is already built.
                for b in [
                        train_data[x * batch_size:x * batch_size + batch_size]
                        for x in range(0, int(len(train_data) / batch_size))
                ]:
                    start_time = time.time()
                    in_data, labels, data_seq_len = zip(*b)
                    data_targets = np.asarray(
                        [label_to_array(lbl, data_dict) for lbl in labels])
                    data_targets = sparse_tuple_from(data_targets)
                    data_shape = np.shape(in_data)
                    in_data = np.reshape(
                        in_data,
                        (data_shape[0], data_shape[1], data_shape[2], 3))
                    costacc, _ = sess.run(
                        [cost, optimizer], {
                            inputs: in_data,
                            targets: data_targets,
                            seq_len: data_seq_len,
                        })
                    i += 1
                    print('epoch:{}/1000,cost={},iter={},time={}'.format(
                        it, costacc, i,
                        time.time() - start_time))
                # Free the chunk before the next one is loaded.
                del train_data
                gc.collect()
                print("complete 40W images")
            # ``it % 1`` is always 0, so a checkpoint is written every time.
            if (it % 1 == 0):
                checkpoint_path = os.path.join(log_save_dir,
                                               'model.ckpt')
                saver.save(sess, checkpoint_path)
            print("complete one epoch")
def main(args):
    """
    Usage:
        train.py [iteration_count] [batch_size] [data_dir] [log_save_dir] [graph_save_dir]

    Build the CRNN graph (CNN/BLSTM features, dense projection to
    ``config.NUM_CLASSES``, CTC loss, momentum optimizer, beam-search
    decoder) and train it on the first 70% of the data returned by
    ``load_data(data_dir)``.

    The training op used to be created but never passed to ``sess.run``,
    so the loop only evaluated the cost and no weights were ever updated.
    It is now fetched together with the decoder output and the cost.

    Args:
        args: command-line style list; positions 1-5 are read as shown in
            the usage line above.
    """
    # The user-defined training parameters
    iteration_count = int(args[1])
    batch_size = int(args[2])
    data_dir = args[3]
    # Read so that a short argument list still fails early; not used below.
    log_save_dir = args[4]
    graph_save_dir = args[5]

    # The training data
    data = load_data(data_dir)
    train_data = data[0:int(len(data) * 0.70)]
    test_data = data[int(len(data) * 0.70):]  # held out; not used below

    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, [batch_size, 32, None, 1])
        # The CRNN
        crnn = CRNN(inputs)
        # Our target output
        targets = tf.sparse_placeholder(tf.int32, name='targets')
        # The length of the sequence
        seq_len = tf.placeholder(tf.int32, [None], name='seq_len')

        logits = tf.reshape(crnn, [-1, 512])
        W = tf.Variable(tf.truncated_normal([512, config.NUM_CLASSES],
                                            stddev=0.1),
                        name="W")
        b = tf.Variable(tf.constant(0., shape=[config.NUM_CLASSES]),
                        name="b")
        print(logits.get_shape())
        logits = tf.matmul(logits, W) + b
        print(logits.get_shape())
        logits = tf.reshape(logits, [batch_size, -1, config.NUM_CLASSES])
        print(logits.get_shape())
        # Final layer, the output of the BLSTM
        logits = tf.transpose(logits, (1, 0, 2))

        # Loss and cost calculation
        loss = tf.nn.ctc_loss(targets, logits, seq_len)
        cost = tf.reduce_mean(loss)
        # Training step
        optimizer = tf.train.MomentumOptimizer(0.01, 0.9).minimize(cost)
        # The decoded answer
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, seq_len)
        # The error rate (built here but not fetched in the loop below)
        acc = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

    with tf.Session(graph=graph) as sess:
        tf.global_variables_initializer().run()
        # Only full batches are used; a trailing partial batch is dropped.
        batch_count = int(len(train_data) / batch_size)
        # Train
        for it in range(0, iteration_count):
            iter_avg_cost = 0
            start = time.time()
            for x in range(0, batch_count):
                batch = train_data[x * batch_size:x * batch_size + batch_size]
                in_data, labels, data_seq_len = zip(*batch)
                print(data_seq_len)
                data_targets = np.asarray([
                    label_to_array(lbl, config.CHAR_VECTOR) for lbl in labels
                ])
                data_targets = sparse_tuple_from(data_targets)
                print(np.shape(data_targets[0]))
                print(np.shape(data_targets[1]))
                print(np.shape(data_targets[2]))
                print(data_targets[0])
                print(data_targets[1])
                print(data_targets[2])
                data_shape = np.shape(in_data)
                # Add the trailing channel axis expected by ``inputs``.
                in_data = np.reshape(
                    in_data,
                    (data_shape[0], data_shape[1], data_shape[2], 1))
                # Fetching ``optimizer`` is what actually applies the update.
                _, decoded_val, cost_val = sess.run(
                    [optimizer, decoded, cost], {
                        inputs: in_data,
                        targets: data_targets,
                        seq_len: data_seq_len,
                    })
                iter_avg_cost += (np.sum(cost_val) / batch_size) / batch_count
            print('[{}] {} : {}'.format(time.time() - start, it,
                                        iter_avg_cost))