def __init__(self, batch_size=None):
    net_params, train_params = parser_cfg_file('./net.cfg')
    self._model_save_path = str(train_params['model_save_path'])
    self.input_img_height = int(net_params['input_height'])
    self.input_img_width = int(net_params['input_width'])
    if batch_size is None:
        self.test_batch_size = int(net_params['test_batch_size'])
    else:
        self.test_batch_size = batch_size

    # load the label one-hot mapping (the file stores a dict literal)
    with open('./data/word_onehot.txt', 'r') as f:
        words_onehot_dict = eval(f.read())
    self.words_list = list(words_onehot_dict.keys())
    self.words_onehot_list = [words_onehot_dict[w] for w in self.words_list]

    # build the network
    self.inputs_tensor = tf.placeholder(
        tf.float32,
        [self.test_batch_size, self.input_img_height, self.input_img_width, 1])
    self.seq_len_tensor = tf.placeholder(tf.int32, [None], name='seq_len')

    crnn_net = CRNN(net_params, self.inputs_tensor, self.seq_len_tensor,
                    self.test_batch_size, True)
    net_output, decoded, self.max_char_count = crnn_net.construct_graph()
    self.dense_decoded = tf.sparse_tensor_to_dense(decoded[0], default_value=-1)

    self.sess = tf.Session()
    saver = tf.train.Saver()
    saver.restore(self.sess, "./model/ckpt")
def main():
    """ Entry point when using CRNN from the command line (prediction only) """
    args = parse_arguments()
    crnn = CRNN(
        args.iteration_count,
        args.batch_size,
        args.model_path,
        args.examples_path,
        args.max_image_width,
        0,  # train/test ratio; 0 means all examples are used for testing
        args.restore,
        1)
    predict_result = crnn.test()
    with open(args.output_path, 'w') as f:
        for result in predict_result:
            name, text = result.split(':', 1)
            f.write(name + ':' + text.strip('_') + '\n')  # one prediction per line
def load_model(abc, seq_proj=[0, 0], backend='resnet18', snapshot=None, cuda=False):
    net = CRNN(abc=abc, seq_proj=seq_proj, backend=backend)
    net = nn.DataParallel(net)
    if snapshot is not None:
        load_weights(net, torch.load(snapshot))
    if cuda:
        net = net.cuda()
    return net
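# A minimal usage sketch for load_model above; the alphabet string and the
# snapshot path are hypothetical stand-ins, not values from the original code.
import torch

net = load_model(abc='0123456789abcdefghijklmnopqrstuvwxyz',
                 snapshot='crnn_snapshot.pth',
                 cuda=torch.cuda.is_available())
net.eval()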
def load_model_from_checkpoint(checkpoint_file_name, use_gpu=False):
    """Load a pretrained CRNN model."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    return model
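# Hedged usage sketch: 'checkpoint.pth' is a hypothetical file name; line_size
# and vocab are module-level globals that load_model_from_checkpoint assumes.
import torch

model = load_model_from_checkpoint('checkpoint.pth',
                                   use_gpu=torch.cuda.is_available())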
def main():
    """ Entry point when using CRNN from the commandline """
    args = parse_arguments()
    if not args.train and not args.test:
        print("If we are not training, and not testing, what is the point?")
    crnn = None
    if args.train:
        crnn = CRNN(args.iteration_count, args.batch_size, args.model_path,
                    args.examples_path, args.max_image_width,
                    args.train_test_ratio, args.restore, 0)
        crnn.train(args.iteration_count)
    if args.test:
        if crnn is None:
            crnn = CRNN(args.iteration_count, args.batch_size, args.model_path,
                        args.examples_path, args.max_image_width, 0,
                        args.restore, 1)
        crnn.test()
def main():
    args = parse_arguments()
    if not args.train and not args.test:
        print("If we are not training, and not testing, what is the point?")
    crnn = None
    if args.train:
        crnn = CRNN(
            args.batch_size,
            args.model_path,
            args.examples_path,  # consistent with the test branch below
            args.max_image_width,
            args.train_test_ratio,
            args.restore
        )
        crnn.train(args.iteration_count)
    if args.test:
        if crnn is None:
            crnn = CRNN(
                args.batch_size,
                args.model_path,
                args.examples_path,
                args.max_image_width,
                0,
                args.restore
            )
        crnn.test()
def main():
    """ Entry point when using CRNN from the commandline """
    args = parse_arguments()
    if not args.train and not args.test:
        print("If we are not training, and not testing, what is the point?")
    crnn = None
    charset = ""
    if os.path.isfile(args.char_set_string):
        # if the charset argument is a file, read the charset from it
        with open(args.char_set_string, "r") as f:
            while True:
                c = f.readline()
                charset += c.strip("\n")
                if not c:
                    charset += "\n"  # add a line break to the charset at the end
                    break
    else:
        charset = args.char_set_string
    if args.train:
        crnn = CRNN(
            args.batch_size,
            args.model_path,
            args.examples_path,
            args.max_image_width,
            args.train_test_ratio,
            args.restore,
            charset,
            args.use_trdg,
            args.language,
            args.learning_rate
        )
        crnn.train(args.iteration_count)
    if args.test:
        if crnn is None:
            crnn = CRNN(
                args.batch_size,
                args.model_path,
                args.examples_path,
                args.max_image_width,
                0,
                args.restore,
                charset,
                args.use_trdg,
                args.language,
                args.learning_rate
            )
        crnn.test()
def eval(path="checkpoint3.pt"):
    net = CRNN(nclass=100).double()
    optimizer = optim.Adam(net.parameters())
    checkpoint = torch.load(path)
    net.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    epoch = checkpoint["epoch"]
    loss = checkpoint["loss"]
    print(f"model current epoch: {epoch} with loss: {loss}")
    net.eval()
    # iterate over the (module-level) dataset generator indefinitely
    while True:
        data = next(dataset)
        images = data["the_inputs"]
        labels = data["the_labels"]
        input_length = data["input_length"]
        label_length = data["label_length"]
        preds = net(images).detach()
        pred_texts, probs = decode_batch2(preds, string.printable)
        for i in range(len(pred_texts)):
            print(pred_texts[i], probs[i])
            print(images[i].size())
def ocr(orig_img, lines, checkpoint_file_name, use_gpu=False):
    """OCR on segmented lines."""
    model = CRNN(line_size, 1, len(vocab), 256)
    checkpoint = torch.load(checkpoint_file_name,
                            map_location='cpu' if not use_gpu else None)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    model.eval()
    model = model.cuda() if use_gpu else model.cpu()
    torch.set_grad_enabled(False)

    def to_text(tensor, max_length=None, remove_repetitions=False):
        # greedy CTC decoding: drop blanks and (optionally) repeated characters
        sentence = ''
        sequence = tensor.cpu().detach().numpy()
        for i in range(len(sequence)):
            if max_length is not None and i >= max_length:
                continue
            char = idx2char[sequence[i]]
            if char != 'B':  # ignore blank
                if remove_repetitions and i != 0 and char == idx2char[sequence[i - 1]]:
                    pass
                else:
                    sentence = sentence + char
        return sentence

    result = []
    for line in lines:
        (x1, y1), (x2, y2) = line
        line_img = image_resize(np.array(np.rot90(orig_img[y1:y2, x1:x2])),
                                height=line_size)
        inputs = torch.from_numpy(line_img / 255).float().unsqueeze(0).unsqueeze(0)
        outputs = model(inputs)
        prediction = outputs.softmax(2).max(2)[1]
        predicted_text = to_text(prediction[:, 0], remove_repetitions=True)
        result.append((line_img, predicted_text))
    return result
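# Hedged usage sketch for ocr(); the image file and the single line box below
# are hypothetical placeholders, and 'checkpoint.pth' stands in for a real
# checkpoint file.
import numpy as np
from PIL import Image

page = np.array(Image.open('page.png').convert('L'))
lines = [((10, 20), (300, 60))]  # one segmented line as ((x1, y1), (x2, y2))
for line_img, text in ocr(page, lines, 'checkpoint.pth', use_gpu=False):
    print(text)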
def __init__(self, pre_train=False):
    net_params, train_params = parser_cfg_file('./net.cfg')
    self.input_height = int(net_params['input_height'])
    self.input_width = int(net_params['input_width'])
    self.batch_size = int(train_params['batch_size'])
    self._learning_rate = float(train_params['learning_rate'])
    self._max_iterators = int(train_params['max_iterators'])
    self._train_logger_init()
    self._pre_train = pre_train
    self._model_save_path = str(train_params['model_save_path'])

    if self._pre_train:
        ckpt = tf.train.checkpoint_exists(self._model_save_path)
        if ckpt:
            print('Checkpoint is valid...')
            with open('./model/train_step.txt', 'r') as f:
                self._start_step = int(f.readline())
        else:
            raise ValueError('Checkpoint is invalid...')
    else:
        self._start_step = 0

    self._inputs = tf.placeholder(
        tf.float32, [self.batch_size, 32, self.input_width, 1])
    # label
    self._label = tf.sparse_placeholder(tf.int32, name='label')
    # the length of each sequence, e.g. [32] * 64
    self._seq_len = tf.placeholder(tf.int32, [None], name='seq_len')

    crnn_net = CRNN(net_params, self._inputs, self._seq_len, self.batch_size, True)
    self._net_output, self._decoded, self._max_char_count = crnn_net.construct_graph()
    self.dense_decoded = tf.sparse_tensor_to_dense(self._decoded[0], default_value=-1)
def recognition2(examples_path, output_path):
    """ Run CRNN prediction on a directory of examples and write the results """
    crnn = CRNN(
        10,
        1,
        "./save/",
        examples_path,
        230,
        0,  # train/test ratio; 0 means all examples are used for testing
        True,
        1)
    predict_result = crnn.test()
    with open(output_path, 'w') as f:
        for result in predict_result:
            name, text = result.split(':', 1)
            f.write(name + ':' + text.strip('_') + '\n')  # one prediction per line
class PytorchOcr():
    def __init__(self, model_path):
        alphabet_unicode = config.alphabet_v2
        self.alphabet = ''.join([chr(uni) for uni in alphabet_unicode])
        self.nclass = len(self.alphabet) + 1  # +1 for the CTC blank
        self.model = CRNN(config.imgH, 1, self.nclass, 256)
        self.cuda = False
        if torch.cuda.is_available():
            self.cuda = True
            self.model.cuda()
            # strip the 'module.' prefix left over from DataParallel training
            self.model.load_state_dict({
                k.replace('module.', ''): v
                for k, v in torch.load(model_path).items()
            })
        else:
            # self.model = nn.DataParallel(self.model)
            self.model.load_state_dict(
                torch.load(model_path, map_location='cpu'))
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    def recognize(self, img):
        h, w = img.shape[:2]
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image = Image.fromarray(img)
        transformer = resizeNormalize((int(w / h * 32), 32))
        image = transformer(image)
        image = image.view(1, *image.size())
        image = Variable(image)
        if self.cuda:
            image = image.cuda()
        preds = self.model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        txt = self.converter.decode(preds.data, preds_size.data, raw=False)
        return txt
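# A minimal usage sketch for PytorchOcr; 'crnn_weights.pth' and 'word.jpg' are
# hypothetical placeholders for the project's real weight file and input image.
import cv2

recognizer = PytorchOcr('crnn_weights.pth')
img = cv2.imread('word.jpg')
print(recognizer.recognize(img))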
def main(epoch_num, lr=0.1, training=True, fix_width=True):
    """
    Main

    Args:
        epoch_num (int): Number of epochs to train for
        lr (float, optional): Learning rate (default: 0.1)
        training (bool, optional): If True, train the model, otherwise test it (default: True)
        fix_width (bool, optional): Scale images to a fixed size (default: True)
    """
    model_path = ('fix_width_' if fix_width else '') + 'crnn.pth'
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    root = 'data/IIIT5K/'
    if training:
        net = CRNN(1, len(letters) + 1)
        start_epoch = 0
        # if there is a pre-trained model, load it
        if os.path.exists(model_path):
            print('Pre-trained model detected.\nLoading model...')
            net.load_state_dict(torch.load(model_path))
        if torch.cuda.is_available():
            print('GPU detected.')
        net = train(root, start_epoch, epoch_num, letters,
                    net=net, lr=lr, fix_width=fix_width)
        # save the trained model so training can be resumed later
        torch.save(net.state_dict(), model_path)
        # test
        test(root, net, letters, fix_width=fix_width)
    else:
        net = CRNN(1, len(letters) + 1)
        if os.path.exists(model_path):
            net.load_state_dict(torch.load(model_path))
        test(root, net, letters, fix_width=fix_width)
def main(): """ Main Function. """ print(__doc__) # optical flow parameters opt_params = { 'pyr_scale': 0.5, 'levels': 3, 'winsize': 15, 'iterations': 3, 'poly_n': 5, 'poly_sigma': 1.2 } # create optical flow object opt = OpticalFlow(**opt_params) # video dataset parameters labels_path = '../labels_gary.txt' width = 100 height = 100 processor = opt # create video data object vids = VideoDataset(labels_path) vids.set_video_params(width, height, processor) # read video paths and labels X, y = vids.read_data() # partition dataset X_tr, y_tr, X_te, y_te = vids.partition_data(X, y, ratio=0.8) X_tr = X_tr[:10].copy() y_tr = y_tr[:10].copy() X_te = X_te[:10].copy() y_te = y_te[:10].copy() print(X_tr) print(y_tr) # create CRNN model crnn = CRNN() print(crnn) # train model tr = Trainer(crnn, vids) tr.train(X_tr, y_tr, X_te, y_te, epochs=2, batch_size=10)
def __init__(self, config=TextRecognitionModelConfig()):
    super().__init__()
    self.config = config
    self.cnn = ResNet()
    if self.config.with_STN:
        config_stn = TransformationConfig()
        # config_stn = TransformationConfig(self.cnn)
        # config_stn.outputsize = 256*2*16
        self.stn = Transformation(config_stn)
    self.encoder = CRNN(self.cnn)
    self.decoder = DecoderWithAttention(
        num_classes=config.num_classes,
        in_planes=self.encoder.out_planes,
        sDim=config.decoder_s_dim,
        attDim=config.attention_dim,
        max_len_labels=config.max_len_labels,
        use_bidecoder=config.use_bidecoder,
        device=config.device,
    )
    # initialize parameters: zero biases, Kaiming-normal weights
    for name, param in self.named_parameters():
        if 'fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        if 'bias' in name:
            nn.init.constant_(param, 0.0)
        if 'weight' in name:
            if len(param.shape) >= 2:
                nn.init.kaiming_normal_(param)
            else:
                param.data.fill_(1)
def infer(files, save_static_path=None):
    result_list = []
    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    print('infer with {}'.format(place))
    with fluid.dygraph.guard(place):
        # model dir could also come from train_parameters['save_model_dir']
        params, _ = fluid.load_dygraph('{}/crnn_best'.format('output/baidu_model'))
        # crnn = CRNN(train_parameters["class_dim"] + 1, 1)
        crnn = CRNN(3828, 1)
        crnn.load_dict(params)
        crnn.eval()
        for file in tqdm(files):
            img = precess_img(file)
            img = fluid.dygraph.to_variable(img).astype('float32')
            if save_static_path is not None:
                # trace the dygraph model and save a static-graph inference model
                out_dygraph, static_layer = TracedLayer.trace(crnn, inputs=[img])
                static_layer.save_inference_model(save_static_path, feed=[0], fetch=[0])
            pred = crnn(img)
            output = utils.greedy_decode(pred.numpy(), blank=train_parameters["class_dim"])
            p_s = "".join([train_parameters['r_label_dict'][c] for c in output[0]])
            result_list.append('{0}\t{1}'.format(os.path.basename(file), p_s))
    return result_list
def main(): """ Entry point when using CRNN from the commandline """ args = parse_arguments() if not args.train and not args.test: print("If we are not training, and not testing, what is the point?") crnn = None if args.train: crnn = CRNN( args.batch_size, args.model_path, args.examples_path, args.max_image_width, args.train_test_ratio, args.restore ) crnn.train(args.iteration_count) if args.test: if crnn is None: crnn = CRNN( args.batch_size, args.model_path, args.examples_path, args.max_image_width, 0, args.restore ) crnn.test()
                           size=(384, 48),
                           max_length=None)
dloader_test384 = torch.utils.data.DataLoader(dset_test384,
                                              shuffle=False,
                                              batch_size=opt.test_batchsize,
                                              num_workers=int(opt.workers))

character_str = open(opt.char_dir, 'r').read()
print('character ', character_str[13])

# build an ensemble of CRNNs with different activation functions
net_t_list = []
net_t_list.append(
    CRNN(48, 1, len(character_str) - 1, 256, opt.nrnn, 0.5,
         opt.variational_dropout, leakyRelu=True))
net_t_list.append(
    CRNN(48, 1, len(character_str) - 1, 256, opt.nrnn, 0.5,
         opt.variational_dropout, RRelu=True))
net_t_list.append(
    CRNN(48, 1,
def train():
    with tf.device('/cpu:0'):
        # x_text, pos1, pos2, y = data_helpers.load_data_and_labels(FLAGS.train_dir)
        x_text, y = data_helpers.load_data_and_labels(FLAGS.train_dir)

    # Build vocabulary
    # Example: x_text[3] = "A misty <e1>ridge</e1> uprises from the <e2>surge</e2>."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # => [27 39 40 41 42 1 43 0 0 ... 0]
    # dimension = FLAGS.max_sentence_length
    text_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(FLAGS.max_sentence_length)
    text_vec = np.array(list(text_vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(len(text_vocab_processor.vocabulary_)))

    # Example: pos1[3] = [-2 -1 0 1 2 3 4 999 999 999 ... 999]
    #                    [95 96 97 98 99 100 101 999 999 999 ... 999]
    # => [11 12 13 14 15 16 21 17 17 17 ... 17]
    # dimension = MAX_SENTENCE_LENGTH
    # pos_vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(FLAGS.max_sentence_length)
    # pos_vocab_processor.fit(pos1 + pos2)
    # pos1_vec = np.array(list(pos_vocab_processor.transform(pos1)))
    # pos2_vec = np.array(list(pos_vocab_processor.transform(pos2)))
    # print("Position Vocabulary Size: {:d}".format(len(pos_vocab_processor.vocabulary_)))

    # x = np.array([list(i) for i in zip(text_vec, pos1_vec, pos2_vec)])
    x = np.array([list(i) for i in text_vec])
    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    # x_dev = np.array(x_dev).transpose((1, 0, 2))
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        model = CRNN(layers=FLAGS.layers,
                     max_length=FLAGS.max_sentence_length,
                     n_classes=y.shape[1],
                     pooling_type=FLAGS.pooling_type,
                     vocab_size=len(text_vocab_processor.vocabulary_),
                     embedding_size=FLAGS.text_embedding_dim,
                     f1=FLAGS.f1,
                     f2=FLAGS.f2,
                     n_channels=FLAGS.n_channels)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

        # Write vocabulary
        text_vocab_processor.save(os.path.join(out_dir, "text_vocab"))
        # pos_vocab_processor.save(os.path.join(out_dir, "position_vocab"))

        sess.run(tf.global_variables_initializer())

        # Pre-trained word2vec
        if FLAGS.word2vec:
            # initialize the embedding matrix with random uniform values
            initW = np.random.uniform(-0.25, 0.25,
                                      (len(text_vocab_processor.vocabulary_), FLAGS.text_embedding_dim))
            # load any vectors found in the word2vec binary file
            print("Load word2vec file {0}".format(FLAGS.word2vec))
            with open(FLAGS.word2vec, "rb") as f:
                header = f.readline()
                vocab_size, layer1_size = map(int, header.split())
                binary_len = np.dtype('float32').itemsize * layer1_size
                for line in range(vocab_size):
                    word = []
                    while True:
                        ch = f.read(1).decode('latin-1')
                        if ch == ' ':
                            word = ''.join(word)
                            break
                        if ch != '\n':
                            word.append(ch)
                    idx = text_vocab_processor.vocabulary_.get(word)
                    if idx != 0:
                        initW[idx] = np.fromstring(f.read(binary_len), dtype='float32')
                    else:
                        f.read(binary_len)
            sess.run(model.W_emb.assign(initW))
            print("Successfully loaded the pre-trained word2vec model!\n")

        batches = data_helpers.batch_iter(list(zip(x_train, y_train)),
                                          FLAGS.batch_size, FLAGS.num_epochs)
        max_f1 = -1
        for step, batch in enumerate(batches):
            x_batch, y_batch = zip(*batch)
            feed_dict = {model.input_text: x_batch,
                         model.dropout_keep_prob: FLAGS.dropout_keep_prob,
                         model.labels: y_batch}
            # max_pooling, convs = sess.run([model.max_pooing, model.conv], feed_dict=feed_dict)
            _, loss, accuracy = sess.run([model.train, model.cost, model.accuracy],
                                         feed_dict=feed_dict)

            # Training log display
            if step % FLAGS.display_every == 0:
                print("step {}: loss {}, acc {}".format(step, loss, accuracy))

            # Evaluation
            if step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                feed_dict = {
                    model.input_text: x_dev,
                    model.labels: y_dev,
                    model.dropout_keep_prob: 1.0
                }
                loss, accuracy, predictions = sess.run(
                    [model.cost, model.accuracy, model.predictions], feed_dict)
                f1 = f1_score(np.argmax(y_dev, axis=1), predictions, average="macro")
                print("step {}: loss {}, acc {}, f1 {}\n".format(step, loss, accuracy, f1))

                # Model checkpoint: save whenever f1 is within 1% of the best seen so far
                if f1 > max_f1 * 0.99:
                    path = saver.save(sess, checkpoint_prefix, global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))
                    max_f1 = f1
from crnn import CRNN

batch_size = 10
model_path = 'MyModel'
examples_picture_path = 'restore/'
examples_label_path = 'target_label.txt'
dictionary_path = 'dictionary.txt'
max_image_width = 256
train_test_ratio = 0.9
restore = False
NUM_CLASSES = 52
iteration_count = 4000

crnn = CRNN(batch_size, model_path, examples_picture_path, examples_label_path,
            dictionary_path, max_image_width, train_test_ratio, restore, NUM_CLASSES)

if __name__ == '__main__':
    crnn.train(iteration_count)
def main():
    conf_file = "conf/train.yml"
    with open(conf_file, 'r') as f:
        args = edict(yaml.safe_load(f))
    train_root = args.train_root
    test_root = args.test_root
    batch_size = args.batch_size
    max_len = args.max_len
    img_h = args.img_h
    img_w = args.img_w
    n_hidden = args.n_hidden
    n_iter = args.n_iter
    lr = args.lr
    cuda = args.cuda
    val_interval = args.val_interval
    save_interval = args.save_interval
    model_dir = args.model_dir
    debug_level = args.debug_level
    experiment = args.experiment
    n_channel = args.n_channel
    n_class = args.n_class
    beta = args.beta

    image = torch.FloatTensor(batch_size, n_channel, img_h, img_h)
    text = torch.IntTensor(batch_size * max_len)
    length = torch.IntTensor(batch_size)

    # logging levels: 50 critical, 40 error, 30 warning, 20 info, 10 debug
    logging.getLogger().setLevel(debug_level)

    crnn = CRNN(img_h, n_channel, n_class, n_hidden).cuda()
    crnn.apply(weights_init)
    criterion = CTCLoss().cuda()
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
    # optimizer = optim.Adam(crnn.parameters(), lr=lr, betas=(beta, 0.999))

    trainset = train_set(train_root, batch_size, img_h, img_w, n_class)
    valset = train_set(test_root, batch_size, img_h, img_w, n_class)

    cur_iter = 0
    for ITER in range(n_iter):
        for train_img, train_label, train_lengths, batch_label in iter(trainset):
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()
            if train_img is None:
                break
            cur_iter += 1
            loadData(image, train_img)
            loadData(text, train_label)
            loadData(length, train_lengths)
            preds = crnn(train_img.cuda())
            # preds = F.softmax(preds, dim=2)
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            crnn.zero_grad()
            cost.backward()
            optimizer.step()
            print("training-iter {} cost {}".format(
                ITER, cost.cpu().detach().numpy()[0]))
            if cur_iter % val_interval == 0:
                val(crnn, valset, criterion, n_class)
            if cur_iter % save_interval == 0:
                model_file = os.path.join(model_dir, "crnn_iter{}.pth".format(ITER))
                print("saving in file {}".format(model_file))
                with open(model_file, 'wb') as f:
                    torch.save(crnn, f)
def train():
    if config.restart_training:
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if config.output_dir is None:
        config.output_dir = 'output'
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info('train with gpu %s and mxnet %s' % (config.gpu_id, mx.__version__))

    ctx = mx.gpu(config.gpu_id)
    # set the random seed
    mx.random.seed(2)
    mx.random.seed(2, ctx=ctx)

    train_transfroms = transforms.Compose(
        [transforms.RandomBrightness(0.5), transforms.ToTensor()])
    train_dataset = ImageDataset(config.trainfile, (config.img_h, config.img_w),
                                 3, 80, config.alphabet, phase='train')
    train_data_loader = DataLoader(
        train_dataset.transform_first(train_transfroms),
        config.train_batch_size,
        shuffle=True,
        last_batch='keep',
        num_workers=config.workers)
    test_dataset = ImageDataset(config.testfile, (config.img_h, config.img_w),
                                3, 80, config.alphabet, phase='test')
    test_data_loader = DataLoader(
        test_dataset.transform_first(transforms.ToTensor()),
        config.eval_batch_size,
        shuffle=True,
        last_batch='keep',
        num_workers=config.workers)

    net = CRNN(len(config.alphabet), hidden_size=config.nh)
    net.hybridize()
    if not config.restart_training and config.checkpoint != '':
        logger.info('load pretrained net from {}'.format(config.checkpoint))
        net.load_parameters(config.checkpoint, ctx=ctx)
    else:
        net.initialize(ctx=ctx)

    criterion = gluon.loss.CTCLoss()
    all_step = len(train_data_loader)
    logger.info('each epoch contains {} steps'.format(all_step))
    schedule = mx.lr_scheduler.FactorScheduler(step=config.lr_decay_step * all_step,
                                               factor=config.lr_decay,
                                               stop_factor_lr=config.end_lr)
    # schedule = mx.lr_scheduler.MultiFactorScheduler(
    #     step=[15 * all_step, 30 * all_step, 60 * all_step, 80 * all_step], factor=0.1)
    adam_optimizer = mx.optimizer.Adam(learning_rate=config.lr, lr_scheduler=schedule)
    trainer = gluon.Trainer(net.collect_params(), optimizer=adam_optimizer)

    sw = SummaryWriter(logdir=config.output_dir)
    for epoch in range(config.start_epoch, config.end_epoch):
        loss = .0
        train_acc = .0
        tick = time.time()
        cur_step = 0
        for i, (data, label) in enumerate(train_data_loader):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss_ctc = criterion(output, label)
            loss_ctc.backward()
            trainer.step(data.shape[0])

            loss_c = loss_ctc.mean()
            cur_step = epoch * all_step + i
            sw.add_scalar(tag='ctc_loss', value=loss_c.asscalar(),
                          global_step=cur_step // 2)
            sw.add_scalar(tag='lr', value=trainer.learning_rate,
                          global_step=cur_step // 2)
            loss += loss_c
            acc = accuracy(output, label, config.alphabet)
            train_acc += acc
            if (i + 1) % config.display_interval == 0:
                acc /= len(label)
                sw.add_scalar(tag='train_acc', value=acc, global_step=cur_step)
                batch_time = time.time() - tick
                logger.info(
                    '[{}/{}], [{}/{}], step: {}, Speed: {:.3f} samples/sec, '
                    'ctc loss: {:.4f}, acc: {:.4f}, lr: {}, time: {:.4f} s'.format(
                        epoch, config.end_epoch, i, all_step, cur_step,
                        config.display_interval * config.train_batch_size / batch_time,
                        loss.asscalar() / config.display_interval, acc,
                        trainer.learning_rate, batch_time))
                loss = .0
                tick = time.time()
                nd.waitall()
        if epoch == 0:
            sw.add_graph(net)
        logger.info('start val ....')
        train_acc /= train_dataset.__len__()
        validation_accuracy = evaluate_accuracy(
            net, test_data_loader, ctx, config.alphabet) / test_dataset.__len__()
        sw.add_scalar(tag='val_acc', value=validation_accuracy, global_step=cur_step)
        logger.info("Epoch {}, train_acc {:.4f}, val_acc {:.4f}".format(
            epoch, train_acc, validation_accuracy))
        net.save_parameters("{}/{}_{:.4f}_{:.4f}.params".format(
            config.output_dir, epoch, train_acc, validation_accuracy))
    sw.close()
import torch
from crnn import CRNN

embedding_size = 8
hidden_state = 4
output_size = 2
batch_size = 1
# clockwork RNN periods: 1, 2, 4, ..., 256
clock_periods = [2 ** i for i in range(9)]

model = CRNN(embedding_size, hidden_state, output_size, clock_periods)
inputs = torch.randn(16, batch_size, embedding_size)
y_predicted = model(inputs)
print(y_predicted)
        # m.weight.data.normal_(1.0, 0.02)
        m.weight.data.uniform_(1.0, 5)
        m.bias.data.fill_(0)
    elif isinstance(m, nn.GRU):
        nn.init.xavier_uniform_(m.weight.data,
                                gain=nn.init.calculate_gain('leaky_relu'))
    # elif isinstance(m, nn.Linear):
    #     m.weight.data.normal_(0.0, 0.02)
    #     m.bias.data.fill_(0)

net = CRNN(48, 1, len(char2index), 256, opt.nrnn, opt.dropout,
           opt.variational_dropout, leakyRelu=True)
print(net)

# count the total number of parameters
params = net.state_dict()
params_shape = []
for k, v in params.items():
    # print(k, v.numpy().shape, reduce(mul, v.numpy().shape))
    params_shape.append(reduce(mul, v.numpy().shape))
params_total = sum(params_shape)
print('params_total:', params_total)

if opt.finetune:
    print('Loading model from', opt.modeldir + opt.modelname)
    net.load_state_dict(torch.load(opt.modeldir + opt.modelname))
    return reader


if __name__ == '__main__':
    from paddle import fluid

    total_step = 30
    LR = 1e-3
    with fluid.dygraph.guard():
        # piecewise decay: LR for the first third, then LR*0.1, then LR*0.01
        lr = fluid.layers.piecewise_decay(
            [total_step // 3, total_step * 2 // 3], [LR, LR * 0.1, LR * 0.01])
        # lr = fluid.layers.polynomial_decay(LR, total_step, 1e-7, power=0.9)
        from crnn import CRNN
        crnn = CRNN(train_parameters["class_dim"] + 1, batch_size=16)
        optimizer = fluid.optimizer.Adam(learning_rate=lr,
                                         parameter_list=crnn.parameters())
        step = []
        lr = []
        for x in range(total_step):
            step.append(x)
            l = fluid.dygraph.to_variable(np.array([1]))
            optimizer.minimize(l)
            lr.append(optimizer.current_step_lr())
            print(x, optimizer.current_step_lr())
    from matplotlib import pyplot as plt
    plt.plot(step, lr)
    plt.show()
class PorscheFonts:
    def __init__(self):
        # create models and load weights
        self.det_info = {
            'input_shape': (512, 1024, 3),
            'channel_mean': np.array([123, 117, 104]),
            'weight_path': '/home/eugene/storystream/ai-p-fonts/log/seglink/seglink_epoch-29_loss-0.91_val_loss-0.36.h5'
        }
        self.reg_info = {
            'input_shape': (32, 480, 1),
            'weight_path': '/home/eugene/storystream/ai-p-fonts/log/crnn/gru/CRNN_epoch-29_loss-0.00_val_loss-0.00.h5'
        }
        self.det = SegLink(input_shape=self.det_info['input_shape'])
        self.reg = CRNN(input_shape=self.reg_info['input_shape'])
        self.det.create_model(self.det_info['weight_path'])
        self.reg.create_model(self.reg_info['weight_path'], gru=True, train=False)
        self.lexicons = [
            'gt2', 'gt2rs', 'targa', 'turbo', '91150',
            'carrera 4', 'carrera 4 gts', 'carrera 4s', 'carrera gts', 'carrera s',
            'targa 4', 'targa 4 gts', 'targa 4s', 'turbo s',
            '911 turbo', '911 turbo s',
            '911 carrera 4', '911 carrera 4 gts', '911 carrera 4s',
            '911 carrera gts', '911 carrera s',
            '911 targa 4', '911 targa 4 gts', '911 targa 4s'
        ]

    def word_postprocess(self, word):
        # normalize a recognized model name to the ground-truth label format
        word = word.replace(" ", "-")
        word = word.replace("4-gts", "4gts")
        word = word.replace("91150", "carrera-50")
        word = word.replace("911-", "")
        word = word.replace("gt2rs", "gt2-rs")
        word = word + "-" if "-" not in word else word
        return word

    def predict(self, img_path, save=False):
        ori_img = io.imread(img_path)[..., :3]
        gt_label = os.path.basename(img_path).split('_')[0]
        gt_label = gt_label + "-" if "-" not in gt_label else gt_label
        quads = self.det.test_one(ori_img)
        if len(quads):
            words, scores = self.reg.test_one(ori_img, quads)
            words = np.array(words)
            scores = np.array(scores)
            keep_bool = scores > 0.5
            if np.any(keep_bool):
                quads = quads[keep_bool]
                words = list(words[keep_bool])
                scores = list(scores[keep_bool])
                # save an annotated image for debugging
                pil_image = draw_annotated_box(ori_img, quads, words, scores)
                if save:
                    pil_image.save(
                        os.path.join(
                            'output',
                            os.path.splitext(os.path.basename(img_path))[0] + '.jpg'))
                # zip quadrilaterals, words and scores, then sort by score descending
                predictions = list(zip(quads, words, scores))
                predictions.sort(key=lambda x: x[2], reverse=True)
                for quad, word, score in predictions:
                    if word in self.lexicons:
                        word = self.word_postprocess(word)
                        print(gt_label, word)
                        if gt_label == word:
                            return True
        return False
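# A minimal usage sketch for PorscheFonts; 'images/gt2rs_001.jpg' is a
# hypothetical path whose file name is assumed to encode the ground-truth
# label before the first underscore, as predict() expects.
pf = PorscheFonts()
matched = pf.predict('images/gt2rs_001.jpg', save=True)
print('prediction matched ground truth:', matched)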
batch_size = 90
video_dataset = TorchVideoTrainDataset('torch_video_3/', 'qia2020/train/', df, y_df, 40000)
train_loader = DataLoader(video_dataset, batch_size=batch_size, shuffle=True,
                          drop_last=True, num_workers=8, pin_memory=True)
val_video_dataset = TorchVideoTrainDataset('torch_video_3_val/', 'qia2020/val/', val_df, val_y_df, 5000)
val_loader = DataLoader(val_video_dataset, batch_size=batch_size, shuffle=False,
                        drop_last=True, num_workers=8, pin_memory=True)

# checkpoint_dir = 'lightning_logs/version_39/checkpoints/epoch=1.ckpt'
system = CRNN()
seed_everything(42)
# trainer = Trainer(gpus=[0], accelerator='ddp', resume_from_checkpoint=checkpoint_dir,
#                   deterministic=True, max_epochs=100)
trainer = Trainer(gpus=[0], max_epochs=100, deterministic=True)
trainer.fit(system, train_loader, val_loader)
def start_train():
    model = CRNN(MODEL_HYPER.batch_size,
                 MODEL_HYPER.epoches,
                 MODEL_HYPER.data_path,
                 MODEL_HYPER.text_path,
                 MODEL_HYPER.log_path,
                 MODEL_HYPER.model_path)
    model.train()
    model.save()
import os
import csv

import cv2
import numpy as np
import torch
from tqdm import tqdm

from crnn import CRNN

model = CRNN()
model.load_state_dict(torch.load('55acc.pt'))
model.eval()
model.to('cuda')

data_dir = "qia2020/test/"
emo = {0: 'hap', 1: 'sur', 2: 'neu', 3: 'fea', 4: 'dis', 5: 'ang', 6: 'sad'}

with open('test_confirm.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['FileID', 'Emotion'])
    for filename in tqdm(sorted(os.listdir(data_dir))):
        if not filename.endswith(".mp4"):
            continue
        f = 'torch_video_3_test/' + filename[:5] + '.pt'
        X = torch.load(f)
        X = X.unsqueeze(0).to('cuda:0')
        with np.load(data_dir + filename[:5] + '.npz') as data:
            T = torch.Tensor(data['word_embed'])
def test_CRNN():
    # numerical gradient check for the CRNN unit
    T = 50
    batch_size = 2
    nstates = 3
    input_size = 4
    output_size = 6
    clock_rates = [1, 2, 4]
    unit = CRNN(input_size, nstates, output_size, clock_rates)
    W = unit.get_weights()
    X = np.random.randn(T, input_size, batch_size)
    unit.forget()
    acc_Y = unit.forward(X)
    wrand = np.random.randn(*acc_Y.shape)
    loss = np.sum(acc_Y * wrand)
    dY = wrand
    dX = unit.backward(dY)
    dW = unit.get_grads()
    unit.forget()

    def fwd():
        unit.set_weights(W)
        h = unit.forward(X)
        unit.forget()
        return np.sum(h * wrand)

    delta = 1e-4
    error_threshold = 1e-3
    all_values = [X, W]
    backpropagated_gradients = [dX, dW]
    names = ['X', 'W']
    error_count = 0
    for v in range(len(names)):
        values = all_values[v]
        dvalues = backpropagated_gradients[v]
        name = names[v]
        for i in range(values.size):
            actual = values.flat[i]
            values.flat[i] = actual + delta
            loss_plus = fwd()
            values.flat[i] = actual - delta
            loss_minus = fwd()
            values.flat[i] = actual
            backpropagated_gradient = dvalues.flat[i]
            # central difference approximation of the gradient
            numerical_gradient = (loss_plus - loss_minus) / (2 * delta)
            if numerical_gradient == 0 and backpropagated_gradient == 0:
                error = 0
            elif abs(numerical_gradient) < 1e-7 and abs(backpropagated_gradient) < 1e-7:
                error = 0
            else:
                error = abs(backpropagated_gradient - numerical_gradient) \
                    / abs(numerical_gradient + backpropagated_gradient)
            if error > error_threshold:
                print('FAILURE!!!\n')
                print('\tparameter: ', name, '\tindex: ', np.unravel_index(i, values.shape))
                print('\tvalues: ', actual)
                print('\tbackpropagated_gradient: ', backpropagated_gradient)
                print('\tnumerical_gradient', numerical_gradient)
                print('\terror: ', error)
                print('\n\n')
                error_count += 1
    if error_count == 0:
        print('CRNN Gradient Check Passed')
    else:
        print('Failed for {} parameters'.format(error_count))
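# Running the gradient check directly; CRNN here is assumed to be the NumPy
# clockwork-RNN unit this test was written for, not one of the PyTorch models above.
if __name__ == '__main__':
    np.random.seed(0)  # seeding is an added assumption, for reproducible checks
    test_CRNN()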