def crnnSource():
    """Build the pretrained CRNN recognizer and its CTC label converter.

    Returns:
        (model, converter): the CRNN in eval mode with weights loaded from
        the bundled checkpoint, and the strLabelConverter used to decode
        its CTC output.
    """
    alphabet = keys.alphabet
    converter = utils.strLabelConverter(alphabet)
    use_cuda = torch.cuda.is_available() and GPU
    # len(alphabet) + 1: one extra class for the CTC blank.
    # Construct once instead of duplicating the constructor per device.
    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1)
    model = model.cuda() if use_cuda else model.cpu()
    path = './crnn/pytorch/models/model_acc97.pth'
    # Fix: map GPU-saved tensors onto the CPU when CUDA is unavailable —
    # the original torch.load(path) crashes on CPU-only machines.
    state = torch.load(path, map_location=None if use_cuda else 'cpu')
    model.load_state_dict(state)
    # Switch to eval AFTER loading (original called eval() first; loading
    # does not reset the mode, but load -> eval is the conventional order).
    model.eval()
    return model, converter
def main(args):
    """Evaluate a pretrained CRNN on generated samples and print accuracy.

    Picks the horizontal or vertical model per args.direction, loads the
    checkpoint on CPU, then scores args.eval_num generated samples.
    """
    vocabulary = cfg.word.get_all_words()
    model_cls = crnn.CRNN if args.direction == 'horizontal' else crnn.CRNNV
    net = model_cls(num_classes=len(vocabulary))
    use_cuda = args.device == 'cuda' and torch.cuda.is_available()
    device = torch.device('cuda:{}'.format(args.local_rank) if use_cuda else 'cpu')
    checkpoint = torch.load(args.weight_path, map_location='cpu')
    net.load_state_dict(checkpoint['model'])
    net.to(device)
    net.eval()
    # load image
    samples = Generator(cfg.word.get_all_words(), args.direction)
    hits = []
    for idx in range(args.eval_num):
        image, target, input_len, target_len = samples[idx]
        prediction = inference_single_image(net, image, device)
        truth = target[:target_len]
        # print("{} {}".format(truth, prediction))
        # A hit requires identical length and identical label sequence.
        matched = len(truth) == len(prediction) and np.allclose(truth, prediction)
        hits.append(matched)
    # Accuracy = fraction of exact sequence matches.
    acc = np.array(hits).mean()
    print('acc:{:.3f}'.format(acc))
def load(self, crnn_path):
    """Instantiate the CRNN, load weights from *crnn_path*, switch to eval."""
    recognizer = crnn.CRNN(self.IMGH, self.nc, self.nclass, nh=256)
    recognizer = recognizer.to(device)
    state = torch.load(crnn_path, map_location=device)
    recognizer.load_state_dict(state)
    # Inference mode: disables dropout / freezes batch-norm statistics.
    recognizer.eval()
    self.crnn = recognizer
def train(train_dir=None, model_dir=None, mode='train'):
    """Train the CRNN classifier with softmax cross-entropy (TF1 graph mode).

    Args:
        train_dir: unused in this body; kept for interface compatibility.
        model_dir: unused in this body; kept for interface compatibility.
        mode: forwarded to crnn.CRNN (e.g. 'train').
    """
    model = crnn.CRNN(mode)
    model._build_model()
    global_step = tf.Variable(0, trainable=False)
    #sess1 = tf.InteractiveSession()
    #load training data
    train_data, train_label = load_data()
    # 4 output classes; labels become one-hot vectors.
    train_label = dense_to_one_hot(train_label, 4)
    training_size = train_data.shape[0]
    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=model.labels, logits=model.logits)
        loss = tf.reduce_mean(cross_entropy)
        # print model.logits.get_shape()
    with tf.name_scope('accuracy'):
        correct_pred = tf.equal(tf.argmax(model.logits, 1),
                                tf.argmax(model.labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    with tf.name_scope("moving_average"):
        # EMA of all trainables; decay rate reuses FLAGS.momentum.
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.momentum, global_step)
        variable_averages_op = variable_averages.apply(
            tf.trainable_variables())
    with tf.name_scope("train_step"):
        # Decay the LR once per (approximately) one epoch worth of steps.
        lr = tf.train.exponential_decay(FLAGS.learning_rate,
                                        global_step,
                                        training_size / FLAGS.train_batch_size,
                                        FLAGS.decay_rate,
                                        staircase=True)
        #print (lr.eval())
        train_step = tf.train.AdamOptimizer(lr).minimize(
            loss, global_step=global_step)
    with tf.control_dependencies([train_step, variable_averages_op]):
        # Running train_op also updates the moving averages.
        train_op = tf.no_op(name='train')
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        # NOTE(review): each "epoch" here processes a single batch; `start`
        # wraps around training_size rather than sweeping the whole set.
        for i in range(FLAGS.num_epochs):
            start = (i * FLAGS.train_batch_size) % training_size
            end = min(start + FLAGS.train_batch_size, training_size)
            _, loss_value, step, acc = sess.run(
                [train_op, loss, global_step, accuracy],
                feed_dict={
                    model.inputs: train_data[start:end],
                    model.labels: train_label[start:end]
                })
            if i % 10 == 0:
                print(
                    "After %d training step(s), loss on training batch is %.2f, accuracy is %.3f."
                    % (step, loss_value, acc))
                # NOTE(review): checkpoint-save placement inside the i%10
                # branch is inferred from the collapsed source — confirm.
                saver.save(sess,
                           os.path.join(FLAGS.checkpoint, FLAGS.model_name),
                           global_step=global_step)
def __init__(self, weightfile, gpu_id=0):
    """Build the CRNN recognizer and load pretrained weights.

    Args:
        weightfile: path to the pretrained state-dict file.
        gpu_id: CUDA device index to use when a GPU is available.
    """
    alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-()图'
    print(alphabet)
    print(len(alphabet))
    # +1 for the CTC blank class.
    nclass = len(alphabet) + 1
    self.__net = crnn.CRNN(32, 1, nclass, 256)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.__net.cuda(device=gpu_id)
    self.__gpu_id = gpu_id
    # Fix: map GPU-saved checkpoints onto the CPU when no GPU is present;
    # the original torch.load(weightfile) crashes on CPU-only machines.
    map_location = None if use_cuda else 'cpu'
    self.__net.load_state_dict(torch.load(weightfile, map_location=map_location))
    # Inputs are resized/normalized to 160x32 before recognition.
    self.__transformer = dataset.resizeNormalize((160, 32))
    self.__converter = utils.strLabelConverter(alphabet)
def main(args):
    """Run CRNN inference on a single image or every image in a directory."""
    vocabulary = cfg.word.get_all_words()
    net = crnn.CRNN(num_classes=len(vocabulary))
    checkpoint = torch.load(args.weight_path, map_location='cpu')
    net.load_state_dict(checkpoint['model'])
    net.eval()
    # load image
    if args.image_dir:
        # Directory mode: predict every file, in sorted order.
        paths = sorted(
            os.path.join(args.image_dir, entry)
            for entry in os.listdir(args.image_dir))
    else:
        paths = [args.image_path]
    for path in paths:
        label = inference_image(net, vocabulary, path)
        print("image_path:{},label:{}".format(path, label))
def __init__(self, proxy_map):
    """Set up the worker: periodic compute timer, EAST text detector, CRNN
    text recognizer, and (optionally) a BK-tree lexicon for word correction.
    """
    super(SpecificWorker, self).__init__(proxy_map)
    # Fire self.compute every Period milliseconds.
    self.timer.timeout.connect(self.compute)
    self.Period = 200
    self.timer.start(self.Period)
    # load the pre-trained EAST text detector
    print "[INFO] loading EAST text detector..."
    self.net = cv2.dnn.readNet(NET_FILE)
    # CRNN: 32-px-high 1-channel input, 37 classes, 256 hidden units.
    self.model = crnn.CRNN(32, 1, 37, 256)
    if torch.cuda.is_available():
        self.model = self.model.cuda()
    print "[INFO] loading CRNN text recognizer..."
    # NOTE(review): torch.load without map_location assumes the checkpoint
    # matches the current device — confirm for CPU-only deployments.
    self.model.load_state_dict(torch.load(MODEL_FILE))
    self.tree = None
    if self.use_lexicon:
        print "[INFO] loading generic english lexicon..."
        lexicon = []
        with open(LEXICON_FILE) as f:
            # One lowercase word per line.
            for line in f.read().splitlines():
                lexicon.append(line.lower())
        print "Length of the lexicon: ", len(lexicon)
        # BK-tree over hamming distance for nearest-word lookup.
        self.tree = BKTree(hamming_distance, lexicon)
""" import torch from torch.autograd import Variable import utils import dataset import crnn as crnn modelPath = 'model/ocr/english/crnn.pth' alphabet = '0123456789abcdefghijklmnopqrstuvwxyz' if torch.cuda.is_available(): model = crnn.CRNN(32, 1, 37, 256, 1).cuda() else: model = crnn.CRNN(32, 1, 37, 256, 1).cpu() ### load model weigth from path model.load_state_dict(torch.load(modelPath)) converter = utils.strLabelConverter(alphabet) transformer = dataset.resizeNormalize((100, 32)) def crnn_ocr(image): image = image.convert('L') if torch.cuda.is_available(): image = transformer(image).cuda() else: image = transformer(image).cpu()
import torch
import torch.utils.data
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import keys
import crnn
import dataset
from utils import strLabelConverter

alphabet = keys.alphabet
# NOTE(review): this pause serves no purpose beyond waiting for Enter.
raw_input('\ninput: ')
converter = strLabelConverter(alphabet)
# len(alphabet) + 1: CTC blank. CUDA is assumed to be available here.
model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cuda()
path = './models/model_acc97.pth'
model.load_state_dict(torch.load(path))
# NOTE(review): model.eval() is never called — dropout/batch-norm stay in
# training mode during inference; confirm whether that is intended.
print(model)
while 1:
    im_name = raw_input("\nplease input file name: ")
    im_path = "./img/" + im_name
    # NOTE(review): Image (PIL) is not imported in this chunk — presumably
    # imported elsewhere in the file.
    image = Image.open(im_path).convert('L')
    # Rescale so the height becomes 32 while preserving aspect ratio.
    scale = image.size[1] * 1.0 / 32
    w = int(image.size[0] / scale)
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image).cuda()
    # Add a batch dimension: (1, C, H, W).
    image = image.view(1, *image.size())
    image = Variable(image)
def train(args):
    """Distributed-aware CRNN training loop with CTC loss.

    Resumes from 'crnn.<direction>.<epoch>.pth' when args.init_epoch > 0,
    and after every epoch writes TensorBoard scalars plus a checkpoint
    (main process only).
    """
    init_distributed_mode(args)
    print(args)
    device = torch.device('cuda:{}'.format(args.local_rank) if args.device ==
                          'cuda' and torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True
    # data loader
    data_set = Generator(cfg.word.get_all_words(), args.direction)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            data_set)
    else:
        train_sampler = torch.utils.data.RandomSampler(data_set)
    data_loader = DataLoader(data_set,
                             batch_size=args.batch_size,
                             sampler=train_sampler,
                             num_workers=args.workers)
    # model
    model = crnn.CRNN(len(data_set.alpha))
    model = model.to(device)
    criterion = CTCLoss()
    criterion = criterion.to(device)
    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer = optim.Adadelta(model.parameters(),
                               weight_decay=args.weight_decay)
    # lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    # Keep a handle to the bare module for checkpointing/logging under DDP.
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank)
        model_without_ddp = model.module
    # Load the pretrained model (resume training).
    if args.init_epoch > 0:
        checkpoint = torch.load(os.path.join(
            args.output_dir,
            'crnn.{}.{:03d}.pth'.format(args.direction, args.init_epoch)),
                                map_location='cpu')
        optimizer.load_state_dict(checkpoint['optimizer'])
        # lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        model_without_ddp.load_state_dict(checkpoint['model'])
    # log — only the main process writes TensorBoard events.
    writer = SummaryWriter(
        log_dir=cfg.log_dir) if utils.is_main_process() else None
    # train
    model.train()
    for epoch in range(args.init_epoch, args.epochs):
        if args.distributed:
            # Reshuffle shards deterministically per epoch.
            train_sampler.set_epoch(epoch)
        # Train one epoch.
        loss = train_one_epoch(model, criterion, optimizer, data_loader,
                               device, epoch, args)
        # Write logs.
        utils.add_scalar_on_master(writer, 'scalar/lr',
                                   optimizer.param_groups[0]['lr'], epoch + 1)
        utils.add_scalar_on_master(writer, 'scalar/train_loss', loss,
                                   epoch + 1)
        utils.add_weight_history_on_master(writer, model_without_ddp,
                                           epoch + 1)
        # Update lr.
        # lr_scheduler.step(epoch)
        # Save the model.
        if args.output_dir:
            checkpoint = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                # 'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch + 1,
                'args': args
            }
            utils.save_on_master(
                checkpoint,
                os.path.join(
                    args.output_dir,
                    'crnn.{}.{:03d}.pth'.format(args.direction, epoch + 1)))
    if utils.is_main_process():
        writer.close()
def train(train_dir=None, model_dir=None, mode='train'):
    """Train the CRNN emotion classifier and periodically validate.

    Every 10 steps: print batch loss/accuracy, save a checkpoint, and print
    unweighted (macro) / weighted recall with a confusion matrix over the
    classes ["ang", "sad", "hap", "neu"]. Every 20 steps: score the whole
    validation set.
    """
    model = crnn.CRNN(mode)
    model._build_model()
    global_step = tf.Variable(0, trainable=False)
    #sess1 = tf.InteractiveSession()
    #load training data
    train_data, train_label, valid_data, Valid_label = load_data()
    # 4 emotion classes; labels become one-hot vectors.
    train_label = dense_to_one_hot(train_label, 4)
    Valid_label = dense_to_one_hot(Valid_label, 4)
    training_size = train_data.shape[0]
    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=model.labels, logits=model.logits)
        loss = tf.reduce_mean(cross_entropy)
        # print model.logits.get_shape()
    with tf.name_scope('accuracy'):
        correct_pred = tf.equal(tf.argmax(model.softmax, 1),
                                tf.argmax(model.labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    with tf.name_scope("moving_average"):
        # EMA of all trainables; decay rate reuses FLAGS.momentum.
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.momentum, global_step)
        variable_averages_op = variable_averages.apply(
            tf.trainable_variables())
    with tf.name_scope("train_step"):
        # Decay the LR once per (approximately) one epoch worth of steps.
        lr = tf.train.exponential_decay(FLAGS.learning_rate,
                                        global_step,
                                        training_size / FLAGS.train_batch_size,
                                        FLAGS.decay_rate,
                                        staircase=True)
        #print (lr.eval())
        train_step = tf.train.AdamOptimizer(lr).minimize(
            loss, global_step=global_step)
    with tf.control_dependencies([train_step, variable_averages_op]):
        # Running train_op also updates the moving averages.
        train_op = tf.no_op(name='train')
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for e in range(FLAGS.num_epochs):
            print(type(train_label))
            print(train_label.shape)
            # Reshuffle the training set each epoch.
            index = np.arange(training_size)
            np.random.shuffle(index)
            train_data = train_data[index]
            train_label = train_label[index]
            for i in range(int(training_size / FLAGS.train_batch_size) + 1):
                start = (i * FLAGS.train_batch_size) % training_size
                end = min(start + FLAGS.train_batch_size, training_size)
                _, loss_value, step, acc, softmax = sess.run(
                    [train_op, loss, global_step, accuracy, model.softmax],
                    feed_dict={
                        model.inputs: train_data[start:end],
                        model.labels: train_label[start:end]
                    })
                if i % 10 == 0:
                    print(
                        "After epoch:%d, step: %d, loss on training batch is %.2f, accuracy is %.3f."
                        % (e, step, loss_value, acc))
                    saver.save(sess,
                               os.path.join(FLAGS.checkpoint,
                                            FLAGS.model_name),
                               global_step=global_step)
                    # Batch-level unweighted (macro) and weighted recall.
                    train_acc_uw = recall(np.argmax(softmax, 1),
                                          np.argmax(
                                              train_label[start:end], 1),
                                          average='macro')
                    train_acc_w = recall(np.argmax(softmax, 1),
                                         np.argmax(train_label[start:end], 1),
                                         average='weighted')
                    train_conf = confusion(
                        np.argmax(softmax, 1),
                        np.argmax(train_label[start:end], 1))
                    print("train_UA: %3.4g" % train_acc_uw)
                    print("train_WA: %3.4g" % train_acc_w)
                    print('Confusion Matrix:["ang","sad","hap","neu"]')
                    print(train_conf)
                if i % 20 == 0:
                    #for validation data
                    valid_size = len(valid_data)
                    valid_iter = divmod((valid_size),
                                        FLAGS.valid_batch_size)[0]
                    y_pred_valid = np.empty((valid_size, 4), dtype=np.float32)
                    # NOTE(review): this clobbers the shuffle `index` array;
                    # it is rebuilt next epoch, and this 0 is otherwise
                    # unused in the visible block.
                    index = 0
                    cost_valid = 0
                    if (valid_size < FLAGS.valid_batch_size):
                        # Whole validation set fits in a single batch.
                        validate_feed = {
                            model.inputs: valid_data,
                            model.labels: Valid_label
                        }
                        y_pred_valid, p_loss = sess.run(
                            [model.softmax, cross_entropy],
                            feed_dict=validate_feed)
                        cost_valid = cost_valid + np.sum(p_loss)
                    else:
                        print(valid_data.shape)
                        for v in range(valid_iter):
                            v_begin = v * FLAGS.valid_batch_size
                            v_end = (v + 1) * FLAGS.valid_batch_size
                            if (v == valid_iter - 1):
                                # Last batch absorbs the remainder.
                                if (v_end < valid_size):
                                    v_end = valid_size
                            validate_feed = {
                                model.inputs: valid_data[v_begin:v_end],
                                model.labels: Valid_label[v_begin:v_end]
                            }
                            p_loss, y_pred_valid[v_begin:v_end, :] = sess.run(
                                [cross_entropy, model.softmax],
                                feed_dict=validate_feed)
                            cost_valid = cost_valid + np.sum(p_loss)
                    cost_valid = cost_valid / valid_size
                    print(np.argmax(y_pred_valid, 1))
                    print(np.argmax(Valid_label, 1))
                    valid_acc_uw = recall(np.argmax(Valid_label, 1),
                                          np.argmax(y_pred_valid, 1),
                                          average='macro')
                    valid_acc_w = recall(np.argmax(Valid_label, 1),
                                         np.argmax(y_pred_valid, 1),
                                         average='weighted')
                    valid_conf = confusion(np.argmax(Valid_label, 1),
                                           np.argmax(y_pred_valid, 1))
                    print('----------segment metrics---------------')
                    print("valid_UA: %3.4g" % valid_acc_uw)
                    print("valid_WA: %3.4g" % valid_acc_w)
                    print('Valid Confusion Matrix:["ang","sad","hap","neu"]')
                    print(valid_conf)
                    print('----------segment metrics---------------')
def natural_keys(text):
    """Split *text* into numeric-aware fragments for natural sort order.

    NOTE(review): atof is applied to every fragment, including non-numeric
    ones — this relies on the surrounding file's atof helper tolerating
    non-numeric strings (locale.atof would raise here); confirm.
    """
    return [
        atof(c)
        for c in re.split(r'[+-]?([0-9]+(?:[.][0-9]*)?|[.][0-9]+)', text)
    ]


model_path = './pre_trained_model/crnn.pth'
img_dir = './data/*.jpg'
alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
print('\n\n')
print('Predicting text images......')
# CRNN: 32-px-high grayscale input, 37 classes (36 chars + CTC blank).
model = crnn.CRNN(32, 1, 37, 256)
if torch.cuda.is_available():
    model = model.cuda()
model.load_state_dict(torch.load(
    model_path))  # Load the pre-trained model parameters to the current model
converter = utils.strLabelConverter(alphabet)
transformer = dataset.resizeNormalize((100, 32))
images = glob.glob(img_dir)
# Sort numerically (img2 before img10) rather than lexicographically.
images.sort(key=natural_keys)
pred_result = []
# NOTE(review): this chunk ends mid-loop — the prediction body continues
# outside the visible source.
for image in images:
    with open(image, 'rb') as file:
        img = Image.open(file).convert('L')
        img = transformer(img)
    # (tail of a recognition helper whose def lies outside this chunk)
    # Flatten the CTC output to a single label sequence.
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    # raw_pred = converter.decode( preds.data, preds_size.data, raw=True )
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    return sim_pred


if __name__ == '__main__':
    import sys
    model_path = './crnn_models/CRNN-0627-crop_48_901.pth'
    gpu = True
    if not torch.cuda.is_available():
        gpu = False
    # len(alphabet) + 1: CTC blank class.
    model = crnn.CRNN(config.imgH, 1, len(alphabet) + 1, 256)
    if gpu:
        model = model.cuda()
    print('loading pretrained model from %s' % model_path)
    if gpu:
        model.load_state_dict(torch.load(model_path))
    else:
        # Map GPU-saved tensors onto the CPU.
        model.load_state_dict(
            torch.load(model_path,
                       map_location=lambda storage, loc: storage))
    # Choose the validation list from the first CLI argument.
    if len(sys.argv) > 1 and 'train' in sys.argv[1]:
        infofile = 'data_set/infofile_updated_0627_train.txt'
        print(val_model(infofile, model, gpu, '0627_train.log'))
    elif len(sys.argv) > 1 and 'gen' in sys.argv[1]:
        infofile = 'data_set/infofile_0627_gen_test.txt'
        print(val_model(infofile, model, gpu, '0627_gen.log'))
                          num_workers=params.workers)
# shuffle=True, just for time consuming.
val_loader = DataLoader(val_dataset,
                        batch_size=params.val_batchSize,
                        shuffle=True,
                        num_workers=params.workers)
converter = utils.strLabelConverter(dataset.alphabet)
# +1 for the CTC blank class.
nclass = len(params.alphabet) + 1
print('nclass:', nclass)
nc = 1
criterion = torch.nn.CTCLoss(reduction='sum')
# criterion = CTCLoss()

# cnn and rnn
# NOTE(review): this rebinds the name `crnn` from the module to the model
# instance — the module becomes unreachable afterwards.
crnn = crnn.CRNN(32, nc, nclass, params.nh)
crnn.apply(weights_init)
if params.crnn != '':
    print('loading pretrained model from %s' % params.crnn)
    crnn.load_state_dict(torch.load(params.crnn))

# setup optimizer
if params.adam:
    optimizer = optim.Adam(crnn.parameters(),
                           lr=params.lr,
                           betas=(params.beta1, 0.999))
elif params.adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=params.lr)
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=params.lr)
test_set = OCR_set('/content/Plate', augmentations(False))
test_loader = DataLoader(test_set, 1, shuffle=True)
assert test_set
# Licence-plate alphabet; +1 class for the CTC blank.
alphabet = '1234567890abekmhopctyx'
nclass = len(alphabet) + 1
nc = 3
nh = 256
imgH = 64
# Fixed CTC sequence length emitted by the network (33 time steps).
preds_size = torch.tensor([33])
converter = utils.strLabelConverter(alphabet)
# NOTE(review): rebinds the `crnn` module name to the model instance.
crnn = crnn.CRNN(imgH, nc, nclass, nh).to(device)
crnn.load_state_dict(torch.load('/content/drive/My Drive/WeightNet/OCR(3.0)'))
tp_1, fp_1, fn_1 = 0, 0, 0  # True Positive, False positive, False negative for first head
tp_2, fp_2, fn_2 = 0, 0, 0  # for double head
result = ['', '']
# NOTE(review): this chunk ends mid-loop — the per-head evaluation body
# continues outside the visible source.
with torch.no_grad():
    for x, y in test_loader:
        cpu_images, cpu_texts = x.to(device), y
        preds = crnn(cpu_images)
        # Split the output into per-head sequences: (heads, 33, 1, 23).
        preds = preds.view(-1, preds_size.item(), 1, 23)
        for idx, head in enumerate(preds):
help="weight path") parse.add_argument('-v', "--weight-path-vertical", type=str, default=None, help="weight path") parse.add_argument('-d', "--device", type=str, default='cpu', help="cpu or cuda") args = parse.parse_args(sys.argv[1:]) alpha = cfg.word.get_all_words() device = torch.device('cuda' if args.device == 'cuda' and torch.cuda.is_available() else 'cpu') # 加载权重,水平方向 h_net = crnn.CRNN(num_classes=len(alpha)) h_net.load_state_dict( torch.load(args.weight_path_horizontal, map_location='cpu')['model']) h_net.eval() h_net.to(device) # 垂直方向 v_net = crnn.CRNNV(num_classes=len(alpha)) v_net.load_state_dict( torch.load(args.weight_path_vertical, map_location='cpu')['model']) v_net.eval() v_net.to(device) # 启动restful服务 start_tornado(app, 5000)
if __name__ == '__main__':
    # Seed all RNGs for reproducible runs.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    # +1 for the CTC blank class.
    num_classes = len(alphabet.alphabet) + 1
    converter = utils.StrLabelConverter(alphabet.alphabet)
    trainloader, validloader = prepare_dataloader()
    # NOTE(review): rebinds the `crnn` module name to the model instance.
    crnn = crnn.CRNN(num_classes).to(device)
    criterion = torch.nn.CTCLoss().to(device)
    if args.adam:
        optimizer = optim.Adam(crnn.parameters(), lr=args.lr)
    elif args.rmsprop:
        optimizer = optim.RMSprop(crnn.parameters(), lr=args.lr)
    else:
        optimizer = optim.Adadelta(crnn.parameters())
    if args.pretrained != '':
        print('loading pretrained model from {}'.format(args.pretrained))
        crnn.load_state_dict(torch.load(args.pretrained))
    crnn.train()
    # NOTE(review): this chunk ends mid-loop — the epoch body continues
    # outside the visible source.
    for epoch in range(args.num_epoch):
import argparse

# Command-line arguments
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-lr',
                        type=float,
                        default=opt.learning_rate,
                        help='initial learning rate')
    args = parser.parse_args()
    # Determine the model size: the character set defines the class count.
    # Use a context manager so the file handle is closed (the original
    # leaked the open file object).
    with open('./char.txt', 'r', encoding='utf-8') as char_file:
        char_set = char_file.readlines()
    # Skip the header line; '卍' is appended as an extra class.
    char_set = ''.join([ch.strip('\n') for ch in char_set[1:]] + ['卍'])
    n_class = len(char_set)
    model = crnn.CRNN(img_h, 1, n_class, 256)
    # Fix: the original tested `torch.cuda.is_available` (the function
    # object, which is always truthy) instead of calling it, so the GPU
    # branch ran whenever use_gpu was set even without CUDA.
    if torch.cuda.is_available() and use_gpu:
        model.cuda()
    # modelpath = opt.modelpath
    modelpath = opt.model_path
    # Learning rate, loss function and optimizer setup
    learning_rate = args.lr
    loss_func = torch.nn.CTCLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=opt.weight_decay)
    # optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=opt.weight_decay)
    # Reload the model
import numpy as np
import crnn as crnn
import cv2
import torch.nn.functional as F

import keys
import config

gpu = True
if not torch.cuda.is_available():
    gpu = False

model_path = './crnn_models/CRNN-0618-10w_21_990.pth'
alphabet = keys.alphabet
print(len(alphabet))
imgH = config.imgH
imgW = config.imgW
# NOTE(review): `torch`, `utils` and `mydataset` are not imported in this
# chunk — presumably imported earlier in the file.
# len(alphabet) + 1: CTC blank class.
model = crnn.CRNN(imgH, 1, len(alphabet) + 1, 256)
if gpu:
    model = model.cuda()
print('loading pretrained model from %s' % model_path)
if gpu:
    model.load_state_dict(torch.load(model_path))
else:
    # Map GPU-saved tensors onto the CPU.
    model.load_state_dict(
        torch.load(model_path, map_location=lambda storage, loc: storage))
converter = utils.strLabelConverter(alphabet)
transformer = mydataset.resizeNormalize((imgW, imgH), is_test=True)


# NOTE(review): this chunk ends mid-function — the rest of
# recognize_downline lies outside the visible source.
def recognize_downline(img, crnn_model=model):
    # OpenCV decodes BGR; the pipeline expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# criterion = CTCLoss(reduction='sum',zero_infinity=True) criterion = CTCLoss() best_acc = 0.9 # custom weights initialization called on crnn def weights_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1: m.weight.data.normal_(0.0, 0.02) elif classname.find('BatchNorm') != -1: m.weight.data.normal_(1.0, 0.02) m.bias.data.fill_(0) crnn = crnn.CRNN(config.imgH, config.nc, config.nclass, config.nh) if config.pretrained_model != '' and os.path.exists(config.pretrained_model): print('loading pretrained model from %s' % config.pretrained_model) crnn.load_state_dict(torch.load(config.pretrained_model)) else: crnn.apply(weights_init) print(crnn) # image = torch.FloatTensor(config.batchSize, 3, config.imgH, config.imgH) # text = torch.IntTensor(config.batchSize * 5) # length = torch.IntTensor(config.batchSize) device = torch.device('cpu') if config.cuda: crnn.cuda() # crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
nc = 1
converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        # Conv layers: weights ~ N(0, 0.02).
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        # BatchNorm: scale ~ N(1, 0.02), zero shift.
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# NOTE(review): rebinds the `crnn` module name to the model instance.
crnn = crnn.CRNN(opt.imgH, nc, nclass, nh, ngpu)
crnn.apply(weights_init)
if opt.crnn != '':
    print('loading pretrained model from %s' % opt.crnn)
    crnn.load_state_dict(torch.load(opt.crnn))
print(crnn)

# Pre-allocated host buffers for one batch of images / labels / lengths.
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)

if opt.cuda:
    crnn.cuda()
    image = image.cuda()
    criterion = criterion.cuda()
def evaluate():
    """Evaluate the CRNN emotion model on validation and test sets.

    Restores moving-average weights from the latest checkpoint, pools
    per-segment softmax predictions back to sentence level (max over each
    sentence's segments), tracks the best validation recall, and pickles
    the corresponding test predictions/metrics to FLAGS.pred_name.
    Classes: ["ang", "sad", "hap", "neu"].
    """
    with tf.Graph().as_default() as g:
        model = crnn.CRNN('test')
        model._build_model()
        #load training data
        test_data, test_label, valid_data, valid_label, Valid_label, Test_label, pernums_test, pernums_valid = load_data(
        )
        # test, valid segment size
        test_size = test_data.shape[0]
        valid_size = valid_data.shape[0]
        # whole-sentence labels (lowercase names)
        test_label = dense_to_one_hot(test_label, 4)
        valid_label = dense_to_one_hot(valid_label, 4)
        # per-segment labels (capitalized names)
        Test_label = dense_to_one_hot(Test_label, 4)
        Valid_label = dense_to_one_hot(Valid_label, 4)
        # pernums_*[s] = number of segments making up sentence s
        tnum = pernums_test.shape[0]
        vnum = pernums_valid.shape[0]
        pred_test_uw = np.empty((tnum, 4), dtype=np.float32)
        pred_test_w = np.empty((tnum, 4), dtype=np.float32)
        valid_iter = divmod((valid_size), FLAGS.valid_batch_size)[0]
        test_iter = divmod((test_size), FLAGS.test_batch_size)[0]
        y_pred_valid = np.empty((valid_size, 4), dtype=np.float32)
        y_pred_test = np.empty((test_size, 4), dtype=np.float32)
        y_test = np.empty((tnum, 4), dtype=np.float32)
        y_valid = np.empty((vnum, 4), dtype=np.float32)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=model.labels, logits=model.logits)
        # Restore the exponential-moving-average shadow variables rather
        # than the raw trained weights.
        variable_averages = tf.train.ExponentialMovingAverage(FLAGS.momentum)
        variable_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variable_to_restore)
        #saver = tf.train.Saver()
        flag = False
        best_valid_uw = 0
        best_valid_w = 0
        for i in range(5):
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # Step number is the suffix of the checkpoint filename.
                    global_step = ckpt.model_checkpoint_path.split(
                        '/')[-1].split('-')[-1]
                    #for validation data
                    index = 0
                    cost_valid = 0
                    if (valid_size < FLAGS.valid_batch_size):
                        # Whole validation set fits in a single batch.
                        validate_feed = {
                            model.inputs: valid_data,
                            model.labels: Valid_label
                        }
                        y_pred_valid, loss = sess.run(
                            [model.softmax, cross_entropy],
                            feed_dict=validate_feed)
                        cost_valid = cost_valid + np.sum(loss)
                    else:
                        for v in range(valid_iter):
                            v_begin = v * FLAGS.valid_batch_size
                            v_end = (v + 1) * FLAGS.valid_batch_size
                            if (v == valid_iter - 1):
                                # Last batch absorbs the remainder.
                                if (v_end < valid_size):
                                    v_end = valid_size
                            validate_feed = {
                                model.inputs: valid_data[v_begin:v_end],
                                model.labels: Valid_label[v_begin:v_end]
                            }
                            loss, y_pred_valid[v_begin:v_end, :] = sess.run(
                                [cross_entropy, model.softmax],
                                feed_dict=validate_feed)
                            cost_valid = cost_valid + np.sum(loss)
                    cost_valid = cost_valid / valid_size
                    print(y_pred_valid)
                    # Segment-level validation metrics.
                    valid_acc_uw = recall(np.argmax(Valid_label, 1),
                                          np.argmax(y_pred_valid, 1),
                                          average='macro')
                    valid_acc_w = recall(np.argmax(Valid_label, 1),
                                         np.argmax(y_pred_valid, 1),
                                         average='weighted')
                    valid_conf = confusion(np.argmax(Valid_label, 1),
                                           np.argmax(y_pred_valid, 1))
                    print('----------segment metrics---------------')
                    print("Best valid_UA: %3.4g" % best_valid_uw)
                    print("Best valid_WA: %3.4g" % best_valid_w)
                    print('Valid Confusion Matrix:["ang","sad","hap","neu"]')
                    print(valid_conf)
                    print('----------segment metrics---------------')
                    # Pool segment predictions to sentence level via max.
                    for s in range(vnum):
                        y_valid[s, :] = np.max(
                            y_pred_valid[index:index + pernums_valid[s], :],
                            0)
                        index += pernums_valid[s]
                    # Sentence-level validation metrics (overwrite the
                    # segment-level values above).
                    valid_acc_uw = recall(np.argmax(valid_label, 1),
                                          np.argmax(y_valid, 1),
                                          average='macro')
                    valid_acc_w = recall(np.argmax(valid_label, 1),
                                         np.argmax(y_valid, 1),
                                         average='weighted')
                    valid_conf = confusion(np.argmax(valid_label, 1),
                                           np.argmax(y_valid, 1))
                    #for test set
                    index = 0
                    for t in range(test_iter):
                        t_begin = t * FLAGS.test_batch_size
                        t_end = (t + 1) * FLAGS.test_batch_size
                        if (t == test_iter - 1):
                            # Last batch absorbs the remainder.
                            if (t_end < test_size):
                                t_end = test_size
                        #print t_begin,t_end,t,test_iter
                        test_feed = {
                            model.inputs: test_data[t_begin:t_end],
                            model.labels: Test_label[t_begin:t_end]
                        }
                        y_pred_test[t_begin:t_end, :] = sess.run(
                            model.logits, feed_dict=test_feed)
                    # Pool test segments to sentence level via max.
                    for s in range(tnum):
                        y_test[s, :] = np.max(
                            y_pred_test[index:index + pernums_test[s], :], 0)
                        index = index + pernums_test[s]
                    # Keep test predictions for the best validation UA / WA.
                    if valid_acc_uw > best_valid_uw:
                        best_valid_uw = valid_acc_uw
                        pred_test_uw = y_test
                        test_acc_uw = recall(np.argmax(test_label, 1),
                                             np.argmax(y_test, 1),
                                             average='macro')
                        test_conf = confusion(np.argmax(test_label, 1),
                                              np.argmax(y_test, 1))
                        confusion_uw = test_conf
                        flag = True
                    if valid_acc_w > best_valid_w:
                        best_valid_w = valid_acc_w
                        pred_test_w = y_test
                        test_acc_w = recall(np.argmax(test_label, 1),
                                            np.argmax(y_test, 1),
                                            average='weighted')
                        test_conf = confusion(np.argmax(test_label, 1),
                                              np.argmax(y_test, 1))
                        confusion_w = test_conf
                        flag = True
                    print(
                        "*****************************************************************"
                    )
                    print(global_step)
                    print("Epoch: %s" % global_step)
                    print("Valid cost: %2.3g" % cost_valid)
                    print("Valid_UA: %3.4g" % valid_acc_uw)
                    print("Valid_WA: %3.4g" % valid_acc_w)
                    print("Best valid_UA: %3.4g" % best_valid_uw)
                    print("Best valid_WA: %3.4g" % best_valid_w)
                    print('Valid Confusion Matrix:["ang","sad","hap","neu"]')
                    print(valid_conf)
                    print("Test_UA: %3.4g" % test_acc_uw)
                    print("Test_WA: %3.4g" % test_acc_w)
                    print('Test Confusion Matrix:["ang","sad","hap","neu"]')
                    print(confusion_uw)
                    print(
                        "*****************************************************************"
                    )
                    if (flag):
                        # Persist the best-so-far predictions and metrics.
                        f = open(FLAGS.pred_name, 'wb')
                        pickle.dump((
                            best_valid_uw,
                            best_valid_w,
                            pred_test_w,
                            test_acc_w,
                            confusion_w,
                            pred_test_uw,
                            test_acc_uw,
                            confusion_uw,
                        ), f)
                        f.close()
                        flag = False
    # (tail of a CTC decode helper whose def lies outside this chunk)
    results = []
    for word in preds:
        if raw:
            # Raw mode: emit every label, including blanks and repeats.
            results.append(''.join([alphabet[int(i)] for i in word]))
        else:
            result = []
            l = len(word)
            for i in range(l):
                # CTC collapse: drop blanks (class 0) and repeated labels.
                if word[i] != 0 and (not (i > 0
                                          and word[i] == word[i - 1])):
                    # Hack to decode label as well
                    result.append(alphabet[int(word[i])])
            results.append(''.join(result))
    return results


if __name__ == '__main__':
    model_path = './output/netCRNN_29.pth'
    # model_path = './output/model.pkl'
    img_path = '/data1/zj/dataset/test/0_song5_0_3.jpg'
    alphabet = Alphabet.CHINESECHAR_LETTERS_DIGIT_SYMBOLS
    # Initialize the network.
    net = crnn.CRNN(32, 1, len(alphabet), 256)
    model = Pytorch_model(model_path,
                          alphabet=alphabet,
                          net=net,
                          img_shape=[200, 32],
                          img_channel=1)
    # Run prediction.
    img = cv2.imread(img_path, 1)
    result = model.predict(img)
    # Visualize.
    plt.title(result, fontproperties=myfont, fontsize=18)
    plt.imshow(img, cmap='gray_r')
    plt.show()