def data_loader():
    # train
    train_dataset = dataset.lmdbDataset(root=args.trainroot,
                                        transform=dataset.customResize())
    assert train_dataset
    if not params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset, params.batchSize)
    else:
        sampler = None
    # DataLoader raises a ValueError when both shuffle=True and a sampler are
    # given, so shuffle only when no sampler is in use.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=params.batchSize,
        shuffle=(sampler is None), sampler=sampler,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW))
    # val
    val_dataset = dataset.lmdbDataset(root=args.valroot,
                                      transform=dataset.resizeNormalize(
                                          (params.imgW, params.imgH)))
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=True,
                                             batch_size=params.batchSize,
                                             num_workers=int(params.workers))
    return train_loader, val_loader
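# Usage sketch for the loader factory above (hypothetical driver code; it
# assumes `args` and `params` are configured exactly as in the snippet).
if __name__ == '__main__':
    train_loader, val_loader = data_loader()
    images, texts = next(iter(train_loader))
    # alignCollate yields image batches shaped (batchSize, nc, imgH, imgW)
    print(images.shape, len(texts))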
def data_loader():
    # train
    train_transform = ImgAugTransform()
    train_datasets = []
    for train_root in params.train_roots:
        train_dataset = dataset.lmdbDataset(root=train_root,
                                            transform=train_transform)
        train_datasets.append(train_dataset)
    train_dataset = torch.utils.data.ConcatDataset(train_datasets)
    assert train_dataset
    if not params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset, params.batchSize)
    else:
        sampler = None
    # shuffle and sampler are mutually exclusive in DataLoader
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=params.batchSize,
        shuffle=(sampler is None), sampler=sampler,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    # val
    val_dataset_list = []
    for val_root in params.val_roots:
        val_dataset = dataset.lmdbDataset(
            root=val_root,
            transform=dataset.processing_image((params.imgW, params.imgH)))
        val_dataset_list.append(val_dataset)
    val_dataset = torch.utils.data.ConcatDataset(val_dataset_list)
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=True,
                                             batch_size=params.batchSize,
                                             num_workers=int(params.workers))
    return train_loader, val_loader, train_dataset, val_dataset
def data_loader():
    # train
    transform = torchvision.transforms.Compose(
        [ImgAugTransform(), GridDistortion(prob=0.65)])
    train_dataset = dataset.lmdbDataset(root=args.trainroot, transform=transform)
    assert train_dataset
    if not params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset, params.batchSize)
    else:
        sampler = None
    # shuffle and sampler are mutually exclusive in DataLoader
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=params.batchSize,
        shuffle=(sampler is None), sampler=sampler,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    # val
    transform = torchvision.transforms.Compose(
        [dataset.resizeNormalize((params.imgW, params.imgH))])
    val_dataset = dataset.lmdbDataset(root=args.valroot, transform=transform)
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=True,
                                             batch_size=params.batchSize,
                                             num_workers=int(params.workers))
    return train_loader, val_loader
def main():
    resnet_crnn = ResNetCRNN(rc_params.imgH, 1, len(rc_params.alphabet) + 1,
                             rc_params.nh, resnet_type=rc_params.resnet_type,
                             feat_size=rc_params.feat_size)
    resnet_crnn = torch.nn.DataParallel(resnet_crnn)
    state_dict = torch.load(
        './work_dirs/resnet18_rcnn_sgd_imgh128_rgb_512x1x16_lr_0.00100_batchSize_8_time_0319110013_/crnn_Rec_done_epoch_7.pth'
    )
    resnet_crnn.load_state_dict(state_dict)
    test_dataset = dataset.lmdbDataset(root='to_lmdb/test_index', rgb=True)
    converter = utils.strLabelConverter(rc_params.alphabet)
    resnet_crnn.eval()
    resnet_crnn.cuda()
    data_loader = torch.utils.data.DataLoader(
        test_dataset, shuffle=False, batch_size=1,
        num_workers=int(rc_params.workers),
        collate_fn=alignCollate(imgH=rc_params.imgH, imgW=rc_params.imgW,
                                keep_ratio=rc_params.keep_ratio, rgb=True))
    val_iter = iter(data_loader)
    max_iter = len(data_loader)
    record_dir = 'test_out/test_out.txt'
    r = 1
    f = open(record_dir, "a")
    image = torch.FloatTensor(rc_params.batchSize, 3, rc_params.imgH,
                              rc_params.imgH)
    prog_bar = mmcv.ProgressBar(max_iter)
    for i in range(max_iter):
        data = next(val_iter)  # val_iter.next() is Python 2 only
        cpu_images, cpu_texts = data
        batch_size = cpu_images.size(0)
        # loadData resizes `image` in place to match the batch; note it stays
        # on the CPU here, as the commented-out line below hints.
        utils.loadData(image, cpu_images)
        # image = cpu_images.cuda()
        with torch.no_grad():
            preds = resnet_crnn(image)
        preds_size = torch.IntTensor([preds.size(0)] * batch_size)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        if not isinstance(sim_preds, list):
            sim_preds = [sim_preds]
        for pred in sim_preds:
            f.write(str(r).zfill(6) + ".jpg " + pred + "\n")
            r += 1
        prog_bar.update()
    print("")
    f.close()
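# A minimal, self-contained sketch of what converter.decode(..., raw=False)
# does with the argmax indices produced above: collapse repeated labels and
# drop the CTC blank (index 0). The function name and alphabet here are
# illustrative, not part of the original utils module.
def greedy_ctc_decode(indices, alphabet):
    chars = []
    prev = 0  # treat the start as a blank
    for idx in indices:
        if idx != 0 and idx != prev:  # skip blanks and repeated labels
            chars.append(alphabet[idx - 1])
        prev = idx
    return ''.join(chars)


assert greedy_ctc_decode([1, 1, 0, 2, 2, 0, 1], 'ab') == 'aba'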
def dataloader(self):
    # train_loader
    train_dataset = dataset.lmdbDataset(
        root=self.trainroot, label_type=cfg.label_type,
        channel=cfg.nc)  # zyk: cfg.nc is the number of image channels
    # zyk: check that the dataset really yields nc-channel images
    assert cfg.nc == np.asarray(train_dataset[0][0]).shape[2]
    collate = dataset.AlignCollate(imgH=cfg.imgH, imgW=cfg.imgW,
                                   keep_ratio=cfg.keep_ratio, crop=cfg.crop)
    train_loader = DataLoader(train_dataset, batch_size=cfg.batchSize,
                              shuffle=True, num_workers=int(cfg.workers),
                              collate_fn=collate)
    # test_loader
    test_dataset = dataset.lmdbDataset(
        root=self.testroot, label_type=cfg.label_type,
        transform=dataset.ResizeNormalize((cfg.imgW, cfg.imgH),
                                          crop=cfg.crop))
    test_loader = DataLoader(test_dataset, shuffle=False,
                             batch_size=2 * cfg.batchSize,
                             num_workers=int(cfg.workers))
    return train_loader, test_loader
def initValDataSets():
    index = 0
    list_name = []
    if os.path.exists(val_path + "/data.mdb"):
        one_dataset = dataset.lmdbDataset(root=val_path,
                                          transform=dataset.resizeNormalize(
                                              (100, 32)))
        val_data = {
            "dir": val_path,
            "dataset": one_dataset,
            # "loader": one_loader,
            "index": index
        }
        val_data_list.append(val_data)
        list_name.append(val_path)
    else:
        fs = os.listdir(val_path)
        for one in fs:
            root_path = val_path + "/" + one + "/val"
            if not os.path.exists(root_path) or not os.path.exists(
                    val_path + "/" + one + "/val/data.mdb"):
                if os.path.exists(val_path + "/" + one + "/data.mdb"):
                    root_path = val_path + "/" + one
                else:
                    continue
            # print("Adding validation dataset: {}".format(root_path))
            one_dataset = dataset.lmdbDataset(
                root=root_path, transform=dataset.resizeNormalize((100, 32)))
            # one_loader = torch.utils.data.DataLoader(one_dataset, shuffle=True, batch_size=opt.batchSize,
            #                                          num_workers=int(opt.workers))
            val_data = {
                "dir": one,
                "dataset": one_dataset,
                # "loader": one_loader,
                "index": index
            }
            index += 1
            val_data_list.append(val_data)
            list_name.append(one)
    print_msg("Loaded {} validation sets: {}".format(len(list_name), list_name))
def data_loader():
    # val
    val_dataset = dataset.lmdbDataset(root=args.valroot,
                                      transform=dataset.resizeNormalize(
                                          (params.imgW, params.imgH)))
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=True,
                                             batch_size=params.batchSize,
                                             num_workers=int(params.workers))
    return val_loader
def prepare_dataloader():
    transform = transforms.Compose([
        transforms.Resize([args.height, args.width]),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, ))
    ])
    # Let cuDNN's built-in auto-tuner find the most efficient algorithms for
    # the current configuration. This helps when input sizes barely change;
    # with highly variable input sizes it can actually hurt performance.
    torch.backends.cudnn.benchmark = True
    train_set = dataset.lmdbDataset(root=args.train_root, transform=transform)
    trainloader = torch.utils.data.DataLoader(dataset=train_set,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers)
    valid_set = dataset.lmdbDataset(root=args.valid_root, transform=transform)
    validloader = torch.utils.data.DataLoader(dataset=valid_set,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)
    return trainloader, validloader
def data_loader():
    # train
    train_dataset = dataset.lmdbDataset(
        root=args.trainroot,
        transform=Compose([
            Rotate(p=0.5, limit=(-15, 15),
                   border_mode=cv2.BORDER_CONSTANT, value=255),
            CustomPiecewiseAffineTransform(),
        ]))
    assert train_dataset
    if not params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset, params.batchSize)
    else:
        sampler = None
    # shuffle and sampler are mutually exclusive in DataLoader
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=params.batchSize,
        shuffle=(sampler is None), sampler=sampler,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH, imgW=params.imgW,
                                        keep_ratio=params.keep_ratio))
    # val
    val_dataset = dataset.lmdbDataset(
        root=args.valroot,
        transform=dataset.resizeNormalize((params.imgW, params.imgH)))
    assert val_dataset
    val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=True,
                                             batch_size=params.batchSize,
                                             num_workers=int(params.workers))
    return train_loader, val_loader
def initTrainDataLoader():
    print_msg("Loading training-set lmdb: {}".format(dataset_dir))
    train_dataset = dataset.lmdbDataset(root=dataset_dir)
    assert train_dataset
    print_msg("Training-set lmdb loaded successfully")
    if opt.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
    else:
        sampler = None
    loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=opt.batchSize,
        shuffle=(sampler is None),  # shuffle and sampler are mutually exclusive
        sampler=sampler,
        num_workers=int(opt.workers),
        collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW,
                                        keep_ratio=opt.keep_ratio))
    return loader
def convert(originPath, outputPath):
    args = [0] * 6
    originDataset = dataset.lmdbDataset(originPath, 'abc', args)
    print('Origin dataset has %d samples' % len(originDataset))
    labelStrList = []
    for i in range(len(originDataset)):
        label = originDataset.getLabel(i + 1)
        labelStrList.append(label)
        if i % 10000 == 0:
            print(i)
    lengthList = [len(s) for s in labelStrList]
    # zip() returns an iterator in Python 3, so materialize it before sorting
    items = list(zip(lengthList, range(len(labelStrList))))
    items.sort(key=lambda item: item[0])
    env = lmdb.open(outputPath, map_size=1099511627776)
    cnt = 1
    cache = {}
    nSamples = len(items)
    for i in range(nSamples):
        imageKey = 'image-%09d' % cnt
        labelKey = 'label-%09d' % cnt
        origin_i = items[i][1]
        img, label = originDataset[origin_i + 1]
        cache[labelKey] = label
        cache[imageKey] = img
        if cnt % 1000 == 0 or cnt == nSamples:
            writeCache(env, cache)
            cache = {}
            print('Written %d / %d' % (cnt, nSamples))
        cnt += 1
    nSamples = cnt - 1
    cache['num-samples'] = str(nSamples)
    writeCache(env, cache)
    print('Convert dataset with %d samples' % nSamples)
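# Minimal illustration of the sort-by-length step in convert(): pair each
# label's length with its index, sort on the length, and the second field
# gives the order in which samples are re-written to the output LMDB.
labels = ['abc', 'a', 'ab']
items = list(zip([len(s) for s in labels], range(len(labels))))
items.sort(key=lambda item: item[0])
assert items == [(1, 1), (2, 2), (3, 0)]  # shortest label first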
def infer(args: Namespace):
    if not args.src.endswith('/test/'):
        logging.warning("LMDB dataset here is expected to be a test set.")
    test_dataset = dataset.lmdbDataset(root=args.src,
                                       transform=dataset.ResizeNormalize(
                                           (cfg.imgW, cfg.imgH),
                                           crop=cfg.crop))
    test_loader = DataLoader(test_dataset, shuffle=False,
                             batch_size=2 * cfg.batchSize,
                             num_workers=int(cfg.workers))
    device = torch.device("cuda")
    codec = dataset.Codec(cfg.alphabet)
    nClass = len(cfg.alphabet) + 1
    model = crnn.CRNN(cfg.imgH, cfg.nc, nClass, cfg.nh).to(device)
    print(model)
    para = torch.load(
        os.path.join("/home/chuan/captcha/crnn/weights", args.para))
    model.load_state_dict(para)
    utils.infer(model, test_loader, device, codec)
    return
if __name__ == '__main__':
    manualSeed = random.randint(1, 10000)  # fix seed
    random.seed(manualSeed)
    np.random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    cudnn.benchmark = True
    log_dir = get_log_dir()
    logger = get_logger(log_dir, params.name, params.name + '_info.log')
    logger.info(opt)
    # store model path
    if not os.path.exists('./expr'):
        os.mkdir('./expr')
    # read train set
    train_dataset = dataset.lmdbDataset(root=opt.trainroot,
                                        rand_hcrop=params.with_crop)
    assert train_dataset
    if params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset,
                                                  params.batchSize)
    else:
        sampler = None
    os.environ["CUDA_VISIBLE_DEVICES"] = opt.GPU_ID
    # images will be resized to 32*160
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=params.batchSize,
        shuffle=False,
        sampler=sampler,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH,
Resume_flag = False
check_point_path = ''


def resumeModel(crnn, optimizer):
    checkpoint = torch.load(check_point_path)
    # load into the model that was passed in (the original referenced an
    # undefined global `model` here)
    crnn.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']  # note: read but never returned here
    loss = checkpoint['loss']
    # model.train()


# First load the dataset from the lmdb file.
train_dataset = lmdbDataset(root=cfg.lmdb_train_path)
assert train_dataset
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=cfg.batch_size,
                                           shuffle=True,
                                           sampler=None,
                                           num_workers=4,
                                           collate_fn=alignCollate(
                                               imgH=cfg.imgH,
                                               imgW=cfg.imgW))
# Usage:
#   train_iter = iter(train_loader)
#   images, texts = next(train_iter)


def init_weights(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
def one_hot(text, length=16):  # header reconstructed: the original was cut
    # off; the default length is a guess, only the body below is original
    label = np.zeros(length)
    for i, char in enumerate(text):
        index = characters.find(char)
        if index == -1:
            # unknown characters fall back to the space character's index
            index = characters.find(u' ')
        if i < length:
            label[i] = index
    return label


# load the data (the original built the sampler before train_dataset
# existed, which raises a NameError; the two steps are reordered here)
train_dataset = dataset.lmdbDataset(root=trainroot, target_transform=one_hot)
# print(len(train_dataset))
if random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, batchSize)
else:
    sampler = None
test_dataset = dataset.lmdbDataset(root=valroot,
                                   transform=dataset.resizeNormalize(
                                       (imgW, imgH)),
                                   target_transform=one_hot)
# build the training loader
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batchSize,
                                           shuffle=(sampler is None),
                                           sampler=sampler,
                                           num_workers=int(workers),
                                           collate_fn=dataset.alignCollate(
                                               imgH=imgH,
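# Worked example of the label encoding above (hypothetical alphabet): each
# character maps to its position in `characters`, unknown characters fall
# back to the index of ' ', and the output is zero-padded to `length`.
characters = 'abc '
assert list(one_hot('axc', 5)) == [0.0, 3.0, 2.0, 0.0, 0.0]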
# predictionFile = '../../../../dataset_formal/classify_data/crnnData/result/result_crnn_with_1800ctpn_continue.csv'
predictionFile = '../../../../dataset_formal/classify_data/crnnData/result/result_crnn_tight_ctpn.csv'
df.to_csv(predictionFile, index=False)
print("\nover")
# raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:666]
# raw_preds is the long, undecoded form, e.g. "XG--------7-8-2-3--3-8-3-8"

if __name__ == "__main__":
    # test_lmdb_path = "../../../../dataset_formal/classify_data/crnnData/test_byCTPN_MDB"
    # test_lmdb_path = "../../../../dataset_formal/classify_data/crnnData/test_9mianzhi_MDB"
    test_lmdb_path = "../../../../dataset_formal/classify_data/crnnData/test_tight_MDB"
    test_dataset = dataset.lmdbDataset(root=test_lmdb_path,
                                       transform=dataset.resizeNormalize(
                                           (100, 32)),
                                       type="test")
    nclass = len(alphabet) + 1
    nc = 1
    converter = utils.strLabelConverter(alphabet)
    crnn = crnn.CRNN(32, nc, nclass, 256)
    image = torch.FloatTensor(64, 3, 32, 32)
    text = torch.IntTensor(64 * 5)
    length = torch.IntTensor(64)
    if use_cuda:
        crnn.cuda()
        crnn = torch.nn.DataParallel(crnn, device_ids=range(1))
if __name__ == '__main__':
    args = init_args()
    manualSeed = random.randint(1, 10000)  # fix seed
    random.seed(manualSeed)
    np.random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    cudnn.benchmark = True
    # store model path
    if not os.path.exists('model'):
        os.mkdir('model')
    # read train set
    train_dataset = dataset.lmdbDataset(root=args.trainpath)
    assert train_dataset
    if not params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset,
                                                  params.batchSize)
    else:
        sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=params.batchSize,
        shuffle=(sampler is None),  # shuffle and sampler are mutually exclusive
        sampler=sampler,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH,
                                        imgW=params.imgW,
torch.cuda.set_device(gpu_ids[0])
device = torch.device('cuda:{}'.format(
    gpu_ids[0])) if gpu_ids else torch.device('cpu')
netCRNN = netCRNN.to(gpu_ids[0])
netCRNN = torch.nn.DataParallel(netCRNN, device_ids=gpu_ids)
image = image.to(gpu_ids[0])
image = Variable(image)
print('loading pretrained netCRNN from %s' % model_path)
# if isinstance(netCRNN, torch.nn.DataParallel):
#     netCRNN = netCRNN.module
netCRNN.load_state_dict(torch.load(model_path, map_location=str(device)))

# Load the test data
transformer = dataset.resizeNormalize((opt.imgW, opt.imgH))
test_dataset = dataset.lmdbDataset(root=opt.valRoot, transform=transformer)
test_data_loader = torch.utils.data.DataLoader(test_dataset, shuffle=True,
                                               batch_size=opt.batchSize,
                                               num_workers=int(opt.workers))

# Set the label converter
converter = utils.strLabelConverter(alphabet)


# ===============================================
# Helper Function
# ===============================================
def EditDistance(strA, strB):
    lenA = len(strA)
    lenB = len(strB)
parser.add_argument('--adadelta', action='store_true',
                    help='Whether to use adadelta (default is rmsprop)',
                    default=False)
# Note: store_true combined with default=True means the next two flags are
# always True and cannot be switched off from the command line.
parser.add_argument('--keep_ratio', action='store_true',
                    help='whether to keep ratio for image resize',
                    default=True)
parser.add_argument('--random_sample', action='store_true',
                    help='whether to sample the dataset with random sampler',
                    default=True)
opt = parser.parse_args()
print(opt)

val_dataset = dataset.lmdbDataset(root=opt.valroot)
model_path = '../expr1/netCRNN_79_2679.pth'
if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )
converter = utils.strLabelConverter(opt.alphabet)
image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
text = torch.IntTensor(opt.batchSize * 5)
length = torch.IntTensor(opt.batchSize)
criterion = CTCLoss()
# print(alphabet)
print(opt)

if not os.path.exists(opt.expr_dir):
    os.makedirs(opt.expr_dir)

random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )

train_dataset = dataset.lmdbDataset(root=opt.trainRoot, type="train")
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=opt.batchSize,
    # shuffle=True, sampler=sampler,
    shuffle=True,  # xzy
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH,
                                    imgW=opt.imgW,
                                    keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(root=opt.valRoot,
os.system('mkdir {0}'.format(opt.experiment))

opt.manualSeed = random.randint(1, 200000)  # fix seed
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )

train_dataset = dataset.lmdbDataset(root=opt.trainroot)
assert train_dataset
if opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=opt.batchSize,
                                           shuffle=False,
                                           sampler=sampler,
                                           num_workers=int(opt.workers),
                                           collate_fn=dataset.alignCollate(
                                               imgH=opt.imgH,
                                               imgW=opt.imgW,
                                               keep_ratio=opt.keep_ratio))
# test_dataset = dataset.lmdbDataset(
    # (tail of a directory-listing helper; its header is cut off in the source)
    filePath = osj(path, folder)
    for _rt, _dirs, _files in os.walk(filePath):
        for filename in _files:
            ret.append(osj(filePath, filename))
    return ret


model_path = './output/netCRNN_L3.pth'
# ConvertToUTF8(val_fileList)
opt_valroot = "./data/val"
opt_batchSize = 20
opt_workers = 2

test_dataset = dataset.lmdbDataset(
    root=opt_valroot, transform=dataset.resizeNormalize((800, 32)))
# test_dataset = dataset.lmdbDataset(root=opt_valroot)
print(len(test_dataset))

converter = utils.strLabelConverter(keys.alphabet)
criterion = CTCLoss()
crnn = crnn.CRNN(32, 1, 352, 256)
if torch.cuda.is_available():
    crnn = crnn.cuda()
    crnn = torch.nn.DataParallel(crnn, device_ids=range(1))
print('loading pretrained model from %s' % model_path)
crnn.load_state_dict(torch.load(model_path))

image = torch.FloatTensor(opt_batchSize, 3, 32, 32)
text = torch.IntTensor(opt_batchSize * 5)
    # (tail of the model-loading helper; its header is cut off in the source)
    state_dict = torch.load(find_new_file(model_dir))
    # state_dict = torch.load('./expr/model/model_Rec_done_670.pth')
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # strip the 'module.' prefix added by DataParallel
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict)
    print('load the model %s' % find_new_file(model_dir))
    model_id = re.findall(r'\d+', find_new_file(model_dir))
    model_id = int(model_id[0])
    return model, model_id


test_dataset = dataset.lmdbDataset(root=opt.valroot,
                                   transform=dataset.resizeNormalize(
                                       (imgW, imgH)))
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          shuffle=False,
                                          sampler=None,
                                          batch_size=batchSize,
                                          num_workers=int(workers))
word2index, index2word, n_words = lang()

nc = 1
input_size = 1024
hidden_size = 1024
encoder_layers = 2
decoder_layers = 1
os.system('mkdir {0}'.format(opt.experiment))

opt.manualSeed = random.randint(1, 10000)  # fix seed
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )

train_dataset = dataset.lmdbDataset(root=opt.trainroot)
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=opt.batchSize,
                                           shuffle=(sampler is None),
                                           sampler=sampler,
                                           num_workers=int(opt.workers),
                                           collate_fn=dataset.alignCollate(
                                               imgH=opt.imgH,
                                               imgW=opt.imgW,
                                               keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(root=opt.valroot,
if opt.experiment is None:
    opt.experiment = 'samples'
os.system('mkdir {0}'.format(opt.experiment))

opt.manualSeed = random.randint(1, 10000)  # fix seed
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

train_dataset = dataset.lmdbDataset(root=opt.trainroot)
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=opt.batchSize,
    shuffle=(sampler is None),  # shuffle and sampler are mutually exclusive
    sampler=sampler,
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH, keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(
    root=opt.valroot, transform=dataset.resizeNormalize((100, 32)))

ngpu = int(opt.ngpu)
nh = int(opt.nh)
import dataset
import models.VGG_BiLSTM_CTC as crnn
# Note: this second import rebinds the `crnn` alias, shadowing the VGG model
import models.ResNet_BiLSTM_CTC as crnn

valRoot = 'data'
model_path = 'crnn.pth'
alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\';:.-! )"$\\#%,@&/?([]{}+-=*^|'
# can't pickle Environment objects => workers = 0
workers = 2
imgH = 32
nclass = len(alphabet) + 1
nc = 1

test_dataset = dataset.lmdbDataset(root=valRoot,
                                   transform=dataset.resizeNormalize(
                                       (100, 32)))
converter = utils.strLabelConverter(alphabet)
model = crnn.CRNN(imgH, nc, nclass, 256)
model = torch.nn.DataParallel(model).cuda()
model.load_state_dict(torch.load(model_path))

# random initial
image = torch.FloatTensor(1, 3, 3, 4)
text = torch.IntTensor(5)
length = torch.IntTensor(1)
image = Variable(image)
text = Variable(text)
# tensorboardX
writer = SummaryWriter(os.path.join(log_dir, 'tb_logs'))

# store model path
if not os.path.exists('./expr'):
    os.mkdir('./expr')

os.environ["CUDA_VISIBLE_DEVICES"] = opt.GPU_ID

# read train set
tr_t = transforms.Compose([
    transforms.ColorJitter(brightness=0.1,
                           contrast=0.3,
                           saturation=0.1,
                           hue=0.1),
])
train_dataset = dataset.lmdbDataset(root=opt.trainroot,
                                    rgb=params.rgb,
                                    transform=tr_t,
                                    rand_hcrop=True)
assert train_dataset
if params.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset,
                                              params.batchSize)
else:
    sampler = None

# images will be resized to 32*160
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=params.batchSize,
    shuffle=False,
    sampler=sampler,
    num_workers=int(params.workers),
import loader
import dataset
import time
import multiprocessing
import queue
import threading
from mylog import *
import torch
import signal, os, sys
from buffer import Buffer

# TODO: this path has to be hard-coded by hand, which is a bit ugly
ds = dataset.lmdbDataset('/data/share/ImageNet/ILSVRC-train.lmdb', True)


class Loader(object):
    @staticmethod
    def process(id_queue, resp_queue, buf_name):
        torch.set_num_threads(1)
        buf = Buffer(buf_name, 602116)
        while True:
            data_id, data_idx = id_queue.get(True)
            data = ds[data_id]
            buf.write_data(data_idx, data)
            logging.critical("loader write data %d in %d", data_id, data_idx)
            resp_queue.put((data_id, data_idx))

    @staticmethod
    # TODO: hard-coded worker count
    def loading(id_queue, resp_queue, buf_name, workers=8, s=0):
def val(net, criterion, max_iter=100):
    print('Start val')
    # read test set
    test_dataset = dataset.lmdbDataset(root=params.valroot,
                                       transform=dataset.resizeNormalize(
                                           (params.imgW, params.imgH)))
    for p in net.parameters():  # the original froze the global `crnn` here
        p.requires_grad = False
    net.eval()
    try:
        data_loader = torch.utils.data.DataLoader(
            test_dataset, shuffle=True, batch_size=params.batchSize,
            num_workers=int(params.workers))
        val_iter = iter(data_loader)
        n_correct = 0
        loss_avg = utils.averager()
        max_iter = min(max_iter, len(data_loader))
        for i in range(max_iter):
            data = next(val_iter)  # val_iter.next() is Python 2 only
            cpu_images, cpu_texts = data
            batch_size = cpu_images.size(0)
            utils.loadData(image, cpu_images)
            t, l = converter.encode(cpu_texts)
            utils.loadData(text, t)
            utils.loadData(length, l)
            preds = net(image)  # the original called the global `crnn`
            preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
            cost = criterion(preds, text, preds_size, length) / batch_size
            loss_avg.add(cost)
            _, preds = preds.max(2)
            # preds = preds.squeeze(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
            list_1 = [txt.decode('utf-8', 'strict') for txt in cpu_texts]
            for pred, target in zip(sim_preds, list_1):
                if pred == target:
                    n_correct += 1
            raw_preds = converter.decode(preds.data, preds_size.data,
                                         raw=True)[:params.n_test_disp]
            for raw_pred, pred, gt in zip(raw_preds, sim_preds, list_1):
                print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))
        # print(n_correct)
        # print(max_iter * params.batchSize)
        accuracy = n_correct / float(max_iter * params.batchSize)
        print('Test loss: %f, accuracy: %f' % (loss_avg.val(), accuracy))
    except Exception as e:
        # the original used a bare `except: pass`, silently hiding all
        # errors; at least report what went wrong
        print('val failed: %s' % e)
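# Note on the accuracy computed above: the denominator max_iter * batchSize
# assumes every batch is full, so a smaller final batch skews the result.
# A sketch of a variant that counts the samples actually evaluated (same
# loop as above, different counter):
#
#     n_total = 0
#     for i in range(max_iter):
#         ...
#         n_total += batch_size
#     accuracy = n_correct / float(n_total)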
print(opt)

if not os.path.exists(opt.expr_dir):
    os.makedirs(opt.expr_dir)

random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available():
    torch.cuda.set_device(opt.gpu)
    print('device:', torch.cuda.current_device())

train_dataset = dataset.lmdbDataset(root=opt.trainRoot)
assert train_dataset
# hold out the first 200 samples as a validation split
dataset_size = len(train_dataset)
indices = list(range(dataset_size))
split = 200
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=opt.batchSize,
                                           shuffle=False,
                                           sampler=train_sampler,
                                           num_workers=int(opt.workers),
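# Note on the split above: `indices` is not shuffled before slicing, so the
# 200 held-out samples are simply the first records in the LMDB. A random
# split would shuffle first (sketch; reuses the numpy seeding done above):
#
#     np.random.shuffle(indices)
#     train_idx, valid_idx = indices[split:], indices[:split]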
    # (tail of the checkpoint-saving helper; its header is cut off in the source)
    print('%s saved' % save_name)


if __name__ == '__main__':
    manualSeed = random.randint(1, 10000)  # fix seed
    random.seed(manualSeed)
    np.random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    cudnn.benchmark = True
    # store model path
    if not os.path.exists(params.experiment):
        os.mkdir(params.experiment)
    # read train set
    train_dataset = dataset.lmdbDataset(root=params.trainroot)
    assert train_dataset
    if not params.random_sample:
        sampler = dataset.randomSequentialSampler(train_dataset,
                                                  params.batchSize)
    else:
        sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=params.batchSize,
        shuffle=(sampler is None),  # shuffle and sampler are mutually exclusive
        sampler=sampler,
        num_workers=int(params.workers),
        collate_fn=dataset.alignCollate(imgH=params.imgH,
                                        imgW=params.imgW,