def create_tb_tree(self):
    """Materialize the absolute directory tree on disk.

    Each path returned by ``get_abs_tree`` is created if missing; paths
    that already exist are left untouched and reported with a warning.
    """
    for path in self.get_abs_tree():
        if exist(path):
            self.logger.warning("\ndirectory : \n%s\nalready exists" % path)
            continue
        self.logger.info("\ncreating : \n%s" % path)
        mkdir(path)
def get_celebrity(celebrity, path):
    """Queue downloads of every photo of *celebrity* into *path*.

    Photos already present on disk are skipped.  The actual fetch is
    delegated to ``file_utils.save_from_url`` through the shared
    ``threadPoolExecutor``, so this function returns before downloads
    complete.
    """
    file_utils.mkdir(path)
    user_agent = ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36")
    submitted = 0
    for referer, photo_url in celebrity.photos():
        file_name = os.path.basename(photo_url)
        target = os.path.join(path, file_name)
        # Don't re-download files we already have.
        if os.path.exists(target):
            print('pic {} exist skip'.format(file_name))
            continue
        headers = {"Referer": referer, "user-agent": user_agent}
        threadPoolExecutor.submit(file_utils.save_from_url, url=photo_url,
                                  headers=headers, name=target, index=submitted)
        submitted += 1
    print("Finish parsing celebrity pages, all file will save to {}".format(
        path))
def generate_top_category_files(column_name):
    """Split the UNSPSC master CSV into one CSV file per top-level category.

    Rows from ``unspsc_codes_v3.csv`` are grouped by the value found in
    *column_name*; each group is written to
    ``top_category_files/<value>.csv``.

    :param column_name: key (column) of each row to group by.
    """
    from collections import defaultdict  # stdlib grouping idiom

    file_utils.mkdir("top_category_files")
    rows = file_utils.read_csv('unspsc_codes_v3.csv')

    # defaultdict replaces the manual "if key not in dict" dance.
    groups = defaultdict(list)
    for row in rows:
        groups[row[column_name]].append(row)

    for category, category_rows in groups.items():
        # NOTE(review): a category value containing '/' would produce a bad
        # path here — same as the original behavior; confirm values are safe.
        filename = "top_category_files/" + category + ".csv"
        print("Saving " + filename)
        file_utils.save_csv(filename, category_rows)
def __mk_tb_files(self, check=False):
    """Create the tb files for every node of the tree.

    Calling this method supposes that ``create_tree`` was invoked earlier.

    :param check: mode flag; when equal to ``"check_only"``, a failing
        action aborts and the method returns ``False``.
    :return: ``False`` on a failed check-only action, ``True`` otherwise.
    """
    n = self.get_root_name() + '/'
    for a, r in zip(self.get_abs_tree(), self.get_rel_tree()):
        if not exist(a):
            self.logger.info("\ncreating : \n%s" % a)
            mkdir(a)
        # BUG FIX: string.replace(r, n, "", 1) is a Python-2-only module
        # function (removed in Python 3); the str method is equivalent.
        key = r.replace(n, "", 1)
        if key in self.get_tb_files():
            for action_name, name in self.get_tb_files()[key].items():
                if name == "$root_name$":
                    name = self.get_root_name()
                # Dispatch to Tb_files.mk_<action_name> when it exists.
                action = getattr(Tb_files, "mk_" + action_name, None)
                if callable(action):
                    value = action(self, check, a, r, name, key)
                    if (check == "check_only") and not value:
                        return False
    #NO UPDATE#
    if self.__tb_style == 'sys':
        self.update_CMake(check)
    return True
def train_net(model=None, data_loader=None, optimizer=None, epoch=50, lr=0.001, lr_decay_step=10, DISPLPAY_INTERVAL=None, SAVE_INTERVAL=None, lr_decay_gamma=None, save=None):
    """Train a line-text-recognition model with cross-entropy loss.

    :param model: torch module to train (moved batches go to `device`).
    :param data_loader: yields (image, label) batches.
    :param optimizer: torch optimizer over model parameters.
    :param epoch: total number of epochs (1-based loop below).
    :param lr: initial learning rate, kept locally only for log output.
    :param lr_decay_step: decay the LR every this many epochs.
    :param DISPLPAY_INTERVAL: print progress every N steps.
        NOTE(review): typo for DISPLAY_INTERVAL, but renaming would break
        keyword callers — left as-is.
    :param SAVE_INTERVAL: checkpoint every N epochs.
    :param lr_decay_gamma: multiplicative LR decay factor.
    :param save: directory/prefix where checkpoints are written.
    """
    # Use GPU 0 when available, otherwise fall back to CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    criterion = nn.CrossEntropyLoss()
    total_step = len(data_loader)
    print('[LINE-TEXT-RECOGNITION TRAINING KICK-OFF]')
    for e in range(1, epoch + 1):
        model.train()
        start = time.time()
        # Decay the learning rate every lr_decay_step epochs; the local
        # `lr` copy is updated only so the progress print stays accurate.
        if e % lr_decay_step == 0:
            adjust_lr(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma
        for k, (image, label) in enumerate(data_loader):
            image = image.to(device)
            label = label.to(device)
            # Standard forward / loss / backward / step cycle.
            y = model(image)
            loss = criterion(y, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (k + 1) % DISPLPAY_INTERVAL == 0:
                end = time.time()
                print('Epoch [{}/{}], Step [{}/{}], lr: {}, Loss: {:.4f}, TIME COST: {:.4f}'
                      .format(e, epoch, k + 1, total_step, lr, loss.item(), end - start))
                start = time.time()
        # NOTE(review): this reset duplicates the one inside the display
        # branch above; appears redundant but is kept as in the original.
        start = time.time()
        if e % SAVE_INTERVAL == 0:
            file_utils.mkdir(dir=[save])
            print(
                'save model ... -> {}'.format(save + 'res18' + '-' + str(e) + '.pth'))
            torch.save(model.state_dict(), save + 'res18' + '-' + repr(e) + '.pth')
def train(args):
    """Train the LTD text-detection network on the webtoon dataset.

    :param args: parsed CLI namespace — uses save_models, vis_train,
        train_size, batch, lr, lr_decay_gamma, lr_decay_step, epoch,
        display_interval, save_interval.

    Side effects: creates the checkpoint dir (and ./vis/ when visualizing),
    writes heatmap snapshots during training, and saves a model checkpoint
    every ``args.save_interval`` epochs.
    """
    file_utils.mkdir(dir=[args.save_models])
    if args.vis_train:
        file_utils.mkdir(dir=['./vis/'])

    ''' MAKE DATASET '''
    datasets = webtoon_dataset(opt.DETECTION_TRAIN_IMAGE_PATH, opt.DETECTION_TRAIN_LABEL_PATH,
                               args.train_size)
    train_data_loader = DataLoader(datasets, batch_size=args.batch, shuffle=True)

    ''' INITIALIZE MODEL, GPU, OPTIMIZER, and, LOSS '''
    model = LTD()
    model = torch.nn.DataParallel(model).cuda()
    # NOTE(review): weight_decay is fed from lr_decay_gamma — looks like a
    # repurposed flag; confirm this is intentional.
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lr_decay_gamma)
    criterion = LTD_LOSS()
    step_idx = 0
    model.train()
    # BUG FIX: banner was missing its opening '[' (cf. the recognition
    # trainer's '[...KICK-OFF]' banner).
    print('[TEXT DETECTION TRAINING KICK-OFF]')

    ''' KICK OFF TRAINING PROCESS '''
    for e in range(args.epoch):
        start = time.time()
        ''' LOAD MATERIAL FOR TRAINING FROM DATALOADER '''
        for idx, (image, region_score_GT, affinity_score_GT, confidence) in enumerate(train_data_loader):

            ''' ADJUST LEARNING RATE PER 20000 ITERATIONS '''
            if idx % args.lr_decay_step == 0 and idx != 0:
                step_idx += 1
                #adjust_learning_rate(optimizer, args.lr, step_idx)

            ''' CONVERT NUMPY => TORCH '''
            images = Variable(image.type(torch.FloatTensor)).cuda()
            region_score_GT = Variable(region_score_GT.type(torch.FloatTensor)).cuda()
            affinity_score_GT = Variable(affinity_score_GT.type(torch.FloatTensor)).cuda()
            confidence = Variable(confidence.type(torch.FloatTensor)).cuda()

            ''' PASS THE MODEL AND PREDICT SCORES '''
            y, _ = model(images)
            score_region = y[:, :, :, 0].cuda()
            score_affinity = y[:, :, :, 1].cuda()

            # Periodically dump predicted region/affinity heatmaps to ./vis/.
            if args.vis_train:
                if idx % 20 == 0 and idx != 0 and e % 2 == 0:
                    for idx2 in range(args.batch):
                        render_img1 = score_region[idx2].cpu().detach().numpy().copy()
                        render_img2 = score_affinity[idx2].cpu().detach().numpy().copy()
                        render_img = np.hstack((render_img1, render_img2))
                        render_img = imgproc.cvt2HeatmapImg(render_img)
                        cv2.imwrite('./vis/e' + str(e) + '-s' + str(idx) + '-' + str(idx2) + '.jpg',
                                    render_img)

            ''' CALCULATE LOSS VALUE AND UPDATE WEIGHTS '''
            optimizer.zero_grad()
            loss = criterion(region_score_GT, affinity_score_GT, score_region,
                             score_affinity, confidence)
            loss.backward()
            optimizer.step()

            if idx % args.display_interval == 0:
                end = time.time()
                print('epoch: {}, iter:[{}/{}], lr:{}, loss: {:.8f}, Time Cost: {:.4f}s'.format(
                    e, idx, len(train_data_loader), args.lr, loss.item(), end - start))
                start = time.time()

        ''' SAVE MODEL PER 2 EPOCH '''
        start = time.time()
        if e % args.save_interval == 0:
            print('save model ... :' + args.save_models)
            torch.save(model.module.state_dict(), args.save_models + 'ltd' + repr(e) + '.pth')
def train(args):
    """Train a Faster R-CNN speech-bubble detector on ./train/images/.

    :param args: parsed CLI namespace — uses backbone, large_scale, batch,
        num_workers, optimizer, lr, lr_decay_step, lr_decay_gamma, epoch,
        resume/resume_epoch/resume_batch, multi_gpus, use_tfboard,
        display_interval, save_models, session.

    Side effects: wipes ./train/cache/, rebuilds ./train/trainval.txt,
    mutates the global cfg, and writes a checkpoint after every epoch.
    """
    file_utils.rm_all_dir(dir='./train/cache/')  # clean cache
    dataset_name = "voc_2007_trainval"
    # Anchor / GT-box settings pushed into the global detector config.
    args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20']
    args.cfg_file = "cfgs/{}_ls.yml".format(args.backbone) if args.large_scale else "cfgs/{}.yml".format(args.backbone)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = opt.BUBBLE_TRAIN_FLIP
    cfg.USE_GPU_NMS = opt.cuda
    # Build the trainval index file from whatever images are present.
    _, _, _, name_lists = file_utils.get_files('./train/images/')
    file_utils.makeTrainIndex(names=name_lists, save_to='./train/trainval.txt')
    imdb, roidb, ratio_list, ratio_index = combined_roidb(dataset_name)
    train_size = len(roidb)
    print('TRAIN IMAGE NUM: {:d}'.format(len(roidb)))
    file_utils.mkdir(dir=[args.save_models])
    sampler_batch = sampler(train_size, args.batch)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch,
                             imdb.num_classes, training=True)
    dataloader = DataLoader(dataset, batch_size=args.batch, sampler=sampler_batch, num_workers=args.num_workers)
    # Pre-allocated CUDA tensors, resized/filled in place each step below.
    im_data = Variable(torch.FloatTensor(1).cuda())
    im_info = Variable(torch.FloatTensor(1).cuda())
    num_boxes = Variable(torch.LongTensor(1).cuda())
    gt_boxes = Variable(torch.FloatTensor(1).cuda())
    fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=False)
    fasterRCNN.create_architecture()
    lr = args.lr
    # Per-parameter groups: biases get doubled LR and (optionally) no decay,
    # following the standard Faster R-CNN training recipe.
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{'params': [value], 'lr': lr, 'weight_decay': cfg.TRAIN.WEIGHT_DECAY}]
    if args.optimizer == "adam":
        lr = lr * 0.1  # Adam is more aggressive; scale the logged LR down
        optimizer = torch.optim.Adam(params)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
    if opt.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()
    if args.resume:
        # Restore model/optimizer state from a previous session checkpoint.
        load_name = os.path.join(args.save_models,
                                 'Speech-Bubble-Detector-{}-{}-{}.pth'.format(args.backbone, args.resume_epoch, args.resume_batch))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
    if args.multi_gpus:
        fasterRCNN = nn.DataParallel(fasterRCNN)
    iters_per_epoch = int(train_size / args.batch)
    if args.use_tfboard:
        from tensorboardX import SummaryWriter
        logger = SummaryWriter("logs")
    args.max_epochs = args.epoch
    for epoch in range(1, args.epoch + 1):
        fasterRCNN.train()
        loss_temp = 0
        start = time.time()
        # NOTE(review): decay fires when epoch is a multiple of
        # lr_decay_step + 1 (not lr_decay_step) — kept as in the original.
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma
        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            # Copy the batch into the pre-allocated CUDA tensors in place.
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])
            fasterRCNN.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            # Total loss = RPN classification + box + RCNN classification + box.
            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            loss_temp += loss.item()
            # backward
            optimizer.zero_grad()
            loss.backward()
            if args.backbone == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()
            if step % args.display_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= (args.display_interval + 1)
                # Under DataParallel the losses are per-GPU tensors and must
                # be reduced with .mean() before logging.
                if args.multi_gpus:
                    loss_rpn_cls = rpn_loss_cls.mean().item()
                    loss_rpn_box = rpn_loss_box.mean().item()
                    loss_rcnn_cls = RCNN_loss_cls.mean().item()
                    loss_rcnn_box = RCNN_loss_bbox.mean().item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.item()
                    loss_rpn_box = rpn_loss_box.item()
                    loss_rcnn_cls = RCNN_loss_cls.item()
                    loss_rcnn_box = RCNN_loss_bbox.item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                print("[epoch %d][iter %d/%d] loss: %.4f, lr: %.2e" \
                      % (epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    logger.add_scalars("logs_s_{}/losses".format(args.session), info,
                                       (epoch - 1) * iters_per_epoch + step)
                loss_temp = 0
                start = time.time()
        # Checkpoint once per epoch; unwrap .module when using DataParallel.
        save_name = args.save_models + args.backbone + '-' + str(epoch) + '.pth'
        save_checkpoint({
            'session': args.session,
            'epoch': epoch + 1,
            'model': fasterRCNN.module.state_dict() if args.multi_gpus else fasterRCNN.state_dict(),
            'optimizer': optimizer.state_dict(),
            'pooling_mode': cfg.POOLING_MODE,
            'class_agnostic': False,
        }, save_name)
        print('save model: {}'.format(save_name))
    if args.use_tfboard:
        logger.close()
def update_tb_tree(self):
    """Create any directory of the absolute tree that is still missing.

    Unlike ``create_tb_tree``, existing directories are silently ignored.
    """
    for missing in (p for p in self.get_abs_tree() if not exist(p)):
        self.logger.info("\ncreating : \n%s" % missing)
        mkdir(missing)
def createDataset(args):
    """Generate the character-recognition training set (images + CSV index).

    For every label character and every TTF font, renders opt.MORPH_NUM
    eroded/dilated variants; optionally adds a salt-and-pepper and a
    chunk-noise copy of each.  When ``args.webtoon_data`` is set, appends
    hand-collected hard examples from the webtoon corpus.

    :param args: CLI namespace — uses salt_pepper, chunk_noise, webtoon_data.

    Side effects: wipes and recreates opt.RECOGNITION_TRAIN_IMAGE_PATH and
    writes the label CSV at opt.RECOGNITION_CSV_PATH.
    """
    file_utils.rm_all_dir(dir=opt.RECOGNITION_TRAIN_IMAGE_PATH)
    file_utils.mkdir(dir=[opt.RECOGNITION_TRAIN_IMAGE_PATH])
    with codecs.open('./labels-2213.txt', 'r', encoding='utf-8') as f:
        labels = f.read().strip('\ufeff').splitlines()
    FONTS_PATH = opt.RECOGNITIOON_FONT_PATH
    CSV_PATH = opt.RECOGNITION_CSV_PATH
    IMAGE_PATH = opt.RECOGNITION_TRAIN_IMAGE_PATH
    fonts = glob.glob(os.path.join(FONTS_PATH, '*.ttf'))
    labels_csv = codecs.open(os.path.join(CSV_PATH), 'w', encoding='utf-8')
    print("[THE NUMBER OF FONTS : {}]".format(len(fonts)))
    cnt = 0
    prev_cnt = 0
    # Expected total: each enabled noise type doubles the image count,
    # i.e. exactly one extra image per base image.
    total = opt.NUM_CLASSES * len(fonts) * opt.MORPH_NUM
    if args.salt_pepper:
        total *= 2
    if args.chunk_noise:
        total *= 2
    for k, character in enumerate(labels):
        # Progress line roughly every 5000 generated images.
        if cnt - prev_cnt > 5000:
            prev_cnt = cnt
            sys.stdout.write(
                'TRAINING IMAGE GENERATION: ({}/{}) \r'.format(cnt, total))
            sys.stdout.flush()
        for f in fonts:
            for v in range(opt.MORPH_NUM):
                image, drawing = make_canvas(width=opt.RECOG_IMAGE_WIDTH, height=opt.RECOG_IMAGE_HEIGHT,
                                             color=opt.RECOG_BACKGROUND)
                font_type = determine_font_size(font=f, size=opt.RECOG_FONT_SIZE)
                w, h = determine_canvas_size(canvas=drawing, label=character, font=font_type)
                make_letter(canvas=drawing, label=character, width=w, height=h,
                            color=opt.RECOG_FONT_COLOR, font=font_type)
                # Morphological variation: erode one variant, dilate the rest.
                morph_templete = np.array(image.copy())
                kernel = np.ones((2, 2), np.uint8)
                if v == 1:
                    morph_templete = cv2.erode(morph_templete, kernel, iterations=1)
                else:
                    morph_templete = cv2.dilate(morph_templete, kernel, iterations=1)
                copy = morph_templete.copy()
                cnt += 1
                copy = Image.fromarray(np.array(copy))
                file_utils.saveImage(save_to=IMAGE_PATH, img=np.array(copy), index1=cnt, ext='.png')
                file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k, ext='.png')
                if args.salt_pepper:
                    # BUG FIX: was `cnt += 11`, which skipped 10 indices per
                    # image and broke the contiguous numbering implied by
                    # the `total` accounting above.
                    cnt += 1
                    copy = generate_salt_and_pepper_noise(copy)
                    file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt, ext='.png')
                    file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt,
                                      label=character, num=k, ext='.png')
                if args.chunk_noise:
                    # BUG FIX: the original never incremented cnt here, so the
                    # chunk-noise image overwrote the previously saved file
                    # and wrote a duplicate index into the CSV.
                    cnt += 1
                    copy = generate_chunk_noise(copy)
                    file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt, ext='.png')
                    file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt,
                                      label=character, num=k, ext='.png')
    # added custom training data difficult to classify from webtoon
    if args.webtoon_data:
        tranfer_img_list, _, _, _ = file_utils.get_files(opt.RECOG_WEBTOON_TRAIN_DATA_PATH)
        label_mapper = file_utils.makeLabelMapper('./labels-2213.txt')
        test_txt = []
        test_num = []
        print("[CUSTOM HANGUL DIFFICULT DATASET GENERATION : {}]".format(len(tranfer_img_list)))
        text_labels = file_utils.loadText(opt.RECOG_WEBTOON_TRAIN_LABEL_PATH)
        # Map each ground-truth character to its class index in the label file.
        for txt in text_labels[0]:
            test_num.append(label_mapper[0].tolist().index(txt))
            test_txt.append(txt)
        for idx, in_path in enumerate(tranfer_img_list):
            k, character = test_num[idx], test_txt[idx]
            img = imgproc.loadImage(in_path)
            img = imgproc.cvtColorGray(img)
            for x in range(1):
                copy = img.copy()
                cnt += 1
                copy = Image.fromarray(np.array(copy))
                file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt, ext='.png')
                file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k, ext='.png')
    labels_csv.close()
parser.add_argument('--demo_folder', default='./data/', type=str, help='folder path to demo images') parser.add_argument('--cuda', action='store_true', default=True, help='use cuda for inference') args = parser.parse_args() """ For test images in a folder """ image_list, _, _, name_list = file_utils.get_files(args.demo_folder) file_utils.rm_all_dir(dir='./result/') # clean directories for next test file_utils.mkdir(dir=[ './result/', './result/bubbles/', './result/cuts/', './result/demo/', './result/chars/' ]) # load net models = net_utils.load_net(args) # initialize and load weights spaces = [] # text recognition spacing word text_warp_items = [] # text to warp bubble image demos = [] # all demo image storage t = time.time() cnt = 0 # load data for k, image_path in enumerate(image_list): print("TEST IMAGE ({:d}/{:d}): INPUT PATH:[{:s}]".format(