Beispiel #1
0
 def create_tb_tree(self) :
     """Create every directory of the testbench tree on disk.

     Walks the absolute-path tree and creates each directory that does
     not yet exist; directories already present are only logged.
     """
     # NOTE(review): `exist` and `mkdir` look like module-level wrappers
     # (presumably around os.path.exists / os.makedirs) -- confirm.
     for l in  self.get_abs_tree() :
        if not exist(l) :
            self.logger.info("\ncreating : \n%s" % l)
            mkdir(l)
        else :
            self.logger.warning("\ndirectory : \n%s\nalready exists" % l)
Beispiel #2
0
def get_celebrity(celebrity, path):
    """Queue downloads of every photo of ``celebrity`` into ``path``.

    Photos whose target file already exists are skipped; the remaining
    ones are submitted to the shared thread pool for asynchronous saving.
    The submit index counts only the photos actually queued.
    """
    file_utils.mkdir(path)
    queued = 0
    for referer, photo_url in celebrity.photos():
        basename = os.path.basename(photo_url)
        target = os.path.join(path, basename)
        # Skip photos that were downloaded on a previous run.
        if os.path.exists(target):
            print('pic {} exist skip'.format(basename))
            continue
        request_headers = {
            "Referer": referer,
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36",
        }
        # Hand the actual download off to the pool; this loop only enqueues.
        threadPoolExecutor.submit(file_utils.save_from_url,
                                  url=photo_url,
                                  headers=request_headers,
                                  name=target,
                                  index=queued)
        queued += 1

    print("Finish parsing celebrity pages, all file will save to {}".format(
        path))
def generate_top_category_files(column_name):
    """Split ``unspsc_codes_v3.csv`` into one CSV file per top category.

    Rows are grouped by their value in ``column_name``; each group is
    written to ``top_category_files/<value>.csv``.

    Args:
        column_name: name of the CSV column whose value identifies the
            top category a row belongs to.
    """
    file_utils.mkdir("top_category_files")
    rows = file_utils.read_csv('unspsc_codes_v3.csv')
    # Group rows by category value; setdefault replaces the manual
    # membership-check-then-append idiom and does a single dict lookup.
    tcs = {}
    for row in rows:
        tcs.setdefault(row[column_name], []).append(row)
    for tc, grouped_rows in tcs.items():
        filename = "top_category_files/" + tc + ".csv"
        print("Saving " + filename)
        file_utils.save_csv(filename, grouped_rows)
Beispiel #4
0
    def __mk_tb_files(self,check=False) :
        """ calling this method suppose that create_tree
        was invoked earlier

        Walk the (absolute, relative) directory pairs of the testbench
        tree, create any directory still missing, and invoke every
        registered "mk_<action>" generator for the directories listed in
        get_tb_files().

        :param check: forwarded to each action; when it equals the string
            "check_only", the first action returning a falsy value aborts
            the walk and the method returns False.
        :return: True when every applicable action succeeded (or none
            applied), False on the first failure in "check_only" mode.
        """
        n = self.get_root_name()+'/'
        for a,r in  zip(self.get_abs_tree(),self.get_rel_tree()) :
            if not exist(a) :
               self.logger.info("\ncreating : \n%s" % a)
               mkdir(a)
            # Strip the leading "<root_name>/" once to build the lookup key.
            # NOTE(review): string.replace() is the Python 2 `string` module
            # API; this line would fail on Python 3 -- confirm the runtime.
            key = string.replace(r,n,"",1)
            if key in self.get_tb_files() :
                for action_name, name in self.get_tb_files()[key].items() :
                    # "$root_name$" is a placeholder resolved to the root name.
                    if name == "$root_name$" : name = self.get_root_name()
                    # Dynamic dispatch: resolve Tb_files.mk_<action_name>,
                    # silently skipping unknown / non-callable actions.
                    action = getattr(Tb_files,"mk_"+action_name,None)
                    if callable(action) : 
                        value = action(self,check,a,r,name,key)
                        if (check == "check_only") and not value : return False
#NO UPDATE#        if self.__tb_style == 'sys' : self.update_CMake(check)
        return True
Beispiel #5
0
def train_net(model=None, data_loader=None, optimizer=None, epoch=50, lr=0.001, lr_decay_step=10,
              DISPLPAY_INTERVAL=None, SAVE_INTERVAL=None, lr_decay_gamma=None, save=None):
    """Run a cross-entropy training loop for a line-text recognition model.

    Trains ``model`` for ``epoch`` epochs over ``data_loader``.  The
    learning rate is multiplied by ``lr_decay_gamma`` every
    ``lr_decay_step`` epochs, progress is printed every
    ``DISPLPAY_INTERVAL`` steps, and the state dict is written to
    ``save + 'res18-<epoch>.pth'`` every ``SAVE_INTERVAL`` epochs.

    NOTE(review): ``DISPLPAY_INTERVAL`` is a misspelling of
    DISPLAY_INTERVAL, but renaming it would break keyword-argument
    callers, so the name is kept.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    criterion = nn.CrossEntropyLoss()

    total_step = len(data_loader)

    print('[LINE-TEXT-RECOGNITION TRAINING KICK-OFF]')
    for e in range(1, epoch + 1):
        model.train()
        start = time.time()
        # Step-decay the optimizer's lr; the local `lr` is only for logging.
        if e % lr_decay_step == 0:
            adjust_lr(optimizer, lr_decay_gamma)
            lr *= lr_decay_gamma

        for k, (image, label) in enumerate(data_loader):

            image = image.to(device)
            label = label.to(device)

            y = model(image)
            loss = criterion(y, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (k + 1) % DISPLPAY_INTERVAL == 0:
                end = time.time()
                print('Epoch [{}/{}], Step [{}/{}], lr: {}, Loss: {:.4f}, TIME COST: {:.4f}'
                      .format(e, epoch, k + 1, total_step, lr, loss.item(), end - start))
                start = time.time()
        # NOTE(review): this reset is redundant -- `start` is reassigned at
        # the top of the next epoch before being read again.
        start = time.time()
        if e % SAVE_INTERVAL == 0:
            file_utils.mkdir(dir=[save])
            # str(e) and repr(e) print identically for ints, so the logged
            # path matches the path actually saved below.
            print(
                'save model ... -> {}'.format(save + 'res18' + '-' + str(e) + '.pth'))
            torch.save(model.state_dict(), save + 'res18' + '-' + repr(e) + '.pth')
Beispiel #6
0
def train(args):
    """Train the LTD text-detection network on the webtoon dataset.

    Loads image/label pairs from the paths configured in ``opt``,
    optimizes an LTD model with Adam against LTD_LOSS, optionally writes
    heatmap visualizations of the predicted score maps to ./vis/, and
    saves the model state dict to ``args.save_models`` every
    ``args.save_interval`` epochs.
    """
    file_utils.mkdir(dir=[args.save_models])
    if args.vis_train: file_utils.mkdir(dir=['./vis/'])

    ''' MAKE DATASET '''
    datasets = webtoon_dataset(opt.DETECTION_TRAIN_IMAGE_PATH, opt.DETECTION_TRAIN_LABEL_PATH, args.train_size)
    train_data_loader = DataLoader(datasets, batch_size=args.batch, shuffle=True)

    ''' INITIALIZE MODEL, GPU, OPTIMIZER, and, LOSS '''

    model = LTD()
    model = torch.nn.DataParallel(model).cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lr_decay_gamma)
    criterion = LTD_LOSS()

    step_idx = 0

    model.train()
    # Fix: banner was missing its opening '[' ('TEXT ... KICK-OFF]'),
    # inconsistent with the sibling script's '[LINE-TEXT-RECOGNITION ...]'.
    print('[TEXT DETECTION TRAINING KICK-OFF]')

    ''' KICK OFF TRAINING PROCESS '''
    for e in range(args.epoch):

        start = time.time()

        ''' LOAD MATERIAL FOR TRAINING FROM DATALOADER '''
        for idx, (image, region_score_GT, affinity_score_GT, confidence) in enumerate(train_data_loader):

            ''' ADJUST LEARNING RATE PER 20000 ITERATIONS '''
            # NOTE(review): the actual adjustment call is left commented
            # out below; step_idx still counts would-be decay steps.
            if idx % args.lr_decay_step == 0 and idx != 0:
                step_idx += 1
                #adjust_learning_rate(optimizer, args.lr, step_idx)

            ''' CONVERT NUMPY => TORCH '''
            images = Variable(image.type(torch.FloatTensor)).cuda()
            region_score_GT = Variable(region_score_GT.type(torch.FloatTensor)).cuda()
            affinity_score_GT = Variable(affinity_score_GT.type(torch.FloatTensor)).cuda()
            confidence = Variable(confidence.type(torch.FloatTensor)).cuda()

            ''' PASS THE MODEL AND PREDICT SCORES '''
            y, _ = model(images)
            score_region = y[:, :, :, 0].cuda()
            score_affinity = y[:, :, :, 1].cuda()

            # Periodically dump side-by-side heatmaps of the predicted
            # region/affinity maps for visual inspection.
            if args.vis_train:
                if idx % 20 == 0 and idx != 0 and e % 2 == 0:
                    for idx2 in range(args.batch):
                        render_img1 = score_region[idx2].cpu().detach().numpy().copy()
                        render_img2 = score_affinity[idx2].cpu().detach().numpy().copy()
                        render_img = np.hstack((render_img1, render_img2))
                        render_img = imgproc.cvt2HeatmapImg(render_img)
                        cv2.imwrite('./vis/e' + str(e) + '-s' + str(idx) + '-' + str(idx2) + '.jpg',
                                    render_img)

            ''' CALCULATE LOSS VALUE AND UPDATE WEIGHTS '''
            optimizer.zero_grad()
            loss = criterion(region_score_GT, affinity_score_GT, score_region, score_affinity, confidence)
            loss.backward()
            optimizer.step()

            if idx % args.display_interval == 0:
                end = time.time()
                print('epoch: {}, iter:[{}/{}], lr:{}, loss: {:.8f}, Time Cost: {:.4f}s'.format(e, idx,
                                                                                                len(train_data_loader),
                                                                                                args.lr,
                                                                                                loss.item(),
                                                                                                end - start))
                start = time.time()

        ''' SAVE MODEL PER 2 EPOCH '''
        start = time.time()
        if e % args.save_interval == 0:
            print('save model ... :' + args.save_models)
            torch.save(model.module.state_dict(), args.save_models + 'ltd' + repr(e) + '.pth')
Beispiel #7
0
def train(args):
    """Train the Faster R-CNN speech-bubble detector on VOC-format data.

    Builds the roidb index from ./train/images/, constructs a ResNet-101
    Faster R-CNN, optionally resumes from a checkpoint, trains for
    ``args.epoch`` epochs, and writes a checkpoint after every epoch to
    ``args.save_models``.  Scalar losses go to tensorboardX when
    ``args.use_tfboard`` is set.
    """
    file_utils.rm_all_dir(dir='./train/cache/')  # clean cache
    dataset_name = "voc_2007_trainval"
    # Fixed anchor configuration used by this detector.
    args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20']
    args.cfg_file = "cfgs/{}_ls.yml".format(args.backbone) if args.large_scale else "cfgs/{}.yml".format(args.backbone)

    if args.cfg_file is not None: cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None: cfg_from_list(args.set_cfgs)

    np.random.seed(cfg.RNG_SEED)

    cfg.TRAIN.USE_FLIPPED = opt.BUBBLE_TRAIN_FLIP
    cfg.USE_GPU_NMS = opt.cuda

    # Build the train index file from the image folder, then the roidb.
    _, _, _, name_lists = file_utils.get_files('./train/images/')
    file_utils.makeTrainIndex(names=name_lists, save_to='./train/trainval.txt')
    imdb, roidb, ratio_list, ratio_index = combined_roidb(dataset_name)
    train_size = len(roidb)

    print('TRAIN IMAGE NUM: {:d}'.format(len(roidb)))

    file_utils.mkdir(dir=[args.save_models])

    sampler_batch = sampler(train_size, args.batch)

    dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch,\
                             imdb.num_classes, training=True)

    dataloader = DataLoader(dataset, batch_size=args.batch,
                            sampler=sampler_batch, num_workers=args.num_workers)

    # Reusable input tensors; resized and filled in-place every iteration.
    im_data = Variable(torch.FloatTensor(1).cuda())
    im_info = Variable(torch.FloatTensor(1).cuda())
    num_boxes = Variable(torch.LongTensor(1).cuda())
    gt_boxes = Variable(torch.FloatTensor(1).cuda())

    fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=False)
    fasterRCNN.create_architecture()

    lr = args.lr

    # Per-parameter optimizer options: biases may get a doubled lr and a
    # separate weight-decay policy, driven by the TRAIN config flags.
    params = []
    for key, value in dict(fasterRCNN.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),\
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
            else:
                params += [{'params': [value], 'lr': lr, 'weight_decay': cfg.TRAIN.WEIGHT_DECAY}]

    if args.optimizer == "adam":
        lr = lr * 0.1
        optimizer = torch.optim.Adam(params)

    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

    if opt.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    # Optionally resume model/optimizer state from a saved checkpoint.
    if args.resume:
        load_name = os.path.join(args.save_models,
                                 'Speech-Bubble-Detector-{}-{}-{}.pth'.format(args.backbone, args.resume_epoch, args.resume_batch))
        checkpoint = torch.load(load_name)
        args.session = checkpoint['session']
        args.start_epoch = checkpoint['epoch']
        fasterRCNN.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr = optimizer.param_groups[0]['lr']
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

    if args.multi_gpus:
        fasterRCNN = nn.DataParallel(fasterRCNN)

    iters_per_epoch = int(train_size / args.batch)

    if args.use_tfboard:
        from tensorboardX import SummaryWriter

        logger = SummaryWriter("logs")

    args.max_epochs = args.epoch
    for epoch in range(1, args.epoch + 1):

        fasterRCNN.train()
        loss_temp = 0
        start = time.time()

        # Decay the learning rate every (lr_decay_step + 1) epochs.
        if epoch % (args.lr_decay_step + 1) == 0:
            adjust_learning_rate(optimizer, args.lr_decay_gamma)
            lr *= args.lr_decay_gamma

        data_iter = iter(dataloader)
        for step in range(iters_per_epoch):
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            fasterRCNN.zero_grad()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            # Total loss = RPN (cls + box) + RCNN head (cls + box).
            loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
                   + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
            loss_temp += loss.item()

            # backward
            optimizer.zero_grad()
            loss.backward()
            if args.backbone == "vgg16":
                clip_gradient(fasterRCNN, 10.)
            optimizer.step()

            # Display / logging block: averages the accumulated loss over
            # the display window and reports fg/bg RoI counts.
            if step % args.display_interval == 0:
                end = time.time()
                if step > 0:
                    loss_temp /= (args.display_interval + 1)

                if args.multi_gpus:
                    loss_rpn_cls = rpn_loss_cls.mean().item()
                    loss_rpn_box = rpn_loss_box.mean().item()
                    loss_rcnn_cls = RCNN_loss_cls.mean().item()
                    loss_rcnn_box = RCNN_loss_bbox.mean().item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt
                else:
                    loss_rpn_cls = rpn_loss_cls.item()
                    loss_rpn_box = rpn_loss_box.item()
                    loss_rcnn_cls = RCNN_loss_cls.item()
                    loss_rcnn_box = RCNN_loss_bbox.item()
                    fg_cnt = torch.sum(rois_label.data.ne(0))
                    bg_cnt = rois_label.data.numel() - fg_cnt

                print("[epoch %d][iter %d/%d] loss: %.4f, lr: %.2e" \
                      % (epoch, step, iters_per_epoch, loss_temp, lr))
                print("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end - start))
                print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                      % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
                if args.use_tfboard:
                    info = {
                        'loss': loss_temp,
                        'loss_rpn_cls': loss_rpn_cls,
                        'loss_rpn_box': loss_rpn_box,
                        'loss_rcnn_cls': loss_rcnn_cls,
                        'loss_rcnn_box': loss_rcnn_box
                    }
                    logger.add_scalars("logs_s_{}/losses".format(args.session), info,
                                       (epoch - 1) * iters_per_epoch + step)

                loss_temp = 0
                start = time.time()

        # Checkpoint after every epoch.
        save_name = args.save_models + args.backbone + '-' + str(epoch) + '.pth'
        save_checkpoint({
            'session': args.session,
            'epoch': epoch + 1,
            'model': fasterRCNN.module.state_dict() if args.multi_gpus else fasterRCNN.state_dict(),
            'optimizer': optimizer.state_dict(),
            'pooling_mode': cfg.POOLING_MODE,
            'class_agnostic': False,
        }, save_name)
        print('save model: {}'.format(save_name))

    if args.use_tfboard:
        logger.close()
Beispiel #8
0
 def update_tb_tree(self) :
     """Create any directories of the testbench tree that are still
     missing on disk; existing directories are left untouched.
     """
     for l in  self.get_abs_tree() :
        if not exist(l) :
            self.logger.info("\ncreating : \n%s" % l)
            mkdir(l)
Beispiel #9
0
def createDataset(args):
    """Generate the Hangul character-recognition training set.

    Renders every label character in every font (with morphological
    erode/dilate variants), optionally adds salt-and-pepper and chunk
    noise copies, and writes one CSV row per saved image.  When
    ``args.webtoon_data`` is set, hard examples scraped from webtoons
    are appended as additional samples.
    """
    file_utils.rm_all_dir(dir=opt.RECOGNITION_TRAIN_IMAGE_PATH)
    file_utils.mkdir(dir=[opt.RECOGNITION_TRAIN_IMAGE_PATH])

    with codecs.open('./labels-2213.txt', 'r', encoding='utf-8') as f:
        labels = f.read().strip('\ufeff').splitlines()

    FONTS_PATH = opt.RECOGNITIOON_FONT_PATH
    CSV_PATH = opt.RECOGNITION_CSV_PATH
    IMAGE_PATH = opt.RECOGNITION_TRAIN_IMAGE_PATH

    fonts = glob.glob(os.path.join(FONTS_PATH, '*.ttf'))
    labels_csv = codecs.open(os.path.join(CSV_PATH), 'w', encoding='utf-8')

    print("[THE NUMBER OF FONTS : {}]".format(len(fonts)))

    cnt = 0
    prev_cnt = 0
    # Expected total: one image per (class, font, morph variant),
    # doubled once per enabled noise augmentation.
    total = opt.NUM_CLASSES * len(fonts) * opt.MORPH_NUM
    if args.salt_pepper: total *= 2
    if args.chunk_noise: total *= 2

    for k, character in enumerate(labels):

        # Progress line, refreshed every ~5000 generated images.
        if cnt - prev_cnt > 5000:
            prev_cnt = cnt
            sys.stdout.write(
                'TRAINING IMAGE GENERATION: ({}/{}) \r'.format(cnt, total))
            sys.stdout.flush()

        for f in fonts:

            for v in range(opt.MORPH_NUM):

                image, drawing = make_canvas(width=opt.RECOG_IMAGE_WIDTH, height=opt.RECOG_IMAGE_HEIGHT,
                                            color=opt.RECOG_BACKGROUND)
                font_type = determine_font_size(font=f, size=opt.RECOG_FONT_SIZE)
                w, h = determine_canvas_size(canvas=drawing, label=character, font=font_type)
                make_letter(canvas=drawing, label=character, width=w, height=h, color=opt.RECOG_FONT_COLOR, font=font_type)

                morph_templete = np.array(image.copy())
                kernel = np.ones((2, 2), np.uint8)

                # Variant 1 is eroded; every other variant is dilated.
                if v == 1: morph_templete = cv2.erode(morph_templete, kernel, iterations=1)
                else: morph_templete = cv2.dilate(morph_templete, kernel, iterations=1)

                copy = morph_templete.copy()
                cnt += 1

                copy = Image.fromarray(np.array(copy))
                file_utils.saveImage(save_to=IMAGE_PATH, img=np.array(copy), index1=cnt, ext='.png')
                file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k, ext='.png')

                if args.salt_pepper:
                    # Fix: was `cnt += 11`, an apparent typo -- every other
                    # save advances the index by 1, and the `total` budget
                    # above assumes one image per augmentation.
                    cnt += 1
                    copy = generate_salt_and_pepper_noise(copy)
                    file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt, ext='.png')
                    file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k,
                                       ext='.png')
                if args.chunk_noise:
                    # Fix: this branch previously reused the current `cnt`,
                    # overwriting the image/CSV row just saved; a fresh
                    # index matches the `total *= 2` accounting above.
                    cnt += 1
                    copy = generate_chunk_noise(copy)
                    file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt, ext='.png')
                    file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k,
                                       ext='.png')

    #  added custom training data difficult to classify from webtoon

    if args.webtoon_data:
        tranfer_img_list, _, _, _ = file_utils.get_files(opt.RECOG_WEBTOON_TRAIN_DATA_PATH)
        label_mapper = file_utils.makeLabelMapper('./labels-2213.txt')
        test_txt = []; test_num = []
        print("[CUSTOM HANGUL DIFFICULT DATASET GENERATION : {}]".format(len(tranfer_img_list)))
        text_labels = file_utils.loadText(opt.RECOG_WEBTOON_TRAIN_LABEL_PATH)
        # Map each ground-truth character to its class index via the mapper.
        for txt in text_labels[0]:
            test_num.append(label_mapper[0].tolist().index(txt))
            test_txt.append(txt)

        for idx, in_path in enumerate(tranfer_img_list):
            k, character = test_num[idx], test_txt[idx]
            img = imgproc.loadImage(in_path)
            img = imgproc.cvtColorGray(img)
            for x in range(1):
                copy = img.copy()
                cnt += 1

                copy = Image.fromarray(np.array(copy))
                file_utils.saveImage(save_to=IMAGE_PATH, img=copy, index1=cnt, ext='.png')
                file_utils.saveCSV(save_to=IMAGE_PATH, dst=labels_csv, index=cnt, label=character, num=k, ext='.png')

    labels_csv.close()
Beispiel #10
0
# Demo-script setup: CLI options, result folders, and model loading.
parser.add_argument('--demo_folder',
                    default='./data/',
                    type=str,
                    help='folder path to demo images')
# NOTE(review): action='store_true' combined with default=True means the
# --cuda flag can never be turned off from the command line -- confirm intent.
parser.add_argument('--cuda',
                    action='store_true',
                    default=True,
                    help='use cuda for inference')

args = parser.parse_args()
""" For test images in a folder """
image_list, _, _, name_list = file_utils.get_files(args.demo_folder)

# Recreate the result folder tree so every run starts from a clean state.
file_utils.rm_all_dir(dir='./result/')  # clean directories for next test
file_utils.mkdir(dir=[
    './result/', './result/bubbles/', './result/cuts/', './result/demo/',
    './result/chars/'
])

# load net
models = net_utils.load_net(args)  # initialize and load weights

spaces = []  # text recognition spacing word
text_warp_items = []  # text to warp bubble image
demos = []  # all demo image storage
t = time.time()  # wall-clock start of the whole demo run

cnt = 0

# load data
for k, image_path in enumerate(image_list):
    print("TEST IMAGE ({:d}/{:d}): INPUT PATH:[{:s}]".format(