Code Example #1
# Read the page image and load its character bounding boxes from the matching JSON file.
with io.open(imgpath, 'rb') as image_file:
    content = image_file.read()  # raw bytes; not used further in this snippet
jsonpath = config.pdfdata + "json/" + os.path.splitext(os.path.basename(imgpath))[0] + ".json"
with open(jsonpath) as f:
    bounds = json.load(f)
bounds = bounds_refine(bounds, imgpath, ref)
#print("Characters in Image=",len(bounds))
# Build the per-character dataset and aggregate coordinates, labels and word/sequence ids.
ds, coords, labels, wordid, seq = get_ds(imgpath, bounds)
coordsagg.extend(coords)
labelsagg.extend(labels)
pageagg.extend([os.path.splitext(os.path.basename(imgpath))[0]] * len(labels))
wordidagg.extend(wordid)
sequenceagg.extend(seq)
# Evaluate the model on this page and record the accuracy weighted by character count.
ds_train = DataUtils.EVALIMGDS(label_dict, ds)
train_gen = torch.utils.data.DataLoader(ds_train, batch_size=64, shuffle=False, num_workers=6, pin_memory=True)
train_gen = DataUtils.DeviceDataLoader(train_gen, device)
result = ModelUtils.evaluate(model, train_gen)
print("Accuracy on {} page is {}".format(imgpath, result['val_acc']))
pdf_acc.append(len(bounds) * result['val_acc'])
weight.append(len(bounds))
#os.remove(imgpath)
#os.remove(jsonpath)
# Rebuild the loader and collect the predicted class index for every character.
train_gen = torch.utils.data.DataLoader(ds_train, batch_size=64, shuffle=False, num_workers=6, pin_memory=True)
train_gen = DataUtils.DeviceDataLoader(train_gen, device)
predic = []
for batch in train_gen:
    images, labels = batch
    with torch.no_grad():
        out = model(images)
    _, preds = torch.max(out, dim=1)
    predic.extend(preds.detach().cpu().numpy().tolist())
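
The snippet above leaves `predic` as a list of integer class indices. As a minimal sketch that is not part of the original example, they could be mapped back to characters with a reverse dictionary like the `revdict` built in Code Example #2, assuming `config.symbols` is the same ordered symbol list that produced `label_dict`:

# Sketch only: decode predicted class indices into characters.
# Assumes config.symbols matches the label ordering used by label_dict.
revdict = {i: sym for i, sym in enumerate(config.symbols)}
pred_chars = [revdict[idx] for idx in predic]
print("First 20 predicted characters:", pred_chars[:20])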
Code Example #2
def RUN(batchsize, lr):
    #batchsize=config.batchsize
    #lr=config.learning_rate
    #num_epochs=config.num_epochs
    num_epochs = 50
    device = config.device
    if device is None:
        device = utils.get_default_device()
    label_dict = utils.create_label_dict(config.symbols)
    revdict = {}
    for i, sym in enumerate(config.symbols):
        revdict[i] = sym
    model = InceptFC.FC_Model()
    #model=Resnet.ResNet50(3,97)
    model.to(device)
    print(config.checkpath)
    checkpoint = torch.load(config.checkpath, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    print("MODEL LOADED")
    model.train()
    for name, child in model.named_children():
        if name in ['conv_block1', "conv_block2", "conv1"]:
            print(name + ' is frozen')
            for param in child.parameters():
                param.requires_grad = False
        else:
            print(name + ' is unfrozen')
            for param in child.parameters():
                param.requires_grad = True

    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=lr,
                                 weight_decay=lr / 10.)
    finepath = config.data_dir_path
    myvalpath = "/home/ubuntu/data/ocr/kdeval/good/images/"
    valid_paths = [
        join(myvalpath, f) for f in listdir(myvalpath)
        if isfile(join(myvalpath, f))
    ]
    refinement_ratio = [0.5]
    checkpath = os.path.dirname(config.checkpath)
    checkpath = join(checkpath, "FineTune2")
    os.system('mkdir -p ' + checkpath)
    p = 'runs/Inceptfinalrun/hypergridfine_tune/LR' + str(int(
        1000000 * lr)) + 'BS' + str(batchsize)
    writer = SummaryWriter(p)
    fineds = [f for f in listdir(finepath) if isfile(join(finepath, f))]
    for epoch_fine in range(num_epochs):
        random.shuffle(fineds)
        ds_train = DataUtils.FINEIMGDS(label_dict, finepath, fineds)
        train_gen = torch.utils.data.DataLoader(ds_train,
                                                batch_size=batchsize,
                                                shuffle=True,
                                                num_workers=6,
                                                pin_memory=True)
        train_gen = DataUtils.DeviceDataLoader(train_gen, device)
        result = ModelUtils.fit_fine(model, train_gen, optimizer)
        loss_epoch = result.item()
        print("MEAN LOSS ON EPOCH {} is : {}".format(epoch_fine, loss_epoch))
        ## SAVE WEIGHT AFTER FINETUNE PER EPOCH
        '''
        torch.save({
                    'epoch': epoch_fine,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss_epoch,
                    }, os.path.join(checkpath, 'fine-epoch-{}.pt'.format(epoch_fine)))
        '''
        ## WRITER TENSORBOARD
        writer.add_scalar('Training loss per epoch', loss_epoch, epoch_fine)

        ###############################################################
        ####### CHECK FOR VALIDATION+
        pdf_acc = []
        weight = []
        for imgpath in tqdm(valid_paths, desc="TEST"):
            with io.open(imgpath, 'rb') as image_file:
                content = image_file.read()
            jsonpath = "/home/ubuntu/data/ocr/kdeval/good/json/" + os.path.splitext(
                os.path.basename(imgpath))[0] + ".json"
            with open(jsonpath) as f:
                bounds = json.load(f)
            bounds = bounds_refine(bounds, imgpath, 0.48)
            #print("Characters in Image=",len(bounds))
            ds = get_ds(imgpath, bounds)
            ds_train = DataUtils.EVALIMGDS(label_dict, ds)
            train_gen = torch.utils.data.DataLoader(ds_train,
                                                    batch_size=64,
                                                    shuffle=False,
                                                    num_workers=6,
                                                    pin_memory=True)
            train_gen = DataUtils.DeviceDataLoader(train_gen, device)
            result = ModelUtils.evaluate(model, train_gen)
            pdf_acc.append(len(bounds) * result['val_acc'])
            weight.append(len(bounds))
        print("EPOCHFINE={} Validation Accuracy Mean on GOOD pdf is {}".format(
            epoch_fine,
            sum(pdf_acc) / sum(weight)))
        writer.add_scalar('validation acc per epoch',
                          sum(pdf_acc) / sum(weight), epoch_fine)
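
The TensorBoard run directory (`hypergridfine_tune/LR...BS...`) suggests `RUN` is meant to be called across a grid of learning rates and batch sizes. A minimal driver sketch, with illustrative values that are not taken from the project:

# Sketch only: sweep RUN over a small hyperparameter grid.
# The learning rates and batch sizes below are placeholders.
for lr in [1e-4, 5e-5, 1e-5]:
    for batchsize in [32, 64]:
        RUN(batchsize, lr)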
Code Example #3
File: run.py Project: iamsaurabh007/Char-Classifier
    imglist_train, imglist_val = utils.get_images_list(dir_path + "/imgs")
    #imglist_train=utils.csv_to_ls(config.csv_path+"/train_grid_imgs.csv")
    #imglist_val=utils.csv_to_ls(config.csv_path+"/valid_grid_imgs.csv")
    ds_train = DataUtils.IMGDS(label_dict, dir_path, imglist_train)
    ds_val = DataUtils.IMGDS(label_dict, dir_path, imglist_val)
    train_gen = torch.utils.data.DataLoader(ds_train,
                                            batch_size=batch_size,
                                            shuffle=shuffle,
                                            num_workers=num_worker,
                                            pin_memory=True)
    valid_gen = torch.utils.data.DataLoader(ds_val,
                                            batch_size=batch_size,
                                            shuffle=shuffle,
                                            num_workers=num_worker,
                                            pin_memory=True)
    train_gen = DataUtils.DeviceDataLoader(train_gen, device)
    valid_gen = DataUtils.DeviceDataLoader(valid_gen, device)
    #model=Resnet.ResNet50(3,config.num_classes)
    model = InceptFC.FC_Model()
    model = model.to(device)
    p = 'runs/Inceptfinalrun/LR' + str(int(
        100000 * l_r)) + 'BS' + str(batch_size)
    writer = SummaryWriter(p)
    history = ModelUtils.fit(num_epochs,
                             l_r,
                             model,
                             train_gen,
                             valid_gen,
                             opt_func=torch.optim.Adam,
                             writer=writer)
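
`RUN` in Code Example #2 restores weights via `checkpoint['model_state_dict']`, and its commented-out save block shows the expected dictionary layout. A minimal sketch of saving the model trained here in a compatible format (the output path is hypothetical, and only the key `RUN` actually reads is included):

# Sketch only: persist the trained weights so RUN in Code Example #2 can load them.
# 'incept_fc_final.pt' is a hypothetical file name; point config.checkpath at it.
torch.save({'model_state_dict': model.state_dict()},
           'runs/Inceptfinalrun/incept_fc_final.pt')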