Ejemplo n.º 1
0
def predict(fold, model_name):
    """Predict on the first 10 training rows and export a traced model.

    Loads ``./model_fold_{fold}.bin`` into the network returned by
    ``get_effinet(classes=2)``, runs ``Engine.predict`` over the images, and
    then traces the model on CPU with a random ``(1, 3, 224, 224)`` input,
    saving the resulting TorchScript module under ``E:/temp/saved_models/``.

    Reads ``bs_test`` from module scope.

    Args:
        fold: Fold index used to locate the checkpoint and to name the
            traced file.
        model_name: Unused in this function; kept so existing callers keep
            working.

    Returns:
        ``np.vstack`` of the value returned by ``engine.predict``
        (presumably one array per batch -- confirm against ``Engine``).
    """
    df = pd.read_csv("E:/kaggle_imgs/Plant-pathology-2020/Data/train_fold.csv")[0:10]
    device = "cuda" if torch.cuda.is_available() else "cpu"
    imgs = df.image_id.values.tolist()
    path = "E:/kaggle_imgs/Plant-pathology-2020/images_224_224/"
    test_imgs = [path + file + ".png" for file in imgs]

    utils = Utils(mode="test")
    test_aug = utils.get_aug()
    # All-zero targets: presumably placeholders because the dataset class
    # requires a targets argument -- confirm against ClassificationLoader.
    test_tar = np.zeros((len(imgs), 4))
    test_dataset = ClassificationLoader(
        image_paths=test_imgs, targets=test_tar, resize=None, augmentations=test_aug
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=bs_test, num_workers=4, shuffle=False
    )

    model = get_effinet(classes=2)
    model_save_path = f"./model_fold_{fold}.bin"
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    model.load_state_dict(torch.load(model_save_path, map_location=device))
    model = model.to(device)

    engine = Engine(model, None, device, classes=2, weights=None)
    preds = engine.predict(test_loader)
    preds = np.vstack(preds)

    # Export to TorchScript so the model can be loaded from C++.
    sample = torch.rand(1, 3, 224, 224)
    model.to("cpu")
    # torch.jit.trace freezes whichever train/eval behaviour is active while
    # tracing, so make inference mode explicit before exporting.
    model.eval()
    traced_script_module = torch.jit.trace(model, sample)
    traced_script_module.save("E:/temp/saved_models/" + f"/traced_1015_fold_{fold}.pt")

    return preds
Ejemplo n.º 2
0
def predict(fold=0):
    """Run the checkpoint for *fold* over every image listed in ``test.csv``.

    Builds a non-shuffled ``DataLoader`` over the PNG files named by the
    ``image_id`` column, loads ``./model_fold_{fold}.bin`` into the model
    returned by ``get_model(model_name)`` and returns the stacked predictions.

    ``model_name`` and ``bs_test`` are not parameters; they are read from
    module scope.

    Args:
        fold: Fold index used to locate the checkpoint file.

    Returns:
        ``np.vstack`` of the value returned by ``engine.predict``
        (presumably one array per batch -- confirm against ``Engine``).
    """
    df = pd.read_csv("../input/plant-pathology-2020-fgvc7/test.csv")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    imgs = df.image_id.values.tolist()
    path = "../input/plant-images-224-224/"
    test_imgs = [path + file + ".png" for file in imgs]
    test_aug = Utils.get_aug("test")
    # All-zero targets: presumably placeholders because the dataset class
    # requires a targets argument -- confirm against ClassificationLoader.
    test_tar = np.zeros((len(imgs), 4))
    test_dataset = ClassificationLoader(image_paths=test_imgs,
                                        targets=test_tar,
                                        resize=None,
                                        augmentations=test_aug)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=bs_test,
                                              num_workers=0,
                                              shuffle=False)

    model = get_model(model_name)
    model_save_path = f"./model_fold_{fold}.bin"
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    model.load_state_dict(torch.load(model_save_path, map_location=device))
    model = model.to(device)

    engine = Engine(model, None, device)
    preds = engine.predict(test_loader)
    preds = np.vstack(preds)
    return preds
Ejemplo n.º 3
0
def predict(fold):
    """Predict on the first 10 rows of ``train`` and export a traced model.

    Loads today's date-stamped checkpoint for *fold* into the model returned
    by ``get_model_effi_b4(2)``, runs ``Engine.predict`` over the images in
    the ``tar_path`` column, then traces the model on CPU with a random
    ``(1, 3, 224, 224)`` input and saves the TorchScript module next to the
    checkpoint.

    ``train``, ``bs_valid`` and ``model_name`` are read from module scope.

    Args:
        fold: Fold index used in the checkpoint and traced-file names.

    Returns:
        ``np.vstack`` of the value returned by ``engine.predict``
        (presumably one array per batch -- confirm against ``Engine``).
    """
    df = train[0:10]
    device = "cuda" if torch.cuda.is_available() else "cpu"

    test_imgs = df.tar_path.values.tolist()
    test_aug = Utils.get_aug("test")
    # All-zero targets: presumably placeholders because the dataset class
    # requires a targets argument -- confirm against ClassificationLoader.
    test_tar = np.zeros((len(test_imgs), 2))
    test_dataset = ClassificationLoader(image_paths=test_imgs,
                                        targets=test_tar,
                                        resize=None,
                                        augmentations=test_aug)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=bs_valid,
                                              num_workers=0,
                                              shuffle=False)
    model = get_model_effi_b4(2)
    # NOTE(review): the checkpoint name embeds the current month/day, so a
    # checkpoint written on an earlier date will not be found -- confirm this
    # is intended.
    tm = datetime.datetime.now().strftime("%m%d")
    model_save_path = f"E:/kaggle_imgs/H2/saved_models/model_fold_{fold}_{tm}.bin"
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    model.load_state_dict(torch.load(model_save_path, map_location=device))
    model = model.to(device)

    engine = Engine(model, None, device, classes=2, weights=None)
    preds = engine.predict(test_loader)
    preds = np.vstack(preds)  # raw stacked outputs; no argmax is applied

    # Export to TorchScript so the model can be loaded from C++.
    sample = torch.rand(1, 3, 224, 224)
    model.to("cpu")
    # NOTE(review): presumably selects the export-friendly Swish
    # implementation of the EfficientNet package -- confirm.
    model.set_swish(False)
    # torch.jit.trace freezes whichever train/eval behaviour is active while
    # tracing, so make inference mode explicit before exporting.
    model.eval()
    traced_script_module = torch.jit.trace(model, sample)
    traced_script_module.save(
        f"E:/kaggle_imgs/H2/saved_models/traced_{model_name}_fold_{fold}_{tm}.pt"
    )
    return preds
Ejemplo n.º 4
0
def loop_train(fold, model_name, weights, sel_weight):
    """Train a two-class model on one cross-validation fold.

    Rows of the module-level ``train`` frame whose ``fold`` column differs
    from *fold* are used for training and the remaining rows for validation.
    The target is the binary column ``result`` (1 where ``CAT > 0``, else 0).
    After every epoch the metrics are pushed to ``history``; whenever the
    validation loss improves the weights are written to
    ``model_fold_{fold}.bin`` and ``history.best_idx[fold]`` is set to the
    epoch. Training stops after 300 epochs, or once the validation loss has
    failed to improve for more than 3 consecutive epochs.

    ``train``, ``history``, ``bs_train`` and ``bs_valid`` are read from module
    scope.

    Args:
        fold: Index of the fold held out for validation.
        model_name: Unused in this function; kept so existing callers keep
            working.
        weights: Forwarded unchanged to ``Engine(..., weights=weights)``.
        sel_weight: Key passed to ``history.initial_info`` and
            ``history.add_train_info``.
    """
    history.initial_info(sel_weight)
    train_df = train[train.fold != fold].reset_index(drop=True)
    valid_df = train[train.fold == fold].reset_index(drop=True)
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same
    # dtype the old alias resolved to.
    train_df["result"] = (train_df["CAT"] > 0).astype(int)
    valid_df["result"] = (valid_df["CAT"] > 0).astype(int)

    path = "E:/kaggle_imgs/Plant-pathology-2020/images_224_224/"

    imgs = train_df.image_id.values.tolist()
    train_imgs = [path + file + ".png" for file in imgs]
    train_aug = Utils.get_aug("train")
    train_tar = train_df.result.values
    train_dataset = ClassificationLoader(
        image_paths=train_imgs, targets=train_tar, resize=None, augmentations=train_aug
    )
    # CutMix is not applied here: the plain dataset is fed to the loader.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=bs_train, num_workers=4, shuffle=True
    )

    imgs = valid_df.image_id.values.tolist()
    valid_imgs = [path + file + ".png" for file in imgs]
    valid_aug = Utils.get_aug("valid")
    valid_tar = valid_df.result.values
    valid_dataset = ClassificationLoader(
        image_paths=valid_imgs, targets=valid_tar, resize=None, augmentations=valid_aug
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=bs_valid, num_workers=4, shuffle=False
    )

    # Model, optimizer, scheduler, engine
    model = get_effinet(classes=2)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=1e-5, mode="min", verbose=True
    )

    engine = Engine(model, optimizer, device, classes=2, weights=weights)
    best_loss = np.inf
    early_stopping = 3
    early_stopping_cnt = 0
    EPOCH = 300
    for epoch in range(EPOCH):
        train_loss, train_acc = engine.train(train_loader)
        valid_loss, valid_acc, valid_labels, valid_preds = engine.validate(valid_loader)
        # The plateau scheduler is driven by the validation loss.
        scheduler.step(valid_loss)

        # Record this epoch's metrics.
        history.add_train_info(sel_weight, train_acc, train_loss, valid_acc, valid_loss, valid_labels, valid_preds)
        tm = datetime.datetime.now().strftime("%H:%M:%S")
        print(f"{tm}, fold={fold}, epoch={epoch}, train_loss={train_loss:.4f}, valid_loss={valid_loss:.4f}, valid_acc={valid_acc:.4f}")

        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(), f"model_fold_{fold}.bin")
            early_stopping_cnt = 0
            history.best_idx[fold] = epoch
        else:
            early_stopping_cnt += 1
        # Strict ">" means the loop tolerates ``early_stopping`` stale epochs
        # and breaks on the next one.
        if early_stopping_cnt > early_stopping:
            break

    print(f"fold={fold}, best val loss={best_loss}")
Ejemplo n.º 5
0
def loop_train(fold=0):
    """Train one cross-validation fold using CutMix-wrapped datasets.

    Rows of the module-level ``train`` frame whose ``fold`` column differs
    from *fold* are used for training; the remaining rows are used for
    validation, with the ``CAT`` column as target. Each epoch's metrics are
    passed to ``add_train_info``. Whenever the validation loss improves, the
    weights are saved to ``model_fold_{fold}.bin`` and a progress line is
    printed. Training ends after 300 epochs, or once the validation loss has
    failed to improve for more than 7 consecutive epochs.

    ``train``, ``bs_train`` and ``bs_valid`` are read from module scope.

    Args:
        fold: Index of the fold held out for validation.
    """
    ready_train_info(fold)

    fit_frame = train[train.fold != fold].reset_index(drop=True)
    holdout_frame = train[train.fold == fold].reset_index(drop=True)

    def build_dataset(frame, aug_mode, mix_prob):
        # Image paths -> augmentation -> targets -> dataset -> CutMix wrapper.
        names = frame.image_id.values.tolist()
        root = "E:/kaggle_imgs/Plant-pathology-2020/images_224_224/"
        files = [root + name + ".png" for name in names]
        aug = Utils.get_aug(aug_mode)
        labels = frame.CAT.values
        base = ClassificationLoader(image_paths=files,
                                    targets=labels,
                                    resize=None,
                                    augmentations=aug)
        return CutMix(base, num_class=4, beta=1.0, prob=mix_prob, num_mix=1)

    train_loader = torch.utils.data.DataLoader(
        build_dataset(fit_frame, "train", 0.999),
        batch_size=bs_train,
        num_workers=4,
        shuffle=True,
    )
    # prob=0 on the validation wrapper: same wrapper type, mixing probability 0.
    valid_loader = torch.utils.data.DataLoader(
        build_dataset(holdout_frame, "valid", 0),
        batch_size=bs_valid,
        num_workers=4,
        shuffle=False,
    )

    # Model, optimizer, scheduler, engine
    net = Utils.get_model("effinet")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    net = net.to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=1e-5, mode="min", verbose=True
    )
    engine = Engine(net, optimizer, device)

    best_loss = np.inf
    patience = 7
    stale_epochs = 0
    for epoch in range(300):
        train_loss, train_acc = engine.train(train_loader)
        valid_loss, valid_acc = engine.validate(valid_loader)
        scheduler.step(valid_loss)

        # Record this epoch's metrics.
        add_train_info(fold, train_acc, train_loss, valid_acc, valid_loss)

        if not valid_loss < best_loss:
            # No improvement: count it and stop once patience is exceeded.
            stale_epochs += 1
            if stale_epochs > patience:
                break
            continue

        # New best validation loss: checkpoint and report.
        best_loss = valid_loss
        torch.save(net.state_dict(), f"model_fold_{fold}.bin")
        tm = datetime.datetime.now().strftime("%H:%M:%S")
        print(
            f"{tm}, fold={fold}, epoch={epoch}, train_loss={train_loss:.6f}, valid_loss={valid_loss:.6f}"
        )
        stale_epochs = 0

    print(f"fold={fold}, best val loss={best_loss}")
Ejemplo n.º 6
0
def loop_train(fold, weights, sel_pos):
    """Train a binary classifier on one fold of ``train_fold.csv``.

    Reads the fold CSV, keeps the first 100 training rows and the first 80
    validation rows, and derives the binary target ``result`` (1 where
    ``category > 0``, else 0). After every epoch the metrics are pushed to
    ``history``; whenever the validation loss improves the weights are saved
    to a month/day-stamped file under ``E:/kaggle_imgs/H2/saved_models/``.
    Training stops after 3 epochs, or as soon as the validation loss has
    failed to improve for 3 consecutive epochs.

    ``history``, ``bs_train`` and ``bs_valid`` are read from module scope.

    Args:
        fold: Index of the fold held out for validation.
        weights: Forwarded unchanged to ``Engine(..., weights=weights)``.
        sel_pos: Key passed to ``history.initial_info`` and
            ``history.add_train_info``.
    """
    train = pd.read_csv("E:/kaggle_imgs/H2/data/train_fold.csv")
    history.initial_info(sel_pos)
    # ``.copy()`` makes the sliced frames independent, so the column
    # assignment below does not write to a slice (SettingWithCopyWarning).
    # NOTE(review): the [:100] / [:80] caps look like debug limits -- confirm.
    train_df = train[train.fold != fold].reset_index(drop=True)[:100].copy()
    valid_df = train[train.fold == fold].reset_index(drop=True)[:80].copy()
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same
    # dtype the old alias resolved to.
    train_df["result"] = (train_df["category"] > 0).astype(int)
    valid_df["result"] = (valid_df["category"] > 0).astype(int)

    train_imgs = train_df.tar_path.values.tolist()
    train_aug = Utils.get_aug("train")
    train_tar = train_df.result.values
    train_dataset = ClassificationLoader(image_paths=train_imgs,
                                         targets=train_tar,
                                         resize=None,
                                         augmentations=train_aug)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=bs_train,
                                               num_workers=0,
                                               shuffle=True)

    valid_imgs = valid_df.tar_path.values.tolist()
    valid_aug = Utils.get_aug("valid")
    valid_tar = valid_df.result.values
    valid_dataset = ClassificationLoader(image_paths=valid_imgs,
                                         targets=valid_tar,
                                         resize=None,
                                         augmentations=valid_aug)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=bs_valid,
                                               num_workers=0,
                                               shuffle=False)

    model = get_model_effi_b4(classes=2)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           threshold=1e-5,
                                                           mode="min",
                                                           verbose=True)

    # NOTE(review): the model is built with classes=2 but the engine gets
    # classes=1 -- confirm this mismatch is intentional.
    engine = Engine(model, optimizer, device, classes=1, weights=weights)
    best_loss = np.inf
    early_stopping = 3
    early_stopping_cnt = 0
    EPOCH = 3  # NOTE(review): looks like a debug value (original comment: 300)
    for epoch in range(EPOCH):
        train_loss, train_acc, train_labels, train_preds = engine.train(
            train_loader)
        valid_loss, valid_acc, valid_labels, valid_preds = engine.validate(
            valid_loader)
        # The plateau scheduler is driven by the validation loss.
        scheduler.step(valid_loss)

        # Record this epoch's metrics.
        history.add_train_info(sel_pos, train_acc, train_loss, train_labels,
                               train_preds, valid_acc, valid_loss,
                               valid_labels, valid_preds)
        tm = datetime.datetime.now().strftime("%H:%M:%S")
        print(
            f"{tm}, fold={fold}, epoch={epoch}, train_loss={train_loss:.4f}, valid_loss={valid_loss:.4f}, valid_acc={valid_acc:.4f}"
        )

        if valid_loss < best_loss:
            best_loss = valid_loss
            # The file name carries month/day only, so a later improvement on
            # the same day overwrites the previous checkpoint.
            tm = datetime.datetime.now().strftime("%m%d")
            torch.save(
                model.state_dict(),
                f"E:/kaggle_imgs/H2/saved_models/model_fold_{fold}_{tm}.bin")
            early_stopping_cnt = 0
        else:
            early_stopping_cnt += 1
        # ">=" stops on the ``early_stopping``-th consecutive stale epoch.
        if early_stopping_cnt >= early_stopping:
            break

    print(f"fold={fold}, best val loss={best_loss}")