Example #1
def prepare_data():

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(90),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
        'validation':
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
    }

    # Train dataset
    dataset_train = dataset(opt.data_train_folder, data_transforms["train"])

    # Validation dataset
    dataset_val = dataset(opt.data_val_folder, data_transforms["validation"])

    dataloader_train = torch.utils.data.DataLoader(dataset_train,
                                                   batch_size=opt.bs,
                                                   shuffle=True,
                                                   num_workers=opt.num_workers)

    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=opt.bs,
                                                 shuffle=False,
                                                 num_workers=opt.num_workers)

    train_size = len(dataset_train)
    val_size = len(dataset_val)

    print("train dataset size =", train_size)
    print("validation dataset size=", val_size)

    print("dataset train class order= ", dataset_train.class_to_idx)
    print("dataset val class order= ", dataset_train.class_to_idx)

    #exit() # just for testing

    return {
        "train": dataloader_train,
        "val": dataloader_val,
        "dataset_size": {
            "train": train_size,
            "val": val_size
        }
    }
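The snippet relies on several module-level names that are defined elsewhere in the project: `transforms` and `torch`, an ImageFolder-style `dataset` class that takes a root folder and a transform and exposes `class_to_idx`, and an `opt` options object carrying `data_train_folder`, `data_val_folder`, `bs`, and `num_workers`. A minimal sketch of that surrounding context, assuming `dataset` is simply `torchvision.datasets.ImageFolder` and `opt` comes from argparse (both assumptions, not taken from the original project), could look like this:

# Hypothetical module-level setup assumed by prepare_data(); the names and
# default values below are illustrative, not from the original project.
import argparse

import torch
from torchvision import datasets, transforms

# prepare_data() calls dataset(folder, transform); ImageFolder matches that
# signature and provides the class_to_idx mapping printed by the function.
dataset = datasets.ImageFolder

parser = argparse.ArgumentParser()
parser.add_argument("--data_train_folder", default="data/train")
parser.add_argument("--data_val_folder", default="data/val")
parser.add_argument("--bs", type=int, default=32)
parser.add_argument("--num_workers", type=int, default=4)
opt = parser.parse_args()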
Example #2
def prepare_data():

    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(90),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
        'validation': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
    }


    # Use selected fold for validation
    train_folds = list(set(opt.all_folds) - set([opt.val_fold]))
    validation_fold = opt.val_fold
    

    # Train datasets: one dataset per training fold
    image_datasets_train_all = {x: dataset(os.path.join(opt.data_root, x),
                                           data_transforms["train"])
                                for x in train_folds}

    # Combine all training fold datasets into a single dataset
    dataset_train = torch.utils.data.ConcatDataset(
        [image_datasets_train_all[i] for i in train_folds])

    # Validation dataset: the held-out fold
    dataset_val = dataset(os.path.join(opt.data_root, validation_fold),
                          data_transforms["validation"])

    dataloader_train = torch.utils.data.DataLoader(dataset_train,
                                                   batch_size=opt.bs,
                                                   shuffle=True,
                                                   num_workers=opt.num_workers)

    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=opt.bs,
                                                 shuffle=False,
                                                 num_workers=opt.num_workers)

    train_size = len(dataset_train)
    val_size = len(dataset_val)

    print("train dataset size =", train_size)
    print("validation dataset size=", val_size)

   
    return {"train":dataloader_train, "val":dataloader_val, "dataset_size":{"train": train_size, "val":val_size} }
Example #3
def inference():

    #if opt.bs != 1:
    #    print("Please run with bs = 1")
    #   exit()

    test_model_checkpoint = input("Please enter the path of test model:")
    checkpoint = torch.load(test_model_checkpoint)

    model = prepare_model()
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    trnsfm = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    dataset_new = dataset(opt.data_to_inference, trnsfm)
    dataloader_new = torch.utils.data.DataLoader(dataset_new,
                                                 batch_size=opt.bs,
                                                 shuffle=False,
                                                 num_workers=opt.num_workers)

    class_names = list(string.ascii_uppercase)[:23]
    print(class_names)
    print("lenth of dataloader:", len(dataloader_new))
    df = pd.DataFrame(columns=["filename", "predicted-label"] + class_names)

    with torch.no_grad():
        for i, data in tqdm(enumerate(dataloader_new, 0)):

            inputs, labels, paths = data

            df_temp = pd.DataFrame(columns=["filename", "predicted-label"] +
                                   class_names)

            #print("paths:", paths)
            filenames = []
            for p in paths:
                #print(p)
                #print([list(p.split("/"))[-1]])
                filenames = filenames + [list(p.split("/"))[-1]]
            #print("filenames:", filename)

            #df_temp["filename"] = filename

            #print("file names:", filenames)
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            outputs = F.softmax(outputs, 1)
            predicted_probability, predicted = torch.max(outputs.data, 1)

            predicted = predicted.data.cpu().numpy()

            #print("predicted items=", predicted)
            #print("paths:", paths)

            df_temp["predicted-label"] = predicted
            df_temp["filename"] = filenames
            #df_temp[2:-1] =

            #print(df_temp)
            #df_temp["actual-label"] = class_names[labels.item()]

            # print("actual label:", labels.item())
            #print("predicted label:", predicted.item())
            # print("probabilities :", outputs.cpu())

            probabilities = outputs.cpu().squeeze()
            probabilities = probabilities.tolist()
            probabilities = np.around(probabilities, decimals=3)
            #print(probabilities)

            #print(probabilities)
            df_temp[class_names] = probabilities

            # DataFrame.append was removed in pandas 2.0; use pd.concat instead
            df = pd.concat([df, df_temp], ignore_index=True)

    print(df.head())
    print("length of DF:", len(df))
    prob_file_name = "%s/%s_inference.csv" % (opt.out_dir, py_file_name)
    df.to_csv(prob_file_name, index=False)
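The inference loop unpacks three values per batch (`inputs, labels, paths`), so the `dataset` class used here must return the image path alongside the usual (image, label) pair; the snippet also assumes module-level `F` (torch.nn.functional), `device`, `string`, `pd`, `np`, `tqdm`, and `py_file_name`. A minimal sketch of such a path-returning dataset, assuming it subclasses `torchvision.datasets.ImageFolder` (the actual class in the original project may differ):

# Hypothetical path-returning dataset; one common way to get (image, label, path).
from torchvision import datasets

class ImageFolderWithPaths(datasets.ImageFolder):
    def __getitem__(self, index):
        # ImageFolder.__getitem__ returns (transformed image, class index)
        image, label = super().__getitem__(index)
        # self.samples[index] is a (file path, class index) tuple
        path = self.samples[index][0]
        return image, label, path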