# Shared imports for the snippets below. `opt` (parsed command-line options),
# `dataset` (an ImageFolder-style class that also returns file paths -- see the
# sketch at the end of this file), `device`, `prepare_model`, and `py_file_name`
# are defined elsewhere in the original scripts.
import os
import string

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torchvision import transforms
from tqdm import tqdm


def prepare_data():
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(90),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
        'validation': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
    }

    # Train dataset
    dataset_train = dataset(opt.data_train_folder, data_transforms["train"])
    # Validation dataset
    dataset_val = dataset(opt.data_val_folder, data_transforms["validation"])

    dataloader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=opt.bs, shuffle=True, num_workers=opt.num_workers)
    dataloader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=opt.bs, shuffle=False, num_workers=opt.num_workers)

    train_size = len(dataset_train)
    val_size = len(dataset_val)
    print("train dataset size =", train_size)
    print("validation dataset size =", val_size)
    print("dataset train class order =", dataset_train.class_to_idx)
    print("dataset val class order =", dataset_val.class_to_idx)

    return {
        "train": dataloader_train,
        "val": dataloader_val,
        "dataset_size": {"train": train_size, "val": val_size},
    }
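# prepare_data() returns the dataloaders keyed by split; a training loop would
# typically consume them along the lines of this sketch (train_one_epoch,
# validate, and opt.epochs are hypothetical names, not part of the original
# script):
#
#     loaders = prepare_data()
#     for epoch in range(opt.epochs):
#         train_one_epoch(model, loaders["train"])
#         validate(model, loaders["val"])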
# Cross-validation variant of prepare_data(): opt.data_root holds one
# sub-directory per fold; the fold named by opt.val_fold is held out for
# validation and the remaining folds are used for training.
def prepare_data():
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(90),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
        'validation': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ]),
    }

    # Use the selected fold for validation, the rest for training
    train_folds = list(set(opt.all_folds) - {opt.val_fold})
    validation_fold = opt.val_fold

    # One dataset per training fold ...
    image_datasets_train_all = {
        x: dataset(os.path.join(opt.data_root, x), data_transforms["train"])
        for x in train_folds
    }
    # ... concatenated into a single training dataset
    dataset_train = torch.utils.data.ConcatDataset(
        [image_datasets_train_all[i] for i in train_folds])

    # Validation dataset
    dataset_val = dataset(os.path.join(opt.data_root, validation_fold),
                          data_transforms["validation"])

    dataloader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=opt.bs, shuffle=True, num_workers=opt.num_workers)
    dataloader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=opt.bs, shuffle=False, num_workers=opt.num_workers)

    train_size = len(dataset_train)
    val_size = len(dataset_val)
    print("train dataset size =", train_size)
    print("validation dataset size =", val_size)

    return {
        "train": dataloader_train,
        "val": dataloader_val,
        "dataset_size": {"train": train_size, "val": val_size},
    }
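# A full k-fold run would call the variant above once per fold. A minimal
# sketch, assuming five fold directories named fold_0 .. fold_4 and a
# hypothetical train() entry point (neither is part of the original script):
#
#     opt.all_folds = ["fold_0", "fold_1", "fold_2", "fold_3", "fold_4"]
#     for fold in opt.all_folds:
#         opt.val_fold = fold       # hold this fold out for validation
#         loaders = prepare_data()  # train on the remaining folds
#         train(loaders)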
def inference():
    test_model_checkpoint = input("Please enter the path of test model:")
    # map_location lets a GPU-saved checkpoint load on the current device
    checkpoint = torch.load(test_model_checkpoint, map_location=device)
    model = prepare_model()
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()

    trnsfm = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    dataset_new = dataset(opt.data_to_inference, trnsfm)
    dataloader_new = torch.utils.data.DataLoader(
        dataset_new, batch_size=opt.bs, shuffle=False, num_workers=opt.num_workers)

    # The 23 classes are labelled A..W, one probability column per class
    class_names = list(string.ascii_uppercase)[:23]
    print(class_names)
    print("length of dataloader:", len(dataloader_new))

    df = pd.DataFrame(columns=["filename", "predicted-label"] + class_names)

    with torch.no_grad():
        for i, data in tqdm(enumerate(dataloader_new, 0)):
            inputs, labels, paths = data
            df_temp = pd.DataFrame(columns=["filename", "predicted-label"] + class_names)
            filenames = [os.path.basename(p) for p in paths]

            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            outputs = F.softmax(outputs, 1)
            predicted_probability, predicted = torch.max(outputs.data, 1)
            predicted = predicted.cpu().numpy()  # class indices into class_names

            df_temp["predicted-label"] = predicted
            df_temp["filename"] = filenames

            # Per-class probabilities, rounded; keep the batch dimension so the
            # multi-column assignment also works with batch size 1
            probabilities = np.around(outputs.cpu().numpy(), decimals=3)
            df_temp[class_names] = probabilities

            # DataFrame.append was removed in pandas 2.x; concat replaces it
            df = pd.concat([df, df_temp], ignore_index=True)

    print(df.head())
    print("length of DF:", len(df))
    prob_file_name = "%s/%s_inference.csv" % (opt.out_dir, py_file_name)
    df.to_csv(prob_file_name, index=False)
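# inference() unpacks (inputs, labels, paths) from every batch, which a plain
# torchvision ImageFolder does not provide -- it yields only (image, label).
# A minimal sketch of the kind of `dataset` class these scripts assume (the
# class name here is hypothetical):
from torchvision.datasets import ImageFolder

class ImageFolderWithPaths(ImageFolder):
    """ImageFolder that also yields the image file path."""

    def __getitem__(self, index):
        image, label = super().__getitem__(index)
        path = self.samples[index][0]  # self.samples holds (path, class_index) pairs
        return image, label, path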