class SEResnext50_32x4dLearner(ActiveLearner):
    """Active-learning wrapper around an SE-ResNeXt50 (32x4d) binary classifier.

    Owns the model, a single Adam optimizer, and an ``Engine`` that drives
    training/prediction loops on ``device``.
    """

    def __init__(self, device=0, logger_name=None):
        super().__init__(device=device)
        self.model = SEResnext50_32x4d(pretrained="imagenet")
        self.model.to(device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
        self.Engine = Engine(self.model, self.optimizer, device)
        self.logger = logging.getLogger(logger_name)

    def inference(self, dataset, bs=64):
        """Predict on ``dataset`` and return two-column class probabilities.

        Returns a dict with key ``'class_probabilities'`` holding an (N, 2)
        array. Column 0 is ``sigmoid(logit)``; column 1 is its complement.
        NOTE(review): verify which class column 0 corresponds to — the
        ordering looks inverted relative to the usual [P(neg), P(pos)].
        """
        loader = torch.utils.data.DataLoader(
            dataset, batch_size=bs, shuffle=False, num_workers=4
        )
        predictions = self.Engine.predict(loader)
        predictions = np.vstack(predictions).ravel()
        # Model emits one logit per sample; squash through a sigmoid.
        probabilities = 1 / (1 + np.exp(-predictions))
        probabilities = np.stack([probabilities, 1 - probabilities], axis=1)
        return {'class_probabilities': probabilities}

    def fit(self, train_dataset, epochs=50, train_bs=32, **kwargs):
        """Train for ``epochs`` epochs on ``train_dataset``.

        Returns a dict with key ``'target_distribution'`` — the label counts
        of the (currently labeled) training pool.
        """
        labeled_targets = [x['targets'].numpy() for x in tqdm.tqdm(train_dataset)]
        # pd.value_counts(obj) at top level is deprecated/removed in modern
        # pandas; go through a Series instead (same result).
        train_target_distrib = pd.Series(labeled_targets).value_counts()
        self.logger.info('Targets labeled distribution :')
        self.logger.info(train_target_distrib)
        if self.cuda_available:
            self.model.cuda()
        self.model.train()
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=train_bs, shuffle=True, num_workers=4
        )
        # NOTE: training uses self.optimizer (captured by self.Engine in
        # __init__); the previous version built a second, unused Adam here.
        for epoch in tqdm.tqdm(range(epochs)):
            train_loss = self.Engine.train(train_loader)
            # Lazy %-style args so formatting is skipped when INFO is off.
            self.logger.info('epoch %d train loss %s', epoch, train_loss)
        return {'target_distribution': train_target_distrib}

    def score(self, valid_dataset, batch_size=64):
        """Compute ROC-AUC on ``valid_dataset`` and return ``{'auc': ...}``."""
        self.model.eval()
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset, batch_size=batch_size, shuffle=False, num_workers=4
        )
        # valid_loss = self.Engine.evaluate(valid_loader)
        # print(f'Validation loss : {valid_loss:.3f}')
        predictions = self.Engine.predict(valid_loader)
        predictions = np.vstack(predictions).ravel()
        valid_targets = valid_dataset.targets
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"AUC = {auc:.3f}")
        return {'auc': auc}
def predict(image_path, model):
    """Run the trained model on a single image; return the raw outputs."""
    normalize_only = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
    ])
    dataset = ClassificationLoader(
        image_paths=[image_path],
        targets=[0],  # dummy label — required by the loader, unused here
        resize=None,
        augmentations=normalize_only,
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=4
    )
    outputs = Engine.predict(loader, model, DEVICE)
    return np.vstack(outputs).ravel()
def predict(image_path, model):
    """Run *model* on one image and return its output as a flat array."""
    normalize = albumentations.Compose([
        albumentations.Normalize(mean=MEAN, std=STD, max_pixel_value=255.0, always_apply=True)
    ])
    dataset = ClassificationDataset(
        image_paths=[image_path],
        targets=[0],  # placeholder label — ignored at inference time
        resize=None,
        augmentations=normalize,
    )
    loader = torch.utils.data.DataLoader(
        dataset=dataset, batch_size=1, shuffle=False, num_workers=0
    )
    engine = Engine(model=model, optimizer=None, device=DEVICE)
    outputs = engine.predict(data_loader=loader)
    return np.vstack(outputs).reshape(-1)
def test(fold):
    """Run inference on the test set with the checkpoint trained for *fold*."""
    device = config.DEVICE
    df = pd.read_csv(config.TEST_FILE).reset_index(drop=True)

    net = model.SE_Resnext50_32x4d(pretrained=None)
    checkpoint = os.path.join(config.MODEL_PATH, f"model_fold_{fold}.bin")
    net.load_state_dict(torch.load(checkpoint))
    net.to(device)

    # ImageNet normalisation statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    augment = alb.Compose(
        [alb.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)]
    )

    names = df.image_name.values.tolist()
    paths = [os.path.join(config.TEST_IMAGES, name + ".jpg") for name in names]
    dataset = ClassificationLoader(
        image_paths=paths,
        targets=np.zeros(len(paths)),  # dummy labels for the loader
        resize=(256, 256),
        augmentations=augment,
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=config.TEST_BS, shuffle=False, num_workers=4
    )
    return np.vstack(Engine.predict(loader, net, device=device)).ravel()
def predict(image_path, model):
    """Score a single image with *model* and return the flat raw outputs."""
    # ImageNet normalisation statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    normalize = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])
    dataset = ClassificationLoader(
        image_paths=[image_path],
        targets=[0],  # dummy label — unused at inference time
        resize=None,
        augmentations=normalize,
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=0
    )
    outputs = Engine.predict(loader, model, DEVICE)
    return np.vstack(outputs).ravel()
def predict_result(image_path, model):
    """Predict on a single image via the shared test dataloader helper."""
    loader = test_dataloader(images=[image_path], targets=[0])
    outputs = Engine.predict(loader, model=model, device=DEVICE)
    return np.vstack(outputs).ravel()
def predict(fold):
    """Predict raw model outputs on the test set for one CV fold.

    Loads ``model{fold}.bin``, runs it over the resized test images, and
    returns a flat numpy array of per-image outputs.
    """
    training_data_path = "/media/vatsal/Movies & Games/down_siim-isic-melanoma-classification/test224/"
    model_path = "/media/vatsal/Movies & Games/Melenoma-Deep-Learning/model/"
    df_test = pd.read_csv(
        "/media/vatsal/Movies & Games/down_siim-isic-melanoma-classification/test.csv"
    )
    df_test.loc[:, "target"] = 0  # dummy targets for the loader
    device = "cuda"
    test_bs = 8

    # ImageNet normalisation statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    test_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])

    test_images = df_test.image_name.values.tolist()
    test_images = [
        os.path.join(training_data_path, i + ".jpg") for i in test_images
    ]
    test_targets = df_test.target.values

    test_dataset = ClassificationLoader(
        image_paths=test_images,
        targets=test_targets,
        resize=None,
        augmentations=test_aug,
    )
    # FIX: use the declared test_bs instead of a separately hard-coded 8
    # (previously test_bs was defined but ignored).
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=test_bs, shuffle=False, num_workers=4
    )

    model = SEResNext50_32x4d(pretrained=None)
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device=device)
    return np.vstack(predictions).ravel()
def predict(fold):
    """Predict raw model outputs on the test set for one CV fold.

    Returns a flat numpy array of per-image outputs, aligned with the row
    order of the test CSV.
    """
    test_data_path = ''
    model_path = ""
    df_test = pd.read_csv("/.../test.csv")
    df_test.loc[:, 'target'] = 0  # dummy targets for the loader
    device = 'cuda'
    test_bs = 32  # test batch size

    # Normalize image pixel values with ImageNet statistics (these values
    # are for this model's pretraining).
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    # Image augmentation for inference: normalisation only.
    test_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])

    test_images = df_test.image_name.values.tolist()
    # FIX: was `testing_data_path` — an undefined name (NameError); the
    # variable declared above is `test_data_path`.
    test_images = [
        os.path.join(test_data_path, i + '.jpg') for i in test_images
    ]
    test_targets = df_test.target.values

    # FIX: keyword was `augmentation=`; every other ClassificationLoader
    # call in this codebase uses `augmentations=`.
    test_dataset = ClassificationLoader(image_paths=test_images,
                                        targets=test_targets,
                                        resize=None,
                                        augmentations=test_aug)
    # FIX: shuffle must be False at inference time, otherwise predictions
    # no longer line up with the test CSV rows.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_bs,
                                              shuffle=False,
                                              num_workers=4)

    model = SEResNext50_32x4d(pretrained='imagenet')
    model.load_state_dict(torch.load(os.path.join(model_path, f"model{fold}")))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device)
    return np.vstack(predictions).ravel()
def predict(fold):
    """Predict raw model outputs on the test set for one CV fold.

    Prompts for the test-data and test-CSV paths, loads ``model{fold}.bin``,
    and returns a flat numpy array of per-image outputs.
    """
    # test image path
    test_data_path = input("Enter the test data path (resized image): ")
    # csv data path that was created from folds
    test_csv_path = input("Enter the test.csv file path: ")
    df_test = pd.read_csv(test_csv_path)
    df_test.loc[:, "target"] = 0  # dummy targets for the loader
    model_path = "model/"
    device = "cuda"
    test_batch_size = 16

    # FIX: mean was (0.485, 0.456, 0.225) — the third channel was a typo;
    # the ImageNet mean (used everywhere else in this codebase) is 0.406.
    mean = (0.485, 0.456, 0.406)
    standard_deviation = (0.229, 0.224, 0.225)
    test_aug = albumentations.Compose([
        albumentations.Normalize(mean,
                                 std=standard_deviation,
                                 max_pixel_value=255.0,
                                 always_apply=True)
    ])

    # test image mapping
    test_images = df_test.image_name.values.tolist()
    test_images = [
        os.path.join(test_data_path, i + ".jpg") for i in test_images
    ]
    test_targets = df_test.target.values

    # create test dataset
    test_dataset = ClassificationLoader(image_paths=test_images,
                                        targets=test_targets,
                                        resize=None,
                                        augmentations=test_aug)
    # test data loader
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_batch_size,
                                              shuffle=False,
                                              num_workers=4)

    # import model
    model = SEResNext(pretrained='imagenet')
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    # FIX: this call was commented out, so `predictions` was undefined and
    # the return statement raised NameError.
    predictions = Engine.predict(test_loader, model, device)
    return np.vstack(predictions).ravel()
def get_tta_prediction(
    tta: int, test_loader, model, device: str, use_sigmoid: bool, prediction_length: int
) -> np.ndarray:
    """Average model predictions over *tta* test-time-augmentation passes.

    Args:
        tta: number of augmentation passes to average over.
        test_loader: dataloader whose augmentations are stochastic, so each
            pass sees differently augmented images.
        model: trained model to run.
        device: device string passed through to ``Engine.predict``.
        use_sigmoid: whether Engine should apply a sigmoid to the outputs.
        prediction_length: number of test samples (length of the output).

    Returns:
        Flat array of length ``prediction_length`` with the mean prediction.
    """
    prediction = np.zeros(prediction_length)
    for tta_id in range(tta):
        print(f"using tta at tta_id {tta_id}")
        # FIX: honour the use_sigmoid argument — it was accepted but
        # ignored (hard-coded to True).
        predictions_tta = Engine.predict(
            test_loader, model, device=device, use_sigmoid=use_sigmoid
        )
        prediction += np.vstack(predictions_tta).ravel()
    prediction /= tta
    return prediction
def predict(fold):
    """Predict raw model outputs on the test set for one CV fold.

    Loads ``model{fold}.bin`` and returns a flat numpy array of per-image
    outputs aligned with the test CSV row order.
    """
    test_data_path = ""
    model_path = ""
    df_test = pd.read_csv("")
    df_test.loc[:, "target"] = 0  # dummy targets for the loader
    device = "cuda"
    test_bs = 16

    # ImageNet normalisation statistics.
    # FIX: std was (0.229, 0.244, 0.255) — transposed-digit typos of the
    # standard (0.229, 0.224, 0.225) used everywhere else in this codebase.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    test_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ]
    )

    test_images = df_test.image_name.values.tolist()
    test_images = [os.path.join(test_data_path, i + ".jpg") for i in test_images]
    test_targets = np.zeros(len(test_images))

    test_dataset = ClassificationLoader(
        image_paths=test_images, targets=test_targets, resize=None, augmentations=test_aug
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=test_bs, shuffle=False, num_workers=4
    )

    model = SEResNext50_32x4d(pretrained=None)
    model.load_state_dict(torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    predictions = Engine.predict(
        test_loader, model, device=device
    )
    return np.vstack(predictions).ravel()
def predict(fold):
    """Score the test set with the checkpoint saved for *fold*."""
    # Paths / device / batch configuration
    test_path = img_path + "/test3/"
    df_test = pd.read_csv(data_path + "/test.csv")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    epochs = 1
    test_bs = 18
    df = df_test

    # Inference-time augmentation: ImageNet normalisation only
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
    ])

    # Dataset and loader
    image_files = [test_path + name + ".jpg" for name in df.image_name.values.tolist()]
    print(image_files[0])
    dummy_targets = np.zeros(len(image_files))
    dataset = ClassificationLoader(image_paths=image_files,
                                   targets=dummy_targets,
                                   resize=None,
                                   augmentations=aug)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=test_bs,
                                         shuffle=False,
                                         num_workers=4)

    # Model: restore the fold checkpoint
    net = SEResnext50_32x4d(pretrained=True)
    checkpoint = save_path + f"/200712_fold_{fold}.bin"
    net.load_state_dict(torch.load(checkpoint))
    net = net.to(device)

    # Predict and flatten
    outputs = Engine.predict(loader, net, device=device)
    return np.vstack(outputs).flatten()
def predict(fold):
    """Predict raw model outputs on the test set for one CV fold.

    Loads ``model{fold}.bin`` and returns a flat numpy array of per-image
    outputs aligned with the test CSV row order.
    """
    test_data_path = "/home/prakhar/Desktop/ml/Melanoma_Detection/input/"
    model_path = "/home/prakhar/Desktop/ml/Melanoma_Detection/model_weights/"
    df_test = pd.read_csv(
        "/home/prakhar/Desktop/ml/Melanoma_Detection/input/test.csv")
    df_test.loc[:, "target"] = 0  # dummy targets for the loader
    device = "cuda"
    test_bs = 16

    # ImageNet normalisation statistics.
    # FIX: std third channel was 0.255 — a typo for 0.225 (the correct
    # values were present in the adjacent commented-out lines).
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    test_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
    ])

    test_images = df_test.image_name.values.tolist()
    test_images = [
        os.path.join(test_data_path, i + '.jpg') for i in test_images
    ]
    test_targets = df_test.target.values

    test_dataset = ClassificationDataLoader(image_paths=test_images,
                                            targets=test_targets,
                                            resize=None,
                                            augmentations=test_aug)
    # FIX: batch_size referenced undefined `valid_bs`; use the declared
    # test_bs.
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_bs,
                                              shuffle=False,
                                              num_workers=4)

    model = SEResNext50_32x4d(pretrained='imagenet')
    # FIX: was `model.state_dict(torch.load(...))`, which does NOT load the
    # checkpoint into the model — load_state_dict is required.
    model.load_state_dict(torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device)
    return np.vstack(predictions).ravel()
def predict(fold):
    """Run the fold's saved model over the resized test images."""
    print(f"Predicting fold #{fold}")
    test_data_path = "/mnt/Data/MelanomaClassification/input/kaggle/working/test224/"
    df = pd.read_csv("/mnt/Data/MelanomaClassification/input/test.csv")
    device = "cuda"
    model_path = os.path.join("/mnt/Data/MelanomaClassification/models/",
                              f"model_fold_{fold}.bin")

    # ImageNet normalisation statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
    ])

    names = df.image_name.values.tolist()
    image_files = [os.path.join(test_data_path, name + ".png") for name in names]
    dummy_targets = np.zeros(len(image_files))

    dataset = ClassificationLoader(
        image_paths=image_files,
        targets=dummy_targets,
        resize=None,
        augmentations=aug,
    )
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=16,
                                         shuffle=False,
                                         num_workers=4)

    net = SEResNext50_32x4d(pretrained=None)
    net.load_state_dict(torch.load(model_path))
    net.to(device)

    outputs = Engine.predict(loader, net, device=device)
    return np.vstack(outputs).ravel()
def predict(image_path, fold):
    """Score a single image on CPU with the model saved for *fold*.

    Returns the model's first (and only) raw output for the image.
    """
    device = "cpu"
    model_path = "../model/"

    # ImageNet normalisation statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    test_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])

    test_images = [image_path]
    test_targets = [0]  # dummy label — unused at inference time
    test_dataset = ClassificationLoader(
        image_paths=test_images,
        targets=test_targets,
        resize=None,
        augmentations=test_aug,
    )
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=8,
                                              shuffle=False,
                                              num_workers=4)

    model = SEResNext50_32x4d(pretrained=None)
    # FIX: this function runs on CPU, so map the checkpoint tensors to CPU
    # explicitly — without map_location, loading a GPU-saved checkpoint on
    # a CPU-only machine raises an error (as the old comment here warned).
    model.load_state_dict(
        torch.load(os.path.join(model_path, f"model_fold_{fold}.bin"),
                   map_location=torch.device("cpu"))
    )

    predictions = Engine.predict(test_loader, model, device=device)
    predictions = np.vstack(predictions).ravel()
    print(predictions)
    return predictions[0]
def predict(fold):
    """Score the test set with the checkpoint trained for *fold*."""
    # Load test dataframe and resolve paths/device
    test_data_path = img_path + "/test3/"
    df = pd.read_csv(data_path + "/test.csv")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_path = saved_path + f"/model_fold_{fold}.bin"
    test_bs = 32

    # Inference-time augmentation: ImageNet normalisation only
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
    ])

    # Build dataset of test images with dummy targets
    image_files = [test_data_path + name + ".jpg"
                   for name in df.image_name.values.tolist()]
    dummy_targets = np.zeros(len(image_files))
    dataset = ClassificationLoader(
        image_paths=image_files,
        targets=dummy_targets,
        resize=None,
        augmentations=aug,
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=test_bs, shuffle=False, num_workers=4)

    # Restore the fold checkpoint and run inference
    net = SEResnext50_32x4d(pretrained=True)
    net.load_state_dict(torch.load(model_path))
    net.to(device)

    outputs = Engine.predict(loader, net, device)
    return np.vstack(outputs).ravel()
def predict(fold):
    """Predict on the test set for one fold, driven entirely by a YAML/wandb
    config.

    Loads the fold checkpoint (optionally an SWA checkpoint), builds a test
    loader with *stochastic* augmentations (intended for TTA averaging —
    TODO confirm: with config.tta falsy, predictions come from a single
    randomly-augmented pass), and returns a flat array of sigmoid outputs.
    """
    print(f"Prediction on test set fold {fold}")
    args = get_args()
    # Merge the YAML file into the wandb config object.
    with open(args.config) as file:
        config_file = yaml.load(file, Loader=yaml.FullLoader)
    config = wandb.config  # Initialize config
    config.update(config_file)
    df_test = pd.read_csv(config.test_csv)
    # Optional patient metadata features alongside the images.
    if config.use_metadata:
        df_test, meta_features = get_meta_feature(df_test)
    else:
        meta_features = None
    # Stochastic Weight Averaging checkpoints live at a different path.
    if config.swa["use_swa"]:
        model_path = config.swa["model_path"].format(fold)
        print(f"using SWA, loading checkpoint from {model_path}")
    else:
        model_path = config.model_path.format(fold)
    # ImageNet normalisation statistics.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    # aug = albumentations.Compose(
    #     [albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)]
    # )
    # Randomised augmentation pipeline — each pass over the loader sees
    # differently augmented images (this is what makes TTA averaging work).
    aug = albumentations.Compose([
        AdvancedHairAugmentation(hairs_folder="../input/melanoma-hairs/"),
        # albumentations.augmentations.transforms.CenterCrop(64, 64, p=0.8),
        albumentations.augmentations.transforms.RandomBrightnessContrast(),
        albumentations.augmentations.transforms.HueSaturationValue(),
        # Microscope(p=0.4),
        albumentations.augmentations.transforms.RandomResizedCrop(
            config.input_size, config.input_size, scale=(0.7, 1.0), p=0.4),
        albumentations.augmentations.transforms.VerticalFlip(p=0.4),
        # albumentations.augmentations.transforms.Cutout(p=0.8),  # doesnt work
        albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1,
                                        rotate_limit=15),
        albumentations.Flip(p=0.5),
        # RandomAugMix(severity=7, width=7, alpha=5, p=1),
        # albumentations.augmentations.transforms.Resize(
        #     config.input_size, config.input_size, p=1
        # ),
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])
    images = df_test.image_name.values.tolist()
    images = [os.path.join(config.test_data_path, i + ".jpg") for i in images]
    # Dummy targets — unused at inference time.
    targets = np.zeros(len(images))
    test_dataset = ClassificationDataset(
        image_paths=images,
        targets=targets,
        resize=None,
        augmentations=aug,
        meta_features=meta_features,
        df_meta_features=df_test,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.test_batch_size,
        shuffle=False,
        num_workers=4,
    )
    model = get_model(
        config.model_backbone,
        config.model_name,
        config.num_classes,
        config.input_size,
        config.use_metadata,
        meta_features,
    )
    model.load_state_dict(torch.load(model_path))
    model.to(config.device)
    # Either average config.tta stochastic passes, or do a single pass.
    if config.tta:
        predictions = get_tta_prediction(config.tta, test_loader, model,
                                         config.device, True, len(images))
    else:
        predictions = Engine.predict(test_loader, model,
                                     device=config.device, use_sigmoid=True)
    predictions = np.vstack((predictions)).ravel()
    return predictions
def train():
    """Train the classifier with a held-out split, early stopping on AUC.

    Reads labels from ``images_labeled.csv``, trains for up to ``EPOCHS``
    epochs, steps an LR scheduler on validation AUC, and checkpoints the
    best model to ``MODEL_PATH/model.bin`` via EarlyStopping.
    """
    df = pd.read_csv(DATA_PATH + "images_labeled.csv")
    X_train, X_test, y_train, y_test = train_test_split(df.image.values,
                                                        df.label.values,
                                                        test_size=0.3,
                                                        random_state=42,
                                                        shuffle=True)
    # Inference-style normalisation for both splits (no train-time
    # randomness here).
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean=MEAN, std=STD, max_pixel_value=255.0, always_apply=True)
    ])
    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean=MEAN, std=STD, max_pixel_value=255.0, always_apply=True)
    ])

    train_images = [os.path.join(DATA_PATH, filename) for filename in X_train]
    valid_images = [os.path.join(DATA_PATH, filename) for filename in X_test]

    train_dataset = ClassificationDataset(
        image_paths=train_images,
        targets=y_train,
        resize=None,
        augmentations=train_aug,
    )
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=TRAIN_BS,
                                               shuffle=True,
                                               num_workers=4)
    valid_dataset = ClassificationDataset(
        image_paths=valid_images,
        targets=y_test,
        resize=None,
        augmentations=valid_aug,
    )
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=VALID_BS,
                                               shuffle=False,
                                               num_workers=4)

    model = CV_Model(pretrained="imagenet")
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # Step the LR on validation AUC (mode="max": higher is better).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           mode="max")
    es = EarlyStopping(patience=5, mode="max")

    # FIX: construct the Engine once — it was being re-instantiated every
    # epoch although model/optimizer/device never change.
    engine = Engine(model=model, optimizer=optimizer, device=DEVICE)
    for epoch in range(EPOCHS):
        engine.train(train_loader)
        predictions = engine.predict(valid_loader)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(y_test, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)
        es(auc, model, model_path=os.path.join(MODEL_PATH, "model.bin"))
        if es.early_stop:
            print("Early stopping")
            break
# Train the model (Adam + StepLR), then predict on the test set and write a
# Kaggle-style multi-class submission CSV.
# NOTE(review): relies on `model`, `train_loader`, `valid_loader`, `mean`,
# `std`, and `args` defined earlier in this file — confirm against the
# surrounding script.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.6)
eng = Engine(model, optimizer, device=args.device)
# Training loop: one train + one validation pass per epoch.
for epoch in range(args.epochs):
    train_loss = eng.train(train_loader)
    valid_loss = eng.evaluate(valid_loader)
    print(f"{epoch}, Train Loss={train_loss} Valid Loss={valid_loss}")
# Build the test loader: dummy 4-column targets (one per output class),
# normalisation-only augmentation, images resized to 128x128.
test_df = pd.read_csv(os.path.join(args.data_path, "test.csv"))
images = test_df.image_id.values.tolist()
images = [os.path.join(args.data_path, "images", i + ".jpg") for i in images]
targets = np.zeros((len(images), 4))
aug = albumentations.Compose(
    [albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)]
)
test_loader = ClassificationDataLoader(
    image_paths=images, targets=targets, resize=(128, 128), augmentations=aug
).fetch(batch_size=16, num_workers=4, drop_last=False, shuffle=False, tpu=False)
# Predict and write the submission: predictions is (N, 4), one column per
# class in the sample submission.
eng = Engine(model, optimizer, device=args.device)
predictions = eng.predict(test_loader)
predictions = np.vstack((predictions))
sample = pd.read_csv(os.path.join(args.data_path, "sample_submission.csv"))
sample.loc[:, ["healthy", "multiple_diseases", "rust", "scab"]] = predictions
sample.to_csv(os.path.join(args.data_path, "submission.csv"), index=False)
# Predict on the test set and write a Kaggle-style multi-class submission.
# NOTE(review): relies on `test_df`, `model`, `mean`, `std`, and `args`
# defined earlier in this file — confirm against the surrounding script.
images = test_df.image_id.values.tolist()
images = [
    os.path.join(args.data_path, "images", i + ".jpg") for i in images
]
# Dummy (N, 4) targets — one column per output class; unused at inference.
targets = np.zeros((len(images), 4))
aug = albumentations.Compose([
    albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
])
# shuffle=False keeps predictions aligned with the submission row order.
test_loader = ClassificationDataLoader(image_paths=images,
                                       targets=targets,
                                       resize=(128, 128),
                                       augmentations=aug).fetch(
                                           batch_size=16,
                                           num_workers=4,
                                           drop_last=False,
                                           shuffle=False,
                                           tpu=False)
predictions = Engine.predict(test_loader, model, device=args.device)
# predictions is (N, 4): one column per class in the sample submission.
predictions = np.vstack((predictions))
sample = pd.read_csv(os.path.join(args.data_path, "sample_submission.csv"))
sample.loc[:, ["healthy", "multiple_diseases", "rust", "scab"]] = predictions
sample.to_csv(os.path.join(args.data_path, "submission.csv"), index=False)