def full_predict(df, model_class, model_params, general_params):
    # restore the fitted vectorizer saved during training
    with open(os.path.join(general_params["logdir"], "vectorizer.pickle"), "rb") as input_file:
        vectorizer = pickle.load(input_file)
    df = make_df(df, vectorizer)
    ds = GeneralDataset(
        df["tokens"].values,
        labels=None,
        max_sentence_len=general_params["max_sentence_len"],
    )
    dl = DataLoader(
        dataset=ds,
        batch_size=general_params["batch_size"],
        shuffle=False,
        num_workers=general_params["num_workers"],
    )
    # rebuild the model with the vocabulary size baked into its params
    model_params = copy.deepcopy(model_params)
    model_params.update({"vocab_size": len(vectorizer.vocabulary_)})
    model = model_class(**model_params).float()
    runner = SupervisedRunner(model=model)
    runner_out = runner.predict_loader(
        loader=dl,
        resume=os.path.join(
            general_params["logdir"], "checkpoints", general_params["checkpoint_name"]
        ),
    )
    # flatten per-batch outputs into a single prediction array
    y_pred = []
    for pred in runner_out:
        pred = pred[runner.output_key].cpu().numpy()
        for p in pred:
            y_pred.append(np.array(p))
    return np.array(y_pred)
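# full_predict() assumes the training run pickled the fitted vectorizer into
# logdir. A minimal sketch of that counterpart save step, which is not shown
# in the original (the variable names simply mirror the function above):
import os
import pickle

# during training, persist the fitted vectorizer next to the checkpoints
with open(os.path.join(general_params["logdir"], "vectorizer.pickle"), "wb") as f:
    pickle.dump(vectorizer, f)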
import json
import os
import shutil
from datetime import datetime

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import KFold
from catalyst.dl import SupervisedRunner, EarlyStoppingCallback
from catalyst.utils import set_global_seed, get_device

# parse_arguments, yaml_to_json, read_data, preprocess, build_feature,
# create_data_loader, SampleNN and mean_spearmanr_correlation_score are
# project-local helpers.


def main():
    # get args
    args = parse_arguments()
    params = yaml_to_json(args.yaml_path)

    # hyperparameters
    num_folds = params.fold
    seed = params.seed
    base_path = params.base_path
    target_cols = params.target
    features_cols = params.features
    preprocessed_data_path = params.preprocessed_data
    batch_size = params.batch_size
    num_epochs = params.epochs
    # e.g. '/hoge/logs'
    base_logdir = params.base_logdir

    # fix seed
    set_global_seed(seed)
    device = get_device()

    # set up logdir
    now = datetime.now()
    base_logdir = os.path.join(base_logdir, now.strftime("%Y%m%d%H%M%S"))
    os.makedirs(base_logdir, exist_ok=True)

    # dump yaml contents
    with open(os.path.join(base_logdir, 'params.json'), mode="w") as f:
        json.dump(params, f, indent=4)

    # dump this script (shutil.copy, unlike copyfile, accepts a directory as destination)
    my_file_path = os.path.abspath(__file__)
    shutil.copy(my_file_path, base_logdir)

    # load dataset
    if preprocessed_data_path == '':
        train, test, sample_submission = read_data(base_path)  # noqa
        # TODO: You should implement these functions!!
        train, test = preprocess(train, test)  # noqa
        train, test = build_feature(train, test)  # noqa
    else:
        train = pd.read_csv(preprocessed_data_path + 'train.csv')
        test = pd.read_csv(preprocessed_data_path + 'test.csv')
        sample_submission = pd.read_csv(preprocessed_data_path + 'sample_submission.csv')

    # execute CV
    # TODO: set your CV method
    # shuffle=True is required for random_state to take effect
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    ids = kf.split(train)

    fold_scores = []
    test_preds = []
    for fold, (train_idx, valid_idx) in enumerate(ids):
        print('Fold {}'.format(fold + 1))
        logdir = os.path.join(base_logdir, 'fold_{}'.format(fold + 1))
        os.makedirs(logdir, exist_ok=True)

        # data
        X_train = train[features_cols]
        # What about normalizing the target variable?
        Y_train = train[target_cols]
        X_test = test[features_cols]

        # create dataloaders
        train_dls, test_dl = create_data_loader(
            X_train.iloc[train_idx].to_numpy(), Y_train.iloc[train_idx].to_numpy(),
            X_train.iloc[valid_idx].to_numpy(), Y_train.iloc[valid_idx].to_numpy(),
            X_test.to_numpy(), batch_size=batch_size)

        # init models
        # TODO: set your model and learning condition
        # A factory function that looks models up by keyword would make this more generic.
        model = SampleNN(input_dim=1000, out_dim=1)
        criterion = nn.BCELoss()
        optimizer = torch.optim.AdamW(model.parameters())
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

        # init catalyst runner
        runner = SupervisedRunner(device=device)

        # model training
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=train_dls,
            logdir=logdir,
            num_epochs=num_epochs,
            callbacks=[EarlyStoppingCallback(patience=15, min_delta=0)],
            verbose=False)

        # calculate valid score
        best_model_path = logdir + '/checkpoints/best.pth'
        val_preds = runner.predict_loader(model, train_dls['valid'],
                                          resume=best_model_path, verbose=False)
        val_truth = Y_train.iloc[valid_idx].values
        # TODO: set your score function
        cv_score = mean_spearmanr_correlation_score(val_truth, val_preds)
        print('Fold {} CV score : {}'.format(fold + 1, cv_score))
        fold_scores.append(cv_score)

        # test prediction (np.mean below already averages across folds,
        # so the per-fold predictions must not also be divided by num_folds)
        test_pred = runner.predict_loader(
            model, test_dl, resume=best_model_path, verbose=False)
        test_preds.append(test_pred)

    # submit
    # TODO: set your submit process
    sample_submission[target_cols] = np.mean(test_preds, axis=0)
    sample_submission.to_csv('submission.csv', index=False)
    return True
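# mean_spearmanr_correlation_score is left as a TODO above. A plausible sketch
# using scipy.stats.spearmanr, averaging the rank correlation over target
# columns -- an assumption about the intended metric, not the original code:
import numpy as np
from scipy.stats import spearmanr


def mean_spearmanr_correlation_score(y_true, y_pred):
    # assumes y_true and y_pred are 2-D arrays of shape (n_samples, n_targets)
    return np.mean([
        spearmanr(y_true[:, i], y_pred[:, i]).correlation
        for i in range(y_true.shape[1])
    ])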
test_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE,
                     shuffle=False, drop_last=True, num_workers=0)

# collect ground-truth labels from the loader
# (note: drop_last=True silently discards the final incomplete batch,
# so some test samples are never evaluated)
test_truth = []
for i in test_dl:
    test_truth.append(i[1].cpu().numpy().tolist())
test_truth = [item for sublist in test_truth for item in sublist]

predictions = np.vstack(
    list(
        map(
            lambda x: x["logits"].cpu().numpy(),
            runner.predict_loader(
                model=model,
                loader=test_dl,
                resume=f"{logdir}/model/nonecrop/chickpea_lentils.pth"))))

probabilities = []
pred_labels = []
true_labels = []
pred_classes = []
true_classes = []
for i, (truth, logits) in enumerate(zip(test_truth, predictions)):
    # per-sample softmax over the class logits
    probability = torch.softmax(torch.from_numpy(logits), dim=0)
    pred_label = probability.argmax().item()
    probabilities.append(probability.cpu().numpy())
    pred_labels.append(pred_label)
    true_labels.append(truth)
    pred_classes.append(class_names[pred_label])
    # true_classes.append(class_names[truth])
import torch
from torch.utils.data import DataLoader, TensorDataset
from catalyst.dl import SupervisedRunner

# data (num_samples is divisible by the batch size, so the shape assert below
# holds for every batch)
num_samples, num_features = 320, 10
X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
loaders = {"train": loader, "valid": loader}

# model, criterion, optimizer, scheduler
model = torch.nn.Linear(num_features, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])

runner = SupervisedRunner()

# model training
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir="./logdir",
    num_epochs=8,
    verbose=True,
    check=True,
    load_best_on_end=True,
)

# model inference
for prediction in runner.predict_loader(loader=loader):
    assert prediction["logits"].cpu().detach().numpy().shape == (32, 1)

# model tracing
traced_model = runner.trace(loader=loader)
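# The traced model is a regular TorchScript module, so it can be persisted and
# reloaded with the standard torch.jit API without the Python class definition.
# A minimal sketch (the "traced_model.pth" filename is illustrative):
import torch

torch.jit.save(traced_model, "traced_model.pth")
reloaded = torch.jit.load("traced_model.pth")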
TEST_IMAGES = sorted(test_image_path.glob("*.png"))

# create test dataset
test_dataset = SegmentationDataset(TEST_IMAGES, transforms=valid_transforms)

num_workers: int = 4
infer_loader = DataLoader(test_dataset, batch_size=batch_size,
                          shuffle=False, num_workers=num_workers)

# this gets predictions for the whole loader
predictions = runner.predict_loader(
    model=model,
    loader=infer_loader,
    resume=f"{logdir}/checkpoints/best.pth",
    verbose=False,
)
print(type(predictions))
print(predictions.shape)

threshold = 0.5
max_count = 5

for i, (features, logits) in enumerate(zip(test_dataset, predictions)):
    image = utils.tensor_to_ndimage(features["image"])
    mask_ = torch.from_numpy(logits[0]).sigmoid()
def main():
    # parse file paths from command-line arguments
    args = vars(get_args())
    train_images_path = args["train_images"]
    train_masks_path = args["train_masks"]
    test_images_path = args["test_images"]
    test_masks_path = args["test_masks"]

    # load yaml file configuration
    dir_path = os.path.dirname(os.path.realpath(__file__))
    yaml_path = os.path.join(dir_path, "config/igvc.yaml")
    ARCH = yaml.safe_load(open(yaml_path, "r"))

    # set a seed for reproducibility
    utils.set_global_seed(ARCH["train"]["seed"])
    utils.prepare_cudnn(deterministic=ARCH["train"]["cudnn"])

    # set up U-Net with pretrained EfficientNet backbone
    model = smp.Unet(
        encoder_name=ARCH["encoder"]["name"],
        encoder_weights=ARCH["encoder"]["weight"],
        classes=ARCH["train"]["classes"],
        activation=ARCH["encoder"]["activation"],
    )

    # get torch loaders
    loaders = get_loaders(
        images=np.load(train_images_path),
        masks=np.load(train_masks_path),
        image_arr_path=train_images_path,
        mask_arr_path=train_masks_path,
        random_state=ARCH["train"]["random_state"],
        valid_size=ARCH["train"]["valid_size"],
        batch_size=ARCH["train"]["batch_size"],
        num_workers=ARCH["train"]["num_workers"],
    )

    # optimize for cross entropy using AdamW
    criterion = {
        "CE": CrossentropyND(),
    }
    optimizer = AdamW(
        model.parameters(),
        lr=ARCH["train"]["lr"],
        betas=(ARCH["train"]["betas_min"], ARCH["train"]["betas_max"]),
        eps=float(ARCH["train"]["eps"]),
        weight_decay=ARCH["train"]["w_decay"],
        amsgrad=ARCH["train"]["amsgrad"],
    )
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=ARCH["train"]["optim_factor"],
        patience=ARCH["train"]["optim_patience"],
    )

    device = utils.get_device()
    print("Using device: {}".format(device))
    print(f"torch: {torch.__version__}, catalyst: {catalyst.__version__}")

    runner = SupervisedRunner(device=device, input_key="image", input_target_key="mask")

    # use Catalyst callbacks for metric calculations during training
    callbacks = [
        CriterionCallback(input_key="mask", prefix="loss", criterion_key="CE"),
        MulticlassDiceMetricCallback(input_key="mask"),
    ]

    # train and print model training logs
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=ARCH["train"]["logdir"],
        num_epochs=ARCH["train"]["epochs"],
        main_metric="loss",
        minimize_metric=ARCH["train"]["minimize_metric"],
        fp16=ARCH["train"]["fp16"],
        verbose=ARCH["train"]["verbose"],
    )

    # test model on test dataset
    test_data = SegmentationDataset(test_images_path, test_masks_path)
    infer_loader = DataLoader(
        test_data,
        batch_size=ARCH["test"]["batch_size"],
        shuffle=ARCH["test"]["shuffle"],
        num_workers=ARCH["test"]["num_workers"],
    )

    # get model predictions on test dataset
    predictions = np.vstack(
        list(
            map(
                lambda x: x["logits"].cpu().numpy(),
                runner.predict_loader(
                    loader=infer_loader,
                    resume="content/full_model2/checkpoints/best.pth",
                ),
            )))

    save_result(predictions, test_data)
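# save_result is a project-local helper that is not shown here. A hypothetical
# sketch of what it might do, assuming the stacked logits have shape
# (N, C, H, W) and that saving per-image class masks as .npy files is
# acceptable (the results/ directory and filename scheme are assumptions):
import os
import numpy as np


def save_result(predictions, test_data, out_dir="results"):
    # argmax the (N, C, H, W) logits into (N, H, W) class masks, one file each
    os.makedirs(out_dir, exist_ok=True)
    class_masks = predictions.argmax(axis=1)
    for idx, mask in enumerate(class_masks):
        np.save(os.path.join(out_dir, f"mask_{idx:04d}.npy"), mask)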
# create test dataset
test_dataset = SegmentationDataset(TEST_IMAGES, transforms=valid_transforms)

num_workers: int = 4
infer_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
)

# this gets predictions for the whole loader
predictions = np.vstack(
    list(
        map(
            lambda x: x["logits"].cpu().numpy(),
            runner.predict_loader(
                loader=infer_loader, resume=f"{logdir}/checkpoints/best.pth"
            ),
        )
    )
)
print(type(predictions))
print(predictions.shape)

threshold = 0.5
max_count = 5

for i, (features, logits) in enumerate(zip(test_dataset, predictions)):
    image = utils.tensor_to_ndimage(features["image"])
    mask_ = torch.from_numpy(logits[0]).sigmoid()
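# The loop above stops at computing the per-pixel sigmoid. A sketch of how
# threshold and max_count would typically be used to binarize and display the
# first few masks, assuming matplotlib is available and reusing the names from
# the snippet above (the side-by-side layout is an assumption):
import matplotlib.pyplot as plt

for i, (features, logits) in enumerate(zip(test_dataset, predictions)):
    if i >= max_count:
        break
    image = utils.tensor_to_ndimage(features["image"])
    mask = (torch.from_numpy(logits[0]).sigmoid() > threshold).numpy()

    fig, (ax_img, ax_mask) = plt.subplots(1, 2, figsize=(10, 5))
    ax_img.imshow(image)
    ax_img.set_title("image")
    ax_mask.imshow(mask, cmap="gray")
    ax_mask.set_title("predicted mask")
    plt.show()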
X_train, y_train = get_Xy('train')
X_valid, y_valid = get_Xy('valid')
X_test, y_test = get_Xy('test')

train_loader = get_loader(X_train, y_train)
valid_loader = get_loader(X_valid, y_valid)
loaders = {"train": train_loader, "valid": valid_loader}

model = nn.Linear(X_train.size()[1], 4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

runner = SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir="./logdir",
    callbacks=[AccuracyCallback(num_classes=4, accuracy_args=[1])],
    num_epochs=10,
    verbose=True,
)

test_loader = get_loader(X_test, y_test)
logits = runner.predict_loader(model=model, loader=test_loader, verbose=True)
# argmax over the class dimension to recover predicted labels
y_pred = torch.max(torch.from_numpy(logits), dim=1)[1]
print(y_pred[:10])
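# get_Xy and get_loader are helpers assumed by the snippet above. A minimal
# sketch of get_loader, assuming the features and labels are already torch
# tensors (the batch size and shuffling policy are assumptions):
from torch.utils.data import DataLoader, TensorDataset


def get_loader(X, y, batch_size=64, shuffle=False):
    # wrap the tensors so each batch yields a (features, labels) pair
    return DataLoader(TensorDataset(X, y), batch_size=batch_size, shuffle=shuffle)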
# model inference
test_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE,
                     shuffle=False, drop_last=True, num_workers=0)

test_truth = []
for i in test_dl:
    test_truth.append(i[1].cpu().numpy().tolist())
test_truth = [item for sublist in test_truth for item in sublist]

predictions = np.vstack(list(map(
    lambda x: x["logits"].cpu().numpy(),
    runner.predict_loader(model=model, loader=test_dl,
                          resume=f"{logdir}/model/partition/evi_full.pth")
)))

probabilities = []
pred_labels = []
true_labels = []
pred_classes = []
true_classes = []
for i, (truth, logits) in enumerate(zip(test_truth, predictions)):
    probability = torch.softmax(torch.from_numpy(logits), dim=0)
    pred_label = probability.argmax().item()
    probabilities.append(probability.cpu().numpy())
    pred_labels.append(pred_label)
    true_labels.append(truth)
    pred_classes.append(class_names[pred_label])
    true_classes.append(class_names[truth])
test_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE,
                     shuffle=False, drop_last=True, num_workers=0)

test_truth = []
for i in test_dl:
    test_truth.append(i[1].cpu().numpy().tolist())
test_truth = [item for sublist in test_truth for item in sublist]

predictions = np.vstack(
    list(
        map(
            lambda x: x["logits"].cpu().numpy(),
            runner.predict_loader(model=model, loader=test_dl,
                                  resume=f"{logdir}/model/vic_198.pth"))))

probabilities = []
pred_labels = []
true_labels = []
pred_classes = []
true_classes = []
for i, (truth, logits) in enumerate(zip(test_truth, predictions)):
    probability = torch.softmax(torch.from_numpy(logits), dim=0)
    pred_label = probability.argmax().item()
    probabilities.append(probability.cpu().numpy())
    pred_labels.append(pred_label)
    true_labels.append(truth)
    pred_classes.append(class_names[pred_label])
    true_classes.append(class_names[truth])
# model inference
test_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE,
                     shuffle=False, drop_last=True, num_workers=0)

test_truth = []
for i in test_dl:
    test_truth.append(i[1].cpu().numpy().tolist())
test_truth = [item for sublist in test_truth for item in sublist]

predictions = np.vstack(list(map(
    lambda x: x["logits"].cpu().numpy(),
    runner.predict_loader(model=model, loader=test_dl,
                          resume=f"{logdir}/model/181920/2_evi_hue.pth")
)))

probabilities = []
pred_labels = []
true_labels = []
pred_classes = []
true_classes = []
for i, (truth, logits) in enumerate(zip(test_truth, predictions)):
    probability = torch.softmax(torch.from_numpy(logits), dim=0)
    pred_label = probability.argmax().item()
    probabilities.append(probability.cpu().numpy())
    pred_labels.append(pred_label)
    true_labels.append(truth)
    pred_classes.append(class_names[pred_label])
    true_classes.append(class_names[truth])
test_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE,
                     shuffle=False, drop_last=True, num_workers=0)

test_truth = []
for i in test_dl:
    test_truth.append(i[1].cpu().numpy().tolist())
test_truth = [item for sublist in test_truth for item in sublist]

predictions = np.vstack(
    list(
        map(
            lambda x: x["logits"].cpu().numpy(),
            runner.predict_loader(
                model=model,
                loader=test_dl,
                resume=f"{logdir}/best_full_2019_210.pth"))))

probabilities = []
pred_labels = []
true_labels = []
pred_classes = []
true_classes = []
for i, (truth, logits) in enumerate(zip(test_truth, predictions)):
    probability = torch.softmax(torch.from_numpy(logits), dim=0)
    pred_label = probability.argmax().item()
    probabilities.append(probability.cpu().numpy())
    pred_labels.append(pred_label)
    true_labels.append(truth)
    pred_classes.append(class_names[pred_label])
    true_classes.append(class_names[truth])
# model inference
test_dl = DataLoader(train_ds, batch_size=BATCH_SIZE,
                     shuffle=False, drop_last=True, num_workers=0)

test_truth = []
for i in test_dl:
    test_truth.append(i[1].cpu().numpy().tolist())
test_truth = [item for sublist in test_truth for item in sublist]

predictions = np.vstack(list(map(
    lambda x: x["logits"].cpu().numpy(),
    runner.predict_loader(model=model, loader=test_dl,
                          resume=f"{logdir}/model/3_all.pth")
)))

probabilities = []
pred_labels = []
true_labels = []
pred_classes = []
true_classes = []
for i, (truth, logits) in enumerate(zip(test_truth, predictions)):
    probability = torch.softmax(torch.from_numpy(logits), dim=0)
    pred_label = probability.argmax().item()
    probabilities.append(probability.cpu().numpy())
    pred_labels.append(pred_label)
    true_labels.append(truth)
    pred_classes.append(class_names[pred_label])
    true_classes.append(class_names[truth])
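# Each of these inference blocks ends with parallel true_labels / pred_labels
# lists. A short sketch of summarizing them with scikit-learn (assuming it is
# installed; this step is not part of the original snippets):
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print("accuracy:", accuracy_score(true_labels, pred_labels))
print(confusion_matrix(true_labels, pred_labels))
# target_names assumes class_names covers every label that appears
print(classification_report(true_labels, pred_labels, target_names=class_names))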
# test model on the test dataset
test_data = SegmentationDataset(test_images_path, test_masks_path)
infer_loader = DataLoader(test_data, batch_size=12, shuffle=False, num_workers=4)

# get model predictions on the test dataset
predictions = np.vstack(
    list(
        map(
            lambda x: x["logits"].cpu().numpy(),
            runner.predict_loader(
                loader=infer_loader,
                resume="content/full_model2/checkpoints/best.pth"),
        )))

# pick sample images to analyze results
low = 1
high = len(predictions) - 1
num_results = 30
num_rand_results = num_results - 2
rand_nums = np.random.randint(low, high, num_rand_results)

# The results specifically include images 30 and 141, as they contain multiple
# lines and barrels and are therefore strong indicators of performance.
rand_nums = np.insert(rand_nums, 0, 30)
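# A sketch of how the sampled indices might be visualized, assuming the
# stacked logits have shape (N, C, H, W) and matplotlib is available (the
# argmax-to-mask conversion and figure layout are assumptions, not the
# project's own plotting code):
import matplotlib.pyplot as plt

for idx in rand_nums[:5]:  # show a handful of the selected samples
    mask = predictions[idx].argmax(axis=0)  # (H, W) class mask from (C, H, W) logits
    plt.imshow(mask)
    plt.title(f"predicted mask for test image {idx}")
    plt.show()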