def main():
    file_name = os.path.basename(__file__).split('.')[0]
    name = str(file_name)

    experiment_logger = utils.ExperimentLogger(name)

    for i, (samples_train, samples_val) in enumerate(utils.mask_stratified_k_fold()):
        print("Running split {}".format(i))

        model = Model(name, i)
        stats = model.fit(samples_train, samples_val)
        experiment_logger.set_split(i, stats)

        # Load the best performing checkpoint
        model.load()

        # Do a final validation
        model.validate(DataParallel(model.net), samples_val, -1)

        # Predict the test data
        test_predictions = utils.TestPredictions(name + '-split_{}'.format(i), mode='test')
        test_predictions.add_predictions(model.test(utils.get_test_samples()))
        test_predictions.save()

    experiment_logger.save()
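# Illustrative sketch only: utils.mask_stratified_k_fold() is not shown in this repo excerpt.
# A plausible implementation (an assumption, not the project's actual code) bins the training
# ids by salt coverage and splits them with sklearn's StratifiedKFold, yielding the
# (samples_train, samples_val) id lists consumed by main() above.
import numpy as np
from sklearn.model_selection import StratifiedKFold

def mask_stratified_k_fold_sketch(ids, coverages, n_splits=5, n_bins=10, seed=0):
    # Bin per-sample salt coverage so StratifiedKFold can balance it across folds.
    bin_edges = np.linspace(0, 1, n_bins + 1)[1:-1]
    bins = np.digitize(coverages, bin_edges)
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    ids = np.asarray(ids)
    for train_idx, val_idx in skf.split(ids, bins):
        yield ids[train_idx].tolist(), ids[val_idx].tolist()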
    def __init__(self, name, split):
        self.name = name
        self.split = split
        self.path = os.path.join(settings.checkpoints, name + '-split_{}'.format(split))
        self.net = DualHypercolumnCatRefineNet(
            SCSENoPoolResNextBase(se_resnet152()),
            num_features=128,
            classifier=lambda c: RefineNetUpsampleClassifier(2 * c, scale_factor=2),
            block=SCSERefineNetBlock
        )
        self.tta = [
            tta.Pipeline([tta.Pad((13, 14, 13, 14))]),
            tta.Pipeline([tta.Pad((13, 14, 13, 14)), tta.Flip()])
        ]
        self.test_predictions = utils.TestPredictions('ensemble-{}'.format(split)).load()
    def __init__(self, name, split):
        self.name = name
        self.split = split
        self.path = os.path.join(settings.checkpoints, name + '-split_{}'.format(split))
        self.net = DualHypercolumnCatRefineNet(
            NoPoolDPN107Base(dpn107()),
            num_features=128,
            block_multiplier=1,
            num_features_base=[376, 1152, 2432, 2048 + 640],
            classifier=lambda c: SmallDropoutRefineNetUpsampleClassifier(2 * 128, scale_factor=2),
        )
        self.tta = [
            tta.Pipeline([tta.Pad((13, 14, 13, 14))]),
            tta.Pipeline([tta.Pad((13, 14, 13, 14)), tta.Flip()])
        ]
        self.test_predictions = utils.TestPredictions('ensemble_top_6_postprocessed-split_{}'.format(split)).load()
    def __init__(self, name, split):
        self.name = name
        self.split = split
        self.path = os.path.join(settings.checkpoints, name + '-split_{}'.format(split))
        self.net = DualHypercolumnCatRefineNet(
            NoPoolDPN92Base(dpn92()),
            num_features=128,
            block_multiplier=1,
            num_features_base=[256 + 80, 512 + 192, 1024 + 528, 2048 + 640],
            classifier=lambda c: SmallOCRefineNetUpsampleClassifier(2 * 128, scale_factor=2),
        )
        self.tta = [
            tta.Pipeline([tta.Pad((13, 14, 13, 14))]),
            tta.Pipeline([tta.Pad((13, 14, 13, 14)), tta.Flip()])
        ]
        self.test_predictions = utils.TestPredictions('ensemble-{}'.format(split)).load()
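# Note on the TTA pipelines in the model definitions above (an observation, not taken from
# the repo's comments): Pad((13, 14, 13, 14)) pads the 101x101 tiles to 128x128
# (101 + 13 + 14 = 128), presumably so the encoder's strided downsampling divides evenly;
# the second pipeline additionally applies a Flip() so each image is predicted twice and
# the results can be averaged at test time.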
    shape: (height, width) of array to return
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = rle_mask.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(101 * 101, dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(101, 101).T


for ensemble_i in tqdm(range(5), ascii=True):
    subm = pd.read_csv('./submissions/ensemble-{}'.format(ensemble_i)).fillna('')
    subm['mask'] = subm['rle_mask'].apply(rle_decode)

    predictions = utils.TestPredictions('ensemble-{}'.format(ensemble_i))
    for i in tqdm(range(18000), ascii=True):
        id = subm['id'][i]
        mask = subm['mask'][i]
        predictions.add_sample(mask, id)

    predictions.save()
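# For reference, a minimal inverse of rle_decode above. This is a sketch (the name
# rle_encode_sketch is not from the repo), following the standard Kaggle run-length
# encoding with 1-indexed starts and the same column-major pixel order as the .T in rle_decode.
def rle_encode_sketch(mask):
    # Flatten in column-major order to match reshape(101, 101).T used when decoding.
    pixels = mask.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    # Positions where the value changes mark run starts and ends (1-indexed).
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)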
    'nopoolrefinenet_seresnext101_dual_hypercolumn_aux_data_poly_lr_pseudo_labels',
    'nopoolrefinenet_dpn107_dual_hypercolumn_poly_lr_aux_data_pseudo_labels',
    'nopoolrefinenet_seresnext50_dual_hypercolumn_aux_data_poly_lr_pseudo_labels',
    'nopoolrefinenet_seresnet152_dual_hypercolumn_aux_data_poly_lr_pseudo_labels',
    'nopoolrefinenet_dpn92_dual_hypercolumn_poly_lr_aux_data_pseudo_labels',
]

ensemble_name = 'ensemble'

for i in range(5):
    print('Processing fold {}'.format(i))

    test_predictions_experiment = []
    for name in experiments:
        test_predictions_split = []
        test_predictions = utils.TestPredictions('{}-split_{}'.format(name, i))
        test_predictions_split.append(test_predictions.load_raw())
        test_predictions_experiment.append(test_predictions_split)

    test_samples = utils.get_test_samples()

    predictions_mean = []
    for id in tqdm(test_samples, ascii=True):
        # p = n_models x h x w
        p = []
        for test_predictions_split in test_predictions_experiment:
            test_predictions_split = np.stack([predictions[id] for predictions in test_predictions_split], axis=0)
            p.append(test_predictions_split)

        p = np.concatenate(p, axis=0)
)
parser.add_argument('name', help='Use one of the experiment names here excluding the .py ending.')
parser.add_argument('test_set', help='Specify the path to the new test_set')
parser.add_argument('output_dir', help='Specify the path to the output dir for the test-predictions.')
args = parser.parse_args()

name = args.name
test_set = args.test_set
output_dir = args.output_dir

experiment_logger = utils.ExperimentLogger(name, mode='val')

for i, (samples_train, samples_val) in enumerate(utils.mask_stratified_k_fold()):
    # Get the model architecture
    Model = locate('experiments.' + name + '.Model')
    model = Model(name, i)

    # Load the best performing checkpoint
    model.load()

    # Predict the test data
    test_predictions = utils.TestPredictions(name + '-split_{}'.format(i), mode=output_dir)
    test_predictions.add_predictions(model.test(utils.get_test_samples(test_set)))
    test_predictions.save()
import argparse
from pydoc import locate

import utils
import settings

parser = argparse.ArgumentParser(description='Predict validation for an experiment.')
parser.add_argument('name', help='Use one of the experiment names here excluding the .py ending.')
args = parser.parse_args()

name = args.name

test_predictions = utils.TestPredictions(name, mode='val')

for i, (samples_train, samples_val) in enumerate(utils.mask_stratified_k_fold(5)):
    # Get the model architecture
    Model = locate('experiments.' + name + '.Model')
    model = Model(name, i)

    # Load the best performing checkpoint
    model.load()

    # Predict the validation data for this split
    test_predictions.add_predictions(model.test(samples_val, dir_test=settings.train, predict=model.predict_raw))
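# Example invocation (the script filename is an assumption; the experiment name must match
# a module under experiments/ without the .py ending):
#   python predict_validation.py nopoolrefinenet_seresnet152_dual_hypercolumn_aux_data_poly_lr_pseudo_labels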
    'nopoolrefinenet_seresnext50_dual_hypercolumn_aux_data_poly_lr_pseudo_labels',
    'nopoolrefinenet_seresnext50_dual_hypercolumn_aux_data_poly_lr_pseudo_labels_ensemble',
    'nopoolrefinenet_seresnet152_dual_hypercolumn_aux_data_poly_lr_pseudo_labels',
    'nopoolrefinenet_seresnet152_dual_hypercolumn_aux_data_poly_lr_pseudo_labels_ensemble',
    'nopoolrefinenet_dpn92_dual_hypercolumn_poly_lr_aux_data_pseudo_labels',
]

ensemble_name = 'ensemble-top-12-test'
input_dir = 'test'

test_predictions_experiment = []
for name in experiments:
    test_predictions_split = []
    for i in range(5):
        test_predictions = utils.TestPredictions('{}-split_{}'.format(name, i), mode=input_dir)
        test_predictions_split.append(test_predictions.load_raw())

    test_predictions_experiment.append(test_predictions_split)

test_samples = utils.get_test_samples()

predictions_mean = []
for id in tqdm(test_samples, ascii=True):
    # p = n_models x h x w
    p = []
    for i, test_predictions_split in enumerate(test_predictions_experiment):
        test_predictions_split = np.stack([predictions[id] for predictions in test_predictions_split], axis=0)
        p.append(test_predictions_split)
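# Hedged sketch of the ensembling step implied by the loop above (not necessarily the
# author's exact code): with p stacked to shape (n_models, h, w), averaging over axis 0
# gives the ensemble probability map for one id, which can then be thresholded.
def ensemble_mean_sketch(p, threshold=0.5):
    prediction = np.mean(p, axis=0)                    # mean over the stacked model outputs
    mask = (prediction > threshold).astype(np.uint8)   # optional binarization for submission
    return prediction, mask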
    'nopoolrefinenet_seresnext101_dual_hypercolumn_aux_data_poly_lr_pseudo_labels',
    'nopoolrefinenet_senet154_dual_hypercolumn_aux_data_poly_lr_pseudo_labels',
    'nopoolrefinenet_dpn107_dual_hypercolumn_poly_lr_aux_data_pseudo_labels',
    'nopoolrefinenet_dpn92_dual_hypercolumn_poly_lr_aux_data_pseudo_labels_ensemble',
    'nopoolrefinenet_seresnext50_dual_hypercolumn_aux_data_poly_lr_pseudo_labels',
    'nopoolrefinenet_seresnext50_dual_hypercolumn_aux_data_poly_lr_pseudo_labels_ensemble',
    'nopoolrefinenet_seresnet152_dual_hypercolumn_aux_data_poly_lr_pseudo_labels',
    'nopoolrefinenet_seresnet152_dual_hypercolumn_aux_data_poly_lr_pseudo_labels_ensemble',
    'nopoolrefinenet_dpn92_dual_hypercolumn_poly_lr_aux_data_pseudo_labels',
]

output = 'ensemble-top-12-val'

test_predictions_experiment = []
for name in experiments:
    test_predictions = utils.TestPredictions('{}'.format(name), mode='val')
    test_predictions_experiment.append(test_predictions.load_raw())

train_samples = utils.get_train_samples()

transforms = generator.TransformationsGenerator([])
dataset = datasets.AnalysisDataset(train_samples, settings.train, transforms, utils.TestPredictions('{}'.format(name), mode='val').load())

split_map = []
val = utils.get_train_samples()
predictions = []
masks = []

with tqdm(total=len(val), leave=False) as pbar:
    for id in val: