def preprocessing_fillna(features, config, train_mode, suffix, **kwargs):
    """Create a step that imputes missing values with a constant fill value.

    Args:
        features: a single features step (inference) or a
            ``(features_train, features_valid)`` pair (training).
        config: experiment config; the constant comes from
            ``config.preprocessing.impute_missing.fill_value``.
        train_mode: when True, the step is also wired to the validation split.
        suffix: appended to the step name to keep step names unique.

    Returns:
        The configured fillna ``Step``.
    """
    fill_value = config.preprocessing.impute_missing.fill_value
    step_name = 'fillna{}'.format(suffix)
    exp_dir = config.pipeline.experiment_directory

    if not train_mode:
        return Step(name=step_name,
                    transformer=_fillna(fill_value),
                    input_steps=[features],
                    adapter=Adapter({'X': E(features.name, 'features')}),
                    experiment_directory=exp_dir,
                    **kwargs)

    features_train, features_valid = features
    return Step(name=step_name,
                transformer=_fillna(fill_value),
                input_steps=[features_train, features_valid],
                adapter=Adapter({
                    'X': E(features_train.name, 'features'),
                    'X_valid': E(features_valid.name, 'features'),
                }),
                experiment_directory=exp_dir,
                **kwargs)
def row_aggregation_features(config, train_mode, suffix, **kwargs):
    """Build one row-aggregation feature step per configured bucket number.

    Returns:
        A list of steps, or in train mode a ``(train_steps, valid_steps)``
        pair where the validation steps reuse the fitted train transformers.
    """
    bucket_nrs = config.row_aggregations.bucket_nrs
    exp_dir = config.pipeline.experiment_directory

    train_steps = [
        Step(name='row_agg_feature_bucket_nr{}{}'.format(nr, suffix),
             transformer=fe.RowAggregationFeatures(bucket_nr=nr),
             input_data=['input'],
             adapter=Adapter({'X': E('input', 'X')}),
             experiment_directory=exp_dir,
             **kwargs)
        for nr in bucket_nrs
    ]

    if not train_mode:
        return train_steps

    # Validation steps reuse the already-fitted train transformers.
    valid_steps = [
        Step(name='row_agg_feature_bucket_nr{}_valid{}'.format(nr, suffix),
             transformer=train_step,
             input_data=['input'],
             adapter=Adapter({'X': E('input', 'X_valid')}),
             experiment_directory=exp_dir,
             **kwargs)
        for nr, train_step in zip(bucket_nrs, train_steps)
    ]
    return train_steps, valid_steps
def _numerical_transforms(dispatchers, config, train_mode, suffix, **kwargs):
    """Apply a log(x + 1) transform to the numerical features.

    ``dispatchers`` is a single type-split step (inference) or a
    ``(train, valid)`` pair (training); the valid step reuses the fitted
    train transformer.
    """
    exp_dir = config.pipeline.experiment_directory

    if train_mode:
        split_train, split_valid = dispatchers
    else:
        split_train = dispatchers

    log_num = Step(
        name='log_num{}'.format(suffix),
        transformer=make_transformer(lambda x: np.log(x + 1),
                                     output_name='numerical_features'),
        input_steps=[split_train],
        adapter=Adapter({'x': E(split_train.name, 'numerical_features')}),
        experiment_directory=exp_dir,
        **kwargs)

    if not train_mode:
        return log_num

    log_num_valid = Step(
        name='log_num_valid{}'.format(suffix),
        transformer=log_num,
        input_steps=[split_valid],
        adapter=Adapter({'x': E(split_valid.name, 'numerical_features')}),
        experiment_directory=exp_dir,
        **kwargs)
    return log_num, log_num_valid
def data_cleaning_v2(config, train_mode, suffix, **kwargs):
    """Extend v1 data cleaning with dummy indicators for missing values."""
    cleaned = data_cleaning_v1(config, train_mode, suffix, **kwargs)
    exp_dir = config.pipeline.experiment_directory

    if train_mode:
        cleaned, cleaned_valid = cleaned

    impute_missing = Step(
        name='dummies_missing{}'.format(suffix),
        transformer=dc.DummiesMissing(**config.dummies_missing),
        input_steps=[cleaned],
        adapter=Adapter({'X': E(cleaned.name, 'numerical_features')}),
        experiment_directory=exp_dir,
        **kwargs)

    if not train_mode:
        return impute_missing

    # The validation step reuses the fitted train transformer.
    impute_missing_valid = Step(
        name='dummies_missing_valid{}'.format(suffix),
        transformer=impute_missing,
        input_steps=[cleaned_valid],
        adapter=Adapter({'X': E(cleaned_valid.name, 'numerical_features')}),
        experiment_directory=exp_dir,
        **kwargs)
    return impute_missing, impute_missing_valid
def visualizer(model, label_encoder, config):
    """Wire label decoding, prediction decoding and visualization steps."""
    cache_dir = config.env.cache_dirpath

    label_decoder = Step(name='label_decoder',
                         transformer=GoogleAiLabelDecoder(),
                         input_steps=[label_encoder],
                         experiment_directory=cache_dir)

    decoder = Step(name='decoder',
                   transformer=DataDecoder(**config.postprocessing.data_decoder),
                   input_data=['input'],
                   input_steps=[model],
                   experiment_directory=cache_dir)

    return Step(name='visualizer',
                transformer=Visualizer(),
                input_steps=[label_decoder, decoder],
                input_data=['input'],
                adapter=Adapter({
                    'images_data': E('input', 'images_data'),
                    'results': E(decoder.name, 'results'),
                    'decoder_dict': E(label_decoder.name, 'inverse_mapping'),
                }),
                experiment_directory=cache_dir)
def postprocessing(model, label_encoder, config):
    """Decode model predictions and format them into a submission."""
    cache_dir = config.env.cache_dirpath

    label_decoder = Step(name='label_decoder',
                         transformer=GoogleAiLabelDecoder(),
                         input_steps=[label_encoder],
                         experiment_directory=cache_dir)

    decoder = Step(name='decoder',
                   transformer=DataDecoder(**config.postprocessing.data_decoder),
                   input_steps=[model],
                   experiment_directory=cache_dir)

    return Step(
        name='submission_producer',
        transformer=PredictionFormatter(
            **config.postprocessing.prediction_formatter),
        input_steps=[label_decoder, decoder],
        input_data=['input'],
        adapter=Adapter({
            'image_ids': E('input', 'img_ids'),
            'results': E(decoder.name, 'results'),
            'decoder_dict': E(label_decoder.name, 'inverse_mapping'),
        }),
        experiment_directory=cache_dir)
def preprocessing_inference(config, model_name='unet', suffix=''):
    """Build the inference-time reader + loader sub-pipeline.

    Raises:
        NotImplementedError: for an unknown loader mode or image source.
    """
    exp_dir = config.execution.experiment_dir

    if config.general.loader_mode == 'resize_and_pad':
        loader_config = config.loaders.resize_and_pad
    elif config.general.loader_mode == 'resize':
        loader_config = config.loaders.resize
    else:
        raise NotImplementedError

    image_source = loader_config.dataset_params.image_source
    if image_source == 'memory':
        reader_inference = Step(
            name='reader_inference{}'.format(suffix),
            transformer=loaders.ImageReader(train_mode=False,
                                            **config.reader[model_name]),
            input_data=['input'],
            adapter=Adapter({'meta': E('input', 'meta')}),
            experiment_directory=exp_dir)
    elif image_source == 'disk':
        reader_inference = Step(
            name='xy_inference{}'.format(suffix),
            transformer=loaders.XYSplit(train_mode=False,
                                        **config.xy_splitter[model_name]),
            input_data=['input'],
            adapter=Adapter({'meta': E('input', 'meta')}),
            experiment_directory=exp_dir)
    else:
        raise NotImplementedError

    return Step(name='loader{}'.format(suffix),
                transformer=loaders.ImageSegmentationLoader(train_mode=False,
                                                            **loader_config),
                input_steps=[reader_inference],
                adapter=Adapter({'X': E(reader_inference.name, 'X'),
                                 'y': E(reader_inference.name, 'y'),
                                 }),
                experiment_directory=exp_dir,
                cache_output=True)
def _feature_by_type_splits(config, train_mode, suffix):
    """Split input features by inferred column type.

    The base splitter is identical in both modes; in train mode a mirrored
    step (reusing the same transformer) is added for the validation data.
    """
    base_split = Step(
        name='inferred_type_splitter{}'.format(suffix),
        transformer=fe.InferredTypeSplitter(),
        input_data=['input'],
        adapter=Adapter({'X': E('input', 'X')}),
        experiment_directory=config.pipeline.experiment_directory)

    if not train_mode:
        return base_split

    valid_split = Step(
        name='inferred_type_splitter_valid{}'.format(suffix),
        transformer=base_split,
        input_data=['input'],
        adapter=Adapter({'X': E('input', 'X_valid')}),
        experiment_directory=config.pipeline.experiment_directory)
    return base_split, valid_split
def _projection(projection_config, data_cleaned, config, train_mode, suffix, **kwargs):
    """Project cleaned numerical features with a decomposition transformer
    and convert the projected matrix back to a pandas frame.

    ``projection_config`` is a ``(TransformerClass, transformer_kwargs,
    transformer_name)`` triple.
    """
    (DecompositionTransformer,
     transformer_config,
     transformer_name) = projection_config
    exp_dir = config.pipeline.experiment_directory

    if train_mode:
        data_cleaned, data_cleaned_valid = data_cleaned

    projector = Step(
        name='{}{}'.format(transformer_name, suffix),
        transformer=DecompositionTransformer(**transformer_config),
        input_steps=[data_cleaned],
        adapter=Adapter({'features': E(data_cleaned.name, 'numerical_features')}),
        experiment_directory=exp_dir,
        **kwargs)

    projector_pandas = Step(
        name='{}_pandas{}'.format(transformer_name, suffix),
        transformer=make_transformer(
            partial(to_pandas, column_prefix=transformer_name),
            output_name='numerical_features'),
        input_steps=[projector],
        adapter=Adapter({'x': E(projector.name, 'features')}),
        experiment_directory=exp_dir,
        **kwargs)

    if not train_mode:
        return projector_pandas

    # Validation branch reuses the fitted train transformers.
    projector_valid = Step(
        name='{}_valid{}'.format(transformer_name, suffix),
        transformer=projector,
        input_steps=[data_cleaned_valid],
        adapter=Adapter({'features': E(data_cleaned_valid.name,
                                       'numerical_features')}),
        experiment_directory=exp_dir,
        **kwargs)

    projector_pandas_valid = Step(
        name='{}_pandas_valid{}'.format(transformer_name, suffix),
        transformer=projector_pandas,
        input_steps=[projector_valid],
        adapter=Adapter({'x': E(projector_valid.name, 'features')}),
        experiment_directory=exp_dir,
        **kwargs)
    return projector_pandas, projector_pandas_valid
def mask_postprocessing(config, suffix=''):
    """Resize predicted masks back to the original size, then binarize.

    Raises:
        NotImplementedError: for an unknown loader mode.
    """
    exp_dir = config.env.experiment_dir

    if config.execution.loader_mode == 'crop_and_pad':
        size_adjustment_function = partial(crop_image, target_size=ORIGINAL_SIZE)
    elif config.execution.loader_mode == 'resize':
        size_adjustment_function = partial(resize_image, target_size=ORIGINAL_SIZE)
    else:
        raise NotImplementedError

    mask_resize = Step(name='mask_resize{}'.format(suffix),
                       transformer=make_apply_transformer(
                           size_adjustment_function,
                           output_name='resized_images',
                           apply_on=['images']),
                       input_data=['input_masks'],
                       adapter=Adapter({
                           'images': E('input_masks', 'mask_prediction'),
                       }),
                       experiment_directory=exp_dir)

    return Step(name='binarizer{}'.format(suffix),
                transformer=make_apply_transformer(
                    partial(binarize,
                            threshold=config.thresholder.threshold_masks),
                    output_name='binarized_images',
                    apply_on=['images']),
                input_steps=[mask_resize],
                adapter=Adapter({
                    'images': E(mask_resize.name, 'resized_images'),
                }),
                experiment_directory=exp_dir)
def test_adapter_creates_defined_keys(data):
    """The adapted result should expose exactly the keys the recipe defines."""
    adapter = Adapter({
        'X': [E('input_1', 'features')],
        'Y': [E('input_2', 'extra_features')],
    })
    adapted = adapter.adapt(data)
    assert set(adapted.keys()) == {'X', 'Y'}
def test_recipe_with_single_item(data):
    """A bare ``E`` recipe maps straight to the referenced value."""
    adapter = Adapter({
        'X': E('input_1', 'labels'),
        'Y': E('input_3', 'labels'),
    })
    adapted = adapter.adapt(data)
    assert np.array_equal(adapted['X'], data['input_1']['labels'])
    assert np.array_equal(adapted['Y'], data['input_3']['labels'])
def aggregator(name, model, tta_generator, experiment_directory, config):
    """Aggregate per-augmentation model predictions back into one output."""
    recipe = Adapter({
        'images': E(model.name, 'mask_prediction'),
        'tta_params': E(tta_generator.name, 'tta_params'),
        'img_ids': E(tta_generator.name, 'img_ids'),
    })
    return Step(name=name,
                transformer=loaders.TestTimeAugmentationAggregator(**config),
                input_steps=[model, tta_generator],
                adapter=recipe,
                experiment_directory=experiment_directory)
def _join_features(numerical_features, numerical_features_valid,
                   categorical_features, categorical_features_valid,
                   config, train_mode, suffix, **kwargs):
    """Join numerical and categorical feature steps into a single table.

    In train mode the joined output is persisted and reloaded on reruns;
    in-memory caching is enabled in both modes. The validation joiner
    reuses the fitted train transformer.
    """
    if train_mode:
        persist_output = load_persisted_output = True
    else:
        persist_output = load_persisted_output = False
    cache_output = True

    def _joiner(name, transformer, num_steps, cat_steps):
        # One joiner step over the given numerical/categorical inputs.
        return Step(
            name=name,
            transformer=transformer,
            input_steps=num_steps + cat_steps,
            adapter=Adapter({
                'numerical_feature_list': [
                    E(step.name, 'numerical_features') for step in num_steps
                ],
                'categorical_feature_list': [
                    E(step.name, 'categorical_features') for step in cat_steps
                ],
            }),
            experiment_directory=config.pipeline.experiment_directory,
            persist_output=persist_output,
            cache_output=cache_output,
            load_persisted_output=load_persisted_output)

    feature_joiner = _joiner('feature_joiner{}'.format(suffix),
                             fe.FeatureJoiner(**config.feature_joiner),
                             numerical_features,
                             categorical_features)

    if not train_mode:
        return feature_joiner

    feature_joiner_valid = _joiner('feature_joiner_valid{}'.format(suffix),
                                   feature_joiner,
                                   numerical_features_valid,
                                   categorical_features_valid)
    return feature_joiner, feature_joiner_valid
def unet_tta(config, suffix=''):
    """U-Net inference with test-time augmentation and mask resizing.

    Raises:
        NotImplementedError: for an unknown loader mode.
    """
    exp_dir = config.execution.experiment_dir
    preprocessing, tta_generator = pipelines.preprocessing_inference_tta(
        config, model_name='unet')

    unet = Step(name='unet{}'.format(suffix),
                transformer=models.PyTorchUNet(**config.model['unet']),
                input_data=['callback_input'],
                input_steps=[preprocessing],
                is_trainable=True,
                experiment_directory=exp_dir)

    tta_aggregator = pipelines.aggregator(
        'tta_aggregator{}'.format(suffix),
        unet,
        tta_generator=tta_generator,
        experiment_directory=exp_dir,
        config=config.tta_aggregator)

    prediction_renamed = Step(
        name='prediction_renamed{}'.format(suffix),
        transformer=IdentityOperation(),
        input_steps=[tta_aggregator],
        adapter=Adapter({
            'mask_prediction': E(tta_aggregator.name, 'aggregated_prediction')
        }),
        experiment_directory=exp_dir)

    if config.general.loader_mode == 'resize_and_pad':
        size_adjustment_function = partial(
            postprocessing.crop_image,
            target_size=config.general.original_size)
    elif config.general.loader_mode == 'resize':
        size_adjustment_function = partial(
            postprocessing.resize_image,
            target_size=config.general.original_size)
    else:
        raise NotImplementedError

    return Step(name='mask_resize{}'.format(suffix),
                transformer=utils.make_apply_transformer(
                    size_adjustment_function,
                    output_name='resized_images',
                    apply_on=['images']),
                input_steps=[prediction_renamed],
                adapter=Adapter({
                    'images': E(prediction_renamed.name, 'mask_prediction'),
                }),
                experiment_directory=exp_dir)
def _target_encoders(dispatchers, config, train_mode, **kwargs):
    """Target-encode the categorical features.

    In train mode the labels are first converted to numpy and a mirrored
    validation step (reusing the fitted encoder) is added.
    """
    cache_dir = config.env.cache_dirpath

    if not train_mode:
        split = dispatchers
        return Step(name='target_encoder',
                    transformer=fe.TargetEncoder(),
                    input_data=['input'],
                    input_steps=[split],
                    adapter=Adapter({
                        'X': E(split.name, 'categorical_features')
                    }),
                    cache_dirpath=cache_dir,
                    **kwargs)

    split, split_valid = dispatchers
    numpy_label, numpy_label_valid = _to_numpy_label(config, **kwargs)

    target_encoder = Step(name='target_encoder',
                          transformer=fe.TargetEncoder(),
                          input_data=['input'],
                          input_steps=[split, numpy_label],
                          adapter=Adapter({
                              'X': E(split.name, 'categorical_features'),
                              'y': E(numpy_label.name, 'y'),
                          }),
                          cache_dirpath=cache_dir,
                          **kwargs)

    target_encoder_valid = Step(
        name='target_encoder_valid',
        transformer=target_encoder,
        input_data=['input'],
        input_steps=[split_valid, numpy_label_valid],
        adapter=Adapter({
            'X': E(split_valid.name, 'categorical_features'),
            'y': E(numpy_label_valid.name, 'y'),
        }),
        cache_dirpath=cache_dir,
        **kwargs)
    return target_encoder, target_encoder_valid
def unet(config, suffix='', train_mode=True):
    """Build the U-Net pipeline (train or inference) plus mask resizing.

    Raises:
        NotImplementedError: for an unknown loader mode.
    """
    exp_dir = config.execution.experiment_dir

    if train_mode:
        preprocessing = pipelines.preprocessing_train(config,
                                                      model_name='unet',
                                                      suffix=suffix)
    else:
        preprocessing = pipelines.preprocessing_inference(config, suffix=suffix)

    unet = utils.FineTuneStep(
        name='unet{}'.format(suffix),
        transformer=models.PyTorchUNet(**config.model['unet']),
        input_data=['callback_input'],
        input_steps=[preprocessing],
        adapter=Adapter({
            'datagen': E(preprocessing.name, 'datagen'),
            'validation_datagen': E(preprocessing.name, 'validation_datagen'),
            'meta_valid': E('callback_input', 'meta_valid'),
        }),
        is_trainable=True,
        fine_tuning=config.model.unet.training_config.fine_tuning,
        experiment_directory=exp_dir)

    if config.general.loader_mode == 'resize_and_pad':
        size_adjustment_function = partial(
            postprocessing.crop_image,
            target_size=config.general.original_size)
    elif config.general.loader_mode == 'resize':
        size_adjustment_function = partial(
            postprocessing.resize_image,
            target_size=config.general.original_size)
    else:
        raise NotImplementedError

    return Step(name='mask_resize{}'.format(suffix),
                transformer=utils.make_apply_transformer(
                    size_adjustment_function,
                    output_name='resized_images',
                    apply_on=['images']),
                input_steps=[unet],
                adapter=Adapter({
                    'images': E(unet.name, 'mask_prediction'),
                }),
                experiment_directory=exp_dir)
def classifier_light_gbm(features, config, train_mode, suffix='', **kwargs):
    """Build the LightGBM model step trained on a log1p-transformed target.

    In train mode the target (and its validation split) is log-transformed
    and, when ``config.random_search.light_gbm.n_runs`` is set,
    hyper-parameters are tuned with random search. Predictions are mapped
    back to the original scale via ``exp_target``.
    """
    exp_dir = config.pipeline.experiment_directory

    if train_mode:
        features_train, features_valid = features

        log_target = Step(
            name='log_target{}'.format(suffix),
            transformer=make_transformer(lambda x: np.log1p(x),
                                         output_name='y'),
            input_data=['input'],
            adapter=Adapter({'x': E('input', 'y')}),
            experiment_directory=exp_dir,
            **kwargs)

        log_target_valid = Step(
            name='log_target_valid{}'.format(suffix),
            transformer=log_target,
            input_data=['input'],
            adapter=Adapter({'x': E('input', 'y_valid')}),
            experiment_directory=exp_dir,
            **kwargs)

        if config.random_search.light_gbm.n_runs:
            # Tune LightGBM hyper-parameters with random search.
            transformer = RandomSearchOptimizer(
                TransformerClass=LightGBM,
                params=config.light_gbm,
                train_input_keys=[],
                valid_input_keys=['X_valid', 'y_valid'],
                score_func=root_mean_squared_error,
                maximize=False,
                n_runs=config.random_search.light_gbm.n_runs,
                callbacks=[
                    NeptuneMonitor(
                        **config.random_search.light_gbm.callbacks.neptune_monitor),
                    PersistResults(
                        **config.random_search.light_gbm.callbacks.persist_results),
                ])
        else:
            transformer = LightGBM(**config.light_gbm)

        light_gbm = Step(
            name='light_gbm{}'.format(suffix),
            transformer=transformer,
            input_data=['input'],
            input_steps=[features_train, features_valid,
                         log_target, log_target_valid],
            adapter=Adapter({
                'X': E(features_train.name, 'features'),
                'y': E(log_target.name, 'y'),
                'feature_names': E(features_train.name, 'feature_names'),
                'categorical_features': E(features_train.name,
                                          'categorical_features'),
                'X_valid': E(features_valid.name, 'features'),
                'y_valid': E(log_target_valid.name, 'y'),
            }),
            experiment_directory=exp_dir,
            **kwargs)
    else:
        light_gbm = Step(
            name='light_gbm{}'.format(suffix),
            transformer=LightGBM(**config.light_gbm),
            input_steps=[features],
            adapter=Adapter({'X': E(features.name, 'features')}),
            experiment_directory=exp_dir,
            **kwargs)

    # Map log-space predictions back to the original target scale.
    return exp_target(light_gbm, config, suffix, **kwargs)
def test_nested_recipes(data):
    """Recipes nested inside lists and dicts are resolved recursively."""
    adapter = Adapter({
        'X': [{'a': [E('input_1', 'features')]}],
        'Y': {'a': [{'b': E('input_2', 'extra_features')}]},
    })
    adapted = adapter.adapt(data)
    assert adapted['X'] == [{'a': [data['input_1']['features']]}]
    assert adapted['Y'] == {'a': [{'b': data['input_2']['extra_features']}]}
def classifier_sklearn(sklearn_features, ClassifierClass, full_config, clf_name,
                       train_mode, suffix, normalize, **kwargs):
    """Build an sklearn classifier step, optionally tuned by random search.

    ``full_config`` is a ``(config, model_params, random_search_config)``
    triple.
    """
    config, model_params, rs_config = full_config
    exp_dir = config.pipeline.experiment_directory
    step_name = '{}{}'.format(clf_name, suffix)

    if not train_mode:
        return Step(
            name=step_name,
            transformer=get_sklearn_classifier(ClassifierClass, normalize,
                                               **model_params),
            is_trainable=True,
            input_steps=[sklearn_features],
            adapter=Adapter({'X': E(sklearn_features.name, 'X')}),
            experiment_directory=exp_dir,
            **kwargs)

    if getattr(config.random_search, clf_name).n_runs:
        # Random-search over the classifier's hyper-parameters.
        transformer = RandomSearchOptimizer(
            partial(get_sklearn_classifier,
                    ClassifierClass=ClassifierClass,
                    normalize=normalize),
            model_params,
            train_input_keys=[],
            valid_input_keys=['X_valid', 'y_valid'],
            score_func=score_function,
            maximize=True,
            n_runs=rs_config.n_runs,
            callbacks=[
                NeptuneMonitor(**rs_config.callbacks.neptune_monitor),
                PersistResults(**rs_config.callbacks.persist_results),
            ])
    else:
        transformer = get_sklearn_classifier(ClassifierClass, normalize,
                                             **model_params)

    return Step(
        name=step_name,
        transformer=transformer,
        is_trainable=True,
        input_data=['tap4fun'],
        input_steps=[sklearn_features],
        adapter=Adapter({
            'X': E(sklearn_features.name, 'X'),
            'y': E('tap4fun', 'y'),
            'X_valid': E(sklearn_features.name, 'X_valid'),
            'y_valid': E('tap4fun', 'y_valid'),
        }),
        experiment_directory=exp_dir,
        **kwargs)
def _previous_application(config, train_mode, suffix, **kwargs):
    """Extract hand-crafted previous-application features and merge them onto
    the application table (and its validation split in train mode).

    Args:
        config: experiment configuration.
        train_mode: when True, also build the validation merge step.
        suffix: appended to step names to keep them unique per pipeline.

    Returns:
        The merge step, or a ``(merge, merge_valid)`` pair in train mode.
    """
    previous_application_cleaned = _previous_application_cleaning(
        config, suffix, **kwargs)
    exp_dir = config.pipeline.experiment_directory

    previous_applications_hand_crafted = Step(
        # BUG FIX: the step name was missing the suffix, unlike the sibling
        # merge steps below, so pipelines built with different suffixes
        # collided on the same step name / cached artifacts.
        name='previous_applications_hand_crafted{}'.format(suffix),
        transformer=fe.PreviousApplicationFeatures(
            **config.previous_applications),
        input_steps=[previous_application_cleaned],
        adapter=Adapter({
            'prev_applications': E(previous_application_cleaned.name,
                                   'previous_application')
        }),
        experiment_directory=exp_dir,
        **kwargs)

    previous_applications_hand_crafted_merge = Step(
        name='previous_applications_hand_crafted_merge{}'.format(suffix),
        transformer=fe.GroupbyMerge(**config.previous_applications),
        input_data=['application'],
        input_steps=[previous_applications_hand_crafted],
        adapter=Adapter({
            'table': E('application', 'X'),
            'features': E(previous_applications_hand_crafted.name,
                          'features_table')
        }),
        experiment_directory=exp_dir,
        **kwargs)

    if not train_mode:
        return previous_applications_hand_crafted_merge

    # Validation merge reuses the fitted train transformer.
    previous_applications_hand_crafted_merge_valid = Step(
        name='previous_applications_hand_crafted_merge_valid{}'.format(suffix),
        transformer=previous_applications_hand_crafted_merge,
        input_data=['application'],
        input_steps=[previous_applications_hand_crafted],
        adapter=Adapter({
            'table': E('application', 'X_valid'),
            'features': E(previous_applications_hand_crafted.name,
                          'features_table')
        }),
        experiment_directory=exp_dir,
        **kwargs)
    return (previous_applications_hand_crafted_merge,
            previous_applications_hand_crafted_merge_valid)
def stacking_preprocessing_inference(config, model_name='network', suffix=''):
    """Build the inference-time XY-split + stacking-loader sub-pipeline."""
    exp_dir = config.execution.experiment_dir

    reader_inference = Step(
        name='xy_inference{}'.format(suffix),
        transformer=loaders.XYSplit(train_mode=False,
                                    **config.xy_splitter[model_name]),
        input_data=['input'],
        adapter=Adapter({'meta': E('input', 'meta')}),
        experiment_directory=exp_dir)

    return Step(
        name='loader{}'.format(suffix),
        transformer=loaders.ImageSegmentationLoaderStacking(
            train_mode=False, **config.loaders.stacking),
        input_steps=[reader_inference],
        adapter=Adapter({'X': E(reader_inference.name, 'X'),
                         'y': E(reader_inference.name, 'y'),
                         }),
        experiment_directory=exp_dir,
        cache_output=True)
def sklearn_main(config, ClassifierClass, clf_name, train_mode, normalize=False):
    """Assemble the sklearn pipeline: features -> fillna -> classifier -> clipper.

    Args:
        config: experiment configuration.
        ClassifierClass: sklearn estimator class to fit.
        clf_name: attribute name used to pull model/random-search params
            from ``config`` and to name the classifier step.
        train_mode: build the train (with validation split) or inference graph.
        normalize: forwarded to ``classifier_sklearn``.
    """
    model_params = getattr(config, clf_name)
    random_search_config = getattr(config.random_search, clf_name)
    full_config = (config, model_params, random_search_config)

    if train_mode:
        features, features_valid = feature_extraction(
            config,
            train_mode,
            persist_output=True,
            cache_output=True,
            load_persisted_output=True)
        # BUG FIX: `suffix` is a required positional argument of
        # preprocessing_fillna; it was omitted here and the call raised
        # TypeError.
        sklearn_preproc = preprocessing_fillna((features, features_valid),
                                               config, train_mode, suffix='')
    else:
        features = feature_extraction(config, train_mode, cache_output=True)
        sklearn_preproc = preprocessing_fillna(features, config, train_mode,
                                               suffix='')

    # BUG FIX: classifier_sklearn takes `suffix` before `normalize`; the old
    # positional call passed `normalize` into the `suffix` slot and left
    # `normalize` missing.
    sklearn_clf = classifier_sklearn(sklearn_preproc,
                                     ClassifierClass,
                                     full_config,
                                     clf_name,
                                     train_mode,
                                     suffix='',
                                     normalize=normalize)

    clipper = Step(name='clipper',
                   transformer=Clipper(**config.clipper),
                   input_steps=[sklearn_clf],
                   adapter=Adapter(
                       {'prediction': E(sklearn_clf.name, 'predicted')}),
                   experiment_directory=config.pipeline.experiment_directory)
    return clipper
def test_recipe_with_tuple(data):
    """Tuple recipes keep their tuple type, length and element order."""
    adapter = Adapter({
        'X': (),
        'Y': (E('input_1', 'features'), ),
        'Z': (E('input_1', 'features'), E('input_2', 'extra_features')),
    })
    adapted = adapter.adapt(data)

    for expected_len, key in enumerate(('X', 'Y', 'Z')):
        assert isinstance(adapted[key], tuple)
        assert len(adapted[key]) == expected_len

    assert adapted['X'] == ()
    assert np.array_equal(adapted['Y'][0], data['input_1']['features'])
    assert np.array_equal(adapted['Z'][0], data['input_1']['features'])
    assert np.array_equal(adapted['Z'][1], data['input_2']['extra_features'])
def exp_target(model_output, config, suffix, **kwargs):
    """Invert the log1p target transform on model predictions (exp(x) - 1)."""
    return Step(name='exp_target{}'.format(suffix),
                transformer=make_transformer(lambda x: np.exp(x) - 1,
                                             output_name='prediction'),
                input_steps=[model_output],
                adapter=Adapter({'x': E(model_output.name, 'prediction')}),
                experiment_directory=config.pipeline.experiment_directory,
                **kwargs)
def retinanet(config, train_mode, visualize=False):
    """Assemble the RetinaNet pipeline: loader -> model -> postprocessing.

    Returns the model step in train mode, the visualization pipeline when
    ``visualize`` is set, and otherwise the submission-producing output step.
    """
    persist_output = False
    load_persisted_output = False

    loader = preprocessing_generator(config, is_train=train_mode)
    model_step = Step(name='retinanet',
                      transformer=Retina(**config.retinanet,
                                         train_mode=train_mode),
                      input_steps=[loader],
                      experiment_directory=config.env.cache_dirpath,
                      persist_output=persist_output,
                      is_trainable=True,
                      load_persisted_output=load_persisted_output)

    if train_mode:
        return model_step
    if visualize:
        return visualizer(model_step, loader.get_step('label_encoder'), config)

    postprocessor = postprocessing(model_step,
                                   loader.get_step('label_encoder'),
                                   config)
    return Step(name='output',
                transformer=IdentityOperation(),
                input_steps=[postprocessor],
                adapter=Adapter(
                    {'y_pred': E(postprocessor.name, 'submission')}),
                experiment_directory=config.env.cache_dirpath,
                persist_output=persist_output,
                load_persisted_output=load_persisted_output)
def _to_numpy_label(config, **kwargs):
    """Convert the train and validation targets to numpy arrays.

    The validation step reuses the train step's transformer.
    """
    exp_dir = config.pipeline.experiment_directory

    to_numpy_label = Step(name='to_numpy_label',
                          transformer=ToNumpyLabel(),
                          input_data=['input'],
                          adapter=Adapter({'y': [E('input', 'y')]}),
                          experiment_directory=exp_dir,
                          **kwargs)

    to_numpy_label_valid = Step(name='to_numpy_label_valid',
                                transformer=to_numpy_label,
                                input_data=['input'],
                                adapter=Adapter({'y': [E('input', 'y_valid')]}),
                                experiment_directory=exp_dir,
                                **kwargs)
    return to_numpy_label, to_numpy_label_valid
def ensemble(config, train_mode, suffix="", **kwargs):
    # Average the predictions of three sub-pipelines.
    # `suffix` and `**kwargs` are accepted but unused in this function.
    lgb_step = get_pipeline('lightGBM', train_mode)
    logreg_step = get_pipeline('log_reg', train_mode)
    # NOTE(review): likely copy-paste bug -- `rf_step` is also built from
    # 'log_reg', so y_proba_2 and y_proba_3 below are the same prediction and
    # the log_reg output is averaged twice. Confirm the intended pipeline key
    # (presumably a random-forest pipeline) with the author.
    rf_step = get_pipeline('log_reg', train_mode)
    ens_step = Step(
        name='Ensembler',
        transformer=AvgTransformer(),
        input_steps=[lgb_step, logreg_step, rf_step],
        adapter=Adapter({
            'y_proba_1': E(lgb_step.name, 'prediction'),
            'y_proba_2': E(rf_step.name, 'prediction'),
            'y_proba_3': E(logreg_step.name, 'prediction'),
        }),
        experiment_directory=config.pipeline.experiment_directory,
    )
    return ens_step
def unet_tta(config, suffix=''):
    """U-Net inference with test-time augmentation.

    Returns a step exposing the aggregated prediction under the
    ``mask_prediction`` key.
    """
    exp_dir = config.env.experiment_dir
    preprocessing, tta_generator = preprocessing_inference_tta(
        config, model_name='unet')

    unet = Step(name='unet{}'.format(suffix),
                transformer=PyTorchUNet(**config.model['unet']),
                input_data=['callback_input'],
                input_steps=[preprocessing],
                is_trainable=True,
                experiment_directory=exp_dir)

    tta_aggregator = aggregator('tta_aggregator{}'.format(suffix),
                                unet,
                                tta_generator=tta_generator,
                                experiment_directory=exp_dir,
                                config=config.tta_aggregator)

    return Step(name='prediction_renamed{}'.format(suffix),
                transformer=IdentityOperation(),
                input_steps=[tta_aggregator],
                adapter=Adapter({
                    'mask_prediction': E(tta_aggregator.name,
                                         'aggregated_prediction')
                }),
                experiment_directory=exp_dir)
def network_tta(config, suffix=''):
    """Segmentation-network inference with TTA and mask resizing.

    Raises:
        NotImplementedError: when SECOND_LEVEL is set (TTA is unsupported
            there) or for an unknown loader mode.
    """
    if SECOND_LEVEL:
        raise NotImplementedError('Second level does not work with TTA')

    exp_dir = config.execution.experiment_dir
    preprocessing, tta_generator = pipelines.preprocessing_inference_tta(
        config, model_name='network')

    # Pick the network variant depending on whether depth channels are used.
    if USE_DEPTH:
        Network = models.SegmentationModelWithDepth
    else:
        Network = models.SegmentationModel

    network = Step(name='network{}'.format(suffix),
                   transformer=Network(**config.model['network']),
                   input_data=['callback_input'],
                   input_steps=[preprocessing],
                   is_trainable=True,
                   experiment_directory=exp_dir)

    tta_aggregator = pipelines.aggregator('tta_aggregator{}'.format(suffix),
                                          network,
                                          tta_generator=tta_generator,
                                          experiment_directory=exp_dir,
                                          config=config.tta_aggregator)

    prediction_renamed = Step(
        name='prediction_renamed{}'.format(suffix),
        transformer=IdentityOperation(),
        input_steps=[tta_aggregator],
        adapter=Adapter({
            'mask_prediction': E(tta_aggregator.name, 'aggregated_prediction')
        }),
        experiment_directory=exp_dir)

    loader_mode = config.general.loader_mode
    if loader_mode == 'resize_and_pad':
        size_adjustment_function = partial(
            postprocessing.crop_image,
            target_size=config.general.original_size)
    elif loader_mode in ('resize', 'stacking'):
        size_adjustment_function = partial(
            postprocessing.resize_image,
            target_size=config.general.original_size)
    else:
        raise NotImplementedError

    return Step(name='mask_resize{}'.format(suffix),
                transformer=utils.make_apply_transformer(
                    size_adjustment_function,
                    output_name='resized_images',
                    apply_on=['images']),
                input_steps=[prediction_renamed],
                adapter=Adapter({
                    'images': E(prediction_renamed.name, 'mask_prediction'),
                }),
                experiment_directory=exp_dir)