Example 1
def preprocessing_fillna(features, config, train_mode, suffix, **kwargs):
    """
        impute missing value by condition
    """
    if train_mode:
        features_train, features_valid = features
        fillna = Step(
            name='fillna{}'.format(suffix),
            transformer=_fillna(
                config.preprocessing.impute_missing.fill_value),
            input_steps=[features_train, features_valid],
            adapter=Adapter({
                'X': E(features_train.name, 'features'),
                'X_valid': E(features_valid.name, 'features'),
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
    else:
        fillna = Step(
            name='fillna{}'.format(suffix),
            transformer=_fillna(
                config.preprocessing.impute_missing.fill_value),
            input_steps=[features],
            adapter=Adapter({'X': E(features.name, 'features')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
    return fillna
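
The `_fillna` factory used above is not shown in this excerpt. A minimal stand-in consistent with how it is called (it takes the configured fill value and returns a transformer that fills missing values in `X`, and in `X_valid` when one is supplied) might look like the sketch below; the class name and the bare fit/transform interface are assumptions, not the project's actual implementation.

import pandas as pd

class FillNaTransformer:
    # Hypothetical stand-in for the object returned by _fillna(fill_value).
    def __init__(self, fill_value):
        self.fill_value = fill_value

    def fit(self, X=None, X_valid=None):
        return self  # nothing to learn: a constant value is used

    def transform(self, X, X_valid=None):
        output = {'X': X.fillna(self.fill_value)}
        if X_valid is not None:
            output['X_valid'] = X_valid.fillna(self.fill_value)
        return output

def _fillna(fill_value):
    return FillNaTransformer(fill_value)

frame = pd.DataFrame({'a': [1.0, None]})
print(_fillna(-1).fit().transform(frame)['X'])  # missing value replaced by -1
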
Example 2
def row_aggregation_features(config, train_mode, suffix, **kwargs):
    bucket_nrs = config.row_aggregations.bucket_nrs
    row_agg_features = []
    for bucket_nr in bucket_nrs:
        row_agg_feature = Step(
            name='row_agg_feature_bucket_nr{}{}'.format(bucket_nr, suffix),
            transformer=fe.RowAggregationFeatures(bucket_nr=bucket_nr),
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        row_agg_features.append(row_agg_feature)

    if train_mode:
        row_agg_features_valid = []
        for bucket_nr, row_agg_feature in zip(bucket_nrs, row_agg_features):
            row_agg_feature_valid = Step(
                name='row_agg_feature_bucket_nr{}_valid{}'.format(
                    bucket_nr, suffix),
                transformer=row_agg_feature,
                input_data=['input'],
                adapter=Adapter({'X': E('input', 'X_valid')}),
                experiment_directory=config.pipeline.experiment_directory,
                **kwargs)
            row_agg_features_valid.append(row_agg_feature_valid)

        return row_agg_features, row_agg_features_valid
    else:
        return row_agg_features
Example 3
def _numerical_transforms(dispatchers, config, train_mode, suffix, **kwargs):
    if train_mode:
        feature_by_type_split, feature_by_type_split_valid = dispatchers
    else:
        feature_by_type_split = dispatchers

    log_num = Step(
        name='log_num{}'.format(suffix),
        transformer=make_transformer(lambda x: np.log(x + 1),
                                     output_name='numerical_features'),
        input_steps=[feature_by_type_split],
        adapter=Adapter(
            {'x': E(feature_by_type_split.name, 'numerical_features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:
        log_num_valid = Step(
            name='log_num_valid{}'.format(suffix),
            transformer=log_num,
            input_steps=[feature_by_type_split_valid],
            adapter=Adapter({
                'x':
                E(feature_by_type_split_valid.name, 'numerical_features')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return log_num, log_num_valid
    else:
        return log_num
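
Two conventions recur throughout these examples. First, a Step can receive an existing Step as its `transformer` (as `log_num_valid` does with `log_num` above): the new step then shares the already-fitted transformer, so validation data is transformed with parameters learned on the training data. Second, `make_transformer` wraps a plain function as a step transformer. Given how it is called here (keyword arguments supplied by the Adapter, a single named output), a minimal sketch could be the following; the real helper likely differs in detail.

import numpy as np

class _FunctionTransformer:
    # Hypothetical minimal wrapper produced by make_transformer.
    def __init__(self, func, output_name):
        self.func = func
        self.output_name = output_name

    def fit(self, **kwargs):
        return self

    def transform(self, **kwargs):
        # Keyword names come from the step's Adapter, e.g. x=...
        return {self.output_name: self.func(**kwargs)}

def make_transformer(func, output_name):
    return _FunctionTransformer(func, output_name)

log_num = make_transformer(lambda x: np.log(x + 1), output_name='numerical_features')
print(log_num.transform(x=np.array([0.0, np.e - 1])))  # {'numerical_features': array([0., 1.])}
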
Example 4
def data_cleaning_v2(config, train_mode, suffix, **kwargs):
    cleaned_data = data_cleaning_v1(config, train_mode, suffix, **kwargs)

    if train_mode:
        cleaned_data, cleaned_data_valid = cleaned_data

    impute_missing = Step(name='dummies_missing{}'.format(suffix),
                          transformer=dc.DummiesMissing(**config.dummies_missing),
                          input_steps=[cleaned_data],
                          adapter=Adapter({'X': E(cleaned_data.name, 'numerical_features')}),
                          experiment_directory=config.pipeline.experiment_directory, **kwargs)

    if train_mode:
        impute_missing_valid = Step(name='dummies_missing_valid{}'.format(suffix),
                                    transformer=impute_missing,
                                    input_steps=[cleaned_data_valid],
                                    adapter=Adapter({'X': E(cleaned_data_valid.name, 'numerical_features')}),
                                    experiment_directory=config.pipeline.experiment_directory, **kwargs)
        return impute_missing, impute_missing_valid
    else:
        return impute_missing
Example 5
def visualizer(model, label_encoder, config):
    label_decoder = Step(name='label_decoder',
                         transformer=GoogleAiLabelDecoder(),
                         input_steps=[
                             label_encoder,
                         ],
                         experiment_directory=config.env.cache_dirpath)

    decoder = Step(
        name='decoder',
        transformer=DataDecoder(**config.postprocessing.data_decoder),
        input_data=['input'],
        input_steps=[
            model,
        ],
        experiment_directory=config.env.cache_dirpath)

    visualize = Step(name='visualizer',
                     transformer=Visualizer(),
                     input_steps=[label_decoder, decoder],
                     input_data=['input'],
                     adapter=Adapter({
                         'images_data':
                         E('input', 'images_data'),
                         'results':
                         E(decoder.name, 'results'),
                         'decoder_dict':
                         E(label_decoder.name, 'inverse_mapping')
                     }),
                     experiment_directory=config.env.cache_dirpath)

    return visualize
Example 6
def postprocessing(model, label_encoder, config):
    label_decoder = Step(name='label_decoder',
                         transformer=GoogleAiLabelDecoder(),
                         input_steps=[
                             label_encoder,
                         ],
                         experiment_directory=config.env.cache_dirpath)

    decoder = Step(
        name='decoder',
        transformer=DataDecoder(**config.postprocessing.data_decoder),
        input_steps=[
            model,
        ],
        experiment_directory=config.env.cache_dirpath)

    submission_producer = Step(
        name='submission_producer',
        transformer=PredictionFormatter(
            **config.postprocessing.prediction_formatter),
        input_steps=[label_decoder, decoder],
        input_data=['input'],
        adapter=Adapter({
            'image_ids':
            E('input', 'img_ids'),
            'results':
            E(decoder.name, 'results'),
            'decoder_dict':
            E(label_decoder.name, 'inverse_mapping')
        }),
        experiment_directory=config.env.cache_dirpath)
    return submission_producer
Example 7
def preprocessing_inference(config, model_name='unet', suffix=''):
    if config.general.loader_mode == 'resize_and_pad':
        loader_config = config.loaders.resize_and_pad
    elif config.general.loader_mode == 'resize':
        loader_config = config.loaders.resize
    else:
        raise NotImplementedError

    if loader_config.dataset_params.image_source == 'memory':
        reader_inference = Step(name='reader_inference{}'.format(suffix),
                                transformer=loaders.ImageReader(train_mode=False, **config.reader[model_name]),
                                input_data=['input'],
                                adapter=Adapter({'meta': E('input', 'meta')}),

                                experiment_directory=config.execution.experiment_dir)

    elif loader_config.dataset_params.image_source == 'disk':
        reader_inference = Step(name='xy_inference{}'.format(suffix),
                                transformer=loaders.XYSplit(train_mode=False, **config.xy_splitter[model_name]),
                                input_data=['input'],
                                adapter=Adapter({'meta': E('input', 'meta')}),
                                experiment_directory=config.execution.experiment_dir)
    else:
        raise NotImplementedError

    loader = Step(name='loader{}'.format(suffix),
                  transformer=loaders.ImageSegmentationLoader(train_mode=False, **loader_config),
                  input_steps=[reader_inference],
                  adapter=Adapter({'X': E(reader_inference.name, 'X'),
                                   'y': E(reader_inference.name, 'y'),
                                   }),
                  experiment_directory=config.execution.experiment_dir,
                  cache_output=True)
    return loader
Example 8
def _feature_by_type_splits(config, train_mode, suffix):
    if train_mode:
        feature_by_type_split = Step(
            name='inferred_type_splitter{}'.format(suffix),
            transformer=fe.InferredTypeSplitter(),
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X')}),
            experiment_directory=config.pipeline.experiment_directory)

        feature_by_type_split_valid = Step(
            name='inferred_type_splitter_valid{}'.format(suffix),
            transformer=feature_by_type_split,
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X_valid')}),
            experiment_directory=config.pipeline.experiment_directory)

        return feature_by_type_split, feature_by_type_split_valid

    else:
        feature_by_type_split = Step(
            name='inferred_type_splitter{}'.format(suffix),
            transformer=fe.InferredTypeSplitter(),
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X')}),
            experiment_directory=config.pipeline.experiment_directory)

    return feature_by_type_split
Example 9
def _projection(projection_config, data_cleaned, config, train_mode, suffix, **kwargs):
    (DecompositionTransformer, transformer_config, transformer_name) = projection_config

    if train_mode:
        data_cleaned, data_cleaned_valid = data_cleaned

    projector = Step(name='{}{}'.format(transformer_name, suffix),
                     transformer=DecompositionTransformer(**transformer_config),
                     input_steps=[data_cleaned],
                     adapter=Adapter({'features': E(data_cleaned.name, 'numerical_features')}),
                     experiment_directory=config.pipeline.experiment_directory, **kwargs)

    projector_pandas = Step(name='{}_pandas{}'.format(transformer_name, suffix),
                            transformer=make_transformer(partial(to_pandas, column_prefix=transformer_name),
                                                         output_name='numerical_features'),
                            input_steps=[projector],
                            adapter=Adapter({'x': E(projector.name, 'features')}),
                            experiment_directory=config.pipeline.experiment_directory, **kwargs)

    if train_mode:
        projector_valid = Step(name='{}_valid{}'.format(transformer_name, suffix),
                               transformer=projector,
                               input_steps=[data_cleaned_valid],
                               adapter=Adapter({'features': E(data_cleaned_valid.name, 'numerical_features')}),
                               experiment_directory=config.pipeline.experiment_directory, **kwargs)
        projector_pandas_valid = Step(name='{}_pandas_valid{}'.format(transformer_name, suffix),
                                      transformer=projector_pandas,
                                      input_steps=[projector_valid],
                                      adapter=Adapter({'x': E(projector_valid.name, 'features')}),
                                      experiment_directory=config.pipeline.experiment_directory, **kwargs)
        return projector_pandas, projector_pandas_valid
    else:
        return projector_pandas
Example 10
def mask_postprocessing(config, suffix=''):
    if config.execution.loader_mode == 'crop_and_pad':
        size_adjustment_function = partial(crop_image,
                                           target_size=ORIGINAL_SIZE)
    elif config.execution.loader_mode == 'resize':
        size_adjustment_function = partial(resize_image,
                                           target_size=ORIGINAL_SIZE)
    else:
        raise NotImplementedError

    mask_resize = Step(name='mask_resize{}'.format(suffix),
                       transformer=make_apply_transformer(
                           size_adjustment_function,
                           output_name='resized_images',
                           apply_on=['images']),
                       input_data=['input_masks'],
                       adapter=Adapter({
                           'images':
                           E('input_masks', 'mask_prediction'),
                       }),
                       experiment_directory=config.env.experiment_dir)

    binarizer = Step(name='binarizer{}'.format(suffix),
                     transformer=make_apply_transformer(
                         partial(binarize,
                                 threshold=config.thresholder.threshold_masks),
                         output_name='binarized_images',
                         apply_on=['images']),
                     input_steps=[mask_resize],
                     adapter=Adapter({
                         'images':
                         E(mask_resize.name, 'resized_images'),
                     }),
                     experiment_directory=config.env.experiment_dir)
    return binarizer
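
Neither `binarize` nor `make_apply_transformer` is defined in this excerpt. Judging from the calls above, `make_apply_transformer(func, output_name, apply_on)` builds a transformer that maps `func` over every element of the input named in `apply_on`, and `binarize` thresholds a probability mask into 0/1 values. A sketch under those assumptions; the real helpers may batch work or report progress:

import numpy as np
from functools import partial

def binarize(image, threshold):
    # Assumed behavior: turn a probability map into a binary mask.
    return (image > threshold).astype(np.uint8)

class _ApplyTransformer:
    # Hypothetical element-wise applier.
    def __init__(self, func, output_name, apply_on):
        self.func = func
        self.output_name = output_name
        self.apply_on = apply_on

    def fit(self, **kwargs):
        return self

    def transform(self, **kwargs):
        (name,) = self.apply_on  # a single input in the examples above
        return {self.output_name: [self.func(item) for item in kwargs[name]]}

def make_apply_transformer(func, output_name, apply_on):
    return _ApplyTransformer(func, output_name, apply_on)

thresholder = make_apply_transformer(partial(binarize, threshold=0.5),
                                     output_name='binarized_images',
                                     apply_on=['images'])
print(thresholder.transform(images=[np.array([0.2, 0.8])]))  # {'binarized_images': [array([0, 1], ...)]}
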
Example 11
def test_adapter_creates_defined_keys(data):
    adapter = Adapter({
        'X': [E('input_1', 'features')],
        'Y': [E('input_2', 'extra_features')]
    })
    res = adapter.adapt(data)

    assert {'X', 'Y'} == set(res.keys())
Example 12
def test_recipe_with_single_item(data):
    adapter = Adapter({
        'X': E('input_1', 'labels'),
        'Y': E('input_3', 'labels'),
    })
    res = adapter.adapt(data)

    assert np.array_equal(res['X'], data['input_1']['labels'])
    assert np.array_equal(res['Y'], data['input_3']['labels'])
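
These two tests, together with Examples 19 and 24 below, pin down the contract of `Adapter.adapt`: each `E(input_name, key)` placeholder resolves to `data[input_name][key]`, and the surrounding recipe structure (a bare `E`, or lists, tuples, and dicts nested arbitrarily) is rebuilt around the resolved values. A simplified re-implementation of that rule, assuming `data` is a dict of dicts, is shown here purely to make the semantics explicit; the real steppy classes carry extra validation.

from collections import namedtuple

E = namedtuple('E', ['input_name', 'key'])

class Adapter:
    def __init__(self, recipes):
        self.recipes = recipes

    def adapt(self, data):
        return {name: self._resolve(recipe, data)
                for name, recipe in self.recipes.items()}

    def _resolve(self, recipe, data):
        if isinstance(recipe, E):  # checked before tuple: E is a namedtuple
            return data[recipe.input_name][recipe.key]
        if isinstance(recipe, list):
            return [self._resolve(r, data) for r in recipe]
        if isinstance(recipe, tuple):
            return tuple(self._resolve(r, data) for r in recipe)
        if isinstance(recipe, dict):
            return {k: self._resolve(v, data) for k, v in recipe.items()}
        return recipe  # plain constants pass through unchanged
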
Example 13
def aggregator(name, model, tta_generator, experiment_directory, config):
    tta_aggregator = Step(name=name,
                          transformer=loaders.TestTimeAugmentationAggregator(**config),
                          input_steps=[model, tta_generator],
                          adapter=Adapter({'images': E(model.name, 'mask_prediction'),
                                           'tta_params': E(tta_generator.name, 'tta_params'),
                                           'img_ids': E(tta_generator.name, 'img_ids'),
                                           }),
                          experiment_directory=experiment_directory)
    return tta_aggregator
Example 14
def _join_features(numerical_features, numerical_features_valid,
                   categorical_features, categorical_features_valid, config,
                   train_mode, suffix, **kwargs):
    if train_mode:
        persist_output = True
        cache_output = True
        load_persisted_output = True
    else:
        persist_output = False
        cache_output = True
        load_persisted_output = False

    feature_joiner = Step(
        name='feature_joiner{}'.format(suffix),
        transformer=fe.FeatureJoiner(**config.feature_joiner),
        input_steps=numerical_features + categorical_features,
        adapter=Adapter({
            'numerical_feature_list': [
                E(feature.name, 'numerical_features')
                for feature in numerical_features
            ],
            'categorical_feature_list': [
                E(feature.name, 'categorical_features')
                for feature in categorical_features
            ],
        }),
        experiment_directory=config.pipeline.experiment_directory,
        persist_output=persist_output,
        cache_output=cache_output,
        load_persisted_output=load_persisted_output)
    if train_mode:
        feature_joiner_valid = Step(
            name='feature_joiner_valid{}'.format(suffix),
            transformer=feature_joiner,
            input_steps=numerical_features_valid + categorical_features_valid,
            adapter=Adapter({
                'numerical_feature_list': [
                    E(feature.name, 'numerical_features')
                    for feature in numerical_features_valid
                ],
                'categorical_feature_list': [
                    E(feature.name, 'categorical_features')
                    for feature in categorical_features_valid
                ],
            }),
            experiment_directory=config.pipeline.experiment_directory,
            persist_output=persist_output,
            cache_output=cache_output,
            load_persisted_output=load_persisted_output)

        return feature_joiner, feature_joiner_valid

    else:
        return feature_joiner
Example 15
def unet_tta(config, suffix=''):
    preprocessing, tta_generator = pipelines.preprocessing_inference_tta(
        config, model_name='unet')

    unet = Step(name='unet{}'.format(suffix),
                transformer=models.PyTorchUNet(**config.model['unet']),
                input_data=['callback_input'],
                input_steps=[preprocessing],
                is_trainable=True,
                experiment_directory=config.execution.experiment_dir)

    tta_aggregator = pipelines.aggregator(
        'tta_aggregator{}'.format(suffix),
        unet,
        tta_generator=tta_generator,
        experiment_directory=config.execution.experiment_dir,
        config=config.tta_aggregator)

    prediction_renamed = Step(
        name='prediction_renamed{}'.format(suffix),
        transformer=IdentityOperation(),
        input_steps=[tta_aggregator],
        adapter=Adapter({
            'mask_prediction':
            E(tta_aggregator.name, 'aggregated_prediction')
        }),
        experiment_directory=config.execution.experiment_dir)

    if config.general.loader_mode == 'resize_and_pad':
        size_adjustment_function = partial(
            postprocessing.crop_image,
            target_size=config.general.original_size)
    elif config.general.loader_mode == 'resize':
        size_adjustment_function = partial(
            postprocessing.resize_image,
            target_size=config.general.original_size)
    else:
        raise NotImplementedError

    mask_resize = Step(name='mask_resize{}'.format(suffix),
                       transformer=utils.make_apply_transformer(
                           size_adjustment_function,
                           output_name='resized_images',
                           apply_on=['images']),
                       input_steps=[prediction_renamed],
                       adapter=Adapter({
                           'images':
                           E(prediction_renamed.name, 'mask_prediction'),
                       }),
                       experiment_directory=config.execution.experiment_dir)

    return mask_resize
Example 16
def _target_encoders(dispatchers, config, train_mode, **kwargs):
    if train_mode:
        feature_by_type_split, feature_by_type_split_valid = dispatchers
        numpy_label, numpy_label_valid = _to_numpy_label(config, **kwargs)
        target_encoder = Step(name='target_encoder',
                              transformer=fe.TargetEncoder(),
                              input_data=['input'],
                              input_steps=[feature_by_type_split, numpy_label],
                              adapter=Adapter({
                                  'X':
                                  E(feature_by_type_split.name,
                                    'categorical_features'),
                                  'y':
                                  E(numpy_label.name, 'y'),
                              }),
                              cache_dirpath=config.env.cache_dirpath,
                              **kwargs)

        target_encoder_valid = Step(
            name='target_encoder_valid',
            transformer=target_encoder,
            input_data=['input'],
            input_steps=[feature_by_type_split_valid, numpy_label_valid],
            adapter=Adapter({
                'X':
                E(feature_by_type_split_valid.name, 'categorical_features'),
                'y':
                E(numpy_label_valid.name, 'y'),
            }),
            cache_dirpath=config.env.cache_dirpath,
            **kwargs)

        return target_encoder, target_encoder_valid

    else:
        feature_by_type_split = dispatchers

        target_encoder = Step(name='target_encoder',
                              transformer=fe.TargetEncoder(),
                              input_data=['input'],
                              input_steps=[feature_by_type_split],
                              adapter=Adapter({
                                  'X':
                                  E(feature_by_type_split.name,
                                    'categorical_features')
                              }),
                              cache_dirpath=config.env.cache_dirpath,
                              **kwargs)

        return target_encoder
Example 17
def unet(config, suffix='', train_mode=True):
    if train_mode:
        preprocessing = pipelines.preprocessing_train(config,
                                                      model_name='unet',
                                                      suffix=suffix)
    else:
        preprocessing = pipelines.preprocessing_inference(config,
                                                          suffix=suffix)

    unet = utils.FineTuneStep(
        name='unet{}'.format(suffix),
        transformer=models.PyTorchUNet(**config.model['unet']),
        input_data=['callback_input'],
        input_steps=[preprocessing],
        adapter=Adapter({
            'datagen':
            E(preprocessing.name, 'datagen'),
            'validation_datagen':
            E(preprocessing.name, 'validation_datagen'),
            'meta_valid':
            E('callback_input', 'meta_valid'),
        }),
        is_trainable=True,
        fine_tuning=config.model.unet.training_config.fine_tuning,
        experiment_directory=config.execution.experiment_dir)

    if config.general.loader_mode == 'resize_and_pad':
        size_adjustment_function = partial(
            postprocessing.crop_image,
            target_size=config.general.original_size)
    elif config.general.loader_mode == 'resize':
        size_adjustment_function = partial(
            postprocessing.resize_image,
            target_size=config.general.original_size)
    else:
        raise NotImplementedError

    mask_resize = Step(name='mask_resize{}'.format(suffix),
                       transformer=utils.make_apply_transformer(
                           size_adjustment_function,
                           output_name='resized_images',
                           apply_on=['images']),
                       input_steps=[unet],
                       adapter=Adapter({
                           'images':
                           E(unet.name, 'mask_prediction'),
                       }),
                       experiment_directory=config.execution.experiment_dir)

    return mask_resize
Example 18
def classifier_light_gbm(features, config, train_mode, suffix='', **kwargs):
    if train_mode:
        features_train, features_valid = features
        log_target = Step(name='log_target{}'.format(suffix),
                          transformer=make_transformer(lambda x: np.log1p(x), output_name='y'),
                          input_data=['input'],
                          adapter=Adapter({'x': E('input', 'y')}),
                          experiment_directory=config.pipeline.experiment_directory, **kwargs)

        log_target_valid = Step(name='log_target_valid{}'.format(suffix),
                                transformer=log_target,
                                input_data=['input'],
                                adapter=Adapter({'x': E('input', 'y_valid')}),
                                experiment_directory=config.pipeline.experiment_directory, **kwargs)

        if config.random_search.light_gbm.n_runs:
            transformer = RandomSearchOptimizer(TransformerClass=LightGBM,
                                                params=config.light_gbm,
                                                train_input_keys=[],
                                                valid_input_keys=['X_valid', 'y_valid'],
                                                score_func=root_mean_squared_error,
                                                maximize=False,
                                                n_runs=config.random_search.light_gbm.n_runs,
                                                callbacks=[
                                                    NeptuneMonitor(
                                                        **config.random_search.light_gbm.callbacks.neptune_monitor),
                                                    PersistResults(
                                                        **config.random_search.light_gbm.callbacks.persist_results)]
                                                )
        else:
            transformer = LightGBM(**config.light_gbm)

        light_gbm = Step(name='light_gbm{}'.format(suffix),
                         transformer=transformer,
                         input_data=['input'],
                         input_steps=[features_train, features_valid, log_target, log_target_valid],
                         adapter=Adapter({'X': E(features_train.name, 'features'),
                                          'y': E(log_target.name, 'y'),
                                          'feature_names': E(features_train.name, 'feature_names'),
                                          'categorical_features': E(features_train.name, 'categorical_features'),
                                          'X_valid': E(features_valid.name, 'features'),
                                          'y_valid': E(log_target_valid.name, 'y'),
                                          }),
                         experiment_directory=config.pipeline.experiment_directory, **kwargs)
    else:
        light_gbm = Step(name='light_gbm{}'.format(suffix),
                         transformer=LightGBM(**config.light_gbm),
                         input_steps=[features],
                         adapter=Adapter({'X': E(features.name, 'features')}),
                         experiment_directory=config.pipeline.experiment_directory, **kwargs)

    output = exp_target(light_gbm, config, suffix, **kwargs)

    return output
Example 19
def test_nested_recipes(data):
    adapter = Adapter({
        'X': [{
            'a': [E('input_1', 'features')]
        }],
        'Y': {
            'a': [{
                'b': E('input_2', 'extra_features')
            }]
        }
    })
    res = adapter.adapt(data)

    assert res['X'] == [{'a': [data['input_1']['features']]}]
    assert res['Y'] == {'a': [{'b': data['input_2']['extra_features']}]}
Example 20
def classifier_sklearn(sklearn_features, ClassifierClass, full_config,
                       clf_name, train_mode, suffix, normalize, **kwargs):
    config, model_params, rs_config = full_config
    if train_mode:
        if getattr(config.random_search, clf_name).n_runs:
            transformer = RandomSearchOptimizer(
                partial(get_sklearn_classifier,
                        ClassifierClass=ClassifierClass,
                        normalize=normalize),
                model_params,
                train_input_keys=[],
                valid_input_keys=['X_valid', 'y_valid'],
                score_func=score_function,
                maximize=True,
                n_runs=rs_config.n_runs,
                callbacks=[
                    NeptuneMonitor(**rs_config.callbacks.neptune_monitor),
                    PersistResults(**rs_config.callbacks.persist_results)
                ])
        else:
            transformer = get_sklearn_classifier(ClassifierClass, normalize,
                                                 **model_params)

        sklearn_clf = Step(
            name='{}{}'.format(clf_name, suffix),
            transformer=transformer,
            is_trainable=True,
            input_data=['tap4fun'],
            input_steps=[sklearn_features],
            adapter=Adapter({
                'X': E(sklearn_features.name, 'X'),
                'y': E('tap4fun', 'y'),
                'X_valid': E(sklearn_features.name, 'X_valid'),
                'y_valid': E('tap4fun', 'y_valid'),
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
    else:
        sklearn_clf = Step(
            name='{}{}'.format(clf_name, suffix),
            transformer=get_sklearn_classifier(ClassifierClass, normalize,
                                               **model_params),
            is_trainable=True,
            input_steps=[sklearn_features],
            adapter=Adapter({'X': E(sklearn_features.name, 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
    return sklearn_clf
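
`get_sklearn_classifier` is referenced but not shown. It is called both as a factory handed to `RandomSearchOptimizer` (with `normalize` pre-bound via `partial`) and directly with `**model_params`, and the `clipper` step in Example 23 reads a `predicted` key from the classifier's output. A hedged sketch consistent with those call sites; the wrapper class and the probability-of-positive-class output are assumptions:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

class SklearnBinaryClassifier:
    # Hypothetical step-compatible wrapper around an sklearn estimator.
    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y, X_valid=None, y_valid=None):
        self.estimator.fit(X, y)
        return self

    def transform(self, X, **kwargs):
        # 'predicted' matches the key the clipper step reads in Example 23.
        return {'predicted': self.estimator.predict_proba(X)[:, 1]}

def get_sklearn_classifier(ClassifierClass, normalize=False, **params):
    estimator = ClassifierClass(**params)
    if normalize:
        estimator = make_pipeline(StandardScaler(), estimator)
    return SklearnBinaryClassifier(estimator)
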
Example 21
def _previous_application(config, train_mode, suffix, **kwargs):
    previous_application_cleaned = _previous_application_cleaning(
        config, suffix, **kwargs)

    previous_applications_hand_crafted = Step(
        name='previous_applications_hand_crafted',
        transformer=fe.PreviousApplicationFeatures(
            **config.previous_applications),
        input_steps=[previous_application_cleaned],
        adapter=Adapter({
            'prev_applications':
            E(previous_application_cleaned.name, 'previous_application')
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    previous_applications_hand_crafted_merge = Step(
        name='previous_applications_hand_crafted_merge{}'.format(suffix),
        transformer=fe.GroupbyMerge(**config.previous_applications),
        input_data=['application'],
        input_steps=[previous_applications_hand_crafted],
        adapter=Adapter({
            'table':
            E('application', 'X'),
            'features':
            E(previous_applications_hand_crafted.name, 'features_table')
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:
        previous_applications_hand_crafted_merge_valid = Step(
            name='previous_applications_hand_crafted_merge_valid{}'.format(
                suffix),
            transformer=previous_applications_hand_crafted_merge,
            input_data=['application'],
            input_steps=[previous_applications_hand_crafted],
            adapter=Adapter({
                'table':
                E('application', 'X_valid'),
                'features':
                E(previous_applications_hand_crafted.name, 'features_table')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return previous_applications_hand_crafted_merge, previous_applications_hand_crafted_merge_valid
    else:
        return previous_applications_hand_crafted_merge
Example 22
def stacking_preprocessing_inference(config, model_name='network', suffix=''):
    reader_inference = Step(name='xy_inference{}'.format(suffix),
                            transformer=loaders.XYSplit(train_mode=False, **config.xy_splitter[model_name]),
                            input_data=['input'],
                            adapter=Adapter({'meta': E('input', 'meta')}),
                            experiment_directory=config.execution.experiment_dir)

    loader = Step(name='loader{}'.format(suffix),
                  transformer=loaders.ImageSegmentationLoaderStacking(train_mode=False, **config.loaders.stacking),
                  input_steps=[reader_inference],
                  adapter=Adapter({'X': E(reader_inference.name, 'X'),
                                   'y': E(reader_inference.name, 'y'),
                                   }),
                  experiment_directory=config.execution.experiment_dir,
                  cache_output=True)
    return loader
Example 23
def sklearn_main(config,
                 ClassifierClass,
                 clf_name,
                 train_mode,
                 normalize=False):
    model_params = getattr(config, clf_name)
    random_search_config = getattr(config.random_search, clf_name)
    full_config = (config, model_params, random_search_config)
    if train_mode:
        features, features_valid = feature_extraction(
            config,
            train_mode,
            persist_output=True,
            cache_output=True,
            load_persisted_output=True)

        sklearn_preproc = preprocessing_fillna((features, features_valid),
                                               config, train_mode)
    else:
        features = feature_extraction(config, train_mode, cache_output=True)
        sklearn_preproc = preprocessing_fillna(features, config, train_mode)

    sklearn_clf = classifier_sklearn(sklearn_preproc, ClassifierClass,
                                     full_config, clf_name, train_mode,
                                     normalize)

    clipper = Step(name='clipper',
                   transformer=Clipper(**config.clipper),
                   input_steps=[sklearn_clf],
                   adapter=Adapter(
                       {'prediction': E(sklearn_clf.name, 'predicted')}),
                   experiment_directory=config.pipeline.experiment_directory)
    return clipper
Example 24
def test_recipe_with_tuple(data):
    adapter = Adapter({
        'X': (),
        'Y': (E('input_1', 'features'), ),
        'Z': (E('input_1', 'features'), E('input_2', 'extra_features'))
    })
    res = adapter.adapt(data)

    for i, key in enumerate(('X', 'Y', 'Z')):
        assert isinstance(res[key], tuple)
        assert len(res[key]) == i

    assert res['X'] == ()
    assert np.array_equal(res['Y'][0], data['input_1']['features'])
    assert np.array_equal(res['Z'][0], data['input_1']['features'])
    assert np.array_equal(res['Z'][1], data['input_2']['extra_features'])
Example 25
def exp_target(model_output, config, suffix, **kwargs):
    exp_target = Step(name='exp_target{}'.format(suffix),
                      transformer=make_transformer(lambda x: np.exp(x) - 1, output_name='prediction'),
                      input_steps=[model_output],
                      adapter=Adapter({'x': E(model_output.name, 'prediction')}),
                      experiment_directory=config.pipeline.experiment_directory, **kwargs)
    return exp_target
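
`exp_target` inverts the `log_target` step from Example 18: targets are trained on `np.log1p(y)` and predictions are mapped back through `np.exp(x) - 1` (i.e. `np.expm1`), so the round trip recovers the original scale:

import numpy as np

y = np.array([0.0, 9.0, 99.0])
y_log = np.log1p(y)                        # what the model is fitted against
assert np.allclose(np.exp(y_log) - 1, y)   # what exp_target hands back
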
Example 26
def retinanet(config, train_mode, visualize=False):
    persist_output = False
    load_persisted_output = False

    loader = preprocessing_generator(config, is_train=train_mode)

    retinanet = Step(name='retinanet',
                     transformer=Retina(**config.retinanet,
                                        train_mode=train_mode),
                     input_steps=[loader],
                     experiment_directory=config.env.cache_dirpath,
                     persist_output=persist_output,
                     is_trainable=True,
                     load_persisted_output=load_persisted_output)

    if train_mode:
        return retinanet

    if visualize:
        return visualizer(retinanet, loader.get_step('label_encoder'), config)

    postprocessor = postprocessing(retinanet, loader.get_step('label_encoder'),
                                   config)

    output = Step(name='output',
                  transformer=IdentityOperation(),
                  input_steps=[postprocessor],
                  adapter=Adapter(
                      {'y_pred': E(postprocessor.name, 'submission')}),
                  experiment_directory=config.env.cache_dirpath,
                  persist_output=persist_output,
                  load_persisted_output=load_persisted_output)
    return output
Example 27
def _to_numpy_label(config, **kwargs):
    to_numpy_label = Step(name='to_numpy_label',
                          transformer=ToNumpyLabel(),
                          input_data=['input'],
                          adapter=Adapter({'y': [E('input', 'y')]}),
                          experiment_directory=config.pipeline.experiment_directory,
                          **kwargs)

    to_numpy_label_valid = Step(name='to_numpy_label_valid',
                                transformer=to_numpy_label,
                                input_data=['input'],
                                adapter=Adapter({'y': [E('input', 'y_valid')]}),
                                experiment_directory=config.pipeline.experiment_directory,
                                **kwargs)

    return to_numpy_label, to_numpy_label_valid
Example 28
def ensemble(config, train_mode, suffix="", **kwargs):
    lgb_step = get_pipeline('lightGBM', train_mode)
    logreg_step = get_pipeline('log_reg', train_mode)
    rf_step = get_pipeline('log_reg', train_mode)  # built from the same 'log_reg' pipeline as logreg_step
    ens_step = Step(
        name='Ensembler',
        transformer=AvgTransformer(),
        input_steps=[lgb_step, logreg_step, rf_step],
        adapter=Adapter({
            'y_proba_1': E(lgb_step.name, 'prediction'),
            'y_proba_2': E(rf_step.name, 'prediction'),
            'y_proba_3': E(logreg_step.name, 'prediction'),
        }),
        experiment_directory=config.pipeline.experiment_directory,
    )
    return ens_step
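
`AvgTransformer` is not defined in this excerpt. Given that the adapter feeds it three prediction vectors, a plausible minimal version simply averages them; the class body and the `prediction` output key (the key the base pipelines here expose) are assumptions:

import numpy as np

class AvgTransformer:
    # Hypothetical: average the three base-model probability vectors.
    def fit(self, **kwargs):
        return self

    def transform(self, y_proba_1, y_proba_2, y_proba_3):
        return {'prediction': np.mean([y_proba_1, y_proba_2, y_proba_3], axis=0)}

print(AvgTransformer().transform(np.array([0.2]), np.array([0.4]), np.array([0.6])))  # {'prediction': array([0.4])}
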
Example 29
def unet_tta(config, suffix=''):
    preprocessing, tta_generator = preprocessing_inference_tta(
        config, model_name='unet')

    unet = Step(name='unet{}'.format(suffix),
                transformer=PyTorchUNet(**config.model['unet']),
                input_data=['callback_input'],
                input_steps=[preprocessing],
                is_trainable=True,
                experiment_directory=config.env.experiment_dir)

    tta_aggregator = aggregator('tta_aggregator{}'.format(suffix),
                                unet,
                                tta_generator=tta_generator,
                                experiment_directory=config.env.experiment_dir,
                                config=config.tta_aggregator)

    prediction_renamed = Step(name='prediction_renamed{}'.format(suffix),
                              transformer=IdentityOperation(),
                              input_steps=[tta_aggregator],
                              adapter=Adapter({
                                  'mask_prediction':
                                  E(tta_aggregator.name,
                                    'aggregated_prediction')
                              }),
                              experiment_directory=config.env.experiment_dir)

    return prediction_renamed
Example 30
def network_tta(config, suffix=''):
    if SECOND_LEVEL:
        raise NotImplementedError('Second level does not work with TTA')

    preprocessing, tta_generator = pipelines.preprocessing_inference_tta(config, model_name='network')

    if USE_DEPTH:
        Network = models.SegmentationModelWithDepth
    else:
        Network = models.SegmentationModel

    network = Step(name='network{}'.format(suffix),
                   transformer=Network(**config.model['network']),
                   input_data=['callback_input'],
                   input_steps=[preprocessing],
                   is_trainable=True,
                   experiment_directory=config.execution.experiment_dir)

    tta_aggregator = pipelines.aggregator('tta_aggregator{}'.format(suffix), network,
                                          tta_generator=tta_generator,
                                          experiment_directory=config.execution.experiment_dir,
                                          config=config.tta_aggregator)

    prediction_renamed = Step(name='prediction_renamed{}'.format(suffix),
                              transformer=IdentityOperation(),
                              input_steps=[tta_aggregator],
                              adapter=Adapter({'mask_prediction': E(tta_aggregator.name, 'aggregated_prediction')}),
                              experiment_directory=config.execution.experiment_dir)

    if config.general.loader_mode == 'resize_and_pad':
        size_adjustment_function = partial(postprocessing.crop_image, target_size=config.general.original_size)
    elif config.general.loader_mode == 'resize' or config.general.loader_mode == 'stacking':
        size_adjustment_function = partial(postprocessing.resize_image, target_size=config.general.original_size)
    else:
        raise NotImplementedError

    mask_resize = Step(name='mask_resize{}'.format(suffix),
                       transformer=utils.make_apply_transformer(size_adjustment_function,
                                                                output_name='resized_images',
                                                                apply_on=['images']),
                       input_steps=[prediction_renamed],
                       adapter=Adapter({'images': E(prediction_renamed.name, 'mask_prediction')}),
                       experiment_directory=config.execution.experiment_dir)

    return mask_resize