Example #1
def _projection(projection_config, data_cleaned, config, train_mode, suffix, **kwargs):
    (DecompositionTransformer, transformer_config, transformer_name) = projection_config

    if train_mode:
        data_cleaned, data_cleaned_valid = data_cleaned

    projector = Step(name='{}{}'.format(transformer_name, suffix),
                     transformer=DecompositionTransformer(**transformer_config),
                     input_steps=[data_cleaned],
                     adapter=Adapter({'features': E(data_cleaned.name, 'numerical_features')}),
                     experiment_directory=config.pipeline.experiment_directory, **kwargs)

    projector_pandas = Step(name='{}_pandas{}'.format(transformer_name, suffix),
                            transformer=make_transformer(partial(to_pandas, column_prefix=transformer_name),
                                                         output_name='numerical_features'),
                            input_steps=[projector],
                            adapter=Adapter({'x': E(projector.name, 'features')}),
                            experiment_directory=config.pipeline.experiment_directory, **kwargs)

    if train_mode:
        projector_valid = Step(name='{}_valid{}'.format(transformer_name, suffix),
                               transformer=projector,
                               input_steps=[data_cleaned_valid],
                               adapter=Adapter({'features': E(data_cleaned_valid.name, 'numerical_features')}),
                               experiment_directory=config.pipeline.experiment_directory, **kwargs)
        projector_pandas_valid = Step(name='{}_pandas_valid{}'.format(transformer_name, suffix),
                                      transformer=projector_pandas,
                                      input_steps=[projector_valid],
                                      adapter=Adapter({'x': E(projector_valid.name, 'features')}),
                                      experiment_directory=config.pipeline.experiment_directory, **kwargs)
        return projector_pandas, projector_pandas_valid
    else:
        return projector_pandas
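A pattern worth noting here (it recurs in most of the examples below): the validation branch passes an already-built Step, or its .transformer, as the transformer of a new Step, so validation data is transformed with parameters fitted on the training branch rather than refitted. A minimal sketch of the idea, assuming the steppy package is installed; MeanCenterer, the experiment directory, and the data keys are illustrative, not from the original code:

from steppy.base import BaseTransformer, Step
from steppy.adapter import Adapter, E

class MeanCenterer(BaseTransformer):
    # Hypothetical transformer, defined only to make the sketch runnable.
    def fit(self, X, **kwargs):
        self.mean_ = X.mean()
        return self

    def transform(self, X, **kwargs):
        return {'X': X - self.mean_}

centerer = Step(name='centerer',
                transformer=MeanCenterer(),
                input_data=['input'],
                adapter=Adapter({'X': E('input', 'X')}),
                experiment_directory='/tmp/experiment')

centerer_valid = Step(name='centerer_valid',
                      transformer=centerer,  # reuse the transformer fitted in 'centerer'
                      input_data=['input'],
                      adapter=Adapter({'X': E('input', 'X_valid')}),
                      experiment_directory='/tmp/experiment')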
Example #2
def _credit_card_balance_groupby_agg(config, train_mode, suffix, **kwargs):
    credit_card_balance_groupby_agg = Step(
        name='credit_card_balance_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateMerge(**config.credit_card_balance),
        input_data=['application', 'credit_card_balance'],
        adapter=Adapter({
            'main_table': E('application', 'X'),
            'side_table': E('credit_card_balance', 'X')
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    if train_mode:
        credit_card_balance_groupby_agg_valid = Step(
            name='credit_card_balance_groupby_agg_valid{}'.format(suffix),
            transformer=credit_card_balance_groupby_agg,
            input_data=['application', 'credit_card_balance'],
            adapter=Adapter({
                'main_table': E('application', 'X_valid'),
                'side_table': E('credit_card_balance', 'X')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return credit_card_balance_groupby_agg, credit_card_balance_groupby_agg_valid

    else:
        return credit_card_balance_groupby_agg
Example #3
def _previous_applications_groupby_agg(config, train_mode, suffix, **kwargs):
    previous_applications_groupby_agg = Step(
        name='previous_applications_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateMerge(**config.previous_applications),
        input_data=['application', 'previous_application'],
        adapter=Adapter({
            'main_table': E('application', 'X'),
            'side_table': E('previous_application', 'X')
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    if train_mode:
        previous_applications_groupby_agg_valid = Step(
            name='previous_applications_groupby_agg_valid{}'.format(suffix),
            transformer=previous_applications_groupby_agg,
            input_data=['application', 'previous_application'],
            adapter=Adapter({
                'main_table': E('application', 'X_valid'),
                'side_table': E('previous_application', 'X')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return previous_applications_groupby_agg, previous_applications_groupby_agg_valid
    else:
        return previous_applications_groupby_agg
Example #4
def _tap4fun_groupby_agg(config, train_mode, suffix, **kwargs):
    tap4fun_groupby_agg = Step(
        name='tap4fun_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregate(**config.tap4fun.aggregations),
        is_trainable=True,
        input_data=['tap4fun'],
        adapter=Adapter({'main_table': E('tap4fun', 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:
        tap4fun_groupby_agg_valid = Step(
            name='tap4fun_groupby_agg_valid{}'.format(suffix),
            transformer=tap4fun_groupby_agg.transformer,
            input_data=['tap4fun'],
            adapter=Adapter({
                'main_table': E('tap4fun', 'X_valid'),
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)

        return tap4fun_groupby_agg, tap4fun_groupby_agg_valid

    else:
        return tap4fun_groupby_agg
Example #5
def _tap4fun(config, train_mode, suffix, **kwargs):
    if train_mode:
        tap4fun_cleaning, tap4fun_cleaning_valid = _tap4fun_cleaning(
            config, train_mode, suffix, **kwargs)
    else:
        tap4fun_cleaning = _tap4fun_cleaning(config, train_mode, suffix,
                                             **kwargs)

    tap4fun = Step(name='tap4fun_hand_crafted{}'.format(suffix),
                   transformer=fe.Tap4funFeatures(**config.tap4fun.columns),
                   input_steps=[tap4fun_cleaning],
                   adapter=Adapter({'X': E(tap4fun_cleaning.name, 'X')}),
                   experiment_directory=config.pipeline.experiment_directory,
                   **kwargs)
    if train_mode:
        tap4fun_valid = Step(
            name='tap4fun_hand_crafted_valid{}'.format(suffix),
            transformer=tap4fun.transformer,
            input_steps=[tap4fun_cleaning_valid],
            adapter=Adapter({'X': E(tap4fun_cleaning_valid.name, 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return tap4fun, tap4fun_valid
    else:
        return tap4fun
Example #6
def postprocessing_pipeline_simplified(cache_dirpath, loader_mode):
    if loader_mode == 'resize_and_pad':
        size_adjustment_function = partial(crop_image, target_size=ORIGINAL_SIZE)
    elif loader_mode == 'resize':
        size_adjustment_function = partial(resize_image, target_size=ORIGINAL_SIZE)
    else:
        raise NotImplementedError

    mask_resize = Step(name='mask_resize',
                       transformer=make_apply_transformer(size_adjustment_function,
                                                          output_name='resized_images',
                                                          apply_on=['images']),
                       input_data=['unet_output'],
                       adapter=Adapter({'images': E('unet_output', 'mask_prediction')}),
                       experiment_directory=cache_dirpath)

    binarizer = Step(name='binarizer',
                     transformer=make_apply_transformer(
                         partial(binarize, threshold=THRESHOLD),
                         output_name='binarized_images',
                         apply_on=['images']),
                     input_steps=[mask_resize],
                     adapter=Adapter({'images': E(mask_resize.name, 'resized_images')}),
                     experiment_directory=cache_dirpath)

    output = Step(name='output',
                  transformer=IdentityOperation(),
                  input_steps=[binarizer],
                  adapter=Adapter({'y_pred': E(binarizer.name, 'binarized_images')}),
                  experiment_directory=cache_dirpath)

    return output
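A usage sketch for the pipeline above, under the assumption that Step.transform accepts the same nested data dictionary used elsewhere in these examples; predictions is a hypothetical stand-in for the raw mask probabilities produced by the network:

import numpy as np

predictions = [np.random.rand(256, 256)]  # stand-in for unet mask probabilities

pipeline = postprocessing_pipeline_simplified(cache_dirpath='/tmp/cache',
                                              loader_mode='resize')
results = pipeline.transform({'unet_output': {'mask_prediction': predictions}})
binary_masks = results['y_pred']  # resized, thresholded masks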
Example #7
def select_features_from_model(features, features_valid, config, train_mode,
                               suffix, **kwargs):
    select_features_step = Step(
        name='select_features_from_model{}'.format(suffix),
        transformer=fe.SelectFeaturesFromModel(threshold='median'),
        input_data=['tap4fun'],
        input_steps=[features],
        is_trainable=True,
        adapter=Adapter({
            'X': E(features.name, 'features'),
            'y': E('tap4fun', 'y')
        }),
        experiment_directory=config.pipeline.experiment_directory,
    )
    if train_mode:
        select_features_valid_step = Step(
            name='select_features_from_model_valid{}'.format(suffix),
            transformer=select_features_step.transformer,
            input_steps=[features_valid],
            adapter=Adapter({'X': E(features_valid.name, 'features')}),
            experiment_directory=config.pipeline.experiment_directory,
        )
        return select_features_step, select_features_valid_step
    else:
        return select_features_step
Example #8
def retinanet(config, train_mode, visualize=False):
    persist_output = False
    load_persisted_output = False

    loader = preprocessing_generator(config, is_train=train_mode)

    retinanet = Step(name='retinanet',
                     transformer=Retina(**config.retinanet,
                                        train_mode=train_mode),
                     input_steps=[loader],
                     experiment_directory=config.env.cache_dirpath,
                     persist_output=persist_output,
                     is_trainable=True,
                     load_persisted_output=load_persisted_output)

    if train_mode:
        return retinanet

    if visualize:
        return visualizer(retinanet, loader.get_step('label_encoder'), config)

    postprocessor = postprocessing(retinanet, loader.get_step('label_encoder'),
                                   config)

    output = Step(name='output',
                  transformer=IdentityOperation(),
                  input_steps=[postprocessor],
                  adapter=Adapter(
                      {'y_pred': E(postprocessor.name, 'submission')}),
                  experiment_directory=config.env.cache_dirpath,
                  persist_output=persist_output,
                  load_persisted_output=load_persisted_output)
    return output
Example #9
def visualizer(model, label_encoder, config):
    label_decoder = Step(name='label_decoder',
                         transformer=GoogleAiLabelDecoder(),
                         input_steps=[
                             label_encoder,
                         ],
                         experiment_directory=config.env.cache_dirpath)

    decoder = Step(
        name='decoder',
        transformer=DataDecoder(**config.postprocessing.data_decoder),
        input_data=['input'],
        input_steps=[
            model,
        ],
        experiment_directory=config.env.cache_dirpath)

    visualize = Step(name='visualizer',
                     transformer=Visualizer(),
                     input_steps=[label_decoder, decoder],
                     input_data=['input'],
                     adapter=Adapter({
                         'images_data':
                         E('input', 'images_data'),
                         'results':
                         E(decoder.name, 'results'),
                         'decoder_dict':
                         E(label_decoder.name, 'inverse_mapping')
                     }),
                     experiment_directory=config.env.cache_dirpath)

    return visualize
Example #10
def _numerical_transforms(dispatchers, config, train_mode, suffix, **kwargs):
    if train_mode:
        feature_by_type_split, feature_by_type_split_valid = dispatchers
    else:
        feature_by_type_split = dispatchers

    log_num = Step(
        name='log_num{}'.format(suffix),
        transformer=make_transformer(lambda x: np.log(x + 1),
                                     output_name='numerical_features'),
        input_steps=[feature_by_type_split],
        adapter=Adapter(
            {'x': E(feature_by_type_split.name, 'numerical_features')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:
        log_num_valid = Step(
            name='log_num_valid{}'.format(suffix),
            transformer=log_num,
            input_steps=[feature_by_type_split_valid],
            adapter=Adapter({
                'x':
                E(feature_by_type_split_valid.name, 'numerical_features')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return log_num, log_num_valid
    else:
        return log_num
Example #11
def postprocessing(model, label_encoder, config):
    label_decoder = Step(name='label_decoder',
                         transformer=GoogleAiLabelDecoder(),
                         input_steps=[
                             label_encoder,
                         ],
                         experiment_directory=config.env.cache_dirpath)

    decoder = Step(
        name='decoder',
        transformer=DataDecoder(**config.postprocessing.data_decoder),
        input_steps=[
            model,
        ],
        experiment_directory=config.env.cache_dirpath)

    submission_producer = Step(
        name='submission_producer',
        transformer=PredictionFormatter(
            **config.postprocessing.prediction_formatter),
        input_steps=[label_decoder, decoder],
        input_data=['input'],
        adapter=Adapter({
            'image_ids':
            E('input', 'img_ids'),
            'results':
            E(decoder.name, 'results'),
            'decoder_dict':
            E(label_decoder.name, 'inverse_mapping')
        }),
        experiment_directory=config.env.cache_dirpath)
    return submission_producer
Example #12
def _feature_by_type_splits(config, train_mode, suffix):
    if train_mode:
        feature_by_type_split = Step(
            name='inferred_type_splitter{}'.format(suffix),
            transformer=fe.InferredTypeSplitter(),
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X')}),
            experiment_directory=config.pipeline.experiment_directory)

        feature_by_type_split_valid = Step(
            name='inferred_type_splitter_valid{}'.format(suffix),
            transformer=feature_by_type_split,
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X_valid')}),
            experiment_directory=config.pipeline.experiment_directory)

        return feature_by_type_split, feature_by_type_split_valid

    else:
        feature_by_type_split = Step(
            name='inferred_type_splitter{}'.format(suffix),
            transformer=fe.InferredTypeSplitter(),
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X')}),
            experiment_directory=config.pipeline.experiment_directory)

    return feature_by_type_split
Example #13
def preprocessing_inference(config, model_name='unet', suffix=''):
    if config.general.loader_mode == 'resize_and_pad':
        loader_config = config.loaders.resize_and_pad
    elif config.general.loader_mode == 'resize':
        loader_config = config.loaders.resize
    else:
        raise NotImplementedError

    if loader_config.dataset_params.image_source == 'memory':
        reader_inference = Step(name='reader_inference{}'.format(suffix),
                                transformer=loaders.ImageReader(train_mode=False, **config.reader[model_name]),
                                input_data=['input'],
                                adapter=Adapter({'meta': E('input', 'meta')}),
                                experiment_directory=config.execution.experiment_dir)

    elif loader_config.dataset_params.image_source == 'disk':
        reader_inference = Step(name='xy_inference{}'.format(suffix),
                                transformer=loaders.XYSplit(train_mode=False, **config.xy_splitter[model_name]),
                                input_data=['input'],
                                adapter=Adapter({'meta': E('input', 'meta')}),
                                experiment_directory=config.execution.experiment_dir)
    else:
        raise NotImplementedError

    loader = Step(name='loader{}'.format(suffix),
                  transformer=loaders.ImageSegmentationLoader(train_mode=False, **loader_config),
                  input_steps=[reader_inference],
                  adapter=Adapter({'X': E(reader_inference.name, 'X'),
                                   'y': E(reader_inference.name, 'y'),
                                   }),
                  experiment_directory=config.execution.experiment_dir,
                  cache_output=True)
    return loader
Example #14
def test_inputs_with_conflicting_names_require_adapter(data):
    step = Step(name='test_inputs_with_conflicting_names_require_adapter',
                transformer=IdentityOperation(),
                input_data=['input_1', 'input_3'],
                experiment_directory=EXP_DIR)
    with pytest.raises(StepsError):
        step.fit_transform(data)
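The test above exercises steppy's name-conflict check: when two entries of input_data expose identically named keys and no adapter disambiguates them, fit_transform raises StepsError. A hedged sketch of the fix, assuming 'input_1' and 'input_3' both carry a key named 'features' (the key and output names here are hypothetical):

step = Step(name='resolved_with_adapter',
            transformer=IdentityOperation(),
            input_data=['input_1', 'input_3'],
            adapter=Adapter({'features_1': E('input_1', 'features'),
                             'features_3': E('input_3', 'features')}),
            experiment_directory=EXP_DIR)
step.fit_transform(data)  # no StepsError: every incoming key is mapped explicitly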
Example #15
def preprocessing_inference_tta(config, model_name='unet', suffix=''):
    reader_inference = Step(name='reader_inference{}'.format(suffix),
                            transformer=loaders.XYSplit(
                                train_mode=False,
                                **config.xy_splitter[model_name]),
                            input_data=['input'],
                            adapter=Adapter({'meta': E('input', 'meta')}),
                            experiment_directory=config.env.experiment_dir)

    tta_generator = Step(name='tta_generator{}'.format(suffix),
                         transformer=loaders.MetaTestTimeAugmentationGenerator(
                             **config.tta_generator),
                         input_steps=[reader_inference],
                         adapter=Adapter({'X': E(reader_inference.name, 'X')}),
                         experiment_directory=config.env.experiment_dir)

    if config.execution.loader_mode == 'crop_and_pad':
        Loader = loaders.ImageSegmentationLoaderCropPadTTA
    elif config.execution.loader_mode == 'resize':
        Loader = loaders.ImageSegmentationLoaderResizeTTA
    else:
        raise NotImplementedError

    loader = Step(name='loader{}'.format(suffix),
                  transformer=Loader(**config.loader),
                  input_steps=[tta_generator],
                  adapter=Adapter({
                      'X':
                      E(tta_generator.name, 'X_tta'),
                      'tta_params':
                      E(tta_generator.name, 'tta_params'),
                  }),
                  experiment_directory=config.env.experiment_dir,
                  cache_output=True)
    return loader, tta_generator
Example #16
def preprocessing_fillna(features, config, train_mode, suffix, **kwargs):
    """
        impute missing value by condition
    """
    if train_mode:
        features_train, features_valid = features
        fillna = Step(
            name='fillna{}'.format(suffix),
            transformer=_fillna(
                config.preprocessing.impute_missing.fill_value),
            input_steps=[features_train, features_valid],
            adapter=Adapter({
                'X': E(features_train.name, 'features'),
                'X_valid': E(features_valid.name, 'features'),
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
    else:
        fillna = Step(
            name='fillna{}'.format(suffix),
            transformer=_fillna(
                config.preprocessing.impute_missing.fill_value),
            input_steps=[features],
            adapter=Adapter({'X': E(features.name, 'features')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
    return fillna
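The _fillna helper is not defined in this listing. A minimal sketch of what it could look like, assuming the steppy BaseTransformer contract (fit returns self, transform returns a dict of outputs); the inner class name is hypothetical:

from steppy.base import BaseTransformer

def _fillna(fill_value):
    class _FillNa(BaseTransformer):
        def transform(self, X, X_valid=None, **kwargs):
            # Mirror the adapter keys wired above: 'X' always,
            # 'X_valid' only when the validation branch is present.
            if X_valid is None:
                return {'X': X.fillna(fill_value)}
            return {'X': X.fillna(fill_value),
                    'X_valid': X_valid.fillna(fill_value)}
    return _FillNa()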
Example #17
def _application_groupby_agg(config, train_mode, suffix, **kwargs):
    application_groupby_agg = Step(
        name='application_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateDiffs(
            **config.applications.aggregations),
        input_data=['application'],
        adapter=Adapter({'main_table': E('application', 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:
        application_groupby_agg_valid = Step(
            name='application_groupby_agg_valid{}'.format(suffix),
            transformer=application_groupby_agg,
            input_data=['application'],
            adapter=Adapter({
                'main_table': E('application', 'X_valid'),
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)

        return application_groupby_agg, application_groupby_agg_valid

    else:
        return application_groupby_agg
Example #18
def preprocessing_inference(config, model_name='unet', suffix=''):
    if config.execution.loader_mode == 'crop_and_pad':
        Loader = loaders.ImageSegmentationLoaderCropPad
    elif config.execution.loader_mode == 'resize':
        Loader = loaders.ImageSegmentationLoaderResize
    else:
        raise NotImplementedError

    reader_inference = Step(name='xy_inference{}'.format(suffix),
                            transformer=loaders.XYSplit(
                                train_mode=False,
                                **config.xy_splitter[model_name]),
                            input_data=['input'],
                            adapter=Adapter({'meta': E('input', 'meta')}),
                            experiment_directory=config.env.experiment_dir)

    loader = Step(name='loader{}'.format(suffix),
                  transformer=Loader(train_mode=False, **config.loader),
                  input_steps=[reader_inference],
                  adapter=Adapter({
                      'X': E(reader_inference.name, 'X'),
                      'y': E(reader_inference.name, 'y'),
                  }),
                  experiment_directory=config.env.experiment_dir,
                  cache_output=True)
    return loader
Example #19
def _bureau(config, train_mode, **kwargs):
    if train_mode:
        bureau = Step(name='bureau',
                      transformer=fe.GroupbyAggregationFromFile(**config.bureau),
                      input_data=['input'],
                      adapter=Adapter({'X': E('input', 'X')}),
                      experiment_directory=config.pipeline.experiment_directory,
                      **kwargs)

        bureau_valid = Step(name='bureau_valid',
                            transformer=bureau,
                            input_data=['input'],
                            adapter=Adapter({'X': E('input', 'X_valid')}),
                            experiment_directory=config.pipeline.experiment_directory,
                            **kwargs)

        return bureau, bureau_valid

    else:
        bureau = Step(name='bureau',
                      transformer=fe.GroupbyAggregationFromFile(**config.bureau),
                      input_data=['input'],
                      adapter=Adapter({'X': E('input', 'X')}),
                      experiment_directory=config.pipeline.experiment_directory,
                      **kwargs)

        return bureau
Example #20
def data_cleaning_v2(config, train_mode, suffix, **kwargs):
    cleaned_data = data_cleaning_v1(config, train_mode, suffix, **kwargs)

    if train_mode:
        cleaned_data, cleaned_data_valid = cleaned_data

    impute_missing = Step(name='dummies_missing{}'.format(suffix),
                          transformer=dc.DummiesMissing(**config.dummies_missing),
                          input_steps=[cleaned_data],
                          adapter=Adapter({'X': E(cleaned_data.name, 'numerical_features')}),
                          experiment_directory=config.pipeline.experiment_directory, **kwargs)

    if train_mode:
        impute_missing_valid = Step(name='dummies_missing_valid{}'.format(suffix),
                                    transformer=impute_missing,
                                    input_steps=[cleaned_data_valid],
                                    adapter=Adapter({'X': E(cleaned_data_valid.name, 'numerical_features')}),
                                    experiment_directory=config.pipeline.experiment_directory, **kwargs)
        return impute_missing, impute_missing_valid
    else:
        return impute_missing
Example #21
def mask_postprocessing(config, suffix=''):
    if config.general.loader_mode == 'crop_and_pad':
        size_adjustment_function = partial(
            crop_image, target_size=config.general.original_size)
    elif config.general.loader_mode == 'resize':
        size_adjustment_function = partial(
            resize_image, target_size=config.general.original_size)
    else:
        raise NotImplementedError

    mask_resize = Step(name='mask_resize{}'.format(suffix),
                       transformer=make_apply_transformer(
                           size_adjustment_function,
                           output_name='resized_images',
                           apply_on=['images']),
                       input_data=['input_masks'],
                       adapter=Adapter({
                           'images':
                           E('input_masks', 'mask_prediction'),
                       }),
                       experiment_directory=config.execution.experiment_dir)

    binarizer = Step(name='binarizer{}'.format(suffix),
                     transformer=make_apply_transformer(
                         partial(binarize,
                                 threshold=config.thresholder.threshold_masks),
                         output_name='binarized_images',
                         apply_on=['images']),
                     input_steps=[mask_resize],
                     adapter=Adapter({
                         'images':
                         E(mask_resize.name, 'resized_images'),
                     }),
                     experiment_directory=config.execution.experiment_dir)
    return binarizer
Example #22
def _feature_by_type_splits(config, train_mode):
    if train_mode:
        feature_by_type_split = Step(name='feature_by_type_split',
                                     transformer=fe.DataFrameByTypeSplitter(
                                         **config.dataframe_by_type_splitter),
                                     input_data=['input'],
                                     adapter=Adapter({'X': E('input', 'X')}),
                                     cache_dirpath=config.env.cache_dirpath)

        feature_by_type_split_valid = Step(
            name='feature_by_type_split_valid',
            transformer=feature_by_type_split,
            input_data=['input'],
            adapter=Adapter({'X': E('input', 'X_valid')}),
            cache_dirpath=config.env.cache_dirpath)

        return feature_by_type_split, feature_by_type_split_valid

    else:
        feature_by_type_split = Step(name='feature_by_type_split',
                                     transformer=fe.DataFrameByTypeSplitter(
                                         **config.dataframe_by_type_splitter),
                                     input_data=['input'],
                                     adapter=Adapter({'X': E('input', 'X')}),
                                     cache_dirpath=config.env.cache_dirpath)

    return feature_by_type_split
Example #23
def preprocessing_binary_train(config, model_name, suffix='_binary_model'):
    reader_train = Step(name='xy_train{}'.format(suffix),
                        transformer=loaders.MetaReader(
                            train_mode=True, **config.meta_reader[model_name]),
                        input_data=['input'],
                        adapter=Adapter({'meta': E('input', 'meta')}))

    reader_inference = Step(
        name='xy_inference{}'.format(suffix),
        transformer=loaders.MetaReader(train_mode=True,
                                       **config.meta_reader[model_name]),
        input_data=['callback_input'],
        adapter=Adapter({'meta': E('callback_input', 'meta_valid')}))

    transformer = OneClassImageClassificatioLoader(
        train_mode=True,
        loader_params=config.loaders.resize.loader_params,
        dataset_params=config.loaders.resize.dataset_params,
        augmentation_params=config.loaders.resize.augmentation_params)

    binary_loader = Step(name='loader{}'.format(suffix),
                         transformer=transformer,
                         input_steps=[reader_train, reader_inference],
                         adapter=Adapter({
                             'X':
                             E(reader_train.name, 'X'),
                             'y':
                             E(reader_train.name, 'y'),
                             'X_valid':
                             E(reader_inference.name, 'X'),
                             'y_valid':
                             E(reader_inference.name, 'y'),
                         }))

    return binary_loader
Example #24
def preprocessing_train(config, model_name='network'):
    if config.general.loader_mode == 'resize':
        loader_config = config.loaders.resize
        LOADER = loaders.ImageSegmentationLoaderResize
    else:
        raise NotImplementedError

    reader_train = Step(name='xy_train',
                        transformer=loaders.MetaReader(
                            train_mode=True, **config.meta_reader[model_name]),
                        input_data=['input'],
                        adapter=Adapter({'meta': E('input', 'meta')}))

    reader_inference = Step(
        name='xy_inference',
        transformer=loaders.MetaReader(train_mode=True,
                                       **config.meta_reader[model_name]),
        input_data=['callback_input'],
        adapter=Adapter({'meta': E('callback_input', 'meta_valid')}))

    loader = Step(name='loader',
                  transformer=LOADER(train_mode=True, **loader_config),
                  input_steps=[reader_train, reader_inference],
                  adapter=Adapter({
                      'X': E(reader_train.name, 'X'),
                      'y': E(reader_train.name, 'y'),
                      'X_valid': E(reader_inference.name, 'X'),
                      'y_valid': E(reader_inference.name, 'y'),
                  }))
    return loader
Example #25
def emptiness_preprocessing_train(config, model_name='network', suffix=''):
    reader_train = Step(name='xy_train{}'.format(suffix),
                        transformer=loaders.XYSplit(
                            train_mode=True, **config.xy_splitter[model_name]),
                        input_data=['input'],
                        adapter=Adapter({'meta': E('input', 'meta')}),
                        experiment_directory=config.execution.experiment_dir)

    reader_inference = Step(
        name='xy_inference{}'.format(suffix),
        transformer=loaders.XYSplit(train_mode=True,
                                    **config.xy_splitter[model_name]),
        input_data=['callback_input'],
        adapter=Adapter({'meta': E('callback_input', 'meta_valid')}),
        experiment_directory=config.execution.experiment_dir)

    loader = Step(name='loader{}'.format(suffix),
                  transformer=loaders.EmptinessLoader(train_mode=True,
                                                      **config.loaders.resize),
                  input_steps=[reader_train, reader_inference],
                  adapter=Adapter({
                      'X': E(reader_train.name, 'X'),
                      'y': E(reader_train.name, 'y'),
                      'X_valid': E(reader_inference.name, 'X'),
                      'y_valid': E(reader_inference.name, 'y'),
                  }),
                  experiment_directory=config.execution.experiment_dir)
    return loader
Example #26
def preprocessing_inference_tta(config, model_name='network'):
    if config.general.loader_mode == 'resize':
        loader_config = config.loaders.resize_tta
        LOADER = loaders.ImageSegmentationLoaderResizeTTA
    else:
        raise NotImplementedError

    reader_inference = Step(name='reader_inference',
                            transformer=loaders.MetaReader(
                                train_mode=False,
                                **config.meta_reader[model_name]),
                            input_data=['input'],
                            adapter=Adapter({'meta': E('input', 'meta')}))

    tta_generator = Step(name='tta_generator',
                         transformer=loaders.MetaTestTimeAugmentationGenerator(
                             **config.tta_generator),
                         input_steps=[reader_inference],
                         adapter=Adapter({'X': E('reader_inference', 'X')}))

    loader = Step(name='loader',
                  transformer=LOADER(**loader_config),
                  input_steps=[tta_generator],
                  adapter=Adapter({
                      'X':
                      E(tta_generator.name, 'X_tta'),
                      'tta_params':
                      E(tta_generator.name, 'tta_params'),
                  }))
    return loader, tta_generator
Example #27
def _application(config, train_mode, suffix, **kwargs):
    if train_mode:
        application_cleaning, application_cleaning_valid = _application_cleaning(
            config, train_mode, suffix, **kwargs)
    else:
        application_cleaning = _application_cleaning(config, train_mode,
                                                     suffix, **kwargs)

    application = Step(
        name='application_hand_crafted{}'.format(suffix),
        transformer=fe.ApplicationFeatures(**config.applications.columns),
        input_steps=[application_cleaning],
        adapter=Adapter({'X': E(application_cleaning.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    if train_mode:
        application_valid = Step(
            name='application_hand_crafted_valid{}'.format(suffix),
            transformer=application,
            input_steps=[application_cleaning_valid],
            adapter=Adapter({'X': E(application_cleaning_valid.name, 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return application, application_valid
    else:
        return application
Example #28
def _application_groupby_agg(config, train_mode, suffix, **kwargs):
    if train_mode:
        application_cleaning, application_cleaning_valid = _application_cleaning(
            config, train_mode, suffix, **kwargs)
    else:
        application_cleaning = _application_cleaning(config, train_mode,
                                                     suffix, **kwargs)

    application_groupby_agg = Step(
        name='application_groupby_agg{}'.format(suffix),
        transformer=fe.GroupbyAggregateDiffs(
            **config.applications.aggregations),
        input_steps=[application_cleaning],
        adapter=Adapter({'main_table': E(application_cleaning.name, 'X')}),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)

    if train_mode:
        application_groupby_agg_valid = Step(
            name='application_groupby_agg_valid{}'.format(suffix),
            transformer=application_groupby_agg,
            input_steps=[application_cleaning_valid],
            adapter=Adapter(
                {'main_table': E(application_cleaning_valid.name, 'X')}),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)

        return application_groupby_agg, application_groupby_agg_valid

    else:
        return application_groupby_agg
Example #29
def _categorical_encoders(config, train_mode, suffix, **kwargs):
    categorical_encoder = Step(
        name='categorical_encoder{}'.format(suffix),
        transformer=fe.CategoricalEncoder(
            **config.preprocessing.categorical_encoder),
        input_data=['application'],
        adapter=Adapter({
            'X': E('application', 'X'),
            'y': E('application', 'y')
        }),
        experiment_directory=config.pipeline.experiment_directory,
        **kwargs)
    if train_mode:
        categorical_encoder_valid = Step(
            name='categorical_encoder_valid{}'.format(suffix),
            transformer=categorical_encoder,
            input_data=['application'],
            adapter=Adapter({
                'X': E('application', 'X_valid'),
                'y': E('application', 'y_valid')
            }),
            experiment_directory=config.pipeline.experiment_directory,
            **kwargs)
        return categorical_encoder, categorical_encoder_valid
    else:
        return categorical_encoder
Example #30
def test_inputs_with_conflicting_names_require_adapter(data):
    step = Step(name='test_inputs_with_conflicting_names_require_adapter',
                transformer=IdentityOperation(),
                input_data=['input_1', 'input_3'],
                cache_dirpath=CACHE_DIRPATH)
    with pytest.raises(StepsError):
        step.fit_transform(data)
Example #31
def main():
    data_train = pd.read_csv('../../new_input/app_train.csv', nrows=5000)
    data_test = pd.read_csv('../../new_input/app_test.csv', nrows=5000)

    feature_cols = [col for col in data_train.columns if data_train[col].dtypes != 'object']
    print(len(feature_cols))

    y_train = data_train['TARGET']
    X_train = data_train[feature_cols]

    print(X_train.shape, y_train.shape)

    # build the train/validation data dictionaries

    trn_X, val_X, trn_y, val_y = train_test_split(X_train, y_train)

    train_data = {'input':
        {
            'X': trn_X,
            'y': trn_y,
        }
    }

    val_data = {'input':
        {
            'X': val_X,
            'y': val_y,
        }
    }

    norm_step = Step(name='Normalizer',
                     transformer=NormalizationTransformer(),
                     input_data=['input'],
                     adapter=Adapter({
                         'X': E('input', 'X')
                     }),
                     experiment_directory=EXPERIMENT_DIR)

    pca_step = Step(name='PCA',
                    transformer=PCATransformer(),
                    input_steps=[norm_step],
                    experiment_directory=EXPERIMENT_DIR)

    knn_step = Step(name='KNN',
                    transformer=KNNTransformer(),
                    input_data=['input'],
                    input_steps=[pca_step],
                    adapter=Adapter({
                        'X': E('PCA', 'X'),
                        'y': E('input', 'y')
                    }),
                    experiment_directory=EXPERIMENT_DIR)

    print(knn_step)
    trn_pred = knn_step.fit_transform(train_data)
    val_pred = knn_step.transform(val_data)

    print(roc_auc_score(val_y, val_pred['y_pred']))
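NormalizationTransformer, PCATransformer, and KNNTransformer are not shown in this listing. A hedged sketch of what NormalizationTransformer might look like under the steppy BaseTransformer contract; persist/load are what allow Step to checkpoint the fitted state into experiment_directory (the scikit-learn scaler is an assumption, not taken from the original code):

import joblib
from sklearn.preprocessing import StandardScaler
from steppy.base import BaseTransformer

class NormalizationTransformer(BaseTransformer):
    def __init__(self):
        super().__init__()
        self.scaler = StandardScaler()

    def fit(self, X, **kwargs):
        self.scaler.fit(X)
        return self

    def transform(self, X, **kwargs):
        # Keys of the returned dict are what downstream adapters reference.
        return {'X': self.scaler.transform(X)}

    def persist(self, filepath):
        joblib.dump(self.scaler, filepath)

    def load(self, filepath):
        self.scaler = joblib.load(filepath)
        return self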