Ejemplo n.º 1
0
def seq_conv_train(config):
    """Wire up the sequential conv-net training pipeline and return its final step.

    Dependency chain (via ``input_steps``):
    ``xy_train`` + ``xy_inference`` -> ``loader`` -> ``sequential_convnet``
    -> ``mask_resize`` -> ``thresholding`` -> ``output``.

    Args:
        config: configuration object. Read here: ``xy_splitter``, ``loader``,
            ``sequential_convnet`` and ``thresholder`` (each unpacked as keyword
            arguments into a transformer) plus ``env.cache_dirpath``.

    Returns:
        The ``output`` Step, whose adapter exposes the ``binarized_images`` of
        ``thresholding`` under the key ``y_pred``.
    """
    cache_dir = config.env.cache_dirpath

    # Two splitters over the same raw input: one is fed from 'meta', the
    # other from 'meta_valid'.
    train_split = Step(
        name='xy_train',
        transformer=XYSplit(**config.xy_splitter),
        input_data=['input'],
        adapter={
            'meta': [('input', 'meta')],
            'train_mode': [('input', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )
    valid_split = Step(
        name='xy_inference',
        transformer=XYSplit(**config.xy_splitter),
        input_data=['input'],
        adapter={
            'meta': [('input', 'meta_valid')],
            'train_mode': [('input', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )

    # The loader receives both splits; every X/y entry is passed through
    # squeeze_inputs before reaching the transformer.
    batch_loader = Step(
        name='loader',
        transformer=MetadataImageSegmentationLoader(**config.loader),
        input_data=['input'],
        input_steps=[train_split, valid_split],
        adapter={
            'X': ([('xy_train', 'X')], squeeze_inputs),
            'y': ([('xy_train', 'y')], squeeze_inputs),
            'train_mode': [('input', 'train_mode')],
            'X_valid': ([('xy_inference', 'X')], squeeze_inputs),
            'y_valid': ([('xy_inference', 'y')], squeeze_inputs),
        },
        cache_dirpath=cache_dir,
    )

    network = Step(
        name='sequential_convnet',
        transformer=SequentialConvNet(**config.sequential_convnet),
        input_steps=[batch_loader],
        cache_dirpath=cache_dir,
    )

    # Post-processing: resize predictions using 'target_sizes' from the raw
    # input, then threshold them.
    resized = Step(
        name='mask_resize',
        transformer=Resizer(),
        input_data=['input'],
        input_steps=[network],
        adapter={
            'images': [('sequential_convnet', 'predicted_masks')],
            'target_sizes': [('input', 'target_sizes')],
        },
        cache_dirpath=cache_dir,
    )
    binarized = Step(
        name='thresholding',
        transformer=Thresholder(**config.thresholder),
        input_steps=[resized],
        adapter={'images': [('mask_resize', 'resized_images')]},
        cache_dirpath=cache_dir,
    )

    return Step(
        name='output',
        transformer=Dummy(),
        input_steps=[binarized],
        adapter={'y_pred': [('thresholding', 'binarized_images')]},
        cache_dirpath=cache_dir,
    )
def inference_preprocessing(config):
    """Build the text-cleaning preprocessing steps and return the collecting step.

    Dependency chain: ``xy_train`` -> ``text_cleaner_train`` ->
    ``cleaning_output`` (which also depends directly on ``xy_train``).

    Args:
        config: configuration object. Read here: ``xy_splitter`` and
            ``text_cleaner`` (unpacked as transformer keyword arguments) and
            ``env.cache_dirpath``.

    Returns:
        The ``cleaning_output`` Step, whose adapter maps the cleaned text to
        ``X``, the split labels to ``y`` and forwards ``train_mode``.
    """
    cache_dir = config.env.cache_dirpath

    splitter = Step(
        name='xy_train',
        transformer=XYSplit(**config.xy_splitter),
        input_data=['input'],
        adapter={
            'meta': [('input', 'meta')],
            'train_mode': [('input', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )

    # The step keeps the name 'text_cleaner_train'; the adapter below refers
    # to it by that exact string.
    cleaner = Step(
        name='text_cleaner_train',
        transformer=TextCleaner(**config.text_cleaner),
        input_steps=[splitter],
        adapter={'X': [('xy_train', 'X')]},
        cache_dirpath=cache_dir,
    )

    return Step(
        name='cleaning_output',
        transformer=Dummy(),
        input_data=['input'],
        input_steps=[splitter, cleaner],
        adapter={
            'X': [('text_cleaner_train', 'X')],
            'y': [('xy_train', 'y')],
            'train_mode': [('input', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )
Ejemplo n.º 3
0
def inference_preprocessing(config):
    """Build the NA-filling and X/y splitting steps and return the splitter.

    Dependency chain: ``fill_na_x`` -> ``xy_split``.

    Args:
        config: configuration object. Read here: ``fill_na`` and ``xy_split``
            (unpacked as transformer keyword arguments) and
            ``env.cache_dirpath``.

    Returns:
        The ``xy_split`` Step, which takes ``meta`` from the ``X`` output of
        ``fill_na_x`` and ``train_mode`` from the raw input.
    """
    cache_dir = config.env.cache_dirpath

    na_filler = Step(
        name='fill_na_x',
        transformer=FillNA(**config.fill_na),
        input_data=['input'],
        adapter={'X': [('input', 'meta')]},
        cache_dirpath=cache_dir,
    )

    return Step(
        name='xy_split',
        transformer=XYSplit(**config.xy_split),
        input_data=['input'],
        input_steps=[na_filler],
        adapter={
            'meta': [('fill_na_x', 'X')],
            'train_mode': [('input', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )
Ejemplo n.º 4
0
def unet_inference(config):
    """Wire up the U-Net inference pipeline and return its final step.

    Dependency chain (via ``input_steps``):
    ``xy_inference`` -> ``loader`` -> ``unet_network`` -> ``mask_resize``
    -> ``thresholding`` -> ``output``.

    Args:
        config: configuration object. Read here: ``xy_splitter``, ``loader``,
            ``unet_network`` and ``thresholder`` (each unpacked as keyword
            arguments into a transformer) plus ``env.cache_dirpath``.

    Returns:
        The ``output`` Step, whose adapter exposes the ``binarized_images`` of
        ``thresholding`` under the key ``y_pred``.
    """
    cache_dir = config.env.cache_dirpath

    xy_inference = Step(
        name='xy_inference',
        transformer=XYSplit(**config.xy_splitter),
        input_data=['input'],
        adapter={
            'meta': [('input', 'meta')],
            'train_mode': [('input', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )

    # The splitter is listed once. The earlier code passed it twice in
    # input_steps; the adapter only addresses it by name, so the second entry
    # added nothing.
    # NOTE(review): presumably Step runs each listed input step, so the
    # duplicate also cost a repeated run - confirm against the Step class.
    loader_inference = Step(
        name='loader',
        transformer=MetadataImageSegmentationLoader(**config.loader),
        input_data=['input'],
        input_steps=[xy_inference],
        adapter={
            'X': ([('xy_inference', 'X')], squeeze_inputs),
            'y': ([('xy_inference', 'y')], squeeze_inputs),
            'train_mode': [('input', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )

    unet_network = Step(
        name='unet_network',
        transformer=PyTorchUNet(**config.unet_network),
        input_steps=[loader_inference],
        cache_dirpath=cache_dir,
    )

    # Post-processing: resize predictions using 'target_sizes' from the raw
    # input, then threshold them.
    mask_resize = Step(
        name='mask_resize',
        transformer=Resizer(),
        input_data=['input'],
        input_steps=[unet_network],
        adapter={
            'images': [('unet_network', 'predicted_masks')],
            'target_sizes': [('input', 'target_sizes')],
        },
        cache_dirpath=cache_dir,
    )

    thresholding = Step(
        name='thresholding',
        transformer=Thresholder(**config.thresholder),
        input_steps=[mask_resize],
        adapter={'images': [('mask_resize', 'resized_images')]},
        cache_dirpath=cache_dir,
    )

    output = Step(
        name='output',
        transformer=Dummy(),
        input_steps=[thresholding],
        adapter={'y_pred': [('thresholding', 'binarized_images')]},
        cache_dirpath=cache_dir,
    )
    return output
Ejemplo n.º 5
0
def preprocessing_multitask_inference(config):
    """Build the inference-time loader step for the multitask model.

    Two variants are selected by ``config.execution.load_in_memory``:

    * truthy: ``reader_inference`` (``ImageReader``) feeds
      ``ImageSegmentationMultitaskLoader``;
    * falsy: ``xy_inference`` (``XYSplit``) feeds
      ``MetadataImageSegmentationMultitaskLoader``, with ``X``/``y`` passed
      through ``squeeze_inputs``.

    Args:
        config: configuration object. Read here: ``execution.load_in_memory``,
            ``reader_multitask`` or ``xy_splitter`` (depending on the branch),
            ``loader`` and ``env.cache_dirpath``.

    Returns:
        The Step named ``loader`` from the selected branch.
    """
    cache_dir = config.env.cache_dirpath

    if config.execution.load_in_memory:
        reader_inference = Step(
            name='reader_inference',
            transformer=ImageReader(**config.reader_multitask),
            input_data=['input'],
            adapter={
                'meta': [('input', 'meta')],
                'train_mode': [('input', 'train_mode')],
            },
            cache_dirpath=cache_dir,
        )

        loader = Step(
            name='loader',
            transformer=ImageSegmentationMultitaskLoader(**config.loader),
            input_data=['input'],
            input_steps=[reader_inference],
            adapter={
                'X': [('reader_inference', 'X')],
                'y': [('reader_inference', 'y')],
                'train_mode': [('input', 'train_mode')],
            },
            cache_dirpath=cache_dir,
        )
    else:
        xy_inference = Step(
            name='xy_inference',
            transformer=XYSplit(**config.xy_splitter),
            input_data=['input'],
            adapter={
                'meta': [('input', 'meta')],
                'train_mode': [('input', 'train_mode')],
            },
            cache_dirpath=cache_dir,
        )

        # The splitter is listed once. The earlier code passed it twice in
        # input_steps; the adapter only addresses it by name, so the second
        # entry added nothing.
        # NOTE(review): presumably Step runs each listed input step, so the
        # duplicate also cost a repeated run - confirm against the Step class.
        loader = Step(
            name='loader',
            transformer=MetadataImageSegmentationMultitaskLoader(
                **config.loader),
            input_data=['input'],
            input_steps=[xy_inference],
            adapter={
                'X': ([('xy_inference', 'X')], squeeze_inputs),
                'y': ([('xy_inference', 'y')], squeeze_inputs),
                'train_mode': [('input', 'train_mode')],
            },
            cache_dirpath=cache_dir,
        )
    return loader
def count_features(config):
    """Build the count-feature branch and return its last step.

    Dependency chain: ``xy_split`` -> ``text_counter`` -> ``normalizer``.

    Args:
        config: configuration object. Read here: ``xy_splitter`` (unpacked as
            keyword arguments for ``XYSplit``) and ``env.cache_dirpath``.

    Returns:
        The ``normalizer`` Step, fed with the ``X`` output of ``text_counter``.
    """
    cache_dir = config.env.cache_dirpath

    splitter = Step(
        name='xy_split',
        transformer=XYSplit(**config.xy_splitter),
        input_data=['input'],
        adapter={
            'meta': [('input', 'meta')],
            'train_mode': [('input', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )

    counter = Step(
        name='text_counter',
        transformer=TextCounter(),
        input_steps=[splitter],
        adapter={'X': [('xy_split', 'X')]},
        cache_dirpath=cache_dir,
    )

    return Step(
        name='normalizer',
        transformer=Normalizer(),
        input_steps=[counter],
        adapter={'X': [('text_counter', 'X')]},
        cache_dirpath=cache_dir,
    )
Ejemplo n.º 7
0
def _preprocessing(config, is_train=True):
    """Build the text-cleaning preprocessing steps and return the collecting step.

    ``xy_train`` and ``text_cleaner_train`` are always created. When
    ``is_train`` is truthy, a parallel ``xy_valid`` -> ``text_cleaner_valid``
    pair (fed from ``meta_valid``) is added, and the returned step also
    exposes ``X_valid`` / ``y_valid``.

    Args:
        config: configuration object. Read here: ``xy_splitter`` and
            ``text_cleaner`` (unpacked as transformer keyword arguments) and
            ``env.cache_dirpath``.
        is_train: whether to include the validation branch.

    Returns:
        The ``cleaning_output`` Step.
    """
    cache_dir = config.env.cache_dirpath

    def split_step(step_name, meta_key):
        # X/y splitter reading the given metadata key from the raw input.
        return Step(
            name=step_name,
            transformer=XYSplit(**config.xy_splitter),
            input_data=['input'],
            adapter={
                'meta': [('input', meta_key)],
                'train_mode': [('input', 'train_mode')],
            },
            cache_dirpath=cache_dir,
        )

    def cleaner_step(step_name, upstream, upstream_name):
        # Text cleaner consuming the 'X' output of the given splitter.
        return Step(
            name=step_name,
            transformer=TextCleaner(**config.text_cleaner),
            input_steps=[upstream],
            adapter={'X': [(upstream_name, 'X')]},
            cache_dirpath=cache_dir,
        )

    # Steps shared by both modes.
    train_split = split_step('xy_train', 'meta')
    train_cleaner = cleaner_step('text_cleaner_train', train_split, 'xy_train')

    upstream_steps = [train_split, train_cleaner]
    wiring = {
        'X': [('text_cleaner_train', 'X')],
        'y': [('xy_train', 'y')],
        'train_mode': [('input', 'train_mode')],
    }

    if is_train:
        # The validation branch mirrors the training one, fed from
        # 'meta_valid'.
        valid_split = split_step('xy_valid', 'meta_valid')
        valid_cleaner = cleaner_step('text_cleaner_valid', valid_split,
                                     'xy_valid')
        upstream_steps += [valid_split, valid_cleaner]
        wiring['X_valid'] = [('text_cleaner_valid', 'X')]
        wiring['y_valid'] = [('xy_valid', 'y')]

    return Step(
        name='cleaning_output',
        transformer=Dummy(),
        input_data=['input'],
        input_steps=upstream_steps,
        adapter=wiring,
        cache_dirpath=cache_dir,
    )
def ensemble_extraction(config):
    """Build every first-level model of the ensemble and return them as a list.

    All steps read raw data from the ``input_ensemble`` key. The graph has a
    shared front end (``xy_train`` -> ``text_cleaner_train``), feature
    branches (char/word tokenizers, char/word TF-IDF, bad-word filter with
    its TF-IDF, text counter with its normalizer, GloVe embedding matrix) and
    nine model steps, each created with ``cache_output=True``.

    Args:
        config: configuration object. Read here: ``xy_splitter``,
            ``text_cleaner``, ``char_tokenizer``, ``word_tokenizer``,
            ``tfidf_char_vectorizer``, ``tfidf_word_vectorizer``,
            ``bad_word_filter``, ``glove_embeddings``,
            ``logistic_regression_multilabel``, ``char_vdcnn_network``,
            ``word_lstm_network``, ``glove_lstm_network``,
            ``glove_scnn_network``, ``glove_dpcnn_network`` and
            ``env.cache_dirpath``.

    Returns:
        List of model Steps in this order: ``logreg_count``,
        ``logreg_bad_word``, ``logreg_bad_word_count``, ``logreg_tfidf``,
        ``char_vdcnn``, ``word_lstm``, ``glove_lstm``, ``glove_scnn``,
        ``glove_dpcnn``.
    """
    cache_dir = config.env.cache_dirpath

    # ---- shared front end -------------------------------------------------
    train_split = Step(
        name='xy_train',
        transformer=XYSplit(**config.xy_splitter),
        input_data=['input_ensemble'],
        adapter={
            'meta': [('input_ensemble', 'meta')],
            'train_mode': [('input_ensemble', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )
    cleaner = Step(
        name='text_cleaner_train',
        transformer=TextCleaner(**config.text_cleaner),
        input_steps=[train_split],
        adapter={'X': [('xy_train', 'X')]},
        cache_dirpath=cache_dir,
    )

    # ---- feature branches -------------------------------------------------
    char_tokens = Step(
        name='char_tokenizer',
        transformer=Tokenizer(**config.char_tokenizer),
        input_steps=[cleaner],
        input_data=['input_ensemble'],
        adapter={
            'X': [('text_cleaner_train', 'X')],
            'train_mode': [('input_ensemble', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )
    word_tokens = Step(
        name='word_tokenizer',
        transformer=Tokenizer(**config.word_tokenizer),
        input_steps=[cleaner],
        input_data=['input_ensemble'],
        adapter={
            'X': [('text_cleaner_train', 'X')],
            'train_mode': [('input_ensemble', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )

    char_tfidf = Step(
        name='tfidf_char_vectorizer',
        transformer=TfidfVectorizer(**config.tfidf_char_vectorizer),
        input_steps=[cleaner],
        adapter={'text': [('text_cleaner_train', 'X')]},
        cache_dirpath=cache_dir,
    )
    word_tfidf = Step(
        name='tfidf_word_vectorizer',
        transformer=TfidfVectorizer(**config.tfidf_word_vectorizer),
        input_steps=[cleaner],
        adapter={'text': [('text_cleaner_train', 'X')]},
        cache_dirpath=cache_dir,
    )

    bad_words = Step(
        name='bad_word_filter',
        transformer=WordListFilter(**config.bad_word_filter),
        input_steps=[cleaner],
        adapter={'X': [('text_cleaner_train', 'X')]},
        cache_dirpath=cache_dir,
    )
    # Configured from config.tfidf_word_vectorizer, the same settings as the
    # plain word-level vectorizer above.
    bad_word_tfidf = Step(
        name='bad_word_tfidf_word_vectorizer',
        transformer=TfidfVectorizer(**config.tfidf_word_vectorizer),
        input_steps=[bad_words],
        adapter={'text': [('bad_word_filter', 'X')]},
        cache_dirpath=cache_dir,
    )

    # The counter reads the splitter's 'X' directly, bypassing the cleaner.
    counter = Step(
        name='text_counter',
        transformer=TextCounter(),
        input_steps=[train_split],
        adapter={'X': [('xy_train', 'X')]},
        cache_dirpath=cache_dir,
    )
    count_norm = Step(
        name='normalizer',
        transformer=Normalizer(),
        input_steps=[counter],
        adapter={'X': [('text_counter', 'X')]},
        cache_dirpath=cache_dir,
    )

    glove_matrix = Step(
        name='glove_embeddings',
        transformer=GloveEmbeddingsMatrix(**config.glove_embeddings),
        input_steps=[word_tokens],
        adapter={'tokenizer': [('word_tokenizer', 'tokenizer')]},
        cache_dirpath=cache_dir,
    )

    # ---- model builders ---------------------------------------------------
    def logreg_step(step_name, feature_steps, feature_mapping):
        # Multilabel logistic regression over the given features, with labels
        # taken from xy_train.
        return Step(
            name=step_name,
            transformer=LogisticRegressionMultilabel(
                **config.logistic_regression_multilabel),
            input_steps=[train_split] + feature_steps,
            adapter={
                'X': feature_mapping,
                'y': [('xy_train', 'y')],
            },
            cache_dirpath=cache_dir,
            cache_output=True,
        )

    def token_net_step(step_name, network, tokenizer_step, tokenizer_name):
        # Network trained on one tokenizer's 'X' output.
        return Step(
            name=step_name,
            transformer=network,
            input_steps=[tokenizer_step, train_split],
            adapter={
                'X': [(tokenizer_name, 'X')],
                'y': [('xy_train', 'y')],
            },
            cache_dirpath=cache_dir,
            cache_output=True,
        )

    def glove_net_step(step_name, network):
        # Network on word tokens plus the GloVe embedding matrix.
        return Step(
            name=step_name,
            transformer=network,
            input_steps=[word_tokens, train_split, glove_matrix],
            adapter={
                'X': [('word_tokenizer', 'X')],
                'y': [('xy_train', 'y')],
                'embedding_matrix': [('glove_embeddings', 'embeddings_matrix')],
            },
            cache_dirpath=cache_dir,
            cache_output=True,
        )

    # ---- models, created in the same order as they are returned -------------
    logreg_count = logreg_step(
        'logreg_count',
        [count_norm],
        [('normalizer', 'X')],
    )
    logreg_bad_word = logreg_step(
        'logreg_bad_word',
        [bad_word_tfidf],
        [('bad_word_tfidf_word_vectorizer', 'features')],
    )
    logreg_bad_word_count = logreg_step(
        'logreg_bad_word_count',
        [count_norm, bad_word_tfidf],
        ([('normalizer', 'X'),
          ('bad_word_tfidf_word_vectorizer', 'features')],
         sparse_hstack_inputs),
    )
    logreg_tfidf = logreg_step(
        'logreg_tfidf',
        [char_tfidf, word_tfidf],
        ([('tfidf_char_vectorizer', 'features'),
          ('tfidf_word_vectorizer', 'features')],
         sparse_hstack_inputs),
    )

    char_vdcnn = token_net_step(
        'char_vdcnn', CharVDCNN(**config.char_vdcnn_network),
        char_tokens, 'char_tokenizer')
    word_lstm = token_net_step(
        'word_lstm', WordLSTM(**config.word_lstm_network),
        word_tokens, 'word_tokenizer')

    glove_lstm = glove_net_step(
        'glove_lstm', GloveLSTM(**config.glove_lstm_network))
    glove_scnn = glove_net_step(
        'glove_scnn', GloveSCNN(**config.glove_scnn_network))
    glove_dpcnn = glove_net_step(
        'glove_dpcnn', GloveDPCNN(**config.glove_dpcnn_network))

    return [
        logreg_count,
        logreg_bad_word,
        logreg_bad_word_count,
        logreg_tfidf,
        char_vdcnn,
        word_lstm,
        glove_lstm,
        glove_scnn,
        glove_dpcnn,
    ]
Ejemplo n.º 9
0
def ensemble_extraction(config):
    """Build every first-level model of the ensemble and return them as a list.

    All steps read raw data from the ``input_ensemble`` key. The graph has a
    shared front end (``fill_na_x`` -> ``xy_split``), feature branches
    (char/word tokenizers, char/word TF-IDF, GloVe embedding matrix) and six
    model steps, each created with ``cache_output=True``. Feature steps take
    the ``X`` output of ``xy_split`` through ``fetch_x_train``.

    Args:
        config: configuration object. Read here: ``fill_na``, ``xy_split``,
            ``char_tokenizer``, ``word_tokenizer``,
            ``tfidf_char_vectorizer``, ``tfidf_word_vectorizer``,
            ``glove_embeddings``, ``logistic_regression_multilabel``,
            ``char_vdcnn_network``, ``word_lstm_network``,
            ``glove_lstm_network``, ``glove_scnn_network``,
            ``glove_dpcnn_network`` and ``env.cache_dirpath``.

    Returns:
        List of model Steps in this order: ``log_reg_multi``, ``char_vdcnn``,
        ``word_lstm``, ``glove_lstm``, ``glove_scnn``, ``glove_dpcnn``.
    """
    cache_dir = config.env.cache_dirpath

    # ---- shared front end -------------------------------------------------
    na_filler = Step(
        name='fill_na_x',
        transformer=FillNA(**config.fill_na),
        input_data=['input_ensemble'],
        adapter={'X': [('input_ensemble', 'meta')]},
        cache_dirpath=cache_dir,
    )
    splitter = Step(
        name='xy_split',
        transformer=XYSplit(**config.xy_split),
        input_data=['input_ensemble'],
        input_steps=[na_filler],
        adapter={
            'meta': [('fill_na_x', 'X')],
            'train_mode': [('input_ensemble', 'train_mode')],
        },
        cache_dirpath=cache_dir,
    )

    # ---- feature branches -------------------------------------------------
    def tokenizer_step(step_name, tokenizer):
        # Tokenizer over the splitter's X; 'train_mode' is also looked up on
        # the xy_split step rather than on the raw input.
        return Step(
            name=step_name,
            transformer=tokenizer,
            input_steps=[splitter],
            adapter={
                'X': ([('xy_split', 'X')], fetch_x_train),
                'train_mode': [('xy_split', 'train_mode')],
            },
            cache_dirpath=cache_dir,
        )

    def tfidf_step(step_name, vectorizer):
        # TF-IDF vectorizer over the splitter's X.
        return Step(
            name=step_name,
            transformer=vectorizer,
            input_steps=[splitter],
            adapter={'text': ([('xy_split', 'X')], fetch_x_train)},
            cache_dirpath=cache_dir,
        )

    char_tokens = tokenizer_step(
        'char_tokenizer', Tokenizer(**config.char_tokenizer))
    word_tokens = tokenizer_step(
        'word_tokenizer', Tokenizer(**config.word_tokenizer))

    char_tfidf = tfidf_step(
        'tfidf_char_vectorizer',
        TfidfVectorizer(**config.tfidf_char_vectorizer))
    word_tfidf = tfidf_step(
        'tfidf_word_vectorizer',
        TfidfVectorizer(**config.tfidf_word_vectorizer))

    glove_matrix = Step(
        name='glove_embeddings',
        transformer=GloveEmbeddingsMatrix(**config.glove_embeddings),
        input_steps=[word_tokens],
        adapter={'tokenizer': [('word_tokenizer', 'tokenizer')]},
        cache_dirpath=cache_dir,
    )

    # ---- model builders ---------------------------------------------------
    def token_net_step(step_name, network, source_step, source_name):
        # Network trained on one tokenizer's 'X' output.
        return Step(
            name=step_name,
            transformer=network,
            input_steps=[source_step, splitter],
            adapter={
                'X': [(source_name, 'X')],
                'y': [('xy_split', 'y')],
            },
            cache_dirpath=cache_dir,
            cache_output=True,
        )

    def glove_net_step(step_name, network):
        # Network on word tokens plus the GloVe embedding matrix.
        return Step(
            name=step_name,
            transformer=network,
            input_steps=[word_tokens, splitter, glove_matrix],
            adapter={
                'X': [('word_tokenizer', 'X')],
                'y': [('xy_split', 'y')],
                'embedding_matrix': [('glove_embeddings', 'embeddings_matrix')],
            },
            cache_dirpath=cache_dir,
            cache_output=True,
        )

    # ---- models, created in the same order as they are returned -------------
    # Logistic regression over both TF-IDF feature sets, combined by
    # sparse_hstack_inputs.
    log_reg_multi = Step(
        name='log_reg_multi',
        transformer=LogisticRegressionMultilabel(
            **config.logistic_regression_multilabel),
        input_steps=[splitter, char_tfidf, word_tfidf],
        adapter={
            'X': ([('tfidf_char_vectorizer', 'features'),
                   ('tfidf_word_vectorizer', 'features')],
                  sparse_hstack_inputs),
            'y': [('xy_split', 'y')],
        },
        cache_dirpath=cache_dir,
        cache_output=True,
    )

    char_vdcnn = token_net_step(
        'char_vdcnn', CharVDCNN(**config.char_vdcnn_network),
        char_tokens, 'char_tokenizer')
    word_lstm = token_net_step(
        'word_lstm', WordLSTM(**config.word_lstm_network),
        word_tokens, 'word_tokenizer')

    glove_lstm = glove_net_step(
        'glove_lstm', GloveLSTM(**config.glove_lstm_network))
    glove_scnn = glove_net_step(
        'glove_scnn', GloveSCNN(**config.glove_scnn_network))
    glove_dpcnn = glove_net_step(
        'glove_dpcnn', GloveDPCNN(**config.glove_dpcnn_network))

    return [
        log_reg_multi,
        char_vdcnn,
        word_lstm,
        glove_lstm,
        glove_scnn,
        glove_dpcnn,
    ]