def unet(config, train_mode):
    """Build the single-class U-Net pipeline: loader -> U-Net -> postprocessing -> labeler.

    NOTE(review): the original train/inference conditional assigned identical
    values (`save_output = False`, `load_saved_output = False`) in both
    branches; the dead conditional has been removed.
    """
    # Step caching is disabled for both training and inference.
    save_output = False
    load_saved_output = False

    loader = preprocessing(config, model_type='single', is_train=train_mode)
    unet = Step(name='unet',
                transformer=PyTorchUNet(**config.unet),
                input_steps=[loader],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)
    mask_postprocessed = mask_postprocessing(unet, config, save_output=save_output)
    detached = multiclass_object_labeler(mask_postprocessed, config,
                                         save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={
                      'y_pred': ([(detached.name, 'labeled_images')]),
                  },
                  cache_dirpath=config.env.cache_dirpath,
                  save_output=save_output,
                  load_saved_output=False)
    return output
def watershed_contours(mask, contour, config, save_output=True):
    """Split touching instances with a contour-aware watershed, then drop small labels.

    BUG(fixed): the original contained an unreachable ``binary_fill`` step and a
    second ``return`` after ``return drop_smaller``; that dead code has been
    removed.  If hole filling is actually wanted, insert the step before the
    return instead.

    :param mask: step providing 'binarized_images' (segmentation masks)
    :param contour: step providing 'binarized_images' (contour predictions)
    :param config: pipeline configuration namespace
    :param save_output: whether the created steps persist their outputs
    :return: the 'drop_smaller' step, whose output key is 'labels'
    """
    watershed_contour = Step(name='watershed_contour',
                             transformer=WatershedContour(),
                             input_steps=[mask, contour],
                             adapter={
                                 'images': ([(mask.name, 'binarized_images')]),
                                 'contours': ([(contour.name, 'binarized_images')]),
                             },
                             cache_dirpath=config.env.cache_dirpath,
                             save_output=save_output)
    drop_smaller = Step(name='drop_smaller',
                        transformer=Dropper(**config.dropper),
                        input_steps=[watershed_contour],
                        adapter={
                            'labels': ([('watershed_contour', 'detached_images')]),
                        },
                        cache_dirpath=config.env.cache_dirpath,
                        save_output=save_output)
    return drop_smaller
def _normalize(features, config, train_mode, **kwargs):
    """Attach a ``Normalizer`` step to the given feature step(s).

    In train mode ``features`` is a ``(train, valid)`` pair; a fitted
    normalizer plus a validation step that reuses its transformer are
    returned.  Otherwise a single normalizer step is returned.
    """
    if not train_mode:
        return Step(name='normalizer',
                    transformer=Normalizer(),
                    input_steps=[features],
                    adapter={'X': ([(features.name, 'features')])},
                    cache_dirpath=config.env.cache_dirpath,
                    **kwargs)

    feature_train, features_valid = features
    normalizer = Step(name='normalizer',
                      transformer=Normalizer(),
                      input_steps=[feature_train],
                      adapter={'X': ([(feature_train.name, 'features')])},
                      cache_dirpath=config.env.cache_dirpath,
                      **kwargs)
    # Validation shares the transformer fitted on the training step.
    normalizer_valid = Step(name='normalizer_valid',
                            transformer=normalizer,
                            input_steps=[features_valid],
                            adapter={'X': ([(features_valid.name, 'features')])},
                            cache_dirpath=config.env.cache_dirpath,
                            **kwargs)
    return normalizer, normalizer_valid
def tfidf_logreg(config):
    """Logistic-regression pipeline over stacked char + word TF-IDF features."""
    preprocessed_input = _preprocessing(config, is_train=False)
    tfidf_char_vectorizer, tfidf_word_vectorizer = _tfidf(preprocessed_input, config)

    tfidf_logreg = Step(
        name='tfidf_logreg',
        transformer=LogisticRegressionMultilabel(
            **config.logistic_regression_multilabel),
        input_steps=[preprocessed_input,
                     tfidf_char_vectorizer,
                     tfidf_word_vectorizer],
        adapter={
            # Char- and word-level TF-IDF matrices are hstacked (sparse).
            'X': ([('tfidf_char_vectorizer', 'features'),
                   ('tfidf_word_vectorizer', 'features')], sparse_hstack_inputs),
            'y': ([('cleaning_output', 'y')]),
        },
        cache_dirpath=config.env.cache_dirpath)

    # Dummy output step exposes the probabilities under the canonical key.
    return Step(name='tfidf_logreg_output',
                transformer=Dummy(),
                input_steps=[tfidf_logreg],
                adapter={
                    'y_pred': ([('tfidf_logreg', 'prediction_probability')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def bad_word_count_features_logreg(config):
    """Logistic regression on normalized count features + bad-word TF-IDF."""
    preprocessed_input = _preprocessing(config, is_train=False)
    normalizer = _count_features(config)
    xy_split = normalizer.get_step('xy_split')
    tfidf_word_vectorizer = _bad_word_tfidf(preprocessed_input, config)

    bad_word_count_logreg = Step(
        name='bad_word_count_logreg',
        transformer=LogisticRegressionMultilabel(
            **config.logistic_regression_multilabel),
        input_steps=[xy_split, normalizer, tfidf_word_vectorizer],
        adapter={
            # Normalized counts and bad-word TF-IDF are hstacked (sparse).
            'X': ([('normalizer', 'X'),
                   ('bad_word_tfidf_word_vectorizer', 'features')],
                  sparse_hstack_inputs),
            'y': ([('xy_split', 'y')]),
        },
        cache_dirpath=config.env.cache_dirpath)

    return Step(name='bad_word_count_features_logreg_output',
                transformer=Dummy(),
                input_steps=[bad_word_count_logreg],
                adapter={
                    'y_pred': ([('bad_word_count_logreg',
                                 'prediction_probability')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def unet(config, train_mode):
    """Build the U-Net pipeline, optionally using the streaming network variant.

    NOTE(review): the original train/inference conditional assigned identical
    values (`save_output = False`, `load_saved_output = False`) in both
    branches; the dead conditional has been removed.
    """
    # Step caching is disabled for both training and inference.
    save_output = False
    load_saved_output = False

    loader = preprocessing(config, model_type='single', is_train=train_mode)
    # The streaming variant is selected via config (e.g. to bound memory use).
    if config.execution.stream_mode:
        network = PyTorchUNetStream(**config.unet)
    else:
        network = PyTorchUNet(**config.unet)
    unet = Step(name='unet',
                transformer=network,
                input_steps=[loader],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)
    mask_postprocessed = mask_postprocessing(loader, unet, config,
                                             save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[mask_postprocessed],
                  adapter={'y_pred': ([(mask_postprocessed.name, 'images')]),
                           'y_scores': ([(mask_postprocessed.name, 'scores')])
                           },
                  cache_dirpath=config.env.cache_dirpath,
                  save_output=save_output,
                  load_saved_output=False)
    return output
def inference_preprocessing(config):
    """Inference-time preprocessing: X/y split, text cleaning, bundled output."""
    xy_train = Step(name='xy_train',
                    transformer=XYSplit(**config.xy_splitter),
                    input_data=['input'],
                    adapter={'meta': ([('input', 'meta')]),
                             'train_mode': ([('input', 'train_mode')])
                             },
                    cache_dirpath=config.env.cache_dirpath)

    text_cleaner = Step(name='text_cleaner_train',
                        transformer=TextCleaner(**config.text_cleaner),
                        input_steps=[xy_train],
                        adapter={'X': ([('xy_train', 'X')])},
                        cache_dirpath=config.env.cache_dirpath)

    # Bundle cleaned X, labels, and the train-mode flag into one step.
    return Step(name='cleaning_output',
                transformer=Dummy(),
                input_data=['input'],
                input_steps=[xy_train, text_cleaner],
                adapter={'X': ([('text_cleaner_train', 'X')]),
                         'y': ([('xy_train', 'y')]),
                         'train_mode': ([('input', 'train_mode')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def glove_lstm_train(config):
    """Training pipeline for the GloVe-embedding LSTM network."""
    preprocessed_input = train_preprocessing(config)
    word_tokenizer, glove_embeddings = glove_preprocessing_train(
        config, preprocessed_input)

    glove_lstm = Step(
        name='glove_lstm',
        transformer=GloveLSTM(**config.glove_lstm_network),
        # Retrain from scratch rather than loading a cached transformer.
        overwrite_transformer=True,
        input_steps=[word_tokenizer, preprocessed_input, glove_embeddings],
        adapter={
            'X': ([('word_tokenizer', 'X')]),
            'y': ([('xy_split', 'y')]),
            'embedding_matrix': ([('glove_embeddings', 'embeddings_matrix')]),
            'validation_data': ([('word_tokenizer', 'X_valid'),
                                 ('xy_split', 'validation_data')], join_valid),
        },
        cache_dirpath=config.env.cache_dirpath)

    return Step(name='output_glove',
                transformer=Dummy(),
                input_steps=[glove_lstm],
                adapter={
                    'y_pred': ([('glove_lstm', 'prediction_probability')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def char_vdcnn_train(config):
    """Training pipeline for the character-level VDCNN network."""
    preprocessed_input = train_preprocessing(config)

    char_tokenizer = Step(name='char_tokenizer',
                          transformer=Tokenizer(**config.char_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={
                              'X': ([('xy_split', 'X')], fetch_x_train),
                              'X_valid': ([('xy_split', 'validation_data')],
                                          fetch_x_valid),
                              'train_mode': ([('xy_split', 'train_mode')])
                          },
                          cache_dirpath=config.env.cache_dirpath)

    network = Step(name='char_vdcnn',
                   transformer=CharVDCNN(**config.char_vdcnn_network),
                   # Retrain from scratch rather than loading a cached transformer.
                   overwrite_transformer=True,
                   input_steps=[char_tokenizer, preprocessed_input],
                   adapter={
                       'X': ([('char_tokenizer', 'X')]),
                       'y': ([('xy_split', 'y')]),
                       'validation_data': ([('char_tokenizer', 'X_valid'),
                                            ('xy_split', 'validation_data')],
                                           join_valid),
                   },
                   cache_dirpath=config.env.cache_dirpath)

    return Step(name='char_output',
                transformer=Dummy(),
                input_steps=[network],
                adapter={
                    'y_pred': ([('char_vdcnn', 'prediction_probability')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def _numerical_features(clean_features, config, train_mode, **kwargs):
    """Extract numerical columns from cleaned features.

    In train mode ``clean_features`` is a ``(train, valid)`` pair and both a
    train and a validation step (sharing the fitted transformer) are returned.
    """
    if train_mode:
        clean, clean_valid = clean_features
    else:
        clean = clean_features

    numerical_features = Step(
        name='numerical_features',
        transformer=fe.ProcessNumerical(),
        input_steps=[clean],
        adapter={
            # Restrict the cleaned frame to the configured numerical columns.
            'numerical_features': ([(clean.name, 'clean_features')],
                                   partial(pandas_subset_columns,
                                           cols=cfg.NUMERICAL_COLUMNS))
        },
        cache_dirpath=config.env.cache_dirpath,
        **kwargs)

    if not train_mode:
        return numerical_features

    numerical_features_valid = Step(
        name='numerical_features_valid',
        transformer=numerical_features,
        input_steps=[clean_valid],
        adapter={
            'numerical_features': ([(clean_valid.name, 'clean_features')],
                                   partial(pandas_subset_columns,
                                           cols=cfg.NUMERICAL_COLUMNS))
        },
        cache_dirpath=config.env.cache_dirpath,
        **kwargs)
    return numerical_features, numerical_features_valid
def _groupby_aggregations(clean_features, additional_features, config,
                          train_mode, **kwargs):
    """Build group-by aggregation features over cleaned + categorical inputs.

    In train mode both inputs are ``(train, valid)`` pairs and the validation
    step reuses the transformer fitted on the training step.
    """
    if train_mode:
        clean, clean_valid = clean_features
        added_feature, added_feature_valid = additional_features
    else:
        clean = clean_features
        added_feature = additional_features

    groupby_aggregations = Step(
        name='groupby_aggregations',
        transformer=fe.GroupbyAggregations(**config.groupby_aggregation),
        input_steps=[clean, added_feature],
        adapter={
            # Cleaned and categorical frames are concatenated column-wise.
            'X': ([(clean.name, 'clean_features'),
                   (added_feature.name, 'categorical_features')],
                  pandas_concat_inputs)
        },
        cache_dirpath=config.env.cache_dirpath,
        **kwargs)

    if not train_mode:
        return groupby_aggregations

    groupby_aggregations_valid = Step(
        name='groupby_aggregations_valid',
        transformer=groupby_aggregations,
        input_steps=[clean_valid, added_feature_valid],
        adapter={
            'X': ([(clean_valid.name, 'clean_features'),
                   (added_feature_valid.name, 'categorical_features')],
                  pandas_concat_inputs)
        },
        cache_dirpath=config.env.cache_dirpath,
        **kwargs)
    return groupby_aggregations, groupby_aggregations_valid
def _timestamp_features(clean_features, config, train_mode, **kwargs):
    """Derive date features from the timestamp columns of cleaned features.

    In train mode ``clean_features`` is a ``(train, valid)`` pair and both a
    train and a validation step (sharing the fitted transformer) are returned.
    """
    if train_mode:
        clean, clean_valid = clean_features
    else:
        clean = clean_features

    timestamp_features = Step(
        name='timestamp_features',
        transformer=fe.DateFeatures(**config.date_features),
        input_steps=[clean],
        adapter={
            # Restrict the cleaned frame to the configured timestamp columns.
            'timestamp_features': ([(clean.name, 'clean_features')],
                                   partial(pandas_subset_columns,
                                           cols=cfg.TIMESTAMP_COLUMNS))
        },
        cache_dirpath=config.env.cache_dirpath,
        **kwargs)

    if not train_mode:
        return timestamp_features

    timestamp_features_valid = Step(
        name='timestamp_features_valid',
        transformer=timestamp_features,
        input_steps=[clean_valid],
        adapter={
            'timestamp_features': ([(clean_valid.name, 'clean_features')],
                                   partial(pandas_subset_columns,
                                           cols=cfg.TIMESTAMP_COLUMNS))
        },
        cache_dirpath=config.env.cache_dirpath,
        **kwargs)
    return timestamp_features, timestamp_features_valid
def _encode_categorical(clean_features, config, train_mode, **kwargs):
    """Ordinal-encode the categorical columns of cleaned features.

    In train mode ``clean_features`` is a ``(train, valid)`` pair and both a
    train and a validation step (sharing the fitted encoder) are returned.
    """
    if train_mode:
        clean, clean_valid = clean_features
    else:
        clean = clean_features

    categorical_encoder = Step(
        name='categorical_encoder',
        transformer=fe.OrdinalEncoder(**config.categorical_encoder),
        input_steps=[clean],
        adapter={
            # Restrict the cleaned frame to the configured categorical columns.
            'categorical_features': ([(clean.name, 'clean_features')],
                                     partial(pandas_subset_columns,
                                             cols=cfg.CATEGORICAL_COLUMNS))
        },
        cache_dirpath=config.env.cache_dirpath,
        **kwargs)

    if not train_mode:
        return categorical_encoder

    categorical_encoder_valid = Step(
        name='categorical_encoder_valid',
        transformer=categorical_encoder,
        input_steps=[clean_valid],
        adapter={
            'categorical_features': ([(clean_valid.name, 'clean_features')],
                                     partial(pandas_subset_columns,
                                             cols=cfg.CATEGORICAL_COLUMNS))
        },
        cache_dirpath=config.env.cache_dirpath,
        **kwargs)
    return categorical_encoder, categorical_encoder_valid
def image_features(clean_features, config, train_mode, **kwargs):
    """Compute image statistics features over the cleaned data.

    In train mode ``clean_features`` is a ``(train, valid)`` pair and both a
    train and a validation step (sharing the fitted transformer) are returned.
    """
    if train_mode:
        clean, clean_valid = clean_features
    else:
        clean = clean_features

    image_stats = Step(name='image_stats',
                       transformer=fe.ImageStatistics(**config.image_stats),
                       input_data=['specs'],
                       input_steps=[clean],
                       adapter={'X': ([(clean.name, 'clean_features')]),
                                'is_train': ([('specs', 'is_train')])},
                       cache_dirpath=config.env.cache_dirpath,
                       **kwargs)

    if not train_mode:
        return image_stats

    image_stats_valid = Step(name='image_stats_valid',
                             transformer=image_stats,
                             input_data=['specs'],
                             input_steps=[clean_valid],
                             adapter={'X': ([(clean_valid.name,
                                              'clean_features')]),
                                      'is_train': ([('specs', 'is_train')])},
                             cache_dirpath=config.env.cache_dirpath,
                             **kwargs)
    return image_stats, image_stats_valid
def bad_word_count_features_svm(config):
    """Linear SVM on normalized count features + bad-word TF-IDF."""
    preprocessed_input = inference_preprocessing(config)
    normalizer = count_features(config)
    xy_split = normalizer.get_step('xy_split')
    tfidf_word_vectorizer = bad_word_tfidf(preprocessed_input, config)

    svm_multi = Step(
        name='svm_multi',
        transformer=LinearSVCMultilabel(**config.svc_multilabel),
        input_steps=[xy_split, normalizer, tfidf_word_vectorizer],
        adapter={
            # Normalized counts and bad-word TF-IDF are hstacked (sparse).
            'X': ([('normalizer', 'X'),
                   ('bad_word_tfidf_word_vectorizer', 'features')],
                  sparse_hstack_inputs),
            'y': ([('xy_split', 'y')]),
        },
        cache_dirpath=config.env.cache_dirpath)

    return Step(name='svm_output',
                transformer=Dummy(),
                input_steps=[svm_multi],
                adapter={
                    'y_pred': ([('svm_multi', 'prediction_probability')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def solution_1(config, train_mode):
    """End-to-end solution: feature extraction -> LightGBM -> clip -> output."""
    if train_mode:
        features, features_valid = feature_extraction(config,
                                                      train_mode,
                                                      save_output=True,
                                                      cache_output=True,
                                                      load_saved_output=True)
        light_gbm = classifier_lgbm((features, features_valid), config,
                                    train_mode)
    else:
        features = feature_extraction(config, train_mode, cache_output=True)
        light_gbm = classifier_lgbm(features, config, train_mode)

    # Clip predictions into the configured valid range.
    clipper = Step(name='clipper',
                   transformer=Clipper(**config.clipper),
                   input_steps=[light_gbm],
                   adapter={
                       'prediction': ([(light_gbm.name, 'prediction')]),
                   },
                   cache_dirpath=config.env.cache_dirpath)

    return Step(name='output',
                transformer=Dummy(),
                input_steps=[clipper],
                adapter={
                    'y_pred': ([(clipper.name, 'clipped_prediction')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def hand_crafted_all_svm(config):
    """Linear SVM over all hand-crafted features (counts + char/word/bad-word TF-IDF).

    BUG(fixed): the output adapter read ``('logreg_multi',
    'prediction_probability')`` — a copy-paste from the logistic-regression
    pipeline.  ``logreg_multi`` is not among the output step's inputs, so the
    adapter could never resolve; it now reads from ``svm_multi``.
    """
    xy_split, normalizer, char_vector, word_vector, bad_word_vector = hand_crafted_all(
        config)

    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[
                         xy_split, normalizer, char_vector, word_vector,
                         bad_word_vector
                     ],
                     adapter={
                         # All feature matrices are hstacked (sparse).
                         'X': ([('normalizer', 'X'),
                                ('tfidf_char_vectorizer', 'features'),
                                ('tfidf_word_vectorizer', 'features'),
                                ('bad_word_tfidf_word_vectorizer', 'features')
                                ], sparse_hstack_inputs),
                         'y': ([('xy_split', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)

    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={
                          'y_pred': ([('svm_multi', 'prediction_probability')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
def logistic_regression_ensemble_train(config):
    """Train a logistic-regression stacker over the base models' probabilities."""
    model_outputs = ensemble_extraction(config)
    # Each base model contributes its prediction probabilities as features.
    output_mappings = [(step.name, 'prediction_probability')
                       for step in model_outputs]
    label = model_outputs[0].get_step('xy_train')
    input_steps = model_outputs + [label]

    logreg = Step(name='logreg_ensemble',
                  transformer=LogisticRegressionMultilabel(
                      **config.logistic_regression_ensemble),
                  overwrite_transformer=True,
                  input_steps=input_steps,
                  adapter={
                      'X': (output_mappings, hstack_inputs),
                      'y': ([('xy_train', 'y')])
                  },
                  cache_dirpath=config.env.cache_dirpath)

    return Step(name='logreg_ensemble_output',
                transformer=Dummy(),
                input_steps=[logreg],
                adapter={'y_pred': ([('logreg_ensemble',
                                      'prediction_probability')])},
                cache_dirpath=config.env.cache_dirpath)
def word_lstm_train(config):
    """Training pipeline for the word-level LSTM network."""
    preprocessed_input = train_preprocessing(config)

    word_tokenizer = Step(name='word_tokenizer',
                          transformer=Tokenizer(**config.word_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={'X': ([('cleaning_output', 'X')]),
                                   'X_valid': ([('cleaning_output', 'X_valid')]),
                                   'train_mode': ([('cleaning_output',
                                                    'train_mode')])
                                   },
                          cache_dirpath=config.env.cache_dirpath)

    word_lstm = Step(name='word_lstm',
                     transformer=WordLSTM(**config.word_lstm_network),
                     # Retrain from scratch rather than loading a cached transformer.
                     overwrite_transformer=True,
                     input_steps=[word_tokenizer, preprocessed_input],
                     adapter={'X': ([('word_tokenizer', 'X')]),
                              'y': ([('cleaning_output', 'y')]),
                              'validation_data': (
                                  [('word_tokenizer', 'X_valid'),
                                   ('cleaning_output', 'y_valid')],
                                  to_tuple_inputs),
                              },
                     cache_dirpath=config.env.cache_dirpath)

    return Step(name='word_output',
                transformer=Dummy(),
                input_steps=[word_lstm],
                adapter={'y_pred': ([('word_lstm', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def random_forest_ensemble_train(config):
    """Train a random-forest stacker over the base models' probabilities."""
    model_outputs = ensemble_extraction(config)
    # Each base model contributes its prediction probabilities as features.
    output_mappings = [(step.name, 'prediction_probability')
                       for step in model_outputs]
    label = model_outputs[0].get_step('xy_train')
    input_steps = model_outputs + [label]

    random_forest_ensemble = Step(
        name='random_forest_ensemble',
        transformer=RandomForestMultilabel(**config.random_forest_ensemble),
        overwrite_transformer=True,
        input_steps=input_steps,
        adapter={
            'X': (output_mappings, hstack_inputs),
            'y': ([('xy_train', 'y')])
        },
        cache_dirpath=config.env.cache_dirpath)

    return Step(
        name='random_forest_ensemble_output',
        transformer=Dummy(),
        input_steps=[random_forest_ensemble],
        adapter={
            'y_pred': ([('random_forest_ensemble', 'prediction_probability')])
        },
        cache_dirpath=config.env.cache_dirpath)
def _preprocessing_single_generator(config, is_train, use_patching):
    """Build the metadata/image loader for single-task training or inference.

    BUG(fixed): the inference branch passed ``input_steps=[xy_inference,
    xy_inference]`` — the same step twice; the duplicate has been removed
    (the adapter only references 'xy_inference' once per key).

    :raises NotImplementedError: if ``use_patching`` is requested.
    """
    if use_patching:
        raise NotImplementedError

    if is_train:
        xy_train = Step(name='xy_train',
                        transformer=XYSplit(**config.xy_splitter),
                        input_data=['input'],
                        adapter={
                            'meta': ([('input', 'meta')]),
                            'train_mode': ([('input', 'train_mode')])
                        },
                        cache_dirpath=config.env.cache_dirpath)
        # Validation split reads 'meta_valid' from the raw input.
        xy_inference = Step(name='xy_inference',
                            transformer=XYSplit(**config.xy_splitter),
                            input_data=['input'],
                            adapter={
                                'meta': ([('input', 'meta_valid')]),
                                'train_mode': ([('input', 'train_mode')])
                            },
                            cache_dirpath=config.env.cache_dirpath)
        loader = Step(name='loader',
                      transformer=loaders.MetadataImageSegmentationLoader(
                          **config.loader),
                      input_data=['input'],
                      input_steps=[xy_train, xy_inference],
                      adapter={
                          'X': ([('xy_train', 'X')], squeeze_inputs),
                          'y': ([('xy_train', 'y')], squeeze_inputs),
                          'train_mode': ([('input', 'train_mode')]),
                          'X_valid': ([('xy_inference', 'X')], squeeze_inputs),
                          'y_valid': ([('xy_inference', 'y')], squeeze_inputs),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    else:
        xy_inference = Step(name='xy_inference',
                            transformer=XYSplit(**config.xy_splitter),
                            input_data=['input'],
                            adapter={
                                'meta': ([('input', 'meta')]),
                                'train_mode': ([('input', 'train_mode')])
                            },
                            cache_dirpath=config.env.cache_dirpath)
        loader = Step(name='loader',
                      transformer=loaders.MetadataImageSegmentationLoader(
                          **config.loader),
                      input_data=['input'],
                      input_steps=[xy_inference],
                      adapter={
                          'X': ([('xy_inference', 'X')], squeeze_inputs),
                          'y': ([('xy_inference', 'y')], squeeze_inputs),
                          'train_mode': ([('input', 'train_mode')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return loader
def char_vdcnn_inference(config):
    """Inference pipeline for the character-level VDCNN network."""
    preprocessed_input = inference_preprocessing(config)

    char_tokenizer = Step(name='char_tokenizer',
                          transformer=Tokenizer(**config.char_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={
                              'X': ([('cleaning_output', 'X')]),
                              'train_mode': ([('cleaning_output',
                                               'train_mode')])
                          },
                          cache_dirpath=config.env.cache_dirpath)

    network = Step(name='char_vdcnn',
                   transformer=CharVDCNN(**config.char_vdcnn_network),
                   input_steps=[char_tokenizer, preprocessed_input],
                   adapter={
                       'X': ([('char_tokenizer', 'X')]),
                       'y': ([('cleaning_output', 'y')]),
                   },
                   cache_dirpath=config.env.cache_dirpath)

    return Step(name='char_output',
                transformer=Dummy(),
                input_steps=[network],
                adapter={
                    'y_pred': ([('char_vdcnn', 'prediction_probability')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def char_vdcnn(config, is_train):
    """Character-level VDCNN pipeline for either training or inference.

    Training attaches validation data and forces retraining; inference only
    feeds X/y through the already-fitted network.
    """
    preprocessed_input = _preprocessing(config, is_train)
    char_tokenizer = _char_tokenizer(preprocessed_input, config, is_train)

    if is_train:
        network = Step(name='char_vdcnn',
                       transformer=CharVDCNN(**config.char_vdcnn_network),
                       # Retrain from scratch rather than loading a cached transformer.
                       overwrite_transformer=True,
                       input_steps=[char_tokenizer, preprocessed_input],
                       adapter={
                           'X': ([('char_tokenizer', 'X')]),
                           'y': ([('cleaning_output', 'y')]),
                           'validation_data': ([('char_tokenizer', 'X_valid'),
                                                ('cleaning_output', 'y_valid')],
                                               to_tuple_inputs),
                       },
                       cache_dirpath=config.env.cache_dirpath)
    else:
        network = Step(name='char_vdcnn',
                       transformer=CharVDCNN(**config.char_vdcnn_network),
                       input_steps=[char_tokenizer, preprocessed_input],
                       adapter={
                           'X': ([('char_tokenizer', 'X')]),
                           'y': ([('cleaning_output', 'y')]),
                       },
                       cache_dirpath=config.env.cache_dirpath)

    return Step(name='char_vdcnn_output',
                transformer=Dummy(),
                input_steps=[network],
                adapter={
                    'y_pred': ([('char_vdcnn', 'prediction_probability')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def word_lstm_inference(config):
    """Inference pipeline for the word-level LSTM network."""
    preprocessed_input = inference_preprocessing(config)

    word_tokenizer = Step(name='word_tokenizer',
                          transformer=Tokenizer(**config.word_tokenizer),
                          input_steps=[preprocessed_input],
                          adapter={
                              'X': ([('cleaning_output', 'X')]),
                              'train_mode': ([('cleaning_output',
                                               'train_mode')])
                          },
                          cache_dirpath=config.env.cache_dirpath)

    word_lstm = Step(name='word_lstm',
                     transformer=WordLSTM(**config.word_lstm_network),
                     input_steps=[word_tokenizer, preprocessed_input],
                     adapter={
                         'X': ([('word_tokenizer', 'X')]),
                         'y': ([('cleaning_output', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)

    return Step(name='word_output',
                transformer=Dummy(),
                input_steps=[word_lstm],
                adapter={
                    'y_pred': ([('word_lstm', 'prediction_probability')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def unet(config, train_mode):
    """U-Net pipeline with nuclei labeling for training or inference.

    NOTE(review): both branches of the original conditional duplicated the
    identical ``save_output``/``load_saved_output`` assignments; those have
    been hoisted out so the branch only selects the preprocessing pipeline.
    """
    save_output = True
    load_saved_output = False
    if train_mode:
        preprocessing = preprocessing_train(config)
    else:
        preprocessing = preprocessing_inference(config)

    unet = Step(name='unet',
                transformer=PyTorchUNet(**config.unet),
                input_steps=[preprocessing],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)
    mask_postprocessed = mask_postprocessing(unet, config,
                                             save_output=save_output)
    detached = nuclei_labeler(mask_postprocessed, config,
                              save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={
                      'y_pred': ([(detached.name, 'labels')]),
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
def glove_dpcnn_train(config):
    """Training pipeline for the GloVe-embedding DPCNN network."""
    preprocessed_input = train_preprocessing(config)
    word_tokenizer, glove_embeddings = glove_preprocessing_train(
        config, preprocessed_input)

    glove_dpcnn = Step(
        name='glove_dpcnn',
        transformer=GloveDPCNN(**config.glove_dpcnn_network),
        # Retrain from scratch rather than loading a cached transformer.
        overwrite_transformer=True,
        input_steps=[word_tokenizer, preprocessed_input, glove_embeddings],
        adapter={
            'X': ([('word_tokenizer', 'X')]),
            'y': ([('cleaning_output', 'y')]),
            'embedding_matrix': ([('glove_embeddings', 'embeddings_matrix')]),
            'validation_data': ([('word_tokenizer', 'X_valid'),
                                 ('cleaning_output', 'y_valid')],
                                to_tuple_inputs),
        },
        cache_dirpath=config.env.cache_dirpath)

    return Step(name='output_glove',
                transformer=Dummy(),
                input_steps=[glove_dpcnn],
                adapter={
                    'y_pred': ([('glove_dpcnn', 'prediction_probability')]),
                },
                cache_dirpath=config.env.cache_dirpath)
def unet_multitask(config, train_mode):
    """Multitask U-Net pipeline: predict masks and contours, resize, then label.

    NOTE(review): both branches of the original conditional duplicated the
    identical ``save_output``/``load_saved_output`` assignments; those have
    been hoisted out so the branch only selects the preprocessing pipeline.
    """
    save_output = True
    load_saved_output = False
    if train_mode:
        preprocessing = preprocessing_multitask_train(config)
    else:
        preprocessing = preprocessing_multitask_inference(config)

    unet_multitask = Step(name='unet_multitask',
                          transformer=PyTorchUNetMultitask(**config.unet),
                          input_steps=[preprocessing],
                          cache_dirpath=config.env.cache_dirpath,
                          save_output=save_output,
                          load_saved_output=load_saved_output)
    # Resize both heads' predictions back to the original image sizes.
    mask_resize = Step(name='mask_resize',
                       transformer=Resizer(),
                       input_data=['input'],
                       input_steps=[unet_multitask],
                       adapter={
                           'images': ([(unet_multitask.name, 'mask_prediction')]),
                           'target_sizes': ([('input', 'target_sizes')]),
                       },
                       cache_dirpath=config.env.cache_dirpath,
                       save_output=save_output)
    contour_resize = Step(name='contour_resize',
                          transformer=Resizer(),
                          input_data=['input'],
                          input_steps=[unet_multitask],
                          adapter={
                              'images': ([(unet_multitask.name, 'contour_prediction')]),
                              'target_sizes': ([('input', 'target_sizes')]),
                          },
                          cache_dirpath=config.env.cache_dirpath,
                          save_output=save_output)
    # Combine masks and contours into labeled instance images.
    detached = Step(name='detached',
                    transformer=Postprocessor(),
                    input_steps=[mask_resize, contour_resize],
                    adapter={
                        'images': ([(mask_resize.name, 'resized_images')]),
                        'contours': ([(contour_resize.name, 'resized_images')]),
                    },
                    cache_dirpath=config.env.cache_dirpath,
                    save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={
                      'y_pred': ([(detached.name, 'labeled_images')]),
                  },
                  cache_dirpath=config.env.cache_dirpath)
    return output
def tfidf_svm(config):
    """Linear SVM pipeline over stacked char + word TF-IDF features.

    BUG(fixed): the output adapter read ``('logreg_multi',
    'prediction_probability')`` — a copy-paste from the logistic-regression
    pipeline.  ``logreg_multi`` is not among the output step's inputs, so the
    adapter could never resolve; it now reads from ``svm_multi``.
    """
    preprocessed_input = inference_preprocessing(config)
    tfidf_char_vectorizer, tfidf_word_vectorizer = tfidf(
        preprocessed_input, config)

    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[
                         preprocessed_input, tfidf_char_vectorizer,
                         tfidf_word_vectorizer
                     ],
                     adapter={
                         # Char- and word-level TF-IDF matrices are hstacked (sparse).
                         'X': ([('tfidf_char_vectorizer', 'features'),
                                ('tfidf_word_vectorizer', 'features')],
                               sparse_hstack_inputs),
                         'y': ([('cleaning_output', 'y')]),
                     },
                     cache_dirpath=config.env.cache_dirpath)

    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={
                          'y_pred': ([('svm_multi', 'prediction_probability')]),
                      },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
def _feature_by_type_splits(config, train_mode):
    """Split the raw input frame into per-dtype feature groups.

    In train mode a second step (sharing the fitted splitter) is built for
    the validation frame and both steps are returned.
    """
    feature_by_type_split = Step(name='feature_by_type_split',
                                 transformer=fe.DataFrameByTypeSplitter(
                                     **config.dataframe_by_type_splitter),
                                 input_data=['input'],
                                 adapter={
                                     'X': ([('input', 'X')]),
                                 },
                                 cache_dirpath=config.env.cache_dirpath)
    if not train_mode:
        return feature_by_type_split

    feature_by_type_split_valid = Step(
        name='feature_by_type_split_valid',
        transformer=feature_by_type_split,
        input_data=['input'],
        adapter={
            'X': ([('input', 'X_valid')]),
        },
        cache_dirpath=config.env.cache_dirpath)
    return feature_by_type_split, feature_by_type_split_valid
def preprocessing_generator_padded_tta(config):
    """Build the padded test-time-augmentation loader.

    Returns both the loader and the TTA generator step (the latter is needed
    downstream to invert the augmentations).
    """
    xy_inference = Step(name='xy_inference',
                        transformer=XYSplit(**config.xy_splitter),
                        input_data=['input', 'specs'],
                        adapter={
                            'meta': ([('input', 'meta')]),
                            'train_mode': ([('specs', 'train_mode')])
                        },
                        cache_dirpath=config.env.cache_dirpath)

    tta_generator = Step(name='tta_generator',
                         transformer=loaders.TestTimeAugmentationGenerator(
                             **config.tta_generator),
                         input_steps=[xy_inference],
                         adapter={
                             'X': ([('xy_inference', 'X')]),
                         },
                         cache_dirpath=config.env.cache_dirpath)

    loader = Step(
        name='loader',
        transformer=loaders.ImageSegmentationLoaderInferencePaddingTTA(
            **config.loader),
        input_steps=[xy_inference, tta_generator],
        adapter={
            'X': ([(tta_generator.name, 'X_tta')], squeeze_inputs),
            # Per-image augmentation parameters travel with the data.
            'tta_params': ([(tta_generator.name, 'tta_params')],
                           squeeze_inputs),
        },
        cache_dirpath=config.env.cache_dirpath)
    return loader, tta_generator