def feature_extraction_v0(config, train_mode, **kwargs):
    """Build the v0 feature-extraction pipeline (categorical features only).

    Returns the joined-features step; in train mode the validation branch
    is wired in alongside the training branch.
    """
    def _categorical_step(step_name, split_step):
        # Pass-through step exposing the split's categorical features.
        return Step(name=step_name,
                    transformer=Dummy(),
                    input_steps=[split_step],
                    adapter={'categorical_features': (
                        [(split_step.name, 'categorical_features')]),
                    },
                    cache_dirpath=config.env.cache_dirpath,
                    **kwargs)

    if train_mode:
        by_type_split, by_type_split_valid = _feature_by_type_splits(config, train_mode)
        train_categoricals = _categorical_step('categorical_features', by_type_split)
        valid_categoricals = _categorical_step('categorical_features_valid', by_type_split_valid)
        return _join_features(numerical_features=[],
                              numerical_features_valid=[],
                              categorical_features=[train_categoricals],
                              categorical_features_valid=[valid_categoricals],
                              config=config,
                              train_mode=train_mode,
                              **kwargs)

    by_type_split = _feature_by_type_splits(config, train_mode)
    categoricals = _categorical_step('categorical_features', by_type_split)
    return _join_features(numerical_features=[],
                          numerical_features_valid=[],
                          categorical_features=[categoricals],
                          categorical_features_valid=[],
                          config=config,
                          train_mode=train_mode,
                          **kwargs)
def char_vdcnn_train(config):
    """Character-level VDCNN training pipeline: tokenize chars, fit the net."""
    cleaned = train_preprocessing(config)
    tokenizer = Step(name='char_tokenizer',
                     transformer=Tokenizer(**config.char_tokenizer),
                     input_steps=[cleaned],
                     adapter={'X': ([('xy_split', 'X')], fetch_x_train),
                              'X_valid': ([('xy_split', 'validation_data')], fetch_x_valid),
                              'train_mode': ([('xy_split', 'train_mode')])
                              },
                     cache_dirpath=config.env.cache_dirpath)
    vdcnn = Step(name='char_vdcnn',
                 transformer=CharVDCNN(**config.char_vdcnn_network),
                 overwrite_transformer=True,
                 input_steps=[tokenizer, cleaned],
                 adapter={'X': ([('char_tokenizer', 'X')]),
                          'y': ([('xy_split', 'y')]),
                          'validation_data': ([('char_tokenizer', 'X_valid'),
                                               ('xy_split', 'validation_data')], join_valid),
                          },
                 cache_dirpath=config.env.cache_dirpath)
    # Expose prediction probabilities under the standard 'y_pred' key.
    return Step(name='char_output',
                transformer=Dummy(),
                input_steps=[vdcnn],
                adapter={'y_pred': ([('char_vdcnn', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def glove_lstm_train(config):
    """Train an LSTM on GloVe embeddings with validation data wired in."""
    cleaned = train_preprocessing(config)
    tokenizer, embeddings = glove_preprocessing_train(config, cleaned)
    lstm = Step(name='glove_lstm',
                transformer=GloveLSTM(**config.glove_lstm_network),
                overwrite_transformer=True,
                input_steps=[tokenizer, cleaned, embeddings],
                adapter={'X': ([('word_tokenizer', 'X')]),
                         'y': ([('xy_split', 'y')]),
                         'embedding_matrix': ([('glove_embeddings', 'embeddings_matrix')]),
                         'validation_data': ([('word_tokenizer', 'X_valid'),
                                              ('xy_split', 'validation_data')], join_valid),
                         },
                cache_dirpath=config.env.cache_dirpath)
    # Expose prediction probabilities under the standard 'y_pred' key.
    return Step(name='output_glove',
                transformer=Dummy(),
                input_steps=[lstm],
                adapter={'y_pred': ([('glove_lstm', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def unet(config, train_mode):
    """Single-model U-Net pipeline with multiclass object labeling.

    Fix: both branches of the original train/inference switch assigned the
    same flag values, so the conditional was dead code and is collapsed.
    """
    save_output = False
    load_saved_output = False
    loader = preprocessing(config, model_type='single', is_train=train_mode)
    unet = Step(name='unet',
                transformer=PyTorchUNet(**config.unet),
                input_steps=[loader],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)
    mask_postprocessed = mask_postprocessing(unet, config, save_output=save_output)
    detached = multiclass_object_labeler(mask_postprocessed, config, save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={'y_pred': ([(detached.name, 'labeled_images')]),
                           },
                  cache_dirpath=config.env.cache_dirpath,
                  save_output=save_output,
                  load_saved_output=False)
    return output
def solution_1(config, train_mode):
    """LightGBM solution pipeline: features -> LightGBM -> clip -> output."""
    if train_mode:
        # Training also extracts the validation feature set for the classifier.
        features, features_valid = feature_extraction(config,
                                                      train_mode,
                                                      save_output=True,
                                                      cache_output=True,
                                                      load_saved_output=True)
        light_gbm = classifier_lgbm((features, features_valid), config, train_mode)
    else:
        features = feature_extraction(config, train_mode, cache_output=True)
        light_gbm = classifier_lgbm(features, config, train_mode)
    clipped = Step(name='clipper',
                   transformer=Clipper(**config.clipper),
                   input_steps=[light_gbm],
                   adapter={'prediction': ([(light_gbm.name, 'prediction')]),
                            },
                   cache_dirpath=config.env.cache_dirpath)
    return Step(name='output',
                transformer=Dummy(),
                input_steps=[clipped],
                adapter={'y_pred': ([(clipped.name, 'clipped_prediction')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def unet(config, train_mode):
    """U-Net pipeline (stream-mode aware) producing images and scores.

    Fix: both branches of the original train/inference switch assigned the
    same flag values, so the conditional was dead code and is collapsed.
    """
    save_output = False
    load_saved_output = False
    loader = preprocessing(config, model_type='single', is_train=train_mode)
    unet = Step(name='unet',
                # Stream mode swaps in the streaming U-Net implementation.
                transformer=PyTorchUNetStream(**config.unet)
                if config.execution.stream_mode else PyTorchUNet(**config.unet),
                input_steps=[loader],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)
    mask_postprocessed = mask_postprocessing(loader, unet, config, save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[mask_postprocessed],
                  adapter={'y_pred': ([(mask_postprocessed.name, 'images')]),
                           'y_scores': ([(mask_postprocessed.name, 'scores')])
                           },
                  cache_dirpath=config.env.cache_dirpath,
                  save_output=save_output,
                  load_saved_output=False)
    return output
def bad_word_count_features_svm(config):
    """Linear SVM over count features stacked with bad-word TF-IDF features."""
    cleaned = inference_preprocessing(config)
    normalizer = count_features(config)
    xy_split = normalizer.get_step('xy_split')
    bad_word_vectorizer = bad_word_tfidf(cleaned, config)
    svm = Step(name='svm_multi',
               transformer=LinearSVCMultilabel(**config.svc_multilabel),
               input_steps=[xy_split, normalizer, bad_word_vectorizer],
               adapter={'X': ([('normalizer', 'X'),
                               ('bad_word_tfidf_word_vectorizer', 'features')
                               ], sparse_hstack_inputs),
                        'y': ([('xy_split', 'y')]),
                        },
               cache_dirpath=config.env.cache_dirpath)
    return Step(name='svm_output',
                transformer=Dummy(),
                input_steps=[svm],
                adapter={'y_pred': ([('svm_multi', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def glove_dpcnn_train(config):
    """Train a DPCNN on GloVe embeddings with validation data wired in."""
    cleaned = train_preprocessing(config)
    tokenizer, embeddings = glove_preprocessing_train(config, cleaned)
    dpcnn = Step(name='glove_dpcnn',
                 transformer=GloveDPCNN(**config.glove_dpcnn_network),
                 overwrite_transformer=True,
                 input_steps=[tokenizer, cleaned, embeddings],
                 adapter={'X': ([('word_tokenizer', 'X')]),
                          'y': ([('cleaning_output', 'y')]),
                          'embedding_matrix': ([('glove_embeddings', 'embeddings_matrix')]),
                          'validation_data': ([('word_tokenizer', 'X_valid'),
                                               ('cleaning_output', 'y_valid')], to_tuple_inputs),
                          },
                 cache_dirpath=config.env.cache_dirpath)
    # Expose prediction probabilities under the standard 'y_pred' key.
    return Step(name='output_glove',
                transformer=Dummy(),
                input_steps=[dpcnn],
                adapter={'y_pred': ([('glove_dpcnn', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def tfidf_svm(config):
    """Linear SVM over stacked char+word TF-IDF features.

    Fix: the output step's adapter referenced a non-existent step
    'logreg_multi' (copy-paste from the logreg pipeline); it now reads the
    probabilities from 'svm_multi', the step it actually consumes.
    """
    preprocessed_input = inference_preprocessing(config)
    tfidf_char_vectorizer, tfidf_word_vectorizer = tfidf(preprocessed_input, config)
    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[preprocessed_input,
                                  tfidf_char_vectorizer,
                                  tfidf_word_vectorizer
                                  ],
                     adapter={'X': ([('tfidf_char_vectorizer', 'features'),
                                     ('tfidf_word_vectorizer', 'features')], sparse_hstack_inputs),
                              'y': ([('cleaning_output', 'y')]),
                              },
                     cache_dirpath=config.env.cache_dirpath)
    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={'y_pred': ([('svm_multi', 'prediction_probability')]),
                               },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
def word_lstm_inference(config):
    """Word-level LSTM inference pipeline over cleaned text."""
    cleaned = inference_preprocessing(config)
    tokenizer = Step(name='word_tokenizer',
                     transformer=Tokenizer(**config.word_tokenizer),
                     input_steps=[cleaned],
                     adapter={'X': ([('cleaning_output', 'X')]),
                              'train_mode': ([('cleaning_output', 'train_mode')])
                              },
                     cache_dirpath=config.env.cache_dirpath)
    lstm = Step(name='word_lstm',
                transformer=WordLSTM(**config.word_lstm_network),
                input_steps=[tokenizer, cleaned],
                adapter={'X': ([('word_tokenizer', 'X')]),
                         'y': ([('cleaning_output', 'y')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
    return Step(name='word_output',
                transformer=Dummy(),
                input_steps=[lstm],
                adapter={'y_pred': ([('word_lstm', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def char_vdcnn_inference(config):
    """Character-level VDCNN inference pipeline."""
    cleaned = inference_preprocessing(config)
    tokenizer = Step(name='char_tokenizer',
                     transformer=Tokenizer(**config.char_tokenizer),
                     input_steps=[cleaned],
                     adapter={'X': ([('cleaning_output', 'X')]),
                              'train_mode': ([('cleaning_output', 'train_mode')])
                              },
                     cache_dirpath=config.env.cache_dirpath)
    vdcnn = Step(name='char_vdcnn',
                 transformer=CharVDCNN(**config.char_vdcnn_network),
                 input_steps=[tokenizer, cleaned],
                 adapter={'X': ([('char_tokenizer', 'X')]),
                          'y': ([('cleaning_output', 'y')]),
                          },
                 cache_dirpath=config.env.cache_dirpath)
    return Step(name='char_output',
                transformer=Dummy(),
                input_steps=[vdcnn],
                adapter={'y_pred': ([('char_vdcnn', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def random_forest_ensemble_train(config):
    """Train a random-forest ensembler on the base models' probabilities."""
    base_outputs = ensemble_extraction(config)
    probability_mappings = [(step.name, 'prediction_probability')
                            for step in base_outputs]
    # Labels come from the first model's shared xy_train split.
    label_step = base_outputs[0].get_step('xy_train')
    ensemble = Step(name='random_forest_ensemble',
                    transformer=RandomForestMultilabel(**config.random_forest_ensemble),
                    overwrite_transformer=True,
                    input_steps=base_outputs + [label_step],
                    adapter={'X': (probability_mappings, hstack_inputs),
                             'y': ([('xy_train', 'y')])
                             },
                    cache_dirpath=config.env.cache_dirpath)
    return Step(name='random_forest_ensemble_output',
                transformer=Dummy(),
                input_steps=[ensemble],
                adapter={'y_pred': ([('random_forest_ensemble', 'prediction_probability')])
                         },
                cache_dirpath=config.env.cache_dirpath)
def hand_crafted_all_svm(config):
    """Linear SVM over all hand-crafted features (counts + all TF-IDF blocks).

    Fix: the output step's adapter referenced a non-existent step
    'logreg_multi' (copy-paste from the logreg pipeline); it now reads from
    'svm_multi', the step it actually consumes.
    """
    xy_split, normalizer, char_vector, word_vector, bad_word_vector = hand_crafted_all(
        config)
    svm_multi = Step(name='svm_multi',
                     transformer=LinearSVCMultilabel(**config.svc_multilabel),
                     input_steps=[xy_split, normalizer, char_vector,
                                  word_vector, bad_word_vector
                                  ],
                     adapter={'X': ([('normalizer', 'X'),
                                     ('tfidf_char_vectorizer', 'features'),
                                     ('tfidf_word_vectorizer', 'features'),
                                     ('bad_word_tfidf_word_vectorizer', 'features')
                                     ], sparse_hstack_inputs),
                              'y': ([('xy_split', 'y')]),
                              },
                     cache_dirpath=config.env.cache_dirpath)
    svm_output = Step(name='svm_output',
                      transformer=Dummy(),
                      input_steps=[svm_multi],
                      adapter={'y_pred': ([('svm_multi', 'prediction_probability')]),
                               },
                      cache_dirpath=config.env.cache_dirpath)
    return svm_output
def tfidf_logreg(config):
    """Logistic regression over stacked char+word TF-IDF features (inference)."""
    cleaned = _preprocessing(config, is_train=False)
    char_vectorizer, word_vectorizer = _tfidf(cleaned, config)
    logreg = Step(name='tfidf_logreg',
                  transformer=LogisticRegressionMultilabel(
                      **config.logistic_regression_multilabel),
                  input_steps=[cleaned, char_vectorizer, word_vectorizer],
                  adapter={'X': ([('tfidf_char_vectorizer', 'features'),
                                  ('tfidf_word_vectorizer', 'features')], sparse_hstack_inputs),
                           'y': ([('cleaning_output', 'y')]),
                           },
                  cache_dirpath=config.env.cache_dirpath)
    return Step(name='tfidf_logreg_output',
                transformer=Dummy(),
                input_steps=[logreg],
                adapter={'y_pred': ([('tfidf_logreg', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def unet(config, train_mode):
    """Streaming-aware U-Net pipeline built on the generator-based loader."""
    save_output = False
    load_saved_output = False
    loader = preprocessing_generator(config, is_train=train_mode)
    # Stream mode selects the streaming U-Net implementation.
    if config.execution.stream_mode:
        network_transformer = PyTorchUNetStream(**config.unet)
    else:
        network_transformer = PyTorchUNet(**config.unet)
    unet_step = Step(name='unet',
                     transformer=network_transformer,
                     input_data=['callback_input'],
                     input_steps=[loader],
                     cache_dirpath=config.env.cache_dirpath,
                     save_output=save_output,
                     load_saved_output=load_saved_output)
    postprocessed = mask_postprocessing(loader, unet_step, config, save_output=save_output)
    return Step(name='output',
                transformer=Dummy(),
                input_steps=[postprocessed],
                adapter={'y_pred': ([(postprocessed.name, 'images_with_scores')]),
                         },
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=False)
def unet(config, train_mode):
    """U-Net pipeline with nuclei labeling postprocessing.

    Fix: the save/load flags were assigned identically in both branches of
    the train/inference switch, so they are hoisted out; only the
    preprocessing sub-pipeline actually depends on train_mode.
    """
    save_output = True
    load_saved_output = False
    if train_mode:
        preprocessing = preprocessing_train(config)
    else:
        preprocessing = preprocessing_inference(config)
    unet = Step(name='unet',
                transformer=PyTorchUNet(**config.unet),
                input_steps=[preprocessing],
                cache_dirpath=config.env.cache_dirpath,
                save_output=save_output,
                load_saved_output=load_saved_output)
    mask_postprocessed = mask_postprocessing(unet, config, save_output=save_output)
    detached = nuclei_labeler(mask_postprocessed, config, save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={'y_pred': ([(detached.name, 'labels')]),
                           },
                  cache_dirpath=config.env.cache_dirpath)
    return output
def unet_multitask(config, train_mode):
    """U-Net multitask pipeline: predict masks and contours, resize both back
    to the original image sizes, then combine them into labeled instances.

    Fixes/cleanup: the save/load flags were identical in both branches of the
    original switch (hoisted); the two structurally identical resize steps are
    built by a shared local helper.
    """
    save_output = True
    load_saved_output = False
    if train_mode:
        preprocessing = preprocessing_multitask_train(config)
    else:
        preprocessing = preprocessing_multitask_inference(config)
    unet_multitask = Step(name='unet_multitask',
                          transformer=PyTorchUNetMultitask(**config.unet),
                          input_steps=[preprocessing],
                          cache_dirpath=config.env.cache_dirpath,
                          save_output=save_output,
                          load_saved_output=load_saved_output)

    def _resize_step(name, prediction_key):
        # Resize one prediction channel back to the original target sizes.
        return Step(name=name,
                    transformer=Resizer(),
                    input_data=['input'],
                    input_steps=[unet_multitask],
                    adapter={'images': ([(unet_multitask.name, prediction_key)]),
                             'target_sizes': ([('input', 'target_sizes')]),
                             },
                    cache_dirpath=config.env.cache_dirpath,
                    save_output=save_output)

    mask_resize = _resize_step('mask_resize', 'mask_prediction')
    contour_resize = _resize_step('contour_resize', 'contour_prediction')
    detached = Step(name='detached',
                    transformer=Postprocessor(),
                    input_steps=[mask_resize, contour_resize],
                    adapter={'images': ([(mask_resize.name, 'resized_images')]),
                             'contours': ([(contour_resize.name, 'resized_images')]),
                             },
                    cache_dirpath=config.env.cache_dirpath,
                    save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[detached],
                  adapter={'y_pred': ([(detached.name, 'labeled_images')]),
                           },
                  cache_dirpath=config.env.cache_dirpath)
    return output
def bad_word_count_features_logreg(config):
    """Logistic regression over count features stacked with bad-word TF-IDF."""
    cleaned = _preprocessing(config, is_train=False)
    normalizer = _count_features(config)
    xy_split = normalizer.get_step('xy_split')
    bad_word_vectorizer = _bad_word_tfidf(cleaned, config)
    logreg = Step(name='bad_word_count_logreg',
                  transformer=LogisticRegressionMultilabel(
                      **config.logistic_regression_multilabel),
                  input_steps=[xy_split, normalizer, bad_word_vectorizer],
                  adapter={'X': ([('normalizer', 'X'),
                                  ('bad_word_tfidf_word_vectorizer', 'features')], sparse_hstack_inputs),
                           'y': ([('xy_split', 'y')]),
                           },
                  cache_dirpath=config.env.cache_dirpath)
    return Step(name='bad_word_count_features_logreg_output',
                transformer=Dummy(),
                input_steps=[logreg],
                adapter={'y_pred': ([('bad_word_count_logreg', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def char_vdcnn(config, is_train):
    """Character-level VDCNN pipeline for either training or inference."""
    cleaned = _preprocessing(config, is_train)
    tokenizer = _char_tokenizer(cleaned, config, is_train)
    adapter = {'X': ([('char_tokenizer', 'X')]),
               'y': ([('cleaning_output', 'y')]),
               }
    extra_kwargs = {}
    if is_train:
        # Training additionally wires validation data and refits the transformer.
        adapter['validation_data'] = ([('char_tokenizer', 'X_valid'),
                                       ('cleaning_output', 'y_valid')], to_tuple_inputs)
        extra_kwargs['overwrite_transformer'] = True
    network = Step(name='char_vdcnn',
                   transformer=CharVDCNN(**config.char_vdcnn_network),
                   input_steps=[tokenizer, cleaned],
                   adapter=adapter,
                   cache_dirpath=config.env.cache_dirpath,
                   **extra_kwargs)
    return Step(name='char_vdcnn_output',
                transformer=Dummy(),
                input_steps=[network],
                adapter={'y_pred': ([('char_vdcnn', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def logistic_regression_ensemble_train(config):
    """Train a logistic-regression ensembler on base models' probabilities."""
    base_outputs = ensemble_extraction(config)
    probability_mappings = [(step.name, 'prediction_probability')
                            for step in base_outputs]
    # Labels come from the first model's shared xy_train split.
    label_step = base_outputs[0].get_step('xy_train')
    ensemble = Step(name='logreg_ensemble',
                    transformer=LogisticRegressionMultilabel(
                        **config.logistic_regression_ensemble),
                    overwrite_transformer=True,
                    input_steps=base_outputs + [label_step],
                    adapter={'X': (probability_mappings, hstack_inputs),
                             'y': ([('xy_train', 'y')])
                             },
                    cache_dirpath=config.env.cache_dirpath)
    return Step(name='logreg_ensemble_output',
                transformer=Dummy(),
                input_steps=[ensemble],
                adapter={'y_pred': ([('logreg_ensemble', 'prediction_probability')])},
                cache_dirpath=config.env.cache_dirpath)
def inference_preprocessing(config):
    """Build the text-cleaning sub-pipeline used by the inference pipelines.

    Splits raw input meta into X/y, cleans the text, and exposes the cleaned
    X, the labels, and the train_mode flag under the 'cleaning_output' step.

    NOTE(review): the step names ('xy_train', 'text_cleaner_train') say
    "train" even though this is the inference variant; downstream adapters
    reference these exact names, so they are kept as-is — confirm before
    renaming.
    """
    xy_train = Step(name='xy_train',
                    transformer=XYSplit(**config.xy_splitter),
                    input_data=['input'],
                    adapter={'meta': ([('input', 'meta')]),
                             'train_mode': ([('input', 'train_mode')])
                             },
                    cache_dirpath=config.env.cache_dirpath)
    text_cleaner = Step(name='text_cleaner_train',
                        transformer=TextCleaner(**config.text_cleaner),
                        input_steps=[xy_train],
                        adapter={'X': ([('xy_train', 'X')])},
                        cache_dirpath=config.env.cache_dirpath)
    # Gather cleaned features, labels, and mode flag under one output step.
    cleaning_output = Step(name='cleaning_output',
                           transformer=Dummy(),
                           input_data=['input'],
                           input_steps=[xy_train, text_cleaner],
                           adapter={'X': ([('text_cleaner_train', 'X')]),
                                    'y': ([('xy_train', 'y')]),
                                    'train_mode': ([('input', 'train_mode')]),
                                    },
                           cache_dirpath=config.env.cache_dirpath)
    return cleaning_output
def word_lstm_train(config):
    """Word-level LSTM training pipeline with validation wiring."""
    cleaned = train_preprocessing(config)
    tokenizer = Step(name='word_tokenizer',
                     transformer=Tokenizer(**config.word_tokenizer),
                     input_steps=[cleaned],
                     adapter={'X': ([('cleaning_output', 'X')]),
                              'X_valid': ([('cleaning_output', 'X_valid')]),
                              'train_mode': ([('cleaning_output', 'train_mode')])
                              },
                     cache_dirpath=config.env.cache_dirpath)
    lstm = Step(name='word_lstm',
                transformer=WordLSTM(**config.word_lstm_network),
                overwrite_transformer=True,
                input_steps=[tokenizer, cleaned],
                adapter={'X': ([('word_tokenizer', 'X')]),
                         'y': ([('cleaning_output', 'y')]),
                         'validation_data': ([('word_tokenizer', 'X_valid'),
                                              ('cleaning_output', 'y_valid')], to_tuple_inputs),
                         },
                cache_dirpath=config.env.cache_dirpath)
    return Step(name='word_output',
                transformer=Dummy(),
                input_steps=[lstm],
                adapter={'y_pred': ([('word_lstm', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def seq_conv_train(config):
    """Training pipeline for the sequential conv-net segmentation baseline.

    Flow: split train/valid meta -> image loader -> conv net -> resize masks
    to original sizes -> threshold -> expose binarized masks as 'y_pred'.
    """
    # Train split reads 'meta'; the inference split below reads 'meta_valid'.
    xy_train = Step(name='xy_train',
                    transformer=XYSplit(**config.xy_splitter),
                    input_data=['input'],
                    adapter={'meta': ([('input', 'meta')]),
                             'train_mode': ([('input', 'train_mode')])
                             },
                    cache_dirpath=config.env.cache_dirpath)
    xy_inference = Step(name='xy_inference',
                        transformer=XYSplit(**config.xy_splitter),
                        input_data=['input'],
                        adapter={'meta': ([('input', 'meta_valid')]),
                                 'train_mode': ([('input', 'train_mode')])
                                 },
                        cache_dirpath=config.env.cache_dirpath)
    # Loader feeds both the training pairs and the validation pairs.
    loader_train = Step(name='loader',
                        transformer=MetadataImageSegmentationLoader(**config.loader),
                        input_data=['input'],
                        input_steps=[xy_train, xy_inference],
                        adapter={'X': ([('xy_train', 'X')], squeeze_inputs),
                                 'y': ([('xy_train', 'y')], squeeze_inputs),
                                 'train_mode': ([('input', 'train_mode')]),
                                 'X_valid': ([('xy_inference', 'X')], squeeze_inputs),
                                 'y_valid': ([('xy_inference', 'y')], squeeze_inputs),
                                 },
                        cache_dirpath=config.env.cache_dirpath)
    sequential_convnet = Step(name='sequential_convnet',
                              transformer=SequentialConvNet(**config.sequential_convnet),
                              input_steps=[loader_train],
                              cache_dirpath=config.env.cache_dirpath)
    # Resize predicted masks back to the original image sizes.
    mask_resize = Step(name='mask_resize',
                       transformer=Resizer(),
                       input_data=['input'],
                       input_steps=[sequential_convnet],
                       adapter={'images': ([('sequential_convnet', 'predicted_masks')]),
                                'target_sizes': ([('input', 'target_sizes')]),
                                },
                       cache_dirpath=config.env.cache_dirpath)
    thresholding = Step(name='thresholding',
                        transformer=Thresholder(**config.thresholder),
                        input_steps=[mask_resize],
                        adapter={'images': ([('mask_resize', 'resized_images')]),
                                 },
                        cache_dirpath=config.env.cache_dirpath)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[thresholding],
                  adapter={'y_pred': ([('thresholding', 'binarized_images')]),
                           },
                  cache_dirpath=config.env.cache_dirpath)
    return output
def unet_padded(config):
    """Inference U-Net pipeline on padded images, cropping predictions back.

    Reuses the inference `unet` pipeline but swaps the loader transformer for
    a padding variant, then crops the padded predictions before the shared
    mask postprocessing.
    """
    save_output = False
    unet_pipeline = unet(config, train_mode=False).get_step('unet')
    loader = unet_pipeline.get_step("loader")
    # Replace the loader's transformer in place so the fetched pipeline
    # feeds padded images to the network; order matters — this mutation must
    # happen before the pipeline is run.
    loader.transformer = loaders.ImageSegmentationLoaderInferencePadding(
        **config.loader)
    prediction_crop = Step(name='prediction_crop',
                           # Stream mode selects the streaming crop implementation.
                           transformer=post.PredictionCropStream(
                               **config.postprocessor.prediction_crop)
                           if config.execution.stream_mode
                           else post.PredictionCrop(**config.postprocessor.prediction_crop),
                           input_steps=[unet_pipeline],
                           adapter={'images': ([(unet_pipeline.name, 'multichannel_map_prediction')]),
                                    },
                           cache_dirpath=config.env.cache_dirpath,
                           save_output=save_output)
    # Rename 'cropped_images' back to the key downstream postprocessing expects.
    prediction_renamed = Step(name='prediction_renamed',
                              transformer=Dummy(),
                              input_steps=[prediction_crop],
                              adapter={'multichannel_map_prediction': (
                                  [(prediction_crop.name, 'cropped_images')]),
                              },
                              cache_dirpath=config.env.cache_dirpath,
                              save_output=save_output)
    mask_postprocessed = mask_postprocessing(loader,
                                             prediction_renamed,
                                             config,
                                             save_output=save_output)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[mask_postprocessed],
                  adapter={'y_pred': ([(mask_postprocessed.name, 'images_with_scores')]),
                           },
                  cache_dirpath=config.env.cache_dirpath,
                  save_output=save_output)
    return output
def _price_features(dispatchers, config, train_mode, **kwargs):
    """Expose numerical price features from the type-split dispatcher step(s).

    In train mode `dispatchers` is a (train, valid) pair of split steps and a
    pair of steps is returned; otherwise it is a single split step and a
    single step is returned.
    """
    if train_mode:
        feature_by_type_split, feature_by_type_split_valid = dispatchers
        price_features = Step(name='price_features',
                              transformer=Dummy(),
                              input_steps=[feature_by_type_split],
                              adapter={'numerical_features': (
                                  [(feature_by_type_split.name, 'numerical_features')])
                              },
                              cache_dirpath=config.env.cache_dirpath,
                              **kwargs)
        # NOTE(review): the valid step passes the train Step as `transformer=`,
        # presumably to share its (fitted) transformer rather than create a
        # new one — confirm against the Step implementation before changing.
        price_features_valid = Step(name='price_features_valid',
                                    transformer=price_features,
                                    input_steps=[feature_by_type_split_valid],
                                    adapter={'numerical_features': (
                                        [(feature_by_type_split_valid.name, 'numerical_features')])
                                    },
                                    cache_dirpath=config.env.cache_dirpath,
                                    **kwargs)
        return price_features, price_features_valid
    else:
        feature_by_type_split = dispatchers
        price_features = Step(name='price_features',
                              transformer=Dummy(),
                              input_steps=[feature_by_type_split],
                              adapter={'numerical_features': (
                                  [(feature_by_type_split.name, 'numerical_features')])
                              },
                              cache_dirpath=config.env.cache_dirpath,
                              **kwargs)
        return price_features
def postprocessing__pipeline_simplified(cache_dirpath):
    """Simplified postprocessing: resize -> map categories -> label -> score."""
    resized = Step(name='mask_resize',
                   transformer=post.Resizer(),
                   input_data=['unet_output', 'callback_input'],
                   adapter={'images': ([('unet_output', 'multichannel_map_prediction')]),
                            'target_sizes': ([('callback_input', 'target_sizes')]),
                            },
                   cache_dirpath=cache_dirpath)
    mapped = Step(name='category_mapper',
                  transformer=post.CategoryMapper(),
                  input_steps=[resized],
                  adapter={'images': ([('mask_resize', 'resized_images')]),
                           },
                  cache_dirpath=cache_dirpath)
    labeled = Step(name='labeler',
                   transformer=post.MulticlassLabeler(),
                   input_steps=[mapped],
                   adapter={'images': ([(mapped.name, 'categorized_images')]),
                            },
                   cache_dirpath=cache_dirpath)
    # Attach per-instance scores derived from the resized probability maps.
    scored = Step(name='score_builder',
                  transformer=post.ScoreBuilder(),
                  input_steps=[labeled, resized],
                  adapter={'images': ([(labeled.name, 'labeled_images')]),
                           'probabilities': ([(resized.name, 'resized_images')]),
                           },
                  cache_dirpath=cache_dirpath)
    return Step(name='output',
                transformer=Dummy(),
                input_steps=[scored],
                adapter={'y_pred': ([(scored.name, 'images_with_scores')]),
                         },
                cache_dirpath=cache_dirpath)
def word2vec_dpcnn(config, is_train):
    """Word-level DPCNN over word2vec embeddings (training or inference)."""
    cleaned = _preprocessing(config, is_train)
    tokenizer = _word_tokenizer(cleaned, config, is_train)
    embeddings = _word2vec_embeddings(tokenizer, config)
    adapter = {'X': ([('word_tokenizer', 'X')]),
               'y': ([('cleaning_output', 'y')]),
               'embedding_matrix': ([('word2vec_embeddings', 'embeddings_matrix')]),
               }
    extra_kwargs = {}
    if is_train:
        # Training additionally wires validation data and refits the transformer.
        adapter['validation_data'] = ([('word_tokenizer', 'X_valid'),
                                       ('cleaning_output', 'y_valid')], to_tuple_inputs)
        extra_kwargs['overwrite_transformer'] = True
    word2vec_dpcnn = Step(name='word2vec_dpcnn',
                          transformer=WordDPCNN(**config.dpcnn_network),
                          input_steps=[tokenizer, cleaned, embeddings],
                          adapter=adapter,
                          cache_dirpath=config.env.cache_dirpath,
                          **extra_kwargs)
    return Step(name='word2vec_dpcnn_output',
                transformer=Dummy(),
                input_steps=[word2vec_dpcnn],
                adapter={'y_pred': ([('word2vec_dpcnn', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def unet_inference(config):
    """Inference U-Net: split meta, load images, predict, resize, threshold.

    Fix: the original passed `input_steps=[xy_inference, xy_inference]` to
    the loader — the same step twice; a single entry is sufficient since the
    adapter resolves inputs by step name.
    """
    xy_inference = Step(name='xy_inference',
                        transformer=XYSplit(**config.xy_splitter),
                        input_data=['input'],
                        adapter={'meta': ([('input', 'meta')]),
                                 'train_mode': ([('input', 'train_mode')])
                                 },
                        cache_dirpath=config.env.cache_dirpath)
    loader_inference = Step(name='loader',
                            transformer=MetadataImageSegmentationLoader(**config.loader),
                            input_data=['input'],
                            input_steps=[xy_inference],
                            adapter={'X': ([('xy_inference', 'X')], squeeze_inputs),
                                     'y': ([('xy_inference', 'y')], squeeze_inputs),
                                     'train_mode': ([('input', 'train_mode')]),
                                     },
                            cache_dirpath=config.env.cache_dirpath)
    unet_network = Step(name='unet_network',
                        transformer=PyTorchUNet(**config.unet_network),
                        input_steps=[loader_inference],
                        cache_dirpath=config.env.cache_dirpath)
    # Resize predicted masks back to the original image sizes.
    mask_resize = Step(name='mask_resize',
                       transformer=Resizer(),
                       input_data=['input'],
                       input_steps=[unet_network],
                       adapter={'images': ([('unet_network', 'predicted_masks')]),
                                'target_sizes': ([('input', 'target_sizes')]),
                                },
                       cache_dirpath=config.env.cache_dirpath)
    thresholding = Step(name='thresholding',
                        transformer=Thresholder(**config.thresholder),
                        input_steps=[mask_resize],
                        adapter={'images': ([('mask_resize', 'resized_images')]),
                                 },
                        cache_dirpath=config.env.cache_dirpath)
    output = Step(name='output',
                  transformer=Dummy(),
                  input_steps=[thresholding],
                  adapter={'y_pred': ([('thresholding', 'binarized_images')]),
                           },
                  cache_dirpath=config.env.cache_dirpath)
    return output
def baseline(config, train_mode):
    """Baseline pipeline: v0 features into LightGBM, raw predictions out."""
    if train_mode:
        # Training also extracts the validation feature set for the classifier.
        features, features_valid = feature_extraction_v0(config, train_mode)
        light_gbm = classifier_lgbm((features, features_valid), config, train_mode)
    else:
        features = feature_extraction_v0(config, train_mode)
        light_gbm = classifier_lgbm(features, config, train_mode)
    return Step(name='output',
                transformer=Dummy(),
                input_steps=[light_gbm],
                adapter={'y_pred': ([(light_gbm.name, 'prediction')]),
                         },
                cache_dirpath=config.env.cache_dirpath)
def fasttext_lstm(config, is_train):
    """Word-level CuDNN LSTM over fastText embeddings (train or inference)."""
    cleaned = _preprocessing(config, is_train)
    tokenizer = _word_tokenizer(cleaned, config, is_train)
    embeddings = _fasttext_embeddings(tokenizer, config)
    adapter = {'X': ([('word_tokenizer', 'X')]),
               'y': ([('cleaning_output', 'y')]),
               'embedding_matrix': ([('fasttext_embeddings', 'embeddings_matrix')]),
               }
    extra_kwargs = {}
    if is_train:
        # Training additionally wires validation data and refits the transformer.
        adapter['validation_data'] = ([('word_tokenizer', 'X_valid'),
                                       ('cleaning_output', 'y_valid')], to_tuple_inputs)
        extra_kwargs['overwrite_transformer'] = True
    fasttext_lstm = Step(name='fasttext_lstm',
                         transformer=WordCuDNNLSTM(**config.lstm_network),
                         input_steps=[tokenizer, cleaned, embeddings],
                         adapter=adapter,
                         cache_dirpath=config.env.cache_dirpath,
                         **extra_kwargs)
    return Step(name='fasttext_lstm_output',
                transformer=Dummy(),
                input_steps=[fasttext_lstm],
                adapter={'y_pred': ([('fasttext_lstm', 'prediction_probability')]),
                         },
                cache_dirpath=config.env.cache_dirpath)