def bde(ctx):
    """Clear cached artifacts and build the decision engine again.

    Deletes the cached model, deletes the previous analysis reports, then
    calls ``get_decision_engine`` on the data returned by ``get_ml_data``.
    A progress message is echoed after each of the three steps.

    Args:
        ctx: Command context (presumably a click context); it is not used
            inside this function.
    """
    delete_cached_model()
    click.echo("Decision engine cache deleted")

    delete_previous_analysis_reports()
    click.echo("Analysis reports deleted")

    ml_data = get_ml_data()
    get_decision_engine(ml_data)
    click.echo("Decision engine created")
def dea(ctx):
    """Regenerate the decision engine analysis reports.

    Removes the previous analysis reports, obtains the ML data and the
    decision engine for it, and lets a ``DecisionEngineAnalyzer`` create
    new reports. The report directory is echoed when finished.

    Args:
        ctx: Command context (presumably a click context); it is not used
            inside this function.
    """
    delete_previous_analysis_reports()
    click.echo("Analysis reports deleted")

    dataset = get_ml_data()
    engine = get_decision_engine(dataset)
    DecisionEngineAnalyzer(engine, dataset).create_analysis_reports()

    message = "Reports created in directory %s" % ANALYSIS_RESULTS_DIR
    click.echo(message)
# Set-up for a random forest hyper-parameter search on the treatment labels:
# load the ML data, preprocess it, binarize the treatment column, split the
# result 70/30 into train/test parts and declare the pipeline and the grid.
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
try:
    # scikit-learn >= 0.18 provides these helpers in ``model_selection``;
    # the legacy modules below were removed in 0.20.
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fall back to the legacy locations on old scikit-learn installations.
    from sklearn.grid_search import GridSearchCV
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelBinarizer

from data_processing.ml_data_prepairer import get_ml_data
from models.preprocess_pipeline import CongestiveHeartFailurePreprocessor

data = get_ml_data()

treatment_label_binarizer = LabelBinarizer()

# Fit the preprocessor on the full dataset, then transform that same dataset.
preprocessor = CongestiveHeartFailurePreprocessor(False)
preprocessor.fit(data)
transformed_data = preprocessor.transform(data)

# Target: binarized treatment labels.
y = treatment_label_binarizer.fit_transform(data.treatment.values)

# Hold out 30% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    transformed_data, y, test_size=0.3)

pipeline = Pipeline([
    ('rf', RandomForestClassifier()),
])

# Keys use the ``<step>__<parameter>`` convention to address the 'rf' step.
param_grid = [{
    'rf__max_depth': list(range(9, 20)),
    'rf__n_estimators': list(range(45, 70, 5)),
    'rf__criterion': ["gini", "entropy"],
    # "sqrt" is what "auto" meant for RandomForestClassifier; "auto" itself
    # was deprecated in scikit-learn 1.1 and removed in 1.3.
    "rf__max_features": ["sqrt", None],
}]
# Random forest grid-search preparation for predicting the treatment column.
# Steps: load data -> preprocess -> binarize labels -> 70/30 split ->
# define the estimator pipeline and the parameter grid to explore.
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
try:
    # Current home of the search/split utilities (scikit-learn >= 0.18).
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy modules, removed in scikit-learn 0.20; kept for old installs.
    from sklearn.grid_search import GridSearchCV
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelBinarizer

from data_processing.ml_data_prepairer import get_ml_data
from models.preprocess_pipeline import CongestiveHeartFailurePreprocessor

data = get_ml_data()
treatment_label_binarizer = LabelBinarizer()

# The preprocessor is fitted and applied on the complete dataset.
preprocessor = CongestiveHeartFailurePreprocessor(False)
preprocessor.fit(data)
transformed_data = preprocessor.transform(data)

# Binarized treatment labels are the prediction target.
y = treatment_label_binarizer.fit_transform(data.treatment.values)

# 30% of the samples are reserved as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    transformed_data, y, test_size=0.3)

pipeline = Pipeline([('rf', RandomForestClassifier())])

# Parameters are addressed as ``rf__<name>`` because the estimator is the
# pipeline step called 'rf'.
param_grid = [{
    'rf__max_depth': list(range(9, 20)),
    'rf__n_estimators': list(range(45, 70, 5)),
    'rf__criterion': ["gini", "entropy"],
    # "auto" was an alias of "sqrt" for RandomForestClassifier and was
    # removed in scikit-learn 1.3, so the explicit value is used instead.
    "rf__max_features": ["sqrt", None],
}]
def pd(ctx):
    """Build a new dataset after clearing everything derived from the old one.

    Args:
        ctx: Command context (presumably a click context); it is not used
            inside this function.
    """
    # Models and analysis results derived from the old dataset are no longer
    # valid once a new dataset is built, so all caches are cleared first.
    _all_clean()

    get_ml_data()
    click.echo("New dataset built")
# Console overview of the decision engine: feature importances, the
# recommended treatment overview and outcome changes per treatment pair.
import logging

from data_processing.ml_data_prepairer import get_ml_data
from models.build_decision_engine import get_decision_engine
from models.analysis.decision_engine_analyzer import DecisionEngineAnalyzer

# Lower the root logger threshold to INFO.
# NOTE(review): no handler is configured here - confirm one is attached
# elsewhere if INFO records are expected to be visible.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

congestive_heart_failure_data = get_ml_data()
decision_engine = get_decision_engine(congestive_heart_failure_data)
decision_engine_analyzer = DecisionEngineAnalyzer(
    decision_engine,
    congestive_heart_failure_data,
)

# Feature importances, most important first.
print("Important Features for outcome prediction")
outcome_importance = decision_engine.get_outcome_feature_importance()
print(outcome_importance.sort_values('importance', ascending=False))

print("Important Features for actual treatment prediction")
treatment_importance = decision_engine.get_actual_treatment_feature_importance()
print(treatment_importance.sort_values('importance', ascending=False))

recommended_treatment_overview = (
    decision_engine_analyzer.get_recommended_treatment_overview()
)
print('Recommended treatment overview')
print(recommended_treatment_overview)

outcome_changes = (
    decision_engine_analyzer
    .get_outcome_change_by_recommended_and_actual_treatment()
)
print('Recommended treatment counts per actual treatment')
print(outcome_changes)

# Keep only rows with more than 20 counts and a survival rate improvement
# above 0.025.
has_enough_counts = outcome_changes.counts > 20
has_clear_improvement = outcome_changes.survival_rate_improvement > 0.025
top_treatment_improvements = outcome_changes[
    has_enough_counts & has_clear_improvement
]
print("Top opportunities for treatment improvements")
# Prints decision engine diagnostics to the console: feature importances and
# the recommended treatment overview, then computes the outcome changes.
import logging

from data_processing.ml_data_prepairer import get_ml_data
from models.build_decision_engine import get_decision_engine
from models.analysis.decision_engine_analyzer import DecisionEngineAnalyzer

# Root logger threshold is set to INFO.
# NOTE(review): handlers are not set up in this script - verify that one is
# attached elsewhere if INFO output is wanted.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Data, engine and analyzer share the same dataset instance.
congestive_heart_failure_data = get_ml_data()
decision_engine = get_decision_engine(congestive_heart_failure_data)
decision_engine_analyzer = DecisionEngineAnalyzer(
    decision_engine, congestive_heart_failure_data)

# Importances are shown in descending order of the 'importance' column.
print("Important Features for outcome prediction")
outcome_feature_importance = decision_engine.get_outcome_feature_importance()
print(outcome_feature_importance.sort_values('importance', ascending=False))

print("Important Features for actual treatment prediction")
actual_treatment_feature_importance = (
    decision_engine.get_actual_treatment_feature_importance()
)
print(actual_treatment_feature_importance.sort_values(
    'importance', ascending=False))

get_overview = decision_engine_analyzer.get_recommended_treatment_overview
recommended_treatment_overview = get_overview()
print('Recommended treatment overview')
print(recommended_treatment_overview)

get_outcome_changes = (
    decision_engine_analyzer
    .get_outcome_change_by_recommended_and_actual_treatment
)
outcome_changes = get_outcome_changes()