コード例 #1
0
def test_save_fig_to_path():
    """
    Tests whether save_fig_to_path leaves a file at the requested location
    """
    dag = get_expected_dag_adult_easy_py()

    # The figure is written next to the test pipelines inside the project
    pipelines_dir = os.path.join(str(get_project_root()), "test", "pipelines")
    png_path = os.path.join(pipelines_dir, "adult_easy.png")
    save_fig_to_path(dag, png_path)

    assert os.path.isfile(png_path)
コード例 #2
0
def test_save_fig_to_path():
    """
    Tests whether save_fig_to_path leaves a file at the requested location
    """
    dag = get_expected_dag_adult_easy("<string-source>")

    # The figure is written into the adult_simple example pipeline folder
    adult_simple_dir = os.path.join(str(get_project_root()),
                                    "example_pipelines", "adult_simple")
    png_path = os.path.join(adult_simple_dir, "adult_simple.png")
    save_fig_to_path(dag, png_path)

    assert os.path.isfile(png_path)
コード例 #3
0
"""
COMPAS pipeline
"""
import os

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, KBinsDiscretizer, label_binarize

from mlinspect.utils import get_project_root

# The training data is read from the experiments/user_interviews folder
train_file = os.path.join(str(get_project_root()), "experiments",
                          "user_interviews", "compas_train_modified.csv")
# '?' entries are parsed as missing values; the first CSV column is used as the index
train = pd.read_csv(train_file, na_values='?', index_col=0)
# The test data is read from the example_pipelines/compas folder
test_file = os.path.join(str(get_project_root()), "example_pipelines",
                         "compas", "compas_test.csv")
test = pd.read_csv(test_file, na_values='?', index_col=0)

# Restrict both frames to the same list of columns
train = train[[
    'sex', 'dob', 'age', 'c_charge_degree', 'race', 'score_text',
    'priors_count', 'days_b_screening_arrest', 'decile_score', 'is_recid',
    'two_year_recid', 'c_jail_in', 'c_jail_out'
]]
test = test[[
    'sex', 'dob', 'age', 'c_charge_degree', 'race', 'score_text',
    'priors_count', 'days_b_screening_arrest', 'decile_score', 'is_recid',
    'two_year_recid', 'c_jail_in', 'c_jail_out'
]]
コード例 #4
0
"""
Tests whether the MaterializeFirstRowsInspection works
"""

import os

from testfixtures import compare, RangeComparison
from numpy.ma import array

from mlinspect.inspections.inspection_input import InspectionInputRow
from mlinspect.inspections.materialize_first_rows_inspection import MaterializeFirstRowsInspection
from mlinspect.instrumentation.dag_node import DagNode, OperatorType, CodeReference
from mlinspect.pipeline_inspector import PipelineInspector
from mlinspect.utils import get_project_root

# Path of the adult_easy.py pipeline inside <project root>/test/pipelines
FILE_PY = os.path.join(str(get_project_root()), "test", "pipelines", "adult_easy.py")


def test_materialize_first_rows_inspection():
    """
    Runs the pipeline file with a MaterializeFirstRowsInspection(2) attached
    and compares the annotations stored for it with the expected result
    """
    builder = PipelineInspector.on_pipeline_from_py_file(FILE_PY)
    builder = builder.add_inspection(MaterializeFirstRowsInspection(2))
    inspector_result = builder.execute()

    annotations = inspector_result.inspection_to_annotations
    # The lookups deliberately use freshly constructed inspections as keys
    assert MaterializeFirstRowsInspection(2) in annotations
    actual = annotations[MaterializeFirstRowsInspection(2)]

    compare(actual, get_expected_result())
コード例 #5
0
ファイル: compas.py プロジェクト: stefan-grafberger/mlinspect
"""
An example pipeline
"""
import os

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, KBinsDiscretizer, label_binarize

from mlinspect.utils import get_project_root

# Both CSV files live in <project root>/example_pipelines/compas
train_file = os.path.join(str(get_project_root()), "example_pipelines", "compas", "compas_train.csv")
# '?' entries are parsed as missing values; the first CSV column is used as the index
train_data = pd.read_csv(train_file, na_values='?', index_col=0)
test_file = os.path.join(str(get_project_root()), "example_pipelines", "compas", "compas_test.csv")
test_data = pd.read_csv(test_file, na_values='?', index_col=0)

# Restrict both frames to the same list of columns
train_data = train_data[
    ['sex', 'dob', 'age', 'c_charge_degree', 'race', 'score_text', 'priors_count', 'days_b_screening_arrest',
     'decile_score', 'is_recid', 'two_year_recid', 'c_jail_in', 'c_jail_out']]
test_data = test_data[
    ['sex', 'dob', 'age', 'c_charge_degree', 'race', 'score_text', 'priors_count', 'days_b_screening_arrest',
     'decile_score', 'is_recid', 'two_year_recid', 'c_jail_in', 'c_jail_out']]

# Row filters on the training data, applied one after another:
# keep rows whose days_b_screening_arrest lies in [-30, 30]
train_data = train_data[(train_data['days_b_screening_arrest'] <= 30) & (train_data['days_b_screening_arrest'] >= -30)]
# drop rows where is_recid is -1
train_data = train_data[train_data['is_recid'] != -1]
# drop rows where c_charge_degree is "O"
train_data = train_data[train_data['c_charge_degree'] != "O"]
# drop rows where score_text is the literal string 'N/A'
train_data = train_data[train_data['score_text'] != 'N/A']
コード例 #6
0
"""
Tests whether the adult_easy test pipeline works
"""
import ast
import os
import nbformat
from nbconvert import PythonExporter
from mlinspect.utils import get_project_root

# Paths of the .py and .ipynb versions of the adult_easy pipeline inside <project root>/test/pipelines
FILE_PY = os.path.join(str(get_project_root()), "test", "pipelines",
                       "adult_easy.py")
FILE_NB = os.path.join(str(get_project_root()), "test", "pipelines",
                       "adult_easy.ipynb")


def test_py_pipeline_runs():
    """
    Executes the source of the .py pipeline; the test fails if running it raises
    """
    with open(FILE_PY) as pipeline_file:
        # Parse first, then compile and run the resulting AST
        pipeline_ast = ast.parse(pipeline_file.read())
        exec(compile(pipeline_ast, filename="<ast>", mode="exec"))


def test_nb_pipeline_runs():
    """
    Tests whether the .ipynb version of the pipeline works
    """
    with open(FILE_NB) as file:
        # Parse the notebook file; NO_CONVERT presumably keeps the notebook's own format version - confirm
        # NOTE(review): nothing visible here uses the parsed notebook yet - confirm whether the body continues
        notebook = nbformat.reads(file.read(), nbformat.NO_CONVERT)
コード例 #7
0
"""
Some useful utils for the project
"""
import os

from mlinspect.utils import get_project_root

def _example_pipeline_file(pipeline_name, file_name):
    """
    Builds the path of a file inside <project root>/example_pipelines/<pipeline_name>
    """
    return os.path.join(str(get_project_root()), "example_pipelines",
                        pipeline_name, file_name)


# adult_simple: script, notebook and figure
ADULT_SIMPLE_PY = _example_pipeline_file("adult_simple", "adult_simple.py")
ADULT_SIMPLE_IPYNB = _example_pipeline_file("adult_simple", "adult_simple.ipynb")
ADULT_SIMPLE_PNG = _example_pipeline_file("adult_simple", "adult_simple.png")

# adult_complex: script and figure
ADULT_COMPLEX_PY = _example_pipeline_file("adult_complex", "adult_complex.py")
ADULT_COMPLEX_PNG = _example_pipeline_file("adult_complex", "adult_complex.png")

# compas: script and figure
COMPAS_PY = _example_pipeline_file("compas", "compas.py")
COMPAS_PNG = _example_pipeline_file("compas", "compas.png")

# healthcare: script and figure
HEALTHCARE_PY = _example_pipeline_file("healthcare", "healthcare.py")
HEALTHCARE_PNG = _example_pipeline_file("healthcare", "healthcare.png")
コード例 #8
0
"""
Adult income pipeline
"""
import os
import pandas as pd
from sklearn import compose, preprocessing, tree, pipeline

from mlinspect.utils import get_project_root

print('pipeline start')

# The input is split over two CSV files in experiments/user_interviews
train_file_a = os.path.join(str(get_project_root()), "experiments",
                            "user_interviews", "adult_simple_train_a.csv")
# '?' entries are parsed as missing values; the first CSV column is used as the index
raw_data_a = pd.read_csv(train_file_a, na_values='?', index_col=0)

train_file_b = os.path.join(str(get_project_root()), "experiments",
                            "user_interviews", "adult_simple_train_b.csv")
raw_data_b = pd.read_csv(train_file_b, na_values='?', index_col=0)

# Combine the two parts on their shared "id" key
merged_raw_data = raw_data_a.merge(raw_data_b, on="id")

# Drop every row that contains a missing value
data = merged_raw_data.dropna()

# Turn the income-per-year column into binary labels over the two listed classes
labels = preprocessing.label_binarize(data['income-per-year'],
                                      classes=['>50K', '<=50K'])

column_transformer = compose.ColumnTransformer(
    transformers=[('categorical',
                   preprocessing.OneHotEncoder(handle_unknown='ignore'),
                   ['education', 'workclass']),
                  ('numeric', preprocessing.StandardScaler(),
コード例 #9
0
ファイル: healthcare.py プロジェクト: scnakandala/mlinspect
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from example_pipelines.healthcare.healthcare_utils import MyW2VTransformer, create_model
from mlinspect.utils import get_project_root

# Suppress warnings such as:
# FutureWarning: Given feature/column names or counts do not match the ones for the data given during fit
# NOTE(review): 'ignore' without a category or message filter applies to every warning, not only this one
warnings.filterwarnings('ignore')

# County values of interest (not used in the lines visible here)
COUNTIES_OF_INTEREST = ['county2', 'county3']

# load input data sources (data generated with https://www.mockaroo.com as a single file and then split into two)
# '?' entries are parsed as missing values in both files
patients = pd.read_csv(os.path.join(str(get_project_root()),
                                    "example_pipelines", "healthcare",
                                    "healthcare_patients.csv"),
                       na_values='?')
histories = pd.read_csv(os.path.join(str(get_project_root()),
                                     "example_pipelines", "healthcare",
                                     "healthcare_histories.csv"),
                        na_values='?')

# combine input data into a single table, joined on the shared 'ssn' column
data = patients.merge(histories, on=['ssn'])

# compute mean complications per age group, append as column
# (only the aggregation is visible here; the result column is named mean_complications)
complications = data.groupby('age_group').agg(
    mean_complications=('complications', 'mean'))
コード例 #10
0
"""
Tests whether the feature overview demo works
"""
import os

from importnb import Notebook
import matplotlib

from mlinspect.utils import get_project_root

# Path of the feature overview demo notebook inside <project root>/demo/feature_overview
DEMO_NB_FILE = os.path.join(str(get_project_root()), "demo",
                            "feature_overview", "feature_overview.ipynb")


def test_demo_nb():
    """
    Loads the demo notebook through importnb; an error while loading fails the test
    """
    # Switch to the "template" backend so plt.show is disabled while the
    # notebook is executed as part of this test
    matplotlib.use("template")
    Notebook.load(DEMO_NB_FILE)
コード例 #11
0
"""
Tests whether the user interview task notebooks work
"""
import os

from importnb import Notebook
import matplotlib

from mlinspect.utils import get_project_root

# Paths of the task notebooks inside <project root>/experiments/user_interviews
ADULT_SIMPLE_TASK_NB = os.path.join(str(get_project_root()), "experiments",
                                    "user_interviews",
                                    "example-task-with-solution.ipynb")
COMPAS_TASK_NB = os.path.join(str(get_project_root()), "experiments",
                              "user_interviews", "task-1-solution.ipynb")
HEALTHCARE_TASK_NB = os.path.join(str(get_project_root()), "experiments",
                                  "user_interviews", "task-2-solution.ipynb")


def test_adult_simple_task_nb():
    """
    Loads the adult simple task notebook through importnb; an error while loading fails the test
    """
    # Switch to the "template" backend so plt.show is disabled while the
    # notebook is executed as part of this test
    matplotlib.use("template")
    Notebook.load(ADULT_SIMPLE_TASK_NB)


def test_compas_task_nb():
    """
    Tests whether this task notebook works
コード例 #12
0
"""
Tests whether the performance benchmark notebook works
"""
import os

import matplotlib
from importnb import Notebook

from mlinspect.utils import get_project_root

# Path of the performance benchmark notebook inside <project root>/experiments/performance
EXPERIMENT_NB_FILE = os.path.join(str(get_project_root()), "experiments",
                                  "performance",
                                  "performance_benchmarks.ipynb")


def test_experiment_nb():
    """
    Loads the experiment notebook through importnb; an error while loading fails the test
    """
    # Switch to the "template" backend so plt.show is disabled while the
    # notebook is executed as part of this test
    matplotlib.use("template")
    Notebook.load(EXPERIMENT_NB_FILE)
コード例 #13
0
def test_get_project_root():
    """
    Checks that get_project_root returns the parent of the directory containing this file
    """
    this_file = Path(__file__)
    expected_root = this_file.parent.parent
    assert get_project_root() == expected_root
コード例 #14
0
"""
Tests whether the healthcare demo works
"""
import ast
import os

from importnb import Notebook

from mlinspect.utils import get_project_root

# Paths of the .py and .ipynb versions of the adult_easy pipeline inside <project root>/test/pipelines
ADULT_EASY_FILE_PY = os.path.join(str(get_project_root()), "test", "pipelines",
                                  "adult_easy.py")
FILE_NB = os.path.join(str(get_project_root()), "test", "pipelines",
                       "adult_easy.ipynb")

# Paths of the healthcare pipeline script and its demo notebook inside <project root>/demo/healthcare
PIPELINE_FILE_PY = os.path.join(str(get_project_root()), "demo", "healthcare",
                                "healthcare.py")
DEMO_NB_FILE = os.path.join(str(get_project_root()), "demo", "healthcare",
                            "healthcare_demo.ipynb")


def test_py_pipeline_runs():
    """
    Executes the source of the healthcare .py pipeline; the test fails if running it raises
    """
    with open(PIPELINE_FILE_PY) as pipeline_file:
        # Parse first, then compile and run the resulting AST
        pipeline_ast = ast.parse(pipeline_file.read())
        exec(compile(pipeline_ast, filename="<ast>", mode="exec"))

コード例 #15
0
"""
An example pipeline
"""
import os
import pandas as pd

from sklearn import compose, preprocessing, tree, pipeline
from mlinspect.utils import get_project_root

print('pipeline start')
# The training data is read from <project root>/test/data
train_file = os.path.join(str(get_project_root()), "test", "data",
                          "adult_train.csv")
# '?' entries are parsed as missing values; the first CSV column is used as the index
raw_data = pd.read_csv(train_file, na_values='?', index_col=0)

# Drop every row that contains a missing value
data = raw_data.dropna()

# Turn the income-per-year column into binary labels over the two listed classes
labels = preprocessing.label_binarize(data['income-per-year'],
                                      classes=['>50K', '<=50K'])

# One-hot encode the two categorical columns and standard-scale the two numeric ones
feature_transformation = compose.ColumnTransformer(
    transformers=[('categorical',
                   preprocessing.OneHotEncoder(handle_unknown='ignore'),
                   ['education', 'workclass']),
                  ('numeric', preprocessing.StandardScaler(),
                   ['age', 'hours-per-week'])])

# Feature transformation followed by a decision tree classifier
income_pipeline = pipeline.Pipeline([('features', feature_transformation),
                                     ('classifier',
                                      tree.DecisionTreeClassifier())])

# Fit the whole pipeline on the cleaned data and the binarized labels
income_pipeline.fit(data, labels)
コード例 #16
0
"""
An example pipeline
"""
import os
import pandas as pd

from sklearn import compose, preprocessing, tree, pipeline
from mlinspect.utils import get_project_root

print('pipeline start')
# The training data is read from <project root>/example_pipelines/adult_complex
train_file = os.path.join(str(get_project_root()), "example_pipelines", "adult_complex", "adult_train.csv")
# '?' entries are parsed as missing values; the first CSV column is used as the index
raw_data = pd.read_csv(train_file, na_values='?', index_col=0)

# Drop every row that contains a missing value
data = raw_data.dropna()

# Turn the income-per-year column into binary labels over the two listed classes
labels = preprocessing.label_binarize(data['income-per-year'], classes=['>50K', '<=50K'])

# One-hot encode the two categorical columns and standard-scale the two numeric ones
feature_transformation = compose.ColumnTransformer(transformers=[
    ('categorical', preprocessing.OneHotEncoder(handle_unknown='ignore'), ['education', 'workclass']),
    ('numeric', preprocessing.StandardScaler(), ['age', 'hours-per-week'])
])


# Feature transformation followed by a decision tree classifier
income_pipeline = pipeline.Pipeline([
    ('features', feature_transformation),
    ('classifier', tree.DecisionTreeClassifier())])

# Fit the whole pipeline on the cleaned data and the binarized labels
income_pipeline.fit(data, labels)


print('pipeline finished')