Esempio n. 1
0
def setup_mummify():
    """Prepare the working directory for a mummify run and execute a sample model.

    Appends ``test_mummify.py`` and ``__pycache__`` to ``.gitignore`` in the
    current working directory (creating the file if it does not exist), writes
    a minimal ``model.py`` that calls ``mummify.log`` once, and runs that
    script in a child process.

    The child's exit status is not checked, so a failing ``model.py`` does not
    raise here.
    """
    import sys  # local import: only needed to locate the running interpreter

    # Append directly instead of shelling out to ``echo ... >> .gitignore``:
    # same result on POSIX, and it also behaves correctly where the default
    # shell would write the quote characters literally.
    with open(".gitignore", "a") as f:
        f.write("test_mummify.py\n")
        f.write("__pycache__\n")

    contents = (
        "import mummify\n"
        "accuracy = 0.80\n"
        "mummify.log(f'Accuracy: {accuracy}')\n"
    )
    with open("model.py", "w") as f:
        f.write(contents)

    # Argument list, no shell. sys.executable runs the script under the same
    # interpreter as the caller rather than whatever ``python`` is on PATH.
    subprocess.run([sys.executable, "model.py"])
Esempio n. 2
0
import dill
import mummify

# Synthetic single-feature regression data with a block of injected outliers.
n_samples = 1000
n_outliers = 50

X, y, coef = make_regression(
    n_samples=n_samples,
    n_features=1,
    n_informative=1,
    noise=10,
    coef=True,
    random_state=0,
)

# Overwrite the first n_outliers rows with seeded noise. X is drawn before y
# from the same seeded generator, so the order of these two statements fixes
# the generated values.
np.random.seed(0)
X[:n_outliers] = 3 + 0.5 * np.random.normal(size=(n_outliers, 1))
y[:n_outliers] = -3 + 10 * np.random.normal(size=n_outliers)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

model = LinearRegression()
model.fit(X_train, y_train)
# NOTE(review): the score on the held-out split is computed but its result is
# discarded.
model.score(X_test, y_test)

# Persist the fitted estimator with dill.
with open('exercises/regression.pkl', 'wb') as fh:
    dill.dump(model, fh)

# NOTE(review): the message passed to mummify.log is empty — confirm intended.
mummify.log('')
Esempio n. 3
0
from sklearn.datasets import load_wine
from sklearn.neighbors import KNeighborsClassifier

import mummify

# Load the wine dataset and pull the feature matrix and labels off the result.
data = load_wine()
X = data.data
y = data.target

# Classifier configured with n_neighbors=4, fitted on the full dataset.
model = KNeighborsClassifier(n_neighbors=4)
model.fit(X, y)

# The score is measured on the same samples the model was fitted on, then
# rounded to four decimals for the log message.
accuracy = round(model.score(X, y), 4)

mummify.log(f'Accuracy: {accuracy}')
Esempio n. 4
0
    lambda x: pd.to_numeric(str(x).split(' ')[0], errors='coerce')
)
# Fill missing stop counts with 0, then shorten the column names used below.
df['total_stops'] = df['total_stops'].fillna(0)
df = df.rename(
    columns={
        'date_of_journey': 'date',
        'total_stops': 'stops',
        'source': 'origin',
    }
)

# Features and target.
X = df[['date', 'origin', 'destination', 'stops']]
y = df['price']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Per-column preprocessing. DateEncoder is given the column as a DataFrame
# (input_df=True); 'stops' has no transformer attached (None) — presumably
# passed through unchanged, verify against sklearn-pandas.
mapper = DataFrameMapper(
    [
        ('date', DateEncoder(), {'input_df': True}),
        ('origin', LabelBinarizer()),
        ('destination', LabelBinarizer()),
        ('stops', None),
    ],
    df_out=True,
)

# Preprocessing + linear model as a single pipeline, scored on the held-out
# split.
model = LinearRegression()
pipe = make_pipeline(mapper, model)
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

# Persist the whole fitted pipeline (mapper and model together).
with open('pipe.pkl', 'wb') as fh:
    pickle.dump(pipe, fh)

mummify.log(f'R2 Score: {round(score, 4)}')
Esempio n. 5
0
# Feature frame: every column of df except the target column.
X = df.drop(target, axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Per-column preprocessing: HexTransformer receives the "hexcode" column as a
# DataFrame (input_df=True); "diameter" and "weight" each get their own
# StandardScaler.
mapper = DataFrameMapper(
    [
        ("hexcode", HexTransformer(), {"input_df": True}),
        (["diameter"], StandardScaler()),
        (["weight"], StandardScaler()),
    ],
    df_out=True,
)

# NOTE(review): Z_train / Z_test are not referenced again in the visible code;
# the pipeline below fits the same mapper object again as its first step.
Z_train = mapper.fit_transform(X_train)
Z_test = mapper.transform(X_test)

# Preprocessing + classifier as a single pipeline, scored on the held-out
# split.
model = KNeighborsClassifier(n_neighbors=4)
pipe = make_pipeline(mapper, model)
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

# Persist the whole fitted pipeline (mapper and model together).
with open("pipe.pkl", "wb") as fh:
    pickle.dump(pipe, fh)

mummify.log(f"Accuracy: {round(score, 4)}")