def setup_mummify():
    """Create the files the mummify example needs, then run it once.

    Side effects, all relative to the current working directory:

    * appends ``test_mummify.py`` and ``__pycache__`` to ``.gitignore``
      (the file is created if it does not exist yet);
    * (over)writes ``model.py`` with a tiny script that logs a fixed
      accuracy through ``mummify.log``;
    * executes ``model.py`` in a child process.

    Returns:
        None.
    """
    # Local import so this block does not depend on a file-level ``import sys``.
    import sys

    # Plain file I/O instead of ``echo ... >> .gitignore`` through a shell:
    # same appended lines, but no shell is spawned and no shell quoting
    # rules are involved (Windows ``echo`` would keep the single quotes).
    with open(".gitignore", "a", encoding="utf-8") as gitignore:
        gitignore.write("test_mummify.py\n__pycache__\n")

    contents = (
        "import mummify\n"
        "\n"
        "accuracy = 0.80\n"
        "\n"
        "mummify.log(f'Accuracy: {accuracy}')\n"
    )
    with open("model.py", "w", encoding="utf-8") as f:
        f.write(contents)

    # Use the interpreter that is running this code rather than whatever
    # ``python`` resolves to on PATH. The exit status is deliberately not
    # checked, matching the original best-effort behaviour.
    subprocess.run([sys.executable, "model.py"])
import dill
import mummify

# --- synthetic data -------------------------------------------------------
n_samples = 1000
n_outliers = 50

X, y, coef = make_regression(
    n_samples=n_samples,
    n_features=1,
    n_informative=1,
    noise=10,
    coef=True,
    random_state=0,
)

# Overwrite the first ``n_outliers`` rows with points centred on
# X = 3, y = -3. The seed is fixed first and X is drawn before y, so the
# sequence of random draws stays the same on every run.
np.random.seed(0)
outlier_rows = slice(n_outliers)
X[outlier_rows] = 3 + 0.5 * np.random.normal(size=(n_outliers, 1))
y[outlier_rows] = -3 + 10 * np.random.normal(size=n_outliers)

# --- fit --------------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

model = LinearRegression()
model.fit(X_train, y_train)

# The held-out score is evaluated here but its value is not stored.
model.score(X_test, y_test)

# --- persist and log --------------------------------------------------------
with open('exercises/regression.pkl', 'wb') as f:
    dill.dump(model, f)

# Logged with an empty message, exactly as in the original script.
mummify.log('')
from sklearn.datasets import load_wine
from sklearn.neighbors import KNeighborsClassifier

import mummify

# Load the wine dataset and pull out the feature matrix and the labels.
data = load_wine()
X = data.data
y = data.target

# 4-nearest-neighbours classifier fitted on the full dataset.
model = KNeighborsClassifier(n_neighbors=4)
model.fit(X, y)

# NOTE: the score is computed on the same X, y the model was fitted on,
# so this is a training-set accuracy, rounded to four decimals.
accuracy = round(model.score(X, y), 4)

mummify.log(f'Accuracy: {accuracy}')
lambda x: pd.to_numeric(str(x).split(' ')[0], errors='coerce') ) df['total_stops'] = df['total_stops'].fillna(0) df = df.rename(columns={ 'date_of_journey': 'date', 'total_stops': 'stops', 'source': 'origin' }) y = df['price'] X = df[['date', 'origin', 'destination', 'stops']] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42) mapper = DataFrameMapper([ ('date', DateEncoder(), {'input_df': True}), ('origin', LabelBinarizer()), ('destination', LabelBinarizer()), ('stops', None) ], df_out=True) model = LinearRegression() pipe = make_pipeline(mapper, model) pipe.fit(X_train, y_train) score = pipe.score(X_test, y_test) with open('pipe.pkl', 'wb') as f: pickle.dump(pipe, f) mummify.log(f'R2 Score: {round(score, 4)}')
# Features are every column of ``df`` except ``target``; ``df``, ``target``
# and ``y`` are defined before this section.
X = df.drop(target, axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Per-column preprocessing: a custom transformer for ``hexcode`` (it is
# handed a DataFrame via ``input_df``) and standard scaling for the two
# numeric columns.
column_steps = [
    ("hexcode", HexTransformer(), {"input_df": True}),
    (["diameter"], StandardScaler()),
    (["weight"], StandardScaler()),
]
mapper = DataFrameMapper(column_steps, df_out=True)

# Transformed copies of both splits. They are not referenced again in this
# section, but the names are kept because code outside this view may use
# them.
Z_train = mapper.fit_transform(X_train)
Z_test = mapper.transform(X_test)

# Chain the mapper and a 4-nearest-neighbours classifier, fit on the raw
# training frame, and score on the held-out split.
model = KNeighborsClassifier(n_neighbors=4)
pipe = make_pipeline(mapper, model)
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

# Persist the fitted pipeline, then log the rounded accuracy.
with open("pipe.pkl", "wb") as f:
    pickle.dump(pipe, f)

mummify.log(f"Accuracy: {round(score, 4)}")