def to_df(csv: str) -> pd.DataFrame:
    """Read a CSV file and return it after a pass through ``DataLoader``.

    Args:
        csv: Location of the CSV file, forwarded unchanged to
            ``pd.read_csv``.

    Returns:
        The result of ``DataLoader.load_data()`` after the loader has been
        fitted on the raw frame read from ``csv``.
    """
    raw_frame = pd.read_csv(csv)

    preprocessor = DataLoader()
    preprocessor.fit(raw_frame)

    processed = preprocessor.load_data()
    return processed
# End-to-end consistency check: train an Estimator locally, score it on the
# validation set, then request predictions for the same validation rows from
# the running prediction service and verify both scores agree to 2 decimals.

PREDICT_ROUTE = "http://127.0.0.1:8000/predict"
# Upper bound for the HTTP round trip so a stalled service fails the check
# instead of blocking it forever.
REQUEST_TIMEOUT_SECONDS = 60

info = specifications['description']
x_columns, y_column, metrics = info['X'], info['y'], info['metrics']

train_set = pd.read_csv(TRAIN_CSV, header=0)
test_set = pd.read_csv(VAL_CSV, header=0)

train_x, train_y = train_set[x_columns], train_set[y_column]
test_x, test_y = test_set[x_columns], test_set[y_column]

# Each split is run through its own, freshly constructed DataLoader.
# NOTE(review): this assumes DataLoader.fit() learns nothing from the data
# that must be shared between train and validation - confirm.
loader = DataLoader()
loader.fit(train_x)
train_processed = loader.load_data()

loader = DataLoader()
loader.fit(test_x)
test_processed = loader.load_data()

trained = Estimator.fit(train_processed, train_y)
trained_predict = Estimator.predict(trained, test_processed)

# SECURITY: `metrics` is a string taken from the specifications and is
# evaluated as Python code. That is only acceptable while the specifications
# file is trusted; a name -> function lookup would be the safer replacement.
# It is resolved once so both scores are computed by the same callable.
metric_fn = eval(metrics)
trained_score = round(metric_fn(test_y, trained_predict), 2)

# The raw (unprocessed) validation features are sent JSON-encoded in the
# request body under the 'data' field.
req_data = {'data': json.dumps(test_x.to_dict())}
response = requests.get(
    PREDICT_ROUTE,
    data=req_data,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Fail with the HTTP error itself rather than a confusing KeyError or JSON
# decoding error further down when the service answers 4xx/5xx.
response.raise_for_status()
api_predict = response.json()['prediction']
api_score = round(metric_fn(test_y, api_predict), 2)

print(trained_score)
print(api_score)
assert trained_score == api_score, (
    f"locally trained score {trained_score} != API score {api_score}"
)
# Training script: fit a logistic-regression model on the training CSV and
# persist it to models/log_reg.pickle.

# Imports live at the top so `json` and `pickle` are bound before first use;
# `pd` is imported explicitly because the script calls pd.read_csv.
import json
import pickle

import pandas as pd
from sklearn.linear_model import LogisticRegression

from settings.constants import TRAIN_CSV
from utils.dataloader import DataLoader

with open('settings/specifications.json') as f:
    specifications = json.load(f)

raw_train = pd.read_csv(TRAIN_CSV)

x_columns = specifications['description']['X']
y_column = specifications['description']['y']

# Feature columns are selected by the specification and then run through the
# project's DataLoader before being handed to the model.
x_raw = raw_train[x_columns]
loader = DataLoader()
loader.fit(x_raw)
X = loader.load_data()

# NOTE(review): the target is read from the hard-coded `Response` column while
# `y_column` from the specification is left unused - confirm both refer to the
# same column before switching to raw_train[y_column].
y = raw_train.Response

# L1-penalised logistic regression; 'liblinear' is used as the solver for the
# l1 penalty.
model = LogisticRegression(C=0.01, penalty='l1', solver='liblinear')
model.fit(X, y)

with open('models/log_reg.pickle', 'wb') as f:
    pickle.dump(model, f)