def do_setup():
    """Build the shared fixtures for the test suite.

    Trains a basic binary classifier, round-trips it through disk so the
    suite exercises the saved/loaded pipeline, opens direct redis and
    MongoDB handles, loads a Concordia instance, and records how many
    training/live rows already exist in the persistent store.

    Returns:
        tuple: (trained predictor, titanic test DataFrame, Concordia
        instance, redis client, mongo database handle, count of
        pre-existing training rows, count of pre-existing live rows).
    """
    import aml_utils

    ####################################################################
    # Setup- train model, create direct db connections, set global constants, etc.
    #####################################################################

    # TODO: create another model that uses a different algo (logisticRegression,
    # perhaps), so we can have tests for our logic when using multiple models
    # but each predicting off the same features
    ml_predictor_titanic, df_titanic_test = aml_utils.train_basic_binary_classifier()

    # Persist and reload so every test runs against the deserialized pipeline,
    # not the in-memory one; the temp file is removed immediately afterwards.
    file_name = '_test_suite_saved_pipeline.dill'
    ml_predictor_titanic.save(file_name)
    ml_predictor_titanic = load_ml_model(file_name)
    os.remove(file_name)

    persistent_db_config = {
        'db': '__concordia_test_env',
        'host': 'localhost',
        'port': 27017,
    }

    in_memory_db_config = {
        'db': 8,
        'host': 'localhost',
        'port': 6379,
    }

    # Direct redis handle (in-memory store).
    rdb = redis.StrictRedis(
        host=in_memory_db_config['host'],
        port=in_memory_db_config['port'],
        db=in_memory_db_config['db'],
    )

    # Direct mongo handle (persistent store).
    mongo_client = MongoClient(
        host=persistent_db_config['host'],
        port=persistent_db_config['port'],
    )
    mdb = mongo_client[persistent_db_config['db']]

    concord = load_concordia(persistent_db_config=persistent_db_config)

    # NOTE(review): model_id is read from module scope here — confirm it is
    # defined at file level in the full source.
    existing_training_rows, _, _ = concord._get_training_data_and_predictions(model_id)
    len_existing_training_rows = existing_training_rows.shape[0]

    existing_live_rows = concord.retrieve_from_persistent_db(
        val_type='live_features', row_id=None, model_id=model_id)
    len_existing_live_rows = len(existing_live_rows)

    return ml_predictor_titanic, df_titanic_test, concord, rdb, mdb, len_existing_training_rows, len_existing_live_rows
"tradeTypeId": 'categorical',
    # 'bedrooms': 'categorical',
    # 'year': 'categorical',
    # 'month': 'categorical',
}
# print(column_description1)

# # Merge the two column-description dicts
# column_descriptions = dict(column_description1, **column_description2)
#
# ml_predictor = Predictor(type_of_estimator='Regressor', column_descriptions=column_descriptions)
#
# ml_predictor.train(df_train,model_names='XGBRegressor')
# ml_predictor.save('model_auto_ml_9.h5')

# Load the previously trained model from disk instead of retraining.
ml_predictor = load_ml_model('auto_ml_new.h5')

# Predict on the test data
x = ml_predictor.predict(df_test)
x_dataframe = pd.DataFrame(x,columns=['predictions'])

# Attach the predictions column alongside the original rows and export.
merge_data = pd.concat((origin_data,x_dataframe),axis=1)
merge_data_df = pd.DataFrame(merge_data)
merge_data_df.to_csv('./merge_data_bak/merge_data_auto_ml.csv',index=False)

# Report prediction vs. label distributions and the overall MAE.
print(x_dataframe.describe())
print(df_test_label.describe())
print(mean_absolute_error(df_test_label,x))
compute_ratio(merge_data_df)
# compute_ratio2(merge_data_df)
    # NOTE(review): these four prints are the tail of a function whose `def`
    # lies above this chunk — indentation reconstructed; confirm against the
    # full file. They report the share of rows in each error bucket and the
    # overall MAE.
    print(len(data_20) / len(data))
    print(len(data_30) / len(data))
    print(len(data_more) / len(data))
    print(mean_absolute_error(data[test_column], data['daysOnMarket']))


if __name__ == '__main__':
    # Load the data
    data = pd.read_csv('./input/treb_toronto_9.csv')
    data = preprocess_data(data)
    origin_data = data.reset_index(drop=True)
    # Split the target column off from the features before predicting.
    data_label = data['daysOnMarket']
    data_prediction = data.drop(columns=['daysOnMarket'])

    # Load the model
    model = load_ml_model('./some_fine_model/model_auto_ml_9.h5')

    # Predict:
    prediction_result = model.predict(data_prediction)
    x = prediction_result
    x_dataframe = pd.DataFrame(x, columns=['predictions'])

    # Attach predictions alongside the original rows and export to CSV.
    merge_data = pd.concat((origin_data, x_dataframe), axis=1)
    merge_data_df = pd.DataFrame(merge_data)
    merge_data_df.to_csv(
        './intermediate_generated_file/load_model_test_merge_prediction_result_data.csv',
        index=False)

    # Report prediction vs. label distributions and the overall MAE.
    print(x_dataframe.describe())
    print(data_label.describe())
    print(mean_absolute_error(data_label, x))
    compute_ratio(merge_data_df)
def my_predict_auto_ml(self, data):
    """Reload the persisted auto_ml model and return its predictions for *data*.

    The model is read from disk on every call; the path comes from the
    module-level ``model_auto_ml_path``.
    """
    return load_ml_model(model_auto_ml_path).predict(data)