def do_setup():
    """Build the shared fixtures used by the test suite.

    Trains the basic binary classifier from ``aml_utils``, round-trips it
    through a saved file, opens direct Redis and MongoDB connections, loads
    concordia against the persistent db config, and counts the training and
    live rows already stored for ``model_id``.

    Relies on names resolved outside this function: ``model_id``,
    ``load_ml_model``, ``load_concordia``, ``os``, ``redis`` and
    ``MongoClient``.

    Returns:
        A 7-tuple ``(ml_predictor_titanic, df_titanic_test, concord, rdb,
        mdb, len_existing_training_rows, len_existing_live_rows)``.
    """
    import aml_utils

    # TODO: create another model that uses a different algo (logisticRegression, perhaps), so we can have tests for our logic when using multiple models but each predicting off the same features
    ml_predictor_titanic, df_titanic_test = aml_utils.train_basic_binary_classifier()

    # Round-trip the predictor through disk so the returned model is the
    # reloaded one rather than the freshly trained in-memory object.
    file_name = '_test_suite_saved_pipeline.dill'
    ml_predictor_titanic.save(file_name)
    try:
        ml_predictor_titanic = load_ml_model(file_name)
    finally:
        # Remove the scratch file even when loading fails, so a broken run
        # does not leave it behind in the working directory.
        os.remove(file_name)

    persistent_db_config = {
        'db': '__concordia_test_env',
        'host': 'localhost',
        'port': 27017,
    }

    in_memory_db_config = {
        'db': 8,
        'host': 'localhost',
        'port': 6379,
    }

    # Direct handle on the in-memory store.
    rdb = redis.StrictRedis(
        host=in_memory_db_config['host'],
        port=in_memory_db_config['port'],
        db=in_memory_db_config['db'],
    )

    # Direct handle on the persistent store.
    client = MongoClient(
        host=persistent_db_config['host'],
        port=persistent_db_config['port'],
    )
    mdb = client[persistent_db_config['db']]

    concord = load_concordia(persistent_db_config=persistent_db_config)

    # Record how many rows already exist for this model before any test runs.
    existing_training_rows, _, _ = concord._get_training_data_and_predictions(model_id)
    len_existing_training_rows = existing_training_rows.shape[0]

    existing_live_rows = concord.retrieve_from_persistent_db(
        val_type='live_features', row_id=None, model_id=model_id)
    len_existing_live_rows = len(existing_live_rows)

    return (
        ml_predictor_titanic,
        df_titanic_test,
        concord,
        rdb,
        mdb,
        len_existing_training_rows,
        len_existing_live_rows,
    )
Exemple #2
0
        "tradeTypeId": 'categorical',
        # 'bedrooms': 'categorical',
        # 'year': 'categorical',
        # 'month': 'categorical',

    }

    # print(column_description1)
    # # Merge the two dictionaries
    # column_descriptions = dict(column_description1, **column_description2)
    #
    # ml_predictor = Predictor(type_of_estimator='Regressor', column_descriptions=column_descriptions)
    #
    # ml_predictor.train(df_train,model_names='XGBRegressor')
    # ml_predictor.save('model_auto_ml_9.h5')
    ml_predictor = load_ml_model('auto_ml_new.h5')



    # Predict on the test data
    x = ml_predictor.predict(df_test)
    x_dataframe = pd.DataFrame(x,columns=['predictions'])
    merge_data = pd.concat((origin_data,x_dataframe),axis=1)
    merge_data_df = pd.DataFrame(merge_data)
    merge_data_df.to_csv('./merge_data_bak/merge_data_auto_ml.csv',index=False)
    print(x_dataframe.describe())
    print(df_test_label.describe())

    print(mean_absolute_error(df_test_label,x))
    compute_ratio(merge_data_df)
    # compute_ratio2(merge_data_df)
Exemple #3
0
    print(len(data_20) / len(data))
    print(len(data_30) / len(data))
    print(len(data_more) / len(data))
    print(mean_absolute_error(data[test_column], data['daysOnMarket']))


if __name__ == '__main__':
    # Script entry point: load a CSV, run a saved model over it, write the
    # rows plus predictions to a CSV, and print summary/error statistics.

    # Load the data
    data = pd.read_csv('./input/treb_toronto_9.csv')
    data = preprocess_data(data)
    # Copy with a fresh 0..n-1 index; presumably so the column-wise concat
    # below lines up row-for-row with the predictions frame -- TODO confirm.
    origin_data = data.reset_index(drop=True)

    # Separate the target column from the columns passed to the model.
    data_label = data['daysOnMarket']
    data_prediction = data.drop(columns=['daysOnMarket'])
    # Load the model
    model = load_ml_model('./some_fine_model/model_auto_ml_9.h5')
    # Predict:
    prediction_result = model.predict(data_prediction)

    x = prediction_result
    x_dataframe = pd.DataFrame(x, columns=['predictions'])
    # Place the predictions next to the original rows as an extra column.
    merge_data = pd.concat((origin_data, x_dataframe), axis=1)
    merge_data_df = pd.DataFrame(merge_data)
    # Persist the merged rows + predictions for later inspection.
    merge_data_df.to_csv(
        './intermediate_generated_file/load_model_test_merge_prediction_result_data.csv',
        index=False)
    # Summary statistics of the predictions versus the true labels.
    print(x_dataframe.describe())
    print(data_label.describe())

    # Mean absolute error between the true labels and the predictions.
    print(mean_absolute_error(data_label, x))
    compute_ratio(merge_data_df)
Exemple #4
0
 def my_predict_auto_ml(self, data):
     """Load the model at ``model_auto_ml_path`` and return its predictions for ``data``.

     The model is loaded anew on every call.
     """
     return load_ml_model(model_auto_ml_path).predict(data)