# Imports needed by the functions below; the atomsci.ddm module paths are an
# assumption based on AMPL's usual package layout.
import json
import os

import numpy as np
import pandas as pd

import atomsci.ddm.pipeline.model_pipeline as mp
import atomsci.ddm.pipeline.parameter_parser as parse

import integrative_utilities


def H1_init():
    """
    Initialize the H1 test: clean up previous fit/predict results and curate the H1 data
    """
    # Clean
    # -----
    integrative_utilities.clean_fit_predict()
    clean('H1')

    # Curate
    # ------
    H1_curate()
def init():
    """
    Initialize the test: clean up previous fit/predict results, then download and curate the data
    """
    # Clean
    # -----
    integrative_utilities.clean_fit_predict()
    clean()

    # Download
    # --------
    download()

    # Curate
    # ------
    curate()
def test():
    """
    Test full model pipeline: curate data, fit model, and predict property for new compounds
    """
    # Clean
    # -----
    integrative_utilities.clean_fit_predict()
    clean()

    # Download
    # --------
    download()

    # Curate
    # ------
    curate()

    # Train model
    # -----------
    # Read parameter JSON file
    with open('config_delaney_train_NN.json') as f:
        config = json.loads(f.read())

    # Parse parameters
    params = parse.wrapper(config)

    # Create model pipeline
    model = mp.ModelPipeline(params)

    # Train model
    model.train_model()

    # Get uuid and reload directory
    # -----------------------------
    uuid = integrative_utilities.get_subdirectory(
        'result/delaney-processed_curated_fit/NN_graphconv_scaffold_regression')
    reload_dir = 'result/delaney-processed_curated_fit/NN_graphconv_scaffold_regression/' + uuid

    # Check training statistics
    # -------------------------
    integrative_utilities.training_statistics_file(reload_dir, 'test', 0.6)

    # Make prediction parameters
    # --------------------------
    # Read prediction parameter JSON file
    with open('config_delaney_predict_NN.json', 'r') as f:
        predict_parameters_dict = json.loads(f.read())

    # Set transformer key here because the model uuid is not known before the fit.
    # os.path.join avoids the missing path separator between reload_dir and the filename.
    predict_parameters_dict['transformer_key'] = os.path.join(reload_dir, 'transformers.pkl')

    predict_parameters = parse.wrapper(predict_parameters_dict)

    # Load second test set
    # --------------------
    data = pd.read_csv('delaney-processed_curated_external.csv')

    # Select columns and rename response column
    data = data[[predict_parameters.id_col,
                 predict_parameters.smiles_col,
                 predict_parameters.response_cols[0]]]
    data = data.rename(
        columns={predict_parameters.response_cols[0]: 'experimental_values'})

    # Make prediction pipeline
    # ------------------------
    pp = mp.create_prediction_pipeline_from_file(predict_parameters, reload_dir)

    # Predict
    # -------
    predict = pp.predict_on_dataframe(data)

    # Check predictions
    # -----------------
    assert (predict['pred'].shape[0] == 117
            ), 'Error: Incorrect number of predictions'
    assert (np.all(np.isfinite(predict['pred'].values))
            ), 'Error: Predictions are not numbers'

    # Save predictions with experimental values
    # -----------------------------------------
    predict.reset_index(level=0, inplace=True)
    combined = pd.merge(data, predict, on=predict_parameters.id_col, how='inner')
    combined.to_csv('delaney-processed_curated_predict.csv')

    assert (os.path.isfile('delaney-processed_curated_predict.csv')
            and os.path.getsize('delaney-processed_curated_predict.csv') > 0
            ), 'Error: Prediction file not created'
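
# A minimal sketch of a direct-run entry point, assuming this file may also be
# executed as a standalone script rather than only through a test runner such
# as pytest; it simply chains the setup and test functions defined above.
if __name__ == '__main__':
    init()
    test()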