def chemprop_predict():
    """Parses Chemprop predicting arguments and runs prediction using a
    trained Chemprop model.

    This is the entry point for the command line command :code:`chemprop_predict`.

    :return: The predictions returned by :func:`make_predictions`.
    """
    args = PredictArgs().parse_args()

    # When SMILES are passed on the command line (comma-separated), convert
    # them to the list-of-single-SMILES-lists shape make_predictions expects;
    # otherwise leave None so make_predictions falls back to args.test_path.
    all_smiles = None
    if args.smiles is not None:
        all_smiles = [[s] for s in args.smiles.split(',')]

    # Bug fix: reuse the already-parsed args instead of calling
    # PredictArgs().parse_args() a second time (the original re-parsed
    # sys.argv here, doing the same work twice).
    _, preds = make_predictions(args=args, smiles=all_smiles)

    return preds
def predict_outside(args_dict):
    """Entry point for driving this script from another Python script.

    :dict args_dict: dict of args to use
    """
    # Rebuild sys.argv so PredictArgs can parse the dict as if it had been
    # supplied on the command line.
    sys.argv = create_args(args_dict, 'predict.py')
    parsed_args = PredictArgs().parse_args()
    make_predictions(parsed_args)
def predict(data, model_predict):
    """Score ``data`` with a trained Chemprop model (pre/post-processing hook).

    Modify this method to add pre and post processing for scoring calls.

    Parameters
    ----------
    data: pd.DataFrame
        Raw rows to score (must include a ``smiles`` column).
    model_predict: Callable[[pd.DataFrame], pd.DataFrame]
        Unused here; Chemprop is invoked directly via make_predictions.

    Returns
    -------
    pd.DataFrame
        One ``positive_class_label`` / ``negative_class_label`` column pair,
        as DataRobot expects for binary classification.
    """
    # Bug fix: the original saved the frame to
    # /opt/code/chemprop_folder/for_scoring.csv but passed
    # /opt/chemprop_folder/for_scoring.csv as --test_path, so Chemprop read a
    # different (possibly missing/stale) file.  Use one shared path for both.
    # NOTE(review): unified on /opt/chemprop_folder/ to match the preds file
    # location — confirm this directory exists in the deployment image.
    scoring_path = "/opt/chemprop_folder/for_scoring.csv"
    preds_path = "/opt/chemprop_folder/preds.csv"

    # Chemprop reads its input from disk, so persist the incoming frame first.
    data.to_csv(scoring_path, index=False)

    args = PredictArgs().parse_args([
        '--test_path', scoring_path,
        '--checkpoint_path', '/opt/code/model.pth',
        '--preds_path', preds_path,
    ])
    make_predictions(args)

    # `pds` is this file's pandas alias (imported elsewhere in the file).
    preds_df = pds.read_csv(preds_path)
    print(str(preds_df.shape))

    # Reshape Chemprop's output into DataRobot's two-class probability schema:
    # p_np becomes the positive-class probability; its complement is negative.
    preds_df = preds_df.rename(columns={"p_np": "positive_class_label"})
    preds_df = preds_df.drop(columns=['smiles'])
    preds_df["negative_class_label"] = 1 - preds_df["positive_class_label"]
    print(preds_df.head())

    # Note: To properly send predictions back to DataRobot, the returned
    # DataFrame should contain a column for each output label for
    # classification or a single value column for regression.
    return preds_df
def chemprop_predict() -> None:
    """Command-line entry point for :code:`chemprop_predict`.

    Parses the prediction arguments and runs prediction with a trained
    Chemprop model.
    """
    parsed = PredictArgs().parse_args()
    make_predictions(args=parsed)
def predict():
    """Renders the predict page and makes predictions if the method is POST."""
    # GET just shows the form; everything below handles a POST submission.
    if request.method == 'GET':
        return render_predict()

    # Get arguments
    ckpt_id = request.form['checkpointName']

    # SMILES source priority: pasted text, then drawn structure, then an
    # uploaded data file.
    if request.form['textSmiles'] != '':
        smiles = request.form['textSmiles'].split()
    elif request.form['drawSmiles'] != '':
        smiles = [request.form['drawSmiles']]
    else:
        # Upload data file with SMILES
        data = request.files['data']
        data_name = secure_filename(data.filename)
        data_path = os.path.join(app.config['TEMP_FOLDER'], data_name)
        data.save(data_path)

        # Check if header is smiles: if the first header cell parses with
        # RDKit, treat it as a data row rather than a column name.
        possible_smiles = get_header(data_path)[0]
        smiles = [possible_smiles] if Chem.MolFromSmiles(possible_smiles) is not None else []

        # Get remaining smiles
        smiles.extend(get_smiles(data_path))

    # Resolve every checkpoint belonging to the selected model (ensemble).
    models = db.get_models(ckpt_id)
    model_paths = [
        os.path.join(app.config['CHECKPOINT_FOLDER'], f'{model["id"]}.pt')
        for model in models
    ]

    # Task names / training args are assumed identical across the ensemble,
    # so reading the first checkpoint is sufficient.
    task_names = load_task_names(model_paths[0])
    num_tasks = len(task_names)
    gpu = request.form.get('gpu')
    train_args = load_args(model_paths[0])

    # Build arguments
    # '--test_path None' is a placeholder: SMILES are passed directly to
    # make_predictions below instead of being read from a file.
    arguments = [
        '--test_path', 'None',
        '--preds_path', os.path.join(app.config['TEMP_FOLDER'], app.config['PREDICTIONS_FILENAME']),
        '--checkpoint_paths', *model_paths
    ]

    # gpu is a form string: absent -> default device, 'None' -> force CPU,
    # otherwise a device index.
    if gpu is not None:
        if gpu == 'None':
            arguments.append('--no_cuda')
        else:
            arguments += ['--gpu', gpu]

    # Handle additional features: mirror whatever feature setup was used at
    # training time so prediction inputs match the model.
    if train_args.features_path is not None:
        # TODO: make it possible to specify the features generator if trained using features_path
        arguments += [
            '--features_generator', 'rdkit_2d_normalized',
            '--no_features_scaling'
        ]
    elif train_args.features_generator is not None:
        arguments += ['--features_generator', *train_args.features_generator]

        if not train_args.features_scaling:
            arguments.append('--no_features_scaling')

    # Parse arguments
    args = PredictArgs().parse_args(arguments)

    # Run predictions
    preds = make_predictions(args=args, smiles=smiles)

    if all(p is None for p in preds):
        return render_predict(errors=['All SMILES are invalid'])

    # Replace invalid smiles with message
    invalid_smiles_warning = 'Invalid SMILES String'
    preds = [
        pred if pred is not None else [invalid_smiles_warning] * num_tasks
        for pred in preds
    ]

    # Only the first 10 results are shown inline; the rest sit behind the
    # "show more" control.
    return render_predict(
        predicted=True,
        smiles=smiles,
        num_smiles=min(10, len(smiles)),
        show_more=max(0, len(smiles) - 10),
        task_names=task_names,
        num_tasks=len(task_names),
        preds=preds,
        warnings=["List contains invalid SMILES strings"] if None in preds else None,
        errors=["No SMILES strings given"] if len(preds) == 0 else None)
def chemprop_predict() -> None:
    """Parse prediction arguments from the command line and run Chemprop
    prediction with them."""
    args = PredictArgs().parse_args()
    make_predictions(args)
def chemprop_fingerprint() -> None:
    """Compute latent representation vectors for the provided molecules.

    Parses Chemprop predicting arguments and feeds them to
    :func:`molecule_fingerprint`, which uses a previously trained model.
    """
    parsed = PredictArgs().parse_args()
    molecule_fingerprint(args=parsed)
"""Loads a trained model checkpoint and makes predictions on a dataset.""" import torch import numpy as np import random from chemprop.args import PredictArgs from chemprop.train import make_predictions if __name__ == '__main__': args = PredictArgs().parse_args() torch.manual_seed(0) torch.cuda.manual_seed_all(0) np.random.seed(0) # this one is needed for torchtext random call (shuffled iterator) # in multi gpu it ensures datasets are read in the same order random.seed(0) # some cudnn methods can be random even after fixing the seed # unless you tell it to be deterministic torch.backends.cudnn.deterministic = True make_predictions(args)
def make_predictions(
    args: PredictArgs,
    smiles: List[List[str]] = None,
    model_objects: Tuple[PredictArgs, TrainArgs, List[MoleculeModel], List[StandardScaler], int, List[str], ] = None,
    calibrator: UncertaintyCalibrator = None,
    return_invalid_smiles: bool = True,
    return_index_dict: bool = False,
    return_uncertainty: bool = False,
) -> List[List[Optional[float]]]:
    """
    Loads data and a trained model and uses the model to make predictions on the data.

    If SMILES are provided, then makes predictions on smiles.
    Otherwise makes predictions on :code:`args.test_data`.

    :param args: A :class:`~chemprop.args.PredictArgs` object containing arguments for
                 loading data and a model and making predictions.
    :param smiles: List of list of SMILES to make predictions on.
    :param model_objects: Tuple of output of load_model function which can be called separately outside this function.
                          Preloaded model objects should have used the non-generator option for load_model if the objects
                          are to be used multiple times or are intended to be used for calibration as well.
    :param calibrator: A :class: `~chemprop.uncertainty.UncertaintyCalibrator` object, for use in calibrating uncertainty
                       predictions. Can be preloaded and provided as a function input or constructed within the function
                       from arguments. The models and scalers used to initiate the calibrator must be lists instead of
                       generators if the same calibrator is to be used multiple times or if the same models and scalers
                       objects are also part of the provided model_objects input.
    :param return_invalid_smiles: Whether to return predictions of "Invalid SMILES" for invalid SMILES, otherwise will
                                  skip them in returned predictions.
    :param return_index_dict: Whether to return the prediction results as a dictionary keyed from the initial data indexes.
    :param return_uncertainty: Whether to return uncertainty predictions alongside the model value predictions.
    :return: A list of lists of target predictions. If returning uncertainty, a tuple containing first prediction values
             then uncertainty estimates.
    """
    # Either unpack the caller-preloaded model objects, or lazily load from
    # the checkpoint paths in args (generator=True defers actual loading).
    if model_objects:
        (
            args,
            train_args,
            models,
            scalers,
            num_tasks,
            task_names,
        ) = model_objects
    else:
        (
            args,
            train_args,
            models,
            scalers,
            num_tasks,
            task_names,
        ) = load_model(args, generator=True)

    # Ensemble size = number of checkpoints.
    num_models = len(args.checkpoint_paths)

    # Reconcile feature-related prediction args with how the model was trained.
    set_features(args, train_args)

    # Note: to get the invalid SMILES for your data, use the get_invalid_smiles_from_file or get_invalid_smiles_from_list functions from data/utils.py
    full_data, test_data, test_data_loader, full_to_valid_indices = load_data(
        args, smiles)

    # Calibration/evaluation requires an uncertainty method; for
    # (multi)classification tasks default to 'classification', otherwise fail.
    if args.uncertainty_method is None and (args.calibration_method is not None
                                            or args.evaluation_methods is not None):
        if args.dataset_type in ['classification', 'multiclass']:
            args.uncertainty_method = 'classification'
        else:
            raise ValueError(
                'Cannot calibrate or evaluate uncertainty without selection of an uncertainty method.'
            )

    # Build a calibrator from args only when the caller did not supply one
    # and a calibration dataset path was given.
    if calibrator is None and args.calibration_path is not None:
        calibration_data = get_data(
            path=args.calibration_path,
            smiles_columns=args.smiles_columns,
            target_columns=task_names,
            features_path=args.calibration_features_path,
            features_generator=args.features_generator,
            phase_features_path=args.calibration_phase_features_path,
            atom_descriptors_path=args.calibration_atom_descriptors_path,
            bond_features_path=args.calibration_bond_features_path,
            max_data_size=args.max_data_size,
            loss_function=args.loss_function,
        )

        calibration_data_loader = MoleculeDataLoader(
            dataset=calibration_data,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
        )

        # If models/scalers are real lists they can be iterated more than
        # once, so reuse them; a generator would already be consumed by the
        # main prediction pass, so reload fresh objects instead.
        if isinstance(models, List) and isinstance(scalers, List):
            calibration_models = models
            calibration_scalers = scalers
        else:
            calibration_model_objects = load_model(args, generator=True)
            calibration_models = calibration_model_objects[2]
            calibration_scalers = calibration_model_objects[3]

        calibrator = build_uncertainty_calibrator(
            calibration_method=args.calibration_method,
            uncertainty_method=args.uncertainty_method,
            interval_percentile=args.calibration_interval_percentile,
            regression_calibrator_metric=args.regression_calibrator_metric,
            calibration_data=calibration_data,
            calibration_data_loader=calibration_data_loader,
            models=calibration_models,
            scalers=calibration_scalers,
            num_models=num_models,
            dataset_type=args.dataset_type,
            loss_function=args.loss_function,
            uncertainty_dropout_p=args.uncertainty_dropout_p,
            dropout_sampling_size=args.dropout_sampling_size,
            spectra_phase_mask=getattr(train_args, "spectra_phase_mask", None),
        )

    # Edge case if empty list of smiles is provided: emit one None per input
    # row instead of running the model.
    if len(test_data) == 0:
        preds = [None] * len(full_data)
        unc = [None] * len(full_data)
    else:
        preds, unc = predict_and_save(
            args=args,
            train_args=train_args,
            test_data=test_data,
            task_names=task_names,
            num_tasks=num_tasks,
            test_data_loader=test_data_loader,
            full_data=full_data,
            full_to_valid_indices=full_to_valid_indices,
            models=models,
            scalers=scalers,
            num_models=num_models,
            calibrator=calibrator,
            return_invalid_smiles=return_invalid_smiles,
        )

    # Optionally re-key results by the ORIGINAL data index. When invalid
    # SMILES are skipped (return_invalid_smiles=False), map original indices
    # through full_to_valid_indices and omit rows with no valid prediction.
    if return_index_dict:
        preds_dict = {}
        unc_dict = {}
        for i in range(len(full_data)):
            if return_invalid_smiles:
                preds_dict[i] = preds[i]
                unc_dict[i] = unc[i]
            else:
                valid_index = full_to_valid_indices.get(i, None)
                if valid_index is not None:
                    preds_dict[i] = preds[valid_index]
                    unc_dict[i] = unc[valid_index]
        if return_uncertainty:
            return preds_dict, unc_dict
        else:
            return preds_dict
    else:
        if return_uncertainty:
            return preds, unc
        else:
            return preds
def predict():
    # NOTE(review): this builds a PredictArgs instance but never calls
    # parse_args() and returns nothing — it looks like an incomplete stub,
    # or the rest of the definition lies beyond this view. Confirm before
    # relying on it.
    args = PredictArgs()