def predict_smile(checkpoint_path: str, smile: str):
    """Predict the first task's value for a single SMILES string.

    Loads the model and scalers from ``checkpoint_path``, builds a
    one-molecule dataset from ``smile``, and returns the model's
    prediction for the first task.

    :param checkpoint_path: Path to a trained model checkpoint (.pt file).
    :param smile: SMILES string to make a prediction on.
    :return: The prediction for the first task of the molecule; ``None``
        if ``smile`` is missing; ``[None]`` if the SMILES string cannot
        be parsed into a molecule.
    """
    # Guard against a missing SMILES string (the original check was
    # unreachable because the input was always wrapped in a list first).
    if smile is None:
        print("Enter Valid Smile String")
        return None
    smiles = [smile]

    scaler, features_scaler = load_scalers(checkpoint_path)
    train_args = load_args(checkpoint_path)

    # Copy all training-time arguments onto a fresh Namespace so the
    # prediction code sees the same configuration the model was trained with.
    args = Namespace()
    for key, value in vars(train_args).items():
        if not hasattr(args, key):
            setattr(args, key, value)

    test_data = get_data_from_smiles(smiles=smiles, skip_invalid_smiles=False)

    # Keep only molecules RDKit could parse.
    valid_indices = [
        i for i in range(len(test_data)) if test_data[i].mol is not None
    ]
    full_data = test_data
    test_data = MoleculeDataset([test_data[i] for i in valid_indices])

    # Edge case: the single SMILES was invalid.
    if len(test_data) == 0:
        return [None] * len(full_data)

    # Normalize additional features exactly as during training.
    if train_args.features_scaling:
        test_data.normalize_features(features_scaler)

    # Allocate the prediction accumulator (shape depends on dataset type).
    if args.dataset_type == 'multiclass':
        sum_preds = np.zeros(
            (len(test_data), args.num_tasks, args.multiclass_num_classes))
    else:
        sum_preds = np.zeros((len(test_data), args.num_tasks))

    model = load_checkpoint(checkpoint_path, cuda=args.cuda)
    model_preds = predict(model=model,
                          data=test_data,
                          batch_size=1,
                          scaler=scaler)
    sum_preds += np.array(model_preds)

    # Single molecule, first task.
    return sum_preds[0][0]
def calculate_predictions(self):
    """Run every ensemble member on the test data and record the
    ensemble mean (``self.uncal_preds``) and the across-model variance
    (``self.uncal_vars``) of the predictions.

    Also records per-model predictions when
    ``self.individual_ensemble_predictions`` is set, and collects
    ``train_class_sizes`` from each model when available.
    """
    for i, (model, scaler_list) in enumerate(
            tqdm(zip(self.models, self.scalers), total=self.num_models)):
        (
            scaler,
            features_scaler,
            atom_descriptor_scaler,
            bond_feature_scaler,
        ) = scaler_list
        # Each model may have been trained with its own feature scaling,
        # so reset the shared test data and re-apply this model's scalers.
        if (features_scaler is not None or atom_descriptor_scaler is not None
                or bond_feature_scaler is not None):
            self.test_data.reset_features_and_targets()
            if features_scaler is not None:
                self.test_data.normalize_features(features_scaler)
            if atom_descriptor_scaler is not None:
                self.test_data.normalize_features(
                    atom_descriptor_scaler, scale_atom_descriptors=True)
            if bond_feature_scaler is not None:
                self.test_data.normalize_features(bond_feature_scaler,
                                                  scale_bond_features=True)
        preds = predict(
            model=model,
            data_loader=self.test_data_loader,
            scaler=scaler,
            return_unc_parameters=False,
        )
        # Spectra predictions are re-normalized before being aggregated.
        if self.dataset_type == "spectra":
            preds = normalize_spectra(
                spectra=preds,
                phase_features=self.test_data.phase_features(),
                phase_mask=self.spectra_phase_mask,
                excluded_sub_value=float("nan"),
            )
        if i == 0:
            # Initialize running sums from the first model's output.
            sum_preds = np.array(preds)
            sum_squared = np.square(preds)
            if self.individual_ensemble_predictions:
                individual_preds = np.expand_dims(np.array(preds), axis=-1)
            if model.train_class_sizes is not None:
                self.train_class_sizes = [model.train_class_sizes]
        else:
            sum_preds += np.array(preds)
            sum_squared += np.square(preds)
            if self.individual_ensemble_predictions:
                # Stack this model's predictions along a new last axis.
                individual_preds = np.append(individual_preds,
                                             np.expand_dims(preds, axis=-1),
                                             axis=-1)
            if model.train_class_sizes is not None:
                self.train_class_sizes.append(model.train_class_sizes)
    uncal_preds = sum_preds / self.num_models
    # Ensemble variance: E[x^2] - (E[x])^2 over the ensemble members.
    uncal_vars = sum_squared / self.num_models \
        - np.square(sum_preds) / self.num_models ** 2
    self.uncal_preds, self.uncal_vars = uncal_preds.tolist(
    ), uncal_vars.tolist()
    if self.individual_ensemble_predictions:
        self.individual_preds = individual_preds.tolist()
def calculate_predictions(self):
    """Run every ensemble member with evidential uncertainty outputs and
    record the ensemble mean (``self.uncal_preds``) and total variance
    (``self.uncal_vars``).

    Each model returns evidential parameters (lambdas, alphas, betas)
    from which a per-model variance is computed; the final variance
    combines the mean of those variances with the variance of the
    per-model means. Per-model variances are kept in
    ``self.individual_vars``.
    """
    for i, (model, scaler_list) in enumerate(
            tqdm(zip(self.models, self.scalers), total=self.num_models)):
        (
            scaler,
            features_scaler,
            atom_descriptor_scaler,
            bond_feature_scaler,
        ) = scaler_list
        # Each model may have been trained with its own feature scaling,
        # so reset the shared test data and re-apply this model's scalers.
        if (features_scaler is not None or atom_descriptor_scaler is not None
                or bond_feature_scaler is not None):
            self.test_data.reset_features_and_targets()
            if features_scaler is not None:
                self.test_data.normalize_features(features_scaler)
            if atom_descriptor_scaler is not None:
                self.test_data.normalize_features(
                    atom_descriptor_scaler, scale_atom_descriptors=True)
            if bond_feature_scaler is not None:
                self.test_data.normalize_features(bond_feature_scaler,
                                                  scale_bond_features=True)
        preds, lambdas, alphas, betas = predict(
            model=model,
            data_loader=self.test_data_loader,
            scaler=scaler,
            return_unc_parameters=True,
        )
        # Per-model predictive variance from the evidential parameters:
        # beta / (lambda * (alpha - 1)).
        var = np.array(betas) / (np.array(lambdas) * (np.array(alphas) - 1))
        if i == 0:
            # Initialize running sums from the first model's output.
            sum_preds = np.array(preds)
            sum_squared = np.square(preds)
            sum_vars = np.array(var)
            individual_vars = [var]
            if self.individual_ensemble_predictions:
                individual_preds = np.expand_dims(np.array(preds), axis=-1)
        else:
            sum_preds += np.array(preds)
            sum_squared += np.square(preds)
            sum_vars += np.array(var)
            individual_vars.append(var)
            if self.individual_ensemble_predictions:
                # Stack this model's predictions along a new last axis.
                individual_preds = np.append(individual_preds,
                                             np.expand_dims(preds, axis=-1),
                                             axis=-1)
    uncal_preds = sum_preds / self.num_models
    # Law of total variance: mean of per-model variances plus
    # variance of the per-model means.
    uncal_vars = (sum_vars + sum_squared) / self.num_models \
        - np.square(sum_preds / self.num_models)
    self.uncal_preds, self.uncal_vars = uncal_preds.tolist(
    ), uncal_vars.tolist()
    self.individual_vars = individual_vars
    if self.individual_ensemble_predictions:
        self.individual_preds = individual_preds.tolist()
def calculate_predictions(self):
    """Run every ensemble member and record the ensemble mean as both
    the uncalibrated predictions (``self.uncal_preds``) and the
    uncalibrated confidence (``self.uncal_confidence``).

    Also records per-model predictions when
    ``self.individual_ensemble_predictions`` is set, and collects
    ``train_class_sizes`` from each model when available.
    """
    for i, (model, scaler_list) in enumerate(
            tqdm(zip(self.models, self.scalers), total=self.num_models)):
        (
            scaler,
            features_scaler,
            atom_descriptor_scaler,
            bond_feature_scaler,
        ) = scaler_list
        # Each model may have been trained with its own feature scaling,
        # so reset the shared test data and re-apply this model's scalers.
        if (features_scaler is not None or atom_descriptor_scaler is not None
                or bond_feature_scaler is not None):
            self.test_data.reset_features_and_targets()
            if features_scaler is not None:
                self.test_data.normalize_features(features_scaler)
            if atom_descriptor_scaler is not None:
                self.test_data.normalize_features(
                    atom_descriptor_scaler, scale_atom_descriptors=True)
            if bond_feature_scaler is not None:
                self.test_data.normalize_features(bond_feature_scaler,
                                                  scale_bond_features=True)
        preds = predict(
            model=model,
            data_loader=self.test_data_loader,
            scaler=scaler,
            return_unc_parameters=False,
        )
        if i == 0:
            # Initialize the running sum from the first model's output.
            sum_preds = np.array(preds)
            if self.individual_ensemble_predictions:
                individual_preds = np.expand_dims(np.array(preds), axis=-1)
            if model.train_class_sizes is not None:
                self.train_class_sizes = [model.train_class_sizes]
        else:
            sum_preds += np.array(preds)
            if self.individual_ensemble_predictions:
                # Stack this model's predictions along a new last axis.
                individual_preds = np.append(individual_preds,
                                             np.expand_dims(preds, axis=-1),
                                             axis=-1)
            if model.train_class_sizes is not None:
                self.train_class_sizes.append(model.train_class_sizes)
    self.uncal_preds = (sum_preds / self.num_models).tolist()
    # NOTE(review): this aliases the SAME list object, not a copy —
    # mutating one later mutates both. Confirm that is intended.
    self.uncal_confidence = self.uncal_preds
    if self.individual_ensemble_predictions:
        self.individual_preds = individual_preds.tolist()
def calculate_predictions(self):
    """Monte-Carlo dropout uncertainty: run a single model
    ``self.dropout_sampling_size`` times with dropout enabled and record
    the mean (``self.uncal_preds``) and variance (``self.uncal_vars``)
    across the stochastic forward passes.
    """
    # Only one model/scaler set is used here (taken from the iterators).
    model = next(self.models)
    (
        scaler,
        features_scaler,
        atom_descriptor_scaler,
        bond_feature_scaler,
    ) = next(self.scalers)
    # Re-apply this model's feature scaling to the shared test data.
    if (features_scaler is not None or atom_descriptor_scaler is not None
            or bond_feature_scaler is not None):
        self.test_data.reset_features_and_targets()
        if features_scaler is not None:
            self.test_data.normalize_features(features_scaler)
        if atom_descriptor_scaler is not None:
            self.test_data.normalize_features(atom_descriptor_scaler,
                                              scale_atom_descriptors=True)
        if bond_feature_scaler is not None:
            self.test_data.normalize_features(bond_feature_scaler,
                                              scale_bond_features=True)
    for i in range(self.dropout_sampling_size):
        # dropout_prob keeps dropout active at inference time so each
        # pass yields a different stochastic prediction.
        preds = predict(
            model=model,
            data_loader=self.test_data_loader,
            scaler=scaler,
            return_unc_parameters=False,
            dropout_prob=self.uncertainty_dropout_p,
        )
        if i == 0:
            sum_preds = np.array(preds)
            sum_squared = np.square(preds)
        else:
            sum_preds += np.array(preds)
            sum_squared += np.square(preds)
    uncal_preds = sum_preds / self.dropout_sampling_size
    # Sample variance across dropout passes: E[x^2] - (E[x])^2.
    uncal_vars = sum_squared / self.dropout_sampling_size \
        - np.square(sum_preds) / self.dropout_sampling_size ** 2
    self.uncal_preds, self.uncal_vars = uncal_preds.tolist(
    ), uncal_vars.tolist()
def __call__(
    self,
    smiles: List[Optional[str]] = None,
    smiles2: List[Optional[str]] = None
) -> List[Optional[List[float]]]:
    """Score a list of SMILES strings.

    If ``self.computed_prop`` is set, each non-None SMILES is scored
    directly with ``self.scorer``; otherwise an ensemble of chemprop
    models is averaged. Invalid or None SMILES produce ``None`` entries
    in the returned list, which is aligned with the input.

    NOTE(review): ``smiles2`` is accepted but never used in this body —
    confirm whether callers rely on it.
    """
    if self.computed_prop:
        # Identity non-None smiles
        if len(smiles) > 0:
            valid_indices, valid_smiles = zip(
                *[(i, smile) for i, smile in enumerate(smiles)
                  if smile is not None])
        else:
            valid_indices, valid_smiles = [], []
        valid_props = [
            self.scorer(valid_smile) for valid_smile in valid_smiles
        ]
        # Combine properties of non-None smiles with Nones
        props = [None] * len(smiles)
        for i, prop in zip(valid_indices, valid_props):
            props[i] = prop
        return props
    test_data = get_data_from_smiles(smiles=smiles,
                                     skip_invalid_smiles=False)
    # Indices (into the full input) of molecules RDKit could parse.
    valid_indices = [
        i for i in range(len(test_data)) if test_data[i].mol is not None
    ]
    full_data = test_data
    test_data = MoleculeDataset([test_data[i] for i in valid_indices])
    if self.features_generator is not None:
        self.generate_features(test_data)
        # NOTE(review): this recomputes valid_indices relative to the
        # already-filtered test_data, but valid_indices is later used to
        # place predictions into full_preds positions of full_data —
        # verify the index spaces line up when features filtering drops
        # additional molecules.
        valid_indices = [
            i for i in range(len(test_data))
            if test_data[i].mol is not None
            and test_data[i].features is not None
        ]
        test_data = MoleculeDataset([test_data[i] for i in valid_indices])
    # Edge case if empty list of smiles is provided
    if len(test_data) == 0:
        return [None] * len(full_data)
    # Normalize features
    if self.train_args.features_scaling:
        test_data.normalize_features(self.features_scaler)
    # Predict with each model individually and sum predictions
    sum_preds = np.zeros((len(test_data), self.num_tasks))
    for chemprop_model in self.chemprop_models:
        model_preds = predict(model=chemprop_model,
                              data=test_data,
                              batch_size=self.batch_size,
                              scaler=self.scaler)
        sum_preds += np.array(model_preds)
    # Ensemble predictions
    avg_preds = sum_preds / len(self.chemprop_models)
    avg_preds = avg_preds.tolist()
    # Put Nones for invalid smiles
    full_preds = [None] * len(full_data)
    for i, si in enumerate(valid_indices):
        full_preds[si] = avg_preds[i]
    # Negate when higher raw scores should rank lower (minimization).
    if self.neg_threshold:
        return [
            -p[self.prop_index] if p is not None else None
            for p in full_preds
        ]
    else:
        return [
            p[self.prop_index] if p is not None else None
            for p in full_preds
        ]
def make_predictions(args: Namespace,
                     smiles: List[str] = None) -> List[Optional[List[float]]]:
    """
    Makes predictions. If smiles is provided, makes predictions on smiles.
    Otherwise makes predictions on args.test_data.

    Predictions are averaged over the checkpoint ensemble and written as
    CSV to ``args.preds_path``; invalid SMILES yield ``None`` entries.

    :param args: Arguments.
    :param smiles: Smiles to make predictions on.
    :return: A list of lists of target predictions.
    """
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)

    print('Loading training args')
    # Scalers and training args are taken from the first checkpoint; the
    # ensemble is assumed to share them.
    scaler, features_scaler = load_scalers(args.checkpoint_paths[0])
    train_args = load_args(args.checkpoint_paths[0])

    # Update args with training arguments
    for key, value in vars(train_args).items():
        if not hasattr(args, key):
            setattr(args, key, value)

    print('Loading data')
    if smiles is not None:
        test_data = get_data_from_smiles(smiles=smiles,
                                         skip_invalid_smiles=False)
    else:
        test_data = get_data(path=args.test_path,
                             args=args,
                             use_compound_names=args.use_compound_names,
                             skip_invalid_smiles=False)

    print('Validating SMILES')
    # Indices (into the full input) of molecules RDKit could parse.
    valid_indices = [
        i for i in range(len(test_data)) if test_data[i].mol is not None
    ]
    full_data = test_data
    test_data = MoleculeDataset([test_data[i] for i in valid_indices])

    # Edge case if empty list of smiles is provided
    if len(test_data) == 0:
        return [None] * len(full_data)

    if args.use_compound_names:
        compound_names = test_data.compound_names()
    print(f'Test size = {len(test_data):,}')

    # Normalize features
    if train_args.features_scaling:
        test_data.normalize_features(features_scaler)

    # Predict with each model individually and sum predictions
    if args.dataset_type == 'multiclass':
        sum_preds = np.zeros(
            (len(test_data), args.num_tasks, args.multiclass_num_classes))
    else:
        sum_preds = np.zeros((len(test_data), args.num_tasks))

    print(
        f'Predicting with an ensemble of {len(args.checkpoint_paths)} models')
    for checkpoint_path in tqdm(args.checkpoint_paths,
                                total=len(args.checkpoint_paths)):
        # Load model
        model = load_checkpoint(checkpoint_path, cuda=args.cuda)
        model_preds = predict(model=model,
                              data=test_data,
                              batch_size=args.batch_size,
                              scaler=scaler)
        sum_preds += np.array(model_preds)

    # Ensemble predictions
    avg_preds = sum_preds / len(args.checkpoint_paths)
    avg_preds = avg_preds.tolist()

    # Save predictions
    assert len(test_data) == len(avg_preds)
    print(f'Saving predictions to {args.preds_path}')

    # Put Nones for invalid smiles
    full_preds = [None] * len(full_data)
    for i, si in enumerate(valid_indices):
        full_preds[si] = avg_preds[i]
    avg_preds = full_preds
    test_smiles = full_data.smiles()

    # Write predictions
    with open(args.preds_path, 'w') as f:
        writer = csv.writer(f)

        header = []
        if args.use_compound_names:
            header.append('compound_names')
        header.append('smiles')
        # Multiclass expands each task into one column per class.
        if args.dataset_type == 'multiclass':
            for name in args.task_names:
                for i in range(args.multiclass_num_classes):
                    header.append(name + '_class' + str(i))
        else:
            header.extend(args.task_names)
        writer.writerow(header)

        for i in range(len(avg_preds)):
            row = []
            if args.use_compound_names:
                row.append(compound_names[i])
            row.append(test_smiles[i])
            if avg_preds[i] is not None:
                if args.dataset_type == 'multiclass':
                    for task_probs in avg_preds[i]:
                        row.extend(task_probs)
                else:
                    row.extend(avg_preds[i])
            else:
                # Invalid SMILES: emit empty cells to keep columns aligned.
                if args.dataset_type == 'multiclass':
                    row.extend([''] * args.num_tasks *
                               args.multiclass_num_classes)
                else:
                    row.extend([''] * args.num_tasks)
            writer.writerow(row)

    return avg_preds
def run_training(args: Namespace, logger: Logger = None) -> List[float]:
    """
    Trains a model and returns test scores on the model checkpoint with the highest validation score.

    Trains ``args.ensemble_size`` models, each checkpointed on its best
    validation score, then evaluates both the individual models and their
    prediction average on the held-out test set.

    :param args: Arguments.
    :param logger: Logger.
    :return: A list of ensemble scores for each task.
    """
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    # Set GPU
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)

    # Print args
    debug(pformat(vars(args)))

    # Get data
    debug('Loading data')
    args.task_names = get_task_names(args.data_path)
    data = get_data(path=args.data_path, args=args, logger=logger)
    args.num_tasks = data.num_tasks()
    args.features_size = data.features_size()
    debug(f'Number of tasks = {args.num_tasks}')

    # Split data: separate val/test files may override the random split.
    debug(f'Splitting data with seed {args.seed}')
    if args.separate_test_path:
        test_data = get_data(path=args.separate_test_path,
                             args=args,
                             features_path=args.separate_test_features_path,
                             logger=logger)
    if args.separate_val_path:
        val_data = get_data(path=args.separate_val_path,
                            args=args,
                            features_path=args.separate_val_features_path,
                            logger=logger)
    if args.separate_val_path and args.separate_test_path:
        train_data = data
    elif args.separate_val_path:
        # Val supplied externally: split data into train/test only.
        train_data, _, test_data = split_data(data=data,
                                              split_type=args.split_type,
                                              sizes=(0.8, 0.2, 0.0),
                                              seed=args.seed,
                                              args=args,
                                              logger=logger)
    elif args.separate_test_path:
        # Test supplied externally: split data into train/val only.
        train_data, val_data, _ = split_data(data=data,
                                             split_type=args.split_type,
                                             sizes=(0.8, 0.2, 0.0),
                                             seed=args.seed,
                                             args=args,
                                             logger=logger)
    else:
        train_data, val_data, test_data = split_data(
            data=data,
            split_type=args.split_type,
            sizes=args.split_sizes,
            seed=args.seed,
            args=args,
            logger=logger)

    if args.dataset_type == 'classification':
        class_sizes = get_class_sizes(data)
        debug('Class sizes')
        for i, task_class_sizes in enumerate(class_sizes):
            debug(
                f'{args.task_names[i]} '
                f'{", ".join(f"{cls}: {size * 100:.2f}%" for cls, size in enumerate(task_class_sizes))}'
            )

    if args.save_smiles_splits:
        # Persist the exact train/val/test membership (by SMILES and by
        # original row index) for reproducibility.
        with open(args.data_path, 'r') as f:
            reader = csv.reader(f)
            header = next(reader)
            lines_by_smiles = {}
            indices_by_smiles = {}
            for i, line in enumerate(reader):
                smiles = line[0]
                lines_by_smiles[smiles] = line
                indices_by_smiles[smiles] = i
        all_split_indices = []
        for dataset, name in [(train_data, 'train'), (val_data, 'val'),
                              (test_data, 'test')]:
            with open(os.path.join(args.save_dir, name + '_smiles.csv'),
                      'w') as f:
                writer = csv.writer(f)
                writer.writerow(['smiles'])
                for smiles in dataset.smiles():
                    writer.writerow([smiles])
            with open(os.path.join(args.save_dir, name + '_full.csv'),
                      'w') as f:
                writer = csv.writer(f)
                writer.writerow(header)
                for smiles in dataset.smiles():
                    writer.writerow(lines_by_smiles[smiles])
            split_indices = []
            for smiles in dataset.smiles():
                split_indices.append(indices_by_smiles[smiles])
            split_indices = sorted(split_indices)
            all_split_indices.append(split_indices)
        with open(os.path.join(args.save_dir, 'split_indices.pckl'),
                  'wb') as f:
            pickle.dump(all_split_indices, f)

    if args.features_scaling:
        # Fit the feature scaler on train only, then apply to val/test.
        features_scaler = train_data.normalize_features(replace_nan_token=0)
        val_data.normalize_features(features_scaler)
        test_data.normalize_features(features_scaler)
    else:
        features_scaler = None

    args.train_data_size = len(train_data)

    debug(
        f'Total size = {len(data):,} | '
        f'train size = {len(train_data):,} | val size = {len(val_data):,} | test size = {len(test_data):,}'
    )

    # Initialize scaler and scale training targets by subtracting mean and dividing standard deviation (regression only)
    if args.dataset_type == 'regression':
        debug('Fitting scaler')
        train_smiles, train_targets = train_data.smiles(), train_data.targets()
        scaler = StandardScaler().fit(train_targets)
        scaled_targets = scaler.transform(train_targets).tolist()
        train_data.set_targets(scaled_targets)
    else:
        scaler = None

    # Get loss and metric functions
    loss_func = get_loss_func(args)
    metric_func = get_metric_func(metric=args.metric)

    # Set up test set evaluation
    test_smiles, test_targets = test_data.smiles(), test_data.targets()
    if args.dataset_type == 'multiclass':
        sum_test_preds = np.zeros(
            (len(test_smiles), args.num_tasks, args.multiclass_num_classes))
    else:
        sum_test_preds = np.zeros((len(test_smiles), args.num_tasks))

    # Train ensemble of models
    for model_idx in range(args.ensemble_size):
        # Tensorboard writer (keyword name differs between
        # torch.utils.tensorboard and tensorboardX versions).
        save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
        makedirs(save_dir)
        try:
            writer = SummaryWriter(log_dir=save_dir)
        except:
            writer = SummaryWriter(logdir=save_dir)
        # Load/build model
        if args.checkpoint_paths is not None:
            debug(
                f'Loading model {model_idx} from {args.checkpoint_paths[model_idx]}'
            )
            model = load_checkpoint(args.checkpoint_paths[model_idx],
                                    current_args=args,
                                    logger=logger)
        else:
            debug(f'Building model {model_idx}')
            model = build_model(args)

        debug(model)
        debug(f'Number of parameters = {param_count(model):,}')
        if args.cuda:
            debug('Moving model to cuda')
            model = model.cuda()

        # Ensure that model is saved in correct location for evaluation if 0 epochs
        save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler,
                        features_scaler, args)

        # Optimizers
        optimizer = build_optimizer(model, args)

        # Learning rate schedulers
        scheduler = build_lr_scheduler(optimizer, args)

        # Run training
        best_score = float('inf') if args.minimize_score else -float('inf')
        best_epoch, n_iter = 0, 0
        for epoch in trange(args.epochs):
            debug(f'Epoch {epoch}')
            n_iter = train(model=model,
                           data=train_data,
                           loss_func=loss_func,
                           optimizer=optimizer,
                           scheduler=scheduler,
                           args=args,
                           n_iter=n_iter,
                           logger=logger,
                           writer=writer)
            if isinstance(scheduler, ExponentialLR):
                scheduler.step()
            val_scores = evaluate(model=model,
                                  data=val_data,
                                  num_tasks=args.num_tasks,
                                  metric_func=metric_func,
                                  batch_size=args.batch_size,
                                  dataset_type=args.dataset_type,
                                  scaler=scaler,
                                  logger=logger)

            # Average validation score
            avg_val_score = np.nanmean(val_scores)
            debug(f'Validation {args.metric} = {avg_val_score:.6f}')
            writer.add_scalar(f'validation_{args.metric}', avg_val_score,
                              n_iter)

            if args.show_individual_scores:
                # Individual validation scores
                for task_name, val_score in zip(args.task_names, val_scores):
                    debug(
                        f'Validation {task_name} {args.metric} = {val_score:.6f}'
                    )
                    writer.add_scalar(f'validation_{task_name}_{args.metric}',
                                      val_score, n_iter)

            # Save model checkpoint if improved validation score
            if args.minimize_score and avg_val_score < best_score or \
                    not args.minimize_score and avg_val_score > best_score:
                best_score, best_epoch = avg_val_score, epoch
                save_checkpoint(os.path.join(save_dir, 'model.pt'), model,
                                scaler, features_scaler, args)

        # Evaluate on test set using model with best validation score
        info(
            f'Model {model_idx} best validation {args.metric} = {best_score:.6f} on epoch {best_epoch}'
        )
        model = load_checkpoint(os.path.join(save_dir, 'model.pt'),
                                cuda=args.cuda,
                                logger=logger)
        test_preds = predict(model=model,
                             data=test_data,
                             batch_size=args.batch_size,
                             scaler=scaler)
        test_scores = evaluate_predictions(preds=test_preds,
                                           targets=test_targets,
                                           num_tasks=args.num_tasks,
                                           metric_func=metric_func,
                                           dataset_type=args.dataset_type,
                                           logger=logger)

        if len(test_preds) != 0:
            sum_test_preds += np.array(test_preds)

        # Average test score
        avg_test_score = np.nanmean(test_scores)
        info(f'Model {model_idx} test {args.metric} = {avg_test_score:.6f}')
        writer.add_scalar(f'test_{args.metric}', avg_test_score, 0)

        if args.show_individual_scores:
            # Individual test scores
            for task_name, test_score in zip(args.task_names, test_scores):
                info(
                    f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}'
                )
                writer.add_scalar(f'test_{task_name}_{args.metric}',
                                  test_score, n_iter)

    # Evaluate ensemble on test set
    avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()

    ensemble_scores = evaluate_predictions(preds=avg_test_preds,
                                           targets=test_targets,
                                           num_tasks=args.num_tasks,
                                           metric_func=metric_func,
                                           dataset_type=args.dataset_type,
                                           logger=logger)

    # Average ensemble score
    avg_ensemble_test_score = np.nanmean(ensemble_scores)
    info(f'Ensemble test {args.metric} = {avg_ensemble_test_score:.6f}')
    writer.add_scalar(f'ensemble_test_{args.metric}', avg_ensemble_test_score,
                      0)

    # Individual ensemble scores
    if args.show_individual_scores:
        for task_name, ensemble_score in zip(args.task_names,
                                             ensemble_scores):
            info(
                f'Ensemble test {task_name} {args.metric} = {ensemble_score:.6f}'
            )

    return ensemble_scores