def load_checkpoint(path: str,
                    current_args: Namespace = None,
                    cuda: bool = None,
                    logger: logging.Logger = None) -> MoleculeModel:
    """
    Restores a MoleculeModel from a checkpoint file.

    Checkpoint parameters that are missing from the freshly built model, or whose
    shape differs from the model's parameter, are reported and skipped.

    :param path: Path where checkpoint is saved.
    :param current_args: The current arguments. Replaces the arguments loaded from the checkpoint if provided.
    :param cuda: Whether to move model to cuda. When None, the cuda flag already on the arguments is kept.
    :param logger: A logger. Messages are printed when no logger is given.
    :return: The loaded MoleculeModel.
    """
    if logger is None:
        debug = print
    else:
        debug = logger.debug

    # Read the checkpoint; this map_location loads all tensors onto the CPU
    state = torch.load(path, map_location=lambda storage, loc: storage)
    checkpoint_args = state['args']
    loaded_state_dict = state['state_dict']

    # Caller-supplied arguments win over the ones stored in the checkpoint
    args = checkpoint_args if current_args is None else current_args

    # An explicit cuda flag overrides whatever the arguments carried
    args.cuda = args.cuda if cuda is None else cuda

    # if doesn't exist, then old model version with concat only
    if not hasattr(args, 'ops'):
        args.ops = 'concat'

    # Instantiate the architecture and take a snapshot of its parameters
    model = build_model(args)
    model_state_dict = model.state_dict()

    # Keep only checkpoint parameters present in the model with an identical shape
    pretrained_state_dict = {}
    for param_name in loaded_state_dict:
        if param_name not in model_state_dict:
            debug(f'Pretrained parameter "{param_name}" cannot be found in model parameters.')
            continue

        if model_state_dict[param_name].shape != loaded_state_dict[param_name].shape:
            debug(f'Pretrained parameter "{param_name}" '
                  f'of shape {loaded_state_dict[param_name].shape} does not match corresponding '
                  f'model parameter of shape {model_state_dict[param_name].shape}.')
            continue

        debug(f'Loading pretrained parameter "{param_name}".')
        pretrained_state_dict[param_name] = loaded_state_dict[param_name]

    # Overlay the accepted weights on the model's own state and load the result
    model_state_dict.update(pretrained_state_dict)
    model.load_state_dict(model_state_dict)

    if not cuda:
        return model

    debug('Moving model to cuda')
    return model.cuda()
def objective(hyperparams: Dict[str, Union[int, float]]) -> float:
    """
    Trains with one hyperparameter setting and returns the value to minimize.

    Reads ``args``, ``logger``, ``train_logger``, ``results`` and ``INT_KEYS``
    from the enclosing scope and appends a record of the run to ``results``.

    :param hyperparams: Mapping from hyperparameter name to sampled value. Modified in place:
                        entries listed in INT_KEYS are cast to int.
    :return: The average test score, negated when the metric is not one to minimize.
    """
    # Convert hyperparams from float to int when necessary
    for int_key in INT_KEYS:
        hyperparams[int_key] = int(hyperparams[int_key])

    # Work on a private copy of the arguments for this setting
    hyper_args = deepcopy(args)

    # Each setting gets its own output folder named after its hyperparameters
    if args.save_dir is not None:
        folder_name = '_'.join(f'{key}_{value}' for key, value in hyperparams.items())
        hyper_args.save_dir = os.path.join(hyper_args.save_dir, folder_name)

    for name, setting in hyperparams.items():
        setattr(hyper_args, name, setting)

    logger.info(hyperparams)

    # Train
    avg_test_score, avg_test_accuracy = run_training(hyper_args, train_logger)

    # Record results (a throwaway model is built only to count its parameters)
    num_params = param_count(build_model(hyper_args))
    logger.info(f'num params: {num_params:,}')
    logger.info(f'{avg_test_score} {hyper_args.metric}')
    logger.info(f'{avg_test_accuracy}' + ' accuracy')

    record = {
        'avg_test_score': avg_test_score,
        'avg_test_accuracy': avg_test_accuracy,
        'hyperparams': hyperparams,
        'num_params': num_params
    }
    results.append(record)

    # Deal with nan: only classification can fall back to a score of 0
    if np.isnan(avg_test_score):
        if hyper_args.dataset_type != 'classification':
            raise ValueError(
                'Can\'t handle nan score for non-classification dataset.')
        avg_test_score = 0

    sign = 1 if hyper_args.minimize_score else -1
    return sign * avg_test_score
def objective(hyperparams: Dict[str, Union[int, float]]) -> float:
    """
    Cross-validates one hyperparameter setting and returns the value to minimize.

    Reads ``args``, ``logger``, ``TRAIN_LOGGER``, ``results`` and ``INT_KEYS``
    from the enclosing scope and appends a record of the run to ``results``.

    :param hyperparams: Mapping from hyperparameter name to sampled value. Modified in place:
                        entries listed in INT_KEYS are cast to int.
    :return: The mean cross-validation score, negated when the metric is not one to minimize.
    """
    # Convert hyperparams from float to int when necessary
    for int_key in INT_KEYS:
        hyperparams[int_key] = int(hyperparams[int_key])

    # Work on a private copy of the arguments for this setting
    hyper_args = deepcopy(args)

    # Each setting gets its own output folder named after its hyperparameters
    if args.save_dir is not None:
        name_parts = []
        for key, value in hyperparams.items():
            name_parts.append(f'{key}_{value}')
        folder_name = '_'.join(name_parts)
        hyper_args.save_dir = os.path.join(hyper_args.save_dir, folder_name)

    for name, setting in hyperparams.items():
        setattr(hyper_args, name, setting)

    # Record hyperparameters
    logger.info(hyperparams)

    # Cross validate
    mean_score, std_score = cross_validate(hyper_args, TRAIN_LOGGER)

    # Record results (a throwaway model is built only to count its parameters)
    num_params = param_count(build_model(hyper_args))
    logger.info(f'num params: {num_params:,}')
    logger.info(f'{mean_score} +/- {std_score} {hyper_args.metric}')

    record = {
        'mean_score': mean_score,
        'std_score': std_score,
        'hyperparams': hyperparams,
        'num_params': num_params
    }
    results.append(record)

    # Deal with nan: only classification can fall back to a score of 0
    if np.isnan(mean_score):
        if hyper_args.dataset_type != 'classification':
            raise ValueError(
                'Can\'t handle nan score for non-classification dataset.')
        mean_score = 0

    sign = 1 if hyper_args.minimize_score else -1
    return sign * mean_score
def objective(hyperparams: Dict[str, Union[int, float]]) -> float:
    """
    Cross-validates one grid-search setting and returns the value to minimize.

    Reads ``dataset_args``, ``logger``, ``TRAIN_LOGGER``, ``metric``, ``results``
    and ``INT_KEYS`` from the enclosing scope and appends a record of the run to ``results``.

    :param hyperparams: Mapping from hyperparameter name to sampled value. Modified in place:
                        entries listed in INT_KEYS are cast to int.
    :return: The mean cross-validation score, negated when the metric is not one to minimize.
    """
    # Convert hyperparams from float to int when necessary
    for int_key in INT_KEYS:
        hyperparams[int_key] = int(hyperparams[int_key])

    # Copy args and overlay this setting's hyperparameters
    gs_args = deepcopy(dataset_args)
    for name, setting in hyperparams.items():
        setattr(gs_args, name, setting)

    # Record hyperparameters
    logger.info(hyperparams)

    # Cross validate
    mean_score, std_score = cross_validate(gs_args, TRAIN_LOGGER)

    # Record results (a throwaway model is built only to count its parameters)
    num_params = param_count(build_model(gs_args))
    logger.info('num params: {:,}'.format(num_params))
    logger.info('{} +/- {} {}'.format(mean_score, std_score, metric))

    record = {
        'mean_score': mean_score,
        'std_score': std_score,
        'hyperparams': hyperparams,
        'num_params': num_params
    }
    results.append(record)

    # Deal with nan: only classification can fall back to a score of 0
    if np.isnan(mean_score):
        if gs_args.dataset_type != 'classification':
            raise ValueError(
                'Can\'t handle nan score for non-classification dataset.'
            )
        mean_score = 0

    sign = 1 if gs_args.minimize_score else -1
    return sign * mean_score
def run_training(args: TrainArgs, logger: Logger = None) -> List[float]:
    """
    Trains a model and returns test scores on the model checkpoint with the highest validation score.

    The function loads and splits the data, optionally writes the splits to disk,
    trains ``args.ensemble_size`` models (checkpointing each one whenever its average
    validation score improves), evaluates every model on the test set and finally
    scores the averaged ensemble predictions.

    :param args: Arguments. Several attributes are set on it as a side effect
                 (task_names, num_tasks, features_size, train_data_size).
    :param logger: Logger. Messages are printed when no logger is given.
    :return: A list of ensemble scores for each task.
    """
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    # Print command line
    debug('Command line')
    debug(f'python {" ".join(sys.argv)}')

    # Print args
    debug('Args')
    debug(args)

    # Save args
    args.save(os.path.join(args.save_dir, 'args.json'))

    # Get data
    debug('Loading data')
    args.task_names = args.target_columns or get_task_names(args.data_path)
    data = get_data(path=args.data_path, args=args, logger=logger)
    args.num_tasks = data.num_tasks()
    args.features_size = data.features_size()
    debug(f'Number of tasks = {args.num_tasks}')

    # Split data
    debug(f'Splitting data with seed {args.seed}')
    if args.separate_test_path:
        test_data = get_data(path=args.separate_test_path, args=args,
                             features_path=args.separate_test_features_path, logger=logger)
    if args.separate_val_path:
        val_data = get_data(path=args.separate_val_path, args=args,
                            features_path=args.separate_val_features_path, logger=logger)

    # Only split off the parts that were not supplied as separate files
    if args.separate_val_path and args.separate_test_path:
        train_data = data
    elif args.separate_val_path:
        train_data, _, test_data = split_data(data=data, split_type=args.split_type, sizes=(0.8, 0.0, 0.2),
                                              seed=args.seed, args=args, logger=logger)
    elif args.separate_test_path:
        train_data, val_data, _ = split_data(data=data, split_type=args.split_type, sizes=(0.8, 0.2, 0.0),
                                             seed=args.seed, args=args, logger=logger)
    else:
        train_data, val_data, test_data = split_data(data=data, split_type=args.split_type,
                                                     sizes=args.split_sizes, seed=args.seed,
                                                     args=args, logger=logger)

    if args.dataset_type == 'classification':
        class_sizes = get_class_sizes(data)
        debug('Class sizes')
        for i, task_class_sizes in enumerate(class_sizes):
            debug(f'{args.task_names[i]} '
                  f'{", ".join(f"{cls}: {size * 100:.2f}%" for cls, size in enumerate(task_class_sizes))}')

    if args.save_smiles_splits:
        # Index the original rows by their first column (the SMILES string).
        # csv files are opened with newline='' as the csv module requires.
        with open(args.data_path, 'r', newline='') as f:
            reader = csv.reader(f)
            header = next(reader)

            lines_by_smiles = {}
            indices_by_smiles = {}
            for i, line in enumerate(reader):
                smiles = line[0]
                lines_by_smiles[smiles] = line
                indices_by_smiles[smiles] = i

        all_split_indices = []
        for dataset, name in [(train_data, 'train'), (val_data, 'val'), (test_data, 'test')]:
            with open(os.path.join(args.save_dir, name + '_smiles.csv'), 'w', newline='') as f:
                csv_writer = csv.writer(f)
                csv_writer.writerow(['smiles'])
                for smiles in dataset.smiles():
                    csv_writer.writerow([smiles])
            with open(os.path.join(args.save_dir, name + '_full.csv'), 'w', newline='') as f:
                csv_writer = csv.writer(f)
                csv_writer.writerow(header)
                for smiles in dataset.smiles():
                    csv_writer.writerow(lines_by_smiles[smiles])
            # Sort once after collecting instead of re-sorting on every append
            split_indices = sorted(indices_by_smiles[smiles] for smiles in dataset.smiles())
            all_split_indices.append(split_indices)
        with open(os.path.join(args.save_dir, 'split_indices.pckl'), 'wb') as f:
            pickle.dump(all_split_indices, f)

    # The features scaler is fit on the training set and then applied to val and test
    if args.features_scaling:
        features_scaler = train_data.normalize_features(replace_nan_token=0)
        val_data.normalize_features(features_scaler)
        test_data.normalize_features(features_scaler)
    else:
        features_scaler = None

    args.train_data_size = len(train_data)

    debug(f'Total size = {len(data):,} | '
          f'train size = {len(train_data):,} | val size = {len(val_data):,} | test size = {len(test_data):,}')

    # Initialize scaler and scale training targets by subtracting mean and dividing standard deviation (regression only)
    if args.dataset_type == 'regression':
        debug('Fitting scaler')
        train_targets = train_data.targets()
        scaler = StandardScaler().fit(train_targets)
        scaled_targets = scaler.transform(train_targets).tolist()
        train_data.set_targets(scaled_targets)
    else:
        scaler = None

    # Get loss and metric functions
    loss_func = get_loss_func(args)
    metric_func = get_metric_func(metric=args.metric)

    # Set up test set evaluation; predictions of all ensemble members are summed here
    test_smiles, test_targets = test_data.smiles(), test_data.targets()
    if args.dataset_type == 'multiclass':
        sum_test_preds = np.zeros((len(test_smiles), args.num_tasks, args.multiclass_num_classes))
    else:
        sum_test_preds = np.zeros((len(test_smiles), args.num_tasks))

    # Automatically determine whether to cache
    if len(data) <= args.cache_cutoff:
        cache = True
        num_workers = 0
    else:
        cache = False
        num_workers = args.num_workers

    # Create data loaders
    train_data_loader = MoleculeDataLoader(
        dataset=train_data,
        batch_size=args.batch_size,
        num_workers=num_workers,
        cache=cache,
        class_balance=args.class_balance,
        shuffle=True,
        seed=args.seed
    )
    val_data_loader = MoleculeDataLoader(
        dataset=val_data,
        batch_size=args.batch_size,
        num_workers=num_workers,
        cache=cache,
        seed=args.seed
    )
    test_data_loader = MoleculeDataLoader(
        dataset=test_data,
        batch_size=args.batch_size,
        num_workers=num_workers,
        cache=cache,
        seed=args.seed
    )

    # Train ensemble of models
    for model_idx in range(args.ensemble_size):
        # Tensorboard writer
        save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
        makedirs(save_dir)
        try:
            writer = SummaryWriter(log_dir=save_dir)
        except TypeError:
            # SummaryWriter versions that do not accept `log_dir` take `logdir` instead;
            # an unsupported keyword surfaces as TypeError, so nothing else is swallowed
            writer = SummaryWriter(logdir=save_dir)

        # Load/build model
        if args.checkpoint_paths is not None:
            debug(f'Loading model {model_idx} from {args.checkpoint_paths[model_idx]}')
            model = load_checkpoint(args.checkpoint_paths[model_idx], logger=logger)
        else:
            debug(f'Building model {model_idx}')
            model = build_model(args)

        debug(model)
        debug(f'Number of parameters = {param_count(model):,}')
        if args.cuda:
            debug('Moving model to cuda')
        model = model.to(args.device)

        # Ensure that model is saved in correct location for evaluation if 0 epochs
        save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        # Optimizers
        optimizer = build_optimizer(model, args)

        # Learning rate schedulers
        scheduler = build_lr_scheduler(optimizer, args)

        # Run training
        best_score = float('inf') if args.minimize_score else -float('inf')
        best_epoch, n_iter = 0, 0
        for epoch in trange(args.epochs):
            debug(f'Epoch {epoch}')

            n_iter = train(
                model=model,
                data_loader=train_data_loader,
                loss_func=loss_func,
                optimizer=optimizer,
                scheduler=scheduler,
                args=args,
                n_iter=n_iter,
                logger=logger,
                writer=writer
            )
            # ExponentialLR is stepped once per epoch here
            if isinstance(scheduler, ExponentialLR):
                scheduler.step()
            val_scores = evaluate(
                model=model,
                data_loader=val_data_loader,
                num_tasks=args.num_tasks,
                metric_func=metric_func,
                dataset_type=args.dataset_type,
                scaler=scaler,
                logger=logger
            )

            # Average validation score
            avg_val_score = np.nanmean(val_scores)
            debug(f'Validation {args.metric} = {avg_val_score:.6f}')
            writer.add_scalar(f'validation_{args.metric}', avg_val_score, n_iter)

            if args.show_individual_scores:
                # Individual validation scores
                for task_name, val_score in zip(args.task_names, val_scores):
                    debug(f'Validation {task_name} {args.metric} = {val_score:.6f}')
                    writer.add_scalar(f'validation_{task_name}_{args.metric}', val_score, n_iter)

            # Save model checkpoint if improved validation score
            if args.minimize_score and avg_val_score < best_score or \
                    not args.minimize_score and avg_val_score > best_score:
                best_score, best_epoch = avg_val_score, epoch
                save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        # Evaluate on test set using model with best validation score
        info(f'Model {model_idx} best validation {args.metric} = {best_score:.6f} on epoch {best_epoch}')
        model = load_checkpoint(os.path.join(save_dir, 'model.pt'), device=args.device, logger=logger)

        test_preds = predict(
            model=model,
            data_loader=test_data_loader,
            scaler=scaler
        )
        test_scores = evaluate_predictions(
            preds=test_preds,
            targets=test_targets,
            num_tasks=args.num_tasks,
            metric_func=metric_func,
            dataset_type=args.dataset_type,
            logger=logger
        )

        # Empty predictions cannot be added to the running sum
        if len(test_preds) != 0:
            sum_test_preds += np.array(test_preds)

        # Average test score
        avg_test_score = np.nanmean(test_scores)
        info(f'Model {model_idx} test {args.metric} = {avg_test_score:.6f}')
        writer.add_scalar(f'test_{args.metric}', avg_test_score, 0)

        if args.show_individual_scores:
            # Individual test scores
            for task_name, test_score in zip(args.task_names, test_scores):
                info(f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}')
                writer.add_scalar(f'test_{task_name}_{args.metric}', test_score, n_iter)

    # Evaluate ensemble on test set
    avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()

    ensemble_scores = evaluate_predictions(
        preds=avg_test_preds,
        targets=test_targets,
        num_tasks=args.num_tasks,
        metric_func=metric_func,
        dataset_type=args.dataset_type,
        logger=logger
    )

    # Average ensemble score (logged through the last ensemble member's writer)
    avg_ensemble_test_score = np.nanmean(ensemble_scores)
    info(f'Ensemble test {args.metric} = {avg_ensemble_test_score:.6f}')
    writer.add_scalar(f'ensemble_test_{args.metric}', avg_ensemble_test_score, 0)

    # Individual ensemble scores
    if args.show_individual_scores:
        for task_name, ensemble_score in zip(args.task_names, ensemble_scores):
            info(f'Ensemble test {task_name} {args.metric} = {ensemble_score:.6f}')

    return ensemble_scores
def predict(model: nn.Module,
            data: MoleculeDataset,
            args: Namespace,
            scaler: StandardScaler = None,
            bert_save_memory: bool = False,
            logger: logging.Logger = None) -> List[List[float]]:
    """
    Makes predictions on a dataset using a model.

    :param model: A model.
    :param data: A MoleculeDataset.
    :param args: Arguments.
    :param scaler: A StandardScaler object fit on the training targets.
    :param bert_save_memory: Store unused predictions as None to avoid unnecessary memory use.
    :param logger: Logger. NOTE(review): not referenced anywhere in this function body.
    :return: A list of lists of predictions. The outer list is examples while the inner list is tasks.
             When args.dataset_type is 'bert_pretraining' a dict with keys 'features' and 'vocab' is
             produced instead, and when args.maml is set a (preds, full_targets) tuple is returned.
    """
    model.eval()

    preds = []
    if args.dataset_type == 'bert_pretraining':
        features_preds = []

    # MAML iterates one sampled task at a time; otherwise step through data in batches
    if args.maml:
        num_iters, iter_step = data.num_tasks() * args.maml_batches_per_epoch, 1
        full_targets = []
    else:
        num_iters, iter_step = len(data), args.batch_size

    # Optionally featurize batches in a separate process that feeds batch_queue
    if args.parallel_featurization:
        batch_queue = Queue(args.batch_queue_max_size)
        exit_queue = Queue(1)
        batch_process = Process(target=async_mol2graph, args=(batch_queue, data, args, num_iters, iter_step, exit_queue, True))
        batch_process.start()
        currently_loaded_batches = []

    for i in trange(0, num_iters, iter_step):
        if args.maml:
            task_train_data, task_test_data, task_idx = data.sample_maml_task(args, seed=0)
            mol_batch = task_test_data
            smiles_batch, features_batch, targets_batch = task_train_data.smiles(), task_train_data.features(), task_train_data.targets(task_idx)
            targets = torch.Tensor(targets_batch).unsqueeze(1)
            if args.cuda:
                targets = targets.cuda()
        else:
            # Prepare batch
            if args.parallel_featurization:
                # Refill the local list from the queue once it has been consumed
                if len(currently_loaded_batches) == 0:
                    currently_loaded_batches = batch_queue.get()
                mol_batch, featurized_mol_batch = currently_loaded_batches.pop(0)
            else:
                mol_batch = MoleculeDataset(data[i:i + args.batch_size])
            smiles_batch, features_batch = mol_batch.smiles(), mol_batch.features()

        # Run model
        if args.dataset_type == 'bert_pretraining':
            batch = mol2graph(smiles_batch, args)
            batch.bert_mask(mol_batch.mask())
        else:
            batch = smiles_batch

        if args.maml:  # TODO refactor with train loop
            # One gradient step on the task's training data yields the adapted parameters theta_prime
            model.zero_grad()
            intermediate_preds = model(batch, features_batch)
            loss = get_loss_func(args)(intermediate_preds, targets)
            loss = loss.sum() / len(batch)
            grad = torch.autograd.grad(loss, [p for p in model.parameters() if p.requires_grad])
            theta = [p for p in model.named_parameters() if p[1].requires_grad]  # comes in same order as grad
            theta_prime = {p[0]: p[1] - args.maml_lr * grad[i] for i, p in enumerate(theta)}
            # Parameters without gradients are carried over unchanged (adding zeros creates a new tensor)
            for name, nongrad_param in [p for p in model.named_parameters() if not p[1].requires_grad]:
                theta_prime[name] = nongrad_param + torch.zeros(nongrad_param.size()).to(nongrad_param)
            model_prime = build_model(args=args, params=theta_prime)
            smiles_batch, features_batch, targets_batch = task_test_data.smiles(), task_test_data.features(), task_test_data.targets(task_idx)
            # no mask since we only picked data points that have the desired target
            with torch.no_grad():
                batch_preds = model_prime(smiles_batch, features_batch)
            full_targets.extend([[t] for t in targets_batch])
        else:
            with torch.no_grad():
                if args.parallel_featurization:
                    # Temporarily switch the encoder's input mode and restore it afterwards
                    previous_graph_input_mode = model.encoder.graph_input
                    model.encoder.graph_input = True  # force model to accept already processed input
                    batch_preds = model(featurized_mol_batch, features_batch)
                    model.encoder.graph_input = previous_graph_input_mode
                else:
                    batch_preds = model(batch, features_batch)

                if args.dataset_type == 'bert_pretraining':
                    # The model output is a dict here; collect 'features' separately and continue with 'vocab'
                    if batch_preds['features'] is not None:
                        features_preds.extend(batch_preds['features'].data.cpu().numpy())
                    batch_preds = batch_preds['vocab']

                if args.dataset_type == 'kernel':
                    # Regroup consecutive rows into pairs before the kernel output layer
                    batch_preds = batch_preds.view(int(batch_preds.size(0)/2), 2, batch_preds.size(1))
                    batch_preds = model.kernel_output_layer(batch_preds)

        batch_preds = batch_preds.data.cpu().numpy()

        # Undo the target scaling applied during training, if any
        if scaler is not None:
            batch_preds = scaler.inverse_transform(batch_preds)

        if args.dataset_type == 'regression_with_binning':
            # Pick the highest-scoring bin per task; bin indices are mapped to values after the loop
            batch_preds = batch_preds.reshape((batch_preds.shape[0], args.num_tasks, args.num_bins))
            indices = np.argmax(batch_preds, axis=2)
            preds.extend(indices.tolist())
        else:
            batch_preds = batch_preds.tolist()
            if args.dataset_type == 'bert_pretraining' and bert_save_memory:
                for atom_idx, mask_val in enumerate(mol_batch.mask()):
                    if mask_val != 0:
                        batch_preds[atom_idx] = None  # not going to predict, so save some memory when passing around
            preds.extend(batch_preds)

    # Translate the collected bin indices into their associated prediction values
    if args.dataset_type == 'regression_with_binning':
        preds = args.bin_predictions[np.array(preds)].tolist()

    if args.dataset_type == 'bert_pretraining':
        preds = {
            'features': features_preds if len(features_preds) > 0 else None,
            'vocab': preds
        }

    if args.parallel_featurization:
        exit_queue.put(0)  # dummy var to get the subprocess to know that we're done
        batch_process.join()

    if args.maml:
        # return the task targets here to guarantee alignment;
        # there's probably no reasonable scenario where we'd use MAML directly to predict something that's actually unknown
        return preds, full_targets

    return preds
def train(model: nn.Module,
          data: Union[MoleculeDataset, List[MoleculeDataset]],
          loss_func: Callable,
          optimizer: Optimizer,
          scheduler: _LRScheduler,
          args: Namespace,
          n_iter: int = 0,
          logger: logging.Logger = None,
          writer: SummaryWriter = None,
          chunk_names: bool = False,
          val_smiles: List[str] = None,
          test_smiles: List[str] = None) -> int:
    """
    Trains a model for an epoch.

    :param model: Model.
    :param data: A MoleculeDataset (or a list of MoleculeDatasets if using moe).
    :param loss_func: Loss function.
    :param optimizer: An Optimizer.
    :param scheduler: A learning rate scheduler.
    :param args: Arguments.
    :param n_iter: The number of iterations (training examples) trained on so far.
    :param logger: A logger for printing intermediate results. Messages are printed when no logger is given.
    :param writer: A tensorboardX SummaryWriter.
    :param chunk_names: Whether to train on the data in chunks. In this case, data is iterated as
                        (chunk path, memo path) pairs and each pickled chunk is trained on in turn.
    :param val_smiles: Validation smiles strings without targets.
    :param test_smiles: Test smiles strings without targets, used for adversarial setting.
    :return: The total number of iterations (training examples) trained on so far.
    """
    debug = logger.debug if logger is not None else print

    model.train()

    if args.dataset_type == 'bert_pretraining':
        features_loss = nn.MSELoss()

    # Chunked mode: load each pickled chunk, recurse on it with chunk_names=False, then return
    if chunk_names:
        for path, memo_path in tqdm(data, total=len(data)):
            featurization.SMILES_TO_FEATURES = dict()
            if os.path.isfile(memo_path):
                found_memo = True
                with open(memo_path, 'rb') as f:
                    featurization.SMILES_TO_FEATURES = pickle.load(f)
            else:
                found_memo = False
            with open(path, 'rb') as f:
                chunk = pickle.load(f)
            if args.moe:
                for source in chunk:
                    source.shuffle()
            else:
                chunk.shuffle()
            n_iter = train(model=model,
                           data=chunk,
                           loss_func=loss_func,
                           optimizer=optimizer,
                           scheduler=scheduler,
                           args=args,
                           n_iter=n_iter,
                           logger=logger,
                           writer=writer,
                           chunk_names=False,
                           val_smiles=val_smiles,
                           test_smiles=test_smiles)
            # NOTE(review): the memo is loaded into SMILES_TO_FEATURES above but SMILES_TO_GRAPH
            # is what gets dumped here — confirm which cache the memo file is meant to hold
            if not found_memo:
                with open(memo_path, 'wb') as f:
                    pickle.dump(featurization.SMILES_TO_GRAPH, f, protocol=pickle.HIGHEST_PROTOCOL)
        return n_iter

    if not args.moe:
        data.shuffle()

    # Running totals for the loss logged since the last log statement
    loss_sum, iter_count = 0, 0
    if args.adversarial:
        if args.moe:
            train_smiles = []
            for d in data:
                train_smiles += d.smiles()
        else:
            train_smiles = data.smiles()
        train_val_smiles = train_smiles + val_smiles
        d_loss_sum, g_loss_sum, gp_norm_sum = 0, 0, 0

    # Determine how many examples this epoch iterates over
    if args.moe:
        test_smiles = list(test_smiles)
        random.shuffle(test_smiles)
        train_smiles = []
        for d in data:
            d.shuffle()
            train_smiles.append(d.smiles())
        num_iters = min(len(test_smiles), min([len(d) for d in data]))
    elif args.maml:
        num_iters = args.maml_batches_per_epoch * args.maml_batch_size
        model.zero_grad()
        maml_sum_loss = 0
    else:
        # Without last_batch, round down to a whole number of batches
        num_iters = len(data) if args.last_batch else len(data) // args.batch_size * args.batch_size

    # Optionally featurize batches in a separate process that feeds batch_queue
    if args.parallel_featurization:
        batch_queue = Queue(args.batch_queue_max_size)
        exit_queue = Queue(1)
        batch_process = Process(target=async_mol2graph, args=(batch_queue, data, args, num_iters, args.batch_size, exit_queue, args.last_batch))
        batch_process.start()
        currently_loaded_batches = []

    iter_size = 1 if args.maml else args.batch_size

    for i in trange(0, num_iters, iter_size):
        if args.moe:
            if not args.batch_domain_encs:
                model.compute_domain_encs(train_smiles)  # want to recompute every batch
            mol_batch = [MoleculeDataset(d[i:i + args.batch_size]) for d in data]
            train_batch, train_targets = [], []
            for b in mol_batch:
                tb, tt = b.smiles(), b.targets()
                train_batch.append(tb)
                train_targets.append(tt)
            test_batch = test_smiles[i:i + args.batch_size]
            loss = model.compute_loss(train_batch, train_targets, test_batch)
            model.zero_grad()

            loss_sum += loss.item()
            iter_count += len(mol_batch)
        elif args.maml:
            task_train_data, task_test_data, task_idx = data.sample_maml_task(args)
            mol_batch = task_test_data
            smiles_batch, features_batch, target_batch = task_train_data.smiles(), task_train_data.features(), task_train_data.targets(task_idx)
            # no mask since we only picked data points that have the desired target
            targets = torch.Tensor(target_batch).unsqueeze(1)
            if next(model.parameters()).is_cuda:
                targets = targets.cuda()
            # One gradient step on the task's training data yields the adapted parameters theta_prime
            preds = model(smiles_batch, features_batch)
            loss = loss_func(preds, targets)
            loss = loss.sum() / len(smiles_batch)
            grad = torch.autograd.grad(loss, [p for p in model.parameters() if p.requires_grad])
            theta = [p for p in model.named_parameters() if p[1].requires_grad]  # comes in same order as grad
            theta_prime = {p[0]: p[1] - args.maml_lr * grad[i] for i, p in enumerate(theta)}
            # Parameters without gradients are carried over unchanged (adding zeros creates a new tensor)
            for name, nongrad_param in [p for p in model.named_parameters() if not p[1].requires_grad]:
                theta_prime[name] = nongrad_param + torch.zeros(nongrad_param.size()).to(nongrad_param)
        else:
            # Prepare batch
            if args.parallel_featurization:
                # Refill the local list from the queue once it has been consumed
                if len(currently_loaded_batches) == 0:
                    currently_loaded_batches = batch_queue.get()
                mol_batch, featurized_mol_batch = currently_loaded_batches.pop()
            else:
                # Skip a trailing partial batch unless last_batch is requested
                if not args.last_batch and i + args.batch_size > len(data):
                    break
                mol_batch = MoleculeDataset(data[i:i + args.batch_size])
            smiles_batch, features_batch, target_batch = mol_batch.smiles(), mol_batch.features(), mol_batch.targets()

            if args.dataset_type == 'bert_pretraining':
                batch = mol2graph(smiles_batch, args)
                mask = mol_batch.mask()
                batch.bert_mask(mask)
                # The loss mask is the complement of the mask handed to bert_mask
                mask = 1 - torch.FloatTensor(mask)  # num_atoms
                features_targets = torch.FloatTensor(target_batch['features']) if target_batch['features'] is not None else None  # num_molecules x features_size
                targets = torch.FloatTensor(target_batch['vocab'])  # num_atoms
                if args.bert_vocab_func == 'feature_vector':
                    mask = mask.reshape(-1, 1)
                else:
                    targets = targets.long()
            else:
                batch = smiles_batch
                # Missing targets (None) get mask 0 and a placeholder target of 0
                mask = torch.Tensor([[x is not None for x in tb] for tb in target_batch])
                targets = torch.Tensor([[0 if x is None else x for x in tb] for tb in target_batch])

            if next(model.parameters()).is_cuda:
                mask, targets = mask.cuda(), targets.cuda()

                if args.dataset_type == 'bert_pretraining' and features_targets is not None:
                    features_targets = features_targets.cuda()

            if args.class_balance:
                # Look up a per-example weight from args.class_weights using each task's target value
                class_weights = []
                for task_num in range(data.num_tasks()):
                    class_weights.append(args.class_weights[task_num][targets[:, task_num].long()])
                class_weights = torch.stack(class_weights).t()  # num_molecules x num_tasks
            else:
                class_weights = torch.ones(targets.shape)

            if args.cuda:
                class_weights = class_weights.cuda()

            # Run model
            model.zero_grad()
            if args.parallel_featurization:
                # Temporarily switch the encoder's input mode and restore it afterwards
                previous_graph_input_mode = model.encoder.graph_input
                model.encoder.graph_input = True  # force model to accept already processed input
                preds = model(featurized_mol_batch, features_batch)
                model.encoder.graph_input = previous_graph_input_mode
            else:
                preds = model(batch, features_batch)
            if args.dataset_type == 'regression_with_binning':
                preds = preds.view(targets.size(0), targets.size(1), -1)
                targets = targets.long()
                loss = 0
                for task in range(targets.size(1)):
                    loss += loss_func(preds[:, task, :], targets[:, task]) * class_weights[:, task] * mask[:, task]  # for some reason cross entropy doesn't support multi target
                loss = loss.sum() / mask.sum()
            else:
                if args.dataset_type == 'unsupervised':
                    targets = targets.long().reshape(-1)

                if args.dataset_type == 'bert_pretraining':
                    features_preds, preds = preds['features'], preds['vocab']

                if args.dataset_type == 'kernel':
                    # Regroup consecutive rows into pairs before the kernel output layer
                    preds = preds.view(int(preds.size(0) / 2), 2, preds.size(1))
                    preds = model.kernel_output_layer(preds)

                # Masked, class-weighted loss averaged over the unmasked entries
                loss = loss_func(preds, targets) * class_weights * mask
                if args.predict_features_and_task:
                    # All but the last features_size columns get an extra (task_weight - 1) share
                    loss = (loss.sum() + loss[:, :-args.features_size].sum() * (args.task_weight-1)) \
                        / (mask.sum() + mask[:, :-args.features_size].sum() * (args.task_weight-1))
                else:
                    loss = loss.sum() / mask.sum()

                if args.dataset_type == 'bert_pretraining' and features_targets is not None:
                    loss += features_loss(features_preds, features_targets)

            loss_sum += loss.item()
            iter_count += len(mol_batch)

        if args.maml:
            # Evaluate the adapted model on the task's test data; losses accumulate across a MAML batch
            model_prime = build_model(args=args, params=theta_prime)
            smiles_batch, features_batch, target_batch = task_test_data.smiles(), task_test_data.features(), [t[task_idx] for t in task_test_data.targets()]
            # no mask since we only picked data points that have the desired target
            targets = torch.Tensor([[t] for t in target_batch])
            if next(model_prime.parameters()).is_cuda:
                targets = targets.cuda()
            model_prime.zero_grad()
            preds = model_prime(smiles_batch, features_batch)
            loss = loss_func(preds, targets)
            loss = loss.sum() / len(smiles_batch)
            loss_sum += loss.item()
            iter_count += len(smiles_batch)  # TODO check that this makes sense, but it's just for display
            maml_sum_loss += loss
            # Take an optimizer step once every maml_batch_size tasks
            if i % args.maml_batch_size == args.maml_batch_size - 1:
                maml_sum_loss.backward()
                optimizer.step()
                model.zero_grad()
                maml_sum_loss = 0
        else:
            loss.backward()
            if args.max_grad_norm is not None:
                clip_grad_norm_(model.parameters(), args.max_grad_norm)
            optimizer.step()

        if args.adjust_weight_decay:
            # Nudge weight decay down (not below 0) while the parameter norm is under target, up otherwise.
            # NOTE(review): these inner loops rebind the outer loop variable `i`; it is reassigned by
            # the outer `for` at the start of the next iteration.
            current_pnorm = compute_pnorm(model)
            if current_pnorm < args.pnorm_target:
                for i in range(len(optimizer.param_groups)):
                    optimizer.param_groups[i]['weight_decay'] = max(0, optimizer.param_groups[i]['weight_decay'] - args.adjust_weight_decay_step)
            else:
                for i in range(len(optimizer.param_groups)):
                    optimizer.param_groups[i]['weight_decay'] += args.adjust_weight_decay_step

        # NoamLR is stepped once per training iteration
        if isinstance(scheduler, NoamLR):
            scheduler.step()

        if args.adversarial:
            for _ in range(args.gan_d_per_g):
                train_val_smiles_batch = random.sample(train_val_smiles, args.batch_size)
                test_smiles_batch = random.sample(test_smiles, args.batch_size)
                d_loss, gp_norm = model.train_D(train_val_smiles_batch, test_smiles_batch)
            train_val_smiles_batch = random.sample(train_val_smiles, args.batch_size)
            test_smiles_batch = random.sample(test_smiles, args.batch_size)
            g_loss = model.train_G(train_val_smiles_batch, test_smiles_batch)

            # we probably only care about the g_loss honestly
            d_loss_sum += d_loss * args.batch_size
            gp_norm_sum += gp_norm * args.batch_size
            g_loss_sum += g_loss * args.batch_size

        n_iter += len(mol_batch)

        # Log and/or add to tensorboard
        if (n_iter // args.batch_size) % args.log_frequency == 0:
            lrs = scheduler.get_lr()
            pnorm = compute_pnorm(model)
            gnorm = compute_gnorm(model)
            loss_avg = loss_sum / iter_count
            if args.adversarial:
                d_loss_avg, g_loss_avg, gp_norm_avg = d_loss_sum / iter_count, g_loss_sum / iter_count, gp_norm_sum / iter_count
                d_loss_sum, g_loss_sum, gp_norm_sum = 0, 0, 0
            # Reset the running totals so the next log covers only new iterations
            loss_sum, iter_count = 0, 0

            lrs_str = ', '.join('lr_{} = {:.4e}'.format(i, lr) for i, lr in enumerate(lrs))
            debug("Loss = {:.4e}, PNorm = {:.4f}, GNorm = {:.4f}, {}".format(loss_avg, pnorm, gnorm, lrs_str))
            if args.adversarial:
                debug("D Loss = {:.4e}, G Loss = {:.4e}, GP Norm = {:.4}".format(d_loss_avg, g_loss_avg, gp_norm_avg))

            if writer is not None:
                writer.add_scalar('train_loss', loss_avg, n_iter)
                writer.add_scalar('param_norm', pnorm, n_iter)
                writer.add_scalar('gradient_norm', gnorm, n_iter)
                for i, lr in enumerate(lrs):
                    writer.add_scalar('learning_rate_{}'.format(i), lr, n_iter)

    if args.parallel_featurization:
        exit_queue.put(0)  # dummy var to get the subprocess to know that we're done
        batch_process.join()

    return n_iter
def load_checkpoint(path: str,
                    current_args: Namespace = None,
                    cuda: bool = False,
                    logger: logging.Logger = None) -> nn.Module:
    """
    Restores a model from a checkpoint file.

    Checkpoint parameters that are missing from the freshly built model, or whose
    shape differs from the model's parameter, are reported and skipped. When the
    current arguments request it, only parameters whose name contains 'encoder'
    are considered at all.

    :param path: Path where checkpoint is saved.
    :param current_args: The current arguments. Replaces the arguments loaded from the checkpoint if provided.
    :param cuda: Whether to move model to cuda.
    :param logger: A logger. Messages are printed when no logger is given.
    :return: The loaded model.
    """
    if logger is None:
        debug = print
    else:
        debug = logger.debug

    # Read the checkpoint; this map_location loads all tensors onto the CPU
    state = torch.load(path, map_location=lambda storage, loc: storage)
    checkpoint_args = state['args']
    loaded_state_dict = state['state_dict']

    # Caller-supplied arguments win over the ones stored in the checkpoint
    args = checkpoint_args if current_args is None else current_args

    # Encoder-only loading can only be requested through the current arguments
    load_encoder_only = False if current_args is None else current_args.load_encoder_only

    # Instantiate the architecture and take a snapshot of its parameters
    model = build_model(args)
    model_state_dict = model.state_dict()

    # Keep only checkpoint parameters present in the model with an identical shape
    pretrained_state_dict = {}
    for param_name in loaded_state_dict:
        if load_encoder_only and 'encoder' not in param_name:
            continue

        if param_name not in model_state_dict:
            debug(
                'Pretrained parameter "{}" cannot be found in model parameters.'
                .format(param_name))
            continue

        loaded_shape = loaded_state_dict[param_name].shape
        model_shape = model_state_dict[param_name].shape
        if model_shape != loaded_shape:
            debug(
                'Pretrained parameter "{}" of shape {} does not match corresponding '
                'model parameter of shape {}.'.format(
                    param_name, loaded_shape, model_shape))
            continue

        debug('Loading pretrained parameter "{}".'.format(param_name))
        pretrained_state_dict[param_name] = loaded_state_dict[param_name]

    # Overlay the accepted weights on the model's own state and load the result
    model_state_dict.update(pretrained_state_dict)
    model.load_state_dict(model_state_dict)

    # Mixture-of-experts checkpoints also carry domain encodings to restore
    if args.moe:
        domain_encs = state['domain_encs']
        if args.cuda:
            domain_encs = [encs.cuda() for encs in domain_encs]
        model.set_domain_encs(domain_encs)

    if not cuda:
        return model

    debug('Moving model to cuda')
    return model.cuda()
def run_training(args: Namespace, logger: Logger = None) -> List[float]:
    """
    Trains a model and returns test scores on the model checkpoint with the highest validation score.

    The function loads and splits the data, optionally scales features/targets, trains
    `args.ensemble_size` models (checkpointing each one whenever its validation score improves),
    evaluates every model on the test set and finally scores the averaged ensemble predictions.

    :param args: Arguments. Several attributes are written back onto it (task_names, num_tasks,
                 features_size, real_num_tasks, train_data_size, ...).
    :param logger: Logger. When omitted, messages go to `print`.
    :return: A list of ensemble scores for each task. Two short-circuits exist: a list of ones
             (one per task) when `args.save_smiles_splits` is set, and `[0]` when the dataset
             type is 'unsupervised'.
    """
    # NOTE(review): the block nesting below was re-derived from a whitespace-flattened copy of
    # this function; confirm the placement of the class-balance block, the 'unsupervised' early
    # return and the maml initialisation of sum_test_preds.
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    # Set GPU
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)

    # Print args
    debug(pformat(vars(args)))

    # Get data
    debug('Loading data')
    args.task_names = get_task_names(args.data_path)
    desired_labels = get_desired_labels(args, args.task_names)
    data = get_data(path=args.data_path, args=args, logger=logger)
    args.num_tasks = data.num_tasks()
    args.features_size = data.features_size()
    # When features are predicted as well, they are counted in num_tasks; real_num_tasks excludes them.
    args.real_num_tasks = args.num_tasks - args.features_size if args.predict_features else args.num_tasks
    debug(f'Number of tasks = {args.num_tasks}')

    if args.dataset_type == 'bert_pretraining':
        data.bert_init(args, logger)

    # Split data
    if args.dataset_type == 'regression_with_binning':  # Note: for now, binning based on whole dataset, not just training set
        data, bin_predictions, regression_data = data
        args.bin_predictions = bin_predictions
        debug(f'Splitting data with seed {args.seed}')
        # Train on the binned data, but validate/test on the regression data (same split arguments).
        train_data, _, _ = split_data(data=data, split_type=args.split_type, sizes=args.split_sizes,
                                      seed=args.seed, args=args, logger=logger)
        _, val_data, test_data = split_data(regression_data, split_type=args.split_type, sizes=args.split_sizes,
                                            seed=args.seed, args=args, logger=logger)
    else:
        debug(f'Splitting data with seed {args.seed}')
        if args.separate_test_set:
            test_data = get_data(path=args.separate_test_set, args=args,
                                 features_path=args.separate_test_set_features, logger=logger)
            if args.separate_val_set:
                val_data = get_data(path=args.separate_val_set, args=args,
                                    features_path=args.separate_val_set_features, logger=logger)
                train_data = data  # nothing to split; we already got our test and val sets
            else:
                # Test set is external, so the main data is only divided into train and val.
                train_data, val_data, _ = split_data(data=data, split_type=args.split_type, sizes=(0.8, 0.2, 0.0),
                                                     seed=args.seed, args=args, logger=logger)
        else:
            train_data, val_data, test_data = split_data(data=data, split_type=args.split_type,
                                                         sizes=args.split_sizes, seed=args.seed,
                                                         args=args, logger=logger)

    # Optionally replace test data with train or val data
    if args.test_split == 'train':
        test_data = train_data
    elif args.test_split == 'val':
        test_data = val_data

    if args.dataset_type == 'classification':
        class_sizes = get_class_sizes(data)
        debug('Class sizes')
        for i, task_class_sizes in enumerate(class_sizes):
            debug(f'{args.task_names[i]} '
                  f'{", ".join(f"{cls}: {size * 100:.2f}%" for cls, size in enumerate(task_class_sizes))}')

        if args.class_balance:
            # Weight each class by the inverse of its expected count per batch.
            train_class_sizes = get_class_sizes(train_data)
            class_batch_counts = torch.Tensor(train_class_sizes) * args.batch_size
            args.class_weights = 1 / torch.Tensor(class_batch_counts)

    if args.save_smiles_splits:
        # Index the raw data file by SMILES (first column) so full rows and row indices can be written per split.
        with open(args.data_path, 'r') as f:
            reader = csv.reader(f)
            header = next(reader)

            lines_by_smiles = {}
            indices_by_smiles = {}
            for i, line in enumerate(reader):
                smiles = line[0]
                lines_by_smiles[smiles] = line
                indices_by_smiles[smiles] = i

        all_split_indices = []
        for dataset, name in [(train_data, 'train'), (val_data, 'val'), (test_data, 'test')]:
            with open(os.path.join(args.save_dir, name + '_smiles.csv'), 'w') as f:
                writer = csv.writer(f)
                writer.writerow(['smiles'])
                for smiles in dataset.smiles():
                    writer.writerow([smiles])
            with open(os.path.join(args.save_dir, name + '_full.csv'), 'w') as f:
                writer = csv.writer(f)
                writer.writerow(header)
                for smiles in dataset.smiles():
                    writer.writerow(lines_by_smiles[smiles])
            split_indices = []
            for smiles in dataset.smiles():
                split_indices.append(indices_by_smiles[smiles])
            split_indices = sorted(split_indices)
            all_split_indices.append(split_indices)
        with open(os.path.join(args.save_dir, 'split_indices.pckl'), 'wb') as f:
            pickle.dump(all_split_indices, f)
        return [1 for _ in range(args.num_tasks) ]  # short circuit out when just generating splits

    if args.features_scaling:
        # The scaler is fitted on the training features and then re-used for val and test.
        features_scaler = train_data.normalize_features(replace_nan_token=None if args.predict_features else 0)
        val_data.normalize_features(features_scaler)
        test_data.normalize_features(features_scaler)
    else:
        features_scaler = None

    args.train_data_size = len(train_data) if args.prespecified_chunk_dir is None else args.prespecified_chunks_max_examples_per_epoch

    if args.adversarial or args.moe:
        val_smiles, test_smiles = val_data.smiles(), test_data.smiles()

    debug(f'Total size = {len(data):,} | '
          f'train size = {len(train_data):,} | val size = {len(val_data):,} | test size = {len(test_data):,}')

    # Optionally truncate outlier values
    if args.truncate_outliers:
        print('Truncating outliers in train set')
        train_data = truncate_outliers(train_data)

    # Initialize scaler and scale training targets by subtracting mean and dividing standard deviation (regression only)
    if args.dataset_type == 'regression' and args.target_scaling:
        debug('Fitting scaler')
        train_smiles, train_targets = train_data.smiles(), train_data.targets()
        scaler = StandardScaler().fit(train_targets)
        scaled_targets = scaler.transform(train_targets).tolist()
        train_data.set_targets(scaled_targets)
    else:
        scaler = None

    if args.moe:
        # From here on train_data is a collection of per-source datasets (it is iterated below).
        train_data = cluster_split(train_data, args.num_sources, args.cluster_max_ratio,
                                   seed=args.cluster_split_seed, logger=logger)

    # Chunk training data if too large to load in memory all at once
    if args.num_chunks > 1:
        os.makedirs(args.chunk_temp_dir, exist_ok=True)
        train_paths = []
        if args.moe:
            # Chunk every source separately, then regroup so chunk i holds the i-th piece of each source.
            chunked_sources = [td.chunk(args.num_chunks) for td in train_data]
            chunks = []
            for i in range(args.num_chunks):
                chunks.append([source[i] for source in chunked_sources])
        else:
            chunks = train_data.chunk(args.num_chunks)
        for i in range(args.num_chunks):
            chunk_path = os.path.join(args.chunk_temp_dir, str(i) + '.txt')
            memo_path = os.path.join(args.chunk_temp_dir, 'memo' + str(i) + '.txt')
            with open(chunk_path, 'wb') as f:
                pickle.dump(chunks[i], f)
            train_paths.append((chunk_path, memo_path))
        # train_data now holds (chunk_path, memo_path) pairs instead of a dataset.
        train_data = train_paths

    # Get loss and metric functions
    loss_func = get_loss_func(args)
    metric_func = get_metric_func(metric=args.metric, args=args)

    # Set up test set evaluation
    test_smiles, test_targets = test_data.smiles(), test_data.targets()
    if args.maml:  # TODO refactor
        test_targets = []
        for task_idx in range(len(data.data[0].targets)):
            _, task_test_data, _ = test_data.sample_maml_task(args, seed=0)
            test_targets += task_test_data.targets()

    # Accumulator for the per-model test predictions; averaged over the ensemble at the end.
    if args.dataset_type == 'bert_pretraining':
        sum_test_preds = {
            'features': np.zeros((len(test_smiles), args.features_size)) if args.features_size is not None else None,
            'vocab': np.zeros((len(test_targets['vocab']), args.vocab.output_size))
        }
    elif args.dataset_type == 'kernel':
        sum_test_preds = np.zeros((len(test_targets), args.num_tasks))
    else:
        sum_test_preds = np.zeros((len(test_smiles), args.num_tasks))

    if args.maml:
        sum_test_preds = None  # annoying to determine exact size; will initialize later

    if args.dataset_type == 'bert_pretraining':
        # Only predict targets that are masked out
        test_targets['vocab'] = [target if mask == 0 else None
                                 for target, mask in zip(test_targets['vocab'], test_data.mask())]

    # Train ensemble of models
    for model_idx in range(args.ensemble_size):
        # Tensorboard writer
        save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
        os.makedirs(save_dir, exist_ok=True)
        writer = SummaryWriter(log_dir=save_dir)

        # Load/build model
        if args.checkpoint_paths is not None:
            debug(f'Loading model {model_idx} from {args.checkpoint_paths[model_idx]}')
            model = load_checkpoint(args.checkpoint_paths[model_idx], current_args=args, logger=logger)
        else:
            debug(f'Building model {model_idx}')
            model = build_model(args)

        debug(model)
        debug(f'Number of parameters = {param_count(model):,}')
        if args.cuda:
            debug('Moving model to cuda')
            model = model.cuda()

        # Ensure that model is saved in correct location for evaluation if 0 epochs
        save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        if args.adjust_weight_decay:
            # Parameter norm of the initial model, stored on args as the target value.
            args.pnorm_target = compute_pnorm(model)

        # Optimizers
        optimizer = build_optimizer(model, args)

        # Learning rate schedulers
        scheduler = build_lr_scheduler(optimizer, args)

        # Run training
        best_score = float('inf') if args.minimize_score else -float('inf')
        best_epoch, n_iter = 0, 0
        for epoch in trange(args.epochs):
            debug(f'Epoch {epoch}')

            if args.prespecified_chunk_dir is not None:
                # load some different random chunks each epoch
                train_data, val_data = load_prespecified_chunks(args, logger)
                debug('Loaded prespecified chunks for epoch')

            if args.dataset_type == 'unsupervised':
                # won't work with moe
                full_data = MoleculeDataset(train_data.data + val_data.data)
                generate_unsupervised_cluster_labels(build_model(args), full_data, args)  # cluster with a new random init
                model.create_ffn(args)  # reset the ffn since we're changing targets-- we're just pretraining the encoder.
                optimizer.param_groups.pop()  # remove ffn parameters
                optimizer.add_param_group({
                    'params': model.ffn.parameters(),
                    'lr': args.init_lr[1],
                    'weight_decay': args.weight_decay[1]
                })
                if args.cuda:
                    model.ffn.cuda()

            if args.gradual_unfreezing:
                if epoch % args.epochs_per_unfreeze == 0:
                    unfroze_layer = model.unfreeze_next()  # consider just stopping early after we have nothing left to unfreeze?
                    if unfroze_layer:
                        debug('Unfroze last frozen layer')

            n_iter = train(model=model,
                           data=train_data,
                           loss_func=loss_func,
                           optimizer=optimizer,
                           scheduler=scheduler,
                           args=args,
                           n_iter=n_iter,
                           logger=logger,
                           writer=writer,
                           chunk_names=(args.num_chunks > 1),
                           val_smiles=val_smiles if args.adversarial else None,
                           test_smiles=test_smiles if args.adversarial or args.moe else None)
            # Only ExponentialLR is stepped here, once per epoch.
            if isinstance(scheduler, ExponentialLR):
                scheduler.step()
            val_scores = evaluate(model=model,
                                  data=val_data,
                                  metric_func=metric_func,
                                  args=args,
                                  scaler=scaler,
                                  logger=logger)

            if args.dataset_type == 'bert_pretraining':
                if val_scores['features'] is not None:
                    debug(f'Validation features rmse = {val_scores["features"]:.6f}')
                    writer.add_scalar('validation_features_rmse', val_scores['features'], n_iter)
                # Model selection below uses only the vocab score.
                val_scores = [val_scores['vocab']]

            # Average validation score
            avg_val_score = np.nanmean(val_scores)
            debug(f'Validation {args.metric} = {avg_val_score:.6f}')
            writer.add_scalar(f'validation_{args.metric}', avg_val_score, n_iter)

            if args.show_individual_scores:
                # Individual validation scores
                for task_name, val_score in zip(args.task_names, val_scores):
                    if task_name in desired_labels:
                        debug(f'Validation {task_name} {args.metric} = {val_score:.6f}')
                        writer.add_scalar(f'validation_{task_name}_{args.metric}', val_score, n_iter)

            # Save model checkpoint if improved validation score, or always save it if unsupervised
            if args.minimize_score and avg_val_score < best_score or \
                    not args.minimize_score and avg_val_score > best_score or \
                    args.dataset_type == 'unsupervised':
                best_score, best_epoch = avg_val_score, epoch
                save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        if args.dataset_type == 'unsupervised':
            return [0]  # rest of this is meaningless when unsupervised

        # Evaluate on test set using model with best validation score
        info(f'Model {model_idx} best validation {args.metric} = {best_score:.6f} on epoch {best_epoch}')
        model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda, logger=logger)

        if args.split_test_by_overlap_dataset is not None:
            # Report separate test scores for molecules that do / do not also occur in the overlap dataset.
            overlap_data = get_data(path=args.split_test_by_overlap_dataset, logger=logger)
            overlap_smiles = set(overlap_data.smiles())
            test_data_intersect, test_data_nonintersect = [], []
            for d in test_data.data:
                if d.smiles in overlap_smiles:
                    test_data_intersect.append(d)
                else:
                    test_data_nonintersect.append(d)
            test_data_intersect, test_data_nonintersect = MoleculeDataset(test_data_intersect), MoleculeDataset(test_data_nonintersect)
            for name, td in [('Intersect', test_data_intersect), ('Nonintersect', test_data_nonintersect)]:
                test_preds = predict(model=model,
                                     data=td,
                                     args=args,
                                     scaler=scaler,
                                     logger=logger)
                test_scores = evaluate_predictions(preds=test_preds,
                                                   targets=td.targets(),
                                                   metric_func=metric_func,
                                                   dataset_type=args.dataset_type,
                                                   args=args,
                                                   logger=logger)
                avg_test_score = np.nanmean(test_scores)
                info(f'Model {model_idx} test {args.metric} for {name} = {avg_test_score:.6f}')

        if len(test_data) == 0:  # just get some garbage results without crashing; in this case we didn't care anyway
            test_preds, test_scores = sum_test_preds, [0 for _ in range(len(args.task_names))]
        else:
            test_preds = predict(model=model,
                                 data=test_data,
                                 args=args,
                                 scaler=scaler,
                                 logger=logger)
            test_scores = evaluate_predictions(preds=test_preds,
                                               targets=test_targets,
                                               metric_func=metric_func,
                                               dataset_type=args.dataset_type,
                                               args=args,
                                               logger=logger)

        if args.maml:
            # Deferred initialisation: the accumulator takes its shape from the first predictions.
            if sum_test_preds is None:
                sum_test_preds = np.zeros(np.array(test_preds).shape)

        if args.dataset_type == 'bert_pretraining':
            if test_preds['features'] is not None:
                sum_test_preds['features'] += np.array(test_preds['features'])
            sum_test_preds['vocab'] += np.array(test_preds['vocab'])
        else:
            sum_test_preds += np.array(test_preds)

        if args.dataset_type == 'bert_pretraining':
            if test_preds['features'] is not None:
                debug(f'Model {model_idx} test features rmse = {test_scores["features"]:.6f}')
                writer.add_scalar('test_features_rmse', test_scores['features'], 0)
            test_scores = [test_scores['vocab']]

        # Average test score
        avg_test_score = np.nanmean(test_scores)
        info(f'Model {model_idx} test {args.metric} = {avg_test_score:.6f}')
        writer.add_scalar(f'test_{args.metric}', avg_test_score, 0)

        if args.show_individual_scores:
            # Individual test scores
            for task_name, test_score in zip(args.task_names, test_scores):
                if task_name in desired_labels:
                    info(f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}')
                    writer.add_scalar(f'test_{task_name}_{args.metric}', test_score, n_iter)

    # Evaluate ensemble on test set
    if args.dataset_type == 'bert_pretraining':
        avg_test_preds = {
            'features': (sum_test_preds['features'] / args.ensemble_size).tolist() if sum_test_preds['features'] is not None else None,
            'vocab': (sum_test_preds['vocab'] / args.ensemble_size).tolist()
        }
    else:
        avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()

    if len(test_data ) == 0:  # just return some garbage when we didn't want test data
        ensemble_scores = test_scores
    else:
        ensemble_scores = evaluate_predictions(preds=avg_test_preds,
                                               targets=test_targets,
                                               metric_func=metric_func,
                                               dataset_type=args.dataset_type,
                                               args=args,
                                               logger=logger)

    # Average ensemble score
    if args.dataset_type == 'bert_pretraining':
        if ensemble_scores['features'] is not None:
            info(f'Ensemble test features rmse = {ensemble_scores["features"]:.6f}')
            # `writer` here is the tensorboard writer of the last ensemble member.
            writer.add_scalar('ensemble_test_features_rmse', ensemble_scores['features'], 0)
        ensemble_scores = [ensemble_scores['vocab']]

    avg_ensemble_test_score = np.nanmean(ensemble_scores)
    info(f'Ensemble test {args.metric} = {avg_ensemble_test_score:.6f}')
    writer.add_scalar(f'ensemble_test_{args.metric}', avg_ensemble_test_score, 0)

    # Individual ensemble scores
    if args.show_individual_scores:
        for task_name, ensemble_score in zip(args.task_names, ensemble_scores):
            info(f'Ensemble test {task_name} {args.metric} = {ensemble_score:.6f}')

    return ensemble_scores
def run_training(args: Namespace, logger: Logger = None) -> List[float]:
    """
    Trains an ensemble of models and evaluates each on the test set with its best-validation checkpoint.

    Target scaling and ensemble averaging are currently switched off (see the FIXME notes), so the
    values returned belong to the last trained ensemble member only.

    :param args: Arguments. `task_names`, `num_tasks`, `features_size` and `train_data_size` are written onto it.
    :param logger: Logger. When omitted, messages go to `print`.
    :return: Despite the `List[float]` annotation (kept for interface compatibility), a tuple
             `(test_scores, test_preds, test_smiles_batch)` of the last ensemble member, where
             `test_scores` holds one score per task.
    """
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    # Set GPU
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)

    # Print args
    debug(pformat(vars(args)))

    # Get data
    debug('Loading data')
    # FIXME
    # args.task_names = get_task_names(args.data_path)
    args.task_names = 'test'
    data = get_data(path=args.data_path, args=args, logger=logger)
    args.num_tasks = data.num_tasks()
    args.features_size = data.features_size()
    debug(f'Number of tasks = {args.num_tasks}')

    # Split data
    debug(f'Splitting data with seed {args.seed}')
    if args.separate_test_path:
        test_data = get_data(path=args.separate_test_path, args=args,
                             features_path=args.separate_test_features_path, logger=logger)
    if args.separate_val_path:
        val_data = get_data(path=args.separate_val_path, args=args,
                            features_path=args.separate_val_features_path, logger=logger)

    if args.separate_val_path and args.separate_test_path:
        train_data = data
    elif args.separate_val_path:
        # Validation data is external, so the remainder is divided into train and test only.
        # The sizes are (train, val, test): the test share must sit in the third slot, otherwise
        # the test set comes back empty and 20% of the data is thrown away.
        train_data, _, test_data = split_data(data=data, split_type=args.split_type, sizes=(0.8, 0.0, 0.2),
                                              seed=args.seed, args=args, logger=logger)
    elif args.separate_test_path:
        train_data, val_data, _ = split_data(data=data, split_type=args.split_type, sizes=(0.8, 0.2, 0.0),
                                             seed=args.seed, args=args, logger=logger)
    else:
        train_data, val_data, test_data = split_data(data=data, split_type=args.split_type,
                                                     sizes=args.split_sizes, seed=args.seed,
                                                     args=args, logger=logger)

    if args.dataset_type == 'classification':
        class_sizes = get_class_sizes(data)
        debug('Class sizes')
        for i, task_class_sizes in enumerate(class_sizes):
            debug(f'{args.task_names[i]} '
                  f'{", ".join(f"{cls}: {size * 100:.2f}%" for cls, size in enumerate(task_class_sizes))}')

    if args.save_smiles_splits:
        for dataset, name in [(train_data, 'train'), (val_data, 'val'), (test_data, 'test')]:
            with open(os.path.join(args.save_dir, name + '_smiles.csv'), 'w') as f:
                writer = csv.writer(f)
                writer.writerow(['smiles'])
                for smiles in dataset.smiles():
                    writer.writerow([smiles])

    if args.features_scaling:
        # The scaler is fitted on the training features and then re-used for val and test.
        features_scaler = train_data.normalize_features(replace_nan_token=0)
        val_data.normalize_features(features_scaler)
        test_data.normalize_features(features_scaler)
    else:
        features_scaler = None

    args.train_data_size = len(train_data)

    debug(f'Total size = {len(data):,} | '
          f'train size = {len(train_data):,} | val size = {len(val_data):,} | test size = {len(test_data):,}')

    # Initialize scaler and scale training targets by subtracting mean and dividing standard deviation (regression only)
    # FIXME turn off for atomic prediction development
    # if args.dataset_type == 'regression':
    #     debug('Fitting scaler')
    #     train_smiles, train_targets = train_data.smiles(), train_data.targets()
    #     scaler = StandardScaler().fit(train_targets)
    #     scaled_targets = scaler.transform(train_targets).tolist()
    #     train_data.set_targets(scaled_targets)
    # else:
    #     scaler = None
    scaler = None

    # Get loss and metric functions
    loss_func = get_loss_func(args)
    metric_func = get_metric_func(metric=args.metric)

    # Set up test set evaluation
    test_smiles, test_targets = test_data.smiles(), test_data.targets()
    # Only needed by the (currently disabled) ensemble averaging below.
    if args.dataset_type == 'multiclass':
        sum_test_preds = np.zeros((len(test_smiles), args.num_tasks, args.multiclass_num_classes))
    else:
        sum_test_preds = np.zeros((len(test_smiles), args.num_tasks))

    # Train ensemble of models
    for model_idx in range(args.ensemble_size):
        # Tensorboard writer
        save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
        makedirs(save_dir)
        # The keyword is spelled `log_dir` or `logdir` depending on the SummaryWriter
        # implementation; an unknown keyword surfaces as TypeError, anything else is a real error.
        try:
            writer = SummaryWriter(log_dir=save_dir)
        except TypeError:
            writer = SummaryWriter(logdir=save_dir)

        # Load/build model
        if args.checkpoint_paths is not None:
            debug(f'Loading model {model_idx} from {args.checkpoint_paths[model_idx]}')
            model = load_checkpoint(args.checkpoint_paths[model_idx], current_args=args, logger=logger)
        else:
            debug(f'Building model {model_idx}')
            model = build_model(args)

        debug(model)
        debug(f'Number of parameters = {param_count(model):,}')
        if args.cuda:
            debug('Moving model to cuda')
            model = model.cuda()

        # Ensure that model is saved in correct location for evaluation if 0 epochs
        save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        # Optimizers
        optimizer = build_optimizer(model, args)

        # Learning rate schedulers
        scheduler = build_lr_scheduler(optimizer, args, scheduler_name='Sinexp')

        # Run training
        best_score = float('inf') if args.minimize_score else -float('inf')
        best_epoch, n_iter = 0, 0
        for epoch in trange(args.epochs):
            debug(f'Epoch {epoch}')

            n_iter = train(model=model,
                           data=train_data,
                           loss_func=loss_func,
                           metric_func=metric_func,
                           optimizer=optimizer,
                           scheduler=scheduler,
                           args=args,
                           n_iter=n_iter,
                           logger=logger,
                           writer=writer)
            # Only ExponentialLR is stepped here, once per epoch.
            if isinstance(scheduler, ExponentialLR):
                scheduler.step()
            val_scores = evaluate(model=model,
                                  data=val_data,
                                  num_tasks=args.num_tasks,
                                  metric_func=metric_func,
                                  batch_size=args.batch_size,
                                  dataset_type=args.dataset_type,
                                  scaler=scaler,
                                  logger=logger)

            # FIXME: the single np.nanmean validation score/log line is disabled;
            # every task score is logged individually instead.
            avg_val_scores = val_scores
            avg_val_score_str = ', '.join(f'lss_{i} = {lss:.4e}' for i, lss in enumerate(avg_val_scores))
            for i, avg_val in enumerate(avg_val_scores):
                writer.add_scalar(f'validation_{args.metric}_task_{i}', avg_val, n_iter)
            debug(f'validation_matrix = {avg_val_score_str}')

            if args.show_individual_scores:
                # Individual validation scores
                for task_name, val_score in zip(args.task_names, val_scores):
                    debug(f'Validation {task_name} {args.metric} = {val_score:.6f}')
                    writer.add_scalar(f'validation_{task_name}_{args.metric}', val_score, n_iter)

            # Model selection uses the plain mean over tasks.
            avg_val_score = np.mean(np.array(avg_val_scores))

            # Save model checkpoint if improved validation score
            if args.minimize_score and avg_val_score < best_score or \
                    not args.minimize_score and avg_val_score > best_score:
                best_score, best_epoch = avg_val_score, epoch
                save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        # Evaluate on test set using model with best validation score
        info(f'Model {model_idx} best validation {args.metric} = {best_score:.6f} on epoch {best_epoch}')
        model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda, logger=logger)

        test_preds, test_smiles_batch = predict(model=model,
                                                data=test_data,
                                                batch_size=args.batch_size,
                                                scaler=scaler)
        test_scores = evaluate_predictions(preds=test_preds,
                                           targets=test_targets,
                                           num_tasks=args.num_tasks,
                                           metric_func=metric_func,
                                           dataset_type=args.dataset_type,
                                           logger=logger)

        # FIXME: accumulation into sum_test_preds and the np.nanmean test score are disabled;
        # the per-task scores are reported as they are.
        avg_test_score = test_scores
        avg_test_score_str = ', '.join(f'lss_{i} = {lss:.4e}' for i, lss in enumerate(avg_test_score))
        info(f'Model {model_idx} test {args.metric} = {avg_test_score_str}')
        for i, avg_score in enumerate(avg_test_score):
            writer.add_scalar(f'test_{args.metric}_{i}', avg_score, 0)

        if args.show_individual_scores:
            # Individual test scores
            for task_name, test_score in zip(args.task_names, test_scores):
                info(f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}')
                writer.add_scalar(f'test_{task_name}_{args.metric}', test_score, n_iter)

    # Evaluate ensemble on test set
    # FIXME: ensemble evaluation (averaging sum_test_preds over args.ensemble_size, scoring it with
    # evaluate_predictions and logging 'ensemble_test_<metric>') is disabled; the results of the
    # last ensemble member are returned instead.
    return avg_test_score, test_preds, test_smiles_batch
def run_training(args: Namespace, logger: Logger = None) -> List[float]:
    """
    Trains a model and returns test scores on the model checkpoint with the highest validation score.

    Each ensemble member is trained through `_train`, reloaded from its saved checkpoint and
    used to predict the validation and test sets. The averaged test predictions are then
    scored, compared against the targets and passed to `plot`.

    :param args: Arguments.
    :param logger: Logger. When omitted, messages go to `print`.
    :return: A list of ensemble scores for each task.
    """
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    # Print args:
    debug(pformat(vars(args)))

    # Set GPU (compare against None so that device index 0 is not treated as "unset")
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)

    train_data, val_data, test_data, scaler, features_scaler = \
        get_data(args, logger, debug)

    # Built once and shared by the per-model and the ensemble evaluation.
    metric_func = get_metric_func(metric=args.metric)

    # Set up test set evaluation:
    test_targets = test_data.targets()
    if args.dataset_type == 'multiclass':
        sum_test_preds = np.zeros((len(test_data.smiles()), args.num_tasks, args.multiclass_num_classes))
    else:
        sum_test_preds = np.zeros((len(test_data.smiles()), args.num_tasks))

    # Setup val set evaluation:
    if args.dataset_type == 'multiclass':
        sum_val_preds = np.zeros((len(val_data.smiles()), args.num_tasks, args.multiclass_num_classes))
    else:
        sum_val_preds = np.zeros((len(val_data.smiles()), args.num_tasks))

    # Train ensemble of models:
    for model_idx in range(args.ensemble_size):
        # Tensorboard writer:
        save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
        makedirs(save_dir)
        writer = SummaryWriter(logdir=save_dir)

        # Load/build model:
        if args.checkpoint_paths:
            debug(f'Loading model {model_idx} from'
                  f' {args.checkpoint_paths[model_idx]}')
            model = load_checkpoint(args.checkpoint_paths[model_idx], current_args=args, logger=logger)
        else:
            debug(f'Building model {model_idx}')
            model = build_model(args)

        debug(model)
        debug(f'Number of parameters = {param_count(model):,}')
        if args.cuda:
            debug('Moving model to cuda')
            model = model.cuda()

        best_score, best_epoch, n_iter = _train(args, model, train_data, val_data, scaler,
                                                features_scaler, save_dir, writer, logger, debug)

        # Evaluate on test set using model with best validation score:
        info(f'Model {model_idx} best validation {args.metric} ='
             f' {best_score:.6f} on epoch {best_epoch}')
        model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda, logger=logger)

        # todo: Change code here to analyze the model on the trained data.
        val_preds = predict(model=model, data=val_data, batch_size=args.batch_size, scaler=scaler)
        # len() instead of truthiness: works for lists and arrays alike, and an empty result
        # must not be added to the accumulator.
        if len(val_preds) != 0:
            sum_val_preds += np.array(val_preds)

        test_preds = predict(model=model, data=test_data, batch_size=args.batch_size, scaler=scaler)
        if len(test_preds) != 0:
            sum_test_preds += np.array(test_preds)

        test_scores = evaluate_predictions(preds=test_preds,
                                           targets=test_targets,
                                           num_tasks=args.num_tasks,
                                           metric_func=metric_func,
                                           dataset_type=args.dataset_type,
                                           logger=logger)

        # Average test score
        avg_test_score = np.nanmean(test_scores)
        info(f'Model {model_idx} test {args.metric} = {avg_test_score:.6f}')
        writer.add_scalar(f'test_{args.metric}', avg_test_score, 0)

        if args.show_individual_scores:
            # Individual test scores
            for task_name, test_score in zip(args.task_names, test_scores):
                info(f'Model {model_idx} test {task_name} {args.metric} ='
                     f' {test_score:.6f}')
                writer.add_scalar(f'test_{task_name}_{args.metric}', test_score, n_iter)

    # Evaluate ensemble on test set
    avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()
    avg_val_preds = (sum_val_preds / args.ensemble_size).tolist()

    ensemble_scores = evaluate_predictions(preds=avg_test_preds,
                                           targets=test_targets,
                                           num_tasks=args.num_tasks,
                                           metric_func=metric_func,
                                           dataset_type=args.dataset_type,
                                           logger=logger)

    print('Test Prediction Shape:- ', np.array(avg_test_preds).shape)

    # Flatten to a single row so predictions and targets can be compared element-wise.
    avg_test_preds = np.array(avg_test_preds).reshape(1, -1)
    test_targets = np.array(test_targets).reshape(1, -1)
    avg_val_preds = np.array(avg_val_preds).reshape(1, -1)
    # val_targets = np.array(test_targets).reshape(1, -1)

    # Fraction of test predictions that fall below their target.
    smaller_count = np.sum(avg_test_preds < test_targets)
    smaller_frac = smaller_count / (avg_test_preds.shape[1])
    print('Smaller_Fraction: ', smaller_frac)

    # Plot:
    plot(avg_test_preds, test_targets)

    # Average ensemble score
    avg_ensemble_test_score = np.nanmean(ensemble_scores)
    info(f'Ensemble test {args.metric} = {avg_ensemble_test_score:.6f}')
    # `writer` here is the tensorboard writer of the last ensemble member.
    writer.add_scalar(f'ensemble_test_{args.metric}', avg_ensemble_test_score, 0)

    # Individual ensemble scores
    if args.show_individual_scores:
        for task_name, ensemble_score in zip(args.task_names, ensemble_scores):
            info(f'Ensemble test {task_name} {args.metric} ='
                 f' {ensemble_score:.6f}')

    return ensemble_scores
def run_training(args: Namespace, logger: Logger = None) -> List[float]:
    """
    Trains a model and returns test scores on the model checkpoint with the highest validation score.

    Besides the usual ensemble training/evaluation, this variant accumulates validation
    predictions and, when `args.uncertainty` is set, feeds every trained model to an uncertainty
    estimator whose results are saved and visualised at the end.

    :param args: Arguments. `task_names`, `num_tasks`, `features_size` and `train_data_size` are written onto it.
    :param logger: Logger. When omitted, messages go to `print`.
    :return: A list of ensemble scores for each task.
    """
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    # Set GPU
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)

    # Print args
    debug(pformat(vars(args)))

    # Get data
    debug('Loading data')
    args.task_names = get_task_names(args.data_path)
    data = get_data(path=args.data_path, args=args, logger=logger)
    args.num_tasks = data.num_tasks()
    args.features_size = data.features_size()
    debug(f'Number of tasks = {args.num_tasks}')

    # Split data
    debug(f'Splitting data with seed {args.seed}')
    if args.separate_test_path:
        test_data = get_data(path=args.separate_test_path, args=args,
                             features_path=args.separate_test_features_path, logger=logger)
    if args.separate_val_path:
        val_data = get_data(path=args.separate_val_path, args=args,
                            features_path=args.separate_val_features_path, logger=logger)

    if args.separate_val_path and args.separate_test_path:
        train_data = data
    elif args.separate_val_path:
        # Validation data is external, so the remainder is divided into train and test only.
        # The sizes are (train, val, test): the test share must sit in the third slot, otherwise
        # the test set comes back empty and 20% of the data is thrown away.
        train_data, _, test_data = split_data(data=data, split_type=args.split_type, sizes=(0.8, 0.0, 0.2),
                                              seed=args.seed, args=args, logger=logger)
    elif args.separate_test_path:
        train_data, val_data, _ = split_data(data=data, split_type=args.split_type, sizes=(0.8, 0.2, 0.0),
                                             seed=args.seed, args=args, logger=logger)
    else:
        train_data, val_data, test_data = split_data(data=data, split_type=args.split_type,
                                                     sizes=args.split_sizes, seed=args.seed,
                                                     args=args, logger=logger)

    # test_data = deepcopy(train_data)
    if args.dataset_type == 'classification':
        class_sizes = get_class_sizes(data)
        debug('Class sizes')
        for i, task_class_sizes in enumerate(class_sizes):
            debug(f'{args.task_names[i]} '
                  f'{", ".join(f"{cls}: {size * 100:.2f}%" for cls, size in enumerate(task_class_sizes))}')

    if args.save_smiles_splits:
        # Index the raw data file by SMILES (first column) so full rows and row indices can be written per split.
        with open(args.data_path, 'r') as f:
            reader = csv.reader(f)
            header = next(reader)

            lines_by_smiles = {}
            indices_by_smiles = {}
            for i, line in enumerate(reader):
                smiles = line[0]
                lines_by_smiles[smiles] = line
                indices_by_smiles[smiles] = i

        all_split_indices = []
        for dataset, name in [(train_data, 'train'), (val_data, 'val'), (test_data, 'test')]:
            with open(os.path.join(args.save_dir, name + '_smiles.csv'), 'w') as f:
                writer = csv.writer(f)
                writer.writerow(['smiles'])
                for smiles in dataset.smiles():
                    writer.writerow([smiles])
            with open(os.path.join(args.save_dir, name + '_full.csv'), 'w') as f:
                writer = csv.writer(f)
                writer.writerow(header)
                for smiles in dataset.smiles():
                    writer.writerow(lines_by_smiles[smiles])
            # Sorted once, after all row indices of the split have been collected.
            split_indices = sorted(indices_by_smiles[smiles] for smiles in dataset.smiles())
            all_split_indices.append(split_indices)
        with open(os.path.join(args.save_dir, 'split_indices.pckl'), 'wb') as f:
            pickle.dump(all_split_indices, f)

    if args.features_scaling:
        # The scaler is fitted on the training features and then re-used for val and test.
        features_scaler = train_data.normalize_features(replace_nan_token=0)
        val_data.normalize_features(features_scaler)
        test_data.normalize_features(features_scaler)
    else:
        features_scaler = None

    args.train_data_size = len(train_data)

    debug(f'Total size = {len(data):,} | '
          f'train size = {len(train_data):,} | val size = {len(val_data):,} | test size = {len(test_data):,}')

    # Initialize scaler and scale training targets by subtracting mean and dividing standard deviation (regression only)
    if args.dataset_type == 'regression':
        debug('Fitting scaler')
        train_smiles, train_targets = train_data.smiles(), train_data.targets()
        scaler = StandardScaler().fit(train_targets)
        scaled_targets = scaler.transform(train_targets).tolist()
        train_data.set_targets(scaled_targets)
    else:
        scaler = None

    # Get loss and metric functions
    loss_func = get_loss_func(args)
    metric_func = get_metric_func(metric=args.metric)

    # Set up test set evaluation
    test_smiles, test_targets = test_data.smiles(), test_data.targets()
    val_smiles, val_targets = val_data.smiles(), val_data.targets()
    # Accumulators for per-model predictions; averaged over the ensemble at the end.
    sum_test_preds = np.zeros((len(test_smiles), args.num_tasks))
    sum_val_preds = np.zeros((len(val_smiles), args.num_tasks))

    if args.uncertainty:
        uncertainty_estimator = uncertainty_estimator_builder(args.uncertainty)(
            train_data, val_data, test_data, scaler, args)

    # Train ensemble of models
    for model_idx in range(args.ensemble_size):
        # Tensorboard writer
        save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
        makedirs(save_dir)
        writer = SummaryWriter(log_dir=save_dir)

        # Load/build model
        # For 'snapshot' and 'dropout' only the first iteration creates a model; later iterations
        # keep working with the model left over from the previous one.
        # NOTE(review): only the load/build step is placed under this guard (nesting re-derived
        # from a whitespace-flattened copy). The checkpoint save and optimizer setup below must
        # run every iteration for the later load_checkpoint(save_dir) to succeed - confirm.
        if args.uncertainty not in ['snapshot', 'dropout'] or model_idx == 0:
            if args.checkpoint_paths is not None:
                debug(f'Loading model {model_idx} from {args.checkpoint_paths[model_idx]}')
                model = load_checkpoint(args.checkpoint_paths[model_idx], current_args=args, logger=logger)
            else:
                debug(f'Building model {model_idx}')
                model = build_model(args)

        debug(model)
        debug(f'Number of parameters = {param_count(model):,}')
        if args.cuda:
            debug('Moving model to cuda')
            model = model.cuda()

        # Ensure that model is saved in correct location for evaluation if 0 epochs
        save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        # Optimizers
        optimizer = build_optimizer(model, args)

        # Learning rate schedulers
        scheduler = build_lr_scheduler(optimizer, args)

        # 'snapshot' divides the epoch budget across the ensemble; 'dropout' trains the first model only.
        num_epochs = args.epochs
        if args.uncertainty == 'snapshot':
            num_epochs = num_epochs // args.ensemble_size
        if args.uncertainty == 'dropout' and model_idx != 0:
            num_epochs = 0

        # Run training
        best_score = float('inf') if args.minimize_score else -float('inf')
        best_epoch, n_iter = 0, 0
        for epoch in trange(num_epochs):
            debug(f'Epoch {epoch}')

            train_data_sample = train_data
            # if args.uncertainty == 'bootstrap':
            #     print(train_data)
            #     train_data_sample = sample(train_data, int(args.train_data_size * (1.5 / args.ensemble_size)))

            n_iter = train(
                model=model,
                data=train_data_sample,
                loss_func=loss_func,
                optimizer=optimizer,
                scheduler=scheduler,
                args=args,
                n_iter=n_iter,
                logger=logger,
                writer=writer
            )
            # Only ExponentialLR is stepped here, once per epoch.
            if isinstance(scheduler, ExponentialLR):
                scheduler.step()
            val_scores = evaluate(
                model=model,
                data=val_data,
                num_tasks=args.num_tasks,
                metric_func=metric_func,
                batch_size=args.batch_size,
                dataset_type=args.dataset_type,
                scaler=scaler,
                logger=logger
            )

            # Average validation score
            avg_val_score = np.nanmean(val_scores)
            debug(f'Validation {args.metric} = {avg_val_score:.6f}')
            writer.add_scalar(f'validation_{args.metric}', avg_val_score, n_iter)

            if args.show_individual_scores:
                # Individual validation scores
                for task_name, val_score in zip(args.task_names, val_scores):
                    debug(f'Validation {task_name} {args.metric} = {val_score:.6f}')
                    writer.add_scalar(f'validation_{task_name}_{args.metric}', val_score, n_iter)

            # Save model checkpoint if improved validation score
            if args.minimize_score and avg_val_score < best_score or \
                    not args.minimize_score and avg_val_score > best_score:
                best_score, best_epoch = avg_val_score, epoch
                save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        # Evaluate on test set using model with best validation score
        info(f'Model {model_idx} best validation {args.metric} = {best_score:.6f} on epoch {best_epoch}')
        model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda, logger=logger)

        test_preds = predict(
            model=model,
            data=test_data,
            batch_size=args.batch_size,
            scaler=scaler,
        )
        val_preds = predict(
            model=model,
            data=val_data,
            batch_size=args.batch_size,
            scaler=scaler,
        )
        test_scores = evaluate_predictions(
            preds=test_preds,
            targets=test_targets,
            num_tasks=args.num_tasks,
            metric_func=metric_func,
            dataset_type=args.dataset_type,
            logger=logger
        )

        # Each accumulator is guarded by its own prediction list: an empty result cannot be
        # added to the (n, num_tasks) accumulator, and one empty set must not affect the other.
        if len(test_preds) != 0:
            sum_test_preds += np.array(test_preds)
        if len(val_preds) != 0:
            sum_val_preds += np.array(val_preds)

        # Average test score
        avg_test_score = np.nanmean(test_scores)
        info(f'Model {model_idx} test {args.metric} = {avg_test_score:.6f}')
        writer.add_scalar(f'test_{args.metric}', avg_test_score, 0)

        if args.uncertainty:
            uncertainty_estimator.process_model(model)

        if args.show_individual_scores:
            # Individual test scores
            for task_name, test_score in zip(args.task_names, test_scores):
                info(f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}')
                writer.add_scalar(f'test_{task_name}_{args.metric}', test_score, n_iter)

    # Evaluate ensemble on test set
    avg_test_preds = (sum_test_preds / args.ensemble_size)
    avg_val_preds = (sum_val_preds / args.ensemble_size)

    ensemble_scores = evaluate_predictions(
        preds=avg_test_preds.tolist(),
        targets=test_targets,
        num_tasks=args.num_tasks,
        metric_func=metric_func,
        dataset_type=args.dataset_type,
        logger=logger
    )

    # Average ensemble score
    avg_ensemble_test_score = np.nanmean(ensemble_scores)
    info(f'Ensemble test {args.metric} = {avg_ensemble_test_score:.6f}')
    # `writer` here is the tensorboard writer of the last ensemble member.
    writer.add_scalar(f'ensemble_test_{args.metric}', avg_ensemble_test_score, 0)

    if args.uncertainty:
        val_targets = np.array(val_targets)
        test_targets = np.array(test_targets)

        (val_predictions,
         val_uncertainty,
         test_predictions,
         test_uncertainty) = uncertainty_estimator.compute_uncertainty(avg_val_preds, avg_test_preds)

        UncertaintyEvaluator.save(val_predictions,
                                  val_targets,
                                  val_uncertainty,
                                  test_predictions,
                                  test_targets,
                                  test_uncertainty,
                                  args)
        UncertaintyEvaluator.visualize(args.save_uncertainty, args.uncertainty_evaluation_methods)

    return ensemble_scores
def predict_i(test_data, checkpoint_path='saved_models/qm9_ens_seed60/fold_0/model_0/model.pt'):
    """
    Runs a saved model on raw inputs and returns its two outputs as NumPy arrays.

    The checkpoint is loaded on CPU, a model is rebuilt from the arguments stored
    in the checkpoint, and every stored parameter whose name and shape match the
    rebuilt model is copied into it. The model is then evaluated without gradient
    tracking.

    :param test_data: NumPy array that is converted with ``torch.from_numpy(...).float()``
                      and passed directly to the model.
    :param checkpoint_path: Path of the checkpoint to load. Defaults to the path that
                            was previously hard-coded in this function.
    :return: A tuple ``(model_preds, ale_pred)`` of ``float64`` NumPy arrays. ``ale_pred`` is
             the exponential of the model's second output; both are mapped back through the
             checkpoint's data scaler when one was saved.
    """
    # Load model state and args on CPU regardless of where the checkpoint was saved
    state = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
    args, loaded_state_dict = state['args'], state['state_dict']

    data_scaler_state = state['data_scaler']
    if data_scaler_state is not None:
        scaler = StandardScaler(data_scaler_state['means'], data_scaler_state['stds'])
    else:
        scaler = None

    # These encoder entries are dropped from the checkpoint before loading, so the
    # rebuilt model keeps its own values for them.
    # NOTE(review): presumably the model is used in a features-only mode where the
    # message passing encoder is unused - confirm.
    for key in ['encoder.encoder.cached_zero_vector',
                'encoder.encoder.W_i.weight',
                'encoder.encoder.W_h.weight',
                'encoder.encoder.W_o.weight',
                'encoder.encoder.W_o.bias']:
        loaded_state_dict.pop(key, None)

    # Build model
    model = build_model(args)
    model_state_dict = model.state_dict()

    # Skip missing parameters and parameters of mismatched size
    pretrained_state_dict = {}
    for param_name, loaded_param in loaded_state_dict.items():
        if param_name not in model_state_dict:
            print(f'Pretrained parameter "{param_name}" cannot be found in model parameters.')
        elif model_state_dict[param_name].shape != loaded_param.shape:
            print(f'Pretrained parameter "{param_name}" '
                  f'of shape {loaded_param.shape} does not match corresponding '
                  f'model parameter of shape {model_state_dict[param_name].shape}.')
        else:
            pretrained_state_dict[param_name] = loaded_param

    # Load pretrained weights
    model_state_dict.update(pretrained_state_dict)
    model.load_state_dict(model_state_dict)

    model.eval()
    test_data = torch.from_numpy(test_data).float()

    with torch.no_grad():
        model_preds, ale_pred = model(test_data)
        # NOTE(review): the second output looks like a log-variance (it is exponentiated and
        # later passed to inverse_transform_variance) - confirm against the model definition.
        ale_pred = torch.exp(ale_pred)

    if scaler is not None:
        model_preds = scaler.inverse_transform(model_preds.detach())
        ale_pred = scaler.inverse_transform_variance(ale_pred.detach())

    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    model_preds = np.array(model_preds.tolist(), dtype=np.float64)
    ale_pred = np.array(ale_pred.tolist(), dtype=np.float64)

    return model_preds, ale_pred
def run_training(args: Namespace, logger: Logger = None) -> List[float]:
    """
    Trains an ensemble of models and returns the ensemble's test scores.

    For each ensemble member the checkpoint with the best validation score is reloaded
    and used to predict on the validation and test sets. Test predictions are averaged
    over the ensemble, scored, and a set of diagnostic plots is written to the current
    working directory.

    :param args: Arguments.
    :param logger: Logger. ``print`` is used for all messages when no logger is given.
    :return: A list of ensemble scores for each task.
    """
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    # Set GPU
    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)

    # Print args
    debug(pformat(vars(args)))

    # Get data
    debug('Loading data')
    args.task_names = get_task_names(args.data_path)
    data = get_data(path=args.data_path, args=args, logger=logger)
    args.num_tasks = data.num_tasks()
    args.features_size = data.features_size()
    debug(f'Number of tasks = {args.num_tasks}')

    # Split data
    debug(f'Splitting data with seed {args.seed}')
    if args.separate_test_path:
        test_data = get_data(path=args.separate_test_path, args=args,
                             features_path=args.separate_test_features_path, logger=logger)
    if args.separate_val_path:
        val_data = get_data(path=args.separate_val_path, args=args,
                            features_path=args.separate_val_features_path, logger=logger)

    if args.separate_val_path and args.separate_test_path:
        train_data = data
    elif args.separate_val_path:
        # Validation data comes from its own file, so the remaining data is split into
        # train and test only. The previous sizes (0.8, 0.2, 0.0) put the 20% into the
        # discarded validation slot and left the test set empty.
        train_data, _, test_data = split_data(data=data, split_type=args.split_type,
                                              sizes=(0.8, 0.0, 0.2), seed=args.seed,
                                              args=args, logger=logger)
    elif args.separate_test_path:
        train_data, val_data, _ = split_data(data=data, split_type=args.split_type,
                                             sizes=(0.8, 0.2, 0.0), seed=args.seed,
                                             args=args, logger=logger)
    else:
        train_data, val_data, test_data = split_data(data=data, split_type=args.split_type,
                                                     sizes=args.split_sizes, seed=args.seed,
                                                     args=args, logger=logger)

    if args.dataset_type == 'classification':
        class_sizes = get_class_sizes(data)
        debug('Class sizes')
        for i, task_class_sizes in enumerate(class_sizes):
            debug(f'{args.task_names[i]} '
                  f'{", ".join(f"{cls}: {size * 100:.2f}%" for cls, size in enumerate(task_class_sizes))}')

    if args.save_smiles_splits:
        # Index the original CSV by its first column (the SMILES string) so that each
        # split can be written out with its full rows and original row indices.
        with open(args.data_path, 'r') as f:
            reader = csv.reader(f)
            header = next(reader)

            lines_by_smiles = {}
            indices_by_smiles = {}
            for i, line in enumerate(reader):
                smiles = line[0]
                lines_by_smiles[smiles] = line
                indices_by_smiles[smiles] = i

        all_split_indices = []
        for dataset, name in [(train_data, 'train'), (val_data, 'val'), (test_data, 'test')]:
            with open(os.path.join(args.save_dir, name + '_smiles.csv'), 'w') as f:
                writer = csv.writer(f)
                writer.writerow(['smiles'])
                for smiles in dataset.smiles():
                    writer.writerow([smiles])
            with open(os.path.join(args.save_dir, name + '_full.csv'), 'w') as f:
                writer = csv.writer(f)
                writer.writerow(header)
                for smiles in dataset.smiles():
                    writer.writerow(lines_by_smiles[smiles])
            split_indices = sorted(indices_by_smiles[smiles] for smiles in dataset.smiles())
            all_split_indices.append(split_indices)
        with open(os.path.join(args.save_dir, 'split_indices.pckl'), 'wb') as f:
            pickle.dump(all_split_indices, f)

    if args.features_scaling:
        # Fit the feature scaler on the training set and reuse it for val and test
        features_scaler = train_data.normalize_features(replace_nan_token=0)
        val_data.normalize_features(features_scaler)
        test_data.normalize_features(features_scaler)
    else:
        features_scaler = None

    args.train_data_size = len(train_data)

    debug(f'Total size = {len(data):,} | '
          f'train size = {len(train_data):,} | val size = {len(val_data):,} | test size = {len(test_data):,}')

    # Initialize scaler and scale training targets by subtracting mean and dividing
    # standard deviation (regression only)
    if args.dataset_type == 'regression':
        debug('Fitting scaler')
        train_targets = train_data.targets()
        scaler = StandardScaler().fit(train_targets)
        scaled_targets = scaler.transform(train_targets).tolist()
        train_data.set_targets(scaled_targets)
    else:
        scaler = None

    # Get loss and metric functions
    loss_func = get_loss_func(args)
    metric_func = get_metric_func(metric=args.metric)

    # Set up test set evaluation
    test_smiles, test_targets = test_data.smiles(), test_data.targets()
    if args.dataset_type == 'multiclass':
        sum_test_preds = np.zeros((len(test_smiles), args.num_tasks, args.multiclass_num_classes))
    else:
        sum_test_preds = np.zeros((len(test_smiles), args.num_tasks))

    # Set up val set evaluation
    val_smiles, val_targets = val_data.smiles(), val_data.targets()
    if args.dataset_type == 'multiclass':
        sum_val_preds = np.zeros((len(val_smiles), args.num_tasks, args.multiclass_num_classes))
    else:
        sum_val_preds = np.zeros((len(val_smiles), args.num_tasks))

    # Train ensemble of models
    for model_idx in range(args.ensemble_size):
        # Tensorboard writer
        save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
        makedirs(save_dir)
        writer = SummaryWriter(logdir=save_dir)

        # Load/build model
        if args.checkpoint_paths is not None:
            debug(f'Loading model {model_idx} from {args.checkpoint_paths[model_idx]}')
            model = load_checkpoint(args.checkpoint_paths[model_idx], current_args=args, logger=logger)
        else:
            debug(f'Building model {model_idx}')
            model = build_model(args)

        debug(model)
        debug(f'Number of parameters = {param_count(model):,}')
        if args.cuda:
            debug('Moving model to cuda')
            model = model.cuda()

        # Ensure that model is saved in correct location for evaluation if 0 epochs
        save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        # Optimizers
        optimizer = build_optimizer(model, args)

        # Learning rate schedulers
        scheduler = build_lr_scheduler(optimizer, args)

        # Run training
        best_score = float('inf') if args.minimize_score else -float('inf')
        best_epoch, n_iter = 0, 0
        for epoch in trange(args.epochs):
            debug(f'Epoch {epoch}')

            n_iter = train(
                model=model,
                data=train_data,
                loss_func=loss_func,
                optimizer=optimizer,
                scheduler=scheduler,
                args=args,
                n_iter=n_iter,
                logger=logger,
                writer=writer
            )
            # Only ExponentialLR is stepped here, once per epoch
            if isinstance(scheduler, ExponentialLR):
                scheduler.step()
            val_scores = evaluate(
                model=model,
                data=val_data,
                num_tasks=args.num_tasks,
                metric_func=metric_func,
                batch_size=args.batch_size,
                dataset_type=args.dataset_type,
                scaler=scaler,
                logger=logger
            )

            # Average validation score
            avg_val_score = np.nanmean(val_scores)
            debug(f'Validation {args.metric} = {avg_val_score:.6f}')
            writer.add_scalar(f'validation_{args.metric}', avg_val_score, n_iter)

            if args.show_individual_scores:
                # Individual validation scores
                for task_name, val_score in zip(args.task_names, val_scores):
                    debug(f'Validation {task_name} {args.metric} = {val_score:.6f}')
                    writer.add_scalar(f'validation_{task_name}_{args.metric}', val_score, n_iter)

            # Save model checkpoint if improved validation score
            if args.minimize_score and avg_val_score < best_score or \
                    not args.minimize_score and avg_val_score > best_score:
                best_score, best_epoch = avg_val_score, epoch
                save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

        # Evaluate on test set using model with best validation score
        info(f'Model {model_idx} best validation {args.metric} = {best_score:.6f} on epoch {best_epoch}')
        model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda, logger=logger)

        # TODO: Perhaps change code here in order to analyze the model on the trained data
        val_preds = predict(
            model=model,
            data=val_data,
            batch_size=args.batch_size,
            scaler=scaler
        )
        test_preds = predict(
            model=model,
            data=test_data,
            batch_size=args.batch_size,
            scaler=scaler
        )
        test_scores = evaluate_predictions(
            preds=test_preds,
            targets=test_targets,
            num_tasks=args.num_tasks,
            metric_func=metric_func,
            dataset_type=args.dataset_type,
            logger=logger
        )

        if len(val_preds) != 0:
            sum_val_preds += np.array(val_preds)
        if len(test_preds) != 0:
            sum_test_preds += np.array(test_preds)

        # Average test score
        avg_test_score = np.nanmean(test_scores)
        info(f'Model {model_idx} test {args.metric} = {avg_test_score:.6f}')
        writer.add_scalar(f'test_{args.metric}', avg_test_score, 0)

        if args.show_individual_scores:
            # Individual test scores
            for task_name, test_score in zip(args.task_names, test_scores):
                info(f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}')
                writer.add_scalar(f'test_{task_name}_{args.metric}', test_score, n_iter)

    # Evaluate ensemble on test set
    avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()
    avg_val_preds = (sum_val_preds / args.ensemble_size).tolist()

    ensemble_scores = evaluate_predictions(
        preds=avg_test_preds,
        targets=test_targets,
        num_tasks=args.num_tasks,
        metric_func=metric_func,
        dataset_type=args.dataset_type,
        logger=logger
    )

    print("Test Prediction Shape:- ", np.array(avg_test_preds).shape)

    # Flattened validation arrays. They are not consumed further in this function; the
    # targets are now taken from the validation set (they were previously copied from
    # the test targets by mistake).
    avg_val_preds = np.array(avg_val_preds).reshape(1, -1)
    val_targets = np.array(val_targets).reshape(1, -1)

    _save_prediction_diagnostics(avg_test_preds, test_targets)

    # Average ensemble score
    avg_ensemble_test_score = np.nanmean(ensemble_scores)
    info(f'Ensemble test {args.metric} = {avg_ensemble_test_score:.6f}')
    # NOTE: this logs to the writer of the last ensemble member
    writer.add_scalar(f'ensemble_test_{args.metric}', avg_ensemble_test_score, 0)

    # Individual ensemble scores
    if args.show_individual_scores:
        for task_name, ensemble_score in zip(args.task_names, ensemble_scores):
            info(f'Ensemble test {task_name} {args.metric} = {ensemble_score:.6f}')

    return ensemble_scores


def _save_prediction_diagnostics(avg_test_preds, test_targets) -> None:
    """
    Prints the fraction of under-predictions and saves the test diagnostic plots.

    :param avg_test_preds: Nested list of ensemble-averaged test predictions.
    :param test_targets: Nested list of test targets.
    """
    # dtype=float turns missing (None) targets into NaN instead of producing an object
    # array that cannot be compared or plotted.
    flat_preds = np.array(avg_test_preds, dtype=float).reshape(1, -1)
    flat_targets = np.array(test_targets, dtype=float).reshape(1, -1)

    # Predictions with an extra class axis (multiclass) cannot be compared element-wise
    # with the targets; skip the plots rather than fail after training has finished.
    if flat_preds.shape != flat_targets.shape:
        print("Skipping prediction plots: predictions and targets differ in size.")
        return

    smaller_count = np.sum(flat_preds < flat_targets)
    smaller_frac = smaller_count / (flat_preds.shape[1])
    print("Smaller_Fraction: ", smaller_frac)

    # Same scatter saved once per marker style; file names are kept as they were
    for marker in ('ro', 'yo', 'rx', 'yx'):
        _save_prediction_scatter(flat_preds, flat_targets, marker,
                                 f"Prediction_Distriution_{marker}.png")

    # Signed error against the target, with a zero reference line
    x = np.linspace(-7, 3, 100)
    plt.plot(x, x - x, '-g')
    plt.plot(flat_targets, flat_preds - flat_targets, 'rx')
    plt.xlabel("Test Targets")
    plt.ylabel("Test Errors")
    plt.title("Prediction Errors")
    plt.savefig("Prediction_Errors.png")
    plt.clf()


def _save_prediction_scatter(preds, targets, marker: str, filename: str) -> None:
    """Saves a targets-versus-predictions scatter with a y = x reference line to filename."""
    plt.plot(preds, targets, marker)
    x = np.linspace(-7, 3, 100)
    plt.plot(x, x, '-g')
    plt.xlabel("Test Predictions")
    plt.ylabel("Test Targets")
    plt.title("Prediction Distribution")
    plt.savefig(filename)
    plt.clf()
def predict_autograd(test_data, checkpoint_path='saved_models/curved_gn5_fix/fold_0/model_0/model.pt'):
    """
    Runs a saved model on raw inputs and also reports input-gradient statistics.

    The checkpoint is loaded on CPU, a model is rebuilt from the arguments stored in the
    checkpoint, and every stored parameter whose name and shape match the rebuilt model is
    copied into it. The inputs are wrapped so that gradients with respect to them are
    tracked, and ``backward()`` is called on the model's first output.

    :param test_data: NumPy array that is converted with ``torch.from_numpy(...).float()``
                      and passed directly to the model.
    :param checkpoint_path: Path of the checkpoint to load. Defaults to the path that
                            was previously hard-coded in this function.
    :return: A tuple ``(model_preds, ale_pred, grad_rms, grad_max)``. The first two are
             ``float64`` NumPy arrays (``ale_pred`` is the exponential of the model's second
             output, both mapped back through the checkpoint's data scaler when one was
             saved). ``grad_rms`` is ``sqrt(sum(grad ** 2) / 1000)`` and ``grad_max`` is the
             largest absolute entry of the input gradient, both as NumPy values.
    """
    # Load model state and args on CPU regardless of where the checkpoint was saved
    state = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
    args, loaded_state_dict = state['args'], state['state_dict']

    data_scaler_state = state['data_scaler']
    if data_scaler_state is not None:
        scaler = StandardScaler(data_scaler_state['means'], data_scaler_state['stds'])
    else:
        scaler = None

    print(args.features_only)

    # These encoder entries are dropped from the checkpoint before loading, so the
    # rebuilt model keeps its own values for them.
    for key in [
            'encoder.encoder.cached_zero_vector',
            'encoder.encoder.W_i.weight',
            'encoder.encoder.W_h.weight',
            'encoder.encoder.W_o.weight',
            'encoder.encoder.W_o.bias'
    ]:
        loaded_state_dict.pop(key, None)

    # Build model
    model = build_model(args)
    model_state_dict = model.state_dict()

    # Skip missing parameters and parameters of mismatched size
    pretrained_state_dict = {}
    for param_name, loaded_param in loaded_state_dict.items():
        if param_name not in model_state_dict:
            print(f'Pretrained parameter "{param_name}" cannot be found in model parameters.')
        elif model_state_dict[param_name].shape != loaded_param.shape:
            print(f'Pretrained parameter "{param_name}" '
                  f'of shape {loaded_param.shape} does not match corresponding '
                  f'model parameter of shape {model_state_dict[param_name].shape}.')
        else:
            pretrained_state_dict[param_name] = loaded_param

    # Load pretrained weights
    model_state_dict.update(pretrained_state_dict)
    model.load_state_dict(model_state_dict)

    model.eval()

    # NOTE(review): `v` is defined elsewhere in the file - presumably an alias of
    # torch.autograd.Variable; confirm.
    test_data = v(torch.from_numpy(test_data).float(), requires_grad=True)

    model_preds, ale_pred = model(test_data)
    # NOTE(review): the second output looks like a log-variance (it is exponentiated and
    # later passed to inverse_transform_variance) - confirm against the model definition.
    ale_pred = torch.exp(ale_pred)

    # NOTE(review): backward() without an explicit gradient only works for a scalar
    # output - confirm that callers pass inputs for which model_preds has one element.
    model_preds.backward()

    if scaler is not None:
        model_preds = scaler.inverse_transform(model_preds.detach())
        ale_pred = scaler.inverse_transform_variance(ale_pred.detach())

    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    model_preds = np.array(model_preds.tolist(), dtype=np.float64)
    ale_pred = np.array(ale_pred.tolist(), dtype=np.float64)

    input_grad = test_data.grad.data
    # NOTE(review): the divisor 1000 is hard-coded; presumably the expected number of
    # input elements - verify against the caller before relying on grad_rms.
    grad_rms = torch.sqrt(torch.sum(torch.square(input_grad)) / 1000).numpy()
    grad_max = torch.max(torch.sqrt(torch.square(input_grad))).numpy()

    return model_preds, ale_pred, grad_rms, grad_max