def class_balance(data_path: str, split_type: str):
    # Update args
    args.val_fold_index, args.test_fold_index = 1, 2
    args.split_type = 'predetermined'

    # Load data
    data = get_data(path=args.data_path,
                    smiles_columns=args.smiles_column,
                    target_columns=args.target_columns)
    args.task_names = args.target_columns or get_task_names(
        path=args.data_path, smiles_columns=args.smiles_column)

    # Average class sizes
    all_class_sizes = {'train': [], 'val': [], 'test': []}

    for fold_index in range(10):
        print(f'Fold {fold_index}')

        # Update args
        data_name = os.path.splitext(os.path.basename(data_path))[0]
        args.folds_file = f'/data/rsg/chemistry/yangk/lsc_experiments_dump_splits/data/{data_name}/{split_type}/fold_{fold_index}/0/split_indices.pckl'

        if not os.path.exists(args.folds_file):
            print('Fold indices do not exist')
            continue

        # Split data
        train_data, val_data, test_data = split_data(
            data=data, split_type=args.split_type, args=args)

        # Determine class balance
        for data_split, split_name in [(train_data, 'train'), (val_data, 'val'), (test_data, 'test')]:
            class_sizes = get_class_sizes(data_split)
            print(f'Class sizes for {split_name}')

            # Note: renamed from `i` to avoid shadowing the fold loop variable
            for task_num, task_class_sizes in enumerate(class_sizes):
                print(
                    f'{args.task_names[task_num]} '
                    f'{", ".join(f"{cls}: {size * 100:.2f}%" for cls, size in enumerate(task_class_sizes))}'
                )

            all_class_sizes[split_name].append(class_sizes)

        print()

    # Mean and std across folds
    for split_name in ['train', 'val', 'test']:
        print(f'Average class sizes for {split_name}')

        mean_class_sizes, std_class_sizes = np.mean(all_class_sizes[split_name], axis=0), \
            np.std(all_class_sizes[split_name], axis=0)

        for task_num, (mean_task_class_sizes, std_task_class_sizes) in enumerate(
                zip(mean_class_sizes, std_class_sizes)):
            print(
                f'{args.task_names[task_num]} '
                f'{", ".join(f"{cls}: {mean_size * 100:.2f}% +/- {std_size * 100:.2f}%" for cls, (mean_size, std_size) in enumerate(zip(mean_task_class_sizes, std_task_class_sizes)))}'
            )
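# --- Illustration (not part of the original source) ---
# A minimal sketch of the per-task class-size computation that get_class_sizes
# is assumed to perform above: for each binary task, the fraction of datapoints
# in each class, skipping missing labels. The function name and NaN-for-missing
# convention are assumptions for illustration only.
import numpy as np


def sketch_class_sizes(targets: np.ndarray) -> list:
    """For a (num_datapoints, num_tasks) 0/1 target matrix, return per-task
    [fraction_of_0s, fraction_of_1s], ignoring missing (NaN) entries."""
    class_sizes = []
    for task in targets.T:  # iterate over tasks (columns)
        task = task[~np.isnan(task)]  # drop missing targets
        frac_ones = task.mean() if len(task) > 0 else 0.0
        class_sizes.append([1 - frac_ones, frac_ones])
    return class_sizes


# Two tasks, four molecules; NaN marks a missing label for task 2.
print(sketch_class_sizes(np.array([[0, 1], [1, 1], [0, np.nan], [1, 0]])))
# [[0.5, 0.5], [0.333..., 0.666...]]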
def test_ignore_columns(self):
    """Test behavior with ignore columns specified"""
    task_names = get_task_names(
        path='dummy_path.txt',
        smiles_columns=None,
        target_columns=None,
        ignore_columns=['column2', 'column3'],
    )
    self.assertEqual(task_names, ['column4'])
def test_default_no_arguments(self):
    """Test default behavior with no arguments"""
    task_names = get_task_names(
        path='dummy_path.txt',
        smiles_columns=None,
        target_columns=None,
        ignore_columns=None,
    )
    self.assertEqual(task_names, ['column2', 'column3', 'column4'])
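# --- Illustration (not part of the original source) ---
# The two tests above assume a data file whose header is
# column1,column2,column3,column4, with SMILES in the first column. A hedged,
# self-contained sketch of the same default behavior against a real temp file;
# the import location is assumed from chemprop's layout.
import csv
import tempfile

from chemprop.data.utils import get_task_names  # assumed import location

with tempfile.NamedTemporaryFile('w', suffix='.csv', delete=False) as f:
    writer = csv.writer(f)
    writer.writerow(['column1', 'column2', 'column3', 'column4'])
    writer.writerow(['CCO', '0', '1', '0'])  # one illustrative data row
    path = f.name

# With no arguments, every non-SMILES column is treated as a target column.
assert get_task_names(path=path, smiles_columns=None,
                      target_columns=None, ignore_columns=None) == ['column2', 'column3', 'column4']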
def save_smiles_splits(
    data_path: str,
    save_dir: str,
    task_names: List[str] = None,
    features_path: List[str] = None,
    train_data: MoleculeDataset = None,
    val_data: MoleculeDataset = None,
    test_data: MoleculeDataset = None,
    logger: logging.Logger = None,
    smiles_columns: List[str] = None,
) -> None:
    """
    Saves a csv file with train/val/test splits of target data and additional features.
    Also saves indices of train/val/test split as a pickle file. Pickle file does not support
    repeated entries with the same SMILES or entries entered from a path other than the main
    data path, such as a separate test path.

    :param data_path: Path to data CSV file.
    :param save_dir: Path where pickle files will be saved.
    :param task_names: List of target names for the model as from the function get_task_names().
        If not provided, will use datafile header entries.
    :param features_path: List of path(s) to files with additional molecule features.
    :param train_data: Train :class:`~chemprop.data.data.MoleculeDataset`.
    :param val_data: Validation :class:`~chemprop.data.data.MoleculeDataset`.
    :param test_data: Test :class:`~chemprop.data.data.MoleculeDataset`.
    :param smiles_columns: The name of the column containing SMILES. By default, uses the first column.
    :param logger: A logger for recording output.
    """
    makedirs(save_dir)
    info = logger.info if logger is not None else print
    save_split_indices = True

    if not isinstance(smiles_columns, list):
        smiles_columns = preprocess_smiles_columns(path=data_path, smiles_columns=smiles_columns)

    with open(data_path) as f:
        reader = csv.DictReader(f)

        indices_by_smiles = {}
        for i, row in enumerate(tqdm(reader)):
            smiles = tuple([row[column] for column in smiles_columns])
            if smiles in indices_by_smiles:
                save_split_indices = False
                info(
                    "Warning: Repeated SMILES found in data, pickle file of split indices "
                    "cannot distinguish entries and will not be generated."
                )
                break
            indices_by_smiles[smiles] = i

    if task_names is None:
        task_names = get_task_names(path=data_path, smiles_columns=smiles_columns)

    features_header = []
    if features_path is not None:
        for feat_path in features_path:
            with open(feat_path, "r") as f:
                reader = csv.reader(f)
                feat_header = next(reader)
                features_header.extend(feat_header)

    all_split_indices = []
    for dataset, name in [(train_data, "train"), (val_data, "val"), (test_data, "test")]:
        if dataset is None:
            continue

        with open(os.path.join(save_dir, f"{name}_smiles.csv"), "w") as f:
            writer = csv.writer(f)
            if smiles_columns[0] == "":
                writer.writerow(["smiles"])
            else:
                writer.writerow(smiles_columns)
            for smiles in dataset.smiles():
                writer.writerow(smiles)

        with open(os.path.join(save_dir, f"{name}_full.csv"), "w") as f:
            writer = csv.writer(f)
            writer.writerow(smiles_columns + task_names)
            dataset_targets = dataset.targets()
            for i, smiles in enumerate(dataset.smiles()):
                writer.writerow(smiles + dataset_targets[i])

        if features_path is not None:
            dataset_features = dataset.features()
            with open(os.path.join(save_dir, f"{name}_features.csv"), "w") as f:
                writer = csv.writer(f)
                writer.writerow(features_header)
                writer.writerows(dataset_features)

        if save_split_indices:
            split_indices = []
            for smiles in dataset.smiles():
                index = indices_by_smiles.get(tuple(smiles))
                if index is None:
                    save_split_indices = False
                    info(
                        f"Warning: SMILES string in {name} could not be found in data file, and "
                        "likely came from a secondary data file. The pickle file of split indices "
                        "can only indicate indices for a single file and will not be generated."
                    )
                    break
                split_indices.append(index)
            else:
                split_indices.sort()
                all_split_indices.append(split_indices)

        if name == "train":
            data_weights = dataset.data_weights()
            if any([w != 1 for w in data_weights]):
                with open(os.path.join(save_dir, f"{name}_weights.csv"), "w") as f:
                    writer = csv.writer(f)
                    writer.writerow(["data weights"])
                    for weight in data_weights:
                        writer.writerow([weight])

    if save_split_indices:
        with open(os.path.join(save_dir, "split_indices.pckl"), "wb") as f:
            pickle.dump(all_split_indices, f)
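# --- Illustration (not part of the original source) ---
# A hedged usage sketch of save_smiles_splits, mirroring the call made from
# run_sklearn further below: split the data first, then persist the per-split
# SMILES, full-data, and split-index files. Paths and split settings are
# illustrative; import locations are assumed from chemprop's layout.
from chemprop.data.utils import get_data, split_data  # assumed import locations

data = get_data(path='data/tox21.csv')
train_data, val_data, test_data = split_data(data=data, split_type='random',
                                             sizes=(0.8, 0.1, 0.1), seed=0)
save_smiles_splits(
    data_path='data/tox21.csv',
    save_dir='splits/tox21',
    train_data=train_data,
    val_data=val_data,
    test_data=test_data,
)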
def train():
    """Renders the train page and performs training if request method is POST."""
    global PROGRESS, TRAINING

    warnings, errors = [], []

    if request.method == 'GET':
        return render_train()

    # Get arguments
    data_name, epochs, ensemble_size, checkpoint_name = \
        request.form['dataName'], int(request.form['epochs']), \
        int(request.form['ensembleSize']), request.form['checkpointName']
    gpu = request.form.get('gpu')
    data_path = os.path.join(app.config['DATA_FOLDER'], f'{data_name}.csv')
    dataset_type = request.form.get('datasetType', 'regression')

    # Create and modify args
    args = TrainArgs().parse_args([
        '--data_path', data_path,
        '--dataset_type', dataset_type,
        '--epochs', str(epochs),
        '--ensemble_size', str(ensemble_size)
    ])

    # Get task names
    args.task_names = get_task_names(path=data_path)

    # Check if regression/classification selection matches data
    data = get_data(path=data_path)
    targets = data.targets()
    unique_targets = {target for row in targets for target in row if target is not None}

    if dataset_type == 'classification' and len(unique_targets - {0, 1}) > 0:
        errors.append('Selected classification dataset but not all labels are 0 or 1. Select regression instead.')
        return render_train(warnings=warnings, errors=errors)

    if dataset_type == 'regression' and unique_targets <= {0, 1}:
        errors.append('Selected regression dataset but all labels are 0 or 1. Select classification instead.')
        return render_train(warnings=warnings, errors=errors)

    if gpu is not None:
        if gpu == 'None':
            args.cuda = False
        else:
            args.gpu = int(gpu)

    current_user = request.cookies.get('currentUser')

    if not current_user:
        # Use DEFAULT as current user if the client's cookie is not set.
        current_user = app.config['DEFAULT_USER_ID']

    ckpt_id, ckpt_name = db.insert_ckpt(checkpoint_name,
                                        current_user,
                                        args.dataset_type,
                                        args.epochs,
                                        args.ensemble_size,
                                        len(targets))

    with TemporaryDirectory() as temp_dir:
        args.save_dir = temp_dir

        process = mp.Process(target=progress_bar, args=(args, PROGRESS))
        process.start()
        TRAINING = 1

        # Run training
        logger = create_logger(name=TRAIN_LOGGER_NAME, save_dir=args.save_dir, quiet=args.quiet)
        task_scores = run_training(args, logger)
        process.join()

        # Reset globals
        TRAINING = 0
        PROGRESS = mp.Value('d', 0.0)

        # Check if name overlap
        if checkpoint_name != ckpt_name:
            warnings.append(name_already_exists_message('Checkpoint', checkpoint_name, ckpt_name))

        # Move models
        for root, _, files in os.walk(args.save_dir):
            for fname in files:
                if fname.endswith('.pt'):
                    model_id = db.insert_model(ckpt_id)
                    save_path = os.path.join(app.config['CHECKPOINT_FOLDER'], f'{model_id}.pt')
                    shutil.move(os.path.join(args.save_dir, root, fname), save_path)

    return render_train(trained=True,
                        metric=args.metric,
                        num_tasks=len(args.task_names),
                        task_names=args.task_names,
                        task_scores=format_float_list(task_scores),
                        mean_score=format_float(np.mean(task_scores)),
                        warnings=warnings,
                        errors=errors)
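# --- Illustration (not part of the original source) ---
# The regression/classification guard above boils down to set algebra: a
# dataset is classification-shaped exactly when every observed target is in
# {0, 1}. A minimal standalone sketch of that check (function name is an
# assumption for illustration):
def dataset_type_matches(targets, dataset_type: str) -> bool:
    """targets: list of per-molecule target rows, possibly containing None."""
    unique_targets = {t for row in targets for t in row if t is not None}
    if dataset_type == 'classification':
        return len(unique_targets - {0, 1}) == 0  # nothing outside {0, 1}
    return not unique_targets <= {0, 1}  # regression should not be all-binary


assert dataset_type_matches([[0, 1], [1, None]], 'classification')
assert dataset_type_matches([[0.3], [1.7]], 'regression')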
def cross_validate(
    args: TrainArgs,
    train_func: Callable[[TrainArgs, MoleculeDataset, Logger], Dict[str, List[float]]]
) -> Tuple[float, float]:
    """
    Runs k-fold cross-validation.

    For each of k splits (folds) of the data, trains and tests a model on that split
    and aggregates the performance across folds.

    :param args: A :class:`~chemprop.args.TrainArgs` object containing arguments for
                 loading data and training the Chemprop model.
    :param train_func: Function which runs training.
    :return: A tuple containing the mean and standard deviation performance across folds.
    """
    logger = create_logger(name=TRAIN_LOGGER_NAME, save_dir=args.save_dir, quiet=args.quiet)
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    # Initialize relevant variables
    init_seed = args.seed
    save_dir = args.save_dir
    args.task_names = get_task_names(path=args.data_path,
                                     smiles_column=args.smiles_column,
                                     target_columns=args.target_columns,
                                     ignore_columns=args.ignore_columns)

    # Print command line
    debug('Command line')
    debug(f'python {" ".join(sys.argv)}')

    # Print args
    debug('Args')
    debug(args)

    # Save args
    args.save(os.path.join(args.save_dir, 'args.json'))

    # Get data
    debug('Loading data')
    data = get_data(path=args.data_path, args=args, logger=logger, skip_none_targets=True)
    validate_dataset_type(data, dataset_type=args.dataset_type)
    args.features_size = data.features_size()
    debug(f'Number of tasks = {args.num_tasks}')

    # Run training on different random seeds for each fold
    all_scores = defaultdict(list)
    for fold_num in range(args.num_folds):
        info(f'Fold {fold_num}')
        args.seed = init_seed + fold_num
        args.save_dir = os.path.join(save_dir, f'fold_{fold_num}')
        makedirs(args.save_dir)
        model_scores = train_func(args, deepcopy(data), logger)  # deepcopy since data may be modified
        for metric, scores in model_scores.items():
            all_scores[metric].append(scores)
    all_scores = dict(all_scores)

    # Convert scores to numpy arrays
    for metric, scores in all_scores.items():
        all_scores[metric] = np.array(scores)

    # Report results
    info(f'{args.num_folds}-fold cross validation')

    # Report scores for each fold
    for fold_num in range(args.num_folds):
        for metric, scores in all_scores.items():
            info(f'\tSeed {init_seed + fold_num} ==> test {metric} = {np.nanmean(scores[fold_num]):.6f}')

            if args.show_individual_scores:
                for task_name, score in zip(args.task_names, scores[fold_num]):
                    info(f'\t\tSeed {init_seed + fold_num} ==> test {task_name} {metric} = {score:.6f}')

    # Report scores across folds
    for metric, scores in all_scores.items():
        avg_scores = np.nanmean(scores, axis=1)  # average score for each model across tasks
        mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores)
        info(f'Overall test {metric} = {mean_score:.6f} +/- {std_score:.6f}')

        if args.show_individual_scores:
            for task_num, task_name in enumerate(args.task_names):
                info(f'\tOverall test {task_name} {metric} = '
                     f'{np.nanmean(scores[:, task_num]):.6f} +/- {np.nanstd(scores[:, task_num]):.6f}')

    # Save scores
    with open(os.path.join(save_dir, TEST_SCORES_FILE_NAME), 'w') as f:
        writer = csv.writer(f)

        header = ['Task']
        for metric in args.metrics:
            header += [f'Mean {metric}', f'Standard deviation {metric}'] + \
                      [f'Fold {i} {metric}' for i in range(args.num_folds)]
        writer.writerow(header)

        for task_num, task_name in enumerate(args.task_names):
            row = [task_name]
            for metric, scores in all_scores.items():
                task_scores = scores[:, task_num]
                mean, std = np.nanmean(task_scores), np.nanstd(task_scores)
                row += [mean, std] + task_scores.tolist()
            writer.writerow(row)

    # Determine mean and std score of main metric
    avg_scores = np.nanmean(all_scores[args.metric], axis=1)
    mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores)

    # Optionally merge and save test preds
    if args.save_preds:
        all_preds = pd.concat([pd.read_csv(os.path.join(save_dir, f'fold_{fold_num}', 'test_preds.csv'))
                               for fold_num in range(args.num_folds)])
        all_preds.to_csv(os.path.join(save_dir, 'test_preds.csv'), index=False)

    return mean_score, std_score
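# --- Illustration (not part of the original source) ---
# A hedged sketch of how this cross_validate is assumed to be driven, following
# chemprop's command-line entry point: parse TrainArgs, then pass run_training
# as the per-fold training function. Paths and settings are illustrative.
from chemprop.args import TrainArgs
from chemprop.train import run_training  # assumed import locations

args = TrainArgs().parse_args([
    '--data_path', 'data/tox21.csv',
    '--dataset_type', 'classification',
    '--save_dir', 'ckpts/tox21',
    '--num_folds', '3',
])
mean_score, std_score = cross_validate(args=args, train_func=run_training)
print(f'{args.metric}: {mean_score:.4f} +/- {std_score:.4f}')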
def run_sklearn(args: SklearnTrainArgs,
                data: MoleculeDataset,
                logger: Logger = None) -> Dict[str, List[float]]:
    """
    Loads data, trains a scikit-learn model, and returns test scores for the model checkpoint
    with the highest validation score.

    :param args: A :class:`~chemprop.args.SklearnTrainArgs` object containing arguments for
                 loading data and training the scikit-learn model.
    :param data: A :class:`~chemprop.data.MoleculeDataset` containing the data.
    :param logger: A logger to record output.
    :return: A dictionary mapping each metric in :code:`metrics` to a list of values for each task.
    """
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    debug(pformat(vars(args)))

    debug('Loading data')
    data = get_data(path=args.data_path,
                    smiles_columns=args.smiles_columns,
                    target_columns=args.target_columns)
    args.task_names = get_task_names(path=args.data_path,
                                     smiles_columns=args.smiles_columns,
                                     target_columns=args.target_columns,
                                     ignore_columns=args.ignore_columns)

    if args.model_type == 'svm' and data.num_tasks() != 1:
        raise ValueError(f'SVM can only handle single-task data but found {data.num_tasks()} tasks')

    debug(f'Splitting data with seed {args.seed}')
    # Need to have val set so that train and test sets are the same as when doing MPN
    train_data, _, test_data = split_data(data=data,
                                          split_type=args.split_type,
                                          seed=args.seed,
                                          sizes=args.split_sizes,
                                          num_folds=args.num_folds,
                                          args=args)

    if args.save_smiles_splits:
        save_smiles_splits(
            data_path=args.data_path,
            save_dir=args.save_dir,
            task_names=args.task_names,
            features_path=args.features_path,
            train_data=train_data,
            test_data=test_data,
            smiles_columns=args.smiles_columns,
        )

    debug(f'Total size = {len(data):,} | train size = {len(train_data):,} | test size = {len(test_data):,}')

    debug('Computing morgan fingerprints')
    morgan_fingerprint = get_features_generator('morgan')
    for dataset in [train_data, test_data]:
        for datapoint in tqdm(dataset, total=len(dataset)):
            for s in datapoint.smiles:
                datapoint.extend_features(morgan_fingerprint(mol=s, radius=args.radius, num_bits=args.num_bits))

    debug('Building model')
    if args.dataset_type == 'regression':
        if args.model_type == 'random_forest':
            model = RandomForestRegressor(n_estimators=args.num_trees, n_jobs=-1, random_state=args.seed)
        elif args.model_type == 'svm':
            model = SVR()
        else:
            raise ValueError(f'Model type "{args.model_type}" not supported')
    elif args.dataset_type == 'classification':
        if args.model_type == 'random_forest':
            model = RandomForestClassifier(n_estimators=args.num_trees, n_jobs=-1, class_weight=args.class_weight)
        elif args.model_type == 'svm':
            model = SVC()
        else:
            raise ValueError(f'Model type "{args.model_type}" not supported')
    else:
        raise ValueError(f'Dataset type "{args.dataset_type}" not supported')

    debug(model)

    model.train_args = args.as_dict()

    debug('Training')
    if args.single_task:
        scores = single_task_sklearn(model=model,
                                     train_data=train_data,
                                     test_data=test_data,
                                     metrics=args.metrics,
                                     args=args,
                                     logger=logger)
    else:
        scores = multi_task_sklearn(model=model,
                                    train_data=train_data,
                                    test_data=test_data,
                                    metrics=args.metrics,
                                    args=args,
                                    logger=logger)

    for metric in args.metrics:
        info(f'Test {metric} = {np.nanmean(scores[metric])}')

    return scores
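# --- Illustration (not part of the original source) ---
# run_sklearn plugs into the same cross-validation harness as the MPN trainer.
# A hedged sketch of the assumed entry point, with illustrative arguments:
from chemprop.args import SklearnTrainArgs  # assumed import location

args = SklearnTrainArgs().parse_args([
    '--data_path', 'data/freesolv.csv',
    '--dataset_type', 'regression',
    '--model_type', 'random_forest',
    '--save_dir', 'ckpts/freesolv_rf',
])
mean_score, std_score = cross_validate(args=args, train_func=run_sklearn)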
def save_smiles_splits(data_path: str,
                       save_dir: str,
                       task_names: List[str] = None,
                       features_path: List[str] = None,
                       train_data: MoleculeDataset = None,
                       val_data: MoleculeDataset = None,
                       test_data: MoleculeDataset = None,
                       smiles_columns: List[str] = None) -> None:
    """
    Saves a csv file with train/val/test splits of target data and additional features.
    Also saves indices of train/val/test split as a pickle file.
    Pickle file does not support repeated entries with the same SMILES.

    :param data_path: Path to data CSV file.
    :param save_dir: Path where pickle files will be saved.
    :param task_names: List of target names for the model as from the function get_task_names().
        If not provided, will use datafile header entries.
    :param features_path: List of path(s) to files with additional molecule features.
    :param train_data: Train :class:`~chemprop.data.data.MoleculeDataset`.
    :param val_data: Validation :class:`~chemprop.data.data.MoleculeDataset`.
    :param test_data: Test :class:`~chemprop.data.data.MoleculeDataset`.
    :param smiles_columns: The name of the column containing SMILES. By default, uses the first column.
    """
    makedirs(save_dir)

    if not isinstance(smiles_columns, list):
        smiles_columns = preprocess_smiles_columns(path=data_path, smiles_columns=smiles_columns)

    with open(data_path) as f:
        reader = csv.DictReader(f)

        indices_by_smiles = {}
        for i, row in enumerate(tqdm(reader)):
            smiles = tuple([row[column] for column in smiles_columns])
            indices_by_smiles[smiles] = i

    if task_names is None:
        task_names = get_task_names(path=data_path, smiles_columns=smiles_columns)

    features_header = []
    if features_path is not None:
        for feat_path in features_path:
            with open(feat_path, 'r') as f:
                reader = csv.reader(f)
                feat_header = next(reader)
                features_header.extend(feat_header)

    all_split_indices = []
    for dataset, name in [(train_data, 'train'), (val_data, 'val'), (test_data, 'test')]:
        if dataset is None:
            continue

        with open(os.path.join(save_dir, f'{name}_smiles.csv'), 'w', newline='') as f:
            writer = csv.writer(f)  # , lineterminator='\n'
            if smiles_columns[0] == '':
                writer.writerow(['smiles'])
            else:
                writer.writerow(smiles_columns)
            for smiles in dataset.smiles():
                writer.writerow(smiles)

        with open(os.path.join(save_dir, f'{name}_full.csv'), 'w', newline='') as f:
            writer = csv.writer(f)  # , lineterminator='\n'
            writer.writerow(smiles_columns + task_names)
            dataset_targets = dataset.targets()
            for i, smiles in enumerate(dataset.smiles()):
                writer.writerow(smiles + dataset_targets[i])

        dataset_features = dataset.features()
        if features_path is not None:
            with open(os.path.join(save_dir, f'{name}_features.csv'), 'w', newline='') as f:
                writer = csv.writer(f)  # , lineterminator='\n'
                writer.writerow(features_header)
                writer.writerows(dataset_features)

        split_indices = []
        for smiles in dataset.smiles():
            split_indices.append(indices_by_smiles.get(tuple(smiles)))
        split_indices = sorted(split_indices)
        all_split_indices.append(split_indices)

    with open(os.path.join(save_dir, 'split_indices.pckl'), 'wb') as f:
        pickle.dump(all_split_indices, f)
def cross_validate(args: TrainArgs, logger: Logger = None) -> Tuple[float, float]:
    """
    Runs k-fold cross-validation for a Chemprop model.

    For each of k splits (folds) of the data, trains and tests a model on that split
    and aggregates the performance across folds.

    :param args: A :class:`~chemprop.args.TrainArgs` object containing arguments for
                 loading data and training the Chemprop model.
    :param logger: A logger for recording output.
    :return: A tuple containing the mean and standard deviation performance across folds.
    """
    info = logger.info if logger is not None else print

    # Initialize relevant variables
    init_seed = args.seed
    save_dir = args.save_dir
    args.task_names = get_task_names(path=args.data_path,
                                     smiles_column=args.smiles_column,
                                     target_columns=args.target_columns,
                                     ignore_columns=args.ignore_columns)

    # Run training on different random seeds for each fold
    all_scores = []
    for fold_num in range(args.num_folds):
        info(f'Fold {fold_num}')
        args.seed = init_seed + fold_num
        args.save_dir = os.path.join(save_dir, f'fold_{fold_num}')
        makedirs(args.save_dir)
        model_scores = run_training(args, logger)
        all_scores.append(model_scores)
    all_scores = np.array(all_scores)

    # Report results
    info(f'{args.num_folds}-fold cross validation')

    # Report scores for each fold
    for fold_num, scores in enumerate(all_scores):
        info(f'Seed {init_seed + fold_num} ==> test {args.metric} = {np.nanmean(scores):.6f}')

        if args.show_individual_scores:
            for task_name, score in zip(args.task_names, scores):
                info(f'Seed {init_seed + fold_num} ==> test {task_name} {args.metric} = {score:.6f}')

    # Report scores across models
    avg_scores = np.nanmean(all_scores, axis=1)  # average score for each model across tasks
    mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores)
    info(f'Overall test {args.metric} = {mean_score:.6f} +/- {std_score:.6f}')

    if args.show_individual_scores:
        for task_num, task_name in enumerate(args.task_names):
            info(f'Overall test {task_name} {args.metric} = '
                 f'{np.nanmean(all_scores[:, task_num]):.6f} +/- {np.nanstd(all_scores[:, task_num]):.6f}')

    # Save scores
    with open(os.path.join(save_dir, 'test_scores.csv'), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['Task', f'Mean {args.metric}', f'Standard deviation {args.metric}'] +
                        [f'Fold {i} {args.metric}' for i in range(args.num_folds)])

        for task_num, task_name in enumerate(args.task_names):
            task_scores = all_scores[:, task_num]
            mean, std = np.nanmean(task_scores), np.nanstd(task_scores)
            writer.writerow([task_name, mean, std] + task_scores.tolist())

    return mean_score, std_score
def cross_validate(args: TrainArgs,
                   train_func: Callable[[TrainArgs, MoleculeDataset, Logger], Dict[str, List[float]]]
                   ) -> Tuple[float, float]:
    """
    Runs k-fold cross-validation.

    For each of k splits (folds) of the data, trains and tests a model on that split
    and aggregates the performance across folds.

    :param args: A :class:`~chemprop.args.TrainArgs` object containing arguments for
                 loading data and training the Chemprop model.
    :param train_func: Function which runs training.
    :return: A tuple containing the mean and standard deviation performance across folds.
    """
    logger = create_logger(name=TRAIN_LOGGER_NAME, save_dir=args.save_dir, quiet=args.quiet)
    if logger is not None:
        debug, info = logger.debug, logger.info
    else:
        debug = info = print

    # Initialize relevant variables
    init_seed = args.seed
    save_dir = args.save_dir
    args.task_names = get_task_names(path=args.data_path,
                                     smiles_columns=args.smiles_columns,
                                     target_columns=args.target_columns,
                                     ignore_columns=args.ignore_columns)

    # Print command line
    debug('Command line')
    debug(f'python {" ".join(sys.argv)}')

    # Print args
    debug('Args')
    debug(args)

    # Save args
    makedirs(args.save_dir)
    try:
        args.save(os.path.join(args.save_dir, 'args.json'))
    except subprocess.CalledProcessError:
        debug('Could not write the reproducibility section of the arguments to file, thus omitting this section.')
        args.save(os.path.join(args.save_dir, 'args.json'), with_reproducibility=False)

    # Set explicit H option and reaction option
    reset_featurization_parameters(logger=logger)
    set_explicit_h(args.explicit_h)
    set_adding_hs(args.adding_h)
    if args.reaction:
        set_reaction(args.reaction, args.reaction_mode)
    elif args.reaction_solvent:
        set_reaction(True, args.reaction_mode)

    # Get data
    debug('Loading data')
    data = get_data(
        path=args.data_path,
        args=args,
        logger=logger,
        skip_none_targets=True,
        data_weights_path=args.data_weights_path
    )
    validate_dataset_type(data, dataset_type=args.dataset_type)
    args.features_size = data.features_size()

    if args.atom_descriptors == 'descriptor':
        args.atom_descriptors_size = data.atom_descriptors_size()
        args.ffn_hidden_size += args.atom_descriptors_size
    elif args.atom_descriptors == 'feature':
        args.atom_features_size = data.atom_features_size()
        set_extra_atom_fdim(args.atom_features_size)
    if args.bond_features_path is not None:
        args.bond_features_size = data.bond_features_size()
        set_extra_bond_fdim(args.bond_features_size)

    debug(f'Number of tasks = {args.num_tasks}')

    if args.target_weights is not None and len(args.target_weights) != args.num_tasks:
        raise ValueError('The number of provided target weights must match the number and order of the prediction tasks')

    # Run training on different random seeds for each fold
    all_scores = defaultdict(list)
    for fold_num in range(args.num_folds):
        info(f'Fold {fold_num}')
        args.seed = init_seed + fold_num
        args.save_dir = os.path.join(save_dir, f'fold_{fold_num}')
        makedirs(args.save_dir)
        data.reset_features_and_targets()

        # If resuming experiment, load results from trained models
        test_scores_path = os.path.join(args.save_dir, 'test_scores.json')
        if args.resume_experiment and os.path.exists(test_scores_path):
            print('Loading scores')
            with open(test_scores_path) as f:
                model_scores = json.load(f)
        # Otherwise, train the models
        else:
            model_scores = train_func(args, data, logger)

        for metric, scores in model_scores.items():
            all_scores[metric].append(scores)
    all_scores = dict(all_scores)

    # Convert scores to numpy arrays
    for metric, scores in all_scores.items():
        all_scores[metric] = np.array(scores)

    # Report results
    info(f'{args.num_folds}-fold cross validation')

    # Report scores for each fold
    contains_nan_scores = False
    for fold_num in range(args.num_folds):
        for metric, scores in all_scores.items():
            info(f'\tSeed {init_seed + fold_num} ==> test {metric} = {multitask_mean(scores[fold_num], metric):.6f}')

            if args.show_individual_scores:
                for task_name, score in zip(args.task_names, scores[fold_num]):
                    info(f'\t\tSeed {init_seed + fold_num} ==> test {task_name} {metric} = {score:.6f}')
                    if np.isnan(score):
                        contains_nan_scores = True

    # Report scores across folds
    for metric, scores in all_scores.items():
        avg_scores = multitask_mean(scores, axis=1, metric=metric)  # average score for each model across tasks
        mean_score, std_score = np.mean(avg_scores), np.std(avg_scores)
        info(f'Overall test {metric} = {mean_score:.6f} +/- {std_score:.6f}')

        if args.show_individual_scores:
            for task_num, task_name in enumerate(args.task_names):
                info(f'\tOverall test {task_name} {metric} = '
                     f'{np.mean(scores[:, task_num]):.6f} +/- {np.std(scores[:, task_num]):.6f}')

    if contains_nan_scores:
        # Implicit string concatenation avoids the stray indentation whitespace
        # that backslash continuations inside a single literal would embed.
        info("The metric scores observed for some fold test splits contain 'nan' values. "
             "This can occur when the test set does not meet the requirements "
             "for a particular metric, such as having no valid instances of one "
             "task in the test set or not having positive examples for some classification metrics. "
             "Before v1.5.1, the default behavior was to ignore nan values in individual folds or tasks "
             "and still return an overall average for the remaining folds or tasks. The behavior now "
             "is to include them in the average, converting overall average metrics to 'nan' as well.")

    # Save scores
    with open(os.path.join(save_dir, TEST_SCORES_FILE_NAME), 'w') as f:
        writer = csv.writer(f)

        header = ['Task']
        for metric in args.metrics:
            header += [f'Mean {metric}', f'Standard deviation {metric}'] + \
                      [f'Fold {i} {metric}' for i in range(args.num_folds)]
        writer.writerow(header)

        if args.dataset_type == 'spectra':  # spectra data type has only one score to report
            row = ['spectra']
            for metric, scores in all_scores.items():
                task_scores = scores[:, 0]
                mean, std = np.mean(task_scores), np.std(task_scores)
                row += [mean, std] + task_scores.tolist()
            writer.writerow(row)
        else:  # all other data types, separate scores by task
            for task_num, task_name in enumerate(args.task_names):
                row = [task_name]
                for metric, scores in all_scores.items():
                    task_scores = scores[:, task_num]
                    mean, std = np.mean(task_scores), np.std(task_scores)
                    row += [mean, std] + task_scores.tolist()
                writer.writerow(row)

    # Determine mean and std score of main metric
    avg_scores = multitask_mean(all_scores[args.metric], metric=args.metric, axis=1)
    mean_score, std_score = np.mean(avg_scores), np.std(avg_scores)

    # Optionally merge and save test preds
    if args.save_preds:
        all_preds = pd.concat([pd.read_csv(os.path.join(save_dir, f'fold_{fold_num}', 'test_preds.csv'))
                               for fold_num in range(args.num_folds)])
        all_preds.to_csv(os.path.join(save_dir, 'test_preds.csv'), index=False)

    return mean_score, std_score
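# --- Illustration (not part of the original source) ---
# A hedged sketch of reading back the scores CSV written above. Column names
# follow the header constructed in the save block ('Task', 'Mean <metric>',
# 'Standard deviation <metric>', 'Fold <i> <metric>'); the save directory and
# the 'auc' metric name are illustrative assumptions.
import os

import pandas as pd

scores = pd.read_csv(os.path.join('ckpts/tox21', 'test_scores.csv'))
print(scores[['Task', 'Mean auc', 'Standard deviation auc']])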