def init_scores_file(name, level, structure, split_id, sample_id):
    """Path to the log-likelihood scores of the initialization model.

    The file holds a (row_log_likelihood, column_log_likelihood) pair; each
    element is a vector giving the performance on all the test rows/columns.
    """
    # NOTE(review): this function is defined a second time further down in
    # this file with the same body; the later definition is the one that
    # stays bound once the module has been executed.
    init_dir_parent = level_dir(name, level)
    filename = 'scores-%s-%d-%d.pk' % (md5(structure), split_id, sample_id)
    return storage.join(init_dir_parent, 'init', filename)
def data_file(name):
    """Path to the original data matrix for experiment `name`.

    The matrix is stored as an observations.DataMatrix instance.
    """
    base = experiment_dir(name)
    return storage.join(base, 'data.pk')
def experiment_dir(name):
    """Top-level directory holding all structure search results for `name`.

    Built by joining config.RESULTS_PATH with the experiment name.
    """
    results_root = config.RESULTS_PATH
    return storage.join(results_root, name)
def running_time_file(name, level, structure, split_id, sample_id):
    """Path to the recorded running time for one (split, sample) pair.

    The time covers sampling from the posterior and computing the predictive
    likelihood. The file lives in a per-structure subdirectory (named by
    md5(structure)) of the level directory.
    """
    parent = level_dir(name, level)
    structure_key = md5(structure)
    filename = 'time-%d-%d.pk' % (split_id, sample_id)
    return storage.join(parent, structure_key, filename)
def collected_scores_file(name, level, structure):
    """Path to the collected predictive log-likelihood scores of a structure.

    The scores are gathered over all CV splits and ordered by the indices in
    the original data matrix.
    """
    parent = level_dir(name, level)
    structure_key = md5(structure)
    return storage.join(parent, structure_key, 'collected-scores.pk')
def samples_file(name, level, structure, split_id, sample_id):
    """Path to a posterior sample for a given structure.

    Unlike the result files, this path is rooted at config.CACHE_PATH rather
    than at the experiment's results directory.
    """
    cache_root = config.CACHE_PATH
    level_name = 'level%d' % level
    structure_key = md5(structure)
    filename = 'samples-%d-%d.pk' % (split_id, sample_id)
    return storage.join(cache_root, name, level_name, structure_key, filename)
def structures_file(name, level):
    """Path to the list of structures to evaluate at one level of the search.

    The list is stored as (init_structure, successor_structure) pairs.
    """
    parent = level_dir(name, level)
    return storage.join(parent, 'structures.pk')
def components_file(name):
    """Path to the true decomposition, if applicable.

    The decomposition is stored as a recursive.Decomp instance.
    """
    base = experiment_dir(name)
    return storage.join(base, 'components.pk')
def scores_file(name, level, structure, split_id, sample_id):
    """Path to the held-out predictive log-likelihood scores for one CV split.

    The file lives in a per-structure subdirectory (named by md5(structure))
    of the level directory.
    """
    parent = level_dir(name, level)
    structure_key = md5(structure)
    filename = 'scores-%d-%d.pk' % (split_id, sample_id)
    return storage.join(parent, structure_key, filename)
def init_scores_file(name, level, structure, split_id, sample_id):
    """Path to the row/column log-likelihood scores of the initialization.

    The scores belong to the model used as an initialization and are stored
    as a (row_log_likelihood, column_log_likelihood) pair, where each is a
    vector giving the performance on all the test rows/columns.
    """
    parent = level_dir(name, level)
    # The structure hash is part of the file name here, not a subdirectory.
    filename = 'scores-%s-%d-%d.pk' % (md5(structure), split_id, sample_id)
    return storage.join(parent, 'init', filename)
def init_samples_file(name, level, structure, split_id, sample_id):
    """Path to the decomposition used to initialize a given structure.

    The initialization is one of the top performing structures from the
    previous level.
    """
    parent = level_dir(name, level)
    # The structure hash is part of the file name here, not a subdirectory.
    filename = 'samples-%s-%d-%d.pk' % (md5(structure), split_id, sample_id)
    return storage.join(parent, 'init', filename)
def level_dir(name, level):
    """Directory holding the results of one level of the search.

    It is the 'level<N>' subdirectory of the experiment directory.
    """
    base = experiment_dir(name)
    subdir = 'level%d' % level
    return storage.join(base, subdir)
def clean_data_file(name):
    """Path to the observation matrix before noise was added, if applicable."""
    base = experiment_dir(name)
    return storage.join(base, 'clean-data.pk')
def winning_structure_file(name, level):
    """Path to the highest performing structure at a given search level."""
    parent = level_dir(name, level)
    return storage.join(parent, 'winning-structure.pk')
def winning_samples_file(name, sample_id):
    """Path to the posterior samples of the winning model sequence.

    Holds posterior samples from each model in the sequence chosen by the
    structure search, for the given sample_id.
    """
    base = experiment_dir(name)
    filename = 'winning-samples-%d.pk' % sample_id
    return storage.join(base, filename)
def params_file(name):
    """Path to 'params.pk' inside the experiment directory.

    NOTE(review): presumably holds the experiment's parameters; the stored
    contents are not visible from this function -- confirm against callers.
    """
    base = experiment_dir(name)
    return storage.join(base, 'params.pk')
def splits_file(name):
    """Path to the cross-validation splits for experiment `name`.

    The splits are stored as a list of
    (train_rows, train_cols, test_rows, test_cols) tuples.
    """
    base = experiment_dir(name)
    return storage.join(base, 'splits.pk')