def compute_gmm_given_dataset(src_cfg, nr_clusters, **kwargs):
    """Fit a GMM on PCA-projected descriptors of a dataset and save it.

    A `Dataset` is built from `src_cfg`; a subsample of its descriptors is
    loaded with `load_subsample_descriptors`, projected with the PCA object
    stored at ``<FEAT_DIR>/pca/pca_<nr_pca_components>.pkl`` and handed to
    `compute_gmm`. The result is passed to `save_gmm` together with the
    path ``<FEAT_DIR>/gmm/gmm_<nr_clusters>``.

    Recognised keyword arguments (with their defaults):
        ip_type            -- forwarded to `Dataset` ('dense5.track15mbh').
        suffix             -- forwarded to `Dataset` ('').
        nr_pca_components  -- selects which PCA file is loaded (64).
        nr_iterations      -- forwarded to `compute_gmm` (100).
        nr_threads         -- forwarded to `compute_gmm`
                              (multiprocessing.cpu_count()).
        seed               -- forwarded to `compute_gmm` (1).
        nr_redos           -- forwarded to `compute_gmm` (4).

    Any other keyword argument is silently ignored. Nothing is returned.

    """
    # Defaults first, then overlay whatever the caller supplied.
    settings = {
        'ip_type': 'dense5.track15mbh',
        'nr_iterations': 100,
        'nr_threads': multiprocessing.cpu_count(),
        'seed': 1,
        'nr_redos': 4,
        'suffix': '',
        'nr_pca_components': 64,
    }
    for key in list(settings):
        if key in kwargs:
            settings[key] = kwargs[key]

    dataset = Dataset(
        src_cfg, ip_type=settings['ip_type'], suffix=settings['suffix'])
    pca_path = os.path.join(
        dataset.FEAT_DIR, 'pca',
        'pca_%d.pkl' % settings['nr_pca_components'])

    # Load the descriptors and project them into the PCA space.
    descriptors = load_subsample_descriptors(dataset)
    projected = load_pca(pca_path).transform(descriptors)

    # Fit the mixture model on the projected descriptors.
    gmm = compute_gmm(
        projected,
        nr_clusters,
        settings['nr_iterations'],
        settings['nr_threads'],
        settings['seed'],
        settings['nr_redos'])

    gmm_path = os.path.join(dataset.FEAT_DIR, 'gmm', 'gmm_%d' % nr_clusters)
    save_gmm(gmm, gmm_path)
def compute_gmm_given_dataset(src_cfg, nr_clusters, **kwargs):
    """Compute and store a GMM using the dataset's file conventions.

    Steps, in order: read the optional settings from `kwargs`, build the
    `Dataset`, load a subsample of descriptors, load the PCA object from
    ``<FEAT_DIR>/pca/pca_<nr_pca_components>.pkl``, transform the
    descriptors with it, call `compute_gmm`, and pass the result to
    `save_gmm` with the path ``<FEAT_DIR>/gmm/gmm_<nr_clusters>``.

    Optional keyword arguments and defaults: ip_type
    ('dense5.track15mbh'), nr_iterations (100), nr_threads
    (multiprocessing.cpu_count()), seed (1), nr_redos (4), suffix ('')
    and nr_pca_components (64). Keyword arguments with other names are
    ignored. The function has no return value.

    """
    option = kwargs.get

    # Optional settings, falling back to the defaults.
    ip_type = option('ip_type', 'dense5.track15mbh')
    nr_iterations = option('nr_iterations', 100)
    nr_threads = option('nr_threads', multiprocessing.cpu_count())
    seed = option('seed', 1)
    nr_redos = option('nr_redos', 4)
    suffix = option('suffix', '')
    nr_pca_components = option('nr_pca_components', 64)

    dataset = Dataset(src_cfg, ip_type=ip_type, suffix=suffix)

    pca_dir = os.path.join(dataset.FEAT_DIR, 'pca')
    filename_pca = os.path.join(pca_dir, 'pca_%d.pkl' % nr_pca_components)

    raw_descriptors = load_subsample_descriptors(dataset)
    pca = load_pca(filename_pca)
    reduced_descriptors = pca.transform(raw_descriptors)

    # Positional order expected by compute_gmm is kept as in the call below.
    gmm_args = (
        reduced_descriptors, nr_clusters, nr_iterations,
        nr_threads, seed, nr_redos)
    gmm = compute_gmm(*gmm_args)

    gmm_dir = os.path.join(dataset.FEAT_DIR, 'gmm')
    save_gmm(gmm, os.path.join(gmm_dir, 'gmm_%d' % nr_clusters))
def compute_pca_given_dataset(src_cfg, **kwargs):
    """Compute and store a PCA using the dataset's file conventions.

    A `Dataset` is built from `src_cfg`, a subsample of its descriptors is
    loaded with `load_subsample_descriptors`, and `compute_pca` is run on
    them. The result is passed to `save_pca` together with the path
    ``<FEAT_DIR>/pca/pca_<n_components>.pkl``.

    Optional keyword arguments and defaults: ip_type
    ('dense5.track15mbh'), n_components (64) and suffix (''). Keyword
    arguments with other names are ignored. Nothing is returned.

    """
    option = kwargs.get

    # Optional settings, falling back to the defaults.
    ip_type = option('ip_type', 'dense5.track15mbh')
    n_components = option('n_components', 64)
    suffix = option('suffix', '')

    dataset = Dataset(src_cfg, ip_type=ip_type, suffix=suffix)
    descriptors = load_subsample_descriptors(dataset)

    # Fit the projection on the subsampled descriptors.
    pca = compute_pca(descriptors, n_components)

    pca_dir = os.path.join(dataset.FEAT_DIR, 'pca')
    pca_name = 'pca_%d.pkl' % n_components
    save_pca(pca, os.path.join(pca_dir, pca_name))