def test_fpc_hierarch_tm(self):
    """Run FPC with the 'hcomplete' method and 'tm' score and check the single cluster it reports.

    The test asks for one cluster over every ``*.pdb`` file in the ``models``
    test-file directory, then verifies the number of clusters, the model count,
    the reported method label and the centroid file name.
    """
    # Other tests may have left the process in a different directory, and the
    # work directory below is a relative path, so go back to this test's home.
    os.chdir(self.thisd)

    pdb_pattern = os.path.join(self.testfiles_dir, "models", "*.pdb")
    model_files = glob.glob(pdb_pattern)

    work_dir = 'fpc_test'
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)

    expected_clusters = 1
    clusterer = fast_protein_cluster.FPC()
    # With benchmark=True the call yields a (clusters, cluster_data) pair.
    clusters, cluster_data = clusterer.cluster(
        models=model_files,
        num_clusters=expected_clusters,
        score_type='tm',
        cluster_method='hcomplete',
        work_dir=work_dir,
        fpc_exe=self.fpc_exe,
        nproc=4,
        benchmark=True,
    )

    self.assertEqual(len(clusters), expected_clusters)

    first = cluster_data[0]
    self.assertEqual(first['cluster_num_models'], 16)
    self.assertEqual(first['cluster_method'], 'hcomplete_tm')
    self.assertEqual(os.path.basename(first['cluster_centroid']), '5_S_00000005.pdb')

    # Only reached when every assertion passed; a failing run leaves the directory behind.
    shutil.rmtree(work_dir)
def cluster_models(
    self, models=None, cluster_method=SPICKER_RMSD, num_clusters=1, cluster_dir=None, max_cluster_size=200
):
    """Wrapper function to run clustering of models dependent on the method.

    Parameters
    ----------
    models
        Sized collection of models to cluster. ``None`` is treated as empty.
    cluster_method
        Method identifier handed to ``self.parse_cluster_method``, which yields
        the method type, the score type and the executable to use.
    num_clusters
        Number of clusters requested from the back end.
    cluster_dir
        Existing cluster directory; required for the 'import' and 'random'
        method types. For the other supported method types the directory is
        set to ``<self.work_dir>/clustering``.
    max_cluster_size
        Upper bound on cluster size, forwarded to the back end.

    Returns
    -------
    The clusters object produced by the selected back end.

    Raises
    ------
    RuntimeError
        If there are no models (and the method is not 'import'), if there are
        fewer than two models, if the cluster directory required by
        'import'/'random' does not exist, or if the method type is not one of
        'fast_protein_cluster', 'import', 'random' or 'spicker'.
    """
    # ``models`` defaults to None; count it as zero models so that the guards
    # below raise RuntimeError instead of a TypeError from len(None).
    num_models = 0 if models is None else len(models)
    # The "no models" guard has to run before the "fewer than 2" guard,
    # otherwise it can never fire.
    if cluster_method != 'import' and not num_models:
        raise RuntimeError("Cannot find any models for ensembling!")
    if num_models < 2:
        raise RuntimeError("Cannot cluster fewer than 2 models!")
    logger.info('Generating %d clusters using method: %s', num_clusters, cluster_method)

    # Get the cluster_method_type and cluster_score_type from the cluster_method
    cluster_method_type, cluster_score_type, cluster_exe = self.parse_cluster_method(cluster_method)

    # Set directory. The first test used to be ``not in ['skip']``, which sent
    # every real method into the no-op branch and left the two branches below
    # unreachable for them.
    if cluster_method_type == 'skip':
        pass
    elif cluster_method_type in ('import', 'random'):
        # Guard against None explicitly: os.path.isdir(None) raises TypeError
        # rather than returning False.
        if cluster_dir is None or not os.path.isdir(cluster_dir):
            raise RuntimeError('Cannot find cluster directory: {0}'.format(cluster_dir))
    else:
        cluster_dir = os.path.join(self.work_dir, 'clustering')

    if cluster_method_type == 'fast_protein_cluster':
        score_type = 'rmsd'
        fpc_method = 'kmeans'
        logger.info(
            'Running fast_protein_cluster with: score_type: %s and cluster_method: %s', score_type, fpc_method
        )
        # Call cluster() on the FPC instance itself, as the FPC unit test does;
        # the previous ``FPC().fpc.cluster`` went through a ``.fpc`` attribute.
        fpc = fast_protein_cluster.FPC()
        clusters = fpc.cluster(
            cluster_method=fpc_method,
            fpc_exe=cluster_exe,
            max_cluster_size=max_cluster_size,
            models=models,
            num_clusters=num_clusters,
            nproc=self.nproc,
            score_type=score_type,
            work_dir=cluster_dir,
        )
    elif cluster_method_type == 'import':
        clusters = cluster_util.import_cluster(models)
    elif cluster_method_type == 'random':
        clusters = cluster_util.random_cluster(cluster_method_type, max_cluster_size, models, num_clusters)
    elif cluster_method_type == 'spicker':
        logger.info('* Running SPICKER to cluster models *')
        spickerer = spicker.Spickerer(spicker_exe=cluster_exe)
        clusters = spickerer.cluster(
            models,
            num_clusters=num_clusters,
            max_cluster_size=max_cluster_size,
            score_type=cluster_score_type,
            run_dir=cluster_dir,
            score_matrix=None,
            nproc=self.nproc,
        )
        logger.debug(spickerer.results_summary())
    else:
        # Any other method type, including 'skip', is rejected here.
        raise RuntimeError('Unrecognised clustering method: {}'.format(cluster_method_type))
    return clusters