コード例 #1
0
    def test_fpc_hierarch_tm(self):
        """Run FPC with score_type 'tm' and cluster_method 'hcomplete' on the test models.

        A single cluster is requested in benchmark mode, and the reported
        model count, method label and centroid file name are checked.
        """
        os.chdir(self.thisd)  # Need as otherwise tests that happen in other directories change os.cwd()

        models = glob.glob(os.path.join(self.testfiles_dir, "models", "*.pdb"))

        wdir = 'fpc_test'
        if not os.path.isdir(wdir):
            os.mkdir(wdir)
        try:
            num_clusters = 1
            # With benchmark=True the call is unpacked into (clusters, cluster_data)
            clusters, cluster_data = fast_protein_cluster.FPC().cluster(
                models=models,
                num_clusters=num_clusters,
                score_type='tm',
                cluster_method='hcomplete',
                work_dir=wdir,
                fpc_exe=self.fpc_exe,
                nproc=4,
                benchmark=True,
            )

            self.assertEqual(len(clusters), num_clusters)
            d = cluster_data[0]
            self.assertEqual(d['cluster_num_models'], 16)
            self.assertEqual(d['cluster_method'], 'hcomplete_tm')
            self.assertEqual(os.path.basename(d['cluster_centroid']), '5_S_00000005.pdb')
        finally:
            # Clean up even when the run or an assertion fails, so a failed
            # test does not leave stale output behind for the next run.
            shutil.rmtree(wdir)
コード例 #2
0
    def cluster_models(
        self, models=None, cluster_method=SPICKER_RMSD, num_clusters=1, cluster_dir=None, max_cluster_size=200
    ):
        """Wrapper function to run clustering of models dependent on the method

        Parameters
        ----------
        models
           Collection of models to cluster; must contain at least two entries
        cluster_method
           Handed to ``self.parse_cluster_method`` to obtain the method type,
           the score type and the executable to use
        num_clusters
           Number of clusters requested from the clustering backend
        cluster_dir
           Passed unchanged as ``work_dir`` (fast_protein_cluster) or
           ``run_dir`` (SPICKER); not used for 'import' or 'random'
        max_cluster_size
           Passed to fast_protein_cluster, SPICKER and the random clusterer

        Returns
        -------
        The clusters produced by the selected backend

        Raises
        ------
        RuntimeError
           If fewer than two models are supplied, or the method type returned
           by ``parse_cluster_method`` is not one of 'fast_protein_cluster',
           'import', 'random' or 'spicker'
        """
        # A missing model list is reported like a too-short one instead of
        # failing inside len() with a TypeError.
        if models is None or len(models) < 2:
            raise RuntimeError("Cannot cluster fewer than 2 models!")
        logger.info('Generating %d clusters using method: %s', num_clusters, cluster_method)

        # Get the cluster_method_type and cluster_score_type from the cluster_method
        cluster_method_type, cluster_score_type, cluster_exe = self.parse_cluster_method(cluster_method)

        # NOTE(review): the previous directory set-up chain started with
        # `not in ['skip']`, which matched every supported method, so neither
        # its 'import'/'random' directory check nor its 'clustering' default
        # ever took effect. cluster_dir is therefore still passed through
        # exactly as supplied by the caller - confirm whether a default
        # directory was intended before adding one.

        if cluster_method_type == 'fast_protein_cluster':
            # The score type and method are fixed for this backend; the parsed
            # cluster_score_type is not used here.
            score_type = 'rmsd'
            fpc_method = 'kmeans'
            logger.info(
                'Running fast_protein_cluster with: score_type: %s and cluster_method: %s', score_type, fpc_method
            )
            # cluster() is called directly on the FPC instance; the former
            # `.fpc.cluster` went through an extra `fpc` attribute lookup.
            clusters = fast_protein_cluster.FPC().cluster(
                cluster_method=fpc_method,
                fpc_exe=cluster_exe,
                max_cluster_size=max_cluster_size,
                models=models,
                num_clusters=num_clusters,
                nproc=self.nproc,
                score_type=score_type,
                work_dir=cluster_dir,
            )
        elif cluster_method_type == 'import':
            clusters = cluster_util.import_cluster(models)
        elif cluster_method_type == 'random':
            clusters = cluster_util.random_cluster(cluster_method_type, max_cluster_size, models, num_clusters)
        elif cluster_method_type == 'spicker':
            logger.info('* Running SPICKER to cluster models *')
            spickerer = spicker.Spickerer(spicker_exe=cluster_exe)
            clusters = spickerer.cluster(
                models,
                num_clusters=num_clusters,
                max_cluster_size=max_cluster_size,
                score_type=cluster_score_type,
                run_dir=cluster_dir,
                score_matrix=None,
                nproc=self.nproc,
            )
            logger.debug(spickerer.results_summary())
        else:
            raise RuntimeError('Unrecognised clustering method: {}'.format(cluster_method_type))

        return clusters