Example #1
0
    def populate_gene_cluster_homogeneity_index(self, gene_clusters_dict):
        """Compute homogeneity indices for gene clusters and record them as item data.

        The indices are requested from `dbops.PanSuperclass` for every key of
        `gene_clusters_dict` and then handed to
        `miscdata.TableForItemAdditionalData(self.args).add(...)` under the
        keys `functional_homogeneity_index`, `geometric_homogeneity_index`
        and `combined_homogeneity_index`.

        The method warns through `self.run.warning` and returns without doing
        anything when `self.skip_alignments` or `self.skip_homogeneity` is
        set, or when no indices come back.

        Parameters
        ----------
        gene_clusters_dict : dict
            Mapping keyed by gene cluster name. Only the keys are used here.

        Returns
        -------
        None
        """
        if self.skip_alignments:
            self.run.warning(
                'Skipping homogeneity calculations because gene clusters are not alligned.'
            )
            return

        if self.skip_homogeneity:
            self.run.warning(
                "Skipping homogeneity calculations per the '--skip-homogeneity' flag."
            )
            return

        pan = dbops.PanSuperclass(args=self.args, r=self.run, p=self.progress)

        d = pan.compute_homogeneity_indices_for_gene_clusters(
            gene_cluster_names=set(gene_clusters_dict),
            num_threads=self.num_threads)

        # An empty result is treated like a missing one: the warning below
        # already describes it as "an empty dictionary", and there is nothing
        # to write in either case.
        if d is None or len(d) == 0:
            # Adjacent literals are used instead of a backslash continuation,
            # which would embed the source indentation in the message.
            self.run.warning(
                "Anvi'o received an empty dictionary for homogeneity indices. Not good :/ "
                "Returning empty handed, without updating anything in the pan database..."
            )
            return

        miscdata.TableForItemAdditionalData(self.args).add(
            d, [
                'functional_homogeneity_index', 'geometric_homogeneity_index',
                'combined_homogeneity_index'
            ],
            skip_check_names=True)
Example #2
0
    def convert(self, infile, count=False):
        """Map every genome in an anvi'o pan database to its gene clusters.

        Parameters
        ----------
        infile
            Value passed to `dbops.PanSuperclass` as the `pan_db` argument
            (presumably the path to the pan database -- confirm with caller).
        count : bool, optional
            When false (default), each genome maps to a list of the gene
            clusters in which it has at least one gene. When true, each genome
            maps to a dict of ``{gene_cluster: number_of_genes}`` instead.

        Returns
        -------
        dict
            One entry per genome seen in any gene cluster. A genome with no
            genes in any cluster maps to an empty list (or empty dict).

        Notes
        -----
        The count is the number of genes the genome contributes to the gene
        cluster. The earlier implementation counted occurrences in a list it
        had already de-duplicated, so every reported count was 1.
        NOTE(review): confirm that no caller relied on those constant 1s.
        """
        import argparse

        import anvio.dbops as dbops

        # A plain attribute container carrying `pan_db`, replacing the
        # hand-rolled stand-in class whose misspelled `__init_` never ran.
        args = argparse.Namespace(pan_db=infile)
        pan = dbops.PanSuperclass(args)
        pan.init_gene_clusters(pan.gene_cluster_names)

        # genome -> {gene cluster -> number of genes}. Dict keys give O(1)
        # uniqueness and keep a deterministic insertion order.
        genome2genecluster = {}
        for gc, hits in pan.gene_clusters.items():
            for genome, genes in hits.items():
                # Register the genome even if it has no genes here, so every
                # genome gets an entry in the result.
                clusters = genome2genecluster.setdefault(genome, {})
                n_genes = len(genes)
                if n_genes > 0:
                    clusters[gc] = n_genes

        if count:
            return genome2genecluster
        return {
            genome: list(clusters)
            for genome, clusters in genome2genecluster.items()
        }