def populate_gene_cluster_homogeneity_index(self, gene_clusters_dict):
    """Compute homogeneity indices for the given gene clusters and register them.

    The indices are obtained from
    `dbops.PanSuperclass.compute_homogeneity_indices_for_gene_clusters` and then
    handed to `miscdata.TableForItemAdditionalData.add` under the keys
    'functional_homogeneity_index', 'geometric_homogeneity_index' and
    'combined_homogeneity_index'.

    Nothing is computed (and only a warning is issued) when
    `self.skip_alignments` or `self.skip_homogeneity` is set, or when no
    indices come back from the computation.

    Parameters
    ==========
    gene_clusters_dict : dict
        Only its keys are used; they are taken as the gene cluster names to
        compute the indices for.

    Returns
    =======
    None
    """

    if self.skip_alignments:
        self.run.warning('Skipping homogeneity calculations because gene clusters are not aligned.')
        return

    if self.skip_homogeneity:
        self.run.warning("Skipping homogeneity calculations per the '--skip-homogeneity' flag.")
        return

    pan = dbops.PanSuperclass(args=self.args, r=self.run, p=self.progress)
    gene_cluster_names = set(gene_clusters_dict.keys())

    d = pan.compute_homogeneity_indices_for_gene_clusters(gene_cluster_names=gene_cluster_names,
                                                          num_threads=self.num_threads)

    # Treat both `None` and an actually empty dictionary as "nothing to store":
    # the warning below talks about an empty dictionary, so an empty result
    # must not fall through to the table update.
    if not d:
        self.run.warning("Anvi'o received an empty dictionary for homogeneity indices. "
                         "Not good :/ Returning empty handed, without updating anything "
                         "in the pan database...")
        return

    miscdata.TableForItemAdditionalData(self.args).add(
        d,
        ['functional_homogeneity_index', 'geometric_homogeneity_index', 'combined_homogeneity_index'],
        skip_check_names=True)
def convert(self, infile, count=False):
    """Summarise which gene clusters each genome has in an anvi'o pan database.

    The pan database is opened through `anvio.dbops.PanSuperclass`, all of its
    gene clusters are initialised, and the per-cluster hits are regrouped by
    genome.

    Parameters
    ==========
    infile : str
        Path handed to anvi'o as the `pan_db` argument.
    count : bool
        When False (default) each genome maps to a list of gene cluster names.
        When True each genome maps to a dict `{gene cluster name: number of
        genes that genome has in the cluster}`.

    Returns
    =======
    dict
        Keyed by genome name. Every genome that appears in any gene cluster is
        present, even when it has no genes at all (empty list / empty dict).
        A gene cluster is only reported for a genome that has at least one
        gene in it.
    """

    # Local imports: anvi'o is only needed by this converter.
    import argparse

    import anvio.dbops as dbops

    # anvi'o classes read their settings from an argparse-style namespace.
    args = argparse.Namespace(pan_db=infile)

    pan = dbops.PanSuperclass(args)
    pan.init_gene_clusters(pan.gene_cluster_names)

    # genome -> {gene cluster -> number of genes}; the dict keeps cluster names
    # unique per genome without repeated list scans.
    genome_to_clusters = {}
    for gene_cluster_name, hits in pan.gene_clusters.items():
        for genome, genes in hits.items():
            clusters = genome_to_clusters.setdefault(genome, {})
            num_genes = len(genes)
            if num_genes > 0:
                # Record the real copy number. Counting occurrences in a
                # de-duplicated list of cluster names could only ever yield 1.
                clusters[gene_cluster_name] = num_genes

    if count:
        return genome_to_clusters

    return {genome: list(clusters) for genome, clusters in genome_to_clusters.items()}