Example #1
0
def read_gene2fam(pangenome, gene2fam, disable_bar=False):
    """Attach every clustered gene to its gene family in the pangenome.

    Args:
        pangenome: object exposing ``status``, ``genes``, ``addGeneFamily``
            and ``getGene``.
        gene2fam: mapping of gene identifier to a ``(family, is_fragment)``
            pair.
        disable_bar: forwarded to ``tqdm`` to silence the progress bar.

    Raises:
        Exception: when annotations are available and the number of clustered
            genes differs from the number of genes known to the pangenome.
    """
    logging.getLogger().info(
        f"Adding {len(gene2fam)} genes to the gene families")

    # Genes can only be linked to existing objects when the annotations
    # have been computed or loaded.
    annotations_available = pangenome.status["genomesAnnotated"] in (
        "Computed", "Loaded")

    # A size mismatch hints at genes sharing the same ID across organisms.
    if annotations_available and len(gene2fam) != len(pangenome.genes):
        raise Exception(
            "Something unexpected happened during clustering "
            "(have less genes clustered than genes in the pangenome). "
            "A probable reason is that two genes in two different organisms have the same IDs;"
            " If you are sure that all of your genes have non identical IDs, "
            "please post an issue at https://github.com/labgem/PPanGGOLiN/"
        )

    progress = tqdm(gene2fam.items(), unit="gene", disable=disable_bar)
    for gene_id, (family_name, fragmented) in progress:
        family = pangenome.addGeneFamily(family_name)
        # Reuse the annotated gene when it exists, otherwise create a bare one.
        gene = (pangenome.getGene(gene_id)
                if annotations_available else Gene(gene_id))
        gene.is_fragment = fragmented
        family.addGene(gene)
    progress.close()
Example #2
0
def readOrganism(pangenome, orgName, contigDict, circularContigs, link=False):
    """Build one organism from annotation rows and add it to the pangenome.

    Args:
        pangenome: object receiving the organism through ``addOrganism``;
            when ``link`` is true it is also queried through ``getGene``.
        orgName: name handed to ``Organism``.
        contigDict: mapping of contig name to an iterable of gene rows. Each
            row is indexed by field name ("ID", "type", "start", "stop",
            "strand", "position", "genetic_code", "name", "product",
            "is_fragment" and, optionally, "local"); the text fields are
            bytes and get decoded here.
        circularContigs: mapping of contig name to the value passed as
            ``is_circular`` when the contig is created.
        link: when true, the gene objects are fetched from ``pangenome``
            instead of being created.

    Raises:
        Exception: if a row's type is neither "CDS" nor contains "RNA". The
            check happens before any gene object is fetched, created or
            modified for that row.
    """
    org = Organism(orgName)
    for contigName, geneList in contigDict.items():
        contig = org.getOrAddContig(contigName,
                                    is_circular=circularContigs[contigName])
        for row in geneList:
            gene_id = row["ID"].decode()
            # The type is needed in both modes (it decides which contig
            # container receives the gene), so decode it unconditionally.
            gene_type = row["type"].decode()
            is_cds = gene_type == "CDS"
            if not is_cds and "RNA" not in gene_type:
                # Fail before touching any gene object so that a bad row
                # can never overwrite the annotations of a previous gene.
                raise Exception(
                    f"A strange type '{gene_type}', which we do not know what to do with, was met."
                )

            if link:  # the gene families are already computed/loaded: the gene exists.
                gene = pangenome.getGene(gene_id)
            elif is_cds:
                gene = Gene(gene_id)
            else:
                gene = RNA(gene_id)

            # Rows without a "local" field fall back to an empty identifier.
            # NOTE(review): assumes a missing field raises ValueError, as the
            # original code did -- confirm against the row type in use.
            try:
                local = row["local"].decode()
            except ValueError:
                local = ""

            gene.fill_annotations(start=row["start"],
                                  stop=row["stop"],
                                  strand=row["strand"].decode(),
                                  geneType=gene_type,
                                  position=row["position"],
                                  genetic_code=row["genetic_code"],
                                  name=row["name"].decode(),
                                  product=row["product"].decode(),
                                  local_identifier=local)
            gene.is_fragment = row["is_fragment"]
            gene.fill_parents(org, contig)

            if is_cds:
                contig.addGene(gene)
            else:
                contig.addRNA(gene)
    pangenome.addOrganism(org)