Ejemplo n.º 1
0
def add_parents(args: argparse.Namespace) -> None:
    """Add a gene parent to every parentless mRNA and print the result.

    The GFF is parsed from ``args.infile`` and ``infer_missing_parents`` is
    called on it. Each remaining "mRNA" record that has no parents but does
    have an ID gets a new "gene" record, inferred from the mRNA itself and
    given the ID ``gene.<mRNA id>``. The records are then printed to
    ``args.outfile`` after a ``##gff-version 3`` header line.
    """
    annotation = GFF.parse(args.infile)
    annotation.infer_missing_parents()

    for mrna in annotation.select_type("mRNA"):
        # Nothing to do for mRNAs that already have a parent, and a gene ID
        # cannot be derived for an mRNA without attributes or without an ID.
        if (
            len(mrna.parents) > 0
            or mrna.attributes is None
            or mrna.attributes.id is None
        ):
            continue

        mrna_id = mrna.attributes.id
        new_gene = GFF3Record.infer_from_children(
            [mrna],
            id=f"gene.{mrna_id}",
            type="gene",
        )
        mrna.add_parent(new_gene)
        annotation.add_record(new_gene)

    out_handle = args.outfile
    print("##gff-version 3", file=out_handle)
    for record in annotation.traverse_children(sort=True):
        print(record, file=out_handle)
Ejemplo n.º 2
0
def deal_with_block(block: List[str], gene_num: int) -> List[GFF3Record]:
    """Convert one block of GFF lines into gene, mRNA and CDS records.

    Every line is parsed with GTF-style attributes and grouped by feature
    type. The single "gene" record supplies the coordinates, score, strand
    and phase of the output gene; every "exon" record becomes a CDS; an mRNA
    is inferred from the CDSs and attached to the gene. All output records
    use "exonerate" as their source.

    Args:
        block: Raw GFF lines. Must contain exactly one "gene" record, exactly
            one "similarity" record and at least one "exon" record.
        gene_num: Number used to build the output IDs ``gene{gene_num}``,
            ``mRNA{gene_num}`` and ``CDS{gene_num}``.

    Returns:
        A list holding the gene, then the mRNA, then the CDS records in the
        order their exon lines appeared in ``block``.

    Raises:
        ValueError: If the block does not contain exactly one "gene" and one
            "similarity" record, or contains no "exon" record.
    """
    # Group the parsed records by feature type, preserving input order.
    parsed: Dict[str, List[GFFRecord[GTFAttributes]]] = dict()
    for line in block:
        rec = GFFRecord.parse(line, attr=GTFAttributes)
        parsed.setdefault(rec.type, []).append(rec)

    # Validate the input explicitly: assert statements are removed when
    # Python runs with -O, and a bare KeyError does not say what is wrong.
    for required in ("gene", "similarity"):
        n_found = len(parsed.get(required, []))
        if n_found != 1:
            raise ValueError(
                f"Expected exactly one '{required}' record in block, "
                f"found {n_found}."
            )

    if not parsed.get("exon"):
        raise ValueError("Expected at least one 'exon' record in block.")

    gene_parsed = parsed["gene"][0]
    similarity_parsed = parsed["similarity"][0]

    # Custom attributes carried over onto the output gene.
    custom: Dict[str, str] = dict()
    if similarity_parsed.attributes is not None:
        custom["query"] = similarity_parsed.attributes.custom["Query"]

    if gene_parsed.attributes is not None:
        custom["identity"] = gene_parsed.attributes.custom["identity"]
        custom["similarity"] = gene_parsed.attributes.custom["similarity"]

    # "query" only exists when the similarity record had attributes, so it
    # must not be copied to the children unconditionally.
    has_query = "query" in custom

    gene = GFF3Record(
        gene_parsed.seqid,
        "exonerate",
        type="gene",
        start=gene_parsed.start,
        end=gene_parsed.end,
        score=gene_parsed.score,
        strand=gene_parsed.strand,
        phase=gene_parsed.phase,
        attributes=GFF3Attributes(
            id=f"gene{gene_num}",
            custom=custom,
        )
    )

    # Every exon becomes a CDS; all of them share one ID and one mRNA parent.
    cdss = [
        GFF3Record(
            e.seqid,
            "exonerate",
            "CDS",
            e.start,
            e.end,
            e.score,
            e.strand,
            e.phase,
            attributes=GFF3Attributes(
                id=f"CDS{gene_num}",
                parent=[f"mRNA{gene_num}"],
                custom=(e.attributes.custom
                        if e.attributes is not None
                        else None)
            )
        )
        for e
        in parsed["exon"]
    ]

    # Tag the CDSs with the query before the mRNA is inferred from them,
    # keeping the original order of operations.
    if has_query and gene.attributes is not None:
        for c in cdss:
            # This is safe because we added attributes.
            assert c.attributes is not None
            c.attributes.custom["query"] = gene.attributes.custom["query"]

    mrna = GFF3Record.infer_from_children(
        cdss,
        id=f"mRNA{gene_num}",
        seqid=gene.seqid,
        source="exonerate",
        type="mRNA",
        strand=gene.strand,
        score=gene.score,
    )

    mrna.add_parent(gene)

    if gene.attributes is not None:
        # This is safe because infer_from_children adds an ID to attributes.
        assert mrna.attributes is not None
        if gene.attributes.id is not None:
            mrna.attributes.parent = [gene.attributes.id]
        if has_query:
            mrna.attributes.custom["query"] = gene.attributes.custom["query"]

    return [gene, mrna, *cdss]