Example #1
0
def make_seq_record(exon, skip_annotation=False):
    """ Build an annotated seq record for the region synthesized around an exon.

        The sequence is fetched through the module-level ``hs37`` object for
        ``exon["chr"]``, from ``exon["synth_start"] - 1`` to
        ``exon["synth_end"]``, and reverse complemented when
        ``exon["strand"] == -1``.

        exon            -- dict describing the exon. Keys read here: "chr",
                           "synth_start", "synth_end", "strand", "synth_us",
                           "synth_ds", "len" and "exon"; the description
                           template additionally needs "CCDS". The dict itself
                           is handed to the record as its annotations.
        skip_annotation -- when False (the default), exon["snps"] is filled
                           with the variation features of the region (this
                           mutates the caller's dict) and the record is passed
                           through populate_attribs(), add_wiggle_data() and
                           add_conservation_features().

        Returns the new record, carrying one "exon" feature and two flanking
        "intron" features, each with its "source" qualifier set to
        "ensembl_exon".
    """
    annotate = not skip_annotation

    # SEQUENCE LOOKUP (PYCOGENT)
    # ===========================================================================
    # NOTE(review): the "- 1" on Start looks like a 1-based -> 0-based
    # coordinate conversion -- confirm against the getRegion API.
    region = hs37.getRegion(
        CoordName=exon["chr"],
        Start=exon["synth_start"] - 1,
        End=exon["synth_end"],
    )
    region_seq = region.Seq

    # minus-strand exons are stored reverse complemented
    if exon["strand"] == -1:
        region_seq = region_seq.rc()
    seq = region_seq._seq

    if annotate:
        # SNPS
        # ===========================================================================
        # keep every variation feature except those whose Alleles field is
        # the "HGMD_MUTATION" marker
        variations = hs37.getFeatures(feature_types="variation", region=region)
        kept_snps = []
        exon["snps"] = kept_snps
        kept_snps.extend(
            variation
            for variation in variations
            if variation.Alleles != "HGMD_MUTATION"
        )

    # TODO: APPEND OUTER INTRON CONTEXT
    # ===========================================================================
    # use the cfg information to get sequence, how much to add

    # SEQ RECORD
    # ===========================================================================
    # the exon dict doubles as the record's annotation fields
    record = SeqRecord(Seq(seq, generic_dna), id=exon["exon"], annotations=exon)

    # The flank preceding the exon in the returned sequence is the upstream
    # one when strand == 1 and the downstream one for any other strand value.
    in_front = exon["synth_us"] if exon["strand"] == 1 else exon["synth_ds"]
    exon_end = in_front + exon["len"]

    # (start, end, type) for the exon and the intronic sequence on either side
    spans = (
        (in_front, exon_end, "exon"),
        (0, in_front, "intron"),
        (exon_end, len(seq), "intron"),
    )
    record.features = [
        SeqFeature(FeatureLocation(start, end), type=kind)
        for start, end, kind in spans
    ]

    # tag the source as ensembl_exon (not splicemod, so that it doesn't get
    # deleted)
    for feature in record.features:
        feature.qualifiers["source"] = "ensembl_exon"

    description_template = (
        "{r.id} chr{chr}:{synth_start}-{synth_end} "
        "strand={strand} len={synth_us}.{len}.{synth_ds} "
        "ccds={CCDS}"
    )
    record.description = description_template.format(
        r=record, **record.annotations
    )

    if annotate:
        record.populate_attribs()
        add_wiggle_data(record)
        record.add_conservation_features()

    return record