def make_seq_record(exon, skip_annotation=False):
    """Build an annotated sequence record for one exon and its flanks.

    The sequence is fetched from the module-level ``hs37`` genome for the
    region ``exon["chr"]``, ``exon["synth_start"] - 1`` .. ``exon["synth_end"]``
    and reverse-complemented when ``exon["strand"] == -1``.  The record gets
    one ``exon`` feature and two flanking ``intron`` features, all tagged
    with ``source="ensembl_exon"``, plus a one-line description.

    Args:
        exon: mapping describing the exon.  Keys read here: ``"chr"``,
            ``"synth_start"``, ``"synth_end"``, ``"strand"``, ``"exon"``
            (used as the record id), ``"len"``, ``"synth_us"``,
            ``"synth_ds"``; the description template additionally needs
            ``"CCDS"``.  The same object is handed to the record as its
            annotations, and (unless ``skip_annotation``) it is mutated in
            place: ``exon["snps"]`` is replaced with the variation features
            found in the region.
        skip_annotation: when true, skip the SNP lookup and the three
            post-construction annotation steps (``populate_attribs``,
            ``add_wiggle_data``, ``add_conservation_features``).

    Returns:
        The newly created ``SeqRecord``.
    """
    # --- fetch the sequence through pycogent ------------------------------
    # NOTE(review): the "- 1" on Start presumably converts a 1-based start
    # into the 0-based coordinate getRegion expects -- confirm.
    region = hs37.getRegion(
        CoordName=exon["chr"],
        Start=exon["synth_start"] - 1,
        End=exon["synth_end"],
    )
    dna = region.Seq
    if exon["strand"] == -1:
        # minus-strand exon: work on the reverse complement
        dna = dna.rc()
    raw_seq = dna._seq

    # --- SNPs -------------------------------------------------------------
    if not skip_annotation:
        variations = hs37.getFeatures(feature_types="variation", region=region)
        # keep every variation except those whose Alleles field is the
        # HGMD_MUTATION placeholder
        exon["snps"] = [
            snp for snp in variations if snp.Alleles != "HGMD_MUTATION"
        ]

    # TODO: APPEND OUTER INTRON CONTEXT
    # use the cfg information to get sequence, how much to add

    # --- build the record -------------------------------------------------
    # the exon dict itself becomes the record's annotation fields
    record = SeqRecord(Seq(raw_seq, generic_dna), id=exon["exon"], annotations=exon)

    # length of the flank that sits in front of the exon in this sequence
    leading = exon["synth_us"] if exon["strand"] == 1 else exon["synth_ds"]
    exon_len = exon["len"]

    # (start, end, feature type): the exon itself, then the flank in front
    # of it, then the flank behind it
    spans = (
        (leading, exon_len + leading, "exon"),
        (0, leading, "intron"),
        (leading + exon_len, len(raw_seq), "intron"),
    )
    record.features = [
        SeqFeature(FeatureLocation(start, end), type=kind)
        for start, end, kind in spans
    ]

    # source is ensembl_exon (not splicemod, so that it doesn't get deleted)
    for feature in record.features:
        feature.qualifiers["source"] = "ensembl_exon"

    description_template = (
        "{r.id} chr{chr}:{synth_start}-{synth_end} "
        "strand={strand} len={synth_us}.{len}.{synth_ds} "
        "ccds={CCDS}"
    )
    record.description = description_template.format(r=record, **record.annotations)

    if skip_annotation:
        return record

    record.populate_attribs()
    add_wiggle_data(record)
    record.add_conservation_features()
    return record