class TestGffWriter:
    """Exercise GffWriter by writing into an in-memory text buffer."""

    RECORD1 = Gff3Record(
        "chr1", 10, 11, "insertion",
        attributes=[("cat", "1"), ("dog", "2")])
    RECORD2 = Gff3Record(
        "chr1", 200, 201, "substitution",
        attributes=[("mouse", "1"), ("moose", "2")])

    def setup_method(self):
        """Give every test its own buffer and a writer bound to it."""
        self.outfile = StringIO()
        self.gffWriter = GffWriter(self.outfile)

    def test_writeHeader(self):
        """A custom header line is expected right after the version line."""
        self.gffWriter.writeHeader("##foo bar")
        written = self.outfile.getvalue()
        assert "##gff-version 3\n##foo bar\n" == written

    def test_writeRecord(self):
        """Each record is expected as one tab-separated line after the version line."""
        for record in (self.RECORD1, self.RECORD2):
            self.gffWriter.writeRecord(record)
        expected = "".join((
            "##gff-version 3\n",
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n",
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n",
        ))
        assert expected == self.outfile.getvalue()
def setup(self):
    """Create the fixtures: an in-memory buffer, two records, and a writer."""
    # NOTE(review): if this class is collected by pytest, plain `setup` is the
    # nose-style hook name -- confirm the test runner in use still calls it.
    insertion_attrs = [("cat", "1"), ("dog", "2")]
    substitution_attrs = [("mouse", "1"), ("moose", "2")]

    self.outfile = StringIO()
    self.record1 = Gff3Record(
        "chr1", 10, 11, "insertion", attributes=insertion_attrs)
    self.record2 = Gff3Record(
        "chr1", 200, 201, "substitution", attributes=substitution_attrs)
    # The writer targets the buffer created above.
    self.gffWriter = GffWriter(self.outfile)
def run(argv):
    """
    Generate a random 5000-base sequence with planted GTAC / CTAG motifs and
    write it to "genome.fasta", plus one "modified_base" record per planted
    motif to "basemods.gff".

    argv is accepted but not used.  Always returns 0.
    """
    def _planted_record(position):
        # Every planted site gets the same fixed score, strand and attributes.
        return Gff3Record(
            "genome", position, position, "modified_base", 100, "+", ".",
            "kinModCall",
            [("coverage", "100"), ("IPDRatio", "4.0"),
             ("identificationQv", "50")])

    nuc = []
    basemods = []
    while len(nuc) < 5000:
        draw = random.random()
        # Motifs are only planted while fewer than 4880 bases exist.
        can_plant = len(nuc) < 4880
        if draw > 0.99 and can_plant:
            motif = ["G", "T", "A", "C"]
        elif draw < 0.01 and can_plant:
            motif = ["C", "T", "A", "G"]
        else:
            motif = None

        if motif is not None:
            # 1-based position of the third base of the motif being appended.
            basemods.append(_planted_record(len(nuc) + 3))
            nuc.extend(motif)
            continue

        base = "ACGT"[random.randint(0, 3)]
        tail = nuc[-3:]
        completes_motif = (
            (base == "C" and tail == ["G", "T", "A"])
            or (base == "G" and tail == ["C", "T", "A"]))
        # Substitute "T" so a random base never completes an unplanted motif.
        nuc.append("T" if completes_motif else base)

    seq = "".join(nuc)
    genome_length = len(seq)

    with FastaWriter("genome.fasta") as fa_out:
        fa_out.writeRecord("genome", seq)

    with GffWriter("basemods.gff") as gff_out:
        for rec in basemods:
            # Up to 20 bases on either side of the 1-based site, clipped to
            # the ends of the sequence.
            lo = max(0, rec.start - 21)
            hi = min(genome_length, rec.end + 20)
            rec.attributes["context"] = seq[lo:hi]
            gff_out.writeRecord(rec)
    return 0
class TestGff3Record:
    """Tests for Gff3Record string rendering, modification, parsing and field access."""

    # Shared by every test in the class.  Tests that modify a record must do
    # so on a copy, otherwise the result of the other tests would depend on
    # the order in which the tests happen to run.
    RECORD = Gff3Record("chr1", 10, 11, "insertion",
                        attributes=[("cat", "1"), ("dog", "2")])

    def test_str(self):
        """str() is expected to give the tab-separated line with ';'-joined attributes."""
        assert "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2" == str(
            self.RECORD)

    def test_modification(self):
        """Changing and adding fields on a copy shows up in its string form."""
        record = self.RECORD.copy()
        record.dog = 3
        record.cat = 4
        record.mouse = 5
        record.start = 100
        record.end = 110
        assert "chr1\t.\tinsertion\t100\t110\t.\t.\t.\tcat=4;dog=3;mouse=5" == str(
            record)

    def test_fromString(self):
        """Parsing a record's string form gives a record with the same string form."""
        newRecord = Gff3Record.fromString(str(self.RECORD))
        assert str(self.RECORD) == str(newRecord)

    def test_get(self):
        """
        Verify field access behavior
        """
        # Work on a copy, as test_modification does: assigning to the shared
        # RECORD would change what tests such as test_str observe.
        record = self.RECORD.copy()
        record.dog = 3
        record.cat = 4
        record.mouse = 5
        record.start = 100
        record.end = 110
        assert 3 == record.dog
        assert 100 == record.start
        # Unknown fields raise on attribute access ...
        with pytest.raises(AttributeError):
            record.god
        assert 3 == record.get("dog")
        # ... but get() is expected to return None for them instead.
        assert record.get("god") is None
        assert 100 == record.get("start", 100)
def toGffRecord(var):
    """
    Build a Gff3Record describing the variant `var`.

    The record type is the lower-cased variant type.  The attributes
    reference, variantSeq, (frequency, when truthy), coverage and confidence
    are set in that order, followed by any annotations attached to the
    variant.
    """
    featureType = var.variantType.lower()

    # A variant with an empty reference sequence gets start == end ==
    # refStart; otherwise the start is shifted by one and the end is refEnd.
    # NOTE(review): presumably this converts 0-based half-open coordinates to
    # GFF's 1-based closed ones -- confirm against the variant type.
    if var.refSeq != "":
        firstPos = var.refStart + 1
        lastPos = var.refEnd
    else:
        firstPos = var.refStart
        lastPos = var.refStart

    frequency = gffVariantFrequency(var)
    seqName = reference.idToFullName(var.refId)
    record = Gff3Record(seqName, firstPos, lastPos, featureType)

    # "." stands in for an empty/missing reference sequence.
    record.reference = var.refSeq if var.refSeq else "."
    record.variantSeq = gffVariantSeq(var)
    if frequency:
        record.frequency = frequency
    record.coverage = var.coverage
    record.confidence = var.confidence

    for key, value in (var.annotations or ()):
        record.put(key, value)
    return record
def makeM5CgffRecord(self, siteObs):
    """
    Build a 'CG' Gff3Record from a site observation.

    Reads the keys 'tpl', 'coverage', 'ipdRatio', 'refId', 'Ca5C' and
    'strand' from siteObs.  The score is the 'Ca5C' value formatted to three
    decimals; a strand value of 0 maps to '+', anything else to '-'.
    """
    # Single-position feature: start and end are both tpl + 1.
    # NOTE(review): presumably 'tpl' is 0-based and the +1 makes it 1-based
    # for GFF -- confirm.
    position = siteObs['tpl'] + 1
    siteAttributes = [
        ('coverage', siteObs['coverage']),
        ('IPDRatio', siteObs['ipdRatio']),
    ]
    referenceName = siteObs['refId']
    formattedScore = "%.3f" % siteObs['Ca5C']
    if siteObs['strand'] == 0:
        strandSymbol = '+'
    else:
        strandSymbol = '-'

    return Gff3Record(
        referenceName,
        position,
        position,
        type='CG',
        score=formattedScore,
        strand=strandSymbol,
        source='kinModCall',
        attributes=siteAttributes)
def makeGffRecord(self, siteObs):
    """
    Convert the internal site observation object into a GFF entry.

    siteObs is a mapping.  The keys 'tpl', 'strand', 'coverage', 'ipdRatio',
    'refName' and 'score' are always read.  The keys 'motif', 'id',
    'modificationScore' and 'modification' are optional, as are the
    FRAC / FRAClow / FRACup keys, which are only reported when
    self.options.methylFraction is set.

    Returns a Gff3Record with source 'kinModCall'.
    """
    # Some useful attributes about the observation
    # - cognate base
    # - context snippet
    # - ipd ratio
    # - coverage
    snippet = self.snippetFunc(siteObs['tpl'], siteObs['strand'])
    attributes = [('coverage', siteObs['coverage']),
                  ('context', snippet),
                  ('IPDRatio', siteObs['ipdRatio'])]

    # Base of detected mod -- single position, closed,open
    # interval.
    # Note -- internally the tool uses 0-based reference
    # coordinates, however in gff the template indices are
    # 1-based.  Make that adjustment here.
    # On start vs. end: My reading of the gff spec
    # (http://www.sequenceontology.org/resources/gff3.html) says
    # to me that 1-base long feature (e.g. a modified base) should
    # have start + 1 == end, and 0-base long features
    # (e.g. insertions) should have start == end. This is not the
    # convention that Marco has adopted in SMRTView, or the
    # convention that EviCons originally used.  We will adopt
    # their convention here, for now.
    start = siteObs['tpl'] + 1
    end = siteObs['tpl'] + 1

    # `in` is used for the membership tests because dict.has_key() no
    # longer exists on Python 3; `in` behaves the same on Python 2.
    if 'motif' in siteObs:
        attributes.append(('motif', "%s" % siteObs['motif']))

    if 'id' in siteObs:
        attributes.append(('id', "%s" % siteObs['id']))

    if self.options.methylFraction and FRAC in siteObs:
        attributes.append(('frac', "%.3f" % siteObs[FRAC]))
        attributes.append(('fracLow', "%.3f" % siteObs[FRAClow]))
        attributes.append(('fracUp', "%.3f" % siteObs[FRACup]))

    if 'modificationScore' in siteObs:
        # Report the QV from the modification identification module as a special tag
        attributes.append(('identificationQv', "%d" % int(round(siteObs['modificationScore']))))

    if 'modification' in siteObs:
        if siteObs['modification'] == '.':
            recordType = 'modified_base'
        elif siteObs['modification'] == 'nMd':
            recordType = '.'
        else:
            # if we have an identified mod, use it; otherwise use the old generic term
            recordType = siteObs['modification']
    else:
        recordType = 'modified_base'

    refName = siteObs['refName']
    score = int(round(siteObs['score']))
    strand = '+' if siteObs['strand'] == 0 else '-'

    return Gff3Record(refName, start, end,
                      type=recordType,
                      score=score,
                      strand=strand,
                      source='kinModCall',
                      attributes=attributes)
def setup(self):
    """Create the Gff3Record fixture stored on self.record."""
    # NOTE(review): if this class is collected by pytest, plain `setup` is the
    # nose-style hook name -- confirm the test runner in use still calls it.
    fixture_attributes = [("cat", "1"), ("dog", "2")]
    self.record = Gff3Record(
        "chr1", 10, 11, "insertion", attributes=fixture_attributes)