class TestGffWriter:
    """Checks the text GffWriter produces when writing to an in-memory buffer."""

    # Shared records used by the record-writing test.
    RECORD1 = Gff3Record(
        "chr1", 10, 11, "insertion",
        attributes=[("cat", "1"), ("dog", "2")])
    RECORD2 = Gff3Record(
        "chr1", 200, 201, "substitution",
        attributes=[("mouse", "1"), ("moose", "2")])

    def setup_method(self):
        """Give every test its own buffer and a writer bound to it."""
        buf = StringIO()
        self.outfile = buf
        self.gffWriter = GffWriter(buf)

    def test_writeHeader(self):
        """A written header line follows the gff-version line in the output."""
        self.gffWriter.writeHeader("##foo bar")
        written = self.outfile.getvalue()
        assert "##gff-version 3\n##foo bar\n" == written

    def test_writeRecord(self):
        """Each record becomes one tab-separated line, in write order."""
        for record in (self.RECORD1, self.RECORD2):
            self.gffWriter.writeRecord(record)
        expected = (
            "##gff-version 3\n"
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n"
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n")
        written = self.outfile.getvalue()
        assert expected == written
def m5CgffConsumer(self, filename):
    """
    Coroutine that writes selected site observations to a GFF file.

    After priming, ``send()`` it lists of site observations (mappings).
    Observations that contain a 'Ca5C' key and have ``strand == 0`` are
    converted with ``self.makeM5CgffRecord`` and written; all others are
    skipped.

    The output handle is closed when the generator is closed and also when
    any error escapes, so a failure part-way through cannot leak it.

    :param filename: handed to ``self.openWriteHandle`` to obtain the
        writable handle.
    """
    f = self.openWriteHandle(filename)
    try:
        gff = GffWriter(f)

        # write headers describing the program that generated the data
        gff.writeHeader('##source ipdSummary v2.0')
        gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # Write the reference renaming info into the gff headers ala evicons
        # (disabled)
        # for entry in self.refInfo:
        #     gff.writeHeader("##sequence-region %s 1 %d"
        #                     % (entry.Name, entry.Length))

        while True:
            # Wait for the next batch of observations to be sent in.
            siteObsList = (yield)

            for siteObs in siteObsList:
                if 'Ca5C' in siteObs and siteObs['strand'] == 0:
                    gff.writeRecord(self.makeM5CgffRecord(siteObs))
    except GeneratorExit:
        # Normal shutdown via close(); finish quietly.
        return
    finally:
        # Runs on close() and on any error alike.
        f.close()
def m5CgffConsumer(self, filename):
    """
    Coroutine that writes selected site observations to a GFF file.

    After priming, ``send()`` it lists of site observations (mappings).
    Observations that contain a 'Ca5C' key and have ``strand == 0`` are
    converted with ``self.makeM5CgffRecord`` and written; all others are
    skipped.

    The output handle is closed when the generator is closed and also when
    any error escapes, so a failure part-way through cannot leak it.

    :param filename: handed to ``self.openWriteHandle`` to obtain the
        writable handle.
    """
    f = self.openWriteHandle(filename)
    try:
        gff = GffWriter(f)

        # write headers describing the program that generated the data
        gff.writeHeader('##source ipdSummary.py v2.0')
        gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # Write the reference renaming info into the gff headers ala evicons
        # (disabled)
        # for entry in self.refInfo:
        #     gff.writeHeader("##sequence-region %s 1 %d"
        #                     % (entry.Name, entry.Length))

        while True:
            # Wait for the next batch of observations to be sent in.
            siteObsList = (yield)

            for siteObs in siteObsList:
                # `in` replaces dict.has_key(), which no longer exists in
                # Python 3; the membership test works on both 2 and 3.
                if 'Ca5C' in siteObs and siteObs['strand'] == 0:
                    gff.writeRecord(self.makeM5CgffRecord(siteObs))
    except GeneratorExit:
        # Normal shutdown via close(); finish quietly.
        return
    finally:
        # Runs on close() and on any error alike.
        f.close()
class TestGffWriter(object):
    """Checks the text GffWriter produces when writing to an in-memory buffer."""

    def setup(self):
        """Build a fresh buffer, two sample records and a writer per test."""
        buf = StringIO()
        self.outfile = buf
        self.record1 = Gff3Record(
            "chr1", 10, 11, "insertion",
            attributes=[("cat", "1"), ("dog", "2")])
        self.record2 = Gff3Record(
            "chr1", 200, 201, "substitution",
            attributes=[("mouse", "1"), ("moose", "2")])
        self.gffWriter = GffWriter(buf)

    def test_writeHeader(self):
        """A written header line follows the gff-version line in the output."""
        self.gffWriter.writeHeader("##foo bar")
        written = self.outfile.getvalue()
        assert_equal("##gff-version 3\n##foo bar\n", written)

    def test_writeRecord(self):
        """Each record becomes one tab-separated line, in write order."""
        for record in (self.record1, self.record2):
            self.gffWriter.writeRecord(record)
        expected = (
            "##gff-version 3\n"
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n"
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n")
        written = self.outfile.getvalue()
        assert_equal(expected, written)
class TestGffWriter:
    """Checks the text GffWriter produces when writing to an in-memory buffer."""

    def setup(self):
        """Build a fresh buffer, two sample records and a writer per test."""
        buf = StringIO()
        self.outfile = buf
        self.record1 = Gff3Record(
            "chr1", 10, 11, "insertion",
            attributes=[("cat", "1"), ("dog", "2")])
        self.record2 = Gff3Record(
            "chr1", 200, 201, "substitution",
            attributes=[("mouse", "1"), ("moose", "2")])
        self.gffWriter = GffWriter(buf)

    def test_writeHeader(self):
        """A written header line follows the gff-version line in the output."""
        self.gffWriter.writeHeader("##foo bar")
        written = self.outfile.getvalue()
        assert_equal("##gff-version 3\n##foo bar\n", written)

    def test_writeRecord(self):
        """Each record becomes one tab-separated line, in write order."""
        for record in (self.record1, self.record2):
            self.gffWriter.writeRecord(record)
        expected = (
            "##gff-version 3\n"
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n"
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n")
        written = self.outfile.getvalue()
        assert_equal(expected, written)
def gffConsumer(self, filename):
    """
    Consume IPD summary rows, filter them and write to GFF.

    This is a coroutine: after priming, ``send()`` it lists of site
    observations (mappings). An observation is written, via
    ``self.makeGffRecord``, only if its 'coverage' exceeds
    ``self.options.minCoverage`` and one of these holds:

    1. Identified modification - it has a 'modification' key whose value
       is not '.'.
    2. Detected - its 'score' exceeds ``-10 * log10(self.options.pvalue)``
       and it has no 'offTargetPeak' key.

    The output handle is closed when the generator is closed and also when
    any error escapes, so a failure part-way through cannot leak it.

    :param filename: handed to ``self.openWriteHandle`` to obtain the
        writable handle.
    """
    f = self.openWriteHandle(filename)
    try:
        gff = GffWriter(f)

        # write headers describing the program that generated the data
        gff.writeHeader('##source ipdSummary v2.0')
        gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # Write the reference renaming info into the gff headers ala evicons
        for entry in self.refInfo:
            gff.writeHeader("##sequence-region %s 1 %d"
                            % (entry.Name, entry.Length))

        # p-value threshold expressed on the -10*log10 scale used by 'score'
        minScore = -10 * math.log10(self.options.pvalue)

        # refId that self.snippetFunc was last built for (-1: none yet)
        snippetRef = -1

        while True:
            # Wait for the next batch of observations to be sent in.
            siteObsList = (yield)

            for siteObs in siteObsList:
                # self.snippetFunc is a function that returns a reference
                # snippet given a template position and a strand; rebuild
                # it only when the reference changes.
                if snippetRef != siteObs['refId']:
                    self.snippetFunc = self.ipdModel.snippetFunc(
                        siteObs['refId'], 20, 20)
                    snippetRef = siteObs['refId']

                if siteObs['coverage'] > self.options.minCoverage:
                    # Case 1: identified modification
                    if ('modification' in siteObs
                            and siteObs['modification'] != '.'):
                        gff.writeRecord(self.makeGffRecord(siteObs))

                    # Case 2: detected, and not an off-target peak
                    elif (siteObs['score'] > minScore
                            and 'offTargetPeak' not in siteObs):
                        gff.writeRecord(self.makeGffRecord(siteObs))

                # FIXME: Try not filtering:
                # gff.writeRecord(self.makeGffRecord(siteObs))
    except GeneratorExit:
        # Normal shutdown via close(); finish quietly.
        return
    finally:
        # Runs on close() and on any error alike.
        f.close()
def gffConsumer(self, filename):
    """
    Consume IPD summary rows, filter them and write to GFF.

    This is a coroutine: after priming, ``send()`` it lists of site
    observations (mappings). An observation is written, via
    ``self.makeGffRecord``, only if its 'coverage' exceeds
    ``self.options.minCoverage`` and one of these holds:

    1. Identified modification - it has a 'modification' key whose value
       is not '.'.
    2. Detected - its 'score' exceeds ``-10 * log10(self.options.pvalue)``
       and it has no 'offTargetPeak' key.

    The output handle is closed when the generator is closed and also when
    any error escapes, so a failure part-way through cannot leak it.

    :param filename: handed to ``self.openWriteHandle`` to obtain the
        writable handle.
    """
    f = self.openWriteHandle(filename)
    try:
        gff = GffWriter(f)

        # write headers describing the program that generated the data
        gff.writeHeader('##source ipdSummary.py v2.0')
        gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # Write the reference renaming info into the gff headers ala evicons
        for entry in self.refInfo:
            gff.writeHeader("##sequence-region %s 1 %d"
                            % (entry.Name, entry.Length))

        # p-value threshold expressed on the -10*log10 scale used by 'score'
        minScore = -10 * math.log10(self.options.pvalue)

        # refId that self.snippetFunc was last built for (-1: none yet)
        snippetRef = -1

        while True:
            # Wait for the next batch of observations to be sent in.
            siteObsList = (yield)

            for siteObs in siteObsList:
                # self.snippetFunc is a function that returns a reference
                # snippet given a template position and a strand; rebuild
                # it only when the reference changes.
                if snippetRef != siteObs['refId']:
                    self.snippetFunc = self.ipdModel.snippetFunc(
                        siteObs['refId'], 20, 20)
                    snippetRef = siteObs['refId']

                # Membership tests use `in` / `not in`: dict.has_key() no
                # longer exists in Python 3, and `in` works on both 2 and 3.
                if siteObs['coverage'] > self.options.minCoverage:
                    # Case 1: identified modification
                    if ('modification' in siteObs
                            and siteObs['modification'] != '.'):
                        gff.writeRecord(self.makeGffRecord(siteObs))

                    # Case 2: detected, and not an off-target peak
                    elif (siteObs['score'] > minScore
                            and 'offTargetPeak' not in siteObs):
                        gff.writeRecord(self.makeGffRecord(siteObs))

                # FIXME: Try not filtering:
                # gff.writeRecord(self.makeGffRecord(siteObs))
    except GeneratorExit:
        # Normal shutdown via close(); finish quietly.
        return
    finally:
        # Runs on close() and on any error alike.
        f.close()
class VariantsGffWriter(object):
    """
    Writes variants as GFF records through a wrapped GffWriter.

    The header block (format version, date, ontology, tool version, command
    line, input files and one sequence-region line per reference entry) is
    written while the object is being constructed.
    """

    ONTOLOGY_URL = (
        "http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12"
    )

    def __init__(self, f, optionsDict, referenceEntries):
        """
        :param f: handle passed straight to GffWriter.
        :param optionsDict: mapping read for "shellCommand",
            "inputFilename" and "referenceFilename".
        :param referenceEntries: iterable of objects with ``name`` and
            ``length`` attributes.
        """
        self._gffWriter = GffWriter(f)
        emit = self._gffWriter.writeHeader

        emit("##pacbio-variant-version 2.1")
        emit("##date %s" % time.ctime())
        emit("##feature-ontology %s" % self.ONTOLOGY_URL)
        emit("##source GenomicConsensus %s" % __VERSION__)
        emit("##source-commandline %s" % optionsDict["shellCommand"])
        emit("##source-alignment-file %s" % optionsDict["inputFilename"])
        emit("##source-reference-file %s" % optionsDict["referenceFilename"])

        # One sequence-region header per reference group.
        for ref in referenceEntries:
            emit("##sequence-region %s 1 %d" % (ref.name, ref.length))

    def writeVariants(self, variants):
        """Convert each variant with toGffRecord and write it, in order."""
        records = (toGffRecord(variant) for variant in variants)
        for record in records:
            self._gffWriter.writeRecord(record)

    def close(self):
        """Close the wrapped GffWriter."""
        self._gffWriter.close()
def gffConsumer(self, filename):
    """
    Consume IPD summary rows, write the filtered ones to GFF, and dump
    every row to stdout.

    This is a coroutine: after priming, ``send()`` it lists of site
    observations (mutable mappings). For every observation:

    * a GFF record is built with ``self.makeGffRecord`` and its type
      (column 3), score (column 6) and ``key=value`` attributes (column 9)
      are copied into the observation as 'type_modif', 'score_modif' and
      one entry per attribute;
    * the record is written to the GFF only if 'coverage' exceeds
      ``self.options.minCoverage`` and either

      1. Identified modification - there is a 'modification' key whose
         value is not '.', or
      2. Detected - 'score' exceeds ``-10 * log10(self.options.pvalue)``
         and there is no 'offTargetPeak' key;

    * 'modified' records whether it was written; when it was not,
      'type_modif' is reset to the string 'None';
    * one ``key:value;key:value`` line is written to ``sys.stdout``, with
      'capped_values' and 'rawData' shown as ``None`` and newlines
      stripped from the other values.

    The output handle is closed when the generator is closed and also when
    any error escapes, so a failure part-way through cannot leak it.

    :param filename: handed to ``self.openWriteHandle`` to obtain the
        writable handle.
    """
    # Keys whose values are not dumped to stdout.
    unprinted = ('capped_values', 'rawData')

    f = self.openWriteHandle(filename)
    try:
        gff = GffWriter(f)

        # write headers describing the program that generated the data
        gff.writeHeader('##source ipdSummary v2.0')
        gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # Write the reference renaming info into the gff headers ala evicons
        for entry in self.refInfo:
            gff.writeHeader("##sequence-region %s 1 %d"
                            % (entry.Name, entry.Length))

        # p-value threshold expressed on the -10*log10 scale used by 'score'
        minScore = -10 * math.log10(self.options.pvalue)

        # refId that self.snippetFunc was last built for (-1: none yet)
        snippetRef = -1

        while True:
            # Wait for the next batch of observations to be sent in.
            siteObsList = (yield)

            for siteObs in siteObsList:
                # self.snippetFunc is a function that returns a reference
                # snippet given a template position and a strand; rebuild
                # it only when the reference changes.
                if snippetRef != siteObs['refId']:
                    self.snippetFunc = self.ipdModel.snippetFunc(
                        siteObs['refId'], 20, 20)
                    snippetRef = siteObs['refId']

                # Render the record once and reuse the split columns.
                gffline = self.makeGffRecord(siteObs)
                fields = str(gffline).split()
                attributes = fields[8].split(';')
                siteObs['type_modif'] = fields[2]
                siteObs['score_modif'] = fields[5]

                # NOTE(review): attributes are merged into siteObs under
                # their own names, as strings. If makeGffRecord emits an
                # attribute called 'coverage' or 'score', the filter below
                # compares that string instead of the number - confirm
                # against makeGffRecord.
                for attribute in attributes:
                    parts = attribute.split('=')
                    siteObs[str(parts[0])] = parts[1]

                # `in` / `not in` replace dict.has_key(), which no longer
                # exists in Python 3.
                is_modified = False
                if siteObs['coverage'] > self.options.minCoverage:
                    identified = ('modification' in siteObs
                                  and siteObs['modification'] != '.')
                    if identified or (siteObs['score'] > minScore
                                      and 'offTargetPeak' not in siteObs):
                        gff.writeRecord(gffline)
                        is_modified = True

                siteObs['modified'] = is_modified
                if not is_modified:
                    siteObs['type_modif'] = 'None'

                # Every position is printed to stdout; a downstream
                # consumer decides what to keep.
                entries = []
                for key in siteObs:
                    name = str(key)
                    if key not in unprinted:
                        text = str(siteObs[name]).replace('\n', '')
                    else:
                        text = 'None'
                    entries.append('{}:{}'.format(name, text))
                sys.stdout.write(';'.join(entries) + '\n')
    except GeneratorExit:
        # Normal shutdown via close(); finish quietly.
        return
    finally:
        # Runs on close() and on any error alike.
        f.close()
class VariantsGffWriter(object):
    """
    Writes variants as GFF records through a wrapped GffWriter, keeping
    only those that meet the configured coverage and confidence minimums.

    The header block (format version, date, ontology, tool version, command
    line, input files and one sequence-region line per reference entry) is
    written while the object is being constructed.
    """

    ONTOLOGY_URL = (
        "http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12"
    )

    def __init__(self, f, optionsDict, referenceEntries):
        """
        :param f: handle passed straight to GffWriter.
        :param optionsDict: mapping read for "minConfidence",
            "minCoverage", "shellCommand", "inputFilename" and
            "referenceFilename".
        :param referenceEntries: iterable of objects with ``name`` and
            ``length`` attributes.
        """
        self._gffWriter = GffWriter(f)
        self._minConfidence = optionsDict["minConfidence"]
        self._minCoverage = optionsDict["minCoverage"]

        emit = self._gffWriter.writeHeader
        emit("##pacbio-variant-version 2.1")
        emit("##date %s" % time.ctime())
        emit("##feature-ontology %s" % self.ONTOLOGY_URL)
        emit("##source GenomicConsensus %s" % __VERSION__)
        emit("##source-commandline %s" % optionsDict["shellCommand"])
        emit("##source-alignment-file %s" % optionsDict["inputFilename"])
        emit("##source-reference-file %s" % optionsDict["referenceFilename"])

        # One sequence-region header per reference group.
        for ref in referenceEntries:
            emit("##sequence-region %s 1 %d" % (ref.name, ref.length))

    def writeVariants(self, variants):
        """
        Write, in order, each variant whose coverage and confidence are
        both at least the configured minimums; skip the rest.
        """
        accepted = (
            variant for variant in variants
            if variant.coverage >= self._minCoverage
            and variant.confidence >= self._minConfidence
        )
        for variant in accepted:
            self._gffWriter.writeRecord(toGffRecord(variant))

    def close(self):
        """Close the wrapped GffWriter."""
        self._gffWriter.close()