Esempio n. 1
0
class TestGffWriter:
    """Checks the text GffWriter produces for headers and for records."""

    # Shared fixture records; both live on "chr1" and carry two attributes.
    RECORD1 = Gff3Record(
        "chr1", 10, 11, "insertion",
        attributes=[("cat", "1"), ("dog", "2")])
    RECORD2 = Gff3Record(
        "chr1", 200, 201, "substitution",
        attributes=[("mouse", "1"), ("moose", "2")])

    def setup_method(self):
        """Create a fresh in-memory sink and a writer bound to it."""
        sink = StringIO()
        self.outfile = sink
        self.gffWriter = GffWriter(sink)

    def test_writeHeader(self):
        """The extra header is expected right after the version line."""
        self.gffWriter.writeHeader("##foo bar")
        written = self.outfile.getvalue()
        assert "##gff-version 3\n##foo bar\n" == written

    def test_writeRecord(self):
        """Each record is expected as one tab-separated line, in write order."""
        for record in (self.RECORD1, self.RECORD2):
            self.gffWriter.writeRecord(record)
        expected = "".join([
            "##gff-version 3\n",
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n",
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n",
        ])
        assert expected == self.outfile.getvalue()
    def m5CgffConsumer(self, filename):
        """
        Coroutine that writes selected site observations to a GFF file.

        The output handle is obtained from self.openWriteHandle(filename).
        Every value sent into the generator is iterated as a batch of site
        observations; an observation is written (via self.makeM5CgffRecord)
        only when it contains a 'Ca5C' key and its 'strand' equals 0.
        Closing the generator closes the output handle.
        """
        handle = self.openWriteHandle(filename)
        writer = GffWriter(handle)

        # Headers naming the producing program and its command line
        writer.writeHeader('##source ipdSummary v2.0')
        writer.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # The per-reference headers are currently disabled:
        # for entry in self.refInfo:
        #     writer.writeHeader("##sequence-region %s 1 %d"
        #                        % (entry.Name, entry.Length))

        try:
            while True:
                # Wait here until the caller sends the next batch
                batch = yield

                for site in batch:
                    if 'Ca5C' not in site:
                        continue
                    if site['strand'] == 0:
                        writer.writeRecord(self.makeM5CgffRecord(site))

        except GeneratorExit:
            # close() on the generator lands here; release the handle
            handle.close()
            return
Esempio n. 3
0
    def m5CgffConsumer(self, filename):
        """
        Coroutine that writes selected site observations to a GFF file.

        The output handle is obtained from self.openWriteHandle(filename).
        Each value sent into the generator is an iterable of site-observation
        mappings; those that contain a 'Ca5C' key and have 'strand' equal to 0
        are converted with self.makeM5CgffRecord and written.  Closing the
        generator closes the output handle.
        """
        f = self.openWriteHandle(filename)
        gff = GffWriter(f)

        # write headers describing the program that generated the data
        gff.writeHeader('##source ipdSummary.py v2.0')
        gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # Write the reference renaming info into the gff headers ala evicons
        # (currently disabled):
        # for entry in self.refInfo:
        #     gff.writeHeader("##sequence-region %s 1 %d"
        #                     % (entry.Name, entry.Length))

        try:
            while True:
                # Suspend until the caller sends the next batch of observations
                siteObsList = (yield)

                for siteObs in siteObsList:
                    # `in` instead of has_key(): dict.has_key() does not
                    # exist in Python 3, and `in` works on Python 2 as well.
                    if 'Ca5C' in siteObs and siteObs['strand'] == 0:
                        gff.writeRecord(self.makeM5CgffRecord(siteObs))

        except GeneratorExit:
            # Raised inside the generator by close(); release the file handle
            f.close()
            return
class TestGffWriter(object):
    """Exercises GffWriter header and record output on an in-memory file."""

    def setup(self):
        """Build the output buffer, two sample records and the writer."""
        sink = StringIO()
        self.outfile = sink
        self.record1 = Gff3Record(
            "chr1", 10, 11, "insertion",
            attributes=[("cat", "1"), ("dog", "2")])
        self.record2 = Gff3Record(
            "chr1", 200, 201, "substitution",
            attributes=[("mouse", "1"), ("moose", "2")])
        self.gffWriter = GffWriter(sink)

    def test_writeHeader(self):
        """The extra header is expected right after the version line."""
        self.gffWriter.writeHeader("##foo bar")
        written = self.outfile.getvalue()
        assert_equal("##gff-version 3\n##foo bar\n", written)

    def test_writeRecord(self):
        """Each record is expected as one tab-separated line, in write order."""
        for record in (self.record1, self.record2):
            self.gffWriter.writeRecord(record)
        expected = "".join([
            "##gff-version 3\n",
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n",
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n",
        ])
        assert_equal(expected, self.outfile.getvalue())
Esempio n. 5
0
class TestGffWriter:
    """Compares GffWriter output for headers and records with fixed text."""

    def setup(self):
        """Prepare an in-memory output, two fixture records and the writer."""
        buf = StringIO()
        self.outfile = buf
        self.record1 = Gff3Record(
            "chr1",
            10,
            11,
            "insertion",
            attributes=[("cat", "1"), ("dog", "2")],
        )
        self.record2 = Gff3Record(
            "chr1",
            200,
            201,
            "substitution",
            attributes=[("mouse", "1"), ("moose", "2")],
        )
        self.gffWriter = GffWriter(buf)

    def test_writeHeader(self):
        """Writing one header should yield the version line plus that header."""
        self.gffWriter.writeHeader("##foo bar")
        actual = self.outfile.getvalue()
        assert_equal("##gff-version 3\n##foo bar\n", actual)

    def test_writeRecord(self):
        """Two records should follow the version line, one line per record."""
        self.gffWriter.writeRecord(self.record1)
        self.gffWriter.writeRecord(self.record2)
        expected_lines = (
            "##gff-version 3\n",
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n",
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n",
        )
        assert_equal("".join(expected_lines), self.outfile.getvalue())
Esempio n. 6
0
    def gffConsumer(self, filename):
        """
        Coroutine sink: receive batches of IPD summary rows, filter them and
        write the survivors to a GFF file.

        The output handle comes from self.openWriteHandle(filename).  Before
        the first batch is accepted the source headers and one
        ##sequence-region header per entry in self.refInfo are written.
        Closing the generator closes the output handle.
        """
        handle = self.openWriteHandle(filename)
        writer = GffWriter(handle)

        # Headers naming the producing program and its command line
        writer.writeHeader('##source ipdSummary v2.0')
        writer.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # One sequence-region header per reference entry
        for ref in self.refInfo:
            regionHeader = "##sequence-region %s 1 %d" % (ref.Name, ref.Length)
            writer.writeHeader(regionHeader)

        # Score threshold derived from the configured p-value
        scoreCutoff = -10 * math.log10(self.options.pvalue)
        activeRef = -1
        try:
            while True:
                # Wait here until the caller sends the next batch
                batch = yield

                for site in batch:
                    # Refresh self.snippetFunc whenever the reference changes
                    refId = site['refId']
                    if activeRef != refId:
                        self.snippetFunc = self.ipdModel.snippetFunc(
                            refId, 20, 20)
                        activeRef = refId

                    # Rows at or below the coverage threshold are never written
                    if not (site['coverage'] > self.options.minCoverage):
                        continue

                    # A row is written in either of two cases:
                    #  1. identified modification: a 'modification' key whose
                    #     value is not '.'
                    #  2. detected only: 'score' above the cutoff and no
                    #     'offTargetPeak' key
                    identified = ('modification' in site
                                  and site['modification'] != '.')
                    if identified or (site['score'] > scoreCutoff
                                      and 'offTargetPeak' not in site):
                        writer.writeRecord(self.makeGffRecord(site))

                    # FIXME (carried over): try not filtering at all, i.e.
                    # writer.writeRecord(self.makeGffRecord(site))

        except GeneratorExit:
            # close() on the generator lands here; release the handle
            handle.close()
            return
Esempio n. 7
0
    def gffConsumer(self, filename):
        """
        Consume IPD summary rows, filter them and write to GFF.

        Coroutine: the output handle comes from self.openWriteHandle(filename)
        and each value sent in is an iterable of site-observation mappings.
        The source headers and one ##sequence-region header per entry in
        self.refInfo are written before the first batch is accepted.
        Closing the generator closes the output handle.
        """

        #f = file(filename, 'w', 2<<15)
        f = self.openWriteHandle(filename)
        gff = GffWriter(f)

        # write headers describing the program that generated the data
        gff.writeHeader('##source ipdSummary.py v2.0')
        gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # Write the reference renaming info into the gff headers ala evicons
        for entry in self.refInfo:
            gff.writeHeader("##sequence-region %s 1 %d"
                            % (entry.Name, entry.Length))

        # Score threshold derived from the configured p-value
        minScore = -10 * math.log10(self.options.pvalue)
        snippetRef = -1
        try:
            while True:
                # Suspend until the caller sends the next batch of rows
                siteObsList = (yield)

                for siteObs in siteObsList:
                    # self.snippetFunc is a function that return a reference
                    # snippet given a template position and a strand; it is
                    # refreshed whenever the reference id changes
                    if snippetRef != siteObs['refId']:
                        self.snippetFunc = self.ipdModel.snippetFunc(
                            siteObs['refId'], 20, 20)
                        snippetRef = siteObs['refId']

                    # Two cases for gff entries:
                    # 1. 'Identified modification' - will have a 'modification' key
                    #     - use the modification name as the gff event type
                    #     - use 'modificationScore' for the gff score
                    # 2. Detected - no 'modification' key
                    #     - use 'modified_base' as the event type
                    #     - use the single site 'score' property as the gff score
                    #     - do not put this kind into the gff if it contains the a 'offTargetPeak' tag

                    if siteObs['coverage'] > self.options.minCoverage:
                        # Membership tests use `in` / `not in`:
                        # dict.has_key() does not exist in Python 3.

                        # Case 1
                        if 'modification' in siteObs and siteObs['modification'] != '.':
                            gff.writeRecord(self.makeGffRecord(siteObs))

                        # Case 2
                        elif siteObs['score'] > minScore and 'offTargetPeak' not in siteObs:
                            gff.writeRecord(self.makeGffRecord(siteObs))

                    # FIXME: Try not filtering:
                    # gff.writeRecord(self.makeGffRecord(siteObs))

        except GeneratorExit:
            # Raised inside the generator by close(); release the file handle
            f.close()
            return
Esempio n. 8
0
class VariantsGffWriter(object):
    """Writes variants as GFF records, preceded by a fixed block of headers."""

    ONTOLOGY_URL = (
        "http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12"
    )

    def __init__(self, f, optionsDict, referenceEntries):
        """
        Wrap f in a GffWriter and emit the header block.

        f                -- output handle handed to GffWriter
        optionsDict      -- must provide "shellCommand", "inputFilename" and
                            "referenceFilename"
        referenceEntries -- iterable of objects with .name and .length; one
                            ##sequence-region header is written per entry
        """
        writer = GffWriter(f)
        self._gffWriter = writer
        emit = writer.writeHeader

        emit("##pacbio-variant-version 2.1")
        emit("##date %s" % time.ctime())
        emit("##feature-ontology %s" % self.ONTOLOGY_URL)
        emit("##source GenomicConsensus %s" % __VERSION__)

        # Headers whose value is taken straight from the options dictionary
        optionHeaders = (
            ("##source-commandline %s", "shellCommand"),
            ("##source-alignment-file %s", "inputFilename"),
            ("##source-reference-file %s", "referenceFilename"),
        )
        for template, optionKey in optionHeaders:
            emit(template % optionsDict[optionKey])

        # Reference groups.
        for ref in referenceEntries:
            emit("##sequence-region %s 1 %d" % (ref.name, ref.length))

    def writeVariants(self, variants):
        """Convert every variant with toGffRecord and write it, in order."""
        write = self._gffWriter.writeRecord
        for variant in variants:
            write(toGffRecord(variant))

    def close(self):
        """Close the underlying GffWriter."""
        self._gffWriter.close()
Esempio n. 9
0
    def gffConsumer(self, filename):
        """
        Consume IPD summary rows, filter them and write to GFF.

        Coroutine: the output handle comes from self.openWriteHandle(filename)
        and each value sent in is an iterable of site-observation dicts.  For
        every site a GFF record is built and its type, score and attributes
        are copied back into the site dict.  Sites that pass the filter are
        written to the GFF file; every site, passing or not, is reported on
        sys.stdout as a single 'key:value;key:value' line.  Closing the
        generator closes the output handle.
        """

        #f = file(filename, 'w', 2<<15)
        f = self.openWriteHandle(filename)
        gff = GffWriter(f)

        # write headers describing the program that generated the data
        gff.writeHeader('##source ipdSummary v2.0')
        gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

        # Write the reference renaming info into the gff headers ala evicons
        for entry in self.refInfo:
            gff.writeHeader("##sequence-region %s 1 %d" %
                            (entry.Name, entry.Length))

        # Score threshold derived from the configured p-value
        minScore = -10 * math.log10(self.options.pvalue)
        snippetRef = -1
        try:
            while True:
                # Suspend until the caller sends the next batch of rows
                siteObsList = (yield)

                for siteObs in siteObsList:
                    # self.snippetFunc is a function that return a reference
                    # snippet given a template position and a strand; it is
                    # refreshed whenever the reference id changes
                    if snippetRef != siteObs['refId']:
                        self.snippetFunc = self.ipdModel.snippetFunc(
                            siteObs['refId'], 20, 20)
                        snippetRef = siteObs['refId']

                    gffline = self.makeGffRecord(siteObs)

                    # Render and split the record once instead of once per
                    # column: index 2 is the type, 5 the score, 8 the
                    # attribute list.
                    columns = str(gffline).split()
                    siteObs['type_modif'] = columns[2]
                    siteObs['score_modif'] = columns[5]

                    # Copy every 'key=value' attribute back into the site dict
                    for attribute in columns[8].split(';'):
                        pieces = attribute.split('=')
                        siteObs[pieces[0]] = pieces[1]

                    # NOTE(review): if the record attributes include keys such
                    # as 'coverage' or 'score', the merge above has replaced
                    # the numeric values with strings before the comparisons
                    # below -- confirm against makeGffRecord.

                    # Two cases for gff entries:
                    # 1. 'Identified modification' - will have a 'modification' key
                    #     - use the modification name as the gff event type
                    #     - use 'modificationScore' for the gff score
                    # 2. Detected - no 'modification' key
                    #     - use 'modified_base' as the event type
                    #     - use the single site 'score' property as the gff score
                    #     - do not put this kind into the gff if it contains the a 'offTargetPeak' tag
                    true_modif = False
                    if siteObs['coverage'] > self.options.minCoverage:
                        # Membership tests use `in` / `not in`:
                        # dict.has_key() does not exist in Python 3.

                        # Case 1
                        if 'modification' in siteObs and siteObs[
                                'modification'] != '.':
                            true_modif = True
                        # Case 2
                        elif siteObs['score'] > minScore and \
                                'offTargetPeak' not in siteObs:
                            true_modif = True

                    if true_modif:
                        gff.writeRecord(gffline)
                        siteObs['modified'] = True
                    else:
                        siteObs['modified'] = False
                        siteObs['type_modif'] = 'None'

                    # Will be printed in sys.stdout for every position (the
                    # downstream consumer decides what it wants to keep).
                    # 'capped_values' and 'rawData' are reported as 'None';
                    # newlines are stripped from every other value so each
                    # site stays on one line.
                    fields = []
                    for key in siteObs:
                        if key in ('capped_values', 'rawData'):
                            text = 'None'
                        else:
                            text = str(siteObs[key]).replace('\n', '')
                        fields.append('{}:{}'.format(key, text))
                    sys.stdout.write(';'.join(fields) + '\n')
        except GeneratorExit:
            # Raised inside the generator by close(); release the file handle
            f.close()
            return
class VariantsGffWriter(object):
    """
    Writes variants as GFF records, preceded by a fixed block of headers.

    Only variants meeting the minimum coverage and confidence taken from the
    options dictionary are written.
    """

    ONTOLOGY_URL = (
        "http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12"
    )

    def __init__(self, f, optionsDict, referenceEntries):
        """
        Wrap f in a GffWriter, store the thresholds and emit the header block.

        f                -- output handle handed to GffWriter
        optionsDict      -- must provide "minConfidence", "minCoverage",
                            "shellCommand", "inputFilename" and
                            "referenceFilename"
        referenceEntries -- iterable of objects with .name and .length; one
                            ##sequence-region header is written per entry
        """
        writer = GffWriter(f)
        self._gffWriter = writer
        self._minConfidence = optionsDict["minConfidence"]
        self._minCoverage = optionsDict["minCoverage"]

        emit = writer.writeHeader
        emit("##pacbio-variant-version 2.1")
        emit("##date %s" % time.ctime())
        emit("##feature-ontology %s" % self.ONTOLOGY_URL)
        emit("##source GenomicConsensus %s" % __VERSION__)

        # Headers whose value is taken straight from the options dictionary
        optionHeaders = (
            ("##source-commandline %s", "shellCommand"),
            ("##source-alignment-file %s", "inputFilename"),
            ("##source-reference-file %s", "referenceFilename"),
        )
        for template, optionKey in optionHeaders:
            emit(template % optionsDict[optionKey])

        # Reference groups.
        for ref in referenceEntries:
            emit("##sequence-region %s 1 %d" % (ref.name, ref.length))

    def writeVariants(self, variants):
        """Write each variant that meets both thresholds, in input order."""
        for variant in variants:
            keep = (variant.coverage >= self._minCoverage
                    and variant.confidence >= self._minConfidence)
            if not keep:
                continue
            self._gffWriter.writeRecord(toGffRecord(variant))

    def close(self):
        """Close the underlying GffWriter."""
        self._gffWriter.close()