class TestGffWriter(object):
    """Checks the text that GffWriter emits for headers and records."""

    def setup(self):
        """Create an in-memory sink, two sample records and the writer."""
        self.outfile = StringIO()
        insertionAttrs = [("cat", "1"), ("dog", "2")]
        substitutionAttrs = [("mouse", "1"), ("moose", "2")]
        self.record1 = Gff3Record("chr1", 10, 11, "insertion",
                                  attributes=insertionAttrs)
        self.record2 = Gff3Record("chr1", 200, 201, "substitution",
                                  attributes=substitutionAttrs)
        self.gffWriter = GffWriter(self.outfile)

    def test_writeHeader(self):
        """A custom header is expected right after the version line."""
        self.gffWriter.writeHeader("##foo bar")
        written = self.outfile.getvalue()
        assert_equal("##gff-version 3\n##foo bar\n", written)

    def test_writeRecord(self):
        """Both records are expected as tab-separated lines, in write order."""
        for record in (self.record1, self.record2):
            self.gffWriter.writeRecord(record)
        expected = "".join([
            "##gff-version 3\n",
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n",
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n",
        ])
        assert_equal(expected, self.outfile.getvalue())
Exemple #2
0
 def setup(self):
     """Prepare an in-memory output buffer, two sample records and a writer."""
     self.outfile = StringIO()
     self.record1 = Gff3Record(
         "chr1", 10, 11, "insertion",
         attributes=[("cat", "1"), ("dog", "2")])
     self.record2 = Gff3Record(
         "chr1", 200, 201, "substitution",
         attributes=[("mouse", "1"), ("moose", "2")])
     # The writer is bound to the buffer so tests can inspect its output.
     self.gffWriter = GffWriter(self.outfile)
class TestGffWriter:
    """Exercises GffWriter output using two shared sample records."""

    # Sample records shared by the tests below.
    RECORD1 = Gff3Record("chr1", 10, 11, "insertion",
                         attributes=[("cat", "1"), ("dog", "2")])
    RECORD2 = Gff3Record("chr1", 200, 201, "substitution",
                         attributes=[("mouse", "1"), ("moose", "2")])

    def setup_method(self):
        """Create a fresh in-memory buffer and a writer bound to it."""
        buf = StringIO()
        self.outfile = buf
        self.gffWriter = GffWriter(buf)

    def test_writeHeader(self):
        """A custom header is expected right after the version line."""
        self.gffWriter.writeHeader("##foo bar")
        written = self.outfile.getvalue()
        assert "##gff-version 3\n##foo bar\n" == written

    def test_writeRecord(self):
        """Both records are expected as tab-separated lines, in write order."""
        for record in (self.RECORD1, self.RECORD2):
            self.gffWriter.writeRecord(record)
        expected = "".join((
            "##gff-version 3\n",
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n",
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n",
        ))
        assert expected == self.outfile.getvalue()
    def m5CgffConsumer(self, filename):
        """
        Coroutine that writes m5C site observations to a GFF file.

        Opens ``filename`` through ``self.openWriteHandle``, writes the source
        headers, then repeatedly receives a list of site observations through
        ``send()``.  An observation is written (as built by
        ``self.makeM5CgffRecord``) only when it has a 'Ca5C' key and its
        'strand' entry equals 0.

        The handle is closed when the generator is closed and also when any
        other exception escapes, so it is never leaked.
        """
        f = self.openWriteHandle(filename)
        try:
            gff = GffWriter(f)

            # write headers describing the program that generated the data
            gff.writeHeader('##source ipdSummary.py v2.0')
            gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

            # Write the reference renaming info into the gff headers ala evicons
            # (currently disabled)
            # for entry in self.refInfo:
            #     gff.writeHeader("##sequence-region %s 1 %d"
            #                     % (entry.Name, entry.Length))

            while True:
                # Receive the next batch of site observations from the caller
                siteObsList = (yield)

                for siteObs in siteObsList:
                    # `in` works on Python 2 and 3; dict.has_key() is
                    # Python 2 only.
                    if 'Ca5C' in siteObs and siteObs['strand'] == 0:
                        gff.writeRecord(self.makeM5CgffRecord(siteObs))
        finally:
            # Runs on GeneratorExit (generator.close()) and on any error.
            f.close()
 def setup(self):
     """Create the output buffer, two sample GFF records and the writer."""
     self.outfile = StringIO()
     insertionAttrs = [("cat", "1"), ("dog", "2")]
     self.record1 = Gff3Record("chr1", 10, 11, "insertion",
                               attributes=insertionAttrs)
     substitutionAttrs = [("mouse", "1"), ("moose", "2")]
     self.record2 = Gff3Record("chr1", 200, 201, "substitution",
                               attributes=substitutionAttrs)
     self.gffWriter = GffWriter(self.outfile)
    def __init__(self, f, optionsDict, referenceEntries):
        """
        Wrap `f` in a GffWriter, store the filtering thresholds and write the
        header block.

        `optionsDict` must provide "minConfidence", "minCoverage",
        "shellCommand", "inputFilename" and "referenceFilename".  Each item
        of `referenceEntries` must expose `name` and `length`.
        """
        self._gffWriter = GffWriter(f)
        self._minConfidence = optionsDict["minConfidence"]
        self._minCoverage = optionsDict["minCoverage"]

        emit = self._gffWriter.writeHeader
        emit("##pacbio-variant-version 2.1")
        emit("##date %s" % time.ctime())
        emit("##feature-ontology %s" % self.ONTOLOGY_URL)
        emit("##source GenomicConsensus %s" % __VERSION__)
        emit("##source-commandline %s" % optionsDict["shellCommand"])
        emit("##source-alignment-file %s" % optionsDict["inputFilename"])
        emit("##source-reference-file %s" % optionsDict["referenceFilename"])

        # One sequence-region header per reference group.
        for ref in referenceEntries:
            regionLine = "##sequence-region %s 1 %d" % (ref.name, ref.length)
            emit(regionLine)
    def m5CgffConsumer(self, filename):
        """
        Coroutine that writes m5C site observations to a GFF file.

        Opens ``filename`` through ``self.openWriteHandle``, writes the source
        headers, then repeatedly receives a list of site observations through
        ``send()``.  An observation is written (as built by
        ``self.makeM5CgffRecord``) only when it has a 'Ca5C' key and its
        'strand' entry equals 0.

        The handle is closed when the generator is closed and also when any
        other exception escapes, so it is never leaked.
        """
        f = self.openWriteHandle(filename)
        try:
            gff = GffWriter(f)

            # write headers describing the program that generated the data
            gff.writeHeader('##source ipdSummary v2.0')
            gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

            # Write the reference renaming info into the gff headers ala evicons
            # (currently disabled)
            # for entry in self.refInfo:
            #     gff.writeHeader("##sequence-region %s 1 %d"
            #                     % (entry.Name, entry.Length))

            while True:
                # Receive the next batch of site observations from the caller
                siteObsList = (yield)

                for siteObs in siteObsList:
                    if 'Ca5C' in siteObs and siteObs['strand'] == 0:
                        gff.writeRecord(self.makeM5CgffRecord(siteObs))
        finally:
            # Runs on GeneratorExit (generator.close()) and on any error.
            f.close()
class TestGffWriter:
    """Verifies GffWriter header and record output against literal text."""

    def setup(self):
        """Build the output buffer, the two sample records and the writer."""
        self.outfile = StringIO()
        self.record1 = Gff3Record(
            "chr1", 10, 11, "insertion",
            attributes=[("cat", "1"), ("dog", "2")])
        self.record2 = Gff3Record(
            "chr1", 200, 201, "substitution",
            attributes=[("mouse", "1"), ("moose", "2")])
        self.gffWriter = GffWriter(self.outfile)

    def test_writeHeader(self):
        """A custom header is expected right after the version line."""
        self.gffWriter.writeHeader("##foo bar")
        actual = self.outfile.getvalue()
        assert_equal("##gff-version 3\n##foo bar\n", actual)

    def test_writeRecord(self):
        """Both records are expected as tab-separated lines, in write order."""
        for record in (self.record1, self.record2):
            self.gffWriter.writeRecord(record)
        # Adjacent literals are concatenated into the full expected text.
        expected = (
            "##gff-version 3\n"
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n"
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n")
        assert_equal(expected, self.outfile.getvalue())
 def setUpClass(cls):
     """
     Write copies of TestCoverageRpt's reference FASTA and GFF to
     cls.REFERENCE and cls.GFF, with "|quiver" appended to every sequence id.
     """
     suffix = "|quiver"

     # FASTA: same sequences, suffixed headers.
     with FastaWriter(cls.REFERENCE) as fastaOut, \
             FastaReader(TestCoverageRpt.REFERENCE) as fastaIn:
         for record in fastaIn:
             fastaOut.writeRecord(record.id + suffix, record.sequence)

     # GFF: headers copied verbatim, records get the suffixed seqid.
     with GffWriter(cls.GFF) as gffOut, \
             GffReader(TestCoverageRpt.GFF) as gffIn:
         for headerLine in gffIn.headers:
             gffOut.writeHeader(headerLine)
         for record in gffIn:
             record.seqid += suffix
             gffOut.writeRecord(record)
 def __init__(self, f, optionsDict, referenceEntries):
     """
     Wrap `f` in a GffWriter and write the header block.

     `optionsDict` must provide "shellCommand", "inputFilename" and
     "referenceFilename".  Each item of `referenceEntries` must expose
     `name` and `length`.
     """
     self._gffWriter = GffWriter(f)

     emit = self._gffWriter.writeHeader
     emit("##pacbio-variant-version 2.1")
     emit("##date %s" % time.ctime())
     emit("##feature-ontology %s" % self.ONTOLOGY_URL)
     emit("##source GenomicConsensus %s" % __VERSION__)
     emit("##source-commandline %s" % optionsDict["shellCommand"])
     emit("##source-alignment-file %s" % optionsDict["inputFilename"])
     emit("##source-reference-file %s" % optionsDict["referenceFilename"])

     # One sequence-region header per reference group.
     for ref in referenceEntries:
         regionLine = "##sequence-region %s 1 %d" % (ref.name, ref.length)
         emit(regionLine)
def run(argv):
    """
    Generate a random 5000-base sequence with planted GTAC / CTAG motifs.

    Writes the sequence to "genome.fasta" and one "modified_base" record per
    planted motif to "basemods.gff".  Each record gets a "context" attribute
    holding the slice seq[start - 21:end + 20], clipped to the sequence
    bounds.  `argv` is not used.  Returns 0.
    """
    def _modifiedBase(position):
        # Single-position record with the fixed score and attributes.
        return Gff3Record("genome", position, position, "modified_base", 100,
                          "+", ".", "kinModCall",
                          [("coverage", "100"),
                           ("IPDRatio", "4.0"),
                           ("identificationQv", "50")])

    nuc = []
    basemods = []
    while len(nuc) < 5000:
        x = random.random()

        # Motifs are only planted while fewer than 4880 bases exist.
        motif = None
        if len(nuc) < 4880:
            if x > 0.99:
                motif = ["G", "T", "A", "C"]
            elif x < 0.01:
                motif = ["C", "T", "A", "G"]

        if motif is not None:
            # The record points at the third base of the motif being added.
            basemods.append(_modifiedBase(len(nuc) + 3))
            nuc.extend(motif)
            continue

        b = "ACGT"[random.randint(0, 3)]
        tail = nuc[-3:]
        # Substitute T when the drawn base would complete GTAC or CTAG,
        # so those motifs only occur where a record was created.
        completesMotif = ((b == "C" and tail == ["G", "T", "A"])
                          or (b == "G" and tail == ["C", "T", "A"]))
        nuc.append("T" if completesMotif else b)

    seq = "".join(nuc)
    with FastaWriter("genome.fasta") as fastaOut:
        fastaOut.writeRecord("genome", seq)
    with GffWriter("basemods.gff") as gffOut:
        for rec in basemods:
            lo = max(0, rec.start - 21)
            hi = min(len(seq), rec.end + 20)
            rec.attributes["context"] = seq[lo:hi]
            gffOut.writeRecord(rec)
    return 0
    def gffConsumer(self, filename):
        """
        Consume IPD summary rows, filter them and write to GFF

        Coroutine: after priming, each ``send()`` delivers a list of site
        observations.  The output handle (from ``self.openWriteHandle``) is
        closed when the generator is closed and also when any other exception
        escapes, so it is never leaked.
        """

        #f = file(filename, 'w', 2<<15)
        f = self.openWriteHandle(filename)
        try:
            gff = GffWriter(f)

            # write headers describing the program that generated the data
            gff.writeHeader('##source ipdSummary.py v2.0')
            gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

            # Write the reference renaming info into the gff headers ala evicons
            for entry in self.refInfo:
                gff.writeHeader("##sequence-region %s 1 %d"
                                % (entry.Name, entry.Length))

            # p-value threshold expressed on the -10*log10 score scale
            minScore = -10 * math.log10(self.options.pvalue)
            snippetRef = -1

            while True:
                # Receive the next batch of site observations from the caller
                siteObsList = (yield)

                for siteObs in siteObsList:
                    # self.snippetFunc is a function that returns a reference
                    # snippet given a template position and a strand; it is
                    # rebuilt only when the reference id changes.
                    if snippetRef != siteObs['refId']:
                        self.snippetFunc = self.ipdModel.snippetFunc(siteObs['refId'], 20, 20)
                        snippetRef = siteObs['refId']

                    # Two cases for gff entries:
                    # 1. 'Identified modification' - will have a 'modification' key
                    #     - use the modification name as the gff event type
                    #     - use 'modificationScore' for the gff score
                    # 2. Detected - no 'modification' key
                    #     - use 'modified_base' as the event type
                    #     - use the single site 'score' property as the gff score
                    #     - do not put this kind into the gff if it contains the 'offTargetPeak' tag

                    if siteObs['coverage'] > self.options.minCoverage:
                        # Case 1 (`in` replaces the Python-2-only has_key())
                        if 'modification' in siteObs and siteObs['modification'] != '.':
                            gff.writeRecord(self.makeGffRecord(siteObs))

                        # Case 2
                        elif siteObs['score'] > minScore and 'offTargetPeak' not in siteObs:
                            gff.writeRecord(self.makeGffRecord(siteObs))

                    # FIXME: Try not filtering:
                    # gff.writeRecord(self.makeGffRecord(siteObs))
        finally:
            # Runs on GeneratorExit (generator.close()) and on any error.
            f.close()
 def setup_method(self):
     """Create a fresh in-memory buffer and a writer bound to it."""
     buf = StringIO()
     self.outfile = buf
     self.gffWriter = GffWriter(buf)
Exemple #14
0
    def gffConsumer(self, filename):
        """
        Consume IPD summary rows, filter them and write to GFF

        Coroutine: after priming, each ``send()`` delivers a list of site
        observations.  For every observation a GFF record is built and its
        type, score and attributes are copied back into the observation.
        Records passing the filter are written to the GFF file.  Every
        observation, filtered or not, is then written to stdout as a single
        ``key:value;key:value`` line.

        The output handle is closed when the generator is closed and also
        when any other exception escapes, so it is never leaked.
        """

        #f = file(filename, 'w', 2<<15)
        f = self.openWriteHandle(filename)
        try:
            gff = GffWriter(f)

            # write headers describing the program that generated the data
            gff.writeHeader('##source ipdSummary v2.0')
            gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

            # Write the reference renaming info into the gff headers ala evicons
            for entry in self.refInfo:
                gff.writeHeader("##sequence-region %s 1 %d" %
                                (entry.Name, entry.Length))

            # p-value threshold expressed on the -10*log10 score scale
            minScore = -10 * math.log10(self.options.pvalue)
            snippetRef = -1

            while True:
                # Receive the next batch of site observations from the caller
                siteObsList = (yield)

                for siteObs in siteObsList:
                    # self.snippetFunc is a function that returns a reference
                    # snippet given a template position and a strand; it is
                    # rebuilt only when the reference id changes.
                    if snippetRef != siteObs['refId']:
                        self.snippetFunc = self.ipdModel.snippetFunc(
                            siteObs['refId'], 20, 20)
                        snippetRef = siteObs['refId']

                    # Two cases for gff entries:
                    # 1. 'Identified modification' - will have a 'modification' key
                    #     - use the modification name as the gff event type
                    #     - use 'modificationScore' for the gff score
                    # 2. Detected - no 'modification' key
                    #     - use 'modified_base' as the event type
                    #     - use the single site 'score' property as the gff score
                    #     - do not put this kind into the gff if it contains the 'offTargetPeak' tag

                    gffline = self.makeGffRecord(siteObs)

                    # Render and split the record once instead of three times.
                    columns = str(gffline).split()
                    attributes = columns[8].split(';')
                    siteObs['type_modif'] = columns[2]
                    siteObs['score_modif'] = columns[5]

                    for attribute in attributes:
                        parts = attribute.split('=')
                        siteObs[str(parts[0])] = parts[1]

                    # NOTE(review): the merge above stores attribute values as
                    # strings; if an attribute reuses a numeric key such as
                    # 'coverage' or 'score', the comparisons below would see a
                    # string -- confirm against makeGffRecord.
                    true_modif = False
                    if siteObs['coverage'] > self.options.minCoverage:
                        # Case 1 (`in` replaces the Python-2-only has_key())
                        if 'modification' in siteObs and siteObs['modification'] != '.':
                            gff.writeRecord(gffline)
                            true_modif = True
                        # Case 2
                        elif siteObs['score'] > minScore and 'offTargetPeak' not in siteObs:
                            gff.writeRecord(gffline)
                            true_modif = True

                    siteObs['modified'] = true_modif
                    if not true_modif:
                        siteObs['type_modif'] = 'None'

                    # Printed to sys.stdout for every position; downstream
                    # consumers decide what to keep.  The bulky
                    # 'capped_values' and 'rawData' entries are replaced by
                    # 'None' and newlines are stripped from every value.
                    fields = []
                    for key in siteObs:
                        if key != 'capped_values' and key != 'rawData':
                            text = str(siteObs[str(key)]).replace('\n', '')
                        else:
                            text = 'None'
                        fields.append('{}:{}'.format(str(key), text))
                    sys.stdout.write(';'.join(fields) + '\n')
        finally:
            # Runs on GeneratorExit (generator.close()) and on any error.
            f.close()
class VariantsGffWriter(object):
    """
    Writes variants as GFF records, keeping only those that meet the
    configured minimum coverage and confidence.
    """

    ONTOLOGY_URL = (
        "http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12")

    def __init__(self, f, optionsDict, referenceEntries):
        """
        Wrap `f` in a GffWriter, store the thresholds and write the headers.

        `optionsDict` must provide "minConfidence", "minCoverage",
        "shellCommand", "inputFilename" and "referenceFilename".  Each item
        of `referenceEntries` must expose `name` and `length`.
        """
        self._gffWriter = GffWriter(f)
        self._minConfidence = optionsDict["minConfidence"]
        self._minCoverage = optionsDict["minCoverage"]

        emit = self._gffWriter.writeHeader
        emit("##pacbio-variant-version 2.1")
        emit("##date %s" % time.ctime())
        emit("##feature-ontology %s" % self.ONTOLOGY_URL)
        emit("##source GenomicConsensus %s" % __VERSION__)
        emit("##source-commandline %s" % optionsDict["shellCommand"])
        emit("##source-alignment-file %s" % optionsDict["inputFilename"])
        emit("##source-reference-file %s" % optionsDict["referenceFilename"])

        # One sequence-region header per reference group.
        for ref in referenceEntries:
            regionLine = "##sequence-region %s 1 %d" % (ref.name, ref.length)
            emit(regionLine)

    def writeVariants(self, variants):
        """Write each variant that meets both thresholds, in input order."""
        for variant in variants:
            # Coverage is tested first; confidence only if coverage passes.
            if not variant.coverage >= self._minCoverage:
                continue
            if not variant.confidence >= self._minConfidence:
                continue
            self._gffWriter.writeRecord(toGffRecord(variant))

    def close(self):
        """Close the underlying GffWriter."""
        self._gffWriter.close()
    def gffConsumer(self, filename):
        """
        Consume IPD summary rows, filter them and write to GFF

        Coroutine: after priming, each ``send()`` delivers a list of site
        observations.  The output handle (from ``self.openWriteHandle``) is
        closed when the generator is closed and also when any other exception
        escapes, so it is never leaked.
        """

        #f = file(filename, 'w', 2<<15)
        f = self.openWriteHandle(filename)
        try:
            gff = GffWriter(f)

            # write headers describing the program that generated the data
            gff.writeHeader('##source ipdSummary v2.0')
            gff.writeHeader('##source-commandline %s' % self.options.cmdLine)

            # Write the reference renaming info into the gff headers ala evicons
            for entry in self.refInfo:
                gff.writeHeader("##sequence-region %s 1 %d"
                                % (entry.Name, entry.Length))

            # p-value threshold expressed on the -10*log10 score scale
            minScore = -10 * math.log10(self.options.pvalue)
            snippetRef = -1

            while True:
                # Receive the next batch of site observations from the caller
                siteObsList = (yield)

                for siteObs in siteObsList:
                    # self.snippetFunc is a function that returns a reference
                    # snippet given a template position and a strand; it is
                    # rebuilt only when the reference id changes.
                    if snippetRef != siteObs['refId']:
                        self.snippetFunc = self.ipdModel.snippetFunc(siteObs['refId'], 20, 20)
                        snippetRef = siteObs['refId']

                    # Two cases for gff entries:
                    # 1. 'Identified modification' - will have a 'modification' key
                    #     - use the modification name as the gff event type
                    #     - use 'modificationScore' for the gff score
                    # 2. Detected - no 'modification' key
                    #     - use 'modified_base' as the event type
                    #     - use the single site 'score' property as the gff score
                    #     - do not put this kind into the gff if it contains the 'offTargetPeak' tag

                    if siteObs['coverage'] > self.options.minCoverage:
                        # Case 1
                        if 'modification' in siteObs and siteObs['modification'] != '.':
                            gff.writeRecord(self.makeGffRecord(siteObs))

                        # Case 2
                        elif siteObs['score'] > minScore and 'offTargetPeak' not in siteObs:
                            gff.writeRecord(self.makeGffRecord(siteObs))

                    # FIXME: Try not filtering:
                    # gff.writeRecord(self.makeGffRecord(siteObs))
        finally:
            # Runs on GeneratorExit (generator.close()) and on any error.
            f.close()
class VariantsGffWriter(object):
    """Writes every variant it is given as a GFF record, after a header block."""

    ONTOLOGY_URL = (
        "http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12")

    def __init__(self, f, optionsDict, referenceEntries):
        """
        Wrap `f` in a GffWriter and write the header block.

        `optionsDict` must provide "shellCommand", "inputFilename" and
        "referenceFilename".  Each item of `referenceEntries` must expose
        `name` and `length`.
        """
        self._gffWriter = GffWriter(f)

        emit = self._gffWriter.writeHeader
        emit("##pacbio-variant-version 2.1")
        emit("##date %s" % time.ctime())
        emit("##feature-ontology %s" % self.ONTOLOGY_URL)
        emit("##source GenomicConsensus %s" % __VERSION__)
        emit("##source-commandline %s" % optionsDict["shellCommand"])
        emit("##source-alignment-file %s" % optionsDict["inputFilename"])
        emit("##source-reference-file %s" % optionsDict["referenceFilename"])

        # One sequence-region header per reference group.
        for ref in referenceEntries:
            regionLine = "##sequence-region %s 1 %d" % (ref.name, ref.length)
            emit(regionLine)

    def writeVariants(self, variants):
        """Convert each variant with toGffRecord and write it, in input order."""
        for variant in variants:
            record = toGffRecord(variant)
            self._gffWriter.writeRecord(record)

    def close(self):
        """Close the underlying GffWriter."""
        self._gffWriter.close()