예제 #1
0
class TestGffWriter:
    """Check GffWriter output by writing into an in-memory text buffer."""

    # Fixture records shared by the tests in this class.
    RECORD1 = Gff3Record(
        "chr1", 10, 11, "insertion",
        attributes=[("cat", "1"), ("dog", "2")])
    RECORD2 = Gff3Record(
        "chr1", 200, 201, "substitution",
        attributes=[("mouse", "1"), ("moose", "2")])

    def setup_method(self):
        """Create a new buffer and a writer bound to it before each test."""
        self.outfile = StringIO()
        self.gffWriter = GffWriter(self.outfile)

    def test_writeHeader(self):
        """A custom header line must follow the version line."""
        self.gffWriter.writeHeader("##foo bar")
        written = self.outfile.getvalue()
        assert "##gff-version 3\n##foo bar\n" == written

    def test_writeRecord(self):
        """Each record must appear on its own line after the version line."""
        for record in (self.RECORD1, self.RECORD2):
            self.gffWriter.writeRecord(record)
        expected = "".join([
            "##gff-version 3\n",
            "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n",
            "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n",
        ])
        assert expected == self.outfile.getvalue()
예제 #2
0
 def setup(self):
     """Prepare an in-memory buffer, two sample records and a writer.

     The writer is constructed last and is bound to the same buffer that
     is stored on ``self.outfile``.
     """
     buffer = StringIO()
     self.outfile = buffer

     insertion_attrs = [("cat", "1"), ("dog", "2")]
     self.record1 = Gff3Record("chr1", 10, 11, "insertion",
                               attributes=insertion_attrs)

     substitution_attrs = [("mouse", "1"), ("moose", "2")]
     self.record2 = Gff3Record("chr1", 200, 201, "substitution",
                               attributes=substitution_attrs)

     self.gffWriter = GffWriter(buffer)
def run(argv):
    """
    Generate a random 5000-base sequence with planted motifs and write it out.

    With probability of roughly 1% each, a "GTAC" or a "CTAG" motif is
    inserted (only while fewer than 4880 bases have been generated) and a
    "modified_base" record is queued at position ``len(nuc) + 3``, i.e. the
    third base of the motif when counting from 1.  Otherwise a single random
    base is appended; a base that would accidentally complete one of the two
    motifs is replaced by "T".

    The sequence is written to "genome.fasta" and the records, each extended
    with a "context" attribute sliced from the sequence, to "basemods.gff".

    `argv` is accepted but not used.  Always returns 0.
    """
    def modified_base_at(pos):
        # Every planted motif gets the same record apart from its position.
        return Gff3Record("genome", pos, pos, "modified_base", 100, "+", ".",
                          "kinModCall", [("coverage", "100"),
                                         ("IPDRatio", "4.0"),
                                         ("identificationQv", "50")])

    nuc = []
    basemods = []
    while len(nuc) < 5000:
        x = random.random()
        if x > 0.99:
            motif = ["G", "T", "A", "C"]
        elif x < 0.01:
            motif = ["C", "T", "A", "G"]
        else:
            motif = None

        if motif is not None and len(nuc) < 4880:
            basemods.append(modified_base_at(len(nuc) + 3))
            nuc.extend(motif)
            continue

        b = "ACGT"[random.randint(0, 3)]
        tail = nuc[-3:]
        completes_motif = ((b == "C" and tail == ["G", "T", "A"])
                           or (b == "G" and tail == ["C", "T", "A"]))
        nuc.append("T" if completes_motif else b)

    seq = "".join(nuc)
    with FastaWriter("genome.fasta") as fa_out:
        fa_out.writeRecord("genome", seq)

    with GffWriter("basemods.gff") as gff_out:
        for rec in basemods:
            # Window around the record, clipped to the sequence bounds.
            lo = max(0, rec.start - 21)
            hi = min(len(seq), rec.end + 20)
            rec.attributes["context"] = seq[lo:hi]
            gff_out.writeRecord(rec)
    return 0
예제 #4
0
class TestGff3Record:
    """Tests for Gff3Record string formatting, mutation, parsing and access."""

    # Shared fixture.  Tests that modify a record must work on a copy so the
    # other tests keep seeing the original values.
    RECORD = Gff3Record("chr1",
                        10,
                        11,
                        "insertion",
                        attributes=[("cat", "1"), ("dog", "2")])

    def test_str(self):
        """str() yields the tab-separated GFF line for the record."""
        assert "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2" == str(
            self.RECORD)

    def test_modification(self):
        """Changed fields and added attributes show up in str()."""
        record = self.RECORD.copy()
        record.dog = 3
        record.cat = 4
        record.mouse = 5
        record.start = 100
        record.end = 110
        assert "chr1\t.\tinsertion\t100\t110\t.\t.\t.\tcat=4;dog=3;mouse=5" == str(
            record)

    def test_fromString(self):
        """A record parsed from its own string form formats identically."""
        newRecord = Gff3Record.fromString(str(self.RECORD))
        assert str(self.RECORD) == str(newRecord)

    def test_get(self):
        """
        Verify field access behavior
        """
        # Copy first: mutating the shared class-level RECORD here would make
        # test_str and test_fromString depend on test execution order.
        record = self.RECORD.copy()
        record.dog = 3
        record.cat = 4
        record.mouse = 5
        record.start = 100
        record.end = 110

        assert 3 == record.dog
        assert 100 == record.start
        with pytest.raises(AttributeError):
            record.god

        assert 3 == record.get("dog")
        assert record.get("god") is None
        assert 100 == record.get("start", 100)
예제 #5
0
def toGffRecord(var):
    """
    Build a Gff3Record from the variant `var`.

    The record type is the lower-cased `var.variantType`.  When
    `var.refSeq` is not the empty string the record spans
    `var.refStart + 1` to `var.refEnd`; otherwise both start and end are
    `var.refStart`.

    Attributes are set in this order: reference ("." when `var.refSeq` is
    falsy), variantSeq, frequency (only when `gffVariantFrequency` returns
    a truthy value), coverage, confidence, followed by every (key, value)
    pair in `var.annotations`, if any.
    """
    gffType = var.variantType.lower()

    if var.refSeq != "":
        gffStart = var.refStart + 1
        gffEnd = var.refEnd
    else:
        gffStart = gffEnd = var.refStart

    gffFreq = gffVariantFrequency(var)

    seqName = reference.idToFullName(var.refId)
    record = Gff3Record(seqName, gffStart, gffEnd, gffType)
    record.reference = var.refSeq or "."
    record.variantSeq = gffVariantSeq(var)
    if gffFreq:
        record.frequency = gffFreq
    record.coverage = var.coverage
    record.confidence = var.confidence

    for key, value in (var.annotations or ()):
        record.put(key, value)
    return record
예제 #6
0
    def makeM5CgffRecord(self, siteObs):
        """
        Build a 'CG'-typed Gff3Record from one site observation.

        Reads the 'tpl', 'coverage', 'ipdRatio', 'refId', 'Ca5C' and
        'strand' entries of `siteObs`.  Start and end are both
        `siteObs['tpl'] + 1`; the score is 'Ca5C' formatted with three
        decimals; the strand is '+' when `siteObs['strand'] == 0`, else '-'.
        """
        # Single-position feature: start and end coincide.
        position = siteObs['tpl'] + 1

        attributes = [
            ('coverage', siteObs['coverage']),
            ('IPDRatio', siteObs['ipdRatio']),
        ]

        refName = siteObs['refId']
        score = "%.3f" % siteObs['Ca5C']
        if siteObs['strand'] == 0:
            strand = '+'
        else:
            strand = '-'

        return Gff3Record(refName, position, position,
                          type='CG',
                          score=score,
                          strand=strand,
                          source='kinModCall',
                          attributes=attributes)
예제 #7
0
    def makeGffRecord(self, siteObs):
        """
        Convert the internal site observation object into a GFF entry

        Required entries of `siteObs`: 'tpl', 'strand', 'coverage',
        'ipdRatio', 'refName' and 'score'.  Optional entries 'motif', 'id',
        'modificationScore' and 'modification' are reported when present;
        the FRAC / FRAClow / FRACup entries are reported only when
        `self.options.methylFraction` is set and FRAC is present.

        Returns a Gff3Record with source 'kinModCall', the score rounded to
        the nearest integer, and strand '+' when `siteObs['strand'] == 0`,
        else '-'.
        """
        # Some useful attributes about the observation
        # - cognate base
        # - context snippet
        # - ipd ratio
        # - coverage
        snippet = self.snippetFunc(siteObs['tpl'], siteObs['strand'])
        attributes = [('coverage', siteObs['coverage']),
                      ('context', snippet),
                      ('IPDRatio', siteObs['ipdRatio'])]

        # Base of detected mod -- single position, closed,open
        # interval.
        # Note -- internally the tool uses 0-based reference
        # coordinates, however in gff the template indices are
        # 1-based.  Make that adjustment here.
        # On start vs. end: My reading of the gff spec
        # (http://www.sequenceontology.org/resources/gff3.html) says
        # to me that 1-base long feature (e.g. a modified base) should
        # have start + 1 == end, and 0-base long features
        # (e.g. insertions) should have start == end. This is not the
        # convention that Marco has adopted in SMRTView, or the
        # convention that EviCons originally used.  We will adopt
        # their convention here, for now.
        start = siteObs['tpl'] + 1
        end = siteObs['tpl'] + 1

        # Membership tests use `in`: dict.has_key() no longer exists in
        # Python 3, and `in` behaves the same on Python 2.
        if 'motif' in siteObs:
            attributes.append(('motif', "%s" % siteObs['motif']))

        if 'id' in siteObs:
            attributes.append(('id', "%s" % siteObs['id']))

        if self.options.methylFraction and FRAC in siteObs:
            attributes.append(('frac', "%.3f" % siteObs[FRAC]))
            attributes.append(('fracLow', "%.3f" % siteObs[FRAClow]))
            attributes.append(('fracUp', "%.3f" % siteObs[FRACup]))

        if 'modificationScore' in siteObs:
            # Report the QV from the modification identification module as a special tag
            attributes.append(('identificationQv', "%d" % int(round(siteObs['modificationScore']))))

        if 'modification' in siteObs:

            if siteObs['modification'] == '.':
                recordType = 'modified_base'

            elif siteObs['modification'] == 'nMd':
                recordType = '.'

            else:
                # if we have an identified mod, use it; otherwise use the old generic term
                recordType = siteObs['modification']

        else:
            recordType = 'modified_base'

        refName = siteObs['refName']
        score = int(round(siteObs['score']))
        strand = '+' if siteObs['strand'] == 0 else '-'

        return Gff3Record(refName, start, end,
                          type=recordType,
                          score=score,
                          strand=strand,
                          source='kinModCall',
                          attributes=attributes)
 def setup(self):
     """Store a sample "insertion" record on chr1 (10-11) as ``self.record``."""
     sample_attrs = [("cat", "1"), ("dog", "2")]
     self.record = Gff3Record("chr1", 10, 11, "insertion",
                              attributes=sample_attrs)