Ejemplo n.º 1
0
    def testSecondExonScheme(self):
        """
        Check the second-exon output of processBED against the fixtures.

        Feeds self.readLines through processBED with SECOND_EXON, parses
        every non-blank output line with parseBEDString, and verifies that
        each parsed element has the expected length and that its start
        equals the matching entry in self.secondChromStarts.
        """
        # run the code...
        infh = DummyInputStream(self.readLines)
        outfh = DummyOutputStream()
        processBED(infh, outfh, SECOND_EXON)

        # see what we get.. (blank lines are ignored)
        stripped = (line.strip() for line in outfh.itemsWritten())
        outlines = [line for line in stripped if line != ""]
        # NOTE(review): if nothing is written, the loop below never runs and
        # the test passes vacuously -- consider asserting outlines is
        # non-empty once the fixtures are confirmed to produce output.

        for out in outlines:
            e2 = parseBEDString(out)
            gotAnswer = len(e2)

            # The fixture tables are indexed by the element name without its
            # last two characters (presumably a suffix added by processBED --
            # TODO confirm).
            key = e2.name[:-2]

            r_len = self.readEnds[key] - self.readStarts[key]
            glob_s = self.firstChromStarts[key] + self.readStarts[key]
            expectedAns = r_len - (self.firstChromEnds[key] - glob_s) - 1

            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b).
            self.assertEqual(gotAnswer, expectedAns)
            self.assertEqual(e2.start, self.secondChromStarts[key])
Ejemplo n.º 2
0
    def testFirstExonScheme(self):
        """
        Check the first-exon output of processBED against the fixtures.

        Feeds self.readLines through processBED with FIRST_EXON, parses
        every non-blank output line with parseBEDString, and verifies that
        each parsed element has the expected length and that its end equals
        the matching entry in self.firstChromEnds.
        """
        # run the code...
        infh = DummyInputStream(self.readLines)
        outfh = DummyOutputStream()
        processBED(infh, outfh, FIRST_EXON)

        # see what we get.. (blank lines are ignored)
        stripped = (line.strip() for line in outfh.itemsWritten())
        outlines = [line for line in stripped if line != ""]
        # NOTE(review): if nothing is written, the loop below never runs and
        # the test passes vacuously -- consider asserting outlines is
        # non-empty once the fixtures are confirmed to produce output.

        for out in outlines:
            e1 = parseBEDString(out)
            gotAnswer = len(e1)

            # The fixture tables are indexed by the element name without its
            # last two characters (presumably a suffix added by processBED --
            # TODO confirm).
            key = e1.name[:-2]

            read_start_global = (self.firstChromStarts[key] +
                                 self.readStarts[key])
            expectedAns = self.firstChromEnds[key] - read_start_global + 1

            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b).
            self.assertEqual(gotAnswer, expectedAns)
            self.assertEqual(e1.end, self.firstChromEnds[key])
Ejemplo n.º 3
0
    def testBEDIteratorDropAfter(self):
        """
        Make sure BEDIterator can drop everything after a given field
        without disturbing the fields that are kept.

        Three eight-field, tab-separated records are fed through
        BEDIterator with dropAfter=6; the string form of each yielded
        element is expected to be the first six fields of the matching
        input record, joined by tabs.
        """
        # One tuple per input record; the last two fields are the ones
        # expected to be dropped.
        records = [
            ("chr12", "83810028", "83810066", "SRR189775.10000", "9", "-",
             "TTTTTTTTTTTTTTTAAATTCTTCGAATGCCGTTTTCT",
             "]&(2-'+0'+:34J########################"),
            ("chr5", "177570573", "177570611", "SRR189775.10000001", "3", "+",
             "TCACCTTTTTTTCACCTTTTAATTTTATATTATTTATC",
             "K79:77:79797:7797<;>BC979:77B?997:79:7"),
            ("chr4", "78174772", "78174810", "SRR189775.10000009", "0", "+",
             "TTTTATTTTATTTTATTTTTTTACCCTTCCTCAAACAC",
             "G77:797:77977<TS;:9:9:9:9:977<;7@?@=97"),
        ]
        # Input: every record on its own newline-terminated line.
        infs = "".join("\t".join(fields) + "\n" for fields in records)
        # Expected: the same records truncated to their first six fields.
        expectOut = ["\t".join(fields[:6]) for fields in records]

        ifh = DummyInputStream(infs)
        ofh = DummyOutputStream()

        for e in BEDIterator(ifh, dropAfter=6):
            ofh.write(str(e) + "\n")

        gotOutput = [x.strip() for x in ofh.itemsWritten()]

        # assertEqual prints a diff of the two lists on failure, which
        # replaces the hand-rolled debug dump to stderr.
        self.assertEqual(gotOutput, expectOut)