def _next(self, line):
        """Build a GenomeElement from one tab-separated line.

        The line is split on tab characters. Column 0 is passed through
        ``self._checkValidChr`` and stored as the chromosome, columns 1 and 2
        are converted with ``int`` and stored as start and end, and column 3
        is handed to ``self._parseVal`` together with the element.

        Returns the populated GenomeElement.
        """
        fields = line.split("\t")

        element = GenomeElement(self._genome)
        element.chr = self._checkValidChr(fields[0])
        element.start = int(fields[1])
        element.end = int(fields[2])
        # The value column is interpreted by _parseVal, which receives the element itself.
        self._parseVal(element, fields[3])

        return element
    def _next(self, line):
        """Process one line of a FASTA file.

        A line starting with '>' is handled as a header:
        ``self._appendBoundingRegionTuple()`` is called, ``self._elCount`` is
        reset to 0 and ``self._chr`` is set from the first
        whitespace-separated token after the '>' (validated by
        ``self._checkValidChr``). Nothing is returned for such a line.

        Any other line is handled as sequence data: its length is added to
        ``self._elCount`` and a GenomeElement for the current chromosome is
        returned, with ``val`` set to a numpy array of dtype 'S1' built from
        the line.

        Raises InvalidFormatError if a non-header line arrives while
        ``self._chr`` is None.
        """
        if line.startswith('>'):
            self._appendBoundingRegionTuple()
            self._elCount = 0
            headerText = line[1:]
            self._chr = self._checkValidChr(headerText.split()[0])
            return None

        if self._chr is None:
            raise InvalidFormatError('FASTA file does not start with the ">" character.')

        self._elCount += len(line)
        seqElement = GenomeElement(self._genome, self._chr)
        # NOTE(review): binary-mode np.fromstring is deprecated in newer NumPy
        # releases (np.frombuffer is the documented replacement) - confirm the
        # supported NumPy/Python versions before changing this call.
        seqElement.val = np.fromstring(line, dtype='S1')
        return seqElement
 def testAssignAndRetrieve(self):
     """Check that values given to GenomeElement can be read back.

     Covers values passed to the constructor (including the ``extra`` dict
     and ``orderedExtraKeys``), attributes that were not given (expected to
     compare equal to None), extra attributes set via keyword argument or
     plain assignment, and AttributeError on an unknown attribute.
     """
     ge = GenomeElement('TestGenome', start=5, val=1.0, extra={'a':1,'b':2}, orderedExtraKeys=['a','b'])
     expectedAfterConstruction = (
         ('genome', 'TestGenome'),
         ('chr', None),
         ('start', 5),
         ('end', None),
         ('val', 1.0),
         ('strand', None),
         ('a', 1),
         ('b', 2),
         ('extra', {'a':1,'b':2}),
         ('orderedExtraKeys', ['a', 'b']),
     )
     for attrName, expectedValue in expectedAfterConstruction:
         self.assertEqual(getattr(ge, attrName), expectedValue)

     # Extra attributes supplied as a keyword argument and by assignment.
     ge = GenomeElement('TestGenome', a=1)
     ge.b = 2
     expectedAfterAssignment = (
         ('genome', 'TestGenome'),
         ('a', 1),
         ('b', 2),
         ('extra', {'a':1,'b':2}),
         ('orderedExtraKeys', ['a', 'b']),
     )
     for attrName, expectedValue in expectedAfterAssignment:
         self.assertEqual(getattr(ge, attrName), expectedValue)

     # Reading an attribute that was never set must raise AttributeError.
     self.assertRaises(AttributeError, getattr, ge, 'nonExisting')
    def _next(self, line):
        """Parse one line of a GFF file into a GenomeElement.

        * A line starting with '##FASTA' ends iteration (StopIteration).
        * A line starting with '#' is skipped; None is returned.
        * Any other line must consist of exactly 9 tab-separated columns,
          each of which is URL-unquoted before use.

        Columns 0-8 populate, in order: chr, source, (third column via
        ``self._parseThirdCol``), start, end, (sixth column via
        ``self._parseSixthCol``), strand, phase and attributes. In addition,
        ``key=value`` pairs in the attributes column whose key is 'id' or
        'name' (case-insensitive) are stored as ``id`` / ``name``.

        Raises InvalidFormatError if the column count is not 9.
        """
        if line.startswith('##FASTA'):
            raise StopIteration

        if line[:1] == '#':
            return None

        rawFields = line.split('\t')
        fields = [unquote(raw) for raw in rawFields]

        if len(fields) != 9:
            raise InvalidFormatError("Error: GFF files must contain 9 tab-separated columns")

        (chrom, source, thirdCol, startCol, endCol,
         sixthCol, strandCol, phase, attributes) = fields

        element = GenomeElement(self._genome)
        element.chr = self._checkValidChr(chrom)
        element.source = source

        self._parseThirdCol(element, thirdCol)

        # The start column is decremented by one before validation
        # (presumably converting 1-based GFF starts to 0-based - confirm).
        element.start = self._checkValidStart(element.chr, int(startCol) - 1)
        element.end = self._checkValidEnd(element.chr, int(endCol), start=element.start)

        self._parseSixthCol(element, sixthCol)

        element.strand = self._getStrandFromString(strandCol)
        element.phase = phase
        element.attributes = attributes

        # The attribute pairs are split from the raw (still quoted) column and
        # each value is unquoted afterwards - presumably so that escaped ';'
        # or '=' characters inside values do not act as separators.
        for pair in rawFields[8].split(';'):
            if pair.count('=') != 1:
                continue
            key, val = pair.split('=')
            loweredKey = key.lower()
            if loweredKey == 'id':
                element.id = unquote(val)
            elif loweredKey == 'name':
                element.name = unquote(val)

        return element
 def _wrappedTrackElsGenerator(self):
     """Yield one GenomeElement per track element across all bounding regions.

     The track is fetched once with ``self._getTrack()``. For each region in
     ``self._boundingRegions`` a track view is obtained from
     ``self._getTrackView`` and every element of that view is converted with
     ``GenomeElement.createGeFromTrackEl``, using the view's ``trackFormat``
     and ``self._globalCoords``.
     """
     track = self._getTrack()
     for region in self._boundingRegions:
         # The track view is created lazily, only when its region is reached.
         trackView = self._getTrackView(track, region)
         for trackEl in trackView:
             yield GenomeElement.createGeFromTrackEl(
                 trackEl, trackView.trackFormat, globalCoords=self._globalCoords)
Beispiel #6
0
 def next(self):
     """Return the next element of ``self._tvIter`` as a GenomeElement.

     The element is converted with ``GenomeElement.createGeFromTrackEl``
     using ``self._tv.trackFormat``. Any exception raised by
     ``self._tvIter.next()`` propagates unchanged to the caller.
     """
     nextTrackEl = self._tvIter.next()
     return GenomeElement.createGeFromTrackEl(nextTrackEl, self._tv.trackFormat)