def _next(self, line): if line.startswith('#'): return ge = GenomeElement(self._genome) cols = line.split('\t') if self._numCols is not None: if len(cols) != self._numCols: raise InvalidFormatError('Error: BED files must have the same number of columns in each data line.') else: self._numCols = len(cols) if self._numCols < self.MIN_NUM_COLS or self._numCols > self.MAX_NUM_COLS: raise InvalidFormatError('Error: BED file must contain between %s and %s columns.' % (self.MIN_NUM_COLS, self.MAX_NUM_COLS)) ge.chr = self._checkValidChr(cols[0]) ge.start = self._checkValidStart(ge.chr, int(cols[1])) self._parseEnd( ge, self._checkValidEnd(ge.chr, int(cols[2]), start=ge.start)) self._parseName( ge, cols ) self._parseVal( ge, cols ) if self._numCols >= 6: ge.strand = self._getStrandFromString(cols[5]) for i,extraCol in enumerate(self.BED_EXTRA_COLUMNS): if self._numCols >= i+7: setattr(ge, extraCol, cols[i+6]) return ge
def _next(self, line): if line.startswith('#'): return ge = GenomeElement(self._genome) cols = line.split('\t') if self._numCols is not None: if len(cols) != self._numCols: raise InvalidFormatError( 'Error: BED files must have the same number of columns in each data line.' ) else: self._numCols = len(cols) if self._numCols < self.MIN_NUM_COLS or self._numCols > self.MAX_NUM_COLS: raise InvalidFormatError('Error: BED file contains %s columns, but must contain between %s and %s columns.' \ % (self._numCols, self.MIN_NUM_COLS, self.MAX_NUM_COLS)) ge.chr = self._checkValidChr(cols[0]) ge.start = self._checkValidStart(ge.chr, int(cols[1])) self._parseEnd( ge, self._checkValidEnd(ge.chr, int(cols[2]), start=ge.start)) self._parseName(ge, cols) self._parseVal(ge, cols) if self._numCols >= 6: ge.strand = self._getStrandFromString(cols[5]) for i, extraCol in enumerate(self.BED_EXTRA_COLUMNS): if self._numCols >= i + 7: setattr(ge, extraCol, cols[i + 6]) return ge
def _next(self, line): if len(line)>0 and line[0]=='#': return None origCols = line.split('\t') cols = [unquote(x) for x in origCols] if len(cols) != 9: raise InvalidFormatError("Error: GFF files must contain 9 tab-separated columns") ge = GenomeElement(self._genome) ge.chr = self._checkValidChr(cols[0]) ge.source = cols[1] ge.type = cols[2] ge.start = self._checkValidStart(ge.chr, int(cols[3]) - 1) ge.end = self._checkValidEnd(ge.chr, int(cols[4]), start=ge.start) ge.val = numpy.float(self._handleNan(cols[5])) ge.strand = self._getStrandFromString(cols[6]) ge.phase = cols[7] ge.attributes = cols[8] for attr in origCols[8].split(';'): attrSplitted = attr.split('=') if len(attrSplitted) == 2: key, val = attrSplitted if key.lower() == 'id': ge.id = unquote(val) elif key.lower() == 'name': ge.name = unquote(val) return ge
def _next(self, line):
    """Build a GenomeElement from one tab-separated line.

    Column 0 is validated as the chromosome, columns 1 and 2 are converted
    with int() and stored as start and end without further validation
    here, and column 3 is handed to ``self._parseVal``.
    """
    fields = line.split('\t')
    element = GenomeElement(self._genome)

    element.chr = self._checkValidChr(fields[0])
    for attr_name, col_idx in (('start', 1), ('end', 2)):
        setattr(element, attr_name, int(fields[col_idx]))

    self._parseVal(element, fields[3])
    return element
def _next(self, line): if line.startswith('##FASTA'): raise StopIteration if len(line) > 0 and line[0] == '#': return None origCols = line.split('\t') cols = [unquote(x) for x in origCols] if len(cols) != 9: raise InvalidFormatError( "Error: GFF files must contain 9 tab-separated columns") ge = GenomeElement(self._genome) ge.chr = self._checkValidChr(cols[0]) ge.source = cols[1] self._parseThirdCol(ge, cols[2]) ge.start = self._checkValidStart(ge.chr, int(cols[3]) - 1) ge.end = self._checkValidEnd(ge.chr, int(cols[4]), start=ge.start) self._parseSixthCol(ge, cols[5]) ge.strand = self._getStrandFromString(cols[6]) ge.phase = cols[7] ge.attributes = cols[8] for attr in origCols[8].split(';'): attrSplitted = attr.split('=') if len(attrSplitted) == 2: key, val = attrSplitted if key.lower() == 'id': ge.id = unquote(val) elif key.lower() == 'name': ge.name = unquote(val) return ge