def fromgff3(filename):
    """
    Extract feature rows from a GFF3 file.

    The file is read as tab-delimited text and given the nine standard
    GFF3 column names (``seqid``, ``source``, ``type``, ``start``, ``end``,
    ``score``, ``strand``, ``phase``, ``attributes``). Comment rows marked
    with ``#`` are skipped, and any row that does not hold exactly nine
    values (e.g., a trailing FASTA section) is ignored. The ``attributes``
    column is passed through ``gff3_parse_attributes`` and the ``start``
    and ``end`` columns are converted with ``int``.

    The resulting table is returned wrapped in a ``HybridRowView``.

    .. versionadded:: 0.2

    """
    gff3_fields = ('seqid', 'source', 'type', 'start', 'end',
                   'score', 'strand', 'phase', 'attributes')

    # read the raw tab-delimited rows and put the column names on top
    features = pushheader(fromtsv(filename), gff3_fields)

    # drop comment rows, then keep only rows with exactly 9 values
    # (this also discards things like a trailing fasta section)
    features = rowlenselect(skipcomments(features, '#'), 9)

    # parse the attributes column into a dict
    features = convert(features, 'attributes', gff3_parse_attributes)

    # parse the coordinates
    features = convert(features, ('start', 'end'), int)

    return HybridRowView(features)
def convertgff3(tbl):
    """
    Apply the GFF3 column layout and value parsing to an existing table.

    `tbl` is expected to hold raw, header-less GFF3 rows. The nine standard
    GFF3 column names are pushed on as a header, comment rows marked with
    ``#`` are skipped, and rows that do not hold exactly nine values (e.g.,
    a trailing FASTA section) are ignored. The ``attributes`` column is
    passed through ``gff3_parse_attributes`` and the ``start`` and ``end``
    columns are converted with ``int``.

    The resulting table is returned wrapped in a ``HybridRowView``.

    """
    column_names = ('seqid', 'source', 'type', 'start', 'end',
                    'score', 'strand', 'phase', 'attributes')

    # put the column names on top of the raw rows
    with_header = pushheader(tbl, column_names)

    # drop comment rows
    without_comments = skipcomments(with_header, '#')

    # keep only rows with exactly 9 values (e.g., drops trailing fasta)
    nine_wide = rowlenselect(without_comments, 9)

    # parse the attributes column into a dict, then the coordinates
    parsed = convert(
        convert(nine_wide, 'attributes', gff3_parse_attributes),
        ('start', 'end'),
        int,
    )

    return HybridRowView(parsed)
def lenstats(table, field):
    """
    Convenience function to report statistics on value lengths under the
    given field. Each value under `field` is replaced by its ``len()`` and
    the result is handed to ``stats``. E.g.::

        >>> from petl import lenstats
        >>> table1 = [['foo', 'bar'],
        ...           [1, 'a'],
        ...           [2, 'aaa'],
        ...           [3, 'aa'],
        ...           [4, 'aaa'],
        ...           [5, 'aaaaaaaaaaa']]
        >>> lenstats(table1, 'bar')
        {'count': 5, 'errors': 0, 'min': 1.0, 'max': 11.0, 'sum': 20.0, 'mean': 4.0}

    """
    # swap every value under the field for its length, then summarise
    lengths = convert(table, field, len)
    return stats(lengths, field)