Esempio n. 1
0
import re
import pandas as pd
from bioframes import to_np_int, sanger_qual_str_to_error
from itertools import groupby
from func import pmap, psplit, pstrip, compose, compose_all, merge_dicts, fzip, partial2, dictmap, starcompose
from operator import itemgetter
from functools import partial
import operator as op
from schema import Schema, Use
from itertools import ifilter
# Parse options
#from pyparsing import Regex

# Field-level parsing helpers composed from the project-local `func` combinators.
# NOTE(review): presumably strips the enclosing '[' / ']', splits on ',' and
# converts the pieces with to_np_int -- confirm compose_all's application order.
parse_array = compose_all(to_np_int, psplit(','), pstrip('[]'))
# NOTE(review): presumably splits a string on tab characters -- confirm psplit's
# semantics in `func`.
tabsplit = psplit('\t')

basic_scheme={
    'QNAME' : str,
    'FLAG' : int,
    'RNAME' : str,
    'POS' : int,
    'MAPQ' : int,
    'CIGAR' : str,
    'RNEXT' : str,
    'PNEXT' : int,
    'TLEN' : int,
    #'MRNM' : str,
    #'MRNM' : '*='.__contains__,
    #'MPOS' : int,
    #'ISIZE' : int,
    'SEQ' : str,
Esempio n. 2
0
from pyparsing import Regex
#_join = partial(reduce, lambda a, b: a+':'+b)
# Grammar for one optional field written as TAG:TYPE:VALUE (e.g. 'AS:i:213').
# Two-character tag: a letter followed by a letter or a digit.
tag = Regex(r'[A-Za-z][A-Za-z0-9]')
# One-letter code naming the value's type.
_type = Regex(r'[AifZHB]')
# The value runs up to the next tab. The '+' matters: without it only the first
# character is consumed, so 'AS:i:213' would silently parse with value '2'.
value = Regex(r'[^\t]+')
full = tag + ':' + _type + ':' + value
#reduce(operator.add, [tag, _type, value], ':')
#cigar_regex = r'\*|([0-9]+[MIDNSHPX=])+'
#? makes the regex not be too greedy

# Import-time smoke check of the grammar; the parse result is discarded.
full.parseString('AS:i:213')
#full = _join( [tag, _type, value ] )
#compose3 = partial(reduce, compose)
#parse_array = re.compile(r'[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+').match
#parse_array = compose_all(pmap(int), psplit(','), pstrip('[]')) #re.compile(r'[^\[\]]').match)
# NOTE(review): presumably strips the enclosing '[' / ']', splits on ',' and
# converts the pieces with to_np_int -- confirm compose_all's application order.
# The imports for compose_all, to_np_int, psplit and pstrip are not visible in
# this excerpt.
parse_array = compose_all(to_np_int, psplit(','), pstrip('[]')) #re.compile(r'[^\[\]]').match)
#m = re.compile(r'[^\[\]]+').match
#TODO: ASCII of Phred-scaled base QUALity+33
''' NOTE: samfiles use ASCII of Phred-scaled base QUALity+33 '''
#qual_int = ord

# Converters keyed by the one-letter type code of an optional field.
# NOTE(review): this dict is a bare expression -- it is built and then discarded
# because it is never bound to a name. Presumably each converter is meant to be
# applied to the raw string value of a field; confirm against the caller.
{
    # Single character: keep the string as-is (chr() only accepts an integer
    # and raises TypeError when handed a string).
    'A' : str,
    'i' : int,
    'f' : float,
    'Z' : str,
    # Hex digits: parse in base 16 (plain int() would read them as base 10 and
    # fail on the digits A-F).
    'H' : lambda digits: int(digits, 16),
    'B' : parse_array
}

#parse cigar string