Beispiel #1
0
def parse_option(option_str):
    '''Parse one SAM optional field written as ``TAG:TYPE:VALUE``.

    Only the first two colons are treated as delimiters, so a VALUE that
    itself contains ``:`` (legal for e.g. ``Z`` strings) is kept intact
    instead of breaking the three-way unpacking.

    :param option_str: a single optional field, e.g. ``'AS:i:213'``.
    :return: ``(tag, val)`` where ``val`` is the VALUE text converted by the
        ``options_scheme`` entry looked up under the TYPE code
        (``options_scheme`` is defined elsewhere in this module).
    :raises ValueError: if the field has fewer than three ``:``-separated
        parts.
    '''
    # maxsplit=2 -> everything after the second colon belongs to VALUE.
    tag, _type, raw_val = option_str.split(':', 2)
    val = options_scheme[_type](raw_val)
    return tag, val
    # NOTE: samfiles use ASCII of Phred-scaled base QUALity+33
Beispiel #2
0
#    return ddist(centers).argmin()
#    #return min(centers, key=ddist)


def makematrices(s):
    '''Split the lines in ``s`` into a "centers" part and a "data" part.

    Falsy (empty) lines are filtered out first.  The remaining lines are
    partitioned by ``splitby`` using the predicate "line does not contain
    '------'"; the first element of each partition is dropped and the rest
    is converted with ``makenp``.

    NOTE(review): presumably the dropped elements are a header line and the
    '------' separator line -- confirm against the input format and
    ``splitby``.

    :param s: iterable of text lines.
    :return: ``(centers, data)`` as produced by ``makenp``.
    '''
    not_separator = _not(isin('------'))
    before, after = splitby(not_separator, ifilter(bool, s))
    # Convert the first partition before touching the second: the two may be
    # lazy views over the same underlying iterator.
    centers = makenp(islice(before, 1, None))
    data = makenp(islice(after, 1, None))
    return centers, data



# isin(x) evaluates to methodcaller('__contains__', x): a one-argument
# predicate that calls container.__contains__(x) on whatever it is given,
# i.e. "does this container (e.g. a line of text) contain x?".
isin = partial(methodcaller, '__contains__')
# Converts one space-separated line of numbers into a numpy array.
# NOTE(review): presumably compose_all applies its arguments right-to-left
# (split on ' ' -> float each -> np.array each -> np.array) -- confirm
# against func.compose_all.
makearray = compose_all(np.array, pmap(np.array), pmap(float), psplit(' '))
# Applies makearray to every line of an iterable and wraps the results in
# one numpy array.
# NOTE(review): assumes compose/pmap behave as function composition / map.
makenp = compose(np.array, pmap(makearray))
def get_in_out(s):
    '''Split the lines in ``s`` into input data, expected output and ``k``.

    Falsy (empty) lines are filtered out, then the lines are partitioned by
    ``splitby`` using the predicate "line does not contain 'Output'".  The
    first line of the first partition supplies ``k`` (its first
    space-separated token, as an int); the remaining lines of that partition
    become the input matrix.  The first element of the second partition is
    dropped and the rest becomes the expected-output matrix.

    NOTE(review): presumably the dropped element is the 'Output' marker line
    itself -- confirm against ``splitby``.

    :param s: iterable of text lines.
    :return: ``(_in, _out, k)``.
    '''
    before_output = _not(isin('Output'))
    raw_in, raw_out = splitby(before_output, ifilter(bool, s))
    # The header line must be consumed before the rest of raw_in is parsed.
    header = next(raw_in)
    first_token = header.split(' ')[0]
    k = int(first_token)
    _in = makenp(raw_in)
    expected_rows = islice(raw_out, 1, None)
    _out = makenp(expected_rows)
    return _in, _out, k


lines = open('Lloyd.txt').readlines()
input, expected, k = get_in_out(lines)
print soft_k_means_cluster(input, k=3)


from matplotlib import pyplot
Beispiel #3
0
import re
import pandas as pd
from bioframes import to_np_int, sanger_qual_str_to_error
from itertools import groupby
from func import pmap, psplit, pstrip, compose, compose_all, merge_dicts, fzip, partial2, dictmap, starcompose
from operator import itemgetter
from functools import partial
import operator as op
from schema import Schema, Use
from itertools import ifilter
# Parse options
#from pyparsing import Regex

# Converter for bracketed, comma-separated values such as '[1,2,3]'.
# NOTE(review): presumably compose_all applies right-to-left (strip '[]' ->
# split on ',' -> to_np_int) -- confirm against func.compose_all.
parse_array = compose_all(to_np_int, psplit(','), pstrip('[]'))
# NOTE(review): presumably splits a record on tab characters -- confirm
# against func.psplit.
tabsplit = psplit('\t')

basic_scheme={
    'QNAME' : str,
    'FLAG' : int,
    'RNAME' : str,
    'POS' : int,
    'MAPQ' : int,
    'CIGAR' : str,
    'RNEXT' : str,
    'PNEXT' : int,
    'TLEN' : int,
    #'MRNM' : str,
    #'MRNM' : '*='.__contains__,
    #'MPOS' : int,
    #'ISIZE' : int,
    'SEQ' : str,
Beispiel #4
0
from pyparsing import Regex
#_join = partial(reduce, lambda a, b: a+':'+b)
# Grammar for one SAM optional field: TAG:TYPE:VALUE.
tag = Regex(r'[A-Za-z][A-Za-z0-9]')
_type = Regex(r'[AifZHB]')
# The '+' quantifier is required: without it the pattern matched exactly one
# character, so 'AS:i:213' yielded the value '2' instead of '213'.
value = Regex(r'[^\t]+')
full = tag + ':' + _type + ':' + value
#reduce(operator.add, [tag, _type, value], ':')
#cigar_regex = r'\*|([0-9]+[MIDNSHPX=])+'
#? makes the regex not be too greedy

# Smoke check at import time: raises if the sample field cannot be parsed.
full.parseString('AS:i:213')
#full = _join( [tag, _type, value ] )
#compose3 = partial(reduce, compose)
#parse_array = re.compile(r'[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+').match
#parse_array = compose_all(pmap(int), psplit(','), pstrip('[]')) #re.compile(r'[^\[\]]').match)
# Converter for bracketed, comma-separated values such as '[1,2,3]'.
# NOTE(review): presumably compose_all applies right-to-left (strip '[]' ->
# split on ',' -> to_np_int) -- confirm against func.compose_all.
parse_array = compose_all(to_np_int, psplit(','), pstrip('[]')) #re.compile(r'[^\[\]]').match)
#m = re.compile(r'[^\[\]]+').match
#TODO: ASCII of Phred-scaled base QUALity+33
''' NOTE: samfiles use ASCII of Phred-scaled base QUALity+33 '''
#qual_int = ord

# Converters for the SAM optional-field TYPE codes, keyed by the one-letter
# code and applied to the raw VALUE text.  The literal used to be a bare
# expression (built and discarded); it is now bound to the name that
# parse_option looks up.
options_scheme = {
    # VALUE arrives as text: chr() only accepts integers and would raise
    # TypeError, so the character is kept as a string.
    'A' : str,
    'i' : int,
    'f' : float,
    'Z' : str,
    # Plain int() is base 10 and rejects the digits A-F.
    'H' : lambda hex_str: int(hex_str, 16), # hex
    'B' : parse_array
}

#parse cigar string