def parse_option(option_str):
    '''
    Parse one optional field written as TAG:TYPE:VALUE.

    The TYPE part selects a converter from the module-level `options_scheme`
    and that converter is applied to the raw VALUE text.

    :param str option_str: a single optional field, e.g. 'AS:i:213'
    :return: tuple (tag, converted value)
    :raises ValueError: if the field has fewer than three ':'-separated parts
    '''
    # Earlier pyparsing-based alternative, kept for reference:
    #_join = partial(reduce, lambda a, b: a+':'+b)
    # tag_type_val = op.itemgetter(0, 2, 4)
    # _tag = Regex(r'[A-Za-z][A-Za-z0-9]')
    # _type = Regex(r'[AifZHB]')
    # _value = Regex('[^\t]')
    # full = _tag + ':' + _type + ':' + _value #reduce(operator.add, [tag, _type, value], ':')
    # parsed_list = full.parseString(option_str)
    # return tag_type_val(parsed_list)

    # Split on the first two colons only: the VALUE part may itself contain
    # ':' (e.g. free-text 'Z' fields), which would otherwise break the
    # three-way unpacking below.
    tag, _type, raw_val = option_str.split(':', 2)
    val = options_scheme[_type](raw_val)
    return tag, val

#full = _join( [tag, _type, value ] )
#parse_array = re.compile(r'[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+').match
''' NOTE: samfiles use ASCII of Phred-scaled base QUALity+33 '''
# return ddist(centers).argmin()
#
#return min(centers, key=ddist)

# isin(x) builds a callable that tests `x in obj` via obj.__contains__(x).
isin = partial(methodcaller, '__contains__')
# Converts one space-separated text line into a numpy array of floats.
# NOTE(review): assumes compose_all applies its functions right-to-left
# (split -> float -> np.array) -- confirm against the `func` module.
makearray = compose_all(np.array, pmap(np.array), pmap(float), psplit(' '))
# Converts an iterable of text lines into a numpy array (one makearray per line).
makenp = compose(np.array, pmap(makearray))


def makematrices(s):
    '''
    Build the (centers, data) arrays from an iterable of text lines.

    Empty lines are dropped, then `splitby` divides the remainder using the
    predicate "line does not contain '------'". The first element of each
    part is skipped before conversion with `makenp`.
    NOTE(review): `splitby` and `_not` are defined outside this view;
    presumably the skipped elements are header/separator lines -- confirm.

    :param s: iterable of text lines
    :return: tuple (centers, data)
    '''
    _centers, _data = splitby(_not(isin('------')), ifilter(bool, s))
    #centers = map(makenp, islice(_centers, 1, None))
    #data = map(makenp, islice(_data, 1, None))
    centers = makenp(islice(_centers, 1, None))
    data = makenp(islice(_data, 1, None))
    return centers, data


def get_in_out(s):
    '''
    Split problem text into its input part, its expected-output part and k.

    Empty lines are dropped, then `splitby` divides the remainder using the
    predicate "line does not contain 'Output'". k is the first
    space-separated token of the first input line, converted to int; the
    remaining input lines are converted with `makenp`. The first element of
    the output part is skipped before conversion.

    :param s: iterable of text lines
    :return: tuple (_in, _out, k)
    '''
    raw_in, raw_out = splitby(_not(isin('Output')), ifilter(bool, s))
    # The first input line carries k as its leading token.
    k = int(next(raw_in).split(' ')[0])
    _in = makenp(raw_in)
    _out = makenp(islice(raw_out, 1, None))
    return _in, _out, k


# Read the whole file inside a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open('Lloyd.txt') as _lloyd_file:
    lines = _lloyd_file.readlines()
input, expected, k = get_in_out(lines)
# NOTE(review): the k parsed from the file is not used here; k=3 is
# hard-coded in the call below -- confirm this is intended.
print(soft_k_means_cluster(input, k=3))
from matplotlib import pyplot
import re import pandas as pd from bioframes import to_np_int, sanger_qual_str_to_error from itertools import groupby from func import pmap, psplit, pstrip, compose, compose_all, merge_dicts, fzip, partial2, dictmap, starcompose from operator import itemgetter from functools import partial import operator as op from schema import Schema, Use from itertools import ifilter # Parse options #from pyparsing import Regex parse_array = compose_all(to_np_int, psplit(','), pstrip('[]')) tabsplit = psplit('\t') basic_scheme={ 'QNAME' : str, 'FLAG' : int, 'RNAME' : str, 'POS' : int, 'MAPQ' : int, 'CIGAR' : str, 'RNEXT' : str, 'PNEXT' : int, 'TLEN' : int, #'MRNM' : str, #'MRNM' : '*='.__contains__, #'MPOS' : int, #'ISIZE' : int, 'SEQ' : str,
from pyparsing import Regex

# Scratch pyparsing grammar for one optional field written as TAG:TYPE:VALUE.
#_join = partial(reduce, lambda a, b: a+':'+b)
tag = Regex(r'[A-Za-z][A-Za-z0-9]')
_type = Regex(r'[AifZHB]')
# One or more non-tab characters. Without the '+' quantifier only the first
# character of the value was consumed (e.g. '2' out of '213').
value = Regex('[^\t]+')
full = tag + ':' + _type + ':' + value #reduce(operator.add, [tag, _type, value], ':')
#cigar_regex = r'\*|([0-9]+[MIDNSHPX=])+'
#? makes the regex not be too greedy
# Smoke check of the grammar on a sample field; the result is discarded.
full.parseString('AS:i:213')
#full = _join( [tag, _type, value ] )
#compose3 = partial(reduce, compose)
#parse_array = re.compile(r'[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+').match
#parse_array = compose_all(pmap(int), psplit(','), pstrip('[]')) #re.compile(r'[^\[\]]').match)
# NOTE(review): presumably strips '[]', splits on ',' and converts to a numpy
# int array (compose_all assumed right-to-left) -- confirm against `func`.
parse_array = compose_all(to_np_int, psplit(','), pstrip('[]')) #re.compile(r'[^\[\]]').match)
#m = re.compile(r'[^\[\]]+').match
#TODO: ASCII of Phred-scaled base QUALity+33
''' NOTE: samfiles use ASCII of Phred-scaled base QUALity+33 '''
#qual_int = ord
# Draft mapping from TYPE character to a converter for the raw value text.
# This is a bare expression: it is not bound to any name here.
# NOTE(review): chr() rejects a str argument, so 'A' cannot convert raw text
# as written; int() without base=16 will not parse hex text for 'H'.
{
    'A' : chr,
    'i' : int,
    'f' : float,
    'Z' : str,
    'H' : int, # hex
    'B' : parse_array
}
#parse cigar string