Exemple #1
0
def flag_dict(flag):
    """Return a dict keyed by flag meaning, holding eval_flag(bit, flag) for each bit.

    Iterates the (bit, meaning) pairs of ``flag_meanings``; both
    ``flag_meanings`` and ``eval_flag`` are defined outside this block.
    """
    return {meaning: eval_flag(bit, flag) for bit, meaning in flag_meanings.items()}
def split_list(A, idx):
    """Cut sequence ``A`` at position ``idx`` and return ``(A[:idx], A[idx:])``."""
    head = A[:idx]
    tail = A[idx:]
    return head, tail

# Column names that line_to_dict zips against the leading fields of a record.
# fields_and_options splits each record at len(sam_columns); whatever follows
# those fields is handed to parse_options.
sam_columns = ("QNAME", "FLAG", "RNAME", "POS", "MAPQ", "CIGAR", "RNEXT", "PNEXT", "TLEN", "SEQ", "QUAL") #options


#TODO: get_record function takes a filehandle and returns a single record via SeqIO, etc.
#So functions expect a dictionary I guess
#pass
# NOTE(review): compose, compose_all, starcompose, pmap, partial2, fzip, tabsplit,
# merge_dicts, parse_option, parse_cigar and basic_schema are defined outside this
# view. The comments below assume compose(f, g)(x) == f(g(x)) -- TODO confirm.

# Presumably maps parse_option over the option fields and builds a dict from the results.
parse_options = compose(dict, pmap(parse_option)) #, tabsplit)
#readfields = compose(tabsplit, next)
# Pairs sam_columns with the given field values via zip and turns the pairs into a dict.
line_to_dict = compose_all(dict, partial(zip, sam_columns)) #, tabsplit)
# line_to_dict followed by basic_schema.validate.
validated_dict = compose(basic_schema.validate, line_to_dict)
# Tab-splits a line, then split_list cuts the fields at len(sam_columns)
# (assumes partial2 binds the second positional argument, i.e. idx -- confirm).
fields_and_options = compose(partial2(split_list, len(sam_columns)), tabsplit)
# Presumably applies validated_dict to the first part and parse_options to the
# second (depends on fzip's semantics -- confirm).
parsers = partial(fzip, [validated_dict, parse_options])
parse_fields_and_options = compose(parsers, fields_and_options)
# Presumably unpacks the parsed parts into merge_dicts to produce one dict per line.
all_but_cigar_dict = starcompose(merge_dicts, parse_fields_and_options)
# The three getters below pull one key out of a record dict and pass it to a parser.
get_cigar_dict = compose(parse_cigar, itemgetter('CIGAR'))
get_flag_dict = compose(flag_dict, itemgetter('FLAG'))
get_error = compose(sanger_qual_str_to_error, itemgetter('QUAL'))

def load_sam(fh):
    """Read all of ``fh``, convert each non-empty line with get_row, and return a DataFrame.

    The whole file is read at once and split on '\\n'; empty strings produced
    by the split (e.g. from a trailing newline) are dropped before conversion.
    """
    nonempty_lines = [line for line in fh.read().split('\n') if line]
    rows = [get_row(line) for line in nonempty_lines]
    return pd.DataFrame(rows)
#TODO: do we really need indices? They complicate querying; it looks like `where` may play better with them.
# .set_index(index) #, index=index, columns=columns)


def get_row(row):
Exemple #2
0
def obj_to_dict(obj, names_getters_map):
    """Build a dict from ``obj`` by running dictmap over ``names_getters_map``.

    The mapped function is ``partial2(apply_to_object, obj)``; presumably each
    value in ``names_getters_map`` is a getter that ends up applied to ``obj``
    (depends on partial2/dictmap/apply_to_object, defined elsewhere -- confirm).
    """
    return dictmap(partial2(apply_to_object, obj), names_getters_map)
Exemple #3
0
pcompose = partial(partial, compose)
error_from_ints = pcompose(error)
#sanger_qual_str_to_error = cmperror(qual_to_phreds)

'''
get_fastq = partial(SeqIO.parse, format='fastq')
get_fasta = partial(SeqIO.parse, format='fasta')
to_np_int = partial(np.array, dtype=int)
gccontent = compose(ilen, pifilter('GC'.__contains__))

minus33 = partial(add, -33)
qual_int_sanger = compose(minus33, ord)

''' Error = 10^-(Phred/10) '''
qual_to_phreds = compose(to_np_int, pmap(qual_int_sanger))
error = compose(partial(pow, 10), partial2(div, -10.0))
#don't need to map because numpy vectorizes it automatically
#TODO: handle non-sanger version
sanger_qual_str_to_error = compose(error, qual_to_phreds)




#SANGER_OFFSET = 33

'''
assert len(quality) == len(error) == len(phred_scores)
'''


#validate = scheme.validate
Exemple #4
0
def obj_to_dict(obj, names_getters_map):
    """Build a dict from ``obj`` by running dictmap over ``names_getters_map``.

    The mapped function is ``partial2(apply_to_object, obj)``; presumably each
    value in ``names_getters_map`` is a getter that ends up applied to ``obj``
    (depends on partial2/dictmap/apply_to_object, defined elsewhere -- confirm).
    """
    return dictmap(partial2(apply_to_object, obj), names_getters_map)
Exemple #5
0
'''
pcompose = partial(partial, compose)
error_from_ints = pcompose(error)
#sanger_qual_str_to_error = cmperror(qual_to_phreds)

'''
# NOTE(review): compose, pmap, pifilter, ilen and partial2 are defined outside this
# view. Comments below assume compose(f, g)(x) == f(g(x)) and that partial2 binds
# the second positional argument -- TODO confirm.

# SeqIO.parse with the format keyword pre-bound.
get_fastq = partial(SeqIO.parse, format='fastq')
get_fasta = partial(SeqIO.parse, format='fasta')
# np.array with dtype=int pre-bound.
to_np_int = partial(np.array, dtype=int)
# Presumably counts the items for which 'GC'.__contains__ is true, i.e. 'G' or 'C'
# when the items are single characters. This is a count, not a fraction.
gccontent = compose(ilen, pifilter('GC'.__contains__))

# add(-33, x): subtracts 33 from its argument.
minus33 = partial(add, -33)
# Character -> ord(character) - 33.
qual_int_sanger = compose(minus33, ord)
''' Error = 10^-(Phred/10) '''
# Maps qual_int_sanger over the quality string and packs the results into an int array.
qual_to_phreds = compose(to_np_int, pmap(qual_int_sanger))
# pow(10, div(x, -10.0)), matching the formula in the string literal above
# (given the partial2 assumption).
error = compose(partial(pow, 10), partial2(div, -10.0))
#don't need to map because numpy vectorizes it automatically
#TODO: handle non-sanger version
# Quality string -> phred array -> error values.
sanger_qual_str_to_error = compose(error, qual_to_phreds)

#SANGER_OFFSET = 33
'''
assert len(quality) == len(error) == len(phred_scores)
'''

#validate = scheme.validate
#TODO: could make these validations match samtools spec
#TODO: Could treat options/cigar string as their own class with their own parsing and validation.


def flatten_vcf(record):
Exemple #6
0
import operator as op
from operator import add, div
from schema import Schema, Use
from itertools import ifilter
# Parse options
#from pyparsing import Regex

# NOTE(review): compose, compose_all, psplit, pstrip, pmap and partial2 are defined
# outside this view. Comments below assume right-to-left composition and that
# partial2 binds the second positional argument -- TODO confirm.

# np.array with dtype=int pre-bound.
to_np_int = partial(np.array, dtype=int)
# Presumably strips '[' / ']' from a string, splits it on ',', and converts the
# pieces to an int array.
parse_array = compose_all(to_np_int, psplit(','), pstrip('[]'))
# Presumably splits a string on tab characters.
tabsplit = psplit('\t')


# add(-33, x): subtracts 33 from its argument.
minus33 = partial(add, -33)
# Character -> ord(character) - 33.
qual_int_sanger = compose(minus33, ord)
# Maps qual_int_sanger over the quality string and packs the results into an int array.
qual_to_phreds = compose(to_np_int, pmap(qual_int_sanger))
# pow(10, div(x, -10.0)), i.e. 10 ** (-x / 10) given the partial2 assumption.
error = compose(partial(pow, 10), partial2(div, -10.0))
#don't need to map because numpy vectorizes it automatically
#TODO: handle non-sanger version
# Quality string -> phred array -> error values.
sanger_qual_str_to_error = compose(error, qual_to_phreds)

basic_scheme={
    'QNAME' : str,
    'FLAG' : int,
    'RNAME' : str,
    'POS' : int,
    'MAPQ' : int,
    'CIGAR' : str,
    'RNEXT' : str,
    'PNEXT' : int,
    'TLEN' : int,
    #'MRNM' : str,