def _create_dict(obj_func, columns, getters, validator, dictgetters):
    """Build a dict from the object produced by ``obj_func`` and validate it.

    :param obj_func: zero-argument callable; it is invoked once to obtain
        the source object.
    :param columns: combined with ``getters`` through ``dictzip`` before
        being handed to ``obj_to_dict`` (presumably column names paired
        with their getters -- confirm against ``dictzip``).
    :param getters: see ``columns``.
    :param validator: object exposing ``validate``; it receives the final
        dict.
    :param dictgetters: optional second set of getters.  When truthy they
        are applied to the first-pass dict (not to the source object) and
        the outcome is merged into it via ``merge_dicts``.
    :return: whatever ``validator.validate`` returns for the assembled dict.
    """
    # Direct call instead of the ``apply`` builtin, which was deprecated in
    # Python 2.3 and no longer exists in Python 3.
    obj = obj_func()
    pre_dict = obj_to_dict(obj, dictzip(columns, getters))
    # optional intermediate schema here
    if dictgetters:
        # Second pass: these getters read from the first-pass dict.
        extra_dict = obj_to_dict(pre_dict, dictgetters)
        full_dict = merge_dicts(pre_dict, extra_dict)
    else:
        full_dict = pre_dict
    return validator.validate(full_dict)
def parse_cigar(cigar_str):
    """Total the operation lengths found in a CIGAR string.

    Every ``<digits><op>`` pair with ``op`` in ``MIDNSHPX=`` is collected and
    the lengths are summed per operation.  Text that does not match that
    pattern is skipped silently, because ``re.findall`` only reports matches.

    :param cigar_str: the CIGAR text to scan.
    :return: the result of ``merge_dicts`` applied to the per-operation
        totals (keys ``cigar_<op>``, one per operation that occurs) and
        ``{'cigar_score': n}``, where ``n`` is the summed length of every
        operation other than ``M`` and ``=``.  With no matching pair there
        are no ``cigar_<op>`` keys and ``cigar_score`` is 0.
    """
    # One (count, op) tuple per pair.  This is what the previous pattern,
    # a lazily quantified group wrapped around the same two captures,
    # produced under findall as well.
    pairs = re.findall(r'([0-9]+)([MIDNSHPX=])', cigar_str)

    # Single pass tally; replaces sorting and grouping the pairs by op.
    totals = {}
    for count, op in pairs:
        totals[op] = totals.get(op, 0) + int(count)

    # Keys are created in sorted operation order, as before.
    cigar_dict = dict(
        ("cigar_{0}".format(op), totals[op]) for op in sorted(totals))

    # Everything that is not M or = counts toward the score.
    mismatches = sum(
        total for op, total in totals.items() if op not in ('M', '='))
    return merge_dicts(cigar_dict, {'cigar_score': mismatches})
def get_row(row):
    """Assemble the complete field dict for a single row.

    ``all_but_cigar_dict`` produces the base dict from ``row``; the cigar,
    flag and error helpers are then each called on that base dict (not on
    ``row`` itself) and everything is combined with ``merge_dicts``.

    :param row: the input passed straight to ``all_but_cigar_dict``.
    :return: the ``merge_dicts`` result of the base dict, the cigar dict,
        the flag dict and ``{'error': ...}``, in that argument order.
    """
    base = all_but_cigar_dict(row)
    # Helpers are invoked in the same order as the merge arguments.
    cigar_fields = get_cigar_dict(base)
    flag_fields = get_flag_dict(base)
    error_field = {'error' : get_error(base)}
    return merge_dicts(base, cigar_fields, flag_fields, error_field)