def run_tests(names, num_items, num_its, type_=int):
    """Run `_run_test` once for every requested container implementation.

    Args:
        names: Iterable of implementation labels. Each label must be a key
            of the factory table defined in this function.
        num_items: Forwarded unchanged to `_run_test`.
        num_its: Forwarded unchanged to `_run_test`.
        type_: Passed as `key_type` to the typed banyan containers and
            forwarded to `_run_test`. Defaults to `int`.

    Returns:
        A dict mapping each label in `names` to the value `_run_test`
        returned for it.

    Raises:
        KeyError: If a label in `names` has no entry in the factory table.
    """
    # Every factory is a zero-argument callable, so a container (and the
    # library behind it) is only touched when that factory is invoked.
    fns = {
        'btrees': lambda: BTrees.OOBTree.OOBTree(),
        'blist': lambda: blist.sorteddict(),
        'bintrees': lambda: bintrees.FastRBTree(),
        'dict': lambda: dict(),
        # banyan containers specialised on the requested key type.
        'banyan_red_black_tree': lambda: banyan.SortedDict(
            key_type=type_, alg=banyan.RED_BLACK_TREE),
        'banyan_red_black_tree_rank_updator': lambda: banyan.SortedDict(
            key_type=type_, alg=banyan.RED_BLACK_TREE,
            updator=banyan.RankUpdator),
        'banyan_red_black_tree_min_max_updator': lambda: banyan.SortedDict(
            key_type=type_, alg=banyan.RED_BLACK_TREE,
            updator=banyan.MinMaxUpdator),
        'banyan_splay_tree': lambda: banyan.SortedDict(
            key_type=type_, alg=banyan.SPLAY_TREE),
        'banyan_sorted_list': lambda: banyan.SortedDict(
            key_type=type_, alg=banyan.SORTED_LIST),
        # "_gen" variants: same algorithms, constructed without a key_type.
        'banyan_red_black_tree_gen': lambda: banyan.SortedDict(
            alg=banyan.RED_BLACK_TREE),
        'banyan_splay_tree_gen': lambda: banyan.SortedDict(
            alg=banyan.SPLAY_TREE),
        'banyan_sorted_list_gen': lambda: banyan.SortedDict(
            alg=banyan.SORTED_LIST),
    }
    return {
        name: _run_test(fns[name], type_, num_items, num_its)
        for name in names
    }
def construct_intervals(mesh, zhat):
    """Index the triangles of `mesh` by their extent along `zhat`.

    Args:
        mesh: A `(points, triangles)` pair. Every triangle is unpacked into
            exactly three indices into `points`.
        zhat: Vector that each triangle vertex is dotted with (`np.dot`).

    Returns:
        A `banyan.SortedDict` created with `key_type=(float, float)` and
        `banyan.OverlappingIntervalsUpdator`. Each key is the
        `(min, max)` of a triangle's three projections, and the value is
        that triangle's position in `triangles`.
    """
    points, triangles = mesh
    tree = banyan.SortedDict(
        [],
        key_type=(float, float),
        updator=banyan.OverlappingIntervalsUpdator)
    for index, (a, b, c) in enumerate(triangles):
        # Project the three vertices onto zhat; the extremes bound the
        # triangle's interval along that direction.
        projections = [np.dot(zhat, points[vertex]) for vertex in (a, b, c)]
        # NOTE(review): two triangles with an identical (min, max) pair use
        # the same key, so presumably the later index replaces the earlier
        # one - confirm this is acceptable for callers.
        tree[(min(projections), max(projections))] = index
    return tree
def run_tests(names, num_items, num_its):
    """Run `_run_test` once for every requested container implementation.

    Each factory takes an iterable `es` and builds its container from the
    pairs `(e, 1)` for every `e` in `es`.

    Args:
        names: Iterable of implementation labels. Each label must be a key
            of the factory table defined in this function.
        num_items: Forwarded unchanged to `_run_test`.
        num_its: Forwarded unchanged to `_run_test`.

    Returns:
        A dict mapping each label in `names` to the value `_run_test`
        returned for it.

    Raises:
        KeyError: If a label in `names` has no entry in the factory table.
    """
    # TODO (Tmp Ami): make key-type separate labels below.
    # NOTE(review): in this table the "_gen" labels are the ones that pass
    # key_type=int while the plain banyan labels pass no key_type - confirm
    # the labels are attached to the intended variants.
    fns = {
        'btrees': lambda es: BTrees.OOBTree.OOBTree([(e, 1) for e in es]),
        'blist': lambda es: blist.sorteddict([(e, 1) for e in es]),
        'bintrees': lambda es: bintrees.FastRBTree([(e, 1) for e in es]),
        # Built from an intermediate list like every other entry, so all
        # factories do the same preparatory work.
        'set': lambda es: set([(e, 1) for e in es]),
        'banyan_red_black_tree': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.RED_BLACK_TREE),
        'banyan_splay_tree': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.SPLAY_TREE),
        'banyan_sorted_list': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], alg=banyan.SORTED_LIST),
        'banyan_red_black_tree_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.RED_BLACK_TREE),
        # Fixed: this entry previously called the misspelled
        # `banyan.SortedDic`, which would fail as soon as it was invoked.
        'banyan_splay_tree_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.SPLAY_TREE),
        'banyan_sorted_list_gen': lambda es: banyan.SortedDict(
            [(e, 1) for e in es], key_type=int, alg=banyan.SORTED_LIST),
    }
    return {
        name: _run_test(fns[name], int, num_items, num_its)
        for name in names
    }
if ((precise) and (call['RV'] >= 2)) or ((not precise) and (call['DV'] >= 2)): gqAlt.append(call['GQ']) genotypeRatio = float(len(gqAlt) + len(gqRef)) / float( len(record.samples)) if genotypeRatio > ratioGeno: if (len(gqRef)) and (len(gqAlt)) and ( numpy.median(gqRef) >= gqRefCut) and (numpy.median(gqAlt) >= gqAltCut): if (numpy.percentile(ratioRef, 99) == 0) and (numpy.median(ratioAlt) >= altAF): #print(record.INFO['END']-record.POS, len(gqRef), len(gqAlt), numpy.median(gqRef), numpy.median(gqAlt), numpy.percentile(ratioRef, 99), numpy.median(ratioAlt), genotypeRatio, sep="\t") if not sv.has_key(record.CHROM): sv[record.CHROM] = banyan.SortedDict( key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator) if (record.POS, record.INFO['END']) not in sv[record.CHROM]: sv[record.CHROM][(record.POS, record.INFO['END'])] = ( record.ID, record.INFO['PE'], record.INFO['CT']) else: svDups[(record.CHROM, record.POS, record.INFO['END'])].append( (record.ID, record.INFO['PE'], record.INFO['CT'])) # Output vcf records if args.vcfFile:
def __init__(self):
    """Create `ports`, a mapping that builds a sorted dict per missing key.

    `ports` is a `collections.defaultdict`; the first access to an absent
    key creates a `banyan.SortedDict` with `(float, float)` keys and the
    `banyan.OverlappingIntervalsUpdator` for that key.
    """
    def _new_sorted_dict():
        # Invoked by the defaultdict only when a key is missing.
        return banyan.SortedDict(
            key_type=(float, float),
            updator=banyan.OverlappingIntervalsUpdator)

    self.ports = collections.defaultdict(_new_sorted_dict)
metavar='out.vcf', required=True, dest='outVCF', help='output vcf file (required)') args = parser.parse_args() # Compute all stretches of Ns in the reference nRun = dict() refLen = dict() f_in = gzip.open(args.ref) if args.ref.endswith('.gz') else open(args.ref) for seqName, seqNuc, seqQuals in readfq(f_in): refLen[seqName] = len(seqNuc) print("Processing", seqName, refLen[seqName]) if not nRun.has_key(seqName): nRun[seqName] = banyan.SortedDict( key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator) for m in re.compile("([Nn]+)").finditer(seqNuc): nRun[seqName][m.span()] = 1 # Add read-depth control region to VCF file if args.vcfFile: vcf_reader = vcf.Reader( open(args.vcfFile), 'r', compressed=True) if args.vcfFile.endswith('.gz') else vcf.Reader( open(args.vcfFile), 'r', compressed=False) if 'CONTROL' not in vcf_reader.infos.keys(): vcf_reader.infos['CONTROL'] = vcf.parser._Info('CONTROL', 1, 'Integer', 'Control variant.') vcf_writer = vcf.Writer(open(args.outVCF, 'w'), vcf_reader,