def read_scores(f):
    """
    Collect the scores of a wiggle stream into one array per chromosome.

    `f` is handed to `bx.wiggle.Reader`, which is iterated as a sequence of
    (chrom, pos, val) triples. A BinnedArray is created the first time a
    chromosome name is seen, and each value is stored at its position.

    Returns a dict mapping chromosome name -> BinnedArray.
    """
    by_chrom = {}
    for name, position, score in bx.wiggle.Reader(f):
        try:
            target = by_chrom[name]
        except KeyError:
            # First value for this chromosome: start a new array for it.
            target = by_chrom[name] = BinnedArray()
        target[position] = score
    return by_chrom
def load_scores_wiggle(fname, chrom_buffer_size=3):
    """
    Read a wiggle file and return a dict of score arrays keyed by chromosome.

    The first `chrom_buffer_size` distinct chromosomes encountered are stored
    in BinnedArray objects; every chromosome seen after that is stored in a
    PositionalScoresOnDisk object instead. Passing 0 puts every chromosome in
    a PositionalScoresOnDisk.

    Error handling while reading:
      * UCSCLimitException -- a truncation warning is printed and the scores
        read so far are returned.
      * IndexError / ValueError -- reported through stop_err().
    """
    scores_by_chrom = dict()
    try:
        # The context manager closes the input even if parsing stops part way
        # through; the original left the handle open.
        with open(fname) as wiggle_file:
            for chrom, pos, val in bx.wiggle.Reader(UCSCOutWrapper(wiggle_file)):
                if chrom not in scores_by_chrom:
                    if chrom_buffer_size:
                        scores_by_chrom[chrom] = BinnedArray()
                        chrom_buffer_size -= 1
                    else:
                        # Buffer budget used up: fall back to the on-disk store.
                        scores_by_chrom[chrom] = PositionalScoresOnDisk()
                scores_by_chrom[chrom][pos] = val
    except UCSCLimitException:
        # Wiggle data was truncated, at the very least need to warn the user.
        print(
            'Encountered message from UCSC: "Reached output limit of 100000 data values", so be aware your data was truncated.'
        )
    except IndexError:
        stop_err(
            'Data error: one or more column data values is missing in "%s"'
            % fname)
    except ValueError:
        stop_err(
            'Data error: invalid data type for one or more values in "%s".'
            % fname)
    return scores_by_chrom
def load_scores_wiggle( fname ):
    """
    Load every score of a (possibly compressed) wiggle file.

    The file is opened with `misc.open_compressed` and parsed by
    `bx.wiggle.Reader`; each (chrom, pos, val) triple is written into the
    BinnedArray belonging to its chromosome, which is created on demand.

    Returns a dict mapping chromosome name -> BinnedArray.
    """
    per_chrom = {}
    reader = bx.wiggle.Reader( misc.open_compressed( fname ) )
    for name, position, score in reader:
        binned = per_chrom.get( name )
        if binned is None:
            # Chromosome not seen before: allocate its array.
            binned = per_chrom[name] = BinnedArray()
        binned[position] = score
    return per_chrom
def main():
    """
    Convert a wiggle score file into a single BinnedArray file.

    Positional arguments (taken from the parsed command line):
      args[0] -- input wiggle file, opened through misc.open_compressed
      args[1] -- output file the BinnedArray is written to
    The optional `comp` option, when set, is forwarded to
    BinnedArray.to_file as `comp_type`.

    All scores go into one array and the chromosome column is ignored; the
    script expects a wiggle input on only one chromosome.
    """
    # Parse command line
    options, args = doc_optparse.parse( __doc__ )
    try:
        comp_type = options.comp or None
        score_fname = args[0]
        out_fname = args[1]
    except Exception:
        # Was a bare `except:`, which would also have swallowed
        # KeyboardInterrupt / SystemExit.
        doc_optparse.exit()

    scores = BinnedArray()
    reader = bx.wiggle.Reader( misc.open_compressed( score_fname ) )
    for i, ( chrom, pos, val ) in enumerate( reader ):
        scores[pos] = val
        # Status
        if i % 10000 == 0:
            # Same output as the old print statement, valid on Python 2 and 3.
            print( "%d scores processed" % i )

    # Close the output even when serialization fails.
    with open( out_fname, "w" ) as out:
        if comp_type:
            scores.to_file( out, comp_type=comp_type )
        else:
            scores.to_file( out )
def __init__(self, wigFile):
    '''Read a wiggle file and build the per-chromosome score arrays.

    wigFile -- path of the wiggle file to load.

    Populates:
      self.scores -- dict mapping upper-cased chromosome name -> BinnedArray
      self.num_re -- compiled regex matching runs of digits, '.', '-' and '+'

    Prints a progress line every 100000 data points and a final total.
    '''
    self.scores = {}
    self.num_re = re.compile(r'[\d\.\-\+]+')
    # Tracked separately from the loop index so that an empty file reports 0
    # instead of raising, and the total is a count rather than the last index.
    loaded = 0
    # Context manager closes the file; the original never closed it.
    with open(wigFile) as fh:
        for i, (chrom, pos, val) in enumerate(bx.wiggle.Reader(fh)):
            chrom = chrom.upper()
            if chrom not in self.scores:
                self.scores[chrom] = BinnedArray()
            self.scores[chrom][pos] = val
            if i % 100000 == 0:
                print("%i datapoints loaded \r" % i)
            loaded = i + 1
    print("total " + str(loaded) + " points loaded")
def main():
    """
    Split a wiggle file into one BinnedArray file per chromosome.

    Reads (chrom, pos, val) triples from the file named by sys.argv[1] and
    writes each chromosome's BinnedArray to a file named after the chromosome
    in the current working directory.
    """
    options, args = doc_optparse.parse(__doc__)

    scores = {}
    # NOTE(review): the input path comes from sys.argv[1], not from the parsed
    # `args`; confirm this is intended when options precede the file name.
    with open(sys.argv[1]) as score_file:
        for i, (chrom, pos, val) in enumerate(bx.wiggle.Reader(score_file)):
            if chrom not in scores:
                scores[chrom] = BinnedArray()
            scores[chrom][pos] = val
            # Status
            if i % 10000 == 0:
                print(i, "scores processed")

    # `chrom_name` replaces the original loop variable `chr`, which shadowed
    # the builtin.
    for chrom_name, binned in scores.items():
        # NOTE(review): opened in text mode as in the original; confirm whether
        # BinnedArray.to_file needs a binary-mode ("wb") handle.
        with open(chrom_name, "w") as out:
            binned.to_file(out)
def read_WIG(wig_file):
    """
    Parse a variableStep wiggle file into per-chromosome coverage arrays.

    wig_file -- path of the wiggle file (the declaration handling is specific
                to Shoelaces wig files).

    Each `variableStep` declaration line starts a new BinnedArray for the
    chromosome named by its `chrom=` field; a repeated declaration for the
    same chromosome therefore replaces the earlier array. Every following
    data line `<pos> <val>` stores float(val) at int(pos) in the current
    chromosome's array. Blank lines and `track` / `browser` / `#` header
    lines are skipped.

    Returns a dict mapping chromosome name -> BinnedArray.

    Raises ValueError if a data line appears before any `variableStep`
    declaration or if a position/value is not numeric, and KeyError if a
    declaration has no `chrom` field.
    """
    ribo_cov = {}
    chrom = None
    # Context manager guarantees the file is closed even when parsing fails.
    with open(wig_file, "r") as wig:
        for line in wig:
            if line.startswith('variableStep'):
                declarations = {}
                for field in line.split()[1:]:
                    parts = field.split("=")
                    declarations[parts[0]] = parts[1].strip('"')
                chrom = declarations['chrom']
                ribo_cov[chrom] = BinnedArray()
                continue

            tmp = line.strip().split()
            if not tmp:
                # Blank line: nothing to record.
                continue
            if tmp[0] in ('track', 'browser') or tmp[0].startswith('#'):
                # Wiggle header / comment lines carry no data.
                continue
            if chrom is None:
                raise ValueError(
                    'data line found before any variableStep declaration in "%s"'
                    % wig_file)
            pos = int(tmp[0])
            val = float(tmp[1])
            ribo_cov[chrom][pos] = val
    return ribo_cov
def main():
    """
    Split a wiggle file into one BinnedArray file per chromosome.

    The first positional command line argument names the input wiggle file.
    Each chromosome found in it is written, as a BinnedArray, to a file named
    after the chromosome in the current working directory.
    """
    # Parse command line
    options, args = doc_optparse.parse( __doc__ )
    try:
        score_fname = args[0]
    except IndexError:
        # No input file given. Was a bare `except:`.
        doc_optparse.exit()

    scores = {}
    # Open the validated positional argument; the original parsed it and then
    # read sys.argv[1] instead, which is wrong when options come first.
    with open( score_fname ) as score_file:
        for i, ( chrom, pos, val ) in enumerate( bx.wiggle.Reader( score_file ) ):
            if chrom not in scores:
                scores[ chrom ] = BinnedArray()
            scores[chrom][pos] = val
            # Status
            if i % 10000 == 0:
                # Same output as the old print statement, valid on Python 2 and 3.
                print( "%d scores processed" % i )

    # `chrom_name` replaces the original loop variable `chr`, which shadowed
    # the builtin.
    for chrom_name in scores:
        # Close each output even when serialization fails.
        with open( chrom_name, "w" ) as out:
            scores[chrom_name].to_file( out )
#!/usr/bin/env python from bx.binned_array import BinnedArray import os COMPTYPE = 'zlib' TYPECODE = 'I' newarray = lambda: BinnedArray(default=0, typecode=TYPECODE) def open_with_newdir(filename): dname = os.path.dirname(filename) if not os.path.isdir(dname): os.makedirs(dname) return open(filename, 'w') BASESPACE = ['depth', 'A', 'C', 'G', 'T', 'N'] BASESPACE2i = dict((n, i) for i, n in enumerate(BASESPACE)) def process(inpf, prefix): arrchrom = None arrtype = None arr = [] def flush(): if arrchrom is None: return filenamefmt = '%s/%%s/%s.%s' % (prefix, arrchrom[0], arrchrom[1]) if arrtype != 'M':