def main():
    """Merge per-sample data files into a single table on stdout.

    Command line: ``<pattern> <re_id>``.  Every file in the current directory
    selected by ``drdcommon.files_in_dir(".", pattern)`` is passed to
    ``load_data`` together with its sample id, which is capture group 1 of
    the regular expression ``re_id`` searched in the file name.

    The loaded data is read back as ``h[type][chrm][coor][sample_id]``.  The
    output is a header line followed by one line per (type, chrm, coor)
    entry: chrm, coor, type, the number of samples stored for that entry, and
    then one column per sample in load order ("0" when the sample has no
    value for that entry).

    Raises:
        Exception: if a sample id cannot be extracted from a file name.
    """
    if len(sys.argv) != 3:
        # NOTE(review): presumably drdcommon.error terminates the program;
        # confirm, otherwise the argv reads below fail.
        drdcommon.error("Wrong # of args", usage)

    pattern = sys.argv[1]
    re_id = sys.argv[2]  # Regular expression to extract id

    def nested_dict():
        # Dictionary whose missing keys are created as further nested dicts.
        return defaultdict(nested_dict)

    h = nested_dict()
    l_ids = []
    for fn in drdcommon.files_in_dir(".", pattern):
        try:
            sid = re.search(re_id, fn).group(1)
        except (AttributeError, IndexError, re.error):
            # AttributeError: no match (search returned None);
            # IndexError: the pattern has no group 1;
            # re.error: the pattern itself is invalid.
            # A bare except here would also trap KeyboardInterrupt/SystemExit.
            raise Exception('Problems extracting id using regular expression.')
        l_ids.append(sid)
        load_data(sid, fn, h)

    # print header
    sys.stdout.write("chrm coor type num_over_zero ")
    for _id in l_ids:
        sys.stdout.write("%s " % _id)
    sys.stdout.write("\n")

    for t, one in h.items():
        for chrm, two in one.items():
            for coor, three in two.items():
                sys.stdout.write(
                    "%s %s %s %s " % (str(chrm), str(coor), t, len(three)))
                for sid in l_ids:
                    # Membership test first: indexing a defaultdict with a
                    # missing key would insert it.
                    if sid in three:
                        sys.stdout.write(str(three[sid]) + " ")
                    else:
                        sys.stdout.write("0 ")
                sys.stdout.write("\n")
def main():
    """Merge per-sample data files into a single table on stdout.

    Command line: ``<pattern> <re_id>``.  Every file in the current directory
    selected by ``drdcommon.files_in_dir(".", pattern)`` is passed to
    ``load_data`` together with its sample id, which is capture group 1 of
    the regular expression ``re_id`` searched in the file name.

    The loaded data is read back as ``h[type][chrm][coor][sample_id]``.  The
    output is a header line followed by one line per (type, chrm, coor)
    entry: chrm, coor, type, the number of samples stored for that entry, and
    then one column per sample in load order ("0" when the sample has no
    value for that entry).

    Raises:
        Exception: if a sample id cannot be extracted from a file name.
    """
    if len(sys.argv) != 3:
        # NOTE(review): presumably drdcommon.error terminates the program;
        # confirm, otherwise the argv reads below fail.
        drdcommon.error("Wrong # of args", usage)

    pattern = sys.argv[1]
    re_id = sys.argv[2]  # Regular expression to extract id

    def nested_dict():
        # Dictionary whose missing keys are created as further nested dicts.
        return defaultdict(nested_dict)

    h = nested_dict()
    l_ids = []
    for fn in drdcommon.files_in_dir(".", pattern):
        try:
            sid = re.search(re_id, fn).group(1)
        except (AttributeError, IndexError, re.error):
            # AttributeError: no match (search returned None);
            # IndexError: the pattern has no group 1;
            # re.error: the pattern itself is invalid.
            # A bare except here would also trap KeyboardInterrupt/SystemExit.
            raise Exception('Problems extracting id using regular expression.')
        l_ids.append(sid)
        load_data(sid, fn, h)

    # print header
    sys.stdout.write("chrm coor type num_over_zero ")
    for _id in l_ids:
        sys.stdout.write("%s " % _id)
    sys.stdout.write("\n")

    for t, one in h.items():
        for chrm, two in one.items():
            for coor, three in two.items():
                sys.stdout.write(
                    "%s %s %s %s " % (str(chrm), str(coor), t, len(three)))
                for sid in l_ids:
                    # Membership test first: indexing a defaultdict with a
                    # missing key would insert it.
                    if sid in three:
                        sys.stdout.write(str(three[sid]) + " ")
                    else:
                        sys.stdout.write("0 ")
                sys.stdout.write("\n")
def do_work(pattern):
    """Print every line of each file selected by ``pattern``.

    Files are taken from ``drdcommon.files_in_dir('.', pattern)`` and
    processed in the order that call yields them.

    Args:
        pattern: file selection pattern handed to ``drdcommon.files_in_dir``.
    """
    for f in drdcommon.files_in_dir('.', pattern):
        # The context manager closes the file even if printing fails.
        with open(f) as fd:
            # Iterate over the file's lines.  Looping over fd.readline()
            # walks the characters of the first line only.
            for line in fd:
                print(line)
def main():
    """Merge per-sample data files from a directory into one table.

    Command line: ``<pattern> <re_id> <f_dir>``.  Every file selected by
    ``drdcommon.files_in_dir(f_dir, pattern)`` is passed to ``load_data``
    together with its sample id, which is capture group 1 of the regular
    expression ``re_id`` searched in the file name.  All data is held in
    memory before anything is written.

    The loaded data is read back as ``h[chrm][start][sample_id]`` and element
    ``[1]`` of each stored entry is written.  Output goes through the
    module-level ``out`` callable: a header line, then one line per
    (chrm, start) with one column per sample in load order.

    Raises:
        Exception: if a sample id cannot be extracted from a file name.
    """
    if len(sys.argv) != 4:
        # NOTE(review): presumably drdcommon.error terminates the program;
        # confirm, otherwise the argv reads below fail.
        drdcommon.error("Wrong # of args", usage)

    pattern = sys.argv[1]
    re_id = sys.argv[2]  # Regular expression to extract id
    f_dir = sys.argv[3]

    def nested_dict():
        # Dictionary whose missing keys are created as further nested dicts.
        return defaultdict(nested_dict)

    l_ids = []
    h = nested_dict()  # hold all data in mem

    files_to_iterate = drdcommon.files_in_dir(f_dir, pattern)
    sys.stderr.write(
        "# of files to process: " + str(len(files_to_iterate)) + "\n")
    for fn in files_to_iterate:
        try:
            sid = re.search(re_id, fn).group(1)
        except (AttributeError, IndexError, re.error):
            # AttributeError: no match (search returned None);
            # IndexError: the pattern has no group 1;
            # re.error: the pattern itself is invalid.
            # A bare except here would also trap KeyboardInterrupt/SystemExit.
            raise Exception('Problems extracting id using regular expression.')
        l_ids.append(sid)
        load_data(sid, fn, h)

    # print header
    out("chrm start ")
    for _id in l_ids:
        out("%s " % _id)
    out("\n")

    for chrm, one in h.items():
        for start, two in one.items():
            out("%s %s " % (str(chrm), str(start)))
            # Walk the samples in header order so every value lands under its
            # own column.  Iterating the dict directly follows dict order and
            # skips samples without an entry, which shifts later columns.
            for sid in l_ids:
                if sid in two:
                    out(str(two[sid][1]) + " ")
                else:
                    # NOTE(review): "0" as the filler for a sample with no
                    # entry at this position is an assumption; confirm.
                    out("0 ")
            out("\n")
def load_hits(self):
    """Fill ``self.d_hits`` from every file selected by ``self.pattern``.

    Each line of each file must consist of exactly two whitespace-separated
    fields: a hit count followed by an id.  The count is stored, as the
    string read from the file, under ``self.d_hits[id][sample_id]`` where
    ``sample_id`` is what ``self.extract_id`` returns for the file name.

    Raises:
        AssertionError: if a line does not have exactly two fields.
    """
    drdcommon.log("Loading hits")
    for path in drdcommon.files_in_dir('.', self.pattern):
        sample_id = self.extract_id(path)
        drdcommon.log("fn: %s | id: %s" % (path, sample_id))
        for record in drdcommon.xopen(path):
            fields = record.split()
            assert len(fields) == 2
            n_hits = fields[0]
            p_id = fields[1].rstrip()
            self.d_hits[p_id][sample_id] = n_hits
def load_data(pattern):
    """Read the first column of every file selected by ``pattern`` as ints.

    Files come from ``drdcommon.files_in_dir('.', pattern)``; each file name
    is echoed to stderr as it is read, and the length of the returned object
    is written to stderr at the end.

    Args:
        pattern: file selection pattern handed to ``drdcommon.files_in_dir``.

    Returns:
        A list with one inner list of ints per file, in the order the files
        were yielded.  When exactly one file was read, that file's flat list
        of ints is returned instead of a one-element list of lists.

    Raises:
        IndexError: if a line contains no fields (e.g. a blank line).
        ValueError: if the first field of a line is not an integer.
    """
    data = []
    for f in drdcommon.files_in_dir('.', pattern):
        sys.stderr.write(f + "\n")
        # Close each file as soon as it has been read instead of relying on
        # garbage collection to release the handle.
        with open(f) as fd:
            l_cov_vals = [int(line.split()[0]) for line in fd]
        data.append(l_cov_vals)

    # A single input file is unwrapped to a flat list of values.
    if len(data) == 1:
        data = data[0]
    sys.stderr.write(str(len(data)) + "\n")
    return data
# Script body: expects <file_pattern> and <pattern_to_extract_id> on the
# command line; prints a usage line to stderr and exits with status 1
# otherwise.
if len(sys.argv) != 3:
    sys.stderr.write("tool <file_pattern> <pattern_to_extract_id> > out.bed" + "\n")
    sys.exit(1)

out = sys.stdout.write
err = sys.stderr.write
file_pattern, re_id = sys.argv[1], re.compile(sys.argv[2])
# Log the pattern text itself: formatting the compiled object can yield an
# opaque object repr (depending on the Python version) instead of the regex.
err("re for id: %s\n" % re_id.pattern)

# Load data for all genes all samples
data = {}
ids = []
for f in drdcommon.files_in_dir(".", file_pattern):
    # extract sample id (capture group 1 of the id regex in the file name)
    match = re_id.search(f)
    if match:
        _id = match.group(1)
    else:
        raise Exception("Problems extracting id for: " + f)
    err("Working on id: %s\n" % (_id))
    ids.append(_id)
    first_line = True
    for l in drdcommon.xopen(f):
        # Skip the first line of every file.
        if first_line:
            first_line = False
            continue
import re

# Script body: expects <file_pattern> and <pattern_to_extract_id> on the
# command line; prints a usage line to stderr and exits with status 1
# otherwise.
if len(sys.argv) != 3:
    sys.stderr.write("tool <file_pattern> <pattern_to_extract_id> > out.bed" + "\n")
    sys.exit(1)

out = sys.stdout.write
err = sys.stderr.write
file_pattern, re_id = sys.argv[1], re.compile(sys.argv[2])
# Log the pattern text itself: formatting the compiled object can yield an
# opaque object repr (depending on the Python version) instead of the regex.
err("re for id: %s\n" % re_id.pattern)

# Load data for all genes all samples
data = {}
ids = []
for f in drdcommon.files_in_dir(".", file_pattern):
    # extract sample id (capture group 1 of the id regex in the file name)
    match = re_id.search(f)
    if match:
        _id = match.group(1)
    else:
        raise Exception("Problems extracting id for: " + f)
    err("Working on id: %s\n" % (_id))
    ids.append(_id)
    first_line = True
    for l in drdcommon.xopen(f):
        # Skip the first line of every file.
        if first_line:
            first_line = False
            continue
def load_ids_order(self):
    """Append the id of every file selected by ``self.pattern`` to ``self.ids``.

    Ids are obtained with ``self.extract_id`` and appended in the order in
    which ``drdcommon.files_in_dir`` yields the files; the resulting length
    of ``self.ids`` is then logged.
    """
    sample_files = drdcommon.files_in_dir('.', self.pattern)
    for path in sample_files:
        sample_id = self.extract_id(path)
        self.ids.append(sample_id)
    n_loaded = len(self.ids)
    drdcommon.log("# of samples loaded: %s" % n_loaded)