def call1(cvsfile, write_func, *args):
    """Call enriched regions from one score column and emit them as BED.

    Parameters:
      cvsfile    -- iterable of row dicts that also exposes a ``fieldnames``
                    list (presumably a csv.DictReader -- confirm with caller).
      write_func -- callable taking one string; it receives a header comment
                    line followed by the BED text of the called regions.
      args       -- ``args[0]`` is the option list:
                    <loc column> <score column> <cutoff> <min length> <max gap>
                    Columns are 0-based indices into ``cvsfile.fieldnames``.

    Location cells must look like ``chrom.start.end``.  Rows whose location
    or score is empty or "NA" are ignored.

    Exits with status 1 after printing the usage text when fewer than five
    options are supplied.
    """
    argv = args[0]
    if len(argv) < 5:
        sys.stderr.write(
            "Need 5 extra arguments for 'call', options <loc column> "
            "<score column> <cutoff> <min length> <max gap>\n"
            "e.g. command: <0> <1> <0.5> <10000> <2000>, means to use the "
            "first column as genome coordinations to call enriched regions "
            "from the second column, the threshold to call enriched region "
            "is 0.5, the minimum length of region is 10k, and the maximum "
            "gap to link two nearby regions is 2k.\n"
        )
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    cor_column = cvsfile.fieldnames[int(argv[0])]
    var_column = cvsfile.fieldnames[int(argv[1])]
    cutoff = float(argv[2])
    min_len = int(argv[3])
    max_gap = int(argv[4])

    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    # Bounds of the last usable row; they stay None when nothing was added.
    last_start = last_end = None
    for row in cvsfile:
        cor = row.get(cor_column)
        var = row.get(var_column)
        if cor and var and cor != "NA" and var != "NA":
            (chrom, last_start, last_end) = cor.split(".")
            add_func(chrom, int(last_start), float(var))

    # The span is taken from the last usable row.  With no usable row the
    # track keeps whatever span WigTrackI starts with instead of failing on
    # unbound variables.
    if last_start is not None:
        wtrack.span = int(last_end) - int(last_start)

    write_func("# regions called from %s:%s\n" % (argv[1], var_column))
    bpeaks = wtrack.call_peaks(cutoff=cutoff, min_length=min_len, max_gap=max_gap)
    write_func(bpeaks.tobed())
def main():
    """Extract the data of one chromosome from a wiggle file.

    Command line: ``<chr> <wig> <newwig>``.  The wiggle file is parsed into
    a track, the data of ``<chr>`` is copied into a fresh WigTrackI with the
    same span, and that track is written to ``<newwig>``.

    Exits with status 1 when arguments are missing or when the track holds
    no data for the requested chromosome.
    """
    if len(sys.argv) < 4:
        sys.stderr.write(
            "Extract data for certain chromosome from a wiggle file.\n")
        sys.stderr.write("need 3 paras: %s <chr> <wig> <newwig>\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(1)

    chrom = sys.argv[1]
    # The track is fully built inside the ``with`` block, so the input handle
    # is released as soon as parsing is done (it used to stay open).
    with open(sys.argv[2]) as wigfhd:
        wigtrack = WiggleIO.WiggleIO(wigfhd).build_wigtrack()

    wig_chr = wigtrack.get_data_by_chr(chrom)
    if not wig_chr:
        sys.stderr.write("No data for chromosome %s!\n" % chrom)
        sys.exit(1)

    newwigtrack = WigTrackI()
    newwigtrack.span = wigtrack.span
    # wig_chr unpacks into two parallel sequences that are fed to add_loc as
    # (position, value) pairs.
    (wig_chr_p, wig_chr_s) = wig_chr
    for i, pos in enumerate(wig_chr_p):
        newwigtrack.add_loc(chrom, pos, wig_chr_s[i])

    # ``with`` guarantees the output is flushed and closed even if writing
    # fails part-way.
    with open(sys.argv[3], "w") as newwigfhd:
        newwigtrack.write_wig(newwigfhd, name="for chromosome %s" % chrom)
def main():
    """Split a wiggle file into one wiggle file per chromosome.

    Command line: ``<wig> <output_prefix>``.  The wiggle file is parsed into
    a track and sorted; for every chromosome name in the track a new file
    ``<output_prefix>.<chrom>.wig`` is written with that chromosome's data
    and the span of the source track.

    Exits with status 1 when arguments are missing.
    """
    if len(sys.argv) < 3:
        sys.stderr.write(
            "Extract data for all chromosomes from a wiggle file.\n")
        sys.stderr.write("need 2 paras: %s <wig> <output_prefix>\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(1)

    prefix = sys.argv[2]
    # The track is fully built inside the ``with`` block, so the input handle
    # is released as soon as parsing is done (it used to stay open).
    with open(sys.argv[1]) as wigfhd:
        wigtrack = WiggleIO.WiggleIO(wigfhd).build_wigtrack()
    wigtrack.sort()

    for chrom in wigtrack.get_chr_names():
        wig_chr = wigtrack.get_data_by_chr(chrom)
        newwigtrack = WigTrackI()
        newwigtrack.span = wigtrack.span
        # wig_chr unpacks into two parallel sequences that are fed to
        # add_loc as (position, value) pairs.
        (wig_chr_p, wig_chr_s) = wig_chr
        for i, pos in enumerate(wig_chr_p):
            newwigtrack.add_loc(chrom, pos, wig_chr_s[i])

        newwigfile = prefix + "." + chrom + ".wig"
        # ``with`` closes each per-chromosome file even if writing fails.
        with open(newwigfile, "w") as newwigfhd:
            newwigtrack.write_wig(newwigfhd, name="for chromosome %s" % chrom)
def main():
    """Split a wiggle file into one wiggle file per chromosome.

    Command line: ``<wig> <output_prefix>``.  The wiggle file is parsed into
    a track and sorted; for every chromosome name in the track a new file
    ``<output_prefix>.<chrom>.wig`` is written with that chromosome's data
    and the span of the source track.

    Exits with status 1 when arguments are missing.
    """
    if len(sys.argv) < 3:
        sys.stderr.write("Extract data for all chromosomes from a wiggle file.\n")
        sys.stderr.write("need 2 paras: %s <wig> <output_prefix>\n" % os.path.basename(sys.argv[0]))
        sys.exit(1)

    prefix = sys.argv[2]
    # The track is fully built inside the ``with`` block, so the input handle
    # is released as soon as parsing is done (it used to stay open).
    with open(sys.argv[1]) as wigfhd:
        wigtrack = WiggleIO.WiggleIO(wigfhd).build_wigtrack()
    wigtrack.sort()

    for chrom in wigtrack.get_chr_names():
        wig_chr = wigtrack.get_data_by_chr(chrom)
        newwigtrack = WigTrackI()
        newwigtrack.span = wigtrack.span
        # wig_chr unpacks into two parallel sequences that are fed to
        # add_loc as (position, value) pairs.
        (wig_chr_p, wig_chr_s) = wig_chr
        for i, pos in enumerate(wig_chr_p):
            newwigtrack.add_loc(chrom, pos, wig_chr_s[i])

        newwigfile = prefix + "." + chrom + ".wig"
        # ``with`` closes each per-chromosome file even if writing fails.
        with open(newwigfile, "w") as newwigfhd:
            newwigtrack.write_wig(newwigfhd, name="for chromosome %s" % chrom)
def main():
    """Extract the data of one chromosome from a wiggle file.

    Command line: ``<chr> <wig> <newwig>``.  The wiggle file is parsed into
    a track, the data of ``<chr>`` is copied into a fresh WigTrackI with the
    same span, and that track is written to ``<newwig>``.

    Exits with status 1 when arguments are missing or when the track holds
    no data for the requested chromosome.
    """
    if len(sys.argv) < 4:
        sys.stderr.write("Extract data for certain chromosome from a wiggle file.\n")
        sys.stderr.write("need 3 paras: %s <chr> <wig> <newwig>\n" % os.path.basename(sys.argv[0]))
        sys.exit(1)

    chrom = sys.argv[1]
    # The track is fully built inside the ``with`` block, so the input handle
    # is released as soon as parsing is done (it used to stay open).
    with open(sys.argv[2]) as wigfhd:
        wigtrack = WiggleIO.WiggleIO(wigfhd).build_wigtrack()

    wig_chr = wigtrack.get_data_by_chr(chrom)
    if not wig_chr:
        sys.stderr.write("No data for chromosome %s!\n" % chrom)
        sys.exit(1)

    newwigtrack = WigTrackI()
    newwigtrack.span = wigtrack.span
    # wig_chr unpacks into two parallel sequences that are fed to add_loc as
    # (position, value) pairs.
    (wig_chr_p, wig_chr_s) = wig_chr
    for i, pos in enumerate(wig_chr_p):
        newwigtrack.add_loc(chrom, pos, wig_chr_s[i])

    # ``with`` guarantees the output is flushed and closed even if writing
    # fails part-way.
    with open(sys.argv[3], "w") as newwigfhd:
        newwigtrack.write_wig(newwigfhd, name="for chromosome %s" % chrom)
def combcall2draw(cvsfile, write_func, *args):
    """Call regions where any chosen column beats its cutoff, then draw them.

    Parameters:
      cvsfile    -- iterable of row dicts that also exposes a ``fieldnames``
                    list (presumably a csv.DictReader -- confirm with caller).
      write_func -- accepted for signature compatibility; not used here.
      args       -- ``args[0]`` is the option list:
                    <loc column> <score column1[,score column2,...]>
                    <cutoff1[,cutoff2,...]> <min length> <max gap>
                    <pdf filename>
                    Columns are 0-based indices into ``cvsfile.fieldnames``;
                    the i-th cutoff belongs to the i-th score column.

    Location cells must look like ``chrom.start.end``.  A row is marked as
    enriched as soon as one of its score columns is strictly above the
    matching cutoff.  The called regions are written as BED next to the
    figure (``x.pdf`` -> ``x.bed``) and drawn on the six hard-coded
    C. elegans chromosomes into the PDF.

    Exits with status 1 after printing the usage text when fewer than six
    options are supplied.
    """
    argv = args[0]
    if len(argv) < 6:
        sys.stderr.write(
            "Need 6 extra arguments for 'combcall2draw', options <loc column> "
            "<score column1[,score column2,...]> <cutoff1[,cutoff2,cutoff3]> "
            "<min length> <max gap> <pdf filename>\n"
            "e.g. command: <0> <1,2,3> <0.5,0.6,0.7> <10000> <2000> <a.pdf>, "
            "means to use the first column as genome coordinations to call "
            "enriched regions from the combinition of #1, #2 and #3, the "
            "thresholds to call enriched region are 0.5 for column 1, 0.6 for "
            "column 2 and 0.7 for column 3, the minimum length of region is "
            "10k, and the maximum gap to link two nearby regions is 2k. Then "
            "the figure will be saved in a.pdf.\n"
        )
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    cor_column = cvsfile.fieldnames[int(argv[0])]
    # Real lists rather than map() results: they are indexed for every row,
    # which an iterator-returning map() does not support.
    var_columns = [cvsfile.fieldnames[int(x)] for x in argv[1].split(",")]
    cutoffs = [float(x) for x in argv[2].split(",")]
    min_len = int(argv[3])
    max_gap = int(argv[4])
    pdf_name = argv[5]

    # Swap only the extension.  Replacing every "pdf" substring would also
    # rewrite directory names, and a name without "pdf" would make the BED
    # file and the figure share one path.
    if pdf_name.endswith(".pdf"):
        bed_name = pdf_name[:-len(".pdf")] + ".bed"
    else:
        bed_name = pdf_name + ".bed"

    # Combined track: a location is added when any column is above its cutoff.
    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    # Bounds of the last enriched row; they stay None when nothing was added.
    last_start = last_end = None
    for row in cvsfile:
        cor = row.get(cor_column)
        if not cor or cor == "NA":
            continue
        for i, var_column in enumerate(var_columns):
            cutoff = cutoffs[i]
            var = row.get(var_column)
            if var and var != "NA" and float(var) > cutoff:
                (chrom, last_start, last_end) = cor.split(".")
                # 1.1 is a marker value just above the 1.0 cutoff used below.
                add_func(chrom, int(last_start), 1.1)
                break

    # With no enriched row the track keeps whatever span WigTrackI starts
    # with instead of failing on unbound variables.
    if last_start is not None:
        wtrack.span = int(last_end) - int(last_start)
    bpeaks = wtrack.call_peaks(cutoff=1.0, min_length=min_len, max_gap=max_gap)

    # ``with`` makes sure the BED text is flushed and the handle closed.
    with open(bed_name, "w") as fhd:
        fhd.write(bpeaks.tobed())

    from Bio.Graphics import BasicChromosome
    from reportlab.lib.colors import gray, black, white

    # Chromosome names and lengths (bp) of Caenorhabditis elegans.
    entries = [
        ("chrI", 15072419),
        ("chrII", 15279316),
        ("chrIII", 13783681),
        ("chrIV", 17493784),
        ("chrV", 20919398),
        ("chrX", 17718852),
    ]
    max_length = max(length for _, length in entries)

    chr_diagram = BasicChromosome.Organism()
    for name, length in entries:
        cur_chromosome = BasicChromosome.Chromosome(name)
        # Total scale: the longest body plus two telomeres of 5 percent each.
        cur_chromosome.scale_num = max_length * 1.1

        # Opening telomere.
        head = BasicChromosome.TelomereSegment()
        head.scale = 0.05 * max_length
        head.fill_color = gray
        cur_chromosome.add(head)

        # Body, using bp as the scale length.  A chromosome without called
        # peaks is drawn as one white segment.
        try:
            cpeaks = bpeaks.peaks[name]
        except KeyError:  # assumes ``peaks`` is a mapping keyed by chromosome
            cpeaks = []
        body_regions = []
        last_pos = 0
        for p in cpeaks:
            body_regions.append((p[0] - last_pos, white))  # outside regions
            body_regions.append((p[1] - p[0], black))  # enriched regions
            last_pos = p[1]
            # A peak ending past the chromosome would give the trailing
            # segment a negative length.
            assert p[1] < length
        body_regions.append((length - last_pos, white))  # last part

        for seg_len, color in body_regions:
            body = BasicChromosome.ChromosomeSegment()
            body.fill_color = color
            body.scale = seg_len
            cur_chromosome.add(body)

        # Closing telomere.
        tail = BasicChromosome.TelomereSegment(inverted=True)
        tail.scale = 0.05 * max_length
        tail.fill_color = gray
        cur_chromosome.add(tail)

        chr_diagram.add(cur_chromosome)

    chr_diagram.draw(pdf_name, "Highlight regions in Caenorhabditis elegans")
def call1draw(cvsfile, write_func, *args):
    """Call enriched regions from one score column and draw them.

    A combination of drawchrom and call1.

    Parameters:
      cvsfile    -- iterable of row dicts that also exposes a ``fieldnames``
                    list (presumably a csv.DictReader -- confirm with caller).
      write_func -- accepted for signature compatibility; not used here.
      args       -- ``args[0]`` is the option list:
                    <loc column> <score column> <cutoff> <min length>
                    <max gap> <pdf filename>
                    Columns are 0-based indices into ``cvsfile.fieldnames``.

    Location cells must look like ``chrom.start.end``.  Rows whose location
    or score is empty or "NA" are ignored.  The called regions are written
    as BED next to the figure (``x.pdf`` -> ``x.bed``) and drawn on the six
    hard-coded C. elegans chromosomes into the PDF.

    Exits with status 1 after printing the usage text when fewer than six
    options are supplied.
    """
    argv = args[0]
    if len(argv) < 6:
        sys.stderr.write(
            "Need 6 extra arguments for 'call1draw', options <loc column> "
            "<score column> <cutoff> <min length> <max gap> <pdf filename>\n"
            "e.g. command: <0> <1> <0.5> <10000> <2000> <a.pdf>, means to use "
            "the first column as genome coordinations to call enriched regions "
            "from the second column, the threshold to call enriched region is "
            "0.5, the minimum length of region is 10k, and the maximum gap to "
            "link two nearby regions is 2k. Then the figure will be saved in "
            "a.pdf.\n"
        )
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    cor_column = cvsfile.fieldnames[int(argv[0])]
    var_column = cvsfile.fieldnames[int(argv[1])]
    cutoff = float(argv[2])
    min_len = int(argv[3])
    max_gap = int(argv[4])
    pdf_name = argv[5]

    # Swap only the extension.  Replacing every "pdf" substring would also
    # rewrite directory names, and a name without "pdf" would make the BED
    # file and the figure share one path.
    if pdf_name.endswith(".pdf"):
        bed_name = pdf_name[:-len(".pdf")] + ".bed"
    else:
        bed_name = pdf_name + ".bed"

    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    # Bounds of the last usable row; they stay None when nothing was added.
    last_start = last_end = None
    for row in cvsfile:
        cor = row.get(cor_column)
        var = row.get(var_column)
        if cor and var and cor != "NA" and var != "NA":
            (chrom, last_start, last_end) = cor.split(".")
            add_func(chrom, int(last_start), float(var))

    # With no usable row the track keeps whatever span WigTrackI starts with
    # instead of failing on unbound variables.
    if last_start is not None:
        wtrack.span = int(last_end) - int(last_start)
    bpeaks = wtrack.call_peaks(cutoff=cutoff, min_length=min_len, max_gap=max_gap)

    # ``with`` makes sure the BED text is flushed and the handle closed.
    with open(bed_name, "w") as fhd:
        fhd.write(bpeaks.tobed())

    from Bio.Graphics import BasicChromosome
    from reportlab.lib.colors import gray, black, white

    # Chromosome names and lengths (bp) of Caenorhabditis elegans.
    entries = [
        ("chrI", 15072419),
        ("chrII", 15279316),
        ("chrIII", 13783681),
        ("chrIV", 17493784),
        ("chrV", 20919398),
        ("chrX", 17718852),
    ]
    max_length = max(length for _, length in entries)

    chr_diagram = BasicChromosome.Organism()
    for name, length in entries:
        cur_chromosome = BasicChromosome.Chromosome(name)
        # Total scale: the longest body plus two telomeres of 5 percent each.
        cur_chromosome.scale_num = max_length * 1.1

        # Opening telomere.
        head = BasicChromosome.TelomereSegment()
        head.scale = 0.05 * max_length
        head.fill_color = gray
        cur_chromosome.add(head)

        # Body, using bp as the scale length.  A chromosome without called
        # peaks is drawn as one white segment.
        try:
            cpeaks = bpeaks.peaks[name]
        except KeyError:  # assumes ``peaks`` is a mapping keyed by chromosome
            cpeaks = []
        body_regions = []
        last_pos = 0
        for p in cpeaks:
            body_regions.append((p[0] - last_pos, white))  # outside regions
            body_regions.append((p[1] - p[0], black))  # enriched regions
            last_pos = p[1]
            # A peak ending past the chromosome would give the trailing
            # segment a negative length.
            assert p[1] < length
        body_regions.append((length - last_pos, white))  # last part

        for seg_len, color in body_regions:
            body = BasicChromosome.ChromosomeSegment()
            body.fill_color = color
            body.scale = seg_len
            cur_chromosome.add(body)

        # Closing telomere.
        tail = BasicChromosome.TelomereSegment(inverted=True)
        tail.scale = 0.05 * max_length
        tail.fill_color = gray
        cur_chromosome.add(tail)

        chr_diagram.add(cur_chromosome)

    chr_diagram.draw(pdf_name, "%s regions in Caenorhabditis elegans" % (var_column))
def build_wigtrack (self):
    """Use this function to return a WigTrackI.

    Reads every line of ``self.fhd``:

    * lines starting with "track", "#" or "browse" are ignored;
    * a "variableStep" line sets the chromosome ("Unknown" when ``chrom=``
      is absent) and the span (0 when ``span=`` is absent);
    * a "fixedStep" line must carry ``chrom=``, ``start=`` (>= 1) and
      ``step=``; ``span=`` is optional and defaults to 0;
    * any other non-blank line is a data line: a single value in fixedStep
      mode, or ``<position> <value>`` in variableStep mode.

    Blank lines are skipped.  A variableStep data line that does not hold
    exactly two fields is reported on stderr and skipped.

    The span of the last definition line becomes the span of the returned
    track, and ``self.fhd`` is rewound to its start before returning.

    Raises Exception when a fixedStep line lacks chrom/start/step or has a
    start below 1, and ValueError when a number cannot be parsed.
    """
    import sys  # local import: only needed for the malformed-line warning

    def _field(line, key):
        # Text following the last "key=" up to the next whitespace, or None
        # when the key does not occur on the line.
        idx = line.rfind(key + "=")
        if idx == -1:
            return None
        return line[idx + len(key) + 1:].strip().split()[0]

    data = WigTrackI()
    add_func = data.add_loc
    chrom = "Unknown"
    span = 0
    # 0 means variableStep mode; otherwise the next fixedStep position.
    pos_fixed = 0
    for i in self.fhd:
        if i.startswith(("track", "#", "browse")):
            continue

        if i.startswith("variableStep"):  # define line
            pos_fixed = 0
            chrom_txt = _field(i, "chrom")
            chrom = chrom_txt if chrom_txt is not None else "Unknown"
            span_txt = _field(i, "span")
            span = int(span_txt) if span_txt is not None else 0
        elif i.startswith("fixedStep"):  # define line
            chrom_txt = _field(i, "chrom")
            if chrom_txt is None:
                raise Exception("fixedStep line must define chrom=XX")
            chrom = chrom_txt
            span_txt = _field(i, "span")
            span = int(span_txt) if span_txt is not None else 0
            start_txt = _field(i, "start")
            if start_txt is None:
                raise Exception("fixedStep line must define start=XX")
            pos_fixed = int(start_txt)
            if pos_fixed < 1:
                raise Exception("fixedStep start must be bigger than 0!")
            step_txt = _field(i, "step")
            if step_txt is None:
                raise Exception("fixedStep line must define step=XX!")
            step = int(step_txt)
        else:  # read data value
            fields = i.split()
            if not fields:
                # A blank line carries no data in either mode.
                continue
            if pos_fixed:  # fixedStep
                add_func(chrom, int(pos_fixed), float(i.strip()))
                pos_fixed += step
            else:  # variableStep
                if len(fields) != 2:
                    # Skip the line instead of falling through with a stale
                    # (or unbound) position/value pair.
                    sys.stderr.write(
                        "Skip malformed wiggle data line: %r\n" % i)
                    continue
                add_func(chrom, int(fields[0]), float(fields[1]))

    data.span = span
    self.fhd.seek(0)
    return data
def combcall2draw(cvsfile, write_func, *args):
    """Call regions where any chosen column beats its cutoff, then draw them.

    Parameters:
      cvsfile    -- iterable of row dicts that also exposes a ``fieldnames``
                    list (presumably a csv.DictReader -- confirm with caller).
      write_func -- accepted for signature compatibility; not used here.
      args       -- ``args[0]`` is the option list:
                    <loc column> <score column1[,score column2,...]>
                    <cutoff1[,cutoff2,...]> <min length> <max gap>
                    <pdf filename>
                    Columns are 0-based indices into ``cvsfile.fieldnames``;
                    the i-th cutoff belongs to the i-th score column.

    Location cells must look like ``chrom.start.end``.  A row is marked as
    enriched as soon as one of its score columns is strictly above the
    matching cutoff.  The called regions are written as BED next to the
    figure (``x.pdf`` -> ``x.bed``) and drawn on the six hard-coded
    C. elegans chromosomes into the PDF.

    Exits with status 1 after printing the usage text when fewer than six
    options are supplied.
    """
    argv = args[0]
    if len(argv) < 6:
        sys.stderr.write(
            "Need 6 extra arguments for 'combcall2draw', options <loc column> "
            "<score column1[,score column2,...]> <cutoff1[,cutoff2,cutoff3]> "
            "<min length> <max gap> <pdf filename>\n"
            "e.g. command: <0> <1,2,3> <0.5,0.6,0.7> <10000> <2000> <a.pdf>, "
            "means to use the first column as genome coordinations to call "
            "enriched regions from the combinition of #1, #2 and #3, the "
            "thresholds to call enriched region are 0.5 for column 1, 0.6 for "
            "column 2 and 0.7 for column 3, the minimum length of region is "
            "10k, and the maximum gap to link two nearby regions is 2k. Then "
            "the figure will be saved in a.pdf.\n"
        )
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    cor_column = cvsfile.fieldnames[int(argv[0])]
    # Real lists rather than map() results: they are indexed for every row,
    # which an iterator-returning map() does not support.
    var_columns = [cvsfile.fieldnames[int(x)] for x in argv[1].split(",")]
    cutoffs = [float(x) for x in argv[2].split(",")]
    min_len = int(argv[3])
    max_gap = int(argv[4])
    pdf_name = argv[5]

    # Swap only the extension.  Replacing every "pdf" substring would also
    # rewrite directory names, and a name without "pdf" would make the BED
    # file and the figure share one path.
    if pdf_name.endswith(".pdf"):
        bed_name = pdf_name[:-len(".pdf")] + ".bed"
    else:
        bed_name = pdf_name + ".bed"

    # Combined track: a location is added when any column is above its cutoff.
    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    # Bounds of the last enriched row; they stay None when nothing was added.
    last_start = last_end = None
    for row in cvsfile:
        cor = row.get(cor_column)
        if not cor or cor == "NA":
            continue
        for i, var_column in enumerate(var_columns):
            cutoff = cutoffs[i]
            var = row.get(var_column)
            if var and var != "NA" and float(var) > cutoff:
                (chrom, last_start, last_end) = cor.split(".")
                # 1.1 is a marker value just above the 1.0 cutoff used below.
                add_func(chrom, int(last_start), 1.1)
                break

    # With no enriched row the track keeps whatever span WigTrackI starts
    # with instead of failing on unbound variables.
    if last_start is not None:
        wtrack.span = int(last_end) - int(last_start)
    bpeaks = wtrack.call_peaks(cutoff=1.0, min_length=min_len, max_gap=max_gap)

    # ``with`` makes sure the BED text is flushed and the handle closed.
    with open(bed_name, "w") as fhd:
        fhd.write(bpeaks.tobed())

    from Bio.Graphics import BasicChromosome
    from reportlab.lib.colors import gray, black, white

    # Chromosome names and lengths (bp) of Caenorhabditis elegans.
    entries = [
        ("chrI", 15072419),
        ("chrII", 15279316),
        ("chrIII", 13783681),
        ("chrIV", 17493784),
        ("chrV", 20919398),
        ("chrX", 17718852),
    ]
    max_length = max(length for _, length in entries)

    chr_diagram = BasicChromosome.Organism()
    for name, length in entries:
        cur_chromosome = BasicChromosome.Chromosome(name)
        # Total scale: the longest body plus two telomeres of 5 percent each.
        cur_chromosome.scale_num = max_length * 1.1

        # Opening telomere.
        head = BasicChromosome.TelomereSegment()
        head.scale = 0.05 * max_length
        head.fill_color = gray
        cur_chromosome.add(head)

        # Body, using bp as the scale length.  A chromosome without called
        # peaks is drawn as one white segment.
        try:
            cpeaks = bpeaks.peaks[name]
        except KeyError:  # assumes ``peaks`` is a mapping keyed by chromosome
            cpeaks = []
        body_regions = []
        last_pos = 0
        for p in cpeaks:
            body_regions.append((p[0] - last_pos, white))  # outside regions
            body_regions.append((p[1] - p[0], black))  # enriched regions
            last_pos = p[1]
            # A peak ending past the chromosome would give the trailing
            # segment a negative length.
            assert p[1] < length
        body_regions.append((length - last_pos, white))  # last part

        for seg_len, color in body_regions:
            body = BasicChromosome.ChromosomeSegment()
            body.fill_color = color
            body.scale = seg_len
            cur_chromosome.add(body)

        # Closing telomere.
        tail = BasicChromosome.TelomereSegment(inverted=True)
        tail.scale = 0.05 * max_length
        tail.fill_color = gray
        cur_chromosome.add(tail)

        chr_diagram.add(cur_chromosome)

    chr_diagram.draw(pdf_name, "Highlight regions in Caenorhabditis elegans")
def call1draw(cvsfile, write_func, *args):
    """Call enriched regions from one score column and draw them.

    A combination of drawchrom and call1.

    Parameters:
      cvsfile    -- iterable of row dicts that also exposes a ``fieldnames``
                    list (presumably a csv.DictReader -- confirm with caller).
      write_func -- accepted for signature compatibility; not used here.
      args       -- ``args[0]`` is the option list:
                    <loc column> <score column> <cutoff> <min length>
                    <max gap> <pdf filename>
                    Columns are 0-based indices into ``cvsfile.fieldnames``.

    Location cells must look like ``chrom.start.end``.  Rows whose location
    or score is empty or "NA" are ignored.  The called regions are written
    as BED next to the figure (``x.pdf`` -> ``x.bed``) and drawn on the six
    hard-coded C. elegans chromosomes into the PDF.

    Exits with status 1 after printing the usage text when fewer than six
    options are supplied.
    """
    argv = args[0]
    if len(argv) < 6:
        sys.stderr.write(
            "Need 6 extra arguments for 'call1draw', options <loc column> "
            "<score column> <cutoff> <min length> <max gap> <pdf filename>\n"
            "e.g. command: <0> <1> <0.5> <10000> <2000> <a.pdf>, means to use "
            "the first column as genome coordinations to call enriched regions "
            "from the second column, the threshold to call enriched region is "
            "0.5, the minimum length of region is 10k, and the maximum gap to "
            "link two nearby regions is 2k. Then the figure will be saved in "
            "a.pdf.\n"
        )
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    cor_column = cvsfile.fieldnames[int(argv[0])]
    var_column = cvsfile.fieldnames[int(argv[1])]
    cutoff = float(argv[2])
    min_len = int(argv[3])
    max_gap = int(argv[4])
    pdf_name = argv[5]

    # Swap only the extension.  Replacing every "pdf" substring would also
    # rewrite directory names, and a name without "pdf" would make the BED
    # file and the figure share one path.
    if pdf_name.endswith(".pdf"):
        bed_name = pdf_name[:-len(".pdf")] + ".bed"
    else:
        bed_name = pdf_name + ".bed"

    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    # Bounds of the last usable row; they stay None when nothing was added.
    last_start = last_end = None
    for row in cvsfile:
        cor = row.get(cor_column)
        var = row.get(var_column)
        if cor and var and cor != "NA" and var != "NA":
            (chrom, last_start, last_end) = cor.split(".")
            add_func(chrom, int(last_start), float(var))

    # With no usable row the track keeps whatever span WigTrackI starts with
    # instead of failing on unbound variables.
    if last_start is not None:
        wtrack.span = int(last_end) - int(last_start)
    bpeaks = wtrack.call_peaks(cutoff=cutoff, min_length=min_len, max_gap=max_gap)

    # ``with`` makes sure the BED text is flushed and the handle closed.
    with open(bed_name, "w") as fhd:
        fhd.write(bpeaks.tobed())

    from Bio.Graphics import BasicChromosome
    from reportlab.lib.colors import gray, black, white

    # Chromosome names and lengths (bp) of Caenorhabditis elegans.
    entries = [
        ("chrI", 15072419),
        ("chrII", 15279316),
        ("chrIII", 13783681),
        ("chrIV", 17493784),
        ("chrV", 20919398),
        ("chrX", 17718852),
    ]
    max_length = max(length for _, length in entries)

    chr_diagram = BasicChromosome.Organism()
    for name, length in entries:
        cur_chromosome = BasicChromosome.Chromosome(name)
        # Total scale: the longest body plus two telomeres of 5 percent each.
        cur_chromosome.scale_num = max_length * 1.1

        # Opening telomere.
        head = BasicChromosome.TelomereSegment()
        head.scale = 0.05 * max_length
        head.fill_color = gray
        cur_chromosome.add(head)

        # Body, using bp as the scale length.  A chromosome without called
        # peaks is drawn as one white segment.
        try:
            cpeaks = bpeaks.peaks[name]
        except KeyError:  # assumes ``peaks`` is a mapping keyed by chromosome
            cpeaks = []
        body_regions = []
        last_pos = 0
        for p in cpeaks:
            body_regions.append((p[0] - last_pos, white))  # outside regions
            body_regions.append((p[1] - p[0], black))  # enriched regions
            last_pos = p[1]
            # A peak ending past the chromosome would give the trailing
            # segment a negative length.
            assert p[1] < length
        body_regions.append((length - last_pos, white))  # last part

        for seg_len, color in body_regions:
            body = BasicChromosome.ChromosomeSegment()
            body.fill_color = color
            body.scale = seg_len
            cur_chromosome.add(body)

        # Closing telomere.
        tail = BasicChromosome.TelomereSegment(inverted=True)
        tail.scale = 0.05 * max_length
        tail.fill_color = gray
        cur_chromosome.add(tail)

        chr_diagram.add(cur_chromosome)

    chr_diagram.draw(pdf_name, "%s regions in Caenorhabditis elegans" % (var_column))