コード例 #1
0
ファイル: ce_histone_matrix.py プロジェクト: taoliu/taolib
def call1(cvsfile, write_func, *args):
    """Call enriched regions from one score column and emit them as BED.

    Args:
        cvsfile: Row reader exposing ``fieldnames`` and yielding dict-like
            rows (presumably a ``csv.DictReader`` -- confirm against caller).
        write_func: Callable taking one string. It receives a ``#`` comment
            header and then the BED text of the called regions.
        *args: ``args[0]`` is the option list
            ``<loc column> <score column> <cutoff> <min length> <max gap>``;
            the two columns are given as indexes into ``cvsfile.fieldnames``.

    The location cell must look like ``chrom.start.end``. Rows whose location
    or score is empty or ``"NA"`` are skipped.

    Exits with status 1 after printing usage when fewer than five options
    are supplied.
    """
    argv = args[0]
    if len(argv) < 5:
        sys.stderr.write(
            "Need 5 extra arguments for 'call', options <loc column> <score column> <cutoff> <min length> <max gap>\ne.g. command: <0> <1> <0.5> <10000> <2000>, means to use the first column as genome coordinations to call enriched regions from the second column, the threshold to call enriched region is 0.5, the minimum length of region is 10k, and the maximum gap to link two nearby regions is 2k.\n"
        )
        # A usage error must not look like success to the calling shell.
        sys.exit(1)
    cor_column = cvsfile.fieldnames[int(argv[0])]
    var_column = cvsfile.fieldnames[int(argv[1])]
    cutoff = float(argv[2])
    min_len = int(argv[3])
    max_gap = int(argv[4])

    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    last_bounds = None  # (start, end) strings of the last usable row
    for row in cvsfile:
        # .get() reads the cell without inserting a None entry into the row.
        cor = row.get(cor_column)
        var = row.get(var_column)
        if not cor or not var or cor == "NA" or var == "NA":
            continue
        chrom, start, end = cor.split(".")
        add_func(chrom, int(start), float(var))
        last_bounds = (start, end)

    # The span is taken from the last usable row. When no row was usable the
    # track keeps whatever span WigTrackI() started with, instead of failing
    # on unbound start/end.
    if last_bounds is not None:
        wtrack.span = int(last_bounds[1]) - int(last_bounds[0])
    write_func("# regions called from %s:%s\n" % (argv[1], var_column))

    bpeaks = wtrack.call_peaks(cutoff=cutoff, min_length=min_len, max_gap=max_gap)
    write_func(bpeaks.tobed())
コード例 #2
0
ファイル: ce_histone_matrix.py プロジェクト: pineda-vv/taolib
def call1(cvsfile, write_func, *args):
    """Call enriched regions from one score column and emit them as BED.

    Args:
        cvsfile: Row reader exposing ``fieldnames`` and yielding dict-like
            rows (presumably a ``csv.DictReader`` -- confirm against caller).
        write_func: Callable taking one string. It receives a ``#`` comment
            header and then the BED text of the called regions.
        *args: ``args[0]`` is the option list
            ``<loc column> <score column> <cutoff> <min length> <max gap>``;
            the two columns are given as indexes into ``cvsfile.fieldnames``.

    The location cell must look like ``chrom.start.end``. Rows whose location
    or score is empty or ``"NA"`` are skipped.

    Exits with status 1 after printing usage when fewer than five options
    are supplied.
    """
    argv = args[0]
    if len(argv) < 5:
        sys.stderr.write(
            "Need 5 extra arguments for 'call', options <loc column> <score column> <cutoff> <min length> <max gap>\ne.g. command: <0> <1> <0.5> <10000> <2000>, means to use the first column as genome coordinations to call enriched regions from the second column, the threshold to call enriched region is 0.5, the minimum length of region is 10k, and the maximum gap to link two nearby regions is 2k.\n"
        )
        # A usage error must not look like success to the calling shell.
        sys.exit(1)
    cor_column = cvsfile.fieldnames[int(argv[0])]
    var_column = cvsfile.fieldnames[int(argv[1])]
    cutoff = float(argv[2])
    min_len = int(argv[3])
    max_gap = int(argv[4])

    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    last_bounds = None  # (start, end) strings of the last usable row
    for row in cvsfile:
        # .get() reads the cell without inserting a None entry into the row.
        cor = row.get(cor_column)
        var = row.get(var_column)
        if not cor or not var or cor == "NA" or var == "NA":
            continue
        chrom, start, end = cor.split(".")
        add_func(chrom, int(start), float(var))
        last_bounds = (start, end)

    # The span is taken from the last usable row. When no row was usable the
    # track keeps whatever span WigTrackI() started with, instead of failing
    # on unbound start/end.
    if last_bounds is not None:
        wtrack.span = int(last_bounds[1]) - int(last_bounds[0])
    write_func("# regions called from %s:%s\n" % (argv[1], var_column))

    bpeaks = wtrack.call_peaks(cutoff=cutoff,
                               min_length=min_len,
                               max_gap=max_gap)
    write_func(bpeaks.tobed())
コード例 #3
0
ファイル: wig_extract_chrom.py プロジェクト: pineda-vv/taolib
def main():
    """Copy one chromosome's data from a wiggle file into a new wiggle file.

    Command line: ``<chr> <wig> <newwig>``.

    Exits with status 1 after printing usage when fewer than three arguments
    are given, and also when the input holds no data for ``<chr>``.
    """
    if len(sys.argv) < 4:
        sys.stderr.write(
            "Extract data for certain chromosome from a wiggle file.\n")
        sys.stderr.write("need 3 paras: %s <chr> <wig> <newwig>\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(1)

    chrom = sys.argv[1]
    newwigfile = sys.argv[3]

    # The context manager closes the input even if parsing raises; the
    # original never closed this handle.
    with open(sys.argv[2]) as wigfhd:
        wigtrack = WiggleIO.WiggleIO(wigfhd).build_wigtrack()

    wig_chr = wigtrack.get_data_by_chr(chrom)
    if not wig_chr:
        sys.stderr.write("No data for chromosome %s!\n" % chrom)
        sys.exit(1)

    newwigtrack = WigTrackI()
    newwigtrack.span = wigtrack.span

    (wig_chr_p, wig_chr_s) = wig_chr
    # enumerate() walks the positions lazily; the matching score is looked
    # up by index.
    for i, pos in enumerate(wig_chr_p):
        newwigtrack.add_loc(chrom, pos, wig_chr_s[i])

    with open(newwigfile, "w") as newwigfhd:
        newwigtrack.write_wig(newwigfhd, name="for chromosome %s" % chrom)
コード例 #4
0
def main():
    """Split a wiggle file into one wiggle file per chromosome.

    Command line: ``<wig> <output_prefix>``. Each chromosome found in the
    input is written to ``<output_prefix>.<chrom>.wig``.

    Exits with status 1 after printing usage when fewer than two arguments
    are given.
    """
    if len(sys.argv) < 3:
        sys.stderr.write(
            "Extract data for all chromosomes from a wiggle file.\n")
        sys.stderr.write("need 2 paras: %s <wig> <output_prefix>\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(1)

    prefix = sys.argv[2]

    # The context manager closes the input even if parsing raises; the
    # original never closed this handle.
    with open(sys.argv[1]) as wigfhd:
        wigtrack = WiggleIO.WiggleIO(wigfhd).build_wigtrack()
    wigtrack.sort()

    for chrom in wigtrack.get_chr_names():
        (wig_chr_p, wig_chr_s) = wigtrack.get_data_by_chr(chrom)

        newwigtrack = WigTrackI()
        newwigtrack.span = wigtrack.span
        # enumerate() walks the positions lazily; the matching score is
        # looked up by index.
        for i, pos in enumerate(wig_chr_p):
            newwigtrack.add_loc(chrom, pos, wig_chr_s[i])

        newwigfile = prefix + "." + chrom + ".wig"
        with open(newwigfile, "w") as newwigfhd:
            newwigtrack.write_wig(newwigfhd, name="for chromosome %s" % chrom)
コード例 #5
0
ファイル: wig_split.py プロジェクト: ScottTaing/taolib
def main():
    """Split a wiggle file into one wiggle file per chromosome.

    Command line: ``<wig> <output_prefix>``. Each chromosome found in the
    input is written to ``<output_prefix>.<chrom>.wig``.

    Exits with status 1 after printing usage when fewer than two arguments
    are given.
    """
    if len(sys.argv) < 3:
        sys.stderr.write("Extract data for all chromosomes from a wiggle file.\n")
        sys.stderr.write("need 2 paras: %s <wig> <output_prefix>\n" % os.path.basename(sys.argv[0]))
        sys.exit(1)

    prefix = sys.argv[2]

    # The context manager closes the input even if parsing raises; the
    # original never closed this handle.
    with open(sys.argv[1]) as wigfhd:
        wigtrack = WiggleIO.WiggleIO(wigfhd).build_wigtrack()
    wigtrack.sort()

    for chrom in wigtrack.get_chr_names():
        (wig_chr_p, wig_chr_s) = wigtrack.get_data_by_chr(chrom)

        newwigtrack = WigTrackI()
        newwigtrack.span = wigtrack.span
        # enumerate() walks the positions lazily; the matching score is
        # looked up by index.
        for i, pos in enumerate(wig_chr_p):
            newwigtrack.add_loc(chrom, pos, wig_chr_s[i])

        newwigfile = prefix + "." + chrom + ".wig"
        with open(newwigfile, "w") as newwigfhd:
            newwigtrack.write_wig(newwigfhd, name="for chromosome %s" % chrom)
コード例 #6
0
def main():
    """Copy one chromosome's data from a wiggle file into a new wiggle file.

    Command line: ``<chr> <wig> <newwig>``.

    Exits with status 1 after printing usage when fewer than three arguments
    are given, and also when the input holds no data for ``<chr>``.
    """
    if len(sys.argv) < 4:
        sys.stderr.write("Extract data for certain chromosome from a wiggle file.\n")
        sys.stderr.write("need 3 paras: %s <chr> <wig> <newwig>\n" % os.path.basename(sys.argv[0]))
        sys.exit(1)

    chrom = sys.argv[1]
    newwigfile = sys.argv[3]

    # The context manager closes the input even if parsing raises; the
    # original never closed this handle.
    with open(sys.argv[2]) as wigfhd:
        wigtrack = WiggleIO.WiggleIO(wigfhd).build_wigtrack()

    wig_chr = wigtrack.get_data_by_chr(chrom)
    if not wig_chr:
        sys.stderr.write("No data for chromosome %s!\n" % chrom)
        sys.exit(1)

    newwigtrack = WigTrackI()
    newwigtrack.span = wigtrack.span

    (wig_chr_p, wig_chr_s) = wig_chr
    # enumerate() walks the positions lazily; the matching score is looked
    # up by index.
    for i, pos in enumerate(wig_chr_p):
        newwigtrack.add_loc(chrom, pos, wig_chr_s[i])

    with open(newwigfile, "w") as newwigfhd:
        newwigtrack.write_wig(newwigfhd, name="for chromosome %s" % chrom)
コード例 #7
0
ファイル: ce_histone_matrix.py プロジェクト: taoliu/taolib
def combcall2draw(cvsfile, write_func, *args):
    """Call regions where any chosen column beats its cutoff, then draw them.

    A row is marked when at least one of the selected score columns is
    strictly greater than that column's cutoff. Marked positions are turned
    into regions with ``call_peaks``, written as BED next to the requested
    PDF, and drawn as black bands on the six chromosomes listed below.

    Args:
        cvsfile: Row reader exposing ``fieldnames`` and yielding dict-like
            rows (presumably a ``csv.DictReader`` -- confirm against caller).
        write_func: Not used by this command.
        *args: ``args[0]`` is the option list ``<loc column>
            <score column1[,score column2,...]> <cutoff1[,cutoff2,...]>
            <min length> <max gap> <pdf filename>``; columns are indexes
            into ``cvsfile.fieldnames``.

    The location cell must look like ``chrom.start.end``.

    Exits with status 1 after printing usage when fewer than six options are
    supplied, or when there are fewer cutoffs than score columns.
    """
    import os  # local import: only needed to derive the BED file name

    argv = args[0]
    if len(argv) < 6:
        sys.stderr.write(
            "Need 6 extra arguments for 'combcall2draw', options <loc column> <score column1[,score column2,...]> <cutoff1[,cutoff2,cutoff3]> <min length> <max gap> <pdf filename>\ne.g. command: <0> <1,2,3> <0.5,0.6,0.7> <10000> <2000> <a.pdf>, means to use the first column as genome coordinations to call enriched regions from the combinition of #1, #2 and #3, the thresholds to call enriched region are 0.5 for column 1, 0.6 for column 2 and 0.7 for column 3, the minimum length of region is 10k, and the maximum gap to link two nearby regions is 2k. Then the figure will be saved in a.pdf.\n"
        )
        # A usage error must not look like success to the calling shell.
        sys.exit(1)
    cor_column = cvsfile.fieldnames[int(argv[0])]
    # Real lists (not map objects) so they can be sized and paired on any
    # Python version.
    var_columns = [cvsfile.fieldnames[int(x)] for x in argv[1].split(",")]
    cutoffs = [float(x) for x in argv[2].split(",")]
    if len(cutoffs) < len(var_columns):
        # Previously this surfaced as an IndexError in the middle of the data.
        sys.stderr.write(
            "Need one cutoff per score column: got %d cutoff(s) for %d column(s).\n"
            % (len(cutoffs), len(var_columns))
        )
        sys.exit(1)

    min_len = int(argv[3])
    max_gap = int(argv[4])
    pdf_name = argv[5]

    # Combined track: a position is recorded when any column is above cutoff.
    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    # Surplus cutoffs are ignored, as before.
    columns_and_cutoffs = list(zip(var_columns, cutoffs))
    last_bounds = None  # (start, end) strings of the last recorded row

    for row in cvsfile:
        # .get() reads the cell without inserting a None entry into the row.
        cor = row.get(cor_column)
        if not cor or cor == "NA":
            continue

        for var_column, cutoff in columns_and_cutoffs:
            var = row.get(var_column)
            if var and var != "NA" and float(var) > cutoff:
                chrom, start, end = cor.split(".")
                # 1.1 is a marker value just above the 1.0 cutoff used below.
                add_func(chrom, int(start), 1.1)
                last_bounds = (start, end)
                break

    # The span comes from the last recorded row. When nothing passed, the
    # track keeps the span WigTrackI() started with, instead of failing on
    # unbound start/end.
    if last_bounds is not None:
        wtrack.span = int(last_bounds[1]) - int(last_bounds[0])
    bpeaks = wtrack.call_peaks(cutoff=1.0, min_length=min_len, max_gap=max_gap)

    # Swap only the extension. str.replace("pdf", "bed") also rewrote "pdf"
    # inside directory names, and returned the PDF's own path when the name
    # had no "pdf" in it.
    bed_name = os.path.splitext(pdf_name)[0] + ".bed"
    with open(bed_name, "w") as fhd:
        fhd.write(bpeaks.tobed())

    from Bio.Graphics import BasicChromosome
    from reportlab.lib.colors import gray, black, white

    # (name, length in bp) of the chromosomes drawn.
    entries = [
        ("chrI", 15072419),
        ("chrII", 15279316),
        ("chrIII", 13783681),
        ("chrIV", 17493784),
        ("chrV", 20919398),
        ("chrX", 17718852),
    ]
    max_length = max(length for _, length in entries)
    telomere_scale = 0.05 * max_length
    chr_diagram = BasicChromosome.Organism()
    for name, length in entries:
        cur_chromosome = BasicChromosome.Chromosome(name)
        # Longest chromosome plus 10% for the two 5% telomeres.
        cur_chromosome.scale_num = max_length * 1.1

        # Opening telomere.
        opening = BasicChromosome.TelomereSegment()
        opening.scale = telomere_scale
        opening.fill_color = gray
        cur_chromosome.add(opening)

        # Body, using bp as the scale length. A chromosome without called
        # regions is drawn as a single white segment.
        try:
            cpeaks = bpeaks.peaks[name]
        except KeyError:
            cpeaks = []
        body_regions = []
        last_pos = 0
        for p in cpeaks:
            assert p[1] < length
            body_regions.append((p[0] - last_pos, white))  # outside regions
            body_regions.append((p[1] - p[0], black))  # enriched regions
            last_pos = p[1]
        body_regions.append((length - last_pos, white))  # last part

        for scale, color in body_regions:
            body = BasicChromosome.ChromosomeSegment()
            body.fill_color = color
            body.scale = scale
            cur_chromosome.add(body)

        # Closing telomere.
        closing = BasicChromosome.TelomereSegment(inverted=True)
        closing.scale = telomere_scale
        closing.fill_color = gray
        cur_chromosome.add(closing)

        chr_diagram.add(cur_chromosome)

    chr_diagram.draw(pdf_name, "Highlight regions in Caenorhabditis elegans")
コード例 #8
0
ファイル: ce_histone_matrix.py プロジェクト: taoliu/taolib
def call1draw(cvsfile, write_func, *args):
    """Call enriched regions from one score column and draw them.

    Regions are called with ``call_peaks``, written as BED next to the
    requested PDF, and drawn as black bands on the six chromosomes listed
    below.

    Args:
        cvsfile: Row reader exposing ``fieldnames`` and yielding dict-like
            rows (presumably a ``csv.DictReader`` -- confirm against caller).
        write_func: Not used by this command.
        *args: ``args[0]`` is the option list ``<loc column> <score column>
            <cutoff> <min length> <max gap> <pdf filename>``; columns are
            indexes into ``cvsfile.fieldnames``.

    The location cell must look like ``chrom.start.end``. Rows whose location
    or score is empty or ``"NA"`` are skipped.

    Exits with status 1 after printing usage when fewer than six options are
    supplied.
    """
    import os  # local import: only needed to derive the BED file name

    argv = args[0]
    if len(argv) < 6:
        sys.stderr.write(
            "Need 6 extra arguments for 'call1draw', options <loc column> <score column> <cutoff> <min length> <max gap> <pdf filename>\ne.g. command: <0> <1> <0.5> <10000> <2000> <a.pdf>, means to use the first column as genome coordinations to call enriched regions from the second column, the threshold to call enriched region is 0.5, the minimum length of region is 10k, and the maximum gap to link two nearby regions is 2k. Then the figure will be saved in a.pdf.\n"
        )
        # A usage error must not look like success to the calling shell.
        sys.exit(1)
    cor_column = cvsfile.fieldnames[int(argv[0])]
    var_column = cvsfile.fieldnames[int(argv[1])]
    cutoff = float(argv[2])
    min_len = int(argv[3])
    max_gap = int(argv[4])
    pdf_name = argv[5]

    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    last_bounds = None  # (start, end) strings of the last usable row
    for row in cvsfile:
        # .get() reads the cell without inserting a None entry into the row.
        cor = row.get(cor_column)
        var = row.get(var_column)
        if not cor or not var or cor == "NA" or var == "NA":
            continue
        chrom, start, end = cor.split(".")
        add_func(chrom, int(start), float(var))
        last_bounds = (start, end)

    # The span comes from the last usable row. When no row was usable, the
    # track keeps the span WigTrackI() started with, instead of failing on
    # unbound start/end.
    if last_bounds is not None:
        wtrack.span = int(last_bounds[1]) - int(last_bounds[0])
    bpeaks = wtrack.call_peaks(cutoff=cutoff, min_length=min_len, max_gap=max_gap)

    # Swap only the extension. str.replace("pdf", "bed") also rewrote "pdf"
    # inside directory names, and returned the PDF's own path when the name
    # had no "pdf" in it.
    bed_name = os.path.splitext(pdf_name)[0] + ".bed"
    with open(bed_name, "w") as fhd:
        fhd.write(bpeaks.tobed())

    from Bio.Graphics import BasicChromosome
    from reportlab.lib.colors import gray, black, white

    # (name, length in bp) of the chromosomes drawn.
    entries = [
        ("chrI", 15072419),
        ("chrII", 15279316),
        ("chrIII", 13783681),
        ("chrIV", 17493784),
        ("chrV", 20919398),
        ("chrX", 17718852),
    ]
    max_length = max(length for _, length in entries)
    telomere_scale = 0.05 * max_length
    chr_diagram = BasicChromosome.Organism()
    for name, length in entries:
        cur_chromosome = BasicChromosome.Chromosome(name)
        # Longest chromosome plus 10% for the two 5% telomeres.
        cur_chromosome.scale_num = max_length * 1.1

        # Opening telomere.
        opening = BasicChromosome.TelomereSegment()
        opening.scale = telomere_scale
        opening.fill_color = gray
        cur_chromosome.add(opening)

        # Body, using bp as the scale length. A chromosome without called
        # regions is drawn as a single white segment.
        try:
            cpeaks = bpeaks.peaks[name]
        except KeyError:
            cpeaks = []
        body_regions = []
        last_pos = 0
        for p in cpeaks:
            assert p[1] < length
            body_regions.append((p[0] - last_pos, white))  # outside regions
            body_regions.append((p[1] - p[0], black))  # enriched regions
            last_pos = p[1]
        body_regions.append((length - last_pos, white))  # last part

        for scale, color in body_regions:
            body = BasicChromosome.ChromosomeSegment()
            body.fill_color = color
            body.scale = scale
            cur_chromosome.add(body)

        # Closing telomere.
        closing = BasicChromosome.TelomereSegment(inverted=True)
        closing.scale = telomere_scale
        closing.fill_color = gray
        cur_chromosome.add(closing)

        chr_diagram.add(cur_chromosome)

    chr_diagram.draw(pdf_name, "%s regions in Caenorhabditis elegans" % (var_column))
コード例 #9
0
ファイル: WiggleIO.py プロジェクト: pineda-vv/taolib
    def build_wigtrack(self):
        """Parse the wiggle stream in ``self.fhd`` and return a WigTrackI.

        Lines starting with ``track``, ``#`` or ``browse`` are ignored.
        ``variableStep`` and ``fixedStep`` declaration lines set the current
        chromosome, span and (for fixedStep) start and step; every other
        non-blank line is a data line added to the track with ``add_loc``.

        Blank lines are skipped. A variableStep data line that does not
        consist of exactly two fields is reported on stderr and skipped.

        The returned track's ``span`` is the span of the last declaration
        line read (0 when none declared one). ``self.fhd`` is rewound to the
        start before returning.

        Raises:
            Exception: a fixedStep line lacks ``chrom=``, ``start=`` or
                ``step=``, or declares a start below 1.
            ValueError: a numeric field cannot be converted.
        """
        import sys  # local import: only needed for the malformed-line warning

        def declared(line, key):
            """Return the token after the last ``key`` in ``line``, or None."""
            at = line.rfind(key)
            if at == -1:
                return None
            return line[at + len(key):].strip().split()[0]

        data = WigTrackI()
        add_func = data.add_loc
        chrom = "Unknown"
        span = 0
        step = 0
        # 0 while reading variableStep data; otherwise the next fixedStep
        # position (always >= 1, so it doubles as the mode flag).
        pos_fixed = 0
        for line in self.fhd:
            if line.startswith(("track", "#", "browse")):
                continue
            if line.startswith("variableStep"):  # declaration line
                pos_fixed = 0
                chrom = declared(line, "chrom=")
                if chrom is None:
                    chrom = "Unknown"
                span_token = declared(line, "span=")
                span = int(span_token) if span_token is not None else 0
            elif line.startswith("fixedStep"):  # declaration line
                chrom_token = declared(line, "chrom=")
                if chrom_token is None:
                    raise Exception("fixedStep line must define chrom=XX")
                chrom = chrom_token
                span_token = declared(line, "span=")
                span = int(span_token) if span_token is not None else 0
                start_token = declared(line, "start=")
                if start_token is None:
                    raise Exception("fixedStep line must define start=XX")
                pos_fixed = int(start_token)
                if pos_fixed < 1:
                    raise Exception("fixedStep start must be bigger than 0!")
                step_token = declared(line, "step=")
                if step_token is None:
                    raise Exception("fixedStep line must define step=XX!")
                step = int(step_token)
            else:  # data line
                fields = line.split()
                if not fields:
                    # Blank line: nothing to add, and it must not advance
                    # the fixedStep position.
                    continue
                if pos_fixed:  # fixedStep: one value per line
                    add_func(chrom, int(pos_fixed), float(line.strip()))
                    pos_fixed += step
                else:  # variableStep: "<pos> <value>"
                    if len(fields) != 2:
                        # Previously the line was printed and the previous
                        # line's pos/value were added again (or a NameError
                        # was raised on the first data line).
                        sys.stderr.write(
                            "Skipping malformed wiggle data line: %r\n" % line)
                        continue
                    add_func(chrom, int(fields[0]), float(fields[1]))
        data.span = span
        self.fhd.seek(0)
        return data
コード例 #10
0
ファイル: ce_histone_matrix.py プロジェクト: pineda-vv/taolib
def combcall2draw(cvsfile, write_func, *args):
    """Call regions where any chosen column beats its cutoff, then draw them.

    A row is marked when at least one of the selected score columns is
    strictly greater than that column's cutoff. Marked positions are turned
    into regions with ``call_peaks``, written as BED next to the requested
    PDF, and drawn as black bands on the six chromosomes listed below.

    Args:
        cvsfile: Row reader exposing ``fieldnames`` and yielding dict-like
            rows (presumably a ``csv.DictReader`` -- confirm against caller).
        write_func: Not used by this command.
        *args: ``args[0]`` is the option list ``<loc column>
            <score column1[,score column2,...]> <cutoff1[,cutoff2,...]>
            <min length> <max gap> <pdf filename>``; columns are indexes
            into ``cvsfile.fieldnames``.

    The location cell must look like ``chrom.start.end``.

    Exits with status 1 after printing usage when fewer than six options are
    supplied, or when there are fewer cutoffs than score columns.
    """
    import os  # local import: only needed to derive the BED file name

    argv = args[0]
    if len(argv) < 6:
        sys.stderr.write(
            "Need 6 extra arguments for 'combcall2draw', options <loc column> <score column1[,score column2,...]> <cutoff1[,cutoff2,cutoff3]> <min length> <max gap> <pdf filename>\ne.g. command: <0> <1,2,3> <0.5,0.6,0.7> <10000> <2000> <a.pdf>, means to use the first column as genome coordinations to call enriched regions from the combinition of #1, #2 and #3, the thresholds to call enriched region are 0.5 for column 1, 0.6 for column 2 and 0.7 for column 3, the minimum length of region is 10k, and the maximum gap to link two nearby regions is 2k. Then the figure will be saved in a.pdf.\n"
        )
        # A usage error must not look like success to the calling shell.
        sys.exit(1)
    cor_column = cvsfile.fieldnames[int(argv[0])]
    # Real lists (not map objects) so they can be sized and paired on any
    # Python version.
    var_columns = [cvsfile.fieldnames[int(x)] for x in argv[1].split(",")]
    cutoffs = [float(x) for x in argv[2].split(",")]
    if len(cutoffs) < len(var_columns):
        # Previously this surfaced as an IndexError in the middle of the data.
        sys.stderr.write(
            "Need one cutoff per score column: got %d cutoff(s) for %d column(s).\n"
            % (len(cutoffs), len(var_columns))
        )
        sys.exit(1)

    min_len = int(argv[3])
    max_gap = int(argv[4])
    pdf_name = argv[5]

    # Combined track: a position is recorded when any column is above cutoff.
    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    # Surplus cutoffs are ignored, as before.
    columns_and_cutoffs = list(zip(var_columns, cutoffs))
    last_bounds = None  # (start, end) strings of the last recorded row

    for row in cvsfile:
        # .get() reads the cell without inserting a None entry into the row.
        cor = row.get(cor_column)
        if not cor or cor == "NA":
            continue

        for var_column, cutoff in columns_and_cutoffs:
            var = row.get(var_column)
            if var and var != "NA" and float(var) > cutoff:
                chrom, start, end = cor.split(".")
                # 1.1 is a marker value just above the 1.0 cutoff used below.
                add_func(chrom, int(start), 1.1)
                last_bounds = (start, end)
                break

    # The span comes from the last recorded row. When nothing passed, the
    # track keeps the span WigTrackI() started with, instead of failing on
    # unbound start/end.
    if last_bounds is not None:
        wtrack.span = int(last_bounds[1]) - int(last_bounds[0])
    bpeaks = wtrack.call_peaks(cutoff=1.0, min_length=min_len, max_gap=max_gap)

    # Swap only the extension. str.replace("pdf", "bed") also rewrote "pdf"
    # inside directory names, and returned the PDF's own path when the name
    # had no "pdf" in it.
    bed_name = os.path.splitext(pdf_name)[0] + ".bed"
    with open(bed_name, "w") as fhd:
        fhd.write(bpeaks.tobed())

    from Bio.Graphics import BasicChromosome
    from reportlab.lib.colors import gray, black, white

    # (name, length in bp) of the chromosomes drawn.
    entries = [("chrI", 15072419), ("chrII", 15279316), ("chrIII", 13783681),
               ("chrIV", 17493784), ("chrV", 20919398), ("chrX", 17718852)]
    max_length = max(length for _, length in entries)
    telomere_scale = 0.05 * max_length
    chr_diagram = BasicChromosome.Organism()
    for name, length in entries:
        cur_chromosome = BasicChromosome.Chromosome(name)
        # Longest chromosome plus 10% for the two 5% telomeres.
        cur_chromosome.scale_num = max_length * 1.1

        # Opening telomere.
        opening = BasicChromosome.TelomereSegment()
        opening.scale = telomere_scale
        opening.fill_color = gray
        cur_chromosome.add(opening)

        # Body, using bp as the scale length. A chromosome without called
        # regions is drawn as a single white segment.
        try:
            cpeaks = bpeaks.peaks[name]
        except KeyError:
            cpeaks = []
        body_regions = []
        last_pos = 0
        for p in cpeaks:
            assert p[1] < length
            body_regions.append((p[0] - last_pos, white))  # outside regions
            body_regions.append((p[1] - p[0], black))  # enriched regions
            last_pos = p[1]
        body_regions.append((length - last_pos, white))  # last part

        for scale, color in body_regions:
            body = BasicChromosome.ChromosomeSegment()
            body.fill_color = color
            body.scale = scale
            cur_chromosome.add(body)

        # Closing telomere.
        closing = BasicChromosome.TelomereSegment(inverted=True)
        closing.scale = telomere_scale
        closing.fill_color = gray
        cur_chromosome.add(closing)

        chr_diagram.add(cur_chromosome)

    chr_diagram.draw(pdf_name, "Highlight regions in Caenorhabditis elegans")
コード例 #11
0
ファイル: ce_histone_matrix.py プロジェクト: pineda-vv/taolib
def call1draw(cvsfile, write_func, *args):
    """Call enriched regions from one score column and draw them.

    Regions are called with ``call_peaks``, written as BED next to the
    requested PDF, and drawn as black bands on the six chromosomes listed
    below.

    Args:
        cvsfile: Row reader exposing ``fieldnames`` and yielding dict-like
            rows (presumably a ``csv.DictReader`` -- confirm against caller).
        write_func: Not used by this command.
        *args: ``args[0]`` is the option list ``<loc column> <score column>
            <cutoff> <min length> <max gap> <pdf filename>``; columns are
            indexes into ``cvsfile.fieldnames``.

    The location cell must look like ``chrom.start.end``. Rows whose location
    or score is empty or ``"NA"`` are skipped.

    Exits with status 1 after printing usage when fewer than six options are
    supplied.
    """
    import os  # local import: only needed to derive the BED file name

    argv = args[0]
    if len(argv) < 6:
        sys.stderr.write(
            "Need 6 extra arguments for 'call1draw', options <loc column> <score column> <cutoff> <min length> <max gap> <pdf filename>\ne.g. command: <0> <1> <0.5> <10000> <2000> <a.pdf>, means to use the first column as genome coordinations to call enriched regions from the second column, the threshold to call enriched region is 0.5, the minimum length of region is 10k, and the maximum gap to link two nearby regions is 2k. Then the figure will be saved in a.pdf.\n"
        )
        # A usage error must not look like success to the calling shell.
        sys.exit(1)
    cor_column = cvsfile.fieldnames[int(argv[0])]
    var_column = cvsfile.fieldnames[int(argv[1])]
    cutoff = float(argv[2])
    min_len = int(argv[3])
    max_gap = int(argv[4])
    pdf_name = argv[5]

    wtrack = WigTrackI()
    add_func = wtrack.add_loc
    last_bounds = None  # (start, end) strings of the last usable row
    for row in cvsfile:
        # .get() reads the cell without inserting a None entry into the row.
        cor = row.get(cor_column)
        var = row.get(var_column)
        if not cor or not var or cor == "NA" or var == "NA":
            continue
        chrom, start, end = cor.split(".")
        add_func(chrom, int(start), float(var))
        last_bounds = (start, end)

    # The span comes from the last usable row. When no row was usable, the
    # track keeps the span WigTrackI() started with, instead of failing on
    # unbound start/end.
    if last_bounds is not None:
        wtrack.span = int(last_bounds[1]) - int(last_bounds[0])
    bpeaks = wtrack.call_peaks(cutoff=cutoff,
                               min_length=min_len,
                               max_gap=max_gap)

    # Swap only the extension. str.replace("pdf", "bed") also rewrote "pdf"
    # inside directory names, and returned the PDF's own path when the name
    # had no "pdf" in it.
    bed_name = os.path.splitext(pdf_name)[0] + ".bed"
    with open(bed_name, "w") as fhd:
        fhd.write(bpeaks.tobed())

    from Bio.Graphics import BasicChromosome
    from reportlab.lib.colors import gray, black, white

    # (name, length in bp) of the chromosomes drawn.
    entries = [("chrI", 15072419), ("chrII", 15279316), ("chrIII", 13783681),
               ("chrIV", 17493784), ("chrV", 20919398), ("chrX", 17718852)]
    max_length = max(length for _, length in entries)
    telomere_scale = 0.05 * max_length
    chr_diagram = BasicChromosome.Organism()
    for name, length in entries:
        cur_chromosome = BasicChromosome.Chromosome(name)
        # Longest chromosome plus 10% for the two 5% telomeres.
        cur_chromosome.scale_num = max_length * 1.1

        # Opening telomere.
        opening = BasicChromosome.TelomereSegment()
        opening.scale = telomere_scale
        opening.fill_color = gray
        cur_chromosome.add(opening)

        # Body, using bp as the scale length. A chromosome without called
        # regions is drawn as a single white segment.
        try:
            cpeaks = bpeaks.peaks[name]
        except KeyError:
            cpeaks = []
        body_regions = []
        last_pos = 0
        for p in cpeaks:
            assert p[1] < length
            body_regions.append((p[0] - last_pos, white))  # outside regions
            body_regions.append((p[1] - p[0], black))  # enriched regions
            last_pos = p[1]
        body_regions.append((length - last_pos, white))  # last part

        for scale, color in body_regions:
            body = BasicChromosome.ChromosomeSegment()
            body.fill_color = color
            body.scale = scale
            cur_chromosome.add(body)

        # Closing telomere.
        closing = BasicChromosome.TelomereSegment(inverted=True)
        closing.scale = telomere_scale
        closing.fill_color = gray
        cur_chromosome.add(closing)

        chr_diagram.add(cur_chromosome)

    chr_diagram.draw(pdf_name,
                     "%s regions in Caenorhabditis elegans" % (var_column))