Ejemplo n.º 1
0
def read_motif2(motif_fhd, species, cutoff=0):
    """Read motif scan result, and return a TabIO.FWTrackI object
    containing the motif locations.

    * If the motif scan data file is not big, use this function to
      load the whole file into memory. It may be faster than
      read_motif().

    File layout as parsed here: the file begins with one 128-byte slot
    per chromosome (chr1..chrN, chrX, chrY, in that order); the first
    4 bytes of each slot are a little-endian int holding the byte
    offset of that chromosome's first hit. Hits are 8-byte records: a
    little-endian int position followed by a little-endian float
    score. A negative score marks a minus-strand hit; its absolute
    value is what gets compared against the cutoff.

    motif_fhd : a file handler for binary motif scan result
    species   : must be "mm8" for mouse or "hg18" for human
    cutoff    : cutoff for the motif scan score; only hits scoring
                strictly above it are kept

    Returns the FWTrackI after merge_overlap() has been called on it.
    Raises Exception if species is neither "hg18" nor "mm8".
    """
    motif_range_list = FWTrackI(fw=0)
    if species == "hg18":
        autosomes = 22
    elif species == "mm8":
        autosomes = 19
    else:
        raise Exception("Only hg18/mm8 supported!")
    # The header slots follow this exact order, so it must stay a list.
    chromosomes = ["chr%d" % k for k in range(1, autosomes + 1)] + ["chrX", "chrY"]

    motif_fhd.seek(0)
    data = motif_fhd.read()

    # Start offset of every chromosome section, one header slot each.
    starts = [upk("<i", data[128 * k:128 * k + 4])[0]
              for k in range(len(chromosomes))]
    # A section ends where the next one starts; the last runs to EOF.
    ends = starts[1:] + [len(data)]
    # Floor division keeps the counts integral so range() accepts them
    # under true division as well as under classic integer division.
    counts = [(end - start) // 8 for start, end in zip(starts, ends)]
    total_motif_hits = sum(counts)

    read_motif_hits = 0
    for chromosome, p, hits in zip(chromosomes, starts, counts):
        for _ in range(hits):
            read_motif_hits += 1
            if LOG:
                portion = float(read_motif_hits) / total_motif_hits
                sys.stdout.write("\r  %.1f%% %s" % (portion*100, "#"*int(portion*50)))
                sys.stdout.flush()
            loc = upk("<i", data[p:p+4])[0]
            score = upk("<f", data[p+4:p+8])[0]
            p += 8
            # The sign of the stored score carries the strand.
            if score < 0:
                strand = -1
                score = -score
            else:
                strand = 1
            if score > cutoff:
                motif_range_list.add_range(chromosome, RangeI(start=loc-1, end=loc, strand=strand))
    if LOG:
        sys.stdout.write("\n")
    data = None  # drop the file contents before merging
    motif_range_list.merge_overlap()
    return motif_range_list
Ejemplo n.º 2
0
def read_motif2(motif_fhd, species, cutoff=0):
    """Read motif scan result, and return a TabIO.FWTrackI object
    containing the motif locations.

    * If the motif scan data file is not big, use this function to
      load the whole file into memory. It may be faster than
      read_motif().

    The parser expects a header of 128-byte slots, one per chromosome
    in the order chr1..chrN, chrX, chrY. Each slot starts with a
    little-endian int: the byte offset where that chromosome's hits
    begin. Every hit occupies 8 bytes (little-endian int position,
    then little-endian float score). The sign of the score encodes the
    strand and its magnitude is tested against the cutoff.

    motif_fhd : a file handler for binary motif scan result
    species   : must be "mm8" for mouse or "hg18" for human
    cutoff    : cutoff for the motif scan score (hits must score
                strictly higher to be kept)

    Returns the FWTrackI, after its overlapping ranges were merged.
    Raises Exception for any species other than "hg18" or "mm8".
    """
    motif_range_list = FWTrackI(fw=0)
    if species == "hg18":
        n_autosomes = 22
    elif species == "mm8":
        n_autosomes = 19
    else:
        raise Exception("Only hg18/mm8 supported!")
    # Ordered exactly like the header slots of the binary file.
    chromosomes = ["chr%d" % num for num in range(1, n_autosomes + 1)]
    chromosomes += ["chrX", "chrY"]

    motif_fhd.seek(0)
    data = motif_fhd.read()

    # store start and number of file-pos for every chromosome in bin file
    chromosomes_fp = {}
    for index, chromosome in enumerate(chromosomes):
        header_pos = index * 128
        start = upk("<i", data[header_pos:header_pos + 4])[0]
        chromosomes_fp[chromosome] = [start, 0]

    # calculate number of hits: a section ends where the next one
    # begins, and the last section extends to the end of the data
    total_motif_hits = 0
    for index, chromosome in enumerate(chromosomes):
        if index + 1 < len(chromosomes):
            section_end = chromosomes_fp[chromosomes[index + 1]][0]
        else:
            section_end = len(data)
        # "//" keeps mh an int, which range() below requires when "/"
        # means true division
        mh = (section_end - chromosomes_fp[chromosome][0]) // 8
        chromosomes_fp[chromosome][1] = mh
        total_motif_hits += mh

    # walk every record and collect the hits above the cutoff
    read_motif_hits = 0
    for chromosome in chromosomes:
        p, hit_count = chromosomes_fp[chromosome]
        for _ in range(hit_count):
            read_motif_hits += 1
            if LOG:
                portion = float(read_motif_hits) / total_motif_hits
                sys.stdout.write("\r  %.1f%% %s" %
                                 (portion * 100, "#" * int(portion * 50)))
                sys.stdout.flush()
            loc = upk("<i", data[p:p + 4])[0]
            score = upk("<f", data[p + 4:p + 8])[0]
            p += 8
            # negative score -> minus strand; compare on the magnitude
            if score < 0:
                strand = -1
                score = -score
            else:
                strand = 1
            if score > cutoff:
                motif_range_list.add_range(
                    chromosome, RangeI(start=loc - 1, end=loc, strand=strand))
    if LOG:
        sys.stdout.write("\n")
    data = None  # release the in-memory copy of the file before merging
    motif_range_list.merge_overlap()
    return motif_range_list
Ejemplo n.º 3
0
def read_motif(motif_fhd, species, cutoff=0):
    """Read motif scan result, and return a TabIO.FWTrackI object
    containing the motif locations.

    The file is read record by record through the file handler rather
    than being loaded into memory at once.

    File layout as parsed here: the file begins with one 128-byte slot
    per chromosome (chr1..chrN, chrX, chrY, in that order); the first
    4 bytes of each slot are a little-endian int holding the byte
    offset of that chromosome's first hit. Hits are treated as 8-byte
    records (little-endian int position, little-endian float score),
    which is what the hit-count arithmetic assumes. A negative score
    marks a minus-strand hit; its absolute value is compared against
    the cutoff.

    motif_fhd : a file handler for binary motif scan result
    species   : must be "mm8" for mouse or "hg18" for human
    cutoff    : cutoff for the motif scan score; only hits scoring
                strictly above it are kept

    Returns the FWTrackI after merge_overlap() has been called on it.
    Raises Exception if species is neither "hg18" nor "mm8".
    """
    motif_range_list = FWTrackI(fw=0)
    if species == "hg18":
        autosomes = 22
    elif species == "mm8":
        autosomes = 19
    else:
        raise Exception("Only hg18/mm8 supported!")
    # An explicit list fixes the header order. Taking the order from
    # dict keys would depend on dict iteration order and would not be
    # indexable where keys() returns a view.
    chromosomes = ["chr%d" % k for k in range(1, autosomes + 1)] + ["chrX", "chrY"]

    # unpack the start pos: 4 bytes read + 124 skipped = one 128-byte slot
    motif_fhd.seek(0)
    starts = []
    for _ in chromosomes:
        starts.append(upk("<i", motif_fhd.read(4))[0])
        motif_fhd.seek(124, 1)
    motif_fhd.seek(0, 2)
    file_size = motif_fhd.tell()

    # A section ends where the next one starts; the last runs to EOF.
    # Floor division keeps the counts integral for range().
    ends = starts[1:] + [file_size]
    counts = [(end - start) // 8 for start, end in zip(starts, ends)]
    total_motif_hits = sum(counts)

    read_motif_hits = 0
    for chromosome, start, hits in zip(chromosomes, starts, counts):
        motif_fhd.seek(start, 0)
        for _ in range(hits):
            read_motif_hits += 1
            if LOG:
                portion = float(read_motif_hits) / total_motif_hits
                sys.stdout.write("\r%.1f%% %s" % (portion*100, "#"*int(portion*50)))
                sys.stdout.flush()
            # Exactly 8 bytes per hit, matching the "// 8" counts above.
            # NOTE(review): an earlier revision skipped 4 more bytes
            # here, which disagreed with those counts - confirm against
            # the real file format.
            loc = upk("<i", motif_fhd.read(4))[0]
            score = upk("<f", motif_fhd.read(4))[0]
            # The sign of the stored score carries the strand.
            if score < 0:
                strand = -1
                score = -score
            else:
                strand = 1
            if score > cutoff:
                motif_range_list.add_range(chromosome, RangeI(start=loc-1, end=loc, strand=strand))
    motif_range_list.merge_overlap()
    return motif_range_list
Ejemplo n.º 4
0
def read_motif(motif_fhd, species, cutoff=0):
    """Read motif scan result, and return a TabIO.FWTrackI object
    containing the motif locations.

    Records are pulled from the file handler one at a time instead of
    loading the whole file into memory.

    The parser expects a header of 128-byte slots, one per chromosome
    in the order chr1..chrN, chrX, chrY. Each slot starts with a
    little-endian int: the byte offset where that chromosome's hits
    begin. Every hit is treated as 8 bytes (little-endian int
    position, then little-endian float score), as assumed by the
    hit-count arithmetic. The sign of the score encodes the strand and
    its magnitude is tested against the cutoff.

    motif_fhd : a file handler for binary motif scan result
    species   : must be "mm8" for mouse or "hg18" for human
    cutoff    : cutoff for the motif scan score (hits must score
                strictly higher to be kept)

    Returns the FWTrackI, after its overlapping ranges were merged.
    Raises Exception for any species other than "hg18" or "mm8".
    """
    motif_range_list = FWTrackI(fw=0)
    if species == "hg18":
        n_autosomes = 22
    elif species == "mm8":
        n_autosomes = 19
    else:
        raise Exception("Only hg18/mm8 supported!")
    # The header must be walked in this fixed order; dict key order is
    # not a reliable substitute and a keys() view cannot be indexed.
    chromosomes = ["chr%d" % num for num in range(1, n_autosomes + 1)]
    chromosomes += ["chrX", "chrY"]

    # store start and number of file-pos for every chromosome in bin file
    chromosomes_fp = {}
    motif_fhd.seek(0)
    for chromosome in chromosomes:
        start = upk("<i", motif_fhd.read(4))[0]
        chromosomes_fp[chromosome] = [start, 0]
        motif_fhd.seek(124, 1)  # skip the remainder of the 128-byte slot
    motif_fhd.seek(0, 2)
    file_size = motif_fhd.tell()

    # calculate number of hits: a section ends where the next one
    # begins, and the last section extends to the end of the file
    total_motif_hits = 0
    for index, chromosome in enumerate(chromosomes):
        if index + 1 < len(chromosomes):
            section_end = chromosomes_fp[chromosomes[index + 1]][0]
        else:
            section_end = file_size
        # "//" keeps mh an int, which range() below requires when "/"
        # means true division
        mh = (section_end - chromosomes_fp[chromosome][0]) // 8
        chromosomes_fp[chromosome][1] = mh
        total_motif_hits += mh

    # walk every record and collect the hits above the cutoff
    read_motif_hits = 0
    for chromosome in chromosomes:
        section_start, hit_count = chromosomes_fp[chromosome]
        motif_fhd.seek(section_start, 0)
        for _ in range(hit_count):
            read_motif_hits += 1
            if LOG:
                portion = float(read_motif_hits) / total_motif_hits
                sys.stdout.write("\r%.1f%% %s" %
                                 (portion * 100, "#" * int(portion * 50)))
                sys.stdout.flush()
            # Consume exactly one 8-byte record so the reads stay in
            # step with the "// 8" hit counts.
            # NOTE(review): a previous version discarded 4 extra bytes
            # per hit, contradicting those counts - verify against the
            # actual file format.
            loc = upk("<i", motif_fhd.read(4))[0]
            score = upk("<f", motif_fhd.read(4))[0]
            # negative score -> minus strand; compare on the magnitude
            if score < 0:
                strand = -1
                score = -score
            else:
                strand = 1
            if score > cutoff:
                motif_range_list.add_range(
                    chromosome, RangeI(start=loc - 1, end=loc, strand=strand))
    motif_range_list.merge_overlap()
    return motif_range_list