Example #1
def _calculate_mi(self, P_X, P_Y, P_X_Y):
    """Mutual information (bits) between the binary X and the discrete Y."""
    MI = 0.0
    for x in (0, 1):
        for y in P_Y.keys():
            if P_X_Y[(x, y)] > 0:
                MI += P_X_Y[(x, y)] * (mathlog(P_X_Y[(x, y)], 2) - mathlog(P_X[x] * P_Y[y], 2))
    return MI
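
Every snippet on this page calls mathlog rather than log, which points to an aliased import. A minimal self-contained sketch of the same computation, assuming `from math import log as mathlog` and a hypothetical toy joint distribution:

from math import log as mathlog

# Hypothetical joint distribution of two correlated binary variables.
P_X = {0: 0.5, 1: 0.5}
P_Y = {0: 0.5, 1: 0.5}
P_X_Y = {(0, 0): 0.4, (0, 1): 0.1, (1, 0): 0.1, (1, 1): 0.4}

MI = 0.0
for x in (0, 1):
    for y in P_Y:
        if P_X_Y[(x, y)] > 0:
            MI += P_X_Y[(x, y)] * (mathlog(P_X_Y[(x, y)], 2) - mathlog(P_X[x] * P_Y[y], 2))
print(MI)  # ~0.278 bits for this distribution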
Example #2
def _getGain(self, pprime, nprime, p, n):
    """Return gain."""
    if pprime <= 0 or p <= 0:
        return 0
    if p + n <= 0 or pprime + nprime <= 0:
        return 0
    return float(pprime) * (mathlog(float(pprime) / (pprime + nprime), 2) - mathlog(float(p) / (p + n), 2))
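
The expression matches the FOIL-style information gain pprime * (log2(p'/(p'+n')) - log2(p/(p+n))). A standalone sketch (the free-function form and the sample counts are illustrative, not from the source class):

from math import log as mathlog

def foil_gain(pprime, nprime, p, n):
    # Gain of a rule that narrows (p, n) coverage down to (pprime, nprime).
    if pprime <= 0 or p <= 0 or p + n <= 0 or pprime + nprime <= 0:
        return 0
    return float(pprime) * (mathlog(float(pprime) / (pprime + nprime), 2)
                            - mathlog(float(p) / (p + n), 2))

print(foil_gain(6, 1, 10, 10))  # ~4.67: the rule keeps 6 of 10 positives, 1 of 10 negatives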
Example #3
def _calculate_cmi(self, P_X, P_Y, P_Z, P_X_Y, P_X_Z, P_Y_Z, P_X_Y_Z):
    """Conditional mutual information I(X;Y|Z) for binary X (base-2 log)."""
    CMI = 0.0
    for x in (0, 1):
        for y in P_Y.keys():
            for z in P_Z.keys():
                if (x, y, z) in P_X_Y_Z and P_X_Y_Z[(x, y, z)] > 0:
                    CMI += P_X_Y_Z[(x, y, z)] * (mathlog(P_Z[z], 2) + mathlog(P_X_Y_Z[(x, y, z)], 2)
                                                 - (mathlog(P_X_Z[(x, z)], 2) + mathlog(P_Y_Z[(y, z)], 2)))
    return CMI
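
The four log terms implement I(X;Y|Z) = sum over (x,y,z) of p(x,y,z) * log2(p(z)*p(x,y,z) / (p(x,z)*p(y,z))). A sanity check with a single confounder value, where the conditional MI must collapse to the plain MI (toy numbers, reusing the distribution from Example #1):

from math import log as mathlog

P_X = {0: 0.5, 1: 0.5}
P_Y = {0: 0.5, 1: 0.5}
P_Z = {"z0": 1.0}  # one confounder value only
P_X_Y = {(0, 0): 0.4, (0, 1): 0.1, (1, 0): 0.1, (1, 1): 0.4}
P_X_Z = {(x, "z0"): P_X[x] for x in P_X}
P_Y_Z = {(y, "z0"): P_Y[y] for y in P_Y}
P_X_Y_Z = {(x, y, "z0"): p for (x, y), p in P_X_Y.items()}

CMI = 0.0
for (x, y, z), p in P_X_Y_Z.items():
    if p > 0:
        CMI += p * (mathlog(P_Z[z], 2) + mathlog(p, 2)
                    - mathlog(P_X_Z[(x, z)], 2) - mathlog(P_Y_Z[(y, z)], 2))
print(CMI)  # ~0.278, matching the MI from Example #1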
Example #4
def mtcnn_pnet_l1_toplogic(clock, go, finished,
                     scan_in_go, scan_in_finished, scan_in_data,
                     scan_out_go, scan_out_finished, scan_out_data,
                     n=12, p=3):

    n_pixels = n
    n_filter = p # filter dimension 3 x 3
    n_l1c, n_l1m, r_pixels, r_l1c, r_l1m = computeRanges(n_pixels, n_filter)

    content = [x for x in range(p * p)]

    pixels = [Signal(intbv(3)[8:]) for i in range(r_pixels)]
    midl1c = [Signal(intbv(3)[8:]) for i in range(r_l1c)]
    midl1m = [Signal(intbv(3)[8:]) for i in range(r_l1m)]

    filters = [Signal(intbv(content[i])[8:]) for i in range(n_filter * n_filter)]

    state_in = Signal(bool(False))
    state_in_addr = Signal(intbv(0)[int(mathlog(n * n * 2, 2) + 1):])  # slice colon added: this sizes a bit width, not a single bit index

    state_out = Signal(bool(False))
    state_out_addr = Signal(intbv(0)[int(mathlog(n * n * 2, 2) + 1):])  # same width fix as state_in_addr

    hello_inst = PL1Net(clock, go, finished, pixels, midl1c, midl1m,
                        filters, n_pixels, n_filter)

    @always(clock.posedge)
    def scanIn():
        if not state_in:
            if scan_in_go:
                state_in.next = True
                state_in_addr.next = 0
        else:
            pixels[state_in_addr].next = scan_in_data  # drive via .next; plain assignment would replace the Signal in the list
            if state_in_addr >= n * n - 1:
                state_in.next = False
                scan_in_finished.next = True
            state_in_addr.next = state_in_addr + 1

    @always(clock.posedge)
    def scanOut():
        if not state_out:
            if scan_out_go:
                state_out.next = True
                state_out_addr.next = 0
        else:
            scan_out_data.next = midl1c[state_out_addr]
            if state_out_addr >= r_l1c - 1:
                state_out.next = False
                scan_out_finished.next = True
            state_out_addr.next = state_out_addr + 1

    return hello_inst, scanIn, scanOut
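
The `int(mathlog(n*n*2, 2) + 1)` expressions size the scan address registers; as a helper the idiom reads (a sketch, assuming the same `from math import log as mathlog` alias):

from math import log as mathlog

def addr_width(depth):
    # Bits needed to address `depth` locations: floor(log2(depth)) + 1.
    return int(mathlog(depth, 2)) + 1

print(addr_width(12 * 12 * 2))  # 9 bits cover 288 locations (2**9 = 512)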
Example #5
def poisson_score(t, c):
    """Return -10*log10 of the Poisson upper-tail p-value of t under lambda=c."""
    p_tmp = poisson_cdf(t, c, lower=False)
    if p_tmp <= 0:
        log_pvalue = 3100  # cap the score when the tail probability underflows to 0
    else:
        log_pvalue = mathlog(p_tmp, 10) * -10
    return log_pvalue
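
Here `poisson_cdf(t, c, lower=False)` is the upper-tail Poisson probability from the surrounding module (the same helper appears in the MACS snippets later on this page), and the score is -10*log10(p), capped at 3100 when the double underflows. A rough stand-in sketch using SciPy's survival function (an assumption; the original poisson_cdf may treat the boundary t slightly differently):

from math import log as mathlog
from scipy.stats import poisson  # assumed stand-in for the module's poisson_cdf

def poisson_score(t, c):
    p_tmp = poisson.sf(t, c)  # P(X > t) for X ~ Poisson(c)
    if p_tmp <= 0:
        return 3100           # cap when the tail underflows to 0
    return mathlog(p_tmp, 10) * -10

print(poisson_score(20, 5))   # a large score: 20 events under lambda=5 is very unlikely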
Example #6
def mhyper(self, q, m, n, k):
    # Log of a hypergeometric tail term; `binomial` and `hyper` (a generalized
    # hypergeometric 3F2 evaluated at 1) come from the enclosing module, e.g. mpmath.
    u = min(self.tag_length, n) - 1
    A = binomial(m, q) * binomial(n, k - q)
    B = hyper([1, q - k, q - m], [1 + q, 1 - k + n + q], 1)
    C = -binomial(m, 1 + u) * binomial(n, k - 1 - u)
    D = hyper([1, 1 - k + u, 1 - m + u], [2 + u, 2 - k + n + u], 1)
    return mathlog(float((A * B + C * D) / binomial(m + n, k)))
Example #7
def calculate_entropy(self, v_X, xmax=None, base=2):
    """General method for calculating information of a random variable.  Assumes 0-based integer mapping.

    xmax can be prespecified for performance reasons, otherwise it is calculated within the function.
    """
    if xmax is None:
        xmax = max(v_X)
    xmax_plus_one = xmax + 1
    P_X = numpy.zeros(xmax_plus_one, dtype=float)

    nsamples = len(v_X)
    nsamples_reciprocal = 1.0 / float(nsamples)

    # Empirical distribution of X.
    for i in range(nsamples):
        P_X[v_X[i]] += 1.0
    P_X = nsamples_reciprocal * P_X

    H = 0.0
    for x in range(xmax_plus_one):
        if P_X[x] > 0:
            H -= P_X[x] * mathlog(P_X[x], base)
    return float(H)
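
The histogram loop collapses to a couple of NumPy calls (a sketch; `numpy.bincount` assumes the same 0-based integer coding):

import numpy

def entropy(v_X, base=2):
    # Vectorized equivalent: bincount builds P_X, then sum p*log(p).
    P_X = numpy.bincount(v_X) / float(len(v_X))
    P_X = P_X[P_X > 0]
    return float(-(P_X * (numpy.log(P_X) / numpy.log(base))).sum())

print(entropy([0, 0, 1, 1]))  # 1.0 bit for a fair binary variable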
Example #8
    def noValidGainsinPNarray(self, dblMinGainThreshold):
        """Return true if maximum gain is less than the threshold."""
        p = float(self.getWeightOfExamples(self.lstPprime))
        n = float(self.getWeightOfExamples(self.lstNprime))

        if p <= 0 or p + n <= 0:
            return True
        oldgain = mathlog(float(p), 2) - mathlog(p + n, 2)
        for intAttribute in range(0, len(self.lstPNprime)):
            if self.lstAprime[intAttribute] == 1:
                gain = 0.0
                pprime = float(self.lstPNprime[intAttribute][0])
                nprime = float(self.lstPNprime[intAttribute][1])
                if not (pprime <= 0 or pprime + nprime <= 0):
                    gain = pprime * ((mathlog(pprime, 2) - mathlog(pprime + nprime, 2)) - oldgain)
                if gain > dblMinGainThreshold:
                    return False
        return True
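
Here `oldgain` is log2(p/(p+n)) hoisted out of the attribute loop, so the expression inside it is the same FOIL-style gain computed by `_getGain` in Example #2; the method only answers whether any active attribute clears the threshold.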
Example #9
def calculate_mi(self, v_X, v_Y, xmax=None, ymax=None, base=2):
    """General method for calculating MI between two vectors.  Assumes 0-based integer mapping.

    xmax and ymax can be prespecified for performance reasons, otherwise they are calculated within the function.
    """
    if xmax is None:
        xmax = max(v_X)
    if ymax is None:
        ymax = max(v_Y)

    xmax_plus_one = xmax + 1
    ymax_plus_one = ymax + 1
    P_X = numpy.zeros(xmax_plus_one, dtype=float)
    P_Y = numpy.zeros(ymax_plus_one, dtype=float)
    P_X_Y = numpy.zeros((xmax_plus_one, ymax_plus_one), dtype=float)

    nsamples = len(v_X)
    nsamples_reciprocal = 1.0 / float(nsamples)

    # Empirical marginals and joint distribution.
    for i in range(nsamples):
        x = v_X[i]
        y = v_Y[i]
        P_X[x] += 1.0
        P_Y[y] += 1.0
        P_X_Y[x, y] += 1.0

    P_X = nsamples_reciprocal * P_X
    P_Y = nsamples_reciprocal * P_Y
    P_X_Y = nsamples_reciprocal * P_X_Y

    MI = 0.0
    for x in range(xmax_plus_one):
        for y in range(ymax_plus_one):
            if P_X_Y[x, y] > 0:
                MI += P_X_Y[x, y] * (mathlog(P_X_Y[x, y], base) - mathlog(P_X[x] * P_Y[y], base))
    return float(MI)
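
For a cross-check, scikit-learn ships the same plug-in estimator (assuming sklearn is available; note it returns nats, so divide by ln 2 for bits):

from math import log
from sklearn.metrics import mutual_info_score

v_X = [0, 0, 1, 1]
v_Y = [0, 0, 1, 1]
print(mutual_info_score(v_X, v_Y) / log(2))  # 1.0 bit: Y is a copy of X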
Example #10
def proteinsequencelowcomplexityscore(protseq):
    """Score low-complexity content by counting over-represented residue pairs."""
    seqlen = len(protseq)
    protseq = protseq.upper()
    # Overlapping residue pairs ("dinucs" in the original naming).
    dinucs = [protseq[i - 1:i + 1] for i in range(1, seqlen)]
    counts = [0]
    expected = max([seqlen // 400, 1])
    cutoff = int(round(mathlog(seqlen, 5)))
    for dinuc in set(dinucs):
        occurrence = dinucs.count(dinuc) - expected
        if occurrence >= cutoff:
            counts.append(occurrence - cutoff)
    return round(sum(counts) / float(seqlen), 3)
Example #11
def logp(x):
    return mathlog(1.0 + x, sqrt2) / 2.0
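
With `sqrt2 = 2 ** 0.5` (an assumption about the source module), log base sqrt(2) is twice the base-2 log, so the division by 2 makes this exactly log2(1 + x); the variant in Example #16 omits the base and returns ln(1 + x)/2 instead. A quick check:

from math import log as mathlog

sqrt2 = 2 ** 0.5  # assumed definition from the source module

def logp(x):
    return mathlog(1.0 + x, sqrt2) / 2.0

print(logp(1.0))  # ~1.0, i.e. log2(2)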
Example #12
def _calculate_information_scores(self, sample_set, feature, confounder):
    nsamples = len(sample_set)
    if self.P_Y is None:
        self.P_Y = self._calculate_P_Y(sample_set)
    if self.P_Z is None:
        self.P_Z = self._calculate_P_Z(sample_set, confounder)
    if self.P_Y_Z is None:
        self.P_Y_Z = self._calculate_P_Y_Z(sample_set, confounder)

    P_Y = self.P_Y
    P_Z = self.P_Z
    P_Y_Z = self.P_Y_Z

    # P(X): X is the indicator of whether a sample satisfies the feature.
    P_X = [0, 0]
    X = [feature]
    for sample in sample_set:
        if sample.satisfies(X):
            P_X[1] += 1
        else:
            P_X[0] += 1
    for x in (0, 1):
        P_X[x] = float(P_X[x]) / float(nsamples)

    # Joint P(X, Y) over the feature indicator and the class label.
    P_X_Y = {}
    for sample in sample_set:
        Y = sample.current_class_label
        if (1, Y) not in P_X_Y:
            P_X_Y[(1, Y)] = 0
        if (0, Y) not in P_X_Y:
            P_X_Y[(0, Y)] = 0
        if sample.satisfies(X):
            P_X_Y[(1, Y)] += 1
        else:
            P_X_Y[(0, Y)] += 1
    for key in P_X_Y.keys():
        P_X_Y[key] = float(P_X_Y[key]) / float(nsamples)

    # Joint P(X, Z) over the feature indicator and the confounder value.
    P_X_Z = {}
    for sample in sample_set:
        Z = self.confounders[sample.id][confounder]
        if (1, Z) not in P_X_Z:
            P_X_Z[(1, Z)] = 0
        if (0, Z) not in P_X_Z:
            P_X_Z[(0, Z)] = 0
        if sample.satisfies(X):
            P_X_Z[(1, Z)] += 1
        else:
            P_X_Z[(0, Z)] += 1
    for key in P_X_Z.keys():
        P_X_Z[key] = float(P_X_Z[key]) / float(nsamples)

    # Joint P(X, Y, Z).
    P_X_Y_Z = {}
    for sample in sample_set:
        Y = sample.current_class_label
        Z = self.confounders[sample.id][confounder]
        if (1, Y, Z) not in P_X_Y_Z:
            P_X_Y_Z[(1, Y, Z)] = 0
        if (0, Y, Z) not in P_X_Y_Z:
            P_X_Y_Z[(0, Y, Z)] = 0
        if sample.satisfies(X):
            P_X_Y_Z[(1, Y, Z)] += 1
        else:
            P_X_Y_Z[(0, Y, Z)] += 1
    for key in P_X_Y_Z.keys():
        P_X_Y_Z[key] = float(P_X_Y_Z[key]) / float(nsamples)

    # H_Y_Z and H_Z accumulate sum(p * log2(p)), i.e. the *negatives* of the
    # entropies, so H(Y|Z) = H(Y,Z) - H(Z) comes out as -(H_Y_Z - H_Z).
    H_Y_Z = 0.0
    H_Z = 0.0
    for y in P_Y.keys():
        for z in P_Z.keys():
            if (y, z) in P_Y_Z and P_Y_Z[(y, z)] > 0:
                H_Y_Z += P_Y_Z[(y, z)] * mathlog(P_Y_Z[(y, z)], 2)
    for z in P_Z.keys():
        if P_Z[z] > 0:
            H_Z += P_Z[z] * mathlog(P_Z[z], 2)
    H_Y_Given_Z = -(H_Y_Z - H_Z)

    MI = self._calculate_mi(P_X, P_Y, P_X_Y)
    CMI = self._calculate_cmi(P_X, P_Y, P_Z, P_X_Y, P_X_Z, P_Y_Z, P_X_Y_Z)

    return_scores = {"mi": MI, "cmi": CMI, "hz": H_Z, "hyz": H_Y_Z,
                     "hygivenz": H_Y_Given_Z, "px": P_X, "py": P_Y, "pz": P_Z,
                     "pxy": P_X_Y, "pyz": P_Y_Z, "pxz": P_X_Z, "pxyz": P_X_Y_Z}
    return return_scores
Example #13
def hoeffding_deviation(occurence, confidence=0.9):
    return sqrt(-mathlog(confidence / 2) / (2 * occurence))
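
This is a Hoeffding-style deviation bound: the half-width of a confidence interval after `occurence` samples, with the one-argument `mathlog` being the natural log. A self-contained sketch, assuming both names come from `math`:

from math import sqrt, log as mathlog

def hoeffding_deviation(occurence, confidence=0.9):
    # sqrt(ln(2/confidence) / (2n)): shrinks like 1/sqrt(n).
    return sqrt(-mathlog(confidence / 2) / (2 * occurence))

print(hoeffding_deviation(100))    # ~0.063
print(hoeffding_deviation(10000))  # ~0.0063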
Example #14
def breakdown2(filename):
  #print "IV file is: ",filename
  formula="(dI/dV)/(I/V)"

  # reading data
  f = open(filename)
  lines = f.readlines()
  # writing data in lists
  V = []
  I = []
  for line in lines:
    tmp = line.split()
    v   = float(tmp[0])
    i   = float(tmp[1])
    if ( v == 0 ):
      continue
    V.append(v)
    I.append(i)

  # closing file
  f.close()

  # from lists to array
  V = np.asarray(V)
  I = np.asarray(I)
  # creating an interporlation of the IV curve
  #interpolation_of_iv = interpolate.splrep(V, I,k=5)
  #interpolated_I = interpolate.splev(V,interpolation_of_iv,der=0)
  # plot a comparison
  #plt.figure()
  #plt.plot(V, I, 'xb',V,interpolated_I,'-r')
  #plt.legend(['Data','Interpolation'],loc=6)
  #plt.xlabel('Vbias [V]')
  #plt.ylabel('Ileak [nA]')
  #plt.yscale('log')
  #plt.title('IV, W93, S1')
  #plt.show()


  # calculating log and derivative
  #interpolated_I = np.asarray(interpolated_I)
  #print interpolated_I


  logI = []
  for i_leak in I:  # renamed loop variable; the original "for I in I" clobbered the array
    logi = mathlog(i_leak)
    logI.append(logi)

  logI = np.asarray(logI)
  #logI = np.log(interpolated_I)
  interpolation_of_iv = interpolate.splrep(V, logI,k=5)
  dlogIdV = interpolate.splev(V,interpolation_of_iv,der=1)
  #observable_values = 1.0/dlogIdV
  v_bd = 0
  one_over_dlogIdV = 1.0/dlogIdV
  v_bd = V[np.argmax(dlogIdV)]
  #v_bd = V[np.argmin(one_over_dlogIdV)]

  #print "Breakdown voltage is:", v_bd,"V"


  # returning the interpolated current
  #return interpolated_I
  return v_bd
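
The per-element loop over the currents can be collapsed with NumPy's vectorized log; a self-contained sketch with hypothetical IV data standing in for the file read:

import numpy as np
from scipy import interpolate

# Hypothetical IV data in place of the parsed file.
V = np.linspace(1.0, 10.0, 50)
I = np.exp(0.05 * V ** 2)            # leakage current rising with bias

logI = np.log(I)                     # vectorized form of the mathlog loop
spline = interpolate.splrep(V, logI, k=5)
dlogIdV = interpolate.splev(V, spline, der=1)
v_bd = V[np.argmax(dlogIdV)]
print(v_bd)                          # 10.0: d(log I)/dV = 0.1*V peaks at the end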
Example #15
def pow_two_chunk(num):
    return 2 ** floor(mathlog(num, 2))
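
Because `mathlog` goes through floating point, very large integers can land on the wrong side of an exact power of two; `int.bit_length` gives an exact alternative (a sketch):

def pow_two_chunk(num):
    # Largest power of two <= num, computed without floating point.
    return 1 << (num.bit_length() - 1)

print(pow_two_chunk(1000))  # 512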
Example #16
def logp(x):
    return mathlog(1.0+x)/2.0
Example #17
def get_target_speed(wid, window_dimensions, batch, global_statistics, statistics, min_speed, speed_data):
    low_limit = get_low_limit(global_statistics.mmap_size>0, window_dimensions)
    #***********************************************************
    # encoding speed:
    #    0    for highest compression/slower
    #    100  for lowest compression/fast
    # here we try to minimize damage-latency and client decoding speed

    #megapixels per second:
    mpixels = low_limit/1024.0/1024.0
    #for larger window sizes, we should be downscaling,
    #and don't want to wait too long for those anyway:
    ref_damage_latency = 0.010 + 0.025 * (1+mathlog(max(1, mpixels)))

    #abs: try to never go higher than 5 times reference latency:
    dam_lat_abs = max(0, ((statistics.avg_damage_in_latency or 0)-ref_damage_latency) / (ref_damage_latency * 4.0))

    #calculate a target latency and try to get close to it
    avg_delay = batch.delay
    delays = list(batch.last_actual_delays)
    if len(delays)>0:
        #average recent actual delay:
        avg_delay = time_weighted_average(delays)
    #and average that with the current delay (which is lower or equal):
    frame_delay = (avg_delay + batch.delay) / 2.0
    #ensure we always spend at least as much time encoding as we spend batching:
    #(one frame encoding whilst one frame is batching is our ideal result)
    target_damage_latency = max(ref_damage_latency, frame_delay/1000.0)
    #current speed:
    speed = min_speed
    if len(speed_data)>0:
        speed = max(min_speed, time_weighted_average(speed_data))
    #rel: do we need to increase or decrease speed to reach the target:
    dam_lat_rel = speed/100.0 * statistics.avg_damage_in_latency / target_damage_latency

    #ensure we decode at a reasonable speed (for slow / low-power clients)
    #maybe this should be configurable?
    target_decode_speed = 8*1000*1000.0      #8 MPixels/s
    dec_lat = 0.0
    if statistics.avg_decode_speed>0:
        dec_lat = target_decode_speed/(statistics.avg_decode_speed or target_decode_speed)

    #if we have more pixels to encode, we may need to go faster
    #(this is important because the damage latency used by the other factors
    # may aggregate multiple damage requests into one packet - which may skip frames)
    #TODO: reconcile this with video regions
    #only count the last second's worth:
    now = time.time()
    lim = now-1.0
    lde = [w*h for t,_,_,w,h in list(statistics.last_damage_events) if t>=lim]
    pixels = sum(lde)
    mpixels_per_s = pixels/1024.0/1024.0
    pps = 0.0
    if len(lde)>5:
        #above 50 MPixels/s, we should reach 100% speed
        #(even x264 peaks at tens of MPixels/s)
        pps = mpixels_per_s/50.0

    #combine factors: use the highest one:
    target = min(1.0, max(dam_lat_abs, dam_lat_rel, dec_lat, pps, 0.0))

    #scale target between min_speed and 100:
    ms = min(100.0, max(min_speed, 0.0))
    target_speed = int(ms + (100.0-ms) * target)

    #expose data we used:
    info = {
            "low_limit"                 : int(low_limit),
            "min_speed"                 : int(min_speed),
            "frame_delay"               : int(frame_delay),
            "mpixels"                   : int(mpixels_per_s),
            "damage_latency"            : {
                                           "ref"        : int(1000.0*ref_damage_latency),
                                           "avg"        : int(1000.0*statistics.avg_damage_in_latency),
                                           "target"     : int(1000.0*target_damage_latency),
                                           "abs_factor" : int(100.0*dam_lat_abs),
                                           "rel_factor" : int(100.0*dam_lat_rel),
                                           },
            "decoding_latency"          : {
                                           "target"   : int(target_decode_speed),
                                           "factor"   : int(100.0*dec_lat),
                                           },
            }
    return info, target_speed
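
`ref_damage_latency` grows with the natural log of the window's megapixel count (one-argument `mathlog` is ln). Tabulating the formula for a few sizes (pure arithmetic, not measurements):

from math import log as mathlog

for mpixels in (0.5, 1, 2, 8):
    ref = 0.010 + 0.025 * (1 + mathlog(max(1, mpixels)))
    print(mpixels, round(ref * 1000, 1))  # 35.0, 35.0, 52.3, 87.0 ms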
Example #18
    def __filter_w_control_v2(self, sf, peak_info, treatment, control, pass_sregion=False, write2wig=False, fake_when_missing=False, to_small_sample=False):
        """Use control data to calculate several lambda values around
        1k, 5k and 10k region around peak summit. Choose the highest
        one as local lambda, then calculate p-value in poisson
        distribution.

        Parameters:

        1. pass_sregion: If set True, the slocal lambda will be
        ignored. Use this when the control is not available.
        
        2. write2wig: obsolete
        
        3. fake_when_missing: when a chromosome is missing in control
        but present in IP (or vice versa), MACS fakes a tag so the
        process can continue.
        
        4. to_small_sample: when True, balance the number of tags by
        linearly scaling the larger sample down to the smaller one. The
        default behaviour is to scale the smaller up to the larger.

        Return value type in this format:
        a dictionary
        key value : chromosome
        items : array of (peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags,peak_pvalue,peak_fold_enrichment)
        """
        lambda_bg0 = float(self.scan_window)*treatment.total/self.gsize # bug fixed...
        
 #       if treatment.total>control.total:
 #           t_ratio = 1.00
 #           c_ratio = float(treatment.total)/control.total
 #       else:
 #           t_ratio = float(control.total)/treatment.total
 #           c_ratio = 1.00

 #       if to_small_sample:
 #           tmp = t_ratio
 #           t_ratio = 1/c_ratio
 #           c_ratio = 1/tmp
        
        t_ratio = sf[0]
        c_ratio = sf[1]
        
   #     self.info("t_ratio %s" % (t_ratio))
   #     self.info("c_ratio %s" % (c_ratio))
        
        final_peak_info = {}
        chrs = sorted(peak_info.keys())
        total = 0
        for chrom in chrs:
          #  self.info("#3 Chromosome %s" % (chrom))
            n_chrom = 0
            final_peak_info[chrom] = []
            peak_list = peak_info[chrom]
            try:
                (ctags,ccnts) = control.get_locations_by_chr_v2(chrom)
                #ccnts = control.get_counts_by_chr(chrom,0)  
            except:
                self.warn("Missing %s data, skip it..." % (chrom))
                if fake_when_missing:
                    ctags = [-1,]
                    ccnts = [-1,]
                    self.warn("Fake a tag at %s:%d" % (chrom,-1))
                    tmp=[]
                else:
                    continue
            try:
                (ttags,tcnts) = treatment.get_locations_by_chr_v2(chrom)
                #tcnts = treatment.get_counts_by_chr(chrom,0)
            except:
                self.warn("Missing %s data, skip it..." % (chrom))
                if fake_when_missing:
                    ttags = [-1,]
                    tcnts = [-1,]
                    self.warn("Fake a tag at %s:%d" % (chrom,-1))
                    tmp=[]
                else:
                    continue
         #   self.info("ttags size %d" % (len(ttags)))  
         #   self.info("ctags size %d" % (len(ttags)))  
         #   self.info("ttags size %d" % (len(tcnts)))  
         #   self.info("ctags size %d" % (len(ccnts)))  
            
            index_ctag = 0      # index for control tags
            index_ttag = 0      # index for treatment tags
            flag_find_ctag_locally = False
            flag_find_ttag_locally = False            
            prev_index_ctag = 0
            prev_index_ttag = 0            
            len_ctags = len(ctags)
            len_ttags = len(ttags)
            for i in range(len(peak_list)):
                (peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags) = peak_list[i]

                #window_size_4_lambda = min(self.first_lambda_region,max(peak_length,self.scan_window))
                window_size_4_lambda = max(peak_length,self.scan_window)
                lambda_bg = lambda_bg0/self.scan_window*window_size_4_lambda*t_ratio               
                if self.nolambda:
                    # skip local lambda
                    local_lambda = lambda_bg
                    tlambda_peak = float(peak_num_tags)/peak_length*window_size_4_lambda
                else:
                    left_peak = peak_start+self.shift_size # go to middle point of the first fragment
                    right_peak = peak_end-self.shift_size  # go to middle point of the last fragment
                    left_lregion = peak_summit-self.lregion/2
                    left_sregion = peak_summit-self.sregion/2
                    right_lregion = peak_summit+self.lregion/2
                    right_sregion = peak_summit+self.sregion/2
                    #(cnum_10k,cnum_5k,cnum_1k,cnum_peak) = (0,0,0,0)
                    #(tnum_10k,tnum_5k,tnum_1k,tnum_peak) = (0,0,0,0)
                    (cnum_sregion, cnum_lregion, cnum_peak, tnum_sregion, tnum_lregion, tnum_peak) = (0,0,0,0,0,0)
                    #smallest = min(left_peak,left_10k,left_5k,left_1k)
                    #largest = max(right_peak,right_10k,right_5k,right_1k)

                    while index_ctag < len_ctags:
                        if ctags[index_ctag] < left_lregion:
                            # go to next control tag
                            index_ctag+=1
                        elif index_ctag+1 >= len_ctags or right_lregion < ctags[index_ctag]:
                            # If move outof the lregion or reach the chromosome end
                            # finalize and go to next peak region
                            # Thanks to Jake Biesinger
                            flag_find_ctag_locally = False
                            index_ctag = prev_index_ctag 
                            break
                        else:
                            if not flag_find_ctag_locally:
                                flag_find_ctag_locally = True
                                prev_index_ctag = index_ctag
                            p = ctags[index_ctag]
                            c = ccnts[index_ctag]
                            if left_peak <= p <= right_peak:
                                cnum_peak += c
                            if left_sregion <= p <= right_sregion:
                                cnum_sregion +=c
                                cnum_lregion +=c
                            else:
                                cnum_lregion += c
                            index_ctag += 1 # go to next tag

                    while index_ttag < len_ttags:
                        if ttags[index_ttag] < left_lregion:
                            # go to next treatment tag
                            index_ttag+=1
                        elif index_ttag+1 >= len_ttags or right_lregion < ttags[index_ttag]:
                            # If move outof the lregion or reach the chromosome end
                            # finalize and go to next peak region
                            # Thanks to Jake Biesinger
                            flag_find_ttag_locally = False
                            index_ttag = prev_index_ttag 
                            break
                        else:
                            if not flag_find_ttag_locally:
                                flag_find_ttag_locally = True
                                prev_index_ttag = index_ttag
                            p = ttags[index_ttag]
                            c = tcnts[index_ttag]
                            if left_peak <= p <= right_peak:
                                tnum_peak +=c
                            if left_sregion <= p <= right_sregion:
                                tnum_sregion +=c
                                tnum_lregion += c
                            else:
                                tnum_lregion += c
                            index_ttag += 1 # go to next tag

                    clambda_peak = float(cnum_peak)/peak_length*c_ratio*window_size_4_lambda

                    clambda_lregion = float(cnum_lregion)/self.lregion*c_ratio*window_size_4_lambda

                    clambda_sregion = float(cnum_sregion)/self.sregion*c_ratio*window_size_4_lambda

                    tlambda_peak = float(tnum_peak)/peak_length*t_ratio*window_size_4_lambda

                    tlambda_lregion = float(tnum_lregion)/self.lregion*t_ratio*window_size_4_lambda

                    tlambda_sregion = float(tnum_sregion)/self.sregion*t_ratio*window_size_4_lambda

                    if pass_sregion:
                        # for experiment w/o control, peak region lambda and sregion region lambda are ignored!
                        local_lambda = max(lambda_bg,tlambda_lregion)
                    else:
                        # for experiment w/ control
                        local_lambda = max(lambda_bg,clambda_peak,clambda_lregion,clambda_sregion)

                #print(local_lambda)
               # if local_lambda == 0 :
               #     local_lambda = 0.001
                p_tmp = poisson_cdf(tlambda_peak,local_lambda,lower=False)
                if p_tmp <= 0:
                    peak_pvalue = 3100
                else:
                    peak_pvalue = mathlog(p_tmp,10) * -10

                if peak_pvalue > self.pvalue:
                    n_chrom += 1
                    total += 1
                    peak_fold_enrichment = float(peak_height)/local_lambda*window_size_4_lambda/self.d
                    final_peak_info[chrom].append((peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags,peak_pvalue,peak_fold_enrichment))
                # uncomment the following two lines, MACS will report the peaks been rejected.    
                #else:
                #    #self.debug("Reject the peak at %s:%d-%d with local_lambda: %.2f and -log10pvalue: %.2f" % (chrom,peak_start,peak_end,local_lambda,peak_pvalue))

            self.debug("#3 peaks whose pvalue < cutoff: %d" % (n_chrom))
        self.info("#3 Finally, %d peaks are called!" % (total))
        return final_peak_info
Example #19
def _ispow2(i):
    v = mathlog(i, 2)
    return v == int(v)
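
Taking a float log and comparing with int(v) can misfire for large powers of two; a bit-twiddling check is exact (a sketch):

def _ispow2(i):
    # True when i > 0 and exactly one bit is set.
    return i > 0 and (i & (i - 1)) == 0

print(_ispow2(1024), _ispow2(1000))  # True False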
Example #20
def get_target_speed(wid, window_dimensions, batch, global_statistics, statistics, min_speed, speed_data):
    low_limit = get_low_limit(global_statistics.mmap_size>0, window_dimensions)
    #***********************************************************
    # encoding speed:
    #    0    for highest compression/slower
    #    100  for lowest compression/fast
    # here we try to minimize damage-latency and client decoding speed

    #megapixels per second:
    mpixels = low_limit/1024.0/1024.0
    #for larger window sizes, we should be downscaling,
    #and don't want to wait too long for those anyway:
    ref_damage_latency = 0.010 + 0.025 * (1+mathlog(max(1, mpixels)))

    #abs: try to never go higher than 5 times reference latency:
    dam_lat_abs = max(0, ((statistics.avg_damage_in_latency or 0)-ref_damage_latency) / (ref_damage_latency * 4.0))

    #calculate a target latency and try to get close to it
    avg_delay = batch.delay
    delays = list(batch.last_actual_delays)
    if len(delays)>0:
        #average recent actual delay:
        avg_delay = time_weighted_average(delays)
    #and average that with the current delay (which is lower or equal):
    frame_delay = (avg_delay + batch.delay) / 2.0
    #ensure we always spend at least as much time encoding as we spend batching:
    #(one frame encoding whilst one frame is batching is our ideal result)
    target_damage_latency = max(ref_damage_latency, frame_delay/1000.0)
    #current speed:
    speed = min_speed
    if len(speed_data)>0:
        speed = max(min_speed, time_weighted_average(speed_data))
    #rel: do we need to increase or decrease speed to reach the target:
    dam_lat_rel = speed/100.0 * statistics.avg_damage_in_latency / target_damage_latency

    #ensure we decode at a reasonable speed (for slow / low-power clients)
    #maybe this should be configurable?
    target_decode_speed = 8*1000*1000.0      #8 MPixels/s
    dec_lat = 0.0
    if statistics.avg_decode_speed:
        dec_lat = target_decode_speed/(statistics.avg_decode_speed or target_decode_speed)

    #combine factors: use the highest one:
    target = min(1.0, max(dam_lat_abs, dam_lat_rel, dec_lat, 0.0))

    #scale target between min_speed and 100:
    ms = min(100.0, max(min_speed, 0.0))
    target_speed = int(ms + (100.0-ms) * target)

    #expose data we used:
    info = {
            "low_limit"                 : int(low_limit),
            "min_speed"                 : int(min_speed),
            "frame_delay"               : int(frame_delay),
            "damage_latency.ref"        : int(1000.0*ref_damage_latency),
            "damage_latency.avg"        : int(1000.0*statistics.avg_damage_in_latency),
            "damage_latency.target"     : int(1000.0*target_damage_latency),
            "damage_latency.abs_factor" : int(100.0*dam_lat_abs),
            "damage_latency.rel_factor" : int(100.0*dam_lat_rel),
            "decoding_latency.target"   : int(target_decode_speed),
            "decoding_latency.factor"   : int(100.0*dec_lat),
            }
    return info, target_speed
Example #21
def _set_sizeOfLocalColorTable(self):
    num = self[1].get_num()
    if num > 0:
        return int(mathlog(num, 2) - 1)
    else:
        return 0
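
In the GIF format, the local color table size field stores N with the table holding 2**(N+1) entries, which is where `log2(num) - 1` comes from. A quick check of the mapping (with a `round` guard against float fuzz that the original omits):

from math import log as mathlog

for num in (2, 4, 8, 256):
    print(num, int(round(mathlog(num, 2))) - 1)  # 2 -> 0, 4 -> 1, 8 -> 2, 256 -> 7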
Example #22
def get_target_speed(window_dimensions, batch, global_statistics, statistics,
                     bandwidth_limit, min_speed, speed_data):
    low_limit = get_low_limit(global_statistics.mmap_size > 0,
                              window_dimensions)
    #***********************************************************
    # encoding speed:
    #    0    for highest compression/slower
    #    100  for lowest compression/fast
    # here we try to minimize damage-latency and client decoding speed

    #backlog factor:
    _, pixels_backlog, _ = statistics.get_client_backlog()
    pb_ratio = pixels_backlog / low_limit
    pixels_bl_s = 100 - int(
        100 * logp(pb_ratio / 4))  #4 frames behind or more -> compress more

    #megapixels per second:
    mpixels = low_limit / 1024.0 / 1024.0
    #for larger window sizes, we should be downscaling,
    #and don't want to wait too long for those anyway:
    ref_damage_latency = (10 + 25 * (1 + mathlog(max(1, mpixels)))) / 1000.0

    adil = statistics.avg_damage_in_latency or 0
    #abs: try to never go higher than N times the reference latency:
    dam_lat_abs = max(0,
                      (adil - ref_damage_latency)) / (ref_damage_latency * 3)

    if batch.locked:
        target_damage_latency = ref_damage_latency
        dam_lat_rel = 0
        frame_delay = 0
        dam_lat_s = 100
    else:
        #calculate a target latency and try to get close to it
        avg_delay = batch.delay
        delays = tuple(batch.last_actual_delays)
        if delays:
            #average recent actual delay:
            avg_delay = time_weighted_average(delays)
        #and average that with the current delay (which is lower or equal):
        frame_delay = max(10, int((avg_delay + batch.delay) // 2))
        #ensure we always spend at least as much time encoding as we spend batching:
        #(one frame encoding whilst one frame is batching is our ideal result)
        target_damage_latency = max(ref_damage_latency, frame_delay / 1000.0)
        dam_target_speed = min_speed
        if speed_data:
            dam_target_speed = max(min_speed,
                                   time_weighted_average(speed_data))
        #rel: do we need to increase speed to reach the target:
        dam_lat_rel = dam_target_speed / 100.0 * adil / target_damage_latency
        #cap the speed if we're delaying frames longer than we should:
        #(so we spend more of that time compressing them better instead):
        dam_lat_s = int(100 * 2 * ref_damage_latency * 1000 // frame_delay)

    #if we have more pixels to encode, we may need to go faster
    #(this is important because the damage latency used by the other factors
    # may aggregate multiple damage requests into one packet - which may skip frames)
    #TODO: reconcile this with video regions
    #only count the last second's worth:
    now = monotonic()
    lim = now - 1.0
    lde = tuple(w * h for t, _, _, w, h in tuple(statistics.last_damage_events)
                if t >= lim)
    pixels = sum(lde)
    mpixels_per_s = pixels / (1024 * 1024)
    pps = 0.0
    pixel_rate_s = 100
    if len(lde) > 5 and mpixels_per_s >= 1:
        #above 50 MPixels/s, we should reach 100% speed
        #(even x264 peaks at tens of MPixels/s)
        pps = sqrt(mpixels_per_s / 50.0)
        #if there aren't many pixels,
        #we can spend more time compressing them better:
        #(since it isn't going to cost too much to compress)
        #ie: 2MPixels/s -> max_speed=60%
        pixel_rate_s = 20 + int(mpixels_per_s * 20)

    bandwidth_s = 100
    if bandwidth_limit > 0:
        #below N Mbps, lower the speed ceiling,
        #so we will compress better:
        N = 10
        bandwidth_s = int(100 * sqrt(bandwidth_limit / (N * 1000 * 1000)))

    gcv = global_statistics.congestion_value
    congestion_s = 100
    if gcv > 0:
        #apply strict limit for congestion events:
        congestion_s = max(0, int(100 - gcv * 1000))

    #ensure we decode at a reasonable speed (for slow / low-power clients)
    #maybe this should be configurable?
    min_decode_speed = 1 * 1000 * 1000  #MPixels/s
    ads = statistics.avg_decode_speed or 0
    dec_lat = 0
    if ads > 0:
        dec_lat = min_decode_speed / ads

    ms = min(100, max(min_speed, 0))
    max_speed = max(
        ms, min(pixels_bl_s, dam_lat_s, pixel_rate_s, bandwidth_s,
                congestion_s))
    #combine factors: use the highest one:
    target = min(1, max(dam_lat_abs, dam_lat_rel, dec_lat, pps, 0))
    #scale target between min_speed and 100:
    speed = int(ms + (100 - ms) * target)
    speed = max(ms, min(max_speed, speed))

    #expose data we used:
    info = {
        "low-limit": int(low_limit),
        "max-speed": int(max_speed),
        "min-speed": int(min_speed),
        "factors": {
            "damage-latency-abs": int(dam_lat_abs * 100),
            "damage-latency-rel": int(dam_lat_rel * 100),
            "decoding-latency": int(dec_lat * 100),
            "pixel-rate": int(pps * 100),
        },
        "limits": {
            "backlog": pixels_bl_s,
            "damage-latency": dam_lat_s,
            "pixel-rate": pixel_rate_s,
            "bandwidth-limit": bandwidth_s,
            "congestion": congestion_s,
        },
    }
    return info, int(speed), max_speed
Example #23
    def __filter_w_control (self, peak_info, treatment, control, treat2control_ratio, pass_1k=False, write2wig= False, fake_when_missing=False ):
        """Use control data to calculate several lambda values around
        1k, 5k and 10k region around peak summit. Choose the highest
        one as local lambda, then calculate p-value in poisson
        distribution.

        Return value type in this format:
        a dictionary
        key value : chromosome
        items : array of (peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags,peak_pvalue,peak_fold_enrichment)
        """
        final_peak_info = {}
        chrs = sorted(peak_info.keys())
        total = 0
        for chrom in chrs:
            self.debug("#3 Chromosome %s" % (chrom))
            n_chrom = 0
            final_peak_info[chrom] = []
            peak_list = peak_info[chrom]
            try:
                (ctags,tmp) = control.get_ranges_by_chr(chrom)
            except:
                self.warn("Missing %s data, skip it..." % (chrom))
                if fake_when_missing:
                    ctags = [-1,]
                    self.warn("Fake a tag at %s:%d" % (chrom,-1))
                    tmp=[]
                else:
                    continue
            try:
                (ttags,tmp) = treatment.get_ranges_by_chr(chrom)
            except:
                self.warn("Missing %s data, skip it..." % (chrom))
                if fake_when_missing:
                    ttags = [-1,]
                    self.warn("Fake a tag at %s:%d" % (chrom,-1))
                    tmp=[]
                else:
                    continue
                
            index_ctag = 0      # index for control tags
            index_ttag = 0      # index for treatment tags
            flag_find_ctag_locally = False
            flag_find_ttag_locally = False            
            prev_index_ctag = 0
            prev_index_ttag = 0            
            len_ctags = len(ctags)
            len_ttags = len(ttags)
            for i in range(len(peak_list)):
                (peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags) = peak_list[i]

                #window_size_4_lambda = min(self.first_lambda_region,max(peak_length,self.scan_window))
                window_size_4_lambda = max(peak_length,self.scan_window)
                lambda_bg = self.lambda_bg/self.scan_window*window_size_4_lambda                
                if self.nolambda:
                    # skip local lambda
                    local_lambda = lambda_bg
                    tlambda_peak = float(peak_num_tags)/peak_length*window_size_4_lambda
                else:
                    left_peak = peak_start+self.shift_size # go to middle point of the first fragment
                    right_peak = peak_end-self.shift_size  # go to middle point of the last fragment
                    left_10k = peak_summit-self.third_lambda_region/2
                    left_5k = peak_summit-self.second_lambda_region/2
                    left_1k = peak_summit-self.first_lambda_region/2
                    right_10k = peak_summit+self.third_lambda_region/2
                    right_5k = peak_summit+self.second_lambda_region/2
                    right_1k = peak_summit+self.first_lambda_region/2
                    (cnum_10k,cnum_5k,cnum_1k,cnum_peak) = (0,0,0,0)
                    (tnum_10k,tnum_5k,tnum_1k,tnum_peak) = (0,0,0,0)                    
                    smallest = min(left_peak,left_10k,left_5k,left_1k)
                    largest = max(right_peak,right_10k,right_5k,right_1k)
                    
                    while index_ctag < len_ctags:
                        if ctags[index_ctag] < smallest:
                            # go to next control tag
                            index_ctag+=1
                        elif largest < ctags[index_ctag]:
                            # finalize and go to next peak region
                            flag_find_ctag_locally = False
                            index_ctag = prev_index_ctag 
                            break
                        else:
                            if not flag_find_ctag_locally:
                                flag_find_ctag_locally = True
                                prev_index_ctag = index_ctag
                            p = ctags[index_ctag]
                            if left_peak <= p <= right_peak:
                                cnum_peak +=1
                            if left_1k <= p <= right_1k:
                                cnum_10k +=1
                                cnum_5k +=1
                                cnum_1k +=1                                
                            elif left_5k <= p <= right_5k:
                                cnum_10k +=1
                                cnum_5k += 1
                            elif left_10k <= p <= right_10k:
                                cnum_10k += 1
                            index_ctag += 1 # go to next tag

                    while index_ttag < len_ttags:
                        if ttags[index_ttag] < smallest:
                            # go to next treatment tag
                            index_ttag+=1
                        elif largest < ttags[index_ttag]:
                            # finalize and go to next peak region
                            flag_find_ttag_locally = False
                            index_ttag = prev_index_ttag 
                            break
                        else:
                            if not flag_find_ttag_locally:
                                flag_find_ttag_locally = True
                                prev_index_ttag = index_ttag
                            p = ttags[index_ttag]
                            if left_peak <= p <= right_peak:
                                tnum_peak +=1
                            if left_1k <= p <= right_1k:
                                tnum_10k +=1
                                tnum_5k +=1
                                tnum_1k +=1                                
                            elif left_5k <= p <= right_5k:
                                tnum_10k +=1
                                tnum_5k += 1
                            elif left_10k <= p <= right_10k:
                                tnum_10k += 1
                            index_ttag += 1 # go to next tag

                    clambda_peak = float(cnum_peak)/peak_length*treat2control_ratio*window_size_4_lambda
                    clambda_10k = float(cnum_10k)/self.third_lambda_region*treat2control_ratio*window_size_4_lambda
                    clambda_5k = float(cnum_5k)/self.second_lambda_region*treat2control_ratio*window_size_4_lambda
                    clambda_1k = float(cnum_1k)/self.first_lambda_region*treat2control_ratio*window_size_4_lambda
                    tlambda_peak = float(tnum_peak)/peak_length*window_size_4_lambda
                    tlambda_10k = float(tnum_10k)/self.third_lambda_region*window_size_4_lambda
                    tlambda_5k = float(tnum_5k)/self.second_lambda_region*window_size_4_lambda
                    tlambda_1k = float(tnum_1k)/self.first_lambda_region*window_size_4_lambda

                    if pass_1k:
                        # for experiment w/o control, peak region lambda and 1k region lambda are ignored!
                        local_lambda = max(lambda_bg,tlambda_10k,tlambda_5k,clambda_10k,clambda_5k)
                    else:
                        # for experiment w/ control
                        if self.futurefdr:
                            local_lambda = max(lambda_bg,tlambda_10k,tlambda_5k,clambda_peak,clambda_10k,clambda_5k,clambda_1k)
                        else:
                            local_lambda = max(lambda_bg,clambda_peak,clambda_10k,clambda_5k,clambda_1k)

                p_tmp = poisson_cdf(tlambda_peak,local_lambda,lower=False)
                if p_tmp <= 0:
                    peak_pvalue = 3100
                else:
                    peak_pvalue = mathlog(p_tmp,10) * -10

                if peak_pvalue > self.pvalue:
                    n_chrom += 1
                    total += 1
                    peak_fold_enrichment = float(peak_height)/local_lambda*window_size_4_lambda/self.d
                    final_peak_info[chrom].append((peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags,peak_pvalue,peak_fold_enrichment))

            self.debug("#3 peaks whose pvalue < cutoff: %d" % (n_chrom))
        self.info("#3 Finally, %d peaks are called!" % (total))
        return final_peak_info
Example #24
    def __filter_w_control(self,
                           peak_info,
                           treatment,
                           control,
                           pass_sregion=False,
                           write2wig=False,
                           fake_when_missing=False,
                           to_small_sample=False):
        """Use control data to calculate several lambda values around
        1k, 5k and 10k region around peak summit. Choose the highest
        one as local lambda, then calculate p-value in poisson
        distribution.

        Parameters:

        1. pass_sregion: If set True, the slocal lambda will be
        ignored. Use this when the control is not available.
        
        2. write2wig: obsolete
        
        3. fake_when_missing: when a chromosome is missing in control
        but present in IP (or vice versa), MACS fakes a tag so the
        process can continue.
        
        4. to_small_sample: when True, balance the number of tags by
        linearly scaling the larger sample down to the smaller one. The
        default behaviour is to scale the smaller up to the larger.

        Return value type in this format:
        a dictionary
        key value : chromosome
        items : array of (peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags,peak_pvalue,peak_fold_enrichment)
        """
        lambda_bg0 = float(
            self.scan_window) * treatment.total / self.gsize  # bug fixed...

        if treatment.total > control.total:
            t_ratio = 1.00
            c_ratio = float(treatment.total) / control.total
        else:
            t_ratio = float(control.total) / treatment.total
            c_ratio = 1.00

        if to_small_sample:
            tmp = t_ratio
            t_ratio = 1 / c_ratio
            c_ratio = 1 / tmp

        final_peak_info = {}
        chrs = sorted(peak_info.keys())
        total = 0
        for chrom in chrs:
            self.debug("#3 Chromosome %s" % (chrom))
            n_chrom = 0
            final_peak_info[chrom] = []
            peak_list = peak_info[chrom]
            try:
                (ctags, tmp) = control.get_locations_by_chr(chrom)
            except:
                self.warn("Missing %s data, skip it..." % (chrom))
                if fake_when_missing:
                    ctags = [
                        -1,
                    ]
                    self.warn("Fake a tag at %s:%d" % (chrom, -1))
                    tmp = []
                else:
                    continue
            try:
                (ttags, tmp) = treatment.get_locations_by_chr(chrom)
            except:
                self.warn("Missing %s data, skip it..." % (chrom))
                if fake_when_missing:
                    ttags = [
                        -1,
                    ]
                    self.warn("Fake a tag at %s:%d" % (chrom, -1))
                    tmp = []
                else:
                    continue

            index_ctag = 0  # index for control tags
            index_ttag = 0  # index for treatment tags
            flag_find_ctag_locally = False
            flag_find_ttag_locally = False
            prev_index_ctag = 0
            prev_index_ttag = 0
            len_ctags = len(ctags)
            len_ttags = len(ttags)
            for i in range(len(peak_list)):
                (peak_start, peak_end, peak_length, peak_summit, peak_height,
                 peak_num_tags) = peak_list[i]

                #window_size_4_lambda = min(self.first_lambda_region,max(peak_length,self.scan_window))
                window_size_4_lambda = max(peak_length, self.scan_window)
                lambda_bg = lambda_bg0 / self.scan_window * window_size_4_lambda
                if self.nolambda:
                    # skip local lambda
                    local_lambda = lambda_bg
                    tlambda_peak = float(
                        peak_num_tags) / peak_length * window_size_4_lambda
                else:
                    left_peak = peak_start + self.shift_size  # go to middle point of the first fragment
                    right_peak = peak_end - self.shift_size  # go to middle point of the last fragment
                    left_lregion = peak_summit - self.lregion / 2
                    left_sregion = peak_summit - self.sregion / 2
                    right_lregion = peak_summit + self.lregion / 2
                    right_sregion = peak_summit + self.sregion / 2
                    #(cnum_10k,cnum_5k,cnum_1k,cnum_peak) = (0,0,0,0)
                    #(tnum_10k,tnum_5k,tnum_1k,tnum_peak) = (0,0,0,0)
                    (cnum_sregion, cnum_lregion, cnum_peak, tnum_sregion,
                     tnum_lregion, tnum_peak) = (0, 0, 0, 0, 0, 0)
                    #smallest = min(left_peak,left_10k,left_5k,left_1k)
                    #largest = max(right_peak,right_10k,right_5k,right_1k)

                    while index_ctag < len_ctags:
                        if ctags[index_ctag] < left_lregion:
                            # go to next control tag
                            index_ctag += 1
                        elif index_ctag + 1 >= len_ctags or right_lregion < ctags[
                                index_ctag]:
                            # If move outof the lregion or reach the chromosome end
                            # finalize and go to next peak region
                            # Thanks to Jake Biesinger
                            flag_find_ctag_locally = False
                            index_ctag = prev_index_ctag
                            break
                        else:
                            if not flag_find_ctag_locally:
                                flag_find_ctag_locally = True
                                prev_index_ctag = index_ctag
                            p = ctags[index_ctag]
                            if left_peak <= p <= right_peak:
                                cnum_peak += 1
                            if left_sregion <= p <= right_sregion:
                                cnum_sregion += 1
                                cnum_lregion += 1
                            else:
                                cnum_lregion += 1
                            index_ctag += 1  # go to next tag

                    while index_ttag < len_ttags:
                        if ttags[index_ttag] < left_lregion:
                            # go to next treatment tag
                            index_ttag += 1
                        elif index_ttag + 1 >= len_ttags or right_lregion < ttags[
                                index_ttag]:
                            # If move outof the lregion or reach the chromosome end
                            # finalize and go to next peak region
                            # Thanks to Jake Biesinger
                            flag_find_ttag_locally = False
                            index_ttag = prev_index_ttag
                            break
                        else:
                            if not flag_find_ttag_locally:
                                flag_find_ttag_locally = True
                                prev_index_ttag = index_ttag
                            p = ttags[index_ttag]
                            if left_peak <= p <= right_peak:
                                tnum_peak += 1
                            if left_sregion <= p <= right_sregion:
                                tnum_sregion += 1
                                tnum_lregion += 1
                            else:
                                tnum_lregion += 1
                            index_ttag += 1  # go to next tag

                    clambda_peak = float(
                        cnum_peak
                    ) / peak_length * c_ratio * window_size_4_lambda

                    clambda_lregion = float(
                        cnum_lregion
                    ) / self.lregion * c_ratio * window_size_4_lambda

                    clambda_sregion = float(
                        cnum_sregion
                    ) / self.sregion * c_ratio * window_size_4_lambda

                    tlambda_peak = float(
                        tnum_peak
                    ) / peak_length * t_ratio * window_size_4_lambda

                    tlambda_lregion = float(
                        tnum_lregion
                    ) / self.lregion * t_ratio * window_size_4_lambda

                    tlambda_sregion = float(
                        tnum_sregion
                    ) / self.sregion * t_ratio * window_size_4_lambda

                    if pass_sregion:
                        # for experiment w/o control, peak region lambda and sregion region lambda are ignored!
                        local_lambda = max(lambda_bg, tlambda_lregion)
                    else:
                        # for experiment w/ control
                        local_lambda = max(lambda_bg, clambda_peak,
                                           clambda_lregion, clambda_sregion)

                p_tmp = poisson_cdf(tlambda_peak, local_lambda, lower=False)
                if p_tmp <= 0:
                    peak_pvalue = 3100
                else:
                    peak_pvalue = mathlog(p_tmp, 10) * -10

                if peak_pvalue > self.pvalue:
                    n_chrom += 1
                    total += 1
                    peak_fold_enrichment = float(
                        peak_height
                    ) / local_lambda * window_size_4_lambda / self.d
                    final_peak_info[chrom].append(
                        (peak_start, peak_end, peak_length, peak_summit,
                         peak_height, peak_num_tags, peak_pvalue,
                         peak_fold_enrichment))
                # uncomment the following two lines, MACS will report the peaks been rejected.
                #else:
                #    #self.debug("Reject the peak at %s:%d-%d with local_lambda: %.2f and -log10pvalue: %.2f" % (chrom,peak_start,peak_end,local_lambda,peak_pvalue))

            self.debug("#3 peaks whose pvalue < cutoff: %d" % (n_chrom))
        self.info("#3 Finally, %d peaks are called!" % (total))
        return final_peak_info