def _calculate_mi(self, P_X, P_Y, P_X_Y):
    # Mutual information I(X;Y) = sum_{x,y} p(x,y) * log2( p(x,y) / (p(x)*p(y)) ),
    # with X binary; assumes "from math import log as mathlog" at module scope.
    MI = 0.0
    for x in (0, 1):
        for y in P_Y.keys():
            if P_X_Y[(x, y)] > 0:
                MI += P_X_Y[(x, y)] * (mathlog(P_X_Y[(x, y)], 2)
                                       - mathlog(P_X[x] * P_Y[y], 2))
    return MI

def _getGain(self, pprime, nprime, p, n):
    """Return gain."""
    if pprime <= 0 or p <= 0:
        return 0
    if p + n <= 0 or pprime + nprime <= 0:
        return 0
    return float(pprime) * (mathlog(float(pprime) / (pprime + nprime), 2)
                            - mathlog(float(p) / (p + n), 2))

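# A standalone check of the gain formula above (a sketch, not part of the
# original class): this is the FOIL-style gain
#   gain = p' * (log2(p'/(p'+n')) - log2(p/(p+n))),
# i.e. positive coverage weighted by the change in log-precision.
from math import log as mathlog

def _gain_demo():
    # a rule covering 6 pos / 2 neg, refined from a base of 10 pos / 10 neg:
    pprime, nprime, p, n = 6.0, 2.0, 10.0, 10.0
    return pprime * (mathlog(pprime / (pprime + nprime), 2)
                     - mathlog(p / (p + n), 2))  # 6*(log2(0.75)-log2(0.5)) ~= 3.51
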
def _calculate_cmi(self, P_X, P_Y, P_Z, P_X_Y, P_X_Z, P_Y_Z, P_X_Y_Z):
    # Conditional mutual information
    # I(X;Y|Z) = sum_{x,y,z} p(x,y,z) * log2( p(z)*p(x,y,z) / (p(x,z)*p(y,z)) ),
    # with X binary.
    CMI = 0.0
    for x in (0, 1):
        for y in P_Y.keys():
            for z in P_Z.keys():
                if P_X_Y_Z.has_key((x, y, z)) and P_X_Y_Z[(x, y, z)] > 0:
                    CMI += P_X_Y_Z[(x, y, z)] * (mathlog(P_Z[z], 2)
                                                 + mathlog(P_X_Y_Z[(x, y, z)], 2)
                                                 - (mathlog(P_X_Z[(x, z)], 2)
                                                    + mathlog(P_Y_Z[(y, z)], 2)))
    return CMI

def mtcnn_pnet_l1_toplogic(clock, go, finished,
                           scan_in_go, scan_in_finished, scan_in_data,
                           scan_out_go, scan_out_finished, scan_out_data,
                           n=12, p=3):
    n_pixels = n
    n_filter = p  # filter dimension 3 x 3
    n_l1c, n_l1m, r_pixels, r_l1c, r_l1m = computeRanges(n_pixels, n_filter)
    content = [x for x in range(p * p)]
    pixels = [Signal(intbv(3)[8:]) for i in range(r_pixels)]
    midl1c = [Signal(intbv(3)[8:]) for i in range(r_l1c)]
    midl1m = [Signal(intbv(3)[8:]) for i in range(r_l1m)]
    filters = [Signal(intbv(content[i])[8:]) for i in range(n_filter * n_filter)]
    state_in = Signal(bool(False))
    # address counters wide enough to count to 2*n*n; note the slice colon:
    # without it, intbv(0)[k] selects a single bit instead of setting a k-bit width
    state_in_addr = Signal(intbv(0)[int(mathlog(n * n * 2, 2) + 1):])
    state_out = Signal(bool(False))
    state_out_addr = Signal(intbv(0)[int(mathlog(n * n * 2, 2) + 1):])

    hello_inst = PL1Net(clock, go, finished, pixels, midl1c, midl1m, filters,
                        n_pixels, n_filter)

    @always(clock.posedge)
    def scanIn():
        if not state_in:
            if scan_in_go:
                state_in.next = True
                state_in_addr.next = 0
        else:
            # drive the signal via .next (a plain assignment would replace
            # the Signal object instead of updating it)
            pixels[state_in_addr].next = scan_in_data
            if state_in_addr >= n * n - 1:
                state_in.next = False
                scan_in_finished.next = True
            state_in_addr.next = state_in_addr + 1

    @always(clock.posedge)
    def scanOut():
        if not state_out:
            if scan_out_go:
                state_out.next = True
                state_out_addr.next = 0
        else:
            scan_out_data.next = midl1c[state_out_addr]
            if state_out_addr >= r_l1c - 1:
                state_out.next = False
                scan_out_finished.next = True
            state_out_addr.next = state_out_addr + 1

    return hello_inst, scanIn, scanOut

def poisson_score(t, c):
    # -10 * log10(pvalue), capped at 3100 when the survival function underflows
    p_tmp = poisson_cdf(t, c, lower=False)
    if p_tmp <= 0:
        log_pvalue = 3100
    else:
        log_pvalue = mathlog(p_tmp, 10) * -10
    return log_pvalue

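# Scoring convention used above (a sketch): the returned value is
# -10 * log10(pvalue), so p = 1e-5 scores 50 and smaller p-values score
# higher; 3100 is the cap applied when poisson_cdf (assumed to be a Poisson
# survival function, as in MACS) underflows to 0.
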
def mhyper(self, q, m, n, k):
    # Log of a hypergeometric-distribution probability written in closed form
    # with Gauss 2F1 terms (binomial/hyper assumed from mpmath or similar).
    u = min(self.tag_length, n) - 1
    A = binomial(m, q) * binomial(n, k - q)
    B = hyper([1, q - k, q - m], [1 + q, 1 - k + n + q], 1)
    C = -binomial(m, 1 + u) * binomial(n, k - 1 - u)
    D = hyper([1, 1 - k + u, 1 - m + u], [2 + u, 2 - k + n + u], 1)
    return mathlog(float((A * B + C * D) / binomial(m + n, k)))

def calculate_entropy(self, v_X, xmax=None, base=2):
    """General method for calculating information of a random variable.

    Assumes 0-based integer mapped. xmax can be prespecified for
    performance reasons, otherwise it is calculated within the function.
    """
    if not xmax:
        xmax = max(v_X)
    xmax_plus_one = xmax + 1
    print xmax_plus_one  # debug output
    P_X = numpy.zeros((xmax_plus_one), dtype=float)
    nsamples = len(v_X)
    nsamples_reciprocal = 1.0 / float(nsamples)
    for i in xrange(nsamples):
        x = v_X[i]
        P_X[x] += 1.0
    P_X = nsamples_reciprocal * P_X[:]
    H = 0.0
    for x in xrange(xmax_plus_one):
        if P_X[x] > 0:
            H -= P_X[x] * mathlog(P_X[x], base)
    return float(H)

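# Worked checks for calculate_entropy (a sketch; `obj` stands for a
# hypothetical instance of the class these methods belong to):
#   obj.calculate_entropy([0, 1, 0, 1])  ->  1.0  (fair coin: one bit, base 2)
#   obj.calculate_entropy([0, 0, 0, 0])  ->  0.0  (constant variable)
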
def noValidGainsinPNarray(self, dblMinGainThreshold):
    """Return true if maximum gain is less than the threshold."""
    p = float(self.getWeightOfExamples(self.lstPprime))
    n = float(self.getWeightOfExamples(self.lstNprime))
    if p <= 0 or p + n <= 0:
        return True
    oldgain = mathlog(float(p), 2) - mathlog(p + n, 2)
    for intAttribute in range(0, len(self.lstPNprime)):
        if self.lstAprime[intAttribute] == 1:
            gain = 0.0
            pprime = float(self.lstPNprime[intAttribute][0])
            nprime = float(self.lstPNprime[intAttribute][1])
            if not (pprime <= 0 or pprime + nprime <= 0):
                gain = pprime * ((mathlog(pprime, 2)
                                  - mathlog(pprime + nprime, 2)) - oldgain)
            if gain > dblMinGainThreshold:
                return False
    return True

def calculate_mi(self, v_X, v_Y, xmax=None, ymax=None, base=2):
    """General method for calculating MI between two vectors.

    Assumes 0-based integer mapped. xmax and ymax can be prespecified for
    performance reasons, otherwise they are calculated within the function.
    """
    if not xmax:
        xmax = max(v_X)
    if not ymax:
        ymax = max(v_Y)
    xmax_plus_one = xmax + 1
    ymax_plus_one = ymax + 1
    print xmax_plus_one  # debug output
    print ymax_plus_one  # debug output
    P_X = numpy.zeros((xmax_plus_one), dtype=float)
    P_Y = numpy.zeros((ymax_plus_one), dtype=float)
    P_X_Y = numpy.zeros((xmax_plus_one, ymax_plus_one), dtype=float)
    nsamples = len(v_X)
    nsamples_reciprocal = 1.0 / float(nsamples)
    for i in xrange(nsamples):
        x = v_X[i]
        y = v_Y[i]
        P_X[x] += 1.0
        P_Y[y] += 1.0
        P_X_Y[x, y] += 1.0
    P_X = nsamples_reciprocal * P_X[:]
    P_Y = nsamples_reciprocal * P_Y[:]
    P_X_Y = nsamples_reciprocal * P_X_Y[:, :]
    MI = 0.0
    for x in xrange(xmax_plus_one):
        for y in xrange(ymax_plus_one):
            if P_X_Y[x, y] > 0:
                MI += P_X_Y[x, y] * (mathlog(P_X_Y[x, y], base)
                                     - mathlog(P_X[x] * P_Y[y], base))
    return float(MI)

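# Worked checks for calculate_mi (a sketch; `obj` is a hypothetical instance):
#   obj.calculate_mi([0, 0, 1, 1], [0, 0, 1, 1])  ->  1.0  (Y copies X: MI = H(X))
#   obj.calculate_mi([0, 0, 1, 1], [0, 1, 0, 1])  ->  0.0  (X and Y independent)
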
def proteinsequencelowcomplexityscore(protseq):
    """Score low-complexity (repeated-dipeptide) content of a protein sequence."""
    seqlen = len(protseq)
    protseq = protseq.upper()
    dinucs = [protseq[i - 1:i + 1] for i in range(1, seqlen)]
    counts = [0]
    expected = max([seqlen / 400, 1])
    cutoff = int(round(mathlog(seqlen, 5)))
    for dinuc in Set(dinucs):  # Set assumed from "from sets import Set" (Py2)
        occurrence = dinucs.count(dinuc) - expected
        if occurrence >= cutoff:
            counts.append(occurrence - cutoff)
    return round(sum(counts) / float(seqlen), 3)

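# Usage sketch for proteinsequencelowcomplexityscore: a homopolymer scores
# high because its single dipeptide recurs far above the cutoff, e.g. (under
# the Python 2 integer-division semantics above):
#   proteinsequencelowcomplexityscore("Q" * 20)  ->  0.8
# A sequence with no dipeptide repeated above the cutoff returns 0.0.
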
def logp(x):
    return mathlog(1.0 + x, sqrt2) / 2.0

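# Behavioral note for logp above (a sketch; sqrt2 is assumed to be a
# module-level math.sqrt(2)): since log(2, sqrt(2)) == 2, the helper maps
# 0 -> 0.0 and 1 -> 1.0, giving a concave ramp that rises quickly for small x
# and flattens for large x; a natural-log variant of the same helper appears
# further down in this section.
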
def _calculate_information_scores(self, sample_set, feature, confounder):
    #def getCMI(self,feature,lstSamples,lstConfounders,CONFOUNDERINDEX):
    nsamples = len(sample_set)
    if self.P_Y == None:
        self.P_Y = self._calculate_P_Y(sample_set)
    if self.P_Z == None:
        self.P_Z = self._calculate_P_Z(sample_set, confounder)
    if self.P_Y_Z == None:
        self.P_Y_Z = self._calculate_P_Y_Z(sample_set, confounder)
    P_Y = self.P_Y
    P_Z = self.P_Z
    P_Y_Z = self.P_Y_Z
    P_X = [0, 0]
    X = [feature]
    for sample in sample_set:
        if sample.satisfies(X):
            P_X[1] += 1
        else:
            P_X[0] += 1
    for x in (0, 1):
        P_X[x] = float(P_X[x]) / float(nsamples)
    P_X_Y = {}
    for sample in sample_set:
        Y = sample.current_class_label
        if not P_X_Y.has_key((1, Y)):
            P_X_Y[(1, Y)] = 0
        if not P_X_Y.has_key((0, Y)):
            P_X_Y[(0, Y)] = 0
        if sample.satisfies(X):
            P_X_Y[(1, Y)] += 1
        else:
            P_X_Y[(0, Y)] += 1
    for key in P_X_Y.keys():
        P_X_Y[key] = float(P_X_Y[key]) / float(nsamples)
    P_X_Z = {}
    for sample in sample_set:
        Z = self.confounders[sample.id][confounder]
        if not P_X_Z.has_key((1, Z)):
            P_X_Z[(1, Z)] = 0
        if not P_X_Z.has_key((0, Z)):
            P_X_Z[(0, Z)] = 0
        if sample.satisfies(X):
            P_X_Z[(1, Z)] += 1
        else:
            P_X_Z[(0, Z)] += 1
    for key in P_X_Z.keys():
        P_X_Z[key] = float(P_X_Z[key]) / float(nsamples)
    P_X_Y_Z = {}
    #Calculate X,Y,Z entropy:
    for sample in sample_set:
        Y = sample.current_class_label
        Z = self.confounders[sample.id][confounder]
        if not P_X_Y_Z.has_key((1, Y, Z)):
            P_X_Y_Z[(1, Y, Z)] = 0
        if not P_X_Y_Z.has_key((0, Y, Z)):
            P_X_Y_Z[(0, Y, Z)] = 0
        if sample.satisfies(X):
            P_X_Y_Z[(1, Y, Z)] += 1
        else:
            P_X_Y_Z[(0, Y, Z)] += 1
    for key in P_X_Y_Z.keys():
        P_X_Y_Z[key] = float(P_X_Y_Z[key]) / float(nsamples)
    H_Y_Given_Z = 0.0
    H_Y_Z = 0.0
    H_Z = 0.0
    for y in P_Y.keys():
        for z in P_Z.keys():
            if P_Y_Z.has_key((y, z)) and P_Y_Z[(y, z)] > 0:
                H_Y_Z += P_Y_Z[(y, z)] * (mathlog(P_Y_Z[(y, z)], 2))
    for z in P_Z.keys():
        if P_Z[z] > 0:
            H_Z += P_Z[z] * (mathlog(P_Z[z], 2))
    H_Y_Given_Z = -(H_Y_Z - H_Z)
    MI = self._calculate_mi(P_X, P_Y, P_X_Y)
    CMI = self._calculate_cmi(P_X, P_Y, P_Z, P_X_Y, P_X_Z, P_Y_Z, P_X_Y_Z)
    return_scores = {"mi": MI, "cmi": CMI, "hz": H_Z, "hyz": H_Y_Z,
                     "hygivenz": H_Y_Given_Z, "px": P_X, "py": P_Y, "pz": P_Z,
                     "pxy": P_X_Y, "pyz": P_Y_Z, "pxz": P_X_Z, "pxyz": P_X_Y_Z}
    return return_scores

def hoeffding_deviation(occurence, confidence=0.9):
    # deviation bound from Hoeffding's inequality for `occurence` observations
    return sqrt(-mathlog(confidence / 2) / (2 * occurence))

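# Sketch relating hoeffding_deviation to Hoeffding's inequality: for n bounded
# i.i.d. samples, P(|mean - E[mean]| >= eps) <= 2*exp(-2*n*eps^2); setting the
# right-hand side to `confidence` and solving for eps gives
# eps = sqrt(-log(confidence/2) / (2*n)), matching the formula above (so the
# parameter acts as the allowed tail probability).
from math import sqrt, exp, log as mathlog

def _hoeffding_check(n=100, confidence=0.9):
    eps = sqrt(-mathlog(confidence / 2.0) / (2 * n))
    return 2 * exp(-2 * n * eps ** 2)  # recovers `confidence` up to float error
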
def breakdown2(filename):
    #print "IV file is: ",filename
    formula = "(dI/dV)/(I/V)"
    # reading data
    f = open(filename)
    lines = f.readlines()
    # writing data in lists
    V = []
    I = []
    for line in lines:
        tmp = line.split()
        v = float(tmp[0])
        i = float(tmp[1])
        if v == 0:
            continue
        V.append(v)
        I.append(i)
    # closing file
    f.close()
    # from lists to array
    V = np.asarray(V)
    I = np.asarray(I)
    # creating an interpolation of the IV curve
    #interpolation_of_iv = interpolate.splrep(V, I,k=5)
    #interpolated_I = interpolate.splev(V,interpolation_of_iv,der=0)
    # plot a comparison
    #plt.figure()
    #plt.plot(V, I, 'xb',V,interpolated_I,'-r')
    #plt.legend(['Data','Interpolation'],loc=6)
    #plt.xlabel('Vbias [V]')
    #plt.ylabel('Ileak [nA]')
    #plt.yscale('log')
    #plt.title('IV, W93, S1')
    #plt.show()
    # calculating log and derivative
    #interpolated_I = np.asarray(interpolated_I)
    #print interpolated_I
    index = 0
    logI = []
    for current in I:  # loop variable renamed: "for I in I" shadowed the array
        logi = mathlog(current)
        #print V[index],current,logi
        logI.append(logi)
        index = index + 1
    logI = np.asarray(logI)
    #logI = np.log(interpolated_I)
    interpolation_of_iv = interpolate.splrep(V, logI, k=5)
    dlogIdV = interpolate.splev(V, interpolation_of_iv, der=1)
    #observable_values = 1.0/dlogIdV
    v_bd = 0
    one_over_dlogIdV = 1.0 / dlogIdV
    # breakdown voltage: where dlogI/dV peaks
    v_bd = V[np.argmax(dlogIdV)]
    #v_bd = V[np.argmin(one_over_dlogIdV)]
    #print "Breakdown voltage is:", v_bd,"V"
    # returning the interpolated current
    #return interpolated_I
    return v_bd

def pow_two_chunk(num):
    # round num down to the nearest power of two (floor/mathlog from math)
    return 2 ** floor(mathlog(num, 2))

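# Usage sketch for pow_two_chunk:
#   pow_two_chunk(1000)  ->  512.0
#   pow_two_chunk(100)   ->  64.0
# Caveat: mathlog(num, 2) is evaluated as log(num)/log(2) in floating point
# and can land a hair off an integer for exact powers of two; math.log2 (or
# the bit test shown after _ispow2 further down) sidesteps that edge case.
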
def logp(x):
    return mathlog(1.0 + x) / 2.0

def get_target_speed(wid, window_dimensions, batch, global_statistics, statistics, min_speed, speed_data):
    low_limit = get_low_limit(global_statistics.mmap_size > 0, window_dimensions)
    #***********************************************************
    # encoding speed:
    #    0 for highest compression/slower
    #    100 for lowest compression/fast
    # here we try to minimize damage-latency and client decoding speed
    #megapixels per second:
    mpixels = low_limit / 1024.0 / 1024.0
    #for larger window sizes, we should be downscaling,
    #and don't want to wait too long for those anyway:
    ref_damage_latency = 0.010 + 0.025 * (1 + mathlog(max(1, mpixels)))
    #abs: try to never go higher than 5 times reference latency:
    dam_lat_abs = max(0, ((statistics.avg_damage_in_latency or 0) - ref_damage_latency) / (ref_damage_latency * 4.0))
    #calculate a target latency and try to get close to it
    avg_delay = batch.delay
    delays = list(batch.last_actual_delays)
    if len(delays) > 0:
        #average recent actual delay:
        avg_delay = time_weighted_average(delays)
    #and average that with the current delay (which is lower or equal):
    frame_delay = (avg_delay + batch.delay) / 2.0
    #ensure we always spend at least as much time encoding as we spend batching:
    #(one frame encoding whilst one frame is batching is our ideal result)
    target_damage_latency = max(ref_damage_latency, frame_delay / 1000.0)
    #current speed:
    speed = min_speed
    if len(speed_data) > 0:
        speed = max(min_speed, time_weighted_average(speed_data))
    #rel: do we need to increase or decrease speed to reach the target:
    dam_lat_rel = speed / 100.0 * statistics.avg_damage_in_latency / target_damage_latency
    #ensure we decode at a reasonable speed (for slow / low-power clients)
    #maybe this should be configurable?
    target_decode_speed = 8 * 1000 * 1000.0  #8 MPixels/s
    dec_lat = 0.0
    if statistics.avg_decode_speed > 0:
        dec_lat = target_decode_speed / (statistics.avg_decode_speed or target_decode_speed)
    #if we have more pixels to encode, we may need to go faster
    #(this is important because the damage latency used by the other factors
    # may aggregate multiple damage requests into one packet - which may skip frames)
    #TODO: reconcile this with video regions
    #only count the last second's worth:
    now = time.time()
    lim = now - 1.0
    lde = [w * h for t, _, _, w, h in list(statistics.last_damage_events) if t >= lim]
    pixels = sum(lde)
    mpixels_per_s = pixels / 1024.0 / 1024.0
    pps = 0.0
    if len(lde) > 5:
        #above 50 MPixels/s, we should reach 100% speed
        #(even x264 peaks at tens of MPixels/s)
        pps = mpixels_per_s / 50.0
    #combine factors: use the highest one:
    target = min(1.0, max(dam_lat_abs, dam_lat_rel, dec_lat, pps, 0.0))
    #scale target between min_speed and 100:
    ms = min(100.0, max(min_speed, 0.0))
    target_speed = int(ms + (100.0 - ms) * target)
    #expose data we used:
    info = {
        "low_limit": int(low_limit),
        "min_speed": int(min_speed),
        "frame_delay": int(frame_delay),
        "mpixels": int(mpixels_per_s),
        "damage_latency": {
            "ref": int(1000.0 * ref_damage_latency),
            "avg": int(1000.0 * statistics.avg_damage_in_latency),
            "target": int(1000.0 * target_damage_latency),
            "abs_factor": int(100.0 * dam_lat_abs),
            "rel_factor": int(100.0 * dam_lat_rel),
        },
        "decoding_latency": {
            "target": int(target_decode_speed),
            "factor": int(100.0 * dec_lat),
        },
    }
    return info, target_speed

def __filter_w_control_v2(self, sf, peak_info, treatment, control,
                          pass_sregion=False, write2wig=False,
                          fake_when_missing=False, to_small_sample=False):
    """Use control data to calculate several lambda values around 1k,
    5k and 10k region around peak summit. Choose the highest one as
    local lambda, then calculate p-value in poisson distribution.

    Parameters:

    1. pass_sregion: If set True, the slocal lambda will be ignored. Use
    this when the control is not available.
    2. write2wig: obsolete
    3. fake_when_missing: when a chromosome is missing in control but
    existing in IP or vice versa, MACS will fake a tag to pass the process.
    4. to_small_sample: when set as True, balance the number of tags by
    linearly scaling larger sample to smaller sample. The default
    behaviour is to linearly scale smaller to larger one.

    Return value type in this format:
    a dictionary
    key value : chromosome
    items : array of (peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags,peak_pvalue,peak_fold_enrichment)
    """
    lambda_bg0 = float(self.scan_window) * treatment.total / self.gsize
    # bug fixed...
    #if treatment.total>control.total:
    #    t_ratio = 1.00
    #    c_ratio = float(treatment.total)/control.total
    #else:
    #    t_ratio = float(control.total)/treatment.total
    #    c_ratio = 1.00
    #if to_small_sample:
    #    tmp = t_ratio
    #    t_ratio = 1/c_ratio
    #    c_ratio = 1/tmp
    t_ratio = sf[0]
    c_ratio = sf[1]
    #self.info("t_ratio %s" % (t_ratio))
    #self.info("c_ratio %s" % (c_ratio))
    final_peak_info = {}
    chrs = peak_info.keys()
    chrs.sort()
    total = 0
    for chrom in chrs:
        #self.info("#3 Chromosome %s" % (chrom))
        n_chrom = 0
        final_peak_info[chrom] = []
        peak_list = peak_info[chrom]
        try:
            (ctags, ccnts) = control.get_locations_by_chr_v2(chrom)
            #ccnts = control.get_counts_by_chr(chrom,0)
        except:
            self.warn("Missing %s data, skip it..." % (chrom))
            if fake_when_missing:
                ctags = [-1, ]
                ccnts = [-1, ]
                self.warn("Fake a tag at %s:%d" % (chrom, -1))
                tmp = []
            else:
                continue
        try:
            (ttags, tcnts) = treatment.get_locations_by_chr_v2(chrom)
            #tcnts = treatment.get_counts_by_chr(chrom,0)
        except:
            self.warn("Missing %s data, skip it..." % (chrom))
            if fake_when_missing:
                ttags = [-1, ]
                tcnts = [-1, ]
                self.warn("Fake a tag at %s:%d" % (chrom, -1))
                tmp = []
            else:
                continue
        #self.info("ttags size %d" % (len(ttags)))
        #self.info("ctags size %d" % (len(ttags)))
        #self.info("ttags size %d" % (len(tcnts)))
        #self.info("ctags size %d" % (len(ccnts)))
        index_ctag = 0  # index for control tags
        index_ttag = 0  # index for treatment tags
        flag_find_ctag_locally = False
        flag_find_ttag_locally = False
        prev_index_ctag = 0
        prev_index_ttag = 0
        len_ctags = len(ctags)
        len_ttags = len(ttags)
        for i in range(len(peak_list)):
            (peak_start, peak_end, peak_length, peak_summit, peak_height, peak_num_tags) = peak_list[i]
            #window_size_4_lambda = min(self.first_lambda_region,max(peak_length,self.scan_window))
            window_size_4_lambda = max(peak_length, self.scan_window)
            lambda_bg = lambda_bg0 / self.scan_window * window_size_4_lambda * t_ratio
            if self.nolambda:
                # skip local lambda
                local_lambda = lambda_bg
                tlambda_peak = float(peak_num_tags) / peak_length * window_size_4_lambda
            else:
                left_peak = peak_start + self.shift_size   # go to middle point of the first fragment
                right_peak = peak_end - self.shift_size    # go to middle point of the last fragment
                left_lregion = peak_summit - self.lregion / 2
                left_sregion = peak_summit - self.sregion / 2
                right_lregion = peak_summit + self.lregion / 2
                right_sregion = peak_summit + self.sregion / 2
                #(cnum_10k,cnum_5k,cnum_1k,cnum_peak) = (0,0,0,0)
                #(tnum_10k,tnum_5k,tnum_1k,tnum_peak) = (0,0,0,0)
                (cnum_sregion, cnum_lregion, cnum_peak,
                 tnum_sregion, tnum_lregion, tnum_peak) = (0, 0, 0, 0, 0, 0)
                #smallest = min(left_peak,left_10k,left_5k,left_1k)
                #largest = max(right_peak,right_10k,right_5k,right_1k)
                while index_ctag < len_ctags:
                    if ctags[index_ctag] < left_lregion:
                        # go to next control tag
                        index_ctag += 1
                    elif index_ctag + 1 >= len_ctags or right_lregion < ctags[index_ctag]:
                        # If we move out of the lregion or reach the chromosome end,
                        # finalize and go to next peak region
                        # Thanks to Jake Biesinger
                        flag_find_ctag_locally = False
                        index_ctag = prev_index_ctag
                        break
                    else:
                        if not flag_find_ctag_locally:
                            flag_find_ctag_locally = True
                            prev_index_ctag = index_ctag
                        p = ctags[index_ctag]
                        c = ccnts[index_ctag]
                        if left_peak <= p <= right_peak:
                            cnum_peak += c
                        if left_sregion <= p <= right_sregion:
                            cnum_sregion += c
                            cnum_lregion += c
                        else:
                            cnum_lregion += c
                        index_ctag += 1  # go to next tag
                while index_ttag < len_ttags:
                    if ttags[index_ttag] < left_lregion:
                        # go to next treatment tag
                        index_ttag += 1
                    elif index_ttag + 1 >= len_ttags or right_lregion < ttags[index_ttag]:
                        # If we move out of the lregion or reach the chromosome end,
                        # finalize and go to next peak region
                        # Thanks to Jake Biesinger
                        flag_find_ttag_locally = False
                        index_ttag = prev_index_ttag
                        break
                    else:
                        if not flag_find_ttag_locally:
                            flag_find_ttag_locally = True
                            prev_index_ttag = index_ttag
                        p = ttags[index_ttag]
                        c = tcnts[index_ttag]
                        if left_peak <= p <= right_peak:
                            tnum_peak += c
                        if left_sregion <= p <= right_sregion:
                            tnum_sregion += c
                            tnum_lregion += c
                        else:
                            tnum_lregion += c
                        index_ttag += 1  # go to next tag
                clambda_peak = float(cnum_peak) / peak_length * c_ratio * window_size_4_lambda
                clambda_lregion = float(cnum_lregion) / self.lregion * c_ratio * window_size_4_lambda
                clambda_sregion = float(cnum_sregion) / self.sregion * c_ratio * window_size_4_lambda
                tlambda_peak = float(tnum_peak) / peak_length * t_ratio * window_size_4_lambda
                tlambda_lregion = float(tnum_lregion) / self.lregion * t_ratio * window_size_4_lambda
                tlambda_sregion = float(tnum_sregion) / self.sregion * t_ratio * window_size_4_lambda
                if pass_sregion:
                    # for experiment w/o control, peak region lambda and sregion region lambda are ignored!
                    local_lambda = max(lambda_bg, tlambda_lregion)
                else:
                    # for experiment w/ control
                    local_lambda = max(lambda_bg, clambda_peak, clambda_lregion, clambda_sregion)
            #print(local_lambda)
            #if local_lambda == 0 :
            #    local_lambda = 0.001
            p_tmp = poisson_cdf(tlambda_peak, local_lambda, lower=False)
            if p_tmp <= 0:
                peak_pvalue = 3100
            else:
                peak_pvalue = mathlog(p_tmp, 10) * -10
            if peak_pvalue > self.pvalue:
                n_chrom += 1
                total += 1
                peak_fold_enrichment = float(peak_height) / local_lambda * window_size_4_lambda / self.d
                final_peak_info[chrom].append((peak_start, peak_end, peak_length, peak_summit, peak_height, peak_num_tags, peak_pvalue, peak_fold_enrichment))
            # uncomment the following two lines, MACS will report the peaks been rejected.
            #else:
            #    #self.debug("Reject the peak at %s:%d-%d with local_lambda: %.2f and -log10pvalue: %.2f" % (chrom,peak_start,peak_end,local_lambda,peak_pvalue))
        self.debug("#3 peaks whose pvalue < cutoff: %d" % (n_chrom))
    self.info("#3 Finally, %d peaks are called!" % (total))
    return final_peak_info

def _ispow2(i):
    v = mathlog(i, 2)
    return v == int(v)

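# Exact integer alternative to _ispow2 (a sketch): mathlog(i, 2) is evaluated
# in floating point and can come out a hair off an integer for large exact
# powers of two, so a bit test is safer when i is an int.
def _ispow2_bits(i):
    return i > 0 and (i & (i - 1)) == 0
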
def get_target_speed(wid, window_dimensions, batch, global_statistics, statistics, min_speed, speed_data):
    low_limit = get_low_limit(global_statistics.mmap_size > 0, window_dimensions)
    #***********************************************************
    # encoding speed:
    #    0 for highest compression/slower
    #    100 for lowest compression/fast
    # here we try to minimize damage-latency and client decoding speed
    #megapixels per second:
    mpixels = low_limit / 1024.0 / 1024.0
    #for larger window sizes, we should be downscaling,
    #and don't want to wait too long for those anyway:
    ref_damage_latency = 0.010 + 0.025 * (1 + mathlog(max(1, mpixels)))
    #abs: try to never go higher than 5 times reference latency:
    dam_lat_abs = max(0, ((statistics.avg_damage_in_latency or 0) - ref_damage_latency) / (ref_damage_latency * 4.0))
    #calculate a target latency and try to get close to it
    avg_delay = batch.delay
    delays = list(batch.last_actual_delays)
    if len(delays) > 0:
        #average recent actual delay:
        avg_delay = time_weighted_average(delays)
    #and average that with the current delay (which is lower or equal):
    frame_delay = (avg_delay + batch.delay) / 2.0
    #ensure we always spend at least as much time encoding as we spend batching:
    #(one frame encoding whilst one frame is batching is our ideal result)
    target_damage_latency = max(ref_damage_latency, frame_delay / 1000.0)
    #current speed:
    speed = min_speed
    if len(speed_data) > 0:
        speed = max(min_speed, time_weighted_average(speed_data))
    #rel: do we need to increase or decrease speed to reach the target:
    dam_lat_rel = speed / 100.0 * statistics.avg_damage_in_latency / target_damage_latency
    #ensure we decode at a reasonable speed (for slow / low-power clients)
    #maybe this should be configurable?
    target_decode_speed = 8 * 1000 * 1000.0  #8 MPixels/s
    dec_lat = 0.0
    if statistics.avg_decode_speed:
        dec_lat = target_decode_speed / (statistics.avg_decode_speed or target_decode_speed)
    #combine factors: use the highest one:
    target = min(1.0, max(dam_lat_abs, dam_lat_rel, dec_lat, 0.0))
    #scale target between min_speed and 100:
    ms = min(100.0, max(min_speed, 0.0))
    target_speed = int(ms + (100.0 - ms) * target)
    #expose data we used:
    info = {
        "low_limit": int(low_limit),
        "min_speed": int(min_speed),
        "frame_delay": int(frame_delay),
        "damage_latency.ref": int(1000.0 * ref_damage_latency),
        "damage_latency.avg": int(1000.0 * statistics.avg_damage_in_latency),
        "damage_latency.target": int(1000.0 * target_damage_latency),
        "damage_latency.abs_factor": int(100.0 * dam_lat_abs),
        "damage_latency.rel_factor": int(100.0 * dam_lat_rel),
        "decoding_latency.target": int(target_decode_speed),
        "decoding_latency.factor": int(100.0 * dec_lat),
    }
    return info, target_speed

def _set_sizeOfLocalColorTable(self):
    num = self[1].get_num()
    if num > 0:
        return int(mathlog(num, 2) - 1)
    else:
        return 0

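# Note on the GIF convention assumed above (a sketch): the 3-bit "size of
# local color table" field stores log2(N) - 1 for an N-entry palette, so a
# 2-color table encodes 0 and a 256-color table encodes 7; a decoder recovers
# the entry count as 2 ** (field + 1).
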
def get_target_speed(window_dimensions, batch, global_statistics, statistics,
                     bandwidth_limit, min_speed, speed_data):
    low_limit = get_low_limit(global_statistics.mmap_size > 0, window_dimensions)
    #***********************************************************
    # encoding speed:
    #    0 for highest compression/slower
    #    100 for lowest compression/fast
    # here we try to minimize damage-latency and client decoding speed
    #backlog factor:
    _, pixels_backlog, _ = statistics.get_client_backlog()
    pb_ratio = pixels_backlog / low_limit
    #4 frames behind or more -> compress more:
    pixels_bl_s = 100 - int(100 * logp(pb_ratio / 4))
    #megapixels per second:
    mpixels = low_limit / 1024.0 / 1024.0
    #for larger window sizes, we should be downscaling,
    #and don't want to wait too long for those anyway:
    ref_damage_latency = (10 + 25 * (1 + mathlog(max(1, mpixels)))) / 1000.0
    adil = statistics.avg_damage_in_latency or 0
    #abs: try to never go higher than N times the reference latency:
    dam_lat_abs = max(0, (adil - ref_damage_latency)) / (ref_damage_latency * 3)
    if batch.locked:
        target_damage_latency = ref_damage_latency
        dam_lat_rel = 0
        frame_delay = 0
        dam_lat_s = 100
    else:
        #calculate a target latency and try to get close to it
        avg_delay = batch.delay
        delays = tuple(batch.last_actual_delays)
        if delays:
            #average recent actual delay:
            avg_delay = time_weighted_average(delays)
        #and average that with the current delay (which is lower or equal):
        frame_delay = max(10, int((avg_delay + batch.delay) // 2))
        #ensure we always spend at least as much time encoding as we spend batching:
        #(one frame encoding whilst one frame is batching is our ideal result)
        target_damage_latency = max(ref_damage_latency, frame_delay / 1000.0)
        dam_target_speed = min_speed
        if speed_data:
            dam_target_speed = max(min_speed, time_weighted_average(speed_data))
        #rel: do we need to increase speed to reach the target:
        dam_lat_rel = dam_target_speed / 100.0 * adil / target_damage_latency
        #cap the speed if we're delaying frames longer than we should:
        #(so we spend more of that time compressing them better instead):
        dam_lat_s = int(100 * 2 * ref_damage_latency * 1000 // frame_delay)
    #if we have more pixels to encode, we may need to go faster
    #(this is important because the damage latency used by the other factors
    # may aggregate multiple damage requests into one packet - which may skip frames)
    #TODO: reconcile this with video regions
    #only count the last second's worth:
    now = monotonic()
    lim = now - 1.0
    lde = tuple(w * h for t, _, _, w, h in tuple(statistics.last_damage_events) if t >= lim)
    pixels = sum(lde)
    mpixels_per_s = pixels / (1024 * 1024)
    pps = 0.0
    pixel_rate_s = 100
    if len(lde) > 5 and mpixels_per_s >= 1:
        #above 50 MPixels/s, we should reach 100% speed
        #(even x264 peaks at tens of MPixels/s)
        pps = sqrt(mpixels_per_s / 50.0)
        #if there aren't many pixels,
        #we can spend more time compressing them better:
        #(since it isn't going to cost too much to compress)
        #ie: 2MPixels/s -> max_speed=60%
        pixel_rate_s = 20 + int(mpixels_per_s * 20)
    bandwidth_s = 100
    if bandwidth_limit > 0:
        #below N Mbps, lower the speed ceiling,
        #so we will compress better:
        N = 10
        bandwidth_s = int(100 * sqrt(bandwidth_limit / (N * 1000 * 1000)))
    gcv = global_statistics.congestion_value
    congestion_s = 100
    if gcv > 0:
        #apply strict limit for congestion events:
        congestion_s = max(0, int(100 - gcv * 1000))
    #ensure we decode at a reasonable speed (for slow / low-power clients)
    #maybe this should be configurable?
    min_decode_speed = 1 * 1000 * 1000  #MPixels/s
    ads = statistics.avg_decode_speed or 0
    dec_lat = 0
    if ads > 0:
        dec_lat = min_decode_speed / ads
    ms = min(100, max(min_speed, 0))
    max_speed = max(ms, min(pixels_bl_s, dam_lat_s, pixel_rate_s, bandwidth_s, congestion_s))
    #combine factors: use the highest one:
    target = min(1, max(dam_lat_abs, dam_lat_rel, dec_lat, pps, 0))
    #scale target between min_speed and 100:
    speed = int(ms + (100 - ms) * target)
    speed = max(ms, min(max_speed, speed))
    #expose data we used:
    info = {
        "low-limit": int(low_limit),
        "max-speed": int(max_speed),
        "min-speed": int(min_speed),
        "factors": {
            "damage-latency-abs": int(dam_lat_abs * 100),
            "damage-latency-rel": int(dam_lat_rel * 100),
            "decoding-latency": int(dec_lat * 100),
            "pixel-rate": int(pps * 100),
        },
        "limits": {
            "backlog": pixels_bl_s,
            "damage-latency": dam_lat_s,
            "pixel-rate": pixel_rate_s,
            "bandwidth-limit": bandwidth_s,
            "congestion": congestion_s,
        },
    }
    return info, int(speed), max_speed

def __filter_w_control(self, peak_info, treatment, control, treat2control_ratio,
                       pass_1k=False, write2wig=False, fake_when_missing=False):
    """Use control data to calculate several lambda values around 1k,
    5k and 10k region around peak summit. Choose the highest one as
    local lambda, then calculate p-value in poisson distribution.

    Return value type in this format:
    a dictionary
    key value : chromosome
    items : array of (peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags,peak_pvalue,peak_fold_enrichment)
    """
    final_peak_info = {}
    chrs = peak_info.keys()
    chrs.sort()
    total = 0
    for chrom in chrs:
        self.debug("#3 Chromosome %s" % (chrom))
        n_chrom = 0
        final_peak_info[chrom] = []
        peak_list = peak_info[chrom]
        try:
            (ctags, tmp) = control.get_ranges_by_chr(chrom)
        except:
            self.warn("Missing %s data, skip it..." % (chrom))
            if fake_when_missing:
                ctags = [-1, ]
                self.warn("Fake a tag at %s:%d" % (chrom, -1))
                tmp = []
            else:
                continue
        try:
            (ttags, tmp) = treatment.get_ranges_by_chr(chrom)
        except:
            self.warn("Missing %s data, skip it..." % (chrom))
            if fake_when_missing:
                ttags = [-1, ]
                self.warn("Fake a tag at %s:%d" % (chrom, -1))
                tmp = []
            else:
                continue
        index_ctag = 0  # index for control tags
        index_ttag = 0  # index for treatment tags
        flag_find_ctag_locally = False
        flag_find_ttag_locally = False
        prev_index_ctag = 0
        prev_index_ttag = 0
        len_ctags = len(ctags)
        len_ttags = len(ttags)
        for i in range(len(peak_list)):
            (peak_start, peak_end, peak_length, peak_summit, peak_height, peak_num_tags) = peak_list[i]
            #window_size_4_lambda = min(self.first_lambda_region,max(peak_length,self.scan_window))
            window_size_4_lambda = max(peak_length, self.scan_window)
            lambda_bg = self.lambda_bg / self.scan_window * window_size_4_lambda
            if self.nolambda:
                # skip local lambda
                local_lambda = lambda_bg
                tlambda_peak = float(peak_num_tags) / peak_length * window_size_4_lambda
            else:
                left_peak = peak_start + self.shift_size   # go to middle point of the first fragment
                right_peak = peak_end - self.shift_size    # go to middle point of the last fragment
                left_10k = peak_summit - self.third_lambda_region / 2
                left_5k = peak_summit - self.second_lambda_region / 2
                left_1k = peak_summit - self.first_lambda_region / 2
                right_10k = peak_summit + self.third_lambda_region / 2
                right_5k = peak_summit + self.second_lambda_region / 2
                right_1k = peak_summit + self.first_lambda_region / 2
                (cnum_10k, cnum_5k, cnum_1k, cnum_peak) = (0, 0, 0, 0)
                (tnum_10k, tnum_5k, tnum_1k, tnum_peak) = (0, 0, 0, 0)
                smallest = min(left_peak, left_10k, left_5k, left_1k)
                largest = max(right_peak, right_10k, right_5k, right_1k)
                while index_ctag < len_ctags:
                    if ctags[index_ctag] < smallest:
                        # go to next control tag
                        index_ctag += 1
                    elif largest < ctags[index_ctag]:
                        # finalize and go to next peak region
                        flag_find_ctag_locally = False
                        index_ctag = prev_index_ctag
                        break
                    else:
                        if not flag_find_ctag_locally:
                            flag_find_ctag_locally = True
                            prev_index_ctag = index_ctag
                        p = ctags[index_ctag]
                        if left_peak <= p <= right_peak:
                            cnum_peak += 1
                        if left_1k <= p <= right_1k:
                            cnum_10k += 1
                            cnum_5k += 1
                            cnum_1k += 1
                        elif left_5k <= p <= right_5k:
                            cnum_10k += 1
                            cnum_5k += 1
                        elif left_10k <= p <= right_10k:
                            cnum_10k += 1
                        index_ctag += 1  # go to next tag
                while index_ttag < len_ttags:
                    if ttags[index_ttag] < smallest:
                        # go to next treatment tag
                        index_ttag += 1
                    elif largest < ttags[index_ttag]:
                        # finalize and go to next peak region
                        flag_find_ttag_locally = False
                        index_ttag = prev_index_ttag
                        break
                    else:
                        if not flag_find_ttag_locally:
                            flag_find_ttag_locally = True
                            prev_index_ttag = index_ttag
                        p = ttags[index_ttag]
                        if left_peak <= p <= right_peak:
                            tnum_peak += 1
                        if left_1k <= p <= right_1k:
                            tnum_10k += 1
                            tnum_5k += 1
                            tnum_1k += 1
                        elif left_5k <= p <= right_5k:
                            tnum_10k += 1
                            tnum_5k += 1
                        elif left_10k <= p <= right_10k:
                            tnum_10k += 1
                        index_ttag += 1  # go to next tag
                clambda_peak = float(cnum_peak) / peak_length * treat2control_ratio * window_size_4_lambda
                clambda_10k = float(cnum_10k) / self.third_lambda_region * treat2control_ratio * window_size_4_lambda
                clambda_5k = float(cnum_5k) / self.second_lambda_region * treat2control_ratio * window_size_4_lambda
                clambda_1k = float(cnum_1k) / self.first_lambda_region * treat2control_ratio * window_size_4_lambda
                tlambda_peak = float(tnum_peak) / peak_length * window_size_4_lambda
                tlambda_10k = float(tnum_10k) / self.third_lambda_region * window_size_4_lambda
                tlambda_5k = float(tnum_5k) / self.second_lambda_region * window_size_4_lambda
                tlambda_1k = float(tnum_1k) / self.first_lambda_region * window_size_4_lambda
                if pass_1k:
                    # for experiment w/o control, peak region lambda and 1k region lambda are ignored!
                    local_lambda = max(lambda_bg, tlambda_10k, tlambda_5k, clambda_10k, clambda_5k)
                else:
                    # for experiment w/ control
                    if self.futurefdr:
                        local_lambda = max(lambda_bg, tlambda_10k, tlambda_5k, clambda_peak, clambda_10k, clambda_5k, clambda_1k)
                    else:
                        local_lambda = max(lambda_bg, clambda_peak, clambda_10k, clambda_5k, clambda_1k)
            p_tmp = poisson_cdf(tlambda_peak, local_lambda, lower=False)
            if p_tmp <= 0:
                peak_pvalue = 3100
            else:
                peak_pvalue = mathlog(p_tmp, 10) * -10
            if peak_pvalue > self.pvalue:
                n_chrom += 1
                total += 1
                peak_fold_enrichment = float(peak_height) / local_lambda * window_size_4_lambda / self.d
                final_peak_info[chrom].append((peak_start, peak_end, peak_length, peak_summit, peak_height, peak_num_tags, peak_pvalue, peak_fold_enrichment))
        self.debug("#3 peaks whose pvalue < cutoff: %d" % (n_chrom))
    self.info("#3 Finally, %d peaks are called!" % (total))
    return final_peak_info

def __filter_w_control(self, peak_info, treatment, control, pass_sregion=False,
                       write2wig=False, fake_when_missing=False,
                       to_small_sample=False):
    """Use control data to calculate several lambda values around 1k,
    5k and 10k region around peak summit. Choose the highest one as
    local lambda, then calculate p-value in poisson distribution.

    Parameters:

    1. pass_sregion: If set True, the slocal lambda will be ignored. Use
    this when the control is not available.
    2. write2wig: obsolete
    3. fake_when_missing: when a chromosome is missing in control but
    existing in IP or vice versa, MACS will fake a tag to pass the process.
    4. to_small_sample: when set as True, balance the number of tags by
    linearly scaling larger sample to smaller sample. The default
    behaviour is to linearly scale smaller to larger one.

    Return value type in this format:
    a dictionary
    key value : chromosome
    items : array of (peak_start,peak_end,peak_length,peak_summit,peak_height,peak_num_tags,peak_pvalue,peak_fold_enrichment)
    """
    lambda_bg0 = float(self.scan_window) * treatment.total / self.gsize
    # bug fixed...
    if treatment.total > control.total:
        t_ratio = 1.00
        c_ratio = float(treatment.total) / control.total
    else:
        t_ratio = float(control.total) / treatment.total
        c_ratio = 1.00
    if to_small_sample:
        tmp = t_ratio
        t_ratio = 1 / c_ratio
        c_ratio = 1 / tmp
    final_peak_info = {}
    chrs = peak_info.keys()
    chrs.sort()
    total = 0
    for chrom in chrs:
        self.debug("#3 Chromosome %s" % (chrom))
        n_chrom = 0
        final_peak_info[chrom] = []
        peak_list = peak_info[chrom]
        try:
            (ctags, tmp) = control.get_locations_by_chr(chrom)
        except:
            self.warn("Missing %s data, skip it..." % (chrom))
            if fake_when_missing:
                ctags = [-1, ]
                self.warn("Fake a tag at %s:%d" % (chrom, -1))
                tmp = []
            else:
                continue
        try:
            (ttags, tmp) = treatment.get_locations_by_chr(chrom)
        except:
            self.warn("Missing %s data, skip it..." % (chrom))
            if fake_when_missing:
                ttags = [-1, ]
                self.warn("Fake a tag at %s:%d" % (chrom, -1))
                tmp = []
            else:
                continue
        index_ctag = 0  # index for control tags
        index_ttag = 0  # index for treatment tags
        flag_find_ctag_locally = False
        flag_find_ttag_locally = False
        prev_index_ctag = 0
        prev_index_ttag = 0
        len_ctags = len(ctags)
        len_ttags = len(ttags)
        for i in range(len(peak_list)):
            (peak_start, peak_end, peak_length, peak_summit, peak_height, peak_num_tags) = peak_list[i]
            #window_size_4_lambda = min(self.first_lambda_region,max(peak_length,self.scan_window))
            window_size_4_lambda = max(peak_length, self.scan_window)
            lambda_bg = lambda_bg0 / self.scan_window * window_size_4_lambda
            if self.nolambda:
                # skip local lambda
                local_lambda = lambda_bg
                tlambda_peak = float(peak_num_tags) / peak_length * window_size_4_lambda
            else:
                left_peak = peak_start + self.shift_size   # go to middle point of the first fragment
                right_peak = peak_end - self.shift_size    # go to middle point of the last fragment
                left_lregion = peak_summit - self.lregion / 2
                left_sregion = peak_summit - self.sregion / 2
                right_lregion = peak_summit + self.lregion / 2
                right_sregion = peak_summit + self.sregion / 2
                #(cnum_10k,cnum_5k,cnum_1k,cnum_peak) = (0,0,0,0)
                #(tnum_10k,tnum_5k,tnum_1k,tnum_peak) = (0,0,0,0)
                (cnum_sregion, cnum_lregion, cnum_peak,
                 tnum_sregion, tnum_lregion, tnum_peak) = (0, 0, 0, 0, 0, 0)
                #smallest = min(left_peak,left_10k,left_5k,left_1k)
                #largest = max(right_peak,right_10k,right_5k,right_1k)
                while index_ctag < len_ctags:
                    if ctags[index_ctag] < left_lregion:
                        # go to next control tag
                        index_ctag += 1
                    elif index_ctag + 1 >= len_ctags or right_lregion < ctags[index_ctag]:
                        # If we move out of the lregion or reach the chromosome end,
                        # finalize and go to next peak region
                        # Thanks to Jake Biesinger
                        flag_find_ctag_locally = False
                        index_ctag = prev_index_ctag
                        break
                    else:
                        if not flag_find_ctag_locally:
                            flag_find_ctag_locally = True
                            prev_index_ctag = index_ctag
                        p = ctags[index_ctag]
                        if left_peak <= p <= right_peak:
                            cnum_peak += 1
                        if left_sregion <= p <= right_sregion:
                            cnum_sregion += 1
                            cnum_lregion += 1
                        else:
                            cnum_lregion += 1
                        index_ctag += 1  # go to next tag
                while index_ttag < len_ttags:
                    if ttags[index_ttag] < left_lregion:
                        # go to next treatment tag
                        index_ttag += 1
                    elif index_ttag + 1 >= len_ttags or right_lregion < ttags[index_ttag]:
                        # If we move out of the lregion or reach the chromosome end,
                        # finalize and go to next peak region
                        # Thanks to Jake Biesinger
                        flag_find_ttag_locally = False
                        index_ttag = prev_index_ttag
                        break
                    else:
                        if not flag_find_ttag_locally:
                            flag_find_ttag_locally = True
                            prev_index_ttag = index_ttag
                        p = ttags[index_ttag]
                        if left_peak <= p <= right_peak:
                            tnum_peak += 1
                        if left_sregion <= p <= right_sregion:
                            tnum_sregion += 1
                            tnum_lregion += 1
                        else:
                            tnum_lregion += 1
                        index_ttag += 1  # go to next tag
                clambda_peak = float(cnum_peak) / peak_length * c_ratio * window_size_4_lambda
                clambda_lregion = float(cnum_lregion) / self.lregion * c_ratio * window_size_4_lambda
                clambda_sregion = float(cnum_sregion) / self.sregion * c_ratio * window_size_4_lambda
                tlambda_peak = float(tnum_peak) / peak_length * t_ratio * window_size_4_lambda
                tlambda_lregion = float(tnum_lregion) / self.lregion * t_ratio * window_size_4_lambda
                tlambda_sregion = float(tnum_sregion) / self.sregion * t_ratio * window_size_4_lambda
                if pass_sregion:
                    # for experiment w/o control, peak region lambda and sregion region lambda are ignored!
                    local_lambda = max(lambda_bg, tlambda_lregion)
                else:
                    # for experiment w/ control
                    local_lambda = max(lambda_bg, clambda_peak, clambda_lregion, clambda_sregion)
            p_tmp = poisson_cdf(tlambda_peak, local_lambda, lower=False)
            if p_tmp <= 0:
                peak_pvalue = 3100
            else:
                peak_pvalue = mathlog(p_tmp, 10) * -10
            if peak_pvalue > self.pvalue:
                n_chrom += 1
                total += 1
                peak_fold_enrichment = float(peak_height) / local_lambda * window_size_4_lambda / self.d
                final_peak_info[chrom].append((peak_start, peak_end, peak_length, peak_summit, peak_height, peak_num_tags, peak_pvalue, peak_fold_enrichment))
            # uncomment the following two lines, MACS will report the peaks been rejected.
            #else:
            #    #self.debug("Reject the peak at %s:%d-%d with local_lambda: %.2f and -log10pvalue: %.2f" % (chrom,peak_start,peak_end,local_lambda,peak_pvalue))
        self.debug("#3 peaks whose pvalue < cutoff: %d" % (n_chrom))
    self.info("#3 Finally, %d peaks are called!" % (total))
    return final_peak_info
