def log2_oulierfilter(df_by_cell, plot=False):
    log2_df = np.log2(df_by_cell + 1)
    top_log2 = find_top_common_genes(log2_df)
    if top_log2.empty:
        print("no common genes found")
        return log2_df, log2_df.transpose()
    log2_df2 = pd.DataFrame(pd.to_numeric(log2_df, errors='coerce'))
    log_mean = top_log2.mean(axis=0).sort_values(ascending=False)
    # reindex_axis was removed from pandas; reindex(..., axis=1) is the equivalent call
    log2_sorted = top_log2.reindex(top_log2.mean(axis=0).sort_values(ascending=False).index, axis=1)
    xticks = []
    keep_col = []
    log2_cutoff = np.average(log2_sorted) - np.std(log2_sorted)
    avg_cutoff = np.average(log2_cutoff)
    for col, m in zip(log2_sorted.columns.tolist(), log2_sorted.mean()):
        if m > avg_cutoff:
            keep_col.append(col)
            xticks.append(col + ' ' + str("%.2f" % m))
    filtered_df_by_cell = df_by_cell[keep_col]
    filtered_df_by_gene = filtered_df_by_cell.transpose()
    filtered_log2 = np.log2(filtered_df_by_cell[filtered_df_by_cell > 0])
    if plot:
        ax = sns.boxplot(data=filtered_log2, whis=.75, notch=True)
        ax = sns.stripplot(x=filtered_log2.columns.values,
                           y=filtered_log2.mean(axis=0),
                           size=4, jitter=True, edgecolor="gray")
        xtickNames = plt.setp(ax, xticklabels=xticks)
        plt.setp(xtickNames, rotation=90, fontsize=9)
        plt.show()
        plt.clf()
        sns.distplot(filtered_log2.mean())
        plt.show()
    log2_expdf_cell = np.log2(filtered_df_by_cell + 1)
    log2_expdf_gene = log2_expdf_cell.transpose()
    return log2_expdf_cell, log2_expdf_gene
def __add__(self, other):
    """
    Addition in real space; an optimization of Manning & Schuetze, p. 337 (eq. 9.21)

    >>> a_real = .5
    >>> b_real = .25
    >>> a_bw = BitWeight(a_real)
    >>> b_bw = BitWeight(b_real)
    >>> BitWeight.close_enough((a_bw + b_bw).to_real, a_real + b_real)
    True
    >>> (BitWeight(.25) + BitWeight(.25)).to_real
    0.5
    """
    other_bw = other if hasattr(other, "bw") else BitWeight(other)
    if other_bw.bw - self.bw > self.BIG:
        to_return = self.bw
    elif self.bw - other_bw.bw > self.BIG:
        to_return = other_bw.bw
    else:
        if other_bw.bw > self.bw:
            to_return = other_bw.bw - log2(1.0 + exp2(other_bw.bw - self.bw))
        elif other_bw.bw < self.bw:
            to_return = self.bw - log2(exp2(self.bw - other_bw.bw) + 1.0)
        else:
            to_return = other_bw.bw - 1.0
            # not 1 + x_bw.bw as you might think, as BWs are
            # NEGATIVE log-weights
    return BitWeight(to_return, True)
def SB_MotifTwo(y, binarizeHow='diff'):
    """
    Looks at local motifs in a binary symbolization of the time series,
    which is performed by a given binarization method.

    Arguments
    ---------
    y: a nitime time-series object, or numpy vector
    """
    # Make the input a row vector of numbers:
    y = makeRowVector(vectorize(y))

    # Make binarization on incremental differences:
    if binarizeHow == 'diff':
        yBin = ((np.sign(np.diff(y))) + 1.) / 2.
    else:
        raise ValueError(binarizeHow)

    # Initialize output dictionary
    out = {}

    # Where the difference is 0, 1
    r0 = yBin == 0
    r1 = yBin == 1

    out['u'] = np.mean(r1)
    out['d'] = np.mean(r0)
    out['h'] = -(out['u']*np.log2(out['u']) + out['d']*np.log2(out['d']))

    return out
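# Minimal sanity check (assumes the vectorize/makeRowVector helpers used above are importable):
# a strictly alternating series has 3 rises and 2 falls among its 5 increments, so
# u = 0.6, d = 0.4 and the binary entropy h = -(0.6*log2(0.6) + 0.4*log2(0.4)) ~ 0.971.
out = SB_MotifTwo(np.array([0., 1., 0., 1., 0., 1.]))
# Note: a monotone series gives u or d equal to 0, and the 0*log2(0) term then makes h NaN.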
def Classification(theta, tag_dict, feature_dict, features):
    """
    Predict the tag for the naive Bayes model; theta is the decision
    threshold on the likelihood ratio between the two tags.
    """
    total = sum(tag_dict.values())
    prob = []
    for tag in range(2):
        tag = str(tag)
        probability = np.log2(tag_dict[tag] / total)
        searchFlag = 1
        for featureID in range(len(features)):
            feature = features[featureID]
            item = (tag, feature, featureID)
            if item not in feature_dict:
                searchFlag = 0
                break
            tmpProb = np.log2(feature_dict[item] / tag_dict[tag])
            probability = probability + tmpProb
        prob.append(probability)
        if searchFlag == 0:
            break
    if searchFlag == 0:
        if tag == '1':
            predict_tag = '0'
        else:
            predict_tag = '1'
    else:
        ratio = 2.0 ** (prob[1] - prob[0])
        if ratio >= theta:
            predict_tag = '1'
        else:
            predict_tag = '0'
    return predict_tag
def __cqt_filter_fft(sr, fmin, n_bins, bins_per_octave, tuning,
                     filter_scale, norm, sparsity, hop_length=None,
                     window='hann'):
    '''Generate the frequency-domain constant-Q filter basis.'''

    basis, lengths = filters.constant_q(sr,
                                        fmin=fmin,
                                        n_bins=n_bins,
                                        bins_per_octave=bins_per_octave,
                                        tuning=tuning,
                                        filter_scale=filter_scale,
                                        norm=norm,
                                        pad_fft=True,
                                        window=window)

    # Filters are padded up to the nearest integral power of 2
    n_fft = basis.shape[1]

    if (hop_length is not None and
            n_fft < 2.0**(1 + np.ceil(np.log2(hop_length)))):
        n_fft = int(2.0 ** (1 + np.ceil(np.log2(hop_length))))

    # re-normalize bases with respect to the FFT window length
    basis *= lengths[:, np.newaxis] / float(n_fft)

    # FFT and retain only the non-negative frequencies
    fft_basis = fft.fft(basis, n=n_fft, axis=1)[:, :(n_fft // 2) + 1]

    # sparsify the basis
    fft_basis = util.sparsify_rows(fft_basis, quantile=sparsity)

    return fft_basis, n_fft, lengths
def walk(self, Q, freq_base=A0, freq_max=C8, hop=0.02,
         start=0, end=None, join_channels=True, win_shape=numpy.hamming):
    ''''''
    #
    Q = int(Q)
    assert Q > 1
    #
    samplerate = self.audio.samplerate
    if not freq_max:
        freq_max = samplerate / 2.0
    assert 1 <= freq_base <= freq_max <= samplerate / 2.0
    #
    step = int(samplerate * hop)
    win = int(round(Q * float(samplerate) / freq_base))
    assert 0 < step <= win
    #
    k_max = int(numpy.log2(float(freq_max) / freq_base)
                / numpy.log2(float(Q + 1) / Q))
    #
    var = self.pre_calculate(Q, k_max, win, win_shape)
    print(len(var.WL), var.WL)
    fqs = []
    for wl in var.WL:
        fqs.append("%.2f" % (float(samplerate) / wl * Q))
    print(fqs)
    transform = self.transform
    #
    for samples in self.audio.walk(win, step, start, end, join_channels):
        if join_channels:
            yield transform(samples, Q, k_max, pre_var=var)
        else:
            yield [transform(ch, Q, k_max, pre_var=var)
                   for ch in samples]
def mutual_info(X, Y):
    """
    Calculate the mutual information between X and Y.

    Note: The input X, Y may look like this:
        X = [event1, event2, event3, ..., eventn]
        Y = [event1, event2, event3, ..., eventn]
    Every event_i represents the outcome of that random variable.

    Args:
        X: An array-like random variable.
        Y: An array-like random variable.

    Returns:
        The mutual information score between X and Y.
    """
    X_np = np.array(X)
    Y_np = np.array(Y)
    P_t1 = np.average(X_np)
    P_t0 = 1 - P_t1
    P_c1 = np.average(Y_np)
    P_c0 = 1 - P_c1
    P_t1c1 = np.average(X_np & Y_np)
    P_t1c0 = np.average(X_np & ~Y_np)
    P_t0c1 = np.average(~X_np & Y_np)
    P_t0c0 = np.average(~X_np & ~Y_np)
    mi = P_t1c1 * np.log2(P_t1c1 / (P_t1 * P_c1)) +\
        P_t1c0 * np.log2(P_t1c0 / (P_t1 * P_c0)) +\
        P_t0c1 * np.log2(P_t0c1 / (P_t0 * P_c1)) +\
        P_t0c0 * np.log2(P_t0c0 / (P_t0 * P_c0))
    return mi if not np.isnan(mi) else 0
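# Quick check with boolean inputs (bitwise ~ only acts as logical NOT for boolean
# arrays, so passing dtype=bool is the safe way to call this): two independent fair
# coins share no information, so the score is 0.
X = np.array([1, 1, 0, 0], dtype=bool)
Y = np.array([1, 0, 1, 0], dtype=bool)
mutual_info(X, Y)  # -> 0.0
# Note: if any joint cell has probability 0 (e.g. X identical to Y), the 0*log2(0)
# term makes the sum NaN and the final guard returns 0, understating the dependence.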
def __entropy__(self, attribute, sample, free=None):
    """
    Calculates the entropy of a given attribute.

    :type attribute: str
    :param attribute: The attribute name.

    :type free: str
    :param free: optional -- If the attribute is dependent on another attribute,
        provide the name of the free attribute here.

    :rtype: tuple
    :return: A tuple containing the name of the attribute alongside its entropy.
    """
    if not free:
        # use concrete lists/comprehensions so np.sum sees real values
        # (summing a bare map object silently misbehaves under Python 3)
        counts = Counter(y.nodes[attribute].color for y in sample).values()
        return attribute, -1. * np.sum(
            [(float(x) / len(sample)) * np.log2(float(x) / len(sample)) for x in counts]
        )
    else:
        conditionals = Counter(
            (x.nodes[attribute].color, x.nodes[free].color) for x in sample
        )
        entropy = 0.
        # iterates over the values of the conditioned attribute
        for value in set(x[0] for x in conditionals.keys()):
            marginal = self.__marginalize__(conditionals, value)
            entropy += marginal * np.log2(marginal)

        return (attribute, free), -1. * entropy
def hurstexp_welchper(data, samp=1.05, f_max=0, consider_fBm=False):
    """
    Compute the Hurst exponent of a signal using the Welch periodogram.

    data : your signal
    samp : sampling rate in Hz, 1 for an fMRI series
    f_max : the highest frequency you want to take into account
    """
    # data could be two dimensional (but no more...); in that case time
    # should be on the second axis
    time_series = TimeSeries(data=data, sampling_rate=samp)
    spectral_analysis = SpectralAnalyzer(time_series)
    frq, pwr = spectral_analysis.psd
    # We need to take only the small frequencies, but the exact choice is a
    # bit arbitrary: we need to have alpha between 0 and 1
    if f_max == 0:
        masker = frq > 0
    else:
        masker = np.all([(frq > 0), (frq < f_max)], axis=0)
    log2frq = np.log2(frq[masker])
    log2pwr = np.log2(pwr.T[masker])
    tmp = np.polyfit(log2frq, log2pwr, deg=1)
    if consider_fBm:
        return (1 - tmp[0]) / 4, {'aest': tmp[1],
                                  'log2frq': log2frq, 'log2pwr': log2pwr}
    return (1 - tmp[0]) / 2, {'aest': tmp[1],
                              'log2frq': log2frq, 'log2pwr': log2pwr}
def __lazy_cost_function__(H, Y):
    result = 0.0
    for i in range(0, Y.shape[0]):
        a = np.nan_to_num(np.log2(H[i]) * Y[i])
        b = np.nan_to_num((1. - Y[i]) * np.log2(1. - H[i]))
        result += a + b
    return result
def getBits(self, cell):
    zero = [-self.markerArea[i]/2. for i in [0, 1]]
    bitx = [int(i) for i in bin(int(cell[0]))[::-1][:-2]]
    bity = [int(i) for i in bin(int(cell[1]))[::-1][:-2]]
    s0 = int(np.log2(self.cellsPerBlock[0]*self.noBlocks[0]))
    s1 = int(np.log2(self.cellsPerBlock[1]*self.noBlocks[1]))
    for i in range(s0-len(bitx)):
        bitx.append(0)
    for i in range(s1-len(bity)):
        bity.append(0)
    # np.bool was removed from recent NumPy releases; plain bool is equivalent
    tx = np.zeros(s0, dtype=bool)
    ty = np.zeros(s1, dtype=bool)
    px = np.empty((s0, 2))
    py = np.empty((s1, 2))
    for i, b in enumerate(bitx):
        x = zero[0] + mod(i+1, self.noBitsX)*self.bitDistance
        # integer division keeps the row index whole under Python 3
        y = zero[1] + ((i+1)//self.noBitsY)*self.bitDistance
        px[i] = (x, y)
        tx[i] = b
    for i, b in enumerate(bity):
        x = zero[0] + (self.noBitsX - mod(i+1, self.noBitsX) - 1)*self.bitDistance
        y = zero[1] + (self.noBitsY - (i+1)//self.noBitsY - 1)*self.bitDistance
        py[i] = (x, y)
        ty[i] = b
    return px, py, tx, ty
def __init__(self, data, noDataValue):
    # dataValues = [x[0] for x in dataTable if x[0] <> noDataValue]
    # data = sorted(dataValues)
    d = data[data["DataValue"] != noDataValue].describe(
        percentiles=[.10, .25, .5, .75, .90])
    count = self.NumberofObservations = d["DataValue"]["count"]
    self.NumberofCensoredObservations = data[data["CensorCode"] != "nc"].count()
    self.ArithemticMean = round(d["DataValue"]["mean"], 5)

    sumval = 0
    sign = 1
    for dv in data["DataValue"]:
        if dv == 0:
            sumval = sumval + numpy.log2(1)
        else:
            if dv < 0:
                sign = sign * -1
            sumval = sumval + numpy.log2(numpy.absolute(dv))
    if count > 0:
        self.GeometricMean = round(sign * (2 ** float(sumval / float(count))), 5)
    self.Maximum = round(d["DataValue"]["max"], 5)
    self.Minimum = round(d["DataValue"]["min"], 5)
    self.StandardDeviation = round(d["DataValue"]["std"], 5)
    # coefficient of variation is std/mean (the original computed the column variance here)
    self.CoefficientofVariation = round(
        d["DataValue"]["std"] / d["DataValue"]["mean"], 5)
    ## Percentiles
    self.Percentile10 = round(d["DataValue"]["10%"], 5)
    self.Percentile25 = round(d["DataValue"]["25%"], 5)
    self.Percentile50 = round(d["DataValue"]["50%"], 5)
    self.Percentile75 = round(d["DataValue"]["75%"], 5)
    self.Percentile90 = round(d["DataValue"]["90%"], 5)
def rms_total(x, window_size=256):
    # 2**(log2(len(x)) - log2(window_size)) is simply len(x) / window_size
    n_windows = int(pow(2, numpy.log2(len(x)) - numpy.log2(window_size)))
    rms_tot = numpy.zeros(n_windows)
    for i in range(n_windows):
        w = x[i*window_size:(i+1)*window_size]
        rms_tot[i] = rms(w, window_size)
    return rms_tot
def stop(k, k_l, k_r):
    # E, T, T_min, N and measure() come from the enclosing scope
    gain = E - T[T_min]

    def count(lst):
        return len(Counter(lst).keys())

    delta = np.log2(float(3**count(k) - 2)) - (
        count(k)*measure(k) - count(k_l)*measure(k_l) - count(k_r)*measure(k_r))
    # print(gain, (np.log2(N-1)+delta)/N)
    return gain < (np.log2(N-1) + delta)/N or T_min == 0
def prepare_logged(x, y):
    """
    Transform `x` and `y` to a log scale while dealing with zeros.

    This function scales `x` and `y` such that the points that are zero in one
    array are set to the min of the other array.

    When plotting expression data, frequently one sample will have reads in a
    particular feature but the other sample will not.  Expression data also
    tends to look better on a log scale, but log(0) is undefined and therefore
    cannot be shown on a plot.  This function allows these points to be shown,
    piled up along one side of the plot.

    :param x,y: NumPy arrays
    """
    xi = np.log2(x)
    yi = np.log2(y)

    xv = np.isfinite(xi)
    yv = np.isfinite(yi)

    global_min = min(xi[xv].min(), yi[yv].min())
    global_max = max(xi[xv].max(), yi[yv].max())

    xi[~xv] = global_min
    yi[~yv] = global_min

    return xi, yi
def hz_to_midi(frequencies):
    """Get MIDI note number(s) for given frequencies

    Examples
    --------
    >>> librosa.hz_to_midi(60)
    34.506
    >>> librosa.hz_to_midi([110, 220, 440])
    array([ 45.,  57.,  69.])

    Parameters
    ----------
    frequencies : float or np.ndarray [shape=(n,), dtype=float]
        frequencies to convert

    Returns
    -------
    note_nums : number or np.ndarray [shape=(n,), dtype=float]
        MIDI notes to `frequencies`

    See Also
    --------
    midi_to_hz
    note_to_midi
    hz_to_note
    """
    return 12 * (np.log2(np.asanyarray(frequencies)) - np.log2(440.0)) + 69
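# The mapping is simply 12*log2(f/440) + 69, so concert A (440 Hz) lands exactly on
# MIDI 69 and middle C (~261.63 Hz) close to MIDI 60; a rough sanity check:
hz_to_midi(261.63)  # -> approximately 60.0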
def means2idxarrays(g1, g2, i_bins, c_bins, difference):
    '''take two arrays of values and return the initial values
    and differences as numpy digitised arrays'''

    if difference == "relative":
        # calculate difference between mean values for group1 and group2
        # g1 and g2 always the same length
        change = [g2[x] - g1[x] for x in range(0, len(g1))]
        initial = g1

    elif difference == "logfold":
        change = [np.log2((g2[x] + 1.0) / (g1[x] + 1.0))
                  for x in range(0, len(g1))]
        initial = [np.log2(g1[x] + 1.0) for x in range(0, len(g1))]

    elif difference == "abs_logfold":
        change = [abs(np.log2((g2[x] + 1.0) / (g1[x] + 1.0)))
                  for x in range(0, len(g1))]
        initial = [max(np.log2(g1[x] + 1.0), np.log2(g2[x] + 1.0))
                   for x in range(0, len(g1))]

    # return arrays of len(change) with the index position in c_bins
    # corresponding to the bin in which the value of change falls
    change_idx = np.digitize(change, c_bins, right=True)
    initial_idx = np.digitize(initial, i_bins, right=True)

    return (change_idx, initial_idx)
def _ir_cum(self, alpha=1.0): code, _ = self.encode() N = self.n_states cw0 = np.zeros(N - 1) # cw0 counts the appearance of new states only cw1 = np.zeros(N - 1) # cw1 counts the appearance of all compror states BL = np.zeros(N - 1) # BL is the block length of compror codewords j = 0 for i in range(len(code)): if self.code[i][0] == 0: cw0[j] = 1 cw1[j] = 1 BL[j] = 1 j += 1 else: L = code[i][0] cw1[j] = 1 BL[j:j + L] = L # range(1,L+1) j = j + L h0 = np.log2(np.cumsum(cw0)) h1 = np.log2(np.cumsum(cw1)) h1 = h1 / BL ir = alpha * h0 - h1 ir[ir < 0] = 0 return ir, h0, h1
def _ir_cum2(self, alpha=1.0): code, _ = self.encode() N = self.n_states BL = np.zeros(N - 1) # BL is the block length of compror codewords h0 = np.log2(np.cumsum( [1.0 if sfx == 0 else 0.0 for sfx in self.sfx[1:]]) ) """ h1 = np.array([h if m == 0 else h+np.log2(m) for h,m in zip(h0,self.lrs[1:])]) h1 = np.array([h if m == 0 else h+np.log2(m) for h,m in zip(h0,self.max_lrs[1:])]) h1 = np.array([h if m == 0 else h+np.log2(m) for h,m in zip(h0,self.avg_lrs[1:])]) """ h1 = np.array([np.log2(i + 1) if m == 0 else np.log2(i + 1) + np.log2(m) for i, m in enumerate(self.max_lrs[1:])]) j = 0 for i in range(len(code)): if self.code[i][0] == 0: BL[j] = 1 j += 1 else: L = code[i][0] BL[j:j + L] = L # range(1,L+1) j = j + L h1 = h1 / BL ir = alpha * h0 - h1 ir[ir < 0] = 0 # Really a HACK here!!!!! return ir, h0, h1
def test_quasigraph(self, plot=False): sol = self.solver errz = [] errl = [] ks = np.arange(1,5) for k in ks: self.scheme.h = pow(2,-k) sol.initialize(u0=self.u0,time=1, name='{0}_{1}'.format(type(self).__name__, k)) sol.run() zexact = sol.system.exact(sol.final_time(),self.u0)[0] lexact = sol.system.exact(sol.final_time(),self.u0)[2] df = sol.final()[0] - zexact logerrz = np.log2(np.abs(df)) logerrl = np.log2(np.abs(sol.final()[2] - lexact)) errz.append(logerrz) errl.append(logerrl) plt.clf() plt.subplot(1,2,1) plt.title('z') regz = order.linear_regression(ks,errz,do_plot=True) plt.plot(ks,errz,'o-') plt.legend() plt.subplot(1,2,2) plt.title(u'λ') regl = order.linear_regression(ks,errl,do_plot=True) plt.plot(ks,errl,'o-') plt.legend() oz = -regz[0] ol = -regl[0] nt.assert_greater(ol, self.expected_orders[0] - self.tol) nt.assert_greater(oz, self.expected_orders[1] - self.tol) return sol
def calc_MI_cate_feat_target(column, target, num_bins):
    vals, tmp_indexer = pd.factorize(column, na_sentinel=-1)
    p_neg = 0.238801
    p_pos = 0.761199

    max_cate = np.max(vals)
    densitys, bin_edges = np.histogram(vals, density=True)
    # print(densitys)
    # print('start')
    final_mi = 0
    for level in range(-1, max_cate + 1):
        p_cate_pos = np.sum((vals == level) & (target == 1)) / float(column.shape[0])
        p_cate_neg = np.sum((vals == level) & (target == 0)) / float(column.shape[0])
        p_cate = np.sum((vals == level)) / float(column.shape[0])
        if p_cate_pos == 0 or p_cate_neg == 0:
            continue
        final_mi += p_cate_pos * np.log2(p_cate_pos / (p_cate * p_pos))
        final_mi += p_cate_neg * np.log2(p_cate_neg / (p_cate * p_neg))
        # print('%d, %f' % (level, final_mi))
    return final_mi
def TtoJ(T, Q=1, B=None, phibwratio=None):
    """
    Compute the maximal wavelet scale J such that for a filter bank
    the largest wavelet is of bandwidth approximately T.

    Parameters:
    -----------
    T: int
        Time bandwidth for window
    Q: int
        Number of wavelets per octave
    B: int
        The reciprocal per-octave bandwidth of the wavelets
    phibwratio: float
        ratio between the lowpass filter phi and the lowest-frequency wavelet.
        Default is 2 if Q is 1 and otherwise 1.

    Returns
    --------
    J: int
        Number of logarithmically spaced wavelets
    """
    if B is None:
        B = Q
    if phibwratio is None:
        if type(Q) == np.ndarray:
            phibwratio = 1. + (Q == 1).astype(int)
        else:
            phibwratio = 1 + int(Q == 1)
    if type(Q) == np.ndarray:
        return 1 + (np.log2(T/(4*B/phibwratio))*Q + .5).astype(int)
    else:
        return 1 + int(np.log2(T/(4*B/phibwratio))*Q + .5)
def DFA(indata, scale, q, m):
    y = np.cumsum(indata - indata.mean())       # Equation 1 in paper
    RMSt = []                                   # temporary RMS variable: contains F(s,v) values
    F = []                                      # F: fluctuation function
    N = len(indata)
    print('len indata: ', N)
    for i in range(len(scale)):
        ns = int(np.floor(len(y)/scale[i]))     # number of segments: Ns = int(N/s)
        for v in range(2*ns):
            if v < ns:
                index_start = v*scale[i]
                index_end = (v+1)*scale[i]
            else:
                index_start = N - (v-ns)*scale[i] - scale[i]
                index_end = N - (v-ns)*scale[i]
            index = range(index_start, index_end)   # calculate index for each segment
            yv = y[index_start:index_end]           # extract values of time series for each segment
            c = np.polyfit(index, yv, m)
            fit = np.polyval(c, index)
            # Equation 2, but calculating only F(v,s), not F(v,s)**2
            RMSt.append(math.sqrt(np.mean((yv-fit)**2)))
        RMS = np.asarray(RMSt)                  # convert RMSt to array
        qRMS = RMS**q
        F.append(np.mean(qRMS)**(1.0/q))        # Equation 4
        del RMSt[:]                             # reset RMSt
    C = np.polyfit(np.log2(scale), np.log2(F), 1)
    H = C[0]                                    # Hurst parameter
    return (H, scale, F)
def CostFunction(self, X, y, regularization_parameter, count_sigmas = False): X = np.matrix(X) y = np.matrix(y) outs = self.FeedForward(X) J = 0 for thetas in self.Thetas: J = J + np.sum(np.square(thetas[:, 1])) J = J * regularization_parameter #linear #J = J + np.sum(np.square(outs - y)) #logistic J = J + np.sum(- (np.multiply(y, np.log2(outs)) + np.multiply((1 - y), np.log2(1 - outs)))) J = J / len(X) if count_sigmas == False: return J #linear #self.Sigmas[-1] = np.multiply(np.multiply((outs - y), outs), 1 - outs) * 2 #logistic self.Sigmas[-1] = (outs - y) / math.log1p(1) for i in reversed(range(0, self.n_hidden_layers)): self.Sigmas[i] = np.multiply(self.activations[i + 1][:, 1:], 1 - self.activations[i + 1][:, 1:]) self.Sigmas[i] = np.multiply(self.Sigmas[i], self.Sigmas[i + 1] * self.Thetas[i + 1][:, 1:]) return J
def center_zoom(Lngs, Lats):
    # Find the bounding box
    minLon, minLat, maxLon, maxLat = min(Lngs), min(Lats), max(Lngs), max(Lats)
    deltaLon, deltaLat = (maxLon - minLon), (maxLat - minLat)
    centerLon = minLon + .5*deltaLon
    centerLat = minLat + .5*deltaLat
    zoomxfac = 3600.
    zoomyfac = 2925.
    if deltaLon != 0:
        pixXperdeg = (512.0/deltaLon)
    else:
        pixXperdeg = 1.
    if deltaLat != 0:
        pixYperdeg = (512.0/deltaLat)
    else:
        pixYperdeg = 1.
    # conversion to zoom
    dx = pixXperdeg/zoomyfac
    dy = pixYperdeg/zoomyfac
    zx = np.floor(12 + np.log2(dx))
    zy = np.floor(12 + np.log2(dy))
    zoom = min(zx, zy)
    if zoom < 10:
        zoom = 10
    if zoom > 19:
        zoom = 19
    return centerLon, centerLat, zoom
def smartirs_wlocal(tf, local_scheme):
    """Calculate local term weight for a term using the weighting scheme
    specified in `local_scheme`.

    Parameters
    ----------
    tf : int
        Term frequency.
    local_scheme : {'n', 'l', 'a', 'b', 'L'}
        Local transformation scheme.

    Returns
    -------
    float
        Calculated local weight.

    """
    if local_scheme == "n":
        return tf
    elif local_scheme == "l":
        return 1 + np.log2(tf)
    elif local_scheme == "a":
        return 0.5 + (0.5 * tf / tf.max(axis=0))
    elif local_scheme == "b":
        return tf.astype('bool').astype('int')
    elif local_scheme == "L":
        return (1 + np.log2(tf)) / (1 + np.log2(tf.mean(axis=0)))
def smartirs_wglobal(docfreq, totaldocs, global_scheme):
    """Calculate global document weight based on the weighting scheme
    specified in `global_scheme`.

    Parameters
    ----------
    docfreq : int
        Document frequency.
    totaldocs : int
        Total number of documents.
    global_scheme : {'n', 't', 'p'}
        Global transformation scheme.

    Returns
    -------
    float
        Calculated global weight.

    """
    if global_scheme == "n":
        return 1.
    elif global_scheme == "t":
        return np.log2(1.0 * totaldocs / docfreq)
    elif global_scheme == "p":
        return max(0, np.log2((1.0 * totaldocs - docfreq) / docfreq))
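# A rough sketch of how the two SMART pieces combine into a single term weight
# (before any document-length normalization): with a raw term frequency of 3 and a
# term appearing in 10 of 1000 documents, the 'l'/'t' combination gives
w = smartirs_wlocal(3, "l") * smartirs_wglobal(10, 1000, "t")
# (1 + log2(3)) * log2(100) ~ 2.585 * 6.644 ~ 17.2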
def _hist_bin_doane(x):
    """
    Doane's histogram bin estimator.

    Improved version of Sturges' formula which works better for
    non-normal data. See
    stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    if x.size > 2:
        sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3)))
        sigma = np.std(x)
        if sigma > 0.0:
            # These three operations add up to
            # g1 = np.mean(((x - np.mean(x)) / sigma)**3)
            # but use only one temp array instead of three
            temp = x - np.mean(x)
            np.true_divide(temp, sigma, temp)
            np.power(temp, 3, temp)
            g1 = np.mean(temp)
            return x.ptp() / (1.0 + np.log2(x.size) +
                              np.log2(1.0 + np.absolute(g1) / sg1))
    return 0.0
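# This helper is the internal form of NumPy's 'doane' rule; in user code the same
# estimator is reached through the string option of np.histogram:
rng = np.random.default_rng(0)
skewed = rng.exponential(size=1000)          # non-normal data, where Doane's rule helps
hist, edges = np.histogram(skewed, bins='doane')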
def plot_profiles(prots, eluts, sp='Hs', plot_sums=True, shape=None, min_count=1): """ shape: (m,n) = m rows, n columns eluts: [el.NormElut(f, sp, norm_rows=False, norm_cols=False) for f in fs] """ import plotting as pl gt = seqs.GTrans() use_eluts = elutions_containing_prots(eluts, sp, seqs.names2ids(prots), min_count) shape = shape if shape else ut.sqrt_shape(len(use_eluts)+1) fig = pl.figure() for i,e in enumerate(use_eluts): sp_target = ut.shortname(e.filename)[:2] pl.subplot(shape[0],shape[1],i+1) pl.title(ut.shortname(e.filename)) pids = [gt.name2id[p] for p in prots] protsmax = max([np.max(e.normarr[r]) for p in pids if p in e.baseid2inds for r in e.baseid2inds[p]]) plot_prots(e, pids, e.baseid2inds, protsmax) if plot_sums: # plot total spectral counts normalized to match biggest peak sums = np.sum(e.normarr,axis=0) fmax = np.max(sums) pl.plot(range(sums.shape[1]), np.log2(sums[0,:]).T*np.log2(protsmax)*len(pids)/np.log2(fmax), color='k', linestyle='-', linewidth=.5) # make legend with all prots pl.subplot(shape[0],shape[1],0) for p in prots: pl.plot(0,label=p) pl.legend()
def reconstructWPT(self,new_wp,wavelet,listleaves): """ Create a new wavelet packet tree by copying in the data for the leaves and then performing the idwt up the tree to the root. Assumes that listleaves is top-to-bottom, so just reverses it. """ # Sort the list of leaves into order bottom-to-top, left-to-right working = listleaves.copy() working = working[-1::-1] level = int(np.floor(np.log2(working[0] + 1))) while level > 0: first = 2 ** level - 1 while working[0] >= first: # Note that it assumes that the whole list is backwards parent = (working[0] - 1) // 2 p = self.ConvertWaveletNodeName(parent) new_wp[p].data = pywt.idwt(new_wp[self.ConvertWaveletNodeName(working[1])].data,new_wp[self.ConvertWaveletNodeName(working[0])].data, wavelet)[:len(new_wp[p].data)] # Delete these two nodes from working working = np.delete(working, 1) working = np.delete(working, 0) # Insert parent into list of nodes at the next level ins = np.where(working > parent) if len(ins[0]) > 0: ins = ins[0][-1] + 1 else: ins = 0 working = np.insert(working, ins, parent) level = int(np.floor(np.log2(working[0] + 1))) return new_wp
cost = cost.reshape(target_sequences.shape) cost = cost * target_mask # Don't use these lines; could end up with NaN # Specially at the end of audio files where mask is # all zero for some of the shorter files in mini-batch. #cost = cost.sum(axis=1) / target_mask.sum(axis=1) #cost = cost.mean(axis=0) # Use this one instead. cost = cost.sum() cost = cost / target_mask.sum() # By default we report cross-entropy cost in bits. # Switch to nats by commenting out this line: # log_2(e) = 1.44269504089 cost = cost * lib.floatX(numpy.log2(numpy.e)) ip_cost = lib.floatX(numpy.log2(numpy.e)) * T.nnet.categorical_crossentropy( T.nnet.softmax(big_frame_independent_preds.reshape( (-1, Q_LEVELS))), target_sequences.flatten()) ip_cost = ip_cost.reshape(target_sequences.shape) ip_cost = ip_cost * target_mask ip_cost = ip_cost.sum() ip_cost = ip_cost / target_mask.sum() ### Getting the params, grads, updates, and Theano functions ### #params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param==True) #ip_params = lib.get_params(ip_cost, lambda x: hasattr(x, 'param') and x.param==True\ # and 'BigFrameLevel' in x.name) #other_params = [p for p in params if p not in ip_params] #params = ip_params + other_params
def entropy_histogram(image_array) -> float:
    num_elements = np.size(image_array)
    histogram, _ = np.histogram(image_array, 65535)
    probabilities = histogram / num_elements
    # use an explicit out= buffer so bins with probability 0 contribute exactly 0
    # (with where= alone, the untouched entries are left uninitialized)
    log_p = np.log2(probabilities, out=np.zeros_like(probabilities),
                    where=probabilities > 0)
    return float(-np.sum(np.multiply(probabilities, log_p)))
def calculate_entropy(y):
    entropy = 0
    for value in set(y):
        p = sum(y == value) / len(y)
        entropy += -p * np.log2(p + 1.0e-5)
    return entropy
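# Quick check: a fair coin carries one bit of entropy (up to the 1e-5 smoothing term):
calculate_entropy(np.array([0, 1, 0, 1]))  # -> ~1.0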
def kdensityfft(X, kernel="gau", bw="normal_reference", weights=None, gridsize=None, adjust=1, clip=(-np.inf, np.inf), cut=3, retgrid=True): """ Rosenblatt-Parzen univariate kernel density estimator Parameters ---------- X : array-like The variable for which the density estimate is desired. kernel : str ONLY GAUSSIAN IS CURRENTLY IMPLEMENTED. "bi" for biweight "cos" for cosine "epa" for Epanechnikov, default "epa2" for alternative Epanechnikov "gau" for Gaussian. "par" for Parzen "rect" for rectangular "tri" for triangular bw : str, float "scott" - 1.059 * A * nobs ** (-1/5.), where A is min(std(X),IQR/1.34) "silverman" - .9 * A * nobs ** (-1/5.), where A is min(std(X),IQR/1.34) If a float is given, it is the bandwidth. weights : array or None WEIGHTS ARE NOT CURRENTLY IMPLEMENTED. Optional weights. If the X value is clipped, then this weight is also dropped. gridsize : int If gridsize is None, min(len(X), 512) is used. Note that the provided number is rounded up to the next highest power of 2. adjust : float An adjustment factor for the bw. Bandwidth becomes bw * adjust. clip : tuple Observations in X that are outside of the range given by clip are dropped. The number of observations in X is then shortened. cut : float Defines the length of the grid past the lowest and highest values of X so that the kernel goes to zero. The end points are -/+ cut*bw*{X.min() or X.max()} retgrid : bool Whether or not to return the grid over which the density is estimated. Returns ------- density : array The densities estimated at the grid points. grid : array, optional The grid points at which the density is estimated. Notes ----- Only the default kernel is implemented. Weights aren't implemented yet. This follows Silverman (1982) with changes suggested by Jones and Lotwick (1984). However, the discretization step is replaced by linear binning of Fan and Marron (1994). This should be extended to accept the parts that are dependent only on the data to speed things up for cross-validation. References ---------- :: Fan, J. and J.S. Marron. (1994) `Fast implementations of nonparametric curve estimators`. Journal of Computational and Graphical Statistics. 3.1, 35-56. Jones, M.C. and H.W. Lotwick. (1984) `Remark AS R50: A Remark on Algorithm AS 176. Kernal Density Estimation Using the Fast Fourier Transform`. Journal of the Royal Statistical Society. Series C. 33.1, 120-2. Silverman, B.W. (1982) `Algorithm AS 176. Kernel density estimation using the Fast Fourier Transform. Journal of the Royal Statistical Society. Series C. 31.2, 93-9. """ X = np.asarray(X) X = X[np.logical_and(X > clip[0], X < clip[1])] # won't work for two columns. # will affect underlying data? # Get kernel object corresponding to selection kern = kernel_switch[kernel]() try: bw = float(bw) except: bw = bandwidths.select_bandwidth(X, bw, kern) # will cross-val fit this pattern? bw *= adjust nobs = len(X) # after trim # 1 Make grid and discretize the data if gridsize == None: gridsize = np.max((nobs, 512.)) gridsize = 2**np.ceil(np.log2(gridsize)) # round to next power of 2 a = np.min(X) - cut * bw b = np.max(X) + cut * bw grid,delta = np.linspace(a, b, int(gridsize), retstep=True) RANGE = b - a #TODO: Fix this? 
# This is the Silverman binning function, but I believe it's buggy (SS) # weighting according to Silverman # count = counts(X,grid) # binned = np.zeros_like(grid) #xi_{k} in Silverman # j = 0 # for k in range(int(gridsize-1)): # if count[k]>0: # there are points of X in the grid here # Xingrid = X[j:j+count[k]] # get all these points # # get weights at grid[k],grid[k+1] # binned[k] += np.sum(grid[k+1]-Xingrid) # binned[k+1] += np.sum(Xingrid-grid[k]) # j += count[k] # binned /= (nobs)*delta**2 # normalize binned to sum to 1/delta #NOTE: THE ABOVE IS WRONG, JUST TRY WITH LINEAR BINNING binned = fast_linbin(X, a, b, gridsize) / (delta * nobs) # step 2 compute FFT of the weights, using Munro (1976) FFT convention y = forrt(binned) # step 3 and 4 for optimal bw compute zstar and the density estimate f # don't have to redo the above if just changing bw, ie., for cross val #NOTE: silverman_transform is the closed form solution of the FFT of the #gaussian kernel. Not yet sure how to generalize it. zstar = silverman_transform(bw, gridsize, RANGE)*y # 3.49 in Silverman # 3.50 w Gaussian kernel f = revrt(zstar) if retgrid: return f, grid, bw else: return f, bw
def entropy(y):
    y = np.array(y)
    p = [len(y[y == k]) / len(y) for k in np.unique(y)]
    return -np.dot(p, np.log2(p))
def main(data_path, dataset, seed, _run): ex.commands['print_config']() np.random.seed(seed) reader, (train_data, valid_data, test_data, _) = get_data(data_path, dataset) config = get_config() val_config = deepcopy(config) test_config = deepcopy(config) val_config.drop_x = test_config.drop_x = 0.0 val_config.drop_i = test_config.drop_i = 0.0 val_config.drop_h = test_config.drop_h = 0.0 val_config.drop_o = test_config.drop_o = 0.0 test_config.batch_size = test_config.num_steps = 1 with tf.Graph().as_default(), tf.Session() as session: tf.set_random_seed(seed) initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale) with tf.variable_scope("model", reuse=None, initializer=initializer): mtrain = Model(is_training=True, config=config) with tf.variable_scope("model", reuse=True, initializer=initializer): mvalid = Model(is_training=False, config=val_config) mtest = Model(is_training=False, config=test_config) tf.global_variables_initializer().run() saver = tf.train.Saver() trains, vals, tests, best_val = [np.inf], [np.inf], [np.inf], np.inf for i in range(config.max_max_epoch): lr_decay = config.lr_decay ** max(i - config.max_epoch + 1, 0.0) mtrain.assign_lr(session, config.learning_rate / lr_decay) print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(mtrain.lr))) train_perplexity = run_epoch(session, mtrain, train_data, mtrain.train_op, config=config, verbose=True, is_training=True) print("Epoch: %d Train Perplexity: %.3f, Bits: %.3f" % (i + 1, train_perplexity, np.log2(train_perplexity))) valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op(), config=val_config) print("Epoch: %d Valid Perplexity (batched): %.3f, Bits: %.3f" % (i + 1, valid_perplexity, np.log2(valid_perplexity))) test_perplexity = run_epoch(session, mvalid, test_data, tf.no_op(), config=val_config) print("Epoch: %d Test Perplexity (batched): %.3f, Bits: %.3f" % (i + 1, test_perplexity, np.log2(test_perplexity))) trains.append(train_perplexity) vals.append(valid_perplexity) tests.append(test_perplexity) if valid_perplexity < best_val: best_val = valid_perplexity print("Best Batched Valid Perplexity improved to %.03f" % best_val) save_path = saver.save(session, './' + dataset + "_" + str(seed) + "_best_model.ckpt") print("Saved to:", save_path) _run.info['epoch_nr'] = i + 1 _run.info['nr_parameters'] = mtrain.nvars.item() _run.info['logs'] = {'train_perplexity': trains, 'valid_perplexity': vals, 'test_perplexity': tests} print("Training is over.") best_val_epoch = np.argmin(vals) print("Best Batched Validation Perplexity %.03f (Bits: %.3f) was at Epoch %d" % (vals[best_val_epoch], np.log2(vals[best_val_epoch]), best_val_epoch)) print("Training Perplexity at this Epoch was %.03f, Bits: %.3f" % (trains[best_val_epoch], np.log2(trains[best_val_epoch]))) print("Batched Test Perplexity at this Epoch was %.03f, Bits: %.3f" % (tests[best_val_epoch], np.log2(tests[best_val_epoch]))) _run.info['best_val_epoch'] = best_val_epoch _run.info['best_valid_perplexity'] = vals[best_val_epoch] with tf.Session() as sess: saver.restore(sess, './' + dataset + "_" + str(seed) + "_best_model.ckpt") print("Testing on non-batched Valid ...") valid_perplexity = run_epoch(sess, mtest, valid_data, tf.no_op(), config=test_config, verbose=True) print("Full Valid Perplexity: %.3f, Bits: %.3f" % (valid_perplexity, np.log2(valid_perplexity))) print("Testing on non-batched Test ...") test_perplexity = run_epoch(sess, mtest, test_data, tf.no_op(), config=test_config, verbose=True) print("Full Test Perplexity: %.3f, 
Bits: %.3f" % (test_perplexity, np.log2(test_perplexity))) _run.info['full_best_valid_perplexity'] = valid_perplexity _run.info['full_test_perplexity'] = test_perplexity return vals[best_val_epoch]
dataset = paraman.get_dataset() dataset["x_train"] = dataset["x_train"].astype(np.float) if "x_test" in dataset: dataset["x_test"] = dataset["x_test"].astype(np.float) dataset["y_test"] = dataset["y_test"].astype(np.float) dataset["y_train"] = dataset["y_train"].astype(np.float) U_init = paraman.get_initialization_centroids(dataset["x_train"]) if paraman["kmeans"]: U_final, indicator_vector_final = main_kmeans( dataset["x_train"], U_init) if paraman["palm"]: if paraman["--nb-factors"] is None: paraman["--nb-factors"] = int(np.log2(min(U_init.shape))) paraman["--residual-on-right"] = True if U_init.shape[ 1] >= U_init.shape[0] else False U_final = process_palm_on_top_of_kmeans(U_final) distances = get_distances(dataset["x_train"], U_final) indicator_vector_final = np.argmin(distances, axis=1) elif paraman["qmeans"]: # paraman_q = ParameterManagerQmeans(arguments) # paraman.update(paraman_q) if paraman["--nb-factors"] is None: paraman["--nb-factors"] = int(np.log2(min(U_init.shape))) paraman["--residual-on-right"] = True if U_init.shape[ 1] >= U_init.shape[0] else False U_final, indicator_vector_final = main_qmeans(
def _cllr(lr0, lr1):
    with np.errstate(divide='ignore'):
        cllr0 = np.mean(np.log2(1 + lr0))
        cllr1 = np.mean(np.log2(1 + 1/lr1))
        return .5 * (cllr0 + cllr1)
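# Toy illustration: lr0 holds likelihood ratios for trials where H0 is true and lr1 for
# trials where H1 is true; a well-calibrated, informative system gives a low Cllr.
lr0 = np.array([0.1, 0.2])   # LRs that correctly favour H0
lr1 = np.array([10.0, 5.0])  # LRs that correctly favour H1
_cllr(lr0, lr1)              # -> about 0.2; an uninformative system (all LRs = 1) gives 1.0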
def refExpScatter(exp_matrix, ref_samples: list, ref_mode='mean', ref_name='ref', id2symbol=None, corr_method='pearson', log_base=2, log_additive=1, group_gene_files: list = None, scatter_ncols=2, mad_ncols=4, prefix='refExpScatter', mad_ymax: float = None): """ 输入表达矩阵,指定参考样本,计算其他样本与其的相关性,并以scatter和MAD图的形式呈现 :param exp_matrix: 表达矩阵 :param ref_samples: 参考样列表 :param ref_mode: 如mean表示计算参考样的平均值作为参考表达量 :param ref_name: 参考样的名称 :param id2symbol: 第一列为geneid而第二列为symbol的文件 :param corr_method: 相关系数计算方法 :param log_base: log base default 2 :param log_additive: pseudo count added before log :param group_gene_files: 每个文件记录一组基因id, 方便对基因进行分组并标注不同的颜色 :param scatter_ncols: 散点图的列数 :param mad_ncols: MAD图的列数 :param prefix: 输出文件prefix :param mad_ymax: MAD图的y坐标最大值 :return: """ exp_df = pd.read_csv(exp_matrix, index_col=0, header=0, sep=None, engine='python') if log_base == 2: exp_df = np.log2(exp_df + log_additive) elif log_base == 10: exp_df = np.log10(exp_df + log_additive) elif log_base == 1: pass else: raise Exception(f'{log_base} not supported! use 1(mean no log), 2, 10') id2symbol = dict(x.strip().split()[:2] for x in open(id2symbol)) if id2symbol else dict() if ref_mode == 'mean': ref_exp_list = exp_df.loc[:, ref_samples].mean(axis=1) else: ref_exp_list = exp_df.loc[:, ref_samples].median(axis=1) grouped_gene_list = list() names = ['gene'] group_gene_files = [] if group_gene_files is None else group_gene_files for group_file in group_gene_files: names.append(os.path.basename(group_file)) grouped_gene_list.append( [x.strip().split()[0] for x in open(group_file)]) if grouped_gene_list: not_grouped_genes = set(exp_df.index) - set(x for y in grouped_gene_list for x in y) grouped_gene_list.insert(0, not_grouped_genes) else: grouped_gene_list = [list(exp_df.index)] group_and_color = list( zip(grouped_gene_list, colorpool.get_color_pool(len(grouped_gene_list)), names)) # plot scatter test_samples = [x for x in exp_df.columns if x not in ref_samples] exp_df['symbols'] = [ id2symbol[x] if x in id2symbol else x for x in exp_df.index ] plots = list() for each in test_samples: plot_data = exp_df.loc[:, [each]] plot_data[ref_name] = ref_exp_list plot_data = plot_data[plot_data[ref_name] * plot_data[each] > 0] plot_data = plot_data.round(4) plot_data['symbols'] = exp_df['symbols'] corr, pval = correlation_function(corr_method)(plot_data[each], plot_data[ref_name]) pval = format(pval, '.2e') if pval < 0.001 else round(pval, 4) corr = round(corr, 3) p = figure( title="{} vs {}({}) {}_Corr={} Pval={}".format( each, ref_mode, ref_name, corr_method, corr, pval), # tools="wheel_zoom,reset,hover", tooltips=[ ('x', '@{}'.format(ref_name)), ('y', '@{}'.format(each)), ('gene', '@symbols' if id2symbol else '@index'), ]) for group, color, name in group_and_color: target_index = list(set(group) & set(plot_data.index)) source_data = plot_data.loc[target_index, :] if source_data.shape[0] == 0: continue source = ColumnDataSource(source_data) p.scatter( x=ref_name, y=each, # line_color=mapper, color=color, fill_alpha=0.2, size=5, legend=name, source=source) p.xaxis.axis_label = 'log{}(expr+{}) of {}'.format( log_base, log_additive, ref_name) p.yaxis.axis_label = 'log{}(expr+{}) of {}'.format( log_base, log_additive, each) plots.append(p) fig = gridplot(plots, ncols=scatter_ncols) output_file(prefix + '.scatter.html') save(fig) # export_png(fig, prefix+'.scatter.png') # plot Variation in # gene expression as a function of gene expression level across sample replicates upper_list = [] plots = list() for each in test_samples: plot_data = 
exp_df.loc[:, [each]] plot_data[ref_name] = ref_exp_list plot_data = plot_data[plot_data[ref_name] * plot_data[each] > 0] plot_data[each] = (plot_data[each] - plot_data[ref_name]).abs() / plot_data[ref_name] plot_data = plot_data.round(4) if mad_ymax is None: describe = plot_data[each].describe() upper = describe['75%'] + 2 * (describe['75%'] - describe['25%']) else: upper = mad_ymax upper_list.append(upper) plot_data[each][plot_data[each] > upper] = upper plot_data['symbols'] = exp_df['symbols'] p = figure( title="{} vs {}({})".format(each, ref_mode, ref_name), # tools="wheel_zoom,reset,hover", tooltips=[ ('x', '@{}'.format(ref_name)), ('y', '@{}'.format(each)), ('gene', '@symbols' if id2symbol else '@index'), ]) for group, color, name in group_and_color: source_data = plot_data.loc[set(group) & set(plot_data.index)] if source_data.shape[0] == 0: continue source = ColumnDataSource(source_data) p.scatter( x=ref_name, y=each, # line_color=mapper, color=color, fill_alpha=0.2, size=5, legend=name, source=source) p.xaxis.axis_label = 'log{}(expr+{}) of {}'.format( log_base, log_additive, ref_name) p.yaxis.axis_label = '|expr - {}_expr| / {}_expr'.format( ref_mode, ref_mode) plots.append(p) if mad_ymax is not None: p.y_range = Range1d(0, mad_ymax) else: fig = gridplot(plots, ncols=mad_ncols, sizing_mode='stretch_width') output_file(prefix + '.MDA.html') save(fig)
import superimport

import numpy as np
import matplotlib.pyplot as plt
import pyprobml_utils as pml

x = np.linspace(0, 1, 10000)
# the endpoints x=0 and x=1 evaluate to 0*log2(0) = NaN and are simply not drawn
H = lambda x: -(x*np.log2(x) + (1-x) * np.log2(1-x))

plt.plot(x, H(x), '-b', lw=3)
plt.xlim((-0.01, 1.01))
plt.ylim((0, 1.01))
plt.xlabel("p(X = 1)")
plt.ylabel("H(X)")
ticks = [0, 0.5, 1]
plt.xticks(ticks)
plt.yticks(ticks)
pml.savefig("bernoulliEntropy.pdf")
plt.show()
inter_signal_value = 0 interference = 0 for signal_dict in signal_list: if signal_dict["node_type"] == node_type: if signal_dict["node_id"] == fixed_node_id: inter_signal_value = signal_dict["signal"] else: interference += signal_dict["signal"] else: interference += signal_dict["signal"] SINR = inter_signal_value / (interference + white_gaussian_noise) channel_data_size = task_time * sub_channel_bandwidth * np.log2( 1 + SINR) task_data_size += channel_data_size task_transmission_data = { "task_id": task_id, "task_data_size": task_data_size } task_transmission_data_list.append(task_transmission_data) task_transmission_data_dict_of_all_nodes[str( fixed_node_id)] = task_transmission_data_list node_type = settings.NODE_TYPE_MOBILE for mobile_node_id in range(fixed_node_num, node_num_list[experiment_median_no]): task_id_list = task_id_under_each_node_list[mobile_node_id]
def training_schedule( cur_nimg, training_set, num_gpus, lod_initial_resolution=4, # Image resolution used at the beginning. lod_training_kimg=600, # Thousands of real images to show before doubling the resolution. lod_transition_kimg=600, # Thousands of real images to show when fading in new layers. minibatch_base=16, # Maximum minibatch size, divided evenly among GPUs. minibatch_dict={}, # Resolution-specific overrides. max_minibatch_per_gpu={}, # Resolution-specific maximum minibatch size per GPU. G_lrate_base=0.001, # Learning rate for the generator. G_lrate_dict={}, # Resolution-specific overrides. D_lrate_base=0.001, # Learning rate for the discriminator. D_lrate_dict={}, # Resolution-specific overrides. lrate_rampup_kimg=0, # Duration of learning rate ramp-up. tick_kimg_base=160, # Default interval of progress snapshots. tick_kimg_dict={ 4: 160, 8: 140, 16: 120, 32: 100, 64: 80, 128: 60, 256: 40, 512: 30, 1024: 20 }): # Resolution-specific overrides. # Initialize result dict. s = dnnlib.EasyDict() s.kimg = cur_nimg / 1000.0 # Training phase. phase_dur = lod_training_kimg + lod_transition_kimg phase_idx = int(np.floor(s.kimg / phase_dur)) if phase_dur > 0 else 0 phase_kimg = s.kimg - phase_idx * phase_dur # Level-of-detail and resolution. s.lod = training_set.resolution_log2 s.lod -= np.floor(np.log2(lod_initial_resolution)) s.lod -= phase_idx if lod_transition_kimg > 0: s.lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg s.lod = max(s.lod, 0.0) s.resolution = 2**(training_set.resolution_log2 - int(np.floor(s.lod))) # Minibatch size. s.minibatch = minibatch_dict.get(s.resolution, minibatch_base) s.minibatch -= s.minibatch % num_gpus if s.resolution in max_minibatch_per_gpu: s.minibatch = min(s.minibatch, max_minibatch_per_gpu[s.resolution] * num_gpus) # Learning rate. s.G_lrate = G_lrate_dict.get(s.resolution, G_lrate_base) s.D_lrate = D_lrate_dict.get(s.resolution, D_lrate_base) if lrate_rampup_kimg > 0: rampup = min(s.kimg / lrate_rampup_kimg, 1.0) s.G_lrate *= rampup s.D_lrate *= rampup # Other parameters. s.tick_kimg = tick_kimg_dict.get(s.resolution, tick_kimg_base) return s
def d_to_phi(d):
    # Krumbein phi scale: phi = -log2(d in mm); dividing by 0.001 suggests d is given in metres
    return -np.log2(d / 0.001)
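# Example under that unit assumption: a 0.25 mm (fine sand) grain expressed in metres
d_to_phi(0.00025)  # -> 2.0, since -log2(0.25) = 2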
def compose(self, nr_of_compositions): if not self.caching_directory: self.logger.critical("This method requires caching. Abort") return None state_filepath_base = os.path.join(self.caching_directory, "compositions-") get_state_filepath = lambda needed_exp: state_filepath_base + str(int(2**needed_exp)) target_state_filepath = state_filepath_base + str(nr_of_compositions) if os.path.isdir(target_state_filepath): self.logger.info("Target state is cached. Loading it") return self.load_state(target_state_filepath) # wich compositions do we need target_exp = int( np.log2(nr_of_compositions) ) needed_compositions = [ x if (nr_of_compositions & (2**x) != 0) else -1 for x in range(target_exp + 1)] needed_compositions = list(filter(lambda a: a != -1, needed_compositions)) self.logger.info("Needed compositions: " + ", ".join(map(str, needed_compositions))) # start with a copy of the current state previous_state = self.copy() avoided_self_composition = False # which compositions already exist? Generate? for needed_exp in range(target_exp + 1): state_filepath = get_state_filepath(needed_exp) if not os.path.isdir(state_filepath): self.logger.info("[*] State 2**" + str(needed_exp) + " does not exist. Creating it.") if not needed_exp == 0: if avoided_self_composition: # only load from disk when it differs from current state previous_state_filepath = get_state_filepath(needed_exp - 1) previous_state = self.load_state(previous_state_filepath) avoided_self_composition = False previous_state.compose_with(previous_state) previous_state.print_state() # previous_state.print_state() previous_state.save_state(state_filepath) gc.collect() else: avoided_self_composition = True self.logger.info("[*] All intermediate states up to 2**" + str(target_exp) + " exist now") previous_state = self.load_state( get_state_filepath(needed_compositions[0]) ) self.logger.info("[*] Loaded state 2**" + str(needed_compositions[0])) # compose to the desired state for i in needed_compositions[1:]: self.logger.info("[*] Compose with state 2**{}".format(i)) current_state = self.load_state(get_state_filepath(i)) # while the factor of previous state and current state is not same while(current_state.factor != previous_state.factor): self.logger.info("factors are unequal ( {} != {} ), squaring".format(current_state.factor, previous_state.factor)) if current_state.factor > previous_state.factor: previous_state.squaring() else: current_state.squaring() # now the factor should be the same previous_state.compose_with(current_state) previous_state.print_state() previous_state.save_state(target_state_filepath) # caching.. return previous_state
for ii in range(len(seqs)): m[temp.index(seqs[ii][i])][i] += 1 m1 = m / np.sum(m,axis=0) m1 = m1.T m2 = m1 + 0.000000001 print 'the most probable word:' for i in m1: print temp[np.argmax(i)], # Calculating information content if args.use_back_freq: inf = np.sum( m2 * np.log2(m2/np.array(freq)), axis=-1).reshape((-1,1)) else: inf = np.log2(len(temp)) + np.sum(m2 * np.log2(m2), axis=-1).reshape((-1,1)) infm = inf * m1 # Making input to the function inp = [] for i in range(len(seqs[0])): inbetween = [] hk = [] sorting = [] sortlet = [] for ii in range(len(temp)): inbetween.append((temp[ii], infm[i][ii]))
def transfer_state(state, gate_matrix, bits): """ Transfer to the next state :param state: :param gate_matrix: :param bits: :return: """ assert (type(gate_matrix) is np.ndarray) or \ (type(gate_matrix) is paddle.Tensor and gate_matrix.dtype.name == "COMPLEX128") assert type( state) is paddle.Tensor and state.dtype.name == "COMPLEX128" and len( state.shape) == 1 # calc source_pos target_pos n = int(np.log2(state.shape[0])) source_pos = copy.deepcopy( bits) # copy bits, it should NOT change the order of bits # source_pos = [n - 1 - idex for idex in source_pos] # qubit index # source_pos = list(reversed(source_pos)) # reverse qubit index target_pos = list(range(len(bits))) # ### check # state_check = transfer_state(paddle.reshape(state, [2] * n), gate_matrix, bits) # state_check = paddle.reshape(state_check, [2 ** n]) # compressed moveaxis # compress the continuous dim before moveaxis # e.g. single operand: before moveaxis 2*2*[2]*2*2 -compress-> 4*[2]*4, after moveaxis [2]*2*2*2*2 -compress-> [2]*4*4 # double operands: before moveaxis 2*2*[2]*2*2*[2]*2*2 -compress-> 4*[2]*4*[2]*4, after moveaxis [2]*[2]*2*2*2*2*2*2 -compress-> [2]*[2]*4*4*4 # the peak rank is 5 when the number of operands is 2 assert len(source_pos) == 1 or len(source_pos) == 2 compressed_shape_before_moveaxis = [1] compressed_source_pos = [-1] * len(source_pos) for i in range(n): if i in source_pos: compressed_source_pos[source_pos.index(i)] = len( compressed_shape_before_moveaxis) compressed_shape_before_moveaxis.append(2) compressed_shape_before_moveaxis.append(1) else: compressed_shape_before_moveaxis[ -1] = compressed_shape_before_moveaxis[-1] * 2 # print([2] * n) # print(source_pos) # print('->') # print(compressed_shape) # print(compressed_source_pos) # always [1], [1, 3], or [3, 1] state = paddle.reshape(state, compressed_shape_before_moveaxis) state = complex_moveaxis(state, compressed_source_pos, target_pos) compressed_shape_after_moveaxis = state.shape # reshape state_new_shape = [2**len(bits), 2**(n - len(bits))] state = paddle.reshape(state, state_new_shape) # gate_matrix if type(gate_matrix) is np.ndarray: gate_new_shape = [ 2**(len(gate_matrix.shape) - len(bits)), 2**len(bits) ] gate_matrix = gate_matrix.reshape(gate_new_shape) gate_matrix = paddle.to_tensor(gate_matrix) elif type(gate_matrix ) is paddle.Tensor and gate_matrix.dtype.name == "COMPLEX128": pass else: assert False # matmul state = paddle.matmul(gate_matrix, state) # restore compressed moveaxis reshape state = paddle.reshape(state, compressed_shape_after_moveaxis) state = complex_moveaxis(state, target_pos, compressed_source_pos) state = paddle.reshape(state, [2**n]) # ### check # assert (np.all(state.numpy() == state_check.numpy())) return state
def update_desvars_oc(self): """ Update the design variables by means of OC-like or equivalently SAO method, using the filtered sensitivities; return the updated design variables. EXAMPLES: >>> t.update_desvars_oc() See also: sens_analysis, filter_sens_sigmund """ if not self.topydict: raise ToPyError('You must first load a TPD file!') # 'p' stays constant for a specified number of iterations from start. # 'p' is incremented, but not more than the maximum allowable value. # If continuation parameters are not specified in the input file, 'p' # will stay constant: if self.pcount >= self._phold: if (self.p + self._pincr) < self._pmax + self._pincr: if (self.pcount - self._phold) % self._pcon == 0: self.p += self._pincr if self.qcount >= self._qhold: if (self.q + self._qincr) < self._qmax: if (self.qcount - self._qhold) % self._qcon == 0: self.q += self._qincr self.pcount += 1 self.qcount += 1 # Exponential approximation of eta (damping factor): if self.itercount > 1: if self.topydict['ETA'] == 'exp': # Check TPD specified value mask = equal(self.desvarsold / self.desvars, 1) self.a = 1 + log2(abs(self.dfold / self.df)) / \ log2(self.desvarsold / self.desvars + mask) + \ mask * (self.a - 1) self.a = clip(self.a, A_LOW, A_UPP) self.eta = 1 / (1 - self.a) self.dfold = self.df.copy() self.desvarsold = self.desvars.copy() # Change move limit for compliant mechanism synthesis: if self.probtype == 'mech': move = 0.1 else: move = 0.2 lam1, lam2 = 0, 100e3 dims = self.desvars.shape while (lam2 - lam1) / (lam2 + lam1) > 1e-8 and lam2 > 1e-40: lammid = 0.5 * (lam1 + lam2) if self.probtype == 'mech': if self.approx == 'dquad': curv = -1 / (self.eta * self.desvars) * self.df beta = maximum(self.desvars - (self.df + lammid) / curv, self.void) move_upper = minimum(move, self.desvars / 3) desvars = maximum(self.void, maximum((self.desvars - move),\ minimum(self.solid, minimum((self.desvars + move), \ (self.desvars * maximum(1e-10, \ (-self.df / lammid))**self.eta)**self.q)))) else: # reciprocal or exponential desvars = maximum(self.void, maximum((self.desvars - move),\ minimum(self.solid, minimum((self.desvars + move), \ (self.desvars * maximum(1e-10, \ (-self.df / lammid))**self.eta)**self.q)))) else: # compliance or heat if self.approx == 'dquad': curv = -1 / (self.eta * self.desvars) * self.df beta = maximum(self.desvars - (self.df + lammid) / curv, self.void) move_upper = minimum(move, self.desvars / 3) desvars = maximum(self.void, maximum((self.desvars - move),\ minimum(self.solid, minimum((self.desvars + move_upper), \ beta**self.q)))) else: # reciprocal or exponential desvars = maximum(self.void, maximum((self.desvars - move),\ minimum(self.solid, minimum((self.desvars + move), \ (self.desvars * (-self.df / lammid)**self.eta)**self.q)))) # Check for passive and active elements, modify updated x: if self.pasv.any() or self.actv.any(): flatx = desvars.flatten() idx = [] if self.nelz == 0: y, x = dims for j in range(x): for k in range(y): idx.append(k * x + j) else: z, y, x = dims for i in range(z): for j in range(x): for k in range(y): idx.append(k * x + j + i * x * y) if self.pasv.any(): pasv = take(idx, self.pasv) # new indices put(flatx, pasv, self.void) # = zero density if self.actv.any(): actv = take(idx, self.actv) # new indices put(flatx, actv, self.solid) # = self.solid desvars = flatx.reshape(dims) if self.nelz == 0: if desvars.sum() - self.nelx * self.nely * self.volfrac > 0: lam1 = lammid else: lam2 = lammid else: if desvars.sum() - self.nelx * self.nely * self.nelz *\ self.volfrac > 0: lam1 
= lammid else: lam2 = lammid self.lam = lammid self.desvars = desvars # Change in design variables: self.change = (abs(self.desvars - self.desvarsold)).max() # Solid-self.void fraction: nr_s = self.desvars.flatten().tolist().count(self.solid) nr_v = self.desvars.flatten().tolist().count(self.void) self.svtfrac = (nr_s + nr_v) / self.desvars.size
def make_scales(N, min_scale=None, max_scale=None, nv=32, scaletype='log', wavelet=None, downsample=None): """Recommended to first work out `min_scale` & `max_scale` with `cwt_scalebounds`. # Arguments: N: int `len(x)` or `len(x_padded)`. min_scale, max_scale: float, float Set scale range. Obtained e.g. from `utils.cwt_scalebounds`. nv: int Number of voices (wavelets) per octave. scaletype: str['log', 'log-piecewise', 'linear'] Scaling kind to make. `'log-piecewise'` uses `utils.find_downsampling_scale`. wavelet: wavelets.Wavelet Used only for `scaletype='log-piecewise'`. downsample: int Downsampling factor. Used only for `scaletype='log-piecewise'`. # Returns: scales: np.ndarray """ if scaletype == 'log-piecewise' and wavelet is None: raise ValueError( "must pass `wavelet` for `scaletype == 'log-piecewise'`") if min_scale is None and max_scale is None and wavelet is not None: min_scale, max_scale = cwt_scalebounds(wavelet, N, use_padded_N=True) else: min_scale = min_scale or 1 max_scale = max_scale or N downsample = int( gdefaults('utils.cwt_utils.make_scales', downsample=downsample)) # number of 2^-distributed scales spanning min to max na = int(np.ceil(nv * np.log2(max_scale / min_scale))) # floor to keep freq-domain peak at or to right of Nyquist # min must be more precise, if need integer rounding do on max mn_pow = int(np.floor(nv * np.log2(min_scale))) mx_pow = mn_pow + na if scaletype == 'log': scales = 2**(np.arange(mn_pow, mx_pow) / nv) elif scaletype == 'log-piecewise': scales = 2**(np.arange(mn_pow, mx_pow) / nv) idx = find_downsampling_scale(wavelet, scales) if idx is not None: # `+downsample - 1` starts `scales2` as continuing from `scales1` # at `scales2`'s sampling rate; rest of ops are based on this design, # such as `/nv` in ssq, which divides `scales2[0]` by `nv`, but if # `scales2[0]` is one sample away from `scales1[-1]`, seems incorrect scales1 = scales[:idx] scales2 = scales[idx + downsample - 1::downsample] scales = np.hstack([scales1, scales2]) elif scaletype == 'linear': # TODO poor scheme (but there may not be any good one) min_scale, max_scale = 2**(mn_pow / nv), 2**(mx_pow / nv) na = int(np.ceil(max_scale / min_scale)) scales = np.linspace(min_scale, max_scale, na) else: raise ValueError("`scaletype` must be 'log' or 'linear'; " "got: %s" % scaletype) scales = scales.reshape(-1, 1) # ensure 2D for broadcast ops later return scales
def process_tfbs(TF_name, args, log2fc_params): #per tf """ Processes single TFBS to split into bound/unbound and write out overview file """ #begin_time = datetime.now() logger = TobiasLogger("", args.verbosity, args.log_q) #sending all logger calls to log_q #Pre-scanned sites to read bed_outdir = os.path.join(args.outdir, TF_name, "beds") filename = os.path.join(bed_outdir, TF_name + ".tmp") tmp_files = [filename] no_cond = len(args.cond_names) comparisons = args.comparisons #Set distribution function diff_dist = scipy.stats.norm #Subset analysis to args.output_peaks if these were given if args.output_peaks is not None: output_peaks_bt = BedTool(args.output_peaks) sites_bt = BedTool(filename) intersection = sites_bt.intersect(output_peaks_bt, u=True) filename = intersection.fn #Overwrite filename with the path to the bedtools object tmp_files.append(intersection.fn) #Read file to list of dicts stime = datetime.now() header = ["TFBS_chr", "TFBS_start", "TFBS_end", "TFBS_name", "TFBS_score", "TFBS_strand"] + args.peak_header_list + ["{0}_score".format(condition) for condition in args.cond_names] with open(filename) as f: bedlines = [dict(zip(header, line.rstrip().split("\t"))) for line in f.readlines()] n_rows = len(bedlines) etime = datetime.now() logger.spam("{0} - Reading took:\t{1}".format(TF_name, etime - stime)) if n_rows == 0: logger.warning("No TFBS found for TF {0} - output .bed/.txt files will be empty and excel output will be skipped.".format(TF_name)) ############################## Local effects ############################### stime = datetime.now() #Sort, scale and calculate log2fc bedlines = sorted(bedlines, key=lambda line: (line["TFBS_chr"], int(line["TFBS_start"]), int(line["TFBS_end"]))) for line in bedlines: #Condition specific for condition in args.cond_names: threshold = args.thresholds[condition] line[condition + "_score"] = float(line[condition + "_score"]) original = line[condition + "_score"] line[condition + "_score"] = args.norm_objects[condition].normalize(original) #normalize score line[condition + "_score"] = line[condition + "_score"] if line[condition + "_score"] > 0 else 0 # any scores below 0 -> 0 line[condition + "_score"] = round(line[condition + "_score"], 5) if line[condition + "_score"] < 0: logger.error("negative values: {0}. 
Original: {1}".format(line[condition + "_score"], original)) line[condition + "_bound"] = 1 if line[condition + "_score"] > threshold else 0 #Comparison specific for i, (cond1, cond2) in enumerate(comparisons): base = "{0}_{1}".format(cond1, cond2) line[base + "_log2fc"] = round(np.log2((line[cond1 + "_score"] + args.pseudo) / (line[cond2 + "_score"] + args.pseudo)), 5) #### Write _all file #### outfile = os.path.join(bed_outdir, TF_name + "_all.bed") dict_to_tab(bedlines, outfile, header) #### Write _bound/_unbound files #### for condition in args.cond_names: chosen_columns = header[:-no_cond] + [condition + "_score"] #header[:-no_cond] removes the no_cond last columns containing scores #Subset bedlines per state for state in ["bound", "unbound"]: outfile = os.path.join(bed_outdir, "{0}_{1}_{2}.bed".format(TF_name, condition, state)) chosen_bool = 1 if state == "bound" else 0 bedlines_subset = [bedline for bedline in bedlines if bedline[condition + "_bound"] == chosen_bool] #bedlines_subset = sorted(bedlines_subset, key= lambda line: line[condition + "_score"], reverse=True) dict_to_tab(bedlines_subset, outfile, chosen_columns) ##### Write overview with scores, bound and log2fcs #### overview_columns = header + [condition + "_bound" for condition in args.cond_names] + ["{0}_{1}_log2fc".format(cond1, cond2) for (cond1, cond2) in comparisons] overview_txt = os.path.join(args.outdir, TF_name, TF_name + "_overview.txt") dict_to_tab(bedlines, overview_txt, overview_columns, header=True) #Write dictionary to table #Write xlsx overview bed_table = pd.DataFrame(bedlines, columns=overview_columns) nrow, ncol = bed_table.shape logger.spam("Read table of shape {0} for TF {1}".format((nrow, ncol), TF_name)) stime_excel = datetime.now() if args.skip_excel == False and n_rows > 0: try: overview_excel = os.path.join(args.outdir, TF_name, TF_name + "_overview.xlsx") writer = pd.ExcelWriter(overview_excel, engine='xlsxwriter') #, options=dict(constant_memory=True)) bed_table.to_excel(writer, index=False, columns=overview_columns) #autfilter not possible with constant_memory worksheet = writer.sheets['Sheet1'] no_rows, no_cols = bed_table.shape worksheet.autofilter(0,0,no_rows, no_cols) writer.save() except Exception as e: logger.warning("Could not write excelfile for TF {0}. 
Exception was: {1}".format(TF_name, e)) etime_excel = datetime.now() etime = datetime.now() logger.spam("{0} - Local effects took:\t{1} (excel: {2})".format(TF_name, etime - stime, etime_excel - stime_excel)) ############################## Global effects ############################## stime = datetime.now() #Get info table ready info_columns = ["total_tfbs"] info_columns.extend(["{0}_{1}".format(cond, metric) for (cond, metric) in itertools.product(args.cond_names, ["mean_score", "bound"])]) info_columns.extend(["{0}_{1}_{2}".format(comparison[0], comparison[1], metric) for (comparison, metric) in itertools.product(comparisons, ["change", "pvalue"])]) rows, cols = 1, len(info_columns) info_table = pd.DataFrame(np.nan, columns=info_columns, index=[TF_name]) #Fill in info table info_table.at[TF_name, "total_tfbs"] = n_rows for condition in args.cond_names: info_table.at[TF_name, condition + "_mean_score"] = round(np.mean(bed_table[condition + "_score"]), 5) if n_rows > 0 else np.nan info_table.at[TF_name, condition + "_bound"] = np.sum(bed_table[condition + "_bound"].values) #_bound contains bool 0/1 #### Calculate statistical test for binding in comparison to background #### fig_out = os.path.abspath(os.path.join(args.outdir, TF_name, "plots", TF_name + "_log2fcs.pdf")) log2fc_pdf = PdfPages(fig_out, keep_empty=False) #do not write if there is only 1 condition or if there are no sites if n_rows > 0: #log2fc only possible when more than one binding site was found for i, (cond1, cond2) in enumerate(comparisons): base = "{0}_{1}".format(cond1, cond2) # Compare log2fcs to background log2fcs included = np.logical_or(bed_table[cond1 + "_score"].values > 0, bed_table[cond2 + "_score"].values > 0) subset = bed_table[included].copy() #included subset subset.loc[:,"peak_id"] = ["_".join([chrom, str(start), str(end)]) for (chrom, start, end) in zip(subset["peak_chr"].values, subset["peak_start"].values, subset["peak_end"].values)] observed_log2fcs = subset.groupby('peak_id')[base + '_log2fc'].mean().reset_index()[base + "_log2fc"].values #if more than one TFBS per peak -> take mean value #Estimate mean/std bg_params = log2fc_params[(cond1, cond2)] obs_params = diff_dist.fit(observed_log2fcs) obs_mean, obs_std = obs_params #only for scipy.stats.norm bg_mean, bg_std = bg_params obs_no = np.min([len(observed_log2fcs), 50000]) #Set cap on obs_no to prevent super small p-values n_obs = len(observed_log2fcs) #If there was any change found at all (0 can happen if two bigwigs are the same) if obs_mean != bg_mean: info_table.at[TF_name, base + "_change"] = (obs_mean - bg_mean) / np.mean([obs_std, bg_std]) #effect size info_table.at[TF_name, base + "_change"] = np.round(info_table.at[TF_name, base + "_change"], 5) #Else not possible to compare groups else: info_table.at[TF_name, base + "_change"] = 0 info_table.at[TF_name, base + "_pvalue"] = 1 #Sample from background distribution np.random.seed(n_obs) sample_changes = [] for i in range(100): sample = diff_dist.rvs(*log2fc_params[(cond1, cond2)], size=n_obs) sample_mean, sample_std = np.mean(sample), np.std(sample) sample_change = (sample_mean - bg_mean) / np.mean([sample_std, bg_std]) sample_changes.append(sample_change) #Write out differential scores if args.debug: f = open(os.path.join(args.outdir, TF_name, "sampled_differential_scores.txt"), "w") f.write("\n".join([str(val) for val in sample_changes])) f.close() #Estimate p-value by comparing sampling to observed mean ttest = scipy.stats.ttest_1samp(sample_changes, info_table.at[TF_name, base + "_change"]) 
info_table.at[TF_name, base + "_pvalue"] = ttest[1] #### Plot comparison ### fig, ax = plt.subplots(1,1) ax.hist(observed_log2fcs, bins='auto', label="Observed log2fcs", density=True) xvals = np.linspace(plt.xlim()[0], plt.xlim()[1], 100) #Observed distribution pdf = diff_dist.pdf(xvals, *obs_params) ax.plot(xvals, pdf, label="Observed distribution (fit)", color="red", linestyle="--") ax.axvline(obs_mean, color="red", label="Observed mean") #Background distribution pdf = diff_dist.pdf(xvals, *bg_params) ax.plot(xvals, pdf, label="Background distribution (fit)", color="Black", linestyle="--") ax.axvline(bg_mean, color="black", label="Background mean") #Set size x0,x1 = ax.get_xlim() y0,y1 = ax.get_ylim() ax.set_aspect(((x1-x0)/(y1-y0)) / 1.5) #Decorate ax.legend() plt.xlabel("Log2 fold change", fontsize=8) plt.ylabel("Density", fontsize=8) plt.title("Differential binding for TF \"{0}\"\nbetween ({1} / {2})".format(TF_name, cond1, cond2), fontsize=10) ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left') plt.tight_layout() log2fc_pdf.savefig(fig, bbox_inches='tight') plt.close(fig) #etime_plot = datetime.now() #logger.debug("{0} - Plotting took:\t{1}".format(TF_name, etime_plot - stime_plot)) log2fc_pdf.close() etime = datetime.now() logger.spam("{0} - Global effects took:\t{1}".format(TF_name, etime - stime)) #################### Remove temporary file ###################### for filename in tmp_files: try: os.remove(filename) except: logger.error("Could not remove temporary file {0} - this does not effect the results of BINDetect.".format(filename) ) return(info_table)
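The global-effect statistic above reduces to a standardized mean difference between observed and background log2 fold changes, with a p-value from resampling the background. A stripped-down sketch of that idea with hypothetical names (NumPy/SciPy only, not the TOBIAS API):

import numpy as np
import scipy.stats

def differential_change_sketch(observed_log2fcs, bg_mean, bg_std, n_samples=100, seed=0):
    """Effect size of observed log2fcs vs. a normal background, with a resampling-based p-value."""
    obs_mean, obs_std = scipy.stats.norm.fit(observed_log2fcs)
    change = (obs_mean - bg_mean) / np.mean([obs_std, bg_std])  # standardized difference
    rng = np.random.default_rng(seed)
    sample_changes = []
    for _ in range(n_samples):
        # draw a background sample of the same size and compute its standardized difference
        sample = rng.normal(bg_mean, bg_std, size=len(observed_log2fcs))
        sample_changes.append((sample.mean() - bg_mean) / np.mean([sample.std(), bg_std]))
    pvalue = scipy.stats.ttest_1samp(sample_changes, change)[1]
    return change, pvalue

change, pvalue = differential_change_sketch(np.random.normal(0.5, 1.0, 200), bg_mean=0.0, bg_std=1.0)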
print('Starting Active Learning') for i in range(acquisition_iterations): print('POOLING ITERATION', i) score_All = np.zeros(shape=(X_Pool.shape[0], nb_classes)) for d in range(dropout_iterations): print('Dropout Iteration', d) dropout_score = model.predict(X_Pool, batch_size=batch_size, verbose=1) np.save( '/Users/Riashat/Documents/Cambridge_THESIS/Code/Experiments/keras/active_learning/Acquisition_Functions/Bayesian_Active_Learning/GPU/Max_Entropy/Dropout_Scores/' + 'Dropout_Score_' + str(d) + '.npy', dropout_score) score_All = score_All + dropout_score Avg_Pi = np.divide(score_All, dropout_iterations) Log_Avg_Pi = np.log2(Avg_Pi) Entropy_Avg_Pi = -np.multiply(Avg_Pi, Log_Avg_Pi) Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1) U_X = Entropy_Average_Pi # select the Queries pool points with the HIGHEST predictive entropy # a_1d = U_X.flatten() # x_pool_index = a_1d.argsort()[-Queries:] a_1d = U_X.flatten() x_pool_index = U_X.argsort()[-Queries:][::-1] x_pool_All = np.append(x_pool_All, x_pool_index) # saving pooled images
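For reference, the acquisition step above is predictive entropy over the dropout-averaged class probabilities; note that np.log2(0) yields -inf, so a small epsilon keeps the entropy finite. A minimal sketch with hypothetical array shapes and no Keras dependency:

import numpy as np

def max_entropy_acquisition(prob_per_pass, n_queries, eps=1e-12):
    """prob_per_pass: (n_dropout_passes, n_pool, n_classes) softmax outputs."""
    avg_pi = prob_per_pass.mean(axis=0)                        # average over stochastic forward passes
    entropy = -np.sum(avg_pi * np.log2(avg_pi + eps), axis=1)  # predictive entropy per pool point
    return np.argsort(entropy)[-n_queries:][::-1]              # indices with the HIGHEST entropy

probs = np.random.dirichlet(np.ones(10), size=(20, 1000))      # 20 passes, 1000 pool points, 10 classes
queried = max_entropy_acquisition(probs, n_queries=5)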
# Exponential and logarithm functions # Exponentials print('Exponentials') a1 = np.random.randint(1, 10, size = 5) print(a1) print(np.exp(a1)) print(np.exp2(a1)) print(np.power(a1, 2)) print() # Logarithms print('Logarithms') print(np.log(a1)) print(np.log2(a1)) print(np.log10(a1)) print() # Trigonometric functions print('Trigonometric functions') t = np.linspace(0, np.pi, 3) print(t) print(np.sin(t)) print(np.cos(t)) print(np.tan(t)) x = [-1, 0, 1] print(x) print(np.arcsin(x)) print(np.arccos(x)) print(np.arctan(x))
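As a small cross-check on the logarithm calls above, the change-of-base identity relates np.log, np.log2 and np.log10; a standalone example:

import numpy as np

a = np.array([1, 2, 4, 8, 10])
# change of base: log2(x) = ln(x) / ln(2), log10(x) = ln(x) / ln(10)
assert np.allclose(np.log2(a), np.log(a) / np.log(2))
assert np.allclose(np.log10(a), np.log(a) / np.log(10))
# exp2 and log2 are inverses for these positive inputs
assert np.allclose(np.exp2(np.log2(a)), a)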
def hz_to_midi(hz): return 12 * (torch.log2(hz) - np.log2(440.0)) + 69
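A quick sanity check of the conversion above (reusing hz_to_midi as defined there), plus a hypothetical inverse helper: A4 = 440 Hz maps to MIDI 69, and each doubling of frequency adds 12 semitones.

import numpy as np
import torch

def midi_to_hz(midi):
    # hypothetical inverse of hz_to_midi above
    return 440.0 * (2.0 ** ((midi - 69) / 12))

assert torch.allclose(hz_to_midi(torch.tensor([440.0, 880.0])), torch.tensor([69.0, 81.0]))
assert np.isclose(midi_to_hz(60), 261.6255653005986)  # middle C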
def bitcoin_core_log_var(): yield from log_var_for_file("BitcoinCore-Mainnet.bin") def column_for_levels(levels): return pd.DataFrame(levels)[0] #.cummax()[0] if __name__ == "__main__": import pandas as pd import matplotlib.pyplot as plt import numpy as np cols = [ column_for_levels(lvls) for lvls in [bitcoin_cash_log_var(), bitcoin_core_log_var()] ] max_col = sorted(cols, key=lambda col: -col.index.size)[0] log_col = pd.Series(np.log2(1 + max_col.index.values)) df = pd.concat([*cols, log_col], axis=1, keys=["Bitcoin Cash", "Bitcoin", r"$log_2(x)$"]).astype(float) ylabel = r"$|interlink| + log_2(\frac{\sf variableTarget}{\sf genesisTarget})$" df.plot(logx=True).set(ylabel=ylabel, xlabel="block height") plt.show()
def G_synthesis_sb_modular( dlatents_withl_in, # Input: Disentangled latents (W) [minibatch, label_size+dlatent_size]. dlatent_size=7, # Disentangled latent (W) dimensionality. Including discrete info, rotation, scaling, xy shearing, and xy translation. label_size=0, # Label dimensionality, 0 if no labels. module_list=None, # A list containing module names, which represent semantic latents (exclude labels). num_channels=1, # Number of output color channels. resolution=64, # Output resolution. nonlinearity='lrelu', # Activation function: 'relu', 'lrelu', etc. dtype='float32', # Data type to use for activations and outputs. resample_kernel=[ 1, 3, 3, 1 ], # Low-pass filter to apply when resampling activations. None = no filtering. fused_modconv=True, # Implement modulated_conv2d_layer() as a single fused op? use_noise=False, # If noise is used in this dataset. randomize_noise=True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables. single_const=True, **_kwargs): # Ignore unrecognized keyword args. ''' Modularized spatial-biased network. ''' resolution_log2 = int(np.log2(resolution)) # == 6 for resolution 64 assert resolution == 2**resolution_log2 and resolution >= 4 num_layers = resolution_log2 * 2 - 2 # == 10 for resolution 64 act = nonlinearity images_out = None # Note that module_list may include modules not containing latents, # e.g. Conv layers (size in this case means number of conv layers). key_ls, size_ls, count_dlatent_size, n_content = split_module_names( module_list) if label_size > 0: key_ls.insert(0, 'Label') size_ls.insert(0, label_size) n_content += label_size # module_dict = collections.OrderedDict(zip(key_ls, size_ls)) # Primary inputs. assert dlatent_size == count_dlatent_size dlatents_withl_in.set_shape([None, label_size + count_dlatent_size]) dlatents_withl_in = tf.cast(dlatents_withl_in, dtype) # Early layers consists of 4x4 constant layer. y = None if single_const: with tf.variable_scope('4x4'): with tf.variable_scope('Const'): x = tf.get_variable( 'const', shape=[1, 128, 4, 4], initializer=tf.initializers.random_normal()) x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_withl_in)[0], 1, 1, 1]) else: with tf.variable_scope('4x4'): with tf.variable_scope('Const'): x = tf.get_variable( 'const', shape=[n_content, 128, 4, 4], initializer=tf.initializers.random_normal()) subkwargs = EasyDict() subkwargs.update(dlatents_withl_in=dlatents_withl_in, n_content=n_content, act=act, dtype=dtype, resample_kernel=resample_kernel, fused_modconv=fused_modconv, use_noise=use_noise, randomize_noise=randomize_noise) # Build modules by module_dict. start_idx = 0 # print('module_dict:', module_dict) # for scope_idx, k in enumerate(module_dict): for scope_idx, k in enumerate(key_ls): if (k.startswith('Label')) or (k.startswith('D_global')): # e.g. {'Label': 3}, {'D_global': 3} x = build_D_layers(x, name=k, n_latents=size_ls[scope_idx], start_idx=start_idx, scope_idx=scope_idx, single_const=single_const, **subkwargs) start_idx += size_ls[scope_idx] elif k.startswith('C_global'): # e.g. {'C_global': 2} x = build_C_global_layers(x, name=k, n_latents=size_ls[scope_idx], start_idx=start_idx, scope_idx=scope_idx, **subkwargs) start_idx += size_ls[scope_idx] elif k.startswith('SB'): # e.g. {'SB-rotation: 1}, {'SB-shearing': 2} x = build_SB_layers(x, name=k, n_latents=size_ls[scope_idx], start_idx=start_idx, scope_idx=scope_idx, **subkwargs) start_idx += size_ls[scope_idx] elif k.startswith('C_local_heat'): # e.g. 
{'C_local_heat': 4} x = build_local_heat_layers(x, name=k, n_latents=size_ls[scope_idx], start_idx=start_idx, scope_idx=scope_idx, **subkwargs) start_idx += size_ls[scope_idx] elif k.startswith('C_local_hfeat'): # e.g. {'C_local_hfeat_size': 4} x = build_local_hfeat_layers(x, name=k, n_latents=size_ls[scope_idx], start_idx=start_idx, scope_idx=scope_idx, **subkwargs) start_idx += size_ls[scope_idx] elif k.startswith('Noise'): # e.g. {'Noise': 1} x = build_noise_layer(x, name=k, n_layers=size_ls[scope_idx], scope_idx=scope_idx, **subkwargs) elif k.startswith('Conv'): # e.g. {'Conv-up': 2}, {'Conv-id': 1} x = build_conv_layer(x, name=k, n_layers=size_ls[scope_idx], scope_idx=scope_idx, **subkwargs) else: raise ValueError('Unsupported module type: ' + k) y = torgb(x, y, num_channels=num_channels) images_out = y assert images_out.dtype == tf.as_dtype(dtype) return tf.identity(images_out, name='images_out')
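The layer count in the generator above follows directly from the output resolution: resolution_log2 = log2(resolution), and the synthesis network grows from 4x4 by repeated doublings. A small illustration of that arithmetic in plain NumPy, independent of the TensorFlow graph code:

import numpy as np

for resolution in (32, 64, 128, 256):
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2 ** resolution_log2      # resolution must be a power of two
    num_layers = resolution_log2 * 2 - 2           # as in the generator above (10 for 64x64)
    num_doublings = resolution_log2 - 2            # upsampling steps from 4x4 to the output resolution
    print(resolution, resolution_log2, num_layers, num_doublings)
# e.g. resolution 64 -> resolution_log2 6, num_layers 10, 4 doublings (4 -> 8 -> 16 -> 32 -> 64)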
# plot_folder = './plot_folder/' # Provide the directory to store the network reconstructed audio dir_recon_audio = './dir_recon_audio_fft/' try: os.makedirs(dir_recon_audio, exist_ok = True) print("Directory '%s' created successfully" %dir_recon_audio) except OSError as error: print("Directory '%s' already exists" %dir_recon_audio) t = 0 for k in data.keys(): note_name = k.split('_')[0] f0 = dict_fmap[note_name] midival = int(69 + 12*np.log2(f0/440)) if midival not in pl: continue print(t + 1) t = t + 1 # Load STFT matrix from data stft_in = data[k]['cc'] # Normalizing factor norm_fac = np.max(abs(stft_in)) stft_norm = torch.FloatTensor(stft_in/norm_fac) p = torch.FloatTensor(midival*np.ones(stft_in.shape[0])) x_recon_cVAE,mu,sig,ztot = cVAE.forward(stft_norm,(p.float()/127).view(-1,1))
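The MIDI mapping used above is the standard 69 + 12*log2(f0/440) relation; a small standalone check with a hypothetical helper (note the loop above truncates with int(...) rather than rounding):

import numpy as np

def f0_to_midi(f0):
    # round rather than truncate; hypothetical helper, not part of the script above
    return int(round(69 + 12 * np.log2(f0 / 440.0)))

assert f0_to_midi(440.0) == 69    # A4
assert f0_to_midi(261.63) == 60   # middle C (C4)
assert f0_to_midi(32.70) == 24    # C1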
def fit(self, train_data, train_labels, val_data, val_labels): """ Fits to training data. Args: train_data (ndarray): Training data. train_labels (ndarray): Training labels. val_data (ndarray): Validation data. val_labels (ndarray): Validation labels. """ split = np.append(-np.ones(train_labels.shape, dtype=np.float32), np.zeros(val_labels.shape, dtype=np.float32)) ps = PredefinedSplit(split) sh = train_data.shape train_data = np.append(train_data, val_data , axis=0) train_labels = np.append(train_labels , val_labels, axis=0) del val_data, val_labels if self.kernel == 'linear': if self.probability: clf = SVC(kernel='linear', class_weight='balanced', random_state=6, decision_function_shape='ovr', max_iter=1000, probability=self.probability, **self.scikit_args) else: clf = LinearSVC(class_weight='balanced', dual=False, random_state=6, multi_class='ovr', max_iter=1000, **self.scikit_args) #Cross-validate over these parameters params = {'C': 2.0**np.arange(-9,16,2,dtype=float)} elif self.kernel == 'rbf': clf = SVC(random_state=6, class_weight='balanced', cache_size=16000, decision_function_shape='ovr',max_iter=1000, tol=1e-4, probability=self.probability, **self.scikit_args) params = {'C': 2.0**np.arange(-9,16,2,dtype=float), 'gamma': 2.0**np.arange(-15,4,2,dtype=float)} #Coarse search gs = GridSearchCV(clf, params, refit=False, n_jobs=self.n_jobs, verbose=self.verbosity, cv=ps) gs.fit(train_data, train_labels) #Fine-Tune Search if self.kernel == 'linear': best_C = np.log2(gs.best_params_['C']) params = {'C': 2.0**np.linspace(best_C-2,best_C+2,10, dtype=float)} elif self.kernel == 'rbf': best_C = np.log2(gs.best_params_['C']) best_G = np.log2(gs.best_params_['gamma']) params = {'C': 2.0**np.linspace(best_C-2,best_C+2,10, dtype=float), 'gamma': 2.0**np.linspace(best_G-2,best_G+2,10, dtype=float)} self.gs = GridSearchCV(clf, params, refit=self.refit, n_jobs=self.n_jobs, verbose=self.verbosity, cv=ps) self.gs.fit(train_data, train_labels) if not self.refit: clf.set_params(C=gs.best_params_['C']) if self.kernel == 'rbf': clf.set_params(gamma=gs.best_params_['gamma']) self.gs = clf self.gs.fit(train_data[:sh[0]], train_labels[:sh[0]])
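The coarse-to-fine hyperparameter search above works in log2 space: first a wide grid of C = 2^k, then a finer sweep of exponents around the best coarse value. A compact sketch of that pattern with scikit-learn, using synthetic data and LinearSVC only (not the class's full setup):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=300, n_features=20, random_state=0)

# Coarse search: C on a base-2 grid spanning many orders of magnitude
coarse = GridSearchCV(LinearSVC(dual=False, max_iter=1000),
                      {'C': 2.0 ** np.arange(-9, 16, 2, dtype=float)}, cv=3)
coarse.fit(X, y)

# Fine search: +/- 2 octaves around the best coarse exponent
best_exp = np.log2(coarse.best_params_['C'])
fine = GridSearchCV(LinearSVC(dual=False, max_iter=1000),
                    {'C': 2.0 ** np.linspace(best_exp - 2, best_exp + 2, 10)}, cv=3)
fine.fit(X, y)
print(coarse.best_params_, fine.best_params_)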
def nlogn(x): return x * numpy.log2(x)
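nlogn as written returns nan at x = 0 (0 * log2(0)); when it is used for entropy-style terms, a guarded variant is convenient. A hedged sketch, not a drop-in replacement:

import numpy as np

def nlogn_safe(x):
    """x * log2(x) with the convention 0 * log2(0) == 0."""
    x = np.asarray(x, dtype=float)
    return np.where(x > 0, x * np.log2(np.where(x > 0, x, 1.0)), 0.0)

assert nlogn_safe(0) == 0.0
assert np.isclose(nlogn_safe(8), 24.0)  # 8 * log2(8) = 24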
def splay(vec): """ Determine two lengths to split/stride the input vector by: N2 is a power of two no larger than sqrt(len(vec)), N1 the corresponding number of strides. """ N2 = 2**int(numpy.log2(len(vec)) / 2) N1 = len(vec) // N2 # integer division; plain '/' would return a float length in Python 3 return N1, N2
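A quick usage check of splay (as defined above), under the assumption that the vector length is a multiple of the chosen power of two:

import numpy

vec = numpy.zeros(1024)
N1, N2 = splay(vec)
# N2 = 2**int(log2(1024)/2) = 2**5 = 32, and N1 = 1024 // 32 = 32
assert (N1, N2) == (32, 32)
assert N1 * N2 == len(vec)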
def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, bins_per_octave=12, tuning=None, filter_scale=1, norm=1, sparsity=0.01, window='hann', scale=True): '''Compute the hybrid constant-Q transform of an audio signal. Here, the hybrid CQT uses the pseudo CQT for higher frequencies where the hop_length is longer than half the filter length and the full CQT for lower frequencies. Parameters ---------- y : np.ndarray [shape=(n,)] audio time series sr : number > 0 [scalar] sampling rate of `y` hop_length : int > 0 [scalar] number of samples between successive CQT columns. fmin : float > 0 [scalar] Minimum frequency. Defaults to C1 ~= 32.70 Hz n_bins : int > 0 [scalar] Number of frequency bins, starting at `fmin` bins_per_octave : int > 0 [scalar] Number of bins per octave tuning : None or float in `[-0.5, 0.5)` Tuning offset in fractions of a bin (cents). If `None`, tuning will be automatically estimated. filter_scale : float > 0 Filter filter_scale factor. Larger values use longer windows. sparsity : float in [0, 1) Sparsify the CQT basis by discarding up to `sparsity` fraction of the energy in each basis. Set `sparsity=0` to disable sparsification. window : str, tuple, number, or function Window specification for the basis filters. See `filters.get_window` for details. Returns ------- CQT : np.ndarray [shape=(n_bins, t), dtype=np.float] Constant-Q energy for each frequency at each time. Raises ------ ParameterError If `hop_length` is not an integer multiple of `2**(n_bins / bins_per_octave)` Or if `y` is too short to support the frequency range of the CQT. See Also -------- cqt pseudo_cqt Notes ----- This function caches at level 20. ''' if fmin is None: # C1 by default fmin = note_to_hz('C1') if tuning is None: tuning = estimate_tuning(y=y, sr=sr) # Get all CQT frequencies freqs = cqt_frequencies(n_bins, fmin, bins_per_octave=bins_per_octave, tuning=tuning) # Compute the length of each constant-Q basis function lengths = filters.constant_q_lengths(sr, fmin, n_bins=n_bins, bins_per_octave=bins_per_octave, tuning=tuning, filter_scale=filter_scale, window=window) # Determine which filters to use with Pseudo CQT # These are the ones that fit within 2 hop lengths after padding pseudo_filters = 2.0**np.ceil(np.log2(lengths)) < 2 * hop_length n_bins_pseudo = int(np.sum(pseudo_filters)) n_bins_full = n_bins - n_bins_pseudo cqt_resp = [] if n_bins_pseudo > 0: fmin_pseudo = np.min(freqs[pseudo_filters]) cqt_resp.append(pseudo_cqt(y, sr, hop_length=hop_length, fmin=fmin_pseudo, n_bins=n_bins_pseudo, bins_per_octave=bins_per_octave, tuning=tuning, filter_scale=filter_scale, norm=norm, sparsity=sparsity, window=window, scale=scale)) if n_bins_full > 0: cqt_resp.append(np.abs(cqt(y, sr, hop_length=hop_length, fmin=fmin, n_bins=n_bins_full, bins_per_octave=bins_per_octave, tuning=tuning, filter_scale=filter_scale, norm=norm, sparsity=sparsity, window=window, scale=scale))) return __trim_stack(cqt_resp, n_bins)
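The pseudo/full split above hinges on one criterion: a CQT bin is routed to the pseudo CQT when its filter, rounded up to the next power of two, fits in two hop lengths. A small standalone illustration of that test with hypothetical filter lengths (NumPy only, independent of librosa internals):

import numpy as np

hop_length = 512
lengths = np.array([4000.0, 2100.0, 1500.0, 900.0, 300.0])  # hypothetical filter lengths in samples

# round each length up to the next power of two, then compare with 2 * hop_length
padded = 2.0 ** np.ceil(np.log2(lengths))
pseudo_filters = padded < 2 * hop_length
print(padded)          # [4096. 4096. 2048. 1024.  512.]
print(pseudo_filters)  # [False False False False  True]
# only the shortest filter (padded to 512 < 1024) would be handled by pseudo_cqt here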
def __init__(self, data, input_dims=None, output_dims=None): """Initialize a quantum channel Chi-matrix operator. Args: data (QuantumCircuit or Instruction or BaseOperator or matrix): data to initialize superoperator. input_dims (tuple): the input subsystem dimensions. [Default: None] output_dims (tuple): the output subsystem dimensions. [Default: None] Raises: QiskitError: if input data is not an N-qubit channel or cannot be initialized as a Chi-matrix. Additional Information ---------------------- If the input or output dimensions are None, they will be automatically determined from the input data. The Chi matrix representation is only valid for N-qubit channels. """ # If the input is a raw list or matrix we assume that it is # already a Chi matrix. if isinstance(data, (list, np.ndarray)): # Initialize from raw numpy or list matrix. chi_mat = np.array(data, dtype=complex) # Determine input and output dimensions dim_l, dim_r = chi_mat.shape if dim_l != dim_r: raise QiskitError('Invalid Chi-matrix input.') if input_dims: input_dim = np.product(input_dims) if output_dims: output_dim = np.product(output_dims) if output_dims is None and input_dims is None: output_dim = int(np.sqrt(dim_l)) input_dim = dim_l // output_dim elif input_dims is None: input_dim = dim_l // output_dim elif output_dims is None: output_dim = dim_l // input_dim # Check dimensions if input_dim * output_dim != dim_l: raise QiskitError("Invalid shape for Chi-matrix input.") else: # Otherwise we initialize by conversion from another Qiskit # object into the QuantumChannel. if isinstance(data, (QuantumCircuit, Instruction)): # If the input is a Terra QuantumCircuit or Instruction we # convert it to a SuperOp data = SuperOp._init_instruction(data) else: # We use the QuantumChannel init transform to initialize # other objects into a QuantumChannel or Operator object. data = self._init_transformer(data) input_dim, output_dim = data.dim # Now that the input is an operator we convert it to a Chi object chi_mat = _to_chi(data.rep, data._data, input_dim, output_dim) if input_dims is None: input_dims = data.input_dims() if output_dims is None: output_dims = data.output_dims() # Check input is N-qubit channel n_qubits = int(np.log2(input_dim)) if 2**n_qubits != input_dim: raise QiskitError("Input is not an n-qubit Chi matrix.") # Check and format input and output dimensions input_dims = self._automatic_dims(input_dims, input_dim) output_dims = self._automatic_dims(output_dims, output_dim) super().__init__('Chi', chi_mat, input_dims, output_dims)
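The N-qubit check at the end is simply "the input dimension must be a power of two"; the same log2 test is easy to verify in isolation with a hypothetical helper:

import numpy as np

def num_qubits_or_none(dim):
    """Return n if dim == 2**n for some integer n >= 0, else None (sketch of the check above)."""
    n = int(np.log2(dim))
    return n if 2 ** n == dim else None

assert num_qubits_or_none(4) == 2      # input_dim 4 -> 2-qubit channel
assert num_qubits_or_none(8) == 3      # input_dim 8 -> 3-qubit channel
assert num_qubits_or_none(12) is None  # not a power of two -> rejected with QiskitError above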