def __add__(self, x):
    """
    Addition in real space; an optimization of Manning & Schuetze,
    p. 337 (eq. 9.21)

    >>> a_real = .5
    >>> b_real = .25
    >>> a_bw = BitWeight(a_real)
    >>> b_bw = BitWeight(b_real)
    >>> BitWeight.close_enough((a_bw + b_bw).to_real, a_real + b_real)
    True
    """
    x_bw = x if hasattr(x, 'bw') else BitWeight(x)
    if x_bw.bw - self.bw > self.BIG:
        to_return = self.bw
    elif self.bw - x_bw.bw > self.BIG:
        to_return = x_bw.bw
    else:
        if x_bw.bw > self.bw:
            to_return = x_bw.bw - log2(1. + exp2(x_bw.bw - self.bw))
        elif x_bw.bw < self.bw:
            to_return = self.bw - log2(exp2(self.bw - x_bw.bw) + 1.)
        else:
            to_return = x_bw.bw - 1.  # not 1 + x_bw.bw as you might think,
                                      # as BWs are NEGATIVE log-weights
    return BitWeight(to_return, True)
def summary_stats(alist):
    # Compute summary stats from dropout activations returned by simulate.
    Elist = []
    NWGMlist = []
    Vlist = []
    #from scipy import stats as stats
    for l, a in enumerate(alist):
        E = np.mean(a, axis=0)  # Arithmetic mean over dropout samples.
        #G = stats.gmean(a, axis=0)  # Geometric mean.
        G = np.exp2(np.sum(np.log2(a), axis=0) * 1.0/a.shape[0])  # Geometric mean.
        #N = stats.gmean(1.0-a, axis=0)
        N = np.exp2(np.sum(np.log2(1.0-a), axis=0) * 1.0/a.shape[0])
        NWGM = G / (G + N)  # Normalized geometric mean.
        V = np.var(a, axis=0)
        # Change 1 x Units x Inputs matrix to Units x Inputs
        Elist.append(E)
        NWGMlist.append(NWGM)
        Vlist.append(V)
    return Elist, NWGMlist, Vlist
def __add__(self, other):
    """
    Addition in real space; an optimization of Manning & Schuetze,
    p. 337 (eq. 9.21)

    >>> a_real = .5
    >>> b_real = .25
    >>> a_bw = BitWeight(a_real)
    >>> b_bw = BitWeight(b_real)
    >>> BitWeight.close_enough((a_bw + b_bw).to_real, a_real + b_real)
    True
    >>> (BitWeight(.25) + BitWeight(.25)).to_real
    0.5
    """
    other_bw = other if hasattr(other, "bw") else BitWeight(other)
    if other_bw.bw - self.bw > self.BIG:
        to_return = self.bw
    elif self.bw - other_bw.bw > self.BIG:
        to_return = other_bw.bw
    else:
        if other_bw.bw > self.bw:
            to_return = other_bw.bw - log2(1.0 + exp2(other_bw.bw - self.bw))
        elif other_bw.bw < self.bw:
            to_return = self.bw - log2(exp2(self.bw - other_bw.bw) + 1.0)
        else:
            to_return = other_bw.bw - 1.0  # not 1 + x_bw.bw as you might think,
                                           # as BWs are NEGATIVE log-weights
    return BitWeight(to_return, True)
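# A minimal standalone sketch (not part of BitWeight) of the identity the __add__
# methods above rely on: for negative log2-weights p = -log2(a) and q = -log2(b),
# the sum a + b has negative log2-weight p - log2(1 + 2**(p - q)) when p >= q,
# so the addition never leaves log space. Names here are hypothetical.
from math import log2


def neg_log2_sum(p, q):
    """Return -log2(2**-p + 2**-q) without exponentiating the raw weights."""
    hi, lo = (p, q) if p >= q else (q, p)  # hi is the larger bw, i.e. the smaller real value
    return hi - log2(1.0 + 2.0 ** (hi - lo))


if __name__ == "__main__":
    p, q = 2.0, 1.0                        # a = 0.25, b = 0.5
    assert abs(2.0 ** -neg_log2_sum(p, q) - 0.75) < 1e-12
    assert neg_log2_sum(3.0, 3.0) == 2.0   # equal weights reduce to bw - 1, as in __add__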
def train(self, feature_stream, alpha, beta, lamba1, lamba2):
    validate_helper = utility.ValidateHelper()
    self.z = np.zeros(self.feature_count)
    self.n = np.zeros(self.feature_count)
    self.w = np.zeros(self.feature_count)
    for count, (click, features) in enumerate(feature_stream):
        no_zero_index = []
        t = 0
        for feature_index in features:
            no_zero_index.append(feature_index)
            if np.abs(self.z[feature_index]) > lamba1:
                _t = (-1.0 / ((beta + np.sqrt(self.n[feature_index])) / alpha + lamba2)) * \
                    (self.z[feature_index] - np.sign(self.z[feature_index]) * lamba1)
                self.w[feature_index] = _t
                t += _t
            else:
                self.w[feature_index] = 0
        p = math.sigmoid(t)
        for feature_index in no_zero_index:
            g = p - click
            # FTRL-proximal update: sigma and n accumulate the squared gradient g**2.
            g_square = np.square(g)
            sigma = (1.0 / alpha) * (np.sqrt(self.n[feature_index] + g_square)
                                     - np.sqrt(self.n[feature_index]))
            w_i = self.w[feature_index]
            self.z[feature_index] += g - sigma * w_i
            self.n[feature_index] += g_square
        validate_helper.update(p, click, 0.5)
    validate_helper.out_put()
def calc_feature(centroids, patch_width, stride, path, p, q):
    t = time()
    image = misc.imread(path)

    # Crop here (center crop; rows use shape[0], columns use shape[1])
    crop_size = 300
    startX = (image.shape[0] - crop_size) // 2
    startY = (image.shape[1] - crop_size) // 2
    endX = startX + crop_size
    endY = startY + crop_size
    image = image[startX:endX, startY:endY, :]

    # Extract patches
    patches = patch_extract(image, patch_width, stride)
    patches = numpy.float32(patches)

    # Preprocessing
    # Normalize
    patches = patches - numpy.asmatrix(patches.mean(axis=1)).T
    patches = patches / patches.std(axis=1)
    patches = numpy.nan_to_num(patches)

    # Triangle (soft) activation function, built from squared Euclidean distances:
    # ||x - c||^2 = ||x||^2 + ||c||^2 - 2*x.c
    xx = numpy.sum(numpy.square(patches), axis=1)
    cc = numpy.sum(numpy.square(centroids), axis=1)
    xc = 2 * numpy.dot(patches, numpy.transpose(centroids))

    z = numpy.sqrt(cc + (xx - xc))
    mu = z.mean(axis=1)
    patches = numpy.maximum(0, mu - z)

    # Reshape to 2D plane before pooling
    rows = image.shape[0] - patch_width + 1
    cols = image.shape[1] - patch_width + 1
    patches = numpy.array(patches, copy=False).reshape(rows, cols, centroids.shape[0], order="F")

    # Pool
    half_rows = round(rows / 2)
    half_cols = round(cols / 2)

    # Calculate pool values
    q1 = numpy.sum(numpy.sum(patches[1:half_rows, 1:half_cols, :], 0), 0)
    q2 = numpy.sum(numpy.sum(patches[half_rows+1:patches.shape[0], 1:half_cols, :], 0), 0)
    q3 = numpy.sum(numpy.sum(patches[1:half_rows, half_cols+1:patches.shape[1], :], 0), 0)
    q4 = numpy.sum(numpy.sum(patches[half_rows+1:patches.shape[0], half_cols+1:patches.shape[1], :], 0), 0)

    # Print time
    #print "Finished %s, took %.2f seconds" % (path, time() - t)

    output = numpy.transpose(numpy.append(q1, numpy.append(q2, numpy.append(q3, q4))))

    # Put output in queue (so that it is sent to the original thread)
    q.put((p, output))

    # Concatenate and return
    return 0
def decay_gene_ls(cell1='sphere', cell2='shield', FC_cutoff=2):
    rpkm_dict, df = read_rpkm2()
    decay_genes = []
    for i, j in rpkm_dict.items():
        if j['DMSO_%s' % (cell1)] > 0 and j['DMSO_%s' % (cell2)] > 0:
            fold_change = np.exp2(j['DMSO_%s' % (cell1)]) / np.exp2(j['DMSO_%s' % (cell2)])
            if fold_change >= FC_cutoff:
                decay_genes.append(i)
    print "decay_genes: %s" % (len(decay_genes)), decay_genes[0:5]
    return decay_genes
def analyze_waittimes():
    waits = np.loadtxt('results/wait_ratios.csv', delimiter=',')
    threads = np.exp2(np.arange(7))
    color_names = map(lambda x: '{0:d} Vertices'.format(int(x)), np.exp2(np.arange(4, 11)))
    colors = ['black', 'violet', 'blue', 'green', 'yellow', 'orange', 'red']

    plt.figure(figsize=(12, 8))
    plots = []
    for i in range(waits.shape[0]):
        plots.append(plt.plot(threads, waits[i], color=colors[i], linestyle='-')[0])
    plt.xscale('log', basex=2)
    plt.yscale('log', basey=2)
    plt.legend(plots, color_names, loc=4)
    plt.savefig('img/waits.png', dpi=200, bbox_inches='tight')
def exponential_grid(fitness_func, parameters):
    """Exponential parameter optimization that checks all possible values

    Values are visited in a grid order, linear in log space with the step being
    the log of the resolution.  Take care not to use 0 as it will cause problems
    when taking the log.  If a parameter's bounds are (.1, 10**5, 10) then the
    values used are approximately [10**-1, 10**0, 10**1, 10**2, 10**3, 10**4].

    Args:
        fitness_func: Fitness function that takes keyword arguments whose names
            are keys in 'parameters'.  Each keyword argument takes a float.  The
            fitness function returns a float that we seek to maximize.
        parameters: Dict with keys as parameter names and values as
            (low, high, resolution) where generated parameters are [low, high)
            and resolution is a hint at the relevant scale of the parameter.

    Yields:
        Iterator of (fitness, params) where
        fitness: The value returned by the fitness_func given params
        params: Dict whose keys are those in parameters and values are floats
    """
    ranges = [np.exp2(np.arange(*np.log2(x))) for x in parameters.values()]
    for param_values in itertools.product(*ranges):
        params = dict(zip(parameters, param_values))
        yield fitness_func(**params), params
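# Hedged usage sketch for the exponential_grid generator above. The fitness function
# and the parameter bounds are invented for illustration; only the (low, high,
# resolution) convention is taken from the docstring.
import itertools
import numpy as np


def toy_fitness(C, gamma):
    # Peaks at C = 2**3 and gamma = 2**-2 (arbitrary choice for the demo).
    return -(np.log2(C) - 3) ** 2 - (np.log2(gamma) + 2) ** 2


toy_params = {'C': (2 ** -5, 2 ** 15, 2),        # low, high, resolution
              'gamma': (2 ** -15, 2 ** 3, 2)}
best_fitness, best_params = max(exponential_grid(toy_fitness, toy_params),
                                key=lambda fp: fp[0])
print(best_fitness, best_params)   # 0.0, {'C': 8.0, 'gamma': 0.25}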
def make_raw_outputFile(data, probe_names, sample_names, adv=False, use_log=False,
                        celltypes_to_use=None, filename='rawData.txt'):
    '''docstring for make_raw_outputFile

    Prints the data to a file.  If advanced mode is on, then only the
    samples/celltypes given are printed, by reducing the input data.

    input: data, probe_names and sample_names must be provided with the same
    reduced index syntax, so that only probes wanted in the file are given, in a
    consistent index order.
    '''
    outfile = open(filename, 'w')
    if not use_log:
        data = numpy.exp2(data)
    if adv:
        temp_index = 0
        reduced_samplenames = sample_names
        for i in sample_names:
            if not i in celltypes_to_use:
                data = numpy.delete(data, temp_index, 1)
                reduced_samplenames = numpy.delete(reduced_samplenames, temp_index, 0)
            else:
                temp_index += 1
        sample_names = reduced_samplenames
    outfile.write(',')
    outfile.write(', '.join(sample_names))
    outfile.write('\n')
    for index, i in enumerate(probe_names):
        outfile.write(i)
        outfile.write(', ')
        for y in data[index]:
            outfile.write('%s,' % (y))
        outfile.write('\n')
def plot_relrisk_matrix(relrisk):
    t = relrisk.copy()
    matrix_shape = (t['exposure'].nunique(), t['event'].nunique())
    m = ut.daf.to.map_vals_to_ints_inplace(t, cols_to_map=['exposure'])
    m = m['exposure']
    ut.daf.to.map_vals_to_ints_inplace(t, cols_to_map={'event': dict(zip(m, range(len(m))))})

    RR = zeros(matrix_shape)
    RR[t['exposure'], t['event']] = t['relative_risk']
    RR[range(len(m)), range(len(m))] = nan
    RRL = np.log2(RR)

    def normalizor(X):
        min_x = nanmin(X)
        range_x = nanmax(X) - min_x
        return lambda x: (x - min_x) / range_x

    normalize_this = normalizor(RRL)
    center = normalize_this(0)
    color_map = shifted_color_map(cmap=cm.get_cmap('coolwarm'), start=0, midpoint=center, stop=1)

    imshow(RRL, cmap=color_map, interpolation='none')
    xticks(range(shape(RRL)[0]), m, rotation=90)
    yticks(range(shape(RRL)[1]), m)
    cbar = colorbar()
    cbar.ax.set_yticklabels(
        ["%.02f" % x for x in np.exp2(array(ut.pplot.get.get_colorbar_tick_labels_as_floats(cbar)))])
def test_sfu():
    X = np.random.uniform(0, 1, 16).astype('float32')
    Y = run_code(sfu, X, 4)
    assert np.allclose(1/X, Y[0], rtol=1e-4)
    assert np.allclose(1/np.sqrt(X), Y[1], rtol=1e-4)
    assert np.allclose(np.exp2(X), Y[2], rtol=1e-4)
    assert np.allclose(np.log2(X), Y[3], rtol=1e-2)
def test_exp2_values(self):
    x = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
    y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    for dt in ["f", "d", "g"]:
        xf = np.array(x, dtype=dt)
        yf = np.array(y, dtype=dt)
        assert_almost_equal(np.exp2(yf), xf)
def process_reception(self, signal):
    previous_half_length = int(signal.length / 2)
    initial_pos, final_pos = signalProcessor.SignalProcessor.make_periodical(signal)
    half_length = previous_half_length - initial_pos
    # Zero-pad the FFT to the next power of two of the signal length, times 2**4,
    # to get a finer frequency grid.
    amount_points = int(np.exp2(np.ceil(np.log2(signal.length)) + 4))
    frequency = sp.fft(np.roll(signal.signal, -half_length),
                       amount_points)[:amount_points // 2] * 2 / signal.length
    d_f = np.argmax(abs(frequency)) * self.__adc_freq / amount_points
    print()
    print("Measured frequency to target:", d_f, "position:", np.argmax(abs(frequency)))

    distance = common.SignalProperties.T * d_f * common.SignalProperties.C / (2 * self.__signal_gen.real_b)
    delta_r = common.SignalProperties.C / 2 / self.__signal_gen.real_b * signal.length / amount_points
    print("Measured distance to target:", distance, "Delta distance:", delta_r)

    d_t = d_f * common.SignalProperties.T / common.SignalProperties.B
    print("Measured round trip time:", d_t)

    k = np.pi * common.SignalProperties.B / common.SignalProperties.T
    phase = format_phase(2 * np.pi * common.SignalProperties.F0 * d_t - k * d_t ** 2)
    final_ph = format_phase(np.angle(frequency)[np.argmax(abs(frequency))] - phase)
    print("Measured target's phase:", final_ph)
    print()

    delta_f = self.__calculate_gain(np.argmax(abs(frequency)), frequency)
    d_f = (np.argmax(abs(frequency)) + delta_f) * self.__adc_freq / signal.length
    distance = common.SignalProperties.T * d_f * common.SignalProperties.C / (2 * self.__signal_gen.real_b)
    print("Measured frequency to target:", d_f)
    print("Measured distance to target:", distance)

    plt.plot(np.abs(frequency))
    plt.show()
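# Small self-contained sketch of the zero-padding rule used above: the FFT length is
# the next power of two of the signal length, multiplied by 2**4, which refines the
# frequency grid (adc_freq / amount_points) by interpolation without adding information.
import numpy as np

signal_length = 1000
amount_points = int(np.exp2(np.ceil(np.log2(signal_length)) + 4))
print(amount_points)        # 16384 = 1024 * 16
print(amount_points // 2)   # only the positive-frequency half is kept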
def apply_weights(cnarr, ref_matched, epsilon=1e-4):
    """Calculate weights for each bin.

    Weights are derived from:

    - bin sizes
    - average bin coverage depths in the reference
    - the "spread" column of the reference.
    """
    # Relative bin sizes
    sizes = ref_matched['end'] - ref_matched['start']
    weights = sizes / sizes.max()
    if (np.abs(np.mod(ref_matched['log2'], 1)) > epsilon).any():
        # NB: Not used with a flat reference
        logging.info("Weighting bins by relative coverage depths in reference")
        # Penalize bins that deviate from neutral coverage
        flat_cvgs = ref_matched.expect_flat_cvg()
        weights *= np.exp2(-np.abs(ref_matched['log2'] - flat_cvgs))
    if (ref_matched['spread'] > epsilon).any():
        # NB: Not used with a flat or paired reference
        logging.info("Weighting bins by coverage spread in reference")
        # Inverse of variance, 0--1
        variances = ref_matched['spread'] ** 2
        invvars = 1.0 - (variances / variances.max())
        weights = (weights + invvars) / 2
    # Avoid 0-value bins -- CBS doesn't like these
    weights = np.maximum(weights, epsilon)
    return cnarr.add_columns(weight=weights)
def test_briggs_helper_function(self):
    np.random.seed(1234)
    for a in np.random.randn(10) + 1j * np.random.randn(10):
        for k in range(5):
            x_observed = _matfuncs_inv_ssq._briggs_helper_function(a, k)
            x_expected = a ** np.exp2(-k) - 1
            assert_allclose(x_observed, x_expected)
def computeActivity(self, inputActivity):
    logger.debug('computing activity.')
    self.ensureLength(inputActivity.max())
    # numpy array magic
    idx = numpy.mgrid[0:self.dims[0], 0:self.dims[1], 0:self.dims[2]]
    tInputActivity = numpy.tile(inputActivity, self.dims[:-1] + (1,))
    factors = 2 * self.counts[idx[0], idx[1], idx[2], tInputActivity] / numpy.sum(self.counts, axis=3)
    mans, exps = numpy.frexp(factors)
    mantissas, exponents = numpy.frexp(numpy.prod(mans, axis=2))
    exponents += exps.sum(axis=2)
    if self.maxexp is not None:
        maxexp = self.maxexp
    else:
        maxexp = exponents.max()
    exponents -= maxexp
    logger.debug("Maximum exponent: %d", maxexp)
    activity = mantissas * numpy.exp2(exponents)
    if self.p != 0:
        conscience = (self.coff / self.con) ** self.p
        activity *= conscience
    activity *= numpy.prod(activity.shape) / activity.sum()
    return activity
def exp_through_polynomial_fit(value, length=12):
    """
    http://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html

    Assuming powf(x, y) == exp2(log2(x) * y), since x**y = 2**(log2(x**y)) = 2**(y * log2(x)),
    then powf(e, y) == exp2(log2(e) * y), with log2(e) = 1.442695040888963387004650940071,
    so we need exp2(1.442695040888963387004650940071 * y).

    Break apart (1.442695040888963387004650940071 * y) into real and integral parts.
    IEEE doubles are represented using 64 bits where:
        value = -1**b[63] + (int(b[52:64]) - 1023) + 1 + sum(b[52 - i]/2**i for i in xrange(52))
    Since x**(real + integral) => x**real * x**integral, implement the integral part
    using fast shifts; the real portion will be implemented using a polynomial function ...

    We can further increase the accuracy by reducing the interval from (-1, 1) to
    (-.5, .5) by taking the square root of each side and then squaring the final answer.
    Proof:
        (e**x)**0.5 = (2**(x * log2(e)))**0.5, let y = x * log2(e)
        (2**y)**0.5 = (2**(floor(y) + (y - floor(y))))**0.5
                    = (2**(floor(y)))**0.5 * (2**(y - floor(y)))**0.5
        (2**(y - floor(y)))**0.5 = 2**(0.5 * (y - floor(y)))
        since -1 < y - floor(y) < 1, we have -0.5 < 0.5 * (y - floor(y)) < 0.5;
        the final result simply needs to be squared, since
        ((e**x)**0.5)**2 = (e**x)**(2*0.5) = e**x ...
    """
    y = value * 1.442695040888963387004650940071
    integral = numpy.sqrt(numpy.exp2(int(y)))
    return (integral * numpy.polyval(remez(numpy.exp2, (-0.5, 0.5), length),
                                     (y - int(y)) / 2.0)) ** 2
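# A minimal sketch of the range-reduction idea described in the docstring above, using
# numpy.polyfit as a stand-in for the Remez fit (the original depends on an external
# `remez` helper that is not shown here), and without the square-root interval trick.
import numpy as np

LOG2_E = 1.4426950408889634


def exp_via_exp2(x, degree=6):
    y = np.asarray(x, dtype=float) * LOG2_E   # e**x == 2**y
    n = np.floor(y)                           # integral part: exact powers of two
    f = y - n                                 # fractional part in [0, 1)
    # Polynomial approximation of 2**f on [0, 1) (least squares, not minimax).
    grid = np.linspace(0.0, 1.0, 256)
    coeffs = np.polyfit(grid, np.exp2(grid), degree)
    return np.exp2(n) * np.polyval(coeffs, f)


x = np.linspace(-5, 5, 11)
print(np.max(np.abs(exp_via_exp2(x) - np.exp(x)) / np.exp(x)))  # small relative error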
def r0_max_val(r, y, kap, sig, thv, gA=1.0, k=0.0, p=2.2):
    Gk = (4.0 - k) * gA ** 2.0
    thP0 = thetaPrime(r, thv, 0.0)
    rExp = -np.power(np.divide(thP0, sig), 2.0 * kap)
    lhs = np.divide(y - np.power(y, 5.0 - k), Gk)
    rhs = (np.tan(thv) + r) ** 2.0 * np.exp2(rExp)
    return rhs - lhs
def _guess_average_depth(self, segments=None, window=100):
    """Estimate the effective average read depth from variance.

    Assume read depths are Poisson distributed, converting log2 values to
    absolute counts. Then the mean depth equals the variance, and the average
    read depth is the estimated mean divided by the estimated variance.
    Use robust estimators (Tukey's biweight location and midvariance) to
    compensate for outliers and overdispersion.

    With `segments`, take the residuals of this array's log2 values from those
    of the segments to remove the confounding effect of real CNVs.

    If `window` is an integer, calculate and subtract a smoothed trendline to
    remove the effect of CNVs without segmentation (skipped if `segments` are
    given).

    See: http://www.evanmiller.org/how-to-read-an-unlabeled-sales-chart.html
    """
    # Try to drop allosomes
    cnarr = self.autosomes()
    if not len(cnarr):
        cnarr = self
    # Remove variations due to real/likely CNVs
    y_log2 = cnarr.residuals(segments)
    if segments is None and window:
        y_log2 -= smoothing.savgol(y_log2, window)
    # Guess Poisson parameter from absolute-scale values
    y = np.exp2(y_log2)
    # ENH: use weight argument to these stats
    loc = descriptives.biweight_location(y)
    spread = descriptives.biweight_midvariance(y, loc)
    if spread > 0:
        return loc / spread ** 2
    return loc
def score(self, X, y=None):
    """Compute score reflecting how well the model has fitted for the input data.

    The scoring method is set using the `scorer` argument in
    :meth:`~gensim.sklearn_api.ldamodel.LdaTransformer`.
    Higher score is better.

    Parameters
    ----------
    X : iterable of list of (int, number)
        Sequence of documents in BOW format.

    Returns
    -------
    float
        The score computed based on the selected method.

    """
    if self.scorer == 'perplexity':
        corpus_words = sum(cnt for document in X for _, cnt in document)
        subsample_ratio = 1.0
        perwordbound = \
            self.gensim_model.bound(X, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words)
        return -1 * np.exp2(-perwordbound)  # returning (-1*perplexity) to select model with minimum value
    elif self.scorer == 'u_mass':
        goodcm = models.CoherenceModel(model=self.gensim_model, corpus=X,
                                       coherence=self.scorer, topn=3)
        return goodcm.get_coherence()
    else:
        raise ValueError("Invalid value {} supplied for `scorer` param".format(self.scorer))
def test_exp2(self):
    from numpy import array, exp2
    inf = float('inf')
    ninf = -float('inf')
    nan = float('nan')
    cmpl = complex
    for c, rel_err in (('complex64', 2e-7), ('complex128', 2e-15), ('clongdouble', 2e-15)):
        a = [cmpl(-5., 0), cmpl(-5., -5.), cmpl(-5., 5.),
             cmpl(0., -5.), cmpl(0., 0.), cmpl(0., 5.),
             cmpl(-0., -5.), cmpl(-0., 0.), cmpl(-0., 5.),
             cmpl(-0., -0.), cmpl(inf, 0.), cmpl(inf, 5.),
             cmpl(inf, -0.), cmpl(ninf, 0.), cmpl(ninf, 5.),
             cmpl(ninf, -0.), cmpl(ninf, inf), cmpl(inf, inf),
             cmpl(ninf, ninf), cmpl(5., inf), cmpl(5., ninf),
             cmpl(nan, 5.), cmpl(5., nan), cmpl(nan, nan)]
        b = exp2(array(a, dtype=c))
        for i in range(len(a)):
            try:
                res = self.c_pow((2, 0), (a[i].real, a[i].imag))
            except OverflowError:
                res = (inf, nan)
            except ValueError:
                res = (nan, nan)
            msg = 'result of 2**%r(%r) got %r expected %r\n ' % (c, a[i], b[i], res)
            # cast untranslated boxed results to float,
            # does no harm when translated
            t1 = float(res[0])
            t2 = float(b[i].real)
            self.rAlmostEqual(t1, t2, rel_err=rel_err, msg=msg)
            t1 = float(res[1])
            t2 = float(b[i].imag)
            self.rAlmostEqual(t1, t2, rel_err=rel_err, msg=msg)
def make_subclusters(cc, log2_expdf_cell, gene_corr_list=False, fraction_to_plot=8,
                     filename=filename, base_name=base_name):
    parent = cc[0][1]
    p_num = cc[0][0]
    l_nums = [x[0] for x in cc]
    c_lists = [c[1] for c in cc]
    group_ID = 0
    for num_members, cell_list in zip(l_nums, c_lists):
        if num_members < p_num and num_members >= p_num / fraction_to_plot:
            group_ID += 1
            title = 'Group_' + str(group_ID) + '_with_' + str(num_members) + '_cells'
            cell_subset = log2_expdf_cell[cell_list]
            gene_subset = cell_subset.transpose()
            norm_df_cell1 = np.exp2(cell_subset)
            norm_df_cell = norm_df_cell1 - 1
            norm_df_cell.to_csv(os.path.join(filename, base_name + '_' + title + '_matrix.txt'),
                                sep='\t', index_col=0)
            if label_map:
                top_pca = plot_PCA(gene_subset, num_genes=gene_number, title=title,
                                   plot=False, label_map=label_map)
            else:
                top_pca = plot_PCA(gene_subset, num_genes=gene_number, title=title, plot=False)
            if top_pca != []:
                top_pca_by_gene = gene_subset[top_pca]
                top_pca_by_cell = top_pca_by_gene.transpose()
                if gene_corr_list:
                    top_genes_search = [x for x in top_pca]
                    corr_plot(gene_corr_list + top_genes_search[0:3], gene_subset, title=title)
                cell_linkage, plotted_df_by_gene, col_order = clust_heatmap(
                    top_pca, top_pca_by_gene, num_to_plot=gene_number, title=title,
                    plot=False, label_map=label_map)
                plt.close()
            else:
                pass
def sym_diff(self, var):
    """Symbolically differentiate with respect to var."""
    diffmap = {np.absolute: lambda x: np.sign(x),
               np.sign: lambda x: 0,
               np.exp: lambda x: np.exp(x),
               np.exp2: lambda x: np.exp2(x) * np.log(2),
               np.log: lambda x: x**(-1),
               np.log2: lambda x: (x * np.log(2))**(-1),
               np.log10: lambda x: (x * np.log(10))**(-1),
               np.sqrt: lambda x: (1/2) * x**(-1/2),
               np.square: lambda x: 2*x,
               np.sin: lambda x: np.cos(x),
               np.cos: lambda x: -np.sin(x),
               np.tan: lambda x: np.cos(x)**(-2),
               np.arcsin: lambda x: (1 - x**2)**(-1/2),
               np.arccos: lambda x: -(1 - x**2)**(-1/2),
               np.arctan: lambda x: (1 + x**2)**(-1),
               np.sinh: lambda x: np.cosh(x),
               np.cosh: lambda x: np.sinh(x),
               np.tanh: lambda x: np.cosh(x)**(-2),
               np.arcsinh: lambda x: (x**2 + 1)**(-1/2),
               np.arccosh: lambda x: (x**2 - 1)**(-1/2),
               np.arctanh: lambda x: (1 - x**2)**(-1)}
    arg0 = self.args[0]
    diff0 = arg0.sym_diff(var)
    return diffmap[self.func](arg0) * diff0
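# Quick standalone check (independent of the class above) that the np.exp2 entry in
# the derivative table matches a central finite difference: d/dx 2**x = 2**x * ln(2).
import numpy as np

x = np.linspace(-2.0, 2.0, 9)
h = 1e-6
numeric = (np.exp2(x + h) - np.exp2(x - h)) / (2 * h)
analytic = np.exp2(x) * np.log(2)
assert np.allclose(numeric, analytic, rtol=1e-6)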
def __init__(self, dataset, ranking_size):
    Metric.__init__(self, dataset, ranking_size)
    self.name = 'ERR'

    # ERR needs the maximum relevance grade for the dataset
    # For MQ200*, this is 2; for MSLR, this is 4
    self.maxrel = None
    if self.dataset.name.startswith('MSLR'):
        self.maxrel = numpy.exp2(4)
    elif self.dataset.name.startswith('MQ200'):
        self.maxrel = numpy.exp2(2)
    else:
        print("ERR:init [ERR] Unknown dataset. Use MSLR/MQ200*", flush=True)
        sys.exit(0)

    print("ERR:init [INFO] RankingSize", ranking_size, flush=True)
def gaussPL(y, rPerp, thetaV, phi, sig, kap):
    """
    Define the gaussian power-law energy profile. This profile is defined by
    [2^(-x/sig)]^(2*kap), where x will be thetaPrime. This profile is a basic
    gaussian raised to a power-law component (kap) and adjusted such that
    sig = FWHM.

    y [0-1]: The scaled variable in the radial direction. y := R/Rl.
    rPerp [0-1]: The perpendicular distance from the LOS in scaled units of Rl.
        Through testing it should not generally be greater than 0.2.
    thetaV [0-~pi/4]: The viewing angle, in radians, between the LOS to observer
        and the jet emission axis.
    phi [0-pi]: Interior angle of spherical triangle. phi = 0 corresponds to the
        direction toward the main axis from the LOS.
    sig: The angular scale (width) of the profile. This value defines the FWHM.
    kap: Power-law index on the profile. kap = 0 defines a flat profile.
        kap < 1 defines a sharper profile (higher kurtosis). kap > 1 tends
        toward a Heaviside.
    """
    func = np.divide(np.power(thetaPrime(y, rPerp, thetaV, phi), 2.0 * kap),
                     np.power(sig, 2.0 * kap))
    return np.exp2(-func)
def calculate_tmm_norm_factor(ref, sample, trim_m=.3, trim_a=.05):
    if np.abs(ref - sample).sum() < 1e-10:
        return 1.

    zero_positions = ((ref == 0) | (sample == 0))
    ref_nonzero = ref[~zero_positions]
    sample_nonzero = sample[~zero_positions]

    log_ref_nonzero = np.log2(ref_nonzero)
    log_sample_nonzero = np.log2(sample_nonzero)

    M = log_sample_nonzero - log_ref_nonzero
    A = (log_sample_nonzero + log_ref_nonzero) / 2

    readsum_ref = ref_nonzero.sum()
    readsum_sample = sample_nonzero.sum()
    weights = 1. / ((readsum_ref - ref_nonzero) / (readsum_ref * ref_nonzero) +
                    (readsum_sample - sample_nonzero) / (readsum_sample * sample_nonzero))

    M_trim_min, M_trim_max = M.quantile([trim_m, 1 - trim_m])
    A_trim_min, A_trim_max = A.quantile([trim_a, 1 - trim_a])

    trimming_mask = ((M > M_trim_min) & (M < M_trim_max) &
                     (A > A_trim_min) & (A < A_trim_max))
    M_trimmed = M[trimming_mask]
    weights_trimmed = weights[trimming_mask]

    return np.exp2((M_trimmed * weights_trimmed).sum() / weights_trimmed.sum())
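# Hedged usage sketch for calculate_tmm_norm_factor above. Inputs are assumed to be
# pandas Series of per-gene counts (the M/A trimming relies on Series.quantile). The
# toy counts are invented; a sample with roughly twice the reference counts should
# yield a factor close to 2.
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
ref_counts = pd.Series(rng.poisson(100, size=500).astype(float))
sample_counts = pd.Series(rng.poisson(200, size=500).astype(float))
print(calculate_tmm_norm_factor(ref_counts, sample_counts))   # roughly 2.0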
def transfer_fields(segments, cnarr, ignore=params.IGNORE_GENE_NAMES):
    """Map gene names, weights, depths from `cnarr` bins to `segarr` segments.

    Segment gene name is the comma-separated list of bin gene names. Segment
    weight is the sum of bin weights, and depth is the (weighted) mean of bin
    depths.
    """
    if not len(cnarr):
        return [], [], []

    ignore += params.ANTITARGET_ALIASES
    if 'weight' not in cnarr:
        cnarr['weight'] = 1
    if 'depth' not in cnarr:
        cnarr['depth'] = np.exp2(cnarr['log2'])
    seggenes = ['-'] * len(segments)
    segweights = np.zeros(len(segments))
    segdepths = np.zeros(len(segments))
    for i, (_seg, subprobes) in enumerate(cnarr.by_ranges(segments)):
        if not len(subprobes):
            continue
        segweights[i] = subprobes['weight'].sum()
        if subprobes['weight'].sum() > 0:
            segdepths[i] = np.average(subprobes['depth'], weights=subprobes['weight'])
        subgenes = [g for g in pd.unique(subprobes['gene']) if g not in ignore]
        if subgenes:
            seggenes[i] = ",".join(subgenes)
    return seggenes, segweights, segdepths
def __init__(self, dataset, ranking_size, allow_repetitions):
    Metric.__init__(self, dataset, ranking_size)
    self.discountParams = 1.0 + numpy.array(range(self.rankingSize), dtype=numpy.float64)
    self.discountParams[0] = 2.0
    self.discountParams[1] = 2.0
    self.discountParams = numpy.reciprocal(numpy.log2(self.discountParams))
    self.name = 'NDCG'

    self.normalizers = []
    numQueries = len(self.dataset.docsPerQuery)
    for currentQuery in range(numQueries):
        validDocs = min(self.dataset.docsPerQuery[currentQuery], ranking_size)
        currentRelevances = self.dataset.relevances[currentQuery]

        # Handle filtered datasets properly
        if self.dataset.mask is not None:
            currentRelevances = currentRelevances[self.dataset.mask[currentQuery]]

        maxRelevances = None
        if allow_repetitions:
            maxRelevances = numpy.repeat(currentRelevances.max(), validDocs)
        else:
            maxRelevances = -numpy.sort(-currentRelevances)[0:validDocs]

        maxGain = numpy.exp2(maxRelevances) - 1.0
        maxDCG = numpy.dot(self.discountParams[0:validDocs], maxGain)

        self.normalizers.append(maxDCG)

        if currentQuery % 1000 == 0:
            print(".", end="", flush=True)

    print("", flush=True)
    print("NDCG:init [INFO] RankingSize", ranking_size,
          "\t AllowRepetitions?", allow_repetitions, flush=True)
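# Standalone sketch of the gain convention used by the ERR and NDCG metrics above:
# a graded relevance r contributes gain 2**r - 1. The discounts here are the standard
# 1/log2(rank + 1); note that the NDCG __init__ above additionally clamps the first
# two discount positions to 1/log2(2). The relevance values are made up.
import numpy as np

relevances = np.array([3, 2, 0, 1])                  # hypothetical ranked list
gains = np.exp2(relevances) - 1.0                    # [7, 3, 0, 1]
discounts = np.reciprocal(np.log2(np.arange(2, len(relevances) + 2, dtype=float)))
dcg = np.dot(discounts, gains)
ideal_dcg = np.dot(discounts, np.exp2(-np.sort(-relevances)) - 1.0)
print(dcg / ideal_dcg)                               # NDCG in [0, 1]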
def tune(my_corpus, dictionary, min_topics=2, max_topics=50, step=2):
    def sym_kl(p, q):
        return np.sum([scipy.stats.entropy(p, q), scipy.stats.entropy(q, p)])

    kl = []
    Hbar = []
    perplexity = []
    n_topics = []
    l = np.array([sum(cnt for _, cnt in doc) for doc in my_corpus])
    corpus = Index.get_corpus('train features')
    for i in range(min_topics, max_topics, step):
        n_topics.append(i)
        lda = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=dictionary,
                                              num_topics=i, alpha='auto')
        m1 = scipy.sparse.csc_matrix(lda.expElogbeta)
        U, cm1, V = sparsesvd(m1, m1.shape[0])
        # Document-topic matrix
        lda_topics = lda[my_corpus]
        m2 = gensim.matutils.corpus2dense(lda_topics, lda.num_topics).transpose()
        cm2 = l.dot(m2)
        cm2 = cm2 + 0.0001
        cm2norm = np.linalg.norm(l)
        cm2 = cm2 / cm2norm
        kl.append(sym_kl(cm1, cm2))
        entropy_list = [scipy.stats.entropy([x[1] for x in lda[v]]) for v in my_corpus]
        Hbar.append(np.mean(entropy_list))
        perplexity.append(lda.log_perplexity(my_corpus))
        print("NumTopics: %s | Unscaled Entropy: %s | Per-word-bound: %s | "
              "Per-word-perplexity: %s | Arun measure %s" %
              (i, Hbar[-1], perplexity[-1], np.exp2(-perplexity[-1]), kl[-1]))
    return n_topics, Hbar, perplexity, kl
def find_freq(note):
    '''
    Converts a note into a frequency in Hertz.
    https://en.wikipedia.org/wiki/Musical_note#Note_frequency_.28hertz.29

    >>> find_freq('A4')
    440.0
    >>> find_freq('C5')
    523.25113060119725
    >>> find_freq('F4')
    349.22823143300388
    >>> find_freq('E4')
    329.62755691286992
    >>> find_freq('B3')
    246.94165062806206
    >>> find_freq('G3')
    195.99771799087463
    >>> find_freq('D3')
    146.83238395870379
    >>> find_freq('A2')
    110.0
    >>> find_freq('E2')
    82.406889228217494

    Arguments:
    - `note`: a string representing a note, like A4, Ab4 or C#-1
    '''
    letter, octave = interpret_note(note)
    # calculate the number of semitones away from A4
    offset = NOTES_TO_SEMITONES[letter] + (octave - 4) * 12
    return numpy.exp2(offset / 12.) * A4
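# Minimal standalone check of the equal-temperament relation used above: each
# semitone multiplies the frequency by 2**(1/12), so f = 440 * 2**(n/12) for a note
# n semitones away from A4.
import numpy as np

A4_FREQ = 440.0
for semitones, expected in [(0, 440.0), (3, 523.2511306011972), (12, 880.0)]:
    freq = A4_FREQ * np.exp2(semitones / 12.0)
    assert abs(freq - expected) < 1e-9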
print(repr(initial - 1.2))          # Subtract element values by 1.2
print(repr(initial * 2))            # Double element values
print(repr(initial / 2))            # Halve element values
print(repr(initial // 2))           # Integer division (half)
print(repr(initial ** 2))           # Square element values
print(repr(initial ** 0.5))         # Square root element values

print(repr(np.exp(initial)))        # Power of e (Euler's Number)
print(repr(np.exp2(initial)))       # Power of 2
print(repr(np.power(3, initial)))   # Raises to power of 3

powerarray = np.array([[10.2, 4], [3, 5]])
print(repr(np.power(initial, powerarray)))  # Raises each value to the power of the other array (powerarray)

newarray = np.array([[1, 10], [np.e, np.pi]])
print(repr(np.log(newarray)))       # Natural logarithm (ln or base e)
print(repr(np.log10(newarray)))     # Base 10 logarithm
    nargs='+',
    help="""CNVkit coverage files to update
    (*.targetcoverage.cnn, *.antitargetcoverage.cnn).""")
AP.add_argument("-d", "--output-dir", default=".",
                help="""Directory to write output .cnn files.""")
AP.add_argument("-s", "--suffix", default=".updated",
                help="""Filename suffix to add before the '.cnn' extension in
                output files. [Default: %(default)s]""")
args = AP.parse_args()

for fname in args.cnn_files:
    cnarr = cnvlib.read(fname)
    # Convert coverage depths from log2 scale to absolute scale.
    # NB: The log2 values are un-centered in CNVkit v0.7.0(?) through v0.7.11;
    # earlier than that, the average 'depth' will be about 1.0.
    cnarr['depth'] = np.exp2(cnarr['log2'])
    # Construct the output filename
    base, ext = os.path.basename(fname).rsplit('.', 1)
    if '.' in base:
        base, zone = base.rsplit('.', 1)
        out_fname = '.'.join((base + args.suffix, zone, ext))
    else:
        # e.g. reference.cnn or .cnr file, no "*.targetcoverage.*" in name
        out_fname = '.'.join((base + args.suffix, ext))
    cnvlib.tabio.write(cnarr, os.path.join(args.output_dir, out_fname))
def get_N_eff(self):
    N_eff = 1 / np.sum(np.exp2(self.mu_weights))
    return N_eff
def create_mock_data( test_cases=(0, 1, 10, 20, 50, 100, 250, 500), number_of_non_pathway_genes=1000, average=28, gene_sigma=2, group_sigma=1, experiment_sigma=1, tech_rep_sigma=0.3, noise_sigma=0.5, design_combinations=((True, True), (False, True), (True, False), ( False, False)), #(has group, has technical replicates) number_of_technical_replicates=(3, 8, 1, 1), # bad fix to make it easy number_of_experiments=(3, 6, 15, 30), number_of_groups=(4, 1, 8, 1), # bad fix to make it easy seed=100, save_to_disk=True): # TODO this seed seems insufficient? np.seed = seed N = sum(test_cases) + number_of_non_pathway_genes os.makedirs(MockData.mock_data_dir, exist_ok=True) # accumulate all genes pathway_genes = {} for file_name in os.listdir(MockData.pathway_dir): file = os.path.join(MockData.pathway_dir, file_name) with open(file) as f: pathway = f.readline().strip() pathway_genes[pathway] = [] f.readline() for line in f: pathway_genes[pathway].append(line.strip()) # sort the pathways into the different possible cases # TODO this samples a lot of duplicate gene names free_pathway_genes = set(pathway_genes) test_case_dict = {t: [] for t in test_cases} for test_case in reversed(sorted(test_cases)): to_rm = set() for pathway in free_pathway_genes: if len(pathway_genes[pathway]) >= test_case: test_case_dict[test_case].append(pathway) to_rm.add(pathway) free_pathway_genes = free_pathway_genes - to_rm # randomly sample the genes from the pathways genes = [] for test_case in test_cases: if test_case == 0: continue pathway = np.random.choice(test_case_dict[test_case], 1)[0] genes += list( np.random.choice(pathway_genes[pathway], test_case, replace=False)) # add the non pathway genes for i in range(number_of_non_pathway_genes): genes.append(f"GENENP{i}") assert len(genes) == N genes = pd.Series(genes, name="Gene names") for i, (has_group, has_tech_rep) in enumerate(design_combinations): n_experiments = number_of_experiments[i] n_tech_reps = number_of_technical_replicates[i] n_groups = number_of_groups[i] print( f"n group: {n_groups}, n exp: {n_experiments}, n tech: {n_tech_reps}" ) if has_group: assert n_groups > 1 else: assert n_groups == 1 if has_tech_rep: assert n_tech_reps > 1 else: assert n_tech_reps == 1 assert n_experiments > 1 group_name = np.array([f"Group{x}_" for x in range(n_groups)]).reshape( (n_groups, 1, 1)) experiment_name = np.array( [f"Experiment{x}_" for x in range(n_experiments)]).reshape( (1, n_experiments, 1)) technical_replicate_name = np.array( [f"Rep{x}" for x in range(n_tech_reps)]).reshape( (1, 1, n_tech_reps)) names = np.core.defchararray.add( np.core.defchararray.add(group_name, experiment_name), technical_replicate_name).reshape( (n_groups * n_experiments * n_tech_reps)) average_effect = np.ones( (N, n_groups, n_experiments, n_tech_reps)) * average gene_effect = np.random.normal(0, gene_sigma, (N, 1, 1, 1)) group_effect = np.random.uniform(0, group_sigma, (N, n_groups, 1, 1)) experiment_effect = np.random.normal(0, experiment_sigma, (1, 1, n_experiments, 1)) technical_replicate_effect = np.random.normal( 0, tech_rep_sigma, (1, 1, 1, n_tech_reps)) noise = np.random.normal(0, noise_sigma, (N, n_groups, n_experiments, n_tech_reps)) experiment_data = average_effect + gene_effect + group_effect + experiment_effect + technical_replicate_effect + noise assert experiment_data.shape == (N, n_groups, n_experiments, n_tech_reps) ex = experiment_data.reshape( (N, n_groups * n_experiments * n_tech_reps)) df = pd.DataFrame(ex, index=genes, columns=names) df = df.rename(lambda x: f"Intensity 
{x}", axis=1) df = np.exp2(df) df_lfq = df.rename( {col: col.replace("Intensity", "LFQ intensity") for col in df}, axis=1) df_ibaq = df.rename( {col: col.replace("Intensity", "iBAQ") for col in df}, axis=1) df = pd.concat([df, df_lfq, df_ibaq], axis=1) # TODO drop some unimportant genes randomly # required csv columns: "Fasta headers", "Only identified by site", "Reverse", "Potential contaminant", all empty # required col "Gene names", which is to be sampled from the pathways df["Fasta headers"] = "" df["Only identified by site"] = "" df["Reverse"] = "" df["Potential contaminant"] = "" df = df.reset_index() df["Protein names"] = df["Gene names"] + "P" if save_to_disk: dir_name = f"{'has_group' if has_group else 'no_group'}_{'has_tech' if has_tech_rep else 'no_tech'}" dir_name = os.path.join(MockData.mock_data_dir, dir_name, "txt") os.makedirs(dir_name, exist_ok=True) df.to_csv(os.path.join(dir_name, "proteinGroups.txt"), index=False, header=True, sep="\t") else: return df
def tanh2(x): return (np.exp2(2.0 * x) - 1.0) / (np.exp2(2.0 * x) + 1.0)
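# Side note on tanh2 above: since 2**(2x) = e**(2x * ln 2), tanh2(x) is equivalent to
# np.tanh(x * np.log(2)). A quick standalone check:
import numpy as np

x = np.linspace(-4, 4, 17)
assert np.allclose((np.exp2(2 * x) - 1) / (np.exp2(2 * x) + 1), np.tanh(x * np.log(2)))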
def LinExp(x, y): return np.exp2(linreg[y].intercept + linreg[y].slope * x)
"conj": lambda _: NotImplemented, "conjugate": lambda _: NotImplemented, # It requires complex type "cos": F.cos, "cosh": pandas_udf(lambda s: np.cosh(s), DoubleType(), PandasUDFType.SCALAR), "deg2rad": pandas_udf(lambda s: np.deg2rad(s), DoubleType(), PandasUDFType.SCALAR), "degrees": F.degrees, "exp": F.exp, "exp2": pandas_udf(lambda s: np.exp2(s), DoubleType(), PandasUDFType.SCALAR), "expm1": F.expm1, "fabs": pandas_udf(lambda s: np.fabs(s), DoubleType(), PandasUDFType.SCALAR), "floor": F.floor, "frexp": lambda _: NotImplemented, # 'frexp' output lengths become different # and it cannot be supported via pandas UDF. "invert": pandas_udf(lambda s: np.invert(s), DoubleType(), PandasUDFType.SCALAR), "isfinite": lambda c: c != float("inf"), "isinf": lambda c: c == float("inf"),
def float2fix(fix_point, value): return value.astype(np.float32) / np.exp2(fix_point, dtype=np.float32)
def fix2float(fix_point, value): return value.astype(np.float32) * np.exp2(fix_point, dtype=np.float32)
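# Hedged round-trip sketch for the two helpers above, as written: float2fix divides by
# 2**fix_point and fix2float multiplies by it, so applying one after the other recovers
# the input up to float32 precision. The fix_point value here is arbitrary.
import numpy as np

values = np.array([0.5, -1.25, 3.0], dtype=np.float32)
fix_point = 7
assert np.allclose(fix2float(fix_point, float2fix(fix_point, values)), values)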
def compare(self): dfs = [] for level in self.levels: for state in [self.monomer_qm, self.complex_qm]: if level in state.columns or ( level in ['complex_abundance', 'interactor_ratio'] and 'interactor_abundance' in state.columns): if level in state.columns: dat = state[state[level] > 0].copy() else: dat = state[state['interactor_abundance'] > 0].copy() dat = dat.rename( index=str, columns={"interactor_abundance": level}) dat['query_id'] = dat['bait_id'] + '_' + dat['prey_id'] dat['query_peptide_id'] = dat['bait_id'] + '_' + dat[ 'prey_id'] + '_' + dat['peptide_id'] dat['quantification_id'] = 'viper_' + dat[ 'condition_id'] + '_' + dat['replicate_id'] dat['run_id'] = dat['condition_id'] + '_' + dat[ 'replicate_id'] qm_ids = dat[[ 'quantification_id', 'condition_id', 'replicate_id' ]].drop_duplicates() if self.missing_peptides == 'drop': peptide_fill_value = np.nan elif self.missing_peptides == 'zero': peptide_fill_value = 0 else: sys.exit( "Error: Invalid parameter for 'missing_peptides' selected." ) # Generate matrix for fold-change quant_mx = dat.pivot_table( index=['query_id', 'is_bait', 'query_peptide_id'], columns='quantification_id', values=level, fill_value=peptide_fill_value) # Generate matrix for ratio-change ratio_mx = dat.pivot_table( index=['query_id', 'is_bait', 'query_peptide_id'], columns='quantification_id', values=level, fill_value=peptide_fill_value) # Generate matrix for VIPER data_mx = dat.pivot_table(index='query_peptide_id', columns='quantification_id', values=level, fill_value=0) # Generate subunit set for VIPER if level == 'complex_abundance': # Complex abundance testing combines bait and prey peptides into a single regulon with positive tfmode sign query_set = dat[[ 'query_id', 'is_bait', 'query_peptide_id' ]].copy() query_set['query_id'] = query_set['query_id'] + "+1" subunit_set = query_set.groupby( ['query_id'])['query_peptide_id'].apply( lambda x: x.unique().tolist()).to_dict() subunit_tfm = query_set.groupby([ 'query_id' ])['query_peptide_id'].apply( lambda x: np.repeat(1, len(x.unique()))).to_dict() elif level == 'interactor_ratio': # Complex stoichiometry testing combines bait and prey peptides into a single regulon but with different tfmode signs query_set = dat[[ 'query_id', 'is_bait', 'query_peptide_id' ]].copy() query_set.loc[query_set['is_bait'] == 0, 'is_bait'] = -1 query_set['query_id'] = query_set['query_id'] + "+1" subunit_set = query_set.groupby( ['query_id'])['query_peptide_id'].apply( lambda x: x.unique().tolist()).to_dict() subunit_tfm = query_set.groupby(['query_id'])[[ 'query_peptide_id', 'is_bait' ]].apply(lambda x: x.drop_duplicates()['is_bait']. 
tolist()).to_dict() else: # All other modalities are assessed on protein-level, separately for bait and prey proteins query_set = dat[[ 'query_id', 'is_bait', 'query_peptide_id' ]].copy() query_set['query_id'] = query_set[ 'query_id'] + "+" + query_set['is_bait'].astype( int).astype(str) subunit_set = query_set.groupby( ['query_id'])['query_peptide_id'].apply( lambda x: x.unique().tolist()).to_dict() subunit_tfm = query_set.groupby([ 'query_id' ])['query_peptide_id'].apply( lambda x: np.repeat(1, len(x.unique()))).to_dict() # Run VIPER results = self.viper(data_mx, subunit_set, [subunit_tfm]) results[['query_id', 'is_bait' ]] = results['query_id'].str.split("+", expand=True) results['is_bait'] = results['is_bait'].astype('int') results['level'] = level # Append reverse information for complex_abundance and interactor_ratio levels if level in ['complex_abundance', 'interactor_ratio']: results_rev = results.copy() results_rev['is_bait'] = 0 results = pd.concat([results, results_rev]) for comparison in self.comparisons: results['condition_1'] = comparison[0] results['condition_2'] = comparison[1] # Compute fold-change and absolute fold-change quant_mx_avg = quant_mx.groupby([ 'query_id', 'is_bait', 'query_peptide_id' ]).apply(lambda x: pd.Series({ 'comparison_0': np.nanmean( np.exp2(x[qm_ids[qm_ids[ 'condition_id'] == comparison[0]][ 'quantification_id'].values].values)), 'comparison_1': np.nanmean( np.exp2(x[qm_ids[qm_ids[ 'condition_id'] == comparison[1]][ 'quantification_id'].values].values)) })).reset_index( level=['query_id', 'is_bait', 'query_peptide_id']) if self.peptide_log2fx: quant_mx_log2fx = quant_mx_avg.groupby([ 'query_id', 'is_bait', 'query_peptide_id' ]).apply(lambda x: np.log2((x['comparison_0']) / ( x['comparison_1']))).reset_index(level=[ 'query_id', 'is_bait', 'query_peptide_id' ]) quant_mx_log2fx_prot = quant_mx_log2fx.groupby( ['query_id', 'is_bait']).mean().reset_index() quant_mx_log2fx_prot.columns = [ 'query_id', 'is_bait', 'log2fx' ] quant_mx_log2fx_prot['abs_log2fx'] = np.abs( quant_mx_log2fx_prot['log2fx']) else: quant_mx_avg_prot = quant_mx_avg.groupby([ 'query_id', 'is_bait' ])[['comparison_0', 'comparison_1']].mean().reset_index() quant_mx_log2fx_prot = quant_mx_avg_prot.groupby([ 'query_id', 'is_bait' ]).apply(lambda x: np.log2((x['comparison_0']) / ( x['comparison_1']))).reset_index( level=['query_id', 'is_bait']) quant_mx_log2fx_prot.columns = [ 'query_id', 'is_bait', 'log2fx' ] quant_mx_log2fx_prot['abs_log2fx'] = np.abs( quant_mx_log2fx_prot['log2fx']) results = pd.merge(results, quant_mx_log2fx_prot, on=['query_id', 'is_bait'], how='left') # Compute interactor ratio if level in ['complex_abundance', 'interactor_ratio']: ratio_mx_prot = ratio_mx.groupby( ['query_id', 'is_bait'])[[ c for c in ratio_mx.columns if c.startswith("viper_") ]].mean().reset_index() ratio_mx_prot_ratio = ratio_mx_prot.groupby( 'query_id').apply(lambda x: (x.loc[x[ 'is_bait'] == 0].squeeze( ) + 1) / (x.loc[x['is_bait'] == 1].squeeze( ) + 1)).reset_index(level='query_id') ratio_change = ratio_mx_prot_ratio.groupby( 'query_id' ).apply(lambda x: np.mean(x[qm_ids[ qm_ids['condition_id'] == comparison[0] ]['quantification_id'].values].values) / np.mean(x[ qm_ids[qm_ids['condition_id'] == comparison[1] ]['quantification_id'].values].values) ).reset_index(level='query_id') ratio_change.columns = [ 'query_id', 'interactor_ratio' ] ratio_change.loc[ ratio_change['interactor_ratio'] > 1, 'interactor_ratio'] = (1 / ratio_change.loc[ ratio_change['interactor_ratio'] > 1, 'interactor_ratio']) 
results = pd.merge(results, ratio_change, on=['query_id'], how='left') else: results['interactor_ratio'] = np.nan # Conduct statistical tests # Paired analysis: For example replicates 1 of conditions A & B were measured by the same SILAC experiment if self.paired: results_pvalue = results.groupby([ 'query_id', 'is_bait', 'level' ]).apply(lambda x: pd.Series({ "pvalue": ttest_rel( x[qm_ids[qm_ids['condition_id'] == comparison[0]].sort_values( by=['quantification_id']) ['quantification_id'].values].values[0], x[qm_ids[qm_ids['condition_id'] == comparison[1]].sort_values(by=[ 'quantification_id' ])['quantification_id'].values ].values[0])[1] })).reset_index() # Treat samples as independent measurements, e.g. quantification by LFQ else: results_pvalue = results.groupby([ 'query_id', 'is_bait', 'level' ]).apply(lambda x: pd.Series({ "pvalue": ttest_ind(x[qm_ids[qm_ids[ 'condition_id'] == comparison[0]][ 'quantification_id'].values].values[0], x[qm_ids[qm_ids['condition_id'] == comparison[1]] ['quantification_id'].values]. values[0], equal_var=True)[1] })).reset_index() results = pd.merge(results, results_pvalue, on=['query_id', 'is_bait', 'level']) # Set p-value to 1.0 if invalid results.loc[np.isnan(results['pvalue']), 'pvalue'] = 1.0 # Append meta information results = pd.merge( results, dat[['query_id', 'bait_id', 'prey_id']].drop_duplicates(), on='query_id') dfs.append(results[[ 'condition_1', 'condition_2', 'level', 'bait_id', 'prey_id', 'is_bait', 'log2fx', 'abs_log2fx', 'interactor_ratio', 'pvalue' ] + [ c for c in results.columns if c.startswith("viper_") ]]) return pd.concat(dfs, ignore_index=True, sort=True).sort_values(by='pvalue', ascending=True, na_position='last')
def exp2(x): return np.exp2(x)
def sig2(x): return 1. / (1. + np.exp2(-x))
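# Quick check for sig2 above: 1 / (1 + 2**(-x)) equals the logistic sigmoid evaluated
# at x * ln(2).
import numpy as np

x = np.linspace(-6, 6, 25)
assert np.allclose(1. / (1. + np.exp2(-x)), 1. / (1. + np.exp(-x * np.log(2))))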
def mytrainlgb(): import pandas as pd import numpy as np import lightgbm as lgb from sklearn.model_selection import train_test_split from sklearn.metrics import log_loss from sklearn import preprocessing import warnings warnings.filterwarnings("ignore") import time import pandas as pd train = pd.read_csv('../data/train.csv', encoding='gbk') test = pd.read_csv('../data/meinian_round1_test_b_20180505.csv', encoding='gbk') testt = pd.read_csv('../data/test.csv', encoding='gbk') print('haha====================================================') # ============================================================================= # print(train.columns) # train=labelencodeall(train) # train.to_csv('../tmp/train.csv') # ============================================================================= # ============================================================================= # train=pd.merge(train,targetall,on='vid',how='left') # train=train[train.收缩压.notnull()] # testt=train[train.血清高密度脂蛋白.isnull()] # ============================================================================= # ============================================================================= # data=pd.concat([train,testt]) # data=zuhe(data) # train=data[data.收缩压.notnull()] # testt=data[data.收缩压.isnull()] # ============================================================================= print('============================') print(train.shape) print(test.shape) print(testt.shape) print('============================') train['收缩压'] = train['收缩压'].astype(float) train['舒张压'] = train['舒张压'].astype(float) train['血清甘油三酯'] = train['血清甘油三酯'].astype(float) train['收缩压'] = np.log(train['收缩压'] + 1) train['舒张压'] = np.log(train['舒张压'] + 1) train['血清甘油三酯'] = np.log(train['血清甘油三酯'] - 0.099999999) train['血清高密度脂蛋白'] = np.log(train['血清高密度脂蛋白']) train['血清低密度脂蛋白'] = np.log2(train['血清低密度脂蛋白'] + 1.22001) print(train.info()) train.pop('vid') test_index = test.pop('vid') testt_index = testt.pop('vid') columns = list(train.columns) columns.remove('收缩压') columns.remove('舒张压') columns.remove('血清甘油三酯') columns.remove('血清高密度脂蛋白') columns.remove('血清低密度脂蛋白') print(train.mean()) new = pd.DataFrame() new['vid'] = testt_index col = ['收缩压', '舒张压', '血清甘油三酯', '血清高密度脂蛋白', '血清低密度脂蛋白'] for t in col: print(t) X = train[columns] y = train[t] print(y) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1000) gbm = lgb.LGBMRegressor( objective='regression', # num_leaves=23, learning_rate=0.08, #max_depth=25, #max_bin=10000, drop_rate=0.10, #is_unbalance=True, n_estimators=1000) #1000 gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric='l1', early_stopping_rounds=100) y_pred = gbm.predict(testt[columns], num_iteration=gbm.best_iteration) print( '======================================================================================' ) predictors = [i for i in X_train.columns] feat_imp = pd.Series(gbm.feature_importance(), predictors).sort_values(ascending=False) print(feat_imp) new[t] = y_pred new['收缩压'] = np.exp(new['收缩压']) - 1 new['舒张压'] = np.exp(new['舒张压']) - 1 new['血清甘油三酯'] = np.exp(new['血清甘油三酯']) + 0.1 new['血清高密度脂蛋白'] = np.exp(new['血清高密度脂蛋白']) new['血清低密度脂蛋白'] = np.exp2(new['血清低密度脂蛋白']) - 1.22 zz = pd.DataFrame(test_index) zz.columns = ['vid'] zz = pd.merge(zz, new, on='vid', how='left') #zz.to_csv('../tem/old收缩压lgb_testb.csv') #zz.to_csv('../tem/all.csv') return zz
def test_exp2f(rjit):
    @rjit('float(float)')
    def exp2(x):
        return np.exp2(x)

    assert np.isclose(exp2(2.0), np.exp2(2.0))
# In[4]:

print(np.expm1(0))
print(np.expm1(1))
print(np.expm1(2))


# ### np.exp2(x)
# Computes $2^{x}$.

# In[5]:

print(np.exp2(0))
print(np.exp2(1))
print(np.exp2(2))


# ### np.log(x)
# Computes $\log{x}$ using the natural logarithm (base e).

# In[6]:

print(np.log(1))
print(np.log(2))
print(np.log(np.e))
def midinoteToFrequency(note): return 440 * np.exp2((note - 69) / 12)
def shift(ary, shift_ary):
    exp = np.rint(safelog(np.absolute(shift_ary)) / np.log(2))
    ap2 = np.multiply(np.sign(shift_ary), np.exp2(exp))
    return np.multiply(ary, ap2)
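# Hedged sketch of what shift() above does: each multiplier is snapped to the nearest
# power of two (sign preserved), so the multiplication could be implemented as a bit
# shift. `safelog` is assumed to behave like np.log for nonzero inputs; np.log is used
# here directly.
import numpy as np

ary = np.array([10.0, 10.0, 10.0])
shift_ary = np.array([3.0, -0.3, 1.0])
exp = np.rint(np.log(np.abs(shift_ary)) / np.log(2))   # [2, -2, 0]
approx = ary * np.sign(shift_ary) * np.exp2(exp)       # [40, -2.5, 10]
print(approx)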
import numpy as np
import logging
import matplotlib.pyplot as plt
import scipy.stats

from copy import copy, deepcopy
from gp import GP, GaussianKernel, PeriodicKernel

from . import bq_c
from . import linalg_c as la
from . import util

logger = logging.getLogger("bayesian_quadrature")
DTYPE = np.dtype('float64')

MIN = np.log(np.exp2(np.float64(np.finfo(np.float64).minexp + 4)))
MAX = np.log(np.exp2(np.float64(np.finfo(np.float64).maxexp - 4)))


class BQ(object):
    r"""
    Estimate an integral of the following form using Bayesian Quadrature with
    a Gaussian Process prior:

    .. math::

        Z = \int \ell(x)\mathcal{N}(x\ |\ \mu, \sigma^2)\ \mathrm{d}x

    See :meth:`~bayesian_quadrature.bq.BQ.load_options` for details on
    allowable options.

    Parameters
def DeathsExp(x, y): return np.exp2(deathsreg[y].intercept + deathsreg[y].slope * x)
import numpy as np
import scipy.io as sio  # needed for sio.loadmat below
import os
import sys
sys.path.append('/cbica/projects/pncSingleFuncParcel/Replication/scripts_Final/Functions')
import Ridge_CZ_Random_CategoricalFeatures

PredictionFolder = '/cbica/projects/pncSingleFuncParcel/Replication/Revision/PredictionAnalysis'
AtlasLabel_Folder = PredictionFolder + '/AtlasLabel'

# Import data
AtlasLabel_Mat = sio.loadmat(AtlasLabel_Folder + '/AtlasLabel_All.mat')
Behavior_Mat = sio.loadmat(PredictionFolder + '/Behavior_693.mat')
SubjectsData = AtlasLabel_Mat['AtlasLabel_All']
AgeYears = Behavior_Mat['AgeYears']
AgeYears = np.transpose(AgeYears)

# Range of parameters
Alpha_Range = np.exp2(np.arange(16) - 10)
FoldQuantity = 2

ResultantFolder = AtlasLabel_Folder + '/2Fold_RandomCV_Age'
Ridge_CZ_Random_CategoricalFeatures.Ridge_KFold_RandomCV_MultiTimes(
    SubjectsData, AgeYears, FoldQuantity, Alpha_Range, 100, ResultantFolder, 1, 0, 'all.q')

# Permutation test, 1,000 times
ResultantFolder = AtlasLabel_Folder + '/2Fold_RandomCV_Age_Permutation'
Ridge_CZ_Random_CategoricalFeatures.Ridge_KFold_RandomCV_MultiTimes(
    SubjectsData, AgeYears, FoldQuantity, Alpha_Range, 1000, ResultantFolder, 1, 1, 'all.q,basic.q')
def test_ufunc_exp2_u(A: dace.uint32[10]): return np.exp2(A)
def test_ufunc_exp2_c(A: dace.complex64[10]): return np.exp2(A)
def process_func(idx): # Load original image. orig_idx = fields['orig_idx'][idx] orig_file = fields['orig_file'][idx] orig_path = os.path.join(celeba_dir, 'img_celeba', orig_file) img = PIL.Image.open(orig_path) # Choose oriented crop rectangle. lm = landmarks[orig_idx] eye_avg = (lm[0] + lm[1]) * 0.5 + 0.5 mouth_avg = (lm[3] + lm[4]) * 0.5 + 0.5 eye_to_eye = lm[1] - lm[0] eye_to_mouth = mouth_avg - eye_avg x = eye_to_eye - rot90(eye_to_mouth) x /= np.hypot(*x) x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) y = rot90(x) c = eye_avg + eye_to_mouth * 0.1 quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) zoom = 1024 / (np.hypot(*x) * 2) # Shrink. shrink = int(np.floor(0.5 / zoom)) if shrink > 1: size = (int(np.round(float(img.size[0]) / shrink)), int(np.round(float(img.size[1]) / shrink))) img = img.resize(size, PIL.Image.ANTIALIAS) quad /= shrink zoom *= shrink # Crop. border = max(int(np.round(1024 * 0.1 / zoom)), 3) crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1])))) crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1])) if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]: img = img.crop(crop) quad -= crop[0:2] # Simulate super-resolution. superres = int(np.exp2(np.ceil(np.log2(zoom)))) if superres > 1: img = img.resize((img.size[0] * superres, img.size[1] * superres), PIL.Image.ANTIALIAS) quad *= superres zoom /= superres # Pad. pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1])))) pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0)) if max(pad) > border - 4: pad = np.maximum(pad, int(np.round(1024 * 0.3 / zoom))) img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect') h, w, _ = img.shape y, x, _ = np.mgrid[:h, :w, :1] mask = 1.0 - np.minimum( np.minimum(np.float32(x) / pad[0], np.float32(y) / pad[1]), np.minimum( np.float32(w - 1 - x) / pad[2], np.float32(h - 1 - y) / pad[3])) blur = 1024 * 0.02 / zoom img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0) img += (np.median(img, axis=(0, 1)) - img) * np.clip( mask, 0.0, 1.0) img = PIL.Image.fromarray(np.uint8(np.clip(np.round(img), 0, 255)), 'RGB') quad += pad[0:2] # Transform. img = img.transform((4096, 4096), PIL.Image.QUAD, (quad + 0.5).flatten(), PIL.Image.BILINEAR) img = img.resize((1024, 1024), PIL.Image.ANTIALIAS) img = np.asarray(img).transpose(2, 0, 1) # Verify MD5. md5 = hashlib.md5() md5.update(img.tobytes()) assert md5.hexdigest() == fields['proc_md5'][idx] # Load delta image and original JPG. with zipfile.ZipFile( os.path.join(delta_dir, 'deltas%05d.zip' % (idx - idx % 1000)), 'r') as zip: delta_bytes = zip.read('delta%05d.dat' % idx) with open(orig_path, 'rb') as file: orig_bytes = file.read() # Decrypt delta image, using original JPG data as decryption key. 
algorithm = cryptography.hazmat.primitives.hashes.SHA256() backend = cryptography.hazmat.backends.default_backend() salt = bytes(orig_file, 'ascii') kdf = cryptography.hazmat.primitives.kdf.pbkdf2.PBKDF2HMAC( algorithm=algorithm, length=32, salt=salt, iterations=100000, backend=backend) key = base64.urlsafe_b64encode(kdf.derive(orig_bytes)) delta = np.frombuffer(bz2.decompress( cryptography.fernet.Fernet(key).decrypt(delta_bytes)), dtype=np.uint8).reshape(3, 1024, 1024) # Apply delta image. img = img + delta # Verify MD5. md5 = hashlib.md5() md5.update(img.tobytes()) assert md5.hexdigest() == fields['final_md5'][idx] return img
def marker_score(proportion_in, proportion_out, pseudocount=.1):
    difference_in_proportions = proportion_in - proportion_out
    scores = np.exp2(difference_in_proportions) / (
        np.abs(difference_in_proportions) + pseudocount)
    return scores
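# Toy usage sketch for marker_score above with invented proportions, showing how the
# score combines exp2 of the in/out difference with a pseudocount-damped magnitude.
import numpy as np

proportion_in = np.array([0.9, 0.5, 0.1])
proportion_out = np.array([0.1, 0.5, 0.9])
print(marker_score(proportion_in, proportion_out))   # approximately [1.93, 10.0, 0.64]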
def test_ufunc_exp2_f(A: dace.float32[10]): return np.exp2(A)
import numpy as np
import matplotlib.pyplot as plt

a = float(input("scaling factor"))  # cast so np.exp2(a * tk) gets a numeric factor
tk = np.linspace(0, 10, 1000)
#print tk
ye = np.exp2(a * tk)
#print ye
plt.plot(tk, ye)
plt.show()
    lambda _: NotImplemented,  # It requires complex type which Koalas does not support yet
    'conjugate': lambda _: NotImplemented,  # It requires complex type
    'cos': F.cos,
    'cosh': F.pandas_udf(lambda s: np.cosh(s), DoubleType()),
    'deg2rad': F.pandas_udf(lambda s: np.deg2rad(s), DoubleType()),
    'degrees': F.degrees,
    'exp': F.exp,
    'exp2': F.pandas_udf(lambda s: np.exp2(s), DoubleType()),
    'expm1': F.expm1,
    'fabs': F.pandas_udf(lambda s: np.fabs(s), DoubleType()),
    'floor': F.floor,
    'frexp': lambda _: NotImplemented,  # 'frexp' output lengths become different
    # and it cannot be supported via pandas UDF.
    'invert': F.pandas_udf(lambda s: np.invert(s), DoubleType()),
    'isfinite': lambda c: c != float("inf"),
    'isinf': lambda c: c == float("inf"),
def get_trends_safe(self, terms_array): # Parses array into a list of 4-term lists self.termsCount = len(terms_array) self.termsList = [None] * int( np.ceil(self.termsCount / self.TERMS_PER_REQUEST)) for termIndex, termValue in enumerate(self.termsList): self.startIndex = termIndex * self.TERMS_PER_REQUEST self.endIndex = np.min([(self.startIndex + self.TERMS_PER_REQUEST), len(terms_array)]) self.termsList[termIndex] = terms_array[self.startIndex:self. endIndex] self.termsList[termIndex] = [ s.lower() for s in self.termsList[termIndex] ] self.pytrend = TrendReq(self.google_username, self.google_password, custom_useragent="My Pytrends Class") self.db = data() self.db.push_period(np.arange(0, self.PERIOD_SIZE)) self.count = 1 for regionIndex, region in enumerate(self.regions): print(region + ". " + str(regionIndex) + " of " + str(len(self.regions))) # The region for the request self.geo_tag = 'BR-' + region for termsListIndex, terms in enumerate(self.termsList): print("Terms " + str(termsListIndex) + " of " + str(len(self.termsList))) # The initial empty dataframe list self.dataframeList = [None] * len(self.dates) # The terms for the request self.terms_tag = ",".join(terms) # Requests trends for all set periods and stores in a sorted list for date in self.dates: self.trend_payload = { 'q': self.terms_tag, 'geo': self.geo_tag, 'date': date } while (True): try: self.df = self.pytrend.trend( self.trend_payload, return_type='dataframe') self.dataframeList.insert(self.dates.index(date), self.df) print(date) time.sleep(120) self.count = np.max([self.count - 1, 0]) break except Exception as exp: print("Não rolou") self.count = self.count + 1 self.wait = np.exp2([self.count]) print("Espera " + str(datetime.datetime.now()) + ", " + str(self.wait / 60)) time.sleep(self.wait) # Concats all dataframes in list into a single dataframe self.dataframe = self.dataframeList[0] for i, df in enumerate(self.dataframeList): if i > 0: self.dataframe = self.dataframe.append(df) self.csvFile = self.base_csv_file + region + str( termsListIndex) + str(datetime.datetime.now()) + ".csv" self.dataframe.to_csv(self.csvFile, sep=',', encoding="utf-8") # Pushes data for each term for this region for term in terms: self.db.push_sympthom(term, region, self.dataframe[term].values) self.db.save("data" + str(datetime.datetime.now()) + ".txt") return self.db
def main():
    # 01 numpy.array
    lst = [[1, 2, 3], [4, 5, 6]]  # elements of a Python list may have mixed types
    print(type(lst))
    nplst = np.array(lst)  # elements of a numpy array share a single data type
    print(type(nplst))
    nplst = np.array(lst, dtype=float)
    print(type(nplst))       # variable type: numpy.ndarray
    print(nplst.shape)       # array shape: (2, 3)
    print(nplst.ndim)        # number of dimensions: 2
    print(nplst.dtype)       # element type: float
    print(nplst.itemsize)    # bytes per element; 64 bits = 8 bytes
    print(nplst.size)        # array size, i.e. number of elements

    # 02 Common numpy array constructors
    print(np.zeros([2, 3]))
    print(np.ones([2, 3]))
    print("rand: uniformly distributed random numbers in [0, 1)")
    print(np.random.rand(2, 3))
    print(np.random.rand())
    print("randint: uniformly distributed random integers in [1, 10)")
    print(np.random.randint(1, 10))          # given range
    print(np.random.randint(1, 10, 2))       # given range + count
    print(np.random.randint(1, 10, (2, 3)))  # given range + shape
    print("randn: normally distributed random numbers")
    print(np.random.randn(2, 3))
    print("choice: random selection from the given values")
    print(np.random.choice([1, 2, 10, 100, 33, 4], [2, 3]))
    print("other distributions: the beta distribution")
    print(np.random.beta(1, 10, 100))  # 100 random numbers from a beta(1, 10) distribution

    # 03 Common numpy operations
    # create an arithmetic sequence (array)
    arr1 = np.arange(1, 21, 2)  # [1, 21) with step 2: 1, 3, 5, ..., 19
    # reshape the array
    arr2 = np.arange(1, 21, 2).reshape([2, 5])
    arr2 = np.arange(1, 21, 2).reshape([2, -1])  # -1 infers the column count; same as [2, 5]

    # common math functions
    np.sqrt(2)     # square root
    np.square(2)   # square
    np.exp(1)      # e raised to the given power
    np.exp2(3)     # 2 raised to the given power
    np.log(np.e)   # natural logarithm, base e
    np.log2(2)     # base-2 logarithm
    np.sin(1)      # sin(1)
    np.cos(1)      # cos(1)
    np.sin(1)**2 + np.cos(1)**2  # 1

    # sum: totals all elements by default; an axis can be specified
    arr1 = np.arange(24).reshape(2, 3, 4)
    print(arr1.sum(axis=0))  # sum over axis 0
    print(arr1.sum(0))
    print(np.sum(arr1, axis=0))
    print(np.sum(arr1, 0))
    print(arr1.sum(axis=1))  # sum over axis 1
    print(arr1.sum(axis=2))  # sum over axis 2

    # max
    print(np.max(arr1))
    print(np.max(arr1, 0))
    # min
    print(np.min(arr1))
    print(np.min(arr1, 0))

    # element-wise add/subtract/multiply/divide (shapes must match, so reshape arr1
    # to arr2's (2, 5) shape first)
    arr1 = np.arange(10).reshape(2, 5)
    print(arr1 + arr2)
    print(arr1 - arr2)
    print(arr1 * arr2)
    print(arr1 / arr2)

    # matrix (dot) product
    arr1 = np.arange(12).reshape(3, 4)
    arr2 = np.arange(12).reshape(4, 3)
    np.dot(arr1, arr2)

    # concatenating, appending, splitting arrays
    arr3 = np.arange(24).reshape(2, 3, 4)
    np.vstack((arr3, arr3))               # (2,3,4) ==> (4,3,4): more rows
    np.concatenate((arr3, arr3), axis=0)  # (2,3,4) ==> (4,3,4): more rows
    np.hstack((arr3, arr3))               # (2,3,4) ==> (2,6,4): more columns
    np.concatenate((arr3, arr3), axis=1)  # (2,3,4) ==> (2,6,4): more columns
import numpy as np

import flora_tools.gloria as gloria
import flora_tools.lwb_round as lwb_round
import flora_tools.sim.sim_node as sim_node
from flora_tools.radio_configuration import RadioConfiguration

GLORIA_DEFAULT_POWER_LEVELS = [1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
GLORIA_RETRANSMISSIONS_COUNTS = [1, 1, 1, 1, 2, 2, 2, 2, 2, 2]
GLORIA_HOP_COUNTS = [1, 1, 1, 1, 2, 2, 2, 3, 3, 3]

RADIO_MODULATIONS = [3, 5, 7, 9]  # SF9, SF7, SF5, FSK 200 Kb/s
RADIO_POWERS = [10, 22]  # [10, 22] # dBm

TIMER_FREQUENCY = 8E6  # 0.125 us
TIME_DEPTH = 6  # 6 Bytes, ~1.116 years

LWB_SCHEDULE_GRANULARITY = 1 / TIMER_FREQUENCY * np.exp2(11)  # 256 us
LWB_SYNC_PERIOD = 1 / TIMER_FREQUENCY * np.exp2(31)  # 268.435456 s

# GLORIA_HEADER: uint8_t[8]
# - TYPE: uint8_t
# - HOP_COUNT: uint8_t:4
# - POWER_LEVEL: uint8_t:4
# - FIELDS: uint8_t[6]
#   - TYPE in [SYNC, SLOT_SCHEDULE, ROUND_SCHEDULE]:
#     - TIMESTAMP: uint8_t[6]
#   - TYPE in [CONTENTION, DATA, ACK]
#     - SOURCE: uint16_t
#     - DESTINATION: uint16_t
#     - STREAM_ID: uint16_t

GLORIA_HEADER_LENGTH = 9
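# Quick derivation of the two LWB timing constants above, assuming only the 8 MHz
# timer: the schedule granularity is 2**11 timer ticks and the sync period 2**31 ticks.
import numpy as np

TIMER_FREQUENCY = 8E6
print(np.exp2(11) / TIMER_FREQUENCY)   # 0.000256 s  -> 256 us
print(np.exp2(31) / TIMER_FREQUENCY)   # 268.435456 s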