def setGrid(self):
    # assumes module-level imports: numpy, pylab
    from scipy import interpolate
    x0 = numpy.logspace(-3, 1, 81)
    etas = numpy.linspace(0., 2., 21)
    qs = numpy.linspace(0.2, 1., 17)
    grid1 = numpy.empty((x0.size, x0.size, etas.size, qs.size))
    grid2 = numpy.empty(grid1.shape)
    for i in range(qs.size):
        q = qs[i]
        q2 = q**2
        b = 1 - q2
        for j in range(etas.size):
            eta = etas[j]
            g = 0.5*eta - 1.  # g = -1*gamma
            for k in range(x0.size):
                x = x0[k]
                for l in range(x0.size):
                    y = x0[l]
                    qb = ((2*x*y)/b)**g  # q_bar
                    qt = q*(x*y)**0.5/b  # q_tilde
                    # NOTE: `s` is not defined in this method; it is presumably
                    # set elsewhere (e.g. an attribute of self)
                    sb = 0.5*(x/y - y/x) + s**2*b/(2*x*y)
                    nu1 = s**2*b/(2*x*y)
                    nu2 = nu1 + 0.5*b*(x/y + y/(x*q2))
                    nu = numpy.logspace(nu1, nu2, 1001)
                    mu = nu - sb
                    t = (1 + mu**2)**0.5
                    f1 = (t - mu)**0.5/t
                    f2 = (t + mu)**0.5/t
                    ng = nu**g
                    I1 = interpolate.splrep(nu, f1*ng)
                    I2 = interpolate.splrep(nu, f2*ng)
                    # index order matches the (x0, x0, etas, qs) grid shape:
                    # j indexes etas, i indexes qs
                    grid1[k, l, j, i] = qt*interpolate.splint(nu1, nu2, I1)
                    grid2[k, l, j, i] = qt*interpolate.splint(nu1, nu2, I2)
            pylab.imshow(grid1[:, :, j, i])
            pylab.show()
def run_svm_evaluation(self, svmtype, inputdata, outputdata, k):
    """Run SVM on training data to evaluate the classifier.

    Return f1 scores, gamma and C.
    """
    if svmtype == 'rbf':
        # Parameter grid
        param_grid = [{'C': np.logspace(1, 5, 5),
                       'gamma': np.logspace(-3, 0, 5),
                       'kernel': ['rbf']}]
    if svmtype == 'ln':
        param_grid = [{'C': np.logspace(1, 5, 5)}]

    score_func = metrics.f1_score

    # Cross validation
    cv = cross_validation.KFold(inputdata.shape[0], n_folds=k,
                                indices=True, shuffle=True)
    f1_scores = []
    for traincv, testcv in cv:
        # TODO: multithreading of cross validation.
        (f1_score, gamma1, c) = self.do_cross_validation(
            param_grid, svmtype, score_func,
            inputdata[traincv], outputdata[traincv],
            inputdata[testcv], outputdata[testcv])
        f1_scores.append(f1_score)

    print "score average: " + str(np.mean(f1_scores))
    print f1_scores
    average_score = np.mean(f1_scores)
    tuples = (average_score, f1_scores)
    return (tuples, gamma1, c)
def getXYdataForContourPlot(expFile):
    xdata = None
    ydata = None
    xlabel = ''
    ylabel = ''
    # print '.exp:' + expFile
    try:
        p = loadClass(expFile)
    except IOError:
        print 'fileIO.getXYdataForContourPlot():: warning:: "' + expFile + '" not found.'
        return xdata, ydata, xlabel, ylabel
    if p.vary_spacing[0] == 'linear':
        xdata = linspace(float(p.vary_startPos[0]), float(p.vary_stopPos[0]), int(p.vary_numPoints[0]))
    elif p.vary_spacing[0] == 'log':
        xdata = logspace(float(p.vary_startPos[0]), float(p.vary_stopPos[0]), int(p.vary_numPoints[0]))
    else:
        print 'plotWidget.getXYdataForContourPlot():: WARNING:: unknown spacing type "' + str(p.vary_spacing[0]) + '"'
    if p.vary_spacing[1] == 'linear':
        ydata = linspace(float(p.vary_startPos[1]), float(p.vary_stopPos[1]), int(p.vary_numPoints[1]))
    elif p.vary_spacing[1] == 'log':
        ydata = logspace(float(p.vary_startPos[1]), float(p.vary_stopPos[1]), int(p.vary_numPoints[1]))
    else:
        print 'plotWidget.getXYdataForContourPlot():: WARNING:: unknown spacing type "' + str(p.vary_spacing[1]) + '"'
    xlabel = p.vary_property[0]
    ylabel = p.vary_property[1]
    return xdata, ydata, xlabel, ylabel
def _extend_ref(ref, min_w, max_w):
    """Extends the reference spectrum to the given limits, assuming a
    constant delta log-lambda scale.

    Args:
        ref (spectrum.Spectrum): Reference spectrum
        min_w, max_w (float): Wavelength limits
    """
    # Delta log-lambda
    w = ref.w
    dw = np.median(np.log10(w[1:]) - np.log10(w[:-1]))

    if min_w < w[0]:
        num_points = int((np.log10(w[0]) - np.log10(min_w)) / dw)
        left = np.logspace(np.log10(w[0]), np.log10(min_w), num_points, base=10.0)[1:]
        # Don't forget to reverse left
        w = np.concatenate((left[::-1], w))

    if max_w > w[-1]:
        num_points = int((np.log10(max_w) - np.log10(w[-1])) / dw)
        right = np.logspace(np.log10(w[-1]), np.log10(max_w), num_points, base=10.0)[1:]
        w = np.concatenate((w, right))

    if len(w) != len(ref.w):
        ref = ref.extend(w)

    return ref
def get_clfmethod(clfmethod, n_feats, n_subjs, n_jobs=1):
    # classifiers
    classifiers = {
        'cart': tree.DecisionTreeClassifier(random_state=0),
        'rf': RandomForestClassifier(max_depth=None, min_samples_split=1, random_state=None),
        'gmm': GMM(init_params='wc', n_iter=20, random_state=0),
        'svm': SVC(probability=True, max_iter=50000, class_weight='auto'),
        'linsvm': LinearSVC(class_weight='auto'),
        'sgd': SGDClassifier(fit_intercept=True, class_weight='auto', shuffle=True,
                             n_iter=np.ceil(10**6 / 416)),
        'percep': Perceptron(class_weight='auto'),
    }

    # Classifier parameter values for grid search
    if n_feats < 10:
        max_feats = range(1, n_feats, 2)
    else:
        max_feats = range(1, 30, 4)
    max_feats.extend([None, 'auto', 'sqrt', 'log2'])

    clgrid = {
        'cart': dict(criterion=['gini', 'entropy'], max_depth=[None, 10, 20, 30]),
        'rf': dict(n_estimators=[3, 5, 10, 30, 50, 100], max_features=max_feats),
        'gmm': dict(n_components=[2, 3, 4, 5],
                    covariance_type=['spherical', 'tied', 'diag'],
                    thresh=[True, False]),
        #'svm': dict(kernel=['rbf', 'linear', 'poly'], C=np.logspace(-3, 3, num=7, base=10), gamma=np.logspace(-3, 3, num=7, base=10), coef0=np.logspace(-3, 3, num=7, base=10)),
        #'svm': dict(kernel=['rbf', 'poly'], C=np.logspace(-3, 3, num=7, base=10), gamma=np.logspace(-3, 3, num=7, base=10), coef0=np.logspace(-3, 3, num=7, base=10)),
        'svm': dict(kernel=['rbf', 'linear'],
                    C=np.logspace(-3, 3, num=7, base=10),
                    gamma=np.logspace(-3, 3, num=7, base=10)),
        'linsvm': dict(C=np.logspace(-3, 3, num=7, base=10)),
        'sgd': dict(loss=['hinge', 'modified_huber', 'log'],
                    penalty=["l1", "l2", "elasticnet"],
                    alpha=np.logspace(-6, -1, num=6, base=10)),
        'percep': dict(penalty=[None, 'l2', 'l1', 'elasticnet'],
                       alpha=np.logspace(-3, 3, num=7, base=10)),
    }

    return classifiers[clfmethod], clgrid[clfmethod]
def regression(self, snpreader, answers, cov_fn=None, num_pcs=0, strategy="lmm_full_cv", delta=7):
    """compare against previous results of this code base"""
    # set up grid
    ##############################
    num_steps_delta = 5
    num_steps_k = 5
    num_folds = 2

    # log_2 space and all SNPs
    k_values = np.array(np.logspace(0, 9, base=2, num=num_steps_k, endpoint=True),
                        dtype=np.int64).tolist() + [10000]
    #k_values = np.logspace(0, 9, base=2, num=num_steps_k, endpoint=True).tolist() + [10000]
    delta_values = np.logspace(-3, 3, endpoint=True, num=num_steps_delta, base=np.exp(1))

    random_state = 42
    output_prefix = None

    # select by LL
    fss = FeatureSelectionStrategy(snpreader, self.pheno_fn, num_folds,
                                   random_state=random_state, cov_fn=cov_fn,
                                   num_pcs=num_pcs, interpolate_delta=True)
    best_k, best_delta, best_obj, best_snps = fss.perform_selection(
        k_values, delta_values, strategy, output_prefix=output_prefix, select_by_ll=True)

    self.assertEqual(best_k, answers[0])
    self.assertAlmostEqual(best_delta, answers[1], delta)
    # accept a range of answers for when standardization is done with doubles, floats, etc.
    self.assertTrue(abs(best_obj - answers[2]) < .005)

    # select by MSE
    fss = FeatureSelectionStrategy(snpreader, self.pheno_fn, num_folds,
                                   random_state=random_state, cov_fn=cov_fn,
                                   num_pcs=num_pcs, interpolate_delta=True)
    best_k, best_delta, best_obj, best_snps = fss.perform_selection(
        k_values, delta_values, strategy, output_prefix=output_prefix, select_by_ll=False)

    self.assertEqual(best_k, answers[0])
    self.assertAlmostEqual(best_delta, answers[1], delta)
    self.assertAlmostEqual(best_obj, answers[3])
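# A quick, self-contained sketch of what the two grids above expand to
# (num=5 here mirrors num_steps_k/num_steps_delta): the k values are
# log_2-spaced and truncated to integers, the delta values are log-spaced
# in base e. The printed values are easy to verify by hand.
import numpy as np

k_values = np.array(np.logspace(0, 9, base=2, num=5, endpoint=True),
                    dtype=np.int64).tolist() + [10000]
delta_values = np.logspace(-3, 3, endpoint=True, num=5, base=np.exp(1))
print(k_values)      # [1, 4, 22, 107, 512, 10000]
print(delta_values)  # approx. [0.0498, 0.2231, 1.0, 4.4817, 20.0855]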
def test_dtype(self):
    y = logspace(0, 6, dtype='float32')
    assert_equal(y.dtype, dtype('float32'))
    y = logspace(0, 6, dtype='float64')
    assert_equal(y.dtype, dtype('float64'))
    y = logspace(0, 6, dtype='int32')
    assert_equal(y.dtype, dtype('int32'))
def test_beta():
    np.random.seed(1234)
    b = np.r_[np.logspace(-200, 200, 4),
              np.logspace(-10, 10, 4),
              np.logspace(-1, 1, 4),
              -1, -2.3, -3, -100.3, -10003.4]
    a = b
    ab = np.array(np.broadcast_arrays(a[:, None], b[None, :])).reshape(2, -1).T

    old_dps, old_prec = mpmath.mp.dps, mpmath.mp.prec
    try:
        mpmath.mp.dps = 400
        assert_func_equal(sc.beta,
                          lambda a, b: float(mpmath.beta(a, b)),
                          ab, vectorized=False, rtol=1e-10)
        assert_func_equal(
            sc.betaln,
            lambda a, b: float(mpmath.log(abs(mpmath.beta(a, b)))),
            ab, vectorized=False, rtol=1e-10)
    finally:
        mpmath.mp.dps, mpmath.mp.prec = old_dps, old_prec
def convolved_true_maps(nu_min, nu_max, delta_nu, subdelta_nu, cmb, dust, verbose=True):
    sh = cmb.shape
    Nbpixels = sh[0]

    # frequencies to reconstruct
    Nbfreq = int(floor(log(nu_max/nu_min)/log(1+delta_nu))) + 1  # number of edge frequencies
    # edge frequencies of reconstructed bands
    nus_edge = nu_min*np.logspace(0, log(nu_max/nu_min)/log(1+delta_nu),
                                  Nbfreq, endpoint=True, base=delta_nu+1)
    nus = np.array([(nus_edge[i]+nus_edge[i-1])/2 for i in range(1, Nbfreq)])
    deltas = np.array([(nus_edge[i]-nus_edge[i-1]) for i in range(1, Nbfreq)])
    Nbbands = len(nus)

    # frequencies assumed to have been used for construction of the TOD
    subnu_min = nu_min
    subnu_max = nu_max
    Nbsubfreq = int(floor(log(subnu_max/subnu_min)/log(1+subdelta_nu))) + 1
    sub_nus_edge = subnu_min*np.logspace(0, log(subnu_max/subnu_min)/log(1+subdelta_nu),
                                         Nbsubfreq, endpoint=True, base=subdelta_nu+1)
    sub_nus = np.array([(sub_nus_edge[i]+sub_nus_edge[i-1])/2 for i in range(1, Nbsubfreq)])
    sub_deltas = np.array([(sub_nus_edge[i]-sub_nus_edge[i-1]) for i in range(1, Nbsubfreq)])
    Nbsubbands = len(sub_nus)

    # Bands
    bands = [sub_nus[reduce(logical_and, (sub_nus <= nus_edge[i+1], sub_nus >= nus_edge[i]))]
             for i in range(Nbbands)]
    numbers = np.cumsum(np.array([len(bands[i]) for i in range(Nbbands)]))
    numbers = np.append(0, numbers)
    bands_numbers = np.array([(np.arange(numbers[i], numbers[i+1])) for i in range(Nbbands)])

    if verbose:
        print('Number of bands used for construction: ' + str(Nbsubbands))
        print('Central sub-frequencies used for construction: ' + str(sub_nus))
        print('Number of reconstructed bands: ' + str(Nbbands))
        print('Spectral resolution: ' + str(delta_nu))
        print('Reconstructed bands: ' + str(bands))
        print('Edges: ' + str(nus_edge))
        print('Sub edges: ' + str(sub_nus_edge))

    #################
    ### Input map ###
    #################
    x0 = np.zeros((Nbsubbands, Nbpixels, 3))
    for i in range(Nbsubbands):
        #x0[i, :, 0] = cmb.T[0] + dust.T[0]*scaling_dust(150, sub_nus[i], 1.59)
        x0[i, :, 1] = cmb.T[1] + dust.T[1]*scaling_dust(150, sub_nus[i], 1.59)
        x0[i, :, 2] = cmb.T[2] + dust.T[2]*scaling_dust(150, sub_nus[i], 1.59)

    ###################################################################################
    ### Convolution of the input map (only for comparison to the reconstructed map) ###
    ###################################################################################
    x0_convolved = np.zeros((Nbbands, Nbpixels, 3))
    for i in range(Nbbands):
        for j in bands_numbers[i]:
            sub_instrument = QubicInstrument(filter_nu=sub_nus[j]*10**9,
                                             filter_relative_bandwidth=sub_deltas[j]/sub_nus[j],
                                             detector_nep=2.7e-17)
            C = HealpixConvolutionGaussianOperator(
                fwhm=sub_instrument.synthbeam.peak150.fwhm * (150 / sub_nus[j]))
            x0_convolved[i] += C(x0[j])*sub_deltas[j]/np.sum(sub_deltas[bands_numbers[i]])

    return x0_convolved
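# A minimal sketch (with made-up nu_min/nu_max/delta_nu) of the band-edge
# construction above: using base=1+delta_nu with exponents from 0 to
# log(nu_max/nu_min)/log(1+delta_nu) yields edges that grow by a nearly
# constant factor close to 1+delta_nu.
import numpy as np

nu_min, nu_max, delta_nu = 130.0, 170.0, 0.05  # hypothetical values
Nbfreq = int(np.floor(np.log(nu_max/nu_min)/np.log(1+delta_nu))) + 1
nus_edge = nu_min*np.logspace(0, np.log(nu_max/nu_min)/np.log(1+delta_nu),
                              Nbfreq, endpoint=True, base=1+delta_nu)
print(nus_edge[1:] / nus_edge[:-1])  # ratios all ~1.055 here, i.e. ~1+delta_nu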
def test_fitter2D(self, model_class):
    """Test if the parametric model works with the fitter."""
    x_lim = models_2D[model_class]['x_lim']
    y_lim = models_2D[model_class]['y_lim']

    parameters = models_2D[model_class]['parameters']
    model = create_model(model_class, parameters)

    if isinstance(parameters, dict):
        parameters = [parameters[name] for name in model.param_names]

    # distribute points in log space when the model asks for a log fit,
    # otherwise fall back to a linear grid
    if models_2D[model_class].get('log_fit'):
        x = np.logspace(x_lim[0], x_lim[1], self.N)
        y = np.logspace(y_lim[0], y_lim[1], self.N)
    else:
        x = np.linspace(x_lim[0], x_lim[1], self.N)
        y = np.linspace(y_lim[0], y_lim[1], self.N)
    xv, yv = np.meshgrid(x, y)

    np.random.seed(0)
    # add 10% noise to the amplitude
    data = model(xv, yv) + 0.1*parameters[0]*(np.random.rand(self.N, self.N) - 0.5)
    fitter = fitting.NonLinearLSQFitter()
    new_model = fitter(model, xv, yv, data)
    fitparams, _ = fitter._model_to_fit_params(new_model)
    utils.assert_allclose(fitparams, parameters, atol=self.fit_error)
def tsz_profile(self, nu=None):
    """Return interpolation fn. for tSZ profile as a function of r [Mpc]."""
    bb = np.linspace(0.0, self.bmax, 150)  # Range of impact parameters
    rr = bb * self.r500

    # Interpolate the radial pressure profile, P
    N_X_SAMP = 1200  # Increase this for more accurate integration
    _r = np.logspace(-4, np.log10(self.bmax*self.r500), 200)
    #_r = np.linspace(1e-4, self.bmax*self.r500, 250)
    _P = self.P(_r)
    Pinterp = scipy.interpolate.interp1d(_r, _P, kind='linear',
                                         bounds_error=False, fill_value=0.)

    # Sample the integrand and do Simpson-rule integration over samples
    ig_tsz = lambda x, b: Pinterp(x*self.r500) * (x / np.sqrt(x**2. - b**2.))
    _x = [np.logspace(np.log10(b + 1e-4), np.log10(self.bmaxc), N_X_SAMP) for b in bb]
    ysz = [scipy.integrate.simps(ig_tsz(_x[i], bb[i]), _x[i]) for i in range(bb.size)]

    # Spectral dependence and Y_SZ pre-factors
    if nu is None:
        g_nu = 1.
    else:
        g_nu = self.tsz_spectrum(nu)
    fac_ysz = (2. * 2. * 2.051 / 511.) * self.r500
    ysz = g_nu * fac_ysz * np.array(ysz)

    # Interpolate and return
    interp = scipy.interpolate.interp1d(rr, ysz, kind='linear',
                                        bounds_error=False, fill_value=0.0)
    return interp
def update_grid(w, h):
    n = Z.shape[1]

    # Logarithmic grid
    I1 = np.logspace(np.log10(1), np.log10(2*w), 5) * scale
    I2 = np.logspace(np.log10(1), np.log10(2*w), 50) * scale
    I3 = np.logspace(np.log10(1), np.log10(2*h), 5) * scale
    I4 = np.logspace(np.log10(1), np.log10(2*h), 50) * scale

    # We are here in screen space and we want integer coordinates
    np.floor(I1, out=I1)
    np.floor(I2, out=I2)
    np.floor(I3, out=I3)
    np.floor(I4, out=I4)

    L = np.linspace(0, w, n)
    Z[..., 0] = I1[find_closest(I1, L)]
    Z[..., 2] = I2[find_closest(I2, L)]
    L = np.linspace(0, h, n)
    Z[..., 1] = I3[find_closest(I3, L)]
    Z[..., 3] = I4[find_closest(I4, L)]

    program["u_grid"][...] = Z
    program["u_size"] = w, h
def plot_optimisation(ls_of_ws, cost_func):
    ws1, ws2 = zip(*ls_of_ws)
    # Plot figures
    fig = plt.figure(figsize=(10, 4))

    # Plot overview of cost function
    ax_1 = fig.add_subplot(1, 2, 1)
    ws1_1, ws2_1, cost_ws_1 = get_cost_surface(-3, 3, -3, 3, 100, cost_func)
    surf_1 = plot_surface(ax_1, ws1_1, ws2_1, cost_ws_1 + 1)
    ax_1.plot(ws1, ws2, 'b.')
    ax_1.set_xlim([-3, 3])
    ax_1.set_ylim([-3, 3])

    # Plot zoom of cost function
    ax_2 = fig.add_subplot(1, 2, 2)
    ws1_2, ws2_2, cost_ws_2 = get_cost_surface(0, 2, 0, 2, 100, cost_func)
    surf_2 = plot_surface(ax_2, ws1_2, ws2_2, cost_ws_2 + 1)
    ax_2.set_xlim([0, 2])
    ax_2.set_ylim([0, 2])
    surf_2 = plot_surface(ax_2, ws1_2, ws2_2, cost_ws_2)
    ax_2.plot(ws1, ws2, 'b.')

    # Show the colorbar
    fig.subplots_adjust(right=0.8)
    cax = fig.add_axes([0.85, 0.12, 0.03, 0.78])
    cbar = fig.colorbar(surf_1, ticks=np.logspace(0, 8, 9), cax=cax)
    cbar.ax.set_ylabel('$\\xi$', fontsize=15)
    cbar.set_ticklabels(['{:.0e}'.format(i) for i in np.logspace(0, 8, 9)])
    plt.suptitle('Cost surface', fontsize=15)
    plt.show()
def _tabulate_cumulative_mass(self):
    rmax = self.DF.dprof.large_r  #* 1.01   # some extra slop
    rmin = self.DF.dprof.small_r  #* 0.999

    sub_sample = 0.2
    n_sub = int(np.ceil(sub_sample * self._optimize_npoints))
    r = np.zeros(self._optimize_npoints + n_sub)
    r[:self._optimize_npoints] = np.logspace(np.log10(rmin), np.log10(rmax*0.98),
                                             self._optimize_npoints)
    r[self._optimize_npoints:] = np.logspace(np.log10(rmax*0.98), np.log10(rmax), n_sub)

    # compute cumulative mass
    cumulative_mass = self.DF.dprof.cumulative_mass(r)

    # save logged values
    self._cumulative_mass_r = np.log10(r)
    self._cumulative_mass_m = np.log10(cumulative_mass)

    self._mass_umin = self._interpolate_cumulative_mass(self.DF.dprof.small_r) / self.DF.dprof.M_sys
    self._mass_umax = self._interpolate_cumulative_mass(self.DF.dprof.large_r) / self.DF.dprof.M_sys

    _my_print("Completed mass tabulation")
    return
def _add_eq_contour(ax, ds, ds_denom, colorbar=None, levels=[], smooth=None):
    """
    Add contours where ds and ds_denom have equal efficiency (ratio = 1).
    The 'levels' argument can be used to specify contours at ratios other than 1.
    """
    eff_array = _maximize_efficiency(np.array(ds))
    other_array = _maximize_efficiency(np.array(ds_denom))
    ratio_array = _smooth(eff_array / other_array, sigma=smooth)
    xmin = ds.attrs.get('x_min', 1.0)
    ymin = ds.attrs.get('y_min', 1.0)
    xmax = ds.attrs['x_max']
    ymax = ds.attrs['y_max']
    xvals = np.logspace(math.log10(xmin), math.log10(xmax), ds.shape[0])
    yvals = np.logspace(math.log10(ymin), math.log10(ymax), ds.shape[1])
    xgrid, ygrid = np.meshgrid(xvals, yvals)
    ct = ax.contour(
        xgrid, ygrid, ratio_array.T,
        linewidths=_line_width,
        levels=[1.0] if not levels else levels,
        colors=['r', 'orange', 'y', 'green'],
    )

    def fmt(value):
        if value == 1.0:
            return 'equal'
        return '{:+.0%}'.format(value - 1.0)

    ax.clabel(ct, fontsize=_text_size*0.75, inline=True, fmt=fmt)
    if colorbar:
        colorbar.add_lines(ct)
def execute(model, data, savepath, lwr_data, *args, **kwargs):
    parameters = {'alpha': np.logspace(-6, 0, 20),
                  'gamma': np.logspace(-1.5, 1.5, 20)}

    grid_scores = []
    for a in parameters['alpha']:
        for y in parameters['gamma']:
            model = KernelRidge(alpha=a, gamma=y, kernel='rbf')
            rms = leaveout_cv(model, data, num_runs=200)
            #rms = kfold_cv(model, data, num_folds=5, num_runs=50)
            #rms = alloy_cv(model, data)
            #rms = atr2_extrap(model, data)
            #rms = lwr_extrap(model, data, lwr_data)
            grid_scores.append((a, y, rms))

    grid_scores = np.asarray(grid_scores)

    with open(savepath.replace(".png", "").format("grid_scores.csv"), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        x = ["alpha", "gamma", "rms"]
        writer.writerow(x)
        for i in grid_scores:
            writer.writerow(i)

    # Heatmap of RMS scores vs. alpha and gamma
    plt.figure(1)
    plt.hexbin(np.log10(grid_scores[:, 0]), np.log10(grid_scores[:, 1]),
               C=grid_scores[:, 2], gridsize=15, cmap=cm.plasma, bins=None, vmax=60)
    plt.xlabel('log alpha')
    plt.ylabel('log gamma')
    cb = plt.colorbar()
    cb.set_label('rms')
    plt.savefig(savepath.format("alphagammahex"), dpi=200, bbox_inches='tight')
    plt.close()
def adjust_SVM(self):
    Cs = np.logspace(0, 10, 15, base=2)
    gammas = np.logspace(-7, 4, 15, base=2)
    scores = np.zeros((len(Cs), len(gammas)))
    scores[:] = np.nan

    print 'adjusting SVM (may take a long time) ...'

    def f(job):
        i, j = job
        samples, labels = self.get_dataset()
        params = dict(C=Cs[i], gamma=gammas[j])
        score = cross_validate(SVM, params, samples, labels)
        return i, j, score

    ires = self.run_jobs(f, np.ndindex(*scores.shape))
    for count, (i, j, score) in enumerate(ires):
        scores[i, j] = score
        print '%d / %d (best error: %.2f %%, last: %.2f %%)' % (
            count + 1, scores.size, np.nanmin(scores)*100, score*100)
    print scores

    print 'writing score table to "svm_scores.npz"'
    np.savez('svm_scores.npz', scores=scores, Cs=Cs, gammas=gammas)

    i, j = np.unravel_index(scores.argmin(), scores.shape)
    best_params = dict(C=Cs[i], gamma=gammas[j])
    print 'best params:', best_params
    print 'best error: %.2f %%' % (scores.min()*100)
    return best_params
def test_convergence_speed():
    import itertools

    alpha = np.logspace(-2, 0, 5)
    rho = np.logspace(-2., 2., 5)
    adaptive = [True, False]
    optimal = [True, False]

    for (alpha_, adap, opt) in itertools.product(alpha, adaptive, optimal):
        for _ in range(100):
            f = list()
            Y = np.random.normal(size=(90, 100))
            if not opt:
                for rho_ in rho:
                    if adap:
                        gl = covl.GraphLasso(alpha_, rho=rho_, mu=2.)
                    else:
                        gl = covl.GraphLasso(alpha_, rho=alpha_ / covl.alpha_max(Y), mu=2.)
                    f.append(gl.fit(Y).f_vals_)
            else:
                if adap:
                    gl = covl.GraphLasso(alpha_, rho=alpha_ / covl.alpha_max(Y), mu=2.)
                else:
                    gl = covl.GraphLasso(alpha_, rho=alpha_ / covl.alpha_max(Y))
                f.append(gl.fit(Y).f_vals_)
def test_wavelets_with_event_data_chopper(self):
    wf_session = MorletWaveletFilter(
        time_series=self.session_eegs[:, :, :self.session_eegs.shape[2] // 4],
        freqs=np.logspace(np.log10(3), np.log10(180), 8),
        output='power',
        frequency_dim_pos=0,
        verbose=True
    )
    pow_wavelet_session, phase_wavelet_session = wf_session.filter()

    sedc = DataChopper(events=self.base_events, session_data=pow_wavelet_session,
                       start_time=self.start_time, end_time=self.end_time,
                       buffer_time=self.buffer_time)
    chopped_session_pow_wavelet = sedc.filter()

    # removing buffer
    chopped_session_pow_wavelet = chopped_session_pow_wavelet[:, :, :, 500:-500]

    wf = MorletWaveletFilter(
        time_series=self.base_eegs,
        freqs=np.logspace(np.log10(3), np.log10(180), 8),
        output='power',
        frequency_dim_pos=0,
        verbose=True
    )
    pow_wavelet, phase_wavelet = wf.filter()
    pow_wavelet = pow_wavelet[:, :, :, 500:-500]

    assert_array_almost_equal(
        (chopped_session_pow_wavelet.data - pow_wavelet.data) / pow_wavelet.data,
        np.zeros_like(pow_wavelet),
        decimal=5
    )
def __init__(self, database, preprocess_stage, segmentation_stage, extraction,
             operator="sum", n_components=20):
    """init"""
    super(FusionFramework, self).__init__(database, preprocess_stage,
                                          segmentation_stage, extraction, None)
    self.op = operator
    grid_param = {"kernel": ("rbf", "poly", "sigmoid"),
                  "C": np.logspace(-5, -3, num=8, base=2),
                  "gamma": np.logspace(-15, 3, num=8, base=2)}
    self._extraction = extraction
    self._pca = RandomizedPCA(n_components=n_components, whiten=True)
    self.classifer = SVC(class_weight="auto", probability=True)
    self.hist_clf = SVC(class_weight="auto", probability=True)
    self.clf_lbp = GridSearchCV(self.classifer, grid_param, n_jobs=-1, cv=3)
    self.clf_hist = GridSearchCV(self.hist_clf, grid_param, n_jobs=-1, cv=3)
def test_DendroStat(self):
    min_deltas = np.append(np.logspace(-1.5, -0.7, 8),
                           np.logspace(-0.6, -0.35, 10))

    self.tester = Dendrogram_Stats(dataset1["cube"][0], min_deltas=min_deltas)
    self.tester.run()

    assert np.allclose(self.tester.numfeatures, computed_data["dendrogram_val"])
def setup(self):
    self.energy_lo = np.logspace(0, 1, 11)[:-1] * u.TeV
    self.energy_hi = np.logspace(0, 1, 11)[1:] * u.TeV
    self.offset_lo = np.linspace(0, 1, 4)[:-1] * u.deg
    self.offset_hi = np.linspace(0, 1, 4)[1:] * u.deg
    self.migra_lo = np.linspace(0, 3, 4)[:-1]
    self.migra_hi = np.linspace(0, 3, 4)[1:]
    self.detx_lo = np.linspace(-6, 6, 11)[:-1] * u.deg
    self.detx_hi = np.linspace(-6, 6, 11)[1:] * u.deg
    self.dety_lo = np.linspace(-6, 6, 11)[:-1] * u.deg
    self.dety_hi = np.linspace(-6, 6, 11)[1:] * u.deg

    self.aeff_data = np.random.rand(10, 3) * u.cm * u.cm
    self.edisp_data = np.random.rand(10, 3, 3)
    self.bkg_data = np.random.rand(10, 10, 10) / u.MeV / u.s / u.sr

    self.aeff = EffectiveAreaTable2D(energy_lo=self.energy_lo,
                                     energy_hi=self.energy_hi,
                                     offset_lo=self.offset_lo,
                                     offset_hi=self.offset_hi,
                                     data=self.aeff_data)
    self.edisp = EnergyDispersion2D(e_true_lo=self.energy_lo,
                                    e_true_hi=self.energy_hi,
                                    migra_lo=self.migra_lo,
                                    migra_hi=self.migra_hi,
                                    offset_lo=self.offset_lo,
                                    offset_hi=self.offset_hi,
                                    data=self.edisp_data)
    self.bkg = Background3D(energy_lo=self.energy_lo,
                            energy_hi=self.energy_hi,
                            detx_lo=self.detx_lo,
                            detx_hi=self.detx_hi,
                            dety_lo=self.dety_lo,
                            dety_hi=self.dety_hi,
                            data=self.bkg_data)
def update_xscale(self):
    if self.logfreqscale == 2:
        self.xscaled = numpy.logspace(numpy.log2(self.minfreq),
                                      numpy.log2(self.maxfreq),
                                      self.canvas_height, base=2.0)
    elif self.logfreqscale == 1:
        self.xscaled = numpy.logspace(numpy.log10(self.minfreq),
                                      numpy.log10(self.maxfreq),
                                      self.canvas_height)
    else:
        self.xscaled = numpy.linspace(self.minfreq, self.maxfreq, self.canvas_height)
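# Side note on the two log branches above: the base-2 and base-10 forms
# produce the same geometric spacing; only the exponent bookkeeping differs.
# A small self-contained check (the frequency values here are arbitrary):
import numpy as np

minfreq, maxfreq, n = 20.0, 20000.0, 8
a = np.logspace(np.log2(minfreq), np.log2(maxfreq), n, base=2.0)
b = np.logspace(np.log10(minfreq), np.log10(maxfreq), n)
assert np.allclose(a, b)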
def benchmark_plot(data):
    """log-log graph of the benchmark results"""
    plt.figure()
    idx = 0
    symbols = ['o', 's', 'h', 'D', '1', '8', '*', '+', 'x']
    for (N, tp, tnp, speedup, name) in data:
        plt.loglog(N, tp, 'r%s' % symbols[idx], label='pure Python %s' % name)
        plt.loglog(N, tnp, 'b%s' % symbols[idx], label='numpy %s' % name)
        idx += 1
        speedup_txt = "%s speedup: %3.1fx" % (name, speedup)
        plt.text(700, (0.9)/np.exp(0.3*idx), speedup_txt, fontsize=14)

    n = np.logspace(2, 5, 20)
    logline = [(5e-6)*nn for nn in n]
    plt.loglog(n, logline, 'r-', label='5e-6 * N')

    n = np.logspace(4, 6, 20)
    logline = [(2e-8)*nn for nn in n]
    plt.loglog(n, logline, 'b-', label='2e-8 * N')
    logline = [(2e-9)*nn*np.log(nn) for nn in n]
    plt.loglog(n, logline, 'b-.', label='2e-9 * N log(N)')
    logline = [(2e-12)*nn*nn for nn in n]
    plt.loglog(n, logline, 'b--', label='2e-12 * N*N')

    plt.xlabel('Input data size')
    plt.ylabel('CPU seconds')
    plt.legend(loc='upper left')
    plt.title('allantools numpy benchmark, AW 2014-08-31')
    plt.show()
def test_fuzz_K_to_discharge_coefficient():
    '''
    # Testing the different formulas
    from sympy import *
    C, beta, K = symbols('C, beta, K')

    expr = Eq(K, (sqrt(1 - beta**4*(1 - C*C))/(C*beta**2) - 1)**2)
    solns = solve(expr, C)
    [i.subs({'K': 5.2314291729754, 'beta': 0.05/0.07366}) for i in solns]

    [-sqrt(-beta**4/(-2*sqrt(K)*beta**4 + K*beta**4) + 1/(-2*sqrt(K)*beta**4 + K*beta**4)),
     sqrt(-beta**4/(-2*sqrt(K)*beta**4 + K*beta**4) + 1/(-2*sqrt(K)*beta**4 + K*beta**4)),
     -sqrt(-beta**4/(2*sqrt(K)*beta**4 + K*beta**4) + 1/(2*sqrt(K)*beta**4 + K*beta**4)),
     sqrt(-beta**4/(2*sqrt(K)*beta**4 + K*beta**4) + 1/(2*sqrt(K)*beta**4 + K*beta**4))]

    # Getting the formula
    from sympy import *
    C, beta, K = symbols('C, beta, K')

    expr = Eq(K, (sqrt(1 - beta**4*(1 - C*C))/(C*beta**2) - 1)**2)
    print(latex(solve(expr, C)[3]))
    '''
    Ds = np.logspace(np.log10(1 - 1E-9), np.log10(1E-9))
    for D_ratio in Ds:
        Ks = np.logspace(np.log10(1E-9), np.log10(50000))
        Ks_recalc = []
        for K in Ks:
            C = K_to_discharge_coefficient(D=1, Do=D_ratio, K=K)
            K_calc = discharge_coefficient_to_K(D=1, Do=D_ratio, C=C)
            Ks_recalc.append(K_calc)
        assert_allclose(Ks, Ks_recalc)
def entries_histogram(pandas_df):
    df = pandas_df

    ### Let's plot two histograms on the same axes to show hourly
    ### entries when raining vs. when not raining.

    ### no axis transform...
    plt.figure()
    df.ENTRIESn_hourly[df.rain == 1].plot(kind='hist', stacked=True, alpha=0.5, bins=100)
    df.ENTRIESn_hourly[df.rain == 0].plot(kind='hist', stacked=True, alpha=0.5, bins=100)
    plt.xlabel('Entries Hourly')
    plt.ylabel('Frequency')
    plt.xlim([0, 15000])
    plt.ylim([0, 50000])
    plt.show()
    # this command would close the plot
    # plt.clf()

    ### with a log scale transform on the x-axis...
    plt.figure()
    # histogram of hourly entries when it is raining
    df.ENTRIESn_hourly[df.rain == 1].plot(kind='hist', stacked=True, alpha=0.5,
                                          bins=np.logspace(0.1, 6, 50))
    # histogram of hourly entries when it is not raining
    df.ENTRIESn_hourly[df.rain == 0].plot(kind='hist', stacked=True, alpha=0.5,
                                          bins=np.logspace(0.1, 6, 50))
    plt.xlabel('Entries Hourly')
    plt.ylabel('Frequency')
    plt.gca().set_xscale("log")
    plt.show()
def cv_test():
    """tests the cross validation. needs working krr class!"""
    Xtr, Ytr = noisysincfunction(100, 0.1)
    Xte = np.arange(-np.pi, np.pi, 0.01)[np.newaxis, :]

    krr = imp.krr()

    pl.figure()
    pl.subplot(1, 2, 1)
    params = ["kernel", ["gaussian"],
              "kernelparam", np.logspace(-2, 2, 10),
              "regularization", np.logspace(-2, 2, 10)]
    cvkrr = imp.cv(Xtr, Ytr, krr, params,
                   loss_function=squared_error_loss, nrepetitions=2)
    cvkrr.predict(Xte)
    print cvkrr.kernelparameter
    print cvkrr.regularization
    pl.plot(Xtr.T, Ytr.T)
    pl.plot(Xte.T, cvkrr.ypred.T)
    pl.title("CV with fixed regularization")

    pl.subplot(1, 2, 2)
    params = ["kernel", ["gaussian"],
              "kernelparam", np.logspace(-2, 2, 10),
              "regularization", [0]]
    cvkrr = imp.cv(Xtr, Ytr, krr, params,
                   loss_function=squared_error_loss, nrepetitions=2)
    cvkrr.predict(Xte)
    print cvkrr.kernelparameter
    print cvkrr.regularization
    pl.plot(Xtr.T, Ytr.T)
    pl.plot(Xte.T, cvkrr.ypred.T)
    pl.title("CV with efficient LOOCV")

    print "\n(time the test takes on my notebook: approx. 6 seconds)"
def train_gbg_svm_model(X_train, y_train, verbose=False):
    # train SVM
    X_scaled = preprocessing.scale(X_train)

    best_score = 9999
    best_params = {}

    # grid search to optimize parameters of SVM
    C_list = np.logspace(-5, 2, num=11)
    gamma_list = np.logspace(-3, 1, num=11)
    epsilon_list = [0.1]

    for c_test in C_list:
        for gamma_test in gamma_list:
            for epsilon_test in epsilon_list:
                svm_model = svm.SVR(kernel="rbf", C=c_test,
                                    gamma=gamma_test, epsilon=epsilon_test)
                scores = cross_validation.cross_val_score(svm_model, X_scaled, y_train, cv=5)
                mean_score = np.mean(scores)
                if verbose:
                    print("params: " + str(svm_model.get_params()))
                    print(mean_score)
                if abs(mean_score) < abs(best_score):
                    best_score = mean_score
                    best_params = svm_model.get_params()

    print("***Best Params***")
    print(best_params)
    print("Score:" + str(best_score))

    # return a trained SVM with the best parameters we found
    ret_svm = svm.SVR()
    ret_svm.set_params(**best_params)
    ret_svm.fit(X_scaled, y_train)
    return ret_svm
def ColeColeFplot(siginf, eta, tau, c, fmin, fmax):
    tau = 10**tau
    siginf = 10**siginf
    omega = np.logspace(fmin, fmax, 56) * 2.*np.pi
    sigmaColeF = siginf*np.ones(omega.size) - siginf*eta*(1./(1. + (1. - eta)*(1j*omega*tau)**c))
    time = np.logspace(-6, 0, 64)
    wt, tbase, omega_int = setFrequency(time)
    sigmaColeF_temp = siginf - siginf*eta*(1./(1. + (1. - eta)*(1j*omega_int*tau)**c))
    sigmaColeT = transFiltImpulse(sigmaColeF_temp, wt, tbase, omega_int, time, tol=1e-12)

    fig, ax = plt.subplots(1, 2, figsize=(16, 5))
    ax[0].semilogx(omega/2./np.pi, sigmaColeF.real, 'ko-')
    ax[0].semilogx(omega/2./np.pi, sigmaColeF.imag, 'ro-')
    ax[0].set_ylim(0., siginf*1.2)
    ax[0].grid(True)
    ax[0].set_xlabel("Frequency (Hz)", fontsize=16)
    ax[0].set_ylabel("Frequency domain Cole-Cole (S/m)", fontsize=16)
    ax[1].semilogx(time, sigmaColeT.real, 'ko-')
    ax[1].set_ylim(-siginf*0.5*time.min()**(-0.75), 0.)
    ax[1].grid(True)
    ax[1].set_xlabel("Time (s)", fontsize=16)
    ax[1].set_ylabel("Time domain Cole-Cole (S/m)", fontsize=16)
    ax[0].set_title(r"$\sigma_{\infty}$= " + ("%5.1e") % siginf
                    + r", $\eta = $" + ("%5.1f") % eta
                    + r", $\tau = $" + ("%5.1e") % tau
                    + r" and $c$=" + ("%5.1f") % c, fontsize=14)
def plot(self, experiment=None, **kwargs):
    """Plot a diagnostic of the bleedthrough model computation."""
    if experiment is None:
        raise util.CytoflowViewError('experiment', "No experiment specified")

    if not self.op.controls:
        raise util.CytoflowViewError('op', "No controls specified")

    if not self.op.spillover:
        raise util.CytoflowViewError('op', "No spillover matrix specified")

    kwargs.setdefault('histtype', 'stepfilled')
    kwargs.setdefault('alpha', 0.5)
    kwargs.setdefault('antialiased', True)

    plt.figure()

    # the completely arbitrary ordering of the channels
    channels = list(set([x for (x, _) in list(self.op.spillover.keys())]))
    num_channels = len(channels)

    for from_idx, from_channel in enumerate(channels):
        for to_idx, to_channel in enumerate(channels):
            if from_idx == to_idx:
                continue

            check_tube(self.op.controls[from_channel], experiment)
            tube_exp = ImportOp(tubes=[Tube(file=self.op.controls[from_channel])],
                                channels={experiment.metadata[c]["fcs_name"]: c
                                          for c in experiment.channels},
                                name_metadata=experiment.metadata['name_metadata']).apply()

            # apply previous operations
            for op in experiment.history:
                tube_exp = op.apply(tube_exp)

            # subset it
            if self.subset:
                try:
                    tube_exp = tube_exp.query(self.subset)
                except Exception as e:
                    raise util.CytoflowViewError('subset',
                                                 "Subset string '{0}' isn't valid"
                                                 .format(self.subset)) from e

                if len(tube_exp.data) == 0:
                    raise util.CytoflowViewError('subset',
                                                 "Subset string '{0}' returned no events"
                                                 .format(self.subset))

            tube_data = tube_exp.data

            # for ReadTheDocs, which doesn't have swig
            import sys
            if sys.modules['cytoflow.utility.logicle_ext.Logicle'].__name__ != \
               'cytoflow.utility.logicle_ext.Logicle':
                scale_name = 'log'
            else:
                scale_name = 'logicle'

            xscale = util.scale_factory(scale_name, tube_exp, channel=from_channel)
            yscale = util.scale_factory(scale_name, tube_exp, channel=to_channel)

            plt.subplot(num_channels, num_channels,
                        from_idx + (to_idx * num_channels) + 1)
            plt.xscale(scale_name, **xscale.mpl_params)
            plt.yscale(scale_name, **yscale.mpl_params)
            plt.xlabel(from_channel)
            plt.ylabel(to_channel)
            plt.scatter(tube_data[from_channel],
                        tube_data[to_channel],
                        alpha=0.1, s=1, marker='o')

            xs = np.logspace(-1, math.log(tube_data[from_channel].max(), 10))
            ys = xs * self.op.spillover[(from_channel, to_channel)]

            plt.plot(xs, ys, 'g-', lw=3)

    plt.tight_layout(pad=0.8)
def main(
    path="main.csv",
    number_of_repetitions=200,
    tolerance_values=None,
    problem_parameters=None,
):
    """
    Main experiment file.

    Runs a timing experiment on a system with default parameters:

        "lambda_2": 4,
        "lambda_1_1": 3,
        "lambda_1_2": 3,
        "threshold_1": 4,
        "threshold_2": 5,
        "mu_1": 4,
        "mu_2": 3,
        "num_of_servers_1": 2,
        "num_of_servers_2": 3,
        "system_capacity_1": 8,
        "system_capacity_2": 8,
        "buffer_capacity_1": 8,
        "buffer_capacity_2": 8,

    and increasing system_capacity_1.

    This reads in the data frame and only runs new experiments.
    """
    if tolerance_values is None:
        tolerance_values = np.logspace(stop=-1, start=-10, num=10)
    if problem_parameters is None:
        problem_parameters = {
            "lambda_2": 4,
            "lambda_1_1": 3,
            "lambda_1_2": 3,
            "threshold_1": 4,
            "threshold_2": 5,
            "mu_1": 4,
            "mu_2": 3,
            "num_of_servers_1": 2,
            "num_of_servers_2": 3,
            "system_capacity_1": 8,
            "system_capacity_2": 8,
            "buffer_capacity_1": 8,
            "buffer_capacity_2": 8,
        }
    keys = sorted(problem_parameters.keys())
    try:
        df = read_data()
        cache = set(tuple(row) for _, row in df[keys].iterrows())
    except FileNotFoundError:
        header = ["repetition", "tolerance"] + keys + ["time_taken"]
        write_data(data=header, path=path)
        cache = set()

    while True:
        parameter_values = tuple(problem_parameters[key] for key in keys)
        if parameter_values not in cache:
            for tolerance, repetition in itertools.product(
                tolerance_values, range(number_of_repetitions)
            ):
                time_taken = run_single_experiment(
                    tolerance=tolerance, **problem_parameters
                )
                data = [repetition, tolerance] + list(parameter_values) + [time_taken]
                write_data(data=data, path=path)
        problem_parameters["system_capacity_1"] += 1
fold = 0
for trainIndex, testIndex in trainAndTestIndex:  # 10-fold cross-validation
    X_train, X_test = X[trainIndex], X[testIndex]
    y_train, y_test = Y[trainIndex], Y[testIndex]
    mean = np.mean(X_train, axis=0)  # column-wise normalization
    std = np.std(X_train, axis=0)
    X_train_nol = (X_train - mean) / std
    X_test_nol = (X_test - mean) / std
    acc_max = 0
    sen_max = 0
    featureNumOp = 10
    COp = 1
    if not os.path.exists("selected"):
        os.mkdir("selected")
    for featureNum in range(2, X_train_nol.shape[1], 1):
        for C in np.logspace(-4, 4, 9, base=2):
            # logger.debug("{featureNum}, {C}".format(featureNum=featureNum, C=C))
            lr = LinearRegression()
            rfe = RFE(lr, n_features_to_select=featureNum)
            rfe.fit(X_train_nol, y_train)
            selected = rfe.support_
            # logger.debug("2nd feature selection accomplished")
            clf = svm.SVC(kernel='linear', C=C, max_iter=15000).fit(X_train_nol[:, selected], y_train)
            score = clf.score(X_test_nol[:, selected], y_test)
            y_score = clf.decision_function(X_test_nol[:, selected])
            res = clf.predict(X_test_nol[:, selected])
            ACC, SEN, SPE = model_evaluate(test_label=y_test, res_label=res)
            # logger.debug("model evaluation accomplished")
x = np.linspace(1, 2, 5, retstep=True)
print(x)
# retstep here is 0.25

# numpy.logspace
# This function returns an ndarray object that contains numbers evenly spaced
# on a log scale. The start and stop endpoints of the scale are exponents of
# the base, usually 10.
# 1 start    -- The starting point of the sequence is base**start
# 2 stop     -- The final value of the sequence is base**stop
# 3 num      -- The number of values within the range. Default is 50
# 4 endpoint -- If true, stop is the last value in the range
# 5 base     -- Base of the log space, default is 10
# 6 dtype    -- Data type of the output array. If not given, it depends on the other input arguments
# (endpoint and dtype are demonstrated in the short sketch after this block)

# default base is 10
a = np.logspace(1.0, 2.0, num=10)
print(a)

# set base of log space to 2
a = np.logspace(1, 10, num=10, base=2)
print(a)

# Indexing & Slicing
# ==================
# Contents of an ndarray object can be accessed and modified by indexing or
# slicing, just like Python's built-in container objects.
# As mentioned earlier, items in an ndarray object follow a zero-based index.
# Three types of indexing methods are available: field access, basic slicing
# and advanced indexing.
# Basic slicing is an extension of Python's basic concept of slicing to
# n dimensions. A Python slice object is constructed by giving start, stop,
# and step parameters to the built-in slice function. This slice object is
# passed to the array to extract a part of the array.
a = np.arange(10)
s = slice(2, 7, 2)
print(a[s])
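# The parameter list above mentions endpoint and dtype but the examples do not
# exercise them; a minimal demonstration (expected outputs in comments):
import numpy as np

print(np.logspace(1.0, 2.0, num=5, endpoint=False))  # starts at 10**1, stops short of 10**2
print(np.logspace(0, 3, num=4, dtype=np.int32))      # [   1   10  100 1000]; computed as floats, then cast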
def test_wavelets_cpp(self):
    eegs = self.eegs[:, :, :-1]
    base_eegs = self.base_eegs

    # if not sys.platform.startswith('win'):
    #     sys.path.append('/Users/m/src/morlet_git_install')
    import ptsa.extensions.morlet as morlet

    num_freqs = 8
    f_min = 3.0
    f_max = 180.0
    signal_length = base_eegs.shape[-1]

    morlet_transform = morlet.MorletWaveletTransform()
    samplerate = float(base_eegs['samplerate'])
    morlet_transform.init(5, f_min, f_max, num_freqs, samplerate, signal_length)

    signal = base_eegs[0:1, 0:1, :]
    signal_orig_eegs = eegs[0:1, 0:1, :]

    pow_wavelets_cpp = np.empty(shape=(base_eegs.shape[-1] * num_freqs,), dtype=float)

    # for i in xrange(num_of_iterations):
    #     morlet_transform.multiphasevec(signal, powers)
    morlet_transform.multiphasevec(signal.data.flatten(), pow_wavelets_cpp)
    pow_wavelets_cpp = pow_wavelets_cpp.reshape(8, pow_wavelets_cpp.shape[0] // 8)

    wf = MorletWaveletFilter(
        time_series=signal,
        freqs=np.logspace(np.log10(f_min), np.log10(f_max), num_freqs),
        output='power',
        frequency_dim_pos=0,
    )
    pow_wavelet, phase_wavelet = wf.filter()

    from ptsa.wavelet import phase_pow_multi
    pow_wavelet_ptsa_orig = phase_pow_multi(
        freqs=np.logspace(np.log10(3), np.log10(180), 8),
        dat=signal_orig_eegs, to_return='power')

    freq_num = 0
    decimal = 1

    assert_array_almost_equal(
        (np.squeeze(pow_wavelet[freq_num, :, :, 500:-500])
         - np.squeeze(pow_wavelet_ptsa_orig[freq_num, :, :, 500:-500]))
        / np.squeeze(pow_wavelet_ptsa_orig[freq_num, :, :, 500:-500]),
        np.zeros_like(np.squeeze(pow_wavelet_ptsa_orig[freq_num, :, :, 500:-500])),
        decimal=decimal)

    assert_array_almost_equal(
        (pow_wavelets_cpp[freq_num, 500:-500]
         - np.squeeze(pow_wavelet[freq_num, :, :, 500:-500]))
        / pow_wavelets_cpp[freq_num, 500:-500],
        np.zeros_like(pow_wavelets_cpp[freq_num, 500:-500]),
        decimal=decimal)

    assert_array_almost_equal(
        (pow_wavelets_cpp[freq_num, 500:-500]
         - np.squeeze(pow_wavelet_ptsa_orig[freq_num, :, :, 500:-500]))
        / pow_wavelets_cpp[freq_num, 500:-500],
        np.zeros_like(np.squeeze(pow_wavelet_ptsa_orig[freq_num, :, :, 500:-500])),
        decimal=decimal)
# Load libraries
import numpy as np
from sklearn import linear_model, datasets
from sklearn.model_selection import GridSearchCV, cross_val_score

# Load data
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Create logistic regression
logistic = linear_model.LogisticRegression()

# Create range of 20 candidate values for C
C = np.logspace(0, 4, 20)

# Create hyperparameter options
hyperparameters = dict(C=C)

# Create grid search
gridsearch = GridSearchCV(logistic, hyperparameters, cv=5, n_jobs=-1, verbose=0)

# Conduct nested cross-validation and output the average score
cross_val_score(gridsearch, features, target).mean()

gridsearch = GridSearchCV(logistic, hyperparameters, cv=5, verbose=1)
def from_yaml(cls, data, ebl=None, magnify=1):
    """
    Returns
    -------
    A GammaRayBurst instance.
    """
    cls = GammaRayBurst()  # This calls the constructor

    cls.z = data["z"]
    cls.name = data["name"]
    cls.radec = SkyCoord(data["ra"], data["dec"], frame='icrs')
    cls.Eiso = u.Quantity(data["Eiso"])
    cls.Epeak = u.Quantity(data['Epeak'])
    cls.t90 = u.Quantity(data['t90'])
    cls.G0H = data['G0H']
    cls.G0W = data['G0W']
    cls.Fluxpeak = u.Quantity(data['Fluxpeak'])
    cls.gamma_le = data['gamma_le']
    cls.gamma_he = data['gamma_he']
    cls.t_trig = Time(data["t_trig"], format="datetime", scale="utc")

    ### Energies, times and fluxes ---
    Emin = u.Quantity(data["Emin"])
    Emax = u.Quantity(data["Emax"])
    tmin = u.Quantity(data["tmin"])
    tmax = u.Quantity(data["tmax"])
    ntbin = data["ntbin"]

    cls.Eval = np.asarray([Emin.value, Emax.to(Emin.unit).value]) * Emin.unit
    if ntbin != 1:
        cls.tval = np.logspace(np.log10(tmin.to(u.s).value),
                               np.log10(tmax.to(u.s).value),
                               ntbin) * u.s
    else:  # A single time window
        cls.tval = np.array([tmin.value, tmax.to(tmin.unit).value]) * tmin.unit

    flux_unit = u.Unit("1/(cm2 GeV s)")
    cls.fluxval = np.zeros((len(cls.tval), len(cls.Eval))) * flux_unit
    for i, t in enumerate(cls.tval):
        for j, E in enumerate(cls.Eval):
            dnde = (u.Quantity(data["K"])
                    * (E / data["E0"])**-data["gamma"]
                    * (t / data["t0"])**-data["beta"])
            #print(i, j, dnde)
            cls.fluxval[i][j] = magnify * dnde.to(flux_unit)

    ### Visibilities --- includes tval span
    for loc in ["North", "South"]:
        cls.vis[loc] = Visibility(cls, loc)
        # Recomputing done in the main
        # cls.vis[loc].compute(debug=False)

    ### No prompt component foreseen in this case
    cls.prompt = False

    for i, t in enumerate(cls.tval):
        # Note that TemplateSpectralModel makes an interpolation.
        # Following a question on the Slack gammapy channel on November 27th,
        # and the answer by Axel Donath:
        # The following statement later in the code gave an error
        # (dlist_onoff is a collection of Dataset):
        #     dlist_onoff.write(datapath, prefix="cls", overwrite=True)
        # gives:
        #     ...\gammapy\modeling\models\spectral.py", line 989, in to_dict
        #     "data": self.energy.data.tolist(),
        #     NotImplementedError: memoryview: unsupported format >f
        # This error comes from the fact that the energy list has to be
        # explicitly passed as a float, as done below:
        #     cls.Eval.astype(float)
        # (A Quantity is passed as requested, but the underlying numpy
        # dtype is not supported by energy.data.tolist().)
        tab = TemplateSpectralModel(energy=cls.Eval.astype(float),
                                    values=cls.fluxval[i],
                                    interp_kwargs={"values_scale": "log"})
        model = cls.EBLabsorbed(tab, ebl)
        cls.spectra.append(model)

    return cls
ymin = 100.0
ymax = 13000

ax.set_xlim(10**xmin, 10**xmax)
ax.set_ylim(ymin, ymax)

ixx = int(nx * 0.02)
xlim = ax.get_xlim()
ylim = ax.get_ylim()

scomp_x_elbow = []
scomp_ix_elbow = []
smem_x_elbow = []
smem_ix_elbow = []

x = np.logspace(xmin, xmax, nx)
for roof in scomproofs:
    for ix in range(1, nx):
        if smemroofs[0] * x[ix] >= roof and smemroofs[0] * x[ix - 1] < roof:
            scomp_x_elbow.append(x[ix - 1])
            scomp_ix_elbow.append(ix - 1)
            break

for roof in smemroofs:
    for ix in range(1, nx):
        if (scomproofs[0] <= roof * x[ix] and scomproofs[0] > roof * x[ix - 1]):
            smem_x_elbow.append(x[ix - 1])
            smem_ix_elbow.append(ix - 1)
            break
from jax.config import config
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import expn
from exojax.special.expn import E1
from exojax.spec import rtransfer as rt
import jax.numpy as jnp

x = np.logspace(-4, 1.9, 1000)
d_f32 = np.abs(rt.trans2E3(x) - (2.0 * expn(3, x)))
config.update('jax_enable_x64', True)

fig = plt.figure(figsize=(15, 5))
ax = fig.add_subplot(211)
plt.plot(x, 2.0 * expn(3, x), label='$\mathcal{T}(x)$ by scipy.special.expn')
plt.plot(x, rt.trans2E3(x), ls='dashed',
         label='$\mathcal{T}(x)$ by ours (AS70 w/ jax.numpy)')
plt.ylabel('$\mathcal{T}(x)$', fontsize=14)
plt.tick_params(labelsize=13)
plt.plot(x,
train_index, test_index = list(sss.split(X, y))[0]
X_train = X[train_index]
y_train = y[train_index]
X_test = X[test_index]
y_test = y[test_index]

# Standardization
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# SVM model configuration
svm_model = svm.SVC(kernel='rbf')

# C ∈ {0.02, 0.2, 2, 200} and γ ∈ {0.02, 0.2, 2, 200}
Cs = 2 * np.logspace(-2, 0, num=3, base=10)
Cs = np.append(Cs, 200)
Gs = Cs

# Nested K-fold cross-validation
optimo = GridSearchCV(estimator=svm_model, param_grid=dict(C=Cs, gamma=Gs),
                      n_jobs=-1, cv=5)

# Train the optimal model
optimo.fit(X_train, y_train)

# Configuration of the optimal model
print optimo.best_params_
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import neighbors
from sklearn import tree
from sklearn import ensemble
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

x = PTrain_ad.iloc[:, 1:]
y = PTrain_ad.iloc[:, 0]
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.25, random_state=0)

#%% Benchmark model
para_lo = [{'penalty': ['l1', 'l2'],
            'C': np.logspace(-1, 1, 10),
            'solver': ['liblinear'],
            'multi_class': ['ovr']},
           {'penalty': ['l2'],
            'C': np.logspace(-1, 1, 20),
            'solver': ['lbfgs'],
            'multi_class': ['ovr', 'multinomial']}]
logcv = GridSearchCV(LogisticRegression(), para_lo, cv=10, scoring='roc_auc')
log = logcv.fit(x_train, y_train)
yyy = log.predict(x_val)
log.best_estimator_.coef_

print("Number of defaults in test set: {0}".format(sum(y_val)))
print("Number of defaults in train set: {0}".format(sum(y_train)))
print(accuracy_score(y_val, yyy))
print(confusion_matrix(y_val, yyy))
    gram_train = metrics.pairwise.rbf_kernel(X_train, X_train, gamma=G_list[it])
    gram_test = metrics.pairwise.rbf_kernel(X_test, X_train, gamma=G_list[it])
    kernel_train_list.append(gram_train)
    kernel_test_list.append(gram_test)

weight_v = hsic_kernel_weights_norm(kernel_train_list, y_train, 1, 0.01, 0)

# combine the base kernels with the HSIC weights; start from zero so the
# weighted sum does not also include the last base kernel computed above
gram_train = np.zeros_like(kernel_train_list[0])
gram_test = np.zeros_like(kernel_test_list[0])
for i in range(n_kernels):
    gram_train += kernel_train_list[i] * weight_v[i]
    gram_test += kernel_test_list[i] * weight_v[i]

cv = model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
parameters = {'C': np.logspace(-15, 10, base=2, num=52)}
grid = model_selection.GridSearchCV(svm.SVC(kernel='precomputed', probability=True),
                                    parameters, n_jobs=-1, cv=cv, verbose=2)
grid.fit(gram_train, y_train)
C = grid.best_params_['C']

clf = svm.SVC(C=C, kernel='precomputed', probability=True)
scorerMCC = metrics.make_scorer(metrics.matthews_corrcoef)
scorerSP = metrics.make_scorer(specificity_score)
scorerPR = metrics.make_scorer(metrics.precision_score)
scorerSE = metrics.make_scorer(metrics.recall_score)
scorer = {'ACC': 'accuracy', 'recall': scorerSE, 'roc_auc': 'roc_auc',
          'MCC': scorerMCC, 'SP': scorerSP}
five_fold = model_selection.cross_validate(clf, gram_train, y_train, cv=cv, scoring=scorer)
a = np.array([1, 2, 3, 4, 5], dtype=np.int8)
a = np.array([1, 2, 3, 4, 5], dtype=complex)
print(a)

# order
a = np.array([1, 2, 3, 4, 5], order='C')  # C-style row-major array
a = np.array([1, 2, 3, 4, 5], order='F')  # Fortran-style column-major array

# minimum dimensions
a = np.array([1, 2, 3, 4, 5], ndmin=7)
print(a)

shape = [1, 3]
x = np.empty(shape, dtype=int)
print('empty: ', x)
x = np.zeros(shape, dtype=int)
print('zeros: ', x)
x = np.ones(shape, dtype=int)
print('ones: ', x)
x = np.zeros(shape, dtype=[('x', complex), ('y', int)])
print('custom dtype: ', x)

x = [[1, 2, 3], [4, 5]]
a = np.asarray(x)
print('asarray: ', a)

a = np.frombuffer(b'Hello World', dtype='S1', count=-1)
print('frombuffer: ', a)

x = np.arange(start=5, stop=10, step=0.5)
print('arange: ', x)
x = np.linspace(start=9, stop=10, num=11)
print('linspace: ', x)
x = np.logspace(1.0, 2.0, num=5)
print('logspace: ', x)
zl = 100.   # [m] depth
df = 0.01   # [m] fracture aperture
lr = 3      # levels of mesh refinement
d_base = df/2.*2**(lr+1)  # calculated dimension of base block
cav_center_depth = 70.    # [m] depth of cavity center
radius = 20.              # [m] cavity radius

# grid discretization
dx = 1.
dy = 1.
dz = 1.
dxyz = np.array([dx, dy, dz])

# (1) ---- Model domain (xl and yl are assumed to be defined earlier)
x = np.arange(0, xl+dx, dx)
telx = np.concatenate((np.array([0., df/2.]),
                       np.logspace(np.log10(df), np.log10(xl), num=50, endpoint=True)))
y = np.concatenate((np.arange(-yl/2., 0+dy, dy), np.arange(dy, yl/2+dy, dy)))
z = np.arange(-zl, 0.+dz, dz)
# z = np.concatenate((np.arange(-zl/2., 0+dz, dz), np.arange(dz, zl/2+dz, dz)))

mins = np.array([min(x), min(y), min(z)])
maxs = np.array([max(x), max(y), max(z)])

# m = l.gridder(x, y, z, connect=True)
m = l.gridder(telx, y, z, connect=True)  # refines toward fracture plane
m.setatt('imt', 1)  # matrix nodes material set to '1'
import numpy as np
from scikits.learn import cross_val, datasets, svm

digits = datasets.load_digits()
X = digits.data
y = digits.target

svc = svm.SVC()
gammas = np.logspace(-6, -1, 10)

scores = list()
scores_std = list()
for gamma in gammas:
    svc.gamma = gamma
    this_scores = cross_val.cross_val_score(svc, X, y, n_jobs=-1)
    scores.append(np.mean(this_scores))
    scores_std.append(np.std(this_scores))

import pylab as pl
pl.figure(1, figsize=(2.5, 2))
pl.clf()
pl.axes([.1, .25, .8, .7])
pl.semilogx(gammas, scores)
pl.semilogx(gammas, np.array(scores) + np.array(scores_std), 'b--')
pl.semilogx(gammas, np.array(scores) - np.array(scores_std), 'b--')
pl.yticks(())
pl.ylabel('CV score')
pl.xlabel('gamma')
pl.ylim(0, 1.1)
#pl.axhline(np.max(scores), linestyle='--', color='.5')
pl.text(gammas[np.argmax(scores)], .9*np.max(scores), '%.3f' % np.max(scores),
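# Equivalence note (assumes a NumPy recent enough to have np.geomspace): the
# gamma grid above can also be written directly in value space rather than
# exponent space.
import numpy as np

assert np.allclose(np.logspace(-6, -1, 10), np.geomspace(1e-6, 1e-1, 10))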
    ln = fp.readline()
    if len(ln) == 0:
        break
    sd = re.findall('^<([0-9]*)>', ln)[0]
    Sd_total += int(sd)

slbl_gt = open(tslblfile).readlines()
gt_sent = np.zeros((Sd_total, C))
cnt = 0
for d in range(len(slbl_gt)):
    sents_gt = re.findall('<(.*?)>', slbl_gt[d])
    for sent in sents_gt:
        gt_sent[cnt, :] = np.array([float(x) for x in sent.split()])
        cnt += 1

# training
clist = np.logspace(-2, 2, 5)
vccr = np.zeros(len(clist))

def train_model_class(c):
    global svmC
    global c0
    global valid_pred
    if os.path.isfile('dir/tmp_%d_%d.tar.gz' % (c, c0)):
        os.system('tar -zxf dir/tmp_%d_%d.tar.gz ' % (c, c0))
        clf = joblib.load('dir/tmp_%d_%d/model.pkl' % (c, c0))
    else:
        (bag_pred_f, ypred_f, clf, bags) = miSVM(N, 'dir/trfile_' + str(c), svmC)
        #save model
# Import the 20-newsgroups text fetcher from sklearn.datasets
from sklearn.datasets import fetch_20newsgroups
# Split the dataset
from sklearn.model_selection import train_test_split
# Import the support vector machine classifier
from sklearn.svm import SVC
# Import the TfidfVectorizer text feature extractor
from sklearn.feature_extraction.text import TfidfVectorizer
# Import Pipeline
from sklearn.pipeline import Pipeline
# Import the grid search module GridSearchCV
from sklearn.model_selection import GridSearchCV

# Download all the data with the news fetcher
news = fetch_20newsgroups(subset='all')

# Split the first 3000 news items; 25% of the text is held out for testing
X_train, X_test, y_train, y_test = train_test_split(news.data[:3000],
                                                    news.target[:3000],
                                                    test_size=0.25,
                                                    random_state=33)

# Use a Pipeline to simplify the setup: chain the text extractor and the classifier
clf = Pipeline([('vect', TfidfVectorizer(stop_words='english', analyzer='word')),
                ('svc', SVC())])

# 4 values for svc__gamma and 3 for svc__C give 12 hyperparameter combinations;
# np.logspace is used to generate the candidate values
parameters = {'svc__gamma': np.logspace(-2, 1, 4),
              'svc__C': np.logspace(-1, 1, 3)}

gs = GridSearchCV(clf, parameters, verbose=2, refit=True, cv=3)
gs.fit(X_train, y_train)
gs.best_params_, gs.best_score_
print(gs.score(X_test, y_test))
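# For reference, the two grids that np.logspace generates here (the values
# follow directly from the exponent ranges above):
import numpy as np

print(np.logspace(-2, 1, 4))  # svc__gamma candidates: [ 0.01  0.1   1.   10. ]
print(np.logspace(-1, 1, 3))  # svc__C candidates:     [ 0.1   1.   10. ]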
print(a)
a = np.arange(1., 10, 3)
print(a.dtype)
print('-------------------')
a = np.linspace(1, 10, 10, retstep=True)
print(a)
a = np.linspace(1, 1, 10)
print(a)
a = np.linspace(10, 20, 5)
print(a)
a = np.linspace(10, 20, 5, endpoint=False)
print(a)
b = np.linspace(1, 1, 10).reshape(10, 1)
print(b)
a = np.linspace(1, 2, 10, retstep=True)
print(a)
b = np.logspace(1, 2, 10)
print(b)
a = np.logspace(0, 9, 10, base=2)
print(a)
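# A small check of the last call above: logspace with base=2 over the integer
# exponents 0..9 is simply the first ten powers of two.
import numpy as np

assert np.allclose(np.logspace(0, 9, 10, base=2), 2.0 ** np.arange(10))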
eee = np.max([-np.min(ex.T), np.max(ex.T)])
levels = np.linspace(-eee, eee, 40)
plt.contourf(X, Y, ex.T, levels=levels, cmap=cm.RdBu, alpha=0.5)
cbar = plt.colorbar(ticks=[-eee, -eee/2, 0, eee/2, eee], orientation='horizontal')
cbar.set_label(r'$E_y$ [$m_ec\omega/e$]', fontdict=font)

## generate the transparent colorbar
cmap = plt.cm.Greys
my_cmap = cmap(np.arange(cmap.N))
my_cmap[:, -1] = np.sqrt(np.linspace(0.0, 1, cmap.N))
my_cmap = ListedColormap(my_cmap)

den = data['Derived/Number_Density/electron'].data / denunit
den = den[:, 600:1799]
levels = np.logspace(-3, -1, 40)
plt.contourf(X, Y, den.T, levels=levels, cmap=my_cmap)

#### manifesting colorbar, changing label and axis properties ####
cbar = plt.colorbar(ticks=np.logspace(-3, -1, 5), orientation='horizontal')
cbar.set_label(r'$n_e$ [$n_c$]', fontdict=font)
plt.xlabel('X [$\mu m$]', fontdict=font)
plt.ylabel('Y [$\mu m$]', fontdict=font)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.title('Density at ' + str(round(time / 1.0e-15, 6)) + ' fs', fontdict=font)

fig = plt.gcf()
fig.set_size_inches(12, 24)
fig.savefig('./field' + str(n).zfill(4) + '.png', format='png', dpi=160)
plt.close("all")
""" # Author: Fabian Pedregosa -- <*****@*****.**> # License: BSD 3 clause print(__doc__) # X is the 10x10 Hilbert matrix X = 1. / (np.arange(1, 11) + np.arange(0, 10)[:, np.newaxis]) y = np.ones(10) ############################################################################### # Compute paths n_alphas = 200 alphas = np.logspace(-10, -2, n_alphas) clf = linear_model.Ridge(fit_intercept=False) coefs = [] for a in alphas: clf.set_params(alpha=a) clf.fit(X, y) coefs.append(clf.coef_) ############################################################################### # Display results ax = plt.gca() ax.plot(alphas, coefs) ax.set_xscale('log')
#    plt.figure()
#    plt.plot(np.arange(1, np.alen(scores)+1), scores, c='c', lw=2, aa=True)
#    plt.plot(np.argmax(scores)+1, np.amax(scores), 'v')
#    plt.title('Accuracy KNN')
#    plt.xlabel('K')
#    plt.ylabel('Accuracy')
#    plt.grid()
#    plt.show()

#%% SVM Classifier
# Params C, kernel, degree, params of kernel
if (SVM_cl == 1):
    # Parameters for the validation
    C = np.logspace(-3, 5, 30)
    p = np.arange(1, 5)
    gamma = np.array([0.125, 0.25, 0.5, 1, 2, 4]) / 200

    # Create dictionaries with the variables for the validation!
    # We create a dictionary for every TYPE of SVM we are gonna use.
    param_grid_linear = dict()
    param_grid_linear.update({'kernel': ['linear']})
    param_grid_linear.update({'C': C})

    param_grid_pol = dict()
    param_grid_pol.update({'kernel': ['poly']})
    param_grid_pol.update({'C': C})
    param_grid_pol.update({'degree': p})

    param_grid_rbf = dict()
# Removing low variance inputs due to initial error from fitting logistic model
from sklearn.feature_selection import VarianceThreshold

def variance_threshold_selector(data, threshold=0.5):
    # https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html
    # https://stackoverflow.com/a/39813304/1956309
    selector = VarianceThreshold(threshold)
    selector.fit(data)
    return data[data.columns[selector.get_support(indices=True)]]

# min_variance = .9 * (1 - .9)  # You can play here with different values.
min_variance = 0.01
low_variance_removed = variance_threshold_selector(X_train, min_variance)
print('columns removed:', low_variance_removed.columns)
X_train_var = low_variance_removed

param_grid = {"C": np.logspace(-3, 3, 7),
              "penalty": ["l1", "l2"],
              "max_iter": [1000, 2000]}

#### Option 1. Using sklearn ####
LR_cv = GridSearchCV(LogisticRegression(), param_grid, cv=5)
LR_cv = LR_cv.fit(X_train_var, Y_train)
## Initial ERROR: Check levels of inputs - Likely to have 99%~ in one level for an input

# View best hyperparameters
print('Best C:', LR_cv.best_estimator_.get_params())

# Using abs(coef) to get feature importance
feat_labels = X_train_var.columns.values
importances = LR_cv.best_estimator_.coef_[0]
indices = np.argsort(importances)
lr_importance = pd.DataFrame()
def main(out_dir="results"):
    model_metrics = metrics.BinaryMetricsRecorder(domains=skip_domains)
    stupid_metrics = metrics.BinaryMetricsRecorder(domains=skip_domains)
    human_metrics = metrics.BinaryMetricsRecorder(domains=skip_domains)

    # parse the risk of bias data from Cochrane
    print "risk of bias data!"
    data = riskofbias.RoBData(test_mode=False)
    data.generate_data(doc_level_only=False, skip_small_files=True)

    # filter the data by Document
    filtered_data = riskofbias.DocFilter(data)

    # get the uids of the desired training set
    # (for this experiment those which appear in only one review)
    uids_all = filtered_data.get_ids(pmid_instance=0)  # those with 1 or more assessment (i.e. all)
    uids_double_assessed = filtered_data.get_ids(pmid_instance=1)  # those with 2 (or more) assessments (to hide for training)
    uids_train = np.setdiff1d(uids_all, uids_double_assessed)

    ########################
    # sentence prediction  #
    ########################

    # The first stage is to make the sentence prediction model using the
    # training data set
    print "First, making sentence prediction model"

    sent_docs = riskofbias.MultiTaskSentFilter(data)
    uids = np.array(sent_docs.get_ids())
    no_studies = len(uids)

    # sentence tokenization
    sent_vec = modhashvec.ModularVectorizer(
        norm=None, non_negative=True, binary=True, ngram_range=(1, 2),
        n_features=2**26)  # since multitask + bigrams = huge feature space
    sent_vec.builder_clear()

    # add base features; this effectively generates the shared feature
    # space (i.e., features for all domains)
    sent_vec.builder_add_interaction_features(
        sent_docs.X(uids_train, domain=skip_domains), low=7)

    # now we add interaction features, which cross the domain with the
    # tokens. specifically, the X_i method returns token tuples crossing
    # every term with every domain, and the vectorizer (an instance of
    # ModularVectorizer) deals with inserting the actual interaction tokens
    # that cross domains with tokens.
    domain_interaction_tuples = sent_docs.X_i(uids_train, domain=skip_domains)
    sent_vec.builder_add_interaction_features(domain_interaction_tuples, low=2)

    # setup sentence classifier
    tuned_parameters = {
        "alpha": np.logspace(-4, -1, 5),
        "class_weight": [{1: i, -1: 1} for i in np.logspace(-1, 1, 5)]
    }
    # bcw: are we sure we want to do 'recall' here, and not (e.g.) F1?
    sent_clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"),
                            tuned_parameters, scoring='recall')

    X_train = sent_vec.builder_fit_transform()
    y_train = sent_docs.y(uids_train, domain=skip_domains)

    sent_clf.fit(X_train, y_train)
    del X_train, y_train

    # we only need the best performing
    sent_clf = sent_clf.best_estimator_

    # now we have our multi-task sentence prediction model,
    # which we'll use to make sentence-level predictions for
    # documents.
    ########################
    # document prediction  #
    ########################

    # we need different test ids for each domain
    # (since we're testing on studies with more than one RoB assessment
    # for *each domain*)
    docs = riskofbias.MultiTaskDocFilter(data)
    X_train_d = docs.Xyi(uids_train, domain=skip_domains)

    tuned_parameters = {"alpha": np.logspace(-2, 2, 10)}
    clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="l2"),
                       tuned_parameters, scoring='f1')

    # bcw: note that I've amended the y method to
    # return interactions as well (i.e., domain strs)
    y_train = docs.y(uids_train, domain=skip_domains)

    # add interaction features (here both domain + high prob sentences)
    interactions = {domain: [] for domain in skip_domains}
    high_prob_sents = []
    interaction_domains = []

    for doc_index, (doc_text, doc_domain) in enumerate(X_train_d):
        doc_sents = sent_tokenizer.tokenize(doc_text)
        doc_domains = [doc_domain] * len(doc_sents)
        # interactions
        doc_X_i = izip(doc_sents, doc_domains)

        # sent_vec is from above.
        sent_vec.builder_clear()
        sent_vec.builder_add_interaction_features(
            doc_sents)  # add base features
        sent_vec.builder_add_interaction_features(
            doc_X_i)  # then add interactions
        doc_sents_X = sent_vec.builder_transform()

        ## bcw -- shouldn't we use the *true* sentence labels
        # here, rather than predictions????
        # sent_clf was trained above
        doc_sents_preds = sent_clf.predict(doc_sents_X)

        high_prob_sents.append(" ".join([
            sent for sent, sent_pred in zip(doc_sents, doc_sents_preds)
            if sent_pred == 1
        ]))
        interaction_domains.append("-s-" + doc_domain)

        if doc_index % 10 == 0:
            print doc_index

        # from collections import Counter
        # prob_count = Counter(list(doc_sents_preds))
        # print prob_count

        # for domain in riskofbias.CORE_DOMAINS:
        #     if domain == doc_domain:
        #         interactions[domain].append(True)
        #     else:
        #         interactions[domain].append(False)

    vec = modhashvec.ModularVectorizer(
        norm=None,
        non_negative=True,
        binary=True,
        ngram_range=(1, 2),
        n_features=2**26)  # since multitask + bigrams = huge feature space
    vec.builder_clear()

    vec.builder_add_docs(docs.X(uids_train, domain=skip_domains),
                         low=7)  # add base features
    vec.builder_add_docs(docs.Xyi(uids_train, domain=skip_domains),
                         low=2)  # add domain interactions
    # removed X_train_d since already been through the generator! (needed reset)
    vec.builder_add_docs(izip(high_prob_sents, interaction_domains),
                         low=2)  # then add sentence interaction terms

    X_train = vec.builder_fit_transform()
    clf.fit(X_train, y_train)

    with open('mt_mt_production_models3.pck', 'wb') as f:
        pickle.dump((sent_clf, clf.best_estimator_), f)

    quit()  # NOTE: this exits here, so the testing section below never
            # runs; remove it to evaluate the saved models

    ############
    # testing  #
    ############

    # Test on each domain in turn
    for domain in skip_domains:

        uids_domain_all = filtered_data.get_ids(pmid_instance=0,
                                                filter_domain=domain)
        uids_domain_double_assessed = filtered_data.get_ids(
            pmid_instance=1, filter_domain=domain)
        uids_test_domain = np.intersect1d(uids_domain_all,
                                          uids_domain_double_assessed)

        X_test_d, y_test = filtered_data.Xy(uids_test_domain,
                                            domain=domain,
                                            pmid_instance=0)
        X_ignore, y_human = filtered_data.Xy(uids_test_domain,
                                             domain=domain,
                                             pmid_instance=1)
        X_ignore = None  # don't need this bit

        #
        # get high prob sents from test data
        #
        high_prob_sents = []

        for doc_text in X_test_d:
            doc_sents = sent_tokenizer.tokenize(doc_text)

            # bcw -- I think this (using doc_domain and not
            # domain) was the bug before!
#doc_domains = [doc_domain] * len(doc_sents) doc_domains = [domain] * len(doc_sents) doc_X_i = izip(doc_sents, doc_domains) sent_vec.builder_clear() sent_vec.builder_add_interaction_features( doc_sents) # add base features sent_vec.builder_add_interaction_features( doc_X_i) # then add interactions doc_sents_X = sent_vec.builder_transform() doc_sents_preds = sent_clf.predict(doc_sents_X) high_prob_sents.append(" ".join([ sent for sent, sent_pred in zip(doc_sents, doc_sents_preds) if sent_pred == 1 ])) sent_domain_interactions = ["-s-" + domain] * len(high_prob_sents) domain_interactions = [domain] * len(high_prob_sents) print print "domain: %s" % domain print "High prob sents:" print '\n'.join(high_prob_sents) # build up test vector vec.builder_clear() vec.builder_add_docs(X_test_d) # add base features vec.builder_add_docs(izip(X_test_d, domain_interactions)) # add interactions vec.builder_add_docs( izip(high_prob_sents, sent_domain_interactions)) # sentence interactions X_test = vec.builder_transform() y_preds = clf.predict(X_test) model_metrics.add_preds_test(y_preds, y_test, domain=domain) human_metrics.add_preds_test(y_human, y_test, domain=domain) stupid_metrics.add_preds_test([1] * len(y_test), y_test, domain=domain) model_metrics.save_csv( os.path.join(out_dir, outputnames.filename(label="model"))) stupid_metrics.save_csv( os.path.join(out_dir, outputnames.filename(label="stupid-baseline"))) human_metrics.save_csv( os.path.join(out_dir, outputnames.filename(label="human-performance")))
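# --- Illustrative aside (editor's sketch) ---
# ModularVectorizer is project-specific, but the interaction-feature trick
# used throughout the pipeline above can be sketched with sklearn's
# HashingVectorizer alone: prefix each token with its domain so that
# domain-specific copies of every feature land in their own region of the
# shared hashed space. The helper below is hypothetical and only
# demonstrates the idea (underscores keep the prefixed token in one piece
# under the default tokenizer).
from sklearn.feature_extraction.text import HashingVectorizer

hv = HashingVectorizer(norm=None, binary=True, ngram_range=(1, 2),
                       n_features=2**26)

def with_domain_prefix(text, domain):
    # e.g. 'blinding_i_participants': hashed separately from the base token
    return " ".join("%s_i_%s" % (domain, tok) for tok in text.split())

doc = "participants and assessors were blinded"
X_base = hv.transform([doc])                                   # shared features
X_inter = hv.transform([with_domain_prefix(doc, "blinding")])  # per-domain copies
X = X_base + X_inter  # one vector holding both feature groups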
import numpy as np
import matplotlib.pyplot as plt

# NOTE: these imports require scikit-learn < 0.21; LSHForest was removed in
# later releases and samples_generator was merged into sklearn.datasets.
from sklearn.datasets.samples_generator import make_blobs
from sklearn.neighbors import LSHForest
from sklearn.neighbors import NearestNeighbors

# Parameters of the study
n_samples_min = int(1e3)
n_samples_max = int(1e5)
n_features = 100
n_centers = 100
n_queries = 100
n_steps = 6
n_iter = 5

# Initialize the range of `n_samples`
n_samples_values = np.logspace(np.log10(n_samples_min),
                               np.log10(n_samples_max),
                               n_steps).astype(int)  # np.int was removed in NumPy 1.24

# Generate some structured data
rng = np.random.RandomState(42)
all_data, _ = make_blobs(n_samples=n_samples_max + n_queries,
                         n_features=n_features,
                         centers=n_centers,
                         shuffle=True,
                         random_state=0)
queries = all_data[:n_queries]
index_data = all_data[n_queries:]

# Metrics to collect for the plots
average_times_exact = []
average_times_approx = []
std_times_approx = []
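# --- Illustrative aside (editor's sketch) ---
# The snippet above only sets up the benchmark; one plausible shape of the
# timing loop that fills the three lists is sketched below. It assumes the
# same old scikit-learn that still ships LSHForest, so treat it as a sketch
# rather than a drop-in continuation.
import time

for n_samples in n_samples_values:
    X = index_data[:n_samples]

    # exact neighbours: brute force, timed once
    nbrs = NearestNeighbors(n_neighbors=1, algorithm='brute').fit(X)
    t0 = time.time()
    nbrs.kneighbors(queries)
    average_times_exact.append(time.time() - t0)

    # approximate neighbours: averaged over n_iter independent forests
    times = []
    for _ in range(n_iter):
        lshf = LSHForest(n_estimators=10, n_candidates=50,
                         random_state=42).fit(X)
        t0 = time.time()
        lshf.kneighbors(queries, n_neighbors=1)
        times.append(time.time() - t0)
    average_times_approx.append(np.mean(times))
    std_times_approx.append(np.std(times))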
def project_prof_beam(tht, M, z, theta, nu, f_beam):
    disc_fac = np.sqrt(2)
    l0 = 30000.
    NNR = 100
    NNR2 = 3 * NNR

    #fwhm = beam
    #fwhm *= np.pi / (180.*60.)
    #sigmaBeam = fwhm / np.sqrt(8.*np.log(2.))

    #P0, rho0, x_f = theta2
    #fstar = fstar_func(M)

    XH = 0.76  # hydrogen mass fraction; was undefined here, value assumed
               # from the sibling projection functions below

    theta_r = [theta[0], theta[1], theta[2], theta[3]]
    theta_p = [theta[0], theta[1], theta[2], theta[4]]

    AngDis = AngDist(z)

    rvir = r200(M, z) / kpc_cgs / 1e3  #in Mpc
    c = con(M, z)

    r_ext = AngDis * np.arctan(np.radians(tht / 60.))
    r_ext2 = AngDis * np.arctan(np.radians(tht * disc_fac / 60.))

    rvir_arcmin = 180. * 60. / np.pi * np.tan(rvir / AngDis)  #arcmin
    rvir_ext = AngDis * np.arctan(np.radians(rvir_arcmin / 60.))
    rvir_ext2 = AngDis * np.arctan(np.radians(rvir_arcmin * disc_fac / 60.))

    rad = np.logspace(-3, 1, 200)  #in Mpc
    rad2 = np.logspace(-3, 1, 200)  #in Mpc

    radlim = r_ext
    radlim2 = r_ext2

    dtht = np.arctan(radlim / AngDis) / NNR  # rads
    dtht2 = np.arctan(radlim2 / AngDis) / NNR  # rads

    thta = (np.arange(NNR) + 1.) * dtht
    thta2 = (np.arange(NNR) + 1.) * dtht2

    thta_smooth = (np.arange(NNR2) + 1.) * dtht
    thta2_smooth = (np.arange(NNR2) + 1.) * dtht2

    thta_smooth = thta_smooth[:, None]
    thta2_smooth = thta2_smooth[:, None]

    rint = np.sqrt(rad**2 + thta_smooth**2 * AngDis**2)
    rint2 = np.sqrt(rad2**2 + thta2_smooth**2 * AngDis**2)

    # project the 3-D profiles along the line of sight
    # (factor 2: symmetry about the midplane)
    rho2D = 2 * np.trapz(rho(rint, M, z, theta_r), x=rad * kpc_cgs,
                         axis=1) * 1e3
    rho2D2 = 2 * np.trapz(rho(rint2, M, z, theta_r), x=rad2 * kpc_cgs,
                          axis=1) * 1e3

    Pth2D = 2 * np.trapz(Pth(rint, M, z, theta_p), x=rad * kpc_cgs,
                         axis=1) * 1e3
    Pth2D2 = 2 * np.trapz(Pth(rint2, M, z, theta_p), x=rad2 * kpc_cgs,
                          axis=1) * 1e3

    thta_smooth = (np.arange(NNR2) + 1.) * dtht
    thta = thta[:, None, None]
    thta2_smooth = (np.arange(NNR2) + 1.) * dtht2
    thta2 = thta2[:, None, None]

    phi = np.linspace(0., 2 * np.pi, 100)
    phi = phi[None, None, :]
    thta_smooth = thta_smooth[None, :, None]
    thta2_smooth = thta2_smooth[None, :, None]

    rho2D = rho2D[None, :, None]
    rho2D2 = rho2D2[None, :, None]
    Pth2D = Pth2D[None, :, None]
    Pth2D2 = Pth2D2[None, :, None]

    # convolve the projected profiles with the beam profile f_beam,
    # integrating over azimuth first
    rho2D_beam0 = np.trapz(thta_smooth * rho2D * f_beam(
        np.sqrt(thta**2 + thta_smooth**2 -
                2 * thta * thta_smooth * np.cos(phi))),
                           x=phi, axis=2)
    rho2D2_beam0 = np.trapz(thta2_smooth * rho2D2 * f_beam(
        np.sqrt(thta2**2 + thta2_smooth**2 -
                2 * thta2 * thta2_smooth * np.cos(phi))),
                            x=phi, axis=2)
    Pth2D_beam0 = np.trapz(thta_smooth * Pth2D * f_beam(
        np.sqrt(thta**2 + thta_smooth**2 -
                2 * thta * thta_smooth * np.cos(phi))),
                           x=phi, axis=2)
    Pth2D2_beam0 = np.trapz(thta2_smooth * Pth2D2 * f_beam(
        np.sqrt(thta2**2 + thta2_smooth**2 -
                2 * thta2 * thta2_smooth * np.cos(phi))),
                            x=phi, axis=2)

    thta_smooth = (np.arange(NNR2) + 1.) * dtht
    thta2_smooth = (np.arange(NNR2) + 1.) * dtht2

    rho2D_beam = np.trapz(rho2D_beam0, x=thta_smooth, axis=1)
    rho2D2_beam = np.trapz(rho2D2_beam0, x=thta2_smooth, axis=1)
    Pth2D_beam = np.trapz(Pth2D_beam0, x=thta_smooth, axis=1)
    Pth2D2_beam = np.trapz(Pth2D2_beam0, x=thta2_smooth, axis=1)

    thta = (np.arange(NNR) + 1.) * dtht
    thta2 = (np.arange(NNR) + 1.) * dtht2

    area_fac = 2.0 * np.pi * dtht * np.sum(thta)

    # aperture integrals over the disc and the sqrt(2)-larger disc;
    # 2*sig - sig2 below is the disc minus the equal-area outer ring
    sig = 2.0 * np.pi * dtht * np.sum(thta * rho2D_beam)
    sig2 = 2.0 * np.pi * dtht2 * np.sum(thta2 * rho2D2_beam)

    sig_all_beam = (2 * sig - sig2) * v_rms * ST_CGS * TCMB * 1e6 * (
        (1. + XH) / 2) / MP_CGS

    sig_p = 2.0 * np.pi * dtht * np.sum(thta * Pth2D_beam)
    sig2_p = 2.0 * np.pi * dtht2 * np.sum(thta2 * Pth2D2_beam)

    sig_all_p_beam = fnu(nu) * (2 * sig_p - sig2_p) * ST_CGS / (
        ME_CGS * C_CGS**2) * TCMB * 1e6 * (
            (2. + 2. * XH) / (3. + 5. * XH))  #/ area_fac # muK

    #print(sig_all_beam*3282.8 * 60.**2, sig_all_p_beam*3282.8 * 60.**2)

    return sig_all_beam, sig_all_p_beam
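# --- Illustrative aside (editor's note) ---
# The combination (2*sig - sig2) above is compensated aperture photometry:
# with disc_fac = sqrt(2) the outer annulus has the same area as the inner
# disc, so disc - ring = 2*disc(R) - disc(sqrt(2)*R), and any uniform
# background cancels exactly. A two-line check with area standing in for
# flux from a constant sky:
import numpy as np

disc = lambda r: np.pi * r**2              # uniform "sky": flux == area
R = 1.0
ring = disc(np.sqrt(2) * R) - disc(R)      # equal-area outer annulus
print(disc(R) - ring, 2 * disc(R) - disc(np.sqrt(2) * R))  # 0.0 0.0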
# In[24]: #findObservations([(630,)]) #remove25(obs) #averageFlux(obs[0], 1, 30) longflare = [] for floop in range(0, flarecycles): flareone = flaring(-1, flareperiod, amplitude=0.3) flareone = flareone[0:1440] positiveflare = [abs(x) for x in flareone] longflare.extend(positiveflare) # In[25]: PrangeLoop = np.logspace(-2.5, 2, freqlength) FrangeLoop = [(1 / x) for x in PrangeLoop] # In[26]: # reset results file with open(inFile, 'w') as f: f.write( 'fullmaglist \n\n periodlist \n\n measuredperiodlist \n\n siglist \n\n powerlist \n\n listnumberlist \n\n end of file' ) # In[57]: results = [] fullmeasuredPeriod = [] fullPeriod = []
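# --- Illustrative aside (editor's sketch) ---
# PrangeLoop/FrangeLoop define a log-spaced period grid and its reciprocal
# frequency grid, presumably for a periodogram. A hypothetical use with
# astropy's LombScargle (time units assumed to be days, matching the grid;
# the light curve here just reuses the simulated flare series):
import numpy as np
from astropy.timeseries import LombScargle

t = np.linspace(0., 1., 1440)            # one day at minute cadence
y = np.asarray(longflare[:1440])         # one simulated light curve
power = LombScargle(t, y).power(np.asarray(FrangeLoop))
best_period = PrangeLoop[np.argmax(power)]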
def project_prof_beam_rho_dm(tht, M, z, f_beam): disc_fac = np.sqrt(2) l0 = 30000. NNR = 100 NNR2 = 3. * NNR #fwhm = beam #arcmin #fwhm *= np.pi / (180.*60.) #rad #sigmaBeam = fwhm / np.sqrt(8.*np.log(2.)) rvir = r200(M, z) / kpc_cgs / 1e3 #Mpc c = con(M, z) drint = 1e-3 * (kpc_cgs * 1e3) XH = 0.76 AngDis = AngDist(z) r_ext = AngDis * np.arctan(np.radians(tht / 60.)) r_ext2 = AngDis * np.arctan(np.radians(tht * disc_fac / 60.)) rad = np.logspace(-3, 1, 200) #Mpc rad2 = np.logspace(-3, 1, 200) radlim = r_ext radlim2 = r_ext2 dtht = np.arctan(radlim / AngDis) / NNR # rads dtht2 = np.arctan(radlim2 / AngDis) / NNR # rads thta = (np.arange(NNR) + 1.) * dtht thta2 = (np.arange(NNR) + 1.) * dtht2 thta_smooth = (np.arange(NNR2) + 1.) * dtht thta2_smooth = (np.arange(NNR2) + 1.) * dtht2 thta_smooth = thta_smooth[:, None] thta2_smooth = thta2_smooth[:, None] rint = np.sqrt(rad**2 + thta_smooth**2 * AngDis**2) rint2 = np.sqrt(rad2**2 + thta2_smooth**2 * AngDis**2) rho2D = 2 * np.trapz(rho_dm(rint, M, z), x=rad * kpc_cgs, axis=1) * 1e3 rho2D2 = 2 * np.trapz(rho_dm(rint2, M, z), x=rad2 * kpc_cgs, axis=1) * 1e3 thta_smooth = (np.arange(NNR2) + 1.) * dtht thta = thta[:, None, None] thta2_smooth = (np.arange(NNR2) + 1.) * dtht2 thta2 = thta2[:, None, None] phi = np.linspace(0., 2 * np.pi, 100) phi = phi[None, None, :] thta_smooth = thta_smooth[None, :, None] thta2_smooth = thta2_smooth[None, :, None] rho2D = rho2D[None, :, None] rho2D2 = rho2D2[None, :, None] rho2D_beam0 = np.trapz(thta_smooth * rho2D * f_beam( np.sqrt(thta**2 + thta_smooth**2 - 2 * thta * thta_smooth * np.cos(phi))), x=phi, axis=2) rho2D2_beam0 = np.trapz(thta2_smooth * rho2D2 * f_beam( np.sqrt(thta2**2 + thta2_smooth**2 - 2 * thta2 * thta2_smooth * np.cos(phi))), x=phi, axis=2) thta_smooth = (np.arange(NNR2) + 1.) * dtht thta2_smooth = (np.arange(NNR2) + 1.) * dtht2 rho2D_beam = np.trapz(rho2D_beam0, x=thta_smooth, axis=1) rho2D2_beam = np.trapz(rho2D2_beam0, x=thta2_smooth, axis=1) thta = (np.arange(NNR) + 1.) * dtht thta2 = (np.arange(NNR) + 1.) * dtht2 area_fac = 2.0 * np.pi * dtht * np.sum(thta) sig = 2.0 * np.pi * dtht * np.sum(thta * rho2D_beam) sig2 = 2.0 * np.pi * dtht2 * np.sum(thta2 * rho2D2_beam) sig_all_beam = (2 * sig - sig2) * v_rms * ST_CGS * TCMB * 1e6 * ( (2. + 2. * XH) / (3. + 5. * XH)) / MP_CGS #/ (np.pi * np.radians(tht/60.)**2) return sig_all_beam
def project_prof_beam_sim_y(tht, M, z, theta_pth, beam): theta_sim_pth = theta_pth disc_fac = np.sqrt(2) l0 = 30000. NNR = 100 NNR2 = 3.5 * NNR fwhm = beam fwhm *= np.pi / (180. * 60.) sigmaBeam = fwhm / np.sqrt(8. * np.log(2.)) drint = 1e-3 * (kpc_cgs * 1e3) XH = 0.76 AngDis = AngDist(z) m_med = np.median(M) #rvir = r200(m_med,z)/kpc_cgs/1e3 #Mpc r_ext = AngDis * np.arctan(np.radians(tht / 60.)) r_ext2 = AngDis * np.arctan(np.radians(tht * disc_fac / 60.)) rad = np.logspace(-3, 1, 200) #Mpc rad2 = np.logspace(-3, 1, 200) radlim = r_ext radlim2 = r_ext2 dtht = np.arctan(radlim / AngDis) / NNR # rads dtht2 = np.arctan(radlim2 / AngDis) / NNR # rads thta = (np.arange(NNR) + 1.) * dtht thta2 = (np.arange(NNR) + 1.) * dtht2 thta_smooth = (np.arange(NNR2) + 1.) * dtht thta2_smooth = (np.arange(NNR2) + 1.) * dtht2 thta_smooth = thta_smooth[:, None] thta2_smooth = thta2_smooth[:, None] rint = np.sqrt(rad**2 + thta_smooth**2 * AngDis**2) rint2 = np.sqrt(rad2**2 + thta2_smooth**2 * AngDis**2) Pth2D = 2 * np.trapz( Pth_gnfw(rint, M, z, theta_sim_pth), x=rad * kpc_cgs, axis=1) * 1e3 Pth2D2 = 2 * np.trapz( Pth_gnfw(rint2, M, z, theta_sim_pth), x=rad2 * kpc_cgs, axis=1) * 1e3 thta_smooth = (np.arange(NNR2) + 1.) * dtht thta = thta[:, None] thta2_smooth = (np.arange(NNR2) + 1.) * dtht2 thta2 = thta2[:, None] Pth2D_beam = np.trapz(thta_smooth * Pth2D * np.exp(-0.5 * thta_smooth**2 / sigmaBeam**2) * special.iv(0, thta_smooth * thta / sigmaBeam**2), x=thta_smooth, axis=1) Pth2D2_beam = np.trapz(thta2_smooth * Pth2D2 * np.exp(-0.5 * thta2_smooth**2 / sigmaBeam**2) * special.iv(0, thta2_smooth * thta2 / sigmaBeam**2), x=thta2_smooth, axis=1) thta = (np.arange(NNR) + 1.) * dtht thta2 = (np.arange(NNR) + 1.) * dtht2 area_fac = 2.0 * np.pi * dtht * np.sum(thta) Pth2D_beam *= np.exp(-0.5 * thta**2 / sigmaBeam**2) / sigmaBeam**2 Pth2D2_beam *= np.exp(-0.5 * thta2**2 / sigmaBeam**2) / sigmaBeam**2 sig_p = 2.0 * np.pi * dtht * np.sum(thta * Pth2D_beam) sig2_p = 2.0 * np.pi * dtht2 * np.sum(thta2 * Pth2D2_beam) sig_all_p_beam = (2 * sig_p - sig2_p) * ST_CGS / (ME_CGS * C_CGS**2) * ( (2. + 2. * XH) / (3. + 5. * XH)) #/ area_fac # muK return sig_all_p_beam
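# --- Illustrative aside (editor's check) ---
# The Gaussian-beam smoothing in project_prof_beam_sim_y uses the standard
# reduction of the 2-D convolution of an axisymmetric profile P to a 1-D
# radial integral:
#   P_beam(t) = exp(-t^2/2s^2)/s^2 * Int t' P(t') exp(-t'^2/2s^2) I0(t t'/s^2) dt'
# A toy numerical check of this identity against the direct 2-D convolution:
import numpy as np
from scipy import special

sigma, theta = 0.5, 1.0
prof = lambda r: np.exp(-r)              # toy axisymmetric profile
tp = np.linspace(1e-4, 10., 2000)        # theta'

# 1-D Bessel form (as used above)
f1 = np.trapz(tp * prof(tp) * np.exp(-0.5 * tp**2 / sigma**2) *
              special.iv(0, tp * theta / sigma**2), tp)
f1 *= np.exp(-0.5 * theta**2 / sigma**2) / sigma**2

# direct 2-D convolution with the unit-normalised Gaussian beam
phi = np.linspace(0., 2 * np.pi, 720)
d2 = theta**2 + tp[:, None]**2 - 2 * theta * tp[:, None] * np.cos(phi)
f2 = np.trapz(np.trapz(tp[:, None] * prof(tp)[:, None] *
                       np.exp(-0.5 * d2 / sigma**2), phi, axis=1), tp)
f2 /= 2 * np.pi * sigma**2
print(f1, f2)  # the two estimates agree to numerical precision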
def evaluate_tau_dt_mu_s(
    nu,
    z,
    mu_s,
    L_disk,
    xi_dt,
    epsilon_dt,
    R_dt,
    r,
    u_size=100,
    phi_re=phi_to_integrate,
):
    r"""Evaluates the gamma-gamma absorption produced by a ring dust torus
    for the case of a photon moving at an angle to the jet

    Parameters
    ----------
    nu : :class:`~astropy.units.Quantity`
        array of frequencies, in Hz, at which to compute the opacity
        **note** these are observed frequencies (observer frame)
    z : float
        redshift of the source
    mu_s : float
        cosine of the angle between the blob motion and the jet axis
    L_disk : :class:`~astropy.units.Quantity`
        luminosity of the disk whose radiation is being reprocessed by the torus
    xi_dt : float
        fraction of the disk radiation reprocessed by the torus
    epsilon_dt : float
        peak (dimensionless) energy of the black body radiated by the torus
    R_dt : :class:`~astropy.units.Quantity`
        radius of the ring-like torus
    r : :class:`~astropy.units.Quantity`
        distance between the dust torus and the blob
    u_size : int
        size of the array of distances from the photon origin to integrate over
    phi_re : :class:`~numpy.ndarray`
        array of azimuth angles of the dust torus to integrate over

    Returns
    -------
    :class:`~astropy.units.Quantity`
        array of optical depth values corresponding to each frequency
    """
    # conversions
    epsilon_1 = nu_to_epsilon_prime(nu, z)
    # multidimensional integration
    # here uu is the distance that the photon traversed
    uu = np.logspace(-5, 5, u_size) * r
    _phi_re, _u, _epsilon_1 = axes_reshaper(phi_re, uu, epsilon_1)
    # distance between soft photon and gamma ray
    x = x_re_ring_mu_s(R_dt, r, _phi_re, _u, mu_s)
    # convert the phi angles of the ring into the actual phi angles
    # of the soft photon catching up with the gamma ray
    _phi, _mu = phi_mu_re_ring(R_dt, r, _phi_re, _u, mu_s)
    _cos_psi = cos_psi(mu_s, _mu, _phi)
    s = _epsilon_1 * epsilon_dt * (1 - _cos_psi) / 2
    integrand = (1 - _cos_psi) / x**2 * sigma(s)
    # integrate
    integral_phi = np.trapz(integrand, phi_re, axis=0)
    integral = np.trapz(integral_phi, uu, axis=0)
    prefactor = (L_disk * xi_dt) / (8 * np.pi**2 * epsilon_dt * m_e * c**3)
    return (prefactor * integral).to_value("")
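# --- Illustrative aside (editor's sketch) ---
# A hypothetical call, assuming the helpers used above (nu_to_epsilon_prime,
# axes_reshaper, sigma, phi_to_integrate, ...) are in scope; all parameter
# values below are made up for illustration, not taken from any fitted source.
import numpy as np
import astropy.units as u

nu = np.logspace(22, 28, 50) * u.Hz       # observed gamma-ray frequencies
tau = evaluate_tau_dt_mu_s(
    nu, z=0.36, mu_s=0.99,
    L_disk=2e46 * u.Unit("erg s-1"),
    xi_dt=0.6,                            # reprocessing fraction (illustrative)
    epsilon_dt=1e-7,                      # dimensionless peak photon energy
    R_dt=6.5e18 * u.cm, r=1e18 * u.cm,
)
absorption = np.exp(-tau)                 # attenuation factor applied to an SED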
def project_prof_beam_sim_pth(tht, M, z, theta_pth, nu, f_beam): theta_sim_pth = theta_pth disc_fac = np.sqrt(2) l0 = 30000. NNR = 100 NNR2 = 3.5 * NNR #fwhm = beam #fwhm *= np.pi / (180.*60.) #sigmaBeam = fwhm / np.sqrt(8.*np.log(2.)) drint = 1e-3 * (kpc_cgs * 1e3) XH = 0.76 AngDis = AngDist(z) m_med = np.median(M) #rvir = r200(m_med,z)/kpc_cgs/1e3 #Mpc r_ext = AngDis * np.arctan(np.radians(tht / 60.)) r_ext2 = AngDis * np.arctan(np.radians(tht * disc_fac / 60.)) #rvir_arcmin = 180.*60./np.pi * np.tan(rvir/AngDis) #arcmin #rvir_ext = AngDis*np.arctan(np.radians(2*rvir_arcmin/60.)) rad = np.logspace(-3, 1, 200) #Mpc rad2 = np.logspace(-3, 1, 200) radlim = r_ext radlim2 = r_ext2 dtht = np.arctan(radlim / AngDis) / NNR # rads dtht2 = np.arctan(radlim2 / AngDis) / NNR # rads thta = (np.arange(NNR) + 1.) * dtht thta2 = (np.arange(NNR) + 1.) * dtht2 thta_smooth = (np.arange(NNR2) + 1.) * dtht thta2_smooth = (np.arange(NNR2) + 1.) * dtht2 thta_smooth = thta_smooth[:, None] thta2_smooth = thta2_smooth[:, None] rint = np.sqrt(rad**2 + thta_smooth**2 * AngDis**2) rint2 = np.sqrt(rad2**2 + thta2_smooth**2 * AngDis**2) Pth2D = 2 * np.trapz( Pth_gnfw(rint, M, z, theta_sim_pth), x=rad * kpc_cgs, axis=1) * 1e3 Pth2D2 = 2 * np.trapz( Pth_gnfw(rint2, M, z, theta_sim_pth), x=rad2 * kpc_cgs, axis=1) * 1e3 thta_smooth = (np.arange(NNR2) + 1.) * dtht thta = thta[:, None, None] thta2_smooth = (np.arange(NNR2) + 1.) * dtht2 thta2 = thta2[:, None, None] phi = np.linspace(0., 2 * np.pi, 50) phi = phi[None, None, :] thta_smooth = thta_smooth[None, :, None] thta2_smooth = thta2_smooth[None, :, None] Pth2D = Pth2D[None, :, None] Pth2D2 = Pth2D2[None, :, None] Pth2D_beam0 = np.trapz(thta_smooth * Pth2D * f_beam( np.sqrt(thta**2 + thta_smooth**2 - 2 * thta * thta_smooth * np.cos(phi))), x=phi, axis=2) Pth2D2_beam0 = np.trapz(thta2_smooth * Pth2D2 * f_beam( np.sqrt(thta2**2 + thta2_smooth**2 - 2 * thta2 * thta2_smooth * np.cos(phi))), x=phi, axis=2) thta_smooth = (np.arange(NNR2) + 1.) * dtht thta2_smooth = (np.arange(NNR2) + 1.) * dtht2 Pth2D_beam = np.trapz(Pth2D_beam0, x=thta_smooth, axis=1) Pth2D2_beam = np.trapz(Pth2D2_beam0, x=thta2_smooth, axis=1) thta = (np.arange(NNR) + 1.) * dtht thta2 = (np.arange(NNR) + 1.) * dtht2 area_fac = 2.0 * np.pi * dtht * np.sum(thta) sig_p = 2.0 * np.pi * dtht * np.sum(thta * Pth2D_beam) sig2_p = 2.0 * np.pi * dtht2 * np.sum(thta2 * Pth2D2_beam) sig_all_p_beam = fnu(nu) * (2 * sig_p - sig2_p) * ST_CGS / ( ME_CGS * C_CGS**2) * TCMB * 1e6 * ( (2. + 2. * XH) / (3. + 5. * XH)) #/ area_fac # muK return sig_all_p_beam