def build_plot(self, usePCA):
    import pylab
    self.normaliser = MorphologyForRenderingOperator(self.morph, usePCA=usePCA)
    # Find the point that is the furthest distance away from
    # the centre when the cell is centred and rotated:
    rotator = lambda s: self.normaliser(s.get_distal_npa3())
    rotated_section_dict = DictBuilderSectionVisitorHomo(morph=self.morph, functor=rotator)()
    # Add in the parents manually:
    dummy_section = self.morph._dummysection
    rotated_section_dict[dummy_section] = self.normaliser(dummy_section.get_distal_npa3())
    max_axis = max([numpy.linalg.norm(rotated_section_pt) for rotated_section_pt in rotated_section_dict.values()])
    plot_lims = (max_axis * -1.1, max_axis * 1.1)
    max_x = max([numpy.fabs(rotated_section_pt[0]) for rotated_section_pt in rotated_section_dict.values()])
    max_y = max([numpy.fabs(rotated_section_pt[1]) for rotated_section_pt in rotated_section_dict.values()])
    max_z = max([numpy.fabs(rotated_section_pt[2]) for rotated_section_pt in rotated_section_dict.values()])
    maxes = [max_x, max_y, max_z]
    # allMax = max(maxes)
    for i in self.plot_views:
        maxes[i] = maxes[i] + 0.2 * max([max_x, max_y, max_z])
    self.fig = pylab.figure(**self.fig_kwargs)  # figsize=(7, 7)
    self.fig.subplots_adjust(left=0.05, top=0.95, right=0.95, bottom=0.05, wspace=0.15, hspace=0.15)
    self.subplots = {}
    for i in self.plot_views:
        self.subplots[i] = self.build_draw_sub_plot(rotated_section_dict, self.fig, i, plot_lims)
def create_mock_sample(data,dfc,trueVsolar,spiral=False):
    if spiral:
        #Read spiral vlos field
        spvlos= galpy_simulations.vlos('../sim/bar_rect_alpha0.015_hivres.sav')
        potscale= 0.85
        xgrid= numpy.linspace(_RCXMIN,_RCXMAX-_RCDX,19)
        ygrid= numpy.linspace(_RCYMIN,_RCYMAX-_RCDX,19)
    ndata= len(data)
    mockvel= numpy.empty(ndata)
    dphil= data['RC_GALPHI']+data['GLON']/180.*numpy.pi
    cosdphil= numpy.cos(dphil)
    sindphil= numpy.sin(dphil)
    cosl= numpy.cos(data['GLON']/180.*numpy.pi)
    sinl= numpy.sin(data['GLON']/180.*numpy.pi)
    for ii in range(ndata):
        vrvt= dfc.sampleVRVT(data['RC_GALR'][ii]/8.,n=1.)
        mockvel[ii]= -vrvt[0,0]*cosdphil[ii]\
            +vrvt[0,1]*sindphil[ii]\
            -10.5*cosl[ii]/220.\
            -(1.+trueVsolar)*sinl[ii]
        if spiral:
            #Find which pixel this sample is in
            pixIndxX= numpy.argmin(numpy.fabs(data['RC_GALR'][ii]
                                              *numpy.cos(data['RC_GALPHI'][ii])
                                              -xgrid))
            pixIndxY= numpy.argmin(numpy.fabs(data['RC_GALR'][ii]
                                              *numpy.sin(data['RC_GALPHI'][ii])
                                              -ygrid))
            spiraldev= spvlos[pixIndxX,pixIndxY]
            mockvel[ii]+= spiraldev*potscale
    data['VHELIO_AVG']= mockvel*220.
    return data
def getStepsize(self, mix_den, ht):
    mix_den_fil = np.fabs(mix_den) > 1.E-7
    a = ht[mix_den_fil] / mix_den[mix_den_fil]
    b = 1.0 + a
    b_fil = np.fabs(b) > 1.E-7
    w = self.w
    sl = w * ht[b_fil] / b[b_fil]
    s11 = sl.sum()
    s0 = (w * ht).sum()
    step, oldstep = 0., 0.
    for i in range(50):
        grad1, grad2 = 0., 0.
        for j in range(self.n):
            a = mix_den[j] + step * ht[j]
            if math.fabs(a) > 1.E-7:
                b = ht[j] / a
                grad1 = grad1 + w * b
                grad2 = grad2 - w * b * b
        if math.fabs(grad2) > 1.E-10:
            step = step - grad1 / grad2
        if oldstep > 1.0 and step > oldstep:
            step = 1.
            break
        if grad1 < 1.E-7:
            break
        oldstep = step
    if step > 1.0:
        return 1.0
    return step
def ActionNodeStd(self, o):
    print
    print repr(o)
    print '-' * len(repr(o))
    ann = self.annotations.annotations[o]
    print ann
    # Lets go symmetrical, about 0:
    vmin = ann.val_min.float_in_si()
    vmax = ann.val_max.float_in_si()
    ext = max( [np.abs(vmin), np.abs(vmax)] )
    if ext != 0.0:
        # Include some padding:
        upscaling_pow = int( np.ceil( np.log2(ext) ) )
    else:
        upscaling_pow = 0
    # Lets remap the limits:
    upscaling_val = 2 ** (-upscaling_pow)
    vmin_scaled = vmin * upscaling_val
    vmax_scaled = vmax * upscaling_val
    print 'vMin, vMax', vmin, vmax
    print 'Scaling:', '2**', upscaling_pow, ' ->', upscaling_val
    print 'vMin_scaled, vMax_scaled', vmin_scaled, vmax_scaled
    ann.fixed_scaling_power = upscaling_pow
    assert 0.1 < max( [np.fabs(vmin_scaled), np.fabs(vmax_scaled)] ) <= 1.0 or vmin_scaled == vmax_scaled == 0.0
def setup_sampling(self, gradient, soln, linear_randomization, quadratic_coef):
    self.accept_beta, self.total_beta = 0, 0
    random_direction = 2 * quadratic_coef * soln + linear_randomization
    negative_subgrad = gradient + random_direction
    self.active_set = (soln != 0)
    self.initial_parameters = np.empty(1, self.dtype)
    self.initial_parameters['signs'] = np.sign(soln[self.active_set])
    abs_l1part = np.fabs(soln[self.active_set])
    l1norm_ = abs_l1part.sum()
    self.initial_parameters['simplex'] = (abs_l1part / l1norm_)[:-1]
    subgrad = -negative_subgrad[self.inactive_set]
    supnorm_ = np.fabs(negative_subgrad).max()
    if self.lagrange is not None:
        self.initial_parameters['cube'] = subgrad / self.lagrange
        self.initial_parameters['scale'] = l1norm_
    else:
        if self._active_set.sum() != self.shape:
            self.initial_parameters['cube'] = subgrad / supnorm_
        self.initial_parameters['scale'] = supnorm_
    if self.lagrange is None:
        raise NotImplementedError("only lagrange form is implemented")
    return soln[self.active_set], subgrad
def _calc_shift_ranges(self, x_shift, y_shift):
    '''Calculate shift indices for input and output arrays.
    '''
    LOGGER.debug("Calculating shift ranges.")
    # width of the portion to be moved
    width = self._img_shape[1] - int(np.fabs(x_shift))
    # height of the portion to be moved
    height = self._img_shape[0] - int(np.fabs(y_shift))
    # Calculate the corner indices of the area to be moved
    if x_shift < 0:
        n_x1, n_x2 = 0, width
        o_x1, o_x2 = -1*x_shift, -1*x_shift+width
    else:
        n_x1, n_x2 = x_shift, x_shift+width
        o_x1, o_x2 = 0, width
    if y_shift < 0:
        n_y1, n_y2 = 0, height
        o_y1, o_y2 = -1*y_shift, -1*y_shift+height
    else:
        n_y1, n_y2 = y_shift, y_shift+height
        o_y1, o_y2 = 0, height
    output_ranges = ((n_x1, n_x2), (n_y1, n_y2))
    input_ranges = ((o_x1, o_x2), (o_y1, o_y2))
    return (output_ranges[0], output_ranges[1], input_ranges[0], input_ranges[1])
def _findUSpace(self):
    """Find independent U components with respect to invariant rotations.
    """
    n = len(self.invariants)
    R6zall = numpy.tile(-numpy.identity(6, dtype=float), (n, 1))
    R6zall_iter = numpy.split(R6zall, n, axis=0)
    i6kl = ((0, (0, 0)), (1, (1, 1)), (2, (2, 2)),
            (3, (0, 1)), (4, (0, 2)), (5, (1, 2)))
    for op, R6z in zip(self.invariants, R6zall_iter):
        R = op.R
        for j, Ucj in enumerate(self.Ucomponents):
            Ucj2 = numpy.dot(R, numpy.dot(Ucj, R.T))
            for i, kl in i6kl:
                R6z[i,j] += Ucj2[kl]
    Usp6 = nullSpace(R6zall)
    # normalize Usp6 by its maximum component
    mxcols = numpy.argmax(numpy.fabs(Usp6), axis=1)
    mxrows = numpy.arange(len(mxcols))
    Usp6 /= Usp6[mxrows,mxcols].reshape(-1, 1)
    Usp6 = numpy.around(Usp6, 2)
    # normalize again after rounding to get correct signs
    mxcols = numpy.argmax(numpy.fabs(Usp6), axis=1)
    Usp6 /= Usp6[mxrows,mxcols].reshape(-1, 1)
    self.Uspace = numpy.tensordot(Usp6, self.Ucomponents, axes=(1, 0))
    self.Uisotropy = (len(self.Uspace) == 1)
    return
def ransac(kernel, threshold):
    '''Robustly fit a model to data.

    >>> x = np.array([1., 2., 3.])
    >>> y = np.array([2., 4., 7.])
    >>> kernel = TestLinearKernel(x, y)
    >>> ransac(kernel, 0.1)
    (2.0, array([0, 1]), 0.10000000000000001)
    '''
    max_iterations = 1000
    best_error = float('inf')
    best_model = None
    best_inliers = []
    i = 0
    while i < max_iterations:
        try:
            samples = kernel.sampling()
        except AttributeError:
            samples = random.sample(range(kernel.num_samples()),
                                    kernel.required_samples)
        models = kernel.fit(samples)
        for model in models:
            errors = kernel.evaluate(model)
            inliers = np.flatnonzero(np.fabs(errors) < threshold)
            error = np.fabs(errors).clip(0, threshold).sum()
            if len(inliers) and error < best_error:
                best_error = error
                best_model = model
                best_inliers = inliers
                max_iterations = min(max_iterations,
                                     ransac_max_iterations(kernel, best_inliers, 0.01))
        i += 1
    return best_model, best_inliers, best_error
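# Minimal, illustrative kernel sketch matching the interface ransac() above appears
# to expect (num_samples, required_samples, fit, evaluate). The class name and the
# through-the-origin line model are assumptions, not the project's actual
# TestLinearKernel, and running ransac() still requires the module's
# ransac_max_iterations() helper.
import numpy as np

class SimpleLinearKernel(object):
    """Model y = m*x; one sample needed; residuals are y - m*x."""
    required_samples = 1

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def num_samples(self):
        return len(self.x)

    def fit(self, samples):
        x, y = self.x[samples], self.y[samples]
        return [y[0] / x[0]]  # slope of the line through the origin

    def evaluate(self, model):
        return self.y - model * self.x

# Example (mirrors the doctest above):
# ransac(SimpleLinearKernel(np.array([1., 2., 3.]), np.array([2., 4., 7.])), 0.1)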
def test_actionConservation():
    #_____initialize some KKSPot_____
    Delta = 1.0
    pot = KuzminKutuzovStaeckelPotential(ac=20.,Delta=Delta,normalize=True)
    #_____initialize an orbit_____
    vxvv = [1.,0.1,1.1,0.01,0.1]
    o= Orbit(vxvv=vxvv)
    #_____integrate the orbit with C_____
    ts= numpy.linspace(0,101,100)
    o.integrate(ts,pot,method='leapfrog_c')
    #_____Setup ActionAngle object and calculate actions (Staeckel approximation)_____
    aAS = actionAngleStaeckel(pot=pot,delta=Delta,c=True)
    jrs,lzs,jzs = aAS(o.R(ts),o.vR(ts),o.vT(ts),o.z(ts),o.vz(ts))
    assert numpy.all(numpy.fabs(jrs - jrs[0]) < 10.**-8.), \
        'Radial action is not conserved along orbit.'
    assert numpy.all(numpy.fabs(lzs - lzs[0]) < 10.**-8.), \
        'Angular momentum is not conserved along orbit.'
    assert numpy.all(numpy.fabs(jzs - jzs[0]) < 10.**-8.), \
        'Vertical action is not conserved along orbit.'
    return None
def trazo(ini, fin, ancho):
    '''Takes the start coordinate of the stroke, its end coordinate and the
    stroke width, and returns an array with the coordinates of the points that
    make it up. Only works for straight strokes (it expects the leftmost, or
    bottom-most, points of the stroke).'''
    ancho = int(ancho)+1
    actual = ini
    if es_horizontal(ini, fin):
        paso = np.array([1, 0])
        ancho_1 = np.array([0, 1])
        rango = int(np.fabs(fin[0]-ini[0]))+1
    else:
        paso = np.array([0, 1])
        ancho_1 = np.array([1, 0])
        rango = int(np.fabs(fin[1]-ini[1]))
    trazo = np.zeros([(rango)*(ancho), 2])
    for a in range(ancho):
        for i in range(rango):
            trazo[i+(rango)*a] = np.array([actual])
            actual += paso
        actual = ini
        actual += (a+1)*ancho_1
    return trazo
def test_orbitIntegrationC():
    #_____initialize some KKSPot_____
    Delta = 1.0
    pot = KuzminKutuzovStaeckelPotential(ac=20.,Delta=Delta,normalize=True)
    #_____initialize an orbit (twice)_____
    vxvv = [1.,0.1,1.1,0.,0.1]
    o_P= Orbit(vxvv=vxvv)
    o_C= Orbit(vxvv=vxvv)
    #_____integrate the orbit with python and C_____
    ts= numpy.linspace(0,100,101)
    o_P.integrate(ts,pot,method='leapfrog')   #python
    o_C.integrate(ts,pot,method='leapfrog_c') #C
    for ii in range(5):
        exp3= -1.7
        if ii == 0:   Python, CC, string, exp1, exp2 = o_P.R(ts) , o_C.R(ts) , 'R' , -5., -10.
        elif ii == 1: Python, CC, string, exp1, exp2 = o_P.z(ts) , o_C.z(ts) , 'z' , -3.25, -4.
        elif ii == 2: Python, CC, string, exp1, exp2 = o_P.vR(ts), o_C.vR(ts), 'vR', -3., -10.
        elif ii == 3: Python, CC, string, exp1, exp2, exp3 = o_P.vz(ts), o_C.vz(ts), 'vz', -3., -4., -1.3
        elif ii == 4: Python, CC, string, exp1, exp2 = o_P.vT(ts), o_C.vT(ts), 'vT', -5., -10.
        rel_diff = numpy.fabs((Python-CC)/CC) < 10.**exp1
        abs_diff = (numpy.fabs(Python-CC) < 10.**exp2) * (numpy.fabs(Python) < 10.**exp3)
        assert numpy.all(rel_diff+abs_diff), \
            'Orbit integration for '+string+' coordinate different in ' + \
            'C and Python implementation.'
    return None
def weights(self, z):
    """
    Hampel weighting function for the IRLS algorithm

    The psi function scaled by z

    Parameters
    ----------
    z : array-like
        1d array

    Returns
    -------
    weights : array
        weights(z) = 1                               for \|z\| <= a
        weights(z) = a/\|z\|                         for a < \|z\| <= b
        weights(z) = a*(c - \|z\|)/(\|z\|*(c-b))     for b < \|z\| <= c
        weights(z) = 0                               for \|z\| > c
    """
    z = np.asarray(z)
    a = self.a; b = self.b; c = self.c
    t1, t2, t3 = self._subset(z)
    v = (t1 +
         t2 * a/np.fabs(z) +
         t3 * a*(c-np.fabs(z))/(np.fabs(z)*(c-b)))
    v[np.where(np.isnan(v))] = 1.  # for some reason 0 returns a nan?
    return v
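# Standalone sketch of the same piecewise Hampel weights, handy for checking the
# method above on a small array. The cutoffs a=2, b=4, c=8 are illustrative
# choices, not necessarily the class defaults.
import numpy as np

def hampel_weights(z, a=2.0, b=4.0, c=8.0):
    z = np.asarray(z, dtype=float)
    absz = np.fabs(z)
    w = np.zeros_like(absz)
    w[absz <= a] = 1.0                       # full weight in the core
    mid = (absz > a) & (absz <= b)
    w[mid] = a / absz[mid]                   # downweight linearly in 1/|z|
    tail = (absz > b) & (absz <= c)
    w[tail] = a * (c - absz[tail]) / (absz[tail] * (c - b))  # taper to zero
    return w

# hampel_weights([0.5, 3.0, 6.0, 10.0]) -> [1.0, 0.667, 0.167, 0.0] (approximately)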
def test_estimateDelta():
    #_____initialize some KKSPot_____
    Delta = 1.0
    pot = KuzminKutuzovStaeckelPotential(ac=20.,Delta=Delta,normalize=True)
    #_____initialize an orbit_____
    vxvv = [1.,0.1,1.1,0.01,0.1]
    o= Orbit(vxvv=vxvv)
    #_____integrate the orbit with C_____
    ts= numpy.linspace(0,101,100)
    o.integrate(ts,pot,method='leapfrog_c')
    #_____estimate focal length Delta_____
    #for each time step individually:
    deltas_estimate = numpy.zeros(len(ts))
    for ii in range(len(ts)):
        deltas_estimate[ii] = estimateDeltaStaeckel(pot,o.R(ts[ii]),o.z(ts[ii]))
    assert numpy.all(numpy.fabs(deltas_estimate - Delta) < 10.**-8), \
        'Focal length Delta estimated along the orbit is not constant.'
    #for all time steps together:
    delta_estimate = estimateDeltaStaeckel(pot,o.R(ts),o.z(ts))
    assert numpy.fabs(delta_estimate - Delta) < 10.**-8, \
        'Focal length Delta estimated from the orbit is not the same as the input focal length.'
    return None
def truncate_hist1( self, xmin, xmax ):
    buf = get_buffer_hist1( self )
    sbuf = get_err_buffer_hist1( self )
    edges, fixed = get_bin_edges_axis( self.GetXaxis(), type=True )
    e1 = numpy.fabs(edges[:-1]-xmin)<1.e-9
    e2 = numpy.fabs(edges[1:]-xmax)<1.e-9
    assert numpy.any( e1 ) and numpy.any( e2 ), 'Invalid new histogram limits'
    i1 = numpy.nonzero( e1 )[0][0]
    i2 = numpy.nonzero( e2 )[0][-1]+1
    if fixed:
        newhist = self.__class__( self.GetName(), self.GetTitle(), i2-i1, xmin, xmax )
    else:
        newhist = self.__class__( self.GetName(), self.GetTitle(), i2-i1, edges[i1:i2] )
    newbuf = get_buffer_hist1( newhist )
    if sbuf is None:
        newsbuf = None
    else:
        newhist.Sumw2()
        newsbuf = get_err_buffer_hist1( newhist )
    newbuf[:] = buf[i1:i2]
    if not sbuf is None:
        newsbuf[:] = sbuf[i1:i2]
    newhist.SetEntries( newhist.Integral() )
    return newhist
def test_monopole_fluxpoints(self):
    """Tests monopole flux points."""
    field = ElectricField([PointCharge(2, [0, 0])])
    circle = GaussianCircle([0, 0], 10)

    fluxpoints = circle.fluxpoints(field, 4)
    self.assertEqual(len(fluxpoints), 4)
    self.assertTrue(isclose(fluxpoints, [[10, 0], [0, 10], [-10, 0], [0, -10]]).all())

    fluxpoints = circle.fluxpoints(field, 14)
    self.assertEqual(len(fluxpoints), 14)
    self.assertTrue(isclose(fluxpoints[0], [10, 0]).all())
    self.assertTrue(isclose(fluxpoints[7], [-10, 0]).all())

    x1 = fluxpoints[1:7]
    x2 = fluxpoints[-1:7:-1]
    x2[:, 1] = fabs(x2[:, 1])
    self.assertTrue(isclose(x1, x2).all())

    x1 = append(fluxpoints[-3:], fluxpoints[:4], axis=0)
    x2 = fluxpoints[-4:3:-1]
    x2[:, 0] = fabs(x2[:, 0])
    self.assertEqual(len(x1), len(x2))
    self.assertTrue(isclose(x1, x2).all())
def _get_edr(self, obs, expected, stddev, bandwidth=0.01, multiplier=3.0):
    """
    Calculates the Euclidean Distance-Based Rank for a set of observed
    and expected values from a particular GMPE
    """
    nvals = len(obs)
    min_d = bandwidth / 2.
    kappa = self._get_kappa(obs, expected)
    mu_d = obs - expected
    d1c = np.fabs(obs - (expected - (multiplier * stddev)))
    d2c = np.fabs(obs - (expected + (multiplier * stddev)))
    dc_max = ceil(np.max(np.array([np.max(d1c), np.max(d2c)])))
    num_d = len(np.arange(min_d, dc_max, bandwidth))
    mde = np.zeros(nvals)
    for iloc in range(0, num_d):
        d_val = (min_d + (float(iloc) * bandwidth)) * np.ones(nvals)
        d_1 = d_val - min_d
        d_2 = d_val + min_d
        p_1 = norm.cdf((d_1 - mu_d) / stddev) -\
            norm.cdf((-d_1 - mu_d) / stddev)
        p_2 = norm.cdf((d_2 - mu_d) / stddev) -\
            norm.cdf((-d_2 - mu_d) / stddev)
        mde += (p_2 - p_1) * d_val
    inv_n = 1.0 / float(nvals)
    mde_norm = np.sqrt(inv_n * np.sum(mde ** 2.))
    edr = np.sqrt(kappa * inv_n * np.sum(mde ** 2.))
    return mde_norm, np.sqrt(kappa), edr
def find_nearest(array, value):
    """
    Return the array value that is closest to the input value
    (Abigail Stevens: Thanks StackOverflow!)

    Parameters
    ----------
    array : np.array of ints or floats
        1-D array of numbers to search through. Should already be sorted
        from low values to high values.

    value : int or float
        The value you want to find the closest to in the array.

    Returns
    -------
    array[idx] : int or float
        The array value that is closest to the input value.

    idx : int
        The index of the array of the closest value.
    """
    idx = np.searchsorted(array, value, side="left")
    if idx == len(array) or np.fabs(value - array[idx - 1]) < \
            np.fabs(value - array[idx]):
        return array[idx - 1], idx - 1
    else:
        return array[idx], idx
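# Quick usage sketch for find_nearest() above, assuming it is importable alongside numpy:
# >>> values = np.array([1., 3., 7., 12.])
# >>> find_nearest(values, 4.0)    # -> (3.0, 1): 3.0 is the closest value, at index 1
# >>> find_nearest(values, 100.)   # -> (12.0, 3): values above the range clamp to the last element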
def deliverStim(currTime):
    global injectionCurrent
    global spineVm
    global somaVm
    if numpy.fabs( currTime - baselineTime ) < frameRunTime/2.0 :    # start
        eList = moose.wildcardFind( '/model/elec/#soma#' )
        assert( len(eList) > 0 )
        eList[0].inject = injectionCurrent
        #print "1. injected current = ", injectionCurrent
        injectionCurrent += deltaCurrent
        #print "del stim first ", moose.element('/clock').currentTime
    if numpy.fabs( currTime - baselineTime - currPulseTime) < frameRunTime/2.0 :    # end
        eList = moose.wildcardFind( '/model/elec/#soma#' )
        assert( len(eList) > 0 )
        eList[0].inject = 0.0
        #print "2. injected current = ", injectionCurrent
        #print "del stim second ", moose.element('/clock').currentTime
    if runtime - currTime < frameRunTime * 2.0 :
        #print "3. reinit-ing"
        somaVm.append( moose.element( '/graphs/VmTab' ).vector )
        spineVm.append( moose.element( '/graphs/eSpineVmTab' ).vector )
        iList.append(injectionCurrent)
        if injectionCurrent < maxCurrent :
            moose.reinit()
def get_weights_cs2ll(self, dst, alpha, beta, panel, gids):
    cs_obj = self.cs_obj
    (a1,b1), (a2,b2), (a3,b3), (a4,b4) = \
        [cs_obj.alpha_betas[gid] for gid in gids]
    assert np.fabs(a1-a3)<1e-15
    assert np.fabs(a2-a4)<1e-15
    assert np.fabs(b1-b2)<1e-15
    assert np.fabs(b3-b4)<1e-15
    assert flge(a1,alpha,a2), "dst={}, a1={}, a2={}, alpha={}".format(dst,a1,a2,alpha)
    assert flge(b1,beta,b3), "dst={}, b1={}, b3={}, beta={}".format(dst,b1,b3,beta)
    panels = [cs_obj.gq_indices[gid,0] for gid in gids]
    for p in panels:
        if p != panel:
            print("(alpha,beta) ({},{})".format(alpha, beta))
            print("panel: {}, {}".format(panel, panels))
            print("dst: {}".format(dst))
            print("gids: {}".format(gids))
            sys.exit()
    # weights
    x, y = alpha, beta
    x1, x2 = a1, a2
    y1, y2 = b1, b3
    return self.get_bilinear_weights(dst, (x,y), (x1,y1), (x2,y2))
def get_weights_ll2cs(self, dst, lat, lon, idxs):
    cs_obj = self.cs_obj
    ll_obj = self.ll_obj
    idx1, idx2, idx3, idx4 = idxs
    lat1, lon1 = ll_obj.latlons[idx1]
    lat2, lon2 = ll_obj.latlons[idx2]
    lat3, lon3 = ll_obj.latlons[idx3]
    lat4, lon4 = ll_obj.latlons[idx4]
    assert np.fabs(lon1-lon3)<1e-15
    assert np.fabs(lon2-lon4)<1e-15
    assert np.fabs(lat1-lat2)<1e-15
    assert np.fabs(lat3-lat4)<1e-15
    if lon2 < lon1: lon2 = lon1 + ll_obj.dlon
    if lon4 < lon3: lon4 = lon3 + ll_obj.dlon
    if np.fabs(lon-lon1) > np.pi: lon += 2*np.pi
    assert flge(lon1,lon,lon2), "dst={}, lon1={}, lon2={}, lon={}".format(dst,lon1,lon2,lon)
    assert flge(lat1,lat,lat3), "dst={}, lat1={}, lat3={}, lat={}".format(dst,lat1,lat3,lat)
    # weights
    x, y = lon, lat
    x1, x2 = lon1, lon2
    y1, y2 = lat1, lat3
    return self.get_bilinear_weights(dst, (x,y), (x1,y1), (x2,y2))
def bartlettTest(self):
    # Bartlett's chi-square factor: -(n - (p+q+3)/2); use 2.0 to avoid Python 2 integer division
    M = -(self.n - 1/2.0*(self.p+self.q+3))
    for i in range(len(self.s)):
        # significance level
        alf = self.sigAlf
        sig = sp.special.chdtri((self.p-i)*(self.q-i), alf)
        test = 1
        # Wilks' lambda
        for j in range(len(self.s)-i):
            test = test*(1-self.s[len(self.s)-j-1])
        chi = M*math.log(test)
        # reject or accept the null hypothesis
        if chi > sig:
            print "test["+str(i)+"]:"+str(chi) +" > sig("+str(alf)+"):"+str(sig)
            run = np.fabs(self.A[:,i:i+1])
            rvn = np.fabs(self.B[:,i:i+1])
            arg_u = np.argmax(run)
            arg_v = np.argmax(rvn)
            self.eigArray.append(str(i))
            val = [arg_u, arg_v]
            self.eigValArray.append(val)
            print "eigen:"+str(np.sqrt(self.s[i]))
            print "ru-max arg:"+str(arg_u)
            print "rv-max arg:"+str(arg_v)
        else:
            break
def plot(filename, x=0, y=1, x_abs=False, y_abs=False,
         xscale='linear', yscale='linear', xlabel='x', ylabel='y',
         scatter=False, size=12, title='',
         xmin=-np.inf, xmax=np.inf, ymin=-np.inf, ymax=np.inf):
    filtercols = (x, y)
    mins = (xmin, ymin)
    maxs = (xmax, ymax)
    data_filtered = __filter_outranged_x(filename, filtercols, mins, maxs)
    xs, ys = np.loadtxt(data_filtered, usecols=(x, y), unpack=True)
    if x_abs:
        xs = np.fabs(xs)
    if y_abs:
        ys = np.fabs(ys)
    f, ax = plt.subplots(1, 1)
    ax.set_xscale(xscale)
    ax.set_yscale(yscale)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if title:
        plt.title(title)
    if scatter:
        ax.scatter(xs, ys, s=size)
    else:
        ax.plot(xs, ys, linewidth=size)
    plt.axis('tight')
    plt.show()
    plt.close()
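# Hypothetical usage sketch for plot() above; 'results.dat' and the column indices
# are illustrative, not taken from the original project:
# plot('results.dat', x=0, y=2, y_abs=True, yscale='log',
#      xlabel='time', ylabel='|amplitude|', scatter=True, size=8)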
def replicate_line_with_threshold(g_expr, g_expr_c, M, K, threshold):
    '''Compare whether two lines are the same lines.'''
    coeffi_1 = np.zeros((M, K))
    coeffi_2 = np.zeros((M, K))
    coeff1_constant = g_expr[-1]
    coeff2_constant = g_expr_c[-1]
    # calculate the sum of all coefficients
    sum_coeff1 = coeff1_constant
    sum_coeff2 = coeff2_constant
    for i in xrange(M):
        for j in xrange(K):
            coeffi_1[i][j] = g_expr[i*K + j]
            coeffi_2[i][j] = g_expr_c[i*K + j]
            sum_coeff1 += coeffi_1[i][j]
            sum_coeff2 += coeffi_2[i][j]
    # check constant
    if np.fabs(sum_coeff1 * coeff2_constant - sum_coeff2 * coeff1_constant) > threshold:
        return 0
    # check all other coefficients
    for i in xrange(M):
        for j in xrange(K):
            if np.fabs(sum_coeff1 * coeffi_2[i][j] - sum_coeff2 * coeffi_1[i][j]) > threshold:
                # sum_coeff1 * coeffi_2[i][j] != sum_coeff2 * coeffi_1[i][j]
                return 0
    # 1 represents the two lines are the same equation
    return 1
def weighted_mean(_line):
    max_weight = 50
    # print _line.shape
    median_2d = bottleneck.nanmedian(_line, axis=1).reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1)
    std = bottleneck.nanstd(_line, axis=1)
    std_2d = std.reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1)
    weight_2d = numpy.fabs(std_2d / (_line - median_2d))
    # weight_2d[weight_2d > max_weight] = max_weight
    weight_2d[numpy.isinf(weight_2d)] = max_weight
    for i in range(3):
        avg = bottleneck.nansum(_line*weight_2d, axis=1)/bottleneck.nansum(weight_2d, axis=1)
        avg_2d = avg.reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1)
        std = numpy.sqrt(bottleneck.nansum(((_line - avg_2d)**2 * weight_2d), axis=1)/bottleneck.nansum(weight_2d, axis=1))
        std_2d = std.reshape(_line.shape[0],1).repeat(_line.shape[1], axis=1)
        weight_2d = numpy.fabs(std_2d / (_line - avg_2d))
        # weight_2d[weight_2d > max_weight] = max_weight
        weight_2d[numpy.isinf(weight_2d)] = max_weight
    return bottleneck.nansum(_line*weight_2d, axis=1)/bottleneck.nansum(weight_2d, axis=1)
def __init__(self, num): global h, edge_times, edge_probs, state_density, states, Phdelta states, state_density, edge_probs, edge_times, Phdelta = [],[],[],[],[] h = (zmax-zmin)/float(num) self.tot_vert = num+1 states = np.linspace(zmin, zmax, self.tot_vert) for s in states: c = process_var + h*np.fabs(self.drift(s)) htime = h*h/c edge_times.append(htime) for i in range(self.tot_vert): s = states[i] c = process_var + h*np.fabs(self.drift(s)) if (i != 0) and (i != self.tot_vert-1): edge_probs.append([(process_var/2 + h*np.fabs(self.drift(s)))/c, process_var/2/c]) elif (i == self.tot_vert -1): edge_probs.append([1.0, 0.]) elif (i == 0): edge_probs.append([0., 1.0]) """ # get filtering one_step transition probabilities using matrices self.delta = min(edge_times)*0.999 P1 = np.zeros((self.tot_vert, self.tot_vert)) P0 = np.zeros((self.tot_vert, self.tot_vert)) for i in range(self.tot_vert): pt = edge_times[i]/(self.delta + edge_times[i]) if( (i!=0) and (i!=self.tot_vert-1)): P1[i,i-1] = edge_probs[i][0]*pt P1[i,i+1] = edge_probs[i][1]*pt P0[i,i-1] = edge_probs[i][0]*(1-pt) P0[i,i+1] = edge_probs[i][1]*(1-pt) elif(i ==0): P1[i,i+1] = edge_probs[i][1]*pt P0[i,i+1] = edge_probs[i][1]*(1-pt) else: P1[i,i-1] = edge_probs[i][0]*pt P0[i,i-1] = edge_probs[i][0]*(1-pt) Phdelta = np.linalg.inv(eye(self.tot_vert) - P0)*P1 """ self.delta = min(edge_times)*0.999 #self.delta = 0.0001 #print 'min_htime: ', min(edge_times),' delta: ', self.delta if(min(edge_times) < self.delta): print "Add less nodes" sys.exit(0) # explicit method Phdelta = np.zeros((self.tot_vert, self.tot_vert)) for i in range(self.tot_vert): ps = 1 - self.delta/edge_times[i] Phdelta[i,i] = ps if( (i!=0) and (i!=self.tot_vert-1)): Phdelta[i,i+1] = edge_probs[i][1]*(1- ps) Phdelta[i,i-1] = edge_probs[i][0]*(1- ps) elif(i ==0): Phdelta[i,i+1] = edge_probs[i][1]*(1- ps) else: Phdelta[i,i-1] = edge_probs[i][0]*(1- ps)
def test_get_distance(self):
    """
    Test GridMol.get_distance.
    """
    self.mol.add_atom((1, 2, 1), 1.6)
    self.mol.add_atom((1, 1, 1), 1.6)
    distances = self.mol.get_distance()

    # confirm that negative values are inside atoms
    # this tests distance correspondence with occupancy
    mask = self.mol.get_occupancy()
    assert np.all(distances[mask] <= 0)
    assert np.all(distances[~mask] > 0)

    # check for sane positive distances
    assert np.amax(distances) < max(self.mol.get_real_shape())

    # check that negative distances are not too large
    # min should be no larger than the largest atom radius plus the probe
    # radius (by definition)
    assert np.fabs(np.amin(distances)) <= (
        self.mol.atoms[0].radius + self.mol.probe_radius)

    # check that most distances are significantly less than max
    threshold = max(self.mol.get_real_shape()) / 2.
    assert np.count_nonzero(np.fabs(distances) < threshold) > (
        0.9 * distances.size)
def visiting(self, x, step, temperature):
    dim = x.size
    if step < dim:
        # Changing all coordinates with a new visiting value
        visits = np.array([self.visit_fn(temperature) for _ in range(dim)])
        upper_sample = self.rs.random_sample()
        lower_sample = self.rs.random_sample()
        visits[visits > self.tail_limit] = self.tail_limit * upper_sample
        visits[visits < -self.tail_limit] = -self.tail_limit * lower_sample
        x_visit = visits + x
        a = x_visit - self.lower
        b = np.fmod(a, self.b_range) + self.b_range
        x_visit = np.fmod(b, self.b_range) + self.lower
        x_visit[np.fabs(x_visit - self.lower) < self.min_visit_bound] += 1.e-10
    else:
        # Changing only one coordinate at a time based on Markov chain step
        x_visit = np.copy(x)
        visit = self.visit_fn(temperature)
        if visit > self.tail_limit:
            visit = self.tail_limit * self.rs.random_sample()
        elif visit < -self.tail_limit:
            visit = -self.tail_limit * self.rs.random_sample()
        index = step - dim
        x_visit[index] = visit + x[index]
        a = x_visit[index] - self.lower[index]
        b = np.fmod(a, self.b_range[index]) + self.b_range[index]
        x_visit[index] = np.fmod(b, self.b_range[index]) + self.lower[index]
        if np.fabs(x_visit[index] - self.lower[index]) < self.min_visit_bound:
            x_visit[index] += self.min_visit_bound
    return x_visit
def remove_duplicate_candidates(self, verbosity=1): """Remove lower-significance 'duplicate' (i.e. same period) candidates from a list of candidates. For the highest significance candidate, include a list of the DMs (and SNRs) of all the other detections. Inputs: verbosity: Verbosity level. (Default: 1) Ouputs: None """ if verbosity >= 1: print " Sorting the %d candidates by frequency..." % \ self.get_numcands() self.cands.sort(cmp_freq) if verbosity >= 1: print " Searching for dupes..." ii = 0 # Find any match while ii < self.get_numcands(): jj = ii + 1 if jj < self.get_numcands() and \ Num.fabs(self.cands[ii].r-self.cands[jj].r) < r_err: # Find others that match jj += 1 while jj < self.get_numcands() and \ Num.fabs(self.cands[ii].r-self.cands[jj].r) < r_err: jj += 1 matches = self.cands[ii:jj] matches.sort(cmp_sigma) bestindex = self.cands.index(matches[0]) #sigmas = [c.sigma for c in matches] #bestindex = Num.argmax(sigmas)+ii # flag the duplicates bestcand = self.cands[bestindex] # Add other matching cands as hit of highest-sigma cand for matchind in reversed(range(ii, jj)): if matchind == bestindex: # The current candidate is the highest-sigma cand # Don't remove it continue match = self.cands[matchind] bestcand.add_as_hit(match) match.note = "This candidate is a duplicate of %s:%d" % \ (bestcand.filename, bestcand.candnum) self.duplicate_cands.append(self.cands.pop(matchind)) if verbosity >= 2: print "Removing %s:%d (index: %d)" % \ (match.filename, match.candnum, matchind) print " %s" % match.note # If the best candidate isn't at the same freq # as ii, then it's possible even more hits should # be added. So we don't increment the index # (note that the best cand has moved into position ii). else: ii += 1 # No candidates to be added as hits, move on if verbosity >= 1: print "Found %d candidates.\n" % self.get_numcands() self.cands.sort(cmp_sigma)
def fresnelR(n1, n2, theta):
    temp1 = np.sqrt(1.0-((n1/n2)*np.sin(theta))**2)
    temp2 = np.cos(theta)
    R_s = np.fabs( (n1*temp2-n2*temp1)/(n1*temp2+n2*temp1) )**2
    R_p = np.fabs( (n1*temp1-n2*temp2)/(n1*temp1+n2*temp2) )**2
    R = (R_s + R_p)/2
    return R
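# Quick sanity check for fresnelR() above, assuming numpy is imported as np:
# at normal incidence from air (n1=1.0) into glass (n2=1.5) both polarizations
# reduce to ((n2-n1)/(n2+n1))**2 = 0.04, i.e. about 4% reflectance.
# >>> fresnelR(1.0, 1.5, 0.0)   # -> 0.04 (up to float rounding)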
def geigen(Amat, Bmat, Cmat):
    """
    generalized eigenvalue problem of the form
    max tr L'AM / sqrt(tr L'BL tr M'CM) w.r.t. L and M

    :param Amat numpy ndarray of shape (M,N)
    :param Bmat numpy ndarray of shape (M,N)
    :param Cmat numpy ndarray of shape (M,N)

    :rtype: numpy ndarray
    :return values: eigenvalues
    :return Lmat: left eigenvectors
    :return Mmat: right eigenvectors
    """
    if Bmat.shape[0] != Bmat.shape[1]:
        print("BMAT is not square.\n")
        sys.exit(1)
    if Cmat.shape[0] != Cmat.shape[1]:
        print("CMAT is not square.\n")
        sys.exit(1)
    p = Bmat.shape[0]
    q = Cmat.shape[0]
    s = min(p, q)
    tmp = fabs(Bmat - Bmat.transpose())
    tmp1 = fabs(Bmat)
    if tmp.max() / tmp1.max() > 1e-10:
        print("BMAT not symmetric..\n")
        sys.exit(1)
    tmp = fabs(Cmat - Cmat.transpose())
    tmp1 = fabs(Cmat)
    if tmp.max() / tmp1.max() > 1e-10:
        print("CMAT not symmetric..\n")
        sys.exit(1)
    Bmat = (Bmat + Bmat.transpose()) / 2.
    Cmat = (Cmat + Cmat.transpose()) / 2.
    Bfac = cholesky(Bmat)
    Cfac = cholesky(Cmat)
    Bfacinv = inv(Bfac)
    Bfacinvt = Bfacinv.transpose()
    Cfacinv = inv(Cfac)
    Dmat = Bfacinvt.dot(Amat).dot(Cfacinv)
    if p >= q:
        u, d, v = svd(Dmat)
        values = d
        Lmat = Bfacinv.dot(u)
        Mmat = Cfacinv.dot(v.transpose())
    else:
        u, d, v = svd(Dmat.transpose())
        values = d
        Lmat = Bfacinv.dot(u)
        Mmat = Cfacinv.dot(v.transpose())
    return values, Lmat, Mmat
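# Minimal usage sketch for geigen() above, assuming its module-level imports
# (fabs, cholesky, inv, svd) are in place. With B = C = I the problem reduces to
# an ordinary SVD of A, so the returned values are A's singular values.
import numpy as np

A = np.array([[3., 0.],
              [0., 1.]])
B = np.eye(2)
C = np.eye(2)
# values, Lmat, Mmat = geigen(A, B, C)
# values -> array([3., 1.])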
def to_offset(freq): """ Return DateOffset object from string or tuple representation or datetime.timedelta object Parameters ---------- freq : str, tuple, datetime.timedelta, DateOffset or None Returns ------- delta : DateOffset None if freq is None Raises ------ ValueError If freq is an invalid frequency See Also -------- pandas.DateOffset Examples -------- >>> to_offset('5min') <5 * Minutes> >>> to_offset('1D1H') <25 * Hours> >>> to_offset(('W', 2)) <2 * Weeks: weekday=6> >>> to_offset((2, 'B')) <2 * BusinessDays> >>> to_offset(datetime.timedelta(days=1)) <Day> >>> to_offset(Hour()) <Hour> """ if freq is None: return None if isinstance(freq, DateOffset): return freq if isinstance(freq, tuple): name = freq[0] stride = freq[1] if isinstance(stride, compat.string_types): name, stride = stride, name name, _ = _base_and_stride(name) delta = get_offset(name) * stride elif isinstance(freq, timedelta): delta = None freq = Timedelta(freq) try: for name in freq.components._fields: offset = _name_to_offset_map[name] stride = getattr(freq.components, name) if stride != 0: offset = stride * offset if delta is None: delta = offset else: delta = delta + offset except Exception: raise ValueError(_INVALID_FREQ_ERROR.format(freq)) else: delta = None stride_sign = None try: splitted = re.split(opattern, freq) if splitted[-1] != '' and not splitted[-1].isspace(): # the last element must be blank raise ValueError('last element must be blank') for sep, stride, name in zip(splitted[0::4], splitted[1::4], splitted[2::4]): if sep != '' and not sep.isspace(): raise ValueError('separator must be spaces') prefix = _lite_rule_alias.get(name) or name if stride_sign is None: stride_sign = -1 if stride.startswith('-') else 1 if not stride: stride = 1 if prefix in Resolution._reso_str_bump_map.keys(): stride, name = Resolution.get_stride_from_decimal( float(stride), prefix) stride = int(stride) offset = get_offset(name) offset = offset * int(np.fabs(stride) * stride_sign) if delta is None: delta = offset else: delta = delta + offset except Exception: raise ValueError(_INVALID_FREQ_ERROR.format(freq)) if delta is None: raise ValueError(_INVALID_FREQ_ERROR.format(freq)) return delta
def test_compute_gradient(self):
    for y, y_pred in zip(self.y_list, self.predict_list):
        fair_grad = self.fair_loss.compute_grad(y, y_pred)
        diff = y_pred - y
        grad = self.c * diff / (np.abs(diff) + self.c)
        self.assertTrue(np.fabs(fair_grad - grad) < consts.FLOAT_ZERO)
def test_compute_gradient(self):
    for y, y_pred in zip(self.y_list, self.predict_list):
        lse_grad = self.lse_loss.compute_grad(y, y_pred)
        grad = 2 * (y_pred - y)
        self.assertTrue(np.fabs(lse_grad - grad) < consts.FLOAT_ZERO)
def test_predict(self):
    for y in self.y_list:
        y_pred = self.lse_loss.predict(y)
        self.assertTrue(np.fabs(y_pred - y) < consts.FLOAT_ZERO)
def test_compute_gradient(self):
    for y, y_pred in zip(self.y_list, self.predict_list):
        tweedie_grad = self.tweedie_loss.compute_grad(y, y_pred)
        grad = -y * np.exp(1 - self.rho) * y_pred + np.exp(2 - self.rho) * y_pred
        self.assertTrue(np.fabs(tweedie_grad - grad) < consts.FLOAT_ZERO)
def test_compute_hess(self):
    for y, y_pred in zip(self.y_list, self.predict_list):
        log_cosh_hess = self.log_cosh_loss.compute_hess(y, y_pred)
        diff = y_pred - y
        hess = 1 - np.tanh(diff)**2
        self.assertTrue(np.fabs(log_cosh_hess - hess) < consts.FLOAT_ZERO)
def test_compute_gradient(self):
    for y, y_pred in zip(self.y_list, self.predict_list):
        log_cosh_grad = self.log_cosh_loss.compute_grad(y, y_pred)
        diff = y_pred - y
        grad = np.tanh(diff)
        self.assertTrue(np.fabs(log_cosh_grad - grad) < consts.FLOAT_ZERO)
def test_compute_hess(self):
    for y, y_pred in zip(self.y_list, self.predict_list):
        fair_hess = self.fair_loss.compute_hess(y, y_pred)
        diff = y_pred - y
        hess = self.c**2 / (np.abs(diff) + self.c)**2
        self.assertTrue(np.fabs(fair_hess - hess) < consts.FLOAT_ZERO)
def test_compute_gradient(self):
    for y, y_pred in zip(self.y_list, self.predict_list):
        huber_grad = self.huber_loss.compute_grad(y, y_pred)
        diff = y_pred - y
        grad = diff / np.sqrt(diff * diff / self.delta**2 + 1)
        self.assertTrue(np.fabs(huber_grad - grad) < consts.FLOAT_ZERO)
def test_compute_hess(self):
    for y, y_pred in zip(self.y_list, self.predict_list):
        huber_hess = self.huber_loss.compute_hess(y, y_pred)
        diff = y_pred - y
        hess = 1.0 / (1 + diff * diff / self.delta**2)**1.5
        self.assertTrue(np.fabs(huber_hess - hess) < consts.FLOAT_ZERO)
gene_fixation_positions = {}
for snp_change in (mutations + reversions):
    gene_name = snp_change[0]
    position = snp_change[2]
    if gene_name not in gene_fixation_positions:
        gene_fixation_positions[gene_name] = []
    gene_fixation_positions[gene_name].append(position)

for gene_name in gene_fixation_positions:
    if len(gene_fixation_positions[gene_name]) >= 2:
        # Calculate max position difference between SNPs.
        positions = numpy.array(gene_fixation_positions[gene_name])
        max_distance = numpy.fabs(positions[:, None] - positions[None, :]).max()
        if max_distance > 100:
            print gene_name
            highlighted_gene_set.add(gene_name)

for snp_change in (mutations + reversions):
    if snp_change[0] in highlighted_gene_set:
        print snp_change

highlighted_gene_names[pair_idx] = highlighted_gene_set

final_line_number = 0
while final_line_number >= 0:
def test_compute_loss(self):
    sklearn_loss = metrics.mean_absolute_error(self.y_list, self.predict_list)
    lae_loss = self.lae_loss.compute_loss(self.y, self.predict)
    self.assertTrue(np.fabs(lae_loss - sklearn_loss) < consts.FLOAT_ZERO)
def get_body_heliographic_stonyhurst(body, time='now', observer=None, *, include_velocity=False): """ Return a `~sunpy.coordinates.frames.HeliographicStonyhurst` frame for the location of a solar-system body at a specified time. The location can be corrected for light travel time to an observer. Parameters ---------- body : `str` The solar-system body for which to calculate positions time : {parse_time_types} Time to use in a parse_time-compatible format observer : `~astropy.coordinates.SkyCoord` If None, the returned coordinate is the instantaneous or "true" location. If not None, the returned coordinate is the astrometric location (i.e., accounts for light travel time to the specified observer) Keyword Arguments ----------------- include_velocity : `bool` If True, include the body's velocity in the output coordinate. Defaults to False. Returns ------- out : `~sunpy.coordinates.frames.HeliographicStonyhurst` Location of the solar-system body in the `~sunpy.coordinates.HeliographicStonyhurst` frame Notes ----- There is no correction for aberration due to observer motion. For a body close to the Sun in angular direction relative to the observer, the correction can be negligible because the apparent location of the body will shift in tandem with the Sun. Examples -------- >>> from sunpy.coordinates.ephemeris import get_body_heliographic_stonyhurst Obtain the location of Venus >>> get_body_heliographic_stonyhurst('venus', '2012-06-06 04:07:29') <HeliographicStonyhurst Coordinate (obstime=2012-06-06T04:07:29.000, rsun=695700.0 km): (lon, lat, radius) in (deg, deg, AU) (0.07349535, 0.05223575, 0.72605496)> Obtain the location of Venus as seen from Earth when adjusted for light travel time >>> earth = get_body_heliographic_stonyhurst('earth', '2012-06-06 04:07:29') >>> get_body_heliographic_stonyhurst('venus', '2012-06-06 04:07:29', observer=earth) INFO: Apparent body location accounts for 144.07 seconds of light travel time [sunpy.coordinates.ephemeris] <HeliographicStonyhurst Coordinate (obstime=2012-06-06T04:07:29.000, rsun=695700.0 km): (lon, lat, radius) in (deg, deg, AU) (0.07084926, 0.0520573, 0.72605477)> Obtain the location and velocity of Mars >>> mars = get_body_heliographic_stonyhurst('mars', '2001-02-03', include_velocity=True) >>> mars <HeliographicStonyhurst Coordinate (obstime=2001-02-03T00:00:00.000, rsun=695700.0 km): (lon, lat, radius) in (deg, deg, AU) (63.03105777, -5.20656151, 1.6251161) (d_lon, d_lat, d_radius) in (arcsec / s, arcsec / s, km / s) (-0.02323686, 0.00073376, -1.4798387)> Transform that same location and velocity of Mars to a different frame using `~astropy.coordinates.SkyCoord`. 
>>> from astropy.coordinates import SkyCoord >>> from sunpy.coordinates import Helioprojective >>> SkyCoord(mars).transform_to(Helioprojective(observer=earth)) <SkyCoord (Helioprojective: obstime=2001-02-03T00:00:00.000, rsun=695700.0 km, observer=<HeliographicStonyhurst Coordinate (obstime=2012-06-06T04:07:29.000, rsun=695700.0 km): (lon, lat, radius) in (deg, deg, AU) (7.835757e-15, -0.00766698, 1.01475668)>): (Tx, Ty, distance) in (arcsec, arcsec, AU) (-298029.94625805, -21753.50941181, 1.40010091) (d_Tx, d_Ty, d_distance) in (arcsec / s, arcsec / s, km / s) (-0.01652981, -0.00059216, -15.14320414)> """ obstime = parse_time(time) if observer is None: # If there is no observer, there is not adjustment for light travel time emitted_time = obstime else: observer_icrs = SkyCoord(observer).icrs.cartesian # This implementation is modeled after Astropy's `_get_apparent_body_position` light_travel_time = 0. * u.s emitted_time = obstime delta_light_travel_time = 1. * u.s # placeholder value while np.any(np.fabs(delta_light_travel_time) > 1.0e-8 * u.s): body_icrs = get_body_barycentric(body, emitted_time) distance = (body_icrs - observer_icrs).norm() delta_light_travel_time = light_travel_time - distance / speed_of_light light_travel_time = distance / speed_of_light emitted_time = obstime - light_travel_time if light_travel_time.isscalar: ltt_string = f"{light_travel_time.to_value('s'):.2f}" else: ltt_string = f"{light_travel_time.to_value('s')}" log.info( f"Apparent body location accounts for {ltt_string} seconds of light travel time" ) if include_velocity: pos, vel = get_body_barycentric_posvel(body, emitted_time) body_icrs = pos.with_differentials( vel.represent_as(CartesianDifferential)) else: body_icrs = get_body_barycentric(body, emitted_time) body_hgs = ICRS(body_icrs).transform_to( HeliographicStonyhurst(obstime=obstime)) return body_hgs
def apply_forces(self, system, time=0.0): # calculate axial and rolling directions plane_response_force_mag, no_contact_point_idx = self.apply_normal_force(system) normal_plane_collection = np.repeat( self.plane_normal.reshape(3, 1), plane_response_force_mag.shape[0], axis=1 ) # First compute component of rod tangent in plane. Because friction forces acts in plane not out of plane. Thus # axial direction has to be in plane, it cannot be out of plane. We are projecting rod element tangent vector in # to the plane. So friction forces can only be in plane forces and not out of plane. tangent_along_normal_direction = np.einsum( "ij, ij->j", system.tangents, normal_plane_collection ) tangent_perpendicular_to_normal_direction = system.tangents - np.einsum( "j, ij->ij", tangent_along_normal_direction, normal_plane_collection ) tangent_perpendicular_to_normal_direction_mag = np.einsum( "ij, ij->j", tangent_perpendicular_to_normal_direction, tangent_perpendicular_to_normal_direction, ) # Normalize tangent_perpendicular_to_normal_direction. This is axial direction for plane. Here we are adding # small tolerance (1e-10) for normalization, in order to prevent division by 0. axial_direction = np.einsum( "ij, j-> ij", tangent_perpendicular_to_normal_direction, 1 / (tangent_perpendicular_to_normal_direction_mag + 1e-14), ) element_velocity = 0.5 * ( system.velocity_collection[..., :-1] + system.velocity_collection[..., 1:] ) # first apply axial kinetic friction velocity_mag_along_axial_direction = np.einsum( "ij,ij->j", element_velocity, axial_direction ) velocity_along_axial_direction = np.einsum( "j, ij->ij", velocity_mag_along_axial_direction, axial_direction ) # Friction forces depends on the direction of velocity, in other words sign # of the velocity vector. velocity_sign_along_axial_direction = np.sign( velocity_mag_along_axial_direction ) # Check top for sign convention kinetic_mu = 0.5 * ( self.kinetic_mu_forward * (1 + velocity_sign_along_axial_direction) + self.kinetic_mu_backward * (1 - velocity_sign_along_axial_direction) ) # Call slip function to check if elements slipping or not slip_function_along_axial_direction = find_slipping_elements( velocity_along_axial_direction, self.slip_velocity_tol ) kinetic_friction_force_along_axial_direction = -( (1.0 - slip_function_along_axial_direction) * kinetic_mu * plane_response_force_mag * velocity_sign_along_axial_direction * axial_direction ) # If rod element does not have any contact with plane, plane cannot apply friction # force on the element. Thus lets set kinetic friction force to 0.0 for the no contact points. 
kinetic_friction_force_along_axial_direction[..., no_contact_point_idx] = 0.0 system.external_forces[..., :-1] += ( 0.5 * kinetic_friction_force_along_axial_direction ) system.external_forces[..., 1:] += ( 0.5 * kinetic_friction_force_along_axial_direction ) # Now rolling kinetic friction rolling_direction = _batch_cross(axial_direction, normal_plane_collection) torque_arm = -system.radius * normal_plane_collection velocity_along_rolling_direction = np.einsum( "ij ,ij ->j ", element_velocity, rolling_direction ) directors_transpose = np.einsum("ijk -> jik", system.director_collection) # w_rot = Q.T @ omega @ Q @ r rotation_velocity = _batch_matvec( directors_transpose, _batch_cross( system.omega_collection, _batch_matvec(system.director_collection, torque_arm), ), ) rotation_velocity_along_rolling_direction = np.einsum( "ij,ij->j", rotation_velocity, rolling_direction ) slip_velocity_mag_along_rolling_direction = ( velocity_along_rolling_direction + rotation_velocity_along_rolling_direction ) slip_velocity_along_rolling_direction = np.einsum( "j, ij->ij", slip_velocity_mag_along_rolling_direction, rolling_direction ) slip_velocity_sign_along_rolling_direction = np.sign( slip_velocity_mag_along_rolling_direction ) slip_function_along_rolling_direction = find_slipping_elements( slip_velocity_along_rolling_direction, self.slip_velocity_tol ) kinetic_friction_force_along_rolling_direction = -( (1.0 - slip_function_along_rolling_direction) * self.kinetic_mu_sideways * plane_response_force_mag * slip_velocity_sign_along_rolling_direction * rolling_direction ) # If rod element does not have any contact with plane, plane cannot apply friction # force on the element. Thus lets set kinetic friction force to 0.0 for the no contact points. kinetic_friction_force_along_rolling_direction[..., no_contact_point_idx] = 0.0 system.external_forces[..., :-1] += ( 0.5 * kinetic_friction_force_along_rolling_direction ) system.external_forces[..., 1:] += ( 0.5 * kinetic_friction_force_along_rolling_direction ) # torque = Q @ r @ Fr system.external_torques += _batch_matvec( system.director_collection, _batch_cross(torque_arm, kinetic_friction_force_along_rolling_direction), ) # now axial static friction nodal_total_forces = system.internal_forces + system.external_forces element_total_forces = nodes_to_elements(nodal_total_forces) force_component_along_axial_direction = np.einsum( "ij,ij->j", element_total_forces, axial_direction ) force_component_sign_along_axial_direction = np.sign( force_component_along_axial_direction ) # check top for sign convention static_mu = 0.5 * ( self.static_mu_forward * (1 + force_component_sign_along_axial_direction) + self.static_mu_backward * (1 - force_component_sign_along_axial_direction) ) max_friction_force = ( slip_function_along_axial_direction * static_mu * plane_response_force_mag ) # friction = min(mu N, pushing force) static_friction_force_along_axial_direction = -( np.minimum( np.fabs(force_component_along_axial_direction), max_friction_force ) * force_component_sign_along_axial_direction * axial_direction ) # If rod element does not have any contact with plane, plane cannot apply friction # force on the element. Thus lets set static friction force to 0.0 for the no contact points. 
static_friction_force_along_axial_direction[..., no_contact_point_idx] = 0.0 system.external_forces[..., :-1] += ( 0.5 * static_friction_force_along_axial_direction ) system.external_forces[..., 1:] += ( 0.5 * static_friction_force_along_axial_direction ) # now rolling static friction # there is some normal, tangent and rolling directions inconsitency from Elastica total_torques = _batch_matvec( directors_transpose, (system.internal_torques + system.external_torques) ) # Elastica has opposite defs of tangents in interaction.h and rod.cpp total_torques_along_axial_direction = np.einsum( "ij,ij->j", total_torques, axial_direction ) force_component_along_rolling_direction = np.einsum( "ij,ij->j", element_total_forces, rolling_direction ) noslip_force = -( ( system.radius * force_component_along_rolling_direction - 2.0 * total_torques_along_axial_direction ) / 3.0 / system.radius ) max_friction_force = ( slip_function_along_rolling_direction * self.static_mu_sideways * plane_response_force_mag ) noslip_force_sign = np.sign(noslip_force) static_friction_force_along_rolling_direction = ( np.minimum(np.fabs(noslip_force), max_friction_force) * noslip_force_sign * rolling_direction ) # If rod element does not have any contact with plane, plane cannot apply friction # force on the element. Thus lets set plane static friction force to 0.0 for the no contact points. static_friction_force_along_rolling_direction[..., no_contact_point_idx] = 0.0 system.external_forces[..., :-1] += ( 0.5 * static_friction_force_along_rolling_direction ) system.external_forces[..., 1:] += ( 0.5 * static_friction_force_along_rolling_direction ) system.external_torques += _batch_matvec( system.director_collection, _batch_cross(torque_arm, static_friction_force_along_rolling_direction), )
def mad(x):
    return np.fabs(x - x.mean()).mean()
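# Quick usage sketch for mad() above (mean absolute deviation about the mean),
# assuming numpy is imported as np:
# >>> mad(np.array([1., 2., 3., 4.]))   # deviations from 2.5 are [1.5, 0.5, 0.5, 1.5]
# 1.0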
def update(self, space: Space, iteration: int, n_iterations: int) -> None:
    """Wraps Whale Optimization Algorithm over all agents and variables.

    Args:
        space: Space containing agents and update-related information.
        iteration: Current iteration.
        n_iterations: Maximum number of iterations.

    """

    # Linearly decreases the coefficient
    coefficient = 2 - 2 * iteration / (n_iterations - 1)

    # Iterates through all agents
    for agent in space.agents:
        # Generates an uniform random number
        r1 = r.generate_uniform_random_number()

        # Calculates the `A` coefficient
        A = 2 * coefficient * r1 - coefficient

        # Calculates the `C` coefficient
        C = 2 * r1

        # Generates a random number between 0 and 1
        p = r.generate_uniform_random_number()

        # If `p` is smaller than 0.5
        if p < 0.5:
            # If `|A|` is smaller than 1
            if np.fabs(A) < 1:
                # Calculates the distance coefficient
                D = np.fabs(C * space.best_agent.position - agent.position)

                # Updates the agent's position
                agent.position = space.best_agent.position - A * D

            # If `|A|` is greater than or equal to 1
            else:
                # Generates a random-based agent
                a = self._generate_random_agent(agent)

                # Calculates the distance coefficient
                D = np.fabs(C * a.position - agent.position)

                # Updates the agent's position
                agent.position = a.position - A * D

        # If `p` is greater than or equal to 0.5
        else:
            # Generates a Gaussian random number for the spiral parameter
            l = r.generate_gaussian_random_number()

            # Calculates the distance coefficient
            D = np.fabs(space.best_agent.position - agent.position)

            # Updates the agent's position (spiral update)
            agent.position = (
                D * np.exp(self.b * l) * np.cos(2 * np.pi * l)
                + space.best_agent.position
            )
def test_compute_hess(self):
    for y, y_pred in zip(self.y_list, self.predict_list):
        hess = 2
        lse_hess = self.lse_loss.compute_hess(y, y_pred)
        self.assertTrue(np.fabs(lse_hess - hess) < consts.FLOAT_ZERO)
x = y = 0
wsp_x = [0]
wsp_y = [0]
for i in range(0, n):
    # Draw a random angle and convert it to radians
    rad = math.radians(float(random.randint(0, 360)))
    x = x + np.cos(rad)  # compute the x and y coordinates
    y = y + np.sin(rad)
    print(x, y)
    wsp_x.append(x)
    wsp_y.append(y)
    c = np.sqrt((wsp_x[i] - wsp_x[i - 1])**2 + (wsp_y[i] - wsp_y[i - 1])**2)
print(wsp_x, wsp_y)

# Compute the final displacement vector
s = np.fabs(np.sqrt(x**2 + y**2))
w_x = [0, wsp_x[len(wsp_x) - 1]]
w_y = [0, wsp_y[len(wsp_y) - 1]]
print("Wektor przesunięcia: ", s)

plt.plot(wsp_x, wsp_y, "o:", color="green", linewidth=3, alpha=0.5)
plt.plot(wsp_x, wsp_y, "r:", color="blue", linewidth=1, alpha=0.2)
plt.plot(w_x, w_y, "o:", color="blue", linewidth=2, alpha=1)
plt.legend(["Dane x, y\nPrzemieszczenie: " + str(s)], loc="upper left")
plt.xlabel("Wsp_x")
plt.ylabel("Wsp_y")
plt.title("Ruchy Browna")
plt.grid(True)
plt.show()
def draw_boxes(im, bboxes, is_display=True, color=None, caption="Image", wait=True): """ boxes: bounding boxes """ text_recs = np.zeros((len(bboxes), 8), np.int) im = im.copy() index = 0 for box in bboxes: if color == None: if len(box) == 8 or len(box) == 9: c = tuple(cm.jet([box[-1]])[0, 2::-1] * 255) else: c = tuple(np.random.randint(0, 256, 3)) else: c = color b1 = box[6] - box[7] / 2 b2 = box[6] + box[7] / 2 x1 = box[0] y1 = box[5] * box[0] + b1 x2 = box[2] y2 = box[5] * box[2] + b1 x3 = box[0] y3 = box[5] * box[0] + b2 x4 = box[2] y4 = box[5] * box[2] + b2 disX = x2 - x1 disY = y2 - y1 width = np.sqrt(disX * disX + disY * disY) fTmp0 = y3 - y1 fTmp1 = fTmp0 * disY / width x = np.fabs(fTmp1 * disX / width) y = np.fabs(fTmp1 * disY / width) if box[5] < 0: x1 -= x y1 += y x4 += x y4 -= y else: x2 += x y2 += y x3 -= x y3 -= y cv2.line(im, (int(x1), int(y1)), (int(x2), int(y2)), c, 2) cv2.line(im, (int(x1), int(y1)), (int(x3), int(y3)), c, 2) cv2.line(im, (int(x4), int(y4)), (int(x2), int(y2)), c, 2) cv2.line(im, (int(x3), int(y3)), (int(x4), int(y4)), c, 2) text_recs[index, 0] = x1 text_recs[index, 1] = y1 text_recs[index, 2] = x2 text_recs[index, 3] = y2 text_recs[index, 4] = x3 text_recs[index, 5] = y3 text_recs[index, 6] = x4 text_recs[index, 7] = y4 index = index + 1 #cv2.rectangle(im, tuple(box[:2]), tuple(box[2:4]), c,2) if is_display: cv2.imshow('result', im) #if wait: #cv2.waitKey(0) return text_recs
#print a
uvar = file.variables["UGRD_P0_L100_GLC0"][:, :, :]
uvar = numpy.squeeze(uvar[-1, :, :])
dim = numpy.shape(uvar)
print dim
vvar = file.variables["VGRD_P0_L100_GLC0"][:, :, :]
vvar = numpy.squeeze(vvar[-1, :, :])
lat = file.variables["gridlat_0"][:, :]
lon = file.variables["gridlon_0"][:, :]

#Find the index of the origin
print "want lat lon", Latorg, Lonorg
for i in range(dim[0]):
    for j in range(dim[1]):
        if numpy.fabs(lat[i, j] - Latorg) < tol and numpy.fabs(lon[i, j] - Lonorg) < tol:
            print 'closest we can get is', lat[i, j], lon[i, j]
            print i, j
            iorg = i
            jorg = j

u = 3.6 * uvar
v = 3.6 * vvar
umax = numpy.max(u)
vmax = numpy.max(v)
print "Max u", umax
print "Max v", vmax
space0 = gridspacing * (dim[0] - iorg - 1)
def mape_vectorized_v2(a, b):
    mask = a != 0
    return (np.fabs(a[mask] - b[mask]) / a[mask]).mean()
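# Quick usage sketch for mape_vectorized_v2() above, assuming numpy arrays as inputs.
# Zero targets are masked out so they never divide by zero:
# >>> a = np.array([1., 2., 0., 4.])
# >>> b = np.array([1.1, 1.8, 5., 4.])
# >>> mape_vectorized_v2(a, b)   # mean of [0.1/1, 0.2/2, 0/4] -> approximately 0.0667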
import numpy as np

x = 1.0  # define a float
y = 2.0  # define another float

# exponents and logarithms
print(np.exp(x))    # e^x
print(np.log(x))    # ln x
print(np.log10(x))  # log_10 x
print(np.log2(x))   # log_2 x

# min/max/misc
print(np.fabs(x))     # absolute value as a float
print(np.fmin(x, y))  # min of x and y
print(np.fmax(x, y))  # max of x and y

# populate arrays
n = 100
z = np.arange(n, dtype=float)    # get an array [0., 1., ..., n-1.]
z *= 2.0 * np.pi / float(n - 1)  # z = [0, 2*pi]
sin_z = np.sin(z)                # get an array sin(z)

# interpolation
print(np.interp(0.75, z, sin_z))  # interpolate sin(0.75)
print(np.sin(0.75))
def dic_constr(data3d, groundtruth, win_size, cluster_num, K, selected_dic_percent, target_dic_num): """ :param data3d: the original 3D hyperpsectral image :param groundtruth: a 2D matrix reflect the label of corresponding pixels :param win_size: the size of window, such as 3X3, 5X5, 7X7 :param cluster_num: the number of classters such as 5, 10, 15, 20 :param K: the level of sparsity :param selected_dic_percent: the selected percent of the atoms to build the background dictionary :param target_dic_num: the selected number to build the anomaly dictionary :return: data2d: the normalized data bg_dic: the background dictionary tg_dic: the anomaly dictionary bg_dic_ac_label: the index of background dictionary atoms tg_dic_label: the index of anomaly dictionary atoms """ data2d = hyper.hyperconvert2d(data3d) rows, cols, bands = data3d.shape data2d = hyper.hypernorm(data2d, "L2_norm") sio.savemat("data2d.mat", {'data2d': data2d}) data3d = hyper.hyperconvert3d(data2d, rows, cols, bands) pca = decomposition.PCA(n_components=20, copy=True, whiten=False) dim_data = pca.fit_transform(data2d.transpose()) data3d_dim = hyper.hyperconvert3d(dim_data.transpose(), rows, cols, 10) win_dim = hyper.hyperwincreat(data3d_dim, win_size) cluster_assment = hyper.Kmeans_win(win_dim, cluster_num) sio.savemat("cluster_assment.mat", {'cluster_assment': cluster_assment}) win_matrix = hyper.hyperwincreat(data3d, win_size) sio.savemat("win_matrix.mat", {'win_matrix': win_matrix}) wm_rows, wm_cols, wm_n = win_matrix.shape resdiual_stack = np.zeros((bands, win_size * win_size, wm_n)) save_num = 0 bg_dic_tuple = [] bg_dic_ac_tuple = [] bg_dic_fc_tuple = [] class_order_data_index_tuple = [] anomaly_weight_tuple = [] for i in range(cluster_num): print("current calculate cluster {0} ...".format(i)) tmp = np.where(cluster_assment == i) if tmp[0].size == 0: continue else: class_data = win_matrix[:, :, tmp[0]] cd_rows, cd_cols, cd_n = class_data.shape dictionary = class_data[:, int((win_size * win_size + 1) / 2), :] dic_rows, dic_cols = dictionary.shape class_alpha = np.zeros((K, cd_cols, cd_n)) class_index = np.zeros((K, cd_n)) for j in range(cd_n): X = class_data[:, :, j] dictionary[:, (j * cd_cols):(j * cd_cols + cd_cols - 1)] = 0 alpha, index, chosen_atom, resdiual = hyper.somp( dictionary, X, K) class_alpha[:, :, j] = alpha class_index[:, j] = index.transpose() resdiual_stack[:, :, save_num + j] = resdiual save_num = save_num + cd_n class_index = class_index.astype('int') class_global_alpha = np.zeros((dic_cols, cd_cols, cd_n)) class_global_frequency = np.zeros((dic_cols, cd_cols, cd_n)) for n_index in range(cd_n): class_global_alpha[class_index[:, n_index], :, n_index] = class_alpha[:, :, n_index] class_global_frequency[class_index[:, n_index], :, n_index] = 1 posti_class_global_alpha = np.fabs(class_global_alpha) data_frequency = class_global_frequency[:, 0, :] frequency = np.sum(data_frequency, axis=1) sum_frequency = np.sum(frequency) norm_frequency = frequency / sum_frequency data_mean_alpha = np.mean(posti_class_global_alpha, axis=1) sum_alpha_2 = np.sum(data_mean_alpha, axis=1) norm_tmp = np.linalg.norm(sum_alpha_2) sparsity_score = sum_alpha_2 / norm_tmp anomaly_weight = norm_frequency anomaly_weight[frequency > 0] = sparsity_score[ frequency > 0] / frequency[frequency > 0] # sparsity_score = sparsity_score * norm_frequency sparsity_sort_index = np.argsort(-sparsity_score) sparsity_sort_index = sparsity_sort_index.astype('int') frequency_sort_index = np.argsort(-norm_frequency) frequency_sort_index = 
frequency_sort_index.astype('int') tmp_class_dic_label = np.array(tmp[0]) class_order_data_index_tuple.append(tmp_class_dic_label) selected_dic_num = np.round(selected_dic_percent * cd_n) selected_dic_num = selected_dic_num.astype('int') bg_dic_ac_tuple.append( tmp_class_dic_label[sparsity_sort_index[0:selected_dic_num]]) bg_dic_fc_tuple.append( tmp_class_dic_label[frequency_sort_index[0:selected_dic_num]]) anomaly_weight_tuple.append(anomaly_weight) bg_dic_tuple.append( dictionary[:, sparsity_sort_index[0:selected_dic_num]]) # sio.savemat(result_path + "dic_{0}_frequency.mat".format(i), {'dic_frequency': frequency}) # sio.savemat(result_path + "dic_{0}_reflect.mat".format(i), {'dic_reflect': sum_alpha_2}) bg_dic = np.column_stack(bg_dic_tuple) bg_dic_ac_label = np.hstack(bg_dic_ac_tuple) bg_dic_fc_label = np.hstack(bg_dic_fc_tuple) anomaly_weight = np.hstack(anomaly_weight_tuple) class_order_data_index = np.hstack(class_order_data_index_tuple) norm_res = np.zeros((wm_n, win_size * win_size)) for i in range(wm_n): norm_res[i, :] = np.linalg.norm(resdiual_stack[:, :, i], axis=0) mean_norm_res = np.mean(norm_res, axis=1) * anomaly_weight.transpose() anomaly_level = mean_norm_res / np.linalg.norm(mean_norm_res) tg_sort_index = np.argsort(-anomaly_level) tg_dic = data2d[:, class_order_data_index[tg_sort_index[0:target_dic_num]]] print("successs!!") sio.savemat("bg_dic.mat", {'bg_dic': bg_dic}) sio.savemat("bg_dic_ac_label.mat", {'bg_dic_ac_label': bg_dic_ac_label}) sio.savemat("bg_dic_fc_label.mat", {'bg_dic_fc_label': bg_dic_fc_label}) sio.savemat("tg_dic.mat", {'tg_dic': tg_dic}) tg_dic_label = class_order_data_index[tg_sort_index[0:target_dic_num]] sio.savemat("tg_dic_label.mat", {'tg_dic_label': tg_dic_label}) return data2d, bg_dic, tg_dic, bg_dic_ac_label, tg_dic_label
def find_steady_states(dataframe, min_n_samples=2, stateThreshold=15, noise_level=70): """ Finds steady states given a DataFrame of power Parameters ---------- dataframe: pd.DataFrame with DateTimeIndex min_n_samples(int): number of samples to consider constituting a steady state. stateThreshold: maximum difference between highest and lowest value in steady state. noise_level: the level used to define significant appliances, transitions below this level will be ignored. See Hart 1985. p27. Returns ------- """ # Tells whether we have both real and reactive power or only real power num_measurements = len(dataframe.columns) estimatedSteadyPower = np.array([0] * num_measurements) lastSteadyPower = np.array([0] * num_measurements) previousMeasurement = np.array([0] * num_measurements) # These flags store state of power instantaneousChange = False # power changing this second ongoingChange = False # power change in progress over multiple seconds index_transitions = [] # Indices to use in returned Dataframe index_steadystates = [] transitions = [] # holds information on transitions steadyStates = [] # steadyStates to store in returned Dataframe N = 0 # N stores the number of samples in state time = dataframe.iloc[0].name # first state starts at beginning # Iterate over the rows performing algorithm print("Finding Edges, please wait ...", end="\n") sys.stdout.flush() for row in dataframe.itertuples(): # test if either active or reactive moved more than threshold # http://stackoverflow.com/questions/17418108/elegant-way-to-perform-tuple-arithmetic # http://stackoverflow.com/questions/13168943/expression-for-elements-greater-than-x-and-less-than-y-in-python-all-in-one-ret # Step 2: this does the threshold test and then we sum the boolean # array. thisMeasurement = row[1:3] # logging.debug('The current measurement is: %s' % (thisMeasurement,)) # logging.debug('The previous measurement is: %s' % # (previousMeasurement,)) stateChange = np.fabs(np.subtract(thisMeasurement, previousMeasurement)) # logging.debug('The State Change is: %s' % (stateChange,)) if np.sum(stateChange > stateThreshold): instantaneousChange = True else: instantaneousChange = False # Step 3: Identify if transition is just starting, if so, process it if (instantaneousChange and (not ongoingChange)): # Calculate transition size lastTransition = np.subtract(estimatedSteadyPower, lastSteadyPower) # logging.debug('The steady state transition is: %s' % # (lastTransition,)) # Sum Boolean array to verify if transition is above noise level if np.sum(np.fabs(lastTransition) > noise_level): # 3A, C: if so add the index of the transition start and the # power information # Avoid outputting first transition from zero index_transitions.append(time) # logging.debug('The current row time is: %s' % (time)) transitions.append(lastTransition) # I think we want this, though not specifically in Hart's algo notes # We don't want to append a steady state if it's less than min samples in length. 
# if N > min_n_samples: index_steadystates.append(time) # logging.debug('The ''time'' stored is: %s' % (time)) # last states steady power steadyStates.append(estimatedSteadyPower) # 3B lastSteadyPower = estimatedSteadyPower # 3C time = row[0] # Step 4: if a new steady state is starting, zero counter if instantaneousChange: N = 0 # Hart step 5: update our estimate for steady state's energy estimatedSteadyPower = np.divide( np.add(np.multiply(N, estimatedSteadyPower), thisMeasurement), (N + 1)) # logging.debug('The steady power estimate is: %s' % # (estimatedSteadyPower,)) # Step 6: increment counter N = N + 1 # Step 7 ongoingChange = instantaneousChange # Step 8 previousMeasurement = thisMeasurement print("Edge detection complete.") print("Creating transition frame ...") sys.stdout.flush() cols_transition = { 1: ['active transition'], 2: ['active transition', 'reactive transition'] } cols_steady = { 1: ['active average'], 2: ['active average', 'reactive average'] } if len(index_transitions) == 0: # No events return pd.DataFrame(), pd.DataFrame() else: transitions = pd.DataFrame(data=transitions, index=index_transitions, columns=cols_transition[num_measurements]) print("Transition frame created.") print("Creating states frame ...") sys.stdout.flush() steadyStates = pd.DataFrame(data=steadyStates, index=index_steadystates, columns=cols_steady[num_measurements]) print("States frame created.") print("Finished.") return steadyStates, transitions
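A hedged usage sketch for find_steady_states, assuming the function above (and its numpy/pandas/sys imports) is in scope: a synthetic single-channel active-power trace with a 100 W baseline and a 2 kW appliance switching on at 60 s and off at 120 s.

import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-01", periods=180, freq="s")
power = np.full(180, 100.0)
power[60:120] += 2000.0                                  # appliance on between 60 s and 120 s
power += np.random.default_rng(1).normal(0, 2, 180)      # small measurement noise
df = pd.DataFrame({"active": power}, index=idx)

steady_states, transitions = find_steady_states(df, stateThreshold=15, noise_level=70)
# The +2000 W switch-on edge is only flushed when the next edge (the switch-off) begins,
# so the final switch-off edge at the end of the trace does not appear (Hart 1985 behaviour).
print(transitions)
print(steady_states)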
def linear_regression(X, y, add_intercept=True, weights=None, coef_only=False, alpha=0.05, as_dataframe=True, remove_na=False, relimp=False): """(Multiple) Linear regression. Parameters ---------- X : array_like Predictor(s), of shape *(n_samples, n_features)* or *(n_samples)*. y : array_like Dependent variable, of shape *(n_samples)*. add_intercept : bool If False, assume that the data are already centered. If True, add a constant term to the model. In this case, the first value in the output dict is the intercept of the model. .. note:: It is generally recommanded to include a constant term (intercept) to the model to limit the bias and force the residual mean to equal zero. Note that intercept coefficient and p-values are however rarely meaningful. weights : array_like An optional vector of sample weights to be used in the fitting process, of shape *(n_samples)*. Missing or negative weights are not allowed. If not null, a weighted least squares is calculated. .. versionadded:: 0.3.5 coef_only : bool If True, return only the regression coefficients. alpha : float Alpha value used for the confidence intervals. :math:`\\text{CI} = [\\alpha / 2 ; 1 - \\alpha / 2]` as_dataframe : bool If True, returns a pandas DataFrame. If False, returns a dictionnary. remove_na : bool If True, apply a listwise deletion of missing values (i.e. the entire row is removed). Default is False, which will raise an error if missing values are present in either the predictor(s) or dependent variable. relimp : bool If True, returns the relative importance (= contribution) of predictors. This is irrelevant when the predictors are uncorrelated: the total :math:`R^2` of the model is simply the sum of each univariate regression :math:`R^2`-values. However, this does not apply when predictors are correlated. Instead, the total :math:`R^2` of the model is partitioned by averaging over all combinations of predictors, as done in the `relaimpo <https://cran.r-project.org/web/packages/relaimpo/relaimpo.pdf>`_ R package (``calc.relimp(type="lmg")``). .. warning:: The computation time roughly doubles for each additional predictor and therefore this can be extremely slow for models with more than 12-15 predictors. .. versionadded:: 0.3.0 Returns ------- stats : :py:class:`pandas.DataFrame` or dict Linear regression summary: * ``'names'``: name of variable(s) in the model (e.g. x1, x2...) * ``'coef'``: regression coefficients * ``'se'``: standard errors * ``'T'``: T-values * ``'pval'``: p-values * ``'r2'``: coefficient of determination (:math:`R^2`) * ``'adj_r2'``: adjusted :math:`R^2` * ``'CI[2.5%]'``: lower confidence intervals * ``'CI[97.5%]'``: upper confidence intervals * ``'relimp'``: relative contribution of each predictor to the final\ :math:`R^2` (only if ``relimp=True``). * ``'relimp_perc'``: percent relative contribution In addition, the output dataframe comes with hidden attributes such as the residuals, and degrees of freedom of the model and residuals, which can be accessed as follow, respectively: >>> lm = pg.linear_regression() # doctest: +SKIP >>> lm.residuals_, lm.df_model_, lm.df_resid_ # doctest: +SKIP Note that to follow scikit-learn convention, these hidden atributes end with an "_". 
When ``as_dataframe=False`` however, these attributes are no longer hidden and can be accessed as any other keys in the output dictionnary: >>> lm = pg.linear_regression() # doctest: +SKIP >>> lm['residuals'], lm['df_model'], lm['df_resid'] # doctest: +SKIP See also -------- logistic_regression, mediation_analysis, corr Notes ----- The :math:`\\beta` coefficients are estimated using an ordinary least squares (OLS) regression, as implemented in the :py:func:`scipy.linalg.lstsq` function. The OLS method minimizes the sum of squared residuals, and leads to a closed-form expression for the estimated :math:`\\beta`: .. math:: \\hat{\\beta} = (X^TX)^{-1} X^Ty It is generally recommanded to include a constant term (intercept) to the model to limit the bias and force the residual mean to equal zero. Note that intercept coefficient and p-values are however rarely meaningful. The standard error of the estimates is a measure of the accuracy of the prediction defined as: .. math:: \\sigma = \\sqrt{\\text{MSE} \\cdot (X^TX)^{-1}} where :math:`\\text{MSE}` is the mean squared error, .. math:: \\text{MSE} = \\frac{SS_{\\text{resid}}}{n - p - 1} = \\frac{\\sum{(\\text{true} - \\text{pred})^2}}{n - p - 1} :math:`p` is the total number of predictor variables in the model (excluding the intercept) and :math:`n` is the sample size. Using the :math:`\\beta` coefficients and the standard errors, the T-values can be obtained: .. math:: T = \\frac{\\beta}{\\sigma} and the p-values approximated using a T-distribution with :math:`n - p - 1` degrees of freedom. The coefficient of determination (:math:`R^2`) is defined as: .. math:: R^2 = 1 - (\\frac{SS_{\\text{resid}}}{SS_{\\text{total}}}) The adjusted :math:`R^2` is defined as: .. math:: \\overline{R}^2 = 1 - (1 - R^2) \\frac{n - 1}{n - p - 1} The relative importance (``relimp``) column is a partitioning of the total :math:`R^2` of the model into individual :math:`R^2` contribution. This is calculated by taking the average over average contributions in models of different sizes. For more details, please refer to `Groemping et al. 2006 <http://dx.doi.org/10.18637/jss.v017.i01>`_ and the R package `relaimpo <https://cran.r-project.org/web/packages/relaimpo/relaimpo.pdf>`_. Note that Pingouin will automatically remove any duplicate columns from :math:`X`, as well as any column with only one unique value (constant), excluding the intercept. Results have been compared against sklearn, R, statsmodels and JASP. Examples -------- 1. Simple linear regression >>> import numpy as np >>> import pingouin as pg >>> np.random.seed(123) >>> mean, cov, n = [4, 6], [[1, 0.5], [0.5, 1]], 30 >>> x, y = np.random.multivariate_normal(mean, cov, n).T >>> lm = pg.linear_regression(x, y) >>> lm.round(2) names coef se T pval r2 adj_r2 CI[2.5%] CI[97.5%] 0 Intercept 4.40 0.54 8.16 0.00 0.24 0.21 3.29 5.50 1 x1 0.39 0.13 2.99 0.01 0.24 0.21 0.12 0.67 2. Multiple linear regression >>> np.random.seed(42) >>> z = np.random.normal(size=n) >>> X = np.column_stack((x, z)) >>> lm = pg.linear_regression(X, y) >>> print(lm['coef'].to_numpy()) [4.54123324 0.36628301 0.17709451] 3. Get the residuals >>> np.round(lm.residuals_, 2) array([ 1.18, -1.17, 1.32, 0.76, -1.25, 0.34, -1.54, -0.2 , 0.36, -0.39, 0.69, 1.39, 0.2 , -1.14, -0.21, -1.68, 0.67, -0.69, 0.62, 0.92, -1. , 0.64, -0.21, -0.78, 1.08, -0.03, -1.3 , 0.64, 0.81, -0.04]) 4. 
Using a Pandas DataFrame >>> import pandas as pd >>> df = pd.DataFrame({'x': x, 'y': y, 'z': z}) >>> lm = pg.linear_regression(df[['x', 'z']], df['y']) >>> print(lm['coef'].to_numpy()) [4.54123324 0.36628301 0.17709451] 5. No intercept and return coef only >>> pg.linear_regression(X, y, add_intercept=False, coef_only=True) array([ 1.40935593, -0.2916508 ]) 6. Return a dictionnary instead of a DataFrame >>> lm_dict = linear_regression(X, y, as_dataframe=False) 7. Remove missing values >>> X[4, 1] = np.nan >>> y[7] = np.nan >>> pg.linear_regression(X, y, remove_na=True, coef_only=True) array([4.64069731, 0.35455398, 0.1888135 ]) 8. Get the relative importance of predictors >>> lm = pg.linear_regression(X, y, remove_na=True, relimp=True) >>> lm[['names', 'relimp', 'relimp_perc']] names relimp relimp_perc 0 Intercept NaN NaN 1 x1 0.217265 82.202201 2 x2 0.047041 17.797799 The ``relimp`` column is a partitioning of the total :math:`R^2` of the model into individual contribution. Therefore, it sums to the :math:`R^2` of the full model. The ``relimp_perc`` is normalized to sum to 100%. See `Groemping 2006 <https://www.jstatsoft.org/article/view/v017i01>`_ for more details. >>> lm[['relimp', 'relimp_perc']].sum() relimp 0.264305 relimp_perc 100.000000 dtype: float64 9. Weighted linear regression >>> X = [1, 2, 3, 4, 5, 6] >>> y = [10, 22, 11, 13, 13, 16] >>> w = [1, 0.1, 1, 1, 0.5, 1] # Array of weights. Must be >= 0. >>> lm = pg.linear_regression(X, y, weights=w) >>> lm.round(2) names coef se T pval r2 adj_r2 CI[2.5%] CI[97.5%] 0 Intercept 9.00 2.03 4.42 0.01 0.51 0.39 3.35 14.64 1 x1 1.04 0.50 2.06 0.11 0.51 0.39 -0.36 2.44 """ # Extract names if X is a Dataframe or Series if isinstance(X, pd.DataFrame): names = X.keys().tolist() elif isinstance(X, pd.Series): names = [X.name] else: names = [] # Convert input to numpy array X = np.asarray(X) y = np.asarray(y) assert y.ndim == 1, 'y must be one-dimensional.' assert 0 < alpha < 1 if X.ndim == 1: # Convert to (n_samples, n_features) shape X = X[..., np.newaxis] # Check for NaN / Inf if remove_na: X, y = rm_na(X, y[..., np.newaxis], paired=True, axis='rows') y = np.squeeze(y) y_gd = np.isfinite(y).all() X_gd = np.isfinite(X).all() assert y_gd, ("Target (y) contains NaN or Inf. Please remove them " "manually or use remove_na=True.") assert X_gd, ("Predictors (X) contain NaN or Inf. Please remove them " "manually or use remove_na=True.") # Check that X and y have same length assert y.shape[0] == X.shape[0], 'X and y must have same number of samples' if not names: names = ['x' + str(i + 1) for i in range(X.shape[1])] if add_intercept: # Add intercept X = np.column_stack((np.ones(X.shape[0]), X)) names.insert(0, "Intercept") # FINAL CHECKS BEFORE RUNNING LEAST SQUARES REGRESSION # 1. Let's remove column(s) with only zero, otherwise the regression fails n_nonzero = np.count_nonzero(X, axis=0) idx_zero = np.flatnonzero(n_nonzero == 0) # Find columns that are only 0 if len(idx_zero): X = np.delete(X, idx_zero, 1) names = np.delete(names, idx_zero) # 2. We also want to make sure that there is no more than one constant # column (= intercept), otherwise the regression fails # This is equivalent, but much faster, to pd.DataFrame(X).nunique() idx_unique = np.where(np.all(X == X[0, :], axis=0))[0] if len(idx_unique) > 1: # We remove all but the first "Intercept" column. X = np.delete(X, idx_unique[1:], 1) names = np.delete(names, idx_unique[1:]) # Is there a constant in our predictor matrix? Useful for dof and R^2. constant = 1 if len(idx_unique) > 0 else 0 # 3. 
Finally, we want to remove duplicate columns if X.shape[1] > 1: idx_duplicate = [] for pair in itertools.combinations(range(X.shape[1]), 2): if np.array_equal(X[:, pair[0]], X[:, pair[1]]): idx_duplicate.append(pair[1]) if len(idx_duplicate): X = np.delete(X, idx_duplicate, 1) names = np.delete(names, idx_duplicate) # 4. Check that we have enough samples / features n, p = X.shape[0], X.shape[1] assert n >= 3, 'At least three valid samples are required in X.' assert p >= 1, 'X must have at least one valid column.' # 5. Handle weights if weights is not None: if relimp: raise ValueError("relimp = True is not supported when using " "weights.") w = np.asarray(weights) assert w.ndim == 1, 'weights must be a 1D array.' assert w.size == n, 'weights must be of shape n_samples.' assert not np.isnan(w).any(), 'Missing weights are not accepted.' assert not (w < 0).any(), 'Negative weights are not accepted.' # Do not count weights == 0 in dof # This gives similar results as R lm() but different from statsmodels n = np.count_nonzero(w) # Rescale (whitening) wts = np.diag(np.sqrt(w)) Xw = wts @ X yw = wts @ y else: # Set all weights to one, [1, 1, 1, ...] w = np.ones(n) Xw = X yw = y # FIT (WEIGHTED) LEAST SQUARES REGRESSION USING SCIPY.LINALG.LSTST coef, ss_res, rank, _ = lstsq(Xw, yw) if coef_only: return coef # Degrees of freedom df_model = rank - constant df_resid = n - p # Calculate predicted values and (weighted) residuals pred = Xw @ coef resid = yw - pred # ss_res = (resid ** 2).sum() # Calculate total (weighted) sums of squares and R^2 ss_tot = yw @ yw ss_wtot = np.sum(w * (y - np.average(y, weights=w))**2) if constant: r2 = 1 - ss_res / ss_wtot else: r2 = 1 - ss_res / ss_tot adj_r2 = 1 - (1 - r2) * (n - constant) / df_resid # Compute mean squared error, variance and SE mse = ss_res / df_resid beta_var = mse * (np.linalg.pinv(Xw.T @ Xw).diagonal()) beta_se = np.sqrt(beta_var) # Compute T and p-values T = coef / beta_se pval = 2 * t.sf(np.fabs(T), df_resid) # Compute confidence intervals crit = t.ppf(1 - alpha / 2, df_resid) marg_error = crit * beta_se ll = coef - marg_error ul = coef + marg_error # Rename CI ll_name = 'CI[%.1f%%]' % (100 * alpha / 2) ul_name = 'CI[%.1f%%]' % (100 * (1 - alpha / 2)) # Create dict stats = {'names': names, 'coef': coef, 'se': beta_se, 'T': T, 'pval': pval, 'r2': r2, 'adj_r2': adj_r2, ll_name: ll, ul_name: ul} # Relative importance if relimp: data = pd.concat([pd.DataFrame(y, columns=['y']), pd.DataFrame(X, columns=names)], sort=False, axis=1) if 'Intercept' in names: # Intercept is the first column reli = _relimp(data.drop(columns=['Intercept']).cov()) reli['names'] = ['Intercept'] + reli['names'] reli['relimp'] = np.insert(reli['relimp'], 0, np.nan) reli['relimp_perc'] = np.insert(reli['relimp_perc'], 0, np.nan) else: reli = _relimp(data.cov()) stats.update(reli) if as_dataframe: stats = pd.DataFrame(stats) stats.df_model_ = df_model stats.df_resid_ = df_resid stats.residuals_ = 0 # Trick to avoid Pandas warning stats.residuals_ = resid # Residuals is a hidden attribute else: stats['df_model'] = df_model stats['df_resid'] = df_resid stats['residuals'] = resid return stats
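As a quick check of the closed-form estimate quoted in the Notes above, the sketch below (plain numpy/scipy, synthetic data) verifies that beta_hat = (X^T X)^{-1} X^T y matches the least-squares solver that linear_regression relies on internally.

import numpy as np
from scipy.linalg import lstsq

rng = np.random.default_rng(123)
n = 50
X = np.column_stack((np.ones(n), rng.normal(size=(n, 2))))   # intercept + 2 predictors
y = X @ np.array([2.0, 0.5, -1.0]) + rng.normal(scale=0.3, size=n)

beta_closed = np.linalg.inv(X.T @ X) @ X.T @ y               # closed-form OLS
beta_lstsq, _, _, _ = lstsq(X, y)                            # solver used by linear_regression
print(np.allclose(beta_closed, beta_lstsq))                  # True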
def fidnet_doRecon2D(model_weights, file, ss_file, max_points, outfile, f1180='y', shift='n'): if f1180.lower() in ['y', 'n']: if f1180.lower() == 'y': f1180 = True else: f1180 = False if shift.lower() in ['y', 'n']: if shift.lower() == 'y': shift = True else: shift = False dic, data = ng.pipe.read(file) model = build_model() model.load_weights(model_weights) ss = load_ss(ss_file, max_points) ind_points = data.shape[0] # sampled points in indirect dim dir_points = data.shape[1] # sampled points in direct dim if ind_points > 512: print('the input spectrum contains too many sampled points') print('the network can have a maximum of 256 complex points in the') print('reconstructed spectra. Please reduce the size of the input') print('aborting now...') sys.exit() if ss.shape[0] == ind_points // 2: print( 'number of recorded points in indirect dimension matches sampling schedule' ) print('proceeding with reconstruction...') else: print( 'there is a mis-match between the sampling schedule and number of recorded points' ) print( 'in the indirect dimension. Please check the sampling schedule or your input spectrum' ) print('may need to be transposed') print('aborting now...') sys.exit() if max_points > 256: print( 'the maximum size of the final spectrum is 256 complex points in') print( 'the indirect dimension. The output will be truncated at this point' ) max_points = 256 data = expand_data(data, ss, max_points, dir_points) data = tf.convert_to_tensor(data) dl_dic = make_dl_dic(dic, max_points) shape = tf.shape(data).numpy() max_val = tf.reduce_max(data) data = data / max_val Hpoints = shape[1] Npoints = shape[0] padding_2 = [[0, 512 - tf.shape(data)[0]], [0, 0]] data_samp = tf.pad(data, padding_2, 'Constant', constant_values=0.0) data_samp = tf.transpose(data_samp) padding_recon = [[3, 3], [0, 0]] data_samp = tf.pad(data_samp, padding_recon, 'Constant', constant_values=0.0) scale = np.array( [np.max(np.fabs(data_samp[i:i + 4, :])) for i in range((Hpoints + 3))]) sampy = np.zeros((scale.shape[0], 4, tf.shape(data_samp)[1])) for i in range(scale.shape[0]): sampy[i, :, :] = data_samp[i:i + 4, :] samp_av = tf.convert_to_tensor(sampy) samp_av = tf.transpose(samp_av, perm=[1, 2, 0]) samp_av = samp_av / scale samp_av = tf.transpose(samp_av, perm=[2, 1, 0]) samp_av = tf.expand_dims(samp_av, axis=3) data = tf.expand_dims(data, axis=0) res = model.predict(samp_av) res = tf.convert_to_tensor(res[0]) res = rescale_dat(res, scale) res_keep = copy.deepcopy(res) res = get_average_results(res, Hpoints) res = res[:, :Npoints, :, 0] res_ft = ft_second(res, npoints1=Hpoints, npoints2=Npoints, f1180=f1180, shift=shift) data_ft = ft_second(data, npoints1=Hpoints, npoints2=Npoints, f1180=f1180, shift=shift) data_ft = data_ft / tf.reduce_max(data_ft) res_ft = res_ft / tf.reduce_max(res_ft) ng.pipe.write(outfile, dl_dic, res.numpy()[0], overwrite=True) ax1 = plt.subplot(2, 2, 1) ax2 = plt.subplot(2, 2, 3) ax3 = plt.subplot(2, 2, 2) ax4 = plt.subplot(2, 2, 4) plot_contour(ax1, data) plot_contour(ax2, data_ft, invert=True) plot_contour(ax3, res) plot_contour(ax4, res_ft, invert=True) plt.show() get_ind_spectra(res_keep, res_ft, Hpoints, Npoints, dl_dic, f1180=f1180, shift=shift)
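The pre-processing that fidnet_doRecon2D applies before model.predict can be hard to follow in-line. Below is a plain-numpy sketch (toy sizes, not tied to real NMR data) of that step: the direct dimension is padded by 3 rows on each side, cut into overlapping 4-row strips, and each strip is divided by its own maximum absolute value.

import numpy as np

rng = np.random.default_rng(0)
Hpoints, Npoints = 16, 512                     # toy direct-dimension / padded indirect sizes
data_samp = np.pad(rng.normal(size=(Hpoints, Npoints)), ((3, 3), (0, 0)))

# One scale factor per overlapping 4-row strip, as in the code above.
scale = np.array([np.max(np.abs(data_samp[i:i + 4, :])) for i in range(Hpoints + 3)])
strips = np.stack([data_samp[i:i + 4, :] for i in range(Hpoints + 3)])   # (Hpoints+3, 4, Npoints)
strips_norm = strips / scale[:, None, None]
print(strips_norm.shape, np.abs(strips_norm).max())                      # every strip now peaks at 1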
# Fragment: scatter plot with marginal histograms. Assumes x, y, axScatter,
# rect_histx, rect_histy and nullfmt are defined earlier (see the sketch below).
plt.ylabel('HSE ' r'$T_{m}$')

axHistx = plt.axes(rect_histx)
axHisty = plt.axes(rect_histy)

# no labels on the marginal axes
axHistx.xaxis.set_major_formatter(nullfmt)
axHisty.yaxis.set_major_formatter(nullfmt)

# the scatter plot:
axScatter.scatter(x, y, s=6, facecolors='none', edgecolors='black', alpha=0.1)

# now determine nice limits by hand:
binwidth = 5
# binwidth = 0.25
xymax = np.max([np.max(np.fabs(x)), np.max(np.fabs(y))])
lim = (int(xymax / binwidth) + 1) * binwidth

axScatter.set_xlim((-lim, lim))
axScatter.set_ylim((-lim, lim))
# axScatter.set_xlim((min_Tm-10, max_Tm+10))
# axScatter.set_ylim((min_Tm-10, max_Tm+10))

bins = np.arange(-lim, lim + binwidth, binwidth)
# print(bins)

# marginal histograms
axHistx.hist(x, bins=bins, color='grey')
axHisty.hist(y, bins=bins, orientation='horizontal', color='grey')
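The fragment above relies on names created elsewhere. One plausible setup, modelled on the standard matplotlib scatter-with-marginal-histograms recipe, is sketched below; the layout numbers, the stand-in data, and the x-axis label are all assumptions, not part of the original script.

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter

rng = np.random.default_rng(0)
x = rng.normal(0, 30, 500)            # stand-ins for the melting-temperature data
y = x + rng.normal(0, 15, 500)

nullfmt = NullFormatter()
left, width = 0.1, 0.65
bottom, height = 0.1, 0.65
rect_scatter = [left, bottom, width, height]
rect_histx = [left, bottom + height + 0.02, width, 0.2]
rect_histy = [left + width + 0.02, bottom, 0.2, height]

plt.figure(figsize=(8, 8))
axScatter = plt.axes(rect_scatter)
plt.xlabel('DFT ' r'$T_{m}$')         # hypothetical x-axis label
# ... the fragment above can now run unchanged, starting at plt.ylabel(...)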
def logistic_regression(X, y, coef_only=False, alpha=0.05, as_dataframe=True, remove_na=False, **kwargs): """(Multiple) Binary logistic regression. Parameters ---------- X : np.array or list Predictor(s). Shape = (n_samples, n_features) or (n_samples,). y : np.array or list Dependent variable. Shape = (n_samples). ``y`` must be binary, i.e. only contains 0 or 1. Multinomial logistic regression is not supported. coef_only : bool If True, return only the regression coefficients. alpha : float Alpha value used for the confidence intervals. :math:`\\text{CI} = [\\alpha / 2 ; 1 - \\alpha / 2]` as_dataframe : bool If True, returns a pandas DataFrame. If False, returns a dictionnary. remove_na : bool If True, apply a listwise deletion of missing values (i.e. the entire row is removed). Default is False, which will raise an error if missing values are present in either the predictor(s) or dependent variable. **kwargs : optional Optional arguments passed to :py:class:`sklearn.linear_model.LogisticRegression` (see Notes). Returns ------- stats : :py:class:`pandas.DataFrame` or dict Logistic regression summary: * ``'names'``: name of variable(s) in the model (e.g. x1, x2...) * ``'coef'``: regression coefficients (log-odds) * ``'se'``: standard error * ``'z'``: z-scores * ``'pval'``: two-tailed p-values * ``'CI[2.5%]'``: lower confidence interval * ``'CI[97.5%]'``: upper confidence interval See also -------- linear_regression Notes ----- This is a wrapper around the :py:class:`sklearn.linear_model.LogisticRegression` class. Importantly, Pingouin automatically disables the L2 regularization applied by scikit-learn. This can be modified by changing the ``penalty`` argument. The logistic regression assumes that the log-odds (the logarithm of the odds) for the value labeled "1" in the response variable is a linear combination of the predictor variables. The log-odds are given by the `logit <https://en.wikipedia.org/wiki/Logit>`_ function, which map a probability :math:`p` of the response variable being "1" from :math:`[0, 1)` to :math:`(-\\infty, +\\infty)`. .. math:: \\text{logit}(p) = \\ln \\frac{p}{1 - p} = \\beta_0 + \\beta X The odds of the response variable being "1" can be obtained by exponentiating the log-odds: .. math:: \\frac{p}{1 - p} = e^{\\beta_0 + \\beta X} and the probability of the response variable being "1" is given by: .. math:: p = \\frac{1}{1 + e^{-(\\beta_0 + \\beta X})} Note that the above function that converts log-odds to probability is called the `logistic function <https://en.wikipedia.org/wiki/Logistic_function>`_. The first coefficient is always the constant term (intercept) of the model. Scikit-learn will automatically add the intercept to your predictor(s) matrix, therefore, :math:`X` should not include a constant term. Pingouin will remove any constant term (e.g column with only one unique value), or duplicate columns from :math:`X`. The calculation of the p-values and confidence interval is adapted from a code found at https://gist.github.com/rspeare/77061e6e317896be29c6de9a85db301d Results have been compared against statsmodels, R, and JASP. Examples -------- 1. Simple binary logistic regression >>> import numpy as np >>> from pingouin import logistic_regression >>> np.random.seed(123) >>> x = np.random.normal(size=30) >>> y = np.random.randint(0, 2, size=30) >>> lom = logistic_regression(x, y) >>> lom.round(2) names coef se z pval CI[2.5%] CI[97.5%] 0 Intercept -0.27 0.37 -0.74 0.46 -1.00 0.45 1 x1 0.07 0.32 0.21 0.84 -0.55 0.68 2. 
Multiple binary logistic regression >>> np.random.seed(42) >>> z = np.random.normal(size=30) >>> X = np.column_stack((x, z)) >>> lom = logistic_regression(X, y) >>> print(lom['coef'].to_numpy()) [-0.36736745 -0.04374684 -0.47829392] 3. Using a Pandas DataFrame >>> import pandas as pd >>> df = pd.DataFrame({'x': x, 'y': y, 'z': z}) >>> lom = logistic_regression(df[['x', 'z']], df['y']) >>> print(lom['coef'].to_numpy()) [-0.36736745 -0.04374684 -0.47829392] 4. Return only the coefficients >>> logistic_regression(X, y, coef_only=True) array([-0.36736745, -0.04374684, -0.47829392]) 5. Passing custom parameters to sklearn >>> lom = logistic_regression(X, y, solver='sag', max_iter=10000, ... random_state=42) >>> print(lom['coef'].to_numpy()) [-0.36751796 -0.04367056 -0.47841908] **How to interpret the log-odds coefficients?** We'll use the `Wikipedia example <https://en.wikipedia.org/wiki/Logistic_regression#Probability_of_passing_an_exam_versus_hours_of_study>`_ of the probability of passing an exam versus the hours of study: *A group of 20 students spends between 0 and 6 hours studying for an exam. How does the number of hours spent studying affect the probability of the student passing the exam?* >>> # First, let's create the dataframe >>> Hours = [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, ... 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50] >>> Pass = [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1] >>> df = pd.DataFrame({'HoursStudy': Hours, 'PassExam': Pass}) >>> # And then run the logistic regression >>> lr = logistic_regression(df['HoursStudy'], df['PassExam']).round(3) >>> lr names coef se z pval CI[2.5%] CI[97.5%] 0 Intercept -4.078 1.761 -2.316 0.021 -7.529 -0.626 1 HoursStudy 1.505 0.629 2.393 0.017 0.272 2.737 The ``Intercept`` coefficient (-4.078) is the log-odds of ``PassExam=1`` when ``HoursStudy=0``. The odds ratio can be obtained by exponentiating the log-odds: >>> np.exp(-4.078) 0.016941314421496552 i.e. :math:`0.017:1`. Conversely the odds of failing the exam are :math:`(1/0.017) \\approx 59:1`. The probability can then be obtained with the following equation .. math:: p = \\frac{1}{1 + e^{-(-4.078 + 0 * 1.505)}} >>> 1 / (1 + np.exp(-(-4.078))) 0.016659087580814722 The ``HoursStudy`` coefficient (1.505) means that for each additional hour of study, the log-odds of passing the exam increase by 1.505, and the odds are multipled by :math:`e^{1.505} \\approx 4.50`. 
For example, a student who studies 2 hours has a probability of passing the exam of 25%: >>> 1 / (1 + np.exp(-(-4.078 + 2 * 1.505))) 0.2557836148964987 The table below shows the probability of passing the exam for several values of ``HoursStudy``: +----------------+----------+----------------+------------------+ | Hours of Study | Log-odds | Odds | Probability | +================+==========+================+==================+ | 0 | −4.08 | 0.017 ≈ 1:59 | 0.017 | +----------------+----------+----------------+------------------+ | 1 | −2.57 | 0.076 ≈ 1:13 | 0.07 | +----------------+----------+----------------+------------------+ | 2 | −1.07 | 0.34 ≈ 1:3 | 0.26 | +----------------+----------+----------------+------------------+ | 3 | 0.44 | 1.55 | 0.61 | +----------------+----------+----------------+------------------+ | 4 | 1.94 | 6.96 | 0.87 | +----------------+----------+----------------+------------------+ | 5 | 3.45 | 31.4 | 0.97 | +----------------+----------+----------------+------------------+ | 6 | 4.96 | 141.4 | 0.99 | +----------------+----------+----------------+------------------+ """ # Check that sklearn is installed from pingouin.utils import _is_sklearn_installed _is_sklearn_installed(raise_error=True) from sklearn.linear_model import LogisticRegression # Extract names if X is a Dataframe or Series if isinstance(X, pd.DataFrame): names = X.keys().tolist() elif isinstance(X, pd.Series): names = [X.name] else: names = [] # Convert to numpy array X = np.asarray(X) y = np.asarray(y) assert y.ndim == 1, 'y must be one-dimensional.' assert 0 < alpha < 1, 'alpha must be between 0 and 1.' # Add axis if only one-dimensional array if X.ndim == 1: X = X[..., np.newaxis] # Check for NaN / Inf if remove_na: X, y = rm_na(X, y[..., np.newaxis], paired=True, axis='rows') y = np.squeeze(y) y_gd = np.isfinite(y).all() X_gd = np.isfinite(X).all() assert y_gd, ("Target (y) contains NaN or Inf. Please remove them " "manually or use remove_na=True.") assert X_gd, ("Predictors (X) contain NaN or Inf. 
Please remove them " "manually or use remove_na=True.") # Check that X and y have same length assert y.shape[0] == X.shape[0], 'X and y must have same number of samples' # Check that y is binary if np.unique(y).size != 2: raise ValueError('Dependent variable must be binary.') if not names: names = ['x' + str(i + 1) for i in range(X.shape[1])] # We also want to make sure that there is no column # with only one unique value, otherwise the regression fails # This is equivalent, but much faster, to pd.DataFrame(X).nunique() idx_unique = np.where(np.all(X == X[0, :], axis=0))[0] if len(idx_unique): X = np.delete(X, idx_unique, 1) names = np.delete(names, idx_unique).tolist() # Finally, we want to remove duplicate columns if X.shape[1] > 1: idx_duplicate = [] for pair in itertools.combinations(range(X.shape[1]), 2): if np.array_equal(X[:, pair[0]], X[:, pair[1]]): idx_duplicate.append(pair[1]) if len(idx_duplicate): X = np.delete(X, idx_duplicate, 1) names = np.delete(names, idx_duplicate).tolist() # Initialize and fit if 'solver' not in kwargs: kwargs['solver'] = 'lbfgs' if 'multi_class' not in kwargs: kwargs['multi_class'] = 'auto' if 'penalty' not in kwargs: kwargs['penalty'] = 'none' lom = LogisticRegression(**kwargs) lom.fit(X, y) coef = np.append(lom.intercept_, lom.coef_) if coef_only: return coef # Design matrix -- add intercept names.insert(0, "Intercept") X_design = np.column_stack((np.ones(X.shape[0]), X)) n, p = X_design.shape # Fisher Information Matrix denom = (2 * (1 + np.cosh(lom.decision_function(X)))) denom = np.tile(denom, (p, 1)).T fim = (X_design / denom).T @ X_design crao = np.linalg.pinv(fim) # Standard error and Z-scores se = np.sqrt(np.diag(crao)) z_scores = coef / se # Two-tailed p-values pval = 2 * norm.sf(np.fabs(z_scores)) # Confidence intervals crit = norm.ppf(1 - alpha / 2) ll = coef - crit * se ul = coef + crit * se # Rename CI ll_name = 'CI[%.1f%%]' % (100 * alpha / 2) ul_name = 'CI[%.1f%%]' % (100 * (1 - alpha / 2)) # Create dict stats = {'names': names, 'coef': coef, 'se': se, 'z': z_scores, 'pval': pval, ll_name: ll, ul_name: ul} if as_dataframe: return pd.DataFrame(stats) else: return stats
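A small worked check of the log-odds arithmetic in the docstring above: the table of passing probabilities can be reproduced directly from the fitted coefficients (intercept -4.078, slope 1.505 log-odds per hour of study) with a few lines of numpy.

import numpy as np

b0, b1 = -4.078, 1.505
hours = np.arange(0, 7)
log_odds = b0 + b1 * hours
odds = np.exp(log_odds)
prob = 1.0 / (1.0 + np.exp(-log_odds))
for h, lo, o, p in zip(hours, log_odds, odds, prob):
    print(f"{h} h: log-odds {lo:6.2f}, odds {o:8.3f}, P(pass) {p:.2f}")
# e.g. 2 h -> log-odds -1.07, odds 0.34, P(pass) 0.26, matching the table above.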
class RandomLayer(BaseRandomLayer): """RandomLayer is a transformer that creates a feature mapping of the inputs that corresponds to a layer of hidden units with randomly generated components. The transformed values are a specified function of input activations that are a weighted combination of dot product (multilayer perceptron) and distance (rbf) activations: input_activation = alpha * mlp_activation + (1-alpha) * rbf_activation mlp_activation(x) = dot(x, weights) + bias rbf_activation(x) = rbf_width * ||x - center||/radius alpha and rbf_width are specified by the user weights and biases are taken from normal distribution of mean 0 and sd of 1 centers are taken uniformly from the bounding hyperrectangle of the inputs, and radii are max(||x-c||)/sqrt(n_centers*2) The input activation is transformed by a transfer function that defaults to numpy.tanh if not specified, but can be any callable that returns an array of the same shape as its argument (the input activation array, of shape [n_samples, n_hidden]). Functions provided are 'sine', 'tanh', 'tribas', 'inv_tribas', 'sigmoid', 'hardlim', 'softlim', 'gaussian', 'multiquadric', or 'inv_multiquadric'. Parameters ---------- `n_hidden` : int, optional (default=20) Number of units to generate `alpha` : float, optional (default=0.5) Mixing coefficient for distance and dot product input activations: activation = alpha*mlp_activation + (1-alpha)*rbf_width*rbf_activation `rbf_width` : float, optional (default=1.0) multiplier on rbf_activation `user_components`: dictionary, optional (default=None) dictionary containing values for components that woud otherwise be randomly generated. Valid key/value pairs are as follows: 'radii' : array-like of shape [n_hidden] 'centers': array-like of shape [n_hidden, n_features] 'biases' : array-like of shape [n_hidden] 'weights': array-like of shape [n_features, n_hidden] `activation_func` : {callable, string} optional (default='tanh') Function used to transform input activation It must be one of 'tanh', 'sine', 'tribas', 'inv_tribas', 'sigmoid', 'hardlim', 'softlim', 'gaussian', 'multiquadric', 'inv_multiquadric' or a callable. If None is given, 'tanh' will be used. If a callable is given, it will be used to compute the activations. `activation_args` : dictionary, optional (default=None) Supplies keyword arguments for a callable activation_func `random_state` : int, RandomState instance or None (default=None) Control the pseudo random number generator used to generate the hidden unit weights at fit time. 
Attributes ---------- `input_activations_` : numpy array of shape [n_samples, n_hidden] Array containing dot(x, hidden_weights) + bias for all samples `components_` : dictionary containing two keys: `bias_weights_` : numpy array of shape [n_hidden] `hidden_weights_` : numpy array of shape [n_features, n_hidden] See Also -------- """ # triangular activation function _tribas = (lambda x: np.clip(1.0 - np.fabs(x), 0.0, 1.0)) # inverse triangular activation function _inv_tribas = (lambda x: np.clip(np.fabs(x), 0.0, 1.0)) # sigmoid activation function _sigmoid = (lambda x: 1.0 / (1.0 + np.exp(-x))) # hard limit activation function _hardlim = (lambda x: np.array(x > 0.0, dtype=float)) _softlim = (lambda x: np.clip(x, 0.0, 1.0)) # gaussian RBF _gaussian = (lambda x: np.exp(-pow(x, 2.0))) # multiquadric RBF _multiquadric = (lambda x: np.sqrt(1.0 + pow(x, 2.0))) # inverse multiquadric RBF _inv_multiquadric = (lambda x: 1.0 / (np.sqrt(1.0 + pow(x, 2.0)))) # internal activation function table _internal_activation_funcs = { 'sine': np.sin, 'tanh': np.tanh, 'tribas': _tribas, 'inv_tribas': _inv_tribas, 'sigmoid': _sigmoid, 'softlim': _softlim, 'hardlim': _hardlim, 'gaussian': _gaussian, 'multiquadric': _multiquadric, 'inv_multiquadric': _inv_multiquadric, } def __init__(self, n_hidden=20, alpha=0.5, random_state=None, activation_func='tanh', activation_args=None, user_components=None, rbf_width=1.0): super(RandomLayer, self).__init__(n_hidden=n_hidden, random_state=random_state, activation_func=activation_func, activation_args=activation_args) if (isinstance(self.activation_func, str)): func_names = self._internal_activation_funcs.keys() if (self.activation_func not in func_names): msg = "unknown activation function '%s'" % self.activation_func raise ValueError(msg) self.alpha = alpha self.rbf_width = rbf_width self.user_components = user_components self._use_mlp_input = (self.alpha != 0.0) self._use_rbf_input = (self.alpha != 1.0) def _get_user_components(self, key): """Look for given user component""" try: return self.user_components[key] except (TypeError, KeyError): return None def _compute_radii(self): """Generate RBF radii""" # use supplied radii if present radii = self._get_user_components('radii') # compute radii if (radii is None): centers = self.components_['centers'] n_centers = centers.shape[0] max_dist = np.max(pairwise_distances(centers)) radii = np.ones(n_centers) * max_dist / sqrt(2.0 * n_centers) self.components_['radii'] = radii def _compute_centers(self, X, sparse, rs): """Generate RBF centers""" # use supplied centers if present centers = self._get_user_components('centers') # use points taken uniformly from the bounding # hyperrectangle if (centers is None): n_features = X.shape[1] if (sparse): fxr = range(n_features) cols = [X.getcol(i) for i in fxr] min_dtype = X.dtype.type(1.0e10) sp_min = lambda col: np.minimum(min_dtype, np.min(col.data)) min_Xs = np.array(map(sp_min, cols)) max_dtype = X.dtype.type(-1.0e10) sp_max = lambda col: np.maximum(max_dtype, np.max(col.data)) max_Xs = np.array(map(sp_max, cols)) else: min_Xs = X.min(axis=0) max_Xs = X.max(axis=0) spans = max_Xs - min_Xs ctrs_size = (self.n_hidden, n_features) centers = min_Xs + spans * rs.uniform(0.0, 1.0, ctrs_size) self.components_['centers'] = centers def _compute_biases(self, rs): """Generate MLP biases""" # use supplied biases if present biases = self._get_user_components('biases') if (biases is None): b_size = self.n_hidden biases = rs.normal(size=b_size) self.components_['biases'] = biases def 
_compute_weights(self, X, rs): """Generate MLP weights""" # use supplied weights if present weights = self._get_user_components('weights') if (weights is None): n_features = X.shape[1] hw_size = (n_features, self.n_hidden) weights = rs.normal(size=hw_size) self.components_['weights'] = weights def _generate_components(self, X): """Generate components of hidden layer given X""" rs = check_random_state(self.random_state) if (self._use_mlp_input): self._compute_biases(rs) self._compute_weights(X, rs) if (self._use_rbf_input): self._compute_centers(X, sp.issparse(X), rs) self._compute_radii() def _compute_input_activations(self, X): """Compute input activations given X""" n_samples = X.shape[0] mlp_acts = np.zeros((n_samples, self.n_hidden)) if (self._use_mlp_input): b = self.components_['biases'] w = self.components_['weights'] mlp_acts = self.alpha * (safe_sparse_dot(X, w) + b) rbf_acts = np.zeros((n_samples, self.n_hidden)) if (self._use_rbf_input): radii = self.components_['radii'] centers = self.components_['centers'] scale = self.rbf_width * (1.0 - self.alpha) rbf_acts = scale * cdist(X, centers) / radii self.input_activations_ = mlp_acts + rbf_acts
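The mixed MLP/RBF input activation described in the RandomLayer docstring can be reproduced without the BaseRandomLayer machinery. The sketch below (numpy/scipy only, default alpha, rbf_width and tanh transfer; shapes are arbitrary) follows the formulas quoted above and the component generation in _compute_centers/_compute_radii.

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.default_rng(0)
n_samples, n_features, n_hidden = 100, 5, 20
alpha, rbf_width = 0.5, 1.0

X = rng.normal(size=(n_samples, n_features))
W = rng.normal(size=(n_features, n_hidden))          # MLP weights ~ N(0, 1)
b = rng.normal(size=n_hidden)                        # MLP biases ~ N(0, 1)

# RBF centers drawn uniformly from the bounding hyperrectangle of X,
# radii = max pairwise center distance / sqrt(2 * n_centers), as in _compute_radii.
mins, maxs = X.min(axis=0), X.max(axis=0)
centers = mins + (maxs - mins) * rng.uniform(size=(n_hidden, n_features))
radii = np.ones(n_hidden) * cdist(centers, centers).max() / np.sqrt(2.0 * n_hidden)

mlp_act = X @ W + b
rbf_act = cdist(X, centers) / radii
input_act = alpha * mlp_act + (1.0 - alpha) * rbf_width * rbf_act
hidden = np.tanh(input_act)                          # default transfer function
print(hidden.shape)                                  # (100, 20)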
def get_field(self, x, y, z):
    """Calculates the magnetic field at point(s) x,y,z due to a 3D magnet

    The calculations are always performed in local coordinates with the
    centre of the magnet at origin and z magnetisation pointing along the
    local z' axis. The rotations and translations are performed first, and
    the internal field calculation functions are called.

    Args:
        x (ndarray): x co-ordinates
        y (ndarray): y co-ordinates
        z (ndarray): z co-ordinates

    Returns:
        tuple: Bx(ndarray), By(ndarray), Bz(ndarray) field vector
    """
    from ..utils._routines3D import _tile_arrays, _apply_mask

    # If any rotation angle is set, transform the data
    if _np.any(
            _np.fabs(
                _np.array([
                    self.alpha_rad,
                    self.beta_rad,
                    self.gamma_rad,
                ])) > Magnet.tol):
        forward_rotation, reverse_rotation = self._generate_rotation_quaternions()

        # Generate 3xN array for quaternion rotation
        pos_vec = Quaternion._prepare_vector(x - self.center[0],
                                             y - self.center[1],
                                             z - self.center[2])

        # Rotate points
        x_rot, y_rot, z_rot = forward_rotation * pos_vec

        # Calls internal child method to calculate the field
        B_local = self._get_field_internal(x_rot, y_rot, z_rot)

        mask = self._generate_mask(x_rot, y_rot, z_rot)
        B_local = _apply_mask(self, B_local, mask)

        # Rearrange the field vectors in a 3xN array for quaternion rotation
        Bvec = Quaternion._prepare_vector(B_local.x, B_local.y, B_local.z)

        # Rotate the local fields back into the global frame using quaternions
        Bx, By, Bz = reverse_rotation * Bvec

        # finally return the fields
        return Bx, By, Bz

    else:
        # Otherwise directly calculate the magnetic fields
        B = self._get_field_internal(x - self.center[0],
                                     y - self.center[1],
                                     z - self.center[2])
        xloc, yloc, zloc = _tile_arrays(x - self.center[0],
                                        y - self.center[1],
                                        z - self.center[2])
        mask = self._generate_mask(xloc, yloc, zloc)
        B = _apply_mask(self, B, mask)
        return B.x, B.y, B.z
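The coordinate-handling pattern used by get_field (shift points into the magnet frame, rotate them, evaluate the field locally, rotate the field vectors back) can be illustrated independently of the magnet library. The sketch below is not this library's API: scipy's Rotation is used as a stand-in for the Quaternion helper, the magnet orientation is an arbitrary choice, and the uniform local field is a toy stand-in for _get_field_internal.

import numpy as np
from scipy.spatial.transform import Rotation

def field_local(x, y, z):
    """Toy stand-in for _get_field_internal: a uniform field along local z'."""
    return np.zeros_like(x), np.zeros_like(y), np.ones_like(z)

center = np.array([1.0, 0.0, 0.0])
rot = Rotation.from_euler("zyx", [30, 0, 0], degrees=True)    # assumed magnet orientation

pts = np.array([[2.0, 0.0, 0.0], [1.0, 1.0, 1.0]])            # global evaluation points
pts_local = rot.inv().apply(pts - center)                     # translate, then rotate into magnet frame
Bx, By, Bz = field_local(*pts_local.T)                        # field in the magnet frame
B_global = rot.apply(np.column_stack((Bx, By, Bz)))           # rotate the field back to the global frame
print(B_global)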