def _sanitize_SchedCpuCapacity(self): """ Add more columns to cpu_capacity data frame if the energy model is available and the platform is big.LITTLE. """ if not self.hasEvents('cpu_capacity') \ or 'nrg_model' not in self.platform \ or not self.has_big_little: return df = self._dfg_trace_event('cpu_capacity') # Add column with LITTLE and big CPUs max capacities nrg_model = self.platform['nrg_model'] max_lcap = nrg_model['little']['cpu']['cap_max'] max_bcap = nrg_model['big']['cpu']['cap_max'] df['max_capacity'] = np.select( [df.cpu.isin(self.platform['clusters']['little'])], [max_lcap], max_bcap) # Add LITTLE and big CPUs "tipping point" threshold tip_lcap = 0.8 * max_lcap tip_bcap = 0.8 * max_bcap df['tip_capacity'] = np.select( [df.cpu.isin(self.platform['clusters']['little'])], [tip_lcap], tip_bcap)
def soft_hard_burst(playercard): ## cards have 1 if sum(np.select([playercard == 1],[playercard]))>0: criterion = sum(np.select([playercard == 1],[playercard])) - 1 + sum(np.select([playercard != 1],[playercard])) if criterion < 11: string="Soft" count =make_score(playercard) #count = playercard[0]+playercard[1]+10 string = string + str(count) if count < 22: return string else: return "burst" elif criterion >10: string = "Hard" count =make_score(playercard) string = string + str(count) if count < 22: return string else: return "burst" ## cards have no 1 elif sum(np.select([playercard == 1],[playercard]))==0: string="Hard" count = sum(playercard) string = string+str(count) if count < 22: return string else: return "burst"
def split_x(x, split_pos): # NOTE: do not support multiple sentence tensors # sequence input , non-sequence input, and no non-sequence input # sequence input: if type(x) is not list: x=[x] if len(x) == 1: # sec1, sec2, sec3,... # sent1, sent2, sent5 x01, x02 = tuple(np.split(x[0],[split_pos])) cond_list=[x02>=0,x02<0] offset = x02[0][0] choice_list=[x02-offset, x02 ] x02 = np.select(cond_list, choice_list) return ([x01],[x02]) # doc1 doc2 doc3 # sec1 sec2 ... # sec1, sec2, ... # sent1, sent2, ... x01, x02 = tuple(np.split(x[0], [split_pos])) offset = x02[0][0] x1, x2 = split_x(x[1:], offset) cond_list = [x02 >= 0, x02 < 0] choice_list = [x02 - offset, x02] x02 = np.select(cond_list, choice_list) return ([x01] + x1, [x02]+x2)
def _sanitize_SchedEnergyDiff(self): if not self.hasEvents('sched_energy_diff') \ or 'nrg_model' not in self.platform: return nrg_model = self.platform['nrg_model'] em_lcluster = nrg_model['little']['cluster'] em_bcluster = nrg_model['big']['cluster'] em_lcpu = nrg_model['little']['cpu'] em_bcpu = nrg_model['big']['cpu'] lcpus = len(self.platform['clusters']['little']) bcpus = len(self.platform['clusters']['big']) SCHED_LOAD_SCALE = 1024 power_max = em_lcpu['nrg_max'] * lcpus + em_bcpu['nrg_max'] * bcpus + \ em_lcluster['nrg_max'] + em_bcluster['nrg_max'] print "Maximum estimated system energy: {0:d}".format(power_max) df = self.df('sched_energy_diff') df['nrg_diff_pct'] = SCHED_LOAD_SCALE * df.nrg_diff / power_max # Tag columns by usage_delta ccol = df.usage_delta df['usage_delta_group'] = np.select( [ccol < 150, ccol < 400, ccol < 600], ['< 150', '< 400', '< 600'], '>= 600') # Tag columns by nrg_payoff ccol = df.nrg_payoff df['nrg_payoff_group'] = np.select( [ccol > 2e9, ccol > 0, ccol > -2e9], ['Optimal Accept', 'SchedTune Accept', 'SchedTune Reject'], 'Suboptimal Reject')
def bin_indexes(self, arr, edgemode=0) : indmin, indmax = self._set_limit_indexes(edgemode) if self._equalbins : factor = float(self._nbins)/(self._edges[-1]-self._edges[0]) nbins1 = self._nbins-1 nparr = (np.array(arr, dtype=self._vtype)-self._edges[0])*factor ind = np.array(np.floor(nparr), dtype=np.int32) return np.select((ind<0, ind>nbins1), (indmin, indmax), default=ind) else : conds = None if self._ascending : conds = np.array([arr<edge for edge in self.binedges()], dtype=np.bool) else : conds = np.array([arr>edge for edge in self.binedges()], dtype=np.bool) inds1d = range(-1, self._nbins) inds1d[0] = indmin # re-define index for underflow inds = np.array(len(arr)*inds1d, dtype=np.int32) inds.shape = (len(arr),self._nbins+1) inds = inds.transpose() #print 'indmin, indmax = ', indmin, indmax #print 'XXX conds:\n', conds #print 'XXX inds:\n', inds return np.select(conds, inds, default=indmax)
def pseudo_colr_amount(self): """ Calculate pseudo Cost-of-Living Refund amount. Note this is simply meant to illustrate a Python programming technique; this function does NOT calculate an exact Cost-of-Living Refund amount. See setting of parameters above in specify_pseudo_COLR_policy method. """ recs = self.__records # create MARS-specific policy parameter arrays mars_indicators = [recs.MARS == 1, recs.MARS == 2, recs.MARS == 3, recs.MARS == 4, recs.MARS == 5] colr_c = np.select(mars_indicators, self.colr_param['COLR_c']) colr_ps = np.select(mars_indicators, self.colr_param['COLR_ps']) colr_rt = self.colr_param['COLR_rt'] colr_prt = self.colr_param['COLR_prt'] # compute colr_amt amt_pre_phaseout = np.minimum(recs.e00200 * colr_rt, colr_c) phaseout = np.maximum((recs.c00100 - colr_ps) * colr_prt, 0.) colr_amt = np.maximum(amt_pre_phaseout - phaseout, 0.) setattr(recs, 'colr_amount', colr_amt) # reduce income and combined taxes because COLR is a refundable credit recs.iitax -= colr_amt recs.combined -= colr_amt # delete local arrays used only in this method del mars_indicators del colr_c del colr_ps del amt_pre_phaseout del phaseout del colr_amt
def zpeaki(source,order=1,fpeak=fhigh): ''' 寻找n阶高/低点 返回值为高点数据序列,以及该高点最大跨度的坐标(即计算该高/低点所需用到的最远的未来数据的坐标) order默认为1,小于1当作1 返回值中第一个是高/低点非0,其余为0的序列 sh 第二个是该高低点的最远未来数据的坐标序列 si 其中 sh[np.nonzero(sh)]为高点序列, si[np.nonzero(sh)]为坐标序列,sif.time[si[np.nonzero(sh)]]为坐标的影响时间序列 ''' tsx1 = fpeak(source) sx1 = np.select([tsx1!=0],[source],0) icovered = rollx(np.arange(len(source)),-1) if order <= 1: return sx1,np.select([tsx1],[icovered],0) icursx = np.nonzero(tsx1)[0] for i in xrange(1,order): #必然进入循环 sxx = source[icursx] tsxx = fpeak(sxx) icovered[icursx] = rollx(icovered[icursx],-1) #当前高/低点的计算范围,即之前顶点的范围左转一位(排除掉不是顶点的) icursx = icursx[np.nonzero(tsxx)[0]] osx = np.zeros_like(source) osx[icursx] = source[icursx] iz = np.zeros_like(source) iz[icursx] = icovered[icursx] #去掉icovered之中不必要的那些数字 return osx,iz
def update_particles(self, delta): # radial: posx + posy norm = numpy.sqrt(self.particle_pos[:, 0] ** 2 + self.particle_pos[:, 1] ** 2) # XXX prevent div by 0 norm = numpy.select([norm == 0], [0.0000001], default=norm) posx = self.particle_pos[:, 0] / norm posy = self.particle_pos[:, 1] / norm radial = numpy.array([posx, posy]) tangential = numpy.array([-posy, posx]) # update dir radial = numpy.swapaxes(radial, 0, 1) radial *= self.particle_rad tangential = numpy.swapaxes(tangential, 0, 1) tangential *= self.particle_tan self.particle_dir += (tangential + radial + self.particle_grav) * delta # update pos with updated dir self.particle_pos += self.particle_dir * delta # life self.particle_life -= delta # color self.particle_color += self.particle_delta_color * delta # if life < 0, set alpha in 0 self.particle_color[:, 3] = numpy.select([self.particle_life[:, 0] < 0], [0], default=self.particle_color[:, 3])
def follow_seller(stock,buy_signal,xstop=25,ret=50,**kwargs): ''' 如果价格小于最近5日高点5%,则卖出 xstop为根据买入价的止损 ret为从高点向下的回退值 ''' t = stock.transaction #从顶下落处理,前5天的收盘/开盘的高者和今天的开盘的高者 回落ret之后 #hhret = gmax(rollx(tmax(gmax(t[OPEN],t[CLOSE]),5),1),t[OPEN])* (1000-ret)/1000 hhret = gmax(rollx(tmax(t[HIGH],5),1),t[OPEN])* (1000-ret)/1000 #hhret = rollx(tmax(t[HIGH],5),1) * (1000-ret)/1000 sdl = t[LOW] < hhret #止损处理2.5% stop_price = extend2next(rollx(stock.buyprice,1) * (1000-xstop)/1000) stopl = t[LOW] < stop_price cut_price = gmin(gmax(hhret,stop_price),t[HIGH]) #首先,止损线和退回线高者先被触及,同时,穿越时可能跳低,所以找与t[HIGH]的低点 cut_signal = gor(sdl,stopl) cut_signal = select([t[VOLUME]>0],[cut_signal]) #默认为0,即未交易的日子卖出信号不能发出,否则会合并到下一交易日 bs = gand(buy_signal,cut_signal) rbs = rollx(bs) sell_signal = select([bs],[0],default=cut_signal) + rbs #如果当日冲销,则后推一日,但如果前一日也是当日,则信号被屏蔽 stock.sellprice = select([cut_signal],[cut_price],default=t[OPEN]) #止损和退回用cut_price, 当日卖出信号平移用开盘价,停牌平移用开盘价 return cut_signal
def getLargVal_man(*inA): inputlen = len(inA) if inputlen == 2: condlist = [ inA[0] > inA[1] ] choicelist = [ inA[0] ] result = np.select(condlist, choicelist, inA[1]) elif inputlen == 3: condlist = [ np.logical_and(inA[0]>inA[1],inA[0]>inA[2]), inA[1]>inA[2] ] choicelist = [ inA[0], inA[1] ] result = np.select(condlist, choicelist, inA[2]) elif inputlen == 4: condlist = [ np.logical_and(inA[0]>inA[1], np.logical_and(inA[0]>inA[2], inA[0]>inA[3])), np.logical_and(inA[1]>inA[2], inA[1]>inA[3]), inA[2]>inA[3] ] choicelist = [ inA[0], inA[1], inA[2] ] result = np.select(condlist, choicelist, inA[3]) else: print("Only up to 4 arrays supported") return result
def vec_dam_break(x, t, h0=1.0, h1=10.0): import math import numpy from anuga import g msg = "Argument x should be a numpy array" assert isinstance(x, numpy.ndarray), msg h2 = calc_h2(h0, h1) u2 = 2.0 * (math.sqrt(g * h1) - math.sqrt(g * h2)) try: s = u2 * h2 / (h2 - h0) except ZeroDivisionError: s = math.sqrt(g * h2) c1 = math.sqrt(g * h1) c2 = math.sqrt(g * h2) condlist = [x < -t * c1, x < t * (u2 - c2), x < s * t] hchoicelist = [h1, 1.0 / g * (2.0 / 3.0 * c1 - 1.0 / 3.0 * x / t) ** 2, h2] uchoicelist = [0.0, 2.0 / 3.0 * (c1 + x / t), u2] h = numpy.select(condlist, hchoicelist, default=h0) u = numpy.select(condlist, uchoicelist, default=0.0) return h, u
def update_particles(self, tick): mtick = tick/1000.0 norm = np.sqrt(self.particle_pos[:,0]**2 + self.particle_pos[:,1]**2) norm = np.select([norm==0], [0.0000001], default=norm) posx = self.particle_pos[:,0]/norm posy = self.particle_pos[:,1]/norm radial = np.array([posx, posy]) tangential = np.array([-posy, posx]) radial = np.swapaxes(radial, 0, 1) radial *= self.particle_rad tangential = np.swapaxes(tangential, 0, 1) tangential *= self.particle_tan self.particle_dir += (tangential + radial + self.particle_grav)*mtick self.particle_pos += self.particle_dir*mtick self.particle_life -= mtick if self.position_type == POSITION_FREE: tuple = np.array(self.origin) tmp = tuple - self.particle_start_pos self.particle_pos -= tmp self.particle_color += self.particle_delta_color*mtick self.particle_color[:,3] = np.select([self.particle_life[:,0] < 0], [0], default=self.particle_color[:,3])
def divideArraysSafely(num, den) : """Per evement divides numpy arrays result = num/den. Protected for 0 values. Arrays should have the same size.""" if num.shape != den.shape : print 'divideArraysSafely: non-equal array shapes for numerator and denumerator: ', num.shape, den.shape num_corr = np.select([den<1], [0], default=num) den_corr = np.select([den<1], [1], default=den) return num_corr/den_corr
def __getitem__(self, key): # If the key is a string, just get the subnode if isinstance(key, str): return self.__getattr__(key) # If the key is a vector, e.g. ['zone_1', 'zone_2', 'zone_1'] elif isinstance(key, np.ndarray): if not np.issubdtype(key.dtype, np.str_): # In case the key is not a string vector, stringify it if key.dtype == object and issubclass(type(key[0]), Enum): enum = type(key[0]) key = np.select([key == item for item in enum], [item.name for item in enum]) elif isinstance(key, EnumArray): enum = key.possible_values key = np.select([key == item.index for item in enum], [item.name for item in enum]) else: key = key.astype('str') names = list(self.dtype.names) # Get all the names of the subnodes, e.g. ['zone_1', 'zone_2'] default = np.full_like(self.vector[key[0]], np.nan) # In case of unexpected key, we will set the corresponding value to NaN. conditions = [key == name for name in names] values = [self.vector[name] for name in names] result = np.select(conditions, values, default) if contains_nan(result): unexpected_key = set(key).difference(self.vector.dtype.names).pop() raise ParameterNotFound('.'.join([self._name, unexpected_key]), self._instant_str) # If the result is not a leaf, wrap the result in a vectorial node. if np.issubdtype(result.dtype, np.record): return VectorialParameterNodeAtInstant(self._name, result.view(np.recarray), self._instant_str) return result
def loyer_retenu(): # loyer mensuel réel, multiplié par 2/3 pour les meublés L1 = round_(loyer * where(statut_occupation == 5, 2 / 3, 1)) zone_apl = simulation.calculate('zone_apl_famille', period) # Paramètres contenant les plafonds de loyer pour cette zone plafonds_by_zone = [[0] + [al.loyers_plafond[ 'zone' + str(zone) ][ 'L' + str(i) ] for zone in range(1, 4)] for i in range(1, 5)] L2_personne_seule = take(plafonds_by_zone[0], zone_apl) L2_couple = take(plafonds_by_zone[1], zone_apl) L2_famille = take(plafonds_by_zone[2], zone_apl) + (al_pac > 1) * (al_pac - 1) * take(plafonds_by_zone[3], zone_apl) L2 = select( [personne_seule * (al_pac == 0) + chambre, al_pac > 0], [L2_personne_seule, L2_famille], default = L2_couple ) # taux à appliquer sur le loyer plafond coeff_chambre_colloc = select( [chambre, coloc], [al.loyers_plafond.chambre, al.loyers_plafond.colocation], default = 1) L2 = round_(L2 * coeff_chambre_colloc, 2) # loyer retenu L = min_(L1, L2) return L
def supdown2(sopen,sclose,shigh,slow): ''' 计算每日的上升行程和下降行程 以距离开盘价距离近的方向为运行方向 则若最低近,运行轨迹为 开盘-->最低-->最高-->收盘 若最高近,运行轨迹为 开盘-->最高-->最低-->收盘 平开往低走 另,如果开盘大于昨日收盘,则上升段 + 开盘-昨收盘 小于昨日收盘,则下降段 + 昨收盘 - 开盘 ''' if len(sopen) == 0: return np.array([],int),np.array([],int) sc1 = rollx(sclose) sc1[0] = sopen[0] #前一日收盘价视同首日开盘价 u_hlc = shigh-sopen+sclose-slow u_lhc = shigh - slow d_hlc = shigh - slow d_lhc = sopen-slow+shigh-sclose ou = np.where(sopen > sc1) od = np.where(sopen < sc1) doc = sopen-sc1 u_hlc[ou] = u_hlc[ou] + doc[ou] u_lhc[ou] = u_lhc[ou] + doc[ou] d_hlc[od] = d_hlc[od] - doc[od] #doc[od]<0 d_lhc[od] = d_lhc[od] - doc[od] #doc[od]<0 is_up = shigh-sopen < sopen-slow #True为向上,False为向下 u = np.select([is_up],[u_hlc],default=u_lhc) d = np.select([is_up],[d_hlc],default=d_lhc) return u,d
def get_intens_for_stat_q_bins(sp, intens_map) : q_map_stat = sp.get_q_map_for_stat_bins() counts = sp.get_counts_for_stat_q_bins() # print 'counts = ', counts intens = bincount(q_map_stat, intens_map, sp.ana_stat_part_q+1) counts_prot = np.select([counts<=0.], [-1.], default=counts) intens_aver = np.select([counts_prot<=0.], [0.], default=intens/counts_prot) return intens_aver
def get_g2_for_dyna_bins_itau(sp, itau) : q_phi_map_dyna = sp.get_q_phi_map_for_dyna_bins() g2_map = sp.get_g2_map_for_itau(itau) intens_dyna = sp.bincount(q_phi_map_dyna, g2_map, sp.npart_dyna) counts = sp.get_counts_for_dyna_bins() counts_prot = np.select([counts==0], [-1], default=counts) sp.g2_for_dyna_bins = np.select([counts_prot<0], [0], default=intens_dyna/counts_prot) return sp.g2_for_dyna_bins
def soft_or_hard(card): if sum(np.select([card == 1],[card]))>0: hyoukajiku = sum(np.select([card == 1],[card])) - 1 + sum(np.select([card != 1],[card])) if hyoukajiku < 11: string="Soft" else: string = "Hard" return str(string) elif sum(np.select([card == 1],[card]))==0: return "Hard"
def get_q_average_for_dyna_bins(sp) : if sp.q_average_dyna != None : return sp.q_average_dyna q_map_masked = sp.get_q_map() * sp.get_mask_total() sum_q_dyna = sp.bincount(sp.get_q_phi_map_for_dyna_bins(), q_map_masked, length=sp.npart_dyna) counts_dyna = sp.get_counts_for_dyna_bins() counts_dyna_prot = np.select([counts_dyna<=0], [-1], counts_dyna) sp.q_average_dyna = np.select([counts_dyna_prot<0], [0], default=sum_q_dyna/counts_dyna_prot) print 'get_q_average_for_dyna_bins():\n', sp.q_average_dyna return sp.q_average_dyna
def histogram(f, p): μ, σ = p binCentre, hist = heights_data() width = binCentre[1]-binCentre[0] bins = binCentre - width / 2 lineColour = white barColour = greenTrans barColourUnder = magentaTrans barColourOver = orangeTrans xmin=130 xmax=210 ymax = 0.08 labelFontSize=13 fig,ax = plt.subplots(figsize=(8, 6), dpi= 80, facecolor=blue1) ax.set_xlim([xmin,xmax]) ax.set_ylim([0,ymax]) ax.set_xlabel("$x$", fontsize=14) ax.set_ylabel("$f(x)$", fontsize=14) xs = np.arange(xmin, xmax, 0.1) f = lambda x,μ,σ: np.exp(-(x-μ)**2/(2*σ**2))/np.sqrt(2*np.pi)/σ fμ = lambda x: f(x, μ, σ) fμxs = fμ(xs) SSR = np.linalg.norm(f(binCentre,μ,σ)-hist)**2 xx = np.arange(xmin,xmax,(xmax-xmin)/100) yy = np.arange(0,ymax,(ymax)/100) X, Y = np.meshgrid(xx, yy) Z = ( (X - (xmax+xmin)/2)**2 + ((xmax-xmin)/ymax / 1.333)**2*(Y - ymax)**2 )**0.5 /( (xmin - (xmax+xmin)/2)**2 + ((xmax-xmin)/ymax / 1.333)**2*(0 - ymax)**2 )**0.5 im = ax.imshow(Z, vmin=0, vmax=1, extent=[xmin, xmax, ymax, 0], cmap=blueMap) ax.set_aspect((xmax-xmin)/ymax / 1.333) histBelow = np.select([hist < fμ(binCentre)], [hist], fμ(binCentre)) histAbove = np.abs(fμ(binCentre)-hist) histAbove1 = np.select([hist < fμ(binCentre)], [histAbove], 0) histAbove2 = np.select([hist >= fμ(binCentre)], [histAbove], 0) # The lines below are different from the code block. Here using 'bins' rather than 'binCentre'. ax.bar(binCentre, histBelow, width=width, color=barColour, edgecolor=green) ax.bar(binCentre, histAbove1, width=width, bottom=histBelow, color=barColourUnder, edgecolor=np.array(magenta)/2) ax.bar(binCentre, histAbove2, width=width, bottom=histBelow, color=barColourOver, edgecolor=orange) ax.plot(xs, fμxs, color=white, linewidth=2) ax.text(132, 0.074, "$\chi^2$ = " + str(SSR), fontsize=14, color=white) fig.show()
def get_q_average_for_dyna_bins(sp) : if sp.q_average_dyna != None : return sp.q_average_dyna q_map_masked = sp.get_q_map() * sp.get_mask_total() sum_q_dyna = sp.bincount(sp.get_q_phi_map_for_dyna_bins(), q_map_masked, length=sp.npart_dyna) counts_dyna = sp.get_counts_for_dyna_bins() counts_dyna_prot = np.select([counts_dyna<=0], [-1], counts_dyna) sp.q_average_dyna = np.select([counts_dyna_prot<0], [0], default=sum_q_dyna/counts_dyna_prot) msg = 'get_q_average_for_dyna_bins():\n' + str(sp.q_average_dyna) logger.info(msg, __name__) #print msg return sp.q_average_dyna
def get_q_average_for_stat_q_bins(sp) : if sp.q_average_stat_q != None : return sp.q_average_stat_q q_map_masked = sp.get_q_map() * sp.get_mask_total() sum_q_stat = bincount(sp.get_q_map_for_stat_bins(), q_map_masked, length=sp.ana_stat_part_q+1) counts_stat_q = sp.get_counts_for_stat_q_bins() counts_stat_q_prot = np.select([counts_stat_q<=0], [-1], counts_stat_q) sp.q_average_stat_q = np.select([counts_stat_q_prot<0], [0], default=sum_q_stat/counts_stat_q_prot) #print 'sp.ana_stat_part_q, sp.q_average_stat_q.shape=', sp.ana_stat_part_q, sp.q_average_stat_q.shape msg = 'get_q_average_for_stat_q_bins():\n' + str(sp.q_average_stat_q) logger.info(msg, __name__) return sp.q_average_stat_q
def evaluate(x, y, amplitude, x_0, y_0): """Two dimensional delta model function using a local rectangular pixel approximation. """ _, grad_x = np.gradient(x) grad_y, _ = np.gradient(y) x_diff = np.abs((x - x_0) / grad_x) y_diff = np.abs((y - y_0) / grad_y) x_val = np.select([x_diff < 1], [1 - x_diff], 0) y_val = np.select([y_diff < 1], [1 - y_diff], 0) return x_val * y_val * amplitude
def test_stochastic_grad_descent(X, y, X_vali,y_vali, alpha=0.05, lambda_reg=50, num_iter=100): num_instances, num_features = X.shape[0], X.shape[1] rand = np.arange(num_instances) theta_p = np.random.rand(num_features) theta_n = np.random.rand(num_features) # theta_p = np.ones(num_features) # theta_n = np.ones(num_features) np.random.shuffle(rand) t = 1 losses = [] for i in range(num_iter): for j in range(num_instances): # alpha = 0.01/t # t = t+1 alpha = 0.01 x = X[rand[j]] yy = y[rand[j]] tmp1 = projected_SGD_p(x,yy,theta_p,theta_n,lambda_reg) theta_p_tmp = theta_p - alpha*tmp1 a = [theta_p_tmp>=0,theta_p_tmp<0] choice = [theta_p_tmp,0] theta_p_tmp = np.select(a,choice) tmp2 = projected_SGD_n(x,yy,theta_p,theta_n,lambda_reg) theta_n_tmp = theta_n - alpha*tmp2 a = [theta_n_tmp>=0,theta_n_tmp<0] choice = [theta_n_tmp,0] theta_n_tmp = np.select(a,choice) theta_p = theta_p_tmp.copy() theta_n = theta_n_tmp.copy() # a = [theta_p>=0,theta_p<0] # choice = [theta_p,0] # theta_p = np.select(a,choice) # # a = [theta_n>=0,theta_n<0] # choice = [theta_n,0] # theta_n = np.select(a,choice) loss = compute_square_loss(X_vali,y_vali,(theta_p-theta_n))[0,0] losses.append(loss) return losses,theta_p-theta_n
def get_norm_factor_map_for_stat_bins(sp, intens_map) : q_phi_map_stat = sp.get_q_phi_map_for_stat_bins() counts = sp.get_counts_for_stat_bins() # print 'counts = ', counts intens = sp.bincount(q_phi_map_stat, intens_map, sp.npart_stat) intens_prot = np.select([intens<=0.], [-1.], default=intens) normf = np.select([intens_prot<=0.], [0.], default=counts/intens_prot) #norm_facotr_map = np.choose(q_phi_map_stat, normf, mode='clip') # DOES NOT WORK! #norm_facotr_map = q_phi_map_stat.choose(normf, mode='clip') # DOES NOT WORK! #norm_facotr_map = np.array(map(lambda i : normf[i], q_phi_map_stat)) # 0.26sec norm_facotr_map = np.array([normf[i] for i in q_phi_map_stat]) # WORKS! # 0.24sec norm_facotr_map.shape = (sp.rows,sp.cols) return norm_facotr_map # sp.get_random_img()
def dm(shigh,slow): ''' 动向计算 通达信公式 HD :=HIGH-REF(HIGH,1); LD :=REF(LOW,1)-LOW; DMP:=EXPMEMA(IF(HD>0&&HD>LD,HD,0),N); DMM:=EXPMEMA(IF(LD>0&&LD>HD,LD,0),N); 这里取消了N的EXP ''' tpdm = subd(shigh) tndm = -subd(slow) pdm = np.select([gand(tpdm>0,tpdm>tndm)],[tpdm],default=0) ndm = np.select([gand(tndm>0,tndm>tpdm)],[tndm],default=0) return pdm,ndm
def get_containment_mask(glon_pos, glat_pos, r_containment, shape): """Get mask from pre-computed containment radius""" hdulist = fits.open('../counts.fits') w = wcs.WCS(hdulist[0].header) y, x = np.indices(shape) glon, glat = w.wcs_pix2world(x, y, 1) # Fix glon and glon_pos glon = np.select([glon > 180, glon <= 180], [glon - 360, glon]) glon_pos = np.select([glon_pos > 180, glon_pos <= 180], [glon_pos - 360, glon_pos]) # Compute containment radius mask = (glon - glon_pos) ** 2 + (glat - glat_pos) ** 2 <= r_containment ** 2 return np.array(np.reshape(mask, shape), dtype=np.int)
def extractfeatures(filename_): # read audio samples input_data = read(RECSPATH+filename_) audio = np.abs(input_data[1]) # Features: ## max: smax=int(np.max(audio)) ## median: median=int(np.median(audio)) #print("median: ",median) ## mean: average=int(np.average(audio).round(decimals=3)) #print("average: ",average) ## sumsq: sumsqtot=int(np.sum(audio**2)) #print("sumtot: ",sumtot) ## sum sq < 25%: condlist1 = [audio<smax/4] choicelist1 = [audio] sumsq25 = int(np.sum(np.select(condlist1, choicelist1))) # *10000/sumtot #sumsq25.round(decimals=5) # print("sumsq25: ",sumsq25) ## sum sq > 75%: condlist2 = [audio>smax/4*3] choicelist2 = [audio**2] sumsq75 = int(np.sum(np.abs(np.select(condlist2, choicelist2)))) # *10000/sumtot # print("sumsq75: ",sumsq75) ## standard deviation: std=int(np.std(audio).round(decimals=3)) # print("std: ",std) ## sum diff: sumdiff=int(np.sum(np.abs(np.diff(audio)))) # print("sumdiff: ",sumdiff) # print(np.diff(audio)) header = ('max','median', 'average', 'sumsqtot', 'sumsq25', 'sumsq75', 'std', 'sumdiff') results = [smax, median, average, sumsqtot, sumsq25, sumsq75, std, sumdiff] #print(header) #print(results) return results
f = np.empty([2, 3, 5], dtype=int) print(f) """ #16. Populate the values in f. For each value in d, if it's larger than d_min but smaller than d_mean, assign 25 to the corresponding value in f. If a value in d is larger than d_mean but smaller than d_max, assign 75 to the corresponding value in f. If a value equals to d_mean, assign 50 to the corresponding value in f. Assign 0 to the corresponding value(s) in f for d_min in d. Assign 100 to the corresponding value(s) in f for d_max in d. In the end, f should have only the following values: 0, 25, 50, 75, and 100. Note: you don't have to use Numpy in this question. """ conditions = [(d > d_min) & (d < d_mean), (d > d_mean) & (d < d_max), d == d_mean, d == d_min, d == d_max] replacements = [25.0, 75.0, 50.0, 0.0, 100.0] f = np.select(conditions, replacements) print(f) """ #17. Print d and f. Do you have your expected f? For instance, if your d is: array([[[1.85836099, 1.67064465, 1.62576044, 1.40243961, 1.88454931], [1.75354326, 1.69403643, 1.36729252, 1.61415071, 1.12104981], [1.72201435, 1.1862918 , 1.87078449, 1.7726778 , 1.88180042]], [[1.44747908, 1.31673383, 1.02000951, 1.52218947, 1.97066381], [1.79129243, 1.74983003, 1.96028037, 1.85166831, 1.65450881], [1.18068344, 1.9587381 , 1.00656599, 1.93402165, 1.73514584]]]) Your f should be: array([[[ 75., 75., 75., 25., 75.], [ 75., 75., 25., 25., 25.],
# calculate BMI as per the formula clean_df[ 'BMI Range (kg/m2)'] = clean_df['Weight (Kg)'] % clean_df['Height (cm)']**2 bmi_category_lst = [ 'Underweight', 'Normal weight', 'Overweight', 'Moderately obese', 'Severely obese', 'Very severely obese' ] health_risk_lst = [ 'Malnutrition risk', 'Low risk', 'Enhanced risk', 'Medium risk', 'High risk', 'Very high risk' ] bmi_check = [(clean_df['BMI Range (kg/m2)'] < '18.4'), (clean_df['BMI Range (kg/m2)'] < '24.9'), (clean_df['BMI Range (kg/m2)'] < '29.9'), (clean_df['BMI Range (kg/m2)'] < '39.9'), (clean_df['BMI Range (kg/m2)'] > '40')] # add two new column to existing csv file clean_df['BMI Category'] = np.select(bmi_check, bmi_category_lst) clean_df['Health risk'] = np.select(bmi_check, health_risk_lst) # calculate total overweight people overweight_people = clean_df[(clean_df['BMI Range (kg/m2)'] > 25) & (clean_df['BMI Range (kg/m2)'] < 29.5)] total_overweight_people = len(overweight_people) # create new output csv file with selected existing column from input csv clean_df = pd.read_csv( 'Data_for_BMI_Calculator_Height_Weight.csv', usecols=['BMI Category', 'BMI Range (kg/m2)', 'Health risk']) clean_df.to_csv('Output_for_BMI_Calculator_Height_Weight.csv', index=False)
# column 내 특정 단어가 들어간 행들만 필터링하고, # 그 열들을 숫자로 전처리하고 싶을 때 사용하는 코드 # 라이브러리 불러오기 import pandas as pd import numpy as np # 필터링 기준 설정 condition_li = [ (sample_df['대상이 될 column 명'].str.contains('column 내 값들 중 필터링하고 싶은 단어 1')), (sample_df['대상이 될 column 명'].str.contains('column 내 값들 중 필터링하고 싶은 단어 2')), (sample_df['대상이 될 column 명'].str.contains('column 내 값들 중 필터링하고 싶은 단어 3')) ] # 필터링 조건에 해당하는 열들을 변환할 값 지정 value_li = [1, 2, 3] # 새로운 열 생성 sample_df['새로 생성될 column 명'] = np.select(condition_li, value_li)
# including comorbodities. This can be adjusted depending on data availability. del models['NEWS2 + DBPC'] fitted = {} for label, features in models.items(): fitted[label] = test_model(label, validation) # Test threshold model -------------------------------------------------------- thresholds = define_thresholds(validation) final = ['news2', 'crp', 'neutrophils', 'estimatedgfr', 'albumin'] y = validation['y'] X = validation[['age'] + final] # Dichotomise, based on decision tree for f in final: if f != 'age': v = thresholds[f] X[f + '_bin'] = np.select(v['conditions'], v['choices']) print(X[f + '_bin'].value_counts()) # Impute, based on continuous variables imputer = KNNImputer() X = pd.DataFrame(imputer.fit_transform(X), columns=list(X)) X = X[['age'] + [f + '_bin' for f in final]] # Load pre-trained model and predict clf = load('training/trained_models/' + 'clf_THRESHOLD.joblib') y_pred = clf.predict(X) y_prob = clf.predict_proba(X)[:, 1] # Save fitted['THRESHOLD'] = { 'clf': clf, 'X': X, 'y': y, 'y_pred': y_pred,
K = int(sys.argv[1]) #拟合几次多项式 t = np.arange(N - 1, len(bhp_returns)) poly_bhp = np.polyfit(t, smooth_bhp, K) #np.plotfit()返回拟合多项式的系数 poly_vale = np.polyfit(t, smooth_vale, K) poly_sub = np.polysub(poly_bhp, poly_vale) #np.plotsub()计算两个多项式的差构成的多项式 poly_sub_val = np.polyval(poly_sub, t) poly_der = np.polyder(poly_bhp) poly_der_val = np.polyval(poly_der, t) xpoints = np.roots(poly_sub) #np.roots()计算多项式的根 print "Intersection points", xpoints reals = np.isreal(xpoints) #np.isreal()判断多项式的根是否为实数,并返回布尔值 print "Real number?", reals xpoints = np.select( [reals], [xpoints]) #np.select([reals],[xpoints])将xpoints数组中实数值返回,虚数值返回0 xpoints = xpoints.real #返回数组中标签为实数的值 print "Real intersection points", xpoints print "Sans 0s", np.trim_zeros(xpoints) #np.trim_zeros()去除一维数组中开头和末尾的0元素 plt.figure() plt.subplot(3, 1, 1) plt.plot(t, bhp_returns[N - 1:], lw=1.0, label='bhp_return normal data') plt.plot(t, smooth_bhp, lw=2.0, label='hanning smoothng') plt.grid(True) plt.title('bhp_returns polyfit and smoothing') plt.legend(loc='best') plt.ylim(-0.05, 0.05, 2) plt.yticks([-0.04, -0.02, 0.00, 0.02, 0.04], ['100M', '200M', '300M', '400M', '500M']) plt.subplot(3, 1, 2)
def execute(context): df = pd.read_csv("%s/population/full_population.csv" % context.config("data_path")) df_households = pd.read_csv("%s/population/full_households.csv" % context.config("data_path")) p_seed = pd.read_csv("%s/population/psam_p06.csv" % context.config("data_path")) #p_x = p_seed[p_seed["PUMA"].isin([11106,11103,11104,11105,11102,11101,6514,6511,7106,7108,7109,7110,7114,7113,6501,6515,5911,5902,5906,5917,5916,5910,5909,5913,5907,5904,5912,5905,6505,6506,6507,6502,6503,6504,6509,6508,6512,6510,6513,7101,7102,7103,7104,7105,7107,7112,7111,7115,5914,5901,5903,5918,5908,5915,3726,3705,3763,3762,3729,3724,3758,3725,3703,3704,3702,3709,3710,3711,3757,3750,3747,3706,3768,3760,3767,3765,3769,3766,3701,3748,3728,3727,3707,3761,3759,3749,3708,3719,3722,3717,3764,3731,3718,3738,3715,3716,3713,3712,3735,3756,3751,3730,3723,3720,3736,3714,3746,3721,3737,3742,3754,3755,3752,3745,3732,3739,3753,3734,3733,3740,3744,3741,3743])] #p_x = p_seed[p_seed["PUMA"].isin([7313,7321,7315,7305,7314,7317,7303,7312,7311,7306,7318,7308,7302,7319,7322,7310,7307,7320,7316,7309,7304,7301])] #San Diego h_seed = pd.read_csv("%s/population/psam_h06.csv" % context.config("data_path")) p_attr = pd.DataFrame() p_attr["hid"] = p_seed["SERIALNO"] p_attr["employment"] = p_seed["ESR"] p_attr["school"] = p_seed["SCH"] p_attr["school_grade"] = p_seed["SCHG"] p_attr["pid"] = p_seed["SPORDER"] p_attr["age"] = p_seed["AGEP"] df = df.merge(p_attr,how="left",on=["hid","pid"]) h_attr = pd.DataFrame() h_attr["hid"] = h_seed["SERIALNO"] h_attr["income"] = h_seed["HINCP"] # define attributes to merge h_attr["VEH"] = h_seed["VEH"] df_households = df_households.merge(h_attr,how="left",on="hid") df_ptaccessible_zones = context.stage("data.osm.add_pt_variable")[1] # Put person IDs df.loc[:, "person_id"] = df["unique_person_id"] df.loc[:, "weight"] = 1 # Spatial df["zone_id"] = df["geo"].astype(np.str) df["geo"] = df["zone_id"] df["zone_id"] = df["zone_id"].astype(np.int64) df_ptaccessible_zones["zone_id"] = df_ptaccessible_zones["zone_id"].astype(np.int64) #merge information if the residence zone is pt accessible df = pd.merge(df,df_ptaccessible_zones, on=["zone_id"], how="left") #join persons and households df_households["geo"] = df_households["geo"].astype(str) df_households["unique_id_in_geo"] = df_households["unique_id_in_geo"].astype(str) df["unique_id_in_geo"] = df["unique_id_in_geo"].astype(str) df = pd.merge(df,df_households,on=["geo","unique_id_in_geo"],how='left') # Attributes df.loc[df["pgender"] == 1, "sex"] = "male" df.loc[df["pgender"] == 2, "sex"] = "female" df["sex"] = df["sex"].astype("category") df["__employment"] = df["employment"] df["employment"] = "no" df.loc[df["__employment"] == 1, "employment"] = "yes" df.loc[df["__employment"] == 2, "employment"] = "yes" df.loc[df["__employment"] == 4, "employment"] = "yes" df.loc[df["__employment"] == 5, "employment"] = "yes" df.loc[df["__employment"] == 3, "employment"] = "no" df.loc[df["__employment"] == 6, "employment"] = "no" df.loc[df['school'].isna(), "employment"] = "student" df.loc[df["school"] == 2, "employment"] = "student" df.loc[df["school"] == 3, "employment"] = "student" df["employment"] = df["employment"].astype("category") #df["age"] = df["page"].astype(np.int) ##SF and LA conditions_ageclass = [(df["age"] < 16), (df["age"] >= 16) & (df["age"] <= 25), (df["age"] >= 26) & (df["age"] <= 35), (df["age"] >= 36) & (df["age"] <= 45), (df["age"] >= 46) & (df["age"] <= 55), (df["age"] >= 56) & (df["age"] <= 65), (df["age"] >= 66) & (df["age"] < 79), (df["age"] >= 
80) ] ##SD if (len(context.config("counties")) < 3): conditions_ageclass = [(df["age"] < 16), (df["age"] >= 16) & (df["age"] <= 25), (df["age"] >= 26) & (df["age"] <= 45), (df["age"] >= 46) & (df["age"] <= 65), (df["age"] >= 66)] #SF and LA choices_ageclass = ['1', '2', '3','4', '5', '6', '7', '8'] #SD if (len(context.config("counties")) < 3): choices_ageclass = ['1', '2', '3','4', '5'] df["age_class_hts"] = np.select(conditions_ageclass, choices_ageclass, default='1') df["age_class_hts"] = df["age_class_hts"].astype(int) #SD df["number_of_vehicles"] = df["VEH"] #SF/LA #df["number_of_vehicles"] = df["hhlvehic"] - 1 # Household size df_size = df[["unique_housing_id"]].groupby("unique_housing_id").size().reset_index(name = "household_size") df = pd.merge(df, df_size) df["household_type"] = df["hhltype"] df["household_id"] = df["unique_housing_id"] df.loc[df["income"] == 0, "income"] = 5000.0 if (context.config("region") == "la"): df["home_region"]=0 df.loc[df["zone_id"].astype(np.str).str.contains('6037', regex=False), "home_region"] = 1 df.loc[df["zone_id"].astype(np.str).str.contains('6071', regex=False), "home_region"] = 2 df.loc[df["zone_id"].astype(np.str).str.contains('6059', regex=False), "home_region"] = 3 df.loc[df["zone_id"].astype(np.str).str.contains('6065', regex=False), "home_region"] = 4 df.loc[df["zone_id"].astype(np.str).str.contains('6111', regex=False), "home_region"] = 5 elif (context.config("region") == "sf"): df["home_region"]=df["zone_id"].astype(np.str).str.contains('6075', regex=False) df.loc[df["home_region"]==True, "home_region"] = 1 df.loc[df["home_region"]==False, "home_region"] = 0 else: raise Exception("This region name (%s) is not supported, Try one of the following [sf, la]" % context.config("region")) # Clean up df = df[[ "person_id", "household_id", "household_type", "household_size", "number_of_vehicles", "weight", "zone_id", "age", "sex", "employment","age_class_hts", "income", "home_region", "pt_accessible" ]] # remove all people outside of the study area df_zones = context.stage("data.spatial.zones") zone_ids = set(np.unique(df_zones["zone_id"])) df = df[df["zone_id"].isin(zone_ids)] return df
#xoffset, yoffset = 300, 300 #xsize, ysize = 1150, 1150 #xoffset, yoffset = 0, 0 #xsize, ysize = (1920, 1920) #(768,704) # 800, 800 # (704, 768) xoffset, yoffset = 600, 600 xsize, ysize = 700, 700 # Pixel image indexes iX = np.array(det.indexes_x(runnum), dtype=np.int64) #- xoffset iY = np.array(det.indexes_y(runnum), dtype=np.int64) #- yoffset # Protect indexes (should be POSITIVE after offset subtraction) imRow = np.select([iX < xoffset], [0], default=iX - xoffset) imCol = np.select([iY < yoffset], [0], default=iY - yoffset) # Pixel coordinates [um] (transformed as needed) Xum = det.coords_y(runnum) Yum = -det.coords_x(runnum) # Derived pixel raduius in [um] and angle phi[degree] Rum = np.sqrt(Xum * Xum + Yum * Yum) Phi = np.arctan2(Yum, Xum) * 180 / np.pi imRow.shape = imCol.shape = \ Xum.shape = Yum.shape = \ Rum.shape = Phi.shape = det.shape() #------------------------------
def select(condlist, choicelist, default=0): raw_array = _np.select(list(condlist), list(choicelist), default=default) return array(list(raw_array.ravel())).reshape(raw_array.shape)
def gsea_significance(enrichment_scores, enrichment_nulls): """Compute nominal pvals, normalized ES, and FDR q value. For a given NES(S) = NES* >= 0. The FDR is the ratio of the percentage of all (S,pi) with NES(S,pi) >= 0, whose NES(S,pi) >= NES*, divided by the percentage of observed S wih NES(S) >= 0, whose NES(S) >= NES*, and similarly if NES(S) = NES* <= 0. """ # For a zero by zero division (undetermined, results in a NaN), # np.seterr(divide='ignore', invalid='ignore') import warnings warnings.simplefilter("ignore") logging.debug("Start to compute pvals..................................") # compute pvals. enrichmentPVals = gsea_pval(enrichment_scores, enrichment_nulls).tolist() # new normalize enrichment score calculating method. this could speed up significantly. esnull_meanPos = [] esnull_meanNeg = [] es = np.array(enrichment_scores) esnull = np.array(enrichment_nulls) for i in range(len(enrichment_scores)): enrNull = esnull[i] meanPos = enrNull[enrNull >= 0].mean() esnull_meanPos.append(meanPos) meanNeg = enrNull[enrNull < 0].mean() esnull_meanNeg.append(meanNeg) pos = np.array(esnull_meanPos).reshape(len(es), 1) neg = np.array(esnull_meanNeg).reshape(len(es), 1) # compute normalized enrichment score and normalized esnull logging.debug("Compute normalized enrichment score and normalized esnull") try: condlist1 = [es >= 0, es < 0] choicelist1 = [es / esnull_meanPos, -es / esnull_meanNeg] nEnrichmentScores = np.select(condlist1, choicelist1).tolist() condlist2 = [esnull >= 0, esnull < 0] choicelist2 = [esnull / pos, -esnull / neg] nEnrichmentNulls = np.select(condlist2, choicelist2) except: #return if according nes, nesnull is uncalculable nEnrichmentScores = np.repeat(0.0, es.size).tolist() nEnrichmentNulls = np.repeat(0.0, es.size).reshape(esnull.shape) logging.debug("start to compute fdrs..................................") # FDR null distribution histogram # create a histogram of all NES(S,pi) over all S and pi # Use this null distribution to compute an FDR q value, # vals = reduce(lambda x,y: x+y, nEnrichmentNulls, []) # nvals = np.array(sorted(vals)) # or nvals = np.sort(nEnrichmentNulls.flatten()) nnes = np.array(sorted(nEnrichmentScores)) fdrs = [] # FDR computation for i in range(len(enrichment_scores)): nes = nEnrichmentScores[i] if nes >= 0: allPos = int(len(nvals) - np.searchsorted(nvals, 0, side="left")) allHigherAndPos = int( len(nvals) - np.searchsorted(nvals, nes, side="left")) nesPos = len(nnes) - int(np.searchsorted(nnes, 0, side="left")) nesHigherAndPos = len(nnes) - int( np.searchsorted(nnes, nes, side="left")) else: allPos = int(np.searchsorted(nvals, 0, side="left")) allHigherAndPos = int(np.searchsorted(nvals, nes, side="right")) nesPos = int(np.searchsorted(nnes, 0, side="left")) nesHigherAndPos = int(np.searchsorted(nnes, nes, side="right")) try: pi_norm = allHigherAndPos / float(allPos) #p value pi_obs = nesHigherAndPos / float(nesPos) fdr = pi_norm / pi_obs if pi_norm / pi_obs < 1.0 else 1.0 fdrs.append(fdr) except: fdrs.append(1000000000.0) logging.debug("Statistical testing finished.............................") return zip(enrichment_scores, nEnrichmentScores, enrichmentPVals, fdrs)
def unload(request) -> HttpResponse: if request.method == "POST": form = request.POST.get("date") form2 = request.POST.get("high") form3 = request.POST.get("low") form4 = request.POST.get("max_day") form5 = request.POST.get("min_day") con = sqlalchemy.create_engine( 'sqlite:////Users/Yeldos/PycharmProjects/no_related/no_relates/db.sqlite3' ) # Connect to db df = pd.read_sql("SELECT * FROM myapp_information WHERE Visit_count>0", con) df_data = pd.read_sql( "SELECT * FROM myapp_additional WHERE Visit_status='Клиент пришёл' and Visit_data <" + "'" + form + "'" + "", con) df_sum = df[[ 'Number', 'Visit_count', 'Income' ]].groupby(by='Number').sum() #Сложение income и visits_number df_sum.reset_index(inplace=True) df_by_data = df_data[['Number', 'Visit_data']].groupby(by='Number').max() merged = pd.merge(df_sum, df_by_data[['Visit_data']], on='Number', how='inner') merged["days"] = (pd.to_datetime(merged["Visit_data"]).sub( pd.Timestamp(form)).dt.days) * -1 conditions = [ (merged["days"] <= np.percentile(merged["days"], 33)), # Меньше 109 (merged["days"] <= np.percentile(merged["days"], 66)), # Меньше 209 (merged["days"] >= np.percentile(merged["days"], 66)) # Больше 209 ] values = ['3', '2', '1'] conditions2 = [ (merged["Visit_count"] <= float(form5)), (merged["Visit_count"] >= float(form4)), (merged["Visit_count"] > float(form5)) | (merged["Visit_count"] < float(form4)), ] values2 = ['1', '3', '2'] conditions3 = [ (merged["Income"] <= float(form3)), # Меньше чем 20000 (merged["Income"] >= float(form2)), # Больше чем 80000 (merged["Income"] > float(form3)) | (merged["Income"] < float(form2)) ] values3 = ['1', '3', '2'] merged["R"] = np.select(conditions, values) merged["F"] = np.select(conditions2, values2) merged["M"] = np.select(conditions3, values3) merged["RFM"] = merged["R"] + merged["F"] + merged["M"] conditions4 = [(merged["RFM"] == '333'), (merged["RFM"] == '121') | (merged["RFM"] == '122') | (merged["RFM"] == '211') | (merged["RFM"] == '212') | (merged["RFM"] == '221') | (merged["RFM"] == '231') | (merged["RFM"] == '222') | (merged["RFM"] == '321'), (merged["RFM"] == '113'), (merged["RFM"] == '112') | (merged["RFM"] == '131'), (merged["RFM"] == '111'), (merged["RFM"] == '311') | (merged["RFM"] == '312'), (merged["RFM"] == '132') | (merged["RFM"] == '133') | (merged["RFM"] == '232') | (merged["RFM"] == '233') | (merged["RFM"] == '332') | (merged["RFM"] == '232') | (merged["RFM"] == '322') | (merged["RFM"] == '331'), (merged["RFM"] == '123') | (merged["RFM"] == '213') | (merged["RFM"] == '223') | (merged["RFM"] == '313') | (merged["RFM"] == '323')] values4 = [ 'ЯДРО', 'Стандарт', 'Сонные киты', 'Сони', 'Потерянные', 'Новички', 'Лояльные', 'Киты' ] merged["LABEL"] = np.select(conditions4, values4) with BytesIO() as b: writer = pd.ExcelWriter(b, engine='xlsxwriter') merged.to_excel(writer, sheet_name='Sheet1') writer.save() filename = 'test' content_type = 'application/vnd.ms-excel' response = HttpResponse(b.getvalue(), content_type=content_type) response[ 'Content-Disposition'] = 'attachment; filename="' + filename + '.xlsx"' return response else: userform = UserForm() high = IncomeHigh() low = IncomeLow() max_day = Max_day() min_day = Min_day() return render( request, "index.html", { "form": userform, "form2": high, "form3": low, "form4": max_day, "form5": min_day })
fraud.loc[(fraud['City.Population'] < lower_lim4)] = lower_lim4 fraud.loc[(fraud['Work.Experience'] > upper_lim5)] = upper_lim5 fraud.loc[(fraud['Work.Experience'] < lower_lim5)] = lower_lim5 fraud.loc[(fraud['Taxable.Income'] > upper_lim6)] = upper_lim6 fraud.loc[(fraud['Taxable.Income'] < lower_lim6)] = lower_lim6 ###### Let's check Binning:It can be applied on both categorical and numerical data: #####The main aim of binning is to make the model more robust and prevent overfitting, however, it has a cost to the performance ####Numerical Binning fraud['bin1'] = pd.cut(fraud['City.Population'], bins=[25779,50000,199778], labels=["Good","Risky"]) fraud['bin2'] = pd.cut(fraud['Work.Experience'], bins=[0,10,30], labels=["Low","Good"]) fraud['bin3'] = pd.cut(fraud['Taxable.Income'], bins=[10000,30000,99619], labels=["Good","Risky"]) conditions = [ fraud['Undergrad'].str.contains('NO'), fraud['Undergrad'].str.contains('YES')] choices=['1','2'] fraud['choices']=np.select(conditions,choices,default='Other') conditions1 = [ fraud['Marital.Status'].str.contains('Single'), fraud['Marital.Status'].str.contains('Divorced'), fraud['Marital.Status'].str.contains('Married')] choices1= ['1','2','3'] fraud['choices1']=np.select(conditions1,choices1,default='Other') conditions2 = [ fraud['Work.Experience'].str.contains('NO'), fraud['Work.Experience'].str.contains('YES')] choices2= ['1','2'] fraud['choices2']=np.select(conditions2,choices2,default='Other') ###Log Transform- It helps to handle skewed data and after transformation, the distribution becomes more approximate to normal. ###It also decreases the effect of the outliers, due to the normalization of magnitude differences and the model become more robust. fraud = pd.DataFrame({'City.Population':fraud.iloc[:,2]}) fraud['log+1'] = (fraud['City.Population']+1).transform(np.log)
def adtm(self, n: int = 23, m: int = 8): """动态买卖气指数 (https://bkso.baidu.com/item/%E5%8A%A8%E6%80%81%E4%B9%B0%E5%8D%96%E6%B0%94%E6%8C%87%E6%A0%87) 规则: 1.如果开盘价≤昨日开盘价,DTM=0 如果开盘价>昨日开盘价,DTM=(最高价-开盘价)和(开盘价-昨日开盘价)的较大值 2.如果开盘价≥昨日开盘价,DBM=0 如果开盘价<昨日开盘价,DBM=(开盘价-最低价)和(开盘价-昨日开盘价)的较大值 3.STM=DTM在N日内的和 4.SBM=DBM在N日内的和 5.如果STM>SBM,ADTM=(STM-SBM)/STM 如果STM<SBM,ADTM=(STM-SBM)/SBM 如果STM=SBM,ADTM=0 6.ADTMMA=ADTM的M日简单移动平均 7.参数N设置为23日,参数M设置为8日 参考值: 1.ADTM指标在+1到-1之间波动。 2.低于-0.5时为低风险区,高于+0.5时为高风险区,需注意风险。 3.ADTM上穿ADTMMA时,买入股票;ADTM跌穿ADTMMA时,卖出股票。 """ df_adtm = self._df.loc[:, ADTM_COLS] df_adtm.loc[:, "open_diff"] = df_adtm["open"] - df_adtm["open"].shift(1) df_adtm.loc[:, "high_open_diff"] = df_adtm["high"] - df_adtm["open"] df_adtm.loc[:, "open_low_diff"] = df_adtm["open"] - df_adtm["low"] df_adtm = ( df_adtm.assign( dtm=lambda x: np.where( x["open_diff"] > 0, np.where( x["high_open_diff"] >= x["open_low_diff"], x["high_open_diff"], x["open_low_diff"], ), 0, ) ) ).assign(dbm=lambda x: np.where(x["open_diff"] >= 0, 0, x["open_low_diff"])) df_adtm.loc[:, "stm"] = df_adtm["dtm"].rolling(n).sum() df_adtm.loc[:, "sbm"] = df_adtm["dbm"].rolling(n).sum() df_adtm = df_adtm.assign( adtm=lambda x: np.select( condlist=[ x["stm"] > x["sbm"], x["stm"] < x["sbm"], x["stm"] == x["sbm"], ], choicelist=[ (x["stm"] - x["sbm"]) / x["stm"], (x["stm"] - x["sbm"]) / x["sbm"], 0, ], ) ) df_adtm.loc[:, "adtmma"] = self._ma(col="adtm", n=m, df=df_adtm) return df_adtm
def evaluate(x, amplitude, x_0, width): """One dimensional Box model function""" return np.select( [np.logical_and(x >= x_0 - width / 2., x <= x_0 + width / 2.)], [amplitude], 0)
def execute_simple_case_series(op, value, whens, thens, otherwise, **kwargs): if otherwise is None: otherwise = np.nan raw = np.select([value == when for when in whens], thens, otherwise) return wrap_case_result(raw, op.to_expr())
def evaluate(x, y, amplitude, x_0, y_0, r_in, width): """Two dimensional Ring model function.""" rr = (x - x_0)**2 + (y - y_0)**2 r_range = np.logical_and(rr >= r_in**2, rr <= (r_in + width)**2) return np.select([r_range], [amplitude])
def execute_simple_case_scalar(op, value, whens, thens, otherwise, **kwargs): if otherwise is None: otherwise = np.nan raw = np.select(np.asarray(whens) == value, thens, otherwise) return wrap_case_result(raw, op.to_expr())
def evaluate(x, y, amplitude, x_0, y_0, R_0): """Two dimensional Disk model function""" rr = (x - x_0)**2 + (y - y_0)**2 return np.select([rr <= R_0**2], [amplitude])
smooth_tencent = np.convolve(weights, tencent_returns)[N-1:-N+1] t=np.arange(N-1, len(bidu_returns)) plt.clf() plt.plot(t, bidu_returns[N-1:], lw=1.0, label='bidu_returns') plt.plot(t, smooth_bidu, lw=2.0, label='bidu_smooth') plt.plot(t, tencent_returns[N-1:], lw=1.0, label='tencent_returns') plt.plot(t, smooth_tencent, lw=2.0, label='tencent_smooth') plt.legend(loc='upper left') plt.savefig('images/hanning_smooth.png', format='png') K=3 t=np.arange(N-1, len(tencent_returns)) poly_bidu = np.polyfit(t, smooth_bidu, K) poly_tencent = np.polyfit(t, smooth_tencent, K) poly_sub = np.polysub(poly_bidu, poly_tencent) xpoints = np.roots(poly_sub) print "Intersection points:", xpoints reals = np.isreal(xpoints) print "Real number?", reals xpoints = np.select([reals], [xpoints]) xpoints = xpoints.real print "Real intersection points", xpoints print "Sans 0s", np.trim_zeros(xpoints)
def graph3_4(): def mds_special(): font = "Arial" axisColor = "#000000" gridColor = "#DEDDDD" return { "config": { "title": { "fontSize": 24, "font": font, "anchor": "start", # equivalent of left-aligned. "fontColor": "#000000" }, 'view': { "height": 300, "width": 400 }, "axisX": { "domain": True, #"domainColor": axisColor, "gridColor": gridColor, "domainWidth": 1, "grid": False, "labelFont": font, "labelFontSize": 12, "labelAngle": 0, "tickColor": axisColor, "tickSize": 5, # default, including it just to show you can change it "titleFont": font, "titleFontSize": 16, "titlePadding": 10, # guessing, not specified in styleguide "title": "X Axis Title (units)", }, "axisY": { "domain": False, "grid": True, "gridColor": gridColor, "gridWidth": 1, "labelFont": font, "labelFontSize": 14, "labelAngle": 0, #"ticks": False, # even if you don't have a "domain" you need to turn these off. "titleFont": font, "titleFontSize": 16, "titlePadding": 10, # guessing, not specified in styleguide "title": "Y Axis Title (units)", # titles are by default vertical left of axis so we need to hack this #"titleAngle": 0, # horizontal #"titleY": -10, # move it up #"titleX": 18, # move it to the right so it aligns with the labels }, } } # register the custom theme under a chosen name alt.themes.register('mds_special', mds_special) # enable the newly registered theme alt.themes.enable('mds_special') #alt.themes.enable('none') # to return to default # Wrangling data crime_data_n = crime_data crime_data_n['avg_hatecrimes_fbi_10days'] = ( (crime_data_n['avg_hatecrimes_per_100k_fbi'] / 365) * 10) crime_data_n['prop'] = (crime_data_n['hate_crimes_per_100k_splc'] - crime_data_n['avg_hatecrimes_fbi_10days'] ) / crime_data_n['avg_hatecrimes_fbi_10days'] mean_crime = crime_data_n['avg_hatecrimes_fbi_10days'].mean() conditions = [(crime_data_n['avg_hatecrimes_fbi_10days'] <= mean_crime), (crime_data_n['avg_hatecrimes_fbi_10days'] > mean_crime)] choices = ['low baseline crime rate', 'high baseline crime rate'] crime_data_n['crime_rate_bracket'] = np.select(conditions, choices) crime_data_n['diff_hatecrime'] = ( crime_data_n['hate_crimes_per_100k_splc'] - crime_data_n['avg_hatecrimes_fbi_10days']) crime_data_sorted_trump = crime_data_n.sort_values( by='share_voters_voted_trump') state_selector = alt.selection_multi(fields=['state']) # Create the plots l = alt.Chart( crime_data_n, title="States with low baseline crime rate").mark_bar().encode( alt.X('state:N', title='', axis=alt.Axis(labelAngle=-45)), alt.Y('prop:Q', title='Rate of change of hate crime pre and post election'), color=alt.condition( state_selector, alt.ColorValue("steelblue"), alt.ColorValue("grey"))).transform_filter( (datum.crime_rate_bracket == 'low baseline crime rate')) h = alt.Chart( crime_data_n, title="States with high baseline crime rate").mark_bar().encode( alt.X('state:N', axis=alt.Axis(labelAngle=-45), title=''), alt.Y('prop:Q', title='Rate of change of hate crime pre and post election', scale=alt.Scale(domain=[0, 30])), color=alt.condition( state_selector, alt.ColorValue("steelblue"), alt.ColorValue("grey"))).transform_filter( (datum.crime_rate_bracket == 'high baseline crime rate' )).properties(width=500) heatmap = alt.Chart(crime_data_sorted_trump, width=450).mark_rect().encode( alt.X('state', sort=None, title=" ", axis=alt.Axis(labelAngle=-45)), alt.Y('share_voters_voted_trump', title="Share of Trump voters (%)"), alt.Color('diff_hatecrime', title="Change in hate crime rate (%)"), tooltip=[ alt.Tooltip('state', title='State'), 
alt.Tooltip('hate_crimes_per_100k_splc', title="Hate crime rate 10 days after election"), alt.Tooltip('avg_hatecrimes_fbi_10days', title="Average rate of hate crime (for 10 days") ]).properties(width=1000).add_selection(state_selector) return alt.vconcat(heatmap, l | h)
def log_decoding_CanonLog3(clog3, bit_depth=10, in_normalised_code_value=True, out_reflection=True, **kwargs): """ Defines the *Canon Log 3* log decoding curve / electro-optical transfer function. Parameters ---------- clog3 : numeric or array_like *Canon Log 3* non-linear data. bit_depth : int, optional Bit depth used for conversion. in_normalised_code_value : bool, optional Whether the *Canon Log 3* non-linear data is encoded with normalised code values. out_reflection : bool, optional Whether the light level :math:`x` to a camera is reflection. Other Parameters ---------------- \\**kwargs : dict, optional Keywords arguments for deprecation management. Returns ------- numeric or ndarray Linear data :math:`x`. Notes ----- +------------+-----------------------+---------------+ | **Domain** | **Scale - Reference** | **Scale - 1** | +============+=======================+===============+ | ``clog3`` | [0, 1] | [0, 1] | +------------+-----------------------+---------------+ +------------+-----------------------+---------------+ | **Range** | **Scale - Reference** | **Scale - 1** | +============+=======================+===============+ | ``x`` | [0, 1] | [0, 1] | +------------+-----------------------+---------------+ References ---------- :cite:`Canona` Examples -------- >>> log_decoding_CanonLog3(34.338936938868677 / 100) # doctest: +ELLIPSIS 0.1800000... """ in_normalised_code_value = handle_arguments_deprecation({ 'ArgumentRenamed': [['in_legal', 'in_normalised_code_value']], }, **kwargs).get('in_normalised_code_value', in_normalised_code_value) clog3 = to_domain_1(clog3) clog3 = (legal_to_full(clog3, bit_depth) if in_normalised_code_value else clog3) x = np.select( (clog3 < 0.04076162, clog3 <= 0.105357102, clog3 > 0.105357102), (-(10 ** ((0.07623209 - clog3) / 0.42889912) - 1) / 14.98325, (clog3 - 0.073059361) / 2.3069815, (10 ** ((clog3 - 0.069886632) / 0.42889912) - 1) / 14.98325)) if out_reflection: x = x * 0.9 return as_float(from_range_1(x))
def pullandcat(inputstatement, outputstatement, inputdate): trans = pd.read_csv('/Users/luisgoate/Desktop/Budget/{0}.csv'.format(inputstatement), header = None) trans.columns = ['Date', 'Vendor', 'Amount'] date = datetime.strptime(inputdate, '%d/%m/%Y') #Format date trans['Date'] = pd.to_datetime(trans['Date'], format='%d/%m/%Y') #Choose relevant dates trans = trans[trans['Date'] >= date ] #Only include rows that have '-' to capture expenditures trans = trans[trans.Amount.str.contains("-")] #Format amounts trans['Amount'] = trans['Amount'].str[1:] trans['Amount'] = trans['Amount'].astype(float) #Categrorise data conditions = [ #Groceries (trans['Vendor'].str.contains('TESCO') == True), (trans['Vendor'].str.contains('GATHER') == True), (trans['Vendor'].str.contains('SAINS') == True), (trans['Vendor'].str.contains('M&S') == True), (trans['Vendor'].str.contains('MANGER') == True), (trans['Vendor'].str.contains('DELIVEROO') == True), #Socialising (trans['Vendor'].str.contains('CAFE') == True), (trans['Vendor'].str.contains('PUB') == True), #Travel (trans['Vendor'].str.contains('TFL') == True), (trans['Vendor'].str.contains('UBER') == True), (trans['Vendor'].str.contains('LUL') == True), #Rent (trans['Vendor'].str.contains('8BFORDAM') == True), #Gym (trans['Vendor'].str.contains('PURE') == True), #Mobile (trans['Vendor'].str.contains('MOBILE') == True)] choices = ['Groceries', 'Groceries', 'Groceries', 'Groceries', 'Groceries', 'Groceries', 'Socialising', 'Socialising', 'Travel', 'Travel', 'Travel', 'Rent', 'Gym', 'Mobile'] trans['Category'] = np.select(conditions, choices, default='Other') #Pivot columns = ['Date', 'Vendor', 'Category'] excel_dump = pd.pivot_table(trans, values = 'Amount' , index = columns, aggfunc = np.sum) #Export excel_dump.to_excel('/Users/luisgoate/Desktop/Budget/{0}.xlsx'.format(outputstatement))
(point_table['bye'] == 1), (point_table['total_matches'] == 4) & (point_table['matches_won'] == 3) & (point_table['bye'] == 1), (point_table['total_matches'] == 4) & (point_table['matches_won'] == 4) & (point_table['bye'] == 1), (point_table['total_matches'] == 2) & (point_table['matches_won'] == 1) & (point_table['bye'] == 0), (point_table['total_matches'] == 3) & (point_table['matches_won'] == 2) & (point_table['bye'] == 0), (point_table['total_matches'] == 4) & (point_table['matches_won'] == 3) & (point_table['bye'] == 0), (point_table['total_matches'] == 5) & (point_table['matches_won'] == 4) & (point_table['bye'] == 0), (point_table['total_matches'] == 5) & (point_table['matches_won'] == 5) & (point_table['bye'] == 0), (point_table['total_matches'] == 1) & (point_table['matches_won'] == 0) ] choices = [4, 7, 11, 16, 3, 7, 10, 14, 19, 1] point_table['points'] = np.select(conditions, choices, default=-1) point_table2 = point_table.drop( ['home_win', 'away_win', 'home_loss', 'away_loss', 'matches_drawn', 'bye'], axis=1) # ### calcolo bonus df_bonus = pd.read_csv(path + "bonus_mn_mtg.csv", sep=";").fillna(0) df_bonus[ 'bonus'] = df_bonus.bigscore + df_bonus.bigchk + df_bonus.darts18 + df_bonus.darts15 * 2 + df_bonus.darts12 * 3 + df_bonus.other monday_night = point_table2.merge(df_bonus, how='left', on=['season', 'round', 'player']).fillna(0).drop(['data'], axis=1)
def get_joyner_boore_distance(self, mesh): """ See :meth:`superclass' method <openquake.hazardlib.geo.surface.base.BaseSurface.get_joyner_boore_distance>`. This is an optimized version specific to planar surface that doesn't make use of the mesh. """ # we define four great circle arcs that contain four sides # of projected planar surface: # # ↓ II ↓ # I ↓ ↓ I # ↓ + ↓ # →→→→→TL→→→→1→→→→TR→→→→→ → azimuth direction → # ↓ - ↓ # ↓ ↓ # III -3+ IV -4+ III ↓ # ↓ ↓ downdip direction # ↓ + ↓ ↓ # →→→→→BL→→→→2→→→→BR→→→→→ # ↓ - ↓ # I ↓ ↓ I # ↓ II ↓ # # arcs 1 and 2 are directed from left corners to right ones (the # direction has an effect on the sign of the distance to an arc, # as it shown on the figure), arcs 3 and 4 are directed from top # corners to bottom ones. # # then we measure distance from each of the points in a mesh # to each of those arcs and compare signs of distances in order # to find a relative positions of projections of points and # projection of a surface. # # then we consider four special cases (labeled with Roman numerals) # and either pick one of distances to arcs or a closest distance # to corner. # # indices 0, 2 and 1 represent corners TL, BL and TR respectively. arcs_lons = self.corner_lons.take([0, 2, 0, 1]) arcs_lats = self.corner_lats.take([0, 2, 0, 1]) downdip_azimuth = (self.strike + 90) % 360 arcs_azimuths = [ self.strike, self.strike, downdip_azimuth, downdip_azimuth ] mesh_lons = mesh.lons.reshape((-1, 1)) mesh_lats = mesh.lats.reshape((-1, 1)) # calculate distances from all the target points to all four arcs dists_to_arcs = geodetic.distance_to_arc(arcs_lons, arcs_lats, arcs_azimuths, mesh_lons, mesh_lats) # ... and distances from all the target points to each of surface's # corners' projections (we might not need all of those but it's # better to do that calculation once for all). dists_to_corners = geodetic.min_geodetic_distance( self.corner_lons, self.corner_lats, mesh.lons.flatten(), mesh.lats.flatten()) # extract from ``dists_to_arcs`` signs (represent relative positions # of an arc and a point: +1 means on the left hand side, 0 means # on arc and -1 means on the right hand side) and minimum absolute # values of distances to each pair of parallel arcs. ds1, ds2, ds3, ds4 = numpy.sign(dists_to_arcs).transpose() dists_to_arcs = numpy.abs(dists_to_arcs).reshape(-1, 2, 2).min(axis=-1) jb_dists = numpy.select( # consider four possible relative positions of point and arcs: condlist=[ # signs of distances to both parallel arcs are the same # in both pairs, case "I" on a figure above (ds1 == ds2) & (ds3 == ds4), # sign of distances to two parallels is the same only # in one pair, case "II" ds1 == ds2, # ... or another (case "III") ds3 == ds4 # signs are different in both pairs (this is a "default"), # case "IV" ], choicelist=[ # case "I": closest distance is the closest distance to corners dists_to_corners, # case "II": closest distance is distance to arc "1" or "2", # whichever is closer dists_to_arcs[:, 0], # case "III": closest distance is distance to either # arc "3" or "4" dists_to_arcs[:, 1] ], # default -- case "IV" default=0) return jb_dists.reshape(mesh.lons.shape)
# data = pd.read_csv('covid_impact_education.csv', parse_dates=[["Date"]],
#                    date_parser=lambda x: pd.to_datetime(x, format="%d%m%Y"),
#                    )
# 'Date' is parsed explicitly below, so read_csv needs no date handling here
data = pd.read_csv('covid_impact_education.csv')
data.loc[:, 'Date'] = pd.to_datetime(data['Date'], format='%d/%m/%Y')
data['Date'] = data['Date'].dt.strftime('%m/%d/%Y')
data = data.fillna("")
# Map the closure scale onto the S1 ordinal coding: Open -> 0, Localized -> 1,
# National -> 2 (anything else falls back to 0)
conditions = [(data['Scale'] == 'Open'), (data['Scale'] == 'National'),
              (data['Scale'] == 'Localized')]
choices = [0, 2, 1]
data['S1_School closing'] = np.select(conditions, choices, default=0)
numericcodes = pd.read_json("codes.json", dtype={'numeric': object})
# Assigning to the row yielded by iterrows() never writes back to the frame,
# so cast the column directly instead
numericcodes['numeric'] = numericcodes['numeric'].astype(str)
merged = pd.merge(data, numericcodes, left_on='ISO', right_on='alpha_3')
merged = merged.groupby(['Date'])
perDate = {}
for date, group in merged:
    perDate[date] = group.to_dict(orient='records')
with open('sorted_school_data.json', 'w') as fp:
    json.dump(perDate, fp)
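# Toy version (made-up rows, same labels and 0/1/2 coding as above) of the
# category-to-ordinal mapping: np.select tries the conditions in order, and any
# 'Scale' value outside the three known labels falls back to the default of 0.
import numpy as np
import pandas as pd

demo = pd.DataFrame({'Scale': ['Open', 'National', 'Localized', '']})
conditions = [demo['Scale'] == 'Open',
              demo['Scale'] == 'National',
              demo['Scale'] == 'Localized']
choices = [0, 2, 1]
demo['S1_School closing'] = np.select(conditions, choices, default=0)
print(demo)   # -> 0, 2, 1, 0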
def execute_searched_case(op, whens, thens, otherwise, **kwargs): if otherwise is None: otherwise = np.nan raw = np.select(whens, thens, otherwise) return wrap_case_result(raw, op.to_expr())
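# Hedged stand-alone sketch of what the searched-CASE execution boils down to,
# without the ibis wrapper (op, wrap_case_result and to_expr are ibis
# internals): np.select returns the THEN value of the first WHEN condition that
# holds, and `otherwise` acts as the ELSE branch. The data below is invented.
import numpy as np

x = np.array([5, 15, 25, 35])
whens = [x < 10, x < 20, x < 30]                 # WHEN ... conditions, checked in order
thens = ['low', 'mid', 'high']                   # ... THEN values
print(np.select(whens, thens, default='other'))  # ELSE branch -> ['low' 'mid' 'high' 'other']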
def get_min_distance(self, mesh): """ See :meth:`superclass' method <openquake.hazardlib.geo.surface.base.BaseSurface.get_min_distance>`. This is an optimized version specific to planar surface that doesn't make use of the mesh. """ # we project all the points of the mesh on a plane that contains # the surface (translating coordinates of the projections to a local # 2d space) and at the same time calculate the distance to that # plane. dists, xx, yy = self._project(mesh.lons, mesh.lats, mesh.depths) # the actual resulting distance is a square root of squares # of a distance from a point to a plane that contains the surface # and a distance from a projection of that point on that plane # and a surface rectangle. we have former (``dists``), now we need # to find latter. # # we process separately two coordinate components of the point # projection. for abscissa we consider three possible cases: # # I . III . II # . . # 0-----+ → x axis direction # | | # +-----+ # . . # . . # mxx = numpy.select( condlist=[ # case "I": point on the left hand side from the rectangle xx < 0, # case "II": point is on the right hand side xx > self.length # default -- case "III": point is in between vertical sides ], choicelist=[ # case "I": we need to consider distance between a point # and a line containing left side of the rectangle xx, # case "II": considering a distance between a point and # a line containing the right side xx - self.length ], # case "III": abscissa doesn't have an effect on a distance # to the rectangle default=0) # for ordinate we do the same operation (again three cases): # # I # - - - 0---+ - - - ↓ y axis direction # III | | # - - - +---+ - - - # II # myy = numpy.select( condlist=[ # case "I": point is above the rectangle top edge yy < 0, # case "II": point is below the rectangle bottom edge yy > self.width # default -- case "III": point is in between lines containing # top and bottom edges ], choicelist=[ # case "I": considering a distance to a line containing # a top edge yy, # case "II": considering a distance to a line containing # a bottom edge yy - self.width ], # case "III": ordinate doesn't affect the distance default=0) # distance between a point project and a rectangle combines from # both components dists2d_squares = mxx**2 + myy**2 # finding a resulting distance combining a distance on a plane # with a distance to a plane return numpy.sqrt(dists**2 + dists2d_squares)
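# Small stand-alone check (not part of the original module) that the per-axis
# np.select above is the usual clamp-to-interval trick: choosing xx, xx - length
# or 0 is the same as xx - clip(xx, 0, length). The sample values are made up.
import numpy as np

length = 3.0
xx = np.array([-2.0, 1.5, 5.0])
mxx = np.select(condlist=[xx < 0, xx > length],
                choicelist=[xx, xx - length],
                default=0)
assert np.allclose(mxx, xx - np.clip(xx, 0, length))
print(mxx)   # [-2.  0.  2.]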
'Gene1', 'Gene2', 'Gene1 ID', 'Gene2 ID', 'Fusion Inspector', 'Pred' ]] else: selectSF = combSF[[ 'Gene1', 'Gene2', 'Gene1 ID', 'Gene2 ID', 'Fusion Inspector', 'P_VAL_CORR', 'DRIVER_PROB', 'EXPRESSION_GAIN', 'Pred' ]] x = pd.concat([selectFC, selectSF], axis=0) x = x.astype(str) x = x.groupby(['Gene1', 'Gene2', 'Gene1 ID', 'Gene2 ID']).agg('|'.join) x.reset_index(inplace=True) conditions = [(x['Pred'] == 'Catch|Star'), (x['Pred'] == 'Star'), (x['Pred'] == 'Catch')] choices = ['Both', 'Star', 'Catcher'] x['Predicted By'] = np.select(conditions, choices) #x.drop(['Pred'], axis=1, inplace=True) x['Sample'] = sys.argv[1].split('/')[1] x.to_csv(path_or_buf=sys.argv[7], sep='\t', index=False) combFC.drop(['Pred'], axis=1, inplace=True) combSF.drop(['Pred'], axis=1, inplace=True) combFC['Sample'] = sys.argv[1].split('/')[1] combSF['Sample'] = sys.argv[1].split('/')[1] combFC.to_csv(path_or_buf=sys.argv[8], sep='\t') combSF.to_csv(path_or_buf=sys.argv[9], sep='\t') ofsf = oncoFuseSF[[ '5_FPG_GENE_NAME', '3_FPG_GENE_NAME', 'P_VAL_CORR', 'DRIVER_PROB', 'EXPRESSION_GAIN'
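# Toy illustration (invented rows) of a subtlety in the np.select call above:
# it has no explicit default, so if a 'Pred' value ever matched none of the
# three conditions -- the concat order should normally prevent that -- the row
# would get numpy's default of 0, rendered as the string '0' in this string
# column. Passing default='Unknown' (or similar) makes the fallback explicit.
import numpy as np
import pandas as pd

toy = pd.DataFrame({'Pred': ['Catch|Star', 'Star', 'Catch', 'Star|Catch']})
conditions = [toy['Pred'] == 'Catch|Star',
              toy['Pred'] == 'Star',
              toy['Pred'] == 'Catch']
choices = ['Both', 'Star', 'Catcher']
toy['Predicted By'] = np.select(conditions, choices)                     # last row -> '0'
toy['Predicted By (explicit)'] = np.select(conditions, choices, default='Unknown')
print(toy)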
def log_encoding_CanonLog3(x,
                           bit_depth=10,
                           out_normalised_code_value=True,
                           in_reflection=True,
                           **kwargs):
    """
    Defines the *Canon Log 3* log encoding curve / opto-electronic transfer
    function.

    Parameters
    ----------
    x : numeric or array_like
        Linear data :math:`x`.
    bit_depth : int, optional
        Bit depth used for conversion.
    out_normalised_code_value : bool, optional
        Whether the *Canon Log 3* non-linear data is encoded as normalised
        code values.
    in_reflection : bool, optional
        Whether the light level :math:`x` to a camera is reflection.

    Other Parameters
    ----------------
    \\**kwargs : dict, optional
        Keyword arguments for deprecation management.

    Returns
    -------
    numeric or ndarray
        *Canon Log 3* non-linear data.

    Notes
    -----
    -   Introspection of the grafting points by Shaw, N. (2018) shows that the
        *Canon Log 3* IDT was likely derived from its encoding curve as the
        latter is grafted at *+/-0.014*::

            >>> clog3 = 0.04076162
            >>> (clog3 - 0.073059361) / 2.3069815
            -0.014000000000000002
            >>> clog3 = 0.105357102
            >>> (clog3 - 0.073059361) / 2.3069815
            0.013999999999999997

    +------------+-----------------------+---------------+
    | **Domain** | **Scale - Reference** | **Scale - 1** |
    +============+=======================+===============+
    | ``x``      | [0, 1]                | [0, 1]        |
    +------------+-----------------------+---------------+

    +------------+-----------------------+---------------+
    | **Range**  | **Scale - Reference** | **Scale - 1** |
    +============+=======================+===============+
    | ``clog3``  | [0, 1]                | [0, 1]        |
    +------------+-----------------------+---------------+

    References
    ----------
    :cite:`Canona`

    Examples
    --------
    >>> log_encoding_CanonLog3(0.18) * 100  # doctest: +ELLIPSIS
    34.3389369...
    """

    out_normalised_code_value = handle_arguments_deprecation({
        'ArgumentRenamed': [['out_legal', 'out_normalised_code_value']],
    }, **kwargs).get('out_normalised_code_value', out_normalised_code_value)

    x = to_domain_1(x)

    if in_reflection:
        x = x / 0.9

    with domain_range_scale('ignore'):
        clog3 = np.select(
            (x < log_decoding_CanonLog3(0.04076162, bit_depth, False, False),
             x <= log_decoding_CanonLog3(0.105357102, bit_depth, False, False),
             x > log_decoding_CanonLog3(0.105357102, bit_depth, False, False)),
            (-0.42889912 * np.log10(-x * 14.98325 + 1) + 0.07623209,
             2.3069815 * x + 0.073059361,
             0.42889912 * np.log10(x * 14.98325 + 1) + 0.069886632))

    clog3 = (full_to_legal(clog3, bit_depth)
             if out_normalised_code_value else clog3)

    return as_float(from_range_1(clog3))
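# Generic three-branch piecewise sketch in the same shape as the Canon Log 3
# encoding above: np.select evaluates the conditions element-wise and keeps the
# first match, so the two breakpoints split the domain into a lower shoulder, a
# linear middle segment and an upper shoulder. The toy curve and breakpoints
# here are assumptions for illustration only, not the Canon Log 3 constants.
import numpy as np

def toy_piecewise_encoding(x, lo=-0.014, hi=0.014):
    x = np.asarray(x, dtype=float)
    return np.select(
        (x < lo, x <= hi, x > hi),
        (-np.log10(1 - (x - lo)) + lo,    # lower log shoulder
         x,                               # linear segment between the breakpoints
         np.log10(1 + (x - hi)) + hi))    # upper log shoulder

print(toy_piecewise_encoding([-0.1, 0.0, 0.18]))  # continuous across both breakpoints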
def generate_baselines(pred, pred_ci, pred_dy, pred_dy_ci, label): # Get Weekly predicted table - mean prediction from the model is used as Weekly Baseline W_predicted = pd.DataFrame() W_predicted['W_baseline'] = pred.predicted_mean W_predicted['W_lower_bl'] = pred_ci.iloc[:, 0] try: adjusted_baseline = min( pred_ci[pred_ci['lower TaskCount'] > 0]['lower TaskCount']) W_predicted['W_lower_bl'] = np.where(W_predicted['W_lower_bl'] <= 0, adjusted_baseline, W_predicted['W_lower_bl']) except: W_predicted['W_lower_bl'] = np.where(W_predicted['W_lower_bl'] <= 0, W_predicted['W_baseline'], W_predicted['W_lower_bl']) W_predicted['W_upper_bl'] = pred_ci.iloc[:, 1] W_predicted['month_num'] = pd.Series( W_predicted.index).dt.strftime('%m').astype(int).tolist() W_predicted['month'] = pd.Series( W_predicted.index).dt.month_name().str.slice( stop=3).tolist() #.dt.strftime('%m') W_predicted['week_in_month'] = pd.to_numeric(W_predicted.index.day / 7) W_predicted['week_in_month'] = W_predicted['week_in_month'].apply( lambda x: math.ceil(x)) W_predicted['Language'] = label W_baseline = W_predicted.groupby([ 'Language', 'month_num', 'month', 'week_in_month' ]).mean().reset_index().sort_values(['month_num', 'week_in_month']) W_baseline = W_baseline[[ 'Language', 'month', 'week_in_month', 'W_baseline', 'W_lower_bl', 'W_upper_bl' ]] # Get Monthly predicted table - aggregated from the Weekly baseline M_predicted = W_predicted[['W_baseline', 'W_lower_bl', 'W_upper_bl']].resample('M').sum() M_predicted.columns = ['M_baseline', 'M_lower_bl', 'M_upper_bl'] M_predicted['Language'] = label M_predicted['month_num'] = pd.Series( M_predicted.index).dt.strftime('%m').astype(int).tolist() M_predicted['month'] = pd.Series( M_predicted.index).dt.month_name().str.slice(stop=3).tolist() M_baseline = M_predicted.groupby( ['Language', 'month_num', 'month']).mean().reset_index().sort_values('month_num') M_baseline = M_baseline[[ 'Language', 'month', 'M_baseline', 'M_lower_bl', 'M_upper_bl' ]] # Get Quarterly predicted table - aggregated from the Monthly baseline Q_predicted = M_predicted[['M_baseline', 'M_lower_bl', 'M_upper_bl']].resample('Q').sum() Q_predicted.columns = ['Q_baseline', 'Q_lower_bl', 'Q_upper_bl'] Q_predicted['Language'] = label Q_predicted['month_num'] = pd.Series( Q_predicted.index).dt.strftime('%m').astype(int).tolist() Q_predicted['month'] = pd.Series( Q_predicted.index).dt.month_name().str.slice(stop=3).tolist() Q_baseline = Q_predicted.groupby( ['Language', 'month_num', 'month']).mean().reset_index().sort_values(['month_num']) conditions = [ Q_baseline['month_num'] == 3, Q_baseline['month_num'] == 6, Q_baseline['month_num'] == 9, Q_baseline['month_num'] == 12 ] quarter_name, quarter_num = ['Q1', 'Q2', 'Q3', 'Q4'], [1, 2, 3, 4] Q_baseline['quarter'] = np.select(conditions, quarter_name, default=np.nan) Q_baseline['q'] = np.select(conditions, quarter_num, default=np.nan).astype(int) Q_baseline = Q_baseline[[ 'Language', 'quarter', 'Q_baseline', 'Q_lower_bl', 'Q_upper_bl' ]] W_forecast = pd.DataFrame() W_forecast['W_forecast'] = pred_dy.predicted_mean W_forecast['W_lower_fc'] = pred_dy_ci.iloc[:, 0] try: adjusted_baseline_2 = min( pred_dy_ci[pred_dy_ci['lower TaskCount'] > 0]['lower TaskCount']) W_forecast['W_lower_fc'] = np.where(W_forecast['W_lower_fc'] <= 0, adjusted_baseline_2, W_forecast['W_lower_fc']) except: W_forecast['W_lower_fc'] = np.where(W_forecast['W_lower_fc'] <= 0, W_forecast['W_forecast'], W_forecast['W_lower_fc']) W_forecast['W_upper_fc'] = pred_dy_ci.iloc[:, 1] W_forecast['month_num'] = 
pd.Series( W_forecast.index).dt.strftime('%m').astype(int).tolist() W_forecast['month'] = pd.Series( W_forecast.index).dt.month_name().str.slice( stop=3).tolist() #.dt.strftime('%m') W_forecast['week_in_month'] = pd.to_numeric(W_forecast.index.day / 7) W_forecast['week_in_month'] = W_forecast['week_in_month'].apply( lambda x: math.ceil(x)) W_forecast['Language'] = label W_forecast = W_forecast[[ 'Language', 'month', 'week_in_month', 'W_forecast', 'W_lower_fc', 'W_upper_fc' ]] M_forecast = W_forecast[['W_forecast', 'W_lower_fc', 'W_upper_fc']].resample('M').sum() M_forecast.columns = ['M_forecast', 'M_lower_fc', 'M_upper_fc'] M_forecast['Language'] = label M_forecast['month_num'] = pd.Series( M_forecast.index).dt.strftime('%m').astype(int).tolist() M_forecast['month'] = pd.Series( M_forecast.index).dt.month_name().str.slice(stop=3).tolist() M_forecast = M_forecast[[ 'Language', 'month', 'M_forecast', 'M_lower_fc', 'M_upper_fc' ]] Q_forecast = M_forecast[['M_forecast', 'M_lower_fc', 'M_upper_fc']].resample('Q').sum() Q_forecast.columns = ['Q_forecast', 'Q_lower_fc', 'Q_upper_fc'] Q_forecast['Language'] = label Q_forecast['month_num'] = pd.Series( Q_forecast.index).dt.strftime('%m').astype(int).tolist() Q_forecast['month'] = pd.Series( Q_forecast.index).dt.month_name().str.slice(stop=3).tolist() conditions = [ Q_forecast['month_num'] == 3, Q_forecast['month_num'] == 6, Q_forecast['month_num'] == 9, Q_forecast['month_num'] == 12 ] quarter_name, quarter_num = ['Q1', 'Q2', 'Q3', 'Q4'], [1, 2, 3, 4] Q_forecast['quarter'] = np.select(conditions, quarter_name, default=np.nan) Q_forecast['q'] = np.select(conditions, quarter_num, default=np.nan).astype(int) Q_forecast = Q_forecast.sort_values(['month_num']) Q_forecast = Q_forecast[[ 'Language', 'quarter', 'Q_forecast', 'Q_lower_fc', 'Q_upper_fc' ]] W_baseline[['W_baseline', 'W_lower_bl', 'W_upper_bl' ]] = W_baseline[['W_baseline', 'W_lower_bl', 'W_upper_bl']].astype(int) M_baseline[['M_baseline', 'M_lower_bl', 'M_upper_bl' ]] = M_baseline[['M_baseline', 'M_lower_bl', 'M_upper_bl']].astype(int) Q_baseline[['Q_baseline', 'Q_lower_bl', 'Q_upper_bl' ]] = Q_baseline[['Q_baseline', 'Q_lower_bl', 'Q_upper_bl']].astype(int) W_forecast[['W_forecast', 'W_lower_fc', 'W_upper_fc' ]] = W_forecast[['W_forecast', 'W_lower_fc', 'W_upper_fc']].astype(int) M_forecast[['M_forecast', 'M_lower_fc', 'M_upper_fc' ]] = M_forecast[['M_forecast', 'M_lower_fc', 'M_upper_fc']].astype(int) Q_forecast[['Q_forecast', 'Q_lower_fc', 'Q_upper_fc' ]] = Q_forecast[['Q_forecast', 'Q_lower_fc', 'Q_upper_fc']].astype(int) return W_baseline, M_baseline, Q_baseline, W_forecast, M_forecast, Q_forecast
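# Toy rerun (invented frame) of the month-to-quarter mapping used for the
# baseline and forecast tables above. Quarterly resampling only ever produces
# month numbers 3, 6, 9 and 12, which is what makes the .astype(int) after a
# np.nan default safe; any other month would fall through to NaN, which the
# integer cast cannot represent.
import numpy as np
import pandas as pd

q = pd.DataFrame({'month_num': [3, 6, 9, 12]})
conditions = [q['month_num'] == m for m in (3, 6, 9, 12)]
q['quarter'] = np.select(conditions, ['Q1', 'Q2', 'Q3', 'Q4'], default=np.nan)
q['q'] = np.select(conditions, [1, 2, 3, 4], default=np.nan).astype(int)
print(q)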
def main():
    data = np.loadtxt("test_data.txt")
    # time series (with missing samples)
    arr1 = data[0, :]
    arr2 = data[1, :]
    # index values
    arr1_index = np.linspace(0, len(arr1), len(arr1),
                             endpoint=False).astype(int)  # np.int was removed from NumPy; plain int is equivalent
    eliminating = Eliminating(arr1, arr2, arr1_index)
    eliminating.spline_smoothing()
    num1 = []
    # First detect whether the data needs to be split into segments, and
    # collect the segment split positions.
    f = eliminating.zhengjian_test(arr1, arr2, arr1_index)
    if f != 0:
        num1.append(f)
        for i in range(10):
            f = eliminating.zhengjian_test(arr1[f:], arr2[f:], arr1_index[f:])
            if f != 0:
                num1.append(f)
            else:
                break
    # result without segmenting the data
    if len(num1) == 0:
        result1, k = eliminating.zhengjian(num1)
        result2, k1 = eliminating.nijian(num1)
        # The test runs in both directions: if both directions judge a point
        # as non-anomalous the point is normal, if both judge it anomalous the
        # point is an outlier, and if the two directions disagree the point
        # needs further testing.
        result3 = result1 + result2
        result3 = np.select([result3 == 2, result3 == 1], [1, 2], result3)
        result3[0:k - 3] = result2[0:k - 3]
        result3[k1 + 4:] = result1[k1 + 4:]
        result3[k - 3:k + 1] = 1
        result3[k1:k1 + 4] = 1
    # result after segmenting the data
    else:
        num2 = sorted(num1 + [0, len(arr1)])
        result1, list_k1 = eliminating.zhengjian(num2)
        result2, list_k2 = eliminating.nijian(num2)
        # Same bidirectional rule as above: agreement on "normal" keeps the
        # point, agreement on "anomalous" rejects it, and disagreement marks
        # it for further testing.
        result3 = result1 + result2
        result3 = np.select([result3 == 2, result3 == 1], [1, 2], result3)
        list_k1 = sorted(list_k1)
        list_k2 = sorted(list_k2)
        for i in range(len(num2) - 1):
            result3[num2[i]:list_k1[i] - 3] = result2[num2[i]:list_k1[i] - 3]
            result3[list_k2[i] + 4:num2[i + 1]] = result1[list_k2[i] + 4:num2[i + 1]]
            result3[list_k1[i] - 3:list_k1[i] + 1] = 1
            result3[list_k2[i]:list_k2[i] + 4] = 1
    # further testing
    result = eliminating.pro_test(result3)
    print(np.sum(result == 0))
    # plot of the data after removing the anomalies
    arr_1 = arr1[np.nonzero(result)[0]]
    arr_2 = arr2[np.nonzero(result)[0]]
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    ax1.plot(arr_1, arr_2, 'r-')
    plt.tick_params(labelsize=14)
    ax1.set_xlabel('t/s')
    ax1.set_ylabel(r"$V_x/(m/s)$")
    # residual plot after removing the anomalies
    spline1 = interpolate.UnivariateSpline(arr_1, arr_2, k=3)
    arr3 = spline1(arr_1)
    arr4 = arr_2 - arr3
    fig2 = plt.figure()
    ax2 = fig2.add_subplot(111)
    ax2.plot(arr_1, arr4, 'r-')
    ax2.set_xlabel('t/s')
    ax2.set_ylabel(r"$\sigma(V_x)/(m/s)$")
    plt.yticks([-20, 0, 20])
    plt.tick_params(labelsize=14)
    plt.show()
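# Stand-alone sketch (made-up arrays) of the voting step in main(): each
# direction marks a sample 1 for "keep" and 0 for "anomalous", and np.select
# recodes the element-wise sum so that 2 -> 1 (both agree it is normal),
# 1 -> 2 (the two directions disagree, so re-test) and 0 stays 0 (both agree
# it is an outlier).
import numpy as np

forward = np.array([1, 1, 0, 0])
backward = np.array([1, 0, 1, 0])
combined = forward + backward
combined = np.select([combined == 2, combined == 1], [1, 2], combined)
print(combined)   # [1 2 2 0]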
def up_seller(stock, buy_signal, xstop=25, ret=50, **kwargs):
    ''' If the buy day closes as a down candle (close < open), sell at the open.
        If the price retreats from the recent 5-day high by more than ret, sell.
        xstop is the stop-loss (in thousandths) measured from the buy price.
        ret is the retreat (in thousandths) allowed below the recent high.
    '''
    t = stock.transaction
    # Down-candle handling
    sol = rollx(gand(buy_signal, t[CLOSE] < t[OPEN]), 1)
    # Fall-from-the-top handling: take the higher of the previous 5 days' high
    # and today's open, then allow a retreat of ret thousandths below it
    #hhret = gmax(rollx(tmax(gmax(t[OPEN],t[CLOSE]),5),1),t[OPEN])* (1000-ret)/1000
    hhret = gmax(rollx(tmax(t[HIGH], 5), 1), t[OPEN]) * (1000 - ret) / 1000
    sdl = t[LOW] < hhret
    # Stop-loss handling at xstop thousandths (2.5% by default)
    stop_price = extend2next(rollx(stock.buyprice, 1) * (1000 - xstop) / 1000)
    # buyprice must only carry data on buy days, otherwise extend2next is meaningless
    stopl = t[LOW] < stop_price
    # The higher of the stop-loss line and the retreat line is hit first, and the
    # price may gap down through it, so cap the exit price at t[HIGH]
    cut_price = gmin(gmax(hhret, stop_price), t[HIGH])
    cut_signal = gor(sdl, stopl)
    # Default to 0 so no sell signal is emitted on untraded (suspended) days,
    # otherwise it would get merged into the next trading day
    cut_signal = select([t[VOLUME] > 0], [cut_signal])
    ssignal = gor(sol, cut_signal)
    # Stop-loss and retreat exits use cut_price; down-candle exits and
    # suspension carry-overs use the open price
    stock.sellprice = select([cut_signal], [cut_price], default=t[OPEN])
    return ssignal
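# Plain-numpy sketch (invented bars) of the price selection at the end of
# up_seller: wherever the cut signal fires, the sell price is the cut price,
# otherwise it falls back to the open. The gor/gmin/rollx helpers above come
# from the snippet's own library; only the select pattern is reproduced here.
import numpy as np

open_price = np.array([10.0, 10.5, 11.0, 10.8])
cut_price = np.array([9.8, 10.2, 10.9, 10.6])
cut_signal = np.array([0, 1, 0, 1], dtype=bool)
sell_price = np.select([cut_signal], [cut_price], default=open_price)
print(sell_price)   # [10.  10.2 11.  10.6]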