def continuous_components(delta_X, delta_Y, delta_t, t, T, K):
    p = np.arange(K)
    delta_xp = np.take(delta_X, p)
    delta_yp = np.take(delta_Y, p)
    delta_tp = np.take(delta_t, p)
    tp = np.take(t, p)
    tp = np.hstack((np.array([0]), tp))

    first_term_xi = np.cumsum(delta_X[0:K-1])
    second_term_xi = (delta_X[1:K] / delta_t[1:K]) * np.cumsum(delta_t[0:K-1])
    xi = np.hstack((np.array([0]), first_term_xi - second_term_xi))

    first_term_delta = np.cumsum(delta_Y[0:K-1])
    second_term_delta = (delta_Y[1:K] / delta_t[1:K]) * np.cumsum(delta_t[0:K-1])
    delta = np.hstack((np.array([0]), first_term_delta - second_term_delta))

    A0 = (1/T) * np.sum((delta_xp / (2*delta_tp) * (np.square(tp[1:K+1]) - np.square(tp[0:K]))) +
                        xi * (tp[1:K+1] - tp[0:K]))
    C0 = (1/T) * np.sum((delta_yp / (2*delta_tp) * (np.square(tp[1:K+1]) - np.square(tp[0:K]))) +
                        delta * (tp[1:K+1] - tp[0:K]))
    return A0, C0
def __init__(self, fname, group, DM_lim):
    chain = hdf5io.TChain(fname, '%s/clouds' % group)
    mu_range = np.linspace(DM_lim[0], DM_lim[1], chain.get_nDim())

    self.lnp = chain.get_lnp()[0, 1:]
    lnp_min, lnp_max = np.percentile(self.lnp, [10., 90.])
    self.color = (self.lnp - lnp_min) / (lnp_max - lnp_min)
    self.color[self.color > 1.] = 1.
    self.color[self.color < 0.] = 0.

    # Plot all paths
    self.N_clouds = chain.get_nDim() // 2  # integer division: N_clouds is used as a slice index
    self.N_paths = chain.get_nSamples()
    mu_tmp = np.cumsum(chain.get_samples(0)[:, :self.N_clouds], axis=1)
    EBV_tmp = np.cumsum(np.exp(chain.get_samples(0)[:, self.N_clouds:]), axis=1)

    self.mu_all = np.zeros((self.N_paths, 2*(self.N_clouds+1)), dtype='f8')
    self.EBV_all = np.zeros((self.N_paths, 2*(self.N_clouds+1)), dtype='f8')
    self.mu_all[:, 0] = mu_range[0]
    self.mu_all[:, 1:-1:2] = mu_tmp
    self.mu_all[:, 2:-1:2] = mu_tmp
    self.mu_all[:, -1] = mu_range[-1]
    self.EBV_all[:, 2:-1:2] = EBV_tmp
    self.EBV_all[:, 3::2] = EBV_tmp
def cumsum(v, strict=False):
    if not strict:
        return np.cumsum(v, axis=0)
    else:
        out = np.zeros_like(v)
        out[1:] = np.cumsum(v[:-1], axis=0)
        return out
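A quick illustration of the strict flag in the cumsum helper above (an added sketch, not part of the original source): with strict=True the running sum excludes the current element, so the first entry is zero.

import numpy as np

v = np.array([1, 2, 3, 4])
print(cumsum(v))               # [ 1  3  6 10]  (ordinary inclusive cumsum)
print(cumsum(v, strict=True))  # [0 1 3 6]      (shifted; excludes the current element)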
def pos_neg_integral(scores):
    """Works only for 1D arrays at the moment, but can be easily extended."""
    scores = np.hstack([[0], scores])  # Padding.
    pos_scores, neg_scores = scores.copy(), scores.copy()
    idxs = scores >= 0
    pos_scores[~idxs], neg_scores[idxs] = 0, 0
    return np.cumsum(pos_scores), np.cumsum(neg_scores)
def rcumsum(v, strict=False):
    if not strict:
        return np.cumsum(v[::-1], axis=0)[::-1]
    else:
        out = np.zeros_like(v)
        out[:-1] = np.cumsum(v[-1:0:-1], axis=0)[::-1]
        return out
def histogram(self, idx=None, critval=None):
    '''calculate histogram values

    does not do any plotting
    '''
    if self.mcres.ndim == 2:
        if idx is not None:
            mcres = self.mcres[:, idx]
        else:
            raise ValueError('currently only 1 statistic at a time')
    else:
        mcres = self.mcres

    if critval is None:
        histo = np.histogram(mcres, bins=10)
    else:
        if not critval[0] == -np.inf:
            bins = np.r_[-np.inf, critval, np.inf]
        if not critval[-1] == np.inf:
            bins = np.r_[bins, np.inf]
        histo = np.histogram(mcres, bins=np.r_[-np.inf, critval, np.inf])

    self.histo = histo
    self.cumhisto = np.cumsum(histo[0]) * 1. / self.nrepl
    self.cumhistoreversed = np.cumsum(histo[0][::-1])[::-1] * 1. / self.nrepl
    return histo, self.cumhisto, self.cumhistoreversed
def drawPrfastscore(tp, fp, scr, tot, show=True):
    tp = numpy.cumsum(tp)
    fp = numpy.cumsum(fp)
    rec = tp / tot
    prec = tp / (fp + tp)
    # dif = numpy.abs(prec[1:] - rec[1:])
    dif = numpy.abs(prec[::-1] - rec[::-1])
    pos = dif.argmin()
    pos = len(dif) - pos - 1
    ap = 0
    for t in numpy.linspace(0, 1, 11):
        pr = prec[rec >= t]
        if pr.size == 0:
            pr = 0
        p = numpy.max(pr)
        ap = ap + p / 11
    if show:
        pylab.plot(rec, prec, '-g')
        pylab.title("AP=%.3f EPRthr=%.3f" % (ap, scr[pos]))
        pylab.xlabel("Recall")
        pylab.ylabel("Precision")
        pylab.grid()
        pylab.show()
        pylab.draw()
    return rec, prec, scr, ap, scr[pos]
def single_threshold_otsu(img, mask=None):
    lval = 256
    # compute normalized histogram
    hist = cv2.calcHist([img], [0], mask, [256], [0, 256])
    cumHist = np.cumsum(hist)
    norm_hist = hist.ravel() / cumHist[255]
    cumSum = np.cumsum(np.array(norm_hist), dtype=float)

    m = [norm_hist[i] * i for i in range(len(norm_hist))]
    means = np.array(m)
    cumMean = np.cumsum(means, dtype=float)
    mg = cumMean[lval - 1]

    global_variance = 0
    for i in range(len(norm_hist)):
        global_variance += ((i - mg)**2 * norm_hist[i])

    between_class_variance = np.zeros(256)
    max_variance = -1
    for k in range(1, lval - 2):
        p1 = cumSum[k]
        p2 = 1 - p1
        m1 = (1/p1) * cumMean[k] if p1 > 0 else 0
        m2 = (1/p2) * (cumMean[lval - 1] - cumMean[k]) if p2 > 0 else 0
        if between_class_variance[k] == 0:
            between_class_variance[k] = (p1 * (m1 - mg)**2) + (p2 * (m2 - mg)**2)
        if between_class_variance[k] > max_variance:
            max_variance = between_class_variance[k]

    th = 0
    maxs = 0
    # find max variances and sum for averages if more than 1 for each
    for i in range(1, lval - 1):
        if between_class_variance[i] == max_variance:
            th += i
            maxs += 1
    th = th / maxs
    return th
def Bh_Bv_timeseries(igramsFile):
    h5file = h5py.File(igramsFile)
    igramList = h5file['interferograms'].keys()
    Bh_igram = []
    Bv_igram = []
    for igram in igramList:
        Bh_igram.append(float(h5file['interferograms'][igram].attrs['H_BASELINE_TOP_HDR']))
        Bv_igram.append(float(h5file['interferograms'][igram].attrs['V_BASELINE_TOP_HDR']))

    A, B = design_matrix(h5file)
    tbase, dateList, dateDict, dateList1 = date_list(h5file)
    dt = np.diff(tbase)

    Bh_rate = np.dot(np.linalg.pinv(B), Bh_igram)
    zero = np.array([0.], np.float32)
    Bh = np.concatenate((zero, np.cumsum([Bh_rate * dt])))

    Bv_rate = np.dot(np.linalg.pinv(B), Bv_igram)
    zero = np.array([0.], np.float32)
    Bv = np.concatenate((zero, np.cumsum([Bv_rate * dt])))

    h5file.close()
    return Bh, Bv
def run(pars):
    verbose = pars.get('verbose', False)
    data = pars.get('data')['samples']
    t = np.round(pars.get('target', 5))  # target sample size per option
    s = pars.get('s', 1.)                # continue scale factor

    counts_A = np.cumsum((data == 0))
    counts_B = np.cumsum((data == 1))

    p_sample_A = 1. / (1. + np.exp((counts_A + 1 - t) * s))
    p_sample_B = 1. / (1. + np.exp((counts_B + 1 - t) * s))

    p_sample_A = p_sample_A * (data == 0) + (1 - p_sample_B) * p_sample_A * (data == 1)
    p_sample_B = p_sample_B * (data == 1) + (1 - p_sample_A) * p_sample_B * (data == 0)

    p_sample_A = np.concatenate(([0.5], p_sample_A))
    p_sample_B = np.concatenate(([0.5], p_sample_B))
    p_stop = 1 - (p_sample_A + p_sample_B)

    return {'p_stop': p_stop,
            'p_sample_A': p_sample_A,
            'p_sample_B': p_sample_B}
def _additive_estimate(events, timeline, _additive_f, _additive_var, reverse):
    """
    Called to compute the Kaplan Meier and Nelson-Aalen estimates.
    """
    if reverse:
        events = events.sort_index(ascending=False)
        at_risk = events['entrance'].sum() - events['removed'].cumsum().shift(1).fillna(0)
        deaths = events['observed']

        estimate_ = np.cumsum(_additive_f(at_risk, deaths)).sort_index().shift(-1).fillna(0)
        var_ = np.cumsum(_additive_var(at_risk, deaths)).sort_index().shift(-1).fillna(0)
    else:
        deaths = events['observed']
        at_risk = events['at_risk']

        estimate_ = np.cumsum(_additive_f(at_risk, deaths))
        var_ = np.cumsum(_additive_var(at_risk, deaths))

    timeline = sorted(timeline)
    estimate_ = estimate_.reindex(timeline, method='pad').fillna(0)
    var_ = var_.reindex(timeline, method='pad')
    var_.index.name = 'timeline'
    estimate_.index.name = 'timeline'

    return estimate_, var_
def traj_ss(lon1, lat1, lon2, lat2):
    '''
    Trajectory skill score, from Liu and Weisberg, 2011
    '''
    # distance between drifters in time
    dist = get_dist(lon1, lon2, lat1, lat2)  # in time

    # distance along path for control case, which is taken as lon1, lat1
    # first cumsum is to make length distance traveled up to that index
    length = np.cumsum(get_dist(lon1[:, :-1], lon1[:, 1:], lat1[:, :-1], lat1[:, 1:]), axis=1)

    # calculate s using cumulative sums
    # the first entry in time would be divided by zero, so this starts at the 2nd step
    # second cumsum is to sum up distances traveled
    s = np.cumsum(dist[:, 1:], axis=1) / np.cumsum(length, axis=1)

    # # pdb.set_trace()
    # # calculate skill score based on n=1
    # ind = (s>1)
    # ss = 1-s
    # ss[ind] = 0.

    # Return s instead of skill score so n parameter can be different
    return s
def boxfilter(I, r):
    """Fast box filter implementation.

    Parameters
    ----------
    I: a single channel/gray image data normalized to [0.0, 1.0]
    r: window radius

    Return
    -----------
    The filtered image data.
    """
    M, N = I.shape
    dest = np.zeros((M, N))

    # cumulative sum over Y axis
    sumY = np.cumsum(I, axis=0)
    # difference over Y axis
    dest[:r + 1] = sumY[r: 2 * r + 1]
    dest[r + 1:M - r] = sumY[2 * r + 1:] - sumY[:M - 2 * r - 1]
    dest[-r:] = np.tile(sumY[-1], (r, 1)) - sumY[M - 2 * r - 1:M - r - 1]

    # cumulative sum over X axis
    sumX = np.cumsum(dest, axis=1)
    # difference over X axis
    dest[:, :r + 1] = sumX[:, r:2 * r + 1]
    dest[:, r + 1:N - r] = sumX[:, 2 * r + 1:] - sumX[:, :N - 2 * r - 1]
    dest[:, -r:] = np.tile(sumX[:, -1][:, None], (1, r)) - \
        sumX[:, N - 2 * r - 1:N - r - 1]

    return dest
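A small sanity check for the box filter above (added for illustration, not from the original source): for a pixel at least r away from every border, the cumulative-sum trick should reproduce the plain sum over the (2r+1) x (2r+1) window.

import numpy as np

I = np.random.rand(20, 20)
r = 3
out = boxfilter(I, r)
i, j = 10, 10  # interior pixel
naive = I[i - r:i + r + 1, j - r:j + r + 1].sum()
assert np.allclose(out[i, j], naive)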
def rvs(self, n=1):
    """Generate random samples from the model.

    Parameters
    ----------
    n : int
        Number of samples to generate.

    Returns
    -------
    obs : array_like, length `n`
        List of samples
    """
    startprob_pdf = self.startprob
    startprob_cdf = np.cumsum(startprob_pdf)
    transmat_pdf = self.transmat
    transmat_cdf = np.cumsum(transmat_pdf, 1)

    # Initial state.
    rand = np.random.rand()
    currstate = (startprob_cdf > rand).argmax()
    obs = [self._generate_sample_from_state(currstate)]

    for x in xrange(n - 1):
        rand = np.random.rand()
        currstate = (transmat_cdf[currstate] > rand).argmax()
        obs.append(self._generate_sample_from_state(currstate))

    return np.array(obs)
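The same inverse-CDF idea can be used on its own; here is a minimal sketch (the 2-state transition matrix and names are invented for illustration) of stepping a Markov chain by comparing one uniform draw against the cumulative transition probabilities of the current row.

import numpy as np

transmat = np.array([[0.9, 0.1],
                     [0.2, 0.8]])           # toy transition matrix
transmat_cdf = np.cumsum(transmat, axis=1)  # per-row CDF

state = 0
states = [state]
for _ in range(10):
    state = (transmat_cdf[state] > np.random.rand()).argmax()
    states.append(state)
print(states)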
def cumulative_moment(self, year, mag):
    '''Calculation of Mmax using the Cumulative Moment approach, adapted from
    the cumulative strain energy method of Makropoulos & Burton (1983)

    :param year: Year of Earthquake
    :type year: numpy.ndarray
    :param mag: Magnitude of Earthquake
    :type mag: numpy.ndarray
    :keyword iplot: Include cumulative moment plot
    :type iplot: Boolean
    :return mmax: Returns Maximum Magnitude
    :rtype mmax: Float
    '''
    # Calculate seismic moment
    m_o = 10. ** (9.05 + 1.5 * mag)
    year_range = np.arange(np.min(year), np.max(year) + 1, 1)
    nyr = np.shape(year_range)[0]
    morate = np.zeros(nyr, dtype=float)
    # Get moment release per year
    for loc, tyr in enumerate(year_range):
        idx = np.abs(year - tyr) < 1E-5
        if np.sum(idx) > 0:
            # Some moment release in that year
            morate[loc] = np.sum(m_o[idx])

    ave_morate = np.sum(morate) / nyr
    # Average moment rate vector
    exp_morate = np.cumsum(ave_morate * np.ones(nyr))
    modiff = (np.abs(np.max(np.cumsum(morate) - exp_morate)) +
              np.abs(np.min(np.cumsum(morate) - exp_morate)))
    # Return back to Mw
    if fabs(modiff) < 1E-20:
        return -np.inf
    mmax = (2. / 3.) * (np.log10(modiff) - 9.05)
    return mmax
def __init__(self, jetP4s, jetResolutions, massT=172.0, massW=80.4):
    self.Wm2, self.Tm2 = massW ** 2, massT ** 2
    self.rawJ = jetP4s
    self.invJ = 1.0 / np.array(jetResolutions)
    self.fit()
    _, self.fitW, self.fitT = np.cumsum(self.fitJ)
    _, self.rawW, self.rawT = np.cumsum(self.rawJ)
def __init__(self, b, bResolution, mu, nuXY, nuErr,
             massT=172.0, widthT=widthTop, massW=80.4, zPlus=True):
    for key, val in zip(
        ["", "XY", "Z", "E", "T2", "T", "Phi"],
        [mu, np.array([mu.x(), mu.y()]), mu.z(), mu.e(), mu.Perp2(), mu.Pt(), mu.Phi()],
    ):
        setattr(self, "mu" + key, val)

    for key, val in zip(
        ["massW2", "massT", "invT", "bound", "sign", "rawB", "nuXY", "fitNu"],
        [massW ** 2, massT, 1.0 / widthT, False, [-1, 1][zPlus], b, nuXY, utils.LorentzV()],
    ):
        setattr(self, key, val)

    self.bXY = np.array([b.x(), b.y()])

    eig, self.Einv = np.linalg.eig(nuErr)
    self.E = self.Einv.T
    self.inv = 1.0 / np.append([bResolution], np.sqrt(np.maximum(1, eig)))

    self.setFittedNu(nuXY)
    _, self.rawW, self.rawT = np.cumsum([mu, self.fitNu, self.rawB])

    self.residualsBSLT = self.fit()
    self.chi2 = self.residualsBSLT.dot(self.residualsBSLT)
    _, self.fitW, self.fitT = np.cumsum([mu, self.fitNu, self.fitB])
def random2Choice(V, n=1):
    """
    Make a random binary choice from a vector V of values which are unnormalised
    probabilities. Return the corresponding index. For example if v = [1, 2] then
    the probability of the indices respectively are [1/3, 2/3]. The parameter n is
    the number of random choices to make. If V is a matrix, then the rows are
    taken as probabilities, and a choice is made for each row.
    """
    Parameter.checkClass(V, numpy.ndarray)

    if V.ndim == 1 and V.shape[0] != 2:
        raise ValueError("Function only works on binary probabilities")
    if V.ndim == 2 and V.shape[1] != 2:
        raise ValueError("Function only works on binary probabilities")

    if V.ndim == 1:
        cumV = numpy.cumsum(V)
        p = numpy.random.rand(n) * cumV[-1]
        cumV2 = numpy.ones(n) * cumV[0] - p
        return numpy.array(cumV2 <= 0, numpy.int)
    elif V.ndim == 2:
        cumV = numpy.cumsum(V, 1)
        P = numpy.random.rand(V.shape[0], n) * numpy.array([cumV[:, -1]]).T
        cumV2 = numpy.outer(cumV[:, 0], numpy.ones(n)) - P
        return numpy.array(cumV2 <= 0, numpy.int)
    else:
        raise ValueError("Invalid number of dimensions")
def get_blotter_pnl(order_qty, filled_qty, filled_price, cum_position, data, drawdown):
    #import pdb; pdb.set_trace()
    mid = midpoint(data)
    cash = np.sum(filled_qty * filled_price) * (-1.0)
    open_cash = cum_position[-1] * mid[-1]
    pnl = cash + open_cash

    pnl_t = np.cumsum(cum_position[:-1] * np.diff(mid))
    spread = np.cumsum((mid - filled_price) * filled_qty)
    pnl_t = spread[1:] + pnl_t
    assert abs(pnl - pnl_t[-1]) < 0.01

    running_max = np.maximum.accumulate(pnl_t)
    idx = np.where(pnl_t - running_max < drawdown)[0]
    if len(idx) > 0:
        stop_idx = np.min(idx)
        cum_position[(stop_idx+1):] = 0.0
        pnl_t = np.cumsum(cum_position[:-1] * np.diff(mid))
        order_qty[(stop_idx+1):] = 0.0
        filled_qty[(stop_idx+1):] = 0.0
        spread = np.cumsum((mid - filled_price) * filled_qty)
        pnl_t = spread[1:] + pnl_t

    order_volume = np.sum(np.abs(order_qty))
    trade_volume = np.sum(np.abs(filled_qty))
    result = np.array([(pnl_t[-1], np.min(pnl_t), np.max(pnl_t),
                        np.min(cum_position), np.max(cum_position),
                        trade_volume, order_volume,
                        trade_volume * 1.0 / order_volume)],
                      dtype=[('total_pnl', 'f'), ('min_pnl', 'f'), ('max_pnl', 'f'),
                             ('min_position', int), ('max_position', int),
                             ('volume', int), ('order_volume', int),
                             ('fill_ratio', float)])
    return result
def kittler(in_arr):
    """
    The reimplementation of Kittler-Illingworth Thresholding algorithm:
    https://www.mathworks.com/matlabcentral/fileexchange/45685
    Paper: [Kittler and Illingworth 1986] Minimum error thresholding.

    Args:
        in_arr(numpy.ndarray): Input 8-bits array.

    Returns:
        t(int): Calculated threshold.
    """
    h, g = np.histogram(in_arr.ravel(), 256, [0, 256])
    h = h.astype(np.float)
    g = g.astype(np.float)
    g = g[:-1]
    c = np.cumsum(h)
    m = np.cumsum(h * g)
    s = np.cumsum(h * g**2)
    sigma_f = np.sqrt(s/c - (m/c)**2)
    cb = c[-1] - c
    mb = m[-1] - m
    sb = s[-1] - s
    sigma_b = np.sqrt(sb/cb - (mb/cb)**2)
    p = c / c[-1]
    v = p * np.log(sigma_f) + (1-p)*np.log(sigma_b) - p*np.log(p) - \
        (1-p)*np.log(1-p)
    v[~np.isfinite(v)] = np.inf
    idx = np.argmin(v)
    t = g[idx]
    return t
def randomChoice(V, n=1):
    """
    Make a random choice from a vector V of values which are unnormalised
    probabilities. Return the corresponding index. For example if v = [1, 2, 4]
    then the probability of the indices respectively are [1/7, 2/7, 4/7]. The
    parameter n is the number of random choices to make. If V is a matrix, then
    the rows are taken as probabilities, and a choice is made for each row.
    """
    Parameter.checkClass(V, numpy.ndarray)

    if V.shape[0] == 0:
        return -1

    if V.ndim == 1:
        cumV = numpy.cumsum(V)
        p = numpy.random.rand(n) * cumV[-1]
        return numpy.searchsorted(cumV, p)
    elif V.ndim == 2:
        cumV = numpy.cumsum(V, 1)
        P = numpy.random.rand(V.shape[0], n) * numpy.array([cumV[:, -1]]).T
        inds = numpy.zeros(P.shape, numpy.int)
        for i in range(P.shape[0]):
            inds[i, :] = numpy.searchsorted(cumV[i, :], P[i, :])
        return inds
    else:
        raise ValueError("Invalid number of dimensions")
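The cumsum + searchsorted pattern used by randomChoice also works as a standalone weighted sampler; a minimal sketch (the weights and names below are illustrative only):

import numpy as np

weights = np.array([1., 2., 4.])
cum_weights = np.cumsum(weights)                 # [1., 3., 7.]
draws = np.random.rand(10000) * cum_weights[-1]  # uniform on [0, 7)
indices = np.searchsorted(cum_weights, draws)
print(np.bincount(indices) / indices.size)       # approximately [1/7, 2/7, 4/7]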
def tests(self, distribution='exp', pdelete=0., independent=True, dither=0., tilewindow=1.0):
    assert distribution in ['exp', 'exponential', 'poisson', 'regular']
    samplerate = 0.1  # ms
    spikerate = 0.001  # firing rate
    nspikes = 100  # number of spikes to test
    if distribution in ['exp', 'exponential']:
        st1 = np.random.exponential(1./spikerate, nspikes)
        st1 = np.cumsum(st1)
    elif distribution == 'regular':
        st1 = np.linspace(int(10./samplerate), int(9000./samplerate), int(10./samplerate))
    elif distribution == 'poisson':
        st1 = np.random.poisson(1./spikerate, nspikes)
        st1 = np.cumsum(st1)
    if independent:
        st2 = np.random.exponential(1./spikerate, nspikes)
        st2 = np.cumsum(st2)  # accumulate the independently drawn intervals
    else:
        st2 = st1
    st2 = np.random.choice(st2, int((1.0-pdelete)*st1.shape[0]), replace=False)
    if dither > 0:
        st2 = st2 + np.random.randn(len(st2))*dither
    # print('len st1, st2: ', len(st1), len(st2), np.max(st1), np.max(st2))
    self.set_spikes(samplerate, st1, st2, tilewindow=tilewindow)
    sttc = self.calc_sttc()
    print('# of spikes in spike train 1: {0:d}, in spike train 2: {1:d} '.format(st1.shape[0], st2.shape[0]))
    print('STTC value: {0:.3f} '.format(sttc))
    self.plot_sttc(st1, st2)
def _divide(self):
    block_size = self.spec.block_size  # shortcut
    half_block = (block_size - 1) / 2
    rows, columns = self.dividing.nonzero()
    for i in range(len(rows)):
        row = rows[i]
        column = columns[i]
        write_block(self._cell_block, self.cells, row, column, block_size)
        cv2.filter2D(self._cell_block, cv2.CV_32F, self._tension_kernel,
                     self._probability, borderType=cv2.BORDER_CONSTANT)
        cv2.threshold(self._probability, self._tension_min, 0,
                      cv2.THRESH_TOZERO, self._probability)
        self._probability[self._cell_block] = 0
        self._probability **= self.spec.tension_power
        self._probability *= self._distance_kernel

        # optimized version of np.random.choice
        np.cumsum(self._probability.flat, out=self._cumulative)
        total = self._cumulative[-1]
        if total < 1.0e-12:
            # no viable placements, we'll have precision problems anyways
            continue
        self._cumulative /= total
        index = self._indices[np.searchsorted(self._cumulative, rdm.random())]
        local_row, local_column = np.unravel_index(index, self._probability.shape)
        self.set_alive(row + (local_row - half_block),
                       column + (local_column - half_block))
def windower(thing, max_radius):
    thing_pad = numpy.concatenate((
        thing[-max_radius:],
        thing,
        thing[:max_radius]
    ))
    thing_sum = numpy.cumsum(numpy.cumsum(thing_pad))
    return (len(thing), thing_sum, max_radius)
def _generate_sample(self, X, nn_data, nn_num, row, col, step):
    """Generate a synthetic sample with additional steps for the
    categorical features.

    Each new sample is generated the same way as in SMOTE. However, the
    categorical features are mapped to the most frequent nearest neighbors
    of the majority class.
    """
    rng = check_random_state(self.random_state)
    sample = super(SMOTENC, self)._generate_sample(X, nn_data, nn_num, row, col, step)
    # To avoid conversion and since there is only few samples used, we
    # convert those samples to dense array.
    sample = (sample.toarray().squeeze()
              if sparse.issparse(sample) else sample)
    all_neighbors = nn_data[nn_num[row]]
    all_neighbors = (all_neighbors.toarray()
                     if sparse.issparse(all_neighbors) else all_neighbors)

    categories_size = ([self.continuous_features_.size] +
                       [cat.size for cat in self.ohe_.categories_])

    for start_idx, end_idx in zip(np.cumsum(categories_size)[:-1],
                                  np.cumsum(categories_size)[1:]):
        col_max = all_neighbors[:, start_idx:end_idx].sum(axis=0)
        # tie breaking argmax
        col_sel = rng.choice(np.flatnonzero(
            np.isclose(col_max, col_max.max())))
        sample[start_idx:end_idx] = 0
        sample[start_idx + col_sel] = 1

    return sparse.csr_matrix(sample) if sparse.issparse(X) else sample
def _major_index_fancy(self, idx):
    """Index along the major axis where idx is an array of ints.
    """
    idx_dtype = self.indices.dtype
    indices = np.asarray(idx, dtype=idx_dtype).ravel()

    _, N = self._swap(self.shape)
    M = len(indices)
    new_shape = self._swap((M, N))
    if M == 0:
        return self.__class__(new_shape)

    row_nnz = np.diff(self.indptr)
    idx_dtype = self.indices.dtype
    res_indptr = np.zeros(M+1, dtype=idx_dtype)
    np.cumsum(row_nnz[idx], out=res_indptr[1:])

    nnz = res_indptr[-1]
    res_indices = np.empty(nnz, dtype=idx_dtype)
    res_data = np.empty(nnz, dtype=self.dtype)
    csr_row_index(M, indices, self.indptr, self.indices, self.data,
                  res_indices, res_data)

    return self.__class__((res_data, res_indices, res_indptr),
                          shape=new_shape, copy=False)
def _major_slice(self, idx, copy=False):
    """Index along the major axis where idx is a slice object.
    """
    if idx == slice(None):
        return self.copy() if copy else self

    M, N = self._swap(self.shape)
    start, stop, step = idx.indices(M)
    M = len(xrange(start, stop, step))
    new_shape = self._swap((M, N))
    if M == 0:
        return self.__class__(new_shape)

    row_nnz = np.diff(self.indptr)
    idx_dtype = self.indices.dtype
    res_indptr = np.zeros(M+1, dtype=idx_dtype)
    np.cumsum(row_nnz[idx], out=res_indptr[1:])

    if step == 1:
        all_idx = slice(self.indptr[start], self.indptr[stop])
        res_indices = np.array(self.indices[all_idx], copy=copy)
        res_data = np.array(self.data[all_idx], copy=copy)
    else:
        nnz = res_indptr[-1]
        res_indices = np.empty(nnz, dtype=idx_dtype)
        res_data = np.empty(nnz, dtype=self.dtype)
        csr_row_slice(start, stop, step, self.indptr, self.indices,
                      self.data, res_indices, res_data)

    return self.__class__((res_data, res_indices, res_indptr),
                          shape=new_shape, copy=False)
def SNfunc(self, data, sig, significancefloor=0.5):
    D = data.ravel()
    S = sig.ravel()
    args = numpy.argsort(-D/S)
    D = numpy.take(D, args)
    S = numpy.take(S, args)
    Dsum = numpy.cumsum(D)
    Ssum = numpy.cumsum(S**2)**0.5
    SN = (Dsum/Ssum).max()

    # regional SN
    import scipy.ndimage as ndimage
    data[data/sig < significancefloor] = 0
    masks, multiplicity = ndimage.measurements.label(data)
    labels = numpy.arange(1, multiplicity+1)
    SNs = numpy.zeros(multiplicity+1)
    SNs[0] = SN
    for i in range(multiplicity):
        D = data[masks == i+1].ravel()
        S = sig[masks == i+1].ravel()
        args = numpy.argsort(-D/S)
        D = numpy.take(D, args)
        S = numpy.take(S, args)
        Dsum = numpy.cumsum(D)
        Ssum = numpy.cumsum(S**2)**0.5
        SNi = (Dsum/Ssum).max()
        SNs[i+1] = SNi
    SNs = -numpy.sort(-SNs)
    return SNs
def resize(self, *shape):
    shape = check_shape(shape)
    if hasattr(self, 'blocksize'):
        bm, bn = self.blocksize
        new_M, rm = divmod(shape[0], bm)
        new_N, rn = divmod(shape[1], bn)
        if rm or rn:
            raise ValueError("shape must be divisible into %s blocks. "
                             "Got %s" % (self.blocksize, shape))
        M, N = self.shape[0] // bm, self.shape[1] // bn
    else:
        new_M, new_N = self._swap(shape)
        M, N = self._swap(self.shape)

    if new_M < M:
        self.indices = self.indices[:self.indptr[new_M]]
        self.data = self.data[:self.indptr[new_M]]
        self.indptr = self.indptr[:new_M + 1]
    elif new_M > M:
        self.indptr = np.resize(self.indptr, new_M + 1)
        self.indptr[M + 1:].fill(self.indptr[M])

    if new_N < N:
        mask = self.indices < new_N
        if not np.all(mask):
            self.indices = self.indices[mask]
            self.data = self.data[mask]
            major_index, val = self._minor_reduce(np.add, mask)
            self.indptr.fill(0)
            self.indptr[1:][major_index] = val
            np.cumsum(self.indptr, out=self.indptr)

    self._shape = shape
def __init__(self, lc, voltage, t_tot, t_anchoring, pretilt=0,
             totaltwist=0, nlayers=100, data_file=None):
    self.lc = lc
    self.t_tot = t_tot
    self.t_anchoring = t_anchoring
    self.pretilt = pretilt
    self.totaltwist = totaltwist
    self.nlayers = nlayers
    self.data_file = data_file

    # thicknesses of internal layers
    tlc_internal = ((self.t_tot - 2. * self.t_anchoring) /
                    (self.nlayers - 2.) * numpy.ones(self.nlayers - 2))
    # thicknesses of layers
    self.tlc = numpy.r_[self.t_anchoring, tlc_internal, self.t_anchoring]
    # internal sample points
    lhs = numpy.r_[0, numpy.cumsum(tlc_internal)]
    # normalized sample points: at the center of internal layers, plus the
    # boundaries (i.e. the anchoring layers)
    self.normalized_sample_points = numpy.r_[
        0,
        (lhs[1:] + lhs[:-1]) / 2. / (self.t_tot - 2 * self.t_anchoring),
        1]
    tmp = numpy.r_[0, numpy.cumsum(self.tlc)]
    self.sample_points = .5 * (tmp[1:] + tmp[:-1])

    # finally, apply voltage
    self.voltage = voltage
def _process_estimation_data(df, state_space, optim_paras, options): """Process estimation data. All necessary objects for :func:`_internal_log_like_obs` dependent on the data are produced. Some objects have to be repeated for each type which is a desirable format for the estimation where every observations is weighted by type probabilities. Parameters ---------- df : pandas.DataFrame The DataFrame which contains the data used for estimation. The DataFrame contains individual identifiers, periods, experiences, lagged choices, choices in current period, the wage and other observed data. state_space : ~respy.state_space.StateSpace optim_paras : dict Returns ------- choices : numpy.ndarray Array with shape (n_observations, n_types) where information is only repeated over the second axis. idx_indiv_first_obs : numpy.ndarray Array with shape (n_individuals,) containing indices for the first observations of each individual. indices : numpy.ndarray Array with shape (n_observations, n_types) containing indices for states which correspond to observations. log_wages_observed : numpy.ndarray Array with shape (n_observations, n_types) containing clipped log wages. type_covariates : numpy.ndarray Array with shape (n_individuals, n_type_covariates) containing covariates to predict probabilities for each type. """ labels, _ = generate_column_labels_estimation(optim_paras) df = df.sort_values(["Identifier", "Period"])[labels] df = df.rename(columns=lambda x: x.replace("Experience", "exp").lower()) df = convert_choice_variables_from_categorical_to_codes(df, optim_paras) # Get indices of states in the state space corresponding to all observations for all # types. The indexer has the shape (n_observations, n_types). indices = () for period in range(df.period.max() + 1): period_df = df.query("period == @period") period_experience = tuple( period_df[col].to_numpy() for col in period_df.filter(like="exp_").columns) period_lagged_choice = tuple( period_df[f"lagged_choice_{i}"].to_numpy() for i in range(1, optim_paras["n_lagged_choices"] + 1)) period_observables = tuple( period_df[observable].to_numpy() for observable in optim_paras["observables"]) period_indices = state_space.indexer[period][period_experience + period_lagged_choice + period_observables] indices += (period_indices, ) indices = np.row_stack(indices) # The indexer is now sorted in period-individual pairs whereas the estimation needs # individual-period pairs. Sort it! indices_to_reorder = (df.sort_values( ["period", "identifier"]).assign(__index__=np.arange(df.shape[0])).sort_values( ["identifier", "period"])["__index__"].to_numpy()) indices = indices[indices_to_reorder] # Get an array of positions of the first observation for each individual. This is # used in :func:`_internal_log_like_obs` to aggregate probabilities of the # individual over all periods. n_obs_per_indiv = np.bincount(df.identifier.to_numpy()) idx_indiv_first_obs = np.hstack((0, np.cumsum(n_obs_per_indiv)[:-1])) # For the estimation, log wages are needed with shape (n_observations, n_types). log_wages_observed = (np.log(df.wage.to_numpy()).clip( -HUGE_FLOAT, HUGE_FLOAT).repeat(optim_paras["n_types"])) # For the estimation, choices are needed with shape (n_observations * n_types). choices = df.choice.to_numpy().repeat(optim_paras["n_types"]) # For the type covariates, we only need the first observation of each individual. 
states = df.groupby("identifier").first() type_covariates = (create_type_covariates(states, optim_paras, options) if optim_paras["n_types"] > 1 else None) return choices, idx_indiv_first_obs, indices, log_wages_observed, type_covariates
# h(i)
h = np.zeros(101)
for i in range(H1, H2 + 1):
    for j in range(W1, W2 + 1):
        L, u, v = LuvMatrix[i, j]
        if (L > maxL):
            L = 100
        elif (L < minL):
            L = 0
        else:
            L = math.floor(L)
        LuvMatrix[i, j] = [L, u, v]
        h[L] += 1

fs = np.cumsum(h)  # Frequency sum

fi1 = np.zeros(101)  # (f(i) + f(i-1))/2
for i in range(0, 101):
    if (i == 0):
        fi1[i] = math.floor(fs[i] * 101 / (2 * fs[100]))
    else:
        fi1[i] = math.floor((fs[i - 1] + fs[i]) * 101 / (2 * fs[100]))

HSOutput = inputImage.copy()

# Histogram equalization and conversions
for i in range(H1, H2 + 1):
    for j in range(W1, W2 + 1):
        # Histogram equalization in Luv Domain
        L, u, v = LuvMatrix[i, j]
        L = fi1[int(L)]
def hist_skip(inFname, bandIndex, percentileMin, percentileMax, outFname, nbuckets=1000): """ Given a filename, finds approximate percentile values and provides the gdal_translate invocation required to create an 8-bit PNG. Works by evaluating a histogram of the original raster with a large number of buckets between the raster minimum and maximum, then estimating the probability mass and distribution functions before reporting the percentiles requested. N.B. This technique is very approximate and hasn't been checked for asymptotic convergence. Heck, it uses GDAL's `GetHistogram` function in approximate mode, so you're getting approximate percentiles using an approximated histogram. Optional arguments: - `percentiles`: list of percentiles, between 0 and 100 (inclusive). - `nbuckets`: the more buckets, the better percentile approximations you get. """ src = gdal.Open(inFname) band = src.GetRasterBand(int(bandIndex)) percentiles = [float(percentileMin), float(percentileMax)] # Use GDAL to find the min and max (lo, hi, avg, std) = band.GetStatistics(True, True) # Use GDAL to calculate a big histogram rawhist = band.GetHistogram(min=lo, max=hi, buckets=nbuckets) binEdges = np.linspace(lo, hi, nbuckets + 1) # Probability mass function. Trapezoidal-integration of this should yield 1.0. pmf = rawhist / (np.sum(rawhist) * np.diff(binEdges[:2])) # Cumulative probability distribution. Starts at 0, ends at 1.0. distribution = np.cumsum(pmf) * np.diff(binEdges[:2]) # Which histogram buckets are close to the percentiles requested? idxs = [np.sum(distribution < p / 100.0) for p in percentiles] # These: vals = [binEdges[i] for i in idxs] # Append 0 and 100% percentiles (min & max) percentiles = [0] + percentiles + [100] vals = [lo] + vals + [hi] # Print the percentile table print "percentile (out of 100%),value at percentile" for (p, v) in zip(percentiles, vals): print "%f,%f" % (p, v) if vals[1] == 0: print "percentile " + str(percentileMin) + " is equal to 0" print "Percentile recomputation as pNoZero+" + str( percentileMin ) + ", where pNoZero is the first percentile with no zero value" pNoZero = 0 for p in range(int(percentileMin), 100): idx = np.sum(distribution < float(p) / 100.0) val = binEdges[idx] if val > 0: pNoZero = p + int(percentileMin) break percentiles = [float(pNoZero), float(percentileMax)] # Which histogram buckets are close to the percentiles requested? idxs = [np.sum(distribution < p / 100.0) for p in percentiles] # These: vals = [binEdges[i] for i in idxs] # Append 0 and 100% percentiles (min & max) percentiles = [0] + percentiles + [100] vals = [lo] + vals + [hi] # Print the percentile table print "percentile (out of 100%),value at percentile" for (p, v) in zip(percentiles, vals): print "%f,%f" % (p, v) # Print out gdal_calc command gdalCalcCommand = "gdal_calc.py -A " + inFname + " --A_band=" + bandIndex + " --calc=" + '"' + str( vals[1]) + "*logical_and(A>0, A<=" + str(vals[1]) + ")+A*(A>" + str( vals[1] ) + ")" + '"' + " --outfile=gdal_calc_result.tif --NoDataValue=0" print "running " + gdalCalcCommand os.system(gdalCalcCommand) # Print out gdal_translate command (what we came here for anyway) gdalTranslateCommand = "gdal_translate -b 1 -co TILED=YES -co BLOCKXSIZE=512 -co BLOCKYSIZE=512 -co ALPHA=YES -ot Byte -a_nodata 0 -scale " + str( vals[1]) + " " + str( vals[2]) + " 1 255 gdal_calc_result.tif " + outFname print "running " + gdalTranslateCommand os.system(gdalTranslateCommand) # remove temp file os.system("rm gdal_calc_result.tif") return (vals, percentiles)
def plotkpi(kpi, filePath): # kpi = kpi_frame; filePath = analysis_path rst_path = get_path(filePath) # 周转天数图 xlabeln = u'周转天数' ylabeln = u'频数' ylabel2n = u'周转天数累积分布' titlen = u'周转分布' fig, ax1 = plt.subplots() ret = plt.hist(kpi.TD, bins=50, range=[0.1, 200], color='#0070C0') # 这里将一些特殊的sku去掉:为0的,和超过200的 counts, bins, patches = ret[0], ret[1], ret[2] ax2 = ax1.twinx() sum_counts = np.cumsum(counts) / counts.sum() plt.plot(bins[1:], sum_counts, color='#C44E52') ax1.set_xlabel(xlabeln) ax1.set_ylabel(ylabeln) ax2.set_ylabel(ylabel2n) ax1.set_ylim(-counts.max() * 0.05, counts.max() * 1.05) ax1.set_xlim(-10, 200 * 1.05) ax2.set_ylim(-0.05, 1.05) ax2.yaxis.grid(False) plt.savefig(rst_path + '\\td') # 第二版 binsn = range(0,100,10) + [np.inf] save_path = rst_path + '\\td2' plothistper(kpi[(kpi.TD.notnull())&(kpi.TD != np.inf)&(kpi.TD != -np.inf)]["TD"], binsn, xlabeln, ylabeln, titlen, save_path, cum_True=True) # 现货率图 xlabeln = u'现货率' ylabeln = u'频数' ylabel2n = u'现货率累积分布' titlen = u'现货率分布' fig, ax1 = plt.subplots() ret = plt.hist(kpi.CR, bins=50, label='Z', color='#0070C0') counts, bins, patches = ret[0], ret[1], ret[2] ax2 = ax1.twinx() sum_counts = np.cumsum(counts) / counts.sum() plt.plot(bins[1:], sum_counts, color='#C44E52') ax1.set_xlabel(xlabeln) ax1.set_ylabel(ylabeln) ax2.set_ylabel(ylabel2n) ax1.set_ylim(-counts.max() * 0.05, counts.max() * 1.05) ax1.set_xlim(-0.05, 1.05) ax2.set_ylim(-0.05, 1.05) ax2.yaxis.grid(False) plt.savefig(rst_path+'\\cr') # 第二版 binsn = np.linspace(0,1,11) save_path = rst_path + '\\cr2' plothistper(kpi[(kpi.CR.notnull())&(kpi.CR != np.inf)&(kpi.CR != -np.inf)]["CR"], binsn, xlabeln, ylabeln, titlen, save_path, cum_True=True, intshu=False) # 现货率和周转天数图 fig2 = plt.figure() ax = fig2.add_subplot(111) # ax.grid() ax.set_xlabel(u"周转天数") ax.set_ylabel(u"现货率") ax.set_xlim(0, 200) ax.set_ylim(0, 1) plt.scatter(kpi.TD, kpi.CR, color='#0070C0') plt.plot([60, 60], [0, 1], '--', color='red') plt.plot([0, 200], [0.8, 0.8], '--', color='red') plt.annotate('(1)', xy=(1, 1), xytext=(25, 0.9), fontsize=20, color='red') plt.annotate('(2)', xy=(1, 1), xytext=(130, 0.9), fontsize=20, color='red') plt.annotate('(3)', xy=(1, 1), xytext=(25, 0.4), fontsize=20, color='red') plt.annotate('(4)', xy=(1, 1), xytext=(130, 0.4), fontsize=20, color='red') plt.savefig(rst_path+'\\cr_td')
def PlotHypsometry(hypsometer): zbins = hypsometer['z'].values areas = hypsometer['area'].values fig = plt.figure(1, facecolor='white', figsize=(6.25, 3.5)) gs = plt.GridSpec(100, 150, bottom=0.15, left=0.1, right=1.0, top=1.0) nodata_area = areas[0] z = zbins[1:] areas = areas[1:] ax = fig.add_subplot(gs[10:95, 40:140]) cum_areas = np.flip(np.cumsum(np.flip(areas, axis=0)), axis=0) total_area = np.sum(areas) # if hypsometry: # ax.fill_between(100 *cum_areas / total_area, 0, z, color='#f2f2f2') # ax.plot(100 * cum_areas / total_area, z, color='k', linestyle='--', linewidth=0.6) ax.fill_between(100 * cum_areas / total_area, 0, z, color='lightgray') ax.plot(100 * cum_areas / total_area, z, color='k', linewidth=1.0) minz = np.min(z[areas > 0]) maxz = np.max(z[areas > 0]) dz = 100.0 ax.spines['top'].set_linewidth(1) ax.spines['left'].set_linewidth(1) ax.spines['right'].set_linewidth(1) ax.spines['bottom'].set_linewidth(1) ax.set_xlabel("Cumulative surface (%)") ax.set_xlim([0, 100]) ax.set_ylim(minz, maxz) ax.tick_params(axis='both', width=1, pad=2) for tick in ax.xaxis.get_major_ticks(): tick.set_pad(2) z = np.arange(minz, maxz + dz, dz) groups = np.digitize(zbins[:-1], z) ax = fig.add_subplot(gs[10:95, 10:30]) # if hypsometry: # grouped_hyp = np.array([np.sum(areas[groups == k]) for k in range(1, z.size)]) # ax.barh(z[:-1], 100.0 * grouped_hyp / total_area, dz, align='edge', color='#f2f2f2', edgecolor='k') grouped = np.array([np.sum(areas[groups == k]) for k in range(1, z.size)]) ax.barh(z[:-1], 100.0 * grouped / total_area, dz, align='edge', color='lightgray', edgecolor='k') ax.spines['top'].set_linewidth(1) ax.spines['left'].set_linewidth(1) ax.spines['right'].set_linewidth(1) ax.spines['bottom'].set_linewidth(1) ax.set_xlabel("Surface (%)") ax.set_ylim(minz, maxz) ax.set_ylabel("Altitude (m)") ax.tick_params(axis='both', width=1, pad=2) for tick in ax.xaxis.get_major_ticks(): tick.set_pad(2) fig_size_inches = 12.50 aspect_ratio = 2.0 cbar_L = "None" [fig_size_inches, map_axes, cbar_axes] = MapFigureSizer(fig_size_inches, aspect_ratio, cbar_loc=cbar_L, title="None") fig.set_size_inches(fig_size_inches[0], fig_size_inches[1]) return fig
print("------ GPTS stationary ------") (stationary_rewards, stationary_final_environment, stationary_final_subcampaign_algos, regression_errors_max, regression_errors_sum) = experiment.perform(timesteps_stationary) print("------ GPTS context generation ------") (context_generation_rewards, context_generation_final_environment, context_generation_final_subcampaign_algos) = experiment.perform( timesteps_context_generation, context_generation_rate) ############################################ ## Plot results ############################################ cumulative_stationary_reward = np.cumsum(stationary_rewards) cumulative_context_generation_reward = np.cumsum(context_generation_rewards) cumulative_clairvoyant_reward = np.cumsum(clairvoyant_rewards) cumulative_disaggregated_clairvoyant_reward = np.cumsum( disaggregated_clairvoyant_rewards) # Cumulative rewards plt.plot(cumulative_stationary_reward) plt.plot(cumulative_context_generation_reward) plt.plot(cumulative_clairvoyant_reward) plt.plot(cumulative_disaggregated_clairvoyant_reward) plt.legend([ 'GPTS - Stationary', 'GPTS - Context generation', 'Clairvoyant', 'Clairvoyant - Optimal context' ],
def __init__(self, dist):
    red = np.cumsum(list(dist['red'].values()))
    green = np.cumsum(list(dist['green'].values()))
    blue = np.cumsum(list(dist['blue'].values()))
    self.dist = {'red': red, 'green': green, 'blue': blue}
def moving_average(a, n=3):
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n
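Usage sketch for moving_average (added for illustration): the cumulative-sum difference matches a direct sliding-window mean.

import numpy as np

a = np.arange(10, dtype=float)
print(moving_average(a, n=3))
# same result, computed the slow way:
print(np.array([a[i:i + 3].mean() for i in range(len(a) - 2)]))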
def accumulate(self, p=None): ''' Accumulate per image evaluation results and store the result in self.eval :param p: input params for evaluation :return: None ''' print('Accumulating evaluation results...') tic = time.time() if not self.evalImgs: print('Please run evaluate() first') # allows input customized parameters if p is None: p = self.params p.catIds = p.catIds if p.useCats == 1 else [-1] T = len(p.iouThrs) R = len(p.fppiThrs) K = len(p.catIds) if p.useCats else 1 M = len(p.maxDets) # -1 for the precision of absent categories ys = -np.ones((T, R, K, M)) # create dictionary for future indexing _pe = self._paramsEval catIds = [1] # _pe.catIds if _pe.useCats else [-1] setK = set(catIds) setM = set(_pe.maxDets) setI = set(_pe.imgIds) # get inds to evaluate k_list = [n for n, k in enumerate(p.catIds) if k in setK] m_list = [m for n, m in enumerate(p.maxDets) if m in setM] i_list = [n for n, i in enumerate(p.imgIds) if i in setI] I0 = len(_pe.imgIds) # retrieve E at each category, area range, and max number of detections for k, k0 in enumerate(k_list): Nk = k0 * I0 for m, maxDet in enumerate(m_list): E = [self.evalImgs[Nk + i] for i in i_list] E = [e for e in E if e is not None] if len(E) == 0: continue dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) # different sorting method generates slightly different results. # mergesort is used to be consistent as Matlab implementation. inds = np.argsort(-dtScores, kind='mergesort') dtm = np.concatenate([e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:, inds] dtIg = np.concatenate([e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:, inds] gtIg = np.concatenate([e['gtIgnore'] for e in E]) npig = np.count_nonzero(gtIg == 0) if npig == 0: continue tps = np.logical_and(dtm, np.logical_not(dtIg)) fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) # print(tps,fps) inds = np.where(dtIg == 0)[1] # print(inds) tps = tps[:, inds] fps = fps[:, inds] # print(tps,fps) tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) # print(tp_sum,fp_sum) for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): tp = np.array(tp) fppi = np.array(fp) / I0 nd = len(tp) recall = tp / npig q = np.zeros((R, )) # numpy is slow without cython optimization for accessing elements # use python array gets significant speed improvement recall = recall.tolist() q = q.tolist() for i in range(nd - 1, 0, -1): if recall[i] < recall[i - 1]: recall[i - 1] = recall[i] inds = np.searchsorted(fppi, p.fppiThrs, side='right') - 1 try: for ri, pi in enumerate(inds): q[ri] = recall[pi] except BaseException: pass ys[t, :, k, m] = np.array(q) self.eval = { 'params': p, 'counts': [T, R, K, M], 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'TP': ys, } toc = time.time() print('DONE (t={:0.2f}s).'.format(toc - tic))
def voc_eval(detpath, annopath, imagesetfile, classname, cachedir, ovthresh=0.5, use_07_metric=True): """rec, prec, ap = voc_eval(detpath, annopath, imagesetfile, classname, [ovthresh], [use_07_metric]) Top level function that does the PASCAL VOC evaluation. detpath: Path to detections detpath.format(classname) should produce the detection results file. annopath: Path to annotations annopath.format(imagename) should be the xml annotations file. imagesetfile: Text file containing the list of images, one image per line. classname: Category name (duh) cachedir: Directory for caching the annotations [ovthresh]: Overlap threshold (default = 0.5) [use_07_metric]: Whether to use VOC07's 11 point AP computation (default True) """ # assumes detections are in detpath.format(classname) # assumes annotations are in annopath.format(imagename) # assumes imagesetfile is a text file with each line an image name # cachedir caches the annotations in a pickle file # first load gt if not os.path.isdir(cachedir): os.mkdir(cachedir) cachefile = os.path.join(cachedir, 'annots.pkl') # read list of images with open(imagesetfile, 'r') as f: lines = f.readlines() imagenames = [x.strip() for x in lines] if not os.path.isfile(cachefile): # load annots recs = {} for i, imagename in enumerate(imagenames): recs[imagename] = parse_rec(annopath % (imagename)) if i % 100 == 0: print('Reading annotation for {:d}/{:d}'.format( i + 1, len(imagenames))) # save print('Saving cached annotations to {:s}'.format(cachefile)) with open(cachefile, 'wb') as f: pickle.dump(recs, f) else: # load with open(cachefile, 'rb') as f: recs = pickle.load(f) # extract gt objects for this class class_recs = {} npos = 0 for imagename in imagenames: R = [obj for obj in recs[imagename] if obj['name'] == classname] bbox = np.array([x['bbox'] for x in R]) difficult = np.array([x['difficult'] for x in R]).astype(np.bool) det = [False] * len(R) npos = npos + sum(~difficult) class_recs[imagename] = {'bbox': bbox, 'difficult': difficult, 'det': det} # read dets detfile = detpath.format(classname) with open(detfile, 'r') as f: lines = f.readlines() if any(lines) == 1: splitlines = [x.strip().split(' ') for x in lines] image_ids = [x[0] for x in splitlines] confidence = np.array([float(x[1]) for x in splitlines]) BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) # sort by confidence sorted_ind = np.argsort(-confidence) sorted_scores = np.sort(-confidence) BB = BB[sorted_ind, :] image_ids = [image_ids[x] for x in sorted_ind] # go down dets and mark TPs and FPs nd = len(image_ids) tp = np.zeros(nd) fp = np.zeros(nd) for d in range(nd): R = class_recs[image_ids[d]] bb = BB[d, :].astype(float) ovmax = -np.inf BBGT = R['bbox'].astype(float) if BBGT.size > 0: # compute overlaps # intersection ixmin = np.maximum(BBGT[:, 0], bb[0]) iymin = np.maximum(BBGT[:, 1], bb[1]) ixmax = np.minimum(BBGT[:, 2], bb[2]) iymax = np.minimum(BBGT[:, 3], bb[3]) iw = np.maximum(ixmax - ixmin, 0.) ih = np.maximum(iymax - iymin, 0.) inters = iw * ih uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) + (BBGT[:, 2] - BBGT[:, 0]) * (BBGT[:, 3] - BBGT[:, 1]) - inters) overlaps = inters / uni ovmax = np.max(overlaps) jmax = np.argmax(overlaps) if ovmax > ovthresh: if not R['difficult'][jmax]: if not R['det'][jmax]: tp[d] = 1. R['det'][jmax] = 1 else: fp[d] = 1. else: fp[d] = 1. 
# compute precision recall fp = np.cumsum(fp) tp = np.cumsum(tp) rec = tp / float(npos) # avoid divide by zero in case the first detection matches a difficult # ground truth prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) ap = voc_ap(rec, prec, use_07_metric) else: rec = -1. prec = -1. ap = -1. return rec, prec, ap
fig, ax = plt.subplots(figsize=(5, 3))
fig.subplots_adjust(bottom=0.15, left=0.2)
ax.plot(x1, y1)
ax.set_xlabel('time [s]', fontsize='large', fontweight='bold')
ax.set_ylabel('Damped oscillation [V]', fontproperties=font)

plt.show()

##############################################################################
# Finally, we can use native TeX rendering in all text objects and have
# multiple lines:

fig, ax = plt.subplots(figsize=(5, 3))
fig.subplots_adjust(bottom=0.2, left=0.2)
ax.plot(x1, np.cumsum(y1**2))
ax.set_xlabel('time [s] \n This was a long experiment')
ax.set_ylabel(r'$\int\ Y^2\ dt\ \ [V^2 s]$')
plt.show()

##############################################################################
# Titles
# ======
#
# Subplot titles are set in much the same way as labels, but there is
# the *loc* keyword argument that can change the position and justification
# from the default value of ``loc=center``.

fig, axs = plt.subplots(3, 1, figsize=(5, 6), tight_layout=True)
locs = ['center', 'left', 'right']
print(digit_train.info())

X = digit_train.iloc[:, 1:] / 255.0
y = digit_train['label']

X_train, X_eval, y_train, y_eval = model_selection.train_test_split(
    X, y, test_size=0.1, random_state=1)

zv = feature_selection.VarianceThreshold(threshold=0.0)
X_train1 = zv.fit_transform(X_train)

sns.heatmap(X_train.corr())

lpca = decomposition.PCA(n_components=0.95)
X_train2 = lpca.fit_transform(X_train1)
np.cumsum(lpca.explained_variance_ratio_)

tsne = manifold.TSNE()
tsne_data = tsne.fit_transform(X_train2)
cutils.plot_data_2d_classification(tsne_data, y_train)

kernel_svm_estimator = svm.SVC(kernel='rbf')
kernel_svm_grid = {
    'gamma': [0.01, 0.1, 1, 2, 5, 10],
    'C': [0.001, 0.01, 0.1, 0.5]
}
kernel_svm_grid_estimator = model_selection.GridSearchCV(kernel_svm_estimator,
                                                         kernel_svm_grid,
                                                         scoring='accuracy',
                                                         cv=10)
kernel_svm_grid_estimator.fit(X_train2, y_train)
from mantid.simpleapi import *
import numpy as np
import matplotlib.pyplot as plt
import math

filename = 'CORELLI_666'
ws = LoadEventNexus(Filename=r'/SNS/CORELLI/IPTS-12008/shared/SNS/CORELLI/IPTS-12008/nexus/'+filename+'.nxs.h5',
                    OutputWorkspace=filename, BankName='bank42', SingleBankPixelsOnly='0',
                    FilterByTofMin='0', FilterByTofMax='16667')
LoadInstrument(Workspace=filename, Filename='/SNS/users/rwp/CORELLI_Definition.xml')

sequence = map(float, ws.getInstrument().getComponentByName('correlation-chopper').getStringParameter('sequence')[0].split())
sequence_sum = np.cumsum(sequence)

#i=ws.getInstrument()
#r=i.getDetector(169600).getDistance(i.getSample())

chopper_tdc = ws.getSampleDetails().getProperty("chopper4_TDC").times
#chopper_frq = ws.getSampleDetails().getProperty("BL9:Chop:Skf4:SpeedSetReq").value[0]
chopper_frq = ws.getSampleDetails().getProperty("BL9:Chop:Skf4:MotorSpeed").timeAverageValue()
chopper_per = 1e6/chopper_frq
print 'Chopper Frequency =', chopper_frq, 'Hz, Period =', chopper_per, 'uS'

bin_size = 10.  # 10ms bins
y = int(math.ceil(chopper_per/bin_size))
x = int(math.ceil(1e6/60/bin_size))
out = np.zeros((y, x))

total_counts = 0
for pi in range(0, 4):  # 4
def bayesian_blocks(t, x=None, sigma=None, fitness='events', **kwargs): """Bayesian Blocks Implementation This is a flexible implementation of the Bayesian Blocks algorithm described in Scargle 2012 [1]_ Parameters ---------- t : array_like data times (one dimensional, length N) x : array_like (optional) data values sigma : array_like or float (optional) data errors fitness : str or object the fitness function to use. If a string, the following options are supported: - 'events' : binned or unbinned event data extra arguments are `p0`, which gives the false alarm probability to compute the prior, or `gamma` which gives the slope of the prior on the number of bins. - 'regular_events' : non-overlapping events measured at multiples of a fundamental tick rate, `dt`, which must be specified as an additional argument. The prior can be specified through `gamma`, which gives the slope of the prior on the number of bins. - 'measures' : fitness for a measured sequence with Gaussian errors The prior can be specified using `gamma`, which gives the slope of the prior on the number of bins. If `gamma` is not specified, then a simulation-derived prior will be used. Alternatively, the fitness can be a user-specified object of type derived from the FitnessFunc class. Returns ------- edges : ndarray array containing the (N+1) bin edges Examples -------- Event data: t = np.random.normal(size=100) bins = bayesian_blocks(t, fitness='events', p0=0.01) Event data with repeats: t = np.random.normal(size=100) t[80:] = t[:20] bins = bayesian_blocks(t, fitness='events', p0=0.01) Regular event data: dt = 0.01 t = dt * np.arange(1000) x = np.zeros(len(t)) x[np.random.randint(0, len(t), int(len(t) / 10))] = 1 bins = bayesian_blocks(t, x, fitness='regular_events', dt=dt, gamma=0.9) Measured point data with errors: t = 100 * np.random.random(100) x = np.exp(-0.5 * (t - 50) ** 2) sigma = 0.1 x_obs = np.random.normal(x, sigma) bins = bayesian_blocks(t, x=x_obs, fitness='measures') References ---------- .. [1] Scargle, J `et al.` (2012) http://adsabs.harvard.edu/abs/2012arXiv1207.5578S See Also -------- astroML.plotting.hist : histogram plotting function which can make use of bayesian blocks. 
""" # validate array input t = np.asarray(t, dtype=float) if x is not None: x = np.asarray(x) if sigma is not None: sigma = np.asarray(sigma) # verify the fitness function if fitness == 'events': if x is not None and np.any(x % 1 > 0): raise ValueError("x must be integer counts for fitness='events'") fitfunc = Events(**kwargs) elif fitness == 'regular_events': if x is not None and (np.any(x % 1 > 0) or np.any(x > 1)): raise ValueError("x must be 0 or 1 for fitness='regular_events'") fitfunc = RegularEvents(**kwargs) elif fitness == 'measures': if x is None: raise ValueError("x must be specified for fitness='measures'") fitfunc = PointMeasures(**kwargs) else: if not (hasattr(fitness, 'args') and hasattr(fitness, 'fitness') and hasattr(fitness, 'prior')): raise ValueError("fitness not understood") fitfunc = fitness # find unique values of t t = np.array(t, dtype=float) assert t.ndim == 1 unq_t, unq_ind, unq_inv = np.unique(t, return_index=True, return_inverse=True) # if x is not specified, x will be counts at each time if x is None: if sigma is not None: raise ValueError("If sigma is specified, x must be specified") if len(unq_t) == len(t): x = np.ones_like(t) else: x = np.bincount(unq_inv) t = unq_t sigma = 1 # if x is specified, then we need to sort t and x together else: x = np.asarray(x) if len(t) != len(x): raise ValueError("Size of t and x does not match") if len(unq_t) != len(t): raise ValueError("Repeated values in t not supported when " "x is specified") t = unq_t x = x[unq_ind] # verify the given sigma value N = t.size if sigma is not None: sigma = np.asarray(sigma) if sigma.shape not in [(), (1,), (N,)]: raise ValueError('sigma does not match the shape of x') else: sigma = 1 # validate the input fitfunc.validate_input(t, x, sigma) # compute values needed for computation, below if 'a_k' in fitfunc.args: ak_raw = np.ones_like(x) / sigma / sigma if 'b_k' in fitfunc.args: bk_raw = x / sigma / sigma if 'c_k' in fitfunc.args: ck_raw = x * x / sigma / sigma # create length-(N + 1) array of cell edges edges = np.concatenate([t[:1], 0.5 * (t[1:] + t[:-1]), t[-1:]]) block_length = t[-1] - edges # arrays to store the best configuration best = np.zeros(N, dtype=float) last = np.zeros(N, dtype=int) #----------------------------------------------------------------- # Start with first data cell; add one cell at each iteration #----------------------------------------------------------------- for R in range(N): # Compute fit_vec : fitness of putative last block (end at R) kwds = {} # T_k: width/duration of each block if 'T_k' in fitfunc.args: kwds['T_k'] = block_length[:R + 1] - block_length[R + 1] # N_k: number of elements in each block if 'N_k' in fitfunc.args: kwds['N_k'] = np.cumsum(x[:R + 1][::-1])[::-1] # a_k: eq. 31 if 'a_k' in fitfunc.args: kwds['a_k'] = 0.5 * np.cumsum(ak_raw[:R + 1][::-1])[::-1] # b_k: eq. 32 if 'b_k' in fitfunc.args: kwds['b_k'] = - np.cumsum(bk_raw[:R + 1][::-1])[::-1] # c_k: eq. 
33 if 'c_k' in fitfunc.args: kwds['c_k'] = 0.5 * np.cumsum(ck_raw[:R + 1][::-1])[::-1] # evaluate fitness function fit_vec = fitfunc.fitness(**kwds) A_R = fit_vec - fitfunc.prior(R + 1, N) A_R[1:] += best[:R] i_max = np.argmax(A_R) last[R] = i_max best[R] = A_R[i_max] #----------------------------------------------------------------- # Now find changepoints by iteratively peeling off the last block #----------------------------------------------------------------- change_points = np.zeros(N, dtype=int) i_cp = N ind = N while True: i_cp -= 1 change_points[i_cp] = ind if ind == 0: break ind = last[ind - 1] change_points = change_points[i_cp:] bins = edges[change_points] bins[0] = -np.inf bins[-1] = np.inf return bins
def deg_gen(A, K, D, m, eta, gamma, model_var, s_fun): mseed = np.size(np.where(A.flat)) // 2 k = np.sum(A, axis=1) if type(model_var) == tuple: mv1, mv2 = model_var else: mv1, mv2 = model_var, model_var if mv1 in ('powerlaw', 'power_law'): Fd = D**eta elif mv1 in ('exponential', ): Fd = np.exp(eta * D) if mv2 in ('powerlaw', 'power_law'): Fk = K**gamma elif mv2 in ('exponential', ): Fk = np.exp(gamma * K) P = Fd * Fk * np.logical_not(A) u, v = np.where(np.triu(np.ones((n, n)), 1)) b = np.zeros((m, ), dtype=int) # print(mseed) # print(np.shape(u),np.shape(v)) # print(np.shape(b)) # print(np.shape(A[u,v])) # print(np.shape(np.where(A[u,v])), 'sqishy') # print(np.shape(P), 'squnnaq') #b[:mseed] = np.where(A[np.ix_(u,v)]) b[:mseed] = np.squeeze(np.where(A[u, v])) #print(mseed, m) for i in range(mseed, m): C = np.append(0, np.cumsum(P[u, v])) r = np.sum(np.random.random() * C[-1] >= C) uu = u[r] vv = v[r] k[uu] += 1 k[vv] += 1 if mv2 in ('powerlaw', 'power_law'): Fk[:, uu] = Fk[uu, :] = s_fun(k, k[uu])**gamma Fk[:, vv] = Fk[vv, :] = s_fun(k, k[vv])**gamma elif mv2 in ('exponential', ): Fk[:, uu] = Fk[uu, :] = np.exp(s_fun(k, k[uu]) * gamma) Fk[:, vv] = Fk[vv, :] = np.exp(s_fun(k, k[vv]) * gamma) P = Fd * Fk b[i] = r P[u[b[:i]], v[b[:i]]] = P[v[b[:i]], u[b[:i]]] = 0 A[u[r], v[r]] = A[v[r], u[r]] = 1 #P[b[u[:i]], b[v[:i]]] = P[b[v[:i]], b[u[:i]]] = 0 #A[uu,vv] = A[vv,uu] = 1 # indx = v*n + u # indx[b] # # nH = np.zeros((n,n)) # nH.ravel()[indx[b]]=1 # # nG = np.zeros((n,n)) # nG[ u[b], v[b] ]=1 # nG = nG + nG.T # # print(np.shape(np.where(A != nG))) # # import pdb # pdb.set_trace() return A
def build (self, coord_, atype_, natoms, box, mesh, input_dict, suffix = '', reuse = None): with tf.variable_scope('model_attr' + suffix, reuse = reuse) : t_tmap = tf.constant(' '.join(self.type_map), name = 'tmap', dtype = tf.string) t_mt = tf.constant(self.model_type, name = 'model_type', dtype = tf.string) if self.srtab is not None : tab_info, tab_data = self.srtab.get() self.tab_info = tf.get_variable('t_tab_info', tab_info.shape, dtype = tf.float64, trainable = False, initializer = tf.constant_initializer(tab_info, dtype = tf.float64)) self.tab_data = tf.get_variable('t_tab_data', tab_data.shape, dtype = tf.float64, trainable = False, initializer = tf.constant_initializer(tab_data, dtype = tf.float64)) coord = tf.reshape (coord_, [-1, natoms[1] * 3]) atype = tf.reshape (atype_, [-1, natoms[1]]) dout \ = self.descrpt.build(coord_, atype_, natoms, box, mesh, davg = self.davg, dstd = self.dstd, suffix = suffix, reuse = reuse) dout = tf.identity(dout, name='o_descriptor') if self.srtab is not None : nlist, rij, sel_a, sel_r = self.descrpt.get_nlist() nnei_a = np.cumsum(sel_a)[-1] nnei_r = np.cumsum(sel_r)[-1] atom_ener = self.fitting.build (dout, input_dict, natoms, bias_atom_e = self.bias_atom_e, reuse = reuse, suffix = suffix) if self.srtab is not None : sw_lambda, sw_deriv \ = op_module.soft_min_switch(atype, rij, nlist, natoms, sel_a = sel_a, sel_r = sel_r, alpha = self.smin_alpha, rmin = self.sw_rmin, rmax = self.sw_rmax) inv_sw_lambda = 1.0 - sw_lambda # NOTICE: # atom energy is not scaled, # force and virial are scaled tab_atom_ener, tab_force, tab_atom_virial \ = op_module.tab_inter(self.tab_info, self.tab_data, atype, rij, nlist, natoms, sw_lambda, sel_a = sel_a, sel_r = sel_r) energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, natoms[0]]) tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape(tab_atom_ener, [-1]) atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener energy_raw = tab_atom_ener + atom_ener else : energy_raw = atom_ener energy_raw = tf.reshape(energy_raw, [-1, natoms[0]], name = 'o_atom_energy'+suffix) energy = tf.reduce_sum(global_cvt_2_ener_float(energy_raw), axis=1, name='o_energy'+suffix) force, virial, atom_virial \ = self.descrpt.prod_force_virial (atom_ener, natoms) if self.srtab is not None : sw_force \ = op_module.soft_min_force(energy_diff, sw_deriv, nlist, natoms, n_a_sel = nnei_a, n_r_sel = nnei_r) force = force + sw_force + tab_force force = tf.reshape (force, [-1, 3 * natoms[1]], name = "o_force"+suffix) if self.srtab is not None : sw_virial, sw_atom_virial \ = op_module.soft_min_virial (energy_diff, sw_deriv, rij, nlist, natoms, n_a_sel = nnei_a, n_r_sel = nnei_r) atom_virial = atom_virial + sw_atom_virial + tab_atom_virial virial = virial + sw_virial \ + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis = 1) virial = tf.reshape (virial, [-1, 9], name = "o_virial"+suffix) atom_virial = tf.reshape (atom_virial, [-1, 9 * natoms[1]], name = "o_atom_virial"+suffix) model_dict = {} model_dict['energy'] = energy model_dict['force'] = force model_dict['virial'] = virial model_dict['atom_ener'] = energy_raw model_dict['atom_virial'] = atom_virial return model_dict
def calc_ks_distance(pd_dataframe, ww, precision, time_start, time_split, time_end, crop=True, autobin=False): """ Calculate the Hellinger distance between reference (divided at time_split) and target period :param pd_dataframe: input pandas dataframe that is split into target and reference parts :param ww: window width :param precision: the number of points to evaluate the PDF :param time_start: start of series :param time_split: where to split the series :param time_end: where to stop :param crop: set values at half window size at the beginning and end of the time series to NaN :param autobin: automatically determine bin number and bin sizes :return: the hellinger distances of each target series time step with respect to the reference, and the sign """ # # pd_dataframe = new_data_pd # ww = WW # precision = precision # time_start = XMIN # time_split = XSPLIT # time_end = XMAX # autobin = True # make copy .. some issues before without ... ts_index = pd_dataframe.index t_2d_ann_pd = copy(pd_dataframe) OUT_overlap_pd = copy(pd.DataFrame(t_2d_ann_pd)) OUT_overlap_pd[:] = np.NaN OUT_overlap_pd_sign = copy(pd.DataFrame(t_2d_ann_pd)) OUT_overlap_pd_sign[:] = np.NaN # get they column keys (lat/lon) RowCols = t_2d_ann_pd.keys() # RowCol loop; go through the keys of pandas dataframe that are the row and column indices for RowCol in RowCols: # RowCol = u'0061.00113.0' # all y_all = t_2d_ann_pd[RowCol].values # check if there is data; some pixels are only NA if np.isnan(y_all).all(): continue # check if every data record is 0 if (y_all == 0).all(): continue # reference y_ref = t_2d_ann_pd[RowCol].ix[time_start:time_split].values if np.isnan(y_ref).all(): continue # target y_tar = t_2d_ann_pd[RowCol].ix[time_split:time_end].values # target - half window y_tar_ww = t_2d_ann_pd[RowCol].ix[str(int(time_split.split('-')[0]) - int(ww / 2)) + '-01-01':time_end] y_times_tar_ww = t_2d_ann_pd[RowCol].ix[ str(int(time_split.split('-')[0]) - int(ww / 2)) + '-01-01':time_end].index if np.isnan(y_tar).all(): continue # get the values for evaluation ybnds = x_range(y_all, stretch=1) # get the values for evaluation if autobin: # VERSION 1: # for the autmoatic bin size determination: # find the increment of the reference period and use this increment # to generate a seq from min to max y value _, tmp_bin_cent = np.histogram(y_ref) tmp_bin_inc = np.diff(tmp_bin_cent).mean() tmp_xseq_steps_n = np.diff(ybnds[0:2]) / tmp_bin_inc tmp_xseq = np.linspace(ybnds[0], ybnds[1], tmp_xseq_steps_n) # --- Version 1 is problematic for HD --- # # # VERSION 2: # # for the autmoatic bin size determination: # # find the increment of the reference period and use this increment # # to generate a seq from min to max y value # _, tmp_bin_cent = np.histogram(y_all) # tmp_bin_inc = np.diff(tmp_bin_cent).mean() # tmp_xseq_steps_n = np.diff(ybnds[0:2]) / tmp_bin_inc # tmp_xseq = np.linspace(ybnds[0], ybnds[1], tmp_xseq_steps_n) else: tmp_xseq = np.linspace(ybnds[0], ybnds[1], precision) # tmp_xseq_centers = tmp_xseq[0:-1] + ((ybnds[1] - ybnds[0]) / precision) ref_h_freq, _ = np.histogram(y_ref, bins=tmp_xseq) ref_pdf_list = ref_h_freq / (np.sum(ref_h_freq) * 1.0) ref_cdf_list = np.cumsum(ref_pdf_list) # tar_h_freq, _ = np.histogram(y_tar, bins=tmp_xseq) # tar_pdf_list = tar_h_freq / (np.sum(tar_h_freq) * 1.0) # tar_cdf_list = np.cumsum(tar_pdf_list) # dist_ks = np.nanmax(np.abs(tar_cdf_list - ref_cdf_list)) dist_ks, dist_ks_sign = calc_run_ks_hist(time_seq=y_times_tar_ww, tar_values=y_tar_ww, ref_values=ref_cdf_list, ww=ww, 
bin_centers=tmp_xseq) # and finally write into complete PD data frame OUT_overlap_pd[RowCol] = pd.DataFrame(dist_ks, index=y_times_tar_ww) if crop: # set values at (index <= XSPLIT and index > time_end - (ww/2)) to NaN time_split_year = re.split('-', time_split)[0] time_end_year = ts_index.max() ind_crop_lw = OUT_overlap_pd.index <= int(time_split_year) ind_crop_up = OUT_overlap_pd.index > int(time_end_year - int(ww / 2)) # OUT_overlap_pd_sign.ix[(ind_crop_lw | ind_crop_up)] = np.NaN OUT_overlap_pd.ix[(ind_crop_lw | ind_crop_up)] = np.NaN return OUT_overlap_pd #, OUT_overlap_pd_sign
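# Minimal sketch (with made-up data, not the dataframe used above) of the
# histogram -> cumsum construction of an empirical CDF on a fixed bin grid and
# the resulting Kolmogorov-Smirnov distance between a reference and a target sample.
import numpy as np

rng = np.random.default_rng(0)
y_ref = rng.normal(0.0, 1.0, 500)
y_tar = rng.normal(0.3, 1.0, 500)

bins = np.linspace(min(y_ref.min(), y_tar.min()), max(y_ref.max(), y_tar.max()), 51)
ref_pdf, _ = np.histogram(y_ref, bins=bins)
tar_pdf, _ = np.histogram(y_tar, bins=bins)
ref_cdf = np.cumsum(ref_pdf) / ref_pdf.sum()   # empirical CDF on the bin grid
tar_cdf = np.cumsum(tar_pdf) / tar_pdf.sum()

dist_ks = np.max(np.abs(tar_cdf - ref_cdf))    # KS statistic
print(round(float(dist_ks), 3))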
def matching_gen(A, K, D, m, eta, gamma, model_var):
    n = len(D)     # number of nodes; D is the n x n distance matrix
    K += epsilon   # epsilon is assumed to be a small module-level constant

    mseed = np.size(np.where(A.flat)) // 2

    if type(model_var) == tuple:
        mv1, mv2 = model_var
    else:
        mv1, mv2 = model_var, model_var

    if mv1 in ('powerlaw', 'power_law'):
        Fd = D**eta
    elif mv1 in ('exponential', ):
        Fd = np.exp(eta * D)

    if mv2 in ('powerlaw', 'power_law'):
        Fk = K**gamma
    elif mv2 in ('exponential', ):
        Fk = np.exp(gamma * K)

    Ff = Fd * Fk * np.logical_not(A)
    u, v = np.where(np.triu(np.ones((n, n)), 1))

    for ii in range(mseed, m):
        # cumulative-sum roulette wheel over the remaining candidate edges
        C = np.append(0, np.cumsum(Ff[u, v]))
        r = np.sum(np.random.random() * C[-1] >= C)
        uu = u[r]
        vv = v[r]
        A[uu, vv] = A[vv, uu] = 1

        # np.delete returns a copy, so the result is reassigned to drop uu and vv
        updateuu, = np.where(np.inner(A, A[:, uu]))
        updateuu = np.delete(updateuu, np.where(updateuu == uu))
        updateuu = np.delete(updateuu, np.where(updateuu == vv))

        c1 = np.append(A[:, uu], A[uu, :])
        for i in range(len(updateuu)):
            j = updateuu[i]
            c2 = np.append(A[:, j], A[j, :])

            use = np.logical_or(c1, c2)
            use[uu] = use[uu + n] = use[j] = use[j + n] = 0
            ncon = np.sum(c1[use]) + np.sum(c2[use])
            if ncon == 0:
                K[uu, j] = K[j, uu] = epsilon
            else:
                K[uu, j] = K[j, uu] = (
                    2 / ncon * np.sum(np.logical_and(c1[use], c2[use])) + epsilon)

        updatevv, = np.where(np.inner(A, A[:, vv]))
        updatevv = np.delete(updatevv, np.where(updatevv == uu))
        updatevv = np.delete(updatevv, np.where(updatevv == vv))

        c1 = np.append(A[:, vv], A[vv, :])
        for i in range(len(updatevv)):
            j = updatevv[i]
            c2 = np.append(A[:, j], A[j, :])

            use = np.logical_or(c1, c2)
            use[vv] = use[vv + n] = use[j] = use[j + n] = 0
            ncon = np.sum(c1[use]) + np.sum(c2[use])
            if ncon == 0:
                K[vv, j] = K[j, vv] = epsilon
            else:
                K[vv, j] = K[j, vv] = (
                    2 / ncon * np.sum(np.logical_and(c1[use], c2[use])) + epsilon)

        Ff = Fd * Fk * np.logical_not(A)

    return A
print("tEnd = ", tEnd, ", deltat = ", deltat) T = np.arange(tBegin, tEnd, deltat) ## ## Q = np.identity(len(X0)) * 0.1 print(Q) X = X0 LCE_T = [] norm_T = [] for t in T: X = integrate.odeint(Lor, X, (0, deltat))[-1] B = Q + deltat * np.dot(JacLor(t, X), Q) Q, R = np.linalg.qr( B ) # Factor the matrix B as qr, where q is orthonormal and r is upper-triangular. LCE_T.append(np.log2(np.abs(np.diag(R))) / deltat) LCE_T = np.array(LCE_T) ## ## # Discard initial timestesp i_init = 10 LCEv = np.cumsum(LCE_T[i_init:, :], 0) / (T / deltat + deltat)[i_init:, None] print("\nSystems's Lyapunov Exponents = ", LCEv[-1]) # Plot plt.plot(T[i_init:], LCEv) plt.xlabel("Time") plt.show()
def clu_gen(A, K, D, m, eta, gamma, model_var, x_fun):
    n = len(D)  # number of nodes; D is the n x n distance matrix
    mseed = np.size(np.where(A.flat)) // 2

    A = A > 0

    if type(model_var) == tuple:
        mv1, mv2 = model_var
    else:
        mv1, mv2 = model_var, model_var

    if mv1 in ('powerlaw', 'power_law'):
        Fd = D**eta
    elif mv1 in ('exponential', ):
        Fd = np.exp(eta * D)

    if mv2 in ('powerlaw', 'power_law'):
        Fk = K**gamma
    elif mv2 in ('exponential', ):
        Fk = np.exp(gamma * K)

    c = clustering_coef_bu(A)
    k = np.sum(A, axis=1)

    Ff = Fd * Fk * np.logical_not(A)
    u, v = np.where(np.triu(np.ones((n, n)), 1))

    for i in range(mseed + 1, m):
        # cumulative-sum roulette wheel over the remaining candidate edges
        C = np.append(0, np.cumsum(Ff[u, v]))
        r = np.sum(np.random.random() * C[-1] >= C)
        uu = u[r]
        vv = v[r]
        A[uu, vv] = A[vv, uu] = 1
        k[uu] += 1
        k[vv] += 1

        bu = A[uu, :].astype(bool)
        bv = A[vv, :].astype(bool)
        su = A[np.ix_(bu, bu)]  # subgraph among the neighbours of uu
        sv = A[np.ix_(bv, bv)]  # subgraph among the neighbours of vv

        bth = np.logical_and(bu, bv)
        c[bth] += 2 / (k[bth]**2 - k[bth])
        c[uu] = np.size(np.where(su.flat)) / (k[uu] * (k[uu] - 1))
        c[vv] = np.size(np.where(sv.flat)) / (k[vv] * (k[vv] - 1))
        c[k <= 1] = 0
        bth[uu] = 1
        bth[vv] = 1

        k_result = x_fun(c, bth)

        K[bth, :] = k_result
        K[:, bth] = k_result.T

        if mv2 in ('powerlaw', 'power_law'):
            Ff[bth, :] = Fd[bth, :] * K[bth, :]**gamma
            Ff[:, bth] = Fd[:, bth] * K[:, bth]**gamma
        elif mv2 in ('exponential', ):
            # degree term enters as exp(gamma * K), consistent with the initialization of Fk
            Ff[bth, :] = Fd[bth, :] * np.exp(K[bth, :] * gamma)
            Ff[:, bth] = Fd[:, bth] * np.exp(K[:, bth] * gamma)

        Ff = Ff * np.logical_not(A)

    return A
def drawPNL(dtesPnl, pnl, dtes, strategy_name, showFigure='no', toDatabase='no', dateStart=-1, pnlType='pnl'): db = db_quanLiang() if dateStart == -1: s1 = np.nonzero(dtes >= dtesPnl[0])[0][0] else: s1 = np.nonzero(dtes >= dateStart)[0][0] #s2 = np.nonzero(dtes<=dtesPnl[-1])[0][-1] d = dtes[s1:] r = np.zeros(d.shape) l = list(d) for (i, x) in enumerate(dtesPnl): r[l.index(x)] = pnl[i] r = np.array(r) m = np.mean(r) m2 = np.mean(r[r != 0]) v = np.std(r) v2 = np.std(r[r != 0]) s = m / v * np.sqrt(250) ar = np.maximum.accumulate(np.cumsum(r)) - np.cumsum(r) md = np.max(ar) dailyReturnRate = np.round(m, 5) tradeReturnRate = np.round(m2, 5) dailyStd = np.round(v, 5) tradeStd = np.round(v2, 5) sharpe = np.round(s, 2) mdd = np.round(md, 5) sortino = np.round(m * 250 / mdd, 2) if showFigure == 'yes': plt.figure() plt.plot(dtes2Label(d), np.cumsum(r)) plt.grid() plt.gcf().autofmt_xdate() # 自动旋转日期标记 plt.title(strategy_name + ' ' + pnlType + ' 策略平均日回报率:' + str(dailyReturnRate * 100) + '%, 平均每笔交易回报率:' + str(tradeReturnRate * 100) + '%,平均波动:' + str(dailyStd * 1e2) + '%, 平均每笔交易波动率:' + str(tradeStd * 1e2) + '%, sharpe值:' + str(sharpe) + ' 最大回撤:' + str(mdd * 100)) if (toDatabase == 'yes'): statisticsInfo = { 'tag': pnlType, '平均日回报率': dailyReturnRate, '平均每笔交易回报率': tradeReturnRate, '平均日波动率': dailyStd, '平均每笔交易波动率': tradeStd, 'Sharpe值': sharpe, '最大回撤': mdd, '索提诺比率': sortino } if pnlType == 'pnl': db.strategyBackTest.update_one({'strategy_name': strategy_name}, { '$set': { 'labels': list([int(x) for x in d]), 'pnl': list(np.cumsum(r)) } }, upsert=True) db.strategyBackTest.update_one({'strategy_name': strategy_name}, {'$set': { 'statistics': [] }}, upsert=True) db.strategyBackTest.update_one( {'strategy_name': strategy_name}, {'$set': { 'performance': statisticsInfo }}, upsert=True) else: db.strategyBackTest.update_one( {'strategy_name': strategy_name}, {'$set': { pnlType: list(np.cumsum(r)) }}, upsert=True) db.strategyBackTest.update_one( {'strategy_name': strategy_name}, {'$push': { 'statistics': statisticsInfo }})
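# Stand-alone sketch of the drawdown computation used in drawPNL: the running
# peak of the cumulative PnL minus the cumulative PnL itself; its maximum is
# the maximum drawdown. The daily returns below are made up.
import numpy as np

daily_returns = np.array([0.01, -0.02, 0.015, -0.03, 0.02])
equity = np.cumsum(daily_returns)                   # cumulative PnL curve
drawdown = np.maximum.accumulate(equity) - equity   # distance below the running peak
print(equity, drawdown, drawdown.max())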
def running_mean(x, N): cumsum = np.cumsum(np.insert(x, 0, 0)) return (cumsum[N:] - cumsum[:-N]) / float(N)
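# Usage sketch for running_mean above: the cumsum-difference trick computes an
# N-point moving average in a single pass and returns len(x) - N + 1 values.
import numpy as np

x = np.array([1., 2., 3., 4., 5., 6.])
print(running_mean(x, 3))   # -> [2. 3. 4. 5.]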
def evaluate_matches(matches): overlaps = opt.overlaps min_region_sizes = [ opt.min_region_sizes[0] ] dist_threshes = [ opt.distance_threshes[0] ] dist_confs = [ opt.distance_confs[0] ] # results: class x overlap ap = np.zeros( (len(dist_threshes) , len(CLASS_LABELS) , len(overlaps)) , np.float ) for di, (min_region_size, distance_thresh, distance_conf) in enumerate(zip(min_region_sizes, dist_threshes, dist_confs)): for oi, overlap_th in enumerate(overlaps): pred_visited = {} for m in matches: for p in matches[m]['pred']: for label_name in CLASS_LABELS: for p in matches[m]['pred'][label_name]: if 'filename' in p: pred_visited[p['filename']] = False for li, label_name in enumerate(CLASS_LABELS): y_true = np.empty(0) y_score = np.empty(0) hard_false_negatives = 0 has_gt = False has_pred = False for m in matches: pred_instances = matches[m]['pred'][label_name] gt_instances = matches[m]['gt'][label_name] # filter groups in ground truth gt_instances = [ gt for gt in gt_instances if gt['instance_id']>=1000 and gt['vert_count']>=min_region_size and gt['med_dist']<=distance_thresh and gt['dist_conf']>=distance_conf ] if gt_instances: has_gt = True if pred_instances: has_pred = True cur_true = np.ones ( len(gt_instances) ) cur_score = np.ones ( len(gt_instances) ) * (-float("inf")) cur_match = np.zeros( len(gt_instances) , dtype=np.bool ) # collect matches for (gti,gt) in enumerate(gt_instances): found_match = False num_pred = len(gt['matched_pred']) for pred in gt['matched_pred']: # greedy assignments if pred_visited[pred['filename']]: continue overlap = float(pred['intersection']) / (gt['vert_count']+pred['vert_count']-pred['intersection']) if overlap > overlap_th: confidence = pred['confidence'] # if already have a prediction for this gt, # the prediction with the lower score is automatically a false positive if cur_match[gti]: max_score = max( cur_score[gti] , confidence ) min_score = min( cur_score[gti] , confidence ) cur_score[gti] = max_score # append false positive cur_true = np.append(cur_true,0) cur_score = np.append(cur_score,min_score) cur_match = np.append(cur_match,True) # otherwise set score else: found_match = True cur_match[gti] = True cur_score[gti] = confidence pred_visited[pred['filename']] = True if not found_match: hard_false_negatives += 1 # remove non-matched ground truth instances cur_true = cur_true [ cur_match==True ] cur_score = cur_score[ cur_match==True ] # collect non-matched predictions as false positive for pred in pred_instances: found_gt = False for gt in pred['matched_gt']: overlap = float(gt['intersection']) / (gt['vert_count']+pred['vert_count']-gt['intersection']) if overlap > overlap_th: found_gt = True break if not found_gt: num_ignore = pred['void_intersection'] for gt in pred['matched_gt']: # group? 
if gt['instance_id'] < 1000: num_ignore += gt['intersection'] # small ground truth instances if gt['vert_count'] < min_region_size or gt['med_dist']>distance_thresh or gt['dist_conf']<distance_conf: num_ignore += gt['intersection'] proportion_ignore = float(num_ignore)/pred['vert_count'] # if not ignored append false positive if proportion_ignore <= overlap_th: cur_true = np.append(cur_true,0) confidence = pred["confidence"] cur_score = np.append(cur_score,confidence) # append to overall results y_true = np.append(y_true,cur_true) y_score = np.append(y_score,cur_score) # compute average precision if has_gt and has_pred: # compute precision recall curve first # sorting and cumsum score_arg_sort = np.argsort(y_score) y_score_sorted = y_score[score_arg_sort] y_true_sorted = y_true[score_arg_sort] y_true_sorted_cumsum = np.cumsum(y_true_sorted) # unique thresholds (thresholds,unique_indices) = np.unique( y_score_sorted , return_index=True ) num_prec_recall = len(unique_indices) + 1 # prepare precision recall num_examples = len(y_score_sorted) num_true_examples = y_true_sorted_cumsum[-1] precision = np.zeros(num_prec_recall) recall = np.zeros(num_prec_recall) # deal with the first point y_true_sorted_cumsum = np.append( y_true_sorted_cumsum , 0 ) # deal with remaining for idx_res,idx_scores in enumerate(unique_indices): cumsum = y_true_sorted_cumsum[idx_scores-1] tp = num_true_examples - cumsum fp = num_examples - idx_scores - tp fn = cumsum + hard_false_negatives p = float(tp)/(tp+fp) r = float(tp)/(tp+fn) precision[idx_res] = p recall [idx_res] = r # first point in curve is artificial precision[-1] = 1. recall [-1] = 0. # compute average of precision-recall curve recall_for_conv = np.copy(recall) recall_for_conv = np.append(recall_for_conv[0], recall_for_conv) recall_for_conv = np.append(recall_for_conv, 0.) stepWidths = np.convolve(recall_for_conv,[-0.5,0,0.5],'valid') # integrate is now simply a dot product ap_current = np.dot(precision, stepWidths) elif has_gt: ap_current = 0.0 else: ap_current = float('nan') ap[di,li,oi] = ap_current return ap
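# Toy sketch of the sort-then-cumsum step behind the precision/recall code
# above (here with the more common descending-score ordering; the snippet
# walks an ascending sort with unique thresholds instead). Scores and labels
# are made up.
import numpy as np

y_score = np.array([0.9, 0.8, 0.6, 0.4])
y_true  = np.array([1,   0,   1,   0  ])
order = np.argsort(-y_score)              # descending by confidence
tp = np.cumsum(y_true[order] == 1)        # true positives per threshold
fp = np.cumsum(y_true[order] == 0)        # false positives per threshold
precision = tp / (tp + fp)
recall = tp / y_true.sum()
print(precision, recall)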
def drawPriceChange(r, strategy_name, timeLabels, title='priceChange', tp=[240, 604], showGraph='yes'): if len(r) == 0: return db = db_quanLiang() dbt = db_tinySoftData() r[np.isfinite(r) == False] = 0 priceChange = np.mean(r, axis=0) priceChangeStd = np.std(r, axis=0) priceChangeStd[np.isfinite(priceChangeStd) == False] = 0 if (showGraph == 'yes'): plt.figure() plt.rcParams['font.sans-serif'] = [u'SimHei'] plt.rcParams['axes.unicode_minus'] = False default_dpi = plt.rcParamsDefault['figure.dpi'] plt.rcParams['figure.dpi'] = default_dpi * 2 plt.title('平均价格随时间变化图') plt.title(title) legends = [] if np.max(tp) < 600: linewidth = 1 else: linewidth = 0.25 for k in tp: plt.plot([k, k], [-0.023, 0.013], linewidth=linewidth) legends.append(timeLabels[k]) plt.plot(np.cumsum(priceChange), 'b-', marker="o", linewidth=0.25, markersize=0.25) plt.plot(priceChangeStd, 'k', linewidth=0.25, markersize=0.25) legends.append('分钟价格回报率变化累积') legends.append('分钟价格回报率标准差') plt.legend(legends, bbox_to_anchor=(1.0, 1.0), loc=2, borderaxespad=0.) plt.grid() db.strategyMinuteBar.update_one({'strategy_name': strategy_name}, { '$set': { 'labels': timeLabels, title: list(np.cumsum(priceChange)), title + 'Std': list(priceChangeStd), 'concernPoints': tp, 'priceType': '开盘价', '买入时间': timeLabels[tp[0]], '卖出时间': timeLabels[tp[1]], } }, upsert=True) if (title == 'priceChange'): db.strategyBackTest.update_one( {'strategy_name': strategy_name}, {'$set': { '买入时间': timeLabels[tp[0]], '卖出时间': timeLabels[tp[1]], }}, upsert=True) updateStrategyGeneratingStatus( strategy_name, '生成进度:55%。价格聚合分析完成。 ' + str(datetime.datetime.now()), 55)
plt.savefig(plotName) plt.close() if opts.analysisType == "cbclist": bounds = [15, 35] xlims = [15.0, 35.0] ylims = [1, 100000] plotName = "%s/rates.pdf" % (plotDir) plt.figure(figsize=(10, 8)) for ii, model in enumerate(models): legend_name = get_legend(model) bins, hist1 = lightcurve_utils.hist_results( model_tables[model]["peak_appmag_i"], Nbins=25, bounds=bounds) hist1_cumsum = float(cbccnt) * hist1 / np.sum(hist1) hist1_cumsum = np.cumsum(hist1_cumsum) plt.semilogy(bins, hist1_cumsum, '-', color=colors_names[ii], linewidth=3, label=legend_name) plt.xlabel(r"Apparent Magnitude [mag]", fontsize=24) plt.ylabel("Rate of apparent magnitude [per year]", fontsize=24) plt.legend(loc="best", prop={'size': 24}) plt.xticks(fontsize=24) plt.yticks(fontsize=24) plt.xlim(xlims) #plt.ylim(ylims) plt.savefig(plotName) plt.close()
def _compute_cs(self, folded_data, N): sigma = np.std(folded_data) m = np.mean(folded_data) s = np.cumsum(folded_data - m) * 1.0 / (N * sigma) R = np.max(s) - np.min(s) return R
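# Toy check of the statistic computed by _compute_cs above (the range R of the
# standardized cumulative sum): the body is repeated here on a synthetic folded
# light curve, since the original is a method and needs its enclosing object.
import numpy as np

folded = np.sin(np.linspace(0, 2 * np.pi, 100)) + 0.1
s = np.cumsum(folded - folded.mean()) / (len(folded) * folded.std())
R = s.max() - s.min()
print(R)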
scaler = preprocessing.StandardScaler().fit(trainx_filled)
train_x = scaler.transform(trainx_filled)
test_x = scaler.transform(testx_filled)

pca = PCA().fit(train_x)
# index of the first component at which the cumulative explained variance exceeds 0.9999
itemindex = np.where(np.cumsum(pca.explained_variance_ratio_) > 0.9999)
print('np.cumsum(pca.explained_variance_ratio_)',
      np.cumsum(pca.explained_variance_ratio_))

# Plot the cumulative sum of the explained variance
plt.figure()
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('Number of Components')
plt.ylabel('Variance (%)')  # for each component
plt.title('Principal Components Explained Variance')
plt.show()

pca_std = PCA(n_components=itemindex[0][0]).fit(train_x)
train_x = pca_std.transform(train_x)
test_x = pca_std.transform(test_x)
print(train_x)
print(test_x)
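# Sketch on synthetic data of a variant of the component-selection rule used
# above: keep the smallest number of principal components whose cumulative
# explained-variance ratio crosses the chosen threshold. The 0.9999 threshold
# mirrors the snippet; the data are random.
import numpy as np
from sklearn.decomposition import PCA

X = np.random.default_rng(0).normal(size=(200, 10))
cum_var = np.cumsum(PCA().fit(X).explained_variance_ratio_)
n_components = int(np.searchsorted(cum_var, 0.9999)) + 1   # first component count crossing the threshold
X_reduced = PCA(n_components=n_components).fit_transform(X)
print(n_components, X_reduced.shape)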
def load_tf_weights_in_openai_gpt(model, config, openai_checkpoint_folder_path): """Load tf pre-trained weights in a pytorch model (from NumPy arrays here)""" import re import numpy as np if ".ckpt" in openai_checkpoint_folder_path: openai_checkpoint_folder_path = os.path.dirname(openai_checkpoint_folder_path) logger.info("Loading weights from {}".format(openai_checkpoint_folder_path)) with open(openai_checkpoint_folder_path + "/parameters_names.json", "r", encoding="utf-8") as names_handle: names = json.load(names_handle) with open(openai_checkpoint_folder_path + "/params_shapes.json", "r", encoding="utf-8") as shapes_handle: shapes = json.load(shapes_handle) offsets = np.cumsum([np.prod(shape) for shape in shapes]) init_params = [np.load(openai_checkpoint_folder_path + "/params_{}.npy".format(n)) for n in range(10)] init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1] init_params = [param.reshape(shape) for param, shape in zip(init_params, shapes)] # This was used when we had a single embedding matrix for positions and tokens # init_params[0] = np.concatenate([init_params[1], init_params[0]], 0) # del init_params[1] init_params = [arr.squeeze() for arr in init_params] try: assert model.tokens_embed.weight.shape == init_params[1].shape assert model.positions_embed.weight.shape == init_params[0].shape except AssertionError as e: e.args += (model.tokens_embed.weight.shape, init_params[1].shape) e.args += (model.positions_embed.weight.shape, init_params[0].shape) raise model.tokens_embed.weight.data = torch.from_numpy(init_params[1]) model.positions_embed.weight.data = torch.from_numpy(init_params[0]) names.pop(0) # Pop position and token embedding arrays init_params.pop(0) init_params.pop(0) for name, array in zip(names, init_params): # names[1:n_transfer], init_params[1:n_transfer]): name = name[6:] # skip "model/" assert name[-2:] == ":0" name = name[:-2] name = name.split("/") pointer = model for m_name in name: if re.fullmatch(r"[A-Za-z]+\d+", m_name): scope_names = re.split(r"(\d+)", m_name) else: scope_names = [m_name] if scope_names[0] == "g": pointer = getattr(pointer, "weight") elif scope_names[0] == "b": pointer = getattr(pointer, "bias") elif scope_names[0] == "w": pointer = getattr(pointer, "weight") else: pointer = getattr(pointer, scope_names[0]) if len(scope_names) >= 2: num = int(scope_names[1]) pointer = pointer[num] try: assert ( pointer.shape == array.shape ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" except AssertionError as e: e.args += (pointer.shape, array.shape) raise try: assert ( pointer.shape == array.shape ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched" except AssertionError as e: e.args += (pointer.shape, array.shape) raise logger.info("Initialize PyTorch weight {}".format(name)) pointer.data = torch.from_numpy(array) return model
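# Self-contained illustration of the offset trick used above: the cumulative
# sum of the flattened parameter sizes gives the split points for carving one
# concatenated buffer back into individually shaped arrays. The shapes are
# made up, not OpenAI GPT's.
import numpy as np

shapes = [(3, 4), (4,), (2, 3)]
flat = np.arange(sum(int(np.prod(s)) for s in shapes), dtype=np.float32)
offsets = np.cumsum([np.prod(s) for s in shapes])
params = [chunk.reshape(shape)
          for chunk, shape in zip(np.split(flat, offsets)[:-1], shapes)]
print([p.shape for p in params])   # -> [(3, 4), (4,), (2, 3)]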
def calc_detection_voc_prec_rec( pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults=None, iou_thresh=0.5): """Calculate precision and recall based on evaluation code of PASCAL VOC. This function calculates precision and recall of predicted bounding boxes obtained from a dataset which has :math:`N` images. The code is based on the evaluation code used in PASCAL VOC Challenge. Args: pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N` sets of bounding boxes. Its index corresponds to an index for the base dataset. Each element of :obj:`pred_bboxes` is a set of coordinates of bounding boxes. This is an array whose shape is :math:`(R, 4)`, where :math:`R` corresponds to the number of bounding boxes, which may vary among boxes. The second axis corresponds to :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box. pred_labels (iterable of numpy.ndarray): An iterable of labels. Similar to :obj:`pred_bboxes`, its index corresponds to an index for the base dataset. Its length is :math:`N`. pred_scores (iterable of numpy.ndarray): An iterable of confidence scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`, its index corresponds to an index for the base dataset. Its length is :math:`N`. gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth bounding boxes whose length is :math:`N`. An element of :obj:`gt_bboxes` is a bounding box whose shape is :math:`(R, 4)`. Note that the number of bounding boxes in each image does not need to be same as the number of corresponding predicted boxes. gt_labels (iterable of numpy.ndarray): An iterable of ground truth labels which are organized similarly to :obj:`gt_bboxes`. gt_difficults (iterable of numpy.ndarray): An iterable of boolean arrays which is organized similarly to :obj:`gt_bboxes`. This tells whether the corresponding ground truth bounding box is difficult or not. By default, this is :obj:`None`. In that case, this function considers all bounding boxes to be not difficult. iou_thresh (float): A prediction is correct if its Intersection over Union with the ground truth is above this value.. Returns: tuple of two lists: This function returns two lists: :obj:`prec` and :obj:`rec`. * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \ for class :math:`l`. If class :math:`l` does not exist in \ either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \ set to :obj:`None`. * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \ for class :math:`l`. If class :math:`l` that is not marked as \ difficult does not exist in \ :obj:`gt_labels`, :obj:`rec[l]` is \ set to :obj:`None`. 
""" pred_bboxes = iter(pred_bboxes) pred_labels = iter(pred_labels) pred_scores = iter(pred_scores) gt_bboxes = iter(gt_bboxes) gt_labels = iter(gt_labels) if gt_difficults is None: gt_difficults = itertools.repeat(None) else: gt_difficults = iter(gt_difficults) n_pos = defaultdict(int) score = defaultdict(list) match = defaultdict(list) for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \ six.moves.zip( pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults): if gt_difficult is None: gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool) for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)): pred_mask_l = pred_label == l pred_bbox_l = pred_bbox[pred_mask_l] pred_score_l = pred_score[pred_mask_l] # sort by score order = pred_score_l.argsort()[::-1] pred_bbox_l = pred_bbox_l[order] pred_score_l = pred_score_l[order] gt_mask_l = gt_label == l gt_bbox_l = gt_bbox[gt_mask_l] gt_difficult_l = gt_difficult[gt_mask_l] n_pos[l] += np.logical_not(gt_difficult_l).sum() score[l].extend(pred_score_l) if len(pred_bbox_l) == 0: continue if len(gt_bbox_l) == 0: match[l].extend((0,) * pred_bbox_l.shape[0]) continue # VOC evaluation follows integer typed bounding boxes. pred_bbox_l = pred_bbox_l.copy() pred_bbox_l[:, 2:] += 1 gt_bbox_l = gt_bbox_l.copy() gt_bbox_l[:, 2:] += 1 iou = bbox_iou(pred_bbox_l, gt_bbox_l) gt_index = iou.argmax(axis=1) # set -1 if there is no matching ground truth gt_index[iou.max(axis=1) < iou_thresh] = -1 del iou selec = np.zeros(gt_bbox_l.shape[0], dtype=bool) for gt_idx in gt_index: if gt_idx >= 0: if gt_difficult_l[gt_idx]: match[l].append(-1) else: if not selec[gt_idx]: match[l].append(1) else: match[l].append(0) selec[gt_idx] = True else: match[l].append(0) for iter_ in ( pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults): if next(iter_, None) is not None: raise ValueError('Length of input iterables need to be same.') n_fg_class = max(n_pos.keys()) + 1 prec = [None] * n_fg_class rec = [None] * n_fg_class for l in n_pos.keys(): score_l = np.array(score[l]) match_l = np.array(match[l], dtype=np.int8) order = score_l.argsort()[::-1] match_l = match_l[order] tp = np.cumsum(match_l == 1) fp = np.cumsum(match_l == 0) # If an element of fp + tp is 0, # the corresponding element of prec[l] is nan. prec[l] = tp / (fp + tp) # If n_pos[l] is 0, rec[l] is None. if n_pos[l] > 0: rec[l] = tp / n_pos[l] return prec, rec
def angOffset_plateScale(dateString,plotTitle,plot=True): # read in pickled star positions in (x,y) picklefile = "identified_stars_"+dateString+".p" fo1=open(picklefile,'rb') dat=pickle.load(fo1) fo1.close() # read in text file of star positions in (RA,DEC) df = pd.read_csv('star_ra_dec_list.csv') # initialize quantities baselineNumber = 0 # for counting number of baselines angleRadecMinusXYArray = [] # for collecting angle differences plateScaleArray = [] # for collecting plate scale measurements # loop over dither positions for ditherPos in range(0,len(dat.keys())): keyName = "dither_pos_"+"%02i"%ditherPos # key for this dither parent = dat[keyName][1] # N star names # list of all combinations of names (no degeneracies) allCombs = list(itertools.combinations(parent, 2)) # loop over all baselines (i.e., take N, pick 2) for baseline in range(0,len(allCombs)): ## find position angle and distance in (x,y) space # retrieve x, y star1name = allCombs[baseline][0] star2name = allCombs[baseline][1] names = np.array(dat[keyName][1][:]) star1elem = int(np.where(names==star1name)[0]) # element number in names array star2elem = int(np.where(names==star2name)[0]) star1elem_2 = dat[keyName][0][star1elem] star2elem_2 = dat[keyName][0][star2elem] star1coords_xy = [dat[keyName][2]['[x]'][star1elem_2],dat[keyName][2]['[y]'][star1elem_2]] star2coords_xy = [dat[keyName][2]['[x]'][star2elem_2],dat[keyName][2]['[y]'][star2elem_2]] # sort according to y-position coords_low, coords_high, pos_angle_xy, name_low, name_high = order_Y( star1coords_xy, star2coords_xy, star1name, star2name) # find distance between the two stars del_y = np.subtract(coords_high[1],coords_low[1]) del_x = np.subtract(coords_high[0],coords_low[0]) dist_xy = np.sqrt(np.power(del_x,2)+np.power(del_y,2)) ## find position angle in (RA,DEC) space # (note that stars are in same order, which is important if they have nearly equal DEC or y) # retrieve RA, DEC radecStarElem_low = np.where(df[' shorthand']==' '+name_low)[0] # find element number radecStarElem_high = np.where(df[' shorthand']==' '+name_high)[0] raString_low = df[' RA'][radecStarElem_low].values decString_low = df[' DEC'][radecStarElem_low].values raString_high = df[' RA'][radecStarElem_high].values decString_high = df[' DEC'][radecStarElem_high].values # find angle, separation c_low = SkyCoord(raString_low+decString_low, unit=(u.hourangle, u.deg)) c_high = SkyCoord(raString_high+decString_high, unit=(u.hourangle, u.deg)) pos_angle_radec = c_low.position_angle(c_high).degree[0] # position angle, E of N sep_radec = c_low.separation(c_high).arcsec[0] # separation in asec baselineNumber += 1 # chalk up this baseline to the total # how much further E of W is the position angle from (RA,DEC) than (x,y)? angleDiff_1 = np.subtract(pos_angle_radec,pos_angle_xy) if (angleDiff_1 > 0): # if x,y angle opens further E of N than the RA,DEC angle angleDiff = np.mod( angleDiff_1, 360.) # mod is in case one angle is <0 and the other >180 else: # if difference between x,y angle and RA,DEC angle is negative angleDiff = np.copy(angleDiff_1) angleRadecMinusXYArray = np.append(angleRadecMinusXYArray,angleDiff) # append del_angle to array plateScaleArray = np.append(plateScaleArray,1000.*np.divide(sep_radec,dist_xy)) # append plate scale (mas/pix) ## make CDFs... 
# ...of angular offsets angleDiffArraySorted = sorted(angleRadecMinusXYArray) angleDiff_csf = np.cumsum(angleDiffArraySorted).astype("float32") angleDiff_csf_norm = np.divide(angleDiff_csf,np.max(angleDiff_csf)) # ...of plate scales plateScaleArraySorted = sorted(plateScaleArray) plateScale_csf = np.cumsum(plateScaleArraySorted).astype("float32") plateScale_csf_norm = np.divide(plateScale_csf,np.max(plateScale_csf)) ## find median, +- sigma values... # ...of angular offsets angleDiff_negSigmaPercentile = np.percentile(angleRadecMinusXYArray,15.9) angleDiff_50Percentile = np.percentile(angleRadecMinusXYArray,50) angleDiff_posSigmaPercentile = np.percentile(angleRadecMinusXYArray,84.1) # ...of plate scales plateScale_negSigmaPercentile = np.percentile(plateScaleArray,15.9) plateScale_50Percentile = np.percentile(plateScaleArray,50) plateScale_posSigmaPercentile = np.percentile(plateScaleArray,84.1) # prepare strings string1 = '{0:.3f}'.format(angleDiff_50Percentile) string2 = '{0:.3f}'.format(np.subtract(angleDiff_posSigmaPercentile,angleDiff_50Percentile)) string3 = '{0:.3f}'.format(np.subtract(angleDiff_50Percentile,angleDiff_negSigmaPercentile)) string4 = '{0:.3f}'.format(plateScale_50Percentile) string5 = '{0:.3f}'.format(np.subtract(plateScale_posSigmaPercentile,plateScale_50Percentile)) string6 = '{0:.3f}'.format(np.subtract(plateScale_50Percentile,plateScale_negSigmaPercentile)) # print info print('------------------------------') print('Number of stellar pair baselines:') print(baselineNumber) print('------------------------------') print('Need to rotate array E of N:\n'+string1+'/+'+string2+'/-'+string3+' deg') print('------------------------------') print('Plate scale:') print(string4+'/+'+string5+'/-'+string6+' mas/pix') if (plot): # plot rotation angle fig = plt.figure() ax = fig.add_subplot(111) ax.axvline(x=angleDiff_negSigmaPercentile,linestyle='--',color='k') ax.axvline(x=angleDiff_50Percentile,linestyle='-',color='k') ax.axvline(x=angleDiff_posSigmaPercentile,linestyle='--',color='k') ax.scatter(angleDiffArraySorted, angleDiff_csf_norm) ax.text(0.8, 0.1,s='Need to rotate array E of N:\n'+string1+'/+'+string2+'/-'+string3+' deg\n\nStellar baselines:\n'+str(baselineNumber)) plt.title('CDF of difference (E of N) between (RA, DEC) and (x, y) position angles on LMIRcam, '+plotTitle) plt.xlabel('Degrees E of N') plt.ylabel('Normalized CDF') plt.show() # plot plate scale fig = plt.figure() ax = fig.add_subplot(111) ax.axvline(x=plateScale_negSigmaPercentile,linestyle='--',color='k') ax.axvline(x=plateScale_50Percentile,linestyle='-',color='k') ax.axvline(x=plateScale_posSigmaPercentile,linestyle='--',color='k') ax.scatter(plateScaleArraySorted, plateScale_csf_norm) ax.text(10.4, 0.8,s='Plate scale:\n'+string4+'/+'+string5+'/-'+string6+' mas/pix\n\nStellar baselines:\n'+str(baselineNumber)) plt.title('Plate scale of LMIRcam, '+plotTitle) plt.xlabel('PS (mas/pix)') plt.ylabel('Normalized CDF') plt.show()
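# Compact sketch (toy offsets) of the cumulative-curve and percentile summary
# used above: sort the sample, accumulate with cumsum, normalize, and read off
# the 15.9 / 50 / 84.1 percentiles for a median +/- 1-sigma summary.
import numpy as np

offsets = np.random.default_rng(2).normal(0.5, 0.1, 200)   # stand-in for angleRadecMinusXYArray
sorted_offsets = np.sort(offsets)
curve_norm = np.cumsum(sorted_offsets) / np.cumsum(sorted_offsets).max()
lo, med, hi = np.percentile(offsets, [15.9, 50, 84.1])
print(round(med, 3), '+{:.3f}/-{:.3f}'.format(hi - med, med - lo))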