def csr_to_fm(self, X_csr, return_oh=True, indices=None):
    """Convert a CSR matrix into (one-hot encoded indices, values) pairs
    suitable for a factorization machine."""
    assert X_csr.shape == (self.n_samples, self.n_features)

    if indices is None:
        y = check_array(X_csr.data, ensure_2d=False, copy=True)
    else:
        if isinstance(indices, tuple):
            indices_samples, indices_features = indices
        elif isinstance(indices, sp.csc_matrix):
            indices_samples, indices_features = self.fm_to_indices(indices)
        y = X_csr[indices_samples, indices_features].A[0].copy()

    if not return_oh:
        return y

    X = check_array(X_csr, accept_sparse='coo', force_all_finite=False)
    n_rows, n_cols = X_csr.shape
    assert (n_rows, n_cols) == (self.n_samples, self.n_features)
    # The encoder is needed on both branches below, so build it once here.
    # Note: n_values is the older (pre-0.22) scikit-learn OneHotEncoder API.
    encoder = OneHotEncoder(n_values=[self.n_samples, self.n_features])
    if indices is None:
        X_ix = np.column_stack([X.row, X.col])
    else:
        # Element-wise `==` on arrays does not yield a single boolean;
        # use np.array_equal for the sanity checks (np.sorted does not exist).
        assert np.array_equal(np.sort(indices_samples), np.sort(X.row))
        assert np.array_equal(np.sort(indices_features), np.sort(X.col))
        X_ix = np.column_stack([indices_samples, indices_features])
    X_oh = encoder.fit_transform(X_ix)
    return X_oh, y
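# Minimal usage sketch for csr_to_fm. The wrapper object is hypothetical: any
# host exposing n_samples, n_features and fm_to_indices would do.
import numpy as np
import scipy.sparse as sp

X = sp.random(10, 5, density=0.3, format='csr', random_state=0)
# wrapper = SomeDatasetWrapper(n_samples=10, n_features=5)  # hypothetical host
# X_oh, y = wrapper.csr_to_fm(X, return_oh=True)
# X_oh has one row per stored entry, with one-hot sample and feature blocks;
# y holds the corresponding values from X.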
def line_intersect_grid(pt, xGr, yGr, zGr, returnvertices=False):
    """
    Intersect a polyline with a rectilinear MODFLOW grid.

    Vertices at the intersection of the polyline with the grid cell
    edges are returned. Optionally the original polyline vertices are
    returned as well; points outside the grid are not returned unless
    returnvertices=True.

    Parameters
    ----------
    pt : array_like
        A list of (x, y, z) points defining the vertices of a polyline
        that will be intersected with the rectilinear MODFLOW grid.
    xGr : numpy.ndarray
        x-coordinates of the edges of each MODFLOW column, dimensioned
        to NCOL + 1. Converted to a numpy.ndarray if necessary.
    yGr : numpy.ndarray
        y-coordinates of the edges of each MODFLOW row, dimensioned to
        NROW + 1. Converted to a numpy.ndarray if necessary.
    zGr : numpy.ndarray
        z-coordinates of the edges of each MODFLOW layer, dimensioned
        to NLAY + 1. Converted to a numpy.ndarray if necessary.
    returnvertices : bool
        If True, also include the original polyline vertices in the
        returned array (default is False).

    Returns
    -------
    numpy.ndarray
        Array of (x, y, z) points at the intersection of the provided
        polyline with the rectilinear MODFLOW grid.

    Examples
    --------
    >>> ptsout = line_intersect_grid(ptsin, xGr, yGr, zGr)
    """
    xGr = np.asarray(xGr, dtype=float)
    yGr = np.asarray(yGr, dtype=float)
    zGr = np.asarray(zGr, dtype=float)
    pt = np.atleast_2d(np.asarray(pt, dtype=float))
    dp = np.diff(pt, axis=0)  # direction vector of each segment
    # Parametric position along each segment at which it crosses each
    # grid plane (columns: x-planes, then y-planes, then z-planes).
    lam = np.hstack(((xGr[np.newaxis, :] - pt[:-1, 0:1]) / dp[:, 0:1],
                     (yGr[np.newaxis, :] - pt[:-1, 1:2]) / dp[:, 1:2],
                     (zGr[np.newaxis, :] - pt[:-1, 2:3]) / dp[:, 2:3]))
    p_out = []
    for i, la in enumerate(lam):
        # Keep only crossings strictly inside the segment, in order.
        la = np.sort(la[np.logical_and(la > 0.0, la < 1.0)])
        if returnvertices:
            la = np.hstack((0.0, la))
        p_out += list(pt[i] + la[:, np.newaxis] * dp[i])
    if returnvertices:
        p_out.append(pt[-1])
    return np.array(p_out)
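# Quick sanity check for line_intersect_grid: a diagonal segment through a
# 2x2x2 unit grid crosses the three interior planes at the same point.
import numpy as np

xGr = yGr = zGr = np.array([0.0, 0.5, 1.0])
pts = [(0.1, 0.1, 0.1), (0.9, 0.9, 0.9)]
out = line_intersect_grid(pts, xGr, yGr, zGr, returnvertices=True)
print(out)  # endpoints plus the (repeated) crossing at (0.5, 0.5, 0.5)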
def recommend(dataMat, user, N, simMeas=cos_sim, percentage=0.9):
    """Return the N items with the highest predicted ratings."""
    # Build the list of items this user has not rated yet.
    unratedItems = nonzero(dataMat[user, :].A == 0)[1]
    print("==========non-predicted items=========")
    print(unratedItems)
    print("user", user, "has", len(unratedItems), "unrated movies")
    print("==========non-predicted items=========\n")
    if len(unratedItems) == 0:
        return 'you rated everything'  # Exit if everything is already rated.
    # Run the singular value decomposition on the full matrix first.
    xformedItems = svdExt(dataMat, percentage)
    itemScores = []
    # Predict a score for every unrated item, using the given similarity
    # measure (the original hard-coded cos_sim and ignored simMeas).
    for item in unratedItems:
        # print("now predicting item_id:", item)
        estimatedScore = svdEst(xformedItems, dataMat, user, simMeas, item)
        # print("the estimated score of item_id=", item, "------>", estimatedScore, "\n")
        itemScores.append((item, estimatedScore))
    # Sort only the predicted items, highest score first, and recommend the
    # top N; items the user has already rated are not considered.
    itemScores = sorted(itemScores, key=lambda x: x[1], reverse=True)
    print(itemScores)
    # Return the ids of the N highest-scoring items with their predicted scores.
    return itemScores[:N]
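# Hedged usage sketch for recommend, assuming svdExt, svdEst, and cos_sim from
# this module are importable; the ratings matrix below is illustrative.
from numpy import mat

ratings = mat([[4, 4, 0, 2, 2],
               [4, 0, 0, 3, 3],
               [4, 0, 0, 1, 1],
               [1, 1, 1, 2, 0],
               [2, 2, 2, 0, 0],
               [1, 1, 1, 0, 0],
               [5, 5, 5, 0, 0]])
# Top-3 recommendations for user 2, keeping 90% of the singular energy.
print(recommend(ratings, 2, N=3, percentage=0.9))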
def local_empirical_measure(tau, X, T, f,
                            b=lambda tau, t: 2 * np.sqrt(f(tau, t))):
    b0 = b(tau, T)
    bb = np.sort(b0)
    # Water-filling over the sorted bandwidths: accumulate values until the
    # next one sits above the running level, then stop. The level mu is
    # chosen so that the returned weights 2*max(mu - b0, 0) sum to one.
    bsum = 0.
    for j in range(T.shape[0]):
        bsum += bb[j]
        if bb[j] >= (0.5 + bsum) / (j + 1):
            bsum -= bb[j]
            break
    mu = (0.5 + bsum) / j
    return 2. * np.maximum(mu - b0, 0.)
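# Sanity check for local_empirical_measure: the returned weights should form
# a probability vector (f, tau, and T below are illustrative choices).
import numpy as np

f = lambda tau, t: (tau - t) ** 2 + 0.01
T = np.linspace(0.0, 1.0, 20)
w = local_empirical_measure(0.5, None, T, f)
print(w.sum())  # ~1.0 whenever the water level is found inside the sample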
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection


def plot_elements(tris, nodes):
    """Plot the unique edges of a triangle mesh."""
    edges = set()
    for t in range(tris.shape[0]):
        # Sort the vertex ids so each shared edge is stored only once,
        # regardless of the winding of the triangle that owns it.
        t_verts = np.sort(tris[t])
        edges.add((t_verts[0], t_verts[1]))
        edges.add((t_verts[0], t_verts[2]))
        edges.add((t_verts[1], t_verts[2]))
    edges = np.array(list(edges))
    all_lines = nodes[edges]
    coll = LineCollection(all_lines)
    ax = plt.gca()
    ax.add_collection(coll)
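# Example: draw the edges of two triangles sharing an edge (a minimal mesh).
import numpy as np
import matplotlib.pyplot as plt

nodes = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
tris = np.array([[0, 1, 2], [1, 3, 2]])
plot_elements(tris, nodes)
plt.gca().autoscale()  # make sure the view limits cover the collection
plt.show()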
def distances_between_nodes(heat_print, mode, node1, node2, type_comp="auc",
                            mode_diff="agg", normalize=True, plot=False,
                            savefig=False, filefig="plots/nodes_dist.png"):
    ### Computes the distance between two nodes of the same graph,
    ### based on their heat profiles.
    if type_comp == "auc":
        d = compute_auc(heat_print[mode].iloc[:, node1],
                        heat_print[mode].iloc[:, node2],
                        normalize=normalize, mode_diff=mode_diff,
                        plot=plot, savefig=savefig, filefig=filefig)
    elif type_comp == "emd":
        ### Required params:
        ### P, Q - two histograms of size H
        ### D    - the HxH matrix of the ground distance between bins of P and Q
        H = 30
        hist1, bins_arr = np.histogram(heat_print[mode].iloc[:, node1], H)
        #### Normalize the histograms so they integrate to one.
        w = np.diff(bins_arr)  # bin widths
        hist1 = hist1 * 1.0 / np.dot(w, hist1)
        hist2, _ = np.histogram(heat_print[mode].iloc[:, node2], bins_arr)
        hist2 = hist2 * 1.0 / np.dot(w, hist2)
        D = np.zeros((H, H))
        for i in range(H):
            for j in range(H):
                D[i, j] = np.abs(bins_arr[i + 1] - bins_arr[j + 1])
        d = emd(hist1, hist2, D)
    elif type_comp == "corr":
        v1 = heat_print[mode].iloc[:, node1]
        v2 = heat_print[mode].iloc[:, node2]
        d = 1 - v1.dot(v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    elif type_comp == "corr_sorted":
        v1 = np.sort(heat_print[mode].iloc[:, node1])
        v2 = np.sort(heat_print[mode].iloc[:, node2])
        d = 1 - v1.dot(v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    elif type_comp == "ks":
        test1 = heat_print[mode].iloc[:, node1]
        test2 = heat_print[mode].iloc[:, node2]
        d = np.max(np.abs(np.sort(test1) - np.sort(test2)))
    elif type_comp == "ks_p":
        test1 = heat_print[mode].iloc[:, node1]
        test2 = heat_print[mode].iloc[:, node2]
        stats = sc.stats.ks_2samp(test1, test2)
        d = 1 - stats[1]
        # print(stats[1])
    elif type_comp == "ks_r":
        sorted1 = np.sort(heat_print[mode].iloc[:, node1])
        sorted2 = np.sort(heat_print[mode].iloc[:, node2])
        sorted3 = np.sort(sorted1.tolist() + sorted2.tolist())
        ks = [None] * len(sorted3)
        # print("len sorted3:", len(sorted3))
        for i in range(len(sorted3)):
            ks[i] = (len([e for e in sorted1 if e <= sorted3[i]])
                     - len([e for e in sorted2 if e <= sorted3[i]])) * 1.0 / len(sorted1)
        return np.max(np.abs(ks))
    else:
        print("comparison type not recognized!!!")
        d = np.nan
    return d
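# Hedged usage sketch: heat_print is assumed to be a sequence of DataFrames,
# one per diffusion scale ("mode"), with one column of heat-profile values per
# node, as the indexing above suggests. The data below is random filler.
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
heat_print = [pd.DataFrame(rng.rand(100, 4))]  # 1 scale, 4 nodes
print(distances_between_nodes(heat_print, mode=0, node1=0, node2=1,
                              type_comp="ks"))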
import numpy as np
import csv
import operator
import readcol

files = readcol.readcol('/home/shared/data/h148/testarraymainbh.orbit')
csv1 = csv.reader(files, delimiter=',')
# Sort the rows by their first column. np.sorted does not exist; the
# built-in sorted handles the reader's row lists directly.
sort = sorted(csv1, key=operator.itemgetter(0))
for eachline in sort:
    print(eachline)

print(files.ndim)
print(files[0:, 1])
def generate(self, N=None, K=None, hyperparams=None, mode='predictive',
             symmetric=True, **kwargs):
    if mode == 'generative':
        self.update_hyper(hyperparams)
        alpha, gmma, delta = self.get_hyper()
        N = int(N)
        _name = self.__module__.split('.')[-1]
        if _name == 'immsb_cgs':
            # @todo: compute the variance for random simulation
            # Number of tables in the CRF
            if symmetric is True:
                m = alpha * N * (digamma(N + alpha) - digamma(alpha))
            else:
                m = alpha * N * (digamma(2 * N + alpha) - digamma(alpha))
            # Number of classes in the CRF
            K = int(gmma * (digamma(m + gmma) - digamma(gmma)))

        alpha = gem(gmma, K)
        i = 0
        while i < 3:
            try:
                dirichlet(alpha, size=N)  # probe draw to test the weights
                i = 0
                break
            except ZeroDivisionError:
                # Sometimes improbable values!
                alpha = gem(gmma, K)
                i += 1

        # Generate Theta
        if i > 0:
            # Dirichlet sampling kept failing: fall back to a hard
            # assignment on the largest third of the weights.
            # (reverse=True belongs to sorted, not to the inner zip.)
            params, order = zip(*sorted(zip(alpha, range(len(alpha))),
                                        reverse=True))
            _K = int(1 / 3. * len(alpha))
            alpha = np.asarray(alpha)
            alpha[np.asarray(order[:_K])] = 1
            alpha[np.asarray(order[_K:])] = 0
            theta = multinomial(1, alpha, size=N)
        else:
            theta = dirichlet(alpha, size=N)

        # Generate Phi
        phi = beta(delta[0], delta[1], size=(K, K))
        if symmetric is True:
            phi = np.triu(phi) + np.triu(phi, 1).T

        self._theta = theta
        self._phi = phi
    elif mode == 'predictive':
        try:
            theta, phi = self.get_params()
        except Exception:
            return self.generate(N, K, hyperparams, 'generative', symmetric)
        K = theta.shape[1]

    pij = self.likelihood(theta, phi)

    # Threshold
    #pij[pij >= 0.5 ] = 1
    #pij[pij < 0.5 ] = 0
    #Y = pij

    # Sampling
    pij = np.clip(pij, 0, 1)
    Y = sp.stats.bernoulli.rvs(pij)

    #for j in xrange(N):
    #    print 'j %d' % j
    #    for i in xrange(N):
    #        zj = categorical(theta[j])
    #        zi = categorical(theta[i])
    #        Y[j, i] = sp.stats.bernoulli.rvs(B[zj, zi])
    return Y, theta, phi
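# Hedged usage sketch for generate. model is assumed to be an instance of the
# class this method belongs to (e.g. an immsb_cgs model); the hyperparameter
# tuple layout (alpha, gmma, delta) is inferred from get_hyper above and is
# an assumption, not the documented API.
# Y, theta, phi = model.generate(N=100, hyperparams=(1.0, 1.0, (0.5, 0.5)),
#                                mode='generative', symmetric=True)
# Y is the sampled N x N adjacency matrix, theta the node-class memberships,
# phi the class-class connection probabilities.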
def preprocess_dataset(Xtrain, Ytrain, Xtest=None, Ytest=None, dtype=None,
                       zscore=True, denan=True, delays=[1, 2, 3, 4],
                       order='C', trim_random=False, trim_regressors=None,
                       trim_regressands=None):
    """preprocess a dataset

    Parameters
    ----------
    - Xtrain: array
    - Ytrain: array
    - Xtest: array
    - Ytest: array
    - dtype: numpy dtype to use
    - zscore: bool of whether to zscore data
    - denan: bool of whether to denan arrays
    - delays: list of int delays to apply to the X arrays
    - order: str of 'C' or 'F' for C-ordering or Fortran ordering
    - trim_random: bool of whether to trim dimensions at random
    - trim_regressors: float fraction of X columns to keep
    - trim_regressands: float fraction of Y columns to keep
    """
    data = {
        'Xtrain': Xtrain,
        'Ytrain': Ytrain,
        'Xtest': Xtest,
        'Ytest': Ytest,
    }
    data = {key: value for key, value in data.items() if value is not None}

    if dtype is not None:
        data = {key: value.astype(dtype) for key, value in data.items()}
    if zscore:
        data = {key: scipy.stats.zscore(value) for key, value in data.items()}
    if denan:
        data = {key: np.nan_to_num(value) for key, value in data.items()}
    if delays:
        for key in list(data.keys()):
            if key.startswith('X'):
                data[key] = make_delayed(data[key], delays)

    if order == 'F':
        data = {key: np.asfortranarray(value) for key, value in data.items()}
    elif order == 'C':
        data = {
            key: np.ascontiguousarray(value)
            for key, value in data.items()
        }

    # trim dimensions
    if trim_random:
        # Use the lambda's own arguments so the same function works for both
        # the regressor and the regressand trims (the original closed over
        # n_regressors, which would be wrong for the Y arrays).
        f_keep = lambda before, after: np.sort(
            np.random.choice(np.arange(before), after, replace=False)
        )
    else:
        f_keep = lambda before, after: slice(None, after)

    if trim_regressors is not None:
        n_regressors = data['Xtrain'].shape[1]
        new_n_regressors = int(n_regressors * trim_regressors)
        keep = f_keep(n_regressors, new_n_regressors)
        data['Xtrain'] = data['Xtrain'][:, keep]
        data['Xtest'] = data['Xtest'][:, keep]
    if trim_regressands is not None:
        n_regressands = data['Ytrain'].shape[1]
        new_n_regressands = int(n_regressands * trim_regressands)
        keep = f_keep(n_regressands, new_n_regressands)
        data['Ytrain'] = data['Ytrain'][:, keep]
        data['Ytest'] = data['Ytest'][:, keep]

    return data
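# Hedged usage sketch for preprocess_dataset, assuming numpy and scipy.stats
# are imported at module level. delays=None skips the delay expansion so the
# example does not depend on the module's make_delayed helper.
import numpy as np

rng = np.random.RandomState(0)
data = preprocess_dataset(
    Xtrain=rng.randn(100, 20),
    Ytrain=rng.randn(100, 5),
    Xtest=rng.randn(50, 20),
    Ytest=rng.randn(50, 5),
    delays=None,
    trim_random=True,
    trim_regressors=0.5,  # keep a random half of the X columns
)
print(data['Xtrain'].shape)  # (100, 10)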