def build_sparse_crank_nicolson(s):
    """(internal) Set up the sparse matrices for the Crank-Nicolson method. """
    A = sparse.lil_matrix((s.n, s.n))
    C = sparse.lil_matrix((s.n, s.n))
    for j in xrange(0, s.n):
        xd = j + 1 + s.xs
        ssxx = (s.sigma * xd) ** 2
        A[j, j] = 1.0 - 0.5*s.dt*(ssxx + s.r)
        C[j, j] = 1.0 + 0.5*s.dt*(ssxx + s.r)
        if j > 0:
            A[j, j-1] = 0.25*s.dt*(+ssxx - s.r*xd)
            C[j, j-1] = 0.25*s.dt*(-ssxx + s.r*xd)
        if j < s.n-1:
            A[j, j+1] = 0.25*s.dt*(+ssxx + s.r*xd)
            C[j, j+1] = 0.25*s.dt*(-ssxx - s.r*xd)
    s.A = A.tocsr()
    s.C = linsolve.splu(C.tocsc())  # perform sparse LU decomposition (splu wants CSC)
    # Buffer to store right-hand side of the linear system C u = v
    s.v = empty((s.n, ))
def _submit_mapnode(self, jobid):
    if jobid in self.mapnodes:
        return True
    self.mapnodes.append(jobid)
    mapnodesubids = self.procs[jobid].get_subnodes()
    numnodes = len(mapnodesubids)
    logger.info('Adding %d jobs for mapnode %s' % (numnodes,
                                                   self.procs[jobid]._id))
    for i in range(numnodes):
        self.mapnodesubids[self.depidx.shape[0] + i] = jobid
    self.procs.extend(mapnodesubids)
    self.depidx = ssp.vstack((self.depidx,
                              ssp.lil_matrix(np.zeros(
                                  (numnodes, self.depidx.shape[1])))),
                             'lil')
    self.depidx = ssp.hstack((self.depidx,
                              ssp.lil_matrix(
                                  np.zeros((self.depidx.shape[0], numnodes)))),
                             'lil')
    self.depidx[-numnodes:, jobid] = 1
    self.proc_done = np.concatenate((self.proc_done,
                                     np.zeros(numnodes, dtype=bool)))
    self.proc_pending = np.concatenate((self.proc_pending,
                                        np.zeros(numnodes, dtype=bool)))
    return False
def compute_cond(self, X, y):
    self.knn = NearestNeighbors(self.k).fit(X)
    c = sparse.lil_matrix((self.num_labels, self.k + 1), dtype="i8")
    cn = sparse.lil_matrix((self.num_labels, self.k + 1), dtype="i8")
    label_info = get_matrix_in_format(y, "dok")
    neighbors = self.knn.kneighbors(X, self.k, return_distance=False)
    for instance in xrange(self.num_instances):
        deltas = label_info[neighbors[instance], :].sum(axis=0)
        for label in xrange(self.num_labels):
            if label_info[instance, label] == 1:
                c[label, deltas[0, label]] += 1
            else:
                cn[label, deltas[0, label]] += 1

    c_sum = c.sum(axis=1)
    cn_sum = cn.sum(axis=1)

    cond_prob_true = sparse.lil_matrix((self.num_labels, self.k + 1), dtype="float")
    cond_prob_false = sparse.lil_matrix((self.num_labels, self.k + 1), dtype="float")
    for label in xrange(self.num_labels):
        for neighbor in xrange(self.k + 1):
            cond_prob_true[label, neighbor] = (self.s + c[label, neighbor]) / (
                self.s * (self.k + 1) + c_sum[label, 0])
            cond_prob_false[label, neighbor] = (self.s + cn[label, neighbor]) / (
                self.s * (self.k + 1) + cn_sum[label, 0])
    return cond_prob_true, cond_prob_false
def __init__(self, name):
    ManyBodyHam.__init__(self)
    self.mbhd = np.zeros((self.mbDim))
    self.mbhc = sparse.lil_matrix((self.mbDim, self.mbDim), dtype=complex)
    self.mbhr = sparse.lil_matrix((self.mbDim, self.mbDim), dtype=float)
    self.name = name
def criarMatrizes(self, listaCompleta):
    # matriz1 = numpy.zeros((self.grupo.numeroDeMembros(), self.grupo.numeroDeMembros()), dtype=numpy.int32)
    # matriz2 = numpy.zeros((self.grupo.numeroDeMembros(), self.grupo.numeroDeMembros()), dtype=numpy.float32)
    matriz1 = sparse.lil_matrix((self.grupo.numeroDeMembros(), self.grupo.numeroDeMembros()))
    matriz2 = sparse.lil_matrix((self.grupo.numeroDeMembros(), self.grupo.numeroDeMembros()))

    keys = listaCompleta.keys()
    keys.sort(reverse=True)

    for k in keys:
        for pub in listaCompleta[k]:
            numeroDeCoAutores = len(pub.idMembro)
            if numeroDeCoAutores > 1:
                # For every pair of co-authors of the publication:
                # (1) update the collaboration (adjacency) counter
                # (2) increment the collaboration 'frequency'
                combinacoes = self.calcularCombinacoes(pub.idMembro)
                for c in combinacoes:
                    matriz1[c[0], c[1]] += 1
                    matriz1[c[1], c[0]] += 1
                    matriz2[c[0], c[1]] += 1.0 / (numeroDeCoAutores - 1)
                    matriz2[c[1], c[0]] += 1.0 / (numeroDeCoAutores - 1)
    return [matriz1, matriz2]
def compute_Belief_Prop(H):
    '''Generate the matrices P, S_, B and the sizes q, m, n from the
    parity-check matrix H (ported from 1-based MATLAB indexing).
    The nnz-size hints of the original spalloc calls have no lil_matrix
    equivalent and are dropped.'''
    global B, P, S_, q, m, n
    H = np.asarray(H)
    m, n = np.shape(H)
    q = np.count_nonzero(H)      # number of edges in the Tanner graph

    P = lil_matrix((q, q))
    S_ = lil_matrix((q, q))

    # P couples edges that share a variable node (column of H)
    k = 0
    for j in range(n):
        I = np.nonzero(H[:, j])[0]
        for x in range(len(I)):
            for y in range(x + 1, len(I)):
                P[k + x, k + y] = 1
                P[k + y, k + x] = 1
        k += len(I)

    # S_ couples edges that share a check node (row of H)
    k = 0
    for i in range(m):
        J = np.nonzero(H[i, :])[0]
        for x in range(len(J)):
            for y in range(x + 1, len(J)):
                S_[k + x, k + y] = 1
                S_[k + y, k + x] = 1
        k += len(J)

    # B maps every edge (nonzero of H, enumerated row by row) to its column,
    # i.e. the equivalent of MATLAB sparse(1:q, b, ones(q,1), q, n)
    B = lil_matrix((q, n))
    e = 0
    for i in range(m):
        for j in np.nonzero(H[i, :])[0]:
            B[e, j] = 1
            e += 1
def QPModel(self, addW=False):
    A = self.A
    c = self.c
    s = CyClpSimplex()
    x = s.addVariable('x', self.nCols)
    if addW:
        w = s.addVariable('w', self.nCols)

    s += A * x >= 1

    n = self.nCols
    if not addW:
        s += 0 <= x <= 1
    else:
        s += x + w == 1
        s += 0 <= w <= 1
        ## s += -1 <= x <= 1

    s.objective = c * x

    if addW:
        G = sparse.lil_matrix((2*n, 2*n))
        for i in xrange(n/2, n):  # xrange(n-1):
            G[i, i] = 1
        G[2*n-1, 2*n-1] = 10**-10
    else:
        G = sparse.lil_matrix((n, n))
        for i in xrange(n/2, n):  # xrange(n-1):
            G[i, i] = 1

    s.Hessian = G
    return s
def convert_graph_connectivity_to_sparse(G, nodes):
    """
    Given a networkx graph, return sparse adjacency matrix S and H

    S and H are different in that S's entries contain edge weights
    (if there are multiple edges, behavior is overwrite),
    and H just has a 1 for every non-zero entry.

    The edge data right now is ((strand1, start1, end1), (strand2, start2, end2), score)
    """
    n = len(nodes)
    S = sparse.lil_matrix((n, n))
    H = sparse.lil_matrix((n, n))
    nodes_to_index = dict(zip(nodes, range(n)))
    for e in G.edges_iter(data=True):
        i = nodes_to_index[e[0]]
        j = nodes_to_index[e[1]]
        try:
            w = e[2][2]
        except:
            w = e[2]
        S[i, j] = w
        S[j, i] = w
        H[i, j] = 1
        H[j, i] = 1
    # we do a lot of column-slicing, so convert to CSC for efficiency
    S = S.tocsc()
    H = H.tocsr()
    return S, H
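# Standalone sketch (toy edge list, no networkx; the node indices and scores
# below are illustrative, not from the original code) of the same idea: S keeps
# the edge weight while H keeps only a 0/1 adjacency flag, and both are
# symmetrised before conversion to CSC/CSR.
from scipy import sparse

edges = [(0, 1, 2.5), (1, 2, 0.7)]      # (i, j, score)
n = 3
S = sparse.lil_matrix((n, n))
H = sparse.lil_matrix((n, n))
for i, j, w in edges:
    S[i, j] = S[j, i] = w
    H[i, j] = H[j, i] = 1
S, H = S.tocsc(), H.tocsr()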
def get_corr_pred(self, sctx, u, tn, tn1):
    ndofs = self.domain.n_dofs
    # self.K.data[::] = 0.0
    self.K = sparse.lil_matrix((ndofs, ndofs), dtype=float_)
    self.F_int[:] = 0.0
    e_arr_size = self.e_arr_size

    for elem in sctx.sdomain.elements:
        e_id = elem.id_number
        ix = elem.get_dof_map()
        sctx.elem = elem
        sctx.elem_state_array = sctx.state_array[e_id * e_arr_size: (e_id + 1) * e_arr_size]
        sctx.X = elem.get_X_mtx()
        f, k = self.fets_eval.get_corr_pred(sctx, u[ix_(ix)], tn, tn1)
        # self.K_temp.data[:][:] = 0.
        self.K_temp = sparse.lil_matrix((ndofs, ndofs), dtype=float_)
        a = 0
        for i in ix:
            self.K_temp.rows[i] = ix
            self.K_temp.data[i][:] = k[a][:]
            a += 1
        # print K_temp
        self.K = self.K + self.K_temp
        self.F_int[ix_(ix)] += f
    return self.F_int, self.K
def partition_train_data(
        counts,
        nonzero,
        percent=0.8,
        num_users=NUM_USER,
        num_items=NUM_SONG):
    print "Start to partition data...\n"
    t0 = time.time()
    num_train = int(np.floor(nonzero * percent))
    num_validate = int(nonzero - num_train)

    shuffle_index = range(nonzero)
    np.random.shuffle(shuffle_index)
    validate_index = shuffle_index[:num_validate]
    # sort the held-out indices so they can be consumed sequentially below
    # (sorting a temporary slice, as before, had no effect)
    validate_index.sort()

    validate_counts = sparse.lil_matrix((num_users, num_items), dtype=np.int32)
    idx, curr = 0, 0
    counts = sparse.lil_matrix(counts)
    counts_coo = counts.tocoo()
    for row, col, count in itertools.izip(counts_coo.row, counts_coo.col, counts_coo.data):
        if idx < num_validate and validate_index[idx] == curr:
            validate_counts[row, col] = count
            counts[row, col] = 0
            idx += 1
        curr += 1
    t1 = time.time()
    print 'Finished partitioning data in %f seconds\n' % (t1 - t0)
    return counts.tocsr(), validate_counts.tocoo()
def transformation_matrices(self):
    """Returns the sparse transformation matrix to turn quantities
    defined on faces to loop-star basis

    For vector quantities, assumes that the face-based quantity has been
    packed to a 2D array of size (n_basis*3, n_basis*3)

    For scalar quantities, assumes that the face-based quantity has been
    packed to a 2D array of size (n_basis, n_basis)
    """
    num_basis = len(self)
    num_tri = len(self.mesh.polygons)
    # scalar_transform = np.zeros((num_basis, num_tri), np.float64)
    # vector_transform = np.zeros((num_basis, 3*num_tri), np.float64)
    scalar_transform = lil_matrix((num_basis, num_tri))
    vector_transform = lil_matrix((num_basis, 3*num_tri))
    for basis_count, (tri_p, tri_m, node_p, node_m) in enumerate(self):
        scalar_transform[basis_count, tri_p] = 1.0
        scalar_transform[basis_count, tri_m] = -1.0

        vector_transform[basis_count, tri_p*3+node_p] = 1.0
        vector_transform[basis_count, tri_m*3+node_m] = -1.0

    return vector_transform.tocsr(), scalar_transform.tocsr()
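# Minimal sketch (toy sizes, not from the original class) of how a transform
# built this way acts: each basis row carries +1 on its "plus" triangle and -1
# on its "minus" triangle, so multiplying by a face-indexed vector takes the
# signed difference across the shared edge.
from scipy.sparse import lil_matrix
import numpy as np

scalar_transform = lil_matrix((2, 4))           # 2 basis functions, 4 faces
scalar_transform[0, 0], scalar_transform[0, 1] = 1.0, -1.0
scalar_transform[1, 2], scalar_transform[1, 3] = 1.0, -1.0
face_quantity = np.array([0.5, 0.2, 1.0, 1.0])  # illustrative face values
print(scalar_transform.tocsr().dot(face_quantity))   # -> [0.3, 0.0]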
def _reformat_mask(mask):
    """Convert mask to a list of sparse matrices (scipy.sparse.lil_matrix)

    Accepts a 2 or 3D array, a list of 2D arrays, or a sequence of sparse
    matrices.

    Parameters
    ----------
    mask : a 2 or 3 dimensional numpy array, a list of 2D numpy arrays, or
        a sequence of sparse matrices. Masks are assumed to follow a (z, y, x)
        convention. If mask is a list of 2D arrays or of sparse matrices, each
        element is assumed to correspond to the mask for a single plane (and
        is assumed to follow a (y, x) convention)
    """
    if isinstance(mask, np.ndarray):
        # user passed in a 2D or 3D np.array
        if mask.ndim == 2:
            mask = [lil_matrix(mask, dtype=mask.dtype)]
        elif mask.ndim == 3:
            new_mask = []
            for s in range(mask.shape[0]):
                new_mask.append(lil_matrix(mask[s, :, :], dtype=mask.dtype))
            mask = new_mask
        else:
            raise ValueError('numpy ndarray must be either 2 or 3 dimensions')
    elif issparse(mask):
        # user passed in a single lil_matrix
        mask = [lil_matrix(mask)]
    else:
        new_mask = []
        for plane in mask:
            new_mask.append(lil_matrix(plane, dtype=plane.dtype))
        mask = new_mask
    return mask
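# Hedged usage sketch: a toy 3-D (z, y, x) boolean mask converted with the
# helper above; the shape and the single marked voxel are arbitrary
# illustration values.
import numpy as np

mask = np.zeros((2, 4, 5), dtype=bool)
mask[0, 1, 2] = True
planes = _reformat_mask(mask)
print(len(planes), planes[0].nnz)   # 2 planes, 1 nonzero in the first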
def __init__(self):
    self.review_data = pd.read_csv(review_fileName)
    print 'Finished loading data...'

    # Mapping all business_ids
    self.business_ids = list()
    for business in self.review_data['business_id']:
        self.business_ids.append(business)
    unique_business_ids = list(set(self.business_ids))
    self.n_businesses = len(unique_business_ids)
    self.business_dict = dict()
    for index, b_id in enumerate(unique_business_ids):
        self.business_dict[index] = b_id
        self.business_dict[b_id] = index

    # Mapping all user_ids
    self.user_ids = list()
    for user in self.review_data['user_id']:
        self.user_ids.append(user)
    unique_user_ids = list(set(self.user_ids))
    self.n_users = len(unique_user_ids)
    self.user_dict = dict()
    for index, u_id in enumerate(unique_user_ids):
        self.user_dict[index] = u_id
        self.user_dict[u_id] = index

    self.reviews = lil_matrix((self.n_businesses, n_words))
    self.ratings = lil_matrix((self.n_users, self.n_businesses))
def build_CF_matrix(CF_categories, dammage_factors, H, EF_list_for_CF_global, EF_list,
                    CF_matrices, EF_list_for_CF_per_category, impact_method):
    from scipy.sparse import lil_matrix, find
    from copy import deepcopy

    # building a transient matrix (the columns correspond to the system set up
    # by the impact method, NOT the one of ecoinvent)
    transient_CF = lil_matrix((len(CF_categories[impact_method]), len(EF_list_for_CF_global)))
    for [matrix_line, matrix_column, CF] in H:
        transient_CF[matrix_line, matrix_column] = CF

    # building the matrix
    CF_matrices[impact_method] = lil_matrix((len(CF_categories[impact_method]), len(EF_list)))
    for category in CF_categories[impact_method]:
        matrix_line = CF_categories[impact_method].index(category)
        for EF in EF_list:
            column_number_EF = EF_list.index(EF)
            if EF_list_for_CF_per_category[category].count(EF):
                # if the exact EF is found in the list of EFs with a CF in this specific category
                column_number_CF = EF_list_for_CF_global.index(EF)  # find the number in the global list
            else:
                EF_transient = deepcopy(EF)
                EF_transient[2] = '(unspecified)'
                # an EF without an exact match will receive the (unspecified) CF
                # if compartment and EF ID match
                if EF_list_for_CF_per_category[category].count(EF_transient):
                    column_number_CF = EF_list_for_CF_global.index(EF_transient)
                else:
                    # otherwise, no match, the CF is left at zero
                    column_number_CF = 'NA'
            if column_number_CF != 'NA':
                CF_matrices[impact_method][matrix_line, column_number_EF] = \
                    transient_CF[matrix_line, column_number_CF]
    del transient_CF
    del H, EF_list_for_CF_global, EF_list, EF_list_for_CF_per_category, dammage_factors
    return CF_matrices
def getColumnSum(subTermDoc, avg=False):
    """
    Receives a sub term document matrix and an optional flag for
    getting the average instead of the sum.
    """
    sumVector = sparse.lil_matrix((2, subTermDoc.shape[1]))
    sumVector = sumVector.todense()

    if avg:
        counter = 0

    for i in range(1, subTermDoc.shape[0]):
        row = subTermDoc.getrow(i)
        row = row.todense()[0, 1:]
        sumVector[1, 1:] += row
        if avg:
            counter += 1

    if avg:
        sumVector[1, 1:] /= counter

    return sparse.lil_matrix(sumVector)
def __init__(
        self, policy, representation, discount_factor, max_window,
        steps_between_LSPI, lspi_iterations=5, tol_epsilon=1e-3,
        re_iterations=100, use_sparse=False):

    self.steps_between_LSPI = steps_between_LSPI
    self.tol_epsilon = tol_epsilon
    self.lspi_iterations = lspi_iterations
    self.re_iterations = re_iterations
    self.use_sparse = use_sparse

    # Make A and r incrementally if the representation can not expand
    self.fixedRep = not representation.isDynamic
    if self.fixedRep:
        f_size = representation.features_num * representation.actions_num
        self.b = np.zeros((f_size, 1))
        self.A = np.zeros((f_size, f_size))

        # Cache calculated phi vectors
        if self.use_sparse:
            self.all_phi_s = sp.lil_matrix(
                (max_window, representation.features_num))
            self.all_phi_ns = sp.lil_matrix(
                (max_window, representation.features_num))
            self.all_phi_s_a = sp.lil_matrix((max_window, f_size))
            self.all_phi_ns_na = sp.lil_matrix((max_window, f_size))
        else:
            self.all_phi_s = np.zeros(
                (max_window, representation.features_num))
            self.all_phi_ns = np.zeros(
                (max_window, representation.features_num))
            self.all_phi_s_a = np.zeros((max_window, f_size))
            self.all_phi_ns_na = np.zeros((max_window, f_size))

    super(LSPI, self).__init__(policy, representation, discount_factor, max_window)
def k(self, x1, x2, chi2max=25.0):
    """
    The default kernel function

    Parameters
    ----------
    x1, x2 : numpy.ndarray
        Vectors of positions.
    chi2max : float, optional
        Set clipping for sparseness.

    Returns
    -------
    k : numpy.ndarray
        Covariance matrix between x1 and x2.

    Note
    ----
    This works well for small matrices but it is poorly implemented
    for larger matrices --- especially if they are actually sparse!
    """
    d = (x1 - x2)**2 / self._l2
    k = sp.lil_matrix(d.shape)      # note: immediately overwritten by the dense result below
    k = self._a * np.exp(-0.5*d)
    k[d > chi2max] = 0.0
    return sp.lil_matrix(k).tocsc()
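# Standalone sketch of the clipped squared-exponential kernel the method above
# computes (the length scale _l2 = 1.0, amplitude _a = 2.0 and positions are
# illustrative assumptions, not from the original class): entries whose scaled
# squared distance exceeds chi2max are zeroed, which is what makes the CSC
# result sparse.
import numpy as np
from scipy import sparse as sp

x1 = np.linspace(0., 10., 6).reshape(-1, 1)
d = (x1 - x1.T)**2 / 1.0          # assumed _l2 = 1.0
k = 2.0 * np.exp(-0.5 * d)        # assumed _a = 2.0
k[d > 25.0] = 0.0                 # chi2max clipping
K = sp.lil_matrix(k).tocsc()
print(K.nnz, K.shape)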
def __init__(self, plotid, xmin, xmax, ymin, ymax, step, localRadius, overviewStep, xlabel, ylabel):
    # Initialize local map
    self.localRadius = localRadius / float(step)
    self.step = step
    self.xrange = np.linspace(xmin, xmax, (xmax-xmin)/float(step)+1)
    self.yrange = np.linspace(ymin, ymax, (ymax-ymin)/float(step)+1)
    self.Nx = self.xrange.shape[0]
    self.Ny = self.yrange.shape[0]
    self.localXmin = self.xrange[self.Nx/2-self.localRadius]
    self.localXmax = self.xrange[self.Nx/2+self.localRadius]
    self.localYmin = self.yrange[self.Ny/2-self.localRadius]
    self.localYmax = self.yrange[self.Ny/2+self.localRadius]
    self.sparseSum = lil_matrix((self.Ny, self.Nx), dtype=np.float32)
    self.sparseNorm = lil_matrix((self.Ny, self.Nx), dtype=np.float32)
    self.localMap = np.zeros((2*self.localRadius+1, 2*self.localRadius+1))

    # Initialize overview map
    self.overviewXrange = np.linspace(xmin, xmax, (xmax-xmin)/float(overviewStep))
    self.overviewYrange = np.linspace(ymin, ymax, (ymax-ymin)/float(overviewStep))
    overviewNx = self.overviewXrange.shape[0]
    overviewNy = self.overviewYrange.shape[0]
    self.overviewMap = np.zeros((overviewNy, overviewNx))

    # Initialize plots
    self.counter = 0
    ipc.broadcast.init_data(plotid+' -> Overview', data_type='image', history_length=1, flipy=True, \
                            xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, xlabel=xlabel, ylabel=ylabel)
    ipc.broadcast.init_data(plotid+' -> Local', data_type='image', history_length=1, flipy=True, \
                            xmin=self.localXmin, xmax=self.localXmax, \
                            ymin=self.localYmin, ymax=self.localYmax, xlabel=xlabel, ylabel=ylabel)
def learnProjection(dataset):
    """
    Learn the projection matrix and store it to a file.
    """
    h = 50  # no. of latent dimensions.
    print "Loading the bipartite matrix...",
    coocData = sio.loadmat("../work/%s/DSxDI.mat" % (dataset))
    M = sp.lil_matrix(coocData['DSxDI'])
    (nDS, nDI) = M.shape
    print "Done."
    print "Computing the Laplacian...",
    D1 = sp.lil_matrix((nDS, nDS), dtype=np.float64)
    D2 = sp.lil_matrix((nDI, nDI), dtype=np.float64)
    for i in range(0, nDS):
        D1[i, i] = 1.0 / np.sqrt(np.sum(M[i, :].data[0]))
    for i in range(0, nDI):
        D2[i, i] = 1.0 / np.sqrt(np.sum(M[:, i].T.data[0]))
    B = (D1.tocsr().dot(M.tocsr())).dot(D2.tocsr())
    print "Done."
    # Perform SVD on B
    print "Perform SVD on the weight matrix...",
    startTime = time.time()
    # ut, s, vt = sparsesvd(B.tocsc(), h)
    B = sp.csc_matrix(B, dtype=float)
    ut, s, vt = sp.linalg.svds(B, h)
    # print ut.shape
    endTime = time.time()
    print "%ss" % str(round(endTime-startTime, 2)),
    # sio.savemat("../work/%s/proj_sfa.mat" % (dataset), {'proj':ut.T})
    sio.savemat("../work/%s/proj_sfa.mat" % (dataset), {'proj': ut})
    print "Done."
    pass
def get_rsas_rehosps_7x(rehosps_dict, rsas_file_path=rsa_clean_file_path_2013,
                        rsa_format=formats.rsa_2013_format, cll=column_label_list):
    '''
    This method parses the lines of the file rsas_file_path and takes only
    those whose line_number (starting from 1) is included in rehosps_dict,
    i.e. the RSAs with rehosp.
    It returns two arrays:
    X : the features according to column_label_list
    Y : response with 1 = rehosp delay 1 or multiple of 7 (days), 0 otherwise
    '''
    line_number = 1
    i = 0
    rows_count = len(rehosps_dict)
    cols_count = len(cll)
    sparse_X = sparse.lil_matrix((rows_count, cols_count))
    sparse_y = sparse.lil_matrix((rows_count, 1))
    with open(rsas_file_path) as rsa_file:
        while True:
            rsa_line = rsa_file.readline().strip()
            if (line_number in rehosps_dict):
                rsa_data_dict = get_rsa_data(rsa_line, rsa_format)
                rsa_to_X(rsa_data_dict, sparse_X, i)
                if rehosps_dict[line_number]:
                    sparse_y[i] = 1
                i += 1
            line_number += 1
            if line_number % 10000 == 0:
                print '\rLines processed ', line_number, ', % processed ', (i*100/rows_count),
            if (not rsa_line):
                break
    return sparse_X, sparse_y
def one_hot_encode(train_discrete_features, test_discrete_features):
    """
    Perform one hot encoding on both the train and test set. Use this when
    memory is limited; otherwise use scikit-learn's OneHotEncoder directly.

    parameters:
    --------------------------------------------------------
    train_discrete_features: discrete features of training data
    test_discrete_features: discrete features of test data
    """
    m, n = train_discrete_features.shape
    train_encoded_features = lil_matrix((LENGTH_OF_TRAIN, MAX_OF_DIM))
    test_encoded_features = lil_matrix((LENGTH_OF_TEST, MAX_OF_DIM))
    cnt = 0
    for i in range(n):
        print "processing " + str(i) + "th feature..."
        train_column = train_discrete_features[:, i]
        test_column = test_discrete_features[:, i]
        # one hot encode the value in train and test
        encoder = OneHotEncoder(handle_unknown="ignore")
        train_encoded_column = lil_matrix(encoder.fit_transform(np.mat(train_column).T))
        test_encoded_column = lil_matrix(encoder.transform(np.mat(test_column).T))
        # get number of features
        _, num = train_encoded_column.shape
        # put the column into matrix
        for j in range(num):
            train_encoded_features[:, cnt+j] = train_encoded_column[:, j]
            test_encoded_features[:, cnt+j] = test_encoded_column[:, j]
        cnt += num
    return csr_matrix(train_encoded_features[:, 0:cnt]), csr_matrix(test_encoded_features[:, 0:cnt])
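# Minimal standalone sketch (toy data, current scikit-learn API) of the
# per-column encoding step used above: categories unseen during fit are
# ignored at transform time, i.e. encoded as an all-zero row.
import numpy as np
from sklearn.preprocessing import OneHotEncoder

train_column = np.array([[0], [1], [2], [1]])
test_column = np.array([[1], [3]])               # 3 was never seen in training
encoder = OneHotEncoder(handle_unknown="ignore")
train_enc = encoder.fit_transform(train_column)  # sparse, shape (4, 3)
test_enc = encoder.transform(test_column)        # sparse, shape (2, 3); row for 3 is all zeros
print(train_enc.shape, test_enc.toarray())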
def advection_matrix(self):
    """
    Construct the advection matrix operator
    """
    M = sprs.lil_matrix((self.N, self.N))
    dz = self.dz
    v = self.aacc

    # Upwinding
    # Upwind formula from the wiki
    A = sprs.lil_matrix((self.N, self.N))
    if v < 0:
        A.setdiag(np.ones(self.N) * -1.0)
        A.setdiag(np.ones(self.N), k=1)
    elif v == 0:
        A = A * 0
    elif v > 0:
        A.setdiag(np.ones(self.N) * -1.0, k=-1)
        A.setdiag(np.ones(self.N))
    A[0, :] = np.zeros(self.N)
    A[-1, :] = np.zeros(self.N)
    A = A / (2 * dz)

    # Set up the final row so that it can handle the base of the system.
    B = sprs.lil_matrix((self.N, self.N))
    B.setdiag(np.ones(self.N) * 3.0, k=0)
    B.setdiag(np.ones(self.N) * -4.0, k=-1)
    B.setdiag(np.ones(self.N), k=-2)
    B = B / (2 * dz)
    A[-1, :] = B[-1, :]

    return A
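# Standalone sketch (toy size N=5, dz=1.0; not from the original class) of the
# same upwind stencil for positive velocity: -1 on the sub-diagonal and +1 on
# the main diagonal, divided by 2*dz, with the first and last rows zeroed
# before the one-sided boundary row is patched in.
import numpy as np
from scipy import sparse as sprs

N, dz = 5, 1.0
A = sprs.lil_matrix((N, N))
A.setdiag(np.ones(N) * -1.0, k=-1)
A.setdiag(np.ones(N))
A[0, :] = np.zeros(N)
A[-1, :] = np.zeros(N)
A = A / (2 * dz)
print(A.toarray())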
def fromList(shape, coords, weights=None):
    if weights is not None:
        assert len(coords) == len(weights)

    if len(shape) == 1:
        NX = NY = shape[0]
    else:
        NX = shape[0]
        NY = shape[1]

    if weights is None:
        X = [a for (a, b) in coords]
        Y = [b for (a, b) in coords]
        G = sps.lil_matrix((NX * NY, 1), dtype=np.float64)
        lin_I = common.sub2ind((NX, NY), X, Y)
        G[lin_I, 0] = 1
        G = G.reshape((NX, NY))
    else:
        G = sps.lil_matrix((NX, NY), dtype=np.float64)
        for (i, w) in enumerate(weights):
            c = coords[i]
            x = c[0]
            y = c[1]
            G[x, y] = w
    return G
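# Hedged usage sketch of the weighted branch above (the unweighted branch
# relies on the project helper common.sub2ind, which is not shown here, so it
# is not exercised; coordinates and weights are illustrative).
G = fromList((3, 4), [(0, 1), (2, 3)], weights=[0.5, 2.0])
print(G.toarray())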
def stack_D(Xw, u_hat, W, N, T, R):
    Dstack = ssp.lil_matrix((N*R*T, W*R*T))
    i = 0
    NW = N * W
    # logger.debug("Calling _stack_D")
    for r in range(R):
        # I expect this to be (N * W) * U
        Xwr = Xw[r*NW:(r+1)*NW, :]
        for t in range(T):
            # I expect this to be (N * W) * 1
            # Xwrt = Xwr.dot(u_hat[r,t,:])
            Xwrt = ssp.csr_matrix(Xwr.dot(u_hat[r, t, :]))
            for n in range(N):
                DSn = (i * N + n)
                DSw = (i * W)
                # Dstack.rows[DSn] = range(DSw,DSw + W)
                # Dstack.data[DSn] = Xwrt[n*W:n*W + W]
                sub = ssp.lil_matrix(Xwrt[:, n*W:n*W + W])
                Dstack.rows[DSn] = [x + DSw for x in sub.rows[0]]
                Dstack.data[DSn] = sub.data[0]
            i += 1
    # logger.debug("Done")
    return Dstack
def initialize(self):
    words = {}
    for en_line, fr_line in self.iterator():
        en_words = en_line.split(" ")
        fr_words = fr_line.split(" ")
        if "NULL" in words:
            words["NULL"] = words["NULL"].union(set(fr_words))
        else:
            words["NULL"] = set(fr_words)
        for en_word in en_words:
            if en_word in words:
                words[en_word] = words[en_word].union(set(fr_words))
            else:
                words[en_word] = set(fr_words)

    self.tmatrix = lil_matrix((len(words.keys()), len(words["NULL"])))
    self.cef = lil_matrix((len(words.keys()), len(words["NULL"])))

    i = 0
    for word in words["NULL"]:
        self.fr_dict[word] = i
        i += 1

    i = 0
    for en_word, value in words.iteritems():
        self.en_dict[en_word] = i
        if len(value):
            for fr_word in value:
                self.tmatrix[i, self.fr_dict[fr_word]] = pow(len(value), -1)
        i += 1
    del words
def __init__(self):
    # Files
    # Inputs
    self.train_file = "training.txt"
    self.test_file = "testing.txt"
    self.label_training_file = "label_training.txt"
    # Output
    self.nb_classifier_output = "nb_classifier_output"
    self.svm_classifier_output = "svm_classifier_output"

    # Estimator value
    self.estimatorSize = 500

    # Sparse Matrix
    # Training data
    self.sparse_matrix = lil_matrix((1842, 26364), dtype=float)
    self.training_labels_list = []
    # Test data
    self.test_data = lil_matrix((952, 26364), dtype=float)

    # Results
    self.predicted_labels = None
def init_est(self):
    for m in xrange(len(self.trn_data)):
        self.z.append([])
        N = len(self.trn_data[m])
        for n in xrange(N):
            self.z[m].append(0)

    self.nksum = [0 for i in xrange(self.topics)]
    print "init nwk"
    self.nwk = sparse.lil_matrix((self.word_dict_sz, self.topics))
    print self.rank, self.nwk.shape
    self.phi = sparse.lil_matrix((self.topics, self.word_dict_sz))
    self.nwkp = sparse.lil_matrix((self.word_dict_sz, self.topics))

    for i in xrange(self.topics):
        self.p.append(0)

    M = len(self.z)
    self.ndsum = [0 for i in xrange(M)]
    print "init ndk"
    self.ndk = sparse.lil_matrix((M, self.topics))
    self.theta = sparse.lil_matrix((M, self.topics))

    for m in xrange(M):
        N = len(self.z[m])
        for n in xrange(N):
            w = self.trn_data[m][n]
            self.z[m][n] = int(random.random() * self.topics)
            w_topic = self.z[m][n]
            self.nwkp[w, w_topic] += 1
            self.ndk[m, w_topic] += 1
            # self.nksum[w_topic] += 1
        self.ndsum[m] += N
    print "init estimate complete!"
def rand_matrices(A, t, nonzero_ids):
    '''Create random matrices A, B for the meetFriend_matrix model.

    The matrices are created as follows. A_t has a diagonal of (t-1)/t and
    B_t is the zero matrix. We make a weighted random choice for each node
    depending on the values of its row in the A matrix. Depending on the
    outcome of this choice, either B[i, i] = 1/t or A[i, r] = 1/t, where r
    is the random choice.

    Args:
        A (NxN numpy array): Weights matrix (its diagonal is the stubbornness)

        t (int): Round number

    Returns:
        Two NxN matrices, A_t and B_t
    '''
    N = A.shape[0]
    A_t = sparse.lil_matrix((N, N))
    A_t.setdiag(np.ones(N) * (t-1)/t)
    B_t = sparse.lil_matrix((N, N))
    for i in xrange(N):
        r = rchoice(A[i, :], nonzero_ids[i])
        if r == i:
            B_t[i, i] = 1/t
        else:
            A_t[i, r] = 1/t
    return A_t.tocsr(), B_t.tocsr()
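# Standalone sketch of the same construction with a toy 3-node weight matrix;
# np.random.choice stands in for the original rchoice helper (an assumption),
# and explicit float division is used so 1/t is not truncated.
import numpy as np
from scipy import sparse

A = np.array([[0.5, 0.5, 0.0],
              [0.2, 0.6, 0.2],
              [0.0, 0.4, 0.6]])
t = 3
N = A.shape[0]
A_t = sparse.lil_matrix((N, N))
A_t.setdiag(np.ones(N) * (t - 1.0) / t)
B_t = sparse.lil_matrix((N, N))
for i in range(N):
    r = np.random.choice(N, p=A[i, :] / A[i, :].sum())
    if r == i:
        B_t[i, i] = 1.0 / t
    else:
        A_t[i, r] = 1.0 / t
print(A_t.toarray(), B_t.toarray(), sep="\n")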
def setUp(self):
    self.nbr_elements = 1000
    self.size = 10000

    self.A_c = LLSparseMatrix(size=self.size, size_hint=self.nbr_elements,
                              itype=INT32_T, dtype=FLOAT64_T)
    self.A_s = lil_matrix((self.size, self.size), dtype=np.float64)

    self.list_of_matrices = []
    self.list_of_matrices.append(self.A_c)
    self.list_of_matrices.append(self.A_s)

    construct_random_matrices(self.list_of_matrices, self.size, self.nbr_elements)

    self.CSR_c = self.A_c.to_csr()
    self.CSR_s = self.A_s.tocsr()

    self.B_c = LLSparseMatrix(size=self.size, size_hint=self.nbr_elements,
                              itype=INT32_T, dtype=FLOAT64_T)
    self.B_s = lil_matrix((self.size, self.size), dtype=np.float64)

    self.list_of_matrices = []
    self.list_of_matrices.append(self.B_c)
    self.list_of_matrices.append(self.B_s)

    construct_random_matrices(self.list_of_matrices, self.size, self.nbr_elements)

    self.CSC_c = self.B_c.to_csc()
    self.CSC_s = self.B_s.tocsc()

    self.v = np.arange(0, self.size, dtype=np.float64)
def _pade(A, m):
    n = np.shape(A)[0]
    c = _padecoeff(m)
    if m != 13:
        apows = [[] for jj in range(int(np.ceil((m + 1) / 2)))]
        apows[0] = sp.eye(n, n, format='csc')
        apows[1] = A * A
        for jj in range(2, int(np.ceil((m + 1) / 2))):
            apows[jj] = apows[jj - 1] * apows[1]
        U = sp.lil_matrix((n, n)).tocsc()
        V = sp.lil_matrix((n, n)).tocsc()
        for jj in range(m, 0, -2):
            U = U + c[jj] * apows[jj // 2]
        U = A * U
        for jj in range(m - 1, -1, -2):
            V = V + c[jj] * apows[(jj + 1) // 2]
        F = spla.spsolve((-U + V), (U + V))
        return F.tocsr()
    elif m == 13:
        A2 = A * A
        A4 = A2 * A2
        A6 = A2 * A4
        U = A * (A6 * (c[13] * A6 + c[11] * A4 + c[9] * A2) +
                 c[7] * A6 + c[5] * A4 + c[3] * A2 +
                 c[1] * sp.eye(n, n).tocsc())
        V = A6 * (c[12] * A6 + c[10] * A4 + c[8] * A2) + c[6] * A6 + c[4] * \
            A4 + c[2] * A2 + c[0] * sp.eye(n, n).tocsc()
        F = spla.spsolve((-U + V), (U + V))
        return F.tocsr()
def main(): # Parse command line arguments parser = argparse.ArgumentParser( description='Map word embeddings in two languages into a shared space') parser.add_argument('src_input', help='the input source embeddings') parser.add_argument('trg_input', help='the input target embeddings') parser.add_argument('sense_input', help='the input sense mapping matrix') parser.add_argument('src_output', help='the output source embeddings') parser.add_argument('trg_output', help='the output target embeddings') parser.add_argument('tsns_output', default='tsns.pkl', help='the output target senses pickle file') parser.add_argument( '--encoding', default='utf-8', help='the character encoding for input/output (defaults to utf-8)') parser.add_argument('--precision', choices=['fp16', 'fp32', 'fp64'], default='fp32', help='the floating-point precision (defaults to fp32)') parser.add_argument('--cuda', action='store_true', help='use cuda (requires cupy)') parser.add_argument('--seed', type=int, default=0, help='the random seed (defaults to 0)') recommended_group = parser.add_argument_group( 'recommended settings', 'Recommended settings for different scenarios') recommended_type = recommended_group.add_mutually_exclusive_group() recommended_type.add_argument( '--unsupervised', action='store_true', help= 'recommended if you have no seed dictionary and do not want to rely on identical words' ) recommended_type.add_argument('--future', action='store_true', help='experiment with stuff') recommended_type.add_argument('--toy', action='store_true', help='experiment with stuff on toy dataset') recommended_type.add_argument('--acl2018', action='store_true', help='reproduce our ACL 2018 system') init_group = parser.add_argument_group( 'advanced initialization arguments', 'Advanced initialization arguments') init_type = init_group.add_mutually_exclusive_group() init_type.add_argument('--init_unsupervised', action='store_true', help='use unsupervised initialization') init_group.add_argument( '--unsupervised_vocab', type=int, default=0, help= 'restrict the vocabulary to the top k entries for unsupervised initialization' ) mapping_group = parser.add_argument_group( 'advanced mapping arguments', 'Advanced embedding mapping arguments') mapping_group.add_argument( '--normalize', choices=['unit', 'center', 'unitdim', 'centeremb', 'none'], nargs='*', default=[], help='the normalization actions to perform in order') mapping_group.add_argument('--whiten', action='store_true', help='whiten the embeddings') mapping_group.add_argument('--src_reweight', type=float, default=0, nargs='?', const=1, help='re-weight the source language embeddings') mapping_group.add_argument('--trg_reweight', type=float, default=0, nargs='?', const=1, help='re-weight the target language embeddings') mapping_group.add_argument('--src_dewhiten', choices=['src', 'trg'], help='de-whiten the source language embeddings') mapping_group.add_argument('--trg_dewhiten', choices=['src', 'trg'], help='de-whiten the target language embeddings') mapping_group.add_argument('--dim_reduction', type=int, default=0, help='apply dimensionality reduction') mapping_type = mapping_group.add_mutually_exclusive_group() mapping_type.add_argument('-c', '--orthogonal', action='store_true', help='use orthogonal constrained mapping') self_learning_group = parser.add_argument_group( 'advanced self-learning arguments', 'Advanced arguments for self-learning') self_learning_group.add_argument( '--vocabulary_cutoff', type=int, default=0, help='restrict the vocabulary to the top k entries') 
self_learning_group.add_argument( '--threshold', default=0.000001, type=float, help='the convergence threshold (defaults to 0.000001)') self_learning_group.add_argument( '--stochastic_initial', default=0.1, type=float, help= 'initial keep probability stochastic dictionary induction (defaults to 0.1)' ) self_learning_group.add_argument( '--stochastic_multiplier', default=2.0, type=float, help='stochastic dictionary induction multiplier (defaults to 2.0)') self_learning_group.add_argument( '--stochastic_interval', default=50, type=int, help='stochastic dictionary induction interval (defaults to 50)') self_learning_group.add_argument( '--log', default='map.log', help='write to a log file in tsv format at each iteration') self_learning_group.add_argument( '-v', '--verbose', action='store_true', help='write log information to stderr at each iteration') future_group = parser.add_argument_group('experimental arguments', 'Experimental arguments') future_group.add_argument('--skip_top', type=int, default=0, help='Top k words to skip, presumably function') future_group.add_argument( '--start_src', action='store_true', help='Algorithm starts by tuning sense embeddings based on source') future_group.add_argument('--trim_senses', action='store_true', help='Trim sense table to working vocab') future_group.add_argument( '--lamb', type=float, default=0.5, help='Weight hyperparameter for sense alignment objectives') future_group.add_argument('--reglamb', type=float, default=1., help='Lasso regularization hyperparameter') future_group.add_argument( '--ccreglamb', type=float, default=0.1, help='Sense embedding regularization hyperparameter') future_group.add_argument('--inv_delta', type=float, default=0.0001, help='Delta_I added for inverting sense matrix') future_group.add_argument('--lasso_iters', type=int, default=10, help='Number of iterations for LASSO/NMF') future_group.add_argument('--iterations', type=int, default=-1, help='Number of overall model iterations') future_group.add_argument('--trg_batch', type=int, default=5000, help='Batch size for target steps') future_group.add_argument( '--trg_knn', action='store_true', help='Perform target sense mapping by k-nearest neighbors') future_group.add_argument( '--trg_sns_csls', type=int, default=10, help='K-nearest neighbors for CSLS target sense search') future_group.add_argument( '--senses_per_trg', type=int, default=1, help='K-max target sense mapping (default = 1 = off)') future_group.add_argument( '--gd', action='store_true', help='Apply gradient descent for assignment and synset embeddings') future_group.add_argument('--gd_lr', type=float, default=1e-2, help='Learning rate for SGD (default=0.01)') future_group.add_argument('--gd_wd', action='store_true', help='Weight decay in SGD') future_group.add_argument( '--gd_wd_hl', type=int, default=100, help='Weight decay half-life in SGD, default=100') future_group.add_argument( '--gd_clip', type=float, default=5., help='Per-coordinate gradient clipping (default=5)') future_group.add_argument( '--gd_map_steps', type=int, default=1, help='Consecutive steps for each target-sense mapping update phase') future_group.add_argument( '--gd_emb_steps', type=int, default=1, help='Consecutive steps for each sense embedding update phase') future_group.add_argument( '--base_prox_lambda', type=float, default=0.99, help='Lambda for proximal gradient in lasso step') future_group.add_argument( '--prox_decay', action='store_true', help='Multiply proximal lambda by itself each iteration') future_group.add_argument( 
'--sense_limit', type=float, default=1.1, help= 'Maximum amount of target sense mappings, in terms of source mappings (default=1.1x)' ) future_group.add_argument( '--gold_pairs', help='Gold data for evaluation, if exists (not for tuning)') future_group.add_argument( '--gold_threshold', type=float, default=0.0, help='Threshold for gold mapping (0 is fine if sparse)') future_group.add_argument('--debug', action='store_true') args = parser.parse_args() # pre-setting groups if args.toy: parser.set_defaults(init_unsupervised=True, unsupervised_vocab=4000, normalize=['unit', 'center', 'unit'], whiten=True, src_reweight=0.5, trg_reweight=0.5, src_dewhiten='src', trg_dewhiten='trg', vocabulary_cutoff=50, trim_senses=True, inv_delta=1., reglamb=0.2, lasso_iters=100, gd_wd=True, log='map-toy.log') if args.unsupervised or args.future: parser.set_defaults(init_unsupervised=True, unsupervised_vocab=4000, normalize=['unit', 'center', 'unit'], whiten=True, src_reweight=0.5, trg_reweight=0.5, src_dewhiten='src', trg_dewhiten='trg', vocabulary_cutoff=2000, trim_senses=True, gd_wd=True) if args.unsupervised or args.acl2018: parser.set_defaults(init_unsupervised=True, unsupervised_vocab=4000, normalize=['unit', 'center', 'unit'], whiten=True, src_reweight=0.5, trg_reweight=0.5, src_dewhiten='src', trg_dewhiten='trg', vocabulary_cutoff=20000) args = parser.parse_args() # Check command line arguments if (args.src_dewhiten is not None or args.trg_dewhiten is not None) and not args.whiten: print('ERROR: De-whitening requires whitening first', file=sys.stderr) sys.exit(-1) # Choose the right dtype for the desired precision if args.precision == 'fp16': dtype = 'float16' # many operations not supported by cupy elif args.precision == 'fp32': # default dtype = 'float32' elif args.precision == 'fp64': dtype = 'float64' # Read input embeddings print('reading embeddings...') srcfile = open(args.src_input, encoding=args.encoding, errors='surrogateescape') trgfile = open(args.trg_input, encoding=args.encoding, errors='surrogateescape') src_words, x = embeddings.read(srcfile, dtype=dtype) trg_words, z = embeddings.read(trgfile, dtype=dtype) print('embeddings read') # Read input source sense mapping print('reading sense mapping') src_senses = pickle.load(open(args.sense_input, 'rb')) if src_senses.shape[0] != x.shape[0]: src_senses = csr_matrix(src_senses.transpose() ) # using non-cuda scipy because of 'inv' impl #src_senses = get_sparse_module(src_senses) print( f'source sense mapping of shape {src_senses.shape} loaded with {src_senses.getnnz()} nonzeros' ) # NumPy/CuPy management if args.cuda: if not supports_cupy(): print('ERROR: Install CuPy for CUDA support', file=sys.stderr) sys.exit(-1) xp = get_cupy() x = xp.asarray(x) z = xp.asarray(z) print('CUDA loaded') else: xp = np xp.random.seed(args.seed) # removed word to index map (only relevant in supervised learning or with validation) # STEP 0: Normalization embeddings.normalize(x, args.normalize) embeddings.normalize(z, args.normalize) print('normalization complete') # removed building the seed dictionary # removed validation step # Create log file if args.log: log = open(args.log, mode='w', encoding=args.encoding, errors='surrogateescape') print(f'logging into {args.log}') # Allocate memory # Initialize the projection matrices W(s) = W(t) = I. 
xw = xp.empty_like(x) zw = xp.empty_like(z) xw[:] = x zw[:] = z src_size = x.shape[0] if args.vocabulary_cutoff <= 0 else min( x.shape[0] - args.skip_top, args.vocabulary_cutoff) trg_size = z.shape[0] if args.vocabulary_cutoff <= 0 else min( z.shape[0] - args.skip_top, args.vocabulary_cutoff) emb_dim = x.shape[1] cutoff_end = min(src_size + args.skip_top, x.shape[0]) if args.trim_senses: # reshape sense assignment src_senses = src_senses[args.skip_top:cutoff_end] # new columns for words with no senses in original input ### TODO might also need this if not trimming (probably kinda far away) newcols = [csc_matrix(([1],([i],[0])),shape=(src_size,1)) for i in range(src_size)\ if src_senses.getrow(i).getnnz() == 0] #with open(f'data/synsets/dummy_synsets_v3b_{src_size}','wb') as dummy_cols_file: # dummy_col_idcs = [i for i in range(src_size) if src_senses.getrow(i).getnnz() == 0] # pickle.dump(np.array(dummy_col_idcs), dummy_cols_file) # trim senses no longer used, add new ones colsums = src_senses.sum(axis=0).tolist()[0] kept_senses = [i for i, j in enumerate(colsums) if j > 0] #with open(f'data/synsets/kept_synsets_v3b_{src_size}','wb') as kept_save_file: # pickle.dump(np.array(kept_senses), kept_save_file) src_senses = hstack([src_senses[:, kept_senses]] + newcols) print( f'trimmed sense dictionary dimensions: {src_senses.shape} with {src_senses.getnnz()} nonzeros' ) sense_size = src_senses.shape[1] if args.gold_pairs is not None: with open(args.gold_pairs, 'rb') as gold_pairs_f: gold_pairs = pickle.load(gold_pairs_f) gold_pairs = [(i-args.skip_top,j) for i,j in gold_pairs \ if i >= args.skip_top and i < src_senses.shape[0] and j < src_senses.shape[1]] gold_trgs = sorted(set([x[0] for x in gold_pairs])) gold_senses = sorted(set([x[1] for x in gold_pairs])) gold_domain_size = len(gold_trgs) * len(gold_senses) print( f'evaluating on {len(gold_pairs)} pairs with {len(gold_trgs)} unique words and {len(gold_senses)} unique senses' ) # Initialize the concept embeddings from the source embeddings ### TODO maybe try gradient descent instead? ### TODO (pre-)create non-singular alignment matrix cc = xp.empty((sense_size, emb_dim), dtype=dtype) # \tilde{E} t01 = time.time() print('starting psinv calc') src_sns_psinv = psinv(src_senses, dtype, args.inv_delta) xecc = x[args.skip_top:cutoff_end].T.dot( get_sparse_module(src_senses).toarray()).T # sense_size * emb_dim cc[:] = src_sns_psinv.dot(xecc) print(f'initialized concept embeddings in {time.time()-t01:.2f} seconds', file=sys.stderr) if args.verbose: # report precision of psedo-inverse operation, checked by inverting pseudo_id = src_senses.transpose().dot(src_senses).dot( src_sns_psinv.get()) real_id = sparse_id(sense_size) rel_diff = (pseudo_id - real_id).sum() / (sense_size * sense_size) print(f'per-coordinate pseudo-inverse precision is {rel_diff:.5f}') ### TODO initialize trg_senses using seed dictionary instead? 
trg_sns_size = trg_size if args.trim_senses else z.shape[0] trg_senses = csr_matrix( (trg_sns_size, sense_size)) # using non-cuda scipy because of 'inv' impl zecc = xp.empty_like(xecc) # sense_size * emb_dim #tg_grad = xp.empty((trg_sns_size, sense_size)) if args.gd: # everything can be done on gpu src_senses = get_sparse_module(src_senses, dtype=dtype) trg_senses = get_sparse_module(trg_senses, dtype=dtype) if args.sense_limit > 0.0: trg_sense_limit = int(args.sense_limit * src_senses.getnnz()) if args.verbose: print( f'limiting target side to {trg_sense_limit} sense mappings' ) else: trg_sense_limit = -1 ### TODO return memory assignment for similarities? # Training loop if args.gd: prox_lambda = args.base_prox_lambda else: lasso_model = Lasso(alpha=args.reglamb, fit_intercept=False, max_iter=args.lasso_iters,\ positive=True, warm_start=True) # TODO more parametrization if args.log is not None: if args.gd: print(f'gradient descent lr: {args.gd_lr}', file=log) print(f'base proximal lambda: {args.base_prox_lambda}', file=log) else: print(f'lasso regularization: {args.reglamb}', file=log) print(f'lasso iterations: {args.lasso_iters}', file=log) print(f'inversion epsilon: {args.inv_delta}', file=log) if args.gold_pairs is not None: print(f'gold mappings: {len(gold_pairs)}', file=log) print( f'Iteration\tObjective\tSource\tTarget\tL_1\tDuration\tNonzeros\tCorrect_mappings', file=log) log.flush() best_objective = objective = 1000000000. correct_mappings = -1 regularization_lambda = args.base_prox_lambda if args.gd else args.reglamb it = 1 last_improvement = 0 t = time.time() map_gd_lr = args.gd_lr emb_gd_lr = args.gd_lr end = False print('starting training') if args.start_src: print('starting with converging synset embeddings') it_range = range( args.iterations ) ### TODO possibly add arg, but there's early stopping if not args.verbose: it_range = tqdm(it_range) prev_obj = float('inf') for pre_it in it_range: if args.gd_wd: emb_gd_lr = args.gd_lr * pow(0.5, floor( pre_it / args.gd_wd_hl)) # Synset embedding cc_grad = src_senses.T.dot( xw[args.skip_top:cutoff_end] - src_senses.dot(cc)) - args.ccreglamb * cc cc_grad.clip(-args.gd_clip, args.gd_clip, out=cc_grad) cc += emb_gd_lr * cc_grad # Source projection u, s, vt = xp.linalg.svd(cc.T.dot(xecc)) wx = vt.T.dot(u.T).astype(dtype) x.dot(wx, out=xw) pre_objective = ((xp.linalg.norm( xw[args.skip_top:cutoff_end] - get_sparse_module(src_senses).dot(cc), 'fro'))**2) / 2 pre_objective = float(pre_objective) if args.verbose and pre_it > 0 and pre_it % 10 == 0: print( f'source synset embedding objective iteration {pre_it}: {pre_objective:.3f}' ) if pre_objective > prev_obj: print( f'stopping at pre-iteration {pre_it}, source-sense objective {prev_obj:.3f}' ) # revert cc -= emb_gd_lr * cc_grad break prev_obj = pre_objective while True: if it % 50 == 0: print( f'starting iteration {it}, last objective was {objective}, correct mappings at {correct_mappings}' ) # Increase the keep probability if we have not improved in args.stochastic_interval iterations if it - last_improvement > args.stochastic_interval: last_improvement = it if args.iterations > 0 and it > args.iterations: end = True ### update target assignments (6) - lasso-esque regression time6 = time.time() # optimize: 0.5 * (xp.linalg.norm(zw[i] - trg_senses[i].dot(cc))^2) + (regularization_lambda * xp.linalg.norm(trg_senses[i],1)) if args.trg_knn: # for csls-based neighborhoods knn_sense = xp.full(sense_size, -100) for i in range(0, sense_size, args.trg_batch): batch_end = min(i + args.trg_batch, 
sense_size) sim_sense_trg = cc[i:batch_end].dot( zw[args.skip_top:cutoff_end].T) knn_sense[i:batch_end] = topk_mean(sim_sense_trg, k=args.trg_sns_csls, inplace=True) # calculate new target mappings trg_senses = lil_matrix(trg_senses.shape) for i in range(0, trg_size, args.trg_batch): sns_batch_end = min(i + args.trg_batch, trg_size) z_i = i + args.skip_top z_batch_end = min(sns_batch_end + args.skip_top, zw.shape[0]) sims = zw[z_i:z_batch_end].dot(cc.T) sims -= knn_sense / 2 # equivalent to the real CSLS scores for NN best_idcs = sims.argmax(1).tolist() trg_senses[(list(range(i, sns_batch_end)), best_idcs)] = sims.max(1).tolist() # second-to-lth-best for l in range(args.senses_per_trg - 1): sims[(list(range(sims.shape[0])), best_idcs)] = 0. best_idcs = sims.argmax(1).tolist() trg_senses[(list(range(i, sns_batch_end)), best_idcs)] = sims.max(1).tolist() trg_senses = get_sparse_module(trg_senses.tocsr()) elif args.gd: ### TODO add args.skip_top calculations if args.gd_wd: true_it = (it - 1) * args.gd_map_steps map_gd_lr = args.gd_lr * pow( 0.5, floor((1 + true_it) / args.gd_wd_hl)) if args.verbose: print(f'mapping learning rate: {map_gd_lr}') for k in range(args.gd_map_steps): # st <- st + eta * (ew - st.dot(es)).dot(es.T) # allow up to sense_limit updates, clip gradient batch_grads = [] for i in range(0, trg_size, args.trg_batch): batch_end = min(i + args.trg_batch, trg_size) tg_grad_b = (zw[i:batch_end] - trg_senses[i:batch_end].dot(cc)).dot(cc.T) # proximal gradient tg_grad_b += prox_lambda tg_grad_b.clip(None, 0.0, out=tg_grad_b) batch_grads.append(batch_sparse(tg_grad_b)) tg_grad = get_sparse_module(vstack(batch_grads)) del tg_grad_b if args.prox_decay: prox_lambda *= args.base_prox_lambda ### TODO consider weight decay here as well (args.gd_wd) trg_senses -= map_gd_lr * tg_grad # allow up to sense_limit nonzeros if trg_sense_limit > 0: trg_senses = trim_sparse(trg_senses, trg_sense_limit, clip=None) ### TODO consider finishing up with lasso (maybe only in final iteration) else: ### TODO add args.skip_top calculations # parallel LASSO (no cuda impl) cccpu = cc.get().T # emb_dim * sense_size lasso_model.fit(cccpu, zw[:trg_size].get().T) ### TODO maybe trim, keep only above some threshold (0.05) OR top f(#it) trg_senses = lasso_model.sparse_coef_ if args.verbose: print( f'target sense mapping step: {(time.time()-time6):.2f} seconds, {trg_senses.getnnz()} nonzeros', file=sys.stderr) objective = ((xp.linalg.norm(xw[args.skip_top:cutoff_end] - get_sparse_module(src_senses).dot(cc),'fro') ** 2)\ + (xp.linalg.norm(zw[args.skip_top:cutoff_end] - get_sparse_module(trg_senses).dot(cc),'fro')) ** 2) / 2 \ + regularization_lambda * trg_senses.sum() # TODO consider thresholding reg part objective = float(objective) print(f'objective: {objective:.3f}') # Write target sense mapping with open(f'tmp_outs/{args.tsns_output[:-4]}-it{it:03d}.pkl', mode='wb') as tsnsfile: pickle.dump(trg_senses.get(), tsnsfile) ### update synset embeddings (10) time10 = time.time() if args.gd and args.gd_emb_steps > 0: ### TODO probably handle sizes and/or threshold sparse matrix if args.gd_wd: true_it = (it - 1) * args.gd_emb_steps emb_gd_lr = args.gd_lr * pow( 0.5, floor((1 + true_it) / args.gd_wd_hl)) if args.verbose: print(f'embedding learning rate: {emb_gd_lr}') ### replace block for no-source-tuning mode all_senses = trg_senses if args.start_src else get_sparse_module( vstack((src_senses.get(), trg_senses.get()), format='csr'), dtype=dtype) aw = zw[args. 
skip_top:cutoff_end] if args.start_src else xp.concatenate( (xw[args.skip_top:cutoff_end], zw[args.skip_top:cutoff_end])) for i in range(args.gd_emb_steps): cc_grad = all_senses.T.dot( aw - all_senses.dot(cc)) - args.ccreglamb * cc cc_grad.clip(-args.gd_clip, args.gd_clip, out=cc_grad) cc += emb_gd_lr * cc_grad else: ### TODO add args.skip_top calculations all_senses = get_sparse_module( vstack((src_senses, trg_senses), format='csr')) xzecc = xp.concatenate((xw[:src_size], zw[:trg_size])).T\ .dot(all_senses.toarray()).T # sense_size * emb_dim all_sns_psinv = psinv( all_senses.get(), dtype, args.inv_delta ) ### TODO only update target side? We still have src_sns_psinv [it doesn't matter, dimensions are the same] cc[:] = all_sns_psinv.dot(xzecc) if args.verbose: print(f'synset embedding update: {time.time()-time10:.2f}', file=sys.stderr) objective = ((xp.linalg.norm(xw[args.skip_top:cutoff_end] - get_sparse_module(src_senses).dot(cc),'fro')) ** 2\ + (xp.linalg.norm(zw[args.skip_top:cutoff_end] - get_sparse_module(trg_senses).dot(cc),'fro')) ** 2) / 2 \ + regularization_lambda * trg_senses.sum() # TODO consider thresholding reg part objective = float(objective) print(f'objective: {objective:.3f}') ### update projections (3,5) # write to zw and xw if args.orthogonal or not end: ### remove block for no-source-tuning mode # source side - mappings don't change so xecc is constant #if not args.start_src: # need to do this anyway whenever cc updates time3 = time.time() u, s, vt = xp.linalg.svd(cc.T.dot(xecc)) wx = vt.T.dot(u.T).astype(dtype) x.dot(wx, out=xw) if args.verbose: print(f'source projection update: {time.time()-time3:.2f}', file=sys.stderr) # target side - compute sense mapping first time3 = time.time() zecc.fill(0.) for i in range(0, trg_size, args.trg_batch): end_idx = min(i + args.trg_batch, trg_size) zecc += z[i:end_idx].T.dot( get_sparse_module(trg_senses[i:end_idx]).toarray()).T u, s, vt = xp.linalg.svd(cc.T.dot(zecc)) wz = vt.T.dot(u.T).astype(dtype) z.dot(wz, out=zw) if args.verbose: print(f'target projection update: {time.time()-time3:.2f}', file=sys.stderr) ### TODO add parts from 'advanced mapping' part - transformations, whitening, etc. 
# Objective function evaluation time_obj = time.time() trg_senses_l1 = float(trg_senses.sum()) src_obj = (float( xp.linalg.norm( xw[args.skip_top:cutoff_end] - get_sparse_module(src_senses).dot(cc), 'fro'))**2) / 2 trg_obj = (float( xp.linalg.norm( zw[args.skip_top:cutoff_end] - get_sparse_module(trg_senses).dot(cc), 'fro'))**2) / 2 objective = src_obj + trg_obj + regularization_lambda * trg_senses_l1 # TODO consider thresholding reg part if args.verbose: print(f'objective calculation: {time.time()-time_obj:.2f}', file=sys.stderr) if objective - best_objective <= -args.threshold: last_improvement = it best_objective = objective # WordNet transduction evaluation (can't tune on this) if args.gold_pairs is not None: np_trg_senses = trg_senses.get() trg_corr = [ p for p in gold_pairs if np_trg_senses[p] > args.gold_threshold ] correct_mappings = len(trg_corr) domain_trgs = np_trg_senses[gold_trgs][:, gold_senses] else: correct_mappings = -1 # Logging duration = time.time() - t if args.verbose: print('ITERATION {0} ({1:.2f}s)'.format(it, duration), file=sys.stderr) print('objective: {0:.3f}'.format(objective), file=sys.stderr) print('target senses l_1 norm: {0:.3f}'.format(trg_senses_l1), file=sys.stderr) if len(gold_pairs) > 0 and domain_trgs.getnnz() > 0: print( f'{correct_mappings} correct target mappings: {(correct_mappings/len(gold_pairs)):.3f} recall, {(correct_mappings/domain_trgs.getnnz()):.3f} precision', file=sys.stderr) print(file=sys.stderr) sys.stderr.flush() if args.log is not None: print( f'{it}\t{objective:.3f}\t{src_obj:.3f}\t{trg_obj:.3f}\t{trg_senses_l1:.3f}\t{duration:.3f}\t{trg_senses.getnnz()}\t{correct_mappings}', file=log) log.flush() if end: break t = time.time() it += 1 # Write mapped embeddings with open(args.src_output, mode='w', encoding=args.encoding, errors='surrogateescape') as srcfile: embeddings.write(src_words, xw, srcfile) with open(args.trg_output, mode='w', encoding=args.encoding, errors='surrogateescape') as trgfile: embeddings.write(trg_words, zw, trgfile) # Write target sense mapping with open(args.tsns_output, mode='wb') as tsnsfile: pickle.dump(trg_senses.get(), tsnsfile)
def _mutual_proximity_gumbel_sparse(S: np.ndarray, min_nnz: int = 30,
                                    test_set_ind: np.ndarray = None,
                                    verbose: int = 0, log=None):
    """MP Gumbel for sparse similarity matrices.

    Please do not directly use this function, but invoke via
    mutual_proximity_gumbel()
    """
    n = S.shape[0]
    self_value = 1.
    if test_set_ind is None:
        train_set_ind = slice(0, n)
    else:
        train_set_ind = np.setdiff1d(np.arange(n), test_set_ind)

    # mean, variance WITHOUT zero values (missing values), ddof=1
    if S.diagonal().max() != 1. or S.diagonal().min() != 1.:
        raise ValueError("Self similarities must be 1.")
    S_param = S[train_set_ind]
    # the -1 accounts for self similarities that must be excluded from the calc
    mu = np.array((S_param.sum(0) - 1) / (S_param.getnnz(0) - 1)).ravel()
    E2 = mu**2
    X = S_param.copy()
    X.data **= 2
    n_x = (X.getnnz(0) - 1)
    E1 = np.array((X.sum(0) - 1) / (n_x)).ravel()
    del X
    # for an unbiased sample variance
    va = n_x / (n_x - 1) * (E1 - E2)
    del E1, E2
    sd = np.sqrt(va)
    del va

    # Euler-Mascheroni gamma=.57721566490153286 (https://oeis.org/A001620)
    EULER_MASCHERONI = np.euler_gamma
    beta_hat = sd * np.sqrt(6) / np.pi
    mu_hat = mu - EULER_MASCHERONI * beta_hat
    del mu, sd
    S_mp = lil_matrix(S.shape, dtype=np.float32)
    nnz = S.getnnz(axis=1)  # nnz per row

    for i in range(n):
        if verbose and log and ((i + 1) % 1000 == 0 or i + 1 == n):
            log.message("MP_gumbel: {} of {}".format(i + 1, n), flush=True)
        j_idx = slice(i + 1, n)
        Dij = S[i, j_idx].toarray().ravel()  # Extract dense rows temporarily
        tmp = np.empty(n - i)
        tmp[0] = self_value / 2.
        if nnz[i] <= min_nnz:
            tmp[1:] = np.nan
        else:  # Rescale iff there are enough neighbors for current point
            p1 = _gumbelcdf(Dij, mu_hat[i], beta_hat[i])
            p1[Dij == 0] = 0.
            del Dij
            Dji = S[j_idx, i].toarray().ravel()  # for vectorization below
            p2 = _gumbelcdf(Dji, mu_hat[j_idx], beta_hat[j_idx])
            p2[Dji == 0] = 0.
            del Dji
            tmp[1:] = (p1 * p2).ravel()
        S_mp[i, i:] = tmp
        del tmp, j_idx
    S_mp += S_mp.T

    # Retain original distances for objects with too few neighbors.
    # That is, keep distances FROM these objects to others (rows), but
    # set distances of other objects TO them to NaN (columns).
    # Returned matrix is thus NOT SYMMETRIC.
    for row in np.argwhere(nnz <= min_nnz):
        row = row[0]  # use scalar for indexing instead of array
        S_mp[row, :] = S.getrow(row)
    return S_mp.tocsr()
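# The helper _gumbelcdf is not part of this excerpt; a plausible minimal
# sketch, assuming it is the standard Gumbel CDF F(x) = exp(-exp(-(x - mu)/beta)),
# with mu_hat and beta_hat the location/scale estimates derived above from the
# per-column mean and standard deviation.
import numpy as np

def _gumbelcdf_sketch(x, mu_hat, beta_hat):
    """Standard Gumbel cumulative distribution function (illustrative)."""
    return np.exp(-np.exp(-(x - mu_hat) / beta_hat))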
def main(): # Load CNN original_model = models.alexnet(pretrained=True) class AlexNetConv3(nn.Module): def __init__(self): super(AlexNetConv3, self).__init__() self.features = nn.Sequential( # stop at conv3 *list(original_model.features.children())[:7] ) def forward(self, x): x = self.features(x) return x model = AlexNetConv3() model.eval() tm = TemporalMemory( # Must be the same dimensions as the SP columnDimensions=(2048,), # How many cells in each mini-column. cellsPerColumn=32, # A segment is active if it has >= activationThreshold connected synapses # that are active due to infActiveState activationThreshold=4,#1,4(melhor), initialPermanence=0.55, connectedPermanence=0.5, # Minimum number of active synapses for a segment to be considered during # search for the best-matching segments. minThreshold=1, #1 # The max number of synapses added to a segment during learning maxNewSynapseCount=20, #6 permanenceIncrement=0.01, permanenceDecrement=0.01, predictedSegmentDecrement=0.0005,#0.0001,#0.0005, maxSegmentsPerCell=100, #8 16(colou) maxSynapsesPerSegment=100, #8 16(colou) seed=42 ) numberImages = 200 features = [] labels = [] DIR = "/home/cappizzino/Documentos/doutorado/dataset" path_im = [os.path.join(DIR,sp) for sp in [ 'fall/', 'spring/', 'summer/', 'winter/']] # Seasons to compare. # First season is the input one. Second season is the reference season. # 0 = fall, 1 = spring, 2 = summer, 3 = winter. # simul 1 = 2 and 3 # simul 2 = 1 and 0 # simul 3 = 0 and 3 reference_season = 2 input_season = 3 # Extract Features reference_features, reference_labels = extractFeatures(numberImages, reference_season, model,path_im) input_features, input_labels = extractFeatures(numberImages, input_season, model, path_im) #print len(input_features[0]) #print input_labels[0] #print input_features # Experiments # Ground truth print 'Ground truth' GT = np.identity(numberImages, dtype = bool) for i in range(GT.shape[0]): for j in range(GT.shape[0]-1): if i==j: GT[i,j]=1 # Pairwise (raw descriptors) print 'Pairwise descriptors' t = time.time() S_pairwise = cosine_similarity(reference_features[:numberImages], input_features[:numberImages]) elapsed = time.time() - t print("Elapsed time: %f seconds\n" %elapsed) # Dimension Reduction and binarizarion print 'Dimension Reduction' P = np.random.randn(len(input_features[0]), 1024) P = normc(P) # sLSBH (binarized descriptors) print 'sLSBH' t = time.time() D1_slsbh = getLSBH(reference_features[:numberImages],P,0.25) D2_slsbh = getLSBH(input_features[:numberImages],P,0.25) Sb_pairwise = pairwiseDescriptors(D1_slsbh[:numberImages], D2_slsbh[:numberImages]) elapsed = time.time() - t print("Elapsed time: %f seconds\n" %elapsed) #print len(np.nonzero(D1_slsbh[0])[0]) D1_tm=[] D2_tm=[] id_max=[] id_max1=[] id_max2=[] print 'Temporal Pooler (1) descriptors' t = time.time() for i in range(numberImages): for _ in range(1): activeColumnIndices = np.nonzero(D1_slsbh[i,:])[0] tm.compute(activeColumnIndices, learn=True) activeCells = tm.getWinnerCells() D1_tm.append(activeCells) id_max1.append(max(activeCells)) print 'Temporal Pooler (2) descriptors' for i in range(numberImages): activeColumnIndices = np.nonzero(D2_slsbh[i,:])[0] tm.compute(activeColumnIndices, learn=False) activeCells = tm.getWinnerCells() D2_tm.append(activeCells) id_max2.append(max(activeCells)) id_max = max(max(id_max1),max(id_max2)) D1_sparse = sparse.lil_matrix((numberImages, id_max+1), dtype='int8') for i in range(numberImages): D1_sparse[i,D1_tm[i]] = 1 D2_sparse = sparse.lil_matrix((numberImages, 
id_max+1), dtype='int8') for i in range(numberImages): D2_sparse[i,D2_tm[i]] = 1 S_TM = pairwiseDescriptors(D1_sparse, D2_sparse) elapsed = time.time() - t print("Elapsed time: %f seconds\n" %elapsed) D1_mcn=[] D2_mcn=[] id_max=[] id_max1=[] id_max2=[] # Simple HTM parameters params = Params() params.probAdditionalCon = 0.05 # probability for random connection params.nCellPerCol = 32 # number of cells per minicolumn params.nInConPerCol = 200 # number of connections per minicolumn params.minColumnActivity = 0.75 # minicolumn activation threshold params.nColsPerPattern = 50 # minimum number of active minicolumns k_min params.kActiveColumn = 100 # maximum number of active minicolumns k_max # conversion of the parameter to a natural number that contains the # required number of 1s for activation params.minColumnActivity = np.round(params.minColumnActivity*params.nInConPerCol) htm = MCN('htm',params) nCols_MCN=[] nCols_HTM=[] print ('Simple HTM (1)') t = time.time() for i in range(numberImages): htm.compute(D1_slsbh[i,:],0) nCols_MCN.append(htm.nCols) nCols_HTM.append(tm.columnDimensions[0]) id_max1.append(max(htm.winnerCells)) D1_mcn.append(htm.winnerCells) print ('Simple HTM (2)') for i in range(numberImages): htm.compute(D2_slsbh[i,:],1) #nCols_MCN.append(htm.nCols) #nCols_HTM.append(tm.columnDimensions[0]) id_max2.append(max(htm.winnerCells)) D2_mcn.append(htm.winnerCells) id_max = max(max(id_max1),max(id_max2)) D1_sparse = sparse.lil_matrix((numberImages, id_max+1), dtype='int8') for i in range(numberImages): D1_sparse[i,D1_mcn[i]] = 1 D2_sparse = sparse.lil_matrix((numberImages, id_max+1), dtype='int8') for i in range(numberImages): D2_sparse[i,D2_mcn[i]] = 1 S_MCN = pairwiseDescriptors(D1_sparse, D2_sparse) elapsed = time.time() - t print("Elapsed time: %f seconds\n" %elapsed) # Results print 'Results 1' fig, ax = plt.subplots() P, R = createPR(S_pairwise,GT) ax.plot(R, P, label='pairwise / raw (avgP=%f)' %np.trapz(P,R)) P, R = createPR(S_MCN,GT) ax.plot(R, P, label='MCN (avgP=%f)' %np.trapz(P,R)) P, R = createPR(Sb_pairwise,GT) ax.plot(R, P, label='sLSBH / raw (avgP=%f)' %np.trapz(P,R)) P, R = createPR(S_TM,GT) ax.plot(R, P, label='HTM TM (avgP=%f)' %np.trapz(P,R)) ax.legend() ax.grid(True) plt.xlabel("Recall") plt.ylabel("Precision") plt.show() print 'Results 2' fig2, ax2 = plt.subplots() ax2.plot(nCols_MCN,'g',label='MCN = %i cols' %htm.nCols) ax2.plot(nCols_HTM,'b',label='HTM TM = %i cols' %tm.columnDimensions[0]) ax2.legend() ax2.grid(True) plt.xlabel('Number of seen images') plt.ylabel('Number of MiniColumns') plt.show() print 'Results 3' fig3, (ax3, ax4) = plt.subplots(nrows=1, ncols=2, gridspec_kw={'width_ratios': [2, 1]}, figsize=(9,4)) P, R = createPR(S_pairwise,GT) ax3.plot(R, P, label='pairwise / raw (AUC=%f)' %np.trapz(P,R)) P, R = createPR(S_MCN,GT) ax3.plot(R, P, label='MCN (AUC=%f)' %np.trapz(P,R)) P, R = createPR(Sb_pairwise,GT) ax3.plot(R, P, label='sLSBH / raw (AUC=%f)' %np.trapz(P,R)) P, R = createPR(S_TM,GT) ax3.plot(R, P, label='HTM TM (AUC=%f)' %np.trapz(P,R)) ax3.grid(True) ax3.set_xlabel("Recall", fontsize = 12.0) ax3.set_ylabel("Precision", fontsize = 12.0) ax3.legend(fontsize=10) ax4.plot(nCols_MCN,'g',label='MCN = %i cols' %htm.nCols) ax4.plot(nCols_HTM,'b',label='HTM TM = %i cols' %tm.columnDimensions[0]) ax4.grid(True) ax4.tick_params(axis='both', labelsize=6) ax4.set_xlabel('Number of seen images', fontsize = 12.0) ax4.set_ylabel('Number of MiniColumns', fontsize = 12.0) ax4.legend(fontsize=10) fig3.savefig('tes.eps') plt.show()
def init_parameters(): """ This function defines the material parameters for silicon. After executing this function the parameters can be altered. This function must be executed after **init_geometry()** """ global dt dt = 1E-12 global Chi Chi = np.full(n, 4.05) global Eg Eg = np.full(n, 1.12) global Nc Nc = np.full(n, 2.81E25) global Nv Nv = np.full(n, 1.83E25) global Epsilon Epsilon = np.full(n, Epsilon_r * Epsilon_0) global mu_p mu_p = np.full(n, 0.045) global mu_n mu_n = np.full(n, 0.14) # Doping-Profile global C C = np.zeros(n) # Cheet Charge global CA CA = np.zeros(n) global Cau Cau = np.full(n, 0) # 1E-28 global generation generation = np.full(n, 0.0) # global u u = np.zeros(3 * n) # global u_old u_old = np.zeros(3 * n) # global b b = np.zeros(3 * n) # global A A = sparse.lil_matrix((3 * n, 3 * n)) # Vector dx to be solved global x x = np.zeros(3 * n)
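init_parameters only allocates the global unknown vector u and the empty system matrix A; assembly and solution happen elsewhere. As a rough illustration of how such a pre-allocated lil_matrix is typically used afterwards (a sketch with a made-up diagonally dominant fill, not the actual device equations):

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve

n = 5                                   # stand-in for the mesh size used above
A = sparse.lil_matrix((3 * n, 3 * n))
b = np.zeros(3 * n)
for i in range(3 * n):
    A[i, i] = 2.0                       # illustrative coefficients only
    if i > 0:
        A[i, i - 1] = -1.0
    if i < 3 * n - 1:
        A[i, i + 1] = -1.0
    b[i] = 1.0
x = spsolve(A.tocsr(), b)               # convert to CSR before handing to the solver
print(x.shape)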
def __init__(self, X, Y, regparam=1.0, qids=None, callbackfun=None, **kwargs): self.regparam = regparam self.callbackfun = None self.Y = array_tools.as_2d_array(Y) #Number of training examples self.size = Y.shape[0] if self.Y.shape[1] > 1: raise Exception( 'CGRankRLS does not currently work in multi-label mode') self.learn_from_labels = True self.callbackfun = callbackfun self.X = csc_matrix(X.T) #if qids != None: # self.setQids(qids) #else: # self.qidmap = None #self.train() if qids != None: self.qids = map_qids(qids) self.splits = qids_to_splits(self.qids) else: self.qids = None regparam = self.regparam #regparam = 0. qids = self.qids if qids != None: P = sp.lil_matrix((self.size, len(set(qids)))) for qidind in range(len(self.splits)): inds = self.splits[qidind] qsize = len(inds) for i in inds: P[i, qidind] = 1. / sqrt(qsize) P = P.tocsr() PT = P.tocsc().T else: P = 1. / sqrt(self.size) * (np.mat( np.ones((self.size, 1), dtype=np.float64))) PT = P.T X = self.X.tocsc() X_csr = X.tocsr() def mv(v): v = np.mat(v).T return X_csr * (X.T * v) - X_csr * (P * (PT * (X.T * v))) + regparam * v G = LinearOperator((X.shape[0], X.shape[0]), matvec=mv, dtype=np.float64) Y = self.Y if not self.callbackfun == None: def cb(v): self.A = np.mat(v).T self.b = np.mat(np.zeros((1, 1))) self.callbackfun.callback(self) else: cb = None XLY = X_csr * Y - X_csr * (P * (PT * Y)) try: self.A = np.mat(cg(G, XLY, callback=cb)[0]).T except Finished: pass self.b = np.mat(np.zeros((1, 1))) self.predictor = predictor.LinearPredictor(self.A, self.b)
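The constructor above never forms the regularised Gram matrix explicitly: the matrix-vector product is wrapped in a LinearOperator and passed to scipy's conjugate-gradient solver. The bare pattern, with a toy symmetric positive definite operator instead of the ranking objective, looks like this:

import numpy as np
from scipy.sparse.linalg import LinearOperator, cg

n = 100
d = np.linspace(1.0, 2.0, n)            # diagonal of an SPD operator

def mv(v):
    return d * v                        # implicit matrix-vector product

G = LinearOperator((n, n), matvec=mv, dtype=np.float64)
sol, info = cg(G, np.ones(n))
print(info)                             # 0 indicates convergence
print(np.max(np.abs(d * sol - 1.0)))    # small residual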
def _get_projection(n_samples, n_features, density='auto', eps=0.1): p = SparseRandomProjection(density=density, eps=eps) mat = lil_matrix((n_samples, n_features)) return p.fit(mat)
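A quick usage sketch for the helper above (numbers are illustrative; with the default eps=0.1 the Johnson-Lindenstrauss bound for 10 samples is below 5000 features, so the automatic choice of n_components succeeds):

import numpy as np

proj = _get_projection(n_samples=10, n_features=5000)
X_new = proj.transform(np.random.rand(10, 5000))
print(X_new.shape)   # (10, n_components), with n_components picked from the JL lemma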
alpha_range = (0.99, 1.01) beta_range = (0.99, 1.01) dPde.set_perturbation(alpha_range, beta_range) ######################################################## # test verifier class verifier = Verifier() toTimeStep = 2 dis_reachable_set = verifier.get_dreach_set( dPde, toTimeStep) # compute discrete reachable set dis_min_vec, _, dis_max_vec, _ = dis_reachable_set[toTimeStep - 1].get_min_max() unsafe_mat = lil_matrix((1, dPde.matrix_a.shape[0]), dtype=float) unsafe_mat[0, dPde.matrix_a.shape[0] - 1] = 1 unsafe_vector = lil_matrix((1, 1), dtype=float) unsafe_vector[0, 0] = -1 dPde.set_unsafe_set(unsafe_mat.tocsc(), unsafe_vector.tocsc()) verifier.on_fly_check_dPde(dPde, toTimeStep) ############################################################ # test ReachSetAssembler RSA = ReachSetAssembler() u_dset, e_dset, bloated_dset = RSA.get_dreachset(dPde, toTimeStep) print "\nu_dset = {}".format(u_dset) print "\ne_dset = {}".format(e_dset) print "\nbloated_dset = {}".format(bloated_dset) u_min, u_min_points, u_max, u_max_points = u_dset[toTimeStep].get_min_max()
def load_gcn_data(dataset_str): """ Loads input data from gcn/data directory ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object; ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object; ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object; ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object; ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object; ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object; ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict object; ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object. All objects above must be saved using python pickle module. :param dataset_str: Dataset name :return: All data input files loaded (as well the training/test data). """ names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph'] objects = [] for i in range(len(names)): with open("data/{}/ind.{}.{}".format(dataset_str,dataset_str, names[i]), 'rb') as f: if sys.version_info > (3, 0): objects.append(pkl.load(f, encoding='latin1')) else: objects.append(pkl.load(f)) x, y, tx, ty, allx, ally, graph = tuple(objects) test_idx_reorder = parse_index_file("data/{}/ind.{}.test.index".format(dataset_str,dataset_str)) test_idx_range = np.sort(test_idx_reorder) if dataset_str == 'citeseer': # Fix citeseer dataset (there are some isolated nodes in the graph) # Find isolated nodes, add them as zero-vecs into the right position test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1) tx_extended = sparse.lil_matrix((len(test_idx_range_full), x.shape[1])) tx_extended[test_idx_range - min(test_idx_range), :] = tx tx = tx_extended ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) ty_extended[test_idx_range - min(test_idx_range), :] = ty ty = ty_extended features = sparse.vstack((allx, tx)).tolil() features[test_idx_reorder, :] = features[test_idx_range, :] G=nx.from_dict_of_lists(graph, create_using=nx.Graph()) A = nx.adjacency_matrix(G) labels = np.vstack((ally, ty)) labels[test_idx_reorder, :] = labels[test_idx_range, :] idx_test = test_idx_range.tolist() idx_train = range(len(y)) idx_val = range(len(y), len(y) + 500) features = normalize_feature(features) features = torch.FloatTensor(np.array(features.todense())) if dataset_str == 'citeseer': kk = np.zeros(len(labels)).astype(int) for i in range(len(labels)): t = labels[i] if sum(t) == 0: kk[i] = len(t) else: kk[i] = np.argwhere(t != 0)[0] labels=kk labels = torch.LongTensor(labels) else: labels = torch.LongTensor(np.where(labels)[1]) idx_train = torch.LongTensor(idx_train) idx_test = torch.LongTensor(idx_test) idx_val = torch.LongTensor(idx_val) A_processed = preprocess_adj(A) return (G,A_processed, features, labels, idx_train, idx_test, idx_val)
def assembly_quads_mindlin_plate_geometric(nodes, elements, thickness, sigma_x, sigma_y, tau_xy, gauss_order=3): from quadrature import legendre_quad from shape_functions import iso_quad from numpy import sum print "The assembly routine is started" freedom = 3 element_nodes = 4 nodes_count = len(nodes) dimension = freedom * nodes_count element_dimension = freedom * element_nodes geometric = lil_matrix((dimension, dimension)) elements_count = len(elements) (xi, eta, w) = legendre_quad(gauss_order) for element_index in range(elements_count): kg = zeros((element_dimension, element_dimension)) vertices = nodes[elements[element_index, :], :] sx = sigma_x[elements[element_index, :]] sy = sigma_y[elements[element_index, :]] txy = tau_xy[elements[element_index, :]] for i in range(len(w)): (jacobian, shape, shape_dx, shape_dy) = iso_quad(vertices, xi[i], eta[i]) s0 = array([[sum(shape * sx), sum(shape * txy)], [sum(shape * txy), sum(shape * sy)]]) bb = array([[ shape_dx[0], 0.0, 0.0, shape_dx[1], 0.0, 0.0, shape_dx[2], 0.0, 0.0, shape_dx[3], 0.0, 0.0 ], [ shape_dy[0], 0.0, 0.0, shape_dy[1], 0.0, 0.0, shape_dy[2], 0.0, 0.0, shape_dy[3], 0.0, 0.0 ]]) bs1 = array([[ 0.0, shape_dx[0], 0.0, 0.0, shape_dx[1], 0.0, 0.0, shape_dx[2], 0.0, 0.0, shape_dx[3], 0.0 ], [ 0.0, shape_dy[0], 0.0, 0.0, shape_dy[1], 0.0, 0.0, shape_dy[2], 0.0, 0.0, shape_dy[3], 0.0 ]]) bs2 = array([[ 0.0, 0.0, shape_dx[0], 0.0, 0.0, shape_dx[1], 0.0, 0.0, shape_dx[2], 0.0, 0.0, shape_dx[3] ], [ 0.0, 0.0, shape_dy[0], 0.0, 0.0, shape_dy[1], 0.0, 0.0, shape_dy[2], 0.0, 0.0, shape_dy[3] ]]) kg = kg + thickness * bb.transpose().dot(s0).dot( bb) * jacobian * w[i] + thickness**3.0 / 12.0 * ( bs1.transpose().dot(s0).dot(bs1) + bs2.transpose().dot(s0).dot(bs2)) * jacobian * w[i] for i in range(element_dimension): ii = elements[element_index, i / freedom] * freedom + i % freedom for j in range(i, element_dimension): jj = elements[element_index, j / freedom] * freedom + j % freedom geometric[ii, jj] += kg[i, j] if ii != jj: geometric[jj, ii] = geometric[ii, jj] print_progress(element_index, elements_count - 1) print "\nThe assembly routine is completed" return geometric.tocsr()
def create_param_dict(param_dict): '''Fills a dictionary with some parameters that can be put into a trajectory. ''' param_dict['Normal'] = {} param_dict['Numpy'] = {} param_dict['Sparse'] ={} param_dict['Numpy_2D'] = {} param_dict['Numpy_3D'] = {} param_dict['Tuples'] ={} param_dict['Lists'] ={} param_dict['Pickle']={} normal_dict = param_dict['Normal'] normal_dict['string'] = 'Im a test string!' normal_dict['int'] = 42 normal_dict['long'] = compat.long_type(42) normal_dict['double'] = 42.42 normal_dict['bool'] =True normal_dict['trial'] = 0 numpy_dict=param_dict['Numpy'] numpy_dict['string'] = np.array(['Uno', 'Dos', 'Tres']) numpy_dict['int'] = np.array([1,2,3,4]) numpy_dict['double'] = np.array([1.0,2.0,3.0,4.0]) numpy_dict['bool'] = np.array([True, False, True]) param_dict['Numpy_2D']['double'] = np.matrix([[1.0,2.0],[3.0,4.0]]) param_dict['Numpy_3D']['double'] = np.array([[[1.0,2.0],[3.0,4.0]],[[3.0,-3.0],[42.0,41.0]]]) spsparse_csc = spsp.lil_matrix((222,22)) spsparse_csc[1,2] = 44.6 spsparse_csc[1,9] = 44.5 spsparse_csc = spsparse_csc.tocsc() spsparse_csr = spsp.lil_matrix((222,22)) spsparse_csr[1,3] = 44.7 spsparse_csr[17,17] = 44.755555 spsparse_csr = spsparse_csr.tocsr() spsparse_bsr = spsp.bsr_matrix(np.matrix([[1, 1, 0, 0, 2, 2], [1, 1, 0, 0, 2, 2], [0, 0, 0, 0, 3, 3], [0, 0, 0, 0, 3, 3], [4, 4, 5, 5, 6, 6], [4, 4, 5, 5, 6, 6]])) spsparse_dia = spsp.dia_matrix(np.matrix([[1, 0, 3, 0], [1, 2, 0, 4], [0, 2, 3, 0], [0, 0, 3, 4]])) param_dict['Sparse']['bsr_mat'] = spsparse_bsr param_dict['Sparse']['csc_mat'] = spsparse_csc param_dict['Sparse']['csr_mat'] = spsparse_csr param_dict['Sparse']['dia_mat'] = spsparse_dia param_dict['Tuples']['empty'] = () param_dict['Tuples']['int'] = (1,2,3) param_dict['Tuples']['float'] = (44.4,42.1,3.) param_dict['Tuples']['str'] = ('1','2wei','dr3i') param_dict['Lists']['lempty'] = [] param_dict['Lists']['lint'] = [1,2,3] param_dict['Lists']['lfloat'] = [44.4,42.1,3.] param_dict['Lists']['lstr'] = ['1','2wei','dr3i'] param_dict['Pickle']['list']= ['b','h', 53, (), 0] param_dict['Pickle']['list']= ['b','h', 42, (), 1] param_dict['Pickle']['list']= ['b',[444,43], 44, (),2]
curPath = os.path.abspath(os.path.dirname(__file__)) rootPath = os.path.split(curPath)[0] sys.path.append(rootPath) import pandas as pd import numpy as np from scipy.sparse import lil_matrix import scipy as scp from code_file.utils import get_logs_from from code_file.model import calculate_matrix # To compute the collaborative-filtering matrix we need the largest item id ITEM_NUM = 4318203 # Load the user behaviour logs for the current group user_logs = get_logs_from('../full_logs/user_logs_group7.txt') # Convert the log dict into a list of items user_logs = list(user_logs.items()) for i in range(0, len(user_logs), 10000): print("The %d " % i + 'batch is started...........') print("--------------------------") mat = lil_matrix((ITEM_NUM, ITEM_NUM), dtype=float) mat = calculate_matrix(mat, user_logs[i:i + 10000], alpha=0.5) # Save the matrix to disk after each batch has been processed # scp.sparse.save_npz('../tmpData/sparse_matrix_%d_batch_group4.npz' % i, mat.tocsr()) scp.sparse.save_npz('../tmpdata_iuf/sparse_matrix_%d_batch_group7.npz' % i, mat.tocsr()) print("save successfully!!!!") print("************************")
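Each batch above is written out as its own .npz file, so the per-batch matrices presumably get merged later. A minimal sketch of that follow-up step (the glob pattern mirrors the file names used above; the merge itself is an assumption, not part of the original script):

import glob
import scipy.sparse as sp

total = None
for path in sorted(glob.glob('../tmpdata_iuf/sparse_matrix_*_batch_group7.npz')):
    part = sp.load_npz(path)                 # CSR matrix saved for one batch
    total = part if total is None else total + part
# 'total' now holds the co-occurrence counts accumulated over all batches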
def load_data(dataset_str): """ Loads input data from gcn/data directory ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object; ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object; ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object; ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object; ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object; ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object; ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict object; ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object. All objects above must be saved using python pickle module. :param dataset_str: Dataset name :return: All data input files loaded (as well the training/test data). """ names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph'] objects = [] for i in range(len(names)): with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f: if sys.version_info > (3, 0): objects.append(pkl.load(f, encoding='latin1')) else: objects.append(pkl.load(f)) x, y, tx, ty, allx, ally, graph = tuple(objects) test_idx_reorder = parse_index_file( "data/ind.{}.test.index".format(dataset_str)) test_idx_range = np.sort(test_idx_reorder) if dataset_str == 'citeseer': # Fix citeseer dataset (there are some isolated nodes in the graph) # Find isolated nodes, add them as zero-vecs into the right position test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1) tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) tx_extended[test_idx_range - min(test_idx_range), :] = tx tx = tx_extended ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) ty_extended[test_idx_range - min(test_idx_range), :] = ty ty = ty_extended features = sp.vstack((allx, tx)).tolil() features[test_idx_reorder, :] = features[test_idx_range, :] adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph)) labels = np.vstack((ally, ty)) labels[test_idx_reorder, :] = labels[test_idx_range, :] idx_test = test_idx_range.tolist() idx_train = range(len(y)) idx_val = range(len(y), len(y) + 500) train_mask = sample_mask(idx_train, labels.shape[0]) val_mask = sample_mask(idx_val, labels.shape[0]) test_mask = sample_mask(idx_test, labels.shape[0]) y_train = np.zeros(labels.shape) y_val = np.zeros(labels.shape) y_test = np.zeros(labels.shape) y_train[train_mask, :] = labels[train_mask, :] y_val[val_mask, :] = labels[val_mask, :] y_test[test_mask, :] = labels[test_mask, :] return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
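Both GCN loaders above call two small helpers that are not part of this excerpt, parse_index_file and sample_mask. A plausible sketch of what they need to do, inferred from how they are called here rather than copied from the original repository:

import numpy as np

def parse_index_file(filename):
    """Read one integer index per line of the .test.index file."""
    with open(filename) as f:
        return [int(line.strip()) for line in f]

def sample_mask(idx, length):
    """Boolean mask of the given length, True at the listed positions."""
    mask = np.zeros(length, dtype=bool)
    mask[np.asarray(list(idx), dtype=int)] = True
    return mask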
def main(): """ Plasma PIC simulation """ # Simulation parameters N = 40000 # Number of particles Nx = 400 # Number of mesh cells t = 0 # current time of the simulation tEnd = 50 # time at which simulation ends dt = 1 # timestep boxsize = 50 # periodic domain [0,boxsize] n0 = 1 # electron number density vb = 3 # beam velocity vth = 1 # beam width A = 0.1 # perturbation plotRealTime = True # switch on for plotting as the simulation goes along # Generate Initial Conditions np.random.seed(42) # set the random number generator seed # construct 2 opposite-moving Guassian beams pos = np.random.rand(N,1) * boxsize vel = vth * np.random.randn(N,1) + vb Nh = int(N/2) vel[Nh:] *= -1 # add perturbation vel *= (1 + A*np.sin(2*np.pi*pos/boxsize)) # Construct matrix G to computer Gradient (1st derivative) dx = boxsize/Nx e = np.ones(Nx) diags = np.array([-1,1]) vals = np.vstack((-e,e)) Gmtx = sp.spdiags(vals, diags, Nx, Nx); Gmtx = sp.lil_matrix(Gmtx) Gmtx[0,Nx-1] = -1 Gmtx[Nx-1,0] = 1 Gmtx /= (2*dx) Gmtx = sp.csr_matrix(Gmtx) # Construct matrix L to computer Laplacian (2nd derivative) diags = np.array([-1,0,1]) vals = np.vstack((e,-2*e,e)) Lmtx = sp.spdiags(vals, diags, Nx, Nx); Lmtx = sp.lil_matrix(Lmtx) Lmtx[0,Nx-1] = 1 Lmtx[Nx-1,0] = 1 Lmtx /= dx**2 Lmtx = sp.csr_matrix(Lmtx) # calculate initial gravitational accelerations acc = getAcc( pos, Nx, boxsize, n0, Gmtx, Lmtx ) # number of timesteps Nt = int(np.ceil(tEnd/dt)) # prep figure fig = plt.figure(figsize=(5,4), dpi=80) # Simulation Main Loop for i in range(Nt): # (1/2) kick vel += acc * dt/2.0 # drift (and apply periodic boundary conditions) pos += vel * dt pos = np.mod(pos, boxsize) # update accelerations acc = getAcc( pos, Nx, boxsize, n0, Gmtx, Lmtx ) # (1/2) kick vel += acc * dt/2.0 # update time t += dt # plot in real time - color 1/2 particles blue, other half red if plotRealTime or (i == Nt-1): plt.cla() plt.scatter(pos[0:Nh],vel[0:Nh],s=.4,color='blue', alpha=0.5) plt.scatter(pos[Nh:], vel[Nh:], s=.4,color='red', alpha=0.5) plt.axis([0,boxsize,-6,6]) plt.pause(0.001) # Save figure plt.xlabel('x') plt.ylabel('v') plt.savefig('pic.png',dpi=240) plt.show() return 0
def assembly_quads_mindlin_plate_laminated(nodes, elements, thicknesses, elasticity_matrices, gauss_order=3, kappa=5.0 / 6.0): # type: (array, array, float, float, float, int, float) -> lil_matrix """ Assembly Routine for the Mindlin Plates Analysis :param nodes: A two-dimensional array of plate's nodes coordinates :param elements: A two-dimensional array of plate's triangles (mesh) :param thicknesses: An array of thicknesses that stores thicknesses of each layer :param elasticity_matrices: A list or a sequence of two-dimensional arrays. Each array represents stress-strain relations of corresponded layer :param gauss_order: An order of gaussian quadratures :param kappa: The shear correction factor :return: Global stiffness matrix in the CSR sparse format Order: u_0, v0, u_1, v_1, ..., u_(n-1), v_(n-1); n - nodes count """ from quadrature import legendre_quad from shape_functions import iso_quad from numpy import sum print "The assembly routine is started" freedom = 5 element_nodes = 4 nodes_count = len(nodes) dimension = freedom * nodes_count element_dimension = freedom * element_nodes global_matrix = lil_matrix((dimension, dimension)) elements_count = len(elements) (xi, eta, w) = legendre_quad(gauss_order) h = sum(thicknesses) for element_index in range(elements_count): local = zeros((element_dimension, element_dimension)) element = nodes[elements[element_index, :], :] for i in range(len(w)): (jacobian, shape, shape_dx, shape_dy) = iso_quad(element, xi[i], eta[i]) bm = array([[ shape_dx[0], 0.0, 0.0, 0.0, 0.0, shape_dx[1], 0.0, 0.0, 0.0, 0.0, shape_dx[2], 0.0, 0.0, 0.0, 0.0, shape_dx[3], 0.0, 0.0, 0.0, 0.0 ], [ 0.0, shape_dy[0], 0.0, 0.0, 0.0, 0.0, shape_dy[1], 0.0, 0.0, 0.0, 0.0, shape_dy[2], 0.0, 0.0, 0.0, 0.0, shape_dy[3], 0.0, 0.0, 0.0 ], [ shape_dy[0], shape_dx[0], 0.0, 0.0, 0.0, shape_dy[1], shape_dx[1], 0.0, 0.0, 0.0, shape_dy[2], shape_dx[2], 0.0, 0.0, 0.0, shape_dy[3], shape_dx[3], 0.0, 0.0, 0.0 ]]) bf = array([[ 0.0, 0.0, 0.0, shape_dx[0], 0.0, 0.0, 0.0, 0.0, shape_dx[1], 0.0, 0.0, 0.0, 0.0, shape_dx[2], 0.0, 0.0, 0.0, 0.0, shape_dx[3], 0.0 ], [ 0.0, 0.0, 0.0, 0.0, shape_dy[0], 0.0, 0.0, 0.0, 0.0, shape_dy[1], 0.0, 0.0, 0.0, 0.0, shape_dy[2], 0.0, 0.0, 0.0, 0.0, shape_dy[3] ], [ 0.0, 0.0, 0.0, shape_dy[0], shape_dx[0], 0.0, 0.0, 0.0, shape_dy[1], shape_dx[1], 0.0, 0.0, 0.0, shape_dy[2], shape_dx[2], 0.0, 0.0, 0.0, shape_dy[3], shape_dx[3] ]]) bc = array([[ 0.0, 0.0, shape_dx[0], shape[0], 0.0, 0.0, 0.0, shape_dx[1], shape[1], 0.0, 0.0, 0.0, shape_dx[2], shape[2], 0.0, 0.0, 0.0, shape_dx[3], shape[3], 0.0 ], [ 0.0, 0.0, shape_dy[0], 0.0, shape[0], 0.0, 0.0, shape_dy[1], 0.0, shape[1], 0.0, 0.0, shape_dy[2], 0.0, shape[2], 0.0, 0.0, shape_dy[3], 0.0, shape[3] ]]) z0 = -h / 2.0 for j in range(len(thicknesses)): z1 = z0 + thicknesses[j] df = elasticity_matrices[j] dc = array([[df[2, 2], 0.0], [0.0, df[2, 2]]]) local = local + (z1 - z0) * ( bm.transpose().dot(df).dot(bm)) * jacobian * w[i] local = local + (z1**2.0 - z0**2.0) / 2.0 * ( bm.transpose().dot(df).dot(bf)) * jacobian * w[i] local = local + (z1**2.0 - z0**2.0) / 2.0 * ( bf.transpose().dot(df).dot(bm)) * jacobian * w[i] local = local + (z1**3.0 - z0**3.0) / 3.0 * ( bf.transpose().dot(df).dot(bf)) * jacobian * w[i] local = local + (z1 - z0) * kappa * ( bc.transpose().dot(dc).dot(bc)) * jacobian * w[i] z0 = z1 for i in range(element_dimension): ii = elements[element_index, i / freedom] * freedom + i % freedom for j in range(i, element_dimension): jj = elements[element_index, j / freedom] * freedom + j % freedom 
global_matrix[ii, jj] += local[i, j] if i != j: global_matrix[jj, ii] = global_matrix[ii, jj] print_progress(element_index, elements_count - 1) print "\nThe assembly routine is completed" return global_matrix.tocsr()
def ridge_regression(X, y, alpha, sample_weight=1.0, solver='auto', tol=1e-3): """Solve the ridge equation by the method of normal equations. Parameters ---------- X : {array-like, sparse matrix}, shape = [n_samples, n_features] Training data y : array-like, shape = [n_samples] or [n_samples, n_responses] Target values sample_weight : float or numpy array of shape [n_samples] Individual weights for each sample solver : {'auto', 'dense_cholesky', 'sparse_cg'}, optional Solver to use in the computational routines. 'dense_cholesky' will use the standard scipy.linalg.solve function, 'sparse_cg' will use a conjugate gradient solver as found in scipy.sparse.linalg.cg while 'auto' will choose the most appropriate depending on the matrix X. tol: float Precision of the solution. Returns ------- coef: array, shape = [n_features] or [n_responses, n_features] Weight vector(s). Notes ----- This function won't compute the intercept. """ n_samples, n_features = X.shape is_sparse = False if hasattr(X, 'todense'): # lazy import of scipy.sparse from scipy import sparse is_sparse = sparse.issparse(X) if is_sparse: if n_features > n_samples or \ isinstance(sample_weight, np.ndarray) or \ sample_weight != 1.0: I = sparse.lil_matrix((n_samples, n_samples)) I.setdiag(np.ones(n_samples) * alpha * sample_weight) c = _solve(X * X.T + I, y, solver, tol) coef = X.T * c else: I = sparse.lil_matrix((n_features, n_features)) I.setdiag(np.ones(n_features) * alpha) coef = _solve(X.T * X + I, X.T * y, solver, tol) else: if n_features > n_samples or \ isinstance(sample_weight, np.ndarray) or \ sample_weight != 1.0: # kernel ridge # w = X.T * inv(X X^t + alpha*Id) y A = np.dot(X, X.T) A.flat[::n_samples + 1] += alpha * sample_weight coef = np.dot(X.T, _solve(A, y, solver, tol)) else: # ridge # w = inv(X^t X + alpha*Id) * X.T y A = np.dot(X.T, X) A.flat[::n_features + 1] += alpha coef = _solve(A, np.dot(X.T, y), solver, tol) return coef.T
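The lil_matrix with setdiag in the sparse branch is just a convenient way to put alpha on the diagonal of the normal equations. A small self-contained check of that closed form (using scipy's spsolve as a stand-in for the internal _solve helper, which is not shown here):

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve

rng = np.random.RandomState(0)
X = sparse.csr_matrix(rng.rand(20, 5))
y = rng.rand(20)
alpha = 1.0

I = sparse.lil_matrix((5, 5))
I.setdiag(np.ones(5) * alpha)                      # alpha on the diagonal
coef = spsolve((X.T * X + I).tocsr(), X.T * y)     # w = inv(X^T X + alpha*Id) X^T y

Xd = X.toarray()
ref = np.linalg.solve(Xd.T @ Xd + alpha * np.eye(5), Xd.T @ y)
print(np.allclose(coef, ref))                      # True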
if __name__ == '__main__': path = argv[1] num_movies = argv[2] user_file = argv[3] #user_file = 'user_ids.csv' if num_movies == 'all': files = os.listdir(path) else: files = os.listdir(path)[:int(num_movies)] write_user_ids(path, files, user_file) users = get_user_dict(user_file) data = lil_matrix((len(users), len(files))) ct = 0 for f in files: if ct % 100 == 0: print ct infile = open('%s/%s' % (path, f), 'r') j = int(infile.readline().strip()[:-1]) - 1 # Movie number for line in infile: id, rating, _ = line.split(',') i = users[id] data[i, j] = int(rating) infile.close() ct += 1 mmwrite('ratings_matrix', data)
def process(self): dataset_str = self.dataset_name names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph'] objects = [] for i in range(len(names)): data_name = "ind.{}.{}".format(dataset_str, names[i]) data_path = os.path.join(self.raw_root_path, data_name) with open(data_path, 'rb') as f: if sys.version_info > (3, 0): objects.append(pickle.load(f, encoding='latin1')) else: objects.append(pickle.load(f)) x, y, tx, ty, allx, ally, graph = tuple(objects) with open(os.path.join(self.raw_root_path, "ind.{}.test.index".format(dataset_str)), "r", encoding="utf-8") as f: test_idx_reorder = [int(line.strip()) for line in f] test_idx_range = np.sort(test_idx_reorder) if self.dataset_name == 'citeseer': # Fix citeseer dataset (there are some isolated nodes in the graph) # Find isolated nodes, add them as zero-vecs into the right position test_idx_range_full = list( range(min(test_idx_reorder), max(test_idx_reorder) + 1)) tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) tx_extended[test_idx_range - min(test_idx_range), :] = tx tx = tx_extended ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) ty_extended[test_idx_range - min(test_idx_range), :] = ty ty = ty_extended features = sp.vstack((allx, tx)).tolil() features[test_idx_reorder, :] = features[test_idx_range, :] # adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph)) labels = np.vstack((ally, ty)) labels[test_idx_reorder, :] = labels[test_idx_range, :] test_index = test_idx_range.tolist() if self.task == "semi_supervised": train_index = list(range(len(y))) valid_index = list(range(len(y), len(y) + 500)) else: train_index = range(len(ally) - 500) valid_index = range(len(ally) - 500, len(ally)) x = np.array(features.todense()).astype(np.float32) inv_sum_x = 1.0 / np.sum(x, axis=-1, keepdims=True) inv_sum_x[np.isnan(inv_sum_x)] = 1.0 inv_sum_x[np.isinf(inv_sum_x)] = 1.0 x *= inv_sum_x edge_index = np.array(nx.from_dict_of_lists(graph).edges).T edge_index, _ = remove_self_loop_edge(edge_index) edge_index, _ = convert_edge_to_directed(edge_index) y = np.argmax(labels, axis=-1).astype(np.int32) graph = Graph(x=x, edge_index=edge_index, y=y) return graph, (train_index, valid_index, test_index)
bc_fix[i * ndofV + 1] = True bc_val[i * ndofV + 1] = velocity_y(x[i], y[i]) #end for #end if print("setup: boundary conditions: %.3f s" % (time.time() - start)) ################################################################# # build FE matrix # [ K G ][u]=[f] # [GT 0 ][p] [h] ################################################################# start = time.time() if pnormalise: A_mat = lil_matrix((Nfem + 1, Nfem + 1), dtype=np.float64) # matrix A rhs = np.zeros((Nfem + 1), dtype=np.float64) # right hand side A_mat[Nfem, NfemV:Nfem] = 1 A_mat[NfemV:Nfem, Nfem] = 1 else: A_mat = lil_matrix((Nfem, Nfem), dtype=np.float64) # matrix A rhs = np.zeros(Nfem, dtype=np.float64) # right hand side b_mat = np.zeros((3, ndofV * mV), dtype=np.float64) # gradient matrix B N = np.zeros(mV, dtype=np.float64) # shape functions dNdx = np.zeros(mV, dtype=np.float64) # shape functions derivatives dNdy = np.zeros(mV, dtype=np.float64) # shape functions derivatives dNdr = np.zeros(mV, dtype=np.float64) # shape functions derivatives dNds = np.zeros(mV, dtype=np.float64) # shape functions derivatives u = np.zeros(NV, dtype=np.float64) # x-component velocity v = np.zeros(NV, dtype=np.float64) # y-component velocity
def calc(self, exposures, impact_funcs, hazard, save_mat=False): """Compute impact of an hazard to exposures. Parameters: exposures (Exposures): exposures impact_funcs (ImpactFuncSet): impact functions hazard (Hazard): hazard self_mat (bool): self impact matrix: events x exposures Examples: Use Entity class: >>> haz = Hazard('TC') # Set hazard >>> haz.read_mat(HAZ_DEMO_MAT) >>> haz.check() >>> ent = Entity() # Load entity with default values >>> ent.read_excel(ENT_TEMPLATE_XLS) # Set exposures >>> ent.check() >>> imp = Impact() >>> imp.calc(ent.exposures, ent.impact_funcs, haz) >>> imp.calc_freq_curve().plot() Specify only exposures and impact functions: >>> haz = Hazard('TC') # Set hazard >>> haz.read_mat(HAZ_DEMO_MAT) >>> haz.check() >>> funcs = ImpactFuncSet() >>> funcs.read_excel(ENT_TEMPLATE_XLS) # Set impact functions >>> funcs.check() >>> exp = Exposures(pd.read_excel(ENT_TEMPLATE_XLS)) # Set exposures >>> exp.check() >>> imp = Impact() >>> imp.calc(exp, funcs, haz) >>> imp.aai_agg """ # 1. Assign centroids to each exposure if not done assign_haz = INDICATOR_CENTR + hazard.tag.haz_type if assign_haz not in exposures: exposures.assign_centroids(hazard) else: LOGGER.info('Exposures matching centroids found in %s', assign_haz) # 2. Initialize values self.unit = exposures.value_unit self.event_id = hazard.event_id self.event_name = hazard.event_name self.date = hazard.date self.coord_exp = np.stack( [exposures.latitude.values, exposures.longitude.values], axis=1) self.frequency = hazard.frequency self.at_event = np.zeros(hazard.intensity.shape[0]) self.eai_exp = np.zeros(exposures.value.size) self.tag = { 'exp': exposures.tag, 'if_set': impact_funcs.tag, 'haz': hazard.tag } self.crs = exposures.crs # Select exposures with positive value and assigned centroid exp_idx = np.where(np.logical_and(exposures.value > 0, \ exposures[assign_haz] >= 0))[0] if exp_idx.size == 0: LOGGER.warning("No affected exposures.") num_events = hazard.intensity.shape[0] LOGGER.info('Calculating damage for %s assets (>0) and %s events.', exp_idx.size, num_events) # Get damage functions for this hazard if_haz = INDICATOR_IF + hazard.tag.haz_type haz_imp = impact_funcs.get_func(hazard.tag.haz_type) if if_haz not in exposures and INDICATOR_IF not in exposures: LOGGER.error('Missing exposures impact functions %s.', INDICATOR_IF) raise ValueError if if_haz not in exposures: LOGGER.info('Missing exposures impact functions for hazard %s. ' +\ 'Using impact functions in %s.', if_haz, INDICATOR_IF) if_haz = INDICATOR_IF # Check if deductible and cover should be applied insure_flag = False if ('deductible' in exposures) and ('cover' in exposures) \ and exposures.cover.max(): insure_flag = True if save_mat: self.imp_mat = sparse.lil_matrix( (self.date.size, exposures.value.size)) # 3. 
Loop over exposures according to their impact function tot_exp = 0 for imp_fun in haz_imp: # get indices of all the exposures with this impact function exp_iimp = np.where( exposures[if_haz].values[exp_idx] == imp_fun.id)[0] tot_exp += exp_iimp.size exp_step = int(CONFIG['global']['max_matrix_size'] / num_events) if not exp_step: LOGGER.error( 'Increase max_matrix_size configuration parameter' ' to > %s', str(num_events)) raise ValueError # separate in chunks chk = -1 for chk in range(int(exp_iimp.size / exp_step)): self._exp_impact( \ exp_idx[exp_iimp[chk*exp_step:(chk+1)*exp_step]],\ exposures, hazard, imp_fun, insure_flag) self._exp_impact(exp_idx[exp_iimp[(chk+1)*exp_step:]],\ exposures, hazard, imp_fun, insure_flag) if not tot_exp: LOGGER.warning('No impact functions match the exposures.') self.aai_agg = sum(self.at_event * hazard.frequency) if save_mat: self.imp_mat = self.imp_mat.tocsr()
all_images = set(all_images) # create a dict that will provide us with the mapping between the image and the # index on distance matrix count = 0 image_index = {} index_image = {} for image in all_images: image_index[image] = count index_image[count] = image count += 1 # create a distance matrix for the images (use sparse matrix instead of dense # to avoid memory issues) n = len(all_images) distance_matrix = lil_matrix((n, n)) for pair in all_pairs: image1, image2 = extract_pairs(pair, phashes_dict) distance = data[pair] if distance == 0: distance = 0.00000000000001 index1 = image_index[image1] index2 = image_index[image2] distance_matrix[index1, index2] = distance distance_matrix[index2, index1] = distance savemat(distance_matrix_file, {'M': distance_matrix.tocsr()}) pickle.dump(index_image, open(index_image_file, 'wb')) print("Done with dumping data...")
def prepare_system_matrices(Ybus, Vbus, bus_idx, pqpv, pq, pv, ref): """ Prepare the system matrices :param Ybus: :param Vbus: :param pqpv: :param ref: :return: """ n_bus = len(Vbus) n_bus2 = 2 * n_bus npv = len(pv) # ################################################################################################################## # Compute the starting voltages # ################################################################################################################## # System matrix A = lil_matrix((n_bus2, n_bus2)) # lil matrices are faster to populate # Expanded slack voltages Vslack = zeros(n_bus2) # Populate A for a in pqpv: # rows for ii in range(Ybus.indptr[a], Ybus.indptr[a + 1]): # columns in sparse format b = Ybus.indices[ii] A[2 * a + 0, 2 * b + 0] = Ybus[a, b].real A[2 * a + 0, 2 * b + 1] = -Ybus[a, b].imag A[2 * a + 1, 2 * b + 0] = Ybus[a, b].imag A[2 * a + 1, 2 * b + 1] = Ybus[a, b].real # set vd elements for a in ref: A[a * 2, a * 2] = 1.0 A[a * 2 + 1, a * 2 + 1] = 1.0 Vslack[a * 2] = Vbus[a].real Vslack[a * 2 + 1] = Vbus[a].imag # Solve starting point voltages Vst_expanded = factorized(A.tocsc())(Vslack) print('Vst_expanded:\n', Vst_expanded) # Invert the voltages obtained: Get the complex voltage and voltage inverse vectors Vst = Vst_expanded[2 * bus_idx] + 1j * Vst_expanded[2 * bus_idx + 1] Wst = 1.0 / Vst # ################################################################################################################## # Compute the final system matrix # ################################################################################################################## # System matrices B = lil_matrix((n_bus2, npv)) C = lil_matrix((npv, n_bus2 + npv)) for i, a in enumerate(pv): # "a" is the actual bus index # "i" is the number of the pv bus in the pv buses list B[2 * a + 0, i + 0] = Wst[a].imag B[2 * a + 1, i + 0] = Wst[a].real C[i + 0, 2 * a + 0] = Vst[a].real C[i + 0, 2 * a + 1] = Vst[a].imag Asys = vstack_s([hstack_s([A, B]), C], format="csc") return Asys, Vst, Wst
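The population loop walks Ybus row by row through its raw CSR arrays: indptr delimits each row's slice of indices and data. The idiom on its own, with a toy matrix:

import numpy as np
import scipy.sparse as sp

Y = sp.csr_matrix(np.array([[4.0, 1.0, 0.0],
                            [1.0, 3.0, 2.0],
                            [0.0, 2.0, 5.0]]))
for a in range(Y.shape[0]):                       # row index
    for ii in range(Y.indptr[a], Y.indptr[a + 1]):
        b = Y.indices[ii]                         # column index of this stored entry
        print(a, b, Y.data[ii])                   # same entry as Y[a, b]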
def buildKirchhoff(self, coords, cutoff=10., gamma=1., **kwargs): """Build Kirchhoff matrix for given coordinate set. :arg coords: a coordinate set or an object with ``getCoords`` method :type coords: :class:`numpy.ndarray` or :class:`.Atomic` :arg cutoff: cutoff distance (Å) for pairwise interactions default is 10.0 Å, , minimum is 4.0 Å :type cutoff: float :arg gamma: spring constant, default is 1.0 :type gamma: float :arg sparse: elect to use sparse matrices, default is **False**. If Scipy is not found, :class:`ImportError` is raised. :type sparse: bool :arg kdtree: elect to use KDTree for building Kirchhoff matrix faster, default is **True** :type kdtree: bool Instances of :class:`Gamma` classes and custom functions are accepted as *gamma* argument. When Scipy is available, user can select to use sparse matrices for efficient usage of memory at the cost of computation speed.""" try: coords = (coords._getCoords() if hasattr(coords, '_getCoords') else coords.getCoords()) except AttributeError: try: checkCoords(coords) except TypeError: raise TypeError('coords must be a Numpy array or an object ' 'with `getCoords` method') cutoff, g, gamma = checkENMParameters(cutoff, gamma) self._reset() self._cutoff = cutoff self._gamma = g n_atoms = coords.shape[0] start = time.time() sparse = kwargs.get('sparse', False) if sparse: try: from scipy import sparse as scipy_sparse except ImportError: raise ImportError('failed to import scipy.sparse, which is ' 'required for sparse matrix calculations') kirchhoff = scipy_sparse.lil_matrix((n_atoms, n_atoms)) else: kirchhoff = np.zeros((n_atoms, n_atoms), 'd') if kwargs.get('kdtree', True): kdtree = KDTree(coords) kdtree.search(cutoff) dist2 = kdtree.getDistances()**2 r = 0 for i, j in kdtree.getIndices(): g = gamma(dist2[r], i, j) kirchhoff[i, j] = -g kirchhoff[j, i] = -g kirchhoff[i, i] = kirchhoff[i, i] + g kirchhoff[j, j] = kirchhoff[j, j] + g r += 1 else: LOGGER.info('Using slower method for building the Kirchhoff.') cutoff2 = cutoff * cutoff mul = np.multiply for i in range(n_atoms): xyz_i = coords[i, :] i_p1 = i + 1 i2j = coords[i_p1:, :] - xyz_i mul(i2j, i2j, i2j) for j, dist2 in enumerate(i2j.sum(1)): if dist2 > cutoff2: continue j += i_p1 g = gamma(dist2, i, j) kirchhoff[i, j] = -g kirchhoff[j, i] = -g kirchhoff[i, i] = kirchhoff[i, i] + g kirchhoff[j, j] = kirchhoff[j, j] + g if sparse: kirchhoff = kirchhoff.tocsr() LOGGER.debug('Kirchhoff was built in {0:.2f}s.'.format(time.time() - start)) self._kirchhoff = kirchhoff self._n_atoms = n_atoms self._dof = n_atoms
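Stripped of the KDTree, the gamma callback and the options, the Kirchhoff matrix built above is a cutoff-weighted graph Laplacian. A compact standalone version of the same construction (uniform gamma, brute-force distances; a sketch, not a replacement for the class method):

import numpy as np
from scipy import sparse

def kirchhoff_matrix(coords, cutoff=10.0, gamma=1.0):
    n = coords.shape[0]
    K = sparse.lil_matrix((n, n))
    d2 = ((coords[:, None, :] - coords[None, :, :]) ** 2).sum(-1)
    for i in range(n):
        for j in range(i + 1, n):
            if d2[i, j] <= cutoff ** 2:
                K[i, j] = K[j, i] = -gamma       # off-diagonal couplings
                K[i, i] += gamma                 # diagonal accumulates the degree
                K[j, j] += gamma
    return K.tocsr()

coords = np.random.rand(30, 3) * 20.0
K = kirchhoff_matrix(coords)
print(np.allclose(K.sum(axis=1), 0.0))           # rows of a Laplacian sum to zero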
def generateGS(Mesher, Lvl, RestrictDomain=None, ColTol=0.999999): Node, Elem, Supp, Load = Mesher.get() if RestrictDomain == None: RestrictDomain = NoneRestriction #Get element connectivity matrix Nn = max([max(Node) for Node in Elem ]) + 1 # find the largest node to find the number of nodes Ne = len(Elem) # how many elements are there A1 = lil_matrix((Nn, Nn)) # sparse matrix for i in range(0, Ne): A1[ix_(Elem[i], Elem[i])] = 1 #first situation is connections in the element A1 = A1 - identity(Nn) # disconnect from yourself An = A1 # #Level 1 connectivity I, J = An.nonzero( ) # where there is a connection / this is the opposite because matlab is per column Bars = np.column_stack([I, J]) D = np.column_stack([Node[I, 0] - Node[J, 0], Node[I, 1] - Node[J, 1]]) L = (np.sqrt(D[:, 0]**2 + D[:, 1]**2)) D = np.column_stack([D[:, 0].flatten() / L, D[:, 1].flatten() / L]) #Levels 2 and above for i in range(1, Lvl): Aold = An An = (An * A1).astype(bool) Gn = An - Aold Gn.setdiag(0) I, J = np.nonzero(Gn) if len(J) == 0: Lvl = i - 1 print(f'-INFO- No new bars at Level {Lvl}') break RemoveFlag = RestrictDomain(Node, np.column_stack([I, J])) # I = np.delete(I, np.nonzero(RemoveFlag)[0]) J = np.delete(J, np.nonzero(RemoveFlag)[0]) newD = np.column_stack( [Node[I, 0] - Node[J, 0], Node[I, 1] - Node[J, 1]]) L = np.sqrt(newD[:, 0]**2 + newD[:, 1]**2).flatten() newD = np.column_stack( [newD[:, 0].flatten() / L, newD[:, 1].flatten() / L]) # Collinearity Check p = 0 # where the bars come from m = 0 RemoveFlag = np.zeros(np.size(I)) Nb = np.size(Bars, 0) RemoveFlag = selectRemoveFlag(RemoveFlag, I, Bars, Nn, Nb, ColTol, D, newD) #change due to the fact that python starts with 0 '''Remove collinear bars and make symmetric again. Bars that have one angle marked as collinear but the other not, will be spared ''' ind, = np.nonzero(RemoveFlag == 0) H = csr_matrix((np.ones(np.size(ind)), (I[ind], J[ind])), shape=(Nn, Nn)) I, J = np.nonzero( H + H.T ) # guarantees symmetry and eliminates the situation of the node being eliminated in q and not in p print( f'Lvl {i} - Collinear bars removed: {(len(RemoveFlag)-len(I))/2}') Bars = np.concatenate((Bars, np.column_stack([I, J])), axis=0) Bars = Bars[Bars[:, 0].argsort()] # effectively adds the new bars D = np.column_stack([ Node[Bars[:, 0], 0] - Node[Bars[:, 1], 0], Node[Bars[:, 0], 1] - Node[Bars[:, 1], 1] ]) #directional unit vector L = np.sqrt(D[:, 0]**2 + D[:, 1]**2) # D = np.column_stack([D[:, 0].flatten() / L, D[:, 1].flatten() / L]) A = csr_matrix( (np.ones(np.size(Bars, 0)), (Bars[:, 0], Bars[:, 1])), shape=(Nn, Nn)) # ends, but still needs to remove repeated bars I, J = tril(A).nonzero() # for this use only the upper triangle Bars = np.column_stack([I, J]) return Bars
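The connectivity step above relies on lil matrices accepting block assignment through np.ix_: every node of an element gets connected to every other node of the same element. A tiny demonstration of just that step:

import numpy as np
from scipy.sparse import lil_matrix, identity

A1 = lil_matrix((6, 6))
for elem in ([0, 1, 2], [3, 4, 5]):       # node indices of two elements
    A1[np.ix_(elem, elem)] = 1            # connect every pair of nodes in the element
A1 = A1 - identity(6)                     # remove the self-connections
print(A1.nonzero()[0].size)               # 12 directed links, 6 per element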
else: print('error') import pdb pdb.set_trace() wirebasket_numbers_nv1 = [ len(ids_nv1_internos), len(ids_nv1_faces), len(ids_nv1_arestas), len(ids_nv1_vertices) ] elems_wirebasket_nv1 = ids_nv1_internos + ids_nv1_faces + ids_nv1_arestas + ids_nv1_vertices elems_wirebasket_nv1_sep = [ ids_nv1_internos, ids_nv1_faces, ids_nv1_arestas, ids_nv1_vertices ] G_nv1 = lil_matrix((len(all_ids_nv1), len(all_ids_nv1))) G_nv1[all_ids_nv1, elems_wirebasket_nv1] = np.ones(len(all_ids_nv1)) #### level 1 ids_wirebasket = M1.mb.tag_get_data(M1.ID_reordenado_tag, elems_wirebasket, flat=True) map_global = dict(zip(elems_wirebasket, ids_wirebasket)) faces_boundary = M1.mb.tag_get_data(get_tag('FACES_BOUNDARY'), 0, flat=True)[0] faces_boundary = M1.mb.get_entities_by_handle(faces_boundary) T, b = oth.fine_transmissibility_structured(M1.mb, M1.mtu, map_global, faces_in=rng.subtract( M1.all_faces, faces_boundary))
def white(reg): """ Calculates the White test to check for heteroscedasticity. Parameters ---------- reg : regression object output instance from a regression model Returns ------- white_result : dictionary contains the statistic (white), degrees of freedom (df) and the associated p-value (pvalue) for the White test. white : float scalar value for the White test statistic. df : integer degrees of freedom associated with the test pvalue : float p-value associated with the statistic (chi^2 distributed with k df) Note ---- x attribute in the reg object must have a constant term included. This is standard for spreg.OLS so no testing done to confirm constant. References ---------- .. [1] H. White. 1980. A heteroscedasticity-consistent covariance matrix estimator and a direct test for heteroskdasticity. Econometrica. 48(4) 817-838. Examples -------- >>> import numpy as np >>> import pysal >>> import diagnostics >>> from ols import OLS Read the DBF associated with the Columbus data. >>> db = pysal.open(pysal.examples.get_path("columbus.dbf"),"r") Create the dependent variable vector. >>> y = np.array(db.by_col("CRIME")) >>> y = np.reshape(y, (49,1)) Create the matrix of independent variables. >>> X = [] >>> X.append(db.by_col("INC")) >>> X.append(db.by_col("HOVAL")) >>> X = np.array(X).T Run an OLS regression. >>> reg = OLS(y,X) Calculate the White test for heteroscedasticity. >>> testresult = diagnostics.white(reg) Print the degrees of freedom for the test. >>> testresult['df'] 5 Print the test statistic. >>> print("%12.12f"%testresult['wh']) 19.946008239903 Print the associated p-value. >>> print("%12.12f"%testresult['pvalue']) 0.001279222817 """ e = reg.u**2 k = reg.k n = reg.n y = reg.y X = reg.x #constant = constant_check(X) # Check for constant, if none add one, see Greene 2003, pg. 222 #if constant == False: # X = np.hstack((np.ones((n,1)),X)) # Check for multicollinearity in the X matrix ci = condition_index(reg) if ci > 30: white_result = "Not computed due to multicollinearity." 
return white_result # Compute cross-products and squares of the regression variables if type(X).__name__ == 'ndarray': A = np.zeros((n, (k*(k+1))/2.)) elif type(X).__name__ == 'csc_matrix' or type(X).__name__ == 'csr_matrix': # this is probably inefficient A = SP.lil_matrix((n, (k*(k+1))/2.)) else: raise Exception, "unknown X type, %s" %type(X).__name__ counter = 0 for i in range(k): for j in range(i,k): v = spmultiply(X[:,i], X[:,j], False) A[:,counter] = v counter += 1 # Append the original variables A = sphstack(X,A) # note: this also converts a LIL to CSR n,k = A.shape # Check to identify any duplicate or constant columns in A omitcolumn = [] for i in range(k): current = A[:,i] # remove all constant terms (will add a constant back later) if spmax(current) == spmin(current): omitcolumn.append(i) pass # do not allow duplicates for j in range(k): check = A[:,j] if i < j: test = abs(current - check).sum() if test == 0: omitcolumn.append(j) uniqueomit = set(omitcolumn) omitcolumn = list(uniqueomit) # Now the identified columns must be removed if type(A).__name__ == 'ndarray': A = np.delete(A,omitcolumn,1) elif type(A).__name__ == 'csc_matrix' or type(A).__name__ == 'csr_matrix': # this is probably inefficient keepcolumn = range(k) for i in omitcolumn: keepcolumn.remove(i) A = A[:,keepcolumn] else: raise Exception, "unknown A type, %s" %type(X).__name__ A = sphstack(np.ones((A.shape[0],1)), A) # add a constant back in n,k = A.shape # Conduct the auxiliary regression and calculate the statistic import ols as OLS aux_reg = OLS.BaseOLS(e,A) aux_r2 = r2(aux_reg) wh = aux_r2*n df = k-1 pvalue = stats.chisqprob(wh,df) white_result={'df':df,'wh':wh, 'pvalue':pvalue} return white_result
def graph_setup(self,n,r,p,seed=None): """ Creates the graph to use for poisson learning. Parameters ---------- n : int The number of vertices to sample for the graph. r : float Radius for graph construction. p : float Weight matrix parameter. seed : int, default is None Optional seed for random number generator. Returns ------- poisson_W_matrix : (n,n) scipy.sparse.lil_matrix Weight matrix describing similarities of normal vectors. poisson_J_matrix : (num_verts,n) scipy.sparse.lil_matrix Matrix with indices of nearest neighbors. poisson_node_idx : (num_verts,1) int array The indices of the closest point in the sample. """ rng = ( np.random.default_rng(seed=seed) if seed is not None else np.random.default_rng() ) if self.poisson_W_matrix is None or self.poisson_J_matrix is None or self.poisson_node_idx is None: v = self.vertex_normals() N = self.num_verts() #Random subsample ss_idx = np.matrix(rng.choice(self.points.shape[0],n,replace=False)) y = np.squeeze(self.points[ss_idx,:]) w = np.squeeze(v[ss_idx,:]) xTree = spatial.cKDTree(self.points) nn_idx = xTree.query_ball_point(y, r) yTree = spatial.cKDTree(y) nodes_idx = yTree.query_ball_point(y, r) bn = np.zeros((n,3)) J = sparse.lil_matrix((N,n)) for i in range(n): vj = v[nn_idx[i],:] normal_diff = w[i] - vj weights = np.exp(-8 * np.sum(np.square(normal_diff),1,keepdims=True)) bn[i] = np.sum(weights*vj,0) / np.sum(weights,0) #Set ith row of J normal_diff = bn[i]- vj weights = np.exp(-8 * np.sum(np.square(normal_diff),1))#,keepdims=True)) J[nn_idx[i],i] = weights #Normalize rows of J RSM = sparse.spdiags((1 / np.sum(J,1)).ravel(),0,N,N) J = RSM @ J #Compute weight matrix W W = sparse.lil_matrix((n,n)) for i in range(n): nj = bn[nodes_idx[i]] normal_diff = bn[i] - nj weights = np.exp(-32 * ((np.sqrt(np.sum(np.square(normal_diff),1)))/2)**p) W[i,nodes_idx[i]] = weights #Find nearest node to each vertex nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(y) instances, node_idx = nbrs.kneighbors(self.points) self.poisson_W_matrix = W self.poisson_J_matrix = J self.poisson_node_idx = node_idx return self.poisson_W_matrix, self.poisson_J_matrix, self.poisson_node_idx
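The row normalisation of J above, left-multiplying by a diagonal matrix of inverse row sums built with spdiags, is a reusable trick. Isolated here, with a guard for empty rows that the original code does not need:

import numpy as np
from scipy import sparse

J = sparse.lil_matrix((4, 3))
J[0, 0], J[0, 2] = 2.0, 2.0
J[1, 1] = 5.0
J[3, 0], J[3, 1], J[3, 2] = 1.0, 1.0, 1.0

row_sums = np.asarray(J.sum(axis=1)).ravel()
row_sums[row_sums == 0] = 1.0                     # keep empty rows at zero instead of dividing by 0
D_inv = sparse.spdiags(1.0 / row_sums, 0, 4, 4)
J_norm = D_inv @ J
print(J_norm.toarray())                           # every non-empty row now sums to 1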
def iterative_profiler(X, G, train_ids, test_ids, id_index, label_slices, preserve_coef=0.9, iterations=10, alpha=10, c=0, node_order='random', keep_topK=10, edgexplain__scaler=False): if edgexplain__scaler: print 'edgexplain is so on!' else: print 'edgexplain is so off!' print 'alpha', alpha, 'preserver', preserve_coef, 'c', c, 'topK', keep_topK, 'edgexplain', edgexplain__scaler converged = False iter_num = 0 ids = train_ids + test_ids train_ids_set = set(train_ids) logging.info("iterating with max_iter = " + str(iterations)) X = X.tolil() while not converged: if node_order == 'random': random.shuffle(ids) logging.info("iter: " + str(iter_num)) for node in ids: node_index = id_index[node] neighbors_labeldist = lil_matrix((1, X.shape[1])) nbrs = [nbr for nbr in G[node]] ################ nbr_indices = [id_index[n] for n in nbrs] #all neighbours nbrlabeldists = X[nbr_indices] #diagonal matrix for edge weight of each neighbor weights = lil_matrix( (nbrlabeldists.shape[0], nbrlabeldists.shape[0])) if edgexplain__scaler: #edge weights are scaled according to EdgExplain scalar scales = nbrlabeldists.tocsr().dot( X[node_index].tocsr().transpose(copy=True)) if iter_num == -1: pdb.set_trace() scales = expit(-alpha * scales.toarray() - c) weights.setdiag(scales) else: #all edge weights are 1 weights.setdiag(np.ones(nbrlabeldists.shape[0])) neighbors_labeldist = weights.dot(nbrlabeldists) neighbors_labeldist = lil_matrix( neighbors_labeldist.sum(axis=0) / weights.sum()) if node in train_ids_set: new_labeldist = ( preserve_coef * X[node_index] + (1 - preserve_coef) * neighbors_labeldist).tolil() else: new_labeldist = neighbors_labeldist new_labeldist_normalized = None for label_slice in label_slices: start_index, end_index = label_slice slice = new_labeldist[0, start_index:end_index] if keep_topK > 0: if keep_topK < 1: keep_topK = min(1, int(keep_topK * slice.shape[1])) sorted_indices = np.argsort(slice.toarray()) #topK_indices = sorted_indices[0, -keep_topK:] zero_indices = sorted_indices[0, 0:-keep_topK] slice[0, zero_indices] = 0 slice = normalize(slice, norm='l1', axis=1, copy=False) if str(type( new_labeldist_normalized)) == '<type \'NoneType\'>': new_labeldist_normalized = slice else: new_labeldist_normalized = sp.hstack( [new_labeldist_normalized, slice]) new_labeldist = new_labeldist_normalized X[node_index] = new_labeldist.tolil() iter_num += 1 if iter_num == iterations: converged = True X = X.tocsr() return X
import numpy as np from scipy import sparse from numpy.testing import (assert_array_almost_equal, assert_array_equal, assert_equal) from sklearn import datasets, svm, linear_model, base from sklearn.datasets import make_classification, load_digits, make_blobs from sklearn.svm.tests import test_svm from sklearn.utils import ConvergenceWarning from sklearn.utils.extmath import safe_sparse_dot from sklearn.utils.testing import assert_warns, assert_raise_message # test sample 1 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]) X_sp = sparse.lil_matrix(X) Y = [1, 1, 1, 2, 2, 2] T = np.array([[-1, -1], [2, 2], [3, 2]]) true_result = [1, 2, 2] # test sample 2 X2 = np.array([[0, 0, 0], [1, 1, 1], [2, 0, 0, ], [0, 0, 2], [3, 3, 3]]) X2_sp = sparse.dok_matrix(X2) Y2 = [1, 2, 2, 2, 3] T2 = np.array([[-1, -1, -1], [1, 1, 1], [2, 2, 2]]) true_result2 = [1, 2, 3] iris = datasets.load_iris() # permute
#------------------------------ENCODING------------------------------------- #seqList = ['abcdab', 'abcdef'] def findIndexOfFeature(subStr, startIndexOfCurSize=0): for index in xrange(startIndexOfCurSize, len(listSubString)): if listSubString[index] == subStr: return index return -1 sizeOfSubstr = (2, 3, 4) numOfSeq = len(seqList) listSubString = [] # list of substrings, i.e. the feature list #encodingMat= np.zeros((0,numOfSeq), dtype = 'uint8') # rows are features (substrings), columns are sequences encodingMat = sparse.lil_matrix((0, numOfSeq), dtype='uint8') for sizeOfSub in sizeOfSubstr: # number of characters of the substring curNumOfFeature = len(listSubString) startIndexOfCurSize = curNumOfFeature for indexOfCurSeq in xrange(0, numOfSeq): # iterate over each sequence seq = seqList[indexOfCurSeq] sizeOfSeq = len(seq) for index in xrange(0, sizeOfSeq - sizeOfSub + 1): # iterate over the substrings curSubStr = seq[index:index + sizeOfSub] # only search among substrings of the same length, hence startIndexOfCurSize foundIndexOfFeature = findIndexOfFeature(curSubStr, startIndexOfCurSize) if foundIndexOfFeature != -1:
mesh = cfm.GmshMeshGenerator(g) mesh.el_size_factor = el_size_factor mesh.el_type = el_type mesh.dofs_per_node = dofs_per_node # Mesh the geometry: # The first four return values are the same as those that trimesh2d() returns. # value elementmarkers is a list of markers, and is used for finding the # marker of a given element (index). coords, edof, dofs, bdofs, elementmarkers = mesh.create() # ---- Solve problem -------------------------------------------------------- nDofs = np.size(dofs) K = lil_matrix((nDofs, nDofs)) ex, ey = cfc.coordxtr(edof, coords, dofs) cfu.info("Assembling K... (" + str(nDofs) + ")") for eltopo, elx, ely, elMarker in zip(edof, ex, ey, elementmarkers): if el_type == 2: Ke = cfc.plante(elx, ely, elprop[elMarker][0], elprop[elMarker][1]) else: Ke = cfc.planqe(elx, ely, elprop[elMarker][0], elprop[elMarker][1]) cfc.assem(eltopo, K, Ke) cfu.info("Applying bc and loads...")
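What usually follows the assembly above is applying the boundary conditions and solving. CALFEM has its own utilities for that, so this is only a generic scipy sketch of the reduce-and-solve step with toy numbers and a hypothetical fixed dof:

import numpy as np
from scipy.sparse import lil_matrix
from scipy.sparse.linalg import spsolve

nDofs = 4
K = lil_matrix((nDofs, nDofs))
K.setdiag([4.0, 4.0, 4.0, 4.0])
K[0, 1] = K[1, 0] = -1.0
f = np.array([0.0, 1.0, 2.0, 0.0])

fixed = np.array([0])                              # dofs prescribed to zero
free = np.setdiff1d(np.arange(nDofs), fixed)
K_free = K.tocsr()[free][:, free]                  # reduced stiffness matrix
a = np.zeros(nDofs)
a[free] = spsolve(K_free, f[free])
print(a)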