def setMs(self, nSensors=10):
    """Build the sensor sampling operator ``self.Ms`` for the 3D case.

    An ``nSensors`` x ``nSensors`` grid of sample indexes is chosen between
    ``self.npml + 5`` and ``self.nx - self.npml - 5``.  One 0/1 selection
    matrix is built per field component (X, Y, Z) using flat indexes of the
    form ``a * stride + offset + b``; the three are stacked block-diagonally
    into ``self.Ms`` (CSR).  On exit ``self.nSen`` is three times
    ``nSensors ** 2``.
    """
    self.nSen = nSensors * nSensors

    # Evenly spaced positions, shifted to zero-based; np.unique drops any
    # positions that round to the same cell.
    first_cell = self.npml + 5
    last_cell = self.nx - self.npml - 5
    grid = np.round(np.linspace(first_cell, last_cell, nSensors)).astype(int) - 1
    grid = np.unique(grid)

    def selector(first_axis, second_axis, n_cols, stride, offset):
        # One row per grid point, with a single 1.0 at its flat index.
        op = sparse.dok_matrix((self.nSen, n_cols))
        aa, bb = np.meshgrid(first_axis, second_axis)
        for row, (a, b) in enumerate(zip(aa.flatten(), bb.flatten())):
            op[row, a * stride + offset + b] = 1.0
        return op

    nx, ny, nz, div = self.nx, self.ny, self.nz, self.div

    # X component: first index shifted by one, grid of size (nx+1, ny, nz).
    Mx = selector(grid + 1, grid, (nx + 1) * ny * nz, ny * nz, div * nz)
    # Y component: middle index is div + 1, grid of size (nx, ny+1, nz).
    My = selector(grid, grid, nx * (ny + 1) * nz, (ny + 1) * nz, (div + 1) * nz)
    # Z component: last index shifted by one, grid of size (nx, ny, nz+1).
    Mz = selector(grid, grid + 1, nx * ny * (nz + 1), ny * (nz + 1), div * (nz + 1))

    # Block-diagonal stack so each component samples its own field vector.
    self.Ms = sparse.block_diag((Mx, My, Mz), 'csr')
    self.nSen = 3 * self.nSen
def build_sparse_matrix(list_of_dicts, vector_length, orient='columns', verbose=False):
    """Build a CSC sparse matrix from a list of dict-encoded sparse vectors.

    :param list_of_dicts: list of dictionaries mapping an index (int or
        int-like string) to a value, one dictionary per vector
    :param vector_length: number of values in the dense representation of
        each vector
    :param orient: ``'columns'`` (default) stores each vector as a column,
        ``'rows'`` stores each vector as a row
    :param verbose: when true, print a progress percentage while building
    :return: ``csc_matrix`` of shape ``(vector_length, n)`` for columns or
        ``(n, vector_length)`` for rows
    :raises ValueError: if ``orient`` is neither ``'columns'`` nor ``'rows'``
    """
    if orient not in ('columns', 'rows'):
        raise ValueError('Orient must be either \'columns\' or \'rows\'')

    by_rows = orient == 'rows'
    total = len(list_of_dicts)
    # dok_matrix takes the shape as its first positional argument; passing
    # only ``shape=`` (as the 'rows' branch used to) raises TypeError.
    shape = (total, vector_length) if by_rows else (vector_length, total)
    matrix = dok_matrix(shape)

    for position, vector in enumerate(list_of_dicts):
        if verbose:
            print("Building matrix {:0.2%}".format(position / total), end='\r')
        for term, value in vector.items():
            # Keys are coerced to int in both orientations so that string
            # keys such as "12" behave the same way for rows and columns.
            index = int(term)
            if by_rows:
                matrix[position, index] = value
            else:
                matrix[index, position] = value

    print("Matrix complete. ")
    return csc_matrix(matrix)
def get_Heisenberg_H(Sx,Sy,Sz,b,n):
    '''Build the Hamiltonian for the n-particle system.

    Returns ``-b/2 * sum_k Sz_k - sum_k (Sx_k Sx_{k+1} + Sy_k Sy_{k+1} +
    Sz_k Sz_{k+1})`` where ``S_k`` is ``get_full_matrix(S, k, n)``.

    Sx, Sy, Sz -- single-site spin matrices (sparse, square, size D).
    b          -- field strength multiplying the Sz sum.
    n          -- number of particles; the result is D**n x D**n.

    NOTE(review): ``get_full_matrix`` is called with site ``k+1 == n`` on the
    last iteration; presumably it wraps around -- confirm against its
    definition.
    '''
    D = Sx.get_shape()[0]               # Dimension of the spin matrices.
    dim = D**n
    Sz_sum = dok_matrix((dim,dim))      # Contribution from the B field.
    S_dot_S_sum = dok_matrix((dim,dim)) # Contribution from interactions.

    for k in range(n):
        # B-field contribution.
        Sz_sum = Sz_sum + get_full_matrix(Sz,k,n)
        # S.S interaction: accumulate straight into one running total.  The
        # previous version did ``lst[i] += ...`` on a list of matrices, which
        # rebinds the list slot and left the named accumulators at zero.
        for S in (Sx,Sy,Sz):
            S_k = get_full_matrix(S,k,n)
            S_k_1 = get_full_matrix(S,k+1,n)
            S_dot_S_sum = S_dot_S_sum + S_k*S_k_1

    H = -b/2 * Sz_sum - S_dot_S_sum
    return H
def read_given_train_test(self, train_file, test_file):
    """Read a pre-split rating data set into two sparse matrices.

    Each non-empty line holds ``user item rating`` separated by a tab, a
    comma or ``::``.  Matrix dimensions and the id -> index mappings are
    derived from the training file only.

    :param train_file: path of the training ratings file (UTF-8)
    :param test_file: path of the test ratings file (UTF-8)
    :return: ``(train_matrix, test_matrix)`` as ``dok_matrix`` objects of
        shape ``(n_train_users, n_train_items)``
    """
    def parse(path):
        # Yield (user_id, item_id, rating) per non-blank line.
        with codecs.open(path, mode="r", encoding="utf-8") as read_file:
            for line in read_file:
                line = line.strip()
                if not line:
                    continue  # tolerate blank / trailing lines
                fields = re.split('\t|,|::', line)
                # float() accepts everything int() did and fractional
                # ratings too; the matrices are float64 either way.
                yield int(fields[0]), int(fields[1]), float(fields[2])

    users, items = set(), set()
    ratings = []
    for user_id, item_id, rating in parse(train_file):
        users.add(user_id)
        items.add(item_id)
        ratings.append((user_id, item_id, rating))

    # Map raw ids to contiguous matrix indexes.
    users_dict = {user_id: index for index, user_id in enumerate(users)}
    items_dict = {item_id: index for index, item_id in enumerate(items)}
    shape = (len(users), len(items))
    train_matrix = dok_matrix(shape)
    test_matrix = dok_matrix(shape)

    for user_id, item_id, rating in ratings:
        train_matrix[users_dict[user_id], items_dict[item_id]] = rating

    for user_id, item_id, rating in parse(test_file):
        # A test rating for a user or item never seen in training has no
        # row/column to land in; skip it instead of raising KeyError.
        if user_id in users_dict and item_id in items_dict:
            test_matrix[users_dict[user_id], items_dict[item_id]] = rating

    return train_matrix, test_matrix
def write_cv_data(K, data_dir, idx, W, L, T, D, N, phi, alpha, beta, chains):
    """Generate an LDA corpus, split it token-wise into K folds and write it.

    The document-term counts come from ``generate_lda``.  For every document
    the individual tokens are shuffled and split with ``KFold``; fold ``k``
    receives the training tokens in ``Bs[k]`` and the held-out tokens in
    ``test_counts[k]``.  Held-out counts are pickled to
    ``counts_<k>.pkl`` in ``data_dir`` and the training folds are handed to
    the ``write_*_cv`` writers.

    NOTE(review): ``KFold(n, n_folds=K)`` is the legacy scikit-learn
    ``cross_validation`` API -- confirm which KFold this file imports.
    """
    B = generate_lda(T, W, D, N, phi, alpha)

    # split cv data
    B_sparse = csr_matrix(B)
    Bs = [dok_matrix((D, W), dtype=np.float32) for k in range(K)]
    test_counts = [dok_matrix((D, W), dtype=np.float32) for k in range(K)]
    for d in range(B_sparse.shape[0]):
        crow = B_sparse[d, :].tocoo()
        # Expand counts into one entry per token so folds split tokens.
        list_of_tokens = []
        for term_idx, count in zip(crow.col, crow.data):
            list_of_tokens += [term_idx] * int(count)
        list_of_tokens = list(np.random.permutation(np.array(list_of_tokens)))
        kf = KFold(len(list_of_tokens), n_folds=K)
        for k, (train, test) in enumerate(kf):
            for target, members in ((Bs[k], train), (test_counts[k], test)):
                fold_counts = collections.Counter(list_of_tokens[i] for i in members)
                for w, count in fold_counts.items():
                    target[d, w] = count

    Bs = [csr_matrix(i) for i in Bs]
    test_counts = [csr_matrix(i) for i in test_counts]
    for i, counts in enumerate(test_counts):
        # Pickles are binary: open with 'wb' and close the handle promptly.
        with open(os.path.join(data_dir, 'counts_{}.pkl'.format(i)), 'wb') as out:
            pickle.dump(counts, out)

    write_pb_cv(data_dir, idx, W, T, D, alpha, beta, Bs, write_params=False)
    write_stan_cv(data_dir, idx, W, T, D, alpha, beta, Bs, chains=chains, write_params=False)
    write_prism_cv(data_dir, idx, W, T, D, alpha, beta, Bs, write_params=False)
    write_txt_cv(data_dir, idx, Bs, T, alpha, beta, write_params=False)
def getMatrix(self, date_time0, date_time1, filterBy=(), groupBy=''):
    """Count looks between people inside a time window.

    Parameters
    ----------
    date_time0, date_time1 : looks with ``date_time0 <= date_time <
        date_time1`` are considered.
    filterBy : empty, or ``(attribute, values)``; only looks whose
        ``attribute`` (a possibly dotted path such as ``'observer._id'``)
        is in ``values`` are kept.
    groupBy : ``'gender'`` collapses the columns into four groups
        (column 0, males, females, column 37).

    Returns
    -------
    (d, m, t) : ``m`` counts interactions observer -> observed, ``t`` counts
        opportunities (observed person present), ``d`` is ``m / t * 100``.
        Without grouping ``m`` and ``t`` are 37x38 DOK matrices.

    Side effect: looks at 'SIN', 'MARCADOR' or 'FOTO' get ``interaction``
    reset to 0 on the look objects themselves.

    NOTE(review): ids >= 37 are clamped to 37, which is a valid column but
    not a valid row of the 37-row matrices -- confirm observers are always
    below 37.
    """
    from operator import attrgetter

    looks = [l for l in self.looks if date_time0 <= l.date_time < date_time1]
    for l in looks:
        if l.interaction == 1 and l.observed._id in ['SIN', 'MARCADOR', 'FOTO']:
            l.interaction = 0

    if len(filterBy) > 0:
        # attrgetter resolves dotted paths, replacing the exec'd source string.
        read_attribute = attrgetter(filterBy[0])
        values = filterBy[1]
        looks = [l for l in looks if read_attribute(l) in values]

    m = sp.dok_matrix((37, 38))
    t = sp.dok_matrix((37, 38))

    studentIds = [p._id for p in self.people if self.id2int(p._id) < 37]
    for l in looks:
        i = min([self.id2int(l.observer._id), 37])
        if l.interaction == 1:
            j = min([self.id2int(l.observed._id), 37])
            m[(i, j)] += 1
        presentStudents = [self.id2int(sId) for sId in studentIds
                           if self.isPresent(sId, l.date_time)]
        presentStudents.append(37)  # column 37 always counts as present
        for k in presentStudents:
            t[(i, k)] += 1
        if l.interaction == 1 and j not in presentStudents:
            print('error: ' + l.observer._id + ' mira a ' + l.observed._id
                  + ' pero no esta presente en ' + str(l.date_time))

    d = (m.todense() / t.todense()) * 100

    if groupBy == 'gender':
        males = [self.id2int(s._id) for s in self.people
                 if 0 < self.id2int(s._id) < 37 and s.gender == '1']
        females = [self.id2int(s._id) for s in self.people
                   if self.id2int(s._id) < 37 and s.gender == '2']
        m = np.c_[m[:, 0].todense(), m[:, males].sum(1), m[:, females].sum(1), m[:, 37].todense()]
        t = np.c_[t[:, 0].todense(), t[:, 37].todense(), t[:, 37].todense(), t[:, 37].todense()]
        d = (m / t) * 100

    return d, m, t
def test_load_branches_medium(case14):
    """load_branches on the 14-bus case reproduces hand-entered admittances.

    Expected conductance/susceptance matrices are built symmetrically from
    the per-branch complex values in ``s_dict`` and compared with the
    matrices, branch list and branch map returned by ``load_branches``.
    """
    demand_dict, root, _ = load_buses(case14)
    e2i, _, _ = renumber_buses(demand_dict, root)
    n = len(e2i)
    Ghat = dok_matrix((n, n))
    Bhat = dok_matrix((n, n))
    s_dict = {(0, 1): (499.9131600798035-1526.3086523179554j),
              (0, 4): (102.58974549701888-423.4983682334831j),
              (1, 2): (113.50191923073959-478.1863151757718j),
              (3, 4): (684.0980661495671-2157.855398169159j),
              (3, 6): -478.1943381790359j,
              (4, 5): -396.79390524561546j,
              (5, 10): (195.50285631772607-409.4074344240442j),
              (5, 11): (152.59674404509738-317.5963965029401j),
              (5, 12): (309.89274038379875-610.2755448193116j),
              (6, 7): -567.6979846721543j,
              (6, 8): -909.0082719752751j,
              (8, 9): (390.2049552447428-1036.5394127060915j),
              (8, 13): (142.4005487019931-302.90504569306034j)}
    # .items() works on Python 2 and 3 (iteritems is Python 2 only).
    for (i, j), y in s_dict.items():
        Ghat[i, j] = y.real
        Ghat[j, i] = y.real
        Bhat[i, j] = y.imag
        Bhat[j, i] = y.imag
    branch_list_hat = sorted(s_dict.keys())
    # Both orientations of a branch map to the same branch index.
    branch_map_hat = {}
    for i, (fbus, tbus) in enumerate(branch_list_hat):
        branch_map_hat[(fbus, tbus)] = i
        branch_map_hat[(tbus, fbus)] = i
    G, B, branch_list, branch_map = load_branches(case14, e2i)
    assert_almost_equal(G.todense(), Ghat.todense())
    assert_almost_equal(B.todense(), Bhat.todense())
    assert branch_list == branch_list_hat
    assert branch_map == branch_map_hat
def __init__(self, nFeat, nAction, epLen, epsilon=0.0, sigma=1.0, lam=1.0, maxHist=5000):
    """Store the hyper-parameters and allocate per-timestep structures.

    For each of the ``epLen + 1`` timesteps this creates a covariance
    (identity / lam), zero mean and sample vectors of shape (nFeat, 1), and
    a memory dict with ``oldFeat`` (maxHist x nFeat), ``rewards``
    (maxHist x 1) and ``newFeat`` (one nAction x nFeat matrix per slot).
    """
    self.nFeat, self.nAction, self.epLen = nFeat, nAction, epLen
    self.epsilon, self.sigma, self.maxHist = epsilon, sigma, maxHist

    # Sample from the belief only if epsilon is 0.  Fixing the flag here
    # lets epsilon be changed later without turning sampling on.
    self.isRLSVI = (epsilon == 0.0)

    def empty_memory():
        # Fresh storage for one timestep.
        new_feat = {}
        for slot in range(maxHist):
            new_feat[slot] = sp.dok_matrix((nAction, nFeat))
        return {
            "oldFeat": sp.dok_matrix((maxHist, nFeat)),
            "rewards": sp.dok_matrix((maxHist, 1)),
            "newFeat": new_feat,
        }

    steps = range(epLen + 1)
    self.covs = [sp.identity(nFeat) / float(lam) for _ in steps]
    self.thetaMeans = [sp.dok_matrix((nFeat, 1)) for _ in steps]
    self.thetaSamps = [sp.dok_matrix((nFeat, 1)) for _ in steps]
    self.memory = [empty_memory() for _ in steps]
def get_ising_XY_H(Sx,Sy,Sz,b,N):
    '''Build the Hamiltonian for the N-particle system using the XY Ising model.

    Returns ``-b/2 * sum_k Sz_k - sum_k (Sx_k Sx_{k+1} + Sy_k Sy_{k+1})``
    where ``S_k`` is ``get_full_matrix(S, k, N)``.  The operator for site
    ``k+1`` is reused as the site-``k`` operator of the next iteration, so
    each full-size operator is built once.

    NOTE(review): the last iteration requests site ``N``; presumably
    ``get_full_matrix`` wraps around -- confirm.
    '''
    D = Sx.get_shape()[0]
    dim = D**N
    Sx_sum = dok_matrix((dim,dim))
    Sy_sum = dok_matrix((dim,dim))
    Sz_sum = dok_matrix((dim,dim))

    Sx_k_1 = Sy_k_1 = None
    for k in range(N):
        Sz_sum += get_full_matrix(Sz,k,N)

        # Reuse the neighbour operator from the previous step; no copy is
        # needed because the operators are never modified in place.
        Sx_k = get_full_matrix(Sx,k,N) if k == 0 else Sx_k_1
        Sx_k_1 = get_full_matrix(Sx,k+1,N)
        Sx_sum += Sx_k.dot(Sx_k_1)

        Sy_k = get_full_matrix(Sy,k,N) if k == 0 else Sy_k_1
        Sy_k_1 = get_full_matrix(Sy,k+1,N)
        Sy_sum += Sy_k.dot(Sy_k_1)

    H = -b/2 * Sz_sum - (Sx_sum + Sy_sum)
    return H
def __init__(self, programEntities, sim=ssd.correlation):
    """Build, normalise and save the user feature and similarity matrices.

    Reads ``../Data/users.csv`` (header plus one comma-separated row per
    user), keeps users present in ``programEntities.userIndex``, fills six
    cleaned feature columns, L1-normalises every column and writes the
    result to ``../Models/US_userMatrix``.  Then computes ``sim`` for every
    pair in ``programEntities.uniqueUserPairs`` into a symmetric matrix with
    a unit diagonal and writes it to ``../Models/US_userSimMatrix``.

    :param programEntities: object exposing ``userIndex`` (user id -> row)
        and ``uniqueUserPairs`` (iterable of user-id pairs)
    :param sim: function of two feature rows returning their similarity
    """
    cleaner = DataCleaner()
    nusers = len(programEntities.userIndex)
    # ``with`` closes the file even if a row fails to parse.
    with open("../Data/users.csv", 'r') as fin:
        colnames = fin.readline().strip().split(",")
        self.userMatrix = ss.dok_matrix((nusers, len(colnames) - 1))
        for line in fin:
            cols = line.strip().split(",")
            # consider the user only if he exists in train.csv
            if cols[0] in programEntities.userIndex:
                i = programEntities.userIndex[cols[0]]
                self.userMatrix[i, 0] = cleaner.getLocaleId(cols[1])
                self.userMatrix[i, 1] = cleaner.getBirthYearInt(cols[2])
                self.userMatrix[i, 2] = cleaner.getGenderId(cols[3])
                self.userMatrix[i, 3] = cleaner.getJoinedYearMonth(cols[4])
                self.userMatrix[i, 4] = cleaner.getCountryId(cols[5])
                self.userMatrix[i, 5] = cleaner.getTimezoneInt(cols[6])

    # normalize the user matrix
    self.userMatrix = normalize(self.userMatrix, norm="l1", axis=0, copy=False)
    sio.mmwrite("../Models/US_userMatrix", self.userMatrix)

    # calculate the user similarity matrix and save it for later
    self.userSimMatrix = ss.dok_matrix((nusers, nusers))
    for i in range(0, nusers):
        self.userSimMatrix[i, i] = 1.0
    for u1, u2 in programEntities.uniqueUserPairs:
        i = programEntities.userIndex[u1]
        j = programEntities.userIndex[u2]
        # ``in`` replaces the Python-2-only has_key(); skip stored pairs.
        if (i, j) not in self.userSimMatrix:
            usim = sim(self.userMatrix.getrow(i).todense(),
                       self.userMatrix.getrow(j).todense())
            self.userSimMatrix[i, j] = usim
            self.userSimMatrix[j, i] = usim
    sio.mmwrite("../Models/US_userSimMatrix", self.userSimMatrix)
def Pi(terms,tfidf,w=None,eps=0.15,max_iter=100,tol=1e-08):
    """Iterate a random walk with restart between documents.

    terms    -- list of terms; a term's position is its column index.
    tfidf    -- list of dicts (one per document) mapping term -> tf-idf.
    w        -- optional term to exclude from the term -> document step.
    eps      -- restart weight.
    max_iter -- maximum number of iterations.
    tol      -- stop once the norm of the change between iterates is below
                this value.

    Returns the last iterate, a sparse m x m matrix (m = number of
    documents).
    """
    n = len(terms)  # number of distinct terms
    m = len(tfidf)  # number of documents

    # First-occurrence index of each term (what terms.index returned),
    # built once instead of a linear scan per lookup.
    position = {}
    for j, term in enumerate(terms):
        position.setdefault(term, j)

    # Document -> term transition matrix (rows normalised by tf-idf mass).
    A = dok_matrix((m,n))
    for i,d in enumerate(tfidf):
        s = sum(d.values())  # sum of tfidf for each document
        for k,v in d.items():
            A[i,position[k]] = v / s

    # Term -> document transition matrix (columns of A normalised).
    B = dok_matrix((n,m))
    s = A.sum(axis=0)  # column sums, shape (1, n)
    stop = terms.index(w) if w else -1  # term to exclude
    entries = A.tocoo()
    for i,j,v in zip(entries.row, entries.col, entries.data):
        if j == stop:
            continue
        B[j,i] = v / s[0,j]

    # Document -> document transition matrix.
    C = A.dot(B)

    # Iterate x <- eps*I + (1 - eps) * x * C until it stops changing.
    x = one = SP.identity(m)
    x_ = x  # defined even when max_iter <= 0
    for i in range(max_iter):
        x_ = eps * one + (1.0 - eps) * x.dot(C)
        if LA.norm(x_.todense() - x.todense()) < tol:
            break
        x = x_
    return x_
def read_counts_matrix(counts_path):
    """
    Load word-context counts from the textual ``count word context`` format.

    Vocabularies come from ``<counts_path>.words.vocab`` and
    ``<counts_path>.contexts.vocab``; lines whose word or context is outside
    them are ignored.  Returns ``(counts, iw, ic)``: a float32 CSR matrix
    plus the sorted word and context lists that index its rows and columns.
    """
    word_vocab = load_count_vocabulary(counts_path + '.words.vocab')
    context_vocab = load_count_vocabulary(counts_path + '.contexts.vocab')
    iw = sorted(word_vocab.keys())
    ic = sorted(context_vocab.keys())
    wi = {word: row for row, word in enumerate(iw)}
    ci = {context: col for col, context in enumerate(ic)}

    shape = (len(wi), len(ci))
    counts = csr_matrix(shape, dtype=np.float32)
    buffer = dok_matrix(shape, dtype=np.float32)

    # Accumulate into a small DOK buffer and fold it into the CSR total
    # every `update_threshold` lines.
    update_threshold = 100000
    pending = 0
    with open(counts_path) as f:
        for line in f:
            count, word, context = line.strip().split()
            if word in wi and context in ci:
                buffer[wi[word], ci[context]] = int(count)
            pending += 1
            if pending == update_threshold:
                counts = counts + buffer.tocsr()
                buffer = dok_matrix(shape, dtype=np.float32)
                pending = 0
    # Fold in whatever is left in the buffer.
    counts = counts + buffer.tocsr()

    return counts, iw, ic
def out_degree_fraction(cover, weights=None, allow_nan = False):
    '''
    Out Degree Fraction (ODF) of every node in every cluster of `cover`.

    For each cluster, a node's external (boundary) edge count -- or summed
    edge weight when `weights` is given -- is divided by the node's strength
    in the whole graph.  Nodes with zero strength get nan when `allow_nan`
    is true and 0 otherwise.

    Returns a DOK matrix with one row per vertex and one column per cluster.
    '''
    w_attr, remove = __get_weight_attr(cover.graph, 'out_degree_fraction', weights)
    fallback = "nan" if allow_nan else 0

    # Looked up once, outside the loops, because they are expensive.
    membership = cover.membership
    external_edges = cover.external_edges()
    degree_per_node = cover.graph.strength(weights=w_attr)

    # Rows = vertex, cols = cluster.
    rv = dok_matrix((cover.graph.vcount(), len(cover)))

    for cluster in range(len(cover)):
        boundary = dok_matrix((cover.graph.vcount(), 1))
        for edge in external_edges[cluster]:
            # Credit the endpoint that belongs to this cluster.
            if cluster in membership[edge.source]:
                inside = edge.source
            else:
                inside = edge.target
            if weights is None:
                boundary[inside, 0] += 1.0
            else:
                boundary[inside, 0] += edge[w_attr]

        for (node, _), amount in boundary.items():
            if degree_per_node[node] != 0:
                rv[node, cluster] += amount / float(degree_per_node[node])
            else:
                rv[node, cluster] += float(fallback)

    __remove_weight_attr(cover.graph, w_attr, remove)
    return rv
def get_exp_val(S,psi):
    '''
    Return conj(psi) . S . psi for observable S and state psi.

    S must be a sparse matrix; psi is a one-dimensional state vector.
    '''
    row = np.matrix(psi)
    # Sparse bra (1 x n) and ket (n x 1) keep the whole product sparse.
    bra = dok_matrix(row.conj())
    ket = dok_matrix(row.T)
    product = bra.dot(S.dot(ket))
    return product[0,0]
def evaluate(self,testdataset,pipp_radius=None,pipp_confid=[0.95]):
    """evaluate the instances and return a list of score intervals

    :param testdataset: instances to evaluate, one row per instance
    :param pipp_radius: overrides the radius built during learning
    :type pipp_radius: float
    :param pipp_confid: confidence values passed to ``get_binomial_int``
    :type pipp_confid: list of floats
    :returns: one :class:`~classifip.representations.voting.Scores` per
        instance
    :rtype: list of :class:`~classifip.representations.voting.Scores`
    """
    if pipp_radius is not None:
        self.radius=pipp_radius
    dataset=np.array(testdataset).astype(float)
    answers=[]
    if self.normal[0] == True:
        dataset=(dataset-self.normal[2])/self.normal[1]

    nb_labels=len(self.labels)

    #build matrix of majority opinions: sum all training rankings, then
    #turn each pair of opposite entries into 1/0 (winner) or 1/1 (tie)
    majority=dok_matrix((nb_labels,nb_labels))
    for ranking in self.truerankings:
        majority=majority+ranking
    for k in range(nb_labels):
        for l in range(nb_labels):
            if l == k:
                continue
            if majority[k,l] > majority[l,k]:
                majority[k,l]=1.
                majority[l,k]=0.
            elif majority[k,l] < majority[l,k]:
                majority[l,k]=1.
                majority[k,l]=0.
            else:
                majority[l,k]=1.
                majority[k,l]=1.

    for instance in dataset:
        #add every neighbour in the given radius (queried once)
        result=dok_matrix((nb_labels,nb_labels))
        neighbours=self.tree.query_ball_point(instance,self.radius)
        if len(neighbours) > 0:
            for ind in neighbours:
                result=result+self.truerankings[ind]
        #if no neighbour in radius, take the closest one
        else:
            result=result+self.truerankings[self.tree.query(instance)[1]]

        #compute the final scores from the sample matrix
        score_val=np.zeros((nb_labels,2))
        for k in range(nb_labels):
            for l in range(nb_labels):
                if l == k:
                    continue
                #if no samples for a given comparison, simply use majority
                source=result if result[k,l]+result[l,k] > 0. else majority
                score_val[k,:]+=get_binomial_int(source[k,l]+source[l,k],
                                                 source[k,l],pipp_confid)
        answers.append(Scores(score_val))

    return answers
def test_increment_edge_ages(self): self.soinn.adjacent_mat[0, 1:3] = 1 self.soinn.adjacent_mat[1:3, 0] = 1 self.soinn._Soinn__increment_edge_ages(0) expected = dok_matrix([[0, 2, 2, 0], [2, 0, 0, 0], [2, 0, 0, 0], [0, 0, 0, 0]]) np.testing.assert_array_equal(self.soinn.adjacent_mat.toarray(), expected.toarray()) self.soinn._Soinn__increment_edge_ages(1) expected = dok_matrix([[0, 3, 2, 0], [3, 0, 0, 0], [2, 0, 0, 0], [0, 0, 0, 0]]) np.testing.assert_array_equal(self.soinn.adjacent_mat.toarray(), expected.toarray())
def get_exp_val(S,psi):
    '''
    Return conj(psi) . S . psi for observable S and state psi.

    S must be a sparse matrix; psi is a one-dimensional state vector.  The
    product is evaluated with sparse operands rather than via S.toarray().
    '''
    state = np.matrix(psi)
    ket = dok_matrix(state.T)        # n x 1
    bra = dok_matrix(state.conj())   # 1 x n
    S_ket = S.dot(ket)
    return bra.dot(S_ket)[0,0]
def criaXY(dictionary,total,y,standard):
    """Build the sparse design matrix X and target vector Y.

    dictionary -- mapping movie -> {word: count} for the words found in its
                  review.
    total      -- mapping word -> column index of X; words missing from it
                  are ignored.  Must contain the key "_____", whose column
                  is set to 1 for every movie.
    y          -- mapping movie -> target value (the movie's profit).
    standard   -- when True, Y is centred by subtracting its mean.

    Returns (X, Y) as CSR matrices; X is int32 with one row per movie.
    """
    X=sparse.dok_matrix((len(dictionary),len(total)),dtype=np.int32)
    Y=sparse.dok_matrix((len(y),1))

    for indice,movie in enumerate(dictionary):
        for palavra in dictionary[movie]:
            # Only words that have a column in `total` are used.
            if palavra in total:
                X[indice,total[palavra]]=int(dictionary[movie][palavra])
        Y[indice,0]=float(y[movie])
        X[indice,total["_____"]]=1

    if standard==True:
        # Centre Y on its mean; the result is dense, so rebuild it as CSR.
        mediaY=Y.toarray().sum()/Y.shape[0]
        Y=sparse.csr_matrix(Y.toarray()-mediaY)

    # X and Y are sparse matrices
    return X.tocsr(),Y.tocsr()
def initlize(self):
    """Load product vectors, the user index and the rating matrices.

    Reads, in order:
    * ``self.prod_vector`` -- text vectors: a ``vocab_size vector_size``
      header, then ``product w1 ... wn`` per line.  Fills ``self.syn0``,
      ``self.prod2idx`` and ``self.idx2prod``.
    * ``self.data_path`` -- JSON lines; every new ``user_id`` is added to
      ``self.user2idx`` / ``self.idx2user``.
    * ``self.train_path`` and ``self.test_path`` -- JSON lines with
      ``user_id``, ``business_id`` and ``stars``; stored in
      ``self.train_score_matrix`` and ``self.test_score_matrix``.

    Raises ValueError if a vector line does not have ``vector_size + 1``
    fields.
    """
    #process syn0 with prod2idx
    with open(self.prod_vector, "r") as f:
        vocab_size, self.vector_size = map(int, f.readline().split())
        self.syn0 = np.zeros((vocab_size, self.vector_size), dtype=float)
        for line_no, line in enumerate(f):
            parts = line.split()
            if len(parts) != self.vector_size+1:
                raise ValueError("invalid vector on line %s (is this really the text format?)" % (line_no))
            prod, weights = parts[0], list(map(float, parts[1:]))
            if prod not in self.prod2idx:
                self.prod2idx[prod] = len(self.idx2prod)
                self.idx2prod.append(prod)
            self.syn0[self.prod2idx[prod]] = weights

    #process user2idx
    with open(self.data_path, "r") as ins:
        for line in ins:
            user_id = json.loads(line)["user_id"]
            if user_id not in self.user2idx:
                self.user2idx[user_id] = len(self.idx2user)
                self.idx2user.append(user_id)

    def load_scores(path):
        # users x products matrix of star ratings from a JSON-lines file.
        # The builtin float replaces np.float, removed in NumPy 1.24.
        scores = dok_matrix((len(self.user2idx), len(self.prod2idx)), dtype=float)
        with open(path, "r") as ins:
            for line in ins:
                obj = json.loads(line)
                user_idx = self.user2idx[obj["user_id"]]
                prod_idx = self.prod2idx[obj["business_id"]]
                scores[user_idx, prod_idx] = obj["stars"]
        return scores

    #process rating matrix
    self.train_score_matrix = load_scores(self.train_path)
    self.test_score_matrix = load_scores(self.test_path)
def build_U_matrices(G, B):
    """Build the two (n-1) x n U matrices from G and B.

    Row ``i-1`` (for i = 1 .. n-1) has a single entry, in column ``i``:
    ``sqrt(2) * sum(G[i, :])`` in ``Ureal`` and ``-sqrt(2) * sum(B[i, :])``
    in ``Ureac``.  Row/column 0 of the inputs is skipped.

    Returns ``(Ureal, Ureac)`` as DOK matrices.
    """
    n = G.shape[0]
    Ureal = dok_matrix((n-1, n))
    Ureac = dok_matrix((n-1, n))
    # Computed with the power operator so no sqrt import is required (the
    # former ``sqrt(2)`` assignment was dead and immediately overwritten).
    S2 = 2**.5
    for i in range(1, n):
        Ureal[i-1, i] = S2*G[i, :].sum()
        Ureac[i-1, i] = -S2*B[i, :].sum()
    return Ureal, Ureac
def SynthePsize(params, trueY, h):
    """Return a synthetic (totalLength x 1) sparse vector.

    For ``h == 0`` the vector is zero except for
    ``params.syntheticParams.strength`` at row ``trueY + 1``; for any other
    ``h`` it is filled with standard-normal noise.  Row 0 is always set to 1.
    """
    length = params.totalLength
    if h != 0:
        vector = sparse.dok_matrix((numpy.random.randn(length, 1)))
    else:
        vector = sparse.dok_matrix((length, 1))
        vector[trueY + 1, 0] = params.syntheticParams.strength

    # Still have bias - might be useful, and at worst will do nothing.
    vector[0, 0] = 1
    return vector
def __init__(self, n0, n1, n2):
    """Allocate storage for an n0 x n1 x n2 grid of nodes.

    ``m0`` and ``m1`` hold the products ``n1 * n2`` and ``n2``.  Positions
    are dense zero vectors with one entry per node; ``k`` and
    ``rest_lengths`` are empty node-by-node DOK matrices.
    """
    self.n0, self.n1, self.n2 = n0, n1, n2
    self.m0, self.m1 = n1 * n2, n2

    n_nodes = n0 * n1 * n2
    self.x_pos = np.zeros(n_nodes)
    self.y_pos = np.zeros(n_nodes)

    pair_shape = (n_nodes, n_nodes)
    self.k = sparse.dok_matrix(pair_shape)
    self.rest_lengths = sparse.dok_matrix(pair_shape)
def test_shape_compatibility(self):
    """spsolve accepts many right-hand-side containers and shapes.

    Every right-hand side is solved against a 2x2 diagonal system with both
    ``use_umfpack`` settings and four sparse formats of A, then checked
    against ``np.linalg.solve`` for values, dense/sparse output type and
    output shape.  A shape-incompatible system must raise ValueError.
    """
    use_solver(useUmfpack=True)
    A = csc_matrix([[1., 0], [0, 2]])

    sparse_formats = (csc_matrix, csr_matrix, dok_matrix, bsr_matrix)
    column = [[1], [6]]
    wide = [[1., 2., 3.], [6., 8., 10.]]

    bs = [[1, 6], array([1, 6]), [[1], [6]], array(column)]
    bs.extend(fmt(column) for fmt in sparse_formats)
    bs.append(array(wide))
    bs.extend(fmt(wide) for fmt in sparse_formats)

    for b in bs:
        x = np.linalg.solve(A.toarray(), toarray(b))
        for spmattype in [csc_matrix, csr_matrix, dok_matrix, lil_matrix]:
            solved = (
                (1, spsolve(spmattype(A), b, use_umfpack=True)),
                (2, spsolve(spmattype(A), b, use_umfpack=False)),
            )

            # check solution
            if x.ndim == 2 and x.shape[1] == 1:
                # interprets also these as "vectors"
                x = x.ravel()
            for tag, got in solved:
                assert_array_almost_equal(toarray(got), x,
                                          err_msg=repr((b, spmattype, tag)))

            # dense vs. sparse output ("vectors" are always dense)
            expect_sparse = isspmatrix(b) and x.ndim > 1
            for tag, got in solved:
                if expect_sparse:
                    assert_(isspmatrix(got), repr((b, spmattype, tag)))
                else:
                    assert_(isinstance(got, np.ndarray), repr((b, spmattype, tag)))

            # check output shape: "vector" vs. "matrix"
            wanted_shape = (A.shape[1],) if x.ndim == 1 else x.shape
            for tag, got in solved:
                assert_equal(got.shape, wanted_shape)

    A = csc_matrix((3, 3))
    b = csc_matrix((1, 3))
    assert_raises(ValueError, spsolve, A, b)
def __init__(self, inputfile, userSize, movieSize):
    """Record the input settings and allocate empty rating matrices.

    Three DOK matrices of shape (userSize + 1, movieSize + 1) are created
    (presumably so 1-based ids index directly -- confirm with the loader).
    The similarity, sorted-index and result matrices start as None.
    """
    # Settings supplied by the caller.
    self.__inputfile = inputfile
    self.__userSize = userSize
    self.__movieSize = movieSize
    self.__rating_avg = 0.0

    # Rating storage, one independent matrix per representation.
    shape = (userSize + 1, movieSize + 1)
    self.__userMovieMatrix = dok_matrix(shape)
    self.__userMovieOrigin = dok_matrix(shape)
    self.__userMovieBinary = dok_matrix(shape)

    # Filled in by later processing steps.
    self.__similarityMatrix = None
    self.__sortedIndexMatrix = None
    self.__resultMatrix = None
def create_lp_matrices(a, min_reviewers_per_paper, max_reviewers_per_paper,
                       min_papers_per_reviewer, max_papers_per_reviewer):
    """Create the matrices for running the Camillo J. Taylor algorithm.

    There is one LP variable per non-zero entry ("edge") of ``a``.  The
    constraints ``final_ne . x <= final_d`` are stacked in this order:
    upper bounds per paper and per reviewer, lower bound per paper
    (negated), lower bound per reviewer (negated), ``x <= 1`` and
    ``-x <= 0``.

    a: affinity matrix (papers x reviewers)

    Returns ``(v, final_ne, final_d)`` where ``v`` holds the affinity of
    every edge, ``final_ne`` is sparse and ``final_d`` is a dense column.
    """
    npapers = a.shape[0]
    nreviewers = a.shape[1]
    i, j = a.nonzero()
    nedges = len(i)
    v = a[i, j]
    edges = np.arange(nedges)

    # Builtin float/int replace np.float/np.int (removed in NumPy 1.24).

    # reviewers per paper and papers per reviewer (upper bounds)
    ne = sparse.dok_matrix((npapers+nreviewers, nedges), dtype=float)
    ne[i, edges] = 1
    ne[j+npapers, edges] = 1
    d = np.zeros((1, npapers + nreviewers))
    d[0, 0:npapers] = max_reviewers_per_paper
    d[0, npapers:] = max_papers_per_reviewer

    # at least reviewers_per_paper
    ne_atleast1_rev_per_paper = sparse.dok_matrix((npapers, nedges), dtype=int)
    ne_atleast1_rev_per_paper[i, edges] = -1
    d_atleast1_rev_per_paper = -np.ones((1, npapers))*min_reviewers_per_paper

    # at least papers_per_reviewer
    ne_atleast1_paper_per_rev = sparse.dok_matrix((nreviewers, nedges), dtype=int)
    ne_atleast1_paper_per_rev[j, edges] = -1
    d_atleast1_paper_per_rev = -np.ones((1, nreviewers))*min_papers_per_reviewer

    # append the other constraints where x >= 0 and x <= 1
    # x <= 1
    ne0 = sparse.dok_matrix((nedges, nedges), dtype=int)
    ne0[edges, edges] = 1
    d0 = np.ones((nedges, 1))

    # -x <= 0 => x >= 0
    ne1 = sparse.dok_matrix((nedges, nedges), dtype=int)
    ne1[edges, edges] = -1
    d1 = np.zeros((nedges, 1))

    final_ne = sparse.vstack([ne, ne_atleast1_rev_per_paper,
                              ne_atleast1_paper_per_rev, ne0, ne1])
    final_d = np.vstack((d.T, d_atleast1_rev_per_paper.T,
                         d_atleast1_paper_per_rev.T, d0, d1))

    return v, final_ne, final_d
def get_exp_values(b):
    '''Find the expectation value of the spin for a given b.

    Builds the transverse Ising Hamiltonian for field b, takes the first
    eigenvector returned by sort_eigs and evaluates
    conj(psi) . Sx_1 . psi, with Sx_1 = get_full_matrix(Sx, 1, N).
    '''
    hamiltonian = get_tran_ising_H(Sx,Sz,b,N)
    energies,vectors = sort_eigs(*eigsh(hamiltonian,k=4,which='LM'))
    observable = get_full_matrix(Sx,1,N)

    # Going through np.matrix avoids a TypeError when converting the
    # eigenvector to dok_matrix.
    state = np.matrix(vectors[:,0])
    bra = dok_matrix(state.conj())
    ket = dok_matrix(state.T)
    return bra.dot(observable.dot(ket))[0,0]
def bond_life(self):
    """When do bonds first form and are last seen

    Returns ``(bstart, blast)``: integer DOK matrices of shape
    (nb_trajs, nb_trajs).  For a bond between trajectories a < b,
    ``bstart[a, b]`` is the first frame and ``blast[a, b]`` the last frame
    in which it appears.  Entries read 0 both for "no bond" and for frame 0
    (sparse matrices do not store zeros).
    """
    shape = (self.nb_trajs, self.nb_trajs)
    # dtype must be a keyword: the second positional argument of dok_matrix
    # is ``shape``, so a positional ``int`` was silently ignored.
    bstart = sparse.dok_matrix(shape, dtype=int)
    blast = sparse.dok_matrix(shape, dtype=int)
    # Remember seen bonds separately: a start at frame 0 is not stored by
    # the DOK matrix and would otherwise be overwritten by a later frame.
    seen = set()
    for t, name in self.enum(ext='bonds'):
        # reshape keeps a single-bond file two-dimensional.
        pairs = np.loadtxt(name, int).reshape(-1, 2)
        bonds = np.sort(self.p2tr(t)[pairs], 1)
        for a, b in bonds:
            blast[a, b] = t
            key = (int(a), int(b))
            if key not in seen:
                seen.add(key)
                bstart[a, b] = t
    return bstart, blast
def setMd(self, xrng, yrng, zrng):
    """Build the operators that sample a rectangular sub-region 1:1.

    xrng, yrng, zrng -- (start, stop) index pairs of the region.

    Sets ``self.nRx``, ``self.nRy``, ``self.nRz`` to the region extents,
    ``self.Md`` to the X/Y/Z selection matrices combined through
    ``spt.vCat`` on their transposes, and ``self.x2u`` to the transposed
    block-diagonal (CSC) combination.  Both shapes are printed.
    """
    # set the right dimensions
    self.nRx = xrng[1]-xrng[0]
    self.nRy = yrng[1]-yrng[0]
    self.nRz = zrng[1]-zrng[0]
    nR = self.nRx*self.nRy*self.nRz

    def sampler(n_cols, xs, ys, zs, x_stride, y_stride):
        # Row-major spans: loc = i*J*K + j*K + k.  The nested loops give
        # rows in an order that can be reshaped to (nRx, nRy, nRz).
        op = sparse.dok_matrix((nR, n_cols))
        cnt = 0
        for x in xs:
            for y in ys:
                for z in zs:
                    op[cnt, x*x_stride + y*y_stride + z] = 1.0
                    cnt += 1
        return op

    nx, ny, nz = self.nx, self.ny, self.nz
    x_in, x_up = range(xrng[0], xrng[1]), range(xrng[0]+1, xrng[1]+1)
    y_in, y_up = range(yrng[0], yrng[1]), range(yrng[0]+1, yrng[1]+1)
    z_in, z_up = range(zrng[0], zrng[1]), range(zrng[0]+1, zrng[1]+1)

    # Each component lives on a grid one cell longer along its own axis
    # and is sampled one index further along that axis.
    Mx = sampler((nx+1)*ny*nz, x_up, y_in, z_in, ny*nz, nz)
    My = sampler(nx*(ny+1)*nz, x_in, y_up, z_in, (ny+1)*nz, nz)
    Mz = sampler(nx*ny*(nz+1), x_in, y_in, z_up, ny*(nz+1), nz+1)

    # put them all together in a block matrix
    self.Md = spt.vCat([Mx.T,My.T,Mz.T]).T
    print('Md shape ' + repr(self.Md.shape))
    self.x2u = sparse.block_diag((Mx,My,Mz), 'csc').T
    print('x2u shape ' + repr(self.x2u.shape))
def depths(mask, normals):
    """Reconstructs the depths from normals.

    A least-squares system is assembled with two equations per usable pixel
    (one towards x+1, one towards y+1) and solved with ``lsqr``.  A pixel is
    usable when it and its four direct neighbours are set in ``mask``.

    Args:
      mask: width x height array; truthy where a pixel belongs to the object
      normals: width x height x 3 array

    Returns:
      width x height array of normalised depths (0 for unused pixels).

    NOTE(review): neighbour lookups are not bounds-checked, so this
    presumably expects the mask to be empty along the border -- confirm.
    """
    width, height, three = normals.shape
    assert three == 3

    max_rows = width*height*2
    system = dok_matrix((max_rows, width*height), dtype=float)
    rhs = np.zeros(max_rows, dtype=float)
    log.debug('maximal shape: %s', system.shape)

    row = 0
    coords = ConsistentBimap()
    for x in range(width):
        for y in range(height):
            if not mask[x,y]:
                continue
            if not (mask[x+1,y] and mask[x,y+1] and mask[x-1,y] and mask[x,y-1]):
                continue
            # One finite-difference equation per direction:
            #   z(neighbour) - z(x, y) = n_axis / n_z
            for neighbour, axis in (((x+1,y), X), ((x,y+1), Y)):
                system[row, coords[neighbour]] = 1
                system[row, coords[(x,y)]] = -1
                rhs[row] = normals[x,y,axis]/normals[x,y,Z]
                row += 1

    # Now we know how many pixels are used; restrict the matrix to the rows
    # and columns actually needed (plus one normalization row).
    reduced = dok_matrix((row+1, coords.i), dtype=float)
    for (r, c), value in system.items():
        try:
            reduced[r,c] = value
        except Exception:
            log.error('error at (%s, %s)', r, c)
            raise

    # normalization
    reduced[row,0] = 1
    reduced = reduced.tocsr()
    rhs = rhs[:row+1]
    log.debug('actual shape: %s', reduced.shape)

    solution = lsqr(reduced, rhs, atol=1e-3, btol=1e-6, show=True)
    z_p = normalize(solution[0])

    z = np.zeros((width, height))
    for index,(x,y) in coords.r.items():
        z[x,y] = z_p[index]
    log.debug('z(0,0) = %s', z[0,0])
    return z
def build_R_matrices(G, B, branch_map):
    """Build the R matrices; rows are buses 2 to n, cols are branches.

    For every bus ``f >= 1`` and every bus ``t`` with a non-zero ``B[f, t]``
    the column ``branch_map[(f, t)]`` of row ``f - 1`` receives ``-G[f, t]``
    in ``Rreal`` and ``B[f, t]`` in ``Rreac``.

    Returns ``(Rreal, Rreac)`` as (n-1) x (n-1) DOK matrices.
    """
    size = G.shape[0] - 1
    Rreal, Rreac = dok_matrix((size, size)), dok_matrix((size, size))
    for row in range(size):
        fbus = row + 1  # bus 0 is skipped
        neighbours = B[fbus, :].nonzero()[1]
        for tbus in neighbours:
            col = branch_map[(fbus, tbus)]
            Rreal[row, col] = -G[fbus, tbus]
            Rreac[row, col] = B[fbus, tbus]
    return Rreal, Rreac
def find_translation(cls, resolver, src_type, dst_type, *, exact=False) -> Optional["MultiStepTranslator"]:
    """Find a chain of translators leading from src_type to dst_type.

    ``dst_type`` may be a concrete type class, another class registered in
    ``resolver.class_to_concrete``, or an instance (its class is used).

    With ``exact=True`` only a direct translator is accepted.  Otherwise an
    unweighted shortest path over ``resolver.translators`` is used; the
    all-pairs result is cached per abstract type in
    ``resolver.translation_matrices``.

    Returns a MultiStepTranslator, or None when no translation exists.
    """
    # Normalise the destination to a concrete type class.
    if isinstance(dst_type, type) and not issubclass(dst_type, ConcreteType):
        dst_type = resolver.class_to_concrete.get(dst_type, dst_type)
    if not isinstance(dst_type, type):
        dst_type = dst_type.__class__

    if exact:
        direct = resolver.translators.get((src_type, dst_type), None)
        if direct is None:
            return
        single = MultiStepTranslator(src_type)
        single.add_after(direct, dst_type)
        return single

    abstract = dst_type.abstract
    if abstract not in resolver.translation_matrices:
        # Build translation matrix over the concrete types that share this
        # abstract type or contain it as an unambiguous subcomponent.
        members = []
        index_of = {}
        member_abstracts = set()
        for ct in resolver.concrete_types:
            related = (abstract is ct.abstract
                       or abstract in ct.abstract.unambiguous_subcomponents)
            if related:
                index_of[ct] = len(members)
                members.append(ct)
                member_abstracts.add(ct.abstract)

        adjacency = ss.dok_matrix((len(members), len(members)), dtype=bool)
        for source, dest in resolver.translators:
            # only accept destinations of included abstract types
            if dest.abstract in member_abstracts:
                adjacency[index_of[source], index_of[dest]] = True

        distances, previous = ss.csgraph.dijkstra(
            adjacency.tocsr(), return_predecessors=True, unweighted=True
        )
        resolver.translation_matrices[abstract] = (
            members,
            index_of,
            distances,
            previous,
        )

    # Lookup shortest path from stored results
    members, index_of, distances, previous = resolver.translation_matrices[abstract]
    try:
        start = index_of[src_type]
        current = index_of[dst_type]
    except KeyError:
        return None
    if distances[start, current] == np.inf:
        return None

    # Path exists; walk the predecessor matrix back from the destination,
    # prepending one translator per hop.
    chain = MultiStepTranslator(src_type)
    while start != current:
        before = previous[start, current]
        hop = resolver.translators[(members[before], members[current])]
        chain.add_before(hop, members[current])
        current = before
    return chain
def _rp_addexon_sweep(elements, scores, decay, gene_distance, direction):
    """Score peaks against the genes already passed in one sweep direction."""
    active = {}  # gene row index -> gene record; starts empty every sweep
    for elem in elements:
        if elem[-3] == 1:
            # Gene record: becomes active for the peaks that follow.
            active[elem[-1]] = elem
            continue
        expired = []
        for gene_name, g in active.items():
            distance = direction * (elem[1] - g[1])
            same_chrom = g[0] == elem[0]
            in_exon = any(y[0] <= elem[1] <= y[1] for y in g[5])
            if same_chrom and in_exon:
                scores[gene_name, elem[-1]] = 1.0 / g[-4]
            elif same_chrom and distance <= gene_distance:
                scores[gene_name, elem[-1]] = 2 ** (-(distance / decay))
            else:
                # Sorted input: no later peak can reach this gene.
                expired.append(gene_name)
        for gene_name in expired:
            del active[gene_name]


def RP_AddExon(peaks_info, genes_info_full, genes_info_tss, decay):
    """Multiple processing function to calculate regulation potential.

    Genes and peaks are merged, sorted and swept once in ascending and once
    in descending order.  A peak inside one of a gene's exon intervals
    (``g[5]``) scores ``1.0 / g[-4]``; otherwise a peak within ``15 * decay``
    of the gene position scores ``2 ** -(distance / decay)``.

    Records are sequences with the chromosome at ``[0]``, the position at
    ``[1]``, a flag at ``[-3]`` (1 for genes) and the matrix index at
    ``[-1]``.

    Both sweeps use the same exon / distance / expire rule and start with no
    active genes.  Previously the descending sweep used ``if`` instead of
    ``elif`` (overwriting exon scores or dropping the gene) and inherited
    stale genes from the ascending sweep, which were scored with negative
    distances.

    Returns a DOK matrix of shape (len(genes_info_full), len(peaks_info)).
    """
    gene_distance = 15 * decay
    genes_peaks_score_array = sp_sparse.dok_matrix(
        (len(genes_info_full), len(peaks_info)), dtype=np.float64)

    w = genes_info_tss + peaks_info
    w.sort()
    # Ascending: peaks at or after the gene position.
    _rp_addexon_sweep(w, genes_peaks_score_array, decay, gene_distance, 1)
    w.reverse()
    # Descending: peaks at or before the gene position.
    _rp_addexon_sweep(w, genes_peaks_score_array, decay, gene_distance, -1)

    return (genes_peaks_score_array)
def topoStatistics(W, nWrook, regular=False):
    """Compute neighbour-count statistics of a contiguity structure.

    :param W: dict mapping each area id to the collection of its neighbours.
        Ids are used directly as matrix indices, so they are presumably
        integers in ``range(len(W))`` -- confirm with the caller.
    :param nWrook: dict whose keys are the areas included in the degree
        statistics (neighbour counts are still taken from ``W``).
    :param regular: when True the regression step is skipped and ``mu2`` and
        ``a1`` are reported as 0.
    :return: tuple ``(max degree, min degree, mean degree, mu2, a1,
        sparseness, eig)`` where ``eig`` is the largest of the two
        eigenvalues returned by ``eigsh`` for the adjacency matrix, or
        ``-9999`` when the eigen-solver fails.
    """
    frontiers = set(W.keys()) - set(nWrook.keys())
    nw = []
    areas_nngs = {}
    wSparse = dok_matrix((len(W), len(W)))
    n1 = 0
    for w in W:
        n1 += len(W[w])
        for j in W[w]:
            wSparse[w, j] = 1
    try:
        eig = max(linalg.eigsh(wSparse, 2)[0])
    except Exception:
        # Best effort: the solver rejects e.g. very small matrices; report a
        # sentinel instead of aborting the whole statistics run.
        eig = -9999

    # Group areas by their number of neighbours (areas without neighbours
    # are counted in nw but not grouped).
    for w in nWrook:
        degree = len(W[w])
        nw.append(degree)
        if degree != 0:
            areas_nngs.setdefault(degree, []).append(w)

    mu1 = numpy.mean(nw)
    mu2 = numpy.var(nw)

    # m[n]: mean neighbour count of the neighbours of areas having n neighbours.
    m = {}
    for n in areas_nngs:
        total = 0
        nareas_n = 0
        for a in areas_nngs[n]:
            if a not in frontiers:
                for a1 in W[a]:
                    total += len(W[a1])
                    nareas_n += 1
        m[n] = total / float(nareas_n)

    sparseness = n1 / float(len(W)**2 - len(W))
    if regular:
        a1 = 0
        mu2 = 0
    else:
        X1 = []
        X2 = []
        Y = []
        Y2 = []
        for n in m:
            for _ in areas_nngs[n]:
                X1.append(1)
                X2.append(n)
                Y.append(n * m[n])
                Y2.append((n**2) * m[n])
        # list(...) keeps this working on Python 3, where zip is lazy.
        X = numpy.matrix(list(zip(X1, X2)))
        Y = numpy.matrix(Y)
        # Least-squares fit of Y on [1, n]; raises for a singular design.
        B = (X.transpose() * X)**(-1) * X.transpose() * Y.transpose()
        a1 = (mu1 * (mu2 + numpy.mean(Y)) - numpy.mean(Y2)) / float(mu2)
        # a2 and a3 are derived from the fit but are not part of the return
        # value; a2 was flagged by the original author as not working out.
        a2 = (B[0] - mu2) / mu1
        a3 = -1 * (B[1] - mu1)
    return max(nw), min(nw), numpy.mean(nw), mu2, a1, sparseness, eig
# Bag-of-words features: the fitted vectorizer is applied to both corpora.
train_bow_features = bag_of_words_vectorizer.transform(train_corpus)
test_tokens = testset['tokens'].tolist()
test_corpus = createCorpus(test_tokens)
test_bow_features = bag_of_words_vectorizer.transform(test_corpus)


# Obtain LDA features
def createCorpusLDA(tokens):
    """Build a dictionary from ``tokens`` and encode every token list with it.

    Returns ``(dictionary, corpus)`` where ``corpus`` holds one
    ``dictionary.doc2bow`` result per token list.
    """
    # NOTE(review): a fresh dictionary is built on every call, so train and
    # test get independent token ids -- confirm these match the ids
    # ``lda_model`` was trained with.
    dictionary = corpora.Dictionary(tokens)
    corpus = [dictionary.doc2bow(token_list) for token_list in tokens]
    return (dictionary,corpus)


# One row per training document; 100 columns (presumably the number of LDA
# topics -- TODO confirm against the model configuration).
train_lda_dictionary, train_lda_corpus = createCorpusLDA(train_tokens)
train_lda_features = dok_matrix((len(train_lda_corpus),100))
for i in tqdm(range(len(train_lda_corpus))):
    topic_distribution = lda_model[train_lda_corpus[i]]
    for (topic_nr,prob) in topic_distribution:
        train_lda_features[i, topic_nr] = prob
# CSR copy of the filled training matrix.
train_lda_features_csr = train_lda_features.tocsr()

# Same procedure for the test documents.
test_lda_dictionary, test_lda_corpus = createCorpusLDA(test_tokens)
test_lda_features = dok_matrix((len(test_lda_corpus),100))
for i in tqdm(range(len(test_lda_corpus))):
    topic_distribution = lda_model[test_lda_corpus[i]]
    for (topic_nr,prob) in topic_distribution:
        # (the loop body lies beyond this excerpt)
def load_pre_splitter_data(self):
    """Load an already split dataset from ``<path>.train.rating`` / ``.test.rating``.

    Every non-blank line holds ``user, item, rating, time`` separated by
    ``self.separator``.  Raw user/item ids are mapped to consecutive inner
    ids in order of first appearance (train file first, then test file).
    Ratings ``>= self.threshold`` are stored as ``1``; lower ratings keep
    their value.

    Returns a tuple
    ``(train_matrix, train_dict, test_matrix, pos_per_user, userids, itemids, time_matrix)``:

    * ``train_matrix`` / ``test_matrix`` / ``time_matrix``: float32
      ``dok_matrix`` of shape ``(num_users, num_items)``; ``time_matrix``
      covers both splits.
    * ``train_dict``: inner user id -> training item ids ordered by time
      (only users seen in the train file).
    * ``pos_per_user``: inner user id -> ``[item, rating, time]`` entries
      from both files, ordered by time.  ``rating`` is ``1`` for
      above-threshold entries and the raw rating string otherwise.
    * ``userids`` / ``itemids``: raw id -> inner id.
    """
    userids, itemids = {}, {}
    pos_per_user = {}

    def read_split(suffix):
        """Parse one rating file, register its ids and return matrix records."""
        records = []
        with open(self.path + suffix, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue  # tolerate blank / trailing empty lines
                useridx, itemidx, rating, time = line.split(self.separator)
                if itemidx not in itemids:
                    itemids[itemidx] = len(itemids)
                if useridx not in userids:
                    userids[useridx] = len(userids)
                    pos_per_user[userids[useridx]] = []
                user, item = userids[useridx], itemids[itemidx]
                positive = float(rating) >= self.threshold
                pos_per_user[user].append(
                    [item, 1 if positive else rating, int(time)])
                records.append(
                    (user, item, 1 if positive else float(rating), float(time)))
        return records

    def sort_by_time():
        for u in range(len(userids)):
            pos_per_user[u] = sorted(pos_per_user[u], key=lambda d: d[2])

    train_records = read_split(".train.rating")
    sort_by_time()
    # Snapshot taken before the test file is read: training items only.
    train_dict = {
        u: [entry[0] for entry in pos_per_user[u]]
        for u in range(len(userids))
    }

    test_records = read_split(".test.rating")
    sort_by_time()

    num_users, num_items = len(userids), len(itemids)
    train_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
    time_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
    for user, item, value, time in train_records:
        train_matrix[user, item] = value
        time_matrix[user, item] = time
    print("already load the trainMatrix...")

    test_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
    for user, item, value, time in test_records:
        test_matrix[user, item] = value
        time_matrix[user, item] = time
    print("already load the testMatrix...")
    return train_matrix, train_dict, test_matrix, pos_per_user, userids, itemids, time_matrix
# (Tail of a reader routine whose header lies outside this excerpt.)
# The stored value is the count scaled by log(U / (1 + idfs[wordId])).
counts[wordId,userId]=count*log(float(U)/(1+idfs[wordId]))
file.close()

# In[28]:
def proc(k):
    """Run ``svds`` on the module-level ``counts`` matrix with ``k`` components.

    Returns the ``(u, s, v)`` triple exactly as ``svds`` produced it.
    """
    (u, s, v) = svds(counts, k=k)
    return (u, s, v)

# In[29]:
# Vocabulary slots and an empty V x U sparse matrix, filled by the readers below.
vocab=[None]*V
reverseVocab={}
counts=sparse.dok_matrix((V,U))

# In[30]:
readWords("unigrams.txt")

# In[31]:
readIdfs("idf.txt")

# In[32]:
# Python 2 print statement.
print "reading data"
def __init__(self, treedata: tuple):
    """Build a boolean adjacency matrix from the edges in ``treedata``.

    ``GetMax(treedata)`` gives the side length of the square matrix; every
    element ``e`` of ``treedata`` marks cell ``(e[0], e[1])`` as True.
    """
    self.result = []
    self.count = GetMax(treedata)
    shape = (self.count, self.count)
    self.dm = dok_matrix(shape, dtype=bool)
    for edge in treedata:
        src, dst = edge[0], edge[1]
        self.dm[src, dst] = True
def __init__(self, path, batch_size):
    """Load ``train.txt`` / ``test.txt`` from ``path`` and build the interaction matrix.

    Each non-blank line is ``user_id item_id item_id ...`` separated by
    whitespace.  Blank lines, repeated or trailing separators and users
    without items are tolerated.  A malformed (non-integer) line raises
    ``ValueError`` in the train file and is skipped in the test file.

    Sets ``n_users``, ``n_items`` (largest id + 1; users are counted from
    the train file only), ``n_train``, ``n_test``, ``exist_users``, the
    float32 ``dok_matrix`` ``R`` of training interactions, and the
    ``train_items`` / ``test_set`` dicts (user id -> item ids).
    ``self.print_statistics()`` is called once the counts are known.
    """
    self.path = path
    self.batch_size = batch_size
    train_file = path + '/train.txt'
    test_file = path + '/test.txt'

    def read_rows(filename, skip_malformed):
        """Return ``[(uid, [item, ...]), ...]`` parsed from ``filename``."""
        rows = []
        with open(filename) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue
                try:
                    ids = [int(tok) for tok in fields]
                except ValueError:
                    if skip_malformed:
                        continue
                    raise
                rows.append((ids[0], ids[1:]))
        return rows

    self.n_users, self.n_items = 0, 0
    self.n_train, self.n_test = 0, 0
    self.neg_pools = {}
    self.exist_users = []

    train_rows = read_rows(train_file, skip_malformed=False)
    test_rows = read_rows(test_file, skip_malformed=True)

    for uid, items in train_rows:
        self.exist_users.append(uid)
        if items:
            self.n_items = max(self.n_items, max(items))
        self.n_users = max(self.n_users, uid)
        self.n_train += len(items)
    for _, items in test_rows:
        if items:
            self.n_items = max(self.n_items, max(items))
        self.n_test += len(items)
    # Ids are zero-based, so the counts are the largest id plus one.
    self.n_items += 1
    self.n_users += 1
    self.print_statistics()

    self.R = sp.dok_matrix((self.n_users, self.n_items), dtype=np.float32)
    self.train_items, self.test_set = {}, {}
    for uid, items in train_rows:
        for i in items:
            self.R[uid, i] = 1.
        self.train_items[uid] = items
    for uid, items in test_rows:
        self.test_set[uid] = items
def load_data_by_user_time(self):
    """Load interactions from ``self.path`` and split each user's history by time.

    ``self.data_format`` selects the column layout of every non-blank line
    (fields separated by ``self.separator``):

    * ``"UIRT"`` user, item, rating, time
    * ``"UIT"``  user, item, time (rating is 1)
    * ``"UIR"``  user, item, rating (time is 1)
    * ``"UI"``   user, item (rating and time are 1)

    Rows whose rating is below ``self.threshold`` are dropped (formats with
    a rating column only).  Raw ids are mapped to consecutive inner ids in
    order of first appearance.  When timestamps exist each user's history is
    sorted by time, and the last
    ``floor(self.splitterRatio[1] * n_interactions)`` entries of a user with
    at least two interactions go to the test split.

    Returns ``(train_matrix, train_dict, test_matrix, userseq, userids,
    itemids, time_matrix)``; ``userseq`` is the full per-user list of
    ``(item, rating, time)`` tuples before splitting (``rating`` is the raw
    string for formats with a rating column).

    Raises:
        ValueError: if ``self.data_format`` is not one of the four layouts,
            or a line does not have the expected number of fields.
    """
    logger.info("Loading interaction records from %s " % (self.path))

    # data_format -> (has a rating column, has a time column)
    layouts = {
        "UIRT": (True, True),
        "UIT": (False, True),
        "UIR": (True, False),
        "UI": (False, False),
    }
    if self.data_format not in layouts:
        raise ValueError("please choose a correct data format. ")
    has_rating, has_time = layouts[self.data_format]
    n_fields = 2 + has_rating + has_time

    pos_per_user = {}
    num_ratings = 0
    # user/item {raw id: inner id} maps
    userids = {}
    itemids = {}

    with open(self.path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            fields = line.split(self.separator)
            if len(fields) != n_fields:
                raise ValueError(
                    "expected %d fields for format %s, got %d"
                    % (n_fields, self.data_format, len(fields)))
            useridx, itemidx = fields[0], fields[1]
            rating = fields[2] if has_rating else 1
            if has_rating and float(rating) < self.threshold:
                continue
            time = int(float(fields[-1])) if has_time else 1

            num_ratings += 1
            if itemidx not in itemids:
                itemids[itemidx] = len(itemids)
            if useridx not in userids:
                userids[useridx] = len(userids)
                pos_per_user[userids[useridx]] = []
            pos_per_user[userids[useridx]].append(
                (itemids[itemidx], rating, time))

    num_users, num_items = len(userids), len(itemids)
    if has_time:
        for u in range(num_users):
            pos_per_user[u] = sorted(pos_per_user[u], key=lambda d: d[2])
    logger.info("\"num_users\": %d,\"num_items\":%d, \"num_ratings\":%d" %
                (num_users, num_items, num_ratings))

    # Keep the complete histories; pos_per_user is consumed by the split.
    userseq = deepcopy(pos_per_user)
    train_dict = {}
    train_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
    test_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
    time_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
    for u in range(num_users):
        history = pos_per_user[u]
        num_test_ratings = math.floor(
            float(self.splitterRatio[1]) * len(history))
        if len(history) >= 2 and num_test_ratings >= 1:
            # The most recent interactions become the test set.
            for _ in range(num_test_ratings):
                item, rating, time = history.pop()
                test_matrix[u, item] = float(rating)
                time_matrix[u, item] = time
        items = []
        for item, rating, time in history:
            items.append(item)
            train_matrix[u, item] = float(rating)
            time_matrix[u, item] = time
        train_dict[u] = items
    return train_matrix, train_dict, test_matrix, userseq, userids, itemids, time_matrix
def solve_fourier(self, kappa, **argv):
    """Assemble and solve the sparse linear system for the Fourier temperature field.

    ``kappa`` may be a scalar, a 2-D matrix or a per-element stack; the first
    two are expanded to one diagonal matrix per element.  The system matrix
    is factorised once and the solve is repeated with an updated secondary
    flux term until the relative change of ``kappa_eff`` drops to
    ``self.max_fourier_error`` or ``self.max_fourier_iter`` iterations are
    reached.

    Returns a dict with keys ``'flux_fourier'``, ``'temperature_fourier'``,
    ``'meta'`` (array of ``[kappa_eff, error, n_iter]``) and ``'grad'``.
    """
    # Scalar -> dim x dim diagonal matrix.
    if np.isscalar(kappa):
        kappa = np.diag(np.diag(kappa * np.eye(self.dim)))
    # Single matrix -> keep its diagonal only and replicate it per element.
    if kappa.ndim == 2:
        kappa = np.repeat(np.array([np.diag(np.diag(kappa))]), self.n_elems, axis=0)
    F = sp.dok_matrix((self.n_elems, self.n_elems))
    B = np.zeros(self.n_elems)
    for ll in self.mesh['active_sides']:
        area = self.mesh['areas'][ll]
        (i, j) = self.mesh['side_elem_map_vec'][ll]
        vi = self.mesh['volumes'][i]
        vj = self.mesh['volumes'][j]
        kappa_loc = self.get_kappa(i, j, ll, kappa)
        #kappa_loc = np.eye(2)*kappa_loc[0,0]
        # Only sides joining two different elements contribute.
        if not i == j:
            (v_orth, dummy) = self.get_decomposed_directions(ll, rot=kappa_loc)
            # Symmetric coupling between elements i and j, scaled by each
            # element's volume.
            F[i, i] += v_orth / vi * area
            F[i, j] -= v_orth / vi * area
            F[j, j] += v_orth / vj * area
            F[j, i] -= v_orth / vj * area
            # Periodic sides add their prescribed value to the right-hand side.
            if ll in self.mesh['periodic_sides']:
                kk = list(self.mesh['periodic_sides']).index(ll)
                B[i] += self.mesh['periodic_side_values'][
                    kk] * v_orth / vi * area
                B[j] -= self.mesh['periodic_side_values'][
                    kk] * v_orth / vj * area
    ##rescaleand fix one point to 0
    #scale = 1/F.max(axis=0).toarray()[0]
    #n = np.random.randint(self.n_elems)
    #scale[n] = 0
    #F.data = F.data * scale[F.indices]
    #F[n,n] = 1
    #B[n] = 0
    # Factorise once; the factorisation is reused in every iteration below.
    SU = splu(F.tocsc())
    #-----------------------
    C = np.zeros(self.n_elems)
    n_iter = 0
    kappa_old = 0
    error = 1
    grad = np.zeros((self.n_elems, self.dim))
    # NOTE(review): if the loop body never runs (e.g. max_fourier_iter == 0),
    # temp and kappa_eff are unbound when the result is built below.
    while error > self.max_fourier_error and \
            n_iter < self.max_fourier_iter :
        RHS = B + C
        #for n in range(self.n_elems):
        #  RHS[n] = RHS[n]*scale[n]
        temp = SU.solve(RHS)
        # Centre the solution around the midpoint of its range.
        temp = temp - (max(temp) + min(temp)) / 2.0
        kappa_eff = self.compute_diffusive_thermal_conductivity(
            temp, grad, kappa)
        error = abs((kappa_eff - kappa_old) / kappa_eff)
        kappa_old = kappa_eff
        n_iter += 1
        C, grad = self.compute_secondary_flux(temp, kappa)
    # flux_c = -kappa_c . grad_c for every element c.
    flux = -np.einsum('cij,cj->ci', kappa, grad)
    meta = [kappa_eff, error, n_iter]
    return {
        'flux_fourier': flux,
        'temperature_fourier': temp,
        'meta': np.array(meta),
        'grad': grad
    }
def solve(self, objective, constraints, cached_data, warm_start, verbose, solver_opts):
    """Returns the result of the call to the solver.

    Parameters
    ----------
    objective : LinOp
        The canonicalized objective.
    constraints : list
        The list of canonicalized constraints.
    cached_data : dict
        A map of solver name to cached problem data.
    warm_start : bool
        If True, a previous result is cached and the problem has no SOC
        constraints, the cached Gurobi model is updated in place instead of
        being rebuilt.
    verbose : bool
        Should the solver print output?
    solver_opts : dict
        Additional arguments for the solver.

    Returns
    -------
    tuple
        (status, optimal value, primal, equality dual, inequality dual)
    """
    import gurobipy
    # Get problem data
    data = self.get_problem_data(objective, constraints, cached_data)
    c = data[s.C]
    b = data[s.B]
    A = dok_matrix(data[s.A])
    # Save the dok_matrix.
    data[s.A] = A
    n = c.shape[0]
    solver_cache = cached_data[self.name()]
    # TODO warmstart with SOC constraints.
    if warm_start and solver_cache.prev_result is not None \
            and len(data[s.DIMS][s.SOC_DIM]) == 0:
        model = solver_cache.prev_result["model"]
        variables = solver_cache.prev_result["variables"]
        gur_constrs = solver_cache.prev_result["gur_constrs"]
        c_prev = solver_cache.prev_result["c"]
        A_prev = solver_cache.prev_result["A"]
        b_prev = solver_cache.prev_result["b"]
        # If there is a parameter in the objective, it may have changed.
        if len(lu.get_expr_params(objective)) > 0:
            c_diff = c - c_prev
            # Only the objective coefficients that differ are pushed to Gurobi.
            I_unique = list(set(np.where(c_diff)[0]))
            for i in I_unique:
                variables[i].Obj = c[i]
        else:
            # Stay consistent with Gurobi's representation of the problem
            c = c_prev
        # Get equality and inequality constraints.
        sym_data = self.get_sym_data(objective, constraints, cached_data)
        all_constrs, _, _ = self.split_constr(sym_data.constr_map)
        # If there is a parameter in the constraints,
        # A or b may have changed.
        if self._param_in_constr(all_constrs):
            A_diff = dok_matrix(A - A_prev)
            b_diff = b - b_prev
            # Figure out which rows of A and elements of b have changed
            # NOTE(review): iterkeys() is the Python 2 dict API.
            try:
                I, _ = zip(*[x for x in A_diff.iterkeys()])
            except ValueError:
                # zip(*[]) cannot be unpacked: no entry of A changed.
                I = []
            I_unique = list(set(I) | set(np.where(b_diff)[0]))
            nonzero_locs = gurobipy.tuplelist([x for x in A.iterkeys()])
            # Update locations which have changed
            for i in I_unique:
                # Remove old constraint if it exists
                if gur_constrs[i] is not None:
                    model.remove(gur_constrs[i])
                    gur_constrs[i] = None
                # Add new constraint
                if len(nonzero_locs.select(i, "*")) > 0:
                    expr_list = []
                    for loc in nonzero_locs.select(i, "*"):
                        expr_list.append((A[loc], variables[loc[1]]))
                    expr = gurobipy.LinExpr(expr_list)
                    # Rows [0, EQ_DIM) are equalities, the next LEQ_DIM rows
                    # are inequalities.
                    # NOTE(review): ctype stays unbound (or stale) for a row
                    # index beyond both ranges.
                    if i < data[s.DIMS][s.EQ_DIM]:
                        ctype = gurobipy.GRB.EQUAL
                    elif data[s.DIMS][s.EQ_DIM] <= i \
                            < data[s.DIMS][s.EQ_DIM] + data[s.DIMS][s.LEQ_DIM]:
                        ctype = gurobipy.GRB.LESS_EQUAL
                    gur_constrs[i] = model.addConstr(expr, ctype, b[i])
            model.update()
        else:
            # Stay consistent with Gurobi's representation of the problem
            A = A_prev
            b = b_prev
    else:
        # Cold start: build the whole model from scratch.
        model = gurobipy.Model()
        variables = []
        for i in range(n):
            # Set variable type.
            if i in data[s.BOOL_IDX]:
                vtype = gurobipy.GRB.BINARY
            elif i in data[s.INT_IDX]:
                vtype = gurobipy.GRB.INTEGER
            else:
                vtype = gurobipy.GRB.CONTINUOUS
            variables.append(
                model.addVar(
                    obj=c[i],
                    name="x_%d" % i,
                    vtype=vtype,
                    # Gurobi's default LB is 0 (WHY???)
                    lb=-gurobipy.GRB.INFINITY,
                    ub=gurobipy.GRB.INFINITY))
        model.update()
        nonzero_locs = gurobipy.tuplelist([x for x in A.iterkeys()])
        eq_constrs = self.add_model_lin_constr(
            model, variables, range(data[s.DIMS][s.EQ_DIM]),
            gurobipy.GRB.EQUAL, nonzero_locs, A, b)
        leq_start = data[s.DIMS][s.EQ_DIM]
        leq_end = data[s.DIMS][s.EQ_DIM] + data[s.DIMS][s.LEQ_DIM]
        ineq_constrs = self.add_model_lin_constr(model, variables,
                                                 range(leq_start, leq_end),
                                                 gurobipy.GRB.LESS_EQUAL,
                                                 nonzero_locs, A, b)
        # SOC constraints occupy consecutive row ranges after the inequalities.
        soc_start = leq_end
        soc_constrs = []
        new_leq_constrs = []
        for constr_len in data[s.DIMS][s.SOC_DIM]:
            soc_end = soc_start + constr_len
            soc_constr, new_leq, new_vars = self.add_model_soc_constr(
                model, variables, range(soc_start, soc_end), nonzero_locs,
                A, b)
            soc_constrs.append(soc_constr)
            new_leq_constrs += new_leq
            variables += new_vars
            soc_start += constr_len
        gur_constrs = eq_constrs + ineq_constrs + \
            soc_constrs + new_leq_constrs
        model.update()
    # Set verbosity and other parameters
    model.setParam("OutputFlag", verbose)
    # TODO user option to not compute duals.
    model.setParam("QCPDual", True)
    for key, value in solver_opts.items():
        model.setParam(key, value)
    results_dict = {}
    try:
        model.optimize()
        results_dict["primal objective"] = model.ObjVal
        results_dict["x"] = np.array([v.X for v in variables])
        # Only add duals if not a MIP.
        # Not sure why we need to negate the following,
        # but need to in order to be consistent with other solvers.
        if not self.is_mip(data):
            vals = []
            for lc in gur_constrs:
                if lc is not None:
                    if isinstance(lc, gurobipy.QConstr):
                        vals.append(lc.QCPi)
                    else:
                        vals.append(lc.Pi)
                else:
                    vals.append(0)
            results_dict["y"] = -np.array(vals)
    # NOTE(review): the bare except swallows every failure (including
    # KeyboardInterrupt); the outcome is then only visible via model.Status.
    except:
        pass
    # Everything needed for a later warm start is stored with the result.
    results_dict["model"] = model
    results_dict["variables"] = variables
    results_dict["gur_constrs"] = gur_constrs
    results_dict["status"] = self.STATUS_MAP.get(model.Status, s.SOLVER_ERROR)
    return self.format_results(results_dict, data, cached_data)
import numpy as np
from scipy.sparse import dok_matrix

# Demo: a 10000 x 10000 float32 DOK matrix in which only the top-left
# 10 x 10 corner is populated, followed by a report of its concrete type.
S = dok_matrix((10000, 10000), dtype=np.float32)
S[:10, :10] = 0.5
print(type(S))
earth_height = earth_map.height
my_width = earth_width
my_height = earth_height
# Passability lookup with a one-cell border of impassable cells around the
# map, so neighbour lookups just outside the map resolve to False.
for x in range(-1, earth_width+1):
    for y in range(-1, earth_height+1):
        coords = (x, y)
        if x==-1 or y==-1 or x == earth_map.width or y== earth_map.height:
            passable_locations_earth[coords]= False
        elif earth_map.is_passable_terrain_at(bc.MapLocation(earth, x, y)):
            passable_locations_earth[coords] = True
        else:
            passable_locations_earth[coords]= False
# Adjacency matrix over all cells; cell (x, y) has index y*earth_width + x.
number_of_cells = earth_width * earth_height
S = dok_matrix((number_of_cells, number_of_cells), dtype=int)
for x in range(earth_width):
    for y in range(earth_height):
        curr = (x, y)
        if passable_locations_earth[curr]:
            val = y*earth_width + x
            for coord in explore.coord_neighbors(curr):
                if passable_locations_earth[coord]:
                    val2 = coord[1]*earth_width + coord[0]
                    # Undirected edge between two passable neighbouring cells.
                    S[val, val2] = 1
                    S[val2, val] = 1
# All-pairs shortest paths (Dijkstra, every edge counted as one step).
bfs_array = csgraph.shortest_path(S, method = 'D', unweighted = True)
#bfs_dict = {} # stores the distances found by BFS so far
#precomputed_bfs = explore.precompute_earth(passable_locations_earth, coord_to_direction, wavepoints)
#start_time = time.time()
def construct_matrix(self):
    """Load ``train.txt`` / ``test.txt`` and derive the interaction and adjacency data.

    Each non-blank line is ``user_id item_id item_id ...`` separated by
    whitespace.  Blank lines, repeated or trailing separators and users
    without items are tolerated.  A malformed (non-integer) line raises
    ``ValueError`` in the train file and is skipped in the test file.

    Sets ``n_users``, ``n_items`` (largest id + 1; users are counted from
    the train file only), ``n_train``, ``n_test``, ``exist_users``, the
    float32 ``dok_matrix`` ``R`` of training interactions, ``train_items``,
    ``test_set``, ``items_arr`` (sorted training item ids), the CSR matrices
    ``u2i_matrix`` / ``i2u_matrix`` and the four structures returned by
    ``self.construct_adj`` (``u2i_adj``, ``i2u_adj``, ``u2u_adj``,
    ``i2i_adj``).
    """
    train_path = self.path + '/train.txt'
    test_path = self.path + '/test.txt'

    def read_rows(filename, skip_malformed):
        """Return ``[(uid, [item, ...]), ...]`` parsed from ``filename``."""
        rows = []
        with open(filename) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue
                try:
                    ids = [int(tok) for tok in fields]
                except ValueError:
                    if skip_malformed:
                        continue
                    raise
                rows.append((ids[0], ids[1:]))
        return rows

    self.n_users, self.n_items = 0, 0
    self.n_train, self.n_test = 0, 0
    self.neg_pools = {}
    self.exist_users = []

    train_rows = read_rows(train_path, skip_malformed=False)
    test_rows = read_rows(test_path, skip_malformed=True)

    for uid, items in train_rows:
        self.exist_users.append(uid)
        if items:
            self.n_items = max(self.n_items, max(items))
        self.n_users = max(self.n_users, uid)
        self.n_train += len(items)
    for _, items in test_rows:
        if items:
            self.n_items = max(self.n_items, max(items))
        self.n_test += len(items)
    # Ids are zero-based, so the counts are the largest id plus one.
    self.n_items += 1
    self.n_users += 1

    start_ts = time()
    items_set = set()
    self.R = sp.dok_matrix((self.n_users, self.n_items), dtype=np.float32)
    self.train_items, self.test_set = {}, {}
    for uid, items in train_rows:
        for i in items:
            self.R[uid, i] = 1.
            items_set.add(i)
        self.train_items[uid] = items
    for uid, items in test_rows:
        self.test_set[uid] = items

    self.items_arr = np.sort(np.asarray(list(items_set)))
    self.u2i_matrix = self.R.tocsr()
    self.i2u_matrix = self.R.transpose().tocsr()
    # Co-occurrence products: users sharing items / items sharing users.
    u2u_matrix = self.u2i_matrix * self.i2u_matrix
    i2i_matrix = self.i2u_matrix * self.u2i_matrix
    self.u2i_adj = self.construct_adj(self.u2i_matrix, self.n_users)
    self.i2u_adj = self.construct_adj(self.i2u_matrix, self.n_items)
    self.u2u_adj = self.construct_adj(u2u_matrix, self.n_users)
    self.i2i_adj = self.construct_adj(i2i_matrix, self.n_items)
    end_ts = time()
    print('Construct adjust matrix. Time: {:5.3f}'.format(end_ts - start_ts))
def convertCompatible(h, J):
    """Convert a bias sequence and a coupling matrix into plain dicts.

    ``h`` becomes ``{index: value}``; ``J`` is passed through
    ``sparse.dok_matrix`` and becomes ``{(row, col): value}`` holding only
    the entries the DOK matrix stores.
    """
    linear = {}
    for position in range(len(h)):
        linear[position] = h[position]
    dok = sparse.dok_matrix(J)
    couplings = dict(zip(dok.keys(), dok.values()))
    return linear, couplings
def main(param2val):  # param2val appears auto-magically via Ludwig
    """Build a word-word co-occurrence dictionary and pickle it to ``save_path``.

    Steps: load the vocabulary file ``<project_path>/data/<vocab_name>.txt``,
    tokenize the article bodies of the ``CreateWikiCorpus`` run named by
    ``cwc_param_name``, build the sparse co-occurrence matrix, convert it to
    a ``{(word1, word2): frequency}`` dict and write it to
    ``<save_path>/ww2cf.pkl``.

    :param param2val: dict with the keys ``cwc_param_name``, ``window_size``,
        ``window_weight``, ``window_type``, ``vocab_name``,
        ``article_coverage``, ``project_path`` and ``save_path``.
    :return: an empty list.
    :raises FileNotFoundError: if the vocabulary file does not exist.
    """
    cwc_param_name = param2val['cwc_param_name']
    window_size = param2val['window_size']
    window_weight = param2val['window_weight']
    window_type = param2val['window_type']
    vocab_name = param2val['vocab_name']
    article_coverage = param2val['article_coverage']

    # added by Ludwig
    project_path = Path(param2val['project_path'])
    save_path = Path(param2val['save_path'])  # all data that is saved must be saved here

    for k, v in param2val.items():
        print(k, v)

    # step 0
    print('Making vocab...')
    vocab_path = project_path / 'data' / '{}.txt'.format(vocab_name)
    if not vocab_path.exists():
        raise FileNotFoundError('{} not found on server'.format(vocab_path))
    vocab = SortedSet(vocab_path.read_text().split('\n'))
    vocab.discard('')  # not sure why empty string is in vocab - but it is
    assert len(vocab) > 0
    print('Loaded {} words from vocab'.format(len(vocab)))

    # step 1
    print('Tokenizing...', flush=True)
    param_path = project_path.parent / 'CreateWikiCorpus' / 'runs' / cwc_param_name
    bodies_path = get_text_file_path(param_path, 'bodies')
    titles_path = get_text_file_path(param_path, 'titles')
    # "wc -l" says there is 1 less line
    num_docs = len(titles_path.read_text().split('\n')) - 1
    print(f'Number of articles in text file={num_docs}')
    tokenized_docs = gen_tokenized_articles(bodies_path, num_docs)  # this also lower-cases

    # step 2
    print('Making co-occurrence matrix', flush=True)
    # python 3 integers have dynamic size
    id2w = dict(enumerate(vocab))
    w2id = {w: n for n, w in id2w.items()}
    max_num_docs = int(num_docs * article_coverage)
    cooc_matrix = make_sparse_ww_matrix(
        tokenized_docs,
        w2id,
        max_num_docs=max_num_docs,
        window_size=window_size,
        window_type=window_type,
        window_weight=window_weight,
    )
    # Only dump the full structures for tiny matrices.
    verbose = cooc_matrix.size < 1000

    ids2cf = sparse.dok_matrix(cooc_matrix)
    print('Converting sparse matrix to dictionary...', flush=True)
    ww2cf = {(id2w[i1], id2w[i2]): cf for (i1, i2), cf in ids2cf.items()}

    # check
    if verbose:
        print(w2id)
        print(cooc_matrix.toarray())
        print(cooc_matrix.shape)
        print(ww2cf)

    # step 3 - save the dictionary containing co-occurrence frequencies to Ludwig-supplied save_path
    print('Saving dictionary to disk...')
    ww2cf_path = save_path / 'ww2cf.pkl'
    ww2cf_path.parent.mkdir(parents=True, exist_ok=True)
    # The context manager guarantees the pickle is flushed and closed.
    with ww2cf_path.open('wb') as f:
        pickle.dump(ww2cf, f)

    print(
        "Emily is done making a wiki co-occurrence dictionary! Wait for the folders to finish moving!"
    )
    return []
# Python 2 script fragment: `f` and `t2i` are created before this excerpt.
# First pass: give every id that occurs in the file an index.
i2t = {}
for x in f.xreadlines():
    t_ids = map(lambda x: int(x), x.split(" "))
    for t_id in t_ids:
        if t_id in t2i:
            i = t2i[t_id]
        else:
            # NOTE(review): indices start at 1 (len + 1), yet the matrix
            # below is n x n, so index n is out of range -- confirm whether
            # zero-based indices were intended.
            i = len(t2i) + 1
            t2i[t_id] = i
            i2t[i] = t_id
from scipy.sparse import dok_matrix
import numpy as np
n = len(t2i)
A = dok_matrix((n, n), dtype=np.int8)
print n
# Second pass: the first id on each line is the source, the rest are
# destinations; A[dest, source] = 1 marks the edge.
# NOTE(review): neither file handle is closed explicitly.
f = open("social_graph.txt")
for x in f.xreadlines():
    t_ids = map(lambda x: int(x), x.split(" "))
    # NOTE(review): `source` is the raw id while `dest` is a mapped index --
    # confirm whether t2i[t_ids[0]] was intended.
    source = t_ids[0]
    for t_id in t_ids[1:]:
        dest = t2i[t_id]
        print dest, source
        A[dest, source] = 1
import scipy.io as sio
# NOTE(review): only `sio` is imported here; unless `savemat` is imported
# elsewhere in the file this call raises NameError (sio.savemat?).
savemat('A.mat', dict(A=A))
def test_pairwise_distances_argmin_min():
    """Exercise pairwise_distances_argmin(_min) across metrics and input kinds."""
    # Tiny fixture: one feature, two query points, two reference points.
    X = [[0], [1]]
    Y = [[-2], [3]]
    sparse_X = dok_matrix(X)
    sparse_Y = csr_matrix(Y, dtype=np.float32)

    want_idx = [0, 1]
    want_dist = [2, 2]
    want_dist_sq = [4, 4]

    # --- euclidean, dense input
    got_idx, got_dist = pairwise_distances_argmin_min(X, Y, metric="euclidean")
    got_idx_only = pairwise_distances_argmin(X, Y, metric="euclidean")
    assert_array_almost_equal(got_idx, want_idx)
    assert_array_almost_equal(got_idx_only, want_idx)
    assert_array_almost_equal(got_dist, want_dist)

    # --- euclidean, sparse input
    sp_idx, sp_dist = pairwise_distances_argmin_min(sparse_X, sparse_Y,
                                                    metric="euclidean")
    assert_array_almost_equal(sp_idx, want_idx)
    assert_array_almost_equal(sp_dist, want_dist)
    # Results must be plain ndarrays, not np.matrix.
    assert_equal(type(sp_idx), np.ndarray)
    assert_equal(type(sp_dist), np.ndarray)

    # --- squared euclidean
    got_idx, got_dist = pairwise_distances_argmin_min(
        X, Y, metric="euclidean", metric_kwargs={"squared": True})
    assert_array_almost_equal(got_idx, want_idx)
    assert_array_almost_equal(got_dist, want_dist_sq)

    # --- manhattan (non-euclidean scikit-learn metric), dense input
    got_idx, got_dist = pairwise_distances_argmin_min(X, Y, metric="manhattan")
    got_idx_only = pairwise_distances_argmin(X, Y, metric="manhattan")
    assert_array_almost_equal(got_idx, want_idx)
    assert_array_almost_equal(got_idx_only, want_idx)
    assert_array_almost_equal(got_dist, want_dist)

    # --- manhattan, sparse input
    sp_idx, sp_dist = pairwise_distances_argmin_min(sparse_X, sparse_Y,
                                                    metric="manhattan")
    assert_array_almost_equal(sp_idx, want_idx)
    assert_array_almost_equal(sp_dist, want_dist)

    # --- scipy metric passed as a callable
    got_idx, got_dist = pairwise_distances_argmin_min(
        X, Y, metric=minkowski, metric_kwargs={"p": 2})
    assert_array_almost_equal(got_idx, want_idx)
    assert_array_almost_equal(got_dist, want_dist)

    # --- scipy metric passed by name
    got_idx, got_dist = pairwise_distances_argmin_min(
        X, Y, metric="minkowski", metric_kwargs={"p": 2})
    assert_array_almost_equal(got_idx, want_idx)
    assert_array_almost_equal(got_dist, want_dist)

    # --- chunked implementation against a naive full-matrix computation
    rng = np.random.RandomState(0)
    X = rng.randn(97, 149)
    Y = rng.randn(111, 149)

    full = pairwise_distances(X, Y, metric="manhattan")
    naive_idx = full.argmin(axis=0)
    naive_dist = full[naive_idx, range(len(naive_idx))]

    chunked_idx, chunked_dist = pairwise_distances_argmin_min(
        X, Y, axis=0, metric="manhattan")
    np.testing.assert_almost_equal(naive_idx, chunked_idx, decimal=7)
    np.testing.assert_almost_equal(naive_dist, chunked_dist, decimal=7)

    # --- batch_size must trigger the deprecation warning
    assert_warns_message(DeprecationWarning, "version 0.22",
                         pairwise_distances_argmin_min, X, Y,
                         batch_size=500, metric='euclidean')
def deviceid_app():
    """Build the device-by-app interaction matrix and its lookup tables.

    Reads ``deviceid_packages.csv`` from the module-level ``file_path``; the
    ``add_id_list`` column holds each device's apps as one ``'|'``-separated
    string.  The following files are written to the current working
    directory:

    * ``devForapp.pkl``: app index -> set of device indices
    * ``appFordev.pkl``: device index -> set of app indices
    * ``dev_app_Scores`` (Matrix Market): sparse device x app 0/1 matrix
    * ``dev_Index.pkl`` / ``app_Index.pkl``: raw id -> index
    * ``unique_dev_Pairs.pkl``: pairs of devices sharing an app
    * ``unique_app_Pairs.pkl``: pairs of apps installed on the same device

    Pairs are only generated from groups with more than two members.
    """
    deviceid_packages = pd.read_csv(file_path + 'deviceid_packages.csv')
    deviceid_packages['add_list'] = deviceid_packages['add_id_list'].apply(
        lambda text: text.split('|'))

    # Count the distinct devices and apps present in the data.
    unique_deviceid = set(deviceid_packages['device_id'].values.tolist())
    # The column holds lists (unhashable), so collect the individual app ids.
    unique_app = {
        app for apps in deviceid_packages['add_list'] for app in apps
    }
    n_unique_deviceid = len(unique_deviceid)
    n_unique_app = len(unique_app)
    print("number of uniqueUsers :%d" % n_unique_deviceid)
    print("number of uniqueEvents :%d" % n_unique_app)

    # Sparse device x app relation matrix; usable later as input to
    # LFM / SVD++ style models.
    dev_app_Scores = ss.dok_matrix((n_unique_deviceid, n_unique_app))
    # Re-encode raw device and app ids as consecutive indices.
    dev_Index = {u: i for i, u in enumerate(unique_deviceid)}
    app_Index = {e: i for i, e in enumerate(unique_app)}

    # Apps per device / devices per app.
    appFordev = defaultdict(set)
    devForapp = defaultdict(set)
    records = deviceid_packages.loc[:, ['device_id', 'add_list']].to_dict(
        orient='records')
    for record in records:
        i = dev_Index[record.get('device_id', '')]   # device
        for app in record.get('add_list', ''):
            j = app_Index[app]                       # app
            appFordev[i].add(j)
            devForapp[j].add(i)
            dev_app_Scores[i, j] = 1

    def dump(obj, filename):
        """Pickle ``obj`` to ``filename``, closing the file afterwards."""
        with open(filename, 'wb') as fh:
            pk.dump(obj, fh)

    dump(devForapp, "devForapp.pkl")
    dump(appFordev, "appFordev.pkl")
    # Keep the device-app relation matrix for later use.
    sio.mmwrite("dev_app_Scores", dev_app_Scores)
    dump(dev_Index, "dev_Index.pkl")
    dump(app_Index, "app_Index.pkl")

    # To avoid needless computation later, collect only related pairs:
    # related devices share at least one app, related apps share at least
    # one device.
    unique_dev_Pairs = set()
    unique_app_Pairs = set()
    for app in unique_app:
        devices = devForapp[app_Index[app]]
        if len(devices) > 2:
            unique_dev_Pairs.update(itertools.combinations(devices, 2))
    for deviceid in unique_deviceid:
        apps = appFordev[dev_Index[deviceid]]
        if len(apps) > 2:
            unique_app_Pairs.update(itertools.combinations(apps, 2))

    dump(unique_dev_Pairs, "unique_dev_Pairs.pkl")
    dump(unique_app_Pairs, "unique_app_Pairs.pkl")
# Python 2 script fragment; `vocab`, `fix_vocab`, `xml_src`, `max_patients`
# and `visitShelf` are defined before this excerpt.
print 'done with round 1'
sys.stdout.flush()
if not fix_vocab:
    # Keep only the words whose stored value exceeds 40 and number them in
    # list order.
    vocab = [w for w in vocab if vocab[w] > 40]
    inv_vocab = dict(zip(vocab, xrange(len(vocab))))
else:
    # Reuse a previously saved vocabulary.
    # NOTE(review): pickle.load on a file is only safe for trusted data.
    vocab, inv_vocab, _, = pickle.load(file('vocab.pk'))
#for pat in pool.imap_unordered(realPatient, real_patient_generator(src=xml_src, max_patients=max_patients), chunksize=100):
for n, pat in enumerate(
        real_patient_generator(src=xml_src, max_patients=max_patients)):
    pat = realPatient(pat)
    # Distinct '|'-separated tokens of the record's text.
    txt = set(pat['Text'].split('|'))
    # 1 x |vocab| indicator row: 1 where a vocabulary word occurs in the text.
    m = sparse.dok_matrix((1, len(vocab)))
    for w in txt:
        if w in inv_vocab:
            m[0, inv_vocab[w]] = 1
    pat['sparse_X'] = m
    index = pat['index']
    # Progress report every 100 records.
    if n % 100 == 0:
        print n
        sys.stdout.flush()
    visitShelf[index] = pat
print 'done with round 2'
sys.stdout.flush()
visitShelf.close()
def construct_linear_system(self):
    """
    Construct the sparse matrix ``self.A`` and right-hand side ``self.b``.

    Cells listed in ``self.dirichlet_bcs`` are dropped from the unknowns;
    their known values are moved to the right-hand side.  ``self.c_map``
    maps a real cell index to its row/column in the reduced system.

    More algorithms are available if the matrix is symmetric, which
    requires evaluating the Dirichlet BCs rather than putting them in
    the matrix.

    Sets ``self.Ncalc``, ``self.is_calc_c``, ``self.c_map``, ``self.A``
    (a ``coo_matrix``) and ``self.b``.
    """
    N = self.grid.Ncells()
    Nbc = len(self.dirichlet_bcs)
    self.Ncalc = Ncalc = N - Nbc

    # map cells to forced values
    dirichlet = {c: v for c, v, xy in self.dirichlet_bcs}

    self.is_calc_c = is_calc_c = np.ones(N, bool)
    for c, v, xy in self.dirichlet_bcs:
        is_calc_c[c] = False

    # c_map is indexed by real cell indices, and returns the matrix index
    c_map = self.c_map = np.zeros(N, np.int32)
    c_map[is_calc_c] = np.arange(Ncalc)

    dzc = self.dzc
    dzf = self.dzf
    area_c = self.area_c

    meth = 'coo'  # or 'dok'
    use_dok = (meth == 'dok')
    if use_dok:
        A = sparse.dok_matrix((Ncalc, Ncalc), np.float64)
    else:
        # construct the matrix from a sequence of indices and values;
        # successive values for the same (i, j) are summed by coo_matrix
        ij = []
        values = []

    def add_entry(row, col, val):
        # Accumulate val into A[row, col] for whichever backend is active.
        if use_dok:
            A[row, col] += val
        else:
            ij.append((row, col))
            values.append(val)

    b = np.zeros(Ncalc, np.float64)
    flux_per_gradient_j = -self.K_j * self.l_j * dzf / self.d_j * self.dt

    self.grid.edge_to_cells()  # makes sure that edges['cells'] exists.

    for j in range(self.grid.Nedges()):
        if self.grid.edges['cells'][j, 1] < 0:
            continue  # boundary edge

        e = self.grid.edges[j]
        flux_per_gradient = flux_per_gradient_j[j]

        # this is the desired operation:
        #  Cdiff[ic1] -= flux_per_gradient / (An[ic1]*dzc) * (C[ic2] - C[ic1])
        #  Cdiff[ic2] += flux_per_gradient / (An[ic2]*dzc) * (C[ic2] - C[ic1])
        # Where Cdiff is row, C is col
        ic1, ic2 = e['cells']

        if is_calc_c[ic1] and is_calc_c[ic2]:
            mic1 = c_map[ic1]
            mic2 = c_map[ic2]
            v1 = flux_per_gradient / (area_c[ic1] * dzc[ic1])
            v2 = flux_per_gradient / (area_c[ic2] * dzc[ic2])
            add_entry(mic1, mic2, -v1)
            add_entry(mic1, mic1, v1)
            # Row mic2 is scaled by cell 2's own area and thickness (v2).
            add_entry(mic2, mic2, v2)
            add_entry(mic2, mic1, -v2)
        elif not (is_calc_c[ic1] or is_calc_c[ic2]):
            # both are dirichlet, so nothing to do
            pass
        elif not is_calc_c[ic2]:
            mic1 = c_map[ic1]
            v = flux_per_gradient / (area_c[ic1] * dzc[ic1])
            add_entry(mic1, mic1, v)
            # roughly
            #  A[1,1]*x[1] + A[1,2]*x[2] + ... = b[1]
            # but we already know x[2],
            #  A[1,1]*x[1] + ... = b[1] - A[1,2]*x[2]
            # so flip the sign, multiply by known dirichlet value, and
            # add to the RHS
            b[mic1] += v * dirichlet[ic2]
        else:  # not is_calc_c[ic1]
            mic2 = c_map[ic2]
            # A[mic2,mic2]*x[2] + A[mic2,mic1]*x[1] = b[2]
            # ...
            # A[mic2,mic2]*x[2] = b[2] + flux_per_gradient / (area_c[ic2]*dzc[ic2])*x[1]
            v = flux_per_gradient / (area_c[ic2] * dzc[ic2])
            add_entry(mic2, mic2, v)
            b[mic2] += v * dirichlet[ic1]

    # A scalar zero alpha (of any numeric type) means "no decay term";
    # anything else is indexed per cell.
    if not (np.isscalar(self.alpha) and self.alpha == 0):
        for c in range(N):
            if is_calc_c[c]:
                mic = c_map[c]
                add_entry(mic, mic, -(self.alpha[c] * self.dt))

    # Flux boundary conditions:
    for ic, value, xy in self.neumann_bcs:
        mic = c_map[ic]
        # make mass/time into concentration/step
        # arrived at minus sign by trial and error.
        # Scale by the geometry of the cell receiving the flux (ic).
        b[mic] -= value / (area_c[ic] * dzc[ic]) * self.dt

    if use_dok:
        self.A = sparse.coo_matrix(A)
    else:
        # reshape keeps the (n, 2) layout even when no entries were added
        ijs = np.array(ij, dtype=np.int32).reshape(-1, 2)
        data = np.array(values, dtype=np.float64)
        A = sparse.coo_matrix((data, (ijs[:, 0], ijs[:, 1])),
                              shape=(Ncalc, Ncalc))
        self.A = A

    # report scale to get a sense of whether dt is too large
    Ascale = A.diagonal().min()
    log.debug("Ascale is %s", Ascale)

    self.b = b
proportion_lower = 1. - proj % 1 A[x + y * shape[0], offset + lower_sensor_pixel] += proportion_lower if upper_sensor_pixel != lower_sensor_pixel: proportion_upper = 1. - proportion_lower A[x + y * shape[0], offset + upper_sensor_pixel] += proportion_upper return A if __name__ == "__main__": image_y = np.load("hs_tomography_2/y_77_.npy") image_alphas = np.load("hs_tomography_2/y_77_alphas.npy").astype("float") image_flattened = image_y.flatten() c = np.array([-77, -33, -12, -3, 21, 42, 50, 86]).astype("float") a = makeA_jens((77, 77), image_alphas).transpose() import matplotlib.pyplot as plt plt.imshow(a.transpose(), cmap="gray", interpolation="none") plt.close() # plt.show() a_sparse = dok_matrix(a) res = lsqr(a_sparse, image_y) res_new = res.reshape((77, 77)) plt.imshow(res_new, cmap="gray") plt.show() IPython.embed()
def build(um_dict, output_filename, latent_factors, wnmf_iterations,
          user_id_dict, business_id_dict):
    """Factorize the utility matrix with weighted NMF.

    Args:
        um_dict: nested mapping ``{user_key: {business_key: value}}``.
        output_filename: prefix for the ``u.csv`` / ``v.csv`` output files.
        latent_factors: number of latent factors (inner dimension of u, v).
        wnmf_iterations: maximum number of multiplicative-update iterations.
        user_id_dict: path to a JSON file mapping user key -> row index.
        business_id_dict: path to a JSON file mapping business key ->
            column index.

    Returns:
        ``(u, v, log_data)`` where ``u`` and ``v`` are DataFrames holding the
        factors and ``log_data`` is ``"<iterations>,<last change>"``.
    """
    print('id dicts loading')
    with open(user_id_dict, 'r') as f:
        user_id_dict = json.load(f)
    with open(business_id_dict, 'r') as f:
        business_id_dict = json.load(f)

    print('loading um')
    um_dok = sps.dok_matrix((len(user_id_dict), len(business_id_dict)),
                            dtype=np.int8)
    for user_key, ratings in um_dict.items():
        row = user_id_dict[user_key]
        for business_key, rating in ratings.items():
            um_dok[row, business_id_dict[business_key]] = rating
    a = um_dok.tocsr()

    missing_u, missing_b = find_missing(set(user_id_dict.keys()),
                                        set(business_id_dict.keys()), um_dict)
    del um_dict

    u = np.random.random(size=(len(user_id_dict), latent_factors))
    v = np.random.random(size=(latent_factors, len(business_id_dict)))
    # Factors of users/businesses reported missing start (and stay) at zero.
    for key in missing_u:
        u[user_id_dict[key], :] = 0
    for key in missing_b:
        v[:, business_id_dict[key]] = 0

    # NOTE: this changes NumPy's print options process-wide.
    np.set_printoptions(threshold=np.inf)
    print(u)

    # Weight matrix: 1 wherever ``a`` holds a nonzero value, 0 elsewhere.
    # Setting the stored values in bulk avoids one sparse assignment per entry.
    w = a.copy()
    w.eliminate_zeros()
    w.data[:] = 1

    eps = 0.0000001  # guards the multiplicative updates against division by 0
    i = 0
    prev_norm = 0
    curr_norm = 0
    change = 999999
    print('starting wnmf loop')
    while i < wnmf_iterations and change > 2:
        print('iteration ' + str(i))

        # ``*`` between a scipy sparse matrix and a dense array is a matrix
        # product; ``multiply`` is element-wise.
        vt = v.transpose()
        u_num = a * vt
        u_denom = w.multiply(np.matmul(u, v)) * vt
        # Both terms were computed from the old u, so the element-wise
        # update can be applied to the whole array at once.
        u *= np.asarray(u_num) / (np.asarray(u_denom) + eps)

        ut = u.transpose()
        v_num = ut * a
        v_denom = ut * w.multiply(np.matmul(u, v))
        v *= np.asarray(v_num) / (np.asarray(v_denom) + eps)

        i += 1

        # Norm of the weighted residual  a - w .* (u v).
        residual = a - w.multiply(np.matmul(u, v))
        norm = math.sqrt(residual.power(2).sum())

        prev_norm = curr_norm
        curr_norm = norm
        change = math.fabs(curr_norm - prev_norm)
        print(change)

    u = pd.DataFrame(u)
    v = pd.DataFrame(v)
    u.to_csv((output_filename + 'u.csv'))
    v.to_csv((output_filename + 'v.csv'))
    log_data = str(i) + ',' + str(change)
    return u, v, log_data
tags = package_keywords_dict.get(each_dep, []) current_tags.extend(tags) current_tags_index_wise = [ map_keywords_dict[keyword] for keyword in current_tags ] rating_matrix_dict[map_packages_dict[package_name]] = list( set(current_tags_index_wise)) print("Delete apckage/dep/key dict") del (package_dependencies_dict) del (package_keywords_dict) print("Generate Sparse Matrix using the Package_Aggregated_Tags_Dict") # Generate Sparse Matrix using the Package_Aggregated_Tags_Dict sparse_mat = sp.dok_matrix((len(map_packages_dict), len(map_keywords_dict)), dtype=np.int64) for package_id, tag_ids in rating_matrix_dict.items(): sparse_mat[package_id, tag_ids] = 1 print("Generate Sparse Coordinate matrix") # Generate the Sparse Cooridnate Matrix sparse_coo = sparse_mat.tocoo() # print("Delete sparse mat temp") # del(sparse_mat) indices = np.mat([sparse_coo.row, sparse_coo.col]).transpose() print("Genearte the Sparse Tensor using Sparse Coo. Matrix") # Genearte the Sparse Tensor using Sparse Coo. Matrix rating_matrix = tf.SparseTensor(indices, sparse_coo.data, sparse_coo.shape) print("Size of content matrix = {}".format(rating_matrix.get_shape()))
def getAMatrix(self):
    '''Return A, a sparse matrix consisting of response functions.

    A is filled as a (self.Np x self.Nd) matrix -- map pixels by data
    pairs -- and is returned transposed, in CSR format.'''
    A = dok_matrix((self.Np, self.Nd), dtype=np.float32)
    sigma = (np.sqrt(4 * np.pi / (12 * self.Nside**2))) / self.cutoff  ## SY 8/3/19
    print("sigma=", sigma / np.pi * 180, 'deg')

    def offset_and_response(qtheta, qphi, centre):
        # 3-vector from the pixel centre to each object, plus the response
        # along that vector.
        ox = sin(qtheta) * cos(qphi) - centre[0]
        oy = sin(qtheta) * sin(qphi) - centre[1]
        oz = cos(qtheta) - centre[2]
        dist = np.sqrt(ox**2 + oy**2 + oz**2)
        resp = (1 / dist) * (1 - exp(-dist**2 / (2 * sigma**2)))  ## SY
        return ox, oy, oz, resp

    ## we loop over pixels
    for i, hpix in enumerate(self.pixid):
        ## first find nearby healpixels
        theta = self.pixtheta[i]
        phi = self.pixphi[i]
        mvec = (sin(theta) * cos(phi), sin(theta) * sin(phi), cos(theta))
        neipixels = hp.query_disc(self.Nside, mvec, self.sradius)
        assert (hpix in neipixels)

        ## data entries with either member lying in one of the nearby pixels
        near = np.isin(self.d.hi1, neipixels) | np.isin(self.d.hi2, neipixels)
        s = np.where(near)[0]

        ### so at this point we have, for map pixel i, the list of data
        ### entries that are close enough to matter; get their matrix elements
        first = self.d.i1[s]
        second = self.d.i2[s]
        ## we are going to employ 3 vectors as a foolproof method
        dx1, dy1, dz1, response1 = offset_and_response(
            self.q.theta[first], self.q.phi[first], mvec)
        ## ditto for q2
        dx2, dy2, dz2, response2 = offset_and_response(
            self.q.theta[second], self.q.phi[second], mvec)

        ## now we take the difference
        dxr = dx1 * response1 - dx2 * response2
        dyr = dy1 * response1 - dy2 * response2
        dzr = dz1 * response1 - dz2 * response2
        ## the difference in vector
        dx = dx1 - dx2
        dy = dy1 - dy2
        dz = dz1 - dz2
        ## total response is movement/distance
        totresponse = (dxr * dx + dyr * dy + dzr * dz) / (
            dx * dx + dy * dy + dz * dz)
        ## we downweigh response by weight
        totresponse *= np.sqrt(self.d.weight[s])
        A[i, s] = totresponse

        if i % 100 == 0:
            print(i)

    print("Transposing matrix.")
    A = A.transpose()
    print("A.tocsr")
    A = A.tocsr()
    return A
def set_topology(self):
    """
    Derive the remaining topology from the unstructured grid ``self.grd``
    and allocate the per-cell / per-edge state arrays and matrices.
    """
    grd = self.grd

    self.nedges = grd.Nedges()
    self.ncells = grd.Ncells()
    self.nnodes = grd.Nnodes()

    grd.update_cell_edges()
    grd.update_cell_nodes()
    grd.edge_to_cells()
    grd.cells_area()
    grd.cells['_center'] = grd.cells_center()

    # mark: 0 = internal edge (the default), 1 = boundary edge
    grd.edges['mark'] = 0
    self.extern = np.where(np.min(grd.edges['cells'], axis=1) < 0)[0]
    grd.edges['mark'][self.extern] = 1
    self.intern = np.where(grd.edges['mark'] == 0)[0]
    self.nedges_intern = len(self.intern)  # number of internal edges

    self.exy = grd.edges_center()
    self.en = grd.edges_normals()
    self.len = grd.edges_length()

    # Reflect edge neighbors at boundaries: a negative (missing) neighbor
    # is replaced by the cell on the other side of the edge.  nc1/nc2 are
    # views, so the second line sees the already-updated first column.
    ii = grd.edge_to_cells().copy()
    nc1 = ii[:, 0]
    nc2 = ii[:, 1]
    ii[:, 0] = np.where(nc1 >= 0, nc1, nc2)
    ii[:, 1] = np.where(nc2 >= 0, nc2, nc1)
    self.edge_to_cells_reflect = ii

    # number of valid sides for each cell
    self.ncsides = np.asarray(
        [sum(jj >= 0) for jj in grd.cells['edges']])

    # Used to be in prepare_to_run().
    # But anything that depends only on the grid should be here
    self.set_edge_cell_spacings()  # sets self.dc, dist, alpha
    self.sil = self.get_sign_array()

    def cell_zeros():
        return np.zeros(self.ncells, np.float64)

    def edge_zeros():
        return np.zeros(self.nedges, np.float64)

    # cell center values
    self.ei = cell_zeros()  # water surface elevation, cells
    self.vi = cell_zeros()  # cell volumes
    self.pi = cell_zeros()  # cell wetted areas

    # edge values
    self.uj = edge_zeros()  # normal velocity at side
    self.qj = edge_zeros()  # normal velocity*h at side
    self.aj = edge_zeros()  # edge wet areas
    self.cf = edge_zeros()  # edge friction coefs
    self.zj = edge_zeros()  # edge depth -- to replace w/subgrid
    self.cfterm = edge_zeros()  # edge friction coefs - term for matrices

    # Matrix
    shape = (self.ncells, self.ncells)
    self.Ai = sparse.dok_matrix(shape, np.float64)
    self.bi = cell_zeros()
    self.Ao = sparse.dok_matrix(shape, np.float64)  # outer iterations
    self.bo = cell_zeros()
    self.x0 = cell_zeros()
None, reduce_func=_reduce_func, working_memory=2**-16) assert isinstance(S_chunks, GeneratorType) S_chunks = list(S_chunks) assert len(S_chunks) > 1 # atol is for diagonal where S is explicitly zeroed on the diagonal assert_allclose(np.vstack(S_chunks), S, atol=1e-7) @pytest.mark.parametrize('good_reduce', [ lambda D, start: list(D), lambda D, start: np.array(D), lambda D, start: csr_matrix(D), lambda D, start: (list(D), list(D)), lambda D, start: (dok_matrix(D), np.array(D), list(D)), ]) def test_pairwise_distances_chunked_reduce_valid(good_reduce): X = np.arange(10).reshape(-1, 1) S_chunks = pairwise_distances_chunked(X, None, reduce_func=good_reduce, working_memory=64) next(S_chunks) @pytest.mark.parametrize(('bad_reduce', 'err_type', 'message'), [ (lambda D, s: np.concatenate([D, D[-1:]]), ValueError, r'length 11\..* input: 10\.'), (lambda D, s: (D, np.concatenate([D, D[-1:]])), ValueError, r'length \(10, 11\)\..* input: 10\.'),
def RP_AddExonRemovePromoter(peaks_info, genes_info_full, genes_info_tss,
                             decay):
    """Multiple processing function to calculate regulation potential.

    Records are sequences that are merged and sorted together.  A record
    whose third-from-last field equals 1 is a gene, anything else is a peak;
    the last field is the row (gene) or column (peak) index of the result.

    Fields read from the records:

    * peak: ``[0]`` is compared with the gene's ``[0]`` for equality
      (presumably the chromosome), ``[1]`` is the peak center.
    * ``genes_info_full``: ``[1]``/``[2]`` gene start/end, ``[3]`` position
      used for the promoter distance, ``[4]`` promoter ``(start, end)``,
      ``[5]`` iterable of exon ``(start, end)``, ``[-4]`` divisor of the exon
      score.
    * ``genes_info_tss``: ``[1]`` position used for the distance decay.

    Scoring:

    1. A peak inside a gene body scores ``1 / g[-4]`` when it lies in an
       exon, else ``2**(-d/decay)`` when it lies in the promoter; peaks in
       other (intron) parts of the body get no score in this step.
    2. Every peak that scored nothing in step 1 scores ``2**(-d/decay)``
       against each gene of ``genes_info_tss`` within ``15 * decay`` on
       either side.

    Returns:
        ``dok_matrix`` of shape ``(len(genes_info_full), len(peaks_info))``.
    """

    def Sg(x):
        return 2**(-x)

    def checkInclude(x, y):
        return y[0] <= x <= y[1]

    gene_distance = 15 * decay
    genes_peaks_score_array = sp_sparse.dok_matrix(
        (len(genes_info_full), len(peaks_info)), dtype=np.float64)

    # ---- step 1: peaks inside gene bodies (exons and promoters) ----------
    peaks_info_inbody = []
    w = genes_info_full + peaks_info
    w.sort()
    A = {}  # genes whose body may still contain upcoming peaks
    for elem in w:
        if elem[-3] == 1:
            A[elem[-1]] = elem
            continue
        dlist = []
        for gene_name, g in A.items():
            ### NOTE: main change here
            ### if peak center in the gene area
            if not (g[0] == elem[0] and g[1] <= elem[1] <= g[2]):
                dlist.append(gene_name)
                continue
            if any(checkInclude(elem[1], exon) for exon in g[5]):
                ### peak center in the exons
                genes_peaks_score_array[gene_name, elem[-1]] = 1.0 / g[-4]
                peaks_info_inbody.append(elem)
            elif checkInclude(elem[1], g[4]):
                ### peak center in the promoter
                tmp_distance = abs(elem[1] - g[3])
                genes_peaks_score_array[gene_name, elem[-1]] = Sg(
                    tmp_distance / decay)
                peaks_info_inbody.append(elem)
            ### intron regions: no score
        for gene_name in dlist:
            del A[gene_name]

    ### remove peaks in promoters and exons
    peaks_info_set = [tuple(i) for i in peaks_info]
    peaks_info_inbody_set = [tuple(i) for i in peaks_info_inbody]
    peaks_info_outbody_set = list(
        set(peaks_info_set) - set(peaks_info_inbody_set))
    peaks_info_outbody = [list(i) for i in peaks_info_outbody_set]

    print("peaks number: ", len(peaks_info_set))
    print("peaks number in gene promoters and exons: ",
          len(set(peaks_info_inbody_set)))
    print("peaks number out gene promoters and exons:",
          len(peaks_info_outbody_set))

    # ---- step 2: distance decay for the remaining peaks ------------------
    def sweep(ordered, downstream):
        # One directional pass.  The active-gene dict starts empty for every
        # pass: genes left over from a previous pass lie on the wrong side of
        # the peaks, giving negative distances and scores above 1.
        active = {}
        for elem in ordered:
            if elem[-3] == 1:
                active[elem[-1]] = elem
                continue
            expired = []
            for gene_name, g in active.items():
                if downstream:
                    tmp_distance = elem[1] - g[1]
                else:
                    tmp_distance = g[1] - elem[1]
                if g[0] == elem[0] and tmp_distance <= gene_distance:
                    genes_peaks_score_array[gene_name, elem[-1]] = Sg(
                        tmp_distance / decay)
                else:
                    expired.append(gene_name)
            for gene_name in expired:
                del active[gene_name]

    w = genes_info_tss + peaks_info_outbody
    w.sort()
    sweep(w, downstream=True)
    w.reverse()
    sweep(w, downstream=False)

    return (genes_peaks_score_array)
def solve_via_data(self, data, warm_start, verbose, solver_opts,
                   solver_cache=None):
    """Build a CPLEX model from ``data`` and solve it.

    ``data[s.A]`` is replaced in place by its ``dok_matrix`` form.

    Returns:
        dict with the CPLEX model under ``"model"`` and, when the solve
        completed without raising, the elapsed time under ``s.SOLVE_TIME``.
    """
    import cplex

    c = data[s.C]
    b = data[s.B]
    # Save the dok_matrix.
    A = data[s.A] = dok_matrix(data[s.A])
    dims = dims_to_solver_dict(data[s.DIMS])
    n = c.shape[0]

    model = cplex.Cplex()

    def var_type(i):
        # 'B'inary takes precedence over 'I'nteger; default is 'C'ontinuous.
        if i in data[s.BOOL_IDX]:
            return 'B'
        if i in data[s.INT_IDX]:
            return 'I'
        return 'C'

    # If we specify types (even with 'C'), then the problem will be
    # interpreted as a MIP.  Leaving vtype empty when there are no
    # boolean/integer variables ensures that the problem type remains an LP.
    if data[s.BOOL_IDX] or data[s.INT_IDX]:
        vtype = [var_type(i) for i in range(n)]
    else:
        vtype = []

    # Add the variables in a batch
    variables = list(model.variables.add(
        obj=[c[i] for i in range(n)],
        lb=[-cplex.infinity]*n,  # default LB is 0
        ub=[cplex.infinity]*n,
        types="".join(vtype),
        names=["x_%d" % i for i in range(n)]))

    # cpx_constrs will contain CpxConstr namedtuples.
    cpx_constrs = []

    # Add equality constraints
    eq_rows = range(dims[s.EQ_DIM])
    cpx_constrs.extend(
        _CpxConstr(_LIN, x)
        for x in self.add_model_lin_constr(
            model, variables, eq_rows, 'E', A, b))

    # Add inequality (<=) constraints
    leq_start = dims[s.EQ_DIM]
    leq_end = dims[s.EQ_DIM] + dims[s.LEQ_DIM]
    cpx_constrs.extend(
        _CpxConstr(_LIN, x)
        for x in self.add_model_lin_constr(
            model, variables, range(leq_start, leq_end), 'L', A, b))

    # Add SOC constraints
    soc_start = leq_end
    for constr_len in dims[s.SOC_DIM]:
        soc_end = soc_start + constr_len
        soc_constr, new_leq, new_vars = self.add_model_soc_constr(
            model, variables, range(soc_start, soc_end), A, b)
        cpx_constrs.append(_CpxConstr(_QUAD, soc_constr))
        cpx_constrs.extend(_CpxConstr(_LIN, x) for x in new_leq)
        variables += new_vars
        soc_start = soc_end

    # Set verbosity
    if not verbose:
        hide_solver_output(model)

    # For CVXPY, we set the qcpduals parameter here, but the user can
    # easily override it via the "cplex_params" solver option (see
    # set_parameters function).
    model.parameters.preprocessing.qcpduals.set(
        model.parameters.preprocessing.qcpduals.values.force)

    # Set parameters
    set_parameters(model, solver_opts)

    # Solve problem.  A failing solve is swallowed on purpose: the caller
    # still receives the model, just without a solve time.
    solution = {"model": model}
    try:
        start_time = model.get_time()
        model.solve()
        solution[s.SOLVE_TIME] = model.get_time() - start_time
    except Exception:
        pass

    return solution
def MottonenStatePreparation(state_vector, wires):
    r"""
    Prepares an arbitrary state on the given wires using a decomposition into
    gates developed by Möttönen et al. (Quantum Info. Comput., 2005).

    The state is prepared via a sequence of "uniformly controlled rotations".
    A uniformly controlled rotation on a target qubit is composed from all
    possible controlled rotations on said qubit and can be used to address
    individual elements of the state vector. In the work of Mottonen et al.,
    the inverse of their state preparation is constructed by first equalizing
    the phases of the state vector via uniformly controlled Z rotations and
    then rotating the now real state vector into the direction of the state
    :math:`|0\rangle` via uniformly controlled Y rotations.

    This code is adapted from code written by Carsten Blank for
    PennyLane-Qiskit.

    Args:
        state_vector (array): Input array of shape ``(2^N,)``, where N is the
            number of wires the state preparation acts on. ``N`` must be
            smaller or equal to the total number of wires.
        wires (Sequence[int]): sequence of qubit indices that the template
            acts on

    Raises:
        ValueError: if inputs do not have the correct format
    """

    ###############
    # Input checks
    wires, n_wires = _check_wires(wires)

    expected_size = 2**n_wires
    msg = "The state vector must be of size {}; got {}.".format(
        expected_size, len(state_vector))
    _check_shape(state_vector, (expected_size, ), msg=msg)

    # check if state_vector is normalized
    if isinstance(state_vector[0], Variable):
        amplitudes = [s.val for s in state_vector]
    else:
        amplitudes = state_vector
    norm = np.sum(np.abs(amplitudes)**2)

    if not np.isclose(norm, 1.0, atol=1e-3):
        raise ValueError(
            "State vector probabilities have to sum up to 1.0, got {}".format(
                norm))

    #######################

    # Change ordering of indices, original code was for IBM machines
    reordered = np.array(state_vector).reshape([2] * n_wires).T.flatten()
    state_vector = sparse.dok_matrix(reordered[:, np.newaxis])

    wires = np.array(wires)

    # Magnitudes (a) and phases (omega) of the stored amplitudes.
    a = sparse.dok_matrix(state_vector.shape)
    omega = sparse.dok_matrix(state_vector.shape)

    for (i, j), v in state_vector.items():
        value = v.val if isinstance(v, Variable) else v
        a[i, j] = np.absolute(value)
        omega[i, j] = np.angle(value)

    # This code is directly applying the inverse of Carsten Blank's
    # code to avoid inverting at the end

    # Apply y rotations
    for k in range(n_wires, 0, -1):
        alpha_y_k = _get_alpha_y(a, n_wires, k)  # type: sparse.dok_matrix
        _uniform_rotation_y_dagger(alpha_y_k, wires[k:], wires[k - 1])

    # Apply z rotations
    for k in range(n_wires, 0, -1):
        alpha_z_k = _get_alpha_z(omega, n_wires, k)
        if len(alpha_z_k) > 0:
            _uniform_rotation_z_dagger(alpha_z_k, wires[k:], wires[k - 1])