def print_network(self, time):
    """Serialise the edge weights of the dynamic (event) network as text.

    The result starts with a ``TIME <time>`` header line. For every node of
    ``self.network`` the ``'mlogp'`` attribute of each of its neighbours in
    ``self.event_network`` is accumulated into a sparse matrix, and every
    non-zero entry is emitted as an ``"<i>; <j>; <weight>"`` line.

    Args:
        time: Time stamp written into the header (five decimals).

    Returns:
        str: The header followed by one line per non-zero matrix entry.
        When ``self.event_network`` is ``None`` only the header is returned.
    """
    lines = ["TIME {0:.5f}\n".format(time)]
    adj_matrix = sparse_matrix((self.node_number, self.node_number))

    # Only the event network contributes weights; the static network is
    # used solely to enumerate the node ids.
    if self.event_network is not None:
        for n in self.network.nodes:
            adj_list = self.event_network.adj[n]
            for nbr in adj_list:
                adj_matrix[n, nbr] += adj_list[nbr]['mlogp']

    row, col = adj_matrix.nonzero()
    # Collect the lines and join once instead of growing a string with +=.
    lines.extend(
        "{0:d}; {1:d}; {2:.5f}\n".format(i, j, adj_matrix[i, j])
        for i, j in zip(row, col)
    )
    return "".join(lines)
def __rotate__(self):
    """Fold the sparse buffer into the sketch and shrink the sketch.

    Three steps, in this order:

    1. ``simIter(self.buffer, self.ell)`` yields ``[Vt, s]`` (presumably
       approximate right singular vectors and singular values -- confirm
       against ``simIter``); the scaled rows are written to the sketch rows
       from index ``ell`` on.
    2. The buffer is replaced by an empty sparse matrix and its counters
       are reset.
    3. A dense SVD of the whole sketch is shrunk into the first ``ell``
       rows; all remaining rows are zeroed.
    """
    ell = self.ell

    # Step 1: compress the buffer into the lower part of the sketch.
    [Vt, s] = simIter(self.buffer, ell)
    rank = len(s)
    if rank < ell:
        # Fewer than ell values: keep them unshrunk.
        self._sketch[ell:ell + rank, :] = dot(diag(s), Vt[:rank, :])
    else:
        # Subtract the ell-th squared value from the leading ell values.
        shrunk = sqrt(s[:ell] ** 2 - s[ell - 1] ** 2)
        self._sketch[ell:, :] = dot(diag(shrunk), Vt[:ell, :])

    # Step 2: start over with an empty buffer.
    del self.buffer
    self.buffer = sparse_matrix((self.buffer_ell, self.d))
    self.buffer_nnz = 0
    self.buffer_nextZeroRow = 0

    # Step 3: dense shrink of the full sketch into its first ell rows.
    [_, s, Vt] = svd(self._sketch, full_matrices=False)
    rank = len(s)
    if rank < ell:
        self._sketch[:rank, :] = dot(diag(s), Vt[:rank, :])
        self._sketch[rank:, :] = 0
    else:
        shrunk = sqrt(s[:ell] ** 2 - s[ell - 1] ** 2)
        self._sketch[:ell, :] = dot(diag(shrunk), Vt[:ell, :])
        self._sketch[ell:, :] = 0
def create_user_item_matrix(ratings, user_key="user", item_key="item"):
    """Build a sparse user-by-item rating matrix plus index mappings.

    Args:
        ratings: Table indexable by column name (e.g. a DataFrame or a dict
            of sequences) with the columns ``user_key``, ``item_key`` and
            ``"rating"``.
        user_key: Name of the column holding user ids.
        item_key: Name of the column holding item ids.

    Returns:
        tuple: ``(X, user_mapper, item_mapper, user_inverse_mapper,
        item_inverse_mapper, user_ind, item_ind)`` where ``X`` has one row
        per distinct user and one column per distinct item, the mappers
        translate id -> index, the inverse mappers index -> id, and
        ``user_ind`` / ``item_ind`` give the row / column index of every
        rating in input order.
    """
    # Compute the sorted distinct ids once and derive the matrix shape from
    # the same arrays, so the shape and the mappers can never disagree.
    unique_users = np.unique(ratings[user_key])
    unique_items = np.unique(ratings[item_key])
    n = len(unique_users)
    d = len(unique_items)

    user_mapper = dict(zip(unique_users, range(n)))
    item_mapper = dict(zip(unique_items, range(d)))
    user_inverse_mapper = dict(zip(range(n), unique_users))
    item_inverse_mapper = dict(zip(range(d), unique_items))

    user_ind = [user_mapper[i] for i in ratings[user_key]]
    item_ind = [item_mapper[i] for i in ratings[item_key]]

    X = sparse_matrix((ratings["rating"], (user_ind, item_ind)), shape=(n, d))
    return X, user_mapper, item_mapper, user_inverse_mapper, item_inverse_mapper, user_ind, item_ind
def create_recipe_ingredients_matrix(data, recipe_id="id", ingredients_key="ingredients"):
    """Write a binary recipe-by-ingredient matrix and its column index to disk.

    Prints the number of recipes, distinct ingredients and distinct
    cuisines, then writes

    * ``../data/train.npz`` -- sparse matrix with one row per recipe (input
      order) and one column per ingredient; an entry is 1 when the recipe
      lists that ingredient;
    * ``../data/ingredients.csv`` -- a header row of ingredient names and
      one data row with each ingredient's column index.

    Args:
        data: Sequence of recipe dicts, each with a ``"cuisine"`` entry and
            an iterable of ingredient names under ``ingredients_key``.
        recipe_id: Key of the recipe identifier. Accepted for backward
            compatibility; the identifier is not part of either output file.
        ingredients_key: Key under which each recipe stores its ingredients.
    """
    # Sorted so that the column order is reproducible between runs; plain
    # set iteration order changes with string hash randomisation.
    ingredients = sorted({i for recipe in data for i in recipe[ingredients_key]})
    cuisines = {recipe["cuisine"] for recipe in data}

    n = len(data)
    d = len(ingredients)
    k = len(cuisines)
    print("Number of recipes:", n)
    print("Number of ingredients:", d)
    print("Different kinds of cuisines:", k)

    # {'ingredient-name': column index}
    ingredient_mapper = {name: index for index, name in enumerate(ingredients)}

    # Build the matrix from coordinates instead of a dense n x d nested
    # list. Each recipe's ingredients are de-duplicated first because the
    # coordinate constructor sums repeated entries.
    rows = []
    cols = []
    for row, recipe in enumerate(data):
        for name in set(recipe[ingredients_key]):
            rows.append(row)
            cols.append(ingredient_mapper[name])
    X = sparse_matrix(
        (np.ones(len(rows), dtype=int), (rows, cols)), shape=(n, d), dtype=int)

    sparse.save_npz('../data/train.npz', X)
    # newline='' is what the csv module requires of files it writes to;
    # without it extra blank rows appear on platforms that translate "\n".
    with open('../data/ingredients.csv', 'w', newline='') as outfile:
        writer = csv.DictWriter(outfile, ingredient_mapper.keys())
        writer.writeheader()
        writer.writerow(ingredient_mapper)
def __rotate__(self):
    """Merge the buffered rows into the sketch, then re-shrink the sketch.

    ``simIter(self.buffer, self.ell)`` returns ``[Vt, s]`` (presumably
    approximate right singular vectors / singular values -- confirm against
    ``simIter``). Its scaled rows go into the sketch from row ``ell`` on,
    the buffer is reset to an empty sparse matrix, and finally a dense SVD
    of the sketch is shrunk into the first ``ell`` rows while the remaining
    rows are zeroed.
    """
    ell = self.ell

    # --- shrink the buffer and insert it below the first ell sketch rows
    [Vt, s] = simIter(self.buffer, ell)
    n_values = len(s)
    if n_values >= ell:
        scaled = sqrt(s[:ell] ** 2 - s[ell - 1] ** 2)
        self._sketch[ell:, :] = dot(diag(scaled), Vt[:ell, :])
    else:
        # Too few values to shrink: copy them as they are.
        self._sketch[ell:ell + n_values, :] = dot(diag(s), Vt[:n_values, :])

    # --- reset the buffer matrix and its bookkeeping
    del self.buffer
    self.buffer = sparse_matrix((self.buffer_ell, self.d))
    self.buffer_nnz = 0
    self.buffer_nextZeroRow = 0

    # --- dense shrink of the sketch itself
    [_, s, Vt] = svd(self._sketch, full_matrices=False)
    n_values = len(s)
    if n_values >= ell:
        scaled = sqrt(s[:ell] ** 2 - s[ell - 1] ** 2)
        self._sketch[:ell, :] = dot(diag(scaled), Vt[:ell, :])
        self._sketch[ell:, :] = 0
    else:
        self._sketch[:n_values, :] = dot(diag(s), Vt[:n_values, :])
        self._sketch[n_values:, :] = 0
def save_fm(X, file_path=None, sparse=False):
    """Dump a feature matrix below ``<N7_DATA_DIR>/models`` with joblib.

    Args:
        X: Feature matrix; must expose ``shape`` and, when ``sparse`` is
            true, ``dtype``.
        file_path: File name relative to ``<N7_DATA_DIR>/models``. ``None``
            selects ``X.pkl``.
        sparse: When true, ``X`` is converted with ``sparse_matrix`` (same
            dtype) before it is written.
    """
    file_path = (
        "%s/models/X.pkl" % N7_DATA_DIR
        if file_path is None
        else "%s/models/%s" % (N7_DATA_DIR, file_path)
    )

    if sparse:
        logging.info("CONVERTING TO SPARSE REPRESENTATION")
        X = sparse_matrix(X, dtype=X.dtype)

    logging.info("SAVING FEATURE MATRIX %r -> %s", X.shape, file_path)
    joblib.dump(X, file_path, compress=9)
def __init__(self, d, ell):
    """Set up an empty sketch and an empty sparse buffer.

    Args:
        d: Number of columns of both the sketch and the buffer.
        ell: Sketch size parameter; the dense sketch holds ``2 * ell`` rows.
    """
    self.class_name = 'SparseSketcher'
    self.d, self.ell = d, ell

    # Dense sketch with 2 * ell rows.
    sketch_rows = 2 * ell
    self._sketch = zeros((sketch_rows, d))
    self.sketch_nextZeroRow = 0

    # Sparse buffer with d rows and d columns, initially empty.
    self.buffer_ell = d
    self.buffer = sparse_matrix((d, d))
    self.buffer_nnz = 0
    self.buffer_nextZeroRow = 0
    # Non-zero budget of the buffer: the number of cells in the sketch.
    self.buffer_nnz_threshold = sketch_rows * d
def __init__(self, d, ell):
    """Initialise the sketcher state: a zero sketch and an empty buffer.

    Args:
        d: Column count shared by the sketch and the buffer.
        ell: Sketch size parameter; the sketch is allocated with
            ``2 * ell`` rows.
    """
    self.class_name = 'SparseSketcher'
    self.d = d
    self.ell = ell

    # Buffer of incoming rows (d x d sparse matrix) and its counters.
    self.buffer_ell = d
    self.buffer = sparse_matrix((self.buffer_ell, d))
    self.buffer_nnz = 0
    self.buffer_nextZeroRow = 0
    self.buffer_nnz_threshold = 2 * ell * d

    # Dense sketch, all zeros to begin with.
    self._sketch = zeros((2 * ell, d))
    self.sketch_nextZeroRow = 0
def create_X(ratings, n, d, user_key="user", item_key="item"):
    """Build an ``n`` x ``d`` sparse user-by-item rating matrix.

    Args:
        ratings: Table indexable by column name with the columns
            ``user_key``, ``item_key`` and ``"rating"``.
        n: Number of rows of the matrix (users).
        d: Number of columns of the matrix (items).
        user_key: Name of the user id column.
        item_key: Name of the item id column.

    Returns:
        tuple: ``(X, user_mapper, item_mapper, user_inverse_mapper,
        item_inverse_mapper, user_ind, item_ind)``; the mappers translate
        id -> index, the inverse mappers index -> id, and ``user_ind`` /
        ``item_ind`` hold the row / column index of each rating.
    """
    unique_users = np.unique(ratings[user_key])
    unique_items = np.unique(ratings[item_key])

    # zip stops at the shorter input, so at most n users / d items are mapped.
    user_mapper = {uid: idx for uid, idx in zip(unique_users, range(n))}
    item_mapper = {iid: idx for iid, idx in zip(unique_items, range(d))}
    user_inverse_mapper = {idx: uid for idx, uid in zip(range(n), unique_users)}
    item_inverse_mapper = {idx: iid for idx, iid in zip(range(d), unique_items)}

    user_ind = list(map(user_mapper.__getitem__, ratings[user_key]))
    item_ind = list(map(item_mapper.__getitem__, ratings[item_key]))

    coordinates = (user_ind, item_ind)
    X = sparse_matrix((ratings["rating"], coordinates), shape=(n, d))
    return X, user_mapper, item_mapper, user_inverse_mapper, item_inverse_mapper, user_ind, item_ind
def UpdateJSimMatrix(self, IncludeUpdate=True):
    """Recompute the pairwise similarity matrix of the groups.

    Each entry is ``intersection / (size_i + size_j - intersection)``.
    Only the strictly upper triangle is kept in ``self.JSimMatrix``.

    Args:
        IncludeUpdate: When true, ``self.UpdateIntersectionMatrix`` is
            called first so the intersection counts are refreshed.

    Side effects:
        Sets ``self.GroupSizeMatrix`` (dense ``size_i + size_j`` table) and
        ``self.JSimMatrix`` (sparse, strictly upper triangular).
    """
    if IncludeUpdate:
        self.UpdateIntersectionMatrix(IncludeUpdate)

    overlap = np.array(self.IntersectionMatrix.toarray().astype(float))

    # One copy of the size vector per group name; adding the transpose
    # gives size_i + size_j in cell (i, j).
    tiled_sizes = np.array([self.GroupSizes] * len(self.GroupNames))
    self.GroupSizeMatrix = tiled_sizes + tiled_sizes.T

    union = self.GroupSizeMatrix - overlap
    similarity = sparse_matrix(overlap / union)
    # The matrix is symmetric, so the upper triangle without the diagonal
    # carries every distinct pair exactly once.
    self.JSimMatrix = triu(similarity, k=1)
def build_vertex_edge_adjacency_matrix(mesh: List[List[Any]]) -> sparse_matrix:
    """Return the sparse vertex-by-edge incidence matrix of ``mesh``.

    Row and column indices come from ``assign_element_indices(mesh)``.
    Cell ``(vertex, edge)`` is 1 when the first component of the vertex key
    is contained in the edge key, otherwise 0.
    """
    vertex_mapping, edge_mapping, _ = assign_element_indices(mesh)

    incidence = np.zeros((len(vertex_mapping), len(edge_mapping)))
    for vertex, row in vertex_mapping.items():
        for edge, col in edge_mapping.items():
            incidence[row, col] = 1 if vertex[0] in edge else 0

    return sparse_matrix(incidence)
def build_edge_face_adjacency_matrix(mesh: List[List[Any]]) -> sparse_matrix:
    """Return the sparse edge-by-face incidence matrix of ``mesh``.

    Row and column indices come from ``assign_element_indices(mesh)``.
    Cell ``(edge, face)`` is 1 when both end points of the edge key are
    contained in the face key, otherwise 0.
    """
    _, edge_mapping, face_mapping = assign_element_indices(mesh)

    incidence = np.zeros((len(edge_mapping), len(face_mapping)))
    for edge, row in edge_mapping.items():
        for face, col in face_mapping.items():
            on_face = edge[0] in face and edge[1] in face
            incidence[row, col] = 1 if on_face else 0

    return sparse_matrix(incidence)
def create_user_item_matrix(self, user_key="user", item_key="item"):
    """Build the sparse user-by-item matrix from ``self.ratings``.

    Ratings are shifted by -3 before they are stored.

    Args:
        user_key: Name of the user id column in ``self.ratings``.
        item_key: Name of the item id column in ``self.ratings``.

    Side effects:
        Sets ``user_mapper`` / ``item_mapper`` (id -> index),
        ``user_inverse_mapper`` / ``item_inverse_mapper`` (index -> id),
        ``user_ind`` / ``item_ind`` (index of every rating) and
        ``ratings_matrix`` on ``self``, then prints a confirmation.
    """
    users = self.ratings[user_key]
    items = self.ratings[item_key]
    n = len(set(users))
    d = len(set(items))

    unique_users = np.unique(users)
    unique_items = np.unique(items)

    self.user_mapper = {uid: idx for uid, idx in zip(unique_users, range(n))}
    self.item_mapper = {iid: idx for iid, idx in zip(unique_items, range(d))}
    self.user_inverse_mapper = {idx: uid for idx, uid in zip(range(n), unique_users)}
    self.item_inverse_mapper = {idx: iid for idx, iid in zip(range(d), unique_items)}

    self.user_ind = [self.user_mapper[uid] for uid in users]
    self.item_ind = [self.item_mapper[iid] for iid in items]

    shifted = self.ratings["rating"] - 3
    self.ratings_matrix = sparse_matrix(
        (shifted, (self.user_ind, self.item_ind)), shape=(n, d))
    print("user-item matrix generated.")
def create_X(ratings, n, d, user_key="user", item_key="item", rating_key="Rating"):
    """
    Creates a sparse item-by-user rating matrix and the mappers that relate
    matrix indexes to the users' and items' ids.

    Parameters:
    -----------
    ratings: pd.DataFrame
        the ratings to be stored in the matrix;
    n: int
        the number of items (rows of the matrix)
    d: int
        the number of users (columns of the matrix)
    user_key: string
        the column in ratings that contains the users id
    item_key: string
        the column in ratings that contains the items id
    rating_key: string
        the column in ratings that contains the rating values
        (defaults to "Rating")

    Returns: (X, user_mapper, item_mapper, user_inverse_mapper, item_inverse_mapper, user_ind, item_ind)
    --------
    X: sparse matrix of shape (n, d)
        the sparse matrix containing the ratings, items as rows and users
        as columns.
    user_mapper: dict
        stores the indexes of the users - the user_id is the key;
    item_mapper: dict
        stores the indexes of the items - the item_id is the key;
    user_inverse_mapper: dict
        stores the user id - the user index is the key;
    item_inverse_mapper: dict
        stores the item id - the item index is the key;
    user_ind: list
        indexes of the users (in the order they are in ratings);
    item_ind: list
        indexes of the items (in the order they are in ratings);
    """
    # Sorted distinct ids, computed once and shared by all four mappers.
    unique_users = np.unique(ratings[user_key])
    unique_items = np.unique(ratings[item_key])

    user_mapper = dict(zip(unique_users, range(d)))
    item_mapper = dict(zip(unique_items, range(n)))

    user_inverse_mapper = dict(zip(range(d), unique_users))
    item_inverse_mapper = dict(zip(range(n), unique_items))

    user_ind = [user_mapper[i] for i in ratings[user_key]]
    item_ind = [item_mapper[i] for i in ratings[item_key]]

    # Items index the rows and users the columns, matching shape=(n, d).
    X = sparse_matrix((ratings[rating_key], (item_ind, user_ind)), shape=(n, d))

    return X, user_mapper, item_mapper, user_inverse_mapper, item_inverse_mapper, user_ind, item_ind
def create_user_item_matrix(ratings, user_key="user", item_key="item"):
    """Build a sparse user-by-item rating matrix plus index mappings.

    Args:
        ratings: Table indexable by column name (e.g. a DataFrame or a dict
            of sequences) with the columns ``user_key``, ``item_key`` and
            ``"rating"``.
        user_key: Name of the column holding user ids.
        item_key: Name of the column holding item ids.

    Returns:
        tuple: ``(X, user_mapper, item_mapper, user_inverse_mapper,
        item_inverse_mapper, user_ind, item_ind)``. ``X[u, i]`` holds the
        rating user index ``u`` gave item index ``i``, so summing a column
        of ``X`` gives the total rating of one item.
    """
    # Sorted distinct ids, computed once. The matrix shape is derived from
    # the same arrays so it always agrees with the mappers.
    unique_users = np.unique(ratings[user_key])
    unique_items = np.unique(ratings[item_key])
    n = len(unique_users)
    d = len(unique_items)

    # id -> consecutive index (0, 1, 2, ...) and the reverse lookup.
    user_mapper = dict(zip(unique_users, range(n)))
    item_mapper = dict(zip(unique_items, range(d)))
    user_inverse_mapper = dict(zip(range(n), unique_users))
    item_inverse_mapper = dict(zip(range(d), unique_items))

    # Row / column index of every rating, in input order.
    user_ind = [user_mapper[i] for i in ratings[user_key]]
    item_ind = [item_mapper[i] for i in ratings[item_key]]

    # Coordinate construction: X[user_ind[k], item_ind[k]] = rating[k].
    X = sparse_matrix((ratings["rating"], (user_ind, item_ind)), shape=(n, d))
    return X, user_mapper, item_mapper, user_inverse_mapper, item_inverse_mapper, user_ind, item_ind
def get_binary_frame(evs, size=(346, 260), ds=1):
    """Accumulate events into a dense signed ``int8`` frame.

    Args:
        evs: 2-D array with one event per row. Columns 1 and 2 are used as
            the frame row / column coordinates and column 3 as the polarity
            (presumably 0 or 1 -- it is mapped to ``2 * p - 1``).
        size: Shape of the returned frame.
        ds: Integer down-sampling factor applied to both coordinates.

    Returns:
        numpy.ndarray: ``int8`` array of shape ``size``; events falling on
        the same cell are summed.
    """
    polarity = 2 * evs[:, 3] - 1
    rows = evs[:, 1] // ds
    cols = evs[:, 2] // ds
    frame = sparse_matrix((polarity, (rows, cols)), dtype=np.int8, shape=size)
    return frame.toarray()
def sbfemAssembly(coord, sdConn, sdSC, mat):
    """
    Original name: SBFEMAssembly (p.88)
    Assembly of global stiffness and mass matrices.

    :param coord: coord[i,:] - coordinates of node i
    :param sdConn: sdConn{isd,:}(ie,:) - S-element connectivity. The nodes of line element ie in S-element isd.
    :param sdSC: sdSC(isd,:) - coordinates of scaling centre of S-element isd
    :param mat: material constants (elasticity matrix, mass density)
    :return: sdSln, K, M
        sdSln - solutions for S-element
        K - global stiffness matrix
        M - global mass matrix
    """
    # ------------------------------------------------------------------
    # Solution of subdomains
    # ------------------------------------------------------------------
    Nsd = len(sdConn)  # number of S-elements
    if len(sdSC.shape) == 1:
        # a single scaling centre was passed as a vector: add the row dim
        sdSC = np.expand_dims(sdSC, axis=0)

    sdSln = []  # solutions of all S-elements
    for isd in range(Nsd):
        conn = sdConn[isd]
        # sdNode: global numbers of the nodes in this S-element (no duplicates);
        # ic maps the global connectivity to the local one.
        sdNode, ic = np.unique(conn.flatten(), return_inverse=True)
        # nodal coordinates with the origin moved to the scaling centre
        xy = coord[sdNode] - sdSC[isd, :]
        # line element connectivity in local nodal numbers
        LConn = np.reshape(ic, conn.shape)
        # S-element coefficient matrices and solution
        E0, E1, E2, M0 = coeffMatricesOfSElement(xy, LConn, mat)
        K, d, v, M = sbfem(E0, E1, E2, M0)
        sdSln.append({
            'xy': xy,
            'sc': sdSC[isd],
            'conn': LConn,
            'node': sdNode,
            'K': K,
            'M': M,
            'd': d,
            'v': v
        })

    # ------------------------------------------------------------------
    # Assembly
    # ------------------------------------------------------------------
    # total number of stiffness entries over all S-elements
    ncoe = sum(sln['K'].size for sln in sdSln)

    # values and (row, column) positions of the global matrix entries
    K_vals = np.zeros(ncoe)
    M_vals = np.zeros(ncoe)
    Ki = np.zeros(ncoe, dtype=np.int32)
    Kj = np.zeros(ncoe, dtype=np.int32)

    StartInd = 0  # starting position of an S-element stiffness matrix
    for sln in sdSln:
        node = sln['node']
        # global DOFs of the nodes, zero-based: ux -> 2*i, uy -> 2*i + 1,
        # interleaved per node
        ux = np.reshape(2 * node, (-1, 1))
        uy = np.reshape(2 * node + 1, (-1, 1))
        dof = np.concatenate((ux, uy), axis=1).reshape((-1, 1))
        Ndof = dof.shape[0]  # number of DOFs of this S-element

        # row and column numbers of the S-element's coefficients
        sdI = np.tile(dof, (1, Ndof))
        sdJ = sdI.T

        # store values and indices in column-major order
        EndInd = StartInd + Ndof**2
        span = slice(StartInd, EndInd)
        K_vals[span] = sln['K'].flatten(order='F')
        M_vals[span] = sln['M'].flatten(order='F')
        Ki[span] = sdI.flatten(order='F')
        Kj[span] = sdJ.flatten(order='F')
        StartInd = EndInd

    # global matrices in sparse storage, symmetrised
    K = sparse_matrix((K_vals, (Ki, Kj)))
    K = (K + K.T) / 2
    M = sparse_matrix((M_vals, (Ki, Kj)))
    M = (M + M.T) / 2
    return sdSln, K, M
for recommendation in topK: if recommendation in user_hidden[u]: hits += 1 else: misses += 1 recommendation_times.append(time.time() - start) users_scanned += 1 if users_scanned % 100 == 99: break print("hits: {}, missed:{}, hit rate:{}".format(hits, misses, hits / misses)) print("avg recommendation time {}".format( sum(recommendation_times) / len(recommendation_times))) print(recommendation_times) ######################################### data, title_id, id_title = get_ratings() data = sparse_matrix(data) users, user_hidden = hide_data(data) model = train_model(data, k=K) evaluate_model(model, data, users, user_hidden)
def UpdateIntersectionMatrix(self, IncludeUpdate=True):
    """Recompute the pairwise intersection matrix ``R @ R.T``.

    ``R`` is ``self.RelationshipMatrix`` converted to a float sparse
    matrix; entry ``(i, j)`` of the product is the dot product of rows
    ``i`` and ``j`` of ``R``.

    Args:
        IncludeUpdate: When true, ``self.BuildMatrix()`` is called first
            (presumably to refresh ``self.RelationshipMatrix`` -- confirm
            against ``BuildMatrix``).

    Side effects:
        Sets ``self.IntersectionMatrix`` to the sparse product.
    """
    if IncludeUpdate:
        self.BuildMatrix()

    # The ``np.float`` alias was removed in NumPy 1.24; the builtin float
    # is the same dtype.
    RMIntForm = sparse_matrix(self.RelationshipMatrix.astype(float))
    # Use the sparse matrix's own product: np.dot is not a supported way to
    # multiply SciPy sparse operands.
    self.IntersectionMatrix = RMIntForm.dot(RMIntForm.T)
def PEG_construct(frame_len, syndrom_len, lam, ro):
    """
    Generate H-matrix.

    Builds a ``syndrom_len`` x ``frame_len`` sparse 0/1 matrix column by
    column. ``lam`` and ``ro`` are 2-row arrays: row 0 holds degrees, row 1
    the weight of each degree; they are expanded into one target degree per
    column (``lam``) and per row (``ro``). Columns of degree 2 with index
    below ``syndrom_len - 1`` get a zig-zag pattern (rows ``j`` and
    ``j + 1``); every other column is connected via
    ``highest_check_degree`` / ``find_N``.
    """
    data_dt = np.int32
    N_1 = np.round(frame_len * (np.sum(lam[1, :] / lam[0, :]) ** (-1)))

    def expand_degrees(dist, length):
        # Repeat each degree according to its weight, then pad with the
        # last degree or drop trailing entries to hit exactly `length`.
        degrees = []
        for col in range(dist.shape[1]):
            repeats = int(np.round(N_1 * (dist[1, col] / dist[0, col])))
            degrees.extend([int(dist[0, col])] * repeats)
        if len(degrees) < length:
            degrees.extend([int(dist[0, -1])] * (length - len(degrees)))
        while len(degrees) > length:
            degrees.pop()
        return degrees

    ds = expand_degrees(lam, frame_len)
    ds.sort()
    ds = np.array(ds, dtype=data_dt)

    dc = expand_degrees(ro, syndrom_len)
    if not (len(ds) == frame_len and len(dc) == syndrom_len):
        print('some error with dimensions of the matrix', len(ds), len(dc))
    dc.sort()
    dc = np.array(dc, dtype=data_dt)

    fc = dc.copy()  # remaining free degree of every row
    O_c = np.arange(syndrom_len, dtype=data_dt)
    O_c_free = O_c.copy()
    H = sparse_matrix((syndrom_len, frame_len), dtype=data_dt)
    E_s = [np.array([], dtype=data_dt) for _ in range(frame_len)]
    E_c = [np.array([], dtype=data_dt) for _ in range(frame_len)]

    def connect(i, j):
        # Add the edge (row i, column j) and update all bookkeeping.
        nonlocal O_c_free
        E_s[j] = np.union1d(E_s[j], np.array([i], dtype=data_dt))
        E_c[i] = np.union1d(E_c[i], np.array([j], dtype=data_dt))
        H[i, j] = 1
        fc[i] = fc[i] - 1
        if fc[i] == 0:
            O_c_free = np.setdiff1d(O_c_free, np.array([i], dtype=data_dt))

    for j in range(frame_len):
        if ds[j] == 2 and j < syndrom_len - 1:
            # zig-zag pattern
            connect(j, j)
            connect(j + 1, j)
        else:
            # First node
            connect(highest_check_degree(O_c, fc), j)
            # All other nodes
            for k in range(1, ds[j]):
                O_c_tmp = find_N(O_c, j, E_c, E_s)
                O_c_tmp = np.setdiff1d(O_c_tmp, E_s[j], assume_unique=True)
                connect(highest_check_degree(O_c_tmp, fc), j)
    return H
num_cols = 2*n num_rows = 2*len(edges) + 2 def x_var(i): return 2*i def y_var(i): return 2*i + 1 rows = [ i for i in range(2*len(edges)) for _ in range(4)] + [num_rows-2,num_rows-1] cols = [ f(e) for e in edges for f in (lambda e: x_var(e[0]), lambda e: x_var(e[1]), lambda e: y_var(e[0]), lambda e: y_var(e[1]), lambda e: x_var(e[0]), lambda e: x_var(e[1]), lambda e: y_var(e[0]), lambda e: y_var(e[1]))] + [0,1] vals = [ val for e in edges for val in (-e[2], e[2], -e[3], e[3], straightness*e[3],-straightness*e[3], -straightness*e[2], straightness*e[2])] + [1,1] A = sparse_matrix((vals,(rows,cols)),shape=(num_rows,num_cols)) b = [ val for e in edges for val in (sqrt(e[2]**2+e[3]**2),0) ] + [0,0] # Solve least squares problem by solving Gauß normal form: `A^T A positions = A^T b` AtA = A.transpose()*A Atb = A.transpose()*b positions = sparse_solve(AtA,Atb) # read the solution and apply scale scale = float(arguments['--scale']) pos = [ (scale*positions[x_var(i)],scale*positions[y_var(i)]) for i in range(n) ] ## Output if( arguments['--ipe']): from miniipe import Document, polyline
def PEG_construct(frame_len, syndrom_len, lam, ro):
    """
    Generate H-matrix.

    The result is a ``syndrom_len`` x ``frame_len`` sparse 0/1 matrix.
    ``lam`` / ``ro`` are 2-row arrays (row 0: degrees, row 1: weights) that
    are expanded into a target degree for every column / row. Degree-2
    columns with index below ``syndrom_len - 1`` are wired in a zig-zag
    (rows ``j`` and ``j + 1``); the remaining columns pick their rows via
    ``highest_check_degree`` and ``find_N``.
    """
    data_dt = np.int32
    N_1 = np.round(frame_len * (np.sum(lam[1, :] / lam[0, :])**(-1)))

    def degree_sequence(dist, target_len):
        # One entry per node: each degree repeated by its rounded share,
        # padded with the last degree / trimmed from the end as needed.
        seq = []
        for idx in range(dist.shape[1]):
            share = int(np.round(N_1 * (dist[1, idx] / dist[0, idx])))
            seq.extend([int(dist[0, idx])] * share)
        if len(seq) < target_len:
            seq.extend([int(dist[0, -1])] * (target_len - len(seq)))
        while len(seq) > target_len:
            seq.pop()
        return seq

    ds = degree_sequence(lam, frame_len)
    ds.sort()
    ds = np.array(ds, dtype=data_dt)

    dc = degree_sequence(ro, syndrom_len)
    if not (len(ds) == frame_len and len(dc) == syndrom_len):
        print('some error with dimensions of the matrix', len(ds), len(dc))
    dc.sort()
    dc = np.array(dc, dtype=data_dt)

    fc = dc.copy()  # free degree left on every row
    O_c = np.arange(syndrom_len, dtype=data_dt)
    O_c_free = O_c.copy()
    H = sparse_matrix((syndrom_len, frame_len), dtype=data_dt)
    E_s = [np.array([], dtype=data_dt) for _ in range(frame_len)]
    E_c = [np.array([], dtype=data_dt) for _ in range(frame_len)]

    def add_edge(i, j):
        # Record the edge between row i and column j everywhere.
        nonlocal O_c_free
        E_s[j] = np.union1d(E_s[j], np.array([i], dtype=data_dt))
        E_c[i] = np.union1d(E_c[i], np.array([j], dtype=data_dt))
        H[i, j] = 1
        fc[i] = fc[i] - 1
        if fc[i] == 0:
            O_c_free = np.setdiff1d(O_c_free, np.array([i], dtype=data_dt))

    for j in range(frame_len):
        if ds[j] == 2 and j < syndrom_len - 1:
            # zig-zag pattern
            add_edge(j, j)
            add_edge(j + 1, j)
        else:
            # First node
            add_edge(highest_check_degree(O_c, fc), j)
            # All other nodes
            for k in range(1, ds[j]):
                O_c_tmp = find_N(O_c, j, E_c, E_s)
                O_c_tmp = np.setdiff1d(O_c_tmp, E_s[j], assume_unique=True)
                add_edge(highest_check_degree(O_c_tmp, fc), j)
    return H
def inject_knowledge(self, instance_and_corpus):
    """Insert knowledge-graph relations after matching tokens of each text.

    For every text (a sequence of tokens) the tokens are walked in order.
    The token, combined with up to ``self.LOOK_BACK`` preceding tokens, is
    looked up in ``self.kg``; at most ``self.MAX_RELATIONS`` of the found
    relations (each a list of tokens) are appended right after the token.
    Once a token equal to "a", "b", "c" or "d" has been seen, no further
    relations are injected for the rest of that text.

    Args:
        instance_and_corpus: Iterable of token sequences.

    Returns:
        tuple of five lists with one entry per text:
        ``token_emb_corpus`` (tokens including injected relation tokens;
        not padded or truncated here), ``segment_mask_corpus`` (0 for
        original tokens, 1 for injected ones; padded with 0 or cut to
        ``self.MAX_LEN``), ``soft_ids_corpus`` (soft position ids; padded
        with ``self.MAX_LEN - 1`` or cut to ``self.MAX_LEN``),
        ``vm_values_corpus`` and ``vm_indices_corpus`` (values as
        ``torch.FloatTensor`` and 2 x nnz indices as ``torch.LongTensor``
        of the sparse visibility matrix in COO form).

    Side effects:
        Adds every injected relation (joined with spaces) to
        ``self.unique_insert`` and increments ``self.entity_insertions``.
    """
    # This implementation borrows conceptually from K-BERT (Liu et al.) implementation, but is more efficient and adapted to English
    token_emb_corpus = []
    segment_mask_corpus = []
    soft_ids_corpus = []
    visibility_matrix_corpus = []  # never filled or returned (see the commented-out append below)
    vm_values_corpus = []
    vm_indices_corpus = []
    corpus = instance_and_corpus
    for injection_iteration, text in enumerate(corpus):
        sentence_tree = []  # filled below but not read afterwards
        soft_ids = []
        hard_ids = []  # hard (absolute) positions of the original tokens
        soft_tree = []  # filled below but not read afterwards
        hard_tree = []  # ([token position], [[relation positions], ...]) per token
        history = ["[START]"] * self.LOOK_BACK
        token_emb = []
        segment_mask = []
        hard_position = 0
        turn_off_injection = False
        for soft_position, token in enumerate(text):
            # Sticky switch: stays on for the remainder of this text.
            if token in ["a", "b", "c", "d"]:
                turn_off_injection = True
            if not turn_off_injection:
                # Try progressively shorter history windows followed by the
                # current token as lookup key; keep the first MAX_RELATIONS hits.
                relations = [entity for combination in range(self.LOOK_BACK + 1) for entity in self.kg.get(" ".join(history[combination:self.LOOK_BACK] + [token]), [])][:self.MAX_RELATIONS]
            elif turn_off_injection:# or len(token) < 2:
                relations = []
            sentence_tree.append((token, relations))
            relations_hard_pos = []
            relations_soft_pos = []
            previous_hard = hard_position  # hard position of the token itself
            token_emb.append(token)
            soft_ids.append(soft_position)
            segment_mask.append(0)
            # Only affects the rest of this iteration: enumerate rebinds
            # soft_position at the start of the next one.
            soft_position += 1
            hard_ids.append(hard_position)
            for j, relation in enumerate(relations):
                # This particular relation is never injected.
                if relation == ["is", "a", "word"]:
                    continue
                self.unique_insert.add(" ".join(relation))
                self.entity_insertions += 1
                relations_soft_pos.append([soft_position + offset for offset in range(1, len(relation)+1)])
                relations_hard_pos.append([hard_position + offset for offset in range(1, len(relation)+1)])
                token_emb += relation
                segment_mask += [1] * len(relation)
                # Every relation of a token starts at the same soft position,
                # while hard positions keep advancing.
                soft_ids += range(soft_position, len(relation) + soft_position)
                hard_position += len(relation)
            hard_tree.append(([previous_hard], relations_hard_pos))
            hard_position += 1
            soft_tree.append(([soft_position], relations_soft_pos))
            # NOTE(review): history entries carry a trailing space, so keys
            # built from them contain double spaces -- confirm that this
            # matches the key format of self.kg.
            history.append(token + " ")
            if len(history) > self.LOOK_BACK:
                history.pop(0)
        # Calculate visible matrix
        sentence_len = len(soft_ids)
        visibility_matrix = sparse_matrix((sentence_len, sentence_len)) #np.zeros((sentence_len, sentence_len))#
        for token_idx, relation_ids in hard_tree:
            # An original token sees all original tokens and its own relations.
            for idx in token_idx:
                visible_abs_idx = hard_ids + [idx for ent in relation_ids for idx in ent]
                visibility_matrix[idx, visible_abs_idx] = 1
            # A relation token sees its own relation and the anchoring token.
            for ent in relation_ids:
                for idx in ent:
                    visible_abs_idx = ent + token_idx
                    visibility_matrix[idx, visible_abs_idx] = 1
        if sentence_len < self.MAX_LEN:
            pad_num = self.MAX_LEN - sentence_len
            segment_mask += [0] * pad_num
            soft_ids += [self.MAX_LEN - 1] * pad_num
            # The visibility matrix is left unpadded (padding is commented out).
            #visibility_matrix = np.pad(visibility_matrix, ((0, pad_num), (0, pad_num)), 'constant') # pad 0
        else:
            segment_mask = segment_mask[:self.MAX_LEN]
            soft_ids = soft_ids[:self.MAX_LEN]
            visibility_matrix = visibility_matrix[:self.MAX_LEN, :self.MAX_LEN]
        #attention_mask_corpus.append([1 if token is not "[PAD]" else 0 for token in token_emb])
        #snapshot = tracemalloc.take_snapshot()
        #display_top(snapshot)
        # Hand the matrix back as COO indices plus values.
        visibility_matrix = visibility_matrix.tocoo()
        values = visibility_matrix.data
        indices = np.vstack((visibility_matrix.row, visibility_matrix.col))
        indices = torch.LongTensor(indices)
        values = torch.FloatTensor(values)
        #visibility_matrix = torch.sparse.LongTensor(i, v, (self.MAX_LEN, self.MAX_LEN))
        #yield token_emb, segment_mask, soft_ids, visibility_matrix
        token_emb_corpus.append(token_emb)
        segment_mask_corpus.append(segment_mask)
        soft_ids_corpus.append(soft_ids)
        vm_indices_corpus.append(indices)
        vm_values_corpus.append(values)
        #visibility_matrix_corpus.append(visibility_matrix)
    return token_emb_corpus, segment_mask_corpus, soft_ids_corpus, vm_values_corpus, vm_indices_corpus
def create_user_item_matrix(self, ratings, userKey="user", itemKey="movie", ratingKey="rating"):
    """Build a sparse user-by-item rating matrix from a model's rows.

    Args:
        ratings: Model class whose ``objects`` manager yields the rating
            rows (uses ``all()``, ``order_by()`` and ``values_list()``).
        userKey: Field holding the user id.
        itemKey: Field holding the item id.
        ratingKey: Field holding the rating value.

    Returns:
        tuple: ``(X, userMap, itemMap, userInvMap, itemInvMap, userInd,
        itemInd)``. ``X`` has one row per distinct user (in ``userKey``
        order) and one column per distinct item (sorted by ``np.unique``);
        the maps translate id -> index, the inverse maps index -> id, and
        ``userInd`` / ``itemInd`` give the row / column index of every
        rating in query order.
    """
    # Fetch each rating's user, item and value together in ONE query.
    # Separate queries ordered only by userKey may return ties in different
    # orders, which would pair ratings with the wrong items.
    rows = list(
        ratings.objects.all()
        .order_by(userKey)
        .values_list(userKey, itemKey, ratingKey)
    )
    userIds = [row[0] for row in rows]
    itemIds = [row[1] for row in rows]
    values = [row[2] for row in rows]

    # Rows are sorted by user, so first-appearance order is the ordered
    # list of distinct users.
    userMap = {userId: index
               for index, userId in enumerate(dict.fromkeys(userIds))}
    itemMap = {itemId: index
               for index, itemId in enumerate(np.unique(itemIds))}
    userInvMap = {index: userId for userId, index in userMap.items()}
    itemInvMap = {index: itemId for itemId, index in itemMap.items()}

    userInd = [userMap[userId] for userId in userIds]
    itemInd = [itemMap[itemId] for itemId in itemIds]

    n = len(userMap)
    d = len(itemMap)
    X = sparse_matrix((values, (userInd, itemInd)), shape=(n, d))
    return X, userMap, itemMap, userInvMap, itemInvMap, userInd, itemInd