def create_feat_eng_data(self, train_file, dev_file, test_file, train_out_file, dev_out_file, test_out_file):
    '''
    Feature engineered data creation using TF-IDF

    train_file: file containing (training) input features
    dev_file: file containing (dev) input features
    test_file: file containing (test) input features
    train_out_file: output file for engineered (training) features
    dev_out_file: output file for engineered (dev) features
    test_out_file: output file for engineered (test) features
    '''
    #create training features
    x_mat = self.load_sparse_csr(train_file).tocsc()  #feature matrix
    x_mat = x_mat[:, 1:]  #removing the first (all 1) feature
    N = x_mat.shape[0]
    idf_arr = [math.log((N * 1.0) / max(1., x_mat[:, i].nnz)) for i in range(x_mat.shape[1])]
    idf = np.asarray(idf_arr)
    x_mat = x_mat.tocsr()
    for i in range(0, N, 50000):
        j = min(i + 50000, N)
        x_mat_temp = csr(x_mat[i:j, :].multiply(idf))
        if i == 0:
            x_mat_new = x_mat_temp
        else:
            x_mat_new = vstack([x_mat_new, x_mat_temp])
        print i
    all1_row = csr(np.asarray([1] * x_mat.shape[0])).transpose()
    x_mat_new = hstack([all1_row, x_mat_new])
    self.save_sparse_csr(train_out_file, csr(x_mat_new))

    #create dev features
    if dev_file != None:
        x_mat = self.load_sparse_csr(dev_file).transpose().toarray()  #feature matrix
        x_mat = x_mat[1:, :]  #removing the first (all 1) feature
        x_mat = np.multiply(x_mat.transpose(), idf)
        all1_row = np.asarray([1] * x_mat.shape[0]).transpose()
        self.save_sparse_csr(dev_out_file, csr(np.column_stack((all1_row, x_mat))))

    #create test features
    if test_file != None:
        x_mat = self.load_sparse_csr(test_file).transpose().toarray()  #feature matrix
        x_mat = x_mat[1:, :]  #removing the first (all 1) feature
        x_mat = np.multiply(x_mat.transpose(), idf)
        all1_row = np.asarray([1] * x_mat.shape[0]).transpose()
        self.save_sparse_csr(test_out_file, csr(np.column_stack((all1_row, x_mat))))
def test_eval_sparse_dense(tmpdir, device_id):
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.ops import times

    input_vocab_dim = label_vocab_dim = 69

    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(
        ctf_file,
        StreamDefs(features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
                   labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True))),
        randomize=False, max_samples=2)

    raw_input = sequence.input_variable(shape=input_vocab_dim,
                                        sequence_axis=Axis('inputAxis'),
                                        name='raw_input', is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features},
                                  device=cntk_device(device_id))

    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid, device=cntk_device(device_id))

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [[3, 4, 5, 4, 7, 12, 1], [60, 61]]
    data = [csr(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in one_hot_data]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = Value.one_hot(one_hot_data, num_classes=input_vocab_dim, device=cntk_device(device_id))
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])
def train(self, start_section, end_section):
    print 'Fitting the hyperplane...'
    #Applying 'csr' to the vectors makes a sparse matrix.
    X_matrix = csr(self.getfeaturevectors(start_section, end_section))
    y_vector = np.array(self.getgsdata(start_section, end_section))
    self.hyperplane.fit(X_matrix, y_vector)
def _get_kl(n, dx):
    rows2 = [i for i in range(n)]
    lower1rows = [i + 1 for i in range(n - 1)]
    rows = rows2 + rows2[:-1] + lower1rows
    cols = rows2 + lower1rows + rows2[:-1]
    vals = np.array([2.] * n + [-1.] * (n - 1) * 2) / dx**2
    k = csr((vals, (rows, cols)))
    vals = (np.array([2.] * n + [-1.] * (n - 1) * 2) * -1 / dx**2).tolist()
    iden = [1.] * n
    rowsid = (np.array(rows2) + n).tolist()
    i = csr((iden, (rows2, rows2)))
    l = csr((vals + iden,
             ((np.array(rows) + n).tolist() + rows2, cols + rowsid)))
    return i, k, l
def root_function_first_derivative_numerical(self, beta):
    num = beta.shape[0]
    idx = list(range(num))
    idx_diag_ends = np.array([0, num - 1])
    Amatrix = csr((num, num))
    Amatrix += csr((np.ones(num - 1), (idx[:-1], idx[1:])), shape=(num, num))
    Amatrix -= csr((np.ones(num - 1), (idx[1:], idx[:-1])), shape=(num, num))
    Amatrix += csr((np.array([-1, 1]), (idx_diag_ends, idx_diag_ends)), shape=(num, num))
    y_prime = Amatrix * root_function(beta, self.RR)
    x_prime = Amatrix * beta
    return y_prime / x_prime
def test_eval_sparse_dense(tmpdir, device_id):
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.ops import input_variable, times

    input_vocab_dim = label_vocab_dim = 69

    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
    )), randomize=False, epoch_size=2)

    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(
        shape=input_vocab_dim, dynamic_axes=input_dynamic_axes,
        name='raw_input', is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features},
                                  device=cntk_device(device_id))

    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid, device=cntk_device(device_id))

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [
        [3, 4, 5, 4, 7, 12, 1],
        [60, 61]
    ]
    data = [csr(np.eye(input_vocab_dim, dtype=np.float32)[d]) for d in one_hot_data]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = Value.one_hot(one_hot_data, num_classes=input_vocab_dim, device=cntk_device(device_id))
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])
def devtest(self, start_section, end_section, verbose=False):
    print 'Predicting...'
    #Applying 'csr' to the vectors makes a sparse matrix.
    predictions = self.hyperplane.predict(
        csr(self.getfeaturevectors(start_section, end_section)))
    self.compare(self.getgsdata(start_section, end_section), predictions,
                 start_section, verbose=verbose)
def predict_class(self, from_file, w_file, test_file, pred_file, x_crossval, w_crossval):
    '''
    Predicts the classes for the dev or test set

    Parameters:
    from_file: boolean value to check if data is to be read from file
    w_file: file containing the w learned from the training data for each class
    test_file: file containing the dev or test data (in csr)
    pred_file: prediction output file
    x_crossval: the testing can be done on a small cross validation set (test_file should be None)
    w_crossval: weights for cross validation
    '''
    if from_file:
        x = self.load_sparse_csr(test_file)  #157010 x 1001
        x = csr(x[:, 1:])                    #removing the first (all 1) feature
        w = self.load_sparse_csr(w_file)     #5 x 1001, and without the first feature, 5 x 1000
    else:
        x = x_crossval
        w = w_crossval

    w_dot_x = x.dot(w.transpose())  #157010 x 5
    hard_pred = w_dot_x.toarray().argmax(axis=1) + 1

    w_dot_x_arr = w_dot_x.toarray()
    w_dot_x_arr[w_dot_x_arr > 20] = 20
    w_dot_x_arr[w_dot_x_arr < -20] = -20
    w_dot_x = csr(w_dot_x_arr)
    w_dot_x_exp = csr(np.exp(w_dot_x.toarray()))          #157010 x 5
    w_dot_x_sum = w_dot_x_exp.sum(axis=1)                 #row sum (157010 x 1)
    x_div_mat = csr(w_dot_x_exp.toarray() / w_dot_x_sum)  #157010 x 5
    num_vectors = 5
    rating_arr = np.array([1, 2, 3, 4, 5]).transpose()
    soft_pred = x_div_mat.toarray().dot(rating_arr)

    if from_file:
        #use a separate name for the output handle so the weight matrix w is not shadowed
        out = open(pred_file, 'w')
        for index in range(len(hard_pred)):
            out.write(str(hard_pred[index]))
            out.write(' ')
            out.write(str(soft_pred[index]))
            out.write('\n')
        out.close()

    return hard_pred, soft_pred
def term_transitions(replace, DIST='damerau'):
    index2term = list(
        set([item for item in replace.keys()]) | set([item for item in replace.values()]))
    term2index = {index2term[i]: i for i in range(len(index2term))}
    rows, cols = zip(*[[term2index[item[0]], term2index[item[1]]]
                       for item in replace.items()])
    R = csr((np.ones(2 * len(rows)), (rows + cols, cols + rows)),
            dtype=bool, shape=(len(index2term), len(index2term)))
    labels = connected_components(R)[1]
    sorting = np.argsort(labels)
    labels_s = labels[sorting]
    _, starts = np.unique(labels_s, return_index=True)
    sizes = np.diff(starts)
    groups = [group for group in np.split(sorting, starts[1:]) if group.size > 1]
    transition = dict()
    for group in groups:
        sum_group = float(sum([d[(index2term[index],)] for index in group]))
        max_index = None
        max_freq = 0
        for index in group:
            predict_term = index2term[index]
            predict_freq = d[(predict_term,)]
            if predict_freq > max_freq:
                max_freq = predict_freq
                max_index = index
        for index1 in group:
            given_term = index2term[index1]
            len_1 = len(given_term)
            transition[given_term] = dict()
            for index2 in [index1, max_index]:
                predict_term = index2term[index2]
                len_2 = len(predict_term)
                sim_prefix = prefix_normed(given_term, predict_term, len_1, len_2)
                sim_similar = similarity_normed(given_term, predict_term, len_1, len_2, DIST)
                transition[given_term][predict_term] = (
                    d[(predict_term,)] / sum_group) * sim_similar  #(sim_similar+sim_prefix)/2
            sum_sim = sum([transition[given_term][predict_term]
                           for predict_term in transition[given_term]])
            for predict_term in transition[given_term]:
                transition[given_term][predict_term] /= sum_sim
            for index2 in [index1, max_index]:
                print(given_term, '-->', index2term[index2],
                      transition[given_term][index2term[index2]])
    return transition
def test_eval_sparse_no_seq(batch_index_data, device_id):
    dim = 10
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = sequence.input(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        batch = np.eye(dim)[batch_index_data]
        expected = batch * multiplier
        sparse_val = csr(batch.astype('f'))
        result = z.eval({in1: [sparse_val]}, device=cntk_device(device_id))
        assert np.allclose(result, [expected])
def test_eval_sparse_no_seq(batch_index_data, device_id):
    dim = 10
    multiplier = 2
    in1 = input_variable(shape=(dim,), is_sparse=True)
    z = times(in1, np.eye(dim).astype(np.float32))
    z *= multiplier
    batch = (np.eye(dim)[batch_index_data]).astype(np.float32)
    expected = batch * multiplier
    sparse_val = csr(batch)
    result = z.eval({in1: sparse_val}, device=cntk_device(device_id))
    assert np.allclose(result, [expected])
def test_eval_sparse_no_seq(batch_index_data, device_id):
    dim = 10
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        batch = np.eye(dim)[batch_index_data]
        expected = batch * multiplier
        sparse_val = csr(batch.astype('f'))
        result = z.eval({in1: [sparse_val]}, device=cntk_device(device_id))
        assert np.allclose(result, [expected])
def adjust_n_max(new_n_max):
    global n_max
    n_max = new_n_max
    global n_phonon
    n_phonon = np.array([i for i in range(n_max + 1)], dtype=np.float64)
    global sqrt_n
    sqrt_n = np.array([sqrt(i) for i in range(1, n_max + 1)])
    global annihilation, creation
    annihilation = csr(np.diag(sqrt_n, k=1))
    creation = csr(np.diag(sqrt_n, k=-1))
    annihilation.prune()
    creation.prune()
    global x_hat, p_hat
    ### we assume \hbar = m * \omega = 1
    x_hat = sqrt(1 / 2) * (creation + annihilation)
    p_hat = 1.j * sqrt(1 / 2) * (creation - annihilation)
    x_hat.prune()
    p_hat.prune()
    global x_hat_2, p_hat_2, xp_px_hat
    x_hat_2 = x_hat.dot(x_hat)
    p_hat_2 = np.real(p_hat.dot(p_hat))
    xp_px_hat = x_hat.dot(p_hat) + p_hat.dot(x_hat)
    x_hat_2.prune()
    p_hat_2.prune()
    xp_px_hat.prune()
    global harmonic_Hamil
    harmonic_Hamil = omega * np.diag(1 / 2 + n_phonon)
    harmonic_Hamil = csr(harmonic_Hamil)
    harmonic_Hamil.prune()
    if __name__ == '__main__':
        print('n_max adjusted to {}'.format(new_n_max))
    global eigen_states
    eigen_states = []
    for i in range(new_n_max + 1):
        eigen_states.append(
            common_factor_of_1Dharmonics(i) * np.polynomial.hermite.hermval(
                x.astype(np.float128, order='C'),
                np.array([0. for j in range(i)] + [1.], dtype=np.float128)))
    eigen_states = np.array(eigen_states).transpose().astype(np.float64, order='C')
def construct_graph(indices, costs, N):
    """
    Creates a compressed sparse row matrix of the travel costs associated with
    each node connection.

    The SciPy sparse row matrix function takes the following input arguments:
    :param indices: indices of the connected nodes. Each index is split into
        start nodes and end nodes for each connection. This is equivalent to
        the transposed indices matrix.
    :param costs: the costs associated with each node connection
    :param N: the size of the sparse graph
    :return: a SciPy compressed sparse row matrix describing the costs
        associated with each node connection.
    """
    s_graph = csr((costs, (indices[:, 0], indices[:, 1])), shape=(N, N))
    return s_graph
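# Illustrative call of construct_graph above. The values are made up for the
# example, and it assumes the aliases used throughout these snippets
# (numpy as np, `from scipy.sparse import csr_matrix as csr`).
import numpy as np

indices = np.array([[0, 1], [1, 2], [0, 2]])   # connections 0->1, 1->2, 0->2
costs = np.array([4.0, 1.5, 7.0])              # travel cost of each connection
graph = construct_graph(indices, costs, N=3)
print(graph.toarray())
# [[0.  4.  7. ]
#  [0.  0.  1.5]
#  [0.  0.  0. ]]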
def test_validate():
    csr = sparse.csr_matrix
    sym = DiscreteSymmetry(
        projectors=[csr(np.array([[1], [0]])), csr(np.array([[0], [1]]))])
    assert sym.validate(csr(np.array([[0], [1]]))) == 'Conservation law'
    assert sym.validate(np.array([[1], [0]])) is None
    assert sym.validate(np.eye(2)) is None
    assert sym.validate(1 - np.eye(2)) == 'Conservation law'

    sym = DiscreteSymmetry(particle_hole=sparse.identity(2))
    assert sym.validate(1j * sparse.identity(2)) is None
    assert sym.validate(sparse.identity(2)) == 'Particle-hole'

    sym = DiscreteSymmetry(time_reversal=sparse.identity(2))
    assert sym.validate(sparse.identity(2)) is None
    assert sym.validate(1j * sparse.identity(2)) == 'Time reversal'

    sym = DiscreteSymmetry(chiral=csr(np.diag((1, -1))))
    assert sym.validate(np.eye(2)) == 'Chiral'
    assert sym.validate(1 - np.eye(2)) is None
def intra_u(img, tmap, X):
    ##yet to add symmetricity
    N = X.shape[0]
    kuu = 5
    X[:, 3:5] = X[:, 3:5] / 20
    alpha = tmap.ravel()
    ind = np.arange(X.shape[0])
    unk = X[(alpha > 0.1) & (alpha < 0.9)]
    unkind = ind[(alpha > 0.1) & (alpha < 0.9)]
    #nearest unknown pixels to unknown
    kdt = KDTree(unk, leaf_size=30, metric='euclidean')
    nu = kdt.query(unk, k=kuu, return_distance=False)
    unk_nbr_true_ind = unkind[nu]
    unk_nu_ind = np.asarray(
        [int(i / kuu) for i in range(nu.shape[0] * nu.shape[1])])
    unk_nu_true_ind = unkind[unk_nu_ind]
    nbr = unk[nu]
    nbr = np.swapaxes(nbr, 1, 2)
    unk = unk.reshape((unk.shape[0], unk.shape[1], 1))
    x = nbr - unk
    x = np.abs(x)
    print(x.shape)
    y = 1 - np.sum(x, axis=1)
    y[y < 0] = 0
    row = unk_nu_true_ind
    col = unk_nbr_true_ind.ravel()
    data = y.ravel()
    z = csr((data, (col, row)), shape=(N, N))
    w = csr((data, (row, col)), shape=(N, N))
    w = w + z
    return w
def input_to_features(self, feature_list, unhashed_features, hashed_features,
                      unhashed_csr_file, hashed_csr_file, num_features):
    '''
    Converts the parsed input file to features and returns a scipy csr matrix

    Parameters:
    feature_list: list of features in order of input
    unhashed_features: top n features
    hashed_features: top m features (hashed)
    unhashed_csr_file: file where csr matrix is to be stored (unhashed)
    hashed_csr_file: file where csr matrix is to be stored (hashed)
    num_features: number of features
    '''
    row_arr = []
    col_arr_unhashed = []
    col_arr_hashed = []
    data_arr = []
    row = 0
    for text_arr in feature_list:
        #add 1 corresponding to x_0 and w_0
        row_arr.append(row)
        col_arr_unhashed.append(0)
        col_arr_hashed.append(0)
        data_arr.append(1)
        for term in text_arr:
            term = term.encode('ascii', 'ignore')
            try:
                col_unhashed = unhashed_features[term]
                col_hashed = hashed_features[term]
                row_arr.append(row)
                col_arr_unhashed.append(col_unhashed + 1)
                col_arr_hashed.append(col_hashed + 1)
                data_arr.append(1)
            except KeyError:
                pass
        row += 1
        if row % 5000 == 0:
            print row
    mat = csr((data_arr, (row_arr, col_arr_unhashed)), shape=(row, num_features + 1))
    self.save_sparse_csr(unhashed_csr_file, mat)
    mat = csr((data_arr, (row_arr, col_arr_hashed)), shape=(row, num_features + 1))
    self.save_sparse_csr(hashed_csr_file, mat)
def __init__(self, factorList=None, copy=True, isLog=False):
    """Take in a list of factors and convert & store them in the internal format

    Can also accept a matrix of Ising parameters.
    """
    if factorList is None:
        self.h = np.zeros(0)
        self.L = csr((0, 0))
        return
    if not isinstance(factorList[0], Factor):   # not a factor list => matrix?
        L = coo(factorList)
        LL = csr(factorList)
        n = L.shape[0]
        self.h = np.array([LL[i, i] for i in range(n)])          # extract diagonal
        self.dims = np.array([2 for i in range(n)], dtype=int)   # all variables binary
        keep = (L.row != L.col)
        data, row, col = L.data[keep], L.row[keep], L.col[keep]
        self.L = csr((data, (row, col)), shape=(n, n))   # keep in csr format
        self.L = .5 * (self.L + self.L.T)   # force symmetric if not (TODO: detect zeros & overwrite?)
    else:
        n = np.max([np.max(f.vars.labels) for f in factorList if len(f.vars)]) + 1
        assert np.max([np.max(f.vars.dims()) for f in factorList]) <= 2, "Variables must be binary"
        assert np.max([f.nvar for f in factorList]) <= 2, "Factors must be pairwise"
        self.dims = np.zeros((n,), dtype=int)
        for f in factorList:
            for v in f.vars:
                self.dims[v] = v.states
        self.h = np.zeros(n)
        self.L = csr(([], ([], [])), shape=(n, n))
        self.addFactors(factorList, isLog=isLog)
def get_banded(a: np.ndarray, p, q):
    """
    Converts the matrix a into a banded scipy.sparse.csr_matrix object with
    lower and upper bandwidths p and q, respectively.

    Returns a CSR matrix of the same shape and banded entries as a.
    """
    for i in range(a.shape[0]):
        for j in range(a.shape[1]):
            if i > j + p or j > i + q:
                a[i, j] = 0.
    return csr(a)
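# Small sketch exercising get_banded above: with p = q = 1 only the main,
# first sub- and first super-diagonals survive. Assumes numpy as np and the
# csr alias used throughout these snippets; note get_banded zeroes `a` in place.
import numpy as np

a = np.arange(16, dtype=float).reshape(4, 4)
tridiag = get_banded(a, p=1, q=1)
print(tridiag.toarray())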
def LBP(ising, maxIter=100, verbose=False):
    """Run loopy belief propagation (specialized for Ising models)

    lnZ, bel = LBP(ising, maxIter, verbose)
      lnZ : float, estimate of the log partition function
      bel : vector, bel[i] = estimated marginal probability that Xi = +1
    """
    # TODO: pass requested beliefs (like JT?), or "single", "factors", etc.
    assert isinstance(ising, Ising), "Model must be an Ising model for this version to work"
    R = ising.L.tocoo()
    row = R.row
    col = R.col
    mu = csr(([], ([], [])), shape=ising.L.shape)
    L_tanh = ising.L.tanh()
    for it in range(maxIter):
        mu_sum = arr(mu.sum(0)).reshape(-1)
        R = csr((ising.h[row] + mu_sum[row] - arr(mu[col, row]).reshape(-1), (row, col)),
                shape=ising.L.shape)
        mu = (L_tanh.multiply(R.tanh())).arctanh()
        if verbose:
            print("Iter " + str(it) + ": " + str(__Bethe(ising, R, mu)))
    R = csr((ising.h[row] + mu_sum[row] - arr(mu[col, row]).reshape(-1), (row, col)),
            shape=ising.L.shape)
    bel = 1. / (1 + np.exp(-2. * (arr(mu.sum(0)).reshape(-1) + ising.h)))
    lnZ = __Bethe(ising, R, mu, bel)
    return lnZ, bel
def writeSparseMatrix(nDays):
    for i in range(nDays):
        fil = "day" + "%d" % i + "PoissonParametersNonHom.txt"
        A = np.loadtxt(os.path.join("NonHomegeneousPP", fil))
        fil2 = "daySparse" + "%d" % i + "PoissonParametersNonHom.txt"
        f = open(os.path.join("NonHomogeneousPP2", fil2), 'w')
        A = csr(A)
        temp = A.nonzero()
        temp2 = np.array([A.data, temp[0], temp[1]])
        np.savetxt(f, temp2)
        f.close()

        fil = "day" + "%d" % i + "ExponentialTimesNonHom.txt"
        A = np.loadtxt(os.path.join("NonHomegeneousPP", fil))
        fil2 = "daySparse" + "%d" % i + "ExponentialTimesNonHom.txt"
        f = open(os.path.join("NonHomogeneousPP2", fil2), 'w')
        A = csr(A)
        temp = A.nonzero()
        temp2 = np.array([A.data, temp[0], temp[1]])
        np.savetxt(f, temp2)
        f.close()
def writeSparseMatrix(nDays):
    for i in range(nDays):
        fil = "day" + "%d" % i + "PoissonParametersNonHom.txt"
        A = np.loadtxt(os.path.join("NonHomogeneousPP2", fil))
        fil2 = "daySparse" + "%d" % i + "PoissonParametersNonHom.txt"
        f = open(os.path.join("SparseNonHomogeneousPP2", fil2), 'w')
        A = csr(A)
        temp = A.nonzero()
        temp2 = np.array([A.data, temp[0], temp[1]])
        np.savetxt(f, temp2)
        f.close()

        fil = "day" + "%d" % i + "ExponentialTimesNonHom.txt"
        A = np.loadtxt(os.path.join("NonHomogeneousPP2", fil))
        fil2 = "daySparse" + "%d" % i + "ExponentialTimesNonHom.txt"
        f = open(os.path.join("SparseNonHomogeneousPP2", fil2), 'w')
        A = csr(A)
        temp = A.nonzero()
        temp2 = np.array([A.data, temp[0], temp[1]])
        np.savetxt(f, temp2)
        f.close()
def row_agg(self, mat, mapping, rowK):
    '''
    Aggregates the row (document) vectors

    Params:
    mat: original matrix
    mapping: the row mapping (to cluster centroids)
    rowK: number of document clusters
    '''
    agg_mat = np.zeros(shape=(rowK, mat.shape[1]))
    i = 0
    for key in mapping:
        agg_mat[key].__iadd__(mat[i, :])
        i += 1
    return csr(agg_mat)
def test_validate_commutator():
    symm_class = ['AI', 'AII', 'D', 'C', 'AIII', 'BDI']
    sym_dict = {
        'AI': ['Time reversal'],
        'AII': ['Time reversal'],
        'D': ['Particle-hole'],
        'C': ['Particle-hole'],
        'AIII': ['Chiral'],
        'BDI': ['Time reversal', 'Particle-hole', 'Chiral']
    }
    n = 10
    rng = 10
    for sym in symm_class:
        # Random matrix in symmetry class
        h = kwant.rmt.gaussian(n, sym, rng=rng)
        if kwant.rmt.p(sym):
            p_mat = np.array(kwant.rmt.h_p_matrix[sym])
            p_mat = csr(np.kron(np.identity(n // len(p_mat)), p_mat))
        else:
            p_mat = None
        if kwant.rmt.t(sym):
            t_mat = np.array(kwant.rmt.h_t_matrix[sym])
            t_mat = csr(np.kron(np.identity(n // len(t_mat)), t_mat))
        else:
            t_mat = None
        if kwant.rmt.c(sym):
            c_mat = csr(np.kron(np.identity(n // 2), np.diag([1, -1])))
        else:
            c_mat = None
        disc_symm = DiscreteSymmetry(particle_hole=p_mat,
                                     time_reversal=t_mat,
                                     chiral=c_mat)
        assert disc_symm.validate(h) == []
        a = random_onsite_hop(n, rng=rng)[1]
        for symmetry in disc_symm.validate(a):
            assert symmetry in sym_dict[sym]
def test_eval_sparse_seq_0(batch_index_data, device_id):
    if cntk_device(device_id) != cpu():  # FIXME
        pytest.skip("sparse is not yet supported on GPU")
    dim = 10
    multiplier = 2
    in1 = input_variable(shape=(dim,), is_sparse=True)
    z = times(in1, np.eye(dim).astype(np.float32))
    z *= multiplier
    batch = [(np.eye(dim)[seq_index_data]).astype(np.float32)
             for seq_index_data in batch_index_data]
    expected = batch * multiplier
    sparse_val = [csr(seq) for seq in batch]
    result = z.eval({in1: sparse_val}, device=cntk_device(device_id))
    assert np.all(np.allclose(a, b) for a, b in zip(result, expected))
def set_diagonal(matrix, new):
    #WARNING: new is expected to be a sparse csr matrix (as opposed to what is expected in set_new)
    matrix.eliminate_zeros()
    new.eliminate_zeros()
    rows, cols = matrix.nonzero()
    data = matrix.data
    old = rows != cols
    rows_old, cols_old = rows[old], cols[old]
    data_old = data[old]
    rows_cols_new = new.nonzero()[0]
    data_new = new.data
    cols_, rows_ = (np.concatenate([cols_old, rows_cols_new], 0),
                    np.concatenate([rows_old, rows_cols_new], 0))
    data_ = np.concatenate([data_old, data_new], 0)
    return csr((data_, (rows_, cols_)), shape=matrix.shape)
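# Usage sketch for set_diagonal above: replace the diagonal of a sparse matrix
# while keeping its off-diagonal entries. Values are illustrative; assumes
# numpy as np and `from scipy.sparse import csr_matrix as csr`.
import numpy as np
from scipy.sparse import csr_matrix as csr

matrix = csr(np.array([[1., 2., 0.],
                       [0., 3., 4.],
                       [5., 0., 6.]]))
new = csr(np.diag([7., 8., 9.]))   # new diagonal, given as a sparse matrix
print(set_diagonal(matrix, new).toarray())
# [[7. 2. 0.]
#  [0. 8. 4.]
#  [5. 0. 9.]]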
def normalize_rows(self, mat, mapping):
    '''
    Normalizes the row clusters

    Parameters:
    mat: matrix
    mapping: mapping of row to cluster
    '''
    cluster_sum = [0] * mat.shape[0]
    for key in mapping:
        cluster_sum[key] += 1.
    for row_no in range(mat.shape[0]):
        if cluster_sum[row_no] == 0:
            cluster_sum[row_no] = 1.
    cluster_sum = np.array(cluster_sum)
    return csr((mat.T.toarray().__mul__(1 / cluster_sum)).T)
def kron_s(A, B, sec):
    fp = A.fparity
    ss = A.sym_sum
    sym = A.sym
    dim = sym * A.dim * B.dim
    ret = csr((dim, dim), dtype=np.complex)
    for row1 in range(sym):
        for col1 in range(sym):
            row2 = ss(sec, -row1)
            col2 = ss(sec, -col1)
            if not A._empty_(row1, col1) and not B._empty_(row2, col2):
                sign = 1 - 2 * (fp[col1] * fp[ss(col2, -row2)])
                temp = coo(sparse.kron(A.val[row1][col1], B.val[row2][col2]) * sign)
                block = A.dim * B.dim
                add = coo((temp.data,
                           (temp.row + row1 * block, temp.col + col1 * block)),
                          (dim, dim), dtype=np.complex)
                ret += add
    return ret
def __init__(self, sym=2, dim=1, datatype=np.float, fparity=None, sym_sum=None):
    def sym_sum_n(a, b):
        return (a + b + sym) % sym

    self.sym = sym            # number of total symmetry sectors
    self.dim = dim            # dimension of each symmetry sector
    self.datatype = datatype
    if fparity is None:
        self.fparity = np.zeros(sym, dtype=np.int)   # fermion parity
    else:
        self.fparity = fparity
    if sym_sum is None:
        self.sym_sum = sym_sum_n   # the rule to sum the symmetries
    else:
        self.sym_sum = sym_sum
    self.val = [[csr((dim, dim), dtype=datatype) for x in range(sym)]
                for y in range(sym)]
    self.basis = None
    self.L = 1
def normalize_cols(self, mat, mapping):
    '''
    Normalizes the column clusters

    Parameters:
    mat: matrix
    mapping: mapping of column to cluster
    '''
    cluster_sum = [0] * mat.shape[1]
    for key in mapping:
        cluster_sum[key] += 1.
    for col_no in range(mat.shape[1]):
        if cluster_sum[col_no] == 0:
            cluster_sum[col_no] = 1.
    cluster_sum = np.array(cluster_sum)
    return csr(mat.toarray().__mul__(1 / cluster_sum))
def fp_fv_mod(x, y, time, m_tmp, a1_tmp, a2_tmp, D11, D22, D12, dt, dx):
    I = x.size
    J = y.size
    eye = sparse.identity((I * J), format='lil')
    #get function values
    [f1_array, f2_array] = iF.f_global(time, x, y, a1_tmp, a2_tmp)
    D11 = iF.Sigma_D11_test(time, x, y, a1_tmp, a2_tmp, m_tmp)
    D12 = iF.Sigma_D12_test(time, x, y, a1_tmp, a2_tmp, m_tmp)
    D22 = iF.Sigma_D22_test(time, x, y, a1_tmp, a2_tmp, m_tmp)
    #make matrices
    LHS = mg.add_diffusion_flux_Ometh(eye, D11, D22, D12, I, J, dx, dt)
    LHS = sparse.csr_matrix(LHS)   #scipy.sparse exposes csr_matrix, not csr
    RHS = mg.fp_fv_convection(time, x, a_tmp, m_tmp, dt, dx)
    return sparse.linalg.spsolve(LHS, RHS * m_tmp)
def addFactors(self, flist, copy=True, isLog=False):
    """Add a list of (binary, pairwise) factors to the model; factors are
    converted to Ising parameters."""
    row = np.zeros(2 * len(flist), dtype=int) - 1
    col = row.copy()
    data = np.zeros(2 * len(flist))
    for k, f in enumerate(flist):
        if not isLog:
            if np.any(f.t <= 0):
                f = f + 1e-10   # TODO: log nonzero tol
            f = f.log()
        if f.nvar == 1:
            Xi = f.vars[0]
            self.h[Xi] += .5 * (f[1] - f[0])
            self.c += .5 * (f[1] + f[0])
        else:
            Xi, Xj = f.vars[0], f.vars[1]
            row[2*k], col[2*k], data[2*k] = int(Xi), int(Xj), .25 * (f[1, 1] + f[0, 0] - f[0, 1] - f[1, 0])
            row[2*k+1], col[2*k+1], data[2*k+1] = col[2*k], row[2*k], data[2*k]
            self.h[Xi] += .5 * (f[1, 0] - f[0, 0]) + data[2*k]   # L[Xi,Xj]
            self.h[Xj] += .5 * (f[0, 1] - f[0, 0]) + data[2*k]   # L[Xi,Xj]
            self.c += .25 * (f[1, 1] + f[1, 0] + f[0, 1] + f[0, 0])
    self.L += csr((data[row >= 0], (row[row >= 0], col[row >= 0])),
                  shape=(self.nvar, self.nvar))
def file_to_csr(self, file_name):
    '''
    Converts file to list of maps where each map represents a doc vector
    '''
    f = open(file_name, 'r')
    count = 0
    row_arr = []
    col_arr = []
    val_arr = []
    max_col = 0
    for line in iter(lambda: f.readline().rstrip(), ''):
        for x in line.split(' '):
            col = int(x.split(':')[0])
            if col > max_col:
                max_col = col
            col_arr.append(col)
            val_arr.append(int(x.split(':')[1]))
            row_arr.append(count)
        count += 1
    return csr((val_arr, (row_arr, col_arr)), shape=(count, max_col + 1)).tocsc()
def __init__(self, coor_x, coor_y, val, max_rat, min_rat, user_num, movie_num,
             U=None, V=None, alpha=2.0, mu0=0.0, D=30, T=100):
    """
    coor and val together describe the sparse rating matrix:
    coor is a list whose items are [user, movie] index pairs,
    and val is a list holding the rating for each pair in coor.
    """
    self.alpha = alpha
    self.mu0 = mu0
    self.D = D
    self.v0 = D
    self.beta0 = 2.0
    self.val = val
    self.N = user_num
    self.M = movie_num
    self.max_rat = max_rat
    self.min_rat = min_rat
    self.T = T
    self.R = csr((val, (coor_x, coor_y)), shape=(self.N, self.M))
    self.U = np.random.normal(size=(self.N, self.D))
    self.V = np.random.normal(size=(self.M, self.D))
    self.W0_user = np.eye(self.D)
    self.W0_item = np.eye(self.D)
    self.rmses = []
def bandedLU(M: csr, ml, mu):
    """
    Computes the standard LU decomposition of a class 'scipy.sparse.csr.csr_matrix'
    banded square matrix M with lower and upper bandwidths ml and mu, respectively.

    Returns L and U as sparse CSR matrices.
    """
    m = M.shape[0]
    u = M.copy()   # can remove to act directly on M

    # Allocating memory to store nnzl number of non-zero entries of L
    nnzl = int(m * (ml + 1) - ml * (ml + 1) / 2)
    l_row = np.zeros(nnzl).astype(np.int_)
    l_val = np.ones(nnzl).astype(M.dtype)
    for i in range(m):
        l_row[i] = i
    l_col = l_row.copy()
    count = i + 1   # counter for the next entry of L

    for k in range(m - 1):
        column_entries_ind = u.indptr[k] + (
            u.indices[u.indptr[k]:u.indptr[min(k + ml + 1, m)]] == k).nonzero()[0]
        for i, ind in enumerate(column_entries_ind[1:]):
            l = u.data[ind] / u.data[column_entries_ind[0]]
            l_val[count] = l
            l_col[count] = k
            l_row[count] = int(k + i + 1)
            count += 1
            b = min(mu + 1, m - k)
            u.data[ind + 1:ind + b] -= l * u.data[column_entries_ind[0] + 1:column_entries_ind[0] + b]
            u.data[ind] = 0.
        u.eliminate_zeros()

    l = csr((l_val, (l_row, l_col)))
    return l, u
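# Rough numerical check of bandedLU above on a small, fully banded, diagonally
# dominant matrix (a sketch only; assumes get_banded from earlier, numpy as np
# and the csr alias, and that no pivoting is needed).
import numpy as np

np.random.seed(0)
ml, mu = 2, 1
dense = np.random.randn(6, 6) + 6.0 * np.eye(6)   # strong diagonal, nonzero pivots
M = get_banded(dense, ml, mu)
L, U = bandedLU(M, ml, mu)
assert np.allclose(L.dot(U).toarray(), M.toarray())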
def fractionalflow(self):
    self.fw = (self.krw * self.muo) / (self.krw * self.muo + self.kro * self.muw)
    N = self.fw.size
    one = np.ones(N - 1)
    idx = np.array(list(range(N)))
    row = np.concatenate(((idx[0], idx[-1]), idx[:-1], idx[1:]))
    col = np.concatenate(((idx[0], idx[-1]), idx[1:], idx[:-1]))
    val = np.concatenate(((-1, 1), one, -one))
    G = csr((val, (row, col)), shape=(N, N))
    fw_diff = G * self.fw
    Sw_diff = G * self.Sw
    self.fw_der = fw_diff / Sw_diff
def generate_matrix(df, r, g_item1, g_item2):
    at1 = pd.to_numeric(df[g_item1[0]])
    op1 = g_item1[1]
    at2 = pd.to_numeric(df[g_item2[0]])
    op2 = g_item2[1]
    N = len(at1)   # N is the number of transactions
    matrix = np.zeros((N, N))
    if op1 == '>' and op2 == '>':
        for i in range(N):
            for j in range(N):
                if i != j:
                    matrix[i, j] = Concordance_degree([at1[i], at1[j]], [at2[i], at2[j]], r)
    if op1 == '>' and op2 == '<':
        for i in range(N):
            for j in range(N):
                if i != j:
                    matrix[i, j] = Concordance_degree([at1[i], at1[j]], [at2[j], at2[i]], r)
    return csr(matrix)
def load_data(input_file, test_percentage):
    data = np.loadtxt(input_file)
    rating = data[:, 2]   #1-5
    number_ratings = len(rating)
    user_ids = data[:, 0].astype(int)
    user_ids -= 1    #convert to 0 based indexing
    movie_ids = data[:, 1].astype(int)
    movie_ids -= 1   #convert to 0 based indexing
    reviews = csr((rating, (user_ids, movie_ids)),
                  shape=(max(user_ids) + 1, max(movie_ids) + 1))
    reviews = reviews.toarray()
    test_idxs = np.array(random.sample(range(number_ratings),
                                       number_ratings / test_percentage))
    train_reviews = np.array(reviews)
    for idx in test_idxs:
        train_reviews[user_ids[idx]][movie_ids[idx]] = 0
    test_reviews = np.zeros_like(reviews)
    for idx in test_idxs:
        test_reviews[user_ids[idx]][movie_ids[idx]] = reviews[user_ids[idx]][movie_ids[idx]]
    return train_reviews, test_reviews
def __init__(self, mesh, **kwargs):
    LinearSimulation.__init__(self, mesh, **kwargs)

    # Find non-zero cells
    if getattr(self, "actInd", None) is not None:
        if self.actInd.dtype == "bool":
            indices = np.where(self.actInd)[0]
        else:
            indices = self.actInd
    else:
        indices = np.asarray(range(self.mesh.nC))

    self.nC = len(indices)

    # Create active cell projector
    projection = csr(
        (np.ones(self.nC), (indices, range(self.nC))),
        shape=(self.mesh.nC, self.nC)
    )

    # Create vectors of nodal location for the lower and upper corners
    bsw = self.mesh.gridCC - self.mesh.h_gridded / 2.0
    tne = self.mesh.gridCC + self.mesh.h_gridded / 2.0

    xn1, xn2 = bsw[:, 0], tne[:, 0]
    yn1, yn2 = bsw[:, 1], tne[:, 1]

    self.Yn = projection.T * np.c_[mkvc(yn1), mkvc(yn2)]
    self.Xn = projection.T * np.c_[mkvc(xn1), mkvc(xn2)]

    # Allows for 2D mesh where Zn is defined by user
    if self.mesh.dim > 2:
        zn1, zn2 = bsw[:, 2], tne[:, 2]
        self.Zn = projection.T * np.c_[mkvc(zn1), mkvc(zn2)]
def split_data(X, test_percentage=10):
    X = csr(X)
    number_ratings = len(X.indices)
    user_ids = []
    indptr = X.indptr
    for i in range(len(indptr) - 1):
        t1 = indptr[i + 1] - indptr[i]
        for j in range(t1):
            user_ids.append(i)
    movie_ids = X.indices
    test_idxs = np.array(
        random.sample(range(number_ratings), number_ratings / test_percentage))
    X = X.toarray()
    train_reviews = np.array(X)
    for idx in test_idxs:
        train_reviews[user_ids[idx]][movie_ids[idx]] = 0
    test_reviews = np.zeros_like(X)
    for idx in test_idxs:
        test_reviews[user_ids[idx]][movie_ids[idx]] = X[user_ids[idx]][movie_ids[idx]]
    return train_reviews, test_reviews
def ls_featuresign_sub(A, y, AtA, Aty, gamma, xinit=None):
    # ssp is assumed to be scipy.sparse, so csr_matrix is used for the sparse vectors
    L, M = A.shape
    rankA = min(A.shape[0] - 10, A.shape[1] - 10)

    # Step 1: initialize
    usexinit = False
    if xinit is None:
        xinit = []
        x = ssp.csr_matrix(np.zeros((M, 1)))
        theta = ssp.csr_matrix(np.zeros((M, 1)))
        act = ssp.csr_matrix(np.zeros((M, 1)))
        allowZero = False
    else:
        x = ssp.csr_matrix(xinit)
        theta = ssp.csr_matrix(x)
        act = ssp.csr_matrix(np.abs(theta))
        usexinit = True
        allowZero = True
    #[TO BE INSERTED] debug file

    fobj = 0
    ITERMAX = 1000
    optimality1 = False
    for iter in range(ITERMAX):
        act_indx0 = np.where(act == 0)
        grad = np.dot(AtA, ssp.csr_matrix(x)) - Aty
        theta = np.sign(x)
        optimality0 = False

        # step 2: pick the inactive coefficient with the largest gradient magnitude
        abs_grad = np.abs(grad[act_indx0])
        indx = np.argmax(abs_grad)
        mx = abs_grad[indx]
        if mx >= gamma and (iter > 1 or not usexinit):
            act[act_indx0[indx]] = 1
            theta[act_indx0[indx]] = -np.sign(grad[act_indx0[indx]])
            usexinit = False
        else:
            optimality0 = True
            if optimality1:
                break
        act_indx1 = np.where(act == 1)
        if len(act_indx1) > rankA:
            print "warning: sparsity penalty is too small: too many coefficients are activated!"
            return
        if act_indx1.size == 0:
            if allowZero:
                allowZero = False
                continue
            return

        k = 0
        while 1:
            k += 1
            if k > ITERMAX:
                print "Maximum number of iterations reached. The solution may not be optimal"
                return
            if act_indx1.size == 0:
                if allowZero:
                    allowZero = False
                    break
                return
            # step 3
            x, theta, act, act_indx1, optimality1, lsearch, fobj = compute_FS_step(
                x, A, y, AtA, Aty, theta, act, act_indx1, gamma)
            # step 4
            if optimality1:
                break
            if lsearch > 0:
                continue

    if iter >= ITERMAX:
        print "maximum number of iterations reached. The solution may not be optimal"
    #[add later] optimality check: fobj = fobj_featuresign(x, A, y, AtA, Aty, gamma)
    return x, fobj
    [0,1,6],
])
def test_eval_sparse_no_seq(batch_index_data, device_id):
    dim = 10
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier*np.eye(dim))
        batch = np.eye(dim)[batch_index_data]
        expected = batch * multiplier
        sparse_val = csr(batch.astype('f'))
        result = z.eval({in1: [sparse_val]}, device=cntk_device(device_id))
        assert np.allclose(result, [expected])

@pytest.mark.parametrize("batch", [
    [csr([0, 1, 2, 0])],
    [csr([[0, 2, 0, 7], [10, 20, 0, 0]]),
     csr([0, 0, 0, 3])]
])
def test_eval_sparse_seq_1(batch, device_id):
    dim = 4
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier*np.eye(dim))
        if isinstance(batch[0], list):
            expected = [np.vstack([m.todense() * multiplier for m in seq])
                        for seq in batch]
        else:
def batched_gradient_descent(self, feature_file, y_file, w_final_file, num_features,
                             alpha, batch_size, lambda_var, cv_percentage):
    '''
    Batched gradient descent function

    Parameters:
    feature_file: file containing features in sparse scipy format
    y_file: file containing ratings in same order as feature_file
    w_final_file: file storing final w as csr matrix
    num_features: number of features
    alpha: value of alpha for gradient descent
    batch_size: size of each batch
    lambda_var: value of lambda
    cv_percentage: percentage of dataset to be used for cross validation
    '''
    x_mat = obj.load_sparse_csr(feature_file)  #feature matrix
    x_mat = csr(x_mat[:, 1:])                  #removing the first (all 1) feature

    min_rating = 1000
    max_rating = 0
    y_arr = []  #y values corr to feature matrix
    f = open(y_file, 'r')
    stars_list = ast.literal_eval(f.read())
    for rating in stars_list:
        if rating > max_rating:
            max_rating = rating
        if rating < min_rating:
            min_rating = rating
        y_arr.append(rating)
    f.close()

    y_mat = np.zeros((max_rating - min_rating + 1, len(y_arr)))  #5 x num_input_examples
    col = 0
    for rating in y_arr:
        y_mat[rating - 1][col] = 1
        col += 1

    #init max_rating-min_rating+1 number of feature vectors; init with all weights as 1/n
    num_vectors = max_rating - min_rating + 1
    w_mat = np.zeros(shape=(num_vectors, num_features))
    w_mat[:] = 1. / num_features
    w_mat = csr(w_mat)

    #parameters
    m = batch_size  #batch size
    total_x = len(y_arr)

    #determine cross validation dataset
    cv_size = int((cv_percentage * 0.01 * total_x) % m) * m
    max_start = int(total_x / m) * m - m - cv_size
    cv_start = 0
    while True:
        rand_start = random.randint(0, max_start)
        cv_start = int(rand_start / m) * m
        if cv_start <= max_start:
            break

    #iterate
    i = 0
    j = 0
    old_hard_accuracy = 0.
    old_soft_accuracy = 0.
    iter_no = 1
    temp_alpha = alpha
    while True:
        if i == cv_start:
            i = cv_start + cv_size
        if i >= total_x:
            #check stopping condition: cross validation error
            #NOTE: soft_accuracy = rmse
            gold_y = y_arr[cv_start:(cv_start + cv_size)]
            x_crossval = x_mat[cv_start:(cv_start + cv_size), :]
            hard_pred, soft_pred = self.predict_class(False, None, None, None, x_crossval, w_mat)
            hard_accuracy, soft_accuracy = self.compute_cv_error(hard_pred, soft_pred, gold_y)
            print hard_accuracy, soft_accuracy
            if math.fabs(hard_accuracy - old_hard_accuracy) < 0.01 and iter_no > 10:
                print 'FINAL TRAINING ACCURACY:'
                hard_pred, soft_pred = self.predict_class(False, None, None, None, x_mat, w_mat)
                hard_accuracy, rmse = self.compute_cv_error(hard_pred, soft_pred, y_arr)
                print hard_accuracy, rmse
                print '--------------------------'
                break
            i = 0
            old_hard_accuracy = hard_accuracy
            old_soft_accuracy = soft_accuracy
            #get a new set of examples to be used for cross validation: random cross-validation
            cv_start = 0
            while True:
                rand_start = random.randint(0, max_start)
                cv_start = int(rand_start / m) * m
                if cv_start <= max_start:
                    break
            iter_no += 1
            alpha /= (2**iter_no)
            continue

        j = min(i + m - 1, total_x - 1)  #total m examples in a batch
        x_batch_mat = csr(x_mat[i:j + 1, :])  #m x num_features
        w_dot_x = w_mat.dot(x_batch_mat.transpose()).transpose()  #m x 5
        w_dot_x_arr = w_dot_x.toarray()
        w_dot_x_arr[w_dot_x_arr > 20] = 20
        w_dot_x_arr[w_dot_x_arr < -20] = -20
        w_dot_x = csr(w_dot_x_arr)
        w_dot_x_exp = csr(np.exp(w_dot_x.toarray()))          #m x 5
        w_dot_x_sum = w_dot_x_exp.sum(axis=1)                 #row sum (m x 1)
        x_div_mat = csr(w_dot_x_exp.toarray() / w_dot_x_sum)  #m x 5
        y_batch_mat = csr(y_mat[:, i:j + 1].transpose())      #m x 5
        sub_mat = csr(y_batch_mat.toarray() - x_div_mat.toarray())    #m x 5
        sum_by_m_mat = csr(sub_mat.transpose().dot(x_batch_mat) / m)  #5 x num_features
        w_mat_prev = w_mat
        w_mat = w_mat + alpha * (sum_by_m_mat - lambda_var * w_mat)
        i = j + 1

    self.save_sparse_csr(w_final_file, w_mat)
    [0, 1, 6],
])
def test_eval_sparse_no_seq(batch_index_data, device_id):
    dim = 10
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = sequence.input(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        batch = np.eye(dim)[batch_index_data]
        expected = batch * multiplier
        sparse_val = csr(batch.astype('f'))
        result = z.eval({in1: [sparse_val]}, device=cntk_device(device_id))
        assert np.allclose(result, [expected])

@pytest.mark.parametrize("batch", [
    [csr([0, 1, 2, 0])],
    [csr([[0, 2, 0, 7], [10, 20, 0, 0]]),
     csr([0, 0, 0, 3])]
])
def test_eval_sparse_seq_1(batch, device_id):
    dim = 4
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = sequence.input(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        if isinstance(batch[0], list):
            expected = [np.vstack([m.todense() * multiplier for m in seq])
                        for seq in batch]
        else:
            expected = [seq.todense() * multiplier for seq in batch]
        result = z.eval({in1: batch}, device=cntk_device(device_id))
def load_sparse_csr(self, filename):
    '''
    Loads a sparse matrix
    '''
    loader = np.load(filename)
    return csr((loader['data'], loader['indices'], loader['indptr']),
               shape=loader['shape'])
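# The save_sparse_csr counterpart used throughout these snippets is not shown.
# A minimal sketch consistent with the npz keys that load_sparse_csr reads back
# (an assumption, not the original implementation):
def save_sparse_csr(self, filename, mat):
    '''
    Saves a scipy CSR matrix so load_sparse_csr above can rebuild it
    '''
    np.savez(filename, data=mat.data, indices=mat.indices,
             indptr=mat.indptr, shape=mat.shape)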