def detect_clusters_big_v4(g):
    """Detect clusters in a (large) graph using only its "+" affinity edges.

    Idea: every node receives a running ``serial_num`` attribute that is used
    as its row/column index in a sparse connectivity matrix.  Each edge whose
    ``affinity`` attribute equals ``"+"`` sets one entry of that matrix at the
    serial numbers of its two endpoints.  The component label computed for
    each node is stored in its ``lbl`` attribute, and nodes sharing a label
    form one cluster.

    Note carried over from the original author: the alternative based on
    ``np.indices(mat.shape).T[:, :, [1, 0]]`` cannot take in and process a
    matrix that is too large (although it works acceptably for these needs).

    Side effects: overwrites the ``serial_num`` and ``lbl`` attributes of every
    node of ``g`` and writes an ``i/total`` progress counter to stdout.

    :param g: graph for which clusters should be detected; every edge must
        carry an ``affinity`` attribute
    :return: set of frozensets, each containing the nodes of one connected
        component
    """
    nx.set_node_attributes(g, "", "serial_num")
    nx.set_node_attributes(g, "", "lbl")

    # Node -> matrix index, built once so the edge loop below does not have to
    # rebuild a dict of all nodes for every single edge.
    serial = {}
    for idx, node in enumerate(list(g.nodes())):
        g.add_node(node, serial_num=idx)
        serial[node] = idx

    row = []
    col = []
    total = len(g.edges)
    for count, (u, v, attrs) in enumerate(g.edges(data=True), start=1):
        sys.stdout.write(f"{count}/{total}")
        sys.stdout.flush()
        # Equality, not identity: `is` on strings only works by interning luck.
        if attrs['affinity'] == "+":
            row.append(serial[u])
            col.append(serial[v])
        sys.stdout.write("\r")

    n = len(g.nodes)
    data = np.ones(len(row), dtype=int)
    mat = cm(
        (data, (np.asarray(row, dtype=int), np.asarray(col, dtype=int))),
        shape=(n, n),
    )
    # NOTE(review): presumably scipy.sparse.csgraph.connected_components, in
    # which case the positional False means "undirected" -- confirm.
    n_comps, lbls = connected_components(mat, False)

    # Labels come back in matrix-index order, which is the node iteration
    # order used when the serial numbers were handed out.
    members = {}
    for (node, attrs), lbl in zip(g.nodes(data=True), lbls):
        attrs['lbl'] = lbl
        members.setdefault(lbl, []).append(node)

    return {frozenset(nodes) for nodes in members.values()}
def blob2csr(layer_name, threshold):
    """Prune small weights of one layer and save the result as a sparse matrix.

    The layer's first parameter blob is converted with ``filter_2_matrix``;
    every entry whose absolute value is below ``2 * threshold`` is set to zero
    in place.  The pruned matrix is wrapped with ``cm`` and written with
    ``ss.save_npz`` to ``output_dir + layer_name``, and the percentage of
    pruned entries is printed.

    :param layer_name: key into ``net.params`` naming the layer to prune
    :param threshold: entries with ``abs(value) < 2 * threshold`` are zeroed
    :return: the pruned (dense) parameter matrix
    """
    param = net.params[layer_name][0].data
    # assumes filter_2_matrix returns a NumPy array -- TODO confirm
    pm = filter_2_matrix(param)  # parameter matrix

    # One vectorised mask instead of a Python loop over every element.
    small = abs(pm) < 2 * threshold
    counter = int(small.sum())
    pm[small] = 0

    spa_pm = cm(pm)
    # NOTE(review): if `ss` is scipy.sparse, the third positional argument of
    # save_npz is `compressed`, so 'a' merely acts as a truthy flag -- confirm.
    ss.save_npz(output_dir + '{}'.format(layer_name), spa_pm, 'a')

    # An empty matrix has nothing to prune; avoid dividing by zero.
    zero_persent = (float(counter) / float(pm.size)) * 100 if pm.size else 0.0
    print('zero weights percentage in layer {} is: '.format(layer_name) + str(zero_persent) + '%')
    return pm
def t_yelp():
    """Load a small slice of the data under ``./yelp_data`` and run ``test``.

    Only users and items whose ids are below ``limit`` (100) are kept.  The
    function builds:

    * ``T`` -- user -> user entries (value 1.0) from ``users.txt``
    * ``R`` / ``R_test`` -- user -> item ratings divided by ``max_r`` from the
      train / test rating files
    * ``Rcsr`` -- sparse matrix of the raw (unscaled) training ratings
    * ``C`` -- ``cNum`` lists of item ids, one per class, from
      ``items-class.txt``

    and passes them, with fixed hyper-parameters, to ``test``.
    """
    # data from: http://www.trustlet.org/wiki/Epinions_datasets
    # NOTE(review): the link above names the Epinions datasets while the paths
    # below point at ./yelp_data -- confirm which source is correct.
    N, M = 0, 0
    max_r = 5.0
    cNum = 8
    R = defaultdict(dict)
    T = defaultdict(dict)
    R_test = defaultdict(dict)
    limit = 100

    print('get T')
    with open('./yelp_data/users.txt', 'r') as users_file:
        for line in users_file:
            fields = line.split(':')
            u = int(fields[0])
            # Drop the first and last character of the second field, then
            # split on commas; the last piece is always discarded below.
            uf = fields[1][1:-1].split(',')
            if len(uf) > 1:
                for x in uf[:-1]:
                    v = int(x)
                    if u < limit and v < limit:
                        T[u][v] = 1.0

    print('get R')
    ul, il, rl = [], [], []
    with open('./yelp_data/ratings-train.txt', 'r') as train_file:
        for line in train_file:
            u, i, r = [int(x) for x in line.split('::')[:3]]
            if u < limit and i < limit:
                N = max(N, u)
                M = max(M, i)
                ul.append(u)
                il.append(i)
                rl.append(r)
                R[u][i] = r / max_r
    Rcsr = cm((rl, (ul, il)))
    # Largest id seen + 1 gives the number of users / items.
    N += 1
    M += 1

    print('get R_test')
    with open('./yelp_data/ratings-test.txt', 'r') as test_file:
        for line in test_file:
            u, i, r = [int(x) for x in line.split('::')[:3]]
            if u < limit and i < limit:
                R_test[u][i] = r / max_r

    print("get Circle")
    C = [[] for _ in range(cNum)]
    with open('./yelp_data/items-class.txt', 'r') as class_file:
        for line in class_file:
            i, ci = [int(x) for x in line.split(' ')]
            if i < limit:
                C[ci].append(i)

    lambdaU, lambdaV, lambdaT, K = 0.2, 0.2, 0.1, 4
    test(R, T, C, N, M, K, max_r, lambdaU, lambdaV, lambdaT, R_test, Rcsr)
def __init__(self, data, train_data, test_data, **paras):
    """Store hyper-parameters and datasets, then build the rating matrix.

    :param data: all observations; columns 0, 1 and 2 are used as row index,
        column index and value of the sparse matrix ``self.X``
    :param train_data: training observations (only its length is read here)
    :param test_data: test observations (only its length is read here)
    :param paras: must contain ``K``, ``reg``, ``eps``, ``initial``,
        ``max_iter`` and ``tol``
    """
    # (attribute name, key in ``paras``), copied over in this order.
    hyper_params = (
        ('K', 'K'),
        ('reg', 'reg'),
        ('eps', 'eps'),
        ('initial', 'initial'),
        ('ite', 'max_iter'),
        ('tol', 'tol'),
    )
    for attr_name, key in hyper_params:
        setattr(self, attr_name, paras[key])

    self.data = data
    self.obs_num = len(self.data)

    self.train_data = train_data
    self.train_num = len(self.train_data)

    self.test_data = test_data
    self.test_num = len(self.test_data)

    # Runs after the datasets are stored and before the matrix is built; what
    # it loads is not visible from this method.
    self.load_lib()

    # index starting from 0
    values = self.data[:, 2]
    row_idx = self.data[:, 0]
    col_idx = self.data[:, 1]
    self.X = cm((values, (row_idx, col_idx)))
    self.M, self.N = self.X.shape

    logging.info('finish initiating the model, paras=%s', paras)
def run(self):
    """Fit the factor matrices U and V by gradient descent with line search.

    Python 2 code (uses the ``print`` statement).

    Each round computes gradients with ``self.get_grad`` and tries the step
    ``1 / eps_1`` up to ``temp_max_t1`` times: a trial that lowers the
    objective is accepted and ``eps_1`` is multiplied by 0.95, otherwise
    ``eps_1`` is multiplied by 1.5 and the trial is repeated.  Iteration ends
    after ``self.ite`` rounds, when the relative objective change or the
    objective itself falls below ``self.tol``, or when the line search reaches
    its last attempt.

    Side effects: sets ``self.rmses`` and ``self.maes`` (99.0 when nothing was
    recorded) and, if ``self.save_uv`` is set, writes U and V with
    ``np.savetxt`` under ``dir_``.

    NOTE(review): the source text had lost its indentation; nesting below is
    the most plausible reconstruction (the trailing ``else:`` is paired with
    ``if len(self.test_data)``, and the ``t1`` check sits after the
    line-search loop) -- confirm against the original file.

    :return: tuple ``(U, V)``
    """
    logger.info('MF running: parras: K=%s, reg=%s, lr=%s, silent_run=%s',
                self.K, self.lamb, self.eps, self.silent_run)

    # X is only used to obtain the overall matrix shape (M, N).
    X = cm((self.data[:, 2], (self.data[:, 0].astype(np.int32), self.data[:, 1].astype(np.int32))))  #index starting from 0
    M, N = X.shape
    # print X.shape
    omega = cm((self.train_data[:, 2], (self.train_data[:, 0], self.train_data[:, 1])), shape=(M, N))  #index starting from 0
    # print omega.shape
    if len(self.test_data):
        trows, tcols = self.test_data[:, 0].astype(
            np.int32), self.test_data[:, 1].astype(np.int32)

    # Small random initial factors.
    U = np.random.rand(M, self.K) * 0.0002
    V = np.random.rand(N, self.K) * 0.0002
    bias = self.train_data[:, 2].mean(
    )  # in reality, bias can also be updated, modified later
    # bias = 0.0
    # print bias
    eps_1 = eps_2 = self.eps

    # Coordinates and values of the training entries.
    rows, cols = omega.tocoo().row.astype(
        np.int32), omega.tocoo().col.astype(np.int32)
    obs = omega.copy().data.astype(np.float64).reshape(self.train_num, 1)
    # print type(obs)
    # presumably cal_omega updates omega in place from U, V, bias and obs --
    # verify against its definition.
    self.cal_omega(omega, U, V, rows, cols, bias, obs)
    objs_1 = [self.obj(U, V, omega)]  # objs_1[k] is the objective before round k
    objs_2 = []
    trmses = []
    rmses, maes, costs, acu_cost = [], [], [], []
    run_start = time.time()
    # print "start run..."

    for rnd in range(0, self.ite):
        if rnd % 50 == 0:
            print "rnd", rnd
        start = time.time()
        self.cal_omega(omega, U, V, rows, cols, bias, obs)
        #grad_bias = -omega + self.lamb * bias
        #bias = bias - 1.0/eps_1 * grad_bias
        du, dv = self.get_grad(omega, U, V)

        # Trial steps are evaluated on a copy, leaving omega untouched here.
        l_omega = omega.copy()
        temp_max_t1 = 100
        for t1 in range(0, temp_max_t1):  #line search
            LU = U - 1.0 / eps_1 * du
            LV = V - 1.0 / eps_1 * dv
            self.cal_omega(l_omega, LU, LV, rows, cols, bias, obs)
            l_obj = self.obj(LU, LV, l_omega)
            if l_obj < objs_1[rnd]:
                # Improvement: accept the step and allow a larger one next time.
                U, V = LU, LV
                eps_1 *= 0.95
                objs_1.append(l_obj)
                trmses.append(self.train_rmse(U, V, bias, l_omega))
                break
            else:
                # No improvement: shrink the step (larger eps_1) and retry.
                eps_1 *= 1.5

        # Stop the whole run once the line search reached its final attempt.
        # NOTE(review): this also triggers when that final attempt succeeded.
        if t1 == temp_max_t1 - 1:
            break

        # Relative decrease of the objective in this round.
        lrate = (objs_1[rnd] - objs_1[rnd + 1]) / objs_1[rnd]
        end = time.time()
        costs.append(round(end - start, 1))
        acu_cost.append(int(end - run_start))

        if len(self.test_data):
            preds = self.part_uv(U, V, trows, tcols, self.K)
            rmses.append(self.cal_rmse(preds))
            maes.append(self.cal_mae(preds))
            if not self.silent_run:
                logger.info(
                    'iter=%s, obj=%.4f(%.2f%%), ls:((%.4f, %s), (%.4f, %s)), train_rmse=%.4f,rmse=%.4f, mae=%.4f, time:%.1fs',
                    rnd, objs_1[rnd], lrate * 100, eps_1, t1, eps_1, t1,
                    trmses[rnd], rmses[rnd], maes[rnd], end - start)
        else:
            logger.info(
                'iter=%s, obj=%.4f(%.2f%%), ls:((%.4f, %s), (%.4f, %s)), train_rmse=%.4f, time:%.1fs',
                rnd, objs_1[rnd], lrate * 100, eps_1, t1, eps_1, t1,
                trmses[rnd], end - start)

        if abs(lrate) < self.tol:
            #import pdb;pdb.set_trace()
            break
        if objs_1[rnd] < self.tol:
            break

    # 99.0 is stored in place of the list when no test metric was recorded.
    self.rmses = rmses if rmses else 99.0
    self.maes = maes if maes else 99.0
    if self.save_uv:
        np.savetxt(dir_ + 'mf_features/ratings_only/U_K%s.res' % self.K, U)
        np.savetxt(dir_ + 'mf_features/ratings_only/V_K%s.res' % self.K, V)
    return U, V
# Random 10000 x 10000 matrix with a quarter of its entries non-zero,
# expanded to a dense array.  `toarray()` is the explicit spelling of the
# `.A` shorthand.
a = random(10000, 10000, density=0.25, random_state=rs, data_rvs=rvs)
a = a.toarray()

# Generating a vector for the b component
b = np.random.randint(23, size=(10000, 1))

# Method 2: Creating a random matrix and forcing the code to make non diagonal
# elements to be zero. This allowed the iteration to be 0.1145 seconds,
# however took about 78 seconds for the matrix to be generated.
#A = np.random.randint(34, size=(50000, 50000))
# for i in range(len(a)):
#     for j in range(len(a)):
#         if(i != j):
#             if ((i - j) > 2) or ((j - i) > 2):
#                 a[i, j] = 0
print(a)

# Further increasing efficiency by converting the matrix a to a sparse matrix,
# where only the non zero terms are stored in memory.
a = cm(a)

# Starting the time calculations to see the time taken just for solving the
# matrix and not the matrix generation.
start = time.time()

# Due to non-convergence issues when dealing with randomly generated matrices,
# I decided to increase the tolerance and to control the amount of time taken
# for these iterations, I set the maximum number of iterations = 2000.
# NOTE(review): newer SciPy releases appear to name this keyword `rtol`
# instead of `tol` -- confirm against the installed version.
x = spla.bicgstab(a, b, tol=0.7, maxiter=2000)
print(x)

# The second element of the result is the solver's status code; 0 is treated
# as success here.
if x[1] == 0:
    print("Solver successful")
else:
    print("Error detected")

# Printing time taken to double check which of the solvers was the fastest.
end = time.time()
print("Time for iteration: ", end - start)
def run(self):
    """Fit U and V by gradient descent with a backtracking line search.

    Each round computes gradients with ``self.get_grad`` and tries the step
    ``1 / eps`` up to 20 times: a trial that lowers the objective is accepted
    and ``eps`` is multiplied by 0.95, otherwise ``eps`` is multiplied by 1.5
    and the trial is repeated.  The run stops after ``self.ite`` rounds, when
    the relative objective change drops below ``self.tol``, or when all 20
    trials of a round fail.

    Differences from the previous version:

    * a step accepted on the 20th trial is now kept and recorded; before, it
      was reported as a line-search failure, leaving ``objs``/``trmses`` one
      entry longer than ``rmses``/``maes`` and ``self.preds`` out of date
      relative to ``self.U``/``self.V``;
    * ``self.preds`` is ``None`` instead of raising when no round completes.

    Side effects: sets ``self.U``, ``self.V``, ``self.bias`` and
    ``self.preds``.

    :return: tuple ``(objs, trmses, rmses, maes, acu_cost)`` where ``objs``
        starts with the initial objective and the other lists hold one entry
        per completed round
    """
    max_trials = 20

    omega = cm((self.train_data[:, 2],
                (self.train_data[:, 0], self.train_data[:, 1])),
               shape=(self.M, self.N))  # index starting from 0
    trows = self.test_data[:, 0].astype(np.int32)
    tcols = self.test_data[:, 1].astype(np.int32)
    ground = self.test_data[:, 2]

    U = np.random.rand(self.M, self.K) * self.initial
    V = np.random.rand(self.N, self.K) * self.initial
    # in reality, bias can also be updated, modified later
    bias = np.mean(self.train_data[:, 2])
    eps = self.eps

    # Convert once; both coordinate arrays come from the same COO view.
    coo = omega.tocoo()
    rows, cols = coo.row.astype(np.int32), coo.col.astype(np.int32)
    obs = omega.copy().data.astype(np.float64).reshape(self.train_num, 1)

    self.cal_omega(omega, U, V, rows, cols, bias, obs)
    objs = [self.obj(U, V, omega)]
    trmses = []
    rmses, maes, acu_cost = [], [], []
    preds = None  # stays None if no round completes
    run_start = time.time()

    for rnd in range(self.ite):
        start = time.time()
        self.cal_omega(omega, U, V, rows, cols, bias, obs)
        du, dv = self.get_grad(omega, U, V)

        # Trial steps are evaluated on a copy so omega itself is untouched.
        l_omega = omega.copy()
        for t1 in range(max_trials):  # line search
            LU = U - 1.0 / eps * du
            LV = V - 1.0 / eps * dv
            self.cal_omega(l_omega, LU, LV, rows, cols, bias, obs)
            l_obj = self.obj(LU, LV, l_omega)
            if l_obj < objs[rnd]:
                U, V = LU, LV
                eps *= 0.95
                objs.append(l_obj)
                trmses.append(self.train_rmse(U, V, bias, l_omega))
                break
            eps *= 1.5
        else:
            # Only reached when every trial failed to lower the objective.
            logging.info('*************stopped by linesearch**********')
            break

        lrate = (objs[rnd] - objs[rnd + 1]) / objs[rnd]
        end = time.time()
        acu_cost.append(int(end - run_start))

        preds = self.part_uv(U, V, trows, tcols, self.K)
        preds += bias
        rmses.append(self.cal_rmse(preds, ground))
        maes.append(self.cal_mae(preds, ground))
        logging.info(
            'iter=%s, obj=%.4f(%.7f), ls:((%.4f, %s), train_rmse=%.4f, rmse=%.4f, mae=%.4f, time:%.1fs',
            rnd + 1, objs[rnd + 1], lrate, eps, t1, trmses[rnd], rmses[rnd],
            maes[rnd], end - start)

        if abs(lrate) < self.tol:
            logging.info('stopped by tol, iter=%s', rnd + 1)
            break

    self.U = U
    self.V = V
    self.bias = bias
    self.preds = preds
    return objs, trmses, rmses, maes, acu_cost
def run(self):
    """Fit U and V by gradient descent with a backtracking line search.

    Each round computes gradients with ``self.get_grad`` and tries the step
    ``1 / eps_1`` up to 20 times: a trial that lowers the objective is
    accepted and ``eps_1`` is multiplied by 0.95, otherwise ``eps_1`` is
    multiplied by 1.5 and the trial is repeated.  The run stops after
    ``self.ite`` rounds, when the relative objective change or the objective
    itself drops below ``self.tol``, or when all 20 trials of a round fail.

    Differences from the previous version: a step accepted on the 20th trial
    is now kept and its metrics recorded (before, it ended the run as if the
    line search had failed, leaving ``objs_1``/``trmses`` one entry longer
    than ``rmses``/``acu_cost``).  Progress lines are printed with a call form
    that yields the same text on Python 2 and Python 3.

    :return: tuple ``(objs_1, trmses, rmses, acu_cost)`` where ``objs_1``
        starts with the initial objective and the other lists hold one entry
        per completed round
    """
    max_trials = 20

    # X is only needed for the overall matrix shape.
    X = cm((self.data[:, 2], (self.data[:, 0], self.data[:, 1])))  # index starting from 0
    M, N = X.shape
    omega = cm((self.train_data[:, 2],
                (self.train_data[:, 0], self.train_data[:, 1])),
               shape=(M, N))  # index starting from 0
    trows = self.test_data[:, 0].astype(np.int32)
    tcols = self.test_data[:, 1].astype(np.int32)

    U = np.random.rand(M, self.K) * 0.001
    V = np.random.rand(N, self.K) * 0.001
    bias = 0  # in reality, bias can also be updated, modified later
    eps_1 = self.eps

    # Convert once; both coordinate arrays come from the same COO view.
    coo = omega.tocoo()
    rows, cols = coo.row.astype(np.int32), coo.col.astype(np.int32)
    obs = omega.copy().data.astype(np.float64).reshape(self.train_num, 1)

    self.cal_omega(omega, U, V, rows, cols, bias, obs)
    objs_1 = [self.obj(U, V, omega)]
    trmses = []
    rmses, maes, acu_cost = [], [], []
    run_start = time.time()

    for rnd in range(self.ite):
        start = time.time()
        self.cal_omega(omega, U, V, rows, cols, bias, obs)
        du, dv = self.get_grad(omega, U, V)

        # Trial steps are evaluated on a copy so omega itself is untouched.
        l_omega = omega.copy()
        for t1 in range(max_trials):  # line search
            LU = U - 1.0 / eps_1 * du
            LV = V - 1.0 / eps_1 * dv
            self.cal_omega(l_omega, LU, LV, rows, cols, bias, obs)
            l_obj = self.obj(LU, LV, l_omega)
            if l_obj < objs_1[rnd]:
                U, V = LU, LV
                eps_1 *= 0.95
                objs_1.append(l_obj)
                trmses.append(self.train_rmse(U, V, bias, l_omega))
                break
            eps_1 *= 1.5
        else:
            # Every trial failed to lower the objective: give up.
            break

        lrate = (objs_1[rnd] - objs_1[rnd + 1]) / objs_1[rnd]
        end = time.time()
        print('iter=%s, obj=%.4f(%.2f%%), ls:((%.4f, %s), (%.4f, %s)), time:%.1fs\n' % (
            rnd, objs_1[rnd], lrate * 100, eps_1, t1, eps_1, t1, end - start))
        acu_cost.append(int(end - run_start))

        preds = self.part_uv(U, V, trows, tcols, self.K)
        rmses.append(self.cal_rmse(preds))
        maes.append(self.cal_mae(preds))
        print('train_rmse=%.4f,rmse=%.4f, mae=%.4f\n' % (
            trmses[rnd], rmses[rnd], maes[rnd]))

        if abs(lrate) < self.tol:
            break
        if objs_1[rnd] < self.tol:
            break

    return objs_1, trmses, rmses, acu_cost