Example #1
def detect_clusters_big_v4(g):
    '''
    Detects clusters (connected components) in a large graph.
    Note: 'np.indices(mat.shape).T[:, :, [1, 0]]' cannot take and process a matrix
    that is too large, but for this purpose it works acceptably.
    Idea: give every node a serial-number attribute, which helps when building the
    connectivity matrix; walking over all edges and taking their 'endpoints', the two
    nodes are connected in the matrix at the positions given by their serial-number
    attributes. Afterwards, nodes from the same cluster are joined via the component
    labels (nodes sharing the same 'lbl' attribute).
    :param g: the graph whose clusters should be detected
    :return: a set of sets, each containing the nodes of one connected component
    '''
    row = []
    col = []
    data = []

    nx.set_node_attributes(g, "", "serial_num")
    nx.set_node_attributes(g, "", "lbl")

    # Number the nodes consecutively; the serial number is a node's row/column
    # index in the connectivity matrix.
    for i, n in enumerate(g.nodes()):
        g.nodes[n]['serial_num'] = i

    i = 1
    l = len(g.edges)
    for e in g.edges(data=True):
        sys.stdout.write(f"{i}/{l}")
        sys.stdout.flush()
        # Only edges with positive affinity contribute to the connectivity matrix.
        if e[2]['affinity'] == "+":
            row.append(g.nodes[e[0]]['serial_num'])
            col.append(g.nodes[e[1]]['serial_num'])
            data.append(1)
        i += 1
        sys.stdout.write("\r")

    n = len(g.nodes)
    mat = cm((np.asarray(data), (np.asarray(row), np.asarray(col))),
             shape=(n, n))

    n_comps, lbls = connected_components(mat, directed=False)

    # Write each node's component label back onto the node as its 'lbl' attribute.
    for (node, d), lbl in zip(g.nodes(data=True), lbls):
        d['lbl'] = lbl

    clusters = set()
    for i in range(n_comps):
        cluster = [x for (x, d) in g.nodes(data=True) if d['lbl'] == i]
        clusters.add(frozenset(cluster))

    return clusters
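The snippet builds a sparse connectivity matrix indexed by the node serial numbers and lets scipy's connected_components label every node. Below is a minimal, self-contained sketch of that idea on a toy graph; it assumes cm is scipy.sparse.csr_matrix (a coo_matrix would work just as well), which the snippet itself does not show.

import networkx as nx
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

g = nx.Graph()
g.add_edge("a", "b", affinity="+")
g.add_edge("b", "c", affinity="+")
g.add_edge("d", "e", affinity="-")   # negative edge: not entered into the matrix

serial = {node: i for i, node in enumerate(g.nodes())}   # node -> matrix index
row = [serial[u] for u, v, d in g.edges(data=True) if d["affinity"] == "+"]
col = [serial[v] for u, v, d in g.edges(data=True) if d["affinity"] == "+"]
n = g.number_of_nodes()
mat = csr_matrix((np.ones(len(row)), (row, col)), shape=(n, n))

n_comps, lbls = connected_components(mat, directed=False)
clusters = {frozenset(node for node in g.nodes() if lbls[serial[node]] == c)
            for c in range(n_comps)}
print(clusters)   # e.g. {frozenset({'a', 'b', 'c'}), frozenset({'d'}), frozenset({'e'})}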
Example #2
def blob2csr(layer_name, threshold):
    param = net.params[layer_name][0].data
    pm = filter_2_matrix(param)  # parameter matrix
    counter = 0
    for i_n in range(pm.shape[0]):
        for i_c in range(pm.shape[1]):
            if abs(pm[i_n][i_c]) < 2 * threshold:
                pm[i_n][i_c] = 0
                counter = counter + 1
    spa_pm = cm(pm)
    ss.save_npz(output_dir + '{}'.format(layer_name), spa_pm, 'a')
    zero_percent = (float(counter) / float(pm.size)) * 100
    print('zero weights percentage in layer {} is: '.format(layer_name) +
          str(zero_percent) + '%')
    return pm
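blob2csr zeroes out weights whose magnitude is below 2 * threshold, stores the pruned matrix in sparse form, and reports the resulting sparsity. A hedged, self-contained sketch of the same steps on a plain NumPy array (standing in for the Caffe blob) follows; it assumes cm is scipy.sparse.csr_matrix and ss is scipy.sparse, and the layer name and threshold are made up.

import numpy as np
import scipy.sparse as ss

threshold = 0.05
pm = np.random.randn(4, 6) * 0.1               # toy "parameter matrix"
counter = int(np.sum(np.abs(pm) < 2 * threshold))
pm[np.abs(pm) < 2 * threshold] = 0.0           # vectorised version of the double loop
spa_pm = ss.csr_matrix(pm)
ss.save_npz('fc_demo', spa_pm)                 # note: the third argument of save_npz is 'compressed'
zero_percent = (float(counter) / float(pm.size)) * 100
print('zero weights percentage in layer fc_demo is: ' + str(zero_percent) + '%')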
Example #3
def t_yelp():
    #data from: http://www.trustlet.org/wiki/Epinions_datasets
    N,M = 0,0
    max_r = 5.0
    cNum = 8
    R=defaultdict(dict)
    T=defaultdict(dict)
    R_test=defaultdict(dict)
    limit = 100
    print('get T')
    for line in open('./yelp_data/users.txt','r'):
        u = int(line.split(':')[0])
        uf = line.split(':')[1][1:-1].split(',')
        if len(uf) > 1:
            for x in uf[:-1]:
                v = int(x)
                if u<limit and v<limit:
                    T[u][v] = 1.0
    print('get R')
    k = 0
    ul,il,rl = [],[],[]
    for line in open('./yelp_data/ratings-train.txt','r'):
        u,i,r = [int(x) for x in line.split('::')[:3]]
        if u<limit and i<limit:
            N=max(N,u)
            M=max(M,i)
            ul.append(u)
            il.append(i)
            rl.append(r)
            R[u][i] = r/max_r
    # print ul
    Rcsr = cm((rl,(ul,il)))
    N+=1
    M+=1
    print('get R_test')
    for line in open('./yelp_data/ratings-test.txt','r'):
        u,i,r = [int(x) for x in line.split('::')[:3]]
        if u<limit and i<limit:
            R_test[u][i] = r/max_r
    print "get Circle"
    C = [[] for i in range(cNum)]
    for line in open('./yelp_data/items-class.txt','r'):
        i,ci = [int(x) for x in line.split(' ')]
        if i<limit:
            C[ci].append(i)

    lambdaU, lambdaV, lambdaT, K = 0.2, 0.2, 0.1, 4
    test(R,T,C,N,M,K,max_r,lambdaU,lambdaV,lambdaT,R_test,Rcsr)
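t_yelp accumulates the training triplets in ul/il/rl and hands them to cm in (data, (row, col)) form. A minimal sketch of that construction with made-up triplets, assuming cm is scipy.sparse.csr_matrix; note that without an explicit shape the dimensions are inferred from the largest indices, which is why N and M are tracked separately above.

import numpy as np
from scipy.sparse import csr_matrix

ul = [0, 0, 2, 3]                    # user ids
il = [1, 4, 2, 0]                    # item ids
rl = [5, 3, 4, 1]                    # ratings
Rcsr = csr_matrix((rl, (ul, il)))    # shape inferred as (max(ul)+1, max(il)+1) = (4, 5)
print(Rcsr.shape)
print(Rcsr.toarray())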
Example #4
File: mf.py Project: vickkyy/FMG
    def __init__(self, data, train_data, test_data, **paras):
        self.K = paras['K']
        self.reg = paras['reg']
        self.eps = paras['eps']
        self.initial = paras['initial']
        self.ite = paras['max_iter']
        self.tol = paras['tol']
        self.data = data
        self.obs_num = len(self.data)
        self.train_data = train_data
        self.train_num = len(self.train_data)
        self.test_data = test_data
        self.test_num = len(self.test_data)
        self.load_lib()
        self.X = cm((self.data[:, 2],
                     (self.data[:, 0], self.data[:, 1])))  # index starting from 0
        self.M, self.N = self.X.shape
        logging.info('finished initializing the model, paras=%s', paras)
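Here self.X is built without an explicit shape, so self.M and self.N are inferred from the largest user and item indices in the full data set; the run() methods in the other examples then pass shape=(M, N) when building the training matrix omega, so a training split that happens to miss the largest indices still lines up with X. A small illustrative sketch with made-up data, assuming cm is scipy.sparse.csr_matrix:

import numpy as np
from scipy.sparse import csr_matrix

data = np.array([[0, 0, 4.0], [1, 1, 3.0], [2, 3, 5.0]])   # (user, item, rating)
train = data[:2]                                           # split misses user 2 and item 3

X = csr_matrix((data[:, 2],
                (data[:, 0].astype(np.int32), data[:, 1].astype(np.int32))))
M, N = X.shape                                             # inferred from the max indices: (3, 4)

# Without shape=(M, N) the training matrix would come out as (2, 2).
omega = csr_matrix((train[:, 2],
                    (train[:, 0].astype(np.int32), train[:, 1].astype(np.int32))),
                   shape=(M, N))
print(X.shape, omega.shape)   # (3, 4) (3, 4)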
Example #5
    def run(self):
        logger.info('MF running: paras: K=%s, reg=%s, lr=%s, silent_run=%s',
                    self.K, self.lamb, self.eps, self.silent_run)
        X = cm((self.data[:, 2],
                (self.data[:, 0].astype(np.int32),
                 self.data[:, 1].astype(np.int32))))  #index starting from 0
        M, N = X.shape
        # print X.shape

        omega = cm((self.train_data[:, 2],
                    (self.train_data[:, 0], self.train_data[:, 1])),
                   shape=(M, N))  #index starting from 0
        # print omega.shape

        if len(self.test_data):
            trows, tcols = self.test_data[:, 0].astype(
                np.int32), self.test_data[:, 1].astype(np.int32)

        U = np.random.rand(M, self.K) * 0.0002
        V = np.random.rand(N, self.K) * 0.0002
        bias = self.train_data[:, 2].mean()  # in reality, bias can also be updated; modified later
        # bias = 0.0
        # print bias
        eps_1 = eps_2 = self.eps

        rows, cols = omega.tocoo().row.astype(
            np.int32), omega.tocoo().col.astype(np.int32)
        obs = omega.copy().data.astype(np.float64).reshape(self.train_num, 1)
        # print type(obs)
        self.cal_omega(omega, U, V, rows, cols, bias, obs)

        objs_1 = [self.obj(U, V, omega)]
        objs_2 = []
        trmses = []
        rmses, maes, costs, acu_cost = [], [], [], []

        run_start = time.time()
        # print "start run..."
        for rnd in range(0, self.ite):
            if rnd % 50 == 0:
                print "rnd", rnd
            start = time.time()
            self.cal_omega(omega, U, V, rows, cols, bias, obs)
            #grad_bias = -omega + self.lamb * bias
            #bias = bias - 1.0/eps_1 * grad_bias
            du, dv = self.get_grad(omega, U, V)
            l_omega = omega.copy()
            temp_max_t1 = 100
            for t1 in range(0, temp_max_t1):
                #line search
                LU = U - 1.0 / eps_1 * du
                LV = V - 1.0 / eps_1 * dv
                self.cal_omega(l_omega, LU, LV, rows, cols, bias, obs)
                l_obj = self.obj(LU, LV, l_omega)
                if l_obj < objs_1[rnd]:
                    U, V = LU, LV
                    eps_1 *= 0.95
                    objs_1.append(l_obj)
                    trmses.append(self.train_rmse(U, V, bias, l_omega))
                    break
                else:
                    eps_1 *= 1.5

            if t1 == temp_max_t1 - 1:
                break

            lrate = (objs_1[rnd] - objs_1[rnd + 1]) / objs_1[rnd]

            end = time.time()
            costs.append(round(end - start, 1))
            acu_cost.append(int(end - run_start))

            if len(self.test_data):
                preds = self.part_uv(U, V, trows, tcols, self.K)
                rmses.append(self.cal_rmse(preds))
                maes.append(self.cal_mae(preds))
                if not self.silent_run:
                    logger.info(
                        'iter=%s, obj=%.4f(%.2f%%), ls:((%.4f, %s), (%.4f, %s)), train_rmse=%.4f,rmse=%.4f, mae=%.4f, time:%.1fs',
                        rnd, objs_1[rnd], lrate * 100, eps_1, t1, eps_1, t1,
                        trmses[rnd], rmses[rnd], maes[rnd], end - start)
            else:
                logger.info(
                    'iter=%s, obj=%.4f(%.2f%%), ls:((%.4f, %s), (%.4f, %s)), train_rmse=%.4f, time:%.1fs',
                    rnd, objs_1[rnd], lrate * 100, eps_1, t1, eps_1, t1,
                    trmses[rnd], end - start)

            if abs(lrate) < self.tol:
                #import pdb;pdb.set_trace()
                break

            if objs_1[rnd] < self.tol:
                break

        self.rmses = rmses if rmses else 99.0
        self.maes = maes if maes else 99.0
        if self.save_uv:
            np.savetxt(dir_ + 'mf_features/ratings_only/U_K%s.res' % self.K, U)
            np.savetxt(dir_ + 'mf_features/ratings_only/V_K%s.res' % self.K, V)
        return U, V
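The inner t1 loop is a simple adaptive line search: take a step of size 1.0/eps_1, accept it and let the next step grow (eps_1 *= 0.95) if the objective dropped, otherwise shrink the step (eps_1 *= 1.5) and retry. A toy, self-contained sketch of the same accept/adjust rule on a one-dimensional quadratic (not the class's actual obj/get_grad methods):

def obj(u):                           # toy objective: f(u) = (u - 3)^2
    return (u - 3.0) ** 2

def grad(u):
    return 2.0 * (u - 3.0)

u, eps = 10.0, 10.0
objs = [obj(u)]
for rnd in range(50):
    du = grad(u)
    for t1 in range(100):             # line search with the same accept/adjust rule
        lu = u - 1.0 / eps * du
        l_obj = obj(lu)
        if l_obj < objs[rnd]:
            u = lu
            eps *= 0.95               # objective dropped: accept, allow a slightly larger step
            objs.append(l_obj)
            break
        else:
            eps *= 1.5                # overshoot: shrink the step and retry
    if t1 == 99:                      # line search could not improve the objective
        break
print(u, objs[-1])                    # converges towards u = 3, obj close to 0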
Example #6
# Method 1: generating a random sparse matrix (density 0.25) and converting it to a dense array; rs and rvs are defined earlier in the original script.
a = random(10000, 10000, density=0.25, random_state=rs, data_rvs=rvs)
a = a.A
# Generating a vector for the b component
b = np.random.randint(23, size=(10000, 1))

# Method 2: creating a random matrix and forcing every element more than two positions away from the diagonal to zero. This brought the iteration time down to 0.1145 seconds, but generating the matrix took about 78 seconds.

#A = np.random.randint(34, size=(50000, 50000))
"""for i in range(len(a)):
    for j in range(len(a)):
        if(i != j):
            if ((i - j) > 2) or ((j - i) > 2):
                a[i, j] = 0"""

print(a)
# Further increasing efficiency by converting the matrix a to a sparse matrix, where only the non-zero terms are stored in memory.
a = cm(a)
# Starting the timer here so that only the solve is timed, not the matrix generation.
start = time.time()
# Randomly generated matrices tend to cause convergence problems, so the tolerance is increased and the number of iterations is capped at maxiter=2000 to bound the solve time.
x, info = spla.bicgstab(a, b, tol=0.7, maxiter=2000)
print(x)

if info == 0:
    print("Solver successful")
else:
    print("Error detected")
# Printing time taken to double check which of the solvers was the fastest.
end = time.time()
print("Time for iteration: ", end - start)
Example #7
File: mf.py Project: vickkyy/FMG
    def run(self):
        omega = cm((self.train_data[:, 2],
                    (self.train_data[:, 0], self.train_data[:, 1])),
                   shape=(self.M, self.N))  #index starting from 0
        trows, tcols = self.test_data[:, 0].astype(
            np.int32), self.test_data[:, 1].astype(np.int32)
        ground = self.test_data[:, 2]

        U = np.random.rand(self.M, self.K) * self.initial
        V = np.random.rand(self.N, self.K) * self.initial
        bias = np.mean(self.train_data[:, 2])  # in reality, bias can also be updated; modified later
        eps = self.eps

        rows, cols = omega.tocoo().row.astype(
            np.int32), omega.tocoo().col.astype(np.int32)
        obs = omega.copy().data.astype(np.float64).reshape(self.train_num, 1)
        self.cal_omega(omega, U, V, rows, cols, bias, obs)

        objs = [self.obj(U, V, omega)]
        trmses = []
        rmses, maes, costs, acu_cost = [], [], [], []

        run_start = time.time()
        for rnd in range(0, self.ite):
            start = time.time()
            self.cal_omega(omega, U, V, rows, cols, bias, obs)
            du, dv = self.get_grad(omega, U, V)
            l_omega = omega.copy()
            for t1 in range(0, 20):
                #line search
                LU = U - 1.0 / eps * du
                LV = V - 1.0 / eps * dv
                self.cal_omega(l_omega, LU, LV, rows, cols, bias, obs)
                l_obj = self.obj(LU, LV, l_omega)
                if l_obj < objs[rnd]:
                    U, V = LU, LV
                    eps *= 0.95
                    objs.append(l_obj)
                    trmses.append(self.train_rmse(U, V, bias, l_omega))
                    break
                else:
                    eps *= 1.5

            if t1 == 19:
                logging.info('*************stopped by linesearch**********')
                break

            lrate = (objs[rnd] - objs[rnd + 1]) / objs[rnd]

            end = time.time()
            costs.append(round(end - start, 1))
            acu_cost.append(int(end - run_start))

            preds = self.part_uv(U, V, trows, tcols, self.K)
            preds += bias
            rmses.append(self.cal_rmse(preds, ground))
            maes.append(self.cal_mae(preds, ground))
            logging.info(
                'iter=%s, obj=%.4f(%.7f), ls:(%.4f, %s), train_rmse=%.4f, rmse=%.4f, mae=%.4f, time:%.1fs',
                rnd + 1, objs[rnd + 1], lrate, eps, t1, trmses[rnd],
                rmses[rnd], maes[rnd], end - start)

            if abs(lrate) < self.tol:
                logging.info('stopped by tol, iter=%s', rnd + 1)
                break

        self.U = U
        self.V = V
        self.bias = bias
        self.preds = preds
        return objs, trmses, rmses, maes, acu_cost
Example #8
    def run(self):
        X = cm((self.data[:, 2], (self.data[:, 0],
                                  self.data[:, 1])))  #index starting from 0
        M, N = X.shape
        omega = cm((self.train_data[:, 2],
                    (self.train_data[:, 0], self.train_data[:, 1])),
                   shape=(M, N))  #index starting from 0
        trows, tcols = self.test_data[:, 0].astype(
            np.int32), self.test_data[:, 1].astype(np.int32)

        U = np.random.rand(M, self.K) * 0.001
        V = np.random.rand(N, self.K) * 0.001
        bias = 0  # in reality, bias can also be updated, modified later
        eps_1 = eps_2 = self.eps

        rows, cols = omega.tocoo().row.astype(
            np.int32), omega.tocoo().col.astype(np.int32)
        obs = omega.copy().data.astype(np.float64).reshape(self.train_num, 1)
        self.cal_omega(omega, U, V, rows, cols, bias, obs)

        objs_1 = [self.obj(U, V, omega)]
        objs_2 = []
        trmses = []
        rmses, maes, costs, acu_cost = [], [], [], []

        run_start = time.time()
        for rnd in range(0, self.ite):
            start = time.time()
            self.cal_omega(omega, U, V, rows, cols, bias, obs)
            du, dv = self.get_grad(omega, U, V)
            l_omega = omega.copy()
            for t1 in range(0, 20):
                #line search
                LU = U - 1.0 / eps_1 * du
                LV = V - 1.0 / eps_1 * dv
                self.cal_omega(l_omega, LU, LV, rows, cols, bias, obs)
                l_obj = self.obj(LU, LV, l_omega)
                if l_obj < objs_1[rnd]:
                    U, V = LU, LV
                    eps_1 *= 0.95
                    objs_1.append(l_obj)
                    trmses.append(self.train_rmse(U, V, bias, l_omega))
                    break
                else:
                    eps_1 *= 1.5

            if t1 == 19:
                break

            lrate = (objs_1[rnd] - objs_1[rnd + 1]) / objs_1[rnd]

            end = time.time()
            print('iter=%s, obj=%.4f(%.2f%%), ls:((%.4f, %s), (%.4f, %s)), time:%.1fs\n' % (
                rnd, objs_1[rnd], lrate * 100, eps_1, t1, eps_1, t1,
                end - start))
            costs.append(round(end - start, 1))
            acu_cost.append(int(end - run_start))

            preds = self.part_uv(U, V, trows, tcols, self.K)
            rmses.append(self.cal_rmse(preds))
            maes.append(self.cal_mae(preds))
            print('train_rmse=%.4f,rmse=%.4f, mae=%.4f\n' % (
                trmses[rnd], rmses[rnd], maes[rnd]))

            if abs(lrate) < self.tol:
                break

            if objs_1[rnd] < self.tol:
                break

        #inds = range(1, len(objs_1)+1)
        #print 'objs_1', objs_1
        ##plt.plot(objs_1, label='obj')
        #print 'maes', maes
        ##plt.plot(maes, label='mae')
        #print 'rmses', rmses
        #inds = range(1, rnd+2)
        #l1, l2 = plt.plot(inds, rmses, 'r-', inds, trmses, 'g-', label='rmse')
        #l1.set_label('test')
        #l2.set_label('train')
        #plt.ylabel('RMSE')
        #plt.xlabel('iterations')
        #plt.title('movielens-1m')
        #plt.legend()
        #plt.show()
        #print 'costs',costs
        return objs_1, trmses, rmses, acu_cost
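part_uv itself is not shown in any of these snippets; given how trows/tcols are used, a plausible reading is that it evaluates the rank-K prediction U V^T only at the requested (row, col) positions instead of forming the full dense matrix. A hypothetical stand-in, for illustration only:

import numpy as np

def part_uv(U, V, rows, cols, K=None):
    # Hypothetical helper: the predictions (U @ V.T)[rows, cols] without the dense product.
    return np.sum(U[rows] * V[cols], axis=1)

# Tiny usage example with made-up factors and test positions.
U = np.random.rand(5, 3)
V = np.random.rand(4, 3)
rows = np.array([0, 2, 4])
cols = np.array([1, 1, 3])
print(part_uv(U, V, rows, cols))   # three predicted ratings
print((U @ V.T)[rows, cols])       # the same values via the dense product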