Example #1
# Assumes numpy as np, rescal_als, and the project helpers (_log, pkl_utils, config) are in scope.
def predict_rescal_als(T, idx):
    A, R, _, _, _ = rescal_als(T, 10, maxIter=10, lambda_A=10, lambda_R=10, compute_fit=False)
    # Score entity idx against all entities via the last relation slice.
    P = np.dot(A, np.dot(R[-1], A[idx, :].T))
    nrm = np.linalg.norm(P)
    if nrm != 0:
        P = np.round(P / nrm, decimals=3)
    return P


def tensor_factorization(lang, r, n_iter):
    _log.info("start factorization")
    X = pkl_utils._load(config.TENSOR[lang])
    _log.info("data loading complete")
    A, R, _, _, _ = rescal_als(X, r, maxIter=n_iter, lambda_A=10, lambda_R=10, compute_fit=False)
    data_output = {'A': A, 'R': R}
    pkl_utils._save(config.RESCAL_OUTPUT[lang], data_output)
    _log.info("factorization complete")
Example #3
def get_embeddings(Tenc):
    # Factorize the encoded tensor at rank 250; only the entity-embedding matrix is returned.
    feature_vec, R, fit, itr, exectimes = rescal_als(Tenc, 250,
                                                     init='nvecs',
                                                     conv=1e-2,
                                                     lambda_A=0.1,
                                                     lambda_R=0.1)

    return feature_vec
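A hedged sketch of what Tenc is assumed to look like here: the usual rescal_als input, a list of sparse frontal slices whose entity count exceeds the factorization rank of 250. The sizes below are illustrative only.

import numpy as np
from scipy.sparse import lil_matrix

np.random.seed(0)
# Hypothetical encoded tensor: 3 relation slices over 300 entities.
Tenc = [lil_matrix(np.random.binomial(1, 0.02, size=(300, 300)).astype(float)) for _ in range(3)]
A = get_embeddings(Tenc)  # (300, 250) entity-embedding matrix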
Example #4
def predict_rescal_als(T):
    A, R, _, _, _ = rescal_als(
        T, 100, init='nvecs', conv=1e-3,
        lambda_A=10, lambda_R=10
    )
    n = A.shape[0]
    P = zeros((n, n, len(R)))
    for k in range(len(R)):
        P[:, :, k] = dot(A, dot(R[k], A.T))
    return P
Example #5
def predict_rescal_als(T):
    A, R, f, itr, exectimes = rescal_als(T,
                                         10,
                                         init='nvecs',
                                         lambda_A=10,
                                         lambda_R=10)
    n = A.shape[0]
    P = zeros((n, n, len(R)))
    for k in range(len(R)):
        P[:, :, k] = dot(A, dot(R[k], A.T))
    return A, P, R
Example #6
def predict_rescal_als(T):
    A, R, _, _, _ = rescal_als(T,
                               100,
                               init='nvecs',
                               conv=1e-3,
                               lambda_A=10,
                               lambda_R=10)
    n = A.shape[0]
    P = zeros((n, n, len(R)))
    for k in range(len(R)):
        P[:, :, k] = dot(A, dot(R[k], A.T))
    return P
Example #7
def predict_rescal_als(T, rank, lambda_A, lambda_R):
    A, R, _, _, _ = rescal_als(T,
                               rank,
                               init='nvecs',
                               conv=1e-4,
                               lambda_A=lambda_A,
                               lambda_R=lambda_R,
                               compute_fit=True)
    n = A.shape[0]
    P = zeros((n, n, len(R)))
    for k in range(len(R)):
        P[:, :, k] = dot(A, dot(R[k], A.T))
    return P
Example #8
def predict_rescal_als(T, rank=100, lambda_A=10, lambda_R=10):
    ''' T is a list of sparse 2d lil_matrices.
    '''
    A, R, _, _, _ = rescal_als(T,
                               rank,
                               init='nvecs',
                               conv=1e-3,
                               lambda_A=lambda_A,
                               lambda_R=lambda_R)
    n = A.shape[0]
    P = np.zeros((n, n, len(R)))
    for k in range(len(R)):
        P[:, :, k] = np.dot(A, np.dot(R[k], A.T))
    return P
Example #9
File: rescal.py Project: oboder/pymake
    def fit(self):
        data = self.frontend.data_ma
        K = self.expe.K

        data = [sp.sparse.csr_matrix(data)]
        A, R, fit, itr, exectimes = rescal_als(data,
                                               K,
                                               init='nvecs',
                                               lambda_A=10,
                                               lambda_R=10,
                                               maxIter=self.iterations)

        self.log.info('RESCAL fit info: fit=%s, itr=%s, exectimes=%s' % (fit, itr, exectimes))

        self._theta = A
        self._phi = R
Example #10
File: rescal.py Project: dtrckd/ml
def rescal(X, K):

    ## Load Matlab data and convert it to dense tensor format
    #T = loadmat('data/alyawarra.mat')['Rs']
    #X = [lil_matrix(T[:, :, k]) for k in range(T.shape[2])]

    X = [sp.sparse.csr_matrix(X)]
    A, R, fit, itr, exectimes = rescal_als(X,
                                           K,
                                           init='nvecs',
                                           lambda_A=10,
                                           lambda_R=10)

    # R is a list holding a single K x K slice here, so A.dot(R) has shape
    # (n, 1, K) and the full product shape (n, 1, n); Y[:, 0, :] recovers (n, n).
    theta = A.dot(R).dot(A.T)
    Y = 1 / (1 + np.exp(-theta))
    Y = Y[:, 0, :]
    Y[Y <= 0.5] = 0
    Y[Y > 0.5] = 1
    #Y = sp.stats.bernoulli.rvs(Y)
    return Y
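A hedged usage sketch for this rescal wrapper; the adjacency matrix is invented, and the sp/np aliases follow the example's own imports (scipy and numpy).

import numpy as np
import scipy as sp
import scipy.sparse

np.random.seed(0)
# Hypothetical binary adjacency over 40 nodes, factorized at rank 8.
adj = np.random.binomial(1, 0.1, size=(40, 40)).astype(float)
Y = rescal(adj, 8)  # (40, 40) matrix of binarized link predictions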
Example #11
File: rescal.py Project: dtrckd/ml
    def fit(self, frontend):
        self._init(frontend)
        K = self.expe.K
        y = frontend.adj()
        data = [y]

        A, R, fit, itr, exectimes = rescal_als(data,
                                               K,
                                               init='nvecs',
                                               lambda_A=10,
                                               lambda_R=10)

        self._theta = A
        self._phi = R[0]

        self.log.info('rescal fit info: %s; itr: %s, exectimes: %s' %
                      (fit, itr, exectimes))

        self.compute_measures()
        if self.expe.get('_write'):
            self.write_current_state(self)
Example #12
def execute_rescal(input_tensor, rank, useNeedTypeSlice=True, useConnectionSlice=True, init='nvecs', conv=1e-4,
                   lambda_A=0, lambda_R=0, lambda_V=0):

    temp_tensor = input_tensor.getSliceMatrixList()
    if not useNeedTypeSlice:
        _log.info('Do not use needtype slice for RESCAL')
        del temp_tensor[SparseTensor.NEED_TYPE_SLICE]
    if not useConnectionSlice:
        _log.info('Do not use connection slice for RESCAL')
        del temp_tensor[SparseTensor.CONNECTION_SLICE]

    _log.info('start rescal processing ...')
    _log.info('config: init=%s, conv=%f, lambda_A=%f, lambda_R=%f, lambda_V=%f' %
              (init, conv, lambda_A, lambda_R, lambda_V))
    _log.info('Datasize: %d x %d x %d | Rank: %d' % (
        temp_tensor[0].shape + (len(temp_tensor),) + (rank,))
    )
    A, R, _, _, _ = rescal_als(
        temp_tensor, rank, init=init, conv=conv,
        lambda_A=lambda_A, lambda_R=lambda_R, lambda_V=lambda_V, compute_fit=True
    )
    _log.info('rescal stopped processing')
    return A, R
Example #13
def netCreate(X, r, minEdges, sampleMethod='deterministic',
              rescal_lambda_A=10, rescal_lambda_R=10, 
              plotting=True, layout='spring', graphScale=1.0): 
    """Wrapper for sktensor.rescal_als. Create network by given 
    sampleMethod from hierarchical clustering of RESCALA_ALS 
    tensor factorization of singular values matrix A(A^T);
    
    Input: X is list of sktensor.csr_matrices [X[k] for k in 
    relationships],
    each X_k is frontal slide of adjacency tensor 
    (ie, adjacency matrix of one relationship type);
    
    Return: {'cluster':hm, 'graph':g, 'linkage':hm['linkage'], 
    'theta':theta, 'A':A, 'Z':Z}
    """
    #import logging
    #from rescal import rescal_als
    #import numpy as np
    #import pandas as pd
    #import networkx as nx
    #import matplotlib.pyplot as plt
    #from scipy.spatial.distance import pdist, squareform

    # Set logging to INFO to see RESCAL information
    logging.basicConfig(level=logging.INFO)

    # Decompose tensor using RESCAL-ALS
    A, R, fit, itr, exectimes = rescal_als(X, r, init='nvecs', lambda_A=rescal_lambda_A, lambda_R=rescal_lambda_R)

    # make the AAT matrix
    AAT = np.dot(A, A.T)

    # heatmap
    hm = heatmap(AAT)
    plt.suptitle(r'A(A^T) HAC for Induced Rank = %s, $\lambda_{A}$ = %s, $\lambda_{R}$ = %s '%(r,rescal_lambda_A,rescal_lambda_R), fontweight='bold', fontsize=14)

    # clip negative entries (work on a copy so AAT itself is preserved)
    AATnn = AAT.copy()
    AATnn[AATnn < 0] = 0
    # remove upper triangle
    AATnn = np.tril(AATnn, k=-1)  # k = -1 to keep only below diagonal

    # reproducibility
    np.random.seed(1)

    # network sampling
    if sampleMethod == 'Bernoulli':
        # shortcut: normalize by largest AAT (non-negative) value for separate bernoulli draws
        # instead of summing over all AAT for multinomial draw, which is harder to 
        # transfer back and forth between vector and triangular matrix
        theta = AATnn / AATnn.max()
        # NETWORK SAMPLE ALGORITHM:
        # random sample ties in network adjacency matrix
        # one-element-at-a-time Bernoulli shortcut
        # instead of multinomial sample of entire adjacency
        n = np.shape(theta)[0]
        m = np.shape(theta)[1]
        Z = np.zeros((n,m))
        # use dependent row,col permutations to randomly select
        # elements ij to sample after first full pass through matrix
        while np.sum(Z) < minEdges:
            shuffledRows = np.arange(1, n)  # rows 1..n-1; row 0 has no sub-diagonal entries
            np.random.shuffle(shuffledRows)
            # first shuffle rows
            for i in shuffledRows: 
                # for given row shuffle use lower triangle columns j in that row i
                shuffledCols = np.arange(i) #up to (i-1) cols, ie, lower triangle
                np.random.shuffle(shuffledCols)
                for j in shuffledCols:
                    if Z[i,j] < 1:
                        Z[i,j] = np.random.binomial(n=1, p=theta[i,j], size=1)
                    if np.sum(Z) >= minEdges:
                        break
                if np.sum(Z) >= minEdges:
                    break
                
    elif sampleMethod == 'multinomial':
        # NETWORK SAMPLING ALGORITHM:
        # problem: doesn't sufficiently cluster the resulting network
        draws = int(np.ceil(minEdges*1.2))
        dist = pdist(A)   # what matrix to use:  pdist(A) or just tril(AAT) directly?
        invdist = dist
        invdist[invdist != 0] = 1/invdist[invdist!=0]  # prevent division by 0
        thetavec = invdist / np.sum(invdist)
        theta = squareform(thetavec)
        # multinomial sample
        n = np.shape(theta)[0]
        Z = np.zeros((n,n))
        samp = sampleLinks(q=thetavec, edgesToDraw=1, draws=draws)
        while np.sum(samp) < minEdges:
            draws = int(np.ceil(draws * 1.1))   #increase number of draws and try again
            samp = sampleLinks(q=thetavec,edgesToDraw=1,draws=draws)
        Z[np.tril_indices_from(Z, k =-1)] = samp
        
    elif sampleMethod == 'deterministic':
        theta = AATnn / AATnn.max()
        n = np.shape(AATnn)[0]
        sv = AATnn[np.tril_indices_from(AATnn, k =-1)]  #pull singular values from triangle
        cutOff = topNEdges(data = sv, minEdges = minEdges, 
                           n = n)['cutOff']
        Z = np.zeros((n,n))
        Z[np.where(AATnn >= cutOff)] = 1
        
    else:
        raise ValueError('No valid sampleMethod selected. Please choose "Bernoulli", "multinomial", or "deterministic".')

    # NETWORK    
    # Create networkx graph from Z
    g = nx.Graph()
    
    #add nodes with colors of group
    for n in np.arange(np.shape(hm['corder'])[0]):  # include the last node as well
        g.add_node(hm['corder'][n],color=hm['group'][n])
    nodeColorList = list(nx.get_node_attributes(g,'color').values())
    
    #add edges with weight of theta (probability the link exists)
    cardE = len(np.where(Z==1)[1])
    edgeList = [(np.where(Z==1)[0][i], np.where(Z==1)[1][i]) for i in np.arange(cardE)]
    edgeWeightList = theta[np.where(Z==1)] * (2 / max(theta[np.where(Z==1)]))  #scaled link prob Pr(Z[i,j]=1) * weight
    for e in np.arange(len(edgeList)):  # arange already excludes the endpoint
        g.add_edge(edgeList[e][0],edgeList[e][1],weight=edgeWeightList[e])

    # NODE SIZES
    # 1. cluster linkage importance
    #nodesizelist = cluster['linkage'] * (400 / max(cluster['linkage']))
    # 2. betweenness centrality (wide range of sizes; very small on periphery)
    #nodesizelist = np.asarray(list(nx.betweenness_centrality(G,normalized=False).values())) * (400 / max(list(nx.betweenness_centrality(G,normalized=False).values())))
    # 3. degree (smaller range of sizes; easier to see on the periphery)
    degrees = dict(g.degree())  # dict(...) keeps this working on networkx 1.x and 2.x alike
    nodeSizeList = np.asarray(list(degrees.values())) * (350 / max(degrees.values()))   #scaled so the largest is size 350

    if plotting:
        # reproducibility
        np.random.seed(1)        
        
        #bc = nx.betweenness_centrality(g)
        E = len(nx.edges(g))
        V = len(g)
        k = round(E / V, 3)

        #size = np.array(list(bc.values())) * 1000  
        # here replacing the hierarchical magnitude hm['corder']

        fignx = plt.figure(figsize=(10,10))
        ## use heatmap color groupings to color nodes and heatmap magnitudes to size nodes
        if layout == 'spring':
            nx.draw(g, pos=nx.spring_layout(g, scale=graphScale),
                    node_color=nodeColorList, node_size=nodeSizeList,
                    width=edgeWeightList)
        elif layout == 'fruchterman':
            nx.draw(g, pos=nx.fruchterman_reingold_layout(g, scale=graphScale),
                    node_color=nodeColorList, node_size=nodeSizeList,
                    width=edgeWeightList)
        else:
            print('Please indicate a valid layout.')
        #else:
            #nx.graphviz_layout(g, prog=graphProg)
        plt.title('Network Created from Induced Rank = %s \n V = %s, E = %s, <k> = %s'%(r,V,E,k), fontweight='bold', fontsize=14)
    
        #plot log degree sequence
        degree_sequence = sorted(dict(g.degree()).values(), reverse=True)
        fig3 = plt.figure(figsize=(10,5))
        plt.loglog(degree_sequence)
        plt.title('Log Degree Distribution', fontweight='bold', fontsize=14)
        
    return {'cluster':hm, 'graph':g, 'linkage':hm['linkage'], 'theta':theta, 'A':A, 'Z':Z}
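A hedged usage sketch for netCreate, assuming numpy, networkx, matplotlib, and the heatmap/topNEdges/sampleLinks helpers it calls are importable in the same module; the slice sizes and rank are illustrative.

import numpy as np
from scipy.sparse import csr_matrix

np.random.seed(0)
# Hypothetical 2-relation adjacency tensor over 60 entities.
X = [csr_matrix(np.random.binomial(1, 0.05, size=(60, 60)).astype(float)) for _ in range(2)]
out = netCreate(X, r=5, minEdges=40, sampleMethod='deterministic', plotting=False)
print(out['Z'].sum(), 'edges kept from the top of A(A^T)')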
Example #14
import logging
from scipy.io.matlab import loadmat
from scipy.sparse import lil_matrix
from rescal import rescal_als

# Set logging to INFO to see RESCAL information
logging.basicConfig(level=logging.INFO)

# Load Matlab data and convert it to dense tensor format
T = loadmat('data/alyawarradata.mat')['Rs']
X = [lil_matrix(T[:, :, k]) for k in range(T.shape[2])]

# Decompose tensor using RESCAL-ALS
A, R, fit, itr, exectimes = rescal_als(X, 100, init='nvecs', lambda_A=10, lambda_R=10)
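As a follow-up to this demo, the returned factors reconstruct each frontal slice the same way the predict_rescal_als examples above do; a minimal sketch:

import numpy as np

# Approximate slice k of the original tensor: X_k ≈ A · R_k · A^T
k = 0
P_k = np.dot(A, np.dot(R[k], A.T))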
Example #15
    print('Number of keys for T[1]:', len(t2_keys))
    for i in range(10):
        print('  ', t2_keys[i])
    print('Size of intersection:', len(set(t1_keys).intersection(set(t2_keys))))

    print('Running RESCAL code')
    rank = 20
    maxIter = 100
    conv = 1e-5
    lambda_A = 0
    lambda_R = 0
    A, R, _, _, _ = rescal_als(
            [t[1].copy() for t in T],
            rank,
            init='nvecs',
            conv=conv,
            lambda_A=lambda_A,
            lambda_R=lambda_R,
            maxIter=maxIter,
            )

    print('A shape:', A.shape)
    print('R[0] shape:', R[0].shape)
    print('R size:', len(R))

    print('\nWriting A and R to disk')
    a_matrix_file = out_dir + 'a_matrix.tsv'
    r_matrix_file = out_dir + 'r_matrix.tsv'
    out = open(a_matrix_file, 'w')
    for n in range(node_dict.current_index - 1):
        node = node_dict.getString(n+1)
Example #17
if not os.path.exists(dest):
    os.makedirs(dest, exist_ok=True)

for nt in range(n_test):
    print(nt)
    file_name = os.path.join(dest, 'init_%.3f_rescal_n_dim_%d_%d.txt' % (p_obs, n_dim, nt))
    if not os.path.exists(file_name):
        seq = list()
        with open('../data/%s/train_%.3f.pkl' % (dataset, p_obs), 'rb') as f:
            mask = pickle.load(f)

        X = [csr_matrix(mask[k]) for k in range(n_relation)]

        for i in range(budget):
            try:
                A, R, f, itr, exectimes = rescal.rescal_als(X, n_dim)
            except Exception:  # avoid a bare except; fall back to random factors if ALS fails
                A = np.random.random([n_entity, n_dim])
                R = np.random.random([n_relation, n_dim, n_dim])

            _X = np.zeros_like(T)
            for k in range(T.shape[0]):
                _X[k] = np.dot(np.dot(A, R[k]), A.T)

            find = False
            while not find:
                _X[mask == 1] = -99999999
                next_idx = np.unravel_index(_X.argmax(), _X.shape)
                mask[next_idx] = 1
                seq.append(next_idx)
                if T[next_idx] == 1:
Example #18
                    except ValueError:
                        print(' NO_PATH ', end='')
                    print(idx_to_name[e])

                # This is the induced subgraph with the deleted edges.
                # We can do RESCAL on the dependency matrix of this induced
                # subgraph.
                my_subgraph = g.induced_subgraph(cmp_22147)
                my_subgraph_adj = lil_matrix((my_subgraph.vcount(),
                                              my_subgraph.vcount()),
                                             dtype='uint8')
                for eel in my_subgraph.get_edgelist():
                    my_subgraph_adj[eel[0], eel[1]] = 1

                T = [lil_matrix(my_subgraph_adj)]
                A, R, _, _, _ = rescal_als(
                    T, 10, init='nvecs', conv=1e-3, lambda_A=1, lambda_R=1)
                idx_of_org_in_cmp_22147 = cmp_22147.index(org)
                als_prediction = np.dot(
                    np.dot(A[idx_of_org_in_cmp_22147], R[0]), A.T)
                # Now based on this ranking find out what the rank of
                # the true employees would be.
                als_pred_of_employees_removed = [als_prediction[cmp_22147.index(e)]
                                                 for e in employees_to_remove]

                sorted_als_pred = sorted(
                    filter(
                        lambda x: cmp_22147_mask[x[0]], enumerate(als_prediction)),
                    reverse=True,
                    key=lambda x: x[1])
                sorted_als_pred_idx = [e[0] for e in sorted_als_pred]
                sorted_als_pred_val = [e[1] for e in sorted_als_pred]
Example #19
sparse = [lil_matrix((len(entities),len(entities))) for r in relations]
ind = 0
for rel in relations:
    for s in rel:
        for t in rel[s]:
            sparse[ind][e2i[s],e2i[t]] = rel[s][t]
    ind += 1 
#plt.show()

fname = '{}{}.pkl'.format(sys.argv[1],sys.argv[2])

if os.path.isfile(fname):
    A, R, fit, itr, exectimes = pickle.load(open(fname, 'rb'))
else:
    A, R, fit, itr, exectimes = rescal_als(sparse, int(sys.argv[2]), init='nvecs', conv=1e-6, lambda_A=1, lambda_R=1)
    pickle.dump([A, R, fit, itr, exectimes], open(fname, 'wb'))

n = A.shape[0]
P = zeros((n, n, len(R)))
for k in range(len(R)):
    P[:, :, k] = dot(A, dot(R[k], A.T))
print(A.shape)
print(len(R), R[0].shape)
plt.matshow(P[:,:,0])
#plt.show()
A_ = np.mean(A,axis=0)
e2vec = {}
for e in a2col:
    e2vec[e] = A[e2i[e],:]
print(e2vec)
Example #21
    assert (len(entity) == 14951)
    rel = data_table[1].unique()
    assert (len(rel) == 1345)

    entity_n = len(entity)

    entity_index = {v: k for k, v in enumerate(entity)}  # map entity string -> index, as the lookups below require

    data_table.columns = ["head", "rel", "tail"]
    # Packing it into a three-indices tensor.
    print("Packaging Tensors...")
    for r in rel:
        # Build each frontal slice directly as a sparse lil_matrix; a dense
        # entity_n x entity_n array would be enormous for 14951 entities.
        tensor_slice = lil_matrix((entity_n, entity_n))
        rel_dataFrame = data_table[data_table["rel"].str.contains(r)]
        for idx, row in rel_dataFrame.iterrows():
            head_index = entity_index[row["head"]]
            tail_index = entity_index[row["tail"]]
            tensor_slice[head_index, tail_index] = 1
        rel_mat.append(tensor_slice)
    return rel_mat


if __name__ == "__main__":
    X = read_data("./FB15k/freebase_mtr100_mte100-train.txt")
    print("Training starts.")
    A, R, fit, itr, exectimes = rescal_als(X,
                                           100,
                                           init='nvecs',
                                           lambda_A=10,
                                           lambda_R=10)
Example #22
def run_rescal(graph_dir, out_dir):
    try_makedirs(out_dir)

    T, node_dict, edge_dict = read_tensor_from_graph(graph_dir)
    #T, node_dict, edge_dict = create_test_tensor()

    print('Samples from T[0] and T[1]:')
    import random
    t1_keys = list(T[0][1].keys())
    random.shuffle(t1_keys)
    print('Relation for T[0]:', T[0][0])
    print('Number of keys for T[0]:', len(t1_keys))
    for i in range(10):
        print('  ', t1_keys[i])
    t2_keys = list(T[1][1].keys())
    random.shuffle(t2_keys)
    print('Relation for T[1]:', T[1][0])
    print('Number of keys for T[1]:', len(t2_keys))
    for i in range(10):
        print('  ', t2_keys[i])
    print('Size of intersection:', len(set(t1_keys).intersection(set(t2_keys))))

    print('Running RESCAL code')
    rank = 20
    maxIter = 100
    conv = 1e-5
    lambda_A = 0
    lambda_R = 0
    A, R, _, _, _ = rescal_als(
            [t[1].copy() for t in T],
            rank,
            init='nvecs',
            conv=conv,
            lambda_A=lambda_A,
            lambda_R=lambda_R,
            maxIter=maxIter,
            )

    print('A shape:', A.shape)
    print('R[0] shape:', R[0].shape)
    print('R size:', len(R))

    print('\nWriting A and R to disk')
    a_matrix_file = out_dir + 'a_matrix.tsv'
    r_matrix_file = out_dir + 'r_matrix.tsv'
    out = open(a_matrix_file, 'w')
    for n in range(node_dict.current_index - 1):
        node = node_dict.getString(n+1)
        out.write(node)
        for j in range(rank):
            out.write('\t')
            out.write(str(A[n,j]))
        out.write('\n')
    out.close()
    out = open(r_matrix_file, 'w')
    for index, r in enumerate(R):
        relation = T[index][0]
        out.write(relation)
        out.write('\n')
        for i in range(rank):
            for j in range(rank):
                out.write(str(r[i,j]))
                if (j < rank - 1):
                    out.write('\t')
            out.write('\n')
        out.write('\n')
    out.close()
    return A, R, rank, edge_dict, node_dict