예제 #1
0
def fit_mtl_proj_WFA(q_wfa, R, R_task_vec, H_P, task_id_vec, version, sparse):
    '''
    Fit task-specific WFAs (multi-task fit with projection).

    For every id in ``task_id_vec`` the first Hankel slice of that task is
    decomposed by SVD and the factor ``U[:, :R] . diag(D[:R])`` is kept.
    These factors are then handed, together with all Hankel matrices, to
    ``q_wfa.fit_WFA``.

    :param q_wfa: object exposing ``fit_WFA``; the same object is returned as
        the second result
    :param R: number of singular values/vectors kept; also forwarded to
        ``fit_WFA``
    :param R_task_vec: per-task ranks, forwarded as ``R_tasks_vec``
    :param H_P: Hankel matrices indexed by task id; ``H_P[task_id][0]`` is the
        slice that gets decomposed
    :param task_id_vec: task ids to process
    :param version: forwarded unchanged to ``fit_WFA``
    :param sparse: if true, ``sparse_svd(..., k=R)`` is applied to the slice
        directly; otherwise the slice is flattened with ``flatten_k(..., 0)``
        and decomposed with ``np.linalg.svd``
    :return: ``(result, q_wfa)`` where ``result`` is whatever
        ``q_wfa.fit_WFA`` returned
    '''

    def _scaled_left_factor(task_hankel):
        # Left singular vectors scaled by their singular values, rank R.
        first_slice = task_hankel[0]
        if sparse:
            left, singular, _ = sparse_svd(first_slice, k=R)
        else:
            left, singular, _ = np.linalg.svd(flatten_k(first_slice, 0))
        return left[:, :R].dot(np.diag(singular[:R]))

    task_factors = [_scaled_left_factor(H_P[task_id])
                    for task_id in task_id_vec]

    fit_result = q_wfa.fit_WFA(np.asarray(H_P),
                               R,
                               task_id_vec=task_id_vec,
                               R_tasks_vec=R_task_vec,
                               P_vec=task_factors,
                               version=version,
                               sparse=sparse)
    return fit_result, q_wfa
예제 #2
0
    def corona(self):
        cmat = self.get_conjunctions()
        mat_up, mat_down = self.get_matrices()
        
        hub = np.ones((mat_up.shape[0],))
        authority = np.ones((mat_up.shape[0],))

        for iter in xrange(100):
            vec = authority + hub
            vec /= np.max(vec)
            root = vec * 0
            root[0] = 1.0
            conj_sums = cmat * vec
            conj_par = 1.0/(np.maximum(EPS, cmat * (1.0 / np.maximum(EPS, vec))))
            conj_factor = np.minimum(1.0, conj_par / (conj_sums+EPS))
            conj_diag = make_diag(conj_factor)
            combined = conj_diag * (mat_up + mat_down) + make_diag(np.ones(len(vec)))
            #combined = (mat_up + mat_down) * 0.5

            u, sigma, v = sparse_svd(combined, k=4)
            activation = np.dot(u, u[0])
            #w, v = eigen(combined, k=3, v0=root)
            #activation = v[:, np.argmax(w)]
            #activation = (activation / (activation[0]+EPS)).real
            print activation
            print sigma
            print conj_factor
            print
            hub += self._fast_matrix.T * conj_diag * activation
            authority += conj_diag * self._fast_matrix * activation
        hub /= np.max(hub)
        authority /= np.max(authority)
        return zip(self.nodes, hub, authority)
예제 #3
0
    def corona(self):
        cmat = self.get_conjunctions()
        mat_up, mat_down = self.get_matrices()

        hub = np.ones((mat_up.shape[0], ))
        authority = np.ones((mat_up.shape[0], ))

        for iter in xrange(100):
            vec = authority + hub
            vec /= np.max(vec)
            root = vec * 0
            root[0] = 1.0
            conj_sums = cmat * vec
            conj_par = 1.0 / (np.maximum(EPS,
                                         cmat * (1.0 / np.maximum(EPS, vec))))
            conj_factor = np.minimum(1.0, conj_par / (conj_sums + EPS))
            conj_diag = make_diag(conj_factor)
            combined = conj_diag * (mat_up + mat_down) + make_diag(
                np.ones(len(vec)))
            #combined = (mat_up + mat_down) * 0.5

            u, sigma, v = sparse_svd(combined, k=4)
            activation = np.dot(u, u[0])
            #w, v = eigen(combined, k=3, v0=root)
            #activation = v[:, np.argmax(w)]
            #activation = (activation / (activation[0]+EPS)).real
            print activation
            print sigma
            print conj_factor
            print
            hub += self._fast_matrix.T * conj_diag * activation
            authority += conj_diag * self._fast_matrix * activation
        hub /= np.max(hub)
        authority /= np.max(authority)
        return zip(self.nodes, hub, authority)
예제 #4
0
    def corona(self):
        cmat = self.get_conjunctions()
        mat_up, mat_down = self.get_matrices()

        hub = np.ones((mat_up.shape[0], )) / mat_up.shape[0] / 100
        authority = np.ones((mat_up.shape[0], )) / mat_up.shape[0] / 100
        prev_activation = np.zeros((mat_up.shape[0], ))
        prev_err = 1.0

        for iter in xrange(100):
            vec = authority + hub
            vec /= np.max(vec)
            root = np.zeros(len(vec), 'f')
            root[0] = 1.0
            conj_sums = cmat * vec
            conj_par = 1.0 / (np.maximum(EPS,
                                         cmat * (1.0 / np.maximum(EPS, vec))))
            conj_factor = np.minimum(1.0, conj_par / (conj_sums + EPS))
            conj_diag = make_diag(conj_factor)
            combined = conj_diag * (mat_up + mat_down) * 0.25 + make_diag(
                np.ones(len(vec)) * 0.5)
            #combined = (mat_up + mat_down) * 0.5

            u, sigma, v = sparse_svd(combined, k=1)
            activation = u[:, 0]
            #activation = np.dot(u, u[0])
            #w, v = eigen(combined.T, k=1, v0=root, which='LR')
            #activation = v[:, np.argmax(w)].real
            activation *= np.sign(np.sum(activation))
            activation /= (np.sum(np.abs(activation)) + EPS)
            hub += (hub + self._final_matrix_T * conj_diag * activation) / 2
            authority += (authority +
                          conj_diag * self._final_matrix * activation) / 2
            print activation
            err = np.max(np.abs(activation - prev_activation))\
                / np.max(np.abs(activation))
            print err
            if iter >= 3 and err + prev_err < 1e-9:
                print "converged on iteration %d" % iter
                break
            prev_err = err
            prev_activation = activation.copy()
            print sigma
            print conj_factor
            print
        hub = self._final_matrix_T * conj_diag * activation
        authority = conj_diag * self._final_matrix * activation
        return zip(self.nodes, hub, authority)
예제 #5
0
    def corona(self):
        cmat = self.get_conjunctions()
        mat_up, mat_down = self.get_matrices()
        
        hub = np.ones((mat_up.shape[0],)) / mat_up.shape[0] / 100
        authority = np.ones((mat_up.shape[0],)) / mat_up.shape[0] / 100
        prev_activation = np.zeros((mat_up.shape[0],))
        prev_err = 1.0

        for iter in xrange(100):
            vec = authority + hub
            vec /= np.max(vec)
            root = np.zeros(len(vec), 'f')
            root[0] = 1.0
            conj_sums = cmat * vec
            conj_par = 1.0/(np.maximum(EPS, cmat * (1.0 / np.maximum(EPS, vec))))
            conj_factor = np.minimum(1.0, conj_par / (conj_sums+EPS))
            conj_diag = make_diag(conj_factor)
            combined = conj_diag * (mat_up + mat_down) * 0.25 + make_diag(np.ones(len(vec))*0.5)
            #combined = (mat_up + mat_down) * 0.5

            u, sigma, v = sparse_svd(combined, k=1)
            activation = u[:, 0]
            #activation = np.dot(u, u[0])
            #w, v = eigen(combined.T, k=1, v0=root, which='LR')
            #activation = v[:, np.argmax(w)].real
            activation *= np.sign(np.sum(activation))
            activation /= (np.sum(np.abs(activation)) + EPS)
            hub += (hub + self._final_matrix_T * conj_diag * activation) / 2
            authority += (authority + conj_diag * self._final_matrix * activation) / 2
            print activation
            err = np.max(np.abs(activation - prev_activation))\
                / np.max(np.abs(activation))
            print err
            if iter >= 3 and err + prev_err < 1e-9:
                print "converged on iteration %d" % iter
                break
            prev_err = err
            prev_activation = activation.copy()
            print sigma
            print conj_factor
            print
        hub = self._final_matrix_T * conj_diag * activation
        authority = conj_diag * self._final_matrix * activation
        return zip(self.nodes, hub, authority)
print 'userid = ',userid
print 'KNN Type = ',KNNType


with open(filepath+'User_'+userid+'/'+KNNType+"/KNN_"+userid+".out", 'r') as f:
    new_data = pickle.load(f)

#Reading query, row values of the userid to which recommendation has to be done.
with open(filepath+'User_'+userid+'/'+KNNType+'/'+userid+'.out', 'r') as f1:
    query = pickle.load(f1) 
query1 = np.array(query)

svdInputMatrix = np.array(new_data,dtype=np.float)
svdInputList= []
u,s,vt = sparse_svd(svdInputMatrix)
print u.shape, s.shape, vt.shape
print s

#Calculating energy for row elimination
energy = 0
for i in range(len(s)):
    energy = energy + (s[i]*s[i])
energy = (energy * 90)/100
print energy
#########################################


V = np.transpose(vt)
print V.shape
예제 #7
0
파일: similarity.py 프로젝트: wumch/saa
 def svd(self, output):
     """
     Run a truncated SVD on ``self.csc`` and store ``self.projectMatrix``.

     The matrix is cast to float32 and decomposed with ``sparse_svd`` asking
     for ``min(100, min(shape) - 1)`` singular values and only the left
     singular vectors.  ``self.csc`` is deleted afterwards.  The projection
     matrix is ``spares_inv(diags(s)) . U^T`` stored as CSR; it should be
     persisted by the caller.  ``output`` is not used in this method.
     """
     rank = min(100, min(self.csc.shape) - 1)
     U, s, V = sparse_svd(self.csc.astype(np.float32),
                          k=rank,
                          return_singular_vectors='u')
     del self.csc

     # presumably spares_inv inverts the sparse diagonal matrix -- TODO confirm
     inverse_sigma = spares_inv(scipy.sparse.diags(s))
     self.projectMatrix = scipy.sparse.csr_matrix(inverse_sigma.dot(U.T))
예제 #8
0
    def fit_WFA(self,
                H,
                R,
                task_id_vec=[0],
                R_tasks_vec=None,
                P_vec=None,
                version='classic',
                sparse=False,
                return_P=False):
        '''
        Fit the WFA from Hankel matrices via a rank-R SVD factorisation.

        The first Hankel slice is factorised as ``P . S`` with
        ``P = U[:, :R] . diag(D[:R])`` and ``S = VT[:R, :]``.  From these,
        ``alpha`` is the first row of ``P``, ``Omega`` is ``pinv(P)`` applied
        to the first column(s) of that slice, and every remaining slice
        ``H_sigma`` gives ``A_sigma = pinv(P) . H_sigma . pinv(S)``.  For each
        entry ``k`` of ``task_id_vec`` one ``(alpha, As, Omega[:, k])`` triple
        is appended to ``self.alpha``, ``self.As`` and ``self.Omega``.

        :param H: Hankel data (meta or task-specific).  Dense: anything
            ``np.array`` turns into a 4-D array, which is then transposed with
            ``(1, 2, 0, 3)``.  Sparse: ``H[task][sigma]`` must be accepted by
            ``sps.hstack``; index 0 is the slice that gets decomposed.
        :param R: number of singular values/vectors kept
        :param task_id_vec: column indices into ``Omega``, one per task.  The
            default list is never mutated here.
        :param R_tasks_vec: task-specific ranks, only forwarded to
            ``convert_meta_task_Q_WFA`` when ``P_vec`` is given
        :param P_vec: per-task factors; when not ``None``,
            ``convert_meta_task_Q_WFA`` is called with deep copies of the
            accumulated ``alpha``/``As``/``Omega`` lists
        :param version: not used in this method
        :param sparse: selects the sparse (``sps.hstack`` + ``sparse_svd``) or
            dense (``flatten_k`` + ``np.linalg.svd``) code path
        :param return_P: if true, return ``P`` as the second result
        :return: ``('Success', P)`` if ``return_P`` else ``('Success', [])``;
            ``('Failed', [])`` if any ``Exception`` is raised while fitting
        '''
        import logging

        try:
            if sparse:
                tasks = range(len(H))
                base = sps.hstack([H[task][0] for task in tasks])
                U, D, VT = sparse_svd(base, k=R)
            else:
                H = np.array(H)
                H = H.transpose((1, 2, 0, 3))
                # NOTE(review): only the leading R singular triplets are used
                # below, so full_matrices=False may be enough -- confirm.
                U, D, VT = np.linalg.svd(flatten_k(H[0], 0))
            P = U[:, :R].dot(np.diag(D[:R]))
            S = VT[:R, :]
            P_inv = np.linalg.pinv(P)
            S_inv = np.linalg.pinv(S)
            alpha = P[0]
            if sparse:
                first_columns = sps.hstack(
                    [H[task][0][:, 0] for task in tasks])
                Omega = P_inv @ first_columns
                As = np.array([
                    P_inv @ sps.hstack([H[task][sigma] for task in tasks])
                    @ S_inv
                    for sigma in range(1, len(H[0]))
                ])
            else:
                Omega = P_inv @ H[0, :, :, 0]
                As = np.array([
                    P_inv @ flatten_k(H[sigma], 0) @ S_inv
                    for sigma in range(1, H.shape[0])
                ])

            # Resolve every Omega column before touching self.*, so a bad
            # task id cannot leave the three lists with different lengths.
            omega_columns = [Omega[:, k] for k in task_id_vec]
            for omega_k in omega_columns:
                self.alpha.append(alpha)
                self.As.append(As)
                self.Omega.append(omega_k)

            if P_vec is not None:
                n_tasks = len(task_id_vec)
                alpha_copy = copy.deepcopy(self.alpha)
                As_copy = copy.deepcopy(self.As)
                Omega_copy = copy.deepcopy(self.Omega)
                self.convert_meta_task_Q_WFA(alpha_copy, As_copy, Omega_copy,
                                             n_tasks, P, P_vec, R_tasks_vec)

        except Exception:
            # Best-effort contract: report failure instead of raising.
            # KeyboardInterrupt/SystemExit are no longer swallowed; the
            # traceback is available at DEBUG level.
            logging.getLogger(__name__).debug('fit_WFA failed', exc_info=True)
            return 'Failed', []
        if return_P:
            return 'Success', P
        return 'Success', []
query1 = np.array(query)

svdInputMatrix = np.array(new_data,dtype=np.float)
svdInputList= []
for i in range(len(svdInputMatrix)):
    correlation,p = pearsonr(query1,svdInputMatrix[i])
    print "correlation = ", correlation
    if correlation > 0:
        svdInputList.append(svdInputMatrix[i])
#print array[1]

svdInputMatrixWithPearson = np.array(svdInputList,dtype=np.float)
print "new size after corelation check"
print type(svdInputMatrixWithPearson)
print svdInputMatrixWithPearson.shape
u,s,vt = sparse_svd(svdInputMatrixWithPearson)
"u,s,vt = sparse_svd(svdInputMatrix)"
print u.shape, s.shape, vt.shape
print s

energy = 0
for i in range(len(s)):
    energy = energy + (s[i]*s[i])
energy = (energy * 90)/100
print energy

V = np.transpose(vt)
print V.shape
print type(V)

result = np.dot(query1,V)
예제 #10
0
# Pickle streams are bytes: the files are opened in binary mode, which
# pickle.load requires on Python 3.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(save_path + 'tf_idf.pkl', 'rb') as f:
    tf_idf = pickle.load(f)

with open(save_path + 'scores.pkl', 'rb') as f:
    scores = pickle.load(f)

if is_sparse:
    tf_idf = csc_matrix(tf_idf)

tf_idf_top = []

tf_idf = tf_idf.T
print('Doing svd...')
if is_sparse:
    # Ask for as many singular values as the smaller matrix dimension.
    U, D, V = sparse_svd(tf_idf, min(tf_idf.shape))
    # presumably this sparse_svd returns the left vectors transposed, hence
    # the transpose back -- TODO confirm against its import
    U = U.T
else:
    U, D, V = svd(tf_idf, full_matrices=False)
print('finished doing svd.')
# Scale each left singular vector by the square root of its singular value.
embeddings_U_tot = U * np.sqrt(D)

tot_l = 1
tot_u = min(vocabulary_size, D.shape[0])

for i in range(tot_l, tot_u):
    print(i)
    keep_dim = i

    keep_dims = np.arange(keep_dim)
    embeddings_U = embeddings_U_tot[:, keep_dims]