Example #1
def img_compr_separate(image, k):
    img = np.array(Image.open(image))

    img = img / 255
    row, col, _ = img.shape

    img_red = img[:, :, 0]
    img_green = img[:, :, 1]
    img_blue = img[:, :, 2]

    U_r, D_r, V_r = svd(img_red)
    U_g, D_g, V_g = svd(img_green)
    U_b, D_b, V_b = svd(img_blue)

    bytes_stored = sum([
        matrix.nbytes
        for matrix in [U_r, D_r, V_r, U_g, D_g, V_g, U_b, D_b, V_b]
    ])

    U_r_k = U_r[:, 0:k]
    U_g_k = U_g[:, 0:k]
    U_b_k = U_b[:, 0:k]

    V_r_k = V_r[0:k, :]
    V_g_k = V_g[0:k, :]
    V_b_k = V_b[0:k, :]

    D_r_k = D_r[0:k]
    D_g_k = D_g[0:k]
    D_b_k = D_b[0:k]

    compressed_bytes = sum([
        matrix.nbytes for matrix in
        [U_r_k, D_r_k, V_r_k, U_g_k, D_g_k, V_g_k, U_b_k, D_b_k, V_b_k]
    ])

    img_red_compr = np.dot(U_r_k, np.dot(np.diag(D_r_k), V_r_k))
    img_green_compr = np.dot(U_g_k, np.dot(np.diag(D_g_k), V_g_k))
    img_blue_compr = np.dot(U_b_k, np.dot(np.diag(D_b_k), V_b_k))

    img_compr = np.zeros((row, col, 3))

    img_compr[:, :, 0] = img_red_compr
    img_compr[:, :, 1] = img_green_compr
    img_compr[:, :, 2] = img_blue_compr

    img_compr[img_compr < 0] = 0
    img_compr[img_compr > 1] = 1

    plt.xlabel('rank = {}'.format(k))

    plt.imshow(img_compr)
    plt.show()
    matplotlib.image.imsave('compressed_{}.{}'.format(k,
                                                      image.split('.')[-1]),
                            img_compr)

    mse = ((img - img_compr)**2).mean(axis=None)

    return bytes_stored, compressed_bytes, mse
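A short driver for the function above (a sketch only: 'photo.png' is a placeholder path, and numpy, PIL, matplotlib and a NumPy-compatible svd are assumed to be imported as in the snippet):

for k in [5, 20, 50]:
    full, truncated, mse = img_compr_separate('photo.png', k)
    print('k={}: {:.1%} of full-SVD storage, MSE={:.5f}'.format(
        k, truncated / full, mse))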
Example #2
def testSVD():
    '''SVD based feature selection'''
    features = trainingDataWithoutLabel()
    New_features = svd(features)
    test_features = svd(testDataWithoutLabel())

    print('SVD', New_features.shape)
    testModel(New_features, test_features)
Example #3
 def generate_models(self, train_matrix):
     scores_by_movie = score_matrix(train_matrix, avg='movie')
     scores_by_user = score_matrix(train_matrix, avg='user')
     self.movie_normalized_models = []
     self.user_normalized_models = []
     for i in range(self.start, self.stop, self.step):
         self.movie_normalized_models.append(svd(scores_by_movie, i))
         self.user_normalized_models.append(svd(scores_by_user, i))
Example #4
def kabsch(X, Y):
    """Using the Kabsch algorithm with two sets of paired point X and Y.

    Args:
        X (np.ndarray): NxD matrix, where N is points and D is dimension.
        Y (np.ndarray): NxD matrix, where N is points and D is dimension.

    Returns:
        np.ndarray: Rotation matrix (D,D)
    """

    # Computation of the covariance matrix
    C = np.dot(np.transpose(X), Y)

    # Computation of the optimal rotation matrix see:
    # http://en.wikipedia.org/wiki/Kabsch_algorithm
    V, S, W = svd(C)
    d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0

    if d:
        V[:, -1] = -V[:, -1]

    # Create Rotation matrix U
    U = np.dot(V, W)

    return U
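A quick self-check of the Kabsch routine above: rotate a random point cloud by a known rotation and confirm the recovered matrix reproduces it. A sketch, assuming the snippet's module-level svd is numpy.linalg.svd.

import numpy as np
from numpy.linalg import svd  # the svd assumed by kabsch above

rng = np.random.default_rng(0)
X = rng.standard_normal((10, 3))                  # N x D point cloud
theta = np.pi / 5
R_true = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                   [np.sin(theta),  np.cos(theta), 0.0],
                   [0.0, 0.0, 1.0]])
Y = X @ R_true                                     # paired, rotated copy
R_est = kabsch(X, Y)
assert np.allclose(R_est, R_true)                  # rotation recovered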
Example #5
def main():

  semeval_dir = 'data/maui-semeval2010-test/'
  manual_keywords = []
  total_precision = 0
  total_recall = 0
  total_docs = 0
  method = str(sys.argv[1])
  fdir = str(sys.argv[2])
  single = False
  num_top = -1
  if (len(sys.argv) > 3) and (str(sys.argv[3]) == 'single'):
    single = True
  if (len(sys.argv) > 4):
    num_top = int(sys.argv[4])
  filenames = sorted(os.listdir(fdir))

  for filename in filenames:
    if filename[0] == '.':
      continue
    print(filename)
    f = open(fdir + filename, 'r')
    content = f.read()
    if method == 'svd':
      keywords = svd(content, 1, single, num_top)
    elif method == 'raketr':
      keywords = raketr.main(content, single, num_top)
    elif method == 'cluster':
      keywords = kcluster(content, 6, 15, single, num_top)
    else:
      print('methods accepted: svd raketr cluster, please specify')
      exit(0)
    print('keyphrases found')
    print(keywords)
Example #6
def UserRecommend(name):
    u = json.load(open("data.json", encoding="GB2312"))
    i = 0
    try:
        for z in range(1, len(u['user'])):
            if u['user'][z]['name'] == name:
                i = z
    except Exception:
        print('err')
    temp = svd.svd()
    userDmkForm = temp[i]
    # distance from this user's dmk_log value to each entry of the SVD row
    a = [abs(x - int(u['user'][i]['dmk_log'])) for x in userDmkForm]
    # collect the titles of the four closest bangumi
    result = [u['bangumi'][a.index(min(a))]['title']]
    for rank in range(1, 4):
        result.append(u['bangumi'][findmin(a, rank)]['title'])
    return result
Example #7
 def WeightedPrinComp(self, Mat, Rep=-1):
     """Takes a matrix and row weights and manually computes Principal
     Components Analysis (PCA).

     This version of the procedure is so basic that it can also be thought of
     as merely a singular value decomposition of a weighted covariance matrix.
     """
     wCVM = self.WeightedCov(Mat, Rep)
     SVD = svd.svd(wCVM['Cov'])
     L = SVD[0].T[0]                        # first loading
     S = dot(wCVM['Center'], SVD[0]).T[0]   # first score
     return (L, S)
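The docstring above describes PCA as an SVD of a weighted covariance matrix. A self-contained NumPy sketch of that idea, with toy data and uniform weights (none of these names come from the source project):

import numpy as np

X = np.array([[2.0, 0.5], [0.0, 1.0], [4.0, 2.5], [1.0, 0.0]])
w = np.full(len(X), 1.0 / len(X))            # uniform row weights
centered = X - w @ X                          # subtract weighted column means
cov = (centered * w[:, None]).T @ centered    # weighted covariance matrix
U, s, Vt = np.linalg.svd(cov)
loading = U[:, 0]                             # first principal direction
scores = centered @ loading                   # first principal scores
print(loading, scores)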
Example #8
def WeightedPrinComp(M, Weights):
    if len(Weights) != len(M):
        print('Weights must be equal in length to rows')
        return 'error'
    if type(Weights[0]) != list:
        Weights = [[x] for x in Weights]   # promote to column form
    wCVM = WeightedCov(M, Weights)
    SVD = svd.svd(wCVM['Cov'])
    L = switch_row_cols(SVD[0])[0]
    S = switch_row_cols(dot(wCVM['Center'], L))[0]
    return {'Scores': S, 'Loadings': L}
Example #9
 def test_factorization(self):
     r2 = np.sqrt(2)
     u = np.array([[1 / r2, 1 / r2], [-1 / r2, 1 / r2]])
     d = np.array([[4, 0], [0, 1]])
     v_t = np.eye(2)
     mat = np.dot(np.dot(u, d), v_t)
     result = svd.svd(mat)
     assert_array_almost_equal(mat, np.dot(np.dot(result[0], result[1]), result[2]))
     assert_array_almost_equal(u, result[0], decimal=10)
     assert_array_almost_equal(d, result[1], decimal=10)
     assert_array_almost_equal(v_t, result[2], decimal=10)
Example #10
def recommendation():
    selected_option = request.args.get('type')
    if selected_option == '1':
        k = ["This option is yet to come..!"]
    elif selected_option == '2':
        k = knn.knn(bookname)
    elif selected_option == '3':
        k = svd.svd(bookname)
    elif selected_option == '4':
        k = ["The Lovely Bones: A Novel",
             "The Da Vinci Code",
             "The Red Tent (Bestselling Backlist)",
             "Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))",
             "The Secret Life of Bees",
             "Wild Animus",
             "Divine Secrets of the Ya-Ya Sisterhood: A Novel",
             "Where the Heart Is (Oprah's Book Club (Paperback))",
             "Girl with a Pearl Earring",
             "Angels & Demons"]
    else:
        k = []  # unknown option: avoid referencing an unbound k below
    return render_template('recommendations.html', k=k)
Example #11
    def test_svd_with_zero_singular_values(self):
        matrix = [[3, -2], [-3, 2]]

        result = svd(matrix, max_eigenvalues=2, iterations=10)

        assert len(result) == 1

        assert_allclose(result[0][0],
                        [[0.7071067811865475], [-0.7071067811865475]])
        assert result[0][1] == approx(5.099019513592785)
        assert_allclose(result[0][2],
                        [[0.8320502943378437], [-0.5547001962252291]])
Example #12
def test_1_by_1():
    matrix = numpy.array([[2]], dtype='float64')
    singular_values, us, vs = svd(matrix)
    assert_that(singular_values).is_equal_to([2])
    assert_that(us).is_length(1)
    assert_that(vs).is_length(1)

    if us[0] == [-1.0]:
        assert_that(vs[0]).is_equal_to([-1.0])
    else:
        assert_that(us[0]).is_equal_to([1.0])
        assert_that(vs[0]).is_equal_to([1.0])
Example #13
def problem():
    #Initialize parameters of the problem
    m = 15
    n = 100
    c = 2006.787453080206

    #Initialize matrix A and vector b
    b = empty(n)
    A = empty((n, m))

    #Calculate A and b according to the parameters
    i = 0
    while i < n:
        A[i, 0] = 1
        A[i, 1] = i / (n - 1)
        j = 2
        while j < m:
            A[i, j] = power(A[i, 1], j)
            j = j + 1
        b[i] = (1 / c) * exp(sin(4 * A[i, 1]))
        i = i + 1

    print('Matrix A:')
    print(A)

    print('\nVector b:')
    print(b)

    #Calculate x by solving the least squares problem using the normal equations
    #NOTE: This won't execute because A is very ill conditioned (det ~= 1E-90)
    #ne_x = normal_equations(A, b)
    #print(ne_x)

    #Calculate x by solving the least squares problem using Householder transformations
    h_x = householder(A, b)
    print('\nValue of x using Householder:')
    print(h_x)

    #Calculate x by solving the least squares problem using the Gram-Schmidt process
    gs_x = gram_schmidt(A, b)
    print('\nValue of x using Gram-Schmidt:')
    print(gs_x)

    #Calculate x by solving the least squares problem using the modified Gram-Schmidt process
    mgs_x = modified_gram_schmidt(A, b)
    print('\nValue of x using modified Gram-Schmidt:')
    print(mgs_x)

    #Calculate x by solving the least squares problem using the SVD decomposition (pseudoinverse)
    svd_x = svd(A, b)
    print('\nValue of x using the SVD decomposition (pseudoinverse):')
    print(svd_x)
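The commented-out normal_equations call above is skipped because A is nearly singular. A NumPy-only sketch (independent of the helpers used above) makes the conditioning problem concrete: forming the normal equations squares the condition number.

import numpy as np

n, m = 100, 15
t = np.arange(n) / (n - 1)
A = np.vander(t, m, increasing=True)   # same structure as the A built above
print('cond(A)       =', np.linalg.cond(A))        # already enormous
print('cond(A.T @ A) =', np.linalg.cond(A.T @ A))  # squared: beyond double precision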
Example #14
 def test_2by4_factorization(self):
     svd.TEST = True
     r3 = np.sqrt(3)
     r2 = np.sqrt(2)
     u = np.array(
         [
             [1.0 / 4.0, 1.0 / 4.0, 1.0 / 4.0],
             [1.0 / 4.0, -1.0 / 4.0, 1.0 / 4.0],
             [1.0 / 4.0, 1.0 / 4.0, -1.0 / 4.0],
             [1.0 / 4.0, -1.0 / 4.0, -1.0 / 4.0],
         ]
     )
     d = np.zeros((3, 3), dtype=np.float64)
     d[0][0] = 10
     d[1][1] = 5
     d[2][2] = 2
     v = np.array([[1 / r3, 1 / r3, 1 / r3], [1 / r2, -1 / r2, 0]])
     # Now transpose it
     mat = np.dot(np.dot(v, d), u.T)
     result = svd.svd(mat)
     trans_res = svd.svd(mat.T)
     assert_array_almost_equal(result[0].T, trans_res[2])
     assert_array_almost_equal(mat, np.dot(np.dot(result[0], result[1]), result[2]))
Example #15
def rSVD(A, rank):
    """
    Implementation of randomized SVD
    :param A: input matrix
    :param rank: rank of SVD decomposition
    :return: SVD decomposition matrices
    """
    n, m = A.shape
    P = np.random.randn(m, rank)
    Z = A @ P
    q, r = np.linalg.qr(Z, mode="reduced")
    Y = q.T @ A
    s, uy, v = svd(Y, min(min(Y.shape), rank))
    u = q @ uy
    return s, u, v
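A quick check of rSVD against NumPy's dense SVD. This is a sketch: the project-local svd(Y, k) is replaced by a thin wrapper around np.linalg.svd that returns (s, u, v) in the same order, so the snippet runs on its own.

import numpy as np

def svd(Y, k):
    # stand-in for the project's truncated SVD helper
    u, s, vt = np.linalg.svd(Y, full_matrices=False)
    return s[:k], u[:, :k], vt[:k]

A = np.random.default_rng(1).standard_normal((200, 80))
s, u, v = rSVD(A, rank=20)
exact = np.linalg.svd(A, compute_uv=False)
print('leading singular values, randomized vs exact:')
print(np.round(s[:5], 3))
print(np.round(exact[:5], 3))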
Example #16
    def test_svd(self):
        matrix = np.array([[2, -1], [-1, 2]])

        result = svd(matrix, max_eigenvalues=2, iterations=10)

        assert len(result) == 2

        assert_allclose(result[0][0],
                        [[0.7071067812541463], [-0.7071067811189488]])
        assert result[0][1] == approx(3)
        assert_allclose(result[0][2],
                        [[0.7071067813893437], [-0.7071067809837513]])

        assert_allclose(result[1][0],
                        [[0.7071067805781592], [0.7071067817949359]])
        assert result[1][1] == approx(1)
        assert_allclose(result[1][2],
                        [[0.7071067809837512], [0.7071067813893438]])
Example #17
 def test_4by2_factorization(self):
     r3 = np.sqrt(3)
     r2 = np.sqrt(2)
     u = np.array(
         [
             [1.0 / 4.0, 1.0 / 4.0, 1.0 / 4.0],
             [1.0 / 4.0, -1.0 / 4.0, 1.0 / 4.0],
             [1.0 / 4.0, 1.0 / 4.0, -1.0 / 4.0],
             [1.0 / 4.0, -1.0 / 4.0, -1.0 / 4.0],
         ]
     )
     d = np.zeros((3, 3), dtype=np.float64)
     d[0][0] = 10
     d[1][1] = 5
     d[2][2] = 2
     v = np.array([[1 / r3, 1 / r3, 1 / r3], [1 / r2, -1 / r2, 0]])
     mat = np.dot(np.dot(u, d), v.T)
     result = svd.svd(mat)
     assert_array_almost_equal(mat, np.dot(np.dot(result[0], result[1]), result[2]))
Example #18
def equipoints(k, state0, state1, num):
    mat = [[EKF.Ps[k][state0, state0], EKF.Ps[k][state0, state1]],
           [EKF.Ps[k][state1, state0], EKF.Ps[k][state1, state1]]]
    decomp = svd.svd(mat)
    U = decomp[0]
    std0 = math.sqrt(decomp[1][0])
    std1 = math.sqrt(decomp[1][1])
    SLam = [[std0, 0.0], [0.0, std1]]
    m = [[EKF.ms[k][state0, 0]], [EKF.ms[k][state1, 0]]]
    m = numpy.matrix(m)
    SLam = numpy.matrix(SLam)
    U = numpy.matrix(U)
    xy = []
    for angle in numpy.arange(0, 2 * math.pi + 2 * math.pi / num,
                              2 * math.pi / num):
        circlepoint = numpy.matrix([[math.cos(angle)], [math.sin(angle)]])
        xypoints = m + U * SLam * circlepoint
        xy.append([xypoints[0, 0], xypoints[1, 0]])
    return xy
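equipoints traces the 1-sigma ellipse of a 2-D Gaussian: the SVD factors the 2x2 covariance block, and unit-circle points are mapped through U * sqrt(Lambda) and shifted by the mean. The same construction as a standalone NumPy sketch (toy covariance and mean, nothing taken from the EKF code):

import math
import numpy as np

cov = np.array([[2.0, 0.8], [0.8, 1.0]])      # toy 2x2 covariance block
mean = np.array([1.0, -1.0])
U, lam, _ = np.linalg.svd(cov)
scale = U @ np.diag(np.sqrt(lam))             # maps the unit circle onto the ellipse
angles = np.linspace(0.0, 2.0 * math.pi, 33)
circle = np.stack([np.cos(angles), np.sin(angles)])
ellipse = mean[:, None] + scale @ circle      # 2 x 33 array of ellipse points
print(ellipse.T[:3])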
Example #19
 def compress(self):
     print('Compressing', self.img_name, 'with k =', self.k, '...')
     for i in range(self.img_ori_arr.shape[2] - 1):
         RGB = ['R', 'G', 'B']
         channel = self.img_ori_arr[..., i]
         print(RGB[i], 'channel shape: ', channel.shape)
         u, s, vh = svd(channel)
         s[self.k:] = 0  # zero out every singular value beyond the first k
         s = np.diag(s)
         print(u.shape)
         print(s.shape)
         print(vh.shape)
         # uncomment and modify the lines below if the input is not square,
         # so that the shapes of the SVD factors conform
         # s_new = np.zeros((9,220))
         # s = np.vstack((s, s_new))
         self.output[..., i] = np.clip(np.matmul(np.matmul(u, s), vh), 0,
                                       255)
         self.compressed = True
     print('Image compressed!\n')
Example #20
def cluster_stories(documents, k=10):
    '''Cluster a set of documents using a simple SVD-based topic model.

    Arguments:
        documents: a list of dictionaries of the form

            {
                'words': [string]
                'text': string
            }

        k: the number of singular values to compute.

    Returns:
        A pair of (word_clusters, document_clusters), where word_clusters
        is a clustering over the set of all words in all documents, and
        document_clusters is a clustering over the set of documents.
    '''
    matrix, (index_to_word,
             index_to_document) = make_document_term_matrix(documents)
    matrix = normalize(matrix)
    sigma, U, V = svd(matrix, k=k)

    projected_documents = np.dot(matrix.T, U)
    projected_words = np.dot(matrix, V.T)

    document_centers, document_clustering = cluster(projected_documents)
    word_centers, word_clustering = cluster(projected_words)

    word_clusters = tuple(
        tuple(index_to_word[i] for (i, x) in enumerate(word_clustering)
              if x == j) for j in range(len(set(word_clustering))))

    document_clusters = tuple(
        tuple(index_to_document[i]['text']
              for (i, x) in enumerate(document_clustering) if x == j)
        for j in range(len(set(document_clustering))))

    return word_clusters, document_clusters
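The same LSA-style pipeline, compressed into a self-contained sketch: a tiny hand-made term-document matrix, np.linalg.svd in place of the project's truncated svd, and no clustering step (make_document_term_matrix, normalize and cluster are project helpers not reproduced here).

import numpy as np

# rows are words, columns are documents (toy counts)
matrix = np.array([[2, 0, 1, 0],
                   [1, 0, 2, 0],
                   [0, 3, 0, 1],
                   [0, 1, 0, 2]], dtype=float)
matrix /= np.linalg.norm(matrix, axis=0, keepdims=True)  # column-normalize
U, sigma, Vt = np.linalg.svd(matrix, full_matrices=False)
k = 2
projected_documents = matrix.T @ U[:, :k]   # documents in k-dimensional topic space
projected_words = matrix @ Vt[:k].T         # words in the same space
print(projected_documents.round(2))
print(projected_words.round(2))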
Example #21
def test_reconstruct_matrix_from_svd():
    matrix = numpy.array([
        [2, 5, 3],
        [1, 2, 1],
        [4, 1, 1],
        [3, 5, 2],
        [5, 3, 1],
        [4, 5, 5],
        [2, 4, 2],
        [2, 2, 5],
    ],
                         dtype='float64')

    singular_values, us, vs = svd(matrix)
    singular_value_matrix = numpy.diag(singular_values)

    reconstructed_matrix = numpy.dot(us, numpy.dot(singular_value_matrix, vs))

    flattened_original = matrix.flatten()
    flattened_actual = reconstructed_matrix.flatten()

    for (a, b) in zip(flattened_actual, flattened_original):
        assert_that(a).is_close_to(b, EPSILON)
Example #22
def main():

    # load all matrices
    all_orig = load_sparse_matrix('all')
    all_norm = load_sparse_matrix('all', normalized=True)
    train_orig = load_sparse_matrix('train')
    train_norm = load_sparse_matrix('train', normalized=True)
    test_orig = load_sparse_matrix('test')
    test_norm = load_sparse_matrix('test', normalized=True)
    test_txt_path = get_txt_path_by_type('test')

    # perform collaborative filtering with and without baseline approach
    collab_matrix = collaborative_filtering(train_norm, train_orig, test_orig,
                                            collaborative_neighbours)
    rmse_spearman(collab_matrix, test_orig, test_txt_path)
    precision_on_top_k(collab_matrix, all_orig)

    collab_matrix_baseline = collaborative_filtering(train_norm,
                                                     train_orig,
                                                     test_orig,
                                                     collaborative_neighbours,
                                                     baseline=True)
    rmse_spearman(collab_matrix_baseline, all_orig, test_txt_path)
    precision_on_top_k(collab_matrix_baseline, all_orig)

    # perform svd
    for energy in [1, 0.9]:
        svd_matrix = svd(train_norm, concepts, energy)
        rmse_spearman(svd_matrix, test_norm, test_txt_path)
        precision_on_top_k(svd_matrix, all_norm)

    # perform cur
    for energy in [1, 0.9]:
        cur_matrix = cur(train_norm, CUR_no_cols, concepts, energy)
        rmse_spearman(cur_matrix, test_norm, test_txt_path)
        precision_on_top_k(cur_matrix, all_norm)
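The energy argument above (1 and 0.9) is the usual retained-energy rule: keep the smallest number of singular values whose squared sum reaches that fraction of the total. A sketch of choosing k that way, assuming nothing about the project's svd helper:

import numpy as np

A = np.random.default_rng(7).standard_normal((50, 40))
s = np.linalg.svd(A, compute_uv=False)
energy_kept = np.cumsum(s ** 2) / np.sum(s ** 2)
for energy in [1, 0.9]:
    k = min(int(np.sum(energy_kept < energy - 1e-12)) + 1, len(s))
    print('energy {:>4}: keep {} of {} singular values'.format(energy, k, len(s)))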
Example #23
import numpy as np
import pandas as pd
from utility import create_utility_matrix, rmse
from svd import svd


data = pd.read_csv('../data/ratings7kusers.csv')

data['userId'] = data['userId'].astype('str')
data['movieId'] = data['movieId'].astype('str')

users = data['userId'].unique()
movies = data['movieId'].unique()
dataset = pd.DataFrame(data=data)
utilMat, users_index, items_index = create_utility_matrix(dataset)
svd_out = svd(utilMat, k=10)

while True:
    user = input("Enter user ID: ")
    u_index = users_index[user]
    films_ratings = utilMat.loc[user].copy()
    unwatched_films_ratings = films_ratings[films_ratings.isnull()]
    unwatched_films = list(unwatched_films_ratings.index.values)
    unwatched_films_predicts = []
    for item in unwatched_films:
        if item in items_index:
            i_index = items_index[item]
            unwatched_films_predicts.append([item, svd_out[u_index, i_index]])
        else:
            unwatched_films_predicts.append([item, np.mean(svd_out[u_index, :])])
    unwatched_films_predicts.sort(key=lambda x: x[1], reverse=True)
Example #24
print(Control_matrix)
# This is the controllability matrix I get:
#
# [[-0.70710678 -0.70710678  0.70710678  0.70710678 -0.70710678 -0.70710678   0.70710678  0.          0.          0.        ]
#  [-0.70710678  0.70710678  0.70710678 -0.70710678 -0.70710678  0.70710678   0.70710678  0.          0.          0.        ]
#  [ 0.70710678  0.70710678 -0.70710678 -0.70710678  0.70710678  0.70710678  -0.70710678 -0.70710678  0.70710678  0.        ]
#  [ 0.70710678 -0.70710678 -0.70710678  0.70710678  0.70710678 -0.70710678  -0.70710678  0.70710678  0.70710678  0.        ]
#  [ 0.          0.          0.          0.          0.          0.          0.           0.          0.          1.        ]]

Control_rank = matrix_rank(Control_matrix)
print "rank = " + str(Control_rank)  # rank = 5 --> fully controllable.

print "(2) find steering input sequence"
# we need to transpose first before feeding into svd function
# due to shape constraint
a, b, c = svd.svd(svd.transpose(Control_matrix))

# This is the "a" I get:
#
#[[   -0.48324982438233488,  7.6327832942979512e-16, -1.0178532794652785e-14,     0.12833396757851195,  0.0],
# [ 6.9388939039072284e-16,     0.55105881968666859,     0.17224259223220284,   1.385436904088877e-14,  0.0],
# [    0.48324982438233482, -8.6042284408449632e-16,  1.0440476039887174e-14,    -0.12833396757851209,  0.0],
# [-7.4940054162198066e-16,    -0.55105881968666848,    -0.17224259223220295, -1.3783679059242715e-14,  0.0],
# [   -0.48324982438233488,  7.4940054162198066e-16, -1.0398842676101839e-14,     0.12833396757851209,  0.0],
# [ 7.4940054162198066e-16,     0.55105881968666848,     0.17224259223220295,  1.3783679059242715e-14,  0.0],
# [    0.48324982438233488, -7.4940054162198066e-16,  1.0398842676101839e-14,    -0.12833396757851209,  0.0],
# [ -5.134781488891349e-16,    -0.29833292097354391,     0.95446187365624646,  7.6645287339083268e-14,  0.0],
# [   -0.25666793515702424,  4.8572257327350599e-16,  7.7440328390661301e-14,    -0.96649964876466921,  0.0],
# [                    0.0,                     0.0,                    -0.0,                     0.0, -1.0]]

# This is the "b" I get:
Example #25
Go = X[:,:,1]
Bo = X[:,:,2]

# compute the storage space for X - image
storage_o = X.nbytes/1024.0/1024.0

index = array([[10,25,50,100,250]])

# initialize comparison metrics (Frobenius norm, stress, variance)
Fros = zeros((3,index.shape[1]))
var = zeros((3,index.shape[1]))
Stress = zeros((3,index.shape[1]))
storages = zeros((3,index.shape[1]))

# svd
tfros,tvars,tstr,mbs = svd(Ro,Go,Bo,index)
Fros[0,:] = transpose(tfros)
var[0,:] = transpose(tvars)
Stress[0,:] = transpose(tstr)
storages[0,:] = transpose(mbs)

# pca
tfros,tvars,tstr,mbs = pca(Ro,Go,Bo,index)
Fros[1,:] = transpose(tfros)
var[1,:] = transpose(tvars)
Stress[1,:] = transpose(tstr)
storages[1,:] = transpose(mbs)

# mds
tfros,tvars,tstr,mbs = mds(Ro,Go,Bo,index)
Fros[2,:] = transpose(tfros)
Example #26
 [  0.00000000e+00   0.00000000e+00   1.00000000e+00]
 [  1.32506500e+02  -8.34400000e+00  -8.00000000e-04]
 [  1.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   1.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   1.00000000e+00]]
'''


Observe_rank = matrix_rank(Observe_matrix)
print "rank = " + str(Observe_rank)  # rank = 3 --> fully observable.


print "(2) find steering input sequence"
# we need to transpose first before feeding into svd function
# due to shape constraint
a,b,c = svd.svd(Observe_matrix)


# This is the "a" I get: (30*30)
'''
[[ -2.06752633e-01  -6.60343207e-01  -6.34190843e-01]
 [ -3.52274770e-02   4.94296295e-01  -5.02801802e-01]
 [ -9.77093922e-01   1.03529721e-01   1.69907217e-01]
 [ -7.45412132e-03   1.19624786e-02   1.22650501e-02]
 [ -1.27006499e-03   1.77514504e-01   1.74459959e-01]
 [ -3.52274770e-02   4.94296295e-01  -5.02801802e-01]
 [ -2.68745749e-04   5.28837683e-03  -5.77741481e-03]
 [ -4.57892527e-05   2.47250717e-02  -3.15056066e-02]
 [ -1.27006499e-03   1.77514504e-01   1.74459959e-01]
 [ -9.68914976e-06   1.88272669e-03   2.02969877e-03]
 [ -1.65073168e-06   8.62167304e-03   1.13271243e-02]
Example #27
    def compute(self, nchan, seed, n_trajec, run=4):
        # The "run" parameter controls when the fitting stops
        # If run = 0 doesn't fit
        self.lastrun = run
        self.lastnchan = nchan
        self.lastseed = seed
        self.lastn_trajec = n_trajec
        self.datagen.fill(nchan, seed, n_trajec)
        printSomeInfo()
        # self.tl = self.N.likelihood()
        # print(self.tl)
        if run == 0:
            return
        # Initial Guess
        self.usingArgs(True, True)
        self.N.setParmVal(self.guess)
        # Nuisance Fit At True
        global preG, preH, postG, postH
        self.usingArgs(False, True)
        h.attr_praxis(seed)
        #  passes here assert(False)
        # print('SIZE =', self.N.getParm().size())
        self.preTrueParm = self.N.getParmVal()
        # passes here assert(False)
        self.preTruef = self.ef()
        # fails here:  assert(False)
        if preG:
            self.preTrueGrad = numpy.matrix(self.Gradient())
        if preH:
            self.preTrueHess = numpy.matrix(self.Hessian())
        print "USE 0", cvodewrap.fs.use_fixed_step
        self.mrf.efun()
        print "USE 1", cvodewrap.fs.use_fixed_step
        self.postTrueParm = self.N.getParmVal()
        self.postTruef = self.ef()
        self.otle = self.N.getParmVal()
        self.otml = self.N.likelihood()  # optimized true maximum likelihood
        if postG:
            self.TrueGrad = numpy.matrix(self.Gradient())
        if postH:
            self.TrueHess = numpy.matrix(self.Hessian())
        if run == 1:
            return

        # Square Norm Fit
        self.usingArgs(True, False)
        self.generator.use_likelihood = 0
        h.attr_praxis(seed)
        self.preSNFParm = self.N.getParmVal()
        self.preSNFf = self.ef()
        self.mrf.efun()
        self.postSNFParm = self.N.getParmVal()
        self.postSNFf = self.ef()
        self.generator.use_likelihood = 1
        if run == 2:
            return

        # Main Parm Fit:
        global MPF
        if MPF:
            h.attr_praxis(seed)
            self.preMPFParm = self.N.getParmVal()
            self.preMPFf = self.ef()
            self.mrf.efun()
            self.postMPFParm = self.N.getParmVal()
            self.postMPFf = self.ef()
            self.MPFpValue = self.get_pValue(self.postTruef, self.postMPFf,
                                             self.trueParm.size())
        if run == 3:
            return

        # All Parm Fit
        self.usingArgs(True, True)
        h.attr_praxis(seed)
        self.preAPFParm = self.N.getParmVal()
        self.preAPFf = self.ef()
        if preG:
            self.preAPFGrad = numpy.matrix(self.Gradient())
        if preH:
            self.preAPFHess = numpy.matrix(self.Hessian())
        print "USE 2", cvodewrap.fs.use_fixed_step
        self.mrf.efun()
        print "USE 3", cvodewrap.fs.use_fixed_step
        self.postAPFParm = self.N.getParmVal()
        self.postAPFf = self.ef()
        self.APFpValue = self.get_pValue(self.postTruef, self.postAPFf,
                                         self.trueParm.size())
        self.mle = self.N.getParmVal()
        self.ml = self.N.likelihood()
        if postG:
            self.APFGrad = numpy.matrix(self.Gradient())
        if postH:
            self.APFHess = numpy.matrix(self.Hessian())
        if run == 4:
            return

        self.H = numpy.matrix(self.Hessian())
        svdList = svd.svd(numpy.array(self.H))[1]
        self.precision = 0.0
        for sl in svdList:
            sl_positive = max(sl, 1e-14)
            self.precision += math.log(sl_positive)
        # These statements are intended to always run, but don't.
        self.N.setParm(self.saveParm)
        self.likefailed = self.N.likefailed
        self.N.likefailed = False
Example #28
    return compact_matrix, rows_selected


ratings = handle_input("ratings.txt")
start_time = time.time()

R, rows_selected = selection(ratings)
C, columns_selected = selection(ratings.T)

intersection = np.zeros((len(rows_selected), len(columns_selected)))

for i in range(len(rows_selected)):
    for j in range(len(columns_selected)):
        intersection[i][j] = ratings[rows_selected[i]][columns_selected[j]]

u, sigma, v = svd(intersection)

u = u.T
v = v.T
for j in range(len(sigma)):
    if sigma[j][j] != 0:
        sigma[j][j] = 1 / sigma[j][j]

intersection = np.dot(v, np.dot(sigma, u))
C = np.matrix(C)
C = C.T
R = np.matrix(R)

final_matrix = np.dot(C, np.dot(intersection, R))

print(final_matrix)
Example #29
def SVD(M, use_np=False):
    if use_np:
        return np.linalg.svd(M)
    else:
        return svd(M)
Example #30
Go = X[:, :, 1]
Bo = X[:, :, 2]

# compute the storage space for X - image
storage_o = X.nbytes / 1024.0 / 1024.0

index = array([[10, 25, 50, 100, 250]])

# initialize comparison metrics (Frobenius norm, stress, variance)
Fros = zeros((3, index.shape[1]))
var = zeros((3, index.shape[1]))
Stress = zeros((3, index.shape[1]))
storages = zeros((3, index.shape[1]))

# svd
tfros, tvars, tstr, mbs = svd(Ro, Go, Bo, index)
Fros[0, :] = transpose(tfros)
var[0, :] = transpose(tvars)
Stress[0, :] = transpose(tstr)
storages[0, :] = transpose(mbs)

# pca
tfros, tvars, tstr, mbs = pca(Ro, Go, Bo, index)
Fros[1, :] = transpose(tfros)
var[1, :] = transpose(tvars)
Stress[1, :] = transpose(tstr)
storages[1, :] = transpose(mbs)

# mds
tfros, tvars, tstr, mbs = mds(Ro, Go, Bo, index)
Fros[2, :] = transpose(tfros)
Example #31
from svd import svd
import time

starttime = time.time()

alpha = 0.003
lamba = 0.01

svd2 = svd(3, alpha, lamba, 10)
svd2.printData()
svd2.saveData()

endtime = time.time()
print('\n----------- csv data load finished, took %f s -------------\n' %
      (endtime - starttime))
Example #32
def main():

  semeval_dir = 'data/maui-semeval2010-test/'
  filenames = sorted(os.listdir(semeval_dir))
  manual_keywords = []
  total_precision = 0
  total_recall = 0
  total_docs = 0
  method = str(sys.argv[1])

  for filename in filenames:

    if filename[-3:] == 'key':
      # ignored due to issue on Mac or empty keyfile
      if filename == "H-5.key" or filename == "C-86.key":
        continue
      with open(semeval_dir + filename, 'r') as f:
        last_key_file = filename
        key_lines = f.read().splitlines()
        # list of list of keywords by line
        manual_keywords = [line.split() for line in key_lines]
        # flatten list
        manual_keywords = [word for line in manual_keywords for word in line]
        manual_keywords = list(set(manual_keywords))
        manual_keywords = [t for t in manual_keywords
                           if len(t) > 1 and t.lower() not in stopwords.words('english')]

    elif filename[-3:] == 'txt':
      # ignored due to issue on Mac or empty keyfile
      if filename == "H-5.txt" or filename == "C-86.txt":
        continue
      total_docs += 1
      print(filename)
      with open(semeval_dir + filename, 'r') as f:
        correct = 0
        content = f.read()
        if method == 'svd':
          keywords = svd(content, 1, True)
        elif method == 'raketr':
          keywords = raketr.main(content, True)
        elif method == 'cluster':
          keywords = kcluster(content, 6, 10, True)
        else:
          print('methods accepted: svd raketr cluster')
          exit(0)
        keywords = list(set(keywords))
        keywords = [word.encode('ascii') for word in keywords]
#        print('--------manual keywords---------')
#        print(manual_keywords)
        print(keywords)
        print('-'*100)
        for keyword in keywords:
          if keyword in set(manual_keywords):
            correct += 1
        if len(manual_keywords) == 0:
          print(filename)
          print(last_key_file)
          print('^^^^ issue with this file ^^^^')
          exit(0)
        total_precision += correct/float(len(keywords))
        total_recall += correct/float(len(manual_keywords))

  total_precision /= total_docs
  total_recall /= total_docs
  total_fmeasure = round(2*total_precision*total_recall/(total_precision + total_recall), 5)
  print('total docs: ' + str(total_docs))
  print('total precision: ' + str(total_precision))
  print('total recall: ' + str(total_recall))
  print('total fmeasure: ' + str(total_fmeasure))
Example #33
                csv_writer.writerow(row)
            else:
                print(mid)
        print('--------update number  ', num, ' ---------------')
        out.close()
        return num

    def clean(self):
        print('--------current number of data ', self.rates.count(),
              ' ---------------')
        result = self.rates.delete_many({})
        print('--------clean number  ', result.deleted_count, ' ---------------')


starttime = time.time()
u = update_rate()
num = u.update()
u.clean()

alpha = 0.003
lamba = 0.01
if num > 0:
    svd2 = svd(3, alpha, lamba, 30)
    svd2.printData()
    svd2.saveData()
else:
    print('------------no update data, no svd-------------------------')

endtime = time.time()
print('\n----------- update ratings data finished, took %f s -------------\n' %
      (endtime - starttime))
Example #34
	Course: CS F469 Information Retrieval

"""

import numpy as np
import math
import time
from numpy import linalg as LA
from common import handle_input, calc_error, print_matrix
from svd import svd

ratings = handle_input("ratings.txt")

start_time  = time.time()

U,sigma,V = svd(ratings)

final_matrix = (np.dot(U,np.dot(sigma,V)))

for i in xrange(len(final_matrix)):
	for j in xrange(len(final_matrix[i])):
		final_matrix[i][j] = round(final_matrix[i][j],2)

print "Printing matrix U:"
print U
print "\nPrinting matrix sigma:"
print sigma
print "\nPrinting matrix V:"
print V
print "\nPrinting the final matrix got by multiplying U, sigma and V:"
print final_matrix
Example #35
__author__ = 'PC-LiNing'

from svd import svd
import numpy as np

matrix = np.asarray([1.0, 1.0, 0.5, 1.0, 1.0, 0.25, 0.5, 0.25,
                     2.0]).reshape(3, 3)
print(matrix)
print(matrix.shape)
singularValues, us, vs = svd(matrix)
print(singularValues)
print(us)
print(vs)
print('#######')
# reconstruct the matrix as a sum of rank-one outer products
result = np.zeros(shape=(3, 3))
for i in range(3):
    singularValue = singularValues[i]
    u = us[i]
    v = vs[i]
    result += singularValue * np.outer(u, v)
print(result)
Example #36
def pinv_by_svd(A):
    U, S, V = svd(A, retSimple=True)
    S = inv(S)
    A_pinv = np.dot(np.dot(V.T, S), U.T)

    return A_pinv
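A sanity check for the pseudoinverse above. Sketch only: retSimple=True is assumed to mean "return U, a square diagonal S, and V with A = U @ S @ V", so a NumPy stand-in with that contract is used here (and inv is numpy.linalg.inv).

import numpy as np
from numpy.linalg import inv

def svd(A, retSimple=True):
    # stand-in: economy SVD with S as a square diagonal matrix
    u, s, vt = np.linalg.svd(A, full_matrices=False)
    return u, np.diag(s), vt

A = np.random.default_rng(2).standard_normal((6, 4))
print(np.allclose(pinv_by_svd(A), np.linalg.pinv(A)))  # True for full-rank A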
Example #37
    masked_arr = np.ma.masked_array(user_item_matrix, mask)

    del mask
    del user_item_matrix

    item_means = np.mean(masked_arr, axis=0)
    # user_means = np.mean(masked_arr, axis=1)
    # item_means_tiled = np.tile(item_means, (user_item_matrix.shape[0], 1))
    # init_dgesdd failed init

    print(masked_arr)
    utilMat = masked_arr.filled(item_means)
    print(utilMat)

    # utilMat = kpod(utilMat=utilMat, mask=masked_arr, iter=40, n_clusters=10, method="normal")
    utilMat = svd(utilMat, k=15)

    pred = []  # to store the predicted ratings

    for _,row in test.iterrows():
        user = row['userId']
        item = row['movieId']

        if user in user_index:
            u_index = user_index[user]
            if item in item_index:
                i_index = item_index[item]
                pred_rating = utilMat[u_index, i_index]
            else:
                pred_rating = np.mean(utilMat[u_index, :])
        else:
Example #38
    for entry in data:
        words |= set(entry['words'])
    return list(sorted(words))


def load():
    with open('all_stories.json', 'r') as infile:
        data = json.loads(infile.read())
    return data


if __name__ == "__main__":
    data = load()
    matrix, (indexToWord, indexToDocument) = makeDocumentTermMatrix(data)
    matrix = normalize(matrix)
    sigma, U, V = svd(matrix, k=10)

    projectedDocuments = np.dot(matrix.T, U)
    projectedWords = np.dot(matrix, V.T)

    documentCenters, documentClustering = cluster(projectedDocuments)
    wordCenters, wordClustering = cluster(projectedWords)

    wordClusters = [[
        indexToWord[i] for (i, x) in enumerate(wordClustering) if x == j
    ] for j in range(len(set(wordClustering)))]

    documentClusters = [[
        indexToDocument[i]['text'] for (i, x) in enumerate(documentClustering)
        if x == j
    ] for j in range(len(set(documentClustering)))]
Example #39
 def test_simple_svd(self):
     test_data = np.array([1, -1, 1, -1, 3, 0], dtype=np.float64).reshape(3, 2)
     result = svd.svd(test_data)
     done = np.dot(np.dot(result[0], result[1]), result[2])
     assert_array_almost_equal(test_data, done, decimal=10)
Example #40
import numpy as np
from svd import svd

y = np.array([4, 3, 7]).T
A = np.array([[1, -2, 3], [2, -1, 4], [-1, -4, 1]])

# A_red = [[1,-2,3],[0,3,-2],[0,0,0]] -> rank 2

####################### Rank #######################

print('Rank of A:\t\t{0:d}'.format(np.linalg.matrix_rank(A)))

####################### SVD #######################

matrices, a, error, C = svd(A, y=y)
U, w, V = matrices

a_lst, *_, s = np.linalg.lstsq(A, y)

print('Singular Values of A:\t{}'.format(list(np.diag(w))))
print('Solution a:\t\t{}'.format(list(a)))
print('Lstsq Solution a:\t{}'.format(list(a_lst)))
print('Residual Error:\t\t{} ({})'.format(error, error/np.linalg.norm(a)))
print('Covariance Matrix:\n{}'.format(C))
print('U Matrix:\n{}'.format(U))
print('D Matrix:\n{}'.format(w))
print('V Matrix:\n{}'.format(V))


print('U.T@U Matrix:\n{}'.format(U.T@U))
print('V.T@V Matrix:\n{}'.format(V.T@V))
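For the rank-2 system above, the SVD solution is the minimum-norm least-squares answer. A plain-NumPy cross-check (the custom svd's extras, the residual error and covariance matrix, are left out):

import numpy as np

A = np.array([[1, -2, 3], [2, -1, 4], [-1, -4, 1]], dtype=float)
y = np.array([4, 3, 7], dtype=float)
a_pinv = np.linalg.pinv(A) @ y            # minimum-norm least-squares solution
print('a        =', a_pinv.round(6))
print('residual =', np.linalg.norm(A @ a_pinv - y).round(6))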
Example #41
    for entry in data:
        words |= set(entry['words'])
    return list(sorted(words))


def load():
    with open('all_stories.json', 'r') as infile:
        data = json.loads(infile.read())
    return data


if __name__ == "__main__":
    data = load()
    matrix, (indexToWord, indexToDocument) = makeDocumentTermMatrix(data)
    matrix = normalize(matrix)
    sigma, U, V = svd(matrix, k=10)

    projectedDocuments = np.dot(matrix.T, U)
    projectedWords = np.dot(matrix, V.T)

    documentCenters, documentClustering = cluster(projectedDocuments)
    wordCenters, wordClustering = cluster(projectedWords)

    wordClusters = [
        [indexToWord[i] for (i, x) in enumerate(wordClustering) if x == j]
        for j in range(len(set(wordClustering)))
    ]

    documentClusters = [
        [indexToDocument[i]['text']
         for (i, x) in enumerate(documentClustering) if x == j]
Example #42
def main():

  semeval_dir = 'data/maui-semeval2010-test/'
  filenames = sorted(os.listdir(semeval_dir))
  manual_keywords = []
  total_precision = 0
  total_recall = 0
  total_docs = 0
  method = str(sys.argv[1])

  for filename in filenames:
    if filename[-3:] == 'key':
      # ignored due to issue on Mac or empty keyfile
      if filename == "H-5.key" or filename == "C-86.key":
        continue
      with open(semeval_dir + filename, 'r') as f:
        last_key_file = filename
        key_lines = f.read().splitlines()
        key_lines = [word.encode('ascii') for word in key_lines]
        manual_keywords = get_stemmed_keywords(key_lines)

    elif filename[-3:] == 'txt':
      # ignored due to issue on Mac or empty keyfile
      if filename == "H-5.txt" or filename == "C-86.txt":
        continue
      total_docs += 1
      print(filename)
      with open(semeval_dir + filename, 'r') as f:
        correct = 0
        content = f.read()
        if method == 'svd':
          keywords = svd(content, 1, False)
        elif method == 'raketr':
          keywords = raketr.main(content, False)
        elif method == 'cluster':
          keywords = kcluster(content, 6, 15, False)
#        benchmark against RAKE
#        keywords = rake_object.run(content)[:15]
#        keywords = [word[0] for word in keywords]
#        keywords = [''.join([i if ord(i) < 128 and i != '\n' else ' ' for i in keyword]).encode('ascii') for keyword in keywords]
        else:
          print('methods accepted: svd raketr cluster')
          exit(0)
        print(keywords)
        print('-'*100)
#        print('--------manual keywords---------')
#        print(manual_keywords)
#        print('--------extracted keywords---------')
#        print(keywords)
        stemmed_keywords = get_stemmed_keywords(keywords)
        for keyword in stemmed_keywords:
          if keyword in set(manual_keywords):
            correct += 1
        if len(manual_keywords) == 0:
          print(filename)
          print(last_key_file)
          print('^^^^ issue with this file ^^^^')
          exit(0)
        total_precision += correct/float(len(keywords))
        total_recall += correct/float(len(manual_keywords))


  total_precision /= total_docs
  total_recall /= total_docs
  total_fmeasure = round(2*total_precision*total_recall/(total_precision + total_recall), 5)
  print('total docs: ' + str(total_docs))
  print('total precision: ' + str(total_precision))
  print('total recall: ' + str(total_recall))
  print('total fmeasure: ' + str(total_fmeasure))