/
Compare_test_1.py
133 lines (95 loc) · 3.24 KB
/
Compare_test_1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 13 14:03:22 2015
@author: Deirdre Meehan
"""
import numpy
import time
from rmse import rmse
import cProfile
import pstats, StringIO
# Benchmark setup: build a ratings-style matrix R containing zeros, plus one
# shared random starting point, so the basic method and the zero-aware method
# can be timed and compared on the same input.
iterations = 5
n = 500
m = 1000
d = 20
# NOTE(review): despite its name this is used below as a value threshold on
# the entries of R, not as a fraction of entries -- confirm the intent.
fraction_non_zero = 0.9

# Ground-truth factors and the matrix we want the factorization to recover:
# R = U^T V, with U of shape (d, n) and V of shape (d, m).
U_matrix = numpy.random.rand(d, n)
V_matrix = numpy.random.rand(d, m)
R_matrix = numpy.dot(U_matrix.T, V_matrix)

# Remove values: every entry that is not strictly greater than the threshold
# is set to zero. R_temp is the logical mask of the entries that are kept.
R_temp = R_matrix > fraction_non_zero
R_matrix[~R_temp] = 0.0

# Begin with random guesses of U and V (uniform in [0, 5)).
U_matrix = 5 * numpy.random.rand(d, n)
V_matrix = 5 * numpy.random.rand(d, m)

# Working matrices that each method updates in place. Each one must be an
# independent copy: plain assignment would bind all four names to the same two
# arrays, so the second method would start from the first method's result
# instead of from the shared initial guess.
basic_u = U_matrix.copy()
basic_v = V_matrix.copy()
zero_u = U_matrix.copy()
zero_v = V_matrix.copy()
# Profile the basic method only; the report is produced after the loop.
pr = cProfile.Profile()
pr.enable()

# BASIC MATRIX FACTORIZATION
# Alternately re-solve U with V held fixed and V with U held fixed, using
# every entry of R (zeros included), until the RMSE between R and the
# reconstruction drops to the tolerance or the sweep limit is reached.
t0 = time.time()
tolerance = 0.1
max_iterations = 100
err = tolerance + 1  # guarantees the first sweep runs
i = 0
# Both conditions must hold to keep going; with "or" the loop would always run
# at least max_iterations sweeps and never stop if err stayed above tolerance.
while err > tolerance and i < max_iterations:
    # basic_v is not modified during the U sweep, so its pseudo-inverse term
    # is the same for every column and is computed once.
    v_gram_pinv = numpy.linalg.pinv(numpy.dot(basic_v, basic_v.T))
    for u in range(n):  # u = row of R
        basic_u[:, u] = numpy.dot(
            numpy.dot(R_matrix[u, :], basic_v.T), v_gram_pinv)

    # Likewise basic_u is fixed during the V sweep.
    u_gram_pinv = numpy.linalg.pinv(numpy.dot(basic_u, basic_u.T))
    for v in range(m):  # v = column of R
        basic_v[:, v] = numpy.dot(
            numpy.dot(R_matrix[:, v], basic_u.T), u_gram_pinv)

    basic_R = numpy.dot(basic_u.T, basic_v)
    err = rmse(R_matrix, basic_R)
    i += 1

# Stop the clock before building the profile report so that report
# formatting and file output are not counted as factorization time.
t1 = time.time()
basic_time = t1 - t0

pr.disable()
s = StringIO.StringIO()
sortby = "cumulative"
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()  # text report goes into s; read it with s.getvalue()
# dump_stats writes marshalled profile data (reload with pstats.Stats(path)),
# not text, despite the file extension.
ps.dump_stats("output_stats.txt")
# ZERO MATRIX FACTORIZATION
# Same alternating update as the basic method, but each column of U (or V) is
# solved using only the entries of R that are non-zero in the corresponding
# row (or column); zeros are treated as missing values.
t0 = time.time()
for i in range(iterations):
    for u in range(n):  # u = row of R
        # Mask and masked factor are computed once per row instead of being
        # rebuilt for every term of the expression.
        observed = R_matrix[u, :] != 0
        v_observed = zero_v[:, observed]
        zero_u[:, u] = numpy.dot(
            numpy.dot(R_matrix[u, observed], v_observed.T),
            numpy.linalg.pinv(numpy.dot(v_observed, v_observed.T)))
    for v in range(m):  # v = column of R
        observed = R_matrix[:, v] != 0
        u_observed = zero_u[:, observed]
        zero_v[:, v] = numpy.dot(
            numpy.dot(R_matrix[observed, v], u_observed.T),
            numpy.linalg.pinv(numpy.dot(u_observed, u_observed.T)))
# Only the final reconstruction is used afterwards, so build it once.
zero_R = numpy.dot(zero_u.T, zero_v)
t1 = time.time()
zero_time = t1 - t0
def compare_matrices(a, b, tolerance):
    """Check that ``b`` matches ``a`` on every non-zero entry of ``a``.

    Entries where ``a`` is exactly zero are ignored. When a mismatch is
    found, the position and the two values of the first offending entry
    (in row-major order) are printed.

    Args:
        a: 2-D numpy array holding the reference values.
        b: 2-D numpy array to compare against ``a``.
        tolerance: largest allowed absolute difference for a single entry.

    Returns:
        The string "different dimensions" if the shapes differ; False if
        some non-zero entry of ``a`` differs from ``b`` by more than
        ``tolerance``; True otherwise.
    """
    if a.shape != b.shape:
        return "different dimensions"

    # Use the absolute difference: a signed "a - b > tolerance" test would
    # silently accept any entry where b overshoots a, however large the gap.
    mismatch = (a != 0) & (numpy.abs(a - b) > tolerance)
    offenders = numpy.argwhere(mismatch)  # rows are in row-major order
    if len(offenders) == 0:
        return True

    row, column = offenders[0]
    print("row:  %s  column:  %s" % (row, column))
    print("%s  &  %s" % (a[row, column], b[row, column]))
    return False
# Report, for each method, whether its reconstruction matches R on the
# non-zero entries (within 0.1) and how long the method took.
# The comparisons are evaluated before their label is printed so that any
# mismatch diagnostics printed by compare_matrices appear on their own lines
# rather than in the middle of the label line.
basic_matches = compare_matrices(R_matrix, basic_R, 0.1)
print("Basic:  %s" % (basic_matches,))
print(basic_time)

zero_matches = compare_matrices(R_matrix, zero_R, 0.1)
print("Zero:  %s" % (zero_matches,))
print(zero_time)

# RMSE of the zero-aware reconstruction against R, as computed by rmse().
err = rmse(R_matrix, zero_R)
print(err)