forked from nokia/natural-language-processing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
matrix_operations.py
132 lines (86 loc) · 3.61 KB
/
matrix_operations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# © 2020 Nokia
# Licensed under the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
# !/usr/bin/env python3
# coding: utf-8
# Author: Élie de Panafieu <elie.de_panafieu@nokia-bell-labs.com>
import math
import numpy as np
from scipy.sparse.csr import csr_matrix
from scipy.sparse import lil_matrix
def matrix_from_iterables_and_index_maps(iterables, item_to_index: dict, iterable_to_index: dict) -> csr_matrix:
    """Build a sparse occurrence-count matrix from a collection of iterables.

    Rows are indexed by ``item_to_index`` and columns by ``iterable_to_index``.
    The entry at (row of item, column of iterable) is the number of times the
    item occurs in that iterable.

    Each iterable is used as a key of ``iterable_to_index``, so it must be
    hashable. A ``KeyError`` is raised if an item or iterable is missing from
    its index map.

    Returns the matrix converted to CSR format.
    """
    shape = (len(item_to_index), len(iterable_to_index))
    # LIL format is used while filling entry by entry; converted on return.
    counts = lil_matrix(shape, dtype='int')
    for group in iterables:
        for member in group:
            row = item_to_index[member]
            column = iterable_to_index[group]
            counts[row, column] += 1
    return counts.tocsr()
def vector_from_index_and_value_maps(to_index: dict, to_value, length=None):
    """Build a vector whose entries are placed according to an index map.

    For every ``key, value`` pair of ``to_value``, the entry at position
    ``to_index[key]`` is set to ``value``; all other entries stay zero.

    ``length`` is the size of the returned vector and defaults to
    ``len(to_index)``. A ``KeyError`` is raised if a key of ``to_value`` is
    missing from ``to_index``.
    """
    size = len(to_index) if length is None else length
    result = zero_vector_from_length(size)
    for name, entry in to_value.items():
        position = to_index[name]
        result[position] = entry
    return result
def dict_from_index_map_and_vector(to_index, vector):
    """Return a dict mapping each key of ``to_index`` to ``vector[index]``.

    ``to_index`` maps items to positions in ``vector``.
    """
    values_by_item = {}
    for item, position in to_index.items():
        values_by_item[item] = vector[position]
    return values_by_item
def count_nonzero_entries_in_matrix_row(matrix, row_index):
    """Return ``getnnz()`` of row ``row_index`` of ``matrix``.

    ``matrix`` must provide ``getrow`` (as SciPy sparse matrices do).
    """
    return matrix.getrow(row_index).getnnz()
def cosine_distance(vector0, vector1):
    """Return the cosine distance between the two vectors.

    This is the first component of ``verbose_cosine_distance``; the norms it
    also reports are discarded.
    """
    return verbose_cosine_distance(vector0, vector1)[0]
def verbose_cosine_distance(vector0, vector1):
    """Return ``(distance, norm0, norm1)`` for the two vectors.

    ``distance`` is one minus the scalar product of the normalized vectors;
    ``norm0`` and ``norm1`` are the norms reported by ``verbose_normalize``
    for ``vector0`` and ``vector1`` respectively.
    """
    unit0, length0 = verbose_normalize(vector0)
    unit1, length1 = verbose_normalize(vector1)
    similarity = scalar_product(unit0, unit1)
    distance = 1. - similarity
    return distance, length0, length1
def scalar_product(vector0, vector1):
    """Return the dot product of the two vectors, computed with ``np.dot``."""
    product = np.dot(vector0, vector1)
    return product
def normalize(vector):
    """Return the normalized vector from ``verbose_normalize``, dropping the norm."""
    return verbose_normalize(vector)[0]
def verbose_normalize(vector):
    """Return ``(normalized_vector, norm)`` for ``vector``.

    A zero vector cannot be scaled to unit length, so it is returned
    unchanged together with a norm of ``0``.
    """
    if not is_zero_vector(vector):
        length = norm(vector)
        return vector / length, length
    return vector, 0
def is_zero_vector(vector):
    """Return True when no entry of ``vector`` is truthy (per ``np.any``)."""
    has_nonzero_entry = np.any(vector)
    return not has_nonzero_entry
def norm(vector):
    """Return the square root of the scalar product of ``vector`` with itself."""
    squared_length = scalar_product(vector, vector)
    return math.sqrt(squared_length)
def coefficient_wise_vector_product(vector0: np.ndarray, vector1: np.ndarray) -> np.ndarray:
    """Return the entry-by-entry product of the two vectors (``np.multiply``)."""
    product = np.multiply(vector0, vector1)
    return product
def matrix_vector_product(matrix: csr_matrix, vector: np.ndarray) -> np.ndarray:
    """Return ``matrix.dot(vector)``, the product of the matrix with the vector."""
    image = matrix.dot(vector)
    return image
def dot_matrix_dot_products(dot_vector0, matrix, dot_vector1, vector):
    """Return ``dot_vector0 * (matrix . (dot_vector1 * vector))``.

    ``*`` is the coefficient-wise product and ``.`` the matrix-vector product.
    The steps are applied right to left: scale ``vector`` by ``dot_vector1``,
    multiply by ``matrix``, then scale the result by ``dot_vector0``.
    """
    scaled_input = coefficient_wise_vector_product(dot_vector1, vector)
    image = matrix_vector_product(matrix, scaled_input)
    return coefficient_wise_vector_product(dot_vector0, image)
def zero_vector_from_length(length: int) -> np.ndarray:
    """Return a NumPy vector of ``length`` zeros (``np.zeros`` defaults)."""
    return np.zeros(shape=length)
def one_vector_from_length(length: int) -> np.ndarray:
    """Return a NumPy vector of ``length`` ones (``np.ones`` defaults)."""
    return np.ones(shape=length)
def rescale_vector_to_satisfy_lower_negative_bound(vector, lower_bound):
    """Scale ``vector`` so that its smallest entry is not below ``lower_bound``.

    If the minimum entry is already at or above ``lower_bound``, the input is
    returned unchanged (same object). Otherwise the whole vector is multiplied
    by ``lower_bound / min_element``, which moves the minimum entry exactly
    onto the bound.

    As the function name indicates, ``lower_bound`` is meant to be negative:
    then ``min_element < lower_bound < 0``, so the division is safe and the
    scale factor is positive.

    An empty vector has no minimum and is returned unchanged.
    """
    if np.size(vector) == 0:
        return vector
    # np.min reduces in native code instead of iterating entry by entry.
    min_element = np.min(vector)
    if min_element < lower_bound:
        vector = lower_bound / min_element * vector
    return vector
def transpose_matrix(matrix):
    """Return the transpose of ``matrix`` via its ``transpose()`` method."""
    transposed = matrix.transpose()
    return transposed
def create_vector(coefficients):
    """Return a NumPy array built from ``coefficients``."""
    vector = np.array(coefficients)
    return vector
def are_equal_vectors(vector0, vector1):
    """Return whether the vectors have the same shape and entries (``np.array_equal``)."""
    same = np.array_equal(vector0, vector1)
    return same
def are_almost_equal_vectors(vector0, vector1):
    """Return True when the vectors have equal length and close entries.

    Entries are compared pairwise with ``math.isclose`` using its default
    tolerances. Vectors of different lengths are never almost equal.
    """
    if len(vector0) != len(vector1):
        return False
    return all(
        math.isclose(entry0, entry1)
        for entry0, entry1 in zip(vector0, vector1)
    )
def are_almost_colinear_vectors(vector0, vector1):
    """Return True when ``vector1`` is almost a scalar multiple of ``vector0``.

    A zero ``vector0`` is treated as colinear with every vector. Otherwise
    ``vector0`` is rescaled so that its first non-zero entry matches the
    corresponding entry of ``vector1``, and the result is compared to
    ``vector1`` with ``are_almost_equal_vectors``.

    Vectors of different lengths are reported as not colinear, in line with
    ``are_almost_equal_vectors``. Without this check a shorter ``vector1``
    could raise ``IndexError`` while looking up the pivot entry.

    ``vector0`` must support division by a scalar (e.g. a NumPy array).
    """
    if is_zero_vector(vector0):
        return True
    if len(vector0) != len(vector1):
        return False
    for index, coefficient in enumerate(vector0):
        if coefficient != 0.:
            # Rescale vector0 so that both vectors agree at this pivot entry.
            rescaled = vector0 / coefficient * vector1[index]
            return are_almost_equal_vectors(rescaled, vector1)
    # Not expected to be reached: the zero-vector case was handled above.
    return True