Example #1
    def testBasicDictionaryLearning(self):
        embedding_table = np.array([[1, 2, 3, 4, 5], [10, 9, 8, 7, 6],
                                    [-5, -6, -7, -8, -9],
                                    [100, 101, 102, 103, 104],
                                    [20, 19, 18, 17, 16], [30, 39, 38, 37, 36],
                                    [45, 44, 43, 42, 41], [88, 89, 89, 80, 91],
                                    [54, 53, 52, 51, 50]])

        [code0, dictionary0
         ] = dictionary_learning.dictionary_learning(embedding_table,
                                                     row_percentage=0.5,
                                                     col_percentage=0.5)

        [code1, dictionary1
         ] = dictionary_learning.dictionary_learning(embedding_table,
                                                     row_percentage=0.8,
                                                     col_percentage=0.8)

        approx0 = np.matmul(code0, dictionary0)
        error0 = 100.0 * LA.norm(embedding_table - approx0,
                                 "fro") / LA.norm(embedding_table)

        approx1 = np.matmul(code1, dictionary1)
        error1 = 100.0 * LA.norm(embedding_table - approx1,
                                 "fro") / LA.norm(embedding_table)

        # Expect that error0 > error1 because the sizes of code1 & dictionary1 are
        # larger than the sizes of code0 & dictionary0.
        self.assertGreaterEqual(error0, error1)
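Note (not part of the original snippet): the percentage-relative Frobenius error computed twice above, and again via np.linalg.norm in the later examples, can be factored into a small helper. A minimal sketch assuming only numpy; the name relative_error_percent is illustrative, not an identifier from the dictionary_learning module.

    import numpy as np
    from numpy import linalg as LA

    def relative_error_percent(original, code, dictionary):
        # Relative Frobenius error, in percent, of the factorization
        # original ~= code @ dictionary.
        approx = np.matmul(code, dictionary)
        return 100.0 * LA.norm(original - approx, "fro") / LA.norm(original, "fro")

    # e.g. error0 = relative_error_percent(embedding_table, code0, dictionary0)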
Example #2
    def testDictionaryLearningWithSimulatedDataLshProjectionComparison(self):
        # Generate random matrices a, b, and c.
        b = np.random.normal(0, 1, [1000, 500])
        sub_thresh_indices = (b <= 0.7)
        b[sub_thresh_indices] = 0
        b[:, 0] = np.ones(shape=b[:, 0].shape)
        c = np.random.normal(0, 1, [500, 65])
        a = np.matmul(b, c)

        [b_out,
         c_out] = dictionary_learning.dictionary_learning(a,
                                                          row_percentage=0.5,
                                                          col_percentage=0.3,
                                                          n_iterations=1,
                                                          use_lsh=True,
                                                          use_projection=True,
                                                          projection_dim=10,
                                                          version_num=1)
        a_recovered = np.matmul(b_out, c_out)
        error_with_projection = np.linalg.norm(a -
                                               a_recovered) / np.linalg.norm(a)

        [b_out_prime, c_out_prime
         ] = dictionary_learning.dictionary_learning(a,
                                                     row_percentage=0.5,
                                                     col_percentage=0.3,
                                                     n_iterations=1,
                                                     use_lsh=True,
                                                     version_num=1)
        a_recovered = np.matmul(b_out_prime, c_out_prime)
        error = np.linalg.norm(a - a_recovered) / np.linalg.norm(a)

        self.assertLessEqual(error, error_with_projection)
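A side note on the synthetic data used here and in the following examples: since b is drawn from N(0, 1), zeroing the entries with b <= 0.7 keeps roughly 1 - Phi(0.7), about 24%, of them, so the code factor b is sparse by construction and a = b @ c is a sparse combination of the rows of c. A quick sanity check, assuming scipy is available for the normal CDF:

    import numpy as np
    from scipy.stats import norm

    expected_density = 1.0 - norm.cdf(0.7)     # P(N(0, 1) > 0.7) ~= 0.242
    b = np.random.normal(0, 1, [1000, 500])
    b[b <= 0.7] = 0
    b[:, 0] = 1.0                              # keep the first column dense, as in the test
    observed_density = np.count_nonzero(b) / b.size
    print(expected_density, observed_density)  # both close to 0.24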
Example #3
    def testDictionaryLearningWithSimulatedDataLsh(self):
        # This test uses smaller matrices than the original setting; for
        # reference, with b.shape = [10000, 5000] and c.shape = [5000, 650],
        # one iteration of dictionary learning takes about an hour.
        # Generate random matrices a, b, and c.
        b = np.random.normal(0, 1, [1000, 500])
        sub_thresh_indices = (b <= 0.7)
        b[sub_thresh_indices] = 0
        b[:, 0] = np.ones(shape=b[:, 0].shape)
        c = np.random.normal(0, 1, [500, 65])
        a = np.matmul(b, c)

        [b_out,
         c_out] = dictionary_learning.dictionary_learning(a,
                                                          row_percentage=0.5,
                                                          col_percentage=0.3,
                                                          n_iterations=3,
                                                          use_lsh=True,
                                                          version_num=1)
        a_recovered = np.matmul(b_out, c_out)
        error = np.linalg.norm(a - a_recovered) / np.linalg.norm(a)

        # Expect that the approximation 'a_recovered' is not too bad.
        self.assertLessEqual(error, 0.2)
        self.assertGreaterEqual(error, 0.00005)
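For scale (illustrative arithmetic, not a benchmark): the original setting factors a 10000 x 650 matrix a, while this test factors a 1000 x 65 one, i.e. roughly 100x fewer entries.

    # a = b @ c has b.shape[0] * c.shape[1] entries.
    original_entries = 10000 * 650   # 6,500,000
    test_entries = 1000 * 65         # 65,000
    print(original_entries / test_entries)   # 100.0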
Example #4
    def testDictionaryLearningWithZeroMatrix(self):
        embedding_table = np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0]])

        [code,
         dictionary] = dictionary_learning.dictionary_learning(embedding_table)

        approx = np.matmul(code, dictionary)

        # Expect that 'approx' is a 0-matrix
        self.assertEqual(np.count_nonzero(approx), 0)
Example #5
    def testDictionaryLearningWithSimulatedData(self):
        # Generate random matrices a, b, and c.
        b = np.random.normal(0, 1, [30, 20])
        sub_thresh_indices = (b <= 0.7)
        b[sub_thresh_indices] = 0
        b[:, 0] = np.ones(shape=b[:, 0].shape)
        c = np.random.normal(0, 1, [20, 10])
        a = np.matmul(b, c)

        [b_out,
         c_out] = dictionary_learning.dictionary_learning(a,
                                                          row_percentage=0.5,
                                                          col_percentage=0.3,
                                                          n_iterations=0,
                                                          version_num=1)
        a_recovered = np.matmul(b_out, c_out)
        error = np.linalg.norm(a - a_recovered) / np.linalg.norm(a)
        # Expect that the approximation 'a_recovered' is not too bad.
        self.assertGreaterEqual(error, 0.00005)

    def static_matrix_compressor(self, matrix, n_iterations=1):
        """Performance dictionary learning on an input matrix.

    Args:
      matrix: input matrix, numpy 2d array;
      n_iterations: number of iterations to performance in dictionary learning,
        int.

    Returns:
      code: code matrix, numpy 2d array, see dictionary_learning module for more
        details;
      dictionary: dictionary matrix, numpy 2d array, see dictionary_learning
        module for more details.
    """
        logging.info(
            'Inside dl static_matrix_compressor: matrix shape is %s, norm is %f',
            matrix.shape, np.linalg.norm(matrix))
        logging.info(self._spec.to_json())
        print('matrix.shape: ', matrix.shape)
        [code, dictionary] = dictionary_learning.dictionary_learning(
            matrix,
            row_percentage=100 / self._spec.rank,
            col_percentage=100 / self._spec.rank,
            n_iterations=n_iterations,
            seed=15,
            use_lsh=self._spec.use_lsh)

        logging.info(
            'Inside dl static_matrix_compressor: code, dictionary shapes are: %s %s',
            code.shape, dictionary.shape)
        col_percentage = 100 / self._spec.rank
        self.uncompressed_size = matrix.size
        self.compressed_size = int(
            code.size * col_percentage) + dictionary.size

        print(
            'Inside dl_matrix_compressor: a_matrix, b_matrix, c_matrix shapes are: ',
            matrix.shape, code.shape, dictionary.shape,
            '; uncompressed and compressed sizes are: ', self.uncompressed_size,
            self.compressed_size)

        return [code.astype(np.float32), dictionary.astype(np.float32)]
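
A minimal usage sketch for static_matrix_compressor, under stated assumptions: the host class (here called SomeDictionaryLearningCompressor) and the spec object are hypothetical stand-ins exposing only the fields the method actually reads (rank, use_lsh, to_json); they are not identifiers from the dictionary_learning module.

    import numpy as np

    class _FakeSpec:
        # Hypothetical stand-in spec; only the fields read by the method.
        rank = 200        # row/col_percentage become 100 / 200 = 0.5
        use_lsh = False

        def to_json(self):
            return '{"rank": 200, "use_lsh": false}'

    compressor = SomeDictionaryLearningCompressor()  # hypothetical host class
    compressor._spec = _FakeSpec()

    matrix = np.random.normal(0, 1, [64, 32]).astype(np.float32)
    code, dictionary = compressor.static_matrix_compressor(matrix, n_iterations=2)

    # The method records size bookkeeping on the instance; a rough ratio:
    print(compressor.uncompressed_size / max(compressor.compressed_size, 1))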