Esempio n. 1
0
    def test_init_svd(self):
        """Check peripheral spaces built on top of SVD-reduced core spaces.

        The core space is reduced with ``Svd(2)`` and then, on top of that,
        with ``Svd(1)``.  After each step a ``PeripheralSpace`` is created
        from the raw data and must hold the expected matrix (to 2 decimals),
        share the row index of the core space, have no column index and
        report one operation per reduction applied so far.
        """
        svd_rank2 = Svd(2)
        svd_rank1 = Svd(1)
        scenarios = [(self.space2, self.us, self.us2, self.x, self.row3)]

        for core, expected_rank2, expected_rank1, raw, row_labels in scenarios:
            # (reduction to stack, expected matrix, operations recorded so far)
            stages = ((svd_rank2, expected_rank2, 1),
                      (svd_rank1, expected_rank1, 2))

            for reduction, expected, op_count in stages:
                core = core.apply(reduction)
                peripheral = PeripheralSpace(core, DenseMatrix(raw),
                                             row_labels)

                np.testing.assert_array_almost_equal(
                    expected, peripheral.cooccurrence_matrix.mat, 2)
                self.assertListEqual(peripheral.id2row, core.id2row)
                self.assertListEqual(peripheral.id2column, [])
                self.assertDictEqual(peripheral.row2id, core.row2id)
                self.assertDictEqual(peripheral.column2id, {})
                self.assertEqual(op_count, len(peripheral.operations))
Esempio n. 2
0
    def test_trivial_crossvalidation(self):
        """Searching over a single ridge parameter must equal fixing it.

        For random inputs with 2 to 10 rows, ridge regression trained with
        ``param_range=[3]`` is compared (exactly) with ``param=3``, and
        ``param_range=[0]`` is compared (to 3 decimals) with plain least
        squares.  Training must leave both input matrices unchanged.
        """
        for row_count in range(2, 11):
            source = DenseMatrix(np.mat(np.random.random((row_count, 4))))
            target = DenseMatrix(np.mat(np.random.random((row_count, 4))))
            source_snapshot = source.mat.copy()
            target_snapshot = target.mat.copy()

            def assert_inputs_untouched():
                np.testing.assert_array_equal(source_snapshot, source.mat)
                np.testing.assert_array_equal(target_snapshot, target.mat)

            searched = RidgeRegressionLearner(
                param_range=[3], intercept=False).train(source, target)
            fixed = RidgeRegressionLearner(
                param=3, intercept=False).train(source, target)

            assert_inputs_untouched()
            np.testing.assert_array_equal(searched.mat, fixed.mat)

            # Same single-candidate search once more, with a fresh learner.
            searched = RidgeRegressionLearner(
                param_range=[3], intercept=False).train(source, target)

            assert_inputs_untouched()
            np.testing.assert_array_equal(searched.mat, fixed.mat)

            # A ridge parameter of 0 is compared against least squares.
            ridge_zero = RidgeRegressionLearner(
                param_range=[0], intercept=False).train(source, target)
            least_squares = LstsqRegressionLearner(
                intercept=False).train(source, target)

            np.testing.assert_array_almost_equal(ridge_zero.mat,
                                                 least_squares.mat, 3)
Esempio n. 3
0
    def setUp(self):
        """Create the resource path, constructor cases and two small spaces."""
        self.dir_ = data_dir + "/space_test_resources/"

        # Two constructor cases that differ only in their column labels:
        # one with two named features, one with no column labels at all.
        self.init_test_cases = [
            (DenseMatrix(np.array([[1, 2], [3, 4]])),
             ["car", "man"],
             column_labels,
             {"man": 1, "car": 0},
             column_index,
             [ScalingOperation(EpmiWeighting())])
            for column_labels, column_index in (
                (["feat1", "feat2"], {"feat1": 0, "feat2": 1}),
                ([], {}),
            )
        ]

        # One-row space.
        self.m1 = np.array([[1, 2, 3]])
        self.row1 = ["a"]
        self.row2 = ["a", "b", "c"]
        self.ft1 = ["f1", "f2", "f3"]
        self.space1 = Space(DenseMatrix(self.m1), self.row1, self.ft1)

        # Three-row space plus a two-column reference matrix for it.
        self.x = np.mat([[1, 2, 3], [2, 4, 6], [4, 675, 43]])
        self.us = np.mat([
            [2.19272110e+00, 3.03174768e+00],
            [4.38544220e+00, 6.06349536e+00],
            [6.76369708e+02, -4.91431927e-02],
        ])
        self.space2 = Space(DenseMatrix(self.x), self.row2, self.ft1)
Esempio n. 4
0
    def setUp(self):
        """Build a noun space and a phrase space over the same features."""
        self.ft = ["f1", "f2"]
        nouns = ("car", "man")

        self.n_space = Space(DenseMatrix(np.mat([[3, 4], [5, 6]])),
                             list(nouns), self.ft)
        # Phrase rows carry the same vectors, labelled "a1_<noun>".
        self.an_space = Space(DenseMatrix(np.mat([[3, 4], [5, 6]])),
                              ["a1_" + noun for noun in nouns], self.ft)
Esempio n. 5
0
    def test_top_feat_selection(self):
        """Check TopFeatureSelection on dense and sparse input.

        Each scenario gives the input, the expected reduced matrix, the
        expected column permutation and the number of columns requested.
        Every scenario runs with the default criterion and with
        ``criterion="length"``; both must give the same expected result.
        Invalid constructor arguments must raise ``ValueError``.
        """
        scenarios = [
            (self.a, np.mat([[3, 1], [5, 4]]), [2, 0], 2),
            (self.a, np.mat([[3], [5]]), [2], 1),
            (self.a, np.mat([[3, 1, 2], [5, 4, 0]]), [2, 0, 1], 6),
        ]

        for source, wanted_mat, wanted_perm, keep in scenarios:
            for extra in ({}, {"criterion": "length"}):
                selector = TopFeatureSelection(keep, **extra)

                dense_out, dense_perm = selector.apply(DenseMatrix(source))
                np.testing.assert_array_equal(dense_out.mat, wanted_mat)
                self.assertListEqual(dense_perm, wanted_perm)

                sparse_out, sparse_perm = selector.apply(SparseMatrix(source))
                np.testing.assert_array_equal(sparse_out.mat.todense(),
                                              wanted_mat)
                self.assertListEqual(sparse_perm, wanted_perm)

        # Zero columns and an unknown criterion are both rejected.
        with self.assertRaises(ValueError):
            TopFeatureSelection(0)
        with self.assertRaises(ValueError):
            TopFeatureSelection(2, criterion="something")
Esempio n. 6
0
    def test_weighted_additive(self):
        """Export a WeightedAdditive model before and after training.

        There are no assertions; the test passes when both exports and the
        training call complete without raising.
        """
        self.row = ["a", "b"]
        self.ft = ["f1", "f2"]
        self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
        self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
        self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))
        self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
        self.space2 = Space(DenseMatrix(self.ph2), ["a_a", "a_b"], self.ft)

        training_triples = [("a", "a", "a_a")]
        model = WeightedAdditive()

        # Export of the untrained model.
        model.export(self.prefix + ".add1")
        model.train(training_triples, self.space1, self.space2)
        # Export of the trained model.
        model.export(self.prefix + ".add2")
Esempio n. 7
0
    def test_dilation(self):
        """Export a Dilation model before and after training.

        There are no assertions; the test passes when both exports and the
        training call complete without raising.
        """
        self.row = ["a", "b"]
        self.ft = ["f1", "f2"]
        self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
        self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
        self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))
        self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
        self.space2 = Space(DenseMatrix(self.ph2), ["a_a", "a_b"], self.ft)

        training_triples = [("a", "b", "a_b")]
        model = Dilation()

        # Export of the untrained model.
        model.export(self.prefix + ".dil1")
        model.train(training_triples, self.space1, self.space2)
        # Export of the trained model.
        model.export(self.prefix + ".dil2")
Esempio n. 8
0
    def test_train_intercept(self):
        """Train and compose a LexicalFunction that uses an intercept.

        Phrase vectors are generated by multiplying two known 2x2 adjective
        matrices with the noun vectors.  After training with an intercept
        the function space must have rows ``a1``/``a2`` and element shape
        ``(2, 3)``; composing with it must reproduce the generated phrase
        vectors to 8 decimals.
        """
        adj1_map = DenseMatrix(np.mat([[3, 4], [5, 6]]))
        adj2_map = DenseMatrix(np.mat([[1, 2], [3, 4]]))

        train_data = [
            ("a1", "man", "a1_man"),
            ("a2", "car", "a2_car"),
            ("a1", "boy", "a1_boy"),
            ("a2", "boy", "a2_boy"),
        ]

        noun_mat = DenseMatrix(np.mat([[13, 21], [3, 4], [5, 6]]))
        n_space = Space(noun_mat, ["man", "car", "boy"], self.ft)

        # One block of phrase vectors per adjective, stacked a1 then a2.
        phrases_a1 = (adj1_map * noun_mat.transpose()).transpose()
        phrases_a2 = (adj2_map * noun_mat.transpose()).transpose()
        phrase_mat = phrases_a1.vstack(phrases_a2)
        phrase_rows = [adj + "_" + noun
                       for adj in ("a1", "a2")
                       for noun in ("man", "car", "boy")]

        an_space = Space(phrase_mat, phrase_rows, self.ft)

        # --- training ---
        trained = LexicalFunction(
            learner=LstsqRegressionLearner(intercept=True))
        trained.train(train_data, n_space, an_space)
        learned_space = trained.function_space

        # The element-wise comparison of the learned rows against the
        # flattened adjective matrices is disabled; only the reshape calls
        # that preceded those comparisons remain.
        adj1_map.reshape((1, 4))
        adj2_map.reshape((1, 4))

        self.assertListEqual(learned_space.id2row, ["a1", "a2"])
        self.assertTupleEqual(learned_space.element_shape, (2, 3))

        # --- composition ---
        # These two flattened matrices are built but not used below.
        adj1_map = DenseMatrix(np.mat([[3, 4, 5, 6]]))
        adj2_map = DenseMatrix(np.mat([[1, 2, 3, 4]]))
        learned_mat = learned_space.cooccurrence_matrix

        rebuilt_space = Space(learned_mat, ["a1", "a2"], [],
                              element_shape=(2, 3))
        composer = LexicalFunction(function_space=rebuilt_space,
                                   intercept=True)
        comp_space = composer.compose(train_data, n_space)

        self.assertListEqual(comp_space.id2row,
                             ["a1_man", "a2_car", "a1_boy", "a2_boy"])
        self.assertListEqual(comp_space.id2column, [])

        self.assertEqual(comp_space.element_shape, (2,))

        # Rows 0, 4, 2, 5 of the stacked matrix are the four trained phrases.
        np.testing.assert_array_almost_equal(
            comp_space.cooccurrence_matrix.mat,
            phrase_mat[[0, 4, 2, 5]].mat, 8)
Esempio n. 9
0
    def test_init(self):
        """Check PeripheralSpace construction against weighted core spaces.

        A peripheral space is built on the plain core space, on the core
        space after ``EpmiWeighting`` and on the core space after an
        additional ``PlogWeighting`` (twice).  Each time the peripheral
        matrix, columns, row labels and operation list are checked against
        the core space it was built on.
        """
        epmi = EpmiWeighting()
        plog = PlogWeighting()
        scenarios = [(self.space1, self.m2, self.row2,
                      np.array([[2, 0.5, 1]]),
                      np.array([[0.69314718, 0, 0]]))]

        for core, raw, labels, after_epmi, after_plog in scenarios:
            raw_snapshot = raw.copy()
            # NOTE(review): taken without .copy(), unlike raw_snapshot. If
            # .mat returns the underlying array, the "core unchanged" check
            # below compares the array with itself. Confirm.
            core_mat_before = core.cooccurrence_matrix.mat

            def build_and_check(base, expected, op_count):
                peripheral = PeripheralSpace(base, DenseMatrix(raw), labels)
                np.testing.assert_array_almost_equal(
                    peripheral.cooccurrence_matrix.mat, expected)
                self.assert_column_identical(peripheral, base)
                self.assertListEqual(peripheral.id2row, labels)
                self.assertListEqual(peripheral.operations, base.operations)
                self.assertEqual(len(peripheral.operations), op_count)

            def assert_core_unchanged():
                np.testing.assert_array_equal(core_mat_before,
                                              core.cooccurrence_matrix.mat)

            # Unweighted core: the peripheral matrix equals the raw data.
            plain = PeripheralSpace(core, DenseMatrix(raw), labels)
            np.testing.assert_array_equal(plain.cooccurrence_matrix.mat,
                                          raw_snapshot)
            self.assert_column_identical(plain, core)
            self.assertListEqual(plain.id2row, labels)
            self.assertListEqual(plain.operations, core.operations)

            # One weighting applied.
            core_epmi = core.apply(epmi)
            build_and_check(core_epmi, after_epmi, 1)

            # Two weightings applied.
            core_plog = core_epmi.apply(plog)
            build_and_check(core_plog, after_plog, 2)
            assert_core_unchanged()

            # Second peripheral space on the same doubly-weighted core.
            build_and_check(core_plog, after_plog, 2)
            assert_core_unchanged()
Esempio n. 10
0
    def test_train_random(self):
        """A Dilation model must recover the lambda used to compose.

        Phrases are composed from two random 4x3 matrices with a known
        lambda; solving on those phrases with a fresh model must yield
        (almost) the same lambda.
        """
        rows, cols = 4, 3
        first = np.random.rand(rows, cols)
        second = np.random.rand(rows, cols)

        for true_lambda in (1.0, 2.0, 3.0):
            composed = Dilation(true_lambda)._compose(DenseMatrix(first),
                                                      DenseMatrix(second))

            learner = Dilation()
            learner._solve(DenseMatrix(first), DenseMatrix(second), composed)
            self.assertAlmostEqual(true_lambda, learner._lambda)
Esempio n. 11
0
    def setUp(self):
        """Create raw arrays ``a``-``f`` and DenseMatrix wrappers for a-d."""
        # 2x3 arrays: one with data, one all zeros.
        self.a = np.array([[1, 2, 3], [4, 0, 5]])
        self.b = np.zeros((2, 3), dtype=int)

        # 3x2 zeros and a 2x2 identity.
        self.c = np.zeros((3, 2), dtype=int)
        self.d = np.array([[1, 0], [0, 1]])

        # Plain vectors of length 2 and 3.
        self.e = np.array([1, 10])
        self.f = np.array([1, 10, 100])

        self.matrix_a = DenseMatrix(self.a)
        self.matrix_b = DenseMatrix(self.b)
        self.matrix_c = DenseMatrix(self.c)
        self.matrix_d = DenseMatrix(self.d)
Esempio n. 12
0
    def test_dense_svd(self):
        """Compare ``Linalg.svd`` on dense input with reference factors.

        Requesting 2, 3 or 6 dimensions must give the full reference
        factors; requesting 1 must give their first column / first value.
        All comparisons are to 2 decimals.
        """
        def check(matrix, rank, u_ref, s_ref, v_ref):
            u, s, v = Linalg.svd(DenseMatrix(matrix), rank)
            np.testing.assert_array_almost_equal(u.mat, u_ref, 2)
            np.testing.assert_array_almost_equal(s, s_ref, 2)
            np.testing.assert_array_almost_equal(v.mat, v_ref, 2)

        for x, u_expected, s_expected, v_expected in self.svd_test_cases:
            for rank in (2, 3, 6):
                check(x, rank, u_expected, s_expected, v_expected)

            check(x, 1,
                  u_expected[:, 0:1], s_expected[0:1], v_expected[:, 0:1])
Esempio n. 13
0
    def _export(self, filename):
        """Write the model matrices to ``filename`` as plain text.

        The file holds an ``A`` section and a ``B`` section, each the
        string form of the transposed dense matrix.  When the model has an
        intercept, the last row of the stored B matrix is written in a
        separate ``Intercept`` section instead of being part of ``B``.

        Args:
            filename: path of the file to (over)write.

        Raises:
            IllegalStateError: if either stored matrix is still None.
        """
        untrained = self._mat_a_t is None or self._mat_b_t is None
        if untrained:
            raise IllegalStateError("cannot export an untrained FullAdditive model.")

        def render(block):
            # Printable form of the transposed dense matrix.
            return str(DenseMatrix(block).mat.T)

        with open(filename, "w") as sink:
            sink.write("A\n")
            sink.write(render(self._mat_a_t))
            sink.write("\nB\n")

            if not self._has_intercept:
                sink.write(render(self._mat_b_t))
                return

            # Everything but the last row is B; the last row is the intercept.
            sink.write(render(self._mat_b_t[:-1,]))
            sink.write("\nIntercept\n")
            sink.write(render(self._mat_b_t[-1,]))
Esempio n. 14
0
    def test_lexical_function(self):
        """Export a LexicalFunction model before and after training.

        Exporting the untrained model must raise ``IllegalStateError``;
        after training the export must complete without raising.
        """
        self.row = ["a", "b"]
        self.ft = ["f1", "f2"]
        self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
        self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
        self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))
        self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
        self.space2 = Space(DenseMatrix(self.ph2), ["a_a", "a_b"], self.ft)

        model = LexicalFunction()
        # Lowered so the two training triples below are enough.
        model._MIN_SAMPLES = 1

        with self.assertRaises(IllegalStateError):
            model.export(self.prefix + ".lf1")

        training_triples = [("a", "b", "a_b"), ("a", "a", "a_a")]
        model.train(training_triples, self.space1, self.space2)
        model.export(self.prefix + ".lf2")
Esempio n. 15
0
    def test_full_additive(self):
        """Export a FullAdditive model before and after training.

        Exporting the untrained model must raise ``IllegalStateError``;
        after training the export must complete without raising.
        """
        self.row = ["a", "b"]
        self.ft = ["f1", "f2"]
        self.m12 = DenseMatrix(np.mat([[3, 1], [9, 2]]))
        self.m22 = DenseMatrix(np.mat([[4, 3], [2, 1]]))
        self.ph2 = DenseMatrix(np.mat([[18, 11], [24, 7]]))
        self.space1 = Space(DenseMatrix(self.m12), self.row, self.ft)
        self.space2 = Space(DenseMatrix(self.ph2), ["a_a", "a_b"], self.ft)

        model = FullAdditive()

        with self.assertRaises(IllegalStateError):
            model.export(self.prefix + ".full1")

        training_triples = [("a", "b", "a_b"), ("a", "a", "a_a")]
        model.train(training_triples, self.space1, self.space2)

        model.export(self.prefix + ".full2")
Esempio n. 16
0
    def test_vstack_raises(self):
        """Stacking ``self.space2`` with a mismatching space must fail.

        Each candidate is built from the first two rows of ``self.x`` and
        is expected to make ``vstack`` raise ``ValueError``.
        """
        mismatching = [
            # a single column only
            Space(DenseMatrix(self.x[0:2, 0:1]), ["e", "f"], self.ft1[0:1]),
            # row label "a" (presumably clashing with a row of space2)
            Space(DenseMatrix(self.x[0:2, :]), ["a", "f"], self.ft1),
            # no column labels
            Space(DenseMatrix(self.x[0:2, :]), ["e", "f"], []),
            # column labels f1, f2, f4
            Space(DenseMatrix(self.x[0:2, :]), ["e", "f"],
                  ["f1", "f2", "f4"]),
        ]

        base = self.space2
        for other in mismatching:
            self.assertRaises(ValueError, base.vstack, base, other)
Esempio n. 17
0
    def project(self, matrix_):
        """
        Projects a dimensionality reduction onto a new matrix.

        The transformation matrix stored in this operation object is
        multiplied onto ``matrix_``, so that a space peripheral to the
        original one ends up in the same reduced space.

        Args:
            matrix_: matrix on which the reduction is projected, of type Matrix

        Returns:
            the reduced matrix, wrapped as a DenseMatrix
        """
        if self.__transmat is None:
            self._raise_projection_error(self.__dim_reduction)

        uses_nmf = self.__dim_reduction.name == "nmf"
        if uses_nmf:
            matrix_.assert_positive()

        same_kind = isinstance(matrix_, type(self.__transmat))
        if not same_kind:
            warn(
                "WARNING: peripheral matrix type (dense/sparse) should be the same as the core space matrix type!!"
            )

        # Bring both operands to the type of the incoming matrix.
        operands = resolve_type_conflict([matrix_, self.__transmat],
                                         type(matrix_))
        matrix_, transmat = operands

        projected = matrix_ * transmat

        if uses_nmf:
            projected.to_non_negative()

        return DenseMatrix(projected)
Esempio n. 18
0
def main():
    """
    Convert temporal referencing matrix to regular (binned) matrix.

    Loads the pickled space at ``<spacePrefix>`` and keeps the rows whose
    label either contains no underscore or has the form ``target_<ref>``.
    The kept rows are relabelled with the part before the underscore and
    saved to ``<outPath>`` in w2v or sparse format, depending on the flag.

    Raises:
        ValueError: if no row of the space matches the reference.
    """

    # Get the arguments
    args = docopt(
        """Convert temporal referencing matrix to regular (binned) matrix.

    Usage:
        tr2bin.py (-w | -s) <spacePrefix> <ref> <outPath>

        <spacePrefix> = path to pickled space without suffix
        <ref> = reference string
        <outPath> = output path for result file

    Options:
        -w, --w2v   save in w2v format
        -s, --sps   save in sparse matrix format
        
    """)

    is_w2v = args['--w2v']
    is_sps = args['--sps']
    spacePrefix = args['<spacePrefix>']
    ref = args['<ref>']
    outPath = args['<outPath>']

    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)
    logging.info(__file__.upper())
    start_time = time.time()

    # Load spaces
    space = load_pkl_files(spacePrefix)
    matrix = space.get_cooccurrence_matrix().get_mat()
    id2row = space.get_id2row()
    id2column = space.get_id2column()

    # Keep plain targets and targets tagged with the requested reference.
    targets = []
    indices = []
    for i, word in enumerate(id2row):
        parts = word.split('_')
        if len(parts) == 1 or (len(parts) == 2 and parts[1] == ref):
            targets.append(parts[0])
            indices.append(i)

    # Without this guard an empty selection fails later with an unhelpful
    # unpacking/indexing error.
    if not indices:
        raise ValueError(
            "no row in space %r matches reference %r" % (spacePrefix, ref))

    new_matrix = matrix[indices, :]

    # Save the Space objects
    if is_w2v:
        new_space = Space(DenseMatrix(new_matrix), targets, id2column)
        save_pkl_files(new_space,
                       outPath,
                       save_in_one_file=True,
                       save_as_w2v=True)
    if is_sps:
        new_space = Space(SparseMatrix(new_matrix), targets, id2column)
        save_pkl_files(new_space,
                       outPath,
                       save_in_one_file=True,
                       save_as_w2v=False)

    logging.info("--- %s seconds ---" % (time.time() - start_time))
Esempio n. 19
0
    def test_mul_raises(self):
        """Multiplying ``matrix_a`` by these operands must raise TypeError.

        The operands are a raw numpy array, a DenseMatrix wrapping the same
        array, and a string.
        """
        left = self.matrix_a
        right_operands = (self.a, DenseMatrix(self.a), "3")

        for right in right_operands:
            self.assertRaises(TypeError, left.__mul__, right)
Esempio n. 20
0
def main():
    """Convert a text vector file into a pickled ``Space``.

    The first input line holds the vocabulary size and the number of
    dimensions, separated by a space.  Every following line holds a word
    followed by its whitespace-separated vector components.  The result is
    a ``Space`` with one row per word and no column labels, pickled to the
    output file.
    """
    parser = argparse.ArgumentParser(
        description="Converts a vecf file to dissect pkl format.")
    parser.add_argument('--input',
                        '-i',
                        type=argparse.FileType('r'),
                        help='Input file')
    # Binary mode: pickle writes bytes, which a text-mode file rejects on
    # Python 3.
    parser.add_argument('--output',
                        '-o',
                        type=argparse.FileType('wb'),
                        help='Output file')
    args = parser.parse_args()

    header = args.input.readline().rstrip()
    vocab_s, dims = map(int, header.split(" "))

    vocab = []

    # init matrix (builtin float: the np.float alias no longer exists in
    # current NumPy releases)
    matrix = np.zeros((vocab_s, dims), dtype=float)

    for i, line in enumerate(args.input):
        data = line.split()
        # A list, not a lazy map object, so NumPy sees the actual numbers.
        vector = np.array([float(component) for component in data[1:]])
        word = data[0]
        vocab.append(word)
        matrix[i] = vector

    dm = DenseMatrix(matrix)
    sp = Space(dm, vocab, [])
    pickle.dump(sp, args.output)
    args.output.close()
Esempio n. 21
0
def read_mikolov(spacefile):
    """Read a binary vector file into a ``Space``.

    The first line holds the vocabulary size and the number of dimensions,
    separated by a space.  Every following record is a word, one space,
    ``dims`` packed floats of ``FLOAT_SIZE`` bytes each, and one trailing
    byte that is discarded.

    NOTE(review): the code searches with ``find(" ")`` and then feeds the
    same data to ``struct.unpack``, which needs bytes on Python 3.  It is
    presumably meant for Python 2 byte strings; confirm before using it
    with a Python 3 file object.

    Args:
        spacefile: open file object positioned at the header line.

    Returns:
        a Space with one row per word and no column labels.

    Raises:
        ValueError: if a record contains no space between word and data.
    """
    header = spacefile.readline().rstrip()
    vocab_s, dims = map(int, header.split(" "))

    vocab = []

    # init matrix (builtin float: the np.float alias no longer exists in
    # current NumPy releases)
    matrix = np.zeros((vocab_s, dims), dtype=float)

    # Payload length of one record: the floats plus the trailing byte.
    record_size = FLOAT_SIZE * dims + 1

    i = 0
    while True:
        line = spacefile.readline()
        if not line:
            break
        sep = line.find(" ")
        if sep == -1:
            raise ValueError(
                "Couldn't find the vocab/data separation character! Space file corruption?"
            )

        word = line[:sep]
        data = line[sep + 1:]
        # readline() stops at the first newline byte, which may fall inside
        # the packed floats; fetch whatever is still missing.
        if len(data) < record_size:
            data += spacefile.read(record_size - len(data))
        data = data[:-1]
        vocab.append(word)
        vector = (struct.unpack("%df" % dims, data))
        matrix[i] = vector
        i += 1

    dm = DenseMatrix(matrix)
    sp = Space(dm, vocab, [])

    return sp
Esempio n. 22
0
def main():
    """Convert per-document weight lines into a pickled ``Space``.

    Each non-empty line of the docnames file labels one row.  Each line of
    the input file holds the space-separated weights of the document at
    the same position.  The result is a ``Space`` with one row per document
    name and no column labels, pickled to the output (stdout by default).
    """
    # Passed as description=; a bare positional string would be taken as
    # the program name shown in the usage line.
    parser = argparse.ArgumentParser(
        description='Converts a VW topic output to a COMPOSES pkl file.')
    parser.add_argument('--input',
                        '-i',
                        type=argparse.FileType('r'),
                        help='Input file')
    parser.add_argument('--docnames',
                        '-d',
                        type=argparse.FileType('r'),
                        help='Docnames file')
    # Binary mode: pickle writes bytes, which a text-mode file rejects on
    # Python 3.
    parser.add_argument('--output',
                        '-o',
                        type=argparse.FileType('wb'),
                        default=sys.stdout,
                        help='Output file')

    args = parser.parse_args()
    docnames = [name for name in (raw.strip() for raw in args.docnames)
                if name]
    matrix = None
    for i, line in enumerate(args.input):
        # A list, not a lazy map object, so len() and NumPy both work.
        weights = [float(weight) for weight in line.strip().split(" ")]
        if matrix is None:
            # The width is only known once the first line has been read.
            matrix = np.zeros((len(docnames), len(weights)), dtype=float)
        matrix[i] = np.array(weights)

    dm = DenseMatrix(matrix)
    sp = Space(dm, docnames, [])
    # A text stream such as the sys.stdout default exposes its byte stream
    # as .buffer; binary files have no such attribute and are used as-is.
    binary_out = getattr(args.output, "buffer", args.output)
    pickle.dump(sp, binary_out)
    args.output.close()
Esempio n. 23
0
    def setUp(self):
        """Build a sparse and a dense space over the same 2x3 data."""
        self.a = np.array([[1, 2, 3], [4, 0, 5]])

        # Same content and labels; only the matrix wrapper differs.
        for attribute, matrix_type in (("space_s", SparseMatrix),
                                       ("space_d", DenseMatrix)):
            setattr(self, attribute,
                    Space(matrix_type(np.mat(self.a)), ["a", "b"],
                          ["f1", "f2", "f3"]))
    def test_nmf(self):
        """Dense and sparse NMF must agree when started from the same init.

        For each input the initial factors come from ``Nmf(2).v_col_init``
        on the dense matrix; the sparse run uses sparse copies of them.
        The resulting W and H factors are compared to 2 decimals.
        """
        test_cases = [np.mat([[1, 2, 3], [2, 4, 6], [4, 17, 13]],
                             dtype=np.double),
                      np.mat([[1, 0, 0]], dtype=np.double)]

        for in_mat in test_cases:
            red = Nmf(2)
            d_mat = DenseMatrix(in_mat)
            wd_init, hd_init = red.v_col_init(d_mat)

            s_mat = SparseMatrix(in_mat)
            ws_init = SparseMatrix(wd_init)
            hs_init = SparseMatrix(hd_init)

            wd_mat, hd_mat = Linalg.nmf(d_mat, wd_init, hd_init)
            ws_mat, hs_mat = Linalg.nmf(s_mat, ws_init, hs_init)

            # TESTED IT AGAINST MATLAB IMPLEMENTATION - ALL GOOD
            # Single-argument print calls give the same output on Python 2
            # and Python 3; the former print statements only parsed on
            # Python 2.
            print("V: %s" % (in_mat,))
            print("WH: %s" % ((ws_mat * hs_mat).mat.todense(),))

            np.testing.assert_array_almost_equal(wd_mat.mat,
                                                 ws_mat.mat.todense(), 2)
            np.testing.assert_array_almost_equal(hd_mat.mat,
                                                 hs_mat.mat.todense(), 2)
Esempio n. 25
0
def to_matrix(matrix_):
    """
    Wraps an array-like structure in a SparseMatrix when ``issparse``
    reports it as sparse, and in a DenseMatrix otherwise.
    """
    wrapper = SparseMatrix if issparse(matrix_) else DenseMatrix
    return wrapper(matrix_)
Esempio n. 26
0
def print_cooc_mat_dense_format(matrix_, id2row, file_prefix):
    """Write a matrix to ``<file_prefix>.dm`` as tab-separated text.

    Each output line holds a row label from ``id2row`` followed by the
    ``repr`` of every value in the matching matrix row.
    """
    target_path = "%s.dm" % (file_prefix,)

    with open(target_path, 'w') as sink:
        for position, label in enumerate(id2row):
            values = DenseMatrix(matrix_[position]).mat.flat
            fields = [label]
            fields.extend(repr(values[k]) for k in range(len(values)))
            sink.write("\t".join(fields) + "\n")
Esempio n. 27
0
    def test_intercept_lstsq_regression(self):
        """Least squares with an intercept must recover a known solution.

        With ``intercept=True`` the result must match the reference: all
        rows but the last against its first two rows, and the last row
        against its third row (6 decimals).  The fit without an intercept
        must have a larger residual norm than the fit with one.
        """
        predictors = DenseMatrix(np.matrix([[1, 1], [2, 3], [4, 6]]))
        responses = DenseMatrix(
            np.matrix([[12, 15, 18], [21, 27, 33], [35, 46, 57]]))
        reference = DenseMatrix(np.matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))

        plain_fit = Linalg.lstsq_regression(predictors, responses)
        intercept_fit = Linalg.lstsq_regression(predictors, responses,
                                                intercept=True)

        np.testing.assert_array_almost_equal(intercept_fit.mat[:-1, :],
                                             reference[0:2, :].mat, 6)
        np.testing.assert_array_almost_equal(intercept_fit.mat[-1, :],
                                             reference[2:3, :].mat, 6)

        # Append a column of ones so the intercept row can be applied.
        ones_column = DenseMatrix(np.ones((predictors.shape[0], 1)))
        augmented = predictors.hstack(ones_column)

        residual_plain = ((predictors * plain_fit) - responses).norm()
        residual_intercept = ((augmented * intercept_fit) - responses).norm()
        self.assertGreater(residual_plain, residual_intercept)
Esempio n. 28
0
    def test_init_raise(self):
        """Inconsistent Space constructor arguments must raise ValueError.

        Each case is (matrix, id2row, id2column, row2id, column2id) with
        one inconsistency between the matrix, the labels and the indices.
        """
        bad_arguments = [
            # three matrix rows, two row labels
            (DenseMatrix(np.array([[1, 2], [3, 4], [5, 6]])),
             ["car", "man"], ["feat1", "feat2"],
             {"man": 1, "car": 0}, {"feat1": 0, "feat2": 1}),
            # no row labels
            (DenseMatrix(np.array([[1, 2], [3, 4]])),
             [], ["feat1", "feat2"],
             {"man": 1, "car": 0}, {"feat1": 0, "feat2": 1}),
            # empty row index
            (DenseMatrix(np.array([[1, 2], [3, 4]])),
             ["car", "man"], ["feat1", "feat2"],
             {}, {"feat1": 0, "feat2": 1}),
            # one column label, two-entry column index (listed twice)
            (DenseMatrix(np.array([[1, 2], [3, 4]])),
             ["car", "man"], ["feat1"],
             {"man": 1, "car": 0}, {"feat1": 0, "feat2": 1}),
            (DenseMatrix(np.array([[1, 2], [3, 4]])),
             ["car", "man"], ["feat1"],
             {"man": 1, "car": 0}, {"feat1": 0, "feat2": 1}),
            # two column labels, one-entry column index
            (DenseMatrix(np.array([[1, 2], [3, 4]])),
             ["car", "man"], ["feat1", "feat2"],
             {"man": 1, "car": 0}, {"feat1": 0}),
            # column index positions swapped relative to the labels
            (DenseMatrix(np.array([[1, 2], [3, 4]])),
             ["car", "man"], ["feat1", "feat2"],
             {"man": 1, "car": 0}, {"feat1": 1, "feat2": 0}),
        ]

        for matrix, id2row, id2col, row2id, col2id in bad_arguments:
            with self.assertRaises(ValueError):
                Space(matrix, id2row, id2col, row2id, col2id)
Esempio n. 29
0
    def setUp(self):
        """Create a one-row space, a three-row space and reference data."""
        # Labels shared by the fixtures below.
        self.row1 = ["a"]
        self.row2 = ["b"]
        self.row3 = ["a", "b", "c"]
        self.ft1 = ["f1", "f2", "f3"]

        # One-row core space and a second raw one-row matrix.
        self.m1 = np.array([[1, 2, 3]])
        self.m2 = np.array([[4, 2, 6]])
        self.space1 = Space(DenseMatrix(self.m1), self.row1, self.ft1)

        # Three-row data with two-column and one-column reference matrices.
        self.x = np.mat([[1, 2, 3], [2, 4, 6], [4, 675, 43]])
        self.us = np.mat([
            [2.19272110e+00, 3.03174768e+00],
            [4.38544220e+00, 6.06349536e+00],
            [6.76369708e+02, -4.91431927e-02],
        ])
        self.us2 = np.mat([
            [2.19272110e+00],
            [4.38544220e+00],
            [6.76369708e+02],
        ])

        self.space2 = Space(DenseMatrix(self.x), self.row3, self.ft1)
Esempio n. 30
0
    def test_dense_lstsq_regression(self):
        """Regressing a matrix onto the identity must give its inverse.

        Each case pairs a matrix with a reference inverse; the regression
        result is compared with that reference to 7 decimals.
        """
        for matrix, reference_inverse in self.pinv_test_cases:
            dense = DenseMatrix(matrix)
            identity = DenseMatrix.identity(dense.shape[0])

            solution = Linalg.lstsq_regression(dense, identity)
            np.testing.assert_array_almost_equal(solution.mat,
                                                 reference_inverse, 7)