예제 #1
0
def test_io_seqgenerator_w_batches():
    """Weights given to SequenceGenerator are returned by batches()."""
    n_sequences = 500
    sequences = [numpy.random.randn(15, 8) for _ in range(n_sequences)]
    weights = numpy.abs(numpy.random.randn(n_sequences))

    generator = SequenceGenerator(sequences, weights)

    # Element 1 of every batch is compared against the input weights.
    batch_weights = [batch[1] for batch in generator.batches()]
    assert_almost_equal(weights, numpy.concatenate(batch_weights))
예제 #2
0
def test_io_seqgenerator_wy_labeled():
    """labeled_batches() yields exactly the sequences that have a label.

    100 of the 500 label arrays are replaced by ``None``. The labeled batches
    must then hold the remaining sequences, weights and labels in their
    original order, and nothing beyond them.
    """
    X = [numpy.random.randn(15, 8) for _ in range(500)]
    y = [numpy.random.randint(6, size=15) for _ in range(500)]
    w = numpy.abs(numpy.random.randn(500))

    # Remove the labels of 100 distinct, randomly chosen sequences.
    for i in numpy.random.choice(500, size=100, replace=False):
        y[i] = None

    data = SequenceGenerator(X, w, y)
    X_ = numpy.concatenate([batch[0] for batch in data.labeled_batches()])
    w_ = numpy.concatenate([batch[1] for batch in data.labeled_batches()])
    y_ = numpy.concatenate([batch[2] for batch in data.labeled_batches()])

    assert_true(len(X) > len(X_))
    assert_true(len(w) > len(w_))
    assert_true(len(y) > len(y_))

    # Walk the inputs in order; `i` indexes the next expected labeled entry.
    i = 0
    for j in range(500):
        if y[j] is not None:
            assert_almost_equal(X[j], X_[i])
            assert_almost_equal(w[j], w_[i])
            assert_almost_equal(y[j], y_[i])
            i += 1

    # The loop above only fails when too few entries were produced; also
    # make sure no surplus entries slipped into the labeled batches.
    assert_true(i == len(X_))
    assert_true(i == len(w_))
    assert_true(i == len(y_))
예제 #3
0
def test_io_seqgenerator_w_symbol_batches():
    """Weights survive batching when the sequences are made of symbols."""
    alphabet = ["A", "C", "G", "T"]
    sequences = [
        [numpy.random.choice(alphabet) for _ in range(18)]
        for _ in range(500)
    ]
    weights = numpy.abs(numpy.random.randn(500))

    generator = SequenceGenerator(sequences, weights)

    # Element 1 of every batch is compared against the input weights.
    batch_weights = [batch[1] for batch in generator.batches()]
    assert_almost_equal(weights, numpy.concatenate(batch_weights))
예제 #4
0
def test_io_seqgenerator_y_batches():
    """Sequences and labels round-trip through batches() without weights."""
    sequences = [numpy.random.randn(15, 8) for _ in range(500)]
    labels = [numpy.random.randint(6, size=15) for _ in range(500)]

    generator = SequenceGenerator(sequences, y=labels)

    # Element 0 of a batch is compared with the sequences, element 2 with
    # the labels.
    batch_sequences = [batch[0] for batch in generator.batches()]
    batch_labels = [batch[2] for batch in generator.batches()]

    assert_array_equal(sequences, numpy.concatenate(batch_sequences))
    assert_almost_equal(labels, numpy.concatenate(batch_labels))
예제 #5
0
def test_io_seqgenerator_y_symbol_batches():
    """Symbol sequences and their labels round-trip through batches()."""
    # One label per observation: label arrays share the sequence length.
    # (The fixture previously paired 18-symbol sequences with 15 labels.)
    seq_len = 18
    X = [[numpy.random.choice(["A", "C", "G", "T"]) for _ in range(seq_len)]
         for _ in range(500)]
    y = [numpy.random.randint(6, size=seq_len) for _ in range(500)]

    data = SequenceGenerator(X, y=y)
    X_ = numpy.concatenate([batch[0] for batch in data.batches()])
    y_ = numpy.concatenate([batch[2] for batch in data.batches()])
    assert_array_equal(X, X_)
    assert_almost_equal(y, y_)
예제 #6
0
def test_io_seqgenerator_x_symbol_batches():
    """With no weights given, batches() is expected to report all ones."""
    alphabet = ["A", "C", "G", "T"]
    sequences = [
        [numpy.random.choice(alphabet) for _ in range(18)]
        for _ in range(500)
    ]

    generator = SequenceGenerator(sequences)

    batch_sequences = [batch[0] for batch in generator.batches()]
    batch_weights = [batch[1] for batch in generator.batches()]

    assert_array_equal(sequences, numpy.concatenate(batch_sequences))
    # Expected weights: one per input sequence.
    assert_almost_equal(numpy.ones(500), numpy.concatenate(batch_weights))
예제 #7
0
def test_io_seqgenerator_wy_batches():
    """Sequences, weights and labels all round-trip through batches()."""
    n_sequences = 500
    sequences = [numpy.random.randn(15, 8) for _ in range(n_sequences)]
    labels = [numpy.random.randint(6, size=15) for _ in range(n_sequences)]
    weights = numpy.abs(numpy.random.randn(n_sequences))

    generator = SequenceGenerator(sequences, weights, labels)

    # Batch layout checked here: (sequences, weights, labels).
    batch_sequences = [batch[0] for batch in generator.batches()]
    batch_weights = [batch[1] for batch in generator.batches()]
    batch_labels = [batch[2] for batch in generator.batches()]

    assert_almost_equal(sequences, numpy.concatenate(batch_sequences))
    assert_almost_equal(weights, numpy.concatenate(batch_weights))
    assert_almost_equal(labels, numpy.concatenate(batch_labels))
예제 #8
0
def test_io_seqgenerator_wy_symbol_unlabeled():
    """unlabeled_batches() yields exactly the sequences without a label.

    100 of the 500 label arrays are replaced by ``None``. The unlabeled
    batches must then hold those sequences and their weights in the original
    order, and nothing beyond them.
    """
    # One label per observation: label arrays share the sequence length.
    # (The fixture previously paired 18-symbol sequences with 15 labels.)
    seq_len = 18
    X = [[numpy.random.choice(["A", "C", "G", "T"]) for _ in range(seq_len)]
         for _ in range(500)]
    y = [numpy.random.randint(6, size=seq_len) for _ in range(500)]
    w = numpy.abs(numpy.random.randn(500))

    # Remove the labels of 100 distinct, randomly chosen sequences.
    for i in numpy.random.choice(500, size=100, replace=False):
        y[i] = None

    data = SequenceGenerator(X, w, y)
    X_ = numpy.concatenate([batch[0] for batch in data.unlabeled_batches()])
    w_ = numpy.concatenate([batch[1] for batch in data.unlabeled_batches()])

    assert_true(len(X) > len(X_))
    assert_true(len(w) > len(w_))

    # Walk the inputs in order; `i` indexes the next expected unlabeled entry.
    i = 0
    for j in range(500):
        if y[j] is None:
            assert_array_equal(X[j], X_[i])
            assert_almost_equal(w[j], w_[i])
            i += 1

    # The loop above only fails when too few entries were produced; also
    # make sure no surplus entries slipped into the unlabeled batches.
    assert_true(i == len(X_))
    assert_true(i == len(w_))
예제 #9
0
def test_io_seqgenerator_shape():
    """shape is expected to be (number of sequences, feature dimension)."""
    n_features = 8
    sequences = [numpy.random.randn(15, n_features) for _ in range(500)]

    generator = SequenceGenerator(sequences)

    expected_shape = (len(sequences), n_features)
    assert_array_equal(generator.shape, expected_shape)
예제 #10
0
def test_io_seqgenerator_symbol_shape():
    """For symbol sequences, shape is expected to be (n_sequences, 1)."""
    alphabet = ['A', 'C', 'G', 'T']
    sequences = [
        [numpy.random.choice(alphabet) for _ in range(15)]
        for _ in range(500)
    ]

    generator = SequenceGenerator(sequences)

    expected_shape = (len(sequences), 1)
    assert_array_equal(generator.shape, expected_shape)
예제 #11
0
    def _initDists(self, X, distribution=MultivariateGaussianDistribution):
        """Create the initial emission distribution of every hidden state.

        The strategy is picked by the local ``technique`` switch, which is
        currently fixed to ``"R_MV-GMM"``. The remaining branches are
        alternative strategies that are only selected by editing that value;
        ``"GMM-HMM-k"`` and ``"KMEANS"`` were marked as not completed by the
        original author.

        Args:
            X: Training observations. Only the data-driven strategies read
                it; the active ``"R_MV-GMM"`` strategy ignores it.
            distribution: Distribution class used by the ``"GMM-HMM-k"`` and
                ``"KMEANS"`` strategies; unused by the others.

        Returns:
            A list of ``self.statesNumber`` distributions, one per state.

        Raises:
            ValueError: If ``technique`` names no known strategy, or if the
                ``"KMEANS"`` strategy cannot fit ``distribution`` to the data.
        """
        technique = "R_MV-GMM"  # mixture of multivariate Gaussian distributions
        n_states = self.statesNumber

        if technique == "GMM":
            # Gaussian mixture model fitted on X, copied for every state.
            gmm = GeneralMixtureModel.from_samples(
                distributions=[NormalDistribution for _ in range(self.nmix)],
                X=X)
            dists = [gmm.copy() for _ in range(n_states)]
        elif technique == "MV-GMM":
            # Multivariate Gaussian mixture model fitted on X, copied for
            # every state.
            # NOTE(review): n_components is hard-coded to 3 although
            # self.nmix distribution classes are passed — confirm which of
            # the two from_samples honours.
            gmm = GeneralMixtureModel.from_samples(
                distributions=[
                    MultivariateGaussianDistribution
                    for _ in range(self.nmix)
                ],
                X=X,
                n_components=3)
            dists = [gmm.copy() for _ in range(n_states)]
        elif technique == "MVG":
            # One multivariate Gaussian per k-means cluster. The cluster
            # labels must be kept: they select each state's samples below.
            y = self._initkmeans(X=X, numClasses=n_states)
            dists = [
                MultivariateGaussianDistribution.from_samples(X=X[y == i])
                for i in range(n_states)
            ]
        elif technique == "R_GMM":
            # Random Gaussian mixture model: unit-deviation components with
            # a random integer mean drawn by np.random.randint(1, 10).
            dists = [
                GeneralMixtureModel([
                    NormalDistribution(np.random.randint(1, 10), 1)
                    for _ in range(self.nmix)
                ])
                for _ in range(n_states)
            ]
        elif technique == "R_MV-GMM":
            # Random multivariate Gaussian mixture model.
            # NOTE(review): randMVG is not defined in this method;
            # presumably a module-level factory — confirm it exists.
            dists = [
                GeneralMixtureModel([randMVG() for _ in range(self.nmix)])
                for _ in range(n_states)
            ]
        elif technique == "GMM-HMM-k":
            # Not completed upstream: a mixture of `distribution` fitted on
            # each k-means cluster of X.
            y = self._initkmeans(X, n_states)
            dists = [
                GeneralMixtureModel.from_samples(distribution,
                                                 X=X[y == i],
                                                 n_components=self.nmix)
                for i in range(n_states)
            ]
        elif technique == "KMEANS":
            # Not completed upstream: pool every batch, cluster the pooled
            # observations with k-means and fit one `distribution` per
            # cluster.
            if isinstance(X, BaseGenerator):
                data_generator = X
            else:
                data_generator = SequenceGenerator(X, None, None)

            X_ = []
            data = data_generator.batches()
            for _ in range(len(data_generator)):
                X_.extend(next(data)[0])

            X_concat = np.concatenate(X_)
            if X_concat.ndim == 1:
                X_concat = X_concat.reshape(X_concat.shape[0], 1)
            _, d = X_concat.shape

            clf = Kmeans(n_states, init="kmeans++", n_init=1)
            clf.fit(X_concat, max_iterations=None, batches_per_epoch=None)
            y = clf.predict(X_concat)

            if not callable(distribution):
                raise ValueError(
                    "distribution must be a callable distribution class")
            if d == 1:
                dists = [
                    distribution.from_samples(X_concat[y == i][:, 0])
                    for i in range(n_states)
                ]
            elif distribution.blank().d > 1:
                dists = [
                    distribution.from_samples(X_concat[y == i])
                    for i in range(n_states)
                ]
            else:
                # Previously printed "error" and then failed on an unbound
                # name; fail explicitly instead.
                raise ValueError(
                    "cannot fit a univariate distribution to "
                    "%d-dimensional data" % d)
        else:
            raise ValueError(
                "unknown initialisation technique: %r" % (technique,))

        return dists