def test_io_seqgenerator_w_batches():
    """Check that sequence weights come back from ``batches()`` unchanged.

    Builds 500 random (15, 8) sequences with non-negative random weights and
    asserts that concatenating element 1 of every batch reproduces the
    original weight vector.
    """
    n_sequences = 500
    X = [numpy.random.randn(15, 8) for _ in range(n_sequences)]
    w = numpy.abs(numpy.random.randn(n_sequences))

    data = SequenceGenerator(X, w)

    batch_weights = [batch[1] for batch in data.batches()]
    w_ = numpy.concatenate(batch_weights)

    assert_almost_equal(w, w_)
def test_io_seqgenerator_wy_labeled():
    """Check that ``labeled_batches()`` yields only the labeled sequences.

    100 of the 500 label entries are replaced with ``None``.  The test
    asserts that fewer items come back than were supplied, and that the
    returned sequences, weights and labels match, in order, the inputs
    whose label is not ``None``.
    """
    n_sequences = 500
    X = [numpy.random.randn(15, 8) for _ in range(n_sequences)]
    y = [numpy.random.randint(6, size=15) for _ in range(n_sequences)]
    w = numpy.abs(numpy.random.randn(n_sequences))

    # Strip the labels from 100 distinct, randomly chosen sequences.
    for dropped in numpy.random.choice(n_sequences, size=100, replace=False):
        y[dropped] = None

    data = SequenceGenerator(X, w, y)

    X_ = numpy.concatenate([batch[0] for batch in data.labeled_batches()])
    w_ = numpy.concatenate([batch[1] for batch in data.labeled_batches()])
    y_ = numpy.concatenate([batch[2] for batch in data.labeled_batches()])

    assert_true(len(X) > len(X_))
    assert_true(len(w) > len(w_))
    assert_true(len(y) > len(y_))

    # Position ``out`` in the generator output corresponds to the
    # ``out``-th input whose label survived.
    labeled_rows = [j for j in range(n_sequences) if y[j] is not None]
    for out, j in enumerate(labeled_rows):
        assert_almost_equal(X[j], X_[out])
        assert_almost_equal(w[j], w_[out])
        assert_almost_equal(y[j], y_[out])
def test_io_seqgenerator_w_symbol_batches():
    """Check that weights survive ``batches()`` for symbol sequences.

    Same assertion as the numeric weight test, but every sequence is a list
    of 18 characters drawn from A/C/G/T.
    """
    alphabet = ["A", "C", "G", "T"]
    X = [
        [numpy.random.choice(alphabet) for _ in range(18)]
        for _ in range(500)
    ]
    w = numpy.abs(numpy.random.randn(500))

    data = SequenceGenerator(X, w)

    batch_weights = [batch[1] for batch in data.batches()]
    w_ = numpy.concatenate(batch_weights)

    assert_almost_equal(w, w_)
def test_io_seqgenerator_y_batches():
    """Check that sequences and labels round-trip through ``batches()``.

    No weights are supplied; labels are passed by keyword.  Element 0 of
    each batch is compared against the input sequences and element 2
    against the input labels.
    """
    n_sequences = 500
    X = [numpy.random.randn(15, 8) for _ in range(n_sequences)]
    y = [numpy.random.randint(6, size=15) for _ in range(n_sequences)]

    data = SequenceGenerator(X, y=y)

    batch_sequences = [batch[0] for batch in data.batches()]
    X_ = numpy.concatenate(batch_sequences)
    batch_labels = [batch[2] for batch in data.batches()]
    y_ = numpy.concatenate(batch_labels)

    assert_array_equal(X, X_)
    assert_almost_equal(y, y_)
def test_io_seqgenerator_y_symbol_batches():
    """Check that symbol sequences and labels round-trip through ``batches()``.

    Sequences are lists of 18 characters drawn from A/C/G/T; labels are
    integer arrays of size 15 passed by keyword, without weights.
    """
    alphabet = ["A", "C", "G", "T"]
    X = [
        [numpy.random.choice(alphabet) for _ in range(18)]
        for _ in range(500)
    ]
    y = [numpy.random.randint(6, size=15) for _ in range(500)]

    data = SequenceGenerator(X, y=y)

    batch_sequences = [batch[0] for batch in data.batches()]
    X_ = numpy.concatenate(batch_sequences)
    batch_labels = [batch[2] for batch in data.batches()]
    y_ = numpy.concatenate(batch_labels)

    assert_array_equal(X, X_)
    assert_almost_equal(y, y_)
def test_io_seqgenerator_x_symbol_batches():
    """Check the batches produced when only symbol sequences are given.

    With neither weights nor labels supplied, element 0 of each batch must
    reproduce the input sequences and element 1 must concatenate to a
    vector of 500 ones.
    """
    alphabet = ["A", "C", "G", "T"]
    X = [
        [numpy.random.choice(alphabet) for _ in range(18)]
        for _ in range(500)
    ]
    # Expected weights when none are passed to the generator.
    w = numpy.ones(500)

    data = SequenceGenerator(X)

    batch_sequences = [batch[0] for batch in data.batches()]
    X_ = numpy.concatenate(batch_sequences)
    batch_weights = [batch[1] for batch in data.batches()]
    w_ = numpy.concatenate(batch_weights)

    assert_array_equal(X, X_)
    assert_almost_equal(w, w_)
def test_io_seqgenerator_wy_batches():
    """Check that sequences, weights and labels all round-trip via ``batches()``.

    Elements 0, 1 and 2 of every batch are concatenated and compared with
    the input sequences, weights and labels respectively.
    """
    n_sequences = 500
    X = [numpy.random.randn(15, 8) for _ in range(n_sequences)]
    y = [numpy.random.randint(6, size=15) for _ in range(n_sequences)]
    w = numpy.abs(numpy.random.randn(n_sequences))

    data = SequenceGenerator(X, w, y)

    batch_sequences = [batch[0] for batch in data.batches()]
    X_ = numpy.concatenate(batch_sequences)
    batch_weights = [batch[1] for batch in data.batches()]
    w_ = numpy.concatenate(batch_weights)
    batch_labels = [batch[2] for batch in data.batches()]
    y_ = numpy.concatenate(batch_labels)

    assert_almost_equal(X, X_)
    assert_almost_equal(w, w_)
    assert_almost_equal(y, y_)
def test_io_seqgenerator_wy_symbol_unlabeled():
    """Check that ``unlabeled_batches()`` yields only the unlabeled sequences.

    100 of the 500 label entries are replaced with ``None``.  The test
    asserts that fewer items come back than were supplied, and that the
    returned symbol sequences and weights match, in order, the inputs
    whose label is ``None``.
    """
    alphabet = ["A", "C", "G", "T"]
    X = [
        [numpy.random.choice(alphabet) for _ in range(18)]
        for _ in range(500)
    ]
    y = [numpy.random.randint(6, size=15) for _ in range(500)]
    w = numpy.abs(numpy.random.randn(500))

    # Strip the labels from 100 distinct, randomly chosen sequences.
    for dropped in numpy.random.choice(500, size=100, replace=False):
        y[dropped] = None

    data = SequenceGenerator(X, w, y)

    X_ = numpy.concatenate([batch[0] for batch in data.unlabeled_batches()])
    w_ = numpy.concatenate([batch[1] for batch in data.unlabeled_batches()])

    assert_true(len(X) > len(X_))
    assert_true(len(w) > len(w_))

    # Position ``out`` in the generator output corresponds to the
    # ``out``-th input whose label was removed.
    unlabeled_rows = [j for j in range(500) if y[j] is None]
    for out, j in enumerate(unlabeled_rows):
        assert_array_equal(X[j], X_[out])
        assert_almost_equal(w[j], w_[out])
def test_io_seqgenerator_shape():
    """Check ``shape`` for numeric sequences.

    For 500 sequences of shape (15, 8) the generator must report
    ``(500, 8)``.
    """
    X = [numpy.random.randn(15, 8) for _ in range(500)]

    data = SequenceGenerator(X)

    expected_shape = (len(X), 8)
    assert_array_equal(data.shape, expected_shape)
def test_io_seqgenerator_symbol_shape():
    """Check ``shape`` for symbol sequences.

    For 500 sequences of 15 characters each the generator must report
    ``(500, 1)``.
    """
    alphabet = ['A', 'C', 'G', 'T']
    X = [
        [numpy.random.choice(alphabet) for _ in range(15)]
        for _ in range(500)
    ]

    data = SequenceGenerator(X)

    expected_shape = (len(X), 1)
    assert_array_equal(data.shape, expected_shape)
def _initDists(self, X, distribution=MultivariateGaussianDistribution,
               technique="R_MV-GMM"):
    """Build one initial emission distribution per hidden state.

    Parameters
    ----------
    X
        Training observations.  The k-means based techniques index it as
        ``X[y == i]``, so it presumably has to be a numpy array for those
        (TODO confirm against callers).  The "KMEANS" technique also
        accepts a ``BaseGenerator``.
    distribution
        Distribution class used by the "GMM-HMM-k" and "KMEANS" techniques;
        ignored by the others.
    technique : str, optional
        Initialisation strategy.  The default, "R_MV-GMM", is the value the
        method previously had hard-coded, so existing callers are
        unaffected.  Supported values:

        * "GMM"       - one mixture of ``self.nmix`` NormalDistributions
          fitted to ``X`` and copied for every state.
        * "MV-GMM"    - same with MultivariateGaussianDistribution.
        * "MVG"       - one MultivariateGaussianDistribution per cluster
          returned by ``self._initkmeans``.
        * "R_GMM"     - mixtures of NormalDistributions with a random
          integer mean drawn from [1, 10) and unit standard deviation.
        * "R_MV-GMM"  - mixtures of ``self.nmix`` components produced by
          ``randMVG()``.
        * "GMM-HMM-k" - one mixture of ``distribution`` per cluster
          returned by ``self._initkmeans``.
        * "KMEANS"    - one ``distribution`` per cluster found by
          ``Kmeans``.

        The last two were unreachable code marked "not completed" in the
        previous revision; they are exposed here unchanged in logic and
        should be treated as experimental.

    Returns
    -------
    list
        ``self.statesNumber`` distribution objects.

    Raises
    ------
    ValueError
        If ``technique`` is not one of the names above, or if the "KMEANS"
        technique cannot use ``distribution`` for the data at hand.
    """
    n_states = self.statesNumber

    if technique == "GMM":
        # Fit a single univariate mixture, then hand each state its own copy.
        gmm = GeneralMixtureModel.from_samples(
            distributions=[NormalDistribution for _ in range(self.nmix)],
            X=X)
        return [gmm.copy() for _ in range(n_states)]

    if technique == "MV-GMM":
        # NOTE(review): n_components=3 is hard-coded although self.nmix
        # distribution classes are supplied - confirm this is intended.
        gmm = GeneralMixtureModel.from_samples(
            distributions=[
                MultivariateGaussianDistribution for _ in range(self.nmix)
            ],
            X=X,
            n_components=3)
        return [gmm.copy() for _ in range(n_states)]

    if technique == "MVG":
        # Fix: the cluster labels were previously discarded, leaving ``y``
        # unbound in the comprehension below.
        y = self._initkmeans(X=X, numClasses=n_states)
        return [
            MultivariateGaussianDistribution.from_samples(X=X[y == i])
            for i in range(n_states)
        ]

    if technique == "R_GMM":
        return [
            GeneralMixtureModel([
                NormalDistribution(np.random.randint(1, 10), 1)
                for _ in range(self.nmix)
            ])
            for _ in range(n_states)
        ]

    if technique == "R_MV-GMM":
        # NOTE(review): randMVG is not defined inside this method; it is
        # assumed to be available from the enclosing scope - confirm.
        return [
            GeneralMixtureModel([randMVG() for _ in range(self.nmix)])
            for _ in range(n_states)
        ]

    if technique == "GMM-HMM-k":
        y = self._initkmeans(X, n_states)
        return [
            GeneralMixtureModel.from_samples(distribution,
                                             X=X[y == i],
                                             n_components=self.nmix)
            for i in range(n_states)
        ]

    if technique == "KMEANS":
        return self._initDistsFromKmeans(X, distribution)

    raise ValueError("unknown initialisation technique: %r" % (technique,))


def _initDistsFromKmeans(self, X, distribution):
    """Cluster all observations with Kmeans and fit one distribution per cluster."""
    if isinstance(X, BaseGenerator):
        data_generator = X
    else:
        data_generator = SequenceGenerator(X, None, None)

    # Pull every batch once and flatten the sequences into one sample matrix.
    batches = data_generator.batches()
    X_ = []
    for _ in range(len(data_generator)):
        X_.extend(next(batches)[0])

    X_concat = np.concatenate(X_)
    if X_concat.ndim == 1:
        X_concat = X_concat.reshape(X_concat.shape[0], 1)
    _, d = X_concat.shape

    clf = Kmeans(self.statesNumber, init="kmeans++", n_init=1)
    # NOTE(review): max_iterations=None is passed through exactly as in the
    # original code - confirm that Kmeans.fit accepts it.
    clf.fit(X_concat, max_iterations=None, batches_per_epoch=None)
    y = clf.predict(X_concat)

    if not callable(distribution):
        raise ValueError("distribution must be a callable distribution class")

    if d == 1:
        # Univariate data: pass a flat vector of samples per cluster.
        return [
            distribution.from_samples(X_concat[y == i][:, 0])
            for i in range(self.statesNumber)
        ]
    if distribution.blank().d > 1:
        return [
            distribution.from_samples(X_concat[y == i])
            for i in range(self.statesNumber)
        ]

    # Previously this path printed "error" and then failed on an unbound
    # local; fail explicitly instead.
    raise ValueError(
        "distribution is univariate but the data has %d dimensions" % d)