def test_nmf_many_padding(eng):
    """Fit NMF on five synthetic sources using padded chunks.

    The data is fit with ``chunk_size=(50, 100)`` and ``padding=(25, 25)``.
    The test expects ten regions in total and checks, for each true source,
    how many fitted centers lie closer than 10 to it (assuming ``cdist``
    returns fitted-by-truth pairwise distances).
    """
    data, _series, truth = make_gaussian(
        n=5, noise=0.5, seed=42, engine=eng, withparams=True)
    model = NMF().fit(data, chunk_size=(50, 100), padding=(25, 25))

    found = model.regions
    distances = cdist(found.center, truth.regions.center)

    assert found.count == 10
    assert allclose(sum(distances < 10), [2, 1, 2, 4, 1])
def test_merging(eng):
    """Check that merging fitted regions lowers the region count.

    Twenty synthetic sources are fit with padded chunks, which is expected
    to yield more than twenty regions. Merging with ``overlap=0.5`` must
    bring the count to at most 20, and merging with ``overlap=0.1`` must
    bring it below 18.
    """
    # Forward the engine fixture so the data is built for the backend under
    # test, as the other tests in this file do; previously `eng` was unused.
    data, series, truth = make_gaussian(
        n=20, seed=42, noise=0.5, engine=eng, withparams=True)
    algorithm = NMF(k=5, percentile=95, max_iter=50, overlap=0.1)
    model = algorithm.fit(data, chunk_size=(50, 100), padding=(15, 15))

    assert model.regions.count > 20
    assert model.merge(overlap=0.5).regions.count <= 20
    assert model.merge(overlap=0.1).regions.count < 18
def test_nmf_one(eng):
    """Fit NMF on a single synthetic source and compare its center to truth.

    The fit uses ``chunk_size=(100, 200)``. Exactly one region is expected,
    and its center must match the true center under ``allclose`` with
    ``0.1`` as the third positional argument (the relative tolerance if
    ``allclose`` is NumPy's).
    """
    data, _series, truth = make_gaussian(
        n=1, noise=0.5, seed=42, engine=eng, withparams=True)
    model = NMF().fit(data, chunk_size=(100, 200))

    assert model.regions.count == 1
    assert allclose(model.regions.center, truth.regions.center, 0.1)
def test_nmf_many_chunked(eng):
    """Fit NMF on five synthetic sources using ``chunk_size=(50, 100)``.

    Five regions are expected, and each true source should have exactly one
    fitted center closer than 10 to it (assuming ``cdist`` returns
    fitted-by-truth pairwise distances).
    """
    data, _series, truth = make_gaussian(
        n=5, noise=0.5, seed=42, engine=eng, withparams=True)
    model = NMF().fit(data, chunk_size=(50, 100))

    assert model.regions.count == 5

    distances = cdist(model.regions.center, truth.regions.center)
    assert allclose(sum(distances < 10), [1, 1, 1, 1, 1])
# Example: fit NMF to synthetic data and display the estimated sources.
from extraction.utils import make_gaussian
from extraction import NMF
import matplotlib.pyplot as plt
from showit import image

# generate noisy synthetic data
data = make_gaussian(noise=0.5)

# fit a model
model = NMF().fit(data, chunk_size=(100, 200))

# show estimated sources: region outlines drawn over the halved mean image
background = data.mean().toarray() / 2
outlines = model.regions.mask(
    (100, 200), fill=None, stroke='deeppink', base=background)
image(outlines)
plt.show()
def test_nmf_one(eng):
    """Fit NMF on a single synthetic source and compare its center to truth.

    The fit uses ``block_size=(100, 200)``. Exactly one region is expected,
    and its center must match the true center under ``allclose`` with
    ``0.1`` as the third positional argument (the relative tolerance if
    ``allclose`` is NumPy's).
    """
    # NOTE(review): other tests in this file pass `chunk_size` to `fit`, and
    # one of them is also named `test_nmf_one`; confirm `block_size` is still
    # accepted and that this definition is not shadowing the other.
    data, _series, truth = make_gaussian(
        n=1, noise=0.5, seed=42, engine=eng, withparams=True)
    model = NMF().fit(data, block_size=(100, 200))

    assert model.regions.count == 1
    assert allclose(model.regions.center, truth.regions.center, 0.1)
def test_nmf_many(eng):
    """Fit NMF on five synthetic sources using ``block_size=(100, 200)``.

    Five regions are expected, and each true source should have exactly one
    fitted center closer than 10 to it (assuming ``cdist`` returns
    fitted-by-truth pairwise distances).
    """
    # NOTE(review): other tests in this file pass `chunk_size` to `fit`;
    # confirm `block_size` is still an accepted keyword.
    data, _series, truth = make_gaussian(
        n=5, noise=0.5, seed=42, engine=eng, withparams=True)
    model = NMF().fit(data, block_size=(100, 200))

    assert model.regions.count == 5

    distances = cdist(model.regions.center, truth.regions.center)
    assert allclose(sum(distances < 10), [1, 1, 1, 1, 1])
# Example: fit NMF to synthetic data created on a Spark engine, then
# transform the data with the fitted model and print the region count.
from pyspark import SparkContext
from extraction.utils import make_gaussian
from extraction import NMF

# generate data, using the Spark context as the engine
sc = SparkContext()
data = make_gaussian(engine=sc)

# fit a model
model = NMF().fit(data)

# extract sources by transforming data
sources = model.transform(data)

# print() as a function call: the bare `print x` statement is a SyntaxError
# on Python 3, while this single-argument form behaves the same on Python 2.
print(model.regions.count)