Beispiel #1
0
 def __init__(self, data=np.asarray([[0, 0]]), cls_label=np.asarray([0]),
              ses_label=np.asarray([0]), buff_size=BUFF_SIZE,
              n_components=(K_CLS, K_SES, K_RES), beta=BETA,
              NMF_updates='beta', n_iter=N_ITER, lambdas=[0, 0, 0],
              normalize=False, fixed_factors=None, verbose=0,
              dist_mode='segment',Wn=None):
     """Store the settings and allocate the Theano shared variables.

     Parameters
     ----------
     data : array
         Only ``data.shape`` is read here (rows, columns).
     cls_label, ses_label : array
         Label sequences, iterated in lockstep. One randomly initialised
         slice of W is created per distinct (cls, ses) pair.
     buff_size : int
         Requested buffer size; capped at the number of rows of `data`.
     n_components : sequence of int
         Component counts; their sum is the number of columns of H and
         of every W slice.
     beta : float
         Stored as a Theano shared scalar.
     NMF_updates, dist_mode : str
         When they equal 'groupNMF' and 'iter' respectively, the
         per-class and per-session sum tensors are allocated as well.
     n_iter : int
     lambdas : sequence
         Stored as a floatX array.
     normalize : bool
     fixed_factors : list or None
         ``None`` is replaced by an empty list.
     verbose : int
     Wn : object or None
         Stored as ``self.Wn`` only when it is not None.
     """
     float_x = theano.config.floatX

     # Plain settings.
     self.verbose = verbose
     self.normalize = normalize
     self.n_iter = n_iter
     self.NMF_updates = NMF_updates
     self.dist_mode = dist_mode
     self.fixed_factors = [] if fixed_factors is None else fixed_factors
     self.iters = {}
     self.scores = []

     # Sizes and numeric parameters.
     self.data_shape = data.shape
     self.buff_size = np.min((buff_size, data.shape[0]))
     self.n_components = np.asarray(n_components, dtype='int32')
     self.lambdas = np.asarray(lambdas, dtype=float_x)
     self.beta = theano.shared(np.asarray(beta, float_x), name="beta")

     n_features = self.data_shape[1]
     rank = np.sum(self.n_components)

     # W is drawn before H, one slice per distinct (cls, ses) pair.
     label_pairs = more_itertools.unique_everseen(
         itertools.izip(cls_label, ses_label))
     w_init = np.asarray([base.nnrandn((n_features, rank))
                          for _ in label_pairs])
     self.W = theano.shared(w_init.astype(float_x), name="W",
                            borrow=True, allow_downcast=True)

     h_init = np.asarray(base.nnrandn((self.data_shape[0], rank)))
     self.H = theano.shared(h_init.astype(float_x), name="H",
                            borrow=True, allow_downcast=True)
     self.factors_ = [self.H, self.W]

     if Wn is not None:
         self.Wn = Wn

     buffer_zeros = np.zeros((self.buff_size, n_features))
     self.X_buff = theano.shared(buffer_zeros.astype(float_x),
                                 name="X_buff")

     if self.NMF_updates == 'groupNMF' and self.dist_mode == 'iter':

         def zero_sums(labels, width, name):
             # One (n_features x width) zero matrix per label value
             # in 0..max(labels).
             zeros = np.zeros((np.max(labels)+1, n_features, width))
             return theano.shared(zeros.astype(float_x), name=name,
                                  borrow=True, allow_downcast=True)

         self.cls_sums = zero_sums(cls_label, self.n_components[0],
                                   "cls_sums")
         self.ses_sums = zero_sums(ses_label, self.n_components[1],
                                   "ses_sums")
         self.get_sum_function()

     # Build the Theano functions once all shared variables exist.
     self.get_updates_functions()
     self.get_norm_function()
     self.get_div_function()
Beispiel #2
0
    def __init__(self, data_shape, n_components=50, beta=2, n_iter=100,
                 fixed_factors=None, verbose=0,
                 l_sparse=0., sparse_idx=None):
        """Store the settings and allocate the Theano shared variables.

        Parameters
        ----------
        data_shape : tuple
            Shape of the data; entry 0 sizes H and entry 1 sizes W.
        n_components : int
            Number of columns of both factors (stored as an int32 array).
        beta : float
            Stored as a Theano shared scalar.
        n_iter : int
        fixed_factors : list or None
            ``None`` is replaced by an empty list.
        verbose : int
        l_sparse : float
            Stored as a Theano shared variable.
        sparse_idx : array or None
            Only looked at when `l_sparse` is strictly positive; wrapped
            in a shared variable unless it is None.
        """
        float_x = theano.config.floatX

        self.data_shape = data_shape
        self.n_components = np.asarray(n_components, dtype='int32')
        self.n_iter = n_iter
        self.verbose = verbose
        self.scores = []
        self.fixed_factors = [] if fixed_factors is None else fixed_factors

        self.beta = theano.shared(np.asarray(beta, float_x), name="beta")

        # One random matrix per data dimension, drawn in dimension order.
        init_values = [base.nnrandn((n_rows, self.n_components))
                       for n_rows in data_shape]
        self.w = theano.shared(init_values[1].astype(float_x),
                               name="W", borrow=True, allow_downcast=True)
        self.h = theano.shared(init_values[0].astype(float_x),
                               name="H", borrow=True, allow_downcast=True)
        self.factors = [self.h, self.w]

        self.x = theano.shared(np.zeros(data_shape).astype(float_x),
                               name="X")
        self.eps = theano.shared(np.asarray(1e-10, float_x), name="eps")

        self.l_sparse = theano.shared(l_sparse, name="l_sparse")
        if self.l_sparse.get_value() > 0:
            # sparse_idx is only set when the sparsity weight is active.
            self.sparse_idx = (
                None if sparse_idx is None
                else theano.shared(sparse_idx, name="sparse_idx"))

        self.get_updates_functions()
        self.get_div_function()
Beispiel #3
0
    def set_factors(self, X, fixed_factors=None):
        """Re-draw random values for every factor that is not fixed.

        Parameters
        ----------
        X : array
            The input data; its shape becomes ``self.data_shape`` and
            sizes the new factor values.
        fixed_factors : list (default None)
            Indices of the factors to leave untouched:

                fixed_factors = [0] -> H keeps its current value

                fixed_factors = [1] -> W keeps its current value
        """
        self.data_shape = X.shape
        frozen = [] if fixed_factors is None else fixed_factors
        # One candidate matrix per data dimension (index 0: H, index 1: W).
        candidates = [base.nnrandn((size, self.n_components))
                      for size in self.data_shape]
        for index, shared_var in ((1, self.w), (0, self.h)):
            if index not in frozen:
                shared_var.set_value(candidates[index])
        self.factors = [self.h, self.w]
Beispiel #4
0
 def __init__(self,
              data_shape,
              n_components=50,
              beta=2,
              n_iter=100,
              fixed_factors=None,
              verbose=0,
              cold_start=True):
     """Store the settings and allocate the Theano shared variables.

     Parameters
     ----------
     data_shape : tuple
         Shape of the data; entry 0 sizes H and entry 1 sizes W.
     n_components : int
         Number of columns of both factors (stored as an int32 array).
     beta : float
         Stored as a Theano shared scalar.
     n_iter : int
     fixed_factors : list or None
         ``None`` is replaced by an empty list.
     verbose : int
     cold_start : bool
         Stored as ``self.cold_start``; not used in this method.
     """
     self.data_shape = data_shape
     self.n_components = np.asarray(n_components, dtype='int32')
     self.beta = theano.shared(np.asarray(beta, theano.config.floatX),
                               name="beta")
     self.verbose = verbose
     self.n_iter = n_iter
     self.scores = []
     self.cold_start = cold_start
     if fixed_factors is None:
         fixed_factors = []
     self.fixed_factors = fixed_factors
     # Named "eps" (it used to be labelled "beta", which made the two
     # shared scalars indistinguishable in Theano graphs and debug prints).
     self.eps = theano.shared(np.asarray(1e-10, theano.config.floatX),
                              name="eps")
     # One random matrix per data dimension (index 0: H, index 1: W).
     fact_ = [base.nnrandn((dim, self.n_components)) for dim in data_shape]
     self.w = theano.shared(fact_[1].astype(theano.config.floatX),
                            name="W",
                            borrow=True,
                            allow_downcast=True)
     self.h = theano.shared(fact_[0].astype(theano.config.floatX),
                            name="H",
                            borrow=True,
                            allow_downcast=True)
     self.factors = [self.h, self.w]
     self.x = theano.shared(np.zeros(
         (data_shape)).astype(theano.config.floatX),
                            name="X")
     self.get_updates_functions()
     self.get_div_function()
Beispiel #5
0
    def set_factors(self, X, fixed_factors=None):
        """Reset the factors with new random values.

        Parameters
        ----------
        X : array
            The input data; ``X.shape`` is stored as ``self.data_shape``
            and determines the number of rows of each new factor value.
        fixed_factors : list (default None)
            Factors that must not be reset:

                fixed_factors = [0] -> H is left as it is

                fixed_factors = [1] -> W is left as it is
        """
        self.data_shape = X.shape
        n_comp = self.n_components
        # Drawn for every dimension, even when a factor ends up skipped.
        draws = [base.nnrandn((n_rows, n_comp)) for n_rows in self.data_shape]

        skip = fixed_factors if fixed_factors is not None else []
        reset_w = 1 not in skip
        reset_h = 0 not in skip
        if reset_w:
            self.w.set_value(draws[1])
        if reset_h:
            self.h.set_value(draws[0])

        self.factors = [self.h, self.w]
Beispiel #6
0
    def set_factors(self, data, W=None, H=None, fixed_factors=None):
        """Re-set theano based parameters according to the object attributes.

        Parameters
        ----------
        data : array
            The input data; only ``data.shape`` is read here. It is stored
            as ``self.data_shape`` and sizes the batches, the cache and the
            factors.

        W : array (optional)
            Value for factor W when custom initialisation is used

        H : array (optional)
            Value for factor H when custom initialisation is used

        fixed_factors : array  (default None)
            list of factors that are not updated
                e.g. fixed_factors = [0] -> H is not updated

                fixed_factors = [1] -> W is not updated

        Raises
        ------
        ValueError
            If a cache smaller than the data is requested but it is also
            smaller than one batch.
        """
        self.data_shape = data.shape
        self.nb_batch = int(
            np.ceil(np.true_divide(self.data_shape[0], self.batch_size)))
        self.batch_ind = np.zeros((self.nb_batch, self.batch_size))

        if 0 < self.cache1_size < self.data_shape[0]:
            if self.cache1_size < self.batch_size:
                raise ValueError('cache1_size should be at '
                                 'least equal to batch_size')
            # Round the cache down to a whole number of batches. Floor
            # division keeps the size an integer under true division too
            # (Python 3 or `from __future__ import division`); it is used
            # below as a slice bound and as an array dimension.
            self.cache1_size = (
                self.cache1_size // self.batch_size) * self.batch_size
            self.nb_cache1 = int(
                np.ceil(np.true_divide(self.data_shape[0], self.cache1_size)))
        else:
            # No cache requested, or the cache would hold all the data.
            self.cache1_size = self.data_shape[0]
            self.nb_cache1 = 1

        self.forget_factor = 1. / (self.sag_memory + 1)
        # One random matrix per data dimension (index 0: H, index 1: W),
        # overridden by the caller-supplied values when given.
        fact_ = [
            base.nnrandn((dim, self.n_components)) for dim in self.data_shape
        ]
        if H is not None:
            fact_[0] = H
        if W is not None:
            fact_[1] = W
        if fixed_factors is None:
            fixed_factors = []
        if 1 not in fixed_factors:
            self.w = theano.shared(fact_[1].astype(theano.config.floatX),
                                   name="W",
                                   borrow=True,
                                   allow_downcast=True)
        if 0 not in fixed_factors:
            self.h_cache1 = theano.shared(fact_[0][:self.cache1_size, ].astype(
                theano.config.floatX),
                                          name="H cache1",
                                          borrow=True,
                                          allow_downcast=True)
            # NOTE(review): this updates the previous factors_ list in
            # place just before the attribute is rebound below; kept in
            # case other code holds a reference to the old list.
            self.factors_[0] = fact_[0]
        self.factors_ = fact_
        self.x_cache1 = theano.shared(np.zeros(
            (self.cache1_size,
             self.data_shape[1])).astype(theano.config.floatX),
                                      name="X cache1")
        self.init()
Beispiel #7
0
    def __init__(self,
                 data_shape,
                 n_components=50,
                 beta=2,
                 n_iter=50,
                 fixed_factors=None,
                 cache1_size=0,
                 batch_size=100,
                 verbose=0,
                 init_mode='random',
                 W=None,
                 H=None,
                 solver='mu_batch',
                 nb_batch_w=1,
                 sag_memory=0):
        """Store the settings and allocate the Theano shared variables.

        Parameters
        ----------
        data_shape : tuple
            Shape of the data; entry 0 sizes H and entry 1 sizes W.
        n_components : int
            Number of columns of both factors (stored as an int32 array).
        beta : float
            Stored as a Theano shared scalar.
        n_iter : int
        fixed_factors : list or None
            ``None`` is replaced by an empty list.
        cache1_size : int
            Requested cache size in rows. When positive it is capped at
            ``data_shape[0]`` and rounded down to a whole number of
            batches; otherwise the cache covers all the rows.
        batch_size : int
        verbose : int
        init_mode : str
            With 'custom' the factors are initialised from `H` and `W`
            (both must then be arrays); any other value keeps the random
            initialisation.
        W, H : array or None
            Initial factor values, only read when ``init_mode='custom'``.
        solver : str
        nb_batch_w : int
        sag_memory : int
            Used to derive ``forget_factor = 1 / (sag_memory + 1)``.

        Raises
        ------
        ValueError
            If a positive `cache1_size` ends up smaller than `batch_size`.
        """
        self.data_shape = data_shape
        self.batch_size = batch_size
        self.nb_batch = int(
            np.ceil(np.true_divide(data_shape[0], self.batch_size)))
        self.batch_ind = np.zeros((self.nb_batch, self.batch_size))

        if cache1_size > 0:
            cache1_size = min((cache1_size, data_shape[0]))
            if cache1_size < self.batch_size:
                raise ValueError('cache1_size should be at '
                                 'least equal to batch_size')
            # Round the cache down to a whole number of batches. Floor
            # division keeps the size an integer under true division too
            # (Python 3 or `from __future__ import division`); it is used
            # below as a slice bound and as an array dimension.
            self.cache1_size = (
                cache1_size // self.batch_size) * self.batch_size
            self.nb_cache1 = int(
                np.ceil(np.true_divide(self.data_shape[0], self.cache1_size)))
        else:
            self.cache1_size = data_shape[0]
            self.nb_cache1 = 1

        self.n_components = np.asarray(n_components, dtype='int32')
        self.beta = theano.shared(np.asarray(beta, theano.config.floatX),
                                  name="beta")
        self.eps = theano.shared(np.asarray(1e-10, theano.config.floatX),
                                 name="eps")
        self.sag_memory = sag_memory
        self.forget_factor = 1. / (self.sag_memory + 1)
        self.verbose = verbose
        self.n_iter = n_iter
        self.solver = solver
        self.scores = []
        self.nb_batch_w = nb_batch_w
        if fixed_factors is None:
            fixed_factors = []
        self.fixed_factors = fixed_factors
        # One random matrix per data dimension (index 0: H, index 1: W).
        fact_ = [base.nnrandn((dim, self.n_components)) for dim in data_shape]
        self.init_mode = init_mode
        if self.init_mode == 'custom':
            fact_[0] = H
            fact_[1] = W
        self.w = theano.shared(fact_[1].astype(theano.config.floatX),
                               name="W",
                               borrow=True,
                               allow_downcast=True)
        # Only the first cache1_size rows of H live in the shared variable;
        # the full initial values are kept in self.factors_.
        self.h_cache1 = theano.shared(fact_[0][:self.cache1_size, ].astype(
            theano.config.floatX),
                                      name="H cache1",
                                      borrow=True,
                                      allow_downcast=True)
        self.factors_ = fact_
        self.x_cache1 = theano.shared(np.zeros(
            (self.cache1_size, data_shape[1])).astype(theano.config.floatX),
                                      name="X cache1")
        self.init()
Beispiel #8
0
    def transform(self,
                  X,
                  comp=[0, 1],
                  n_iter=None,
                  buff_size=None,
                  fname='prediction.h5',
                  dataset='',
                  average_comp=False,
                  average_act=False,
                  seg_length=625,
                  l_sparse=0,
                  sparse_idx=None):
        """Fit activations for `X` block by block and store them in HDF5.

        A BetaNMF model with W held fixed is fitted on successive row
        blocks of `X`; the resulting activations are written to the
        dataset ``"H_<dataset>"`` of the file `fname`. Nothing is returned.

        Parameters
        ----------
        X : array
            Data to project, sliced along its first axis.
        comp : list
            Passed to ``self.select`` (or ``self.average_and_select``) to
            obtain the dictionary W. Never modified here.
        n_iter : int or None
            Number of iterations; defaults to ``self.n_iter``.
        buff_size : int or None
            Buffer budget; defaults to ``self.buff_size``. It is divided by
            the number of columns of W to get the number of rows per block.
        fname : str
            Path of the HDF5 file opened with ``h5py.File``.
        dataset : str
            Suffix of the output dataset name.
        average_comp : bool
            Selects ``self.average_and_select`` instead of ``self.select``.
        average_act : bool
            When true, activations are averaged over consecutive groups of
            `seg_length` rows before being stored.
        seg_length : int
            Group length used when `average_act` is true.
        l_sparse, sparse_idx
            Forwarded unchanged to ``beta_nmf.BetaNMF``.
        """
        if n_iter is None:
            n_iter = self.n_iter
        if buff_size is None:
            buff_size = self.buff_size
        if average_comp:
            W = self.average_and_select(comp)
        else:
            W = self.select(comp)
        n_comp = W.shape[1]
        # Floor division throughout: block sizes are used as slice bounds
        # and array dimensions, so they must stay integers even when `/`
        # means true division.
        buff_size = buff_size // n_comp
        print(buff_size)
        f = h5py.File(fname)
        # try/finally so the file is closed even if fitting fails midway.
        try:
            if average_act:
                H_out = f.create_dataset("H_{0}".format(dataset),
                                         (X.shape[0] // seg_length, n_comp))
                # Make each block a whole number of segments.
                buff_size = int((buff_size // seg_length) * seg_length)
                out_size = buff_size // seg_length
            else:
                H_out = f.create_dataset("H_{0}".format(dataset),
                                         (X.shape[0], n_comp))
            nmf_pred = beta_nmf.BetaNMF((buff_size, X.shape[1]),
                                        n_components=n_comp,
                                        beta=self.beta.get_value(),
                                        n_iter=n_iter,
                                        fixed_factors=[1],
                                        buff_size=buff_size,
                                        verbose=self.verbose,
                                        l_sparse=l_sparse,
                                        sparse_idx=sparse_idx)
            nmf_pred.w.set_value(W.astype(theano.config.floatX))

            # i stays at -1 when X is shorter than one block, so that the
            # tail handling below then covers all of X.
            i = -1
            for i in range(X.shape[0] // buff_size):
                block = X[i*buff_size:(i+1)*buff_size, ]
                nmf_pred.data_shape = block.shape
                print("Bloc: {0}, size {1}".format(i, nmf_pred.data_shape))
                nmf_pred.h.set_value(
                    base.nnrandn((buff_size, nmf_pred.n_components)
                                 ).astype(theano.config.floatX))
                nmf_pred.fit(block)
                h_val = nmf_pred.h.get_value()
                if average_act:
                    H_out[i*out_size:(i+1)*out_size, ] = np.mean(
                        np.reshape(h_val,
                                   (out_size, seg_length, h_val.shape[1])),
                        axis=1)
                else:
                    H_out[i*buff_size:(i+1)*buff_size, ] = h_val

            # Remaining rows after the last full block.
            # NOTE(review): the tail is empty when X.shape[0] is a multiple
            # of buff_size, and with average_act the reshape below needs
            # the tail length to be a multiple of seg_length -- confirm the
            # callers guarantee this.
            tail = X[(i+1)*buff_size:, ]
            nmf_pred.data_shape = tail.shape
            print("{0} {1}".format(i+1, nmf_pred.data_shape))
            nmf_pred.h.set_value(
                base.nnrandn((nmf_pred.data_shape[0], nmf_pred.n_components)
                             ).astype(theano.config.floatX))
            nmf_pred.fit(tail)
            h_val = nmf_pred.h.get_value()
            if average_act:
                H_out[(i+1)*out_size:, ] = np.mean(
                    np.reshape(h_val,
                               (H_out.shape[0]-(i+1)*out_size,
                                seg_length,
                                h_val.shape[1])),
                    axis=1)
            else:
                H_out[(i+1)*buff_size:, ] = h_val
        finally:
            f.close()
from beta_nmf_minibatch import BetaNMF
from base import nnrandn
import numpy as np

from bokeh.plotting import figure, show
from bokeh.io import output_file

# Demo settings for the mini-batch BetaNMF solvers.
n_iter = 100
n_components = 10
beta = 2
batch_size = 100
# Equal to the number of rows of X below.
cache1_size = 500
sag_mem = 2
# Random data (500 x 20) and random initial factors shaped to match it.
X = nnrandn((500, 20))
H_init = nnrandn((X.shape[0], n_components))
W_init = nnrandn((X.shape[1], n_components))

# Run with the 'mu_batch' solver, starting from the custom H_init / W_init.
nmf = BetaNMF(X.shape,
              n_components,
              beta,
              n_iter,
              verbose=10,
              cache1_size=cache1_size,
              batch_size=batch_size,
              W=W_init,
              H=H_init,
              init_mode='custom',
              solver='mu_batch')
# Keep what fit() returns for this solver.
score_mu_batch = nmf.fit(X)

nmf = BetaNMF(X.shape,