def __init__(self, data=np.asarray([[0, 0]]), cls_label=np.asarray([0]),
             ses_label=np.asarray([0]), buff_size=BUFF_SIZE,
             n_components=(K_CLS, K_SES, K_RES), beta=BETA,
             NMF_updates='beta', n_iter=N_ITER, lambdas=[0, 0, 0],
             normalize=False, fixed_factors=None, verbose=0,
             dist_mode='segment', Wn=None):
    """Store the hyper-parameters and build the Theano shared variables.

    Parameters
    ----------
    data : array
        Only ``data.shape`` is read here; it sizes the factors and the
        data buffer.
    cls_label, ses_label : array
        Paired element-wise; one dictionary matrix is drawn for every
        distinct (cls_label, ses_label) pair.  Their maxima size
        ``cls_sums`` / ``ses_sums`` when those are allocated.
    buff_size : int
        Requested buffer length, capped at ``data.shape[0]``.
    n_components : sequence of int
        Component counts.  Their sum is the number of columns of H and
        of every W matrix; entries 0 and 1 size ``cls_sums`` and
        ``ses_sums`` respectively.
    beta : float
        Stored as a floatX shared scalar named "beta".
    NMF_updates : str
        Stored unchanged; the value 'groupNMF' (together with
        ``dist_mode == 'iter'``) triggers allocation of the sum buffers.
    n_iter : int
        Stored unchanged.
    lambdas : sequence of float
        Stored as a floatX array.
    normalize, verbose, dist_mode
        Stored unchanged.
    fixed_factors : list, optional
        ``None`` is replaced by an empty list.
    Wn : object, optional
        Stored as ``self.Wn`` only when it is not ``None``.
    """
    float_x = theano.config.floatX

    # ---- plain configuration ------------------------------------------
    self.data_shape = data.shape
    self.buff_size = np.min((buff_size, data.shape[0]))
    self.n_components = np.asarray(n_components, dtype='int32')
    self.beta = theano.shared(np.asarray(beta, float_x), name="beta")
    self.verbose = verbose
    self.normalize = normalize
    self.lambdas = np.asarray(lambdas, dtype=float_x)
    self.n_iter = n_iter
    self.NMF_updates = NMF_updates
    self.iters = {}
    self.scores = []
    self.dist_mode = dist_mode
    self.fixed_factors = [] if fixed_factors is None else fixed_factors

    # ---- initial factors, drawn with base.nnrandn (W first, then H) ----
    n_features = self.data_shape[1]
    rank = np.sum(self.n_components)
    label_pairs = more_itertools.unique_everseen(
        itertools.izip(cls_label, ses_label))
    # One (n_features, rank) matrix per distinct (class, session) pair.
    w_init = np.asarray(
        [base.nnrandn((n_features, rank)) for _ in label_pairs])
    self.W = theano.shared(w_init.astype(float_x), name="W",
                           borrow=True, allow_downcast=True)
    h_init = np.asarray(base.nnrandn((self.data_shape[0], rank)))
    self.H = theano.shared(h_init.astype(float_x), name="H",
                           borrow=True, allow_downcast=True)
    self.factors_ = [self.H, self.W]

    if Wn is not None:
        self.Wn = Wn

    # ---- data buffer --------------------------------------------------
    buffer_zeros = np.zeros((self.buff_size, n_features))
    self.X_buff = theano.shared(buffer_zeros.astype(float_x),
                                name="X_buff")

    # ---- per-class / per-session accumulators (groupNMF + 'iter') -----
    if self.NMF_updates == 'groupNMF' and self.dist_mode == 'iter':
        cls_zeros = np.zeros(
            (np.max(cls_label) + 1, n_features, self.n_components[0]))
        self.cls_sums = theano.shared(cls_zeros.astype(float_x),
                                      name="cls_sums",
                                      borrow=True, allow_downcast=True)
        ses_zeros = np.zeros(
            (np.max(ses_label) + 1, n_features, self.n_components[1]))
        self.ses_sums = theano.shared(ses_zeros.astype(float_x),
                                      name="ses_sums",
                                      borrow=True, allow_downcast=True)
        self.get_sum_function()

    # ---- build the functions that operate on the variables above ------
    self.get_updates_functions()
    self.get_norm_function()
    self.get_div_function()
def __init__(self, data_shape, n_components=50, beta=2, n_iter=100,
             fixed_factors=None, verbose=0, l_sparse=0., sparse_idx=None):
    """Configure the model and allocate its Theano shared variables.

    Parameters
    ----------
    data_shape : tuple of int
        Shape of the data matrix; ``data_shape[0]`` sizes H and
        ``data_shape[1]`` sizes W.
    n_components : int
        Number of columns of both factors (stored as an int32 array).
    beta : float
        Stored as a floatX shared scalar named "beta".
    n_iter : int
        Stored as ``self.n_iter``.
    fixed_factors : list of int, optional
        Stored as ``self.fixed_factors``; ``None`` becomes ``[]``.
    verbose : int
        Stored as ``self.verbose``.
    l_sparse : float
        Stored as a floatX shared scalar named "l_sparse".
    sparse_idx : array, optional
        Wrapped in a shared variable named "sparse_idx" only when
        ``l_sparse`` is strictly positive; in every other case
        ``self.sparse_idx`` is ``None``.
    """
    float_x = theano.config.floatX

    self.data_shape = data_shape
    self.n_components = np.asarray(n_components, dtype='int32')
    self.beta = theano.shared(np.asarray(beta, float_x), name="beta")
    self.verbose = verbose
    self.n_iter = n_iter
    self.scores = []
    if fixed_factors is None:
        fixed_factors = []
    self.fixed_factors = fixed_factors

    # One initial matrix per data axis: index 0 -> H, index 1 -> W.
    fact_ = [base.nnrandn((dim, self.n_components)) for dim in data_shape]
    self.w = theano.shared(fact_[1].astype(float_x), name="W",
                           borrow=True, allow_downcast=True)
    self.h = theano.shared(fact_[0].astype(float_x), name="H",
                           borrow=True, allow_downcast=True)
    self.factors = [self.h, self.w]
    self.x = theano.shared(np.zeros((data_shape)).astype(float_x),
                           name="X")
    self.eps = theano.shared(np.asarray(1e-10, float_x), name="eps")

    # Cast to floatX like beta and eps: sharing the raw Python number
    # would keep its native dtype (float64 / int) whatever floatX is.
    self.l_sparse = theano.shared(np.asarray(l_sparse, float_x),
                                  name="l_sparse")

    # Always define the attribute so it can be read even when the
    # sparsity term is switched off.
    self.sparse_idx = None
    if self.l_sparse.get_value() > 0 and sparse_idx is not None:
        self.sparse_idx = theano.shared(sparse_idx, name="sparse_idx")

    self.get_updates_functions()
    self.get_div_function()
def set_factors(self, X, fixed_factors=None):
    """Re-draw the factor values for data shaped like ``X``.

    Parameters
    ----------
    X : array
        The input data; only ``X.shape`` is used.
    fixed_factors : list of int, optional
        Factors that keep their current value:
        ``[0]`` leaves H untouched, ``[1]`` leaves W untouched.
        ``None`` (the default) re-draws both.
    """
    self.data_shape = X.shape

    # A candidate matrix is drawn for every axis of the data, whether or
    # not the matching factor ends up being replaced.
    drawn = []
    for axis_len in self.data_shape:
        drawn.append(base.nnrandn((axis_len, self.n_components)))

    frozen = [] if fixed_factors is None else fixed_factors
    if 1 not in frozen:
        self.w.set_value(drawn[1])
    if 0 not in frozen:
        self.h.set_value(drawn[0])

    self.factors = [self.h, self.w]
def __init__(self, data_shape, n_components=50, beta=2, n_iter=100,
             fixed_factors=None, verbose=0, cold_start=True):
    """Configure the model and allocate its Theano shared variables.

    Parameters
    ----------
    data_shape : tuple of int
        Shape of the data matrix; ``data_shape[0]`` sizes H and
        ``data_shape[1]`` sizes W.
    n_components : int
        Number of columns of both factors (stored as an int32 array).
    beta : float
        Stored as a floatX shared scalar named "beta".
    n_iter : int
        Stored as ``self.n_iter``.
    fixed_factors : list of int, optional
        Stored as ``self.fixed_factors``; ``None`` becomes ``[]``.
    verbose : int
        Stored as ``self.verbose``.
    cold_start : bool
        Stored as ``self.cold_start``.
    """
    float_x = theano.config.floatX

    self.data_shape = data_shape
    self.n_components = np.asarray(n_components, dtype='int32')
    self.beta = theano.shared(np.asarray(beta, float_x), name="beta")
    self.verbose = verbose
    self.n_iter = n_iter
    self.scores = []
    self.cold_start = cold_start
    if fixed_factors is None:
        fixed_factors = []
    self.fixed_factors = fixed_factors

    # Labelled "eps" (it used to carry the name "beta", which made the
    # two shared scalars indistinguishable in Theano debug output).
    self.eps = theano.shared(np.asarray(1e-10, float_x), name="eps")

    # One initial matrix per data axis: index 0 -> H, index 1 -> W.
    fact_ = [base.nnrandn((dim, self.n_components)) for dim in data_shape]
    self.w = theano.shared(fact_[1].astype(float_x), name="W",
                           borrow=True, allow_downcast=True)
    self.h = theano.shared(fact_[0].astype(float_x), name="H",
                           borrow=True, allow_downcast=True)
    self.factors = [self.h, self.w]
    self.x = theano.shared(np.zeros((data_shape)).astype(float_x),
                           name="X")

    self.get_updates_functions()
    self.get_div_function()
def set_factors(self, X, fixed_factors=None):
    """Reset the factors to new values matching the shape of ``X``.

    Parameters
    ----------
    X : array
        The input data; only its shape is read.
    fixed_factors : list of int, optional
        Indices of the factors that must not be reset, e.g.
        ``fixed_factors = [0]`` -> H keeps its value,
        ``fixed_factors = [1]`` -> W keeps its value.
        Defaults to ``None``, meaning both factors are reset.
    """
    if fixed_factors is None:
        fixed_factors = []

    self.data_shape = X.shape
    fresh = [base.nnrandn((size, self.n_components))
             for size in self.data_shape]

    # W (index 1) is handled before H (index 0).
    for index, shared_var in ((1, self.w), (0, self.h)):
        if index not in fixed_factors:
            shared_var.set_value(fresh[index])

    self.factors = [self.h, self.w]
def set_factors(self, data, W=None, H=None, fixed_factors=None):
    """Re-size the batch/cache bookkeeping for ``data`` and reset factors.

    Parameters
    ----------
    data : array
        Input data; only ``data.shape`` is used.
    W : array (optional)
        Value for factor W when custom initialisation is used.
    H : array (optional)
        Value for factor H when custom initialisation is used.
    fixed_factors : list of int (default None)
        Factors whose shared variable is not re-created, e.g.
        fixed_factors = [0] -> ``h_cache1`` is left as is,
        fixed_factors = [1] -> ``w`` is left as is.

    Raises
    ------
    ValueError
        If ``cache1_size`` is positive, smaller than the number of data
        rows, and smaller than ``batch_size``.
    """
    float_x = theano.config.floatX

    self.data_shape = data.shape
    self.nb_batch = int(
        np.ceil(np.true_divide(self.data_shape[0], self.batch_size)))
    self.batch_ind = np.zeros((self.nb_batch, self.batch_size))

    if self.cache1_size > 0 and self.cache1_size < self.data_shape[0]:
        if self.cache1_size < self.batch_size:
            raise ValueError('cache1_size should be at '
                             'least equal to batch_size')
        # Round down to a whole number of batches.  Floor division keeps
        # the result an integer under true division as well; it is used
        # below as a slice bound and as an array dimension.
        self.cache1_size = (
            self.cache1_size // self.batch_size * self.batch_size)
        self.nb_cache1 = int(
            np.ceil(np.true_divide(self.data_shape[0], self.cache1_size)))
    else:
        # No (usable) cache size requested: cache the whole data set.
        self.cache1_size = self.data_shape[0]
        self.nb_cache1 = 1

    self.forget_factor = 1. / (self.sag_memory + 1)

    # One initial matrix per data axis (0 -> H, 1 -> W); caller-supplied
    # values take precedence over the drawn ones.
    fact_ = [
        base.nnrandn((dim, self.n_components)) for dim in self.data_shape
    ]
    if H is not None:
        fact_[0] = H
    if W is not None:
        fact_[1] = W

    if fixed_factors is None:
        fixed_factors = []
    if 1 not in fixed_factors:
        self.w = theano.shared(fact_[1].astype(float_x), name="W",
                               borrow=True, allow_downcast=True)
    if 0 not in fixed_factors:
        # Only the first cache1_size rows of H live in the shared cache.
        self.h_cache1 = theano.shared(
            fact_[0][:self.cache1_size, ].astype(float_x),
            name="H cache1", borrow=True, allow_downcast=True)
        self.factors_[0] = fact_[0]
    # NOTE(review): factors_ is replaced wholesale here, so for a factor
    # listed in fixed_factors the stored entry becomes the freshly drawn
    # (or caller-supplied) array rather than the previous one -- confirm
    # this is intended.
    self.factors_ = fact_

    self.x_cache1 = theano.shared(
        np.zeros((self.cache1_size, self.data_shape[1])).astype(float_x),
        name="X cache1")
    self.init()
def __init__(self, data_shape, n_components=50, beta=2, n_iter=50,
             fixed_factors=None, cache1_size=0, batch_size=100, verbose=0,
             init_mode='random', W=None, H=None, solver='mu_batch',
             nb_batch_w=1, sag_memory=0):
    """Configure the mini-batch model and allocate its shared variables.

    Parameters
    ----------
    data_shape : tuple of int
        Shape of the data matrix; ``data_shape[0]`` sizes H and
        ``data_shape[1]`` sizes W.
    n_components : int
        Number of columns of both factors (stored as an int32 array).
    beta : float
        Stored as a floatX shared scalar named "beta".
    n_iter : int
        Stored as ``self.n_iter``.
    fixed_factors : list of int, optional
        Stored as ``self.fixed_factors``; ``None`` becomes ``[]``.
    cache1_size : int
        Number of data rows held in the cache.  A value <= 0 caches all
        ``data_shape[0]`` rows; a positive value is capped at
        ``data_shape[0]`` and rounded down to a multiple of
        ``batch_size``.
    batch_size : int
        Number of rows per batch.
    verbose : int
        Stored as ``self.verbose``.
    init_mode : str
        With 'custom', ``H`` and ``W`` replace the drawn initial factors
        (both must then be arrays, since ``.astype`` is called on them).
        Any other value keeps the values drawn with ``base.nnrandn``.
    W, H : array, optional
        Initial factors used when ``init_mode == 'custom'``.
    solver : str
        Stored as ``self.solver``.
    nb_batch_w : int
        Stored as ``self.nb_batch_w``.
    sag_memory : int
        Stored; also sets ``forget_factor = 1 / (sag_memory + 1)``.

    Raises
    ------
    ValueError
        If a positive ``cache1_size`` (after capping) is smaller than
        ``batch_size``.
    """
    float_x = theano.config.floatX

    self.data_shape = data_shape
    self.n_components = np.asarray(n_components, dtype='int32')
    self.batch_size = batch_size
    self.nb_batch = int(
        np.ceil(np.true_divide(data_shape[0], self.batch_size)))
    self.batch_ind = np.zeros((self.nb_batch, self.batch_size))

    if cache1_size > 0:
        cache1_size = min((cache1_size, data_shape[0]))
        if cache1_size < self.batch_size:
            raise ValueError('cache1_size should be at '
                             'least equal to batch_size')
        # Round down to a whole number of batches.  Floor division keeps
        # the result an integer under true division as well; it is used
        # below as a slice bound and as an array dimension.
        self.cache1_size = cache1_size // self.batch_size * self.batch_size
        self.nb_cache1 = int(
            np.ceil(np.true_divide(self.data_shape[0], self.cache1_size)))
    else:
        # No cache size requested: cache the whole data set.
        self.cache1_size = data_shape[0]
        self.nb_cache1 = 1

    self.beta = theano.shared(np.asarray(beta, float_x), name="beta")
    self.eps = theano.shared(np.asarray(1e-10, float_x), name="eps")
    self.sag_memory = sag_memory
    self.forget_factor = 1. / (self.sag_memory + 1)
    self.verbose = verbose
    self.n_iter = n_iter
    self.solver = solver
    self.scores = []
    self.nb_batch_w = nb_batch_w
    if fixed_factors is None:
        fixed_factors = []
    self.fixed_factors = fixed_factors

    # One initial matrix per data axis: index 0 -> H, index 1 -> W.
    fact_ = [base.nnrandn((dim, self.n_components)) for dim in data_shape]
    self.init_mode = init_mode
    if self.init_mode == 'custom':
        fact_[0] = H
        fact_[1] = W

    self.w = theano.shared(fact_[1].astype(float_x), name="W",
                           borrow=True, allow_downcast=True)
    # Only the first cache1_size rows of H live in the shared cache.
    self.h_cache1 = theano.shared(
        fact_[0][:self.cache1_size, ].astype(float_x),
        name="H cache1", borrow=True, allow_downcast=True)
    self.factors_ = fact_
    self.x_cache1 = theano.shared(
        np.zeros((self.cache1_size, data_shape[1])).astype(float_x),
        name="X cache1")
    self.init()
def transform(self, X, comp=[0, 1], n_iter=None, buff_size=None,
              fname='prediction.h5', dataset='', average_comp=False,
              average_act=False, seg_length=625, l_sparse=0,
              sparse_idx=None):
    """Compute activations for ``X`` block by block and write them to HDF5.

    A ``beta_nmf.BetaNMF`` model with W held fixed is fitted on
    consecutive row blocks of ``X``; the resulting H of each block is
    stored in the dataset ``"H_<dataset>"`` of the file ``fname``.
    Nothing is returned.

    Parameters
    ----------
    X : array
        Data matrix, processed along its first axis.
    comp : list
        Passed to ``self.average_and_select`` or ``self.select`` to
        obtain the dictionary W.
    n_iter : int, optional
        Iterations per block; defaults to ``self.n_iter``.
    buff_size : int, optional
        Buffer budget; defaults to ``self.buff_size``.  The number of
        rows per block is ``buff_size // W.shape[1]`` (further rounded
        down to a multiple of ``seg_length`` when ``average_act``).
    fname : str
        Path of the HDF5 file, opened in append mode.
    dataset : str
        Suffix of the output dataset name.
    average_comp : bool
        Selects ``self.average_and_select`` instead of ``self.select``.
    average_act : bool
        If true, activations are averaged over consecutive segments of
        ``seg_length`` rows, and the output has
        ``X.shape[0] // seg_length`` rows; trailing rows that do not
        fill a whole segment are dropped.
    seg_length : int
        Segment length used when ``average_act`` is true.
    l_sparse, sparse_idx
        Forwarded to ``beta_nmf.BetaNMF``.
    """
    if n_iter is None:
        n_iter = self.n_iter
    if buff_size is None:
        buff_size = self.buff_size

    if average_comp:
        W = self.average_and_select(comp)
    else:
        W = self.select(comp)
    n_comp = W.shape[1]

    # Floor division: the block length must stay an integer (it is used
    # in range() and as a slice bound).
    buff_size = buff_size // n_comp
    # Single-argument, parenthesised prints behave the same with the
    # print statement and the print function.
    print(buff_size)

    # Explicit append mode (the default mode depends on the h5py
    # version); the context manager closes the file even if a fit fails.
    with h5py.File(fname, 'a') as f:
        if average_act:
            H_out = f.create_dataset("H_{0}".format(dataset),
                                     (X.shape[0] // seg_length, n_comp))
            # Blocks must hold a whole number of segments.
            buff_size = int((buff_size // seg_length) * seg_length)
            out_size = buff_size // seg_length
        else:
            H_out = f.create_dataset("H_{0}".format(dataset),
                                     (X.shape[0], n_comp))

        nmf_pred = beta_nmf.BetaNMF((buff_size, X.shape[1]),
                                    n_components=n_comp,
                                    beta=self.beta.get_value(),
                                    n_iter=n_iter,
                                    fixed_factors=[1],
                                    buff_size=buff_size,
                                    verbose=self.verbose,
                                    l_sparse=l_sparse,
                                    sparse_idx=sparse_idx)
        nmf_pred.w.set_value(W.astype(theano.config.floatX))

        # i stays at -1 when there is no full block, so the tail code
        # below then starts at row 0.
        i = -1
        for i in range(X.shape[0] // buff_size):
            start, stop = i * buff_size, (i + 1) * buff_size
            block = X[start:stop, ]
            nmf_pred.data_shape = block.shape
            print("Bloc: {0}, size {1}".format(i, nmf_pred.data_shape))
            nmf_pred.h.set_value(
                base.nnrandn((buff_size, nmf_pred.n_components)).astype(
                    theano.config.floatX))
            nmf_pred.fit(block)
            h_block = nmf_pred.h.get_value()
            if average_act:
                H_out[i * out_size:(i + 1) * out_size, ] = np.mean(
                    np.reshape(h_block,
                               (out_size, seg_length, h_block.shape[1])),
                    axis=1)
            else:
                H_out[start:stop, ] = h_block

        # Remaining rows that did not fill a complete block.
        tail = X[(i + 1) * buff_size:, ]
        nmf_pred.data_shape = tail.shape
        print("{0} {1}".format(i + 1, nmf_pred.data_shape))
        nmf_pred.h.set_value(
            base.nnrandn((nmf_pred.data_shape[0],
                          nmf_pred.n_components)).astype(
                theano.config.floatX))
        nmf_pred.fit(tail)
        h_tail = nmf_pred.h.get_value()
        if average_act:
            # H_out was allocated with X.shape[0] // seg_length rows, so
            # only whole segments can be stored: trim the tail to the
            # segments still missing, otherwise the reshape fails when
            # X.shape[0] is not a multiple of seg_length.
            n_tail_seg = H_out.shape[0] - (i + 1) * out_size
            h_tail = h_tail[:n_tail_seg * seg_length]
            H_out[(i + 1) * out_size:, ] = np.mean(
                np.reshape(h_tail,
                           (n_tail_seg, seg_length, h_tail.shape[1])),
                axis=1)
        else:
            H_out[(i + 1) * buff_size:, ] = h_tail
from beta_nmf_minibatch import BetaNMF from base import nnrandn import numpy as np from bokeh.plotting import figure, show from bokeh.io import output_file n_iter = 100 n_components = 10 beta = 2 batch_size = 100 cache1_size = 500 sag_mem = 2 X = nnrandn((500, 20)) H_init = nnrandn((X.shape[0], n_components)) W_init = nnrandn((X.shape[1], n_components)) nmf = BetaNMF(X.shape, n_components, beta, n_iter, verbose=10, cache1_size=cache1_size, batch_size=batch_size, W=W_init, H=H_init, init_mode='custom', solver='mu_batch') score_mu_batch = nmf.fit(X) nmf = BetaNMF(X.shape,