class BatchBivariateLearner(OnlineLearner):
	"""
	For every X,Y pair, add to an existing set of X,Y and relearn the model 
	from scratch

	All data recieved is collected and recompiled into a new numpy vector
	every time. This gives best conceivable result for a linear system 
	given this optimisation scheme
	"""
	def __init__(self, w_spams_func, u_spams_func, **other_params):
		super(BatchBivariateLearner, self).__init__()
		
		self.allParams = other_params
		self.initDefaults()

		self.elementsSeen = 0
		self.X = None
		self.w = None
		self.u = None
		self.w_bias = None
		self.u_bias = None
		self.bias = None
		self.change_eval = BiMeanSquareEval(self)
		self.part_eval = RootMeanEval()
		self.w_func = w_spams_func
		self.u_func = u_spams_func
	
	def initDefaults(self):
		self.allParams.setdefault("bivar_it0", 3)
		self.allParams.setdefault("bivar_tol", 1e-3)
		self.allParams.setdefault("bivar_max_it", 10)

	def predict(self,X):
		"""Prediction is not yet implemented for this learner."""
		pass

	"""
	This function is just a combination of Calling
	setYX,
	then iterating through bivar_max_it iterations of calling
	calculateU and calculateW
	"""
	def process(self,Y,X=None,Xt=None,tests=None):
		self.setYX(Y,X,Xt)
		bivariter = 0
		sumSSE = 0
		esiter = list()
		es.state()["iterations"] = esiter
		# in the first iteration we calculate W by using ones on U
		U = ssp.csc_matrix(ones(self.u.shape))
		while True:
			esiterdict = dict()
			esiterdict["i"] = bivariter
			logger.debug("Starting iteration: %d"%bivariter)
			bivariter += 1
			W,w_bias,err = self.calculateW(U,tests=tests)
			esiterdict["w"] = W
			esiterdict["w_sparcity"] = (abs(W) > 0).sum()
			esiterdict["w_bias"] = w_bias
			esiterdict["w_test_err"] = err
			if "test" in err: logger.debug("W sparcity=%d,test_total_err=%2.2f,test_err=%s"%(esiterdict["w_sparcity"],err['test']["totalsse"],str(err['test']["diffsse"])))
			W = ssp.csc_matrix(W)
			U,u_bias,err = self.calculateU(W,tests=tests)
			esiterdict["u"] = U
			esiterdict["u_sparcity"] = (abs(U) > 0).sum()
			esiterdict["u_bias"] = u_bias
			esiterdict["u_test_err"] = err
			if "test" in err: logger.debug("U sparcity=%d,test_total_err=%2.2f,test_err=%s"%(esiterdict["u_sparcity"],err['test']["totalsse"],str(err['test']["diffsse"])))
			U = ssp.csc_matrix(U)
			self.u = U
			self.w = W
			self.w_bias = w_bias
			self.u_bias = u_bias
			esiter.append(esiterdict)
			if self.allParams['bivar_max_it'] <= bivariter:
				break
		return sumSSE
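
	# Usage sketch (hypothetical objects; w_func/u_func stand in for the
	# SPAMS wrapper functions this class expects):
	#   learner = BatchBivariateLearner(w_func, u_func, bivar_max_it=5)
	#   sse = learner.process(Y, X=X, tests={"test": (testX, testY)})
	# after which learner.w and learner.u hold the fitted sparse weights.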

	def optimise_lambda(self, lambda_w, lambda_u, Yparts, Xparts,w_lambda=None,u_lambda=None):
		logger.debug("... expanding Yparts")
		Yparts = Yparts.apply(BatchBivariateLearner._expandY)

		ls = LambdaSearch(self.part_eval)
		ntasks = Yparts.train_all.shape[1]
		ndays = Yparts.train_all.shape[0]//ntasks
		nusers = Xparts.train_all.shape[1]//ndays

		u = ssp.csc_matrix(ones((nusers,ntasks)))
		logger.debug("... Preparing VPrime")
		Vprime_parts = Xparts.apply(
			BatchBivariateLearner._calculateVprime,u
		)
		if w_lambda is None:
			logger.debug("... Optimising lambda for w")
			ls.optimise(self.w_func,lambda_w,Vprime_parts,Yparts,name="w")
		else:
			logger.debug("... Setting hardcoded w: %2.2f"%w_lambda)
			self.w_func.params['lambda1'] = w_lambda

		logger.debug("... Calculating w with optimal lambda")
		w,bias = self.w_func.call(Vprime_parts.train_all,Yparts.train_all)
		w = ssp.csc_matrix(w)
		logger.debug("... Preparing Dprime")
		Dprime_parts = Xparts.apply(
			BatchBivariateLearner._calculateDprime,w,u.shape
		)
		if u_lambda is None:
			logger.debug("... Optimising lambda for u")
			ls.optimise(self.u_func, lambda_u, Dprime_parts, Yparts,name="u")
		else:
			logger.debug("... Setting hardcoded w: %2.2f"%u_lambda)
			self.u_func.params['lambda1'] = u_lambda
		return [(u,self.u_func.params['lambda1']),(w,self.w_func.params['lambda1'])]
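
	# Usage sketch (an assumption about the surrounding pipeline, not
	# confirmed by this file): the Xparts/Yparts partitions are presumably
	# built via XYparts below, e.g.
	#   Xparts, Yparts = BatchBivariateLearner.XYparts(fold, X, Y)
	#   (u, lu), (w, lw) = learner.optimise_lambda(lambda_w, lambda_u, Yparts, Xparts)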

	
	"""
	The number of tasks is the columns of Y
	The number of days is the rows of Y
	The number of users is the columns of X (or the rows of Xt) over the number of days
		Put another way the columns of X contain users batched by days
	The number of words is the rows of X (or the columns of Xt) 
	"""
	def setYX(self,Y,X=None,Xt=None):
		X,Xt = BatchBivariateLearner._initX(X,Xt)
		Y = np.asfortranarray(Y)

		self.X = X
		self.Xt = Xt

		self.nusers = X.shape[1]//Y.shape[0]
		self.nwords = X.shape[0]
		self.ntasks = Y.shape[1]
		self.ndays = Y.shape[0]
		self.Yexpanded = self._expandY(Y)

		logger.debug("(ndays=%d,ntasks=%d,nusers=%d,nwords=%d)"%(
			self.ndays,self.ntasks,self.nusers,self.nwords)
		)
		self.u = ssp.csc_matrix(zeros((self.nusers,self.ntasks)))
		self.w = ssp.csc_matrix(zeros((self.nwords,self.ntasks)))
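
	# Shape sketch (hypothetical numbers): with Y of shape (30, 2) -- 30
	# days, 2 tasks -- and 5 users, X must have shape (nwords, 150): its
	# columns hold the 5 users for each of the 30 days, batched day by day.
	# Yexpanded then has shape (60, 2).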

	def calculateW(self,U=None,tests=None):
		if U is None: U = self.u
		Vprime = BatchBivariateLearner._calculateVprime(self.X,U)
		logger.debug("Calling w_func: %s"%self.w_func)
		W,w_bias = self.w_func.call(Vprime,self.Yexpanded)
		err = self.part_eval.evaluate(Vprime,self.Yexpanded,W,w_bias)
		testerr = {"train_all":err}
		if tests is not None:
			for testName,(testX,testY) in tests.items():
				testerr[testName] = self.part_eval.evaluate(
					self._calculateVprime(testX,U),
					self._expandY(testY),
					W,w_bias
				)
		return W,w_bias,testerr
	
	def calculateU(self,W=None,tests=None):
		if W is None: W = self.w
		Dprime = BatchBivariateLearner._calculateDprime(self.X,W,self.u.shape)
		logger.debug("Calling u_func: %s"%self.u_func)
		U,u_bias = self.u_func.call(Dprime,self.Yexpanded)
		err = self.part_eval.evaluate(Dprime,self.Yexpanded,U,u_bias)
		testerr = {"train_all":err}
		if tests is not None:
			for testName,(testX,testY) in tests.items():
				testerr[testName] = self.part_eval.evaluate(
					self._calculateDprime(testX,W,self.u.shape),
					self._expandY(testY),
					U,u_bias
				)
		return U,u_bias,testerr

	@classmethod
	def _expandY(cls,Y):
		"""
		Expand Y from (ndays,ntasks) to (ndays*ntasks,ntasks) such that
		column t holds the values of task t in its own band of rows
		(rows t*ndays to (t+1)*ndays) and NaN everywhere else.
		"""
		Yexpanded = ones(
			(
				multiply(*Y.shape),
				Y.shape[1]
			)
		) * nan
		for x in range(Y.shape[1]):
			ind = x * Y.shape[0]
			indnext = (x+1) * Y.shape[0]
			Yexpanded[ind:indnext,x] = Y[:,x]

		return np.asfortranarray(Yexpanded)
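
	# Worked example (illustrative values only): with 2 days and 2 tasks,
	# Y = [[1, 2],
	#      [3, 4]]
	# expands (each column into its own row band) to the 4x2 matrix
	# [[  1, nan],
	#  [  3, nan],
	#  [nan,   2],
	#  [nan,   4]]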
	@classmethod
	def _initX(cls,X=None,Xt=None):
		if X is None and Xt is None:
			raise ValueError("At least one of X or Xt must be provided")
		if Xt is None:
			Xt = ssp.csc_matrix(X.transpose())
		if X is None:
			X = ssp.csc_matrix(Xt.transpose())
		if not ssp.issparse(X) or not ssp.issparse(Xt):
			raise ValueError("X or Xt provided is not sparse, failing")
		return X,Xt


	@classmethod
	def _cols_for_day(cls,d,nusers):
		return slice(d*nusers,(d+1)*nusers)
	
	@classmethod
	def _rows_for_day(cls,d,ntasks):
		return slice(d*ntasks,(d+1)*ntasks)

	@classmethod
	def _user_day_slice(cls,nusers):
		def exp_slice_func(dp,dir):
			parts = []
			for d in dp:
				dslc = BatchBivariateLearner._cols_for_day(d,nusers)
				parts.extend(range(dslc.start,dslc.stop))
			if dir == "row":
				return (parts,slice(None,None))
			else:
				return (slice(None,None),parts)
		return exp_slice_func

	"""
	Expects an X such that users are held in the columns
	and a U which weights each user for each task
	"""
	@classmethod
	def _calculateVprime(cls, X, U):
		# logger.debug("Preparing Vprime (X . U)")
		nu = U.shape[0]
		ndays = X.shape[1]/nu
		# stack in the columns the (word,days) matricies for each task
		# so the dimensions are (word,days*tasks).
		# we then transpose such that the days*tasks are in the columns
		# and the words in the rows resulting in (days*tasks,word)
		return ssp.hstack([
			# For every day, extract the day's sub matrix of user/word weights
			# weight each user's words by the user's weight
			# ends up with a (words,days) matrix (csr)
			ssp.hstack([
				X[:,cls._cols_for_day(d,nu)].dot(U[:,t:t+1]) 
				for d in range(ndays)
			],format="csr")
			for t in range(U.shape[1])
		],format="csr").transpose()

	"""
	Expects an X such that users are held in the columns
	and a W which weights each word for each task
	"""
	@classmethod
	def _calculateDprime(cls, X, W, Ushape):
		# logger.debug("Preparing Dprime (X . W)")
		nu = Ushape[0]
		ndays = X.shape[1]/nu
		# stack in the columns the (days,user) matricies for each task
		# so the dimensions are (days*tasks,user).
		return ssp.vstack([
			# For every day, extract the day's sub matrix of 
			# user/word weights but now transpose
			# weight each word's users by the word's weight
			# ends up with a (days,user) matrix (csr)
			ssp.hstack([
				X[:,cls._cols_for_day(d,nu)].transpose().dot(W[:,t:t+1])
				for d in range(ndays)
			],format="csc").transpose()
			for t in range(W.shape[1])
		],format="csc")

	@classmethod
	def XYparts(cls,fold,X,Y):
		Yparts = fold.parts(Y)
		Xparts = fold.parts(
			X,dir="col",
			slicefunc=BatchBivariateLearner._user_day_slice(X.shape[1]//Y.shape[0])
		)
		return Xparts,Yparts