class trainAE(object):
    """Trainer for a sparse-input AutoEncoder: per-user (online) and
    mini-batch training loops."""

    def __init__(self, path, k, lr=0.1, batch_size=1, loss='bce', n_epochs=100):
        '''
        Arguments:
            path : path to training data
            k : hidden unit dimension
            lr : learning rate (unused here; presumably the AutoEncoder owns
                 its optimiser -- kept for interface compatibility)
            batch_size : batch size for training, currently set to 1
            loss : loss function (bce, rmse) to train the AutoEncoder
            n_epochs : number of epochs for training
        '''
        self.AE = AutoEncoder(path, k)
        # Define the (batched) autoencoder model graph.
        self.AE.model_batch()
        self.epochs = n_epochs

    def train(self):
        """Per-user training: one gradient step per observed user row."""
        # CSR format gives fast row slicing for per-user access.
        T = self.AE.T.tocsr()
        nonzero_indices = T.nonzero()
        # Hoisted out of the epoch loop: the set of users never changes.
        users = np.unique(nonzero_indices[0])
        for epoch in range(self.epochs):
            print("Running epoch %d" % (epoch))
            for i in users:
                # Column indices of the ratings user i has actually given.
                indices = T[i, :].nonzero()[1]
                # Corresponding rating values, flattened to 1-D.
                ratings = T[i, indices].toarray().reshape(-1)
                # Cast to the dtypes the compiled graph expects.
                indices = indices.astype(np.int32)
                ratings = ratings.astype(np.int32)
                loss = self.AE.ae(indices, ratings)
                print("Loss at epoch %d is %f" % (epoch, loss))

    # Batch training method
    def train_batch(self, batch_size):
        """Mini-batch training over dense slices of user rows."""
        T = self.AE.T.tocsr()
        nonzero_indices = T.nonzero()
        indices = np.unique(nonzero_indices[0])
        n_users = len(indices)
        for epoch in range(self.epochs):
            for ind, i in enumerate(range(0, n_users, batch_size)):
                # Dense float32 slice of up to batch_size user rows.
                ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
                loss = self.AE.ae_batch(ratings)
                # BUG FIX: `print loss` was a Python-2-only print statement,
                # inconsistent with the print() calls above.
                print(loss)
class trainAE(object):
    """Trainer for a sparse-input AutoEncoder: per-user and mini-batch
    training loops plus RMSE evaluation on held-out (user, item) pairs."""

    def __init__(self,
                 path,
                 k,
                 lr=0.01,
                 batch_size=1,
                 loss='bce',
                 n_epochs=500):
        '''
        Arguments:
            path : path to training data
            k : hidden unit dimension
            lr : learning rate (unused here; presumably the AutoEncoder owns
                 its optimiser -- kept for interface compatibility)
            batch_size : batch size for training, currently set to 1
            loss : loss function (bce, rmse) to train the AutoEncoder
            n_epochs : number of epochs for training
        '''
        self.AE = AutoEncoder(path, k)
        # Define the (batched) autoencoder model graph.
        self.AE.model_batch()
        self.epochs = n_epochs

    def sigmoid(self, x):
        """Numpy logistic sigmoid, 1 / (1 + e^-x) (same as scipy's expit)."""
        return 1.0 / (1.0 + np.exp(-x))

    def train(self):
        """Per-user training: one gradient step per observed user row,
        with an RMSE report after every epoch."""
        # CSR format gives fast row slicing for per-user access.
        T = self.AE.T.tocsr()
        nonzero_indices = T.nonzero()
        for epoch in range(self.epochs):
            print("Running epoch %d" % (epoch))
            for i in np.unique(nonzero_indices[0]):
                # Column indices of the ratings user i has actually given.
                indices = T[i, :].nonzero()[1]
                # Corresponding rating values, flattened to 1-D.
                ratings = T[i, indices].toarray().reshape(-1)
                # Cast to the dtypes the compiled graph expects.
                indices = indices.astype(np.int32)
                ratings = ratings.astype(np.int32)
                loss = self.AE.ae(indices, ratings)
                print("Loss at epoch %d is %f" % (epoch, loss))
            print("RMSE after one epoch is %f" % (self.RMSE()))

    # Batch training method
    def train_batch(self, batch_size):
        """Mini-batch training over dense user-row slices; logs the epoch's
        mean loss and held-out RMSE."""
        T = self.AE.T.tocsr()
        nonzero_indices = T.nonzero()
        indices = np.unique(nonzero_indices[0])
        n_users = len(indices)
        for epoch in range(self.epochs):
            losses = []
            last_batch = 0
            for ind, i in enumerate(range(0, n_users, batch_size)):
                # Dense float32 slice of up to batch_size user rows.
                ratings = T[indices[i:(i + batch_size)], :].toarray().astype(
                    np.float32)
                losses.append(self.AE.ae_batch(ratings))
                last_batch = ind
            m = np.mean(np.array(losses))
            print("mean Loss for epoch %d  batch %d is %f" %
                  (epoch, last_batch, m))
            rmse = self.RMSE()
            print("RMSE after one epoch is %f" % (rmse))
            # NOTE(review): `f` is assumed to be a module-level file handle
            # opened elsewhere in this file -- confirm; otherwise this line
            # raises NameError.
            f.write(str(rmse) + '\n')

    def RMSE(self):
        """Root-mean-squared error on the first 5000 held-out test pairs.

        Reconstructs each test user's dense row through the network with the
        current parameters, compares predictions against the true ratings in
        self.AE.t, and snapshots the parameters to .npy files.
        """
        W, V, b, mu = self.AE.get_params()
        print("testing process starts")
        # Only a slice of the test set is scored to keep evaluation cheap.
        test = self.AE.test_ind[:5000]
        rat = []
        pred = []
        rmse = 0
        for i, j in test:
            Rt = self.AE.T[i, :].todense()
            # Copy the (1, n_items) matrix row into a flat 1-D vector.
            Rt1 = np.zeros(Rt.shape[1])
            Rt1[:] = Rt[:]
            # Sigmoid encoder; the output layer here is linear (no expit).
            p = (np.dot(W, expit(np.dot(V, Rt1) + mu)) + b)
            pred.append(p[j])
            rat.append(self.AE.t[i, j])
        try:
            rat = np.array(rat)
            pred = np.array(pred)
            rmse = np.sqrt(np.mean((pred - rat) ** 2))
        except Exception as exc:
            # BUG FIX: was a bare `except:` that dropped into pdb.set_trace();
            # report the error and fall through with rmse = 0 instead of
            # halting the run at an interactive prompt.
            print("exception: %s" % exc)
        np.save('test', test)
        np.save('W', W)
        np.save('V', V)
        np.save('mu', mu)
        np.save('b', b)
        return rmse

    def RMSE_sparse(self):
        """RMSE over all test pairs, touching only each user's observed
        columns of V (cheaper than the dense reconstruction in RMSE)."""
        W, V, b, mu = self.AE.get_params()
        # Column vector so it broadcasts against (k, 1) activations.
        # BUG FIX: was mu.reshape(100, 1), hard-coding the hidden size.
        mu = mu.reshape(-1, 1)
        print("testing process starts")
        test = self.AE.test_ind
        rat = []
        pred = []
        rmse = 0
        for i, j in test:
            Rt = np.array(self.AE.T[i, :].todense().tolist()).astype(
                np.float16)
            # Indices of the user's observed ratings.
            ind = np.where(Rt > 0)[1]
            Rt = Rt.T
            # Encoder restricted to observed columns, then sigmoid decoder.
            temp = expit(np.dot(V[:, ind], Rt[ind]) + mu)
            p = expit(np.dot(W, temp) + b)
            pred.append(p[0, j])
            rat.append(self.AE.t[i, j])
        try:
            rat = np.array(rat)
            pred = np.array(pred)
            rmse = np.sqrt(np.mean((pred - rat) ** 2))
        except Exception as exc:
            # BUG FIX: bare `except:` + pdb.set_trace() replaced (see RMSE).
            print("exception: %s" % exc)
        np.save('test', test)
        np.save('W', W)
        np.save('V', V)
        np.save('mu', mu)
        np.save('b', b)
        return rmse
class trainAE(object):
    """Trainer for a sparse-input AutoEncoder: per-user and mini-batch
    training loops plus RMSE evaluation on held-out (user, item) pairs."""

    def __init__(self, path, k, lr=0.1, batch_size=1, loss='bce', n_epochs=50):
        '''
        Arguments:
            path : path to training data
            k : hidden unit dimension
            lr : learning rate (unused here; presumably the AutoEncoder owns
                 its optimiser -- kept for interface compatibility)
            batch_size : batch size for training, currently set to 1
            loss : loss function (bce, rmse) to train the AutoEncoder
            n_epochs : number of epochs for training
        '''
        self.AE = AutoEncoder(path, k)
        # Define the (batched) autoencoder model graph.
        self.AE.model_batch()
        self.epochs = n_epochs

    def sigmoid(self, x):
        """Numpy logistic sigmoid, 1 / (1 + e^-x) (same as scipy's expit)."""
        return 1.0 / (1.0 + np.exp(-x))

    def train(self):
        """Per-user training: one gradient step per observed user row,
        with an RMSE report after every epoch."""
        # CSR format gives fast row slicing for per-user access.
        T = self.AE.T.tocsr()
        nonzero_indices = T.nonzero()
        for epoch in range(self.epochs):
            print("Running epoch %d" % (epoch))
            for i in np.unique(nonzero_indices[0]):
                # Column indices of the ratings user i has actually given.
                indices = T[i, :].nonzero()[1]
                # Corresponding rating values, flattened to 1-D.
                ratings = T[i, indices].toarray().reshape(-1)
                # Cast to the dtypes the compiled graph expects.
                indices = indices.astype(np.int32)
                ratings = ratings.astype(np.int32)
                loss = self.AE.ae(indices, ratings)
                print("Loss at epoch %d is %f" % (epoch, loss))
            print("RMSE after one epoch is %f" % (self.RMSE()))

    # Batch training method
    def train_batch(self, batch_size):
        """Mini-batch training over dense user-row slices; prints per-batch
        loss and per-epoch held-out RMSE."""
        T = self.AE.T.tocsr()
        nonzero_indices = T.nonzero()
        indices = np.unique(nonzero_indices[0])
        n_users = len(indices)
        for epoch in range(self.epochs):
            for ind, i in enumerate(range(0, n_users, batch_size)):
                # Dense float32 slice of up to batch_size user rows.
                ratings = T[indices[i:(i + batch_size)], :].toarray().astype(
                    np.float32)
                loss = self.AE.ae_batch(ratings)
                print("Loss for epoch %d  batch %d is %f" % (epoch, ind, loss))
            print("RMSE after one epoch is %f" % (self.RMSE()))

    def RMSE(self):
        """RMSE over all held-out test pairs; also snapshots parameters to
        .npy files and returns the rmse."""
        W, V, b, mu = self.AE.get_params()
        print("testing process starts")
        test = self.AE.test_ind
        rat = []
        pred = []
        rmse = 0
        for i, j in test:
            Rt = self.AE.T[i, :].todense()
            # The input layer appears to use one extra slot beyond the item
            # count, hence the +1 padding -- TODO confirm against the model.
            Rt1 = np.zeros(Rt.shape[1] + 1)
            # BUG FIX: the copy previously hard-coded `Rt1[:6541] = Rt`,
            # tying this method to one dataset's item count; use the actual
            # row width instead.
            Rt1[:Rt.shape[1]] = Rt
            # Sigmoid encoder and sigmoid decoder.
            p = expit(np.dot(W, expit(np.dot(V, Rt1) + mu)) + b)
            pred.append(p[j])
            rat.append(self.AE.t[i, j])
        try:
            rat = np.array(rat)
            pred = np.array(pred)
            rmse = np.sqrt(np.mean((pred - rat) ** 2))
        except Exception as exc:
            # BUG FIX: was a bare `except:` that dropped into pdb.set_trace();
            # report the error and fall through with rmse = 0 instead of
            # halting the run at an interactive prompt.
            print("exception: %s" % exc)
        np.save('test', test)
        np.save('W', W)
        np.save('V', V)
        np.save('mu', mu)
        np.save('b', b)
        return rmse

    def RMSE_sparse(self):
        """RMSE over all test pairs using only each user's observed columns
        of V (tanh activations); returns the rmse and snapshots parameters."""
        # BUG FIX: parameters were unpacked as `W, V, mu, b`, swapping the
        # two bias vectors relative to every other get_params() call site in
        # this file; use the consistent `W, V, b, mu` order.
        W, V, b, mu = self.AE.get_params()
        print("testing process starts")
        test = self.AE.test_ind
        rat = []
        pred = []
        rmse = 0
        for i, j in test:
            Rt = self.AE.T[i, :].todense()
            # Indices of the user's observed ratings.
            ind = self.AE.T[i, :].nonzero()[1]
            Rt = Rt.T
            # Encoder restricted to observed columns; mu broadcast as a
            # column vector (was hard-coded reshape(100, 1)).
            temp = np.tanh(np.dot(V[:, ind], Rt[ind]) + mu.reshape(-1, 1))
            p = np.tanh(np.dot(W, temp) + b)
            pred.append(p[j])
            rat.append(self.AE.t[i, j])
        try:
            rat = np.array(rat)
            pred = np.array(pred)
            # BUG FIX: the squared error was written as (pred - rat) * 2
            # (multiplication by two) instead of ** 2.
            rmse = np.sqrt(np.mean((pred - rat) ** 2))
            print(rmse)
        except Exception as exc:
            # BUG FIX: bare `except:` + pdb.set_trace() replaced (see RMSE).
            print("exception: %s" % exc)
        np.save('test', test)
        np.save('W', W)
        np.save('V', V)
        np.save('mu', mu)
        np.save('b', b)
        # BUG FIX: previously ended in an unconditional pdb.set_trace() and
        # returned None; return the computed rmse like RMSE() does.
        return rmse
# Beispiel #4 (scrape artifact: stray example marker and vote count from the
# source listing, kept as a comment so the file remains valid Python)
class trainAE(object):
    """Mini-batch trainer for a sparse-input AutoEncoder with per-epoch
    RMSE evaluation on held-out (user, item) pairs."""

    def __init__(self,
                 path,
                 k,
                 lr=0.01,
                 batch_size=1,
                 loss='bce',
                 n_epochs=500):
        '''
        Arguments:
            path : path to training data
            k : hidden unit dimension
            lr : learning rate (unused here; presumably the AutoEncoder owns
                 its optimiser -- kept for interface compatibility)
            batch_size : batch size for training, currently set to 1
            loss : loss function (bce, rmse) to train the AutoEncoder
            n_epochs : number of epochs for training
        '''
        self.AE = AutoEncoder(path, k)
        # Define the (batched) autoencoder model graph.
        self.AE.model_batch()
        self.epochs = n_epochs

    def sigmoid(self, x):
        """Numpy logistic sigmoid, 1 / (1 + e^-x) (same as scipy's expit)."""
        return 1.0 / (1.0 + np.exp(-x))

    # Batch training method
    def train(self, batch_size):
        """One epoch = one mini-batched pass over all users; prints the
        epoch's mean loss and held-out RMSE."""
        T = self.AE.T.tocsr()
        nonzero_indices = T.nonzero()
        indices = np.unique(nonzero_indices[0])
        n_users = len(indices)
        for epoch in range(self.epochs):
            losses = []
            last_batch = 0
            for ind, i in enumerate(range(0, n_users, batch_size)):
                # Dense float32 slice of up to batch_size user rows.
                ratings = T[indices[i:(i + batch_size)], :].toarray().astype(
                    np.float32)
                losses.append(self.AE.ae_batch(ratings))
                last_batch = ind
            m = np.mean(np.array(losses))
            print("mean Loss for epoch %d  batch %d is %f" %
                  (epoch, last_batch, m))
            rmse = self.RMSE()
            print("RMSE after one epoch is %f" % (rmse))

    def RMSE(self):
        """RMSE over all held-out test pairs with the current parameters."""
        W, V, b, mu = self.AE.get_params()
        print("testing process starts")
        test = self.AE.test_ind
        rat = []
        pred = []
        rmse = 0
        for i, j in test:
            Rt = self.AE.T[i, :].todense()
            # Copy the (1, n_items) matrix row into a flat 1-D vector.
            Rt1 = np.zeros(Rt.shape[1])
            Rt1[:] = Rt[:]
            # Sigmoid encoder and sigmoid decoder.
            p = expit(np.dot(W, expit(np.dot(V, Rt1) + mu)) + b)
            pred.append(p[j])
            rat.append(self.AE.t[i, j])
        try:
            rat = np.array(rat)
            pred = np.array(pred)
            rmse = np.sqrt(np.mean((pred - rat) ** 2))
        except Exception as exc:
            # BUG FIX: was a bare `except:` with a Python-2 print statement
            # and pdb.set_trace(); report the error and return rmse = 0
            # instead of halting at an interactive prompt.
            print("exception: %s" % exc)

        return rmse