def baselinePU(Y, label_loc, alpha, vlambda, kx):
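    ### Y: label matrix
    ### label_loc: masked entries in the label matrix (treated as unobserved)
    ### alpha: PU weight; unmasked entries carry weight alpha, masked entries 1 - alpha
    ### vlambda: regularisation parameter on the latent factors W and H
    ### kx: dimensionality of the latent factors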

    #random_mat = np.random.random(Y.shape)
    #label_loc = np.where(random_mat < label_fraction) ## locate the masked entries in the label matrix
    #### print statistics
    #print np.where(Y[label_loc] > 0)[0].shape[0] / float(np.where(Y > 0)[0].shape[0]) ## the ratio of "1" entries being masked
    #print np.where(Y[label_loc] < 1)[0].shape[0] / float(np.where(Y < 1)[0].shape[0]) ## the ratio of "0" entries being masked
    W = theano.shared(np.random.random((Y.shape[0], kx)), name='W')
    H = theano.shared(np.random.random((Y.shape[1], kx)), name='H')

    labelmask = np.ones(Y.shape)
    labelmask[label_loc] = 0
    Y_masked = Y.copy()
    Y_masked[label_loc] = 0

    reconstruction = theano.tensor.dot(W, H.T)
    X_symbolic = theano.tensor.matrix(name="Y_masked", dtype=Y_masked.dtype)
    difference = theano.tensor.sqr((X_symbolic - reconstruction)) * (1 - alpha)
    positive_difference = theano.tensor.sqr(
        (X_symbolic - reconstruction) * labelmask) * (2 * alpha - 1.)

    mse = difference.mean() + positive_difference.mean()
    loss = mse + vlambda * (W * W).mean() + vlambda * (H * H).mean()

    downhill.minimize(loss=loss,
                      train=[Y_masked],
                      patience=0,
                      algo='rmsprop',
                      batch_size=Y_masked.shape[0],
                      max_gradient_norm=1,
                      learning_rate=0.06,
                      min_improvement=0.00001)

    return W.get_value(), H.get_value()
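A minimal usage sketch for baselinePU, assuming numpy, theano and downhill are importable and Y is a binary label matrix; the masking mirrors the commented-out lines at the top of the function, and the hyperparameter values are placeholders:

import numpy as np

Y = (np.random.random((100, 20)) > 0.9).astype(float)    # toy binary label matrix
label_fraction = 0.2
random_mat = np.random.random(Y.shape)
label_loc = np.where(random_mat < label_fraction)         # entries hidden during training

W, H = baselinePU(Y, label_loc, alpha=0.8, vlambda=0.01, kx=10)
Y_hat = np.dot(W, H.T)                                     # reconstructed label scores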
Example #2
    def solve(self, X, missing_mask):
        (n_samples, n_features) = X.shape
        observed_mask = 1 - missing_mask

        # Set up a matrix factorization problem to optimize.
        U_init = self.initializer(n_samples, self.rank).astype(X.dtype)
        V_init = self.initializer(self.rank, n_features).astype(X.dtype)
        U = theano.shared(U_init, name="U")
        V = theano.shared(V_init, name="V")
        X_symbolic = T.matrix(name="X", dtype=X.dtype)
        reconstruction = T.dot(U, V)

        difference = X_symbolic - reconstruction

        masked_difference = difference * observed_mask
        err = T.sqr(masked_difference)
        mse = err.mean()
        loss = (mse + self.l1_penalty * abs(U).mean() + self.l2_penalty *
                (V * V).mean())
        downhill.minimize(loss=loss,
                          train=[X],
                          patience=self.patience,
                          algo=self.optimization_algorithm,
                          batch_size=n_samples,
                          min_improvement=self.min_improvement,
                          max_gradient_norm=self.max_gradient_norm,
                          learning_rate=self.learning_rate,
                          monitors=[("error", err.mean())],
                          monitor_gradients=self.verbose)

        U_value = U.get_value()
        V_value = V.get_value()
        return np.dot(U_value, V_value)
def completionLR(X, kx, fea_loc, lambdaU, lambdaV):
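    ### X: feature matrix
    ### kx: dimensionality of the latent factors
    ### fea_loc: masked entries in the feature matrix
    ### lambdaU, lambdaV: regularisation parameters on U and V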
    mask = np.ones(X.shape)
    mask[fea_loc] = 0.

    #### Theano and downhill
    U = theano.shared(np.random.random((X.shape[0], kx)), name='U')
    V = theano.shared(np.random.random((X.shape[1], kx)), name='V')

    X_symbolic = theano.tensor.matrix(name="X", dtype=X.dtype)
    reconstruction = theano.tensor.dot(U, V.T)
    difference = X_symbolic - reconstruction
    masked_difference = difference * mask
    err = theano.tensor.sqr(masked_difference)
    mse = err.mean()
    xloss = mse + lambdaU * (U * U).mean() + lambdaV * (V * V).mean()

    #### optimisation
    downhill.minimize(loss=xloss,
                      train=[X],
                      patience=0,
                      algo='rmsprop',
                      batch_size=X.shape[0],
                      max_gradient_norm=1,
                      learning_rate=0.1,
                      min_improvement=0.0001)

    return U.get_value(), V.get_value()
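A short sketch of calling completionLR on a real-valued feature matrix X, reusing numpy as above; the mask is drawn at random and the regularisation weights are placeholders:

fea_loc = np.where(np.random.random(X.shape) < 0.2)    # feature entries hidden from training
U, V = completionLR(X, kx=10, fea_loc=fea_loc, lambdaU=0.01, lambdaV=0.01)
X_completed = np.dot(U, V.T)                            # low-rank completion of X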
Example #4
    def fit(self, X, y):
        self.w = theano.shared(
            value=np.random.normal(0, 0.001, (X.shape[1], 1)),  # random initialize
            name="w",
            borrow=False
        )

        x_ = TT.matrix("X")
        y_ = TT.matrix("y")

        e = ((y_ - TT.dot(x_, self.w)) ** 2).sum()
        l1_penalty = abs(self.w).sum()
        l2_penalty = TT.sqrt((self.w * self.w).sum())

        loss = (
            e +
            self.lambda_1 * l1_penalty +
            self.lambda_2 * l2_penalty
        ).sum()

        x_train, x_valid, y_train, y_valid = cv.train_test_split(X, y)

        downhill.minimize(
            loss,
            XYDataset(x_train, y_train, batch_size=self.batch_size),
            valid=XYDataset(x_valid, y_valid, batch_size=x_valid.shape[0]),
            params=[self.w],
            inputs=[x_, y_],
            algo="rmsprop",
            **self.downhill_args
        )
        w = self.w.get_value()
        self.coef_dist = [
            (abs(w) > x).sum() for x in [0.01, 0.001, 0.0001, 0.00001, 0.000001]
        ]
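XYDataset is a helper defined elsewhere in this example's source. A minimal sketch of what it could look like, assuming it only needs to yield (X, y) minibatches in the order expected by inputs=[x_, y_], and assuming downhill.Dataset accepts a list of arrays and slices them together along axis 0:

def XYDataset(x, y, batch_size=32):
    # Hypothetical stand-in: wrap the pair of arrays in a single downhill Dataset.
    return downhill.Dataset([x, y], batch_size=batch_size)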
def completionPUV(X, Y, fea_loc, label_loc, alpha, lambda0, lambda1, lambda2,
                  delta, kx):
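    ### X: feature matrix;  Y: label matrix
    ### fea_loc, label_loc: masked entries in the feature and label matrices
    ### alpha: PU weight on the unmasked label entries
    ### lambda0: regularisation on U and V;  lambda1: regularisation on W and H
    ### lambda2: coupling penalty between U and W
    ### delta: weight of the label-reconstruction loss
    ### kx: dimensionality of the latent factors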

    #delta = 0.3
    ### masking out some entries from feature and label matrix

    mask = np.ones(X.shape)
    mask[fea_loc] = 0.
    labelmask = np.ones(Y.shape)
    labelmask[label_loc] = 0

    #### Theano and downhill
    U = theano.shared(np.random.random((X.shape[0], kx)), name='U')
    V = theano.shared(np.random.random((X.shape[1], kx)), name='V')
    W = theano.shared(np.random.random((Y.shape[0], kx)), name='W')
    H = theano.shared(np.random.random((Y.shape[1], kx)), name='H')
    feature_mask = theano.tensor.matrix('feature_mask')
    label_mask = theano.tensor.matrix('label_mask')
    #feature_mask = theano.shared(mask,name='mask')
    #label_mask = theano.shared(labelmask,name='labelmask')
    #### U,V,W and H randomly initialised

    nsample = X.shape[0]
    #X_symbolic = theano.tensor.matrix(name="X", dtype=X.dtype)
    #tX = theano.shared(X.astype(theano.config.floatX),name="X")
    #tX = theano.shared(X,name="X")
    tX = theano.tensor.matrix('X')  ### symbolic variable
    difference = tX - theano.tensor.dot(U, V.T)
    masked_difference = difference * feature_mask
    err = theano.tensor.sqr(masked_difference)
    mse = err.mean()
    xloss = mse + lambda0 * ((U * U).mean() + (V * V).mean())

    #tY = theano.shared(Y.astype(theano.config.floatX),name="Y")
    #tY = theano.shared(Y,name="Y")
    tY = theano.tensor.matrix('Y')  ### symbolic variable
    Y_reconstruction = theano.tensor.dot(W, H.T)
    Ydifference = theano.tensor.sqr((tY - Y_reconstruction)) * (1 - alpha)
    positive_difference = theano.tensor.sqr(
        (tY - Y_reconstruction) * label_mask) * (2 * alpha - 1.)
    Ymse = Ydifference.mean() + positive_difference.mean()
    global_loss = xloss + delta * Ymse + lambda1 * (
        (W * W).mean() + (H * H).mean()) + lambda2 * theano.tensor.sqr(
            (U - W)).mean()

    #### optimisation
    downhill.minimize(loss=global_loss,
                      params=[U, V, W, H],
                      train=[X, Y, mask, labelmask],
                      inputs=[tX, tY, feature_mask, label_mask],
                      patience=0,
                      algo='rmsprop',
                      batch_size=nsample,
                      max_gradient_norm=1,
                      learning_rate=0.1,
                      min_improvement=0.0001)

    return U.get_value(), V.get_value(), W.get_value(), H.get_value()
def TPAMI(X, Y, fea_loc_x, fea_loc_y, label_loc_x, label_loc_y, miu, lambda0,
          kx):
    ### X: feature matrix
    ### Y: label matrix
    ### fea_loc_x, fea_loc_y: masked entries in feature matrix
    ### label_loc_x, label_loc_y: masked entries in label matrix
    ### miu: regularisation parameter on matrix rank
    ### lambda0: regularisation parameter on label reconstruction
    ### kx: dimensionality of latent variables used for solving nuclear norm based regularisation
    M = np.concatenate((Y, X), axis=1)
    M = M.T

    label_dim = Y.shape[1]
    fea_dim = X.shape[1]
    gamma = 15.
    featuremask = np.ones(M.shape)
    labelmask = np.ones(M.shape)
    for i in range(len(label_loc_x)):
        labelmask[label_loc_y[i], label_loc_x[i]] = 0.

    for i in range(len(fea_loc_x)):
        featuremask[fea_loc_y[i] + label_dim, fea_loc_x[i]] = 0.

    #### Theano and downhill
    U = theano.shared(np.random.random((M.shape[0], kx)), name='U')
    V = theano.shared(np.random.random((M.shape[1], kx)), name='V')

    #### feature loss
    M_symbolic = theano.tensor.matrix(name="M", dtype=M.dtype)
    reconstruction = theano.tensor.dot(U, V.T)
    difference = M_symbolic - reconstruction
    masked_difference = difference * featuremask
    err = theano.tensor.sqr(masked_difference)
    mse = err.mean()
    xloss = (1. / float(len(fea_loc_x))) * mse + miu * ((U * U).mean() +
                                                        (V * V).mean())
    #### label loss
    label_reconstruction_kernel = -1 * gamma * (2 * M - 1) * (reconstruction -
                                                              M)
    label_reconstruction_difference = (1. / gamma) * theano.tensor.log(
        1 + theano.tensor.exp(label_reconstruction_kernel)) * labelmask
    label_err = (
        1. / float(len(label_loc_x))) * label_reconstruction_difference.mean()
    global_loss = xloss + lambda0 * label_err

    #### optimisation
    downhill.minimize(loss=global_loss,
                      train=[M],
                      inputs=[M_symbolic],
                      patience=0,
                      algo='rmsprop',
                      batch_size=M.shape[0],
                      max_gradient_norm=1,
                      learning_rate=0.1,
                      min_improvement=0.01)

    return U.get_value(), V.get_value()
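A brief sketch of calling TPAMI; judging from how the masks are built above, the *_x arguments hold row (sample) indices and the *_y arguments hold column indices of the masked entries. The hyperparameter values below are placeholders:

label_loc_x, label_loc_y = np.where(np.random.random(Y.shape) < 0.2)   # rows, columns in Y
fea_loc_x, fea_loc_y = np.where(np.random.random(X.shape) < 0.2)       # rows, columns in X
U, V = TPAMI(X, Y, fea_loc_x, fea_loc_y, label_loc_x, label_loc_y,
             miu=0.1, lambda0=1.0, kx=10)
M_hat = np.dot(U, V.T)     # reconstruction of M = [Y, X].T (labels stacked above features)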
Example #7
 def test_minimize(self):
     x = theano.shared(-3 + np.zeros((2, ), 'f'), name='x')
     data = downhill.Dataset(np.zeros((1, 1), 'f'), batch_size=1)
     data._slices = [[]]
     downhill.minimize(
         (100 * (x[1:] - x[:-1] ** 2) ** 2 + (1 - x[:-1]) ** 2).sum(),
         data,
         algo='nag',
         learning_rate=0.001,
         momentum=0.9,
         patience=1,
         min_improvement=0.1,
         max_gradient_norm=1,
     )
     assert np.allclose(x.get_value(), [1, 1]), x.get_value()
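The loss minimized here is the two-dimensional Rosenbrock function, whose global minimum lies at (1, 1); the final assertion checks that the NAG optimizer reaches it.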
Example #9
def completionPUV(X, Y, fea_loc, label_loc, alpha, lambda0, lambda1, lambda2,
                  delta, kx):

    #delta = 0.3
    ### masking out some entries from feature and label matrix

    mask = np.ones(X.shape)
    mask[fea_loc] = 0.
    labelmask = np.ones(Y.shape)
    labelmask[label_loc] = 0

    #### Theano and downhill
    U = theano.shared(np.random.random((X.shape[0], kx)), name='U')
    V = theano.shared(np.random.random((X.shape[1], kx)), name='V')
    W = theano.shared(np.random.random((Y.shape[0], kx)), name='W')
    H = theano.shared(np.random.random((Y.shape[1], kx)), name='H')

    X_symbolic = theano.tensor.matrix(name="X", dtype=X.dtype)
    reconstruction = theano.tensor.dot(U, V.T)
    difference = X_symbolic - reconstruction
    masked_difference = difference * mask
    err = theano.tensor.sqr(masked_difference)
    mse = err.mean()
    xloss = mse + lambda0 * ((U * U).mean() + (V * V).mean())

    Y_symbolic = theano.tensor.matrix(name="Y", dtype=Y.dtype)
    Y_reconstruction = theano.tensor.dot(U, H.T)
    Ydifference = theano.tensor.sqr(
        (Y_symbolic - Y_reconstruction)) * (1 - alpha)
    positive_difference = theano.tensor.sqr(
        (Y_symbolic - Y_reconstruction) * labelmask) * (2 * alpha - 1.)
    Ymse = Ydifference.mean() + positive_difference.mean()
    global_loss = xloss + delta * Ymse + lambda1 * (
        (W * W).mean() + (H * H).mean()) + lambda2 * theano.tensor.sqr(
            (U - W)).mean()

    #### optimisation
    downhill.minimize(loss=global_loss,
                      train=[X, Y],
                      inputs=[X_symbolic, Y_symbolic],
                      patience=0,
                      algo='rmsprop',
                      batch_size=Y.shape[0],
                      max_gradient_norm=1,
                      learning_rate=0.1,
                      min_improvement=0.0001)

    return U.get_value(), V.get_value(), W.get_value(), H.get_value()
Example #10
    def fit(self, X, y):
        if self.select_cols is not None:
            _X = X[:, self.select_cols]
        else:
            _X = X

        self.w = theano.shared(
            value=np.random.normal(0, 0.001, (_X.shape[1], 1)),  # random initialize
            name="w",
            borrow=False
        )


        x_ = TT.matrix("X")
        y_ = TT.matrix("y")
        l_ = tsparse.csr_matrix("l")

        e = ((y_ - TT.dot(x_, self.w)) ** 2).sum()
        l1_penalty = abs(self.w).sum()
        l2_penalty = TT.sqrt((self.w * self.w).sum())
        s_sparse_penalty = theano.dot(theano.dot(self.w.T, l_), self.w)

        loss = (
            e +
            self.lambda_1 * l1_penalty +
            self.lambda_2 * l2_penalty +
            self.alpha    * s_sparse_penalty
        ).sum()

        x_train, x_valid, y_train, y_valid = cv.train_test_split(_X, y)

        downhill.minimize(
            loss,
            XYLDataset(x_train, y_train, self.L, batch_size=self.batch_size),
            valid=XYLDataset(x_valid, y_valid, self.L, batch_size=x_valid.shape[0]),
            params=[self.w],
            inputs=[x_, y_, l_],
            algo="rmsprop",
            **self.downhill_args
        )
        w = self.w.get_value()
        self.coef_dist = [
            (abs(w) > x).sum() for x in [0.01, 0.001, 0.0001, 0.00001, 0.000001]
        ]
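XYLDataset is another helper from this example's source; because the Laplacian L has a different number of rows than X and y, it cannot simply be sliced alongside them. One hypothetical sketch, assuming downhill.Dataset accepts a callable that returns one batch per call:

def XYLDataset(x, y, L, batch_size=32):
    # Hypothetical stand-in: random (X, y) minibatches paired with the fixed Laplacian L.
    def sample():
        idx = np.random.choice(x.shape[0], size=min(batch_size, x.shape[0]), replace=False)
        return [x[idx], y[idx], L]
    return downhill.Dataset(sample, iteration_size=max(1, x.shape[0] // batch_size))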
u = theano.shared(np.random.randn(N * N, K * K).astype('f'), name='u')
v = theano.shared(np.random.randn(K * K, B).astype('f'), name='v')

err = TT.sqr(x - TT.dot(u, v)).mean()

downhill.minimize(
    loss=err + 100 * (0.01 * abs(u).mean() + (v * v).mean()),
    params=[u, v],
    inputs=[x],
    train=train,
    valid=valid,
    batch_size=N * N,
    monitor_gradients=True,
    monitors=[
        ('err', err),
        ('u<-0.5', (u < -0.5).mean()),
        ('u<-0.1', (u < -0.1).mean()),
        ('u<0.1', (u < 0.1).mean()),
        ('u<0.5', (u < 0.5).mean()),
    ],
    algo='sgd',
    max_gradient_clip=1,
    learning_rate=0.5,
    momentum=0.9,
    patience=3,
    min_improvement=0.1,
)

plot_images(v.get_value(), 121)
plot_images(np.dot(u.get_value(), v.get_value()), 122)
plt.show()
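The excerpt above relies on names defined earlier in its source file (x, train, valid, N, K, B, and the plotting helper plot_images, which is not reproduced here). A rough sketch of plausible definitions, with shapes inferred from u and v and everything else a placeholder:

import numpy as np
import theano.tensor as TT

N, K, B = 10, 8, 256                               # so u is (N*N, K*K) and v is (K*K, B)
x = TT.matrix('x')                                 # symbolic input matched by inputs=[x]
train = np.random.randn(N * N, B).astype('f')      # training matrix, same shape as dot(u, v)
valid = np.random.randn(N * N, B).astype('f')      # held-out matrix for early stopping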
Example #13
# downhill.minimize signature, for reference:
#
# minimize(loss, train, batch_size=32, monitor_gradients=False, monitors=(),
#          valid=None, params=None, inputs=None, algo='rmsprop', updates=(),
#          train_batches=None, valid_batches=None, **kwargs)

downhill.minimize(
    loss=loss,
    train=[y],
    patience=0,
    batch_size=A,                 # Process y as a single batch.
    max_gradient_norm=1,          # Prevent gradient explosion!
    learning_rate=0.1,
    monitors=monitors,
    monitor_gradients=True)

# Print out the optimized coefficients u and basis v.
print('u =', u.get_value())
print('v =', v.get_value())
Example #14
import numpy as np
import theano
import theano.tensor as TT

import downhill


def rand(a, b):
    return np.random.randn(a, b).astype('f')


A, B, K = 20, 5, 3

# Set up a matrix factorization problem to optimize.
u = theano.shared(rand(A, K), name='u')
v = theano.shared(rand(K, B), name='v')
e = TT.sqr(TT.matrix() - TT.dot(u, v))

# Minimize the regularized loss with respect to a data matrix.
y = np.dot(rand(A, K), rand(K, B)) + rand(A, B)

downhill.minimize(
    loss=e.mean() + abs(u).mean() + (v * v).mean(),
    train=[y],
    patience=0,
    batch_size=A,  # Process y as a single batch.
    max_gradient_norm=1,  # Prevent gradient explosion!
    learning_rate=0.1,
    monitors=(
        ('err', e.mean()),  # Monitor during optimization.
        ('|u|<0.1', (abs(u) < 0.1).mean()),
        ('|v|<0.1', (abs(v) < 0.1).mean())),
    monitor_gradients=True)

# Print out the optimized coefficients u and basis v.
print('u =', u.get_value())
print('v =', v.get_value())
Example #17
import numpy as np
import theano
import theano.tensor as TT

import downhill


def rand(a, b):
    return np.random.randn(a, b).astype('f')

A, B, K = 20, 5, 3

# Set up a matrix factorization problem to optimize.
u = theano.shared(rand(A, K), name='u')
v = theano.shared(rand(K, B), name='v')
e = TT.sqr(TT.matrix() - TT.dot(u, v))

# Minimize the regularized loss with respect to a data matrix.
y = np.dot(rand(A, K), rand(K, B)) + rand(A, B)

downhill.minimize(
    loss=e.mean() + abs(u).mean() + (v * v).mean(),
    train=[y],
    patience=0,
    batch_size=A,                 # Process y as a single batch.
    max_gradient_norm=1,          # Prevent gradient explosion!
    learning_rate=0.1,
    monitors=(('err', e.mean()),  # Monitor during optimization.
              ('|u|<0.1', (abs(u) < 0.1).mean()),
              ('|v|<0.1', (abs(v) < 0.1).mean())),
    algo='sgd',
    monitor_gradients=True)

# Print out the optimized coefficients u and basis v.
print('u =', u.get_value())
print('v =', v.get_value())