def get_Aopt(inX, iny):
    X_train, y_train, X_test, y_test = ascdata.split_train_test(inX, iny)
    X_train = np.concatenate((X_train, np.ones((X_train.shape[0], 1))), 1)  # append a bias column
    X_test = np.concatenate((X_test, np.ones((X_test.shape[0], 1))), 1)
    X_train_less, s_train = ascdata.split_X_s(X_train)
    X_test_less, s_test = ascdata.split_X_s(X_test)

    s_train_phi = ascdata.generate_phi(s_train, d, A_phi, b_phi)
    s_test_phi = ascdata.generate_phi(s_test, d, A_phi, b_phi)

    nfeatures = X_train.shape[1] - 1
    # Dimensions of phi(s)
    nfeatures_phi = d
    invT2 = 10

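    # Model being fit: each sample i gets a weight vector w_i = A x_i, and the
    # prediction is dot(w_i, phi(s_i)).  logprob below returns the negative sum
    # of squared residuals (despite the "RMS" name it accumulates a plain sum
    # of squares), which serves as an unnormalized log likelihood.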
    def logprob(inA, inX, iny, ins_phi):
        RMS = 0
        for i in range(len(iny)):
            wi = np.dot(inA, inX[i])
            RMS_current = (iny[i] - np.dot(wi, ins_phi[i]))**2
            RMS += RMS_current
        return -RMS

    objective = lambda inA, t: -logprob(inA, X_train_less, y_train, s_train_phi)

    LLHs = []
    LLH_xs = []

    def callback(params, t, g):
        LLH = -objective(params, t)
        LLHs.append(LLH)
        LLH_xs.append(t)
        print("Iteration {} log likelihood {}".format(t, LLH))

    init_A = 1e-11 * np.ones((nfeatures_phi, nfeatures))  # small nonzero initialization for A
    # init_A =  [[ -3.05236728e-04,  -9.50015728e-04,  -3.80139503e-04,   1.44010470e-04, -3.05236728e-04,
    #              -4.96117987e-04,  -1.02736409e-04,  -1.86416292e-04, -9.52628589e-04,  -1.55023279e-03,
    #              1.44717581e-04,   1.00000000e-11, -9.50028200e-04,  -4.96117987e-04,   1.00000000e-11,
    #              -3.05236728e-04, 1.77416412e-06,  -8.16665436e-06,   3.12622951e-05,  -8.25700143e-04,
    #              1.44627987e-04,   1.90211243e-05,  -8.28273186e-04,  -9.41349990e-04, -4.56671031e-04,
    #              9.79097070e-03,  -6.41866046e-04,  -7.79274856e-05, 1.44539330e-04,  -3.05236728e-04,
    #              -5.99188450e-04,  -7.29470175e-04, -6.69558174e-04,  -9.50028200e-04]]
    init_A = np.array(init_A)

    print("Optimizing regression parameters A...")
    optimized_params = adam(grad(objective), init_A,
                            step_size=0.01, num_iters=1000, callback=callback)

    Aopt = optimized_params
    print("Aopt = {}".format(Aopt))

    return Aopt, X_train_less, y_train, s_train, X_test_less, y_test, s_test, LLHs, LLH_xs
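
# Illustrative only (not part of the original pipeline): one way to evaluate an
# optimized A on held-out data, assuming the same model as in get_Aopt, i.e.
# w_i = A x_i and y_hat_i = dot(w_i, phi(s_i)).  The helper name and the usage
# below are hypothetical.
def predict_with_A(A, X_less, s_phi):
    # Returns y_hat_i = (A x_i) . phi(s_i) for every row of X_less.
    return np.array([np.dot(np.dot(A, X_less[i]), s_phi[i]) for i in range(len(X_less))])

# Example usage (hypothetical):
# Aopt, X_tr, y_tr, s_tr, X_te, y_te, s_te, LLHs, LLH_xs = get_Aopt(X, y)
# s_te_phi = ascdata.generate_phi(s_te, d, A_phi, b_phi)
# rmse = sqrt(np.mean((y_te - predict_with_A(Aopt, X_te, s_te_phi))**2))
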
def get_wridge(inX, iny):
    X_train, y_train, X_test, y_test = ascdata.split_train_test(inX, iny)
    X_train = np.concatenate((X_train, np.ones((X_train.shape[0], 1))), 1)
    X_test = np.concatenate((X_test, np.ones((X_test.shape[0], 1))), 1)

    nfeatures = X_train.shape[1]
    invT2 = 10  # prior precision, same value as in get_Aopt; defined here so this function is self-contained

    # Augment X, y
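    # Data-augmentation trick for ridge regression: appending the Cholesky
    # factor of the prior precision (invT2 * I) as extra rows of X, with
    # matching zero targets, makes the ordinary least-squares solution of the
    # augmented system equal to the ridge estimate
    #   w = (X^T X + invT2 * I)^{-1} X^T y.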
    precision = np.identity(nfeatures) * invT2
    cholesky_precision = np.linalg.cholesky(precision)
    X_train_aug = np.concatenate((X_train, cholesky_precision), axis=0)
    y_train_aug = np.transpose(np.concatenate((y_train, np.zeros(nfeatures))))

    # Get QR decomposition of X
    Q, R = np.linalg.qr(X_train_aug)
    Rinv = np.linalg.inv(R)
    RinvQ = np.dot(Rinv, Q.T)
    wridge = np.dot(RinvQ, y_train_aug)
    return wridge, X_train, y_train, X_test, y_test
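
# Illustrative only: with the bias column already appended inside get_wridge,
# test-set predictions for the ridge solution are a single matrix-vector
# product.  (Hypothetical usage, not part of the original script.)
# wridge, X_tr, y_tr, X_te, y_te = get_wridge(X, y)
# y_ridge_pred = np.dot(X_te, wridge)
# ridge_rmse = sqrt(np.mean((y_te - y_ridge_pred)**2))
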
import ascdata
import autograd.numpy as np  # autograd-wrapped numpy so grad() can differentiate logprob
from autograd import grad
from autograd.optimizers import adam  # newer autograd versions ship this as autograd.misc.optimizers.adam
from sklearn import gaussian_process
from matplotlib import use
use("Agg")
import matplotlib.pyplot as plt
from sklearn.cross_validation import KFold
from math import sqrt
from tempfile import mkdtemp
import os.path as path


### IMPORT DATA ###
print("IMPORTING DATA")
X, y = ascdata.load_shrunken_asc_data()
X_train, y_train, X_test, y_test = ascdata.split_train_test(X, y)
# print "done splitting data"
# X_bp1, y_bp1 = ascdata.get_bp_data(1, int("400229",16), X_train, y_train)
# X_bp1_test, y_bp1_test = ascdata.get_bp_data(1, int("400229",16), X_test, y_test)
# del X_train, y_train, X_test, y_test

# X_bp1_test, y_bp1_test = ascdata.get_bp_data(1, 4194659, X_test, y_test)
# X_bp2, y_bp2 = ascdata.get_bp_data(3, 4194873, X, y)
# X_bp2_test, y_bp2_test = ascdata.get_bp_data(3, 4194873, X_test, y_test)
# X_bp3, y_bp3 = ascdata.get_bp_data(2, 4198375, X, y)
# X_bp3_test, y_bp3_test = ascdata.get_bp_data(2, 4198375, X_test, y_test)

### GET PREDICTIONS ###
print("GETTING PREDICTIONS")
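# Legacy scikit-learn API (pre-0.18): gaussian_process.GaussianProcess fits the
# correlation parameter theta by maximum likelihood between thetaL and thetaU;
# passing an array as nugget gives each training point its own (heteroscedastic)
# noise term, here y_train + 20.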
gp = gaussian_process.GaussianProcess(theta0=1e-1, thetaL=1e-3, thetaU=1, nugget=y_train+20)
gp.fit(X_train, y_train)
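
# Illustrative only: with this legacy API, held-out predictions and predictive
# variances could be obtained via eval_MSE=True (the original script may do
# this differently below).
# y_gp_pred, y_gp_mse = gp.predict(X_test, eval_MSE=True)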