def optimize_loss(loss_func, x_start=None, args=None, maxiter=100, ftol=1e-6):
    """Minimize ``loss_func`` with L-BFGS-B, printing progress per iteration.

    Parameters
    ----------
    loss_func : callable
        Scalar loss; must be differentiable by ``grad`` w.r.t. its first
        argument.
    x_start : array-like, optional
        Initial guess for the parameter vector.
    args : tuple, optional
        Extra positional arguments forwarded to ``loss_func``.
    maxiter : int
        Maximum number of L-BFGS-B iterations.
    ftol : float
        Relative tolerance on the loss for termination.

    Returns
    -------
    scipy.optimize.OptimizeResult
        The result object returned by ``scipy.optimize.minimize``.
    """
    from scipy.optimize import minimize

    # scipy calls fun(x, *args); a None default would raise a TypeError
    # inside the optimizer, so normalize it to an empty tuple here.
    if args is None:
        args = ()

    # gradient of the loss w.r.t. the parameter vector (first argument)
    gloss_func = grad(loss_func, argnum=0)

    pbar = tqdm_cs(total=maxiter)

    def call(Xi):
        # Print "<iter>  <x0>  <x1> ..." with each component in scientific
        # notation, then advance the progress bar.
        Nfeval = pbar.n
        sss = ''.join('  ' + '{:.5e}'.format(x) for x in Xi)
        print('{0:4d}'.format(Nfeval) + sss)
        sys.stdout.flush()
        pbar.update()

    myop = minimize(loss_func,
                    x_start,
                    args=args,
                    jac=gloss_func,
                    callback=call,
                    method='L-BFGS-B',
                    options={
                        "maxiter": maxiter,
                        "disp": False,
                        "maxcor": 9,  # number of stored L-BFGS corrections
                        "gtol": 1e-9,
                        "ftol": ftol
                    })
    pbar.close()
    return myop
def sor_loss(x_opt,
             X,
             y,
             cv,
             jitter,
             disable_pbar=True,
             leave=False,
             return_score=False):
    """Cross-validated loss for a Subset-of-Regressors (SoR) sparse KRR model.

    Parameters
    ----------
    x_opt : sequence
        x_opt[0] is the regularization parameter Lambda.
    X : sequence
        X[0] is kMM (active-active kernel, Mactive x Mactive);
        X[1] is kMN (active-sample kernel, Mactive x Nsample).
    y : ndarray
        Targets, one entry per sample (column of kMN).
    cv : cross-validator
        Object with ``split(...)`` yielding (train, test) index arrays and
        an ``n_splits`` attribute.
    jitter : float
        Diagonal regularization added to the SoR normal matrix.
    disable_pbar : bool
        If True, hide the fold progress bar.
    leave : bool
        Whether the progress bar stays on screen after completion.
    return_score : bool
        If True, return fold-averaged scores instead of the pooled MSE.

    Returns
    -------
    dict or float
        Per-metric fold averages when ``return_score`` is True, else the
        mean squared error pooled over all folds.
    """
    Lambda = x_opt[0]
    kMM = X[0]
    kMN = X[1]

    Mactive, Nsample = kMN.shape

    mse = 0
    scores = []
    # BUG FIX: `leave` was previously hard-coded to False, silently
    # ignoring the function parameter.
    for train, test in tqdm_cs(cv.split(kMN.T),
                               total=cv.n_splits,
                               disable=disable_pbar,
                               leave=leave):
        # prepare SoR kernel: kMM + kMN_train kMN_train^T / Lambda^2 + jitter*I
        kMN_train = kMN[:, train]
        kernel_train = kMM + np.dot(kMN_train,
                                    kMN_train.T) / Lambda**2 + np.diag(
                                        np.ones(Mactive)) * jitter
        y_train = np.dot(kMN_train, y[train]) / Lambda**2

        # train the KRR model
        alpha = np.linalg.solve(kernel_train, y_train).flatten()

        # make predictions on the held-out fold
        kernel_test = kMN[:, test]
        y_pred = np.dot(alpha, kernel_test).flatten()
        if return_score is True:
            scores.append(get_score(y_pred, y[test]))

        mse += np.sum((y_pred - y[test])**2)
    mse /= len(y)

    if return_score is True:
        # average each metric over the folds
        score = {k: np.mean([sc[k] for sc in scores]) for k in scores[0]}
        return score
    return mse
def soap_cov_loss(x_opt,
                  rawsoaps,
                  y,
                  cv,
                  jitter,
                  disable_pbar=True,
                  leave=False,
                  compressor=None,
                  active_ids=None,
                  return_score=False):
    """Cross-validated SoR loss with per-species SOAP feature scaling.

    Builds the kernels from scaled SOAP features (via ``compressor``) on the
    fly, then runs the same SoR sparse KRR cross-validation as ``sor_loss``.

    Parameters
    ----------
    x_opt : sequence
        x_opt[0] is Lambda (regularization); x_opt[1:] are the feature
        scaling weights passed to the compressor.
    rawsoaps : array-like
        Raw SOAP features for all samples.
    y : ndarray
        Targets, one per sample.
    cv : cross-validator
        Object with ``split(...)`` and ``n_splits``.
    jitter : float
        Diagonal regularization added to the SoR normal matrix.
    disable_pbar : bool
        If True, hide the fold progress bar.
    leave : bool
        Whether the progress bar stays on screen after completion.
    compressor : object
        Must provide ``set_scaling_weights`` and ``transform``.
    active_ids : array-like
        Row indices of the active (pseudo/sparse) points within X.
    return_score : bool
        If True, return a score dict over out-of-fold predictions.

    Returns
    -------
    dict or float
        Score dict when ``return_score`` is True, else pooled MSE.
    """
    Lambda = x_opt[0]
    fj = x_opt[1:]

    compressor.set_scaling_weights(fj)

    X = compressor.transform(rawsoaps)
    X_pseudo = X[active_ids]

    # linear kernels between active points and all samples
    kMM = np.dot(X_pseudo, X_pseudo.T)
    kMN = np.dot(X_pseudo, X.T)
    Mactive, Nsample = kMN.shape

    mse = 0
    y_p = np.zeros((Nsample, ))
    # BUG FIX: `leave` was previously hard-coded to False, silently
    # ignoring the function parameter.
    for train, test in tqdm_cs(cv.split(rawsoaps),
                               total=cv.n_splits,
                               disable=disable_pbar,
                               leave=leave):
        # prepare SoR kernel: kMM + kMN_train kMN_train^T / Lambda^2 + jitter*I
        kMN_train = kMN[:, train]
        kernel_train = (kMM + np.dot(kMN_train, kMN_train.T) /
                        Lambda**2) + np.diag(np.ones(Mactive)) * jitter
        y_train = np.dot(kMN_train, y[train]) / Lambda**2

        # train the KRR model
        alpha = np.linalg.solve(kernel_train, y_train).flatten()

        # make predictions; collect out-of-fold predictions for scoring
        kernel_test = kMN[:, test]
        y_pred = np.dot(alpha, kernel_test).flatten()
        if return_score is True:
            y_p[test] = y_pred

        mse += np.sum((y_pred - y[test])**2)
    mse /= len(y)

    if return_score is True:
        score = get_score(y_p, y)
        return score

    return mse
def sor_fj_loss(x_opt,
                data,
                y,
                cv,
                jitter,
                disable_pbar=True,
                leave=False,
                kernel=None,
                compressor=None,
                strides=None,
                active_strides=None,
                stride_size=None,
                return_score=False):
    """Cross-validated SoR loss with scaled features and an explicit kernel.

    Scales precomputed (unlinearized) SOAP features with ``compressor``,
    builds kMM/kMN through ``kernel.transform``, then runs the same SoR
    sparse KRR cross-validation as ``sor_loss``.

    Parameters
    ----------
    x_opt : sequence
        x_opt[0] is Lambda (regularization); x_opt[1:] are the feature
        scaling factors.
    data : sequence
        data[0] are the features for all samples, data[1] the features of
        the active (sparse) set.
    y : ndarray
        Targets, one per sample.
    cv : cross-validator
        Object with ``split(...)`` and ``n_splits``.
    jitter : float
        Diagonal regularization added to the SoR normal matrix.
    disable_pbar : bool
        If True, hide the fold progress bar.
    leave : bool
        Whether the progress bar stays on screen after completion.
    kernel : object
        Must provide ``transform(X, X_train=...)``.
    compressor : object
        Must provide ``set_scaling_weights`` and ``scale_features``.
    strides, active_strides : array-like, optional
        If both given, features are wrapped in dicts with their strides
        before being handed to the kernel.
    stride_size : int, optional
        Passed through to ``compressor.scale_features``.
    return_score : bool
        If True, return fold-averaged scores instead of the pooled MSE.

    Returns
    -------
    dict or float
        Per-metric fold averages when ``return_score`` is True, else MSE.
    """
    Lambda = x_opt[0]
    scaling_factors = x_opt[1:]

    compressor.to_reshape = False
    compressor.set_scaling_weights(scaling_factors)

    unlinsoaps = data[0]
    unlinsoaps_active = data[1]

    X = compressor.scale_features(unlinsoaps, stride_size)
    X_active = compressor.scale_features(unlinsoaps_active, stride_size)
    if strides is not None and active_strides is not None:
        X_active = dict(strides=active_strides, feature_matrix=X_active)
        X = dict(strides=strides, feature_matrix=X)

    kMM = kernel.transform(X_active, X_train=X_active)
    kMN = kernel.transform(X_active, X_train=X)

    Mactive, Nsample = kMN.shape

    mse = 0
    scores = []
    # BUG FIX: `leave` was previously hard-coded to False, silently
    # ignoring the function parameter.
    for train, test in tqdm_cs(cv.split(y.reshape((-1, 1))),
                               total=cv.n_splits,
                               disable=disable_pbar,
                               leave=leave):
        # prepare SoR kernel: kMM + kMN_train kMN_train^T / Lambda^2 + jitter*I
        kMN_train = kMN[:, train]
        kernel_train = (kMM + np.dot(kMN_train, kMN_train.T) /
                        Lambda**2) + np.diag(np.ones(Mactive)) * jitter
        y_train = np.dot(kMN_train, y[train]) / Lambda**2

        # train the KRR model
        alpha = np.linalg.solve(kernel_train, y_train).flatten()

        # make predictions on the held-out fold
        kernel_test = kMN[:, test]
        y_pred = np.dot(alpha, kernel_test).flatten()
        if return_score is True:
            scores.append(get_score(y_pred, y[test]))

        mse += np.sum((y_pred - y[test])**2)
    mse /= len(y)

    if return_score is True:
        # average each metric over the folds
        score = {k: np.mean([sc[k] for sc in scores]) for k in scores[0]}
        return score

    return mse
# Example #5
#trainer = TrainerCholesky(memory_efficient=False)
cv = EnvironmentalKFold(n_splits=10,
                        random_state=10,
                        shuffle=True,
                        mapping=env_mapping)
jitter = 1e-8

trainer = TrainerCholesky(memory_efficient=False)

scores = []

deltas = [1, 1e-1, 1e-2, 1e-3]
Lambdas = [2, 1, 0.7, 0.5, 0.1]
N_active_samples = [3000, 5000, 10000, 15000, 20000]

for delta in tqdm_cs(deltas, desc='delta'):
    krr = KRR(jitter, delta, trainer)
    for N_active_sample in tqdm_cs(N_active_samples,
                                   desc='N_active_sample',
                                   leave=False):
        active_ids = fps_ids[:N_active_sample]
        kMM = Kmat[np.ix_(active_ids, active_ids)]
        for Lambda in tqdm_cs(Lambdas, desc='Lambda', leave=False):
            preds = []
            y_pred = np.zeros(y_train.shape)
            for train, test in tqdm_cs(cv.split(Kmat),
                                       desc='cv',
                                       total=cv.n_splits,
                                       leave=False):
                kMN = Kmat[np.ix_(active_ids, train)]
                ## assumes Lambda= Lambda**2*np.diag(np.ones(n))
# Example #6 (truncated fragment)
            Nsample = Kmat.shape[1]
            kMM = Kmat[np.ix_(active_ids,active_ids)]
            kMN = Kmat[active_ids]
        else:
            params,Kmat = load_data(Kmat_fn,mmap_mode=None)
        Nsample = Kmat.shape[0]


    # trainer = TrainerCholesky(memory_efficient=True)
    # model = KRR(jitter,delta,trainer)
    lc = LCSplit(shuffler, **lc_params)

    scores = []
    results = dict(input_params=inp,results=[])
    ii = 0
    for train,test in tqdm_cs(lc.split(y.reshape((-1,1))),total=lc.n_splits,desc='LC'):
        if ii >= start_from_iter:
            if is_SoR is True:
                Mactive = kMN.shape[0]
                kMN_train =  kMN[:,train]
                k_train = kMM + np.dot(kMN_train,kMN_train.T)/Lambda**2 + np.diag(np.ones(Mactive))*jitter
                y_train = np.dot(kMN_train,y[train])/Lambda**2
                k_test = kMN[:,test]
            else:
                Ntrain = len(train)
                k_train = Kmat[np.ix_(train,train)] + np.diag(np.ones(Ntrain))*jitter
                y_train = y[train]
                k_test = Kmat[np.ix_(train,test)]

            alpha = np.linalg.solve(k_train, y_train).flatten()
            y_pred = np.dot(alpha,k_test).flatten()
# Example #7 (truncated fragment)
    #############################################

    fps_ids = params['fps_ids']
    soap_params = params['soap_params']
    kernel_params = params['kernel_params']
    env_mapping = params['env_mapping']

    kernel = KernelPower(**kernel_params)

    cv = EnvironmentalKFold(n_splits=10,
                            random_state=10,
                            shuffle=True,
                            mapping=env_mapping)
    jitter = 1e-8

    scores = []
    preds = []

    for delta in tqdm_cs(deltas):
        krr = KRRFastCV(jitter, delta, cv)
        _, Kmat = load_data(kernel_fn, mmap_mode=None)
        krr.fit(Kmat, y_train)
        y_pred = krr.predict()
        sc = get_score(y_pred, y_train)
        sc.update(dict(delta=delta, y_pred=y_pred, y_true=y_train))
        scores.append(sc)
        preds.append(y_pred)

    df = pd.DataFrame(scores)

    df.to_json(out_fn)