Example #1
def mainregress(selection, alpha):
    if len(selection) < 2:
        return

    x = xdown.get()['value']
    y = ydown.get()['value']

    tabdata = []
    mldatax = []
    mldatay = []
    species = iris.Species.unique()
    for i, p in enumerate(selection['points']):
        mldatax.append(p['x'])
        mldatay.append(p['y'])
        tabdata.append({
            x: p['x'],
            y: p['y'],
            'species': species[p['curve']]
        })


    X = np.c_[mldatax, np.array(mldatax) ** 2]
    ridge = KernelRidge(alpha=alpha).fit(X, mldatay)

    xspace = np.linspace(min(mldatax)-1, max(mldatax)+1, 100)

    plot = pw.scatter(mldatax, mldatay, label='train', markersize=15)
    for i, df in iris.groupby('Species'):
        plot += pw.scatter(df[x], df[y], label=i)
    plot += pw.line(xspace, ridge.predict(np.c_[xspace, xspace**2]), label='model', mode='lines')
    plot.xlabel = x
    plot.ylabel = y
    linear.do_all(plot.dict)
    table1.do_data(pd.DataFrame(tabdata))
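The callback above leans on dashboard globals (xdown, ydown, iris, pw, linear, table1). A minimal standalone sketch of its modeling step, with synthetic data standing in for the selected points, might look like this:

import numpy as np
from sklearn.kernel_ridge import KernelRidge

# Synthetic stand-ins for the selected scatter points (illustrative only).
mldatax = np.random.uniform(4, 8, 30)
mldatay = 2.0 + 0.5 * mldatax + 0.2 * np.random.randn(30)

# Same quadratic feature map as above: [x, x**2].
X = np.c_[mldatax, mldatax ** 2]
ridge = KernelRidge(alpha=1.0).fit(X, mldatay)

xspace = np.linspace(mldatax.min() - 1, mldatax.max() + 1, 100)
y_model = ridge.predict(np.c_[xspace, xspace ** 2])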
Example #2
def test_kernel_ridge_singular_kernel():
    # alpha=0 causes a LinAlgError in computing the dual coefficients,
    # which causes a fallback to a lstsq solver. This is tested here.
    pred = Ridge(alpha=0, fit_intercept=False).fit(X, y).predict(X)
    kr = KernelRidge(kernel="linear", alpha=0)
    ignore_warnings(kr.fit)(X, y)
    pred2 = kr.predict(X)
    assert_array_almost_equal(pred, pred2)
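X and y here are module-level fixtures from scikit-learn's test suite. A self-contained version of the same check (the random data and import path are assumptions; ignore_warnings lives in sklearn.utils.testing in older releases):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.utils._testing import ignore_warnings

rng = np.random.RandomState(0)
X = rng.randn(10, 5)   # the linear kernel X @ X.T has rank 5, hence is singular
y = rng.randn(10)

pred = Ridge(alpha=0, fit_intercept=False).fit(X, y).predict(X)
kr = KernelRidge(kernel="linear", alpha=0)
ignore_warnings(kr.fit)(X, y)
np.testing.assert_allclose(pred, kr.predict(X), atol=1e-6)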
Example #3
def fit(self, x, y, size=1):
    # NOTE: the Keras network below is built and compiled, but the next
    # assignment immediately replaces it with a KernelRidge model, so the
    # neural net is effectively dead code.
    self.model = Sequential()
    self.model.add(Dense(int(embeddings_dim / 2.0), input_dim=embeddings_dim, init='uniform', activation='tanh'))
    self.model.add(Dense(int(embeddings_dim / 4.0), init='uniform', activation='tanh'))
    self.model.add(Dense(size, init='uniform'))
    self.model.compile(loss='mse', optimizer='rmsprop')
    self.model = KernelRidge(kernel='rbf')
    self.model.fit(x, y)
Example #4
def train_kernelRidgeModel(X, y, alpha=1, kernel="linear", gamma=None, degree=3, coef0=1, kernel_params=None):
    """
    Train a kernel ridge regression model
    """
    model = KernelRidge(
        alpha=alpha, kernel=kernel, gamma=gamma, degree=degree, coef0=coef0, kernel_params=kernel_params
    )
    model = model.fit(X, y)
    return model
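A usage sketch for this wrapper on synthetic data (values are illustrative):

import numpy as np

rng = np.random.RandomState(42)
X = rng.rand(50, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(50)

model = train_kernelRidgeModel(X, y, alpha=0.1, kernel="rbf", gamma=1.0)
print(model.predict(X[:5]))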
Example #5
def lgo_sklearn(X,y, groups, regparam):
    logo = LeaveOneGroupOut()
    errors = []
    for train, test in logo.split(X, y, groups=groups):
        rls = KernelRidge(alpha=regparam, kernel="rbf", gamma=0.01)
        rls.fit(X[train], y[train])
        p = rls.predict(X[test])
        e = sqerror(y[test], p)       
        errors.append(e)
    return np.mean(errors)
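A hedged call sketch; sqerror comes from the surrounding script, so a plausible squared-error implementation is assumed here:

import numpy as np

def sqerror(y_true, y_pred):
    # assumed definition of the helper used above
    return np.mean((y_true - y_pred) ** 2)

rng = np.random.RandomState(0)
X = rng.randn(60, 4)
y = X[:, 0] + 0.1 * rng.randn(60)
groups = np.repeat(np.arange(6), 10)  # six groups of ten samples each

print(lgo_sklearn(X, y, groups, regparam=1.0))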
Example #6
def lpo_sklearn(X,y, regparam):
    lpo = LeavePOut(p=2)
    preda = []
    predb = []
    for train, test in lpo.split(X):
        rls = KernelRidge(alpha=regparam, kernel="rbf", gamma=0.01)
        rls.fit(X[train], y[train])
        p = rls.predict(X[test])
        preda.append(p[0])
        predb.append(p[1])
    return preda, predb
Example #7
    def ANM_causation_score(self,train_size=0.5,independence_criterion='HSIC',metric='linear',regression_method='GP'):
        '''
            Measure how likely a given causal direction is true

            Parameters
            ----------
            train_size :
                Fraction of the data used for the training phase

            independence_criterion :
                kruskal for Kruskal-Wallis H-test,
                HSIC for Hilbert-Schmidt Independence Criterion

            metric :
                linear, sigmoid, rbf, poly
                kernel function used to compute the Gram matrix for HSIC;
                a Gaussian kernel is used in:
                "Nonlinear causal discovery with additive noise models",
                Patrik O. Hoyer et al.

            Returns
            -------
            causal_strength: A float between 0. and 1.
        '''
        Xtrain, Xtest , Ytrain, Ytest = train_test_split(self.X, self.Y, train_size = train_size)
        if regression_method == 'GP':
            _gp = pyGPs.GPR()      # specify model (GP regression)
            _gp.getPosterior(Xtrain, Ytrain) # fit default model (mean zero & rbf kernel) with data
            _gp.optimize(Xtrain, Ytrain)     # optimize hyperparamters (default optimizer: single run minimize)

            #Forward case
            #_gp = KernelRidge(kernel='sigmoid',degree=3)
            #_gp.fit(Xtrain,Ytrain)
            ym, ys2, fm, fs2, lp = _gp.predict(Xtest)
            #_gp.plot()
            #errors_forward = _gp.predict(Xtest) - Ytest
            errors_forward = ym - Ytest
        else:
            _gp = KernelRidge(kernel='sigmoid')
            _gp.fit(Xtrain, Ytrain)
            errors_forward = _gp.predict(Xtest) - Ytest

        #Independence score

        forward_indep_pval = {
            'kruskal': kruskal(errors_forward,Xtest)[1],
            'HSIC': self.HilbertSchmidtNormIC(errors_forward,Xtest,metric=metric)[1]
        }[independence_criterion]


        return {'causal_strength':forward_indep_pval}
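The method depends on class state (self.X, self.Y, self.HilbertSchmidtNormIC) and on pyGPs. A minimal sketch of the underlying additive-noise-model idea, with KernelRidge as the regressor and a simple Pearson test standing in for HSIC (both substitutions are assumptions, not the author's exact criterion):

import numpy as np
from scipy.stats import pearsonr
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X = rng.uniform(-2, 2, (300, 1))
Y = np.sin(X) + 0.1 * rng.randn(300, 1)   # ground truth: X causes Y

Xtr, Xte, Ytr, Yte = train_test_split(X, Y, train_size=0.5, random_state=0)
reg = KernelRidge(kernel='rbf', gamma=1.0).fit(Xtr, Ytr)
errors_forward = (reg.predict(Xte) - Yte).ravel()

# In the causal direction, residuals should be independent of the cause,
# so the p-value should be large.
print(pearsonr(errors_forward, Xte.ravel())[1])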
Example #8
    def fit(self, Ks, y, holdout=None):
        """Learn weights for kernel matrices or Kinterfaces.

        :param Ks: (``list``) of (``numpy.ndarray``) or of (``Kinterface``) to be aligned.

        :param y: (``numpy.ndarray``) Class labels :math:`y_i \in \{-1, 1\}` or regression targets.

        :param holdout: (``list``) List of indices to exclude from alignment.
        """

        # Expand kernel interfaces to kernel matrices
        expand = lambda K: K[:, :] if isinstance(K, Kinterface) else K
        Hs     = map(expand, Ks)

        # Assert correct dimensions
        assert Ks[0].shape[0] == len(y)

        # Fit MKL model
        if self.method in self.supervised:
            self.mkl_model.fit(Hs, y, holdout=holdout)
        else:
            self.mkl_model.fit(Hs)

        if self.low_rank:
            self.X = hstack(map(lambda e: sqrt(e[0]) * e[1],
                                zip(self.mkl_model.mu, Hs)))

            if self.method in self.centered:
                self.X = center_kernel_low_rank(self.X)
                self.X[where(isnan(self.X))] = 0

            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd,
                                     kernel="linear", )

            # Fit ridge on the examples minus the holdout set
            inxs = list(set(range(Hs[0].shape[0])) - set(holdout))
            self.ridge.fit(self.X[inxs], y[inxs])
            self.trained = True

        else:
            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd,
                                     kernel=self.mkl_model, )

            # Fit ridge on the examples minus the holdout set
            inxs = array(list(set(range(Hs[0].shape[0])) - set(holdout)))
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            self.ridge.fit(inxs, y[inxs])
            self.trained = True
Example #9
def xyz_kde(xyz,gamma,N_grid=100):
    xy = xyz[:,:-1]
    z = xyz[:,-1]
    
    x_edges = np.linspace(np.min(xy[:,0]),np.max(xy[:,0]),N_grid+1)
    y_edges = np.linspace(np.min(xy[:,1]),np.max(xy[:,1]),N_grid+1)
    x_centres = np.array([x_edges[b] + (x_edges[b+1]-x_edges[b])/2 
                          for b in range(N_grid)])
    y_centres = np.array([y_edges[b] + (y_edges[b+1]-y_edges[b])/2 
                          for b in range(N_grid)])
    x_grid, y_grid = np.meshgrid(x_centres,y_centres)
    xy_grid = np.array([np.ravel(x_grid),np.ravel(y_grid)]).T
    clf = KernelRidge(kernel='rbf',gamma=gamma).fit(xy,z)
    H = clf.predict(xy_grid).reshape(N_grid,N_grid)
    return H, x_grid, y_grid, gamma
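Despite the _kde name, this interpolates z over the (x, y) plane with kernel ridge regression. A usage sketch on a synthetic point cloud:

import numpy as np

rng = np.random.RandomState(1)
xyz = np.column_stack([rng.randn(200), rng.randn(200), rng.rand(200)])
H, x_grid, y_grid, gamma = xyz_kde(xyz, gamma=0.5, N_grid=50)
print(H.shape)  # (50, 50) grid of predicted z values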
Example #10
def plot_kernel_ridge(X, y, gamma=0.5, alpha=0.1):
    # kernel (ridge) regression
    krr = KernelRidge(kernel="rbf", gamma=gamma, alpha=alpha)
    krr.fit(X, y)

    # predict over a dense grid (X.min()/X.max() also work for 2-D X,
    # unlike the builtins min/max)
    x_plot = np.linspace(X.min(), X.max(), 100)[:, np.newaxis]
    y_plot = krr.predict(x_plot)

    # plot
    plt.figure(figsize=(8, 4.8))
    plt.plot(X, y, 'or')
    plt.plot(x_plot, y_plot)
#     plt.title(r"Gaussian Kernel ($\gamma=%0.2f, \alpha=%0.2f$)" % (gamma,alpha), fontsize=16)
    plt.title(r"Gaussian Kernel ($\gamma=%0.2f$)" % (gamma), fontsize=16)
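A call sketch with noisy sine data (assumes the numpy, matplotlib and KernelRidge imports used above):

import numpy as np

rng = np.random.RandomState(0)
X = 5 * rng.rand(80, 1)
y = np.sin(X).ravel() + 0.1 * rng.randn(80)
plot_kernel_ridge(X, y, gamma=0.5, alpha=0.1)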
Example #11
def modelfitOne(train_X, train_y, test_X, yd, ImageId, FeatureName):
    n_clf = 1
    # estimator
    clf = KernelRidge(kernel='rbf', gamma=6e-4, alpha=2e-2)
    # train
    print('----------------- starting training... ------------------')
    clf.fit(train_X, train_y)
    # predict
    print('----------------- starting prediction... ------------------')
    pred = clf.predict(test_X)
    predicted = np.zeros(len(FeatureName))
    for i in range(len(FeatureName)):
        if i % 500 == 0:
            print('i =', i)
        else:
            pass
        imageID = ImageId[i]
        clfID = yd[FeatureName[i]]
        predicted[i] = pred[imageID, clfID]
    predicted = predicted*48.+48.
    return predicted
Example #12
# ##Base models

# -  **LASSO Regression** :
#
# This model can be very sensitive to outliers, so we need to make it more robust to them. For that we use sklearn's **RobustScaler()** in the pipeline.
lasso = make_pipeline(RobustScaler(), Lasso(alpha=0.0005, random_state=1))

# - **Elastic Net Regression** :
#
# again made robust to outliers
ENet = make_pipeline(RobustScaler(),
                     ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))

# - **Kernel Ridge Regression** :
KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)

# - **Gradient Boosting Regression** :
#
# With **huber**  loss that makes it robust to outliers
#
GBoost = GradientBoostingRegressor(n_estimators=3000,
                                   learning_rate=0.05,
                                   max_depth=4,
                                   max_features='sqrt',
                                   min_samples_leaf=15,
                                   min_samples_split=10,
                                   loss='huber',
                                   random_state=5)

# - **XGBoost** :
Example #13
def ml_param_scan(x_datafile,
                  y_datafile,
                  ids,
                  testfiles,
                  alpha_list=np.logspace(-1, -8, 8),
                  gamma_list=np.logspace(-2, -10, 9),
                  kernel_list=['rbf'],
                  layer_list=[(40, 40, 40)],
                  learning_rate_list=[0.001],
                  sample_size=0.1,
                  ml_method="krr"):
    print('model ' + ml_method)
    # load, split and scale data
    x_train, x_test, y_train, y_test, ids_train, ids_test = load_split_scale_data(
        x_datafile, y_datafile, ids, testfiles, sample_size)

    if ml_method == "krr":
        # Create kernel ridge regression object
        learner = GridSearchCV(KernelRidge(kernel='rbf'),
                               n_jobs=8,
                               cv=5,
                               param_grid={
                                   "alpha": alpha_list,
                                   "gamma": gamma_list,
                                   "kernel": kernel_list
                               },
                               scoring='neg_mean_absolute_error')

    elif ml_method == "mlp":
        # Create Multi-Layer Perceptron object
        learner = GridSearchCV(MLPRegressor(hidden_layer_sizes=(40, 40, 40),
                                            max_iter=1600,
                                            alpha=0.001,
                                            learning_rate_init=0.001),
                               n_jobs=8,
                               cv=5,
                               param_grid={
                                   "alpha": alpha_list,
                                   "learning_rate_init": learning_rate_list,
                                   "hidden_layer_sizes": layer_list
                               },
                               scoring='neg_mean_absolute_error')
    else:
        print("ML method unknown. Exiting.")
        exit(1)
    t_ml0 = time.time()
    learner.fit(x_train, y_train)
    t_ml1 = time.time()
    print("ml time", str(t_ml1 - t_ml0))

    # getting best parameters
    learner_best = learner.best_estimator_

    mae, mse, y_pred, train_y_pred, learner_best = predict_and_error(
        learner_best, x_test, x_train, y_test)

    ### OUTPUT ###
    write_output(learner, sample_size, ml_method, mae, mse, "param", ids_test,
                 y_test, y_pred, ids_train, y_train, train_y_pred)

    return learner.best_params_
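A hedged call sketch; the data files and the helpers load_split_scale_data, predict_and_error and write_output belong to the surrounding project, so the paths below are placeholders:

best = ml_param_scan('x_data.csv', 'y_data.csv',
                     ids='ids.csv', testfiles=None,
                     alpha_list=np.logspace(-2, -6, 5),
                     gamma_list=np.logspace(-3, -7, 5),
                     sample_size=0.1, ml_method="krr")
print(best)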
Example #14
    def trainModel(self, s=None, a=None):
        """
        Trains model on given states and actions.
        Uses neural net or SVM based on global
        settings.
        """
        states, actions = self.states[3:], self.actions[3:]
        #print "states.shape"
        #print states.shape
        #print "actions.shape"
        #print actions.shape

        if len(self.itr) == 0:
            self.itr = np.array([states.shape[0]])
        else:
            self.itr = np.hstack((self.itr, states.shape[0]))

        '''if states.shape[0] > 2700.0:
            f = os.path.join(self.path, 'statesToValidate.npy')
            np.save(f, states)
            IPython.embed()'''

        
        fits = []

        #actions = actions.ravel()
        self.clf = KernelRidge(alpha=1.0)
        self.clf.kernel = 'rbf'
        print "SIZE: ", states.shape
        self.clf.fit(states, actions)
        #IPython.embed()
        actions_pred = self.clf.predict(states)
        bad_state = np.zeros(actions_pred.shape[0])
        for i in range(actions_pred.shape[0]):
            fit =  LA.norm(actions_pred[i,:] - actions[i,:])
            fits.append(fit)

        med = np.median(np.array(fits))
        for i, fit in enumerate(fits):
            if fit > med:
                bad_state[i] = 1

        IPython.embed()

        if self.useSHIV:
            self.labels = np.zeros(states.shape[0])+1.0
            self.scaler = preprocessing.StandardScaler().fit(states)
            states_proc = self.scaler.transform(states)
            
            good_labels = bad_state == 0.0         
            states_g = states_proc[good_labels,:] 

            bad_labels = bad_state == 1.0 
            states_b = states_proc[bad_labels,:] 
            #IPython.embed()
            self.ahqp_solver_g.assembleKernel(states_g, np.zeros(states_g.shape[0])+1.0)
            self.ahqp_solver_b.assembleKernel(states_b, np.zeros(states_b.shape[0])+1.0)
            #IPython.embed()
            self.ahqp_solver_g.solveQP()
            self.ahqp_solver_b.solveQP()

            #score = self.clf.score(states, actions)
            #print score
        
        self.plot(fits, states, med)
Example #15
            if t == start:
                Xr = abs(x1 - x2)
            else:
                Xr = hstack((Xr, abs(x1 - x2)))
        y = zeros((nono, dt))
        y[cho, :] = 1  #blurred

        if i == 0 and cho == 0:
            X = Xr
            Y = y
        else:
            X = hstack((X, Xr))
            Y = hstack((Y, y))

clf = KernelRidge(alpha=1)
clf.fit(X.T, Y.T)
Wpred = dot(dot(Y, X.T),
            linalg.inv(dot(X, X.T) + err * eye(dot(X, X.T).shape[0])))
print('yay')

#prediction state

correct = 0
wrong = 0
lout = np.where(test_set[1] > 5)[0]
vs = []
rot = []
blur = []
scale = []
diff = []
            
Example #16
            if varNames[variable] == 'eccentricity':
                predictor = predictor[~np.isinf(predictand)]
                predictand = predictand[~np.isinf(predictand)]
                
            # Prediction grid
            predictor_grid = np.linspace(np.min(predictor), np.max(predictor), 1000)
            
            #### KERNEL RIDGE REGRESSION
            alphaVec = [0.1, 0.01]
            sigmaVec = np.arange(5.0, 5.5, 0.5)
            
            if len(alphaVec) > 1 or len(sigmaVec) > 1:
                # Grid search of parameters
                param_grid = {"alpha": alphaVec, "kernel": [RBF(length_scale) for length_scale in sigmaVec]}
                kr = KernelRidge()
                kr = GridSearchCV(KernelRidge(), cv=5, param_grid=param_grid)
            else:
                # Run with pre-defined parameter set
                kr = KernelRidge(alpha=alphaVec[0], kernel='rbf', gamma=sigmaVec[0])
            
            # Fit model
            kr.fit(predictor.reshape(-1,1), predictand.reshape(-1,1))
            
            # Get best parameters
            bestAlpha_kr = kr.best_params_['alpha']
            bestSigma_kr = kr.best_params_['kernel'].length_scale

            # Predict over grid
            kr_fit = kr.predict(predictor_grid.reshape(-1,1))
            
Example #17
test_labels = [ label for ( txt , label ) in data[train_size:-1] ]
tokenizer = Tokenizer(nb_words=max_features, filters=keras.preprocessing.text.base_filter(), lower=True, split=" ")
tokenizer.fit_on_texts(train_texts)
train_sequences = sequence.pad_sequences( tokenizer.texts_to_sequences( train_texts ) , maxlen=max_sent_len )
test_sequences = sequence.pad_sequences( tokenizer.texts_to_sequences( test_texts ) , maxlen=max_sent_len )
train_matrix = tokenizer.texts_to_matrix( train_texts )
test_matrix = tokenizer.texts_to_matrix( test_texts )
embedding_weights = np.zeros( ( max_features , embeddings_dim ) )
for word,index in tokenizer.word_index.items():
  if index < max_features:
    try: embedding_weights[index,:] = embeddings[word]
    except: embedding_weights[index,:] = np.random.rand( 1 , embeddings_dim )

print ("")
print ("Method = Linear ridge regression with bag-of-words features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix , train_labels )
results = model.predict( test_matrix )
if not(is_geocoding): 
  print ("RMSE = " + repr( np.sqrt(mean_squared_error( test_labels , results )) ) )
  print ("MAE = " + repr( mean_absolute_error( test_labels , results ) ) )
else: 
  print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels[i] ) for i in range(results.shape[0]) ] ) ) )
  print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels[i] ) for i in range(results.shape[0]) ] ) ) )

print ("")
print ("Method = MLP with bag-of-words features")
np.random.seed(0)
model = Sequential()
model.add(Dense(embeddings_dim, input_dim=train_matrix.shape[1], init='uniform', activation='relu'))
model.add(Dropout(0.25))
Example #18
Y = (2 + np.sin(X) + noisesigma * np.random.randn(22, 1))
#Y[[5,10,15],0] = 2 * Y[[5,10,15],0]

#Testing Set
Xp = np.zeros((110,1))
Xp[:,0] = np.arange(0,11,.1)
Yp = (2 + np.sin(Xp))


# Linear Regression
reglr = linear_model.LinearRegression()
reglr.fit(X,Y)
Ylr = reglr.predict(Xp)

# Kernel Ridge Regression
regkr = KernelRidge(kernel='rbf', gamma=0.1,alpha=0.1)
regkr.fit(X,Y)
Ykr = regkr.predict(Xp)

# Kernel Regression
Yp1 = kernelregress(np.hstack((X,Y)),Xp,10)
Yp2 = kernelregress(np.hstack((X,Y)),Xp,1)

# Decision Tree Regressor
min_samples_split = 3
regtree = tree.DecisionTreeRegressor(min_samples_split=min_samples_split)
regtree = regtree.fit(X, Y)
Ytree = regtree.predict(Xp)


plt.plot(X,Y,'go',label='true')
Example #19
def main():
    
    T = 300.0 # Simulation temperature
    dt = 1 * units.fs # MD timestep
    nsteps = 100000 # MD number of steps
    mixing = [1,-1,0] # [1.0, -1.0, 0.3] # mixing weights for "real" and ML forces
    lengthscale = 0.5 # KRR Gaussian width.
    gamma = 1 / (2 * lengthscale**2)
    grid_spacing = 0.05
    #     mlmodel = GaussianProcess(corr='squared_exponential', 
    #         # theta0=1e-1, thetaL=1e-4, thetaU=1e+2,
    #         theta0=1., 
    #         random_start=100, normalize=False, nugget=1.0e-2)
    mlmodel = KernelRidge(kernel='rbf',
                          gamma=gamma, gammaL=gamma/4, gammaU=2*gamma,
                          alpha=1.0e-2, variable_noise=False, max_lhood=False)
    # NOTE: gammaL, gammaU, variable_noise and max_lhood are not standard
    # sklearn KernelRidge parameters; a project-specific subclass is assumed.
    anglerange = sp.arange(0, 2*sp.pi + grid_spacing, grid_spacing)
    X_grid = sp.array([[sp.array([x,y]) for x in anglerange]
                       for y in anglerange]).reshape((len(anglerange)**2, 2))
    ext_field = None # IgnoranceField(X_grid, y_threshold=-0.075, cutoff = 3.)
                           
    # Bootstrap from initial database? uncomment
    # data = sp.loadtxt('phi_psi_F.csv')
    # # data[:,:2] -= 0.025 # fix because of old round_vector routine
    # mlmodel.fit(data[:,:2], data[:,2])
    # ext_field.update_cost(mlmodel.X_fit_, mlmodel.y)

    # mlmodel.fit(X_grid, sp.zeros(len(X_grid)))
    # mlmodel.fit(sp.load('X_fitD.npy'), sp.load('y_fitD.npy'))
    
    # Prepare diagnostic visual effects.
    plt.close('all')
    plt.ion()
    fig, ax = plt.subplots(1, 2, figsize=(24, 13))
    
    atoms = ase.io.read('myplum.xyz')
    with open('data.input', 'r') as file:
        lammpsdata = file.readlines()

    # Set temperature
    MaxwellBoltzmannDistribution(atoms, 0.5 * units.kB * T, force_temp=True)
    # Set total momentum to zero
    p = atoms.get_momenta()
    p -= p.sum(axis=0) / len(atoms)
    atoms.set_momenta(p)
    atoms.rescale_velocities(T)
    
    # Select MD propagator
    mdpropagator = Langevin(atoms, dt, T*units.kB, 1.0e-2, fixcm=True)
    # mdpropagator = MLVerlet(atoms, dt, T)

    # Zero-timestep evaluation and data files setup.
    print("START")
    pot_energy, f = calc_lammps(atoms, preloaded_data=lammpsdata)
    mlmodel.accumulate_data(round_vector(atoms.colvars(), precision=grid_spacing), 0.)
    # mlmodel.accumulate_data(round_vector(atoms.colvars(), precision=grid_spacing), pot_energy)
    printenergy(atoms, pot_energy)
    try:
        os.remove('atomstraj.xyz')
    except:
        pass
    traj = open("atomstraj.xyz", 'a')
    atoms.write(traj, format='extxyz')
    results, traj_buffer = [], []

    # When in the simulation to update the ML fit -- optional.
    teaching_points = sp.unique((sp.linspace(0, nsteps**(1/3), nsteps // 20)**3).astype('int') + 1)

    # MD Loop
    for istep in range(nsteps):
        # Flush Cholesky decomposition of K
        if istep % 1000 == 0:
            mlmodel.Cho_L = None
            mlmodel.max_lhood = False
        print("Dihedral angles | phi = %.3f, psi = %.3f " % (atoms.phi(), atoms.psi()))
        do_update = (istep % 60 == 59)
        t = get_time()
        mdpropagator.halfstep_1of2(f)
        print("TIMER 001 | %.3f" % (get_time() - t))
        t = get_time()
        f, pot_energy, _ = get_all_forces(atoms, mlmodel, grid_spacing, T, extfield=ext_field, mixing=mixing, lammpsdata=lammpsdata, do_update=do_update)
        if do_update and mlmodel.max_lhood:
            mlmodel.max_lhood = False
        mdpropagator.halfstep_2of2(f)
        print("TIMER 002 | %.3f" % (get_time() - t))


        # manual cooldown!!!
        if sp.absolute(atoms.get_kinetic_energy() / (1.5 * units.kB * atoms.get_number_of_atoms()) - T) > 100:
            atoms.rescale_velocities(T)

        printenergy(atoms, pot_energy/atoms.get_number_of_atoms(), step=istep)
        # if do_update:
        #     try:
        #         print("Lengthscale = %.3e, Noise = %.3e" % (1/(2 * mlmodel.gamma)**0.5, mlmodel.noise.mean()))
        #     except:
        #         print("")
        if istep % 60 == 59:
            t = get_time()
            if 'datasetplot' not in locals():
                datasetplot = pl.Plot_datapts(ax[0], mlmodel)
            else:
                datasetplot.update()
            if hasattr(mlmodel, 'dual_coef_'):
                if 'my2dplot' not in locals():
                    my2dplot = pl.Plot_energy_n_point(ax[1], mlmodel, atoms.colvars().ravel())
                else:
                    my2dplot.update_prediction()
                    my2dplot.update_current_point(atoms.colvars().ravel())
            print("TIMER 003 | %.03f" % (get_time() - t))
            t = get_time()
            fig.canvas.draw()
            print("TIMER 004 | %.03f" % (get_time() - t))
            # fig.canvas.print_figure('current.png')
        t = get_time()
        # traj_buffer.append(atoms.copy())
        # if istep % 100 == 0:
        #     for at in traj_buffer:
        #         atoms.write(traj, format='extxyz')
        #     traj_buffer = []
        results.append(sp.array([atoms.phi(), atoms.psi(), pot_energy]))
        print("TIMER 005 | %.03f" % (get_time() - t))        
    traj.close()
    print("FINISHED")
    sp.savetxt('results.csv', sp.array(results))
    sp.savetxt('mlmodel.dual_coef_.csv', mlmodel.dual_coef_)
    sp.savetxt('mlmodel.X_fit_.csv', mlmodel.X_fit_)
    sp.savetxt('mlmodel.y.csv', mlmodel.y)
    calc = None
    
    return mlmodel
Example #20
input_dim = 12
M = 15
alpha = 1.5848931924611107e-05
gamma = 0.14174741629268056

M = 200
alpha = 2.511886431509572e-09
gamma = 0.06579332246575682

#print(np.max(pos))
[train_set, test_set, train_ens, test_ens, train_fours,
 test_fours] = get_train_test(M, N, input_dim, pos, ens, fours)
print(len(train_set))
print(type(train_set))
kr = KernelRidge(kernel='rbf', alpha=alpha, gamma=gamma)
AlKRR = fit_quick(train_set, train_ens, alpha=alpha, gamma=gamma)
AlKRR.original_train_set = train_set
AlKRR.original_train_ens = train_ens

# In[32]:

# choose hyperparameter grids
alphas = np.logspace(-20, -1, 6)
gammas = np.logspace(-2, 1, 100)

for n in range(len(Ms)):
    M = Ms[n]

    # split into test and training
    [train_set, test_set, train_ens, test_ens, train_fours,
Example #21
def relationship_road_traffic_accidents():
    accidents = glob.glob('accident/*.csv')

    acc = 0
    for a in accidents:
        acc = ReadAccident.Accident(a)

    toronto_traffic = pd.read_csv('traffic/traffic-vehicle.csv')

    # Relationship between peak vehicle volume and # of accidents at that intersection

    shp_files = glob.glob('shapefiles/*.shp')

    shp_data_objs = []

    for shp in shp_files:
        print(shp)
        shp_obj = ReadSHP.ReadSHPFile(shp, shp)
        shp_data_objs.append(shp_obj)

    data = acc.data

    intersec_id = {}
    other_xs = {}

    print('Running')

    for i in range(len(data)):
        long = data[i].long
        lat = data[i].lat
        fatal = data[i].fatal

        min_index = 0
        min_dist = math.sqrt(
            math.pow(long - toronto_traffic.loc[0, 'Longitude'], 2) +
            math.pow(lat - toronto_traffic.loc[0, 'Latitude'], 2))
        for j in range(1, len(toronto_traffic.index.values)):
            dist = math.sqrt(
                math.pow(long - toronto_traffic.loc[j, 'Longitude'], 2) +
                math.pow(lat - toronto_traffic.loc[j, 'Latitude'], 2))
            if dist < min_dist:
                min_dist = dist
                min_index = j

        if min_index not in intersec_id:
            intersec_id[min_index] = 1
        else:
            intersec_id[min_index] += 1

        if min_index not in other_xs:
            missing_xs = []
            for s in shp_data_objs:
                missing_xs.append(
                    s.binary_search(
                        toronto_traffic.loc[min_index, 'Longitude'],
                        toronto_traffic.loc[min_index, 'Latitude']))
            other_xs[min_index] = missing_xs

    xs = []
    ys = []
    for j in intersec_id:
        dt = [toronto_traffic.loc[j, '8 Peak Hr Vehicle Volume']]
        dt.extend(other_xs[j])
        xs.append(dt)
        ys.append(intersec_id[j])

    print(xs)
    xs = np.array(xs)
    ys = np.array(ys)

    # xs = sm.add_constant(xs)

    model = sm.OLS(ys, xs).fit()

    print(model.summary())
    print(model.params)

    clf = KernelRidge(alpha=1.0)
    clf.fit(xs, ys)

    file = open('k_reg.pickle', 'wb')
    pickle.dump(clf, file)
    print(clf.get_params())
Example #22
# Add noise to targets
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))

X_plot = np.linspace(0, 5, 100000)[:, np.newaxis]

# #############################################################################
# Fit regression model
train_size = 100
svr = GridSearchCV(SVR(kernel='rbf', gamma=0.01),
                   cv=5,
                   param_grid={
                       "C": [1e0, 1e1, 1e2, 1e3],
                       "gamma": np.logspace(-2, 2, 5)
                   })
kr = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1),
                  cv=5,
                  param_grid={
                      "alpha": [1e0, 1e-1, 1e-2, 1e-3],
                      "gamma": np.logspace(-2, 2, 5)
                  })

t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0
print("SVR complexity and bandwidth selected and model fitted in %.3f s" %
      svr_fit)

t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0
Example #23
#
# LinearRegression
# Ridge
# Lasso
# Random Forrest
# Gradient Boosting Tree
# Support Vector Regression
# Linear Support Vector Regression
# ElasticNet
# Stochastic Gradient Descent
# BayesianRidge
# KernelRidge
# ExtraTreesRegressor
# XgBoost
models = [LinearRegression(),Ridge(),Lasso(alpha=0.01,max_iter=10000),RandomForestRegressor(),GradientBoostingRegressor(),SVR(),LinearSVR(),
          ElasticNet(alpha=0.001,max_iter=10000),SGDRegressor(max_iter=1000,tol=1e-3),BayesianRidge(),KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5),
          ExtraTreesRegressor(),XGBRegressor()]

names = ["LR", "Ridge", "Lasso", "RF", "GBR", "SVR", "LinSVR", "Ela","SGD","Bay","Ker","Extra","Xgb"]
# for name, model in zip(names, models):
#     score = rmse_cv(model, X_scaled, y_log)
#     print("{}: {:.6f}, {:.4f}".format(name,score.mean(),score.std()))

# grid helper class for finding the best parameters
class grid():
    def __init__(self, model):
        self.model = model

    def grid_get(self, X, y, param_grid):
        grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring="neg_mean_squared_error")
        grid_search.fit(X, y)
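A usage sketch for the grid helper (X_scaled and y_log as in the surrounding notebook; grid_get presumably goes on to report grid_search.best_params_, which is truncated above):

grid(Lasso(max_iter=10000)).grid_get(
    X_scaled, y_log, {'alpha': [0.0004, 0.0005, 0.0007, 0.0009]})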
Example #24
def krr():
    # KRR
    # linear
    print ()
    tl = time.time()
    #alphas = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
    alphas = [1.0]
    lin_krr = KernelRidge(alpha=alphas, kernel='linear')
    hyperparams = {'alpha' : alphas}
    
    lin_krr_clf = GridSearchCV(lin_krr, hyperparams, cv=5)
    
    lin_krr_fit = lin_krr_clf.fit(cv_train_X, cv_train_Y)
    lin_krr_res = lin_krr_clf.cv_results_
    lin_krr_params = lin_krr_clf.best_params_
    lin_krr_score = lin_krr_clf.best_score_
    
    
    test_pred = lin_krr_clf.predict(test_X)
    # The Coefficients
    print('Test Estimator : \n', lin_krr_clf.best_estimator_)
    # Mean Squared Error
    mse = float(m_s_e(test_Y, test_pred))
    print('Test Mean Squared Error : %f' % mse)
    # Root Mean Squared Error
    rmse = float(np.sqrt(m_s_e(test_Y, test_pred)))
    print('Test Root Mean Squared Error : %f' % rmse)
    # R^2 (Coefficient of Determination) Regression Score
    rsq = float(r2_s(test_Y, test_pred))
    print('Test R^2 Regression Score : %f' % rsq)
    tl = time.time() - tl
    print ('Time Secs : %f' % tl)
    
    # polynomial
    print ()
    tp = time.time()
    alphas = [1.0]
    degs = [2, 4, 7] # M
    hyperparams = {'alpha' : alphas, 'degree' : degs}
    poly_krr = KernelRidge(kernel='poly', alpha=alphas, degree=degs, gamma=1, coef0=1)
    
    poly_krr_clf = GridSearchCV(poly_krr, hyperparams, cv=5)
    
    poly_krr_fit = poly_krr_clf.fit(cv_train_X, cv_train_Y)
    poly_krr_res = poly_krr_clf.cv_results_
    poly_krr_params = poly_krr_clf.best_params_
    poly_krr_score = poly_krr_clf.best_score_
    
    
    test_pred = poly_krr_clf.predict(test_X)
    # The Coefficients
    print('Test Estimator : \n', poly_krr_clf.best_estimator_)
    # Mean Squared Error
    mse = float(m_s_e(test_Y, test_pred))
    print('Test Mean Squared Error : %f' % mse)
    # Root Mean Squared Error
    rmse = float(np.sqrt(m_s_e(test_Y, test_pred)))
    print('Test Root Mean Squared Error : %f' % rmse)
    # R^2 (Coefficient of Determination) Regression Score
    rsq = float(r2_s(test_Y, test_pred))
    print('Test R^2 Regression Score : %f' % rsq)
    tp = time.time() - tp
    print ('Time Secs : %f' % tp)
    
    # gaussian/rbf
    print ()
    tg = time.time()
    alphas = [1.0]
    sigmas = [0.1, 0.5, 1.0, 2.0, 4.0]
    hyperparams = {'alpha' : alphas, 'gamma' : sigmas}
    rbf_krr = KernelRidge(kernel='rbf', alpha=alphas, gamma=sigmas)
    
    rbf_krr_clf = GridSearchCV(rbf_krr, hyperparams, cv=5)
    
    rbf_krr_fit = rbf_krr_clf.fit(cv_train_X, cv_train_Y)
    rbf_krr_res = rbf_krr_clf.cv_results_
    rbf_krr_params = rbf_krr_clf.best_params_
    rbf_krr_score = rbf_krr_clf.best_score_
    
    
    test_pred = rbf_krr_clf.predict(test_X)
    # The Coefficients
    print('Test Estimator : \n', rbf_krr_clf.best_estimator_)
    # Mean Squared Error
    mse = float(m_s_e(test_Y, test_pred))
    print('Test Mean Squared Error : %f' % mse)
    # Root Mean Squared Error
    rmse = float(np.sqrt(m_s_e(test_Y, test_pred)))
    print('Test Root Mean Squared Error : %f' % rmse)
    # R^2 (Coefficient of Determination) Regression Score
    rsq = float(r2_s(test_Y, test_pred))
    print('Test R^2 Regression Score : %f' % rsq)
    tg = time.time() - tg
    print ('Time Secs : %f' % tg)
    return None


# End of File
Example #25
train_matrix1 = preprocessing.scale( train_matrix1 )
test_matrix1 = preprocessing.scale( test_matrix1 )
data2 = [ ( [ float(row[i]) for i in range(len(row) - 2) ] , ( float( row[ len(row) - 2 ] ) , float( row[ len(row) - 1 ] ) ) ) for row in csv.reader( open("default_plus_chromatic_features_1059_tracks.txt"), delimiter=',', quoting=csv.QUOTE_NONE) ]
np.random.seed(0)
np.random.shuffle( data2 )
train_size2 = int(len(data2) * percent)
train_matrix2 = np.array( [ features for ( features, label ) in data2[0:train_size2] ] )
test_matrix2 = np.array( [ features for ( features, label ) in data2[train_size2:-1] ] )
train_labels2 = [ label for ( features , label ) in data2[0:train_size2] ]
test_labels2 = [ label for ( features , label ) in data2[train_size2:-1] ]
train_matrix2 = preprocessing.scale( train_matrix2 )
test_matrix2 = preprocessing.scale( test_matrix2 )

print ("")
print ("Method = Linear ridge regression - Default features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix1 , train_labels1 )
results = model.predict( test_matrix1 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels1[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Method = Linear ridge regression - Default features + chromatic features")
model = KernelRidge( kernel='linear' )
model.fit( train_matrix2 , train_labels2 )
results = model.predict( test_matrix2 )
print ("Mean error = " + repr( np.mean( [ geodistance( results[i] , test_labels2[i] ) for i in range(results.shape[0]) ] ) ) )
print ("Median error = " + repr( np.median( [ geodistance( results[i] , test_labels2[i] ) for i in range(results.shape[0]) ] ) ) )

print ("")
print ("Method = Random forest regression - Default features")
model = RandomForestRegressor( n_estimators=100 , random_state=0 )
model.fit( train_matrix1 , train_labels1 )
Example #26
    #print(f_vector)  # 1 x vector_size array
X = np.array(X).reshape(-1, vector_size)
y = np.array(y).ravel()

# In[ ]:

from sklearn.model_selection import train_test_split
from sklearn.kernel_ridge import KernelRidge

# In[ ]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# In[ ]:

clf = KernelRidge(alpha=1.0, kernel=my_kernel)
clf.fit(X_train, y_train)

# In[ ]:

train_score = clf.score(X_train, y_train)
test_score = clf.score(X_test, y_test)
print('train score: {0}; test score: {1}'.format(train_score, test_score))

# In[ ]:

from plot_learning_curve import *
title = "Learning Curves (Kernel Ridge Regression)"
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
plot_learning_curve(clf, title, X, y, cv=cv, n_jobs=4)
plt.show()
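my_kernel above is defined elsewhere in the notebook. scikit-learn calls a callable kernel once per pair of sample vectors and expects a scalar similarity back, so a hypothetical stand-in could be:

import numpy as np

def my_kernel(x, y):
    # hypothetical RBF-style kernel; the notebook's real my_kernel is not shown
    gamma = 0.1
    return np.exp(-gamma * np.sum((x - y) ** 2))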
Example #27
diff_fano = fano[5,neuron_keep]-fano[3,neuron_keep]
diff_fano = diff_fano[~np.isnan(diff_fano)]
pna.cal_CohenD(diff_fano)

""" temp_script """
x, y = data_tuning_mean[:,:,-10:-1].ravel(), data_tuning_std[:,:,-10:-1].ravel()
kr = KernelRidge()
kr.fit(x.reshape(-1, 1), y)  # sklearn expects a 2-D feature matrix

kr = kernel_regression.KernelReg(y, x, ['c'], bw=[np.std(x)/5])
plt.plot(x,y, '.')
plt.plot(x, kr.fit(x)[0], 'o')


for i in range(data_tuning_mean.shape[0]):
    plot_kr(data_tuning_mean[i, :, -3].ravel(), data_tuning_std[i, :, -3].ravel(), color=colors[i], linestyle=linestyles[i])


""" legacy code """
data_neuro_cur = signal_align.select_signal(data_neuro_spk, chan_filter=range( 0,32), sortcode_filter=range(1,4))
data_neuro_cur = signal_align.select_signal(data_neuro_spk, chan_filter=range(33,48), sortcode_filter=range(1,4))
plt.figure()
Example #28
#%%

model = NuSVR(gamma='scale', nu=0.9, tol=0.01)
oof_svr1, prediction_svr1 = train_model(params=None,
                                        model_type='sklearn',
                                        model=model)

#%%

params = {'loss_function': 'MAE'}
oof_cat, prediction_cat = train_model(params=params, model_type='cat')

#%%

model = KernelRidge(kernel='rbf')
oof_r, prediction_r = train_model(params=None,
                                  model_type='sklearn',
                                  model=model)

#%%

plt.figure(figsize=(18, 8))
plt.subplot(2, 3, 1)
plt.plot(y_tr, color='g', label='y_train')
plt.plot(oof_lgb, color='b', label='lgb')
plt.legend(loc=(1, 0.5))
plt.title('lgb')
plt.subplot(2, 3, 2)
plt.plot(y_tr, color='g', label='y_train')
plt.plot(oof_xgb, color='teal', label='xgb')
Example #29
File: 999.py  Project: memoiry/2016-
X = np.arange(194)[:,None]
y = z


X_plot = np.arange(0,210)[:,None]

#############################################################################
# Fit regression model
train_size = 18630
C = 3e6
gamma = 0.01
svr = SVR(kernel='rbf', C=C, gamma=gamma)
alpha = 0.23
gamma1 = 0.01
kr = KernelRidge(kernel='rbf', gamma=gamma1,alpha = alpha)

t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0

t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0

t0 = time.time()
y_svr = svr.predict(X_plot)
svr_predict = time.time() - t0

t0 = time.time()
y_kr = kr.predict(X_plot)
Example #30
print(np.isnan(train_SalePrice).any())
print(np.isnan(train_data).any())
print(np.isfinite(train_data).all())
print(np.isfinite(train_SalePrice).all())

score = rmse_cv(lasso)
print("Lasso Score: ", score.mean())
score = rmse_cv(ridge)
print("ridge Score: ", score.mean())
"""output
Lasso Score:  0.13320361773268208
ridge Score:  0.21324437336842056
"""

KRR = KernelRidge()
score = rmse_cv(KRR)
print("KRR Score: ", score.mean())
"""output
KRR Score:  0.14740376549230474

"""

GB = GradientBoostingRegressor(n_estimators=3000,
                               learning_rate=0.05,
                               max_depth=4,
                               max_features='sqrt',
                               min_samples_leaf=15,
                               min_samples_split=10,
                               loss='huber',
                               random_state=5)
Example #31
    sig_h[k] = np.std(Xh_tr[:, k])
    Xh_tr[:, k] = (Xh_tr[:, k] - mea_h[k]) / sig_h[k]

############## Kernel Ridge Regression ########################################
from sklearn.kernel_ridge import KernelRidge
import scipy.io as sio

mf = sio.loadmat(
    "/data/ISOTROPIC/regression/KRR_rbf_cv_alpha_gamma_sspacing4_tspacing6.mat", squeeze_me=True, struct_as_record=False
)
KRR_alpha_opt = mf["KRR_alpha_opt"]
print("Optimal alpha:", KRR_alpha_opt)
KRR_gamma_opt = mf["KRR_gamma_opt"]
print("Optimal gamma:", KRR_gamma_opt)

kr = KernelRidge(kernel="rbf", alpha=KRR_alpha_opt, gamma=KRR_gamma_opt)
kr.fit(Xl_tr, Xh_tr)


############## Prediction and save to file ####################################
import os

try:
    os.remove("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc")
except OSError:
    pass
ncfile2 = Dataset("/data/ISOTROPIC/data/KRR_rbf_sspacing4_tspacing6.nc", "w")

ncfile1 = Dataset("/data/ISOTROPIC/data/data_downsampled4.nc", "r")

# create the dimensions
Example #32
def main():

    data_train, data_test = loadData()

    data_train = deleteOutlier(data_train)
    trainX = data_train.drop(['Id', 'SalePrice'], axis=1)
    trainY = data_train['SalePrice']
    trainY = np.log1p(trainY)
    ntrain = len(trainX)

    testId = data_test['Id']
    testX = data_test.drop(['Id'], axis=1)

    dataSet = pd.concat([trainX, testX], axis=0)

    dataSet = imputingMissingData(dataSet)
    dataSet = transformStr(dataSet)
    dataSet = transformSortNum(dataSet)
    dataSet = extractFeat(dataSet)
    dataSet = boxCoxFeat(dataSet)
    dataSet = dummy(dataSet)

    trainX = dataSet[:ntrain]
    testX = dataSet[ntrain:]

    models = {
        # "lgd": lgb.LGBMRegressor(objective='regression', num_leaves=5,
        #                          learning_rate=0.05, n_estimators=720,
        #                          max_bin=55, bagging_fraction=0.8,
        #                          bagging_freq=5, feature_fraction=0.2319,
        #                          feature_fraction_seed=9, bagging_seed=9,
        #                          min_data_in_leaf=6, min_sum_hessian_in_leaf=11),
        'gbdt':
        GradientBoostingRegressor(n_estimators=2000,
                                  learning_rate=0.01,
                                  max_depth=4,
                                  max_features='sqrt',
                                  min_samples_leaf=15,
                                  min_samples_split=12,
                                  loss='huber',
                                  random_state=0),
        "ridge":
        Ridge(alpha=0.05, max_iter=100),
        'krr':
        KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5),
        'enet':
        ElasticNet(alpha=0.0005, l1_ratio=1, random_state=3),
        'lasso':
        Lasso(alpha=0.0008, max_iter=100, random_state=1),
    }
    stackReg = StackingRegressor(
        regressors=[models['gbdt'], models['krr'], models['enet']],
        meta_regressor=models['lasso'])
    modelStack1 = StackingAverageModels(base_models=(models['ridge'],
                                                     models['krr'],
                                                     models['enet']),
                                        meta_model=models['lasso'])

    modelStack2 = StackingAverageModels(
        base_models=(models['gbdt'], models['ridge'], models['krr'],
                     models['lasso']),
        meta_model=models['enet'])

    modelStack3 = StackingAverageModels(base_models=(models['gbdt'],
                                                     models['lasso'],
                                                     models['enet']),
                                        meta_model=models['krr'])

    modelStack4 = StackingAverageModels(base_models=(models['lasso'],
                                                     models['krr'],
                                                     models['enet']),
                                        meta_model=models['gbdt'])

    averageModel = AveragingModels(
        (modelStack1, modelStack2, modelStack3, modelStack4))
    # for key in models:
    #     scores, reg = train(trainX, trainY, models[key])
    #
    #     print('model: {}   scores: {}'.format(key, scores))

    # parameters = {"n_estimators": [500], "min_samples_split": [12, 15], "min_samples_leaf": [12, 15],
    #                 "max_depth": [4, 6], "random_state": [0]}
    # clf = GridSearchCV(models['gbdt'], parameters)

    scores, reg = train(trainX.values, trainY.values, modelStack2)
    test(testX.values, testId, reg)
    print(scores)
Example #33
#####
regr = linear_model.LinearRegression()
scores = cross_val_score(regr, data.df[inputVariables].values, data.df['count'].values)
print("Linear Regression cross validation score: ", scores.mean())
regr.fit(X_train_sum, y_train_sum)
print("Linear Regression training score: ", regr.score(X_train_sum, y_train_sum))
print("Linear Regression testing score: ", regr.score(X_test_sum, y_test_sum))



##### Kernel Ridge and Support Vector Regression
#####
## Finding the best parameters
alpha=[1,1e-1,1e-2,1e-3]
for a in alpha:
	kr = KernelRidge(kernel='rbf', alpha=a)
	kr.fit(X_train_sum, y_train_sum)
	print("Kernel Ridge train score: ", kr.score(X_train_sum, y_train_sum), " for alpha = %s" %a)
	print("Kernel Ridge test score: ", kr.score(X_test_sum, y_test_sum), " for alpha = %s" %a)


### Using GridSearchCV
param_grid = {
	'alpha': [1, 1e-1, 1e-2],
	"gamma": np.logspace(-2, 2, 5)
}
GSKernelRidge = GridSearchCV(KernelRidge(kernel='rbf'), param_grid=param_grid)
GSKernelRidge.fit(X_train_sum, y_train_sum)


Example #34
import numpy as np
from sklearn.kernel_ridge import KernelRidge
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV

# rng = np.random.RandomState(0)

x = 5 * np.random.rand(100, 1)  # array of shape (100, 1)
y = np.sin(x).ravel()

#add noise to target
y[::5] += 3 * (0.5 - np.random.rand(x.shape[0] // 5))

# kr = KernelRidge(kernel='sigmoid',gamma=0.4)
kr = GridSearchCV(KernelRidge(),
                  param_grid={
                      "kernel": ["rbf", "laplacian", "polynomial", "sigmoid"],
                      "alpha": [1e0, 0.1, 1e-2, 1e-3],
                      "gamma": np.logspace(-2, 2, 5)
                  })
kr.fit(x, y)
print(kr.best_score_, kr.best_params_)

x_plot = np.linspace(0, 5, 100)
y_kr = kr.predict(x_plot[:, None])

plt.scatter(x, y)
plt.plot(x_plot, y_kr, "r")
plt.show()
Example #35
    X = X_orig[:, :-n_removed]
    Y = wine['target']

    # scale everything
    X_scaled = preprocessing.scale(X)
    X_orig_scaled = preprocessing.scale(X_orig)

    # train test split
    X_train, X_test, Y_train, Y_test = train_test_split(X_scaled,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=5)

    # let us train the model
    kernel = RBF(length_scale=10)
    krr_model = KernelRidge(kernel=kernel, alpha=1)
    krr_model.fit(X_train, Y_train)

    # and restrict to the first coordinates
    def my_model(array):
        return krr_model.predict(array[:, :-n_removed])

    # dimension of the ambient space
    dim = X_orig.shape[1]

    # the example to explain
    xi = X_orig_scaled[0, :]

    # bandwidth parameter
    nu = 5
Example #36
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge

ridg = Ridge(alpha=0.01)

#svr = SVR(kernel='poly')
svr = GridSearchCV(SVR(kernel='linear', gamma=0.1),
                   cv=5,
                   param_grid={
                       "C": [1e0, 1e1, 1e2, 1e3],
                       "gamma": np.logspace(-2, 2, 5)
                   })
kr = GridSearchCV(KernelRidge(kernel='linear', gamma=0.1),
                  cv=5,
                  param_grid={
                      "alpha": [1e0, 0.1, 1e-2, 1e-3],
                      "gamma": np.logspace(-2, 2, 5)
                  })


def get_coef(x, y, model=ridg):
    #x=np.array([-10.1, -8.9, 0, 5.2, 10.1]).reshape(-1, 1)
    #y=np.array([-19, -18.1, 2, 10, 21]).reshape(-1, 1)

    if model == ridg:
        y = np.array(y, dtype=float).reshape(-1, 1)
        x = np.array(x, dtype=float).reshape(-1, 1)
    else:
Example #37
rng = np.random.RandomState(0)

# Generate sample data
X = 15 * rng.rand(100, 1)
y = np.sin(X).ravel()
y += 3 * (0.5 - rng.rand(X.shape[0]))  # add noise

# Fit KernelRidge with parameter selection based on 5-fold cross validation
param_grid = {
    "alpha": [1e0, 1e-1, 1e-2, 1e-3],
    "kernel": [
        ExpSineSquared(l, p) for l in np.logspace(-2, 2, 10)
        for p in np.logspace(0, 2, 10)
    ]
}
kr = GridSearchCV(KernelRidge(), param_grid=param_grid)
stime = time.time()
kr.fit(X, y)
print("Time for KRR fitting: %.3f" % (time.time() - stime))

gp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \
            + WhiteKernel(1e-1)
gpr = GaussianProcessRegressor(kernel=gp_kernel)
stime = time.time()
gpr.fit(X, y)
print("Time for GPR fitting: %.3f" % (time.time() - stime))

# Predict using kernel ridge
X_plot = np.linspace(0, 20, 10000)[:, None]
stime = time.time()
y_kr = kr.predict(X_plot)
Example #38
cutarr2 = cut_predict['x'].values
cutarr2 = cutarr2.astype(np.float64)
for x in np.nditer(cutarr2, op_flags=['readwrite']):
    x[...] -= 733000
cutarr2 = cutarr2.reshape(-1, 1)

outarr = new_cut['y'].values
outarr = outarr.astype(np.float64)
outarr = outarr.reshape(-1, 1)
outarr2 = cut_predict['y'].values
outarr2 = outarr2.astype(np.float64)
outarr2 = outarr2.reshape(-1, 1)

regressor = KernelRidge(alpha=0.005,
                        kernel='rbf',
                        gamma=0.00025,
                        degree=3,
                        coef0=1,
                        kernel_params=None)
regressor2 = SVR(C=1e3, epsilon=0.01, gamma=0.001, tol=1e-4)

regressor.fit(cutarr, outarr)

time1, time2 = [], []
for i in range(
        dt.date(2008, 2, 25).toordinal(),
        dt.date(2014, 2, 3).toordinal(), 7):
    time1 = np.append(time1, i)
for i in range(
        dt.date(2014, 10, 27).toordinal(),
        dt.date(2018, 8, 21).toordinal(), 7):
    time2 = np.append(time2, i)
Example #39
for row in csv.DictReader(open("affective-ratings.csv")): 
  affective[ row["Word"].lower() ] = np.array( [ float( row["V.Mean.Sum"] ) , float( row["A.Mean.Sum"] ) , float( row["D.Mean.Sum"] ) ] )

# Expand dictionary of affective words
embeddings_dim = 300
max_words = 100000
embeddings = dict( )
embeddings = Word2Vec.load_word2vec_format( "GoogleNews-vectors-negative300.bin.gz" , binary=True )
train_matrix = [ ]
train_labels = [ ]
for word,scores in affective.items():
  try:
    train_matrix.append( embeddings[word] )
    train_labels.append( scores )
  except: continue
model = KernelRidge( kernel='poly' , degree=4 )
model.fit( train_matrix , train_labels )
textdata = " ".join( open(sys.argv[1] + ".revised.txt",'r').readlines( ) )
tokenizer = Tokenizer(nb_words=max_words, filters=keras.preprocessing.text.base_filter(), lower=True, split=" ")
tokenizer.fit_on_texts( textdata )
for word, index in tokenizer.word_index.items():
  try:
    if not affective.has_key(word) : affective[word] = np.array( model.predict( np.array( embeddings[word] ).reshape(1, -1) )[0] )
  except: affective[word] = np.array( [ 5.0 , 5.0 , 5.0 ] )

# Process the textual contents
textdata = "" 
file1 = open(sys.argv[1] + ".revised.txt",'r')
with file1 as myfile: textdata = re.sub( ">", "&gt;" , re.sub("<" , "&lt;" , re.sub( "&" , "&amp;" , re.sub( "   +", "\n\n" , re.sub( "\t" , " ", re.sub( "\r" , "" ,  "".join( myfile.readlines() ) ) ) ) ) ) )
corenlp = StanfordCoreNLP( )
file2 = open(sys.argv[1] + ".annotated.tsv",'w')
Example #40
# Set the parameters by cross-validation
tuned_parameters = {
    'kernel': ['rbf'],
    'coef0': [1e-4, 1e-3, 1e-2, 1e2, 1e3],
    'gamma': [1e-4, 1e-3, 1e-2, 1e2, 1e3],
    'alpha': [1e-3, 1e-2, 1e-1, 0, 1e1, 1e2, 1e3]
}
scores = ['neg_mean_absolute_error']
model = 'KRR'
for score in scores:
    #    print(kernel[i])
    print("# Tuning hyper-parameters for %s" % score)
    print()

    if args.random == True:
        clf = RandomizedSearchCV(KernelRidge(),
                                 tuned_parameters,
                                 cv=5,
                                 verbose=10,
                                 n_jobs=-1,
                                 scoring='%s' % score)
        start_rn = time.time()
        clf.fit(X_train, y_train)
        end_rn = time.time()

    if args.grid == True:
        clf = GridSearchCV(KernelRidge(),
                           tuned_parameters,
                           cv=5,
                           verbose=10,
                           n_jobs=-1,
Example #41
if __name__=="__main__":
    #trains Kronecker RLS for different sample sizes
    #comparing CPU time and verifying that the learned
    #dual coefficients are same for both methods
    regparam = 1.0
    for size in [10, 20, 40, 60, 80, 100, 500, 1000, 2000, 4000, 6000]:
        X1, X2, y = random_data(size, 100)
        kernel1 = GaussianKernel(X1, gamma=0.01)
        K1 = kernel1.getKM(X1)
        kernel2 = GaussianKernel(X2, gamma=0.01)
        K2 = kernel2.getKM(X2)
        start = time.clock()
        rls = KronRLS(K1=K1, K2=K2, Y=y, regparam=regparam)
        dur = time.clock() - start
        print("RLScore pairs: %d, CPU time: %f" %(size**2, dur))
        #forming the full Kronecker product kernel matrix quickly
        #becomes infeasible
        if size <=100:
            K = np.kron(K2, K1)
            start = time.clock()
            ridge = KernelRidge(alpha=regparam, kernel="precomputed")
            ridge.fit(K, y)
            dur = time.clock() - start
            print("sklearn pairs: %d, CPU time: %f" %(size**2, dur))
            sklearn_coef = ridge.dual_coef_
            core_coef = rls.predictor.A.reshape(K1.shape[0], K2.shape[0]).T.ravel()
            print("Are the coefficients same: %r" %np.allclose(sklearn_coef, core_coef))
        else:
            print("sklearn: too much data")
        print "*****"
Example #42
    def fit(self):
        """Train the model with the indicated algorithm.

        Do not forget to tune the hyperparameters.

        Parameters
        ----------
        algorithm : String,
            "KernelRidge", "SVM", "LinearRegression", "Lasso", "ElasticNet", "NeuralNet", "BaggingNeuralNet", default = "SVM"

        """
        self.X_scaler.fit(self.X_train)
        self.Y_scaler.fit(self.y_train)

        # scaling the data in all cases, it may not be used during the fit later
        self.X_train_sc = self.X_scaler.transform(self.X_train)
        self.y_train_sc = self.Y_scaler.transform(self.y_train)

        self.X_test_sc = self.X_scaler.transform(self.X_test)
        self.y_test_sc = self.Y_scaler.transform(self.y_test)

        if self.algorithm == "KernelRidge":
            clf_kr = KernelRidge(kernel=self.user_kernel)
            self.model = sklearn.model_selection.GridSearchCV(
                clf_kr, cv=5, param_grid=self.param_kr)

        elif self.algorithm == "SVM":
            clf_svm = SVR(kernel=self.user_kernel)
            self.model = sklearn.model_selection.GridSearchCV(
                clf_svm, cv=5, param_grid=self.param_svm)

        elif self.algorithm == "Lasso":
            clf_lasso = sklearn.linear_model.Lasso(
                alpha=0.1, random_state=self.rand_state)
            self.model = sklearn.model_selection.GridSearchCV(
                clf_lasso, cv=5, param_grid=dict(alpha=np.logspace(-5, 5, 30)))

        elif self.algorithm == "ElasticNet":
            clf_ElasticNet = sklearn.linear_model.ElasticNet(
                alpha=0.1, l1_ratio=0.5, random_state=self.rand_state)
            self.model = sklearn.model_selection.GridSearchCV(
                clf_ElasticNet,
                cv=5,
                param_grid=dict(alpha=np.logspace(-5, 5, 30)))

        elif self.algorithm == "LinearRegression":
            self.model = sklearn.linear_model.LinearRegression()

        elif self.algorithm == "NeuralNet":
            self.model = MLPRegressor(**self.param_neurons)
        elif self.algorithm == "BaggingNeuralNet":
            nn_m = MLPRegressor(**self.param_neurons)

            self.model = BaggingRegressor(base_estimator=nn_m,
                                          **self.param_bag)

        if self.scaling == True:
            self.model.fit(self.X_train_sc, self.y_train_sc.reshape(-1, ))
            predict_train_sc = self.model.predict(self.X_train_sc)
            self.prediction_train = self.Y_scaler.inverse_transform(
                predict_train_sc.reshape(-1, 1))
            predict_test_sc = self.model.predict(self.X_test_sc)
            self.prediction_test = self.Y_scaler.inverse_transform(
                predict_test_sc.reshape(-1, 1))
        else:
            self.model.fit(self.X_train, self.y_train.reshape(-1, ))
            self.prediction_train = self.model.predict(self.X_train)
            self.prediction_test = self.model.predict(self.X_test)
Example #43
def parametrize_environment_specific(settings, rerun):
    channel_name = settings["embedding_options"]["channel_name"]
    log << log.mg << "Parametrizing" << channel_name << "model" << log.endl
    soap_types = SETTINGS["soap_types"]
    log << "Particle SOAP types are" << ", ".join(soap_types) << log.endl
    # PATHS - for example:
    # { "xyz_file": "data_esol/structures.xyz",
    #   "soap_file": "data_esol/structures.soap",
    #   "kmat_file": "data_esol/kernel.npy",
    #   "targets_file": "data_esol/targets.npy",
    #   "range_file": "data_esol/range.json",
    #   "weights_file": "data_esol/weights.npy" }
    paths = copy.deepcopy(settings["paths"])
    for p, v in paths.items():
        paths[p] = os.path.join(PATH, v)
        log << "Path to %s = %s" % (p, paths[p]) << log.endl
    configs = soap.tools.io.read(paths["xyz_file"])
    # SOAP
    soap_options = SETTINGS["soap_options"][settings["soap_options_ref"]]
    if rerun or not os.path.isfile(paths["soap_file"]):
        log << "Make target: %s" % paths["soap_file"] << log.endl
        soap_configure_default(types=soap_types)
        dset = soap_evaluate(configs, soap_options, paths["soap_file"])
    else:
        log << "Load target: %s" % paths["soap_file"] << log.endl
        dset = soap.DMapMatrixSet(paths["soap_file"])
    # KERNEL
    kernel_options = settings["kernel_options"]
    if rerun or not os.path.isfile(paths["kmat_file"]):
        log << "Make target: %s" % paths["kmat_file"] << log.endl
        K = kernel_evaluate(dset, kernel_options, paths["kmat_file"])
    else:
        log << "Load target: %s" % paths["kmat_file"] << log.endl
        K = np.load(paths["kmat_file"])
    # TARGETS
    target_key = settings["regression_options"]["target_key"]
    if rerun or not os.path.isfile(paths["targets_file"]):
        log << "Make target: %s" % paths["targets_file"] << log.endl
        targets = np.array([float(c.info[target_key]) for c in configs])
        np.save(paths["targets_file"], targets)
    else:
        log << "Load target: %s" % paths["targets_file"] << log.endl
        targets = np.load(paths["targets_file"])
    # MODEL
    regr_options = settings["regression_options"]
    if rerun or not os.path.isfile(paths["weights_file"]):
        log << "Make target: %s" % paths["weights_file"] << log.endl
        y_avg = np.average(targets)
        krr = KernelRidge(
            alpha=regr_options["lreg"],
            kernel='precomputed')
        krr.fit(K**regr_options["xi"], targets)
        y_predict = krr.predict(K**regr_options["xi"])
        kweights = krr.dual_coef_
        np.save(paths["weights_file"], kweights)
        np.save(paths["pred_file"], y_predict)
    else:
        log << "Load target: %s" % paths["weights_file"] << log.endl
        kweights = np.load(paths["weights_file"])
        y_predict = np.load(paths["pred_file"])
    if rerun or not os.path.isfile(paths["range_file"]):
        dset_attr = soap.DMapMatrixSet(paths["soap_file"])
        delta_Ys = kernel_attribute(dset_attr, dset, kernel_options, kweights, regr_options["xi"])
        json.dump(delta_Ys, open(paths["range_file"], "w"))
    else:
        delta_Ys = json.load(open(paths["range_file"]))
Ejemplo n.º 44
0
rawData = np.zeros((4104, 4))

with open('Processed Time Dependent Data.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    labels = next(reader)

    i = 0
    for row in reader:
        for j in range(4):
            rawData[i, j] = float(row[j])
        i = i + 1

kr = KernelRidge(alpha=1,
                 kernel='rbf',
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None)
kr.fit(rawData[:, 0:3], rawData[:, 3])

samplePoints = np.zeros((6000, 4))

with open('Sample Points 2.csv', 'r', newline='') as dat:
    readDat = csv.reader(dat)
    i = 0
    for row in readDat:
        for j in range(3):
            samplePoints[i, j] = float(row[j])
        i = i + 1

for iterno in range(1, 13):
Ejemplo n.º 45
0
class RidgeMKL:
    """A MKL model in a transductive setting (test points are presented at training time).

    """

    mkls = {
        "align": Align,
        "alignf": Alignf,
        "alignfc": Alignf,
        "uniform": UniformAlignment,
    }

    mkls_low_rank = {
        "align": AlignLowRank,
        "alignf": AlignfLowRank,
        "alignfc": AlignfLowRank,
        "uniform": UniformAlignmentLowRank,
    }

    #  alignf expects kernels to be centered
    centered   = {"alignf", "alignfc"}
    supervised = {"align", "alignf", "alignfc"}

    def __init__(self, lbd=0, method="align", method_init_args={}, low_rank=False):
        """
        :param method: (``string``) "align", "alignf", "alignfc", or "uniform"; the MKL method to be used.

        :param low_rank: (``bool``) Use low-rank approximations.

        :param method_init_args: (``dict``) Initialization arguments for the MKL methods.

        :param lbd: (``float``) L2-regularization.
        """

        self.method  = method
        if not low_rank:
            self.mkl_model  = self.mkls[method](**method_init_args)
            if method == "alignfc":
                init_args = method_init_args.copy()
                init_args["typ"] = "convex"
                self.mkl_model  = self.mkls[method](**init_args)
        else:
            self.mkl_model  = self.mkls_low_rank[method](**method_init_args)
            if method == "alignfc":
                init_args = method_init_args.copy()
                init_args["typ"] = "convex"
                self.mkl_model  = self.mkls_low_rank[method](**init_args)
        self.lbd        = lbd
        self.low_rank   = low_rank
        self.trained    = False


    def fit(self, Ks, y, holdout=None):
        """Learn weights for kernel matrices or Kinterfaces.

        :param Ks: (``list``) of (``numpy.ndarray``) or of (``Kinterface``) to be aligned.

        :param y: (``numpy.ndarray``) Class labels :math:`y_i \in \{-1, 1\}` or regression targets.

        :param holdout: (``list``) List of indices to exclude from alignment.
        """

        # Expand kernel interfaces to kernel matrices
        expand = lambda K: K[:, :] if isinstance(K, Kinterface) else K
        Hs     = list(map(expand, Ks))

        # Assert correct dimensions
        assert Ks[0].shape[0] == len(y)

        # Fit MKL model
        if self.method in self.supervised:
            self.mkl_model.fit(Hs, y, holdout=holdout)
        else:
            self.mkl_model.fit(Hs)

        if self.low_rank:
            self.X = hstack([sqrt(mu) * H
                             for mu, H in zip(self.mkl_model.mu, Hs)])

            if self.method in self.centered:
                self.X = center_kernel_low_rank(self.X)
                self.X[where(isnan(self.X))] = 0

            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd,
                                     kernel="linear", )

            # Fit ridge on the examples minus the holdout set
            inxs = list(set(range(Hs[0].shape[0])) - set(holdout or []))
            self.ridge.fit(self.X[inxs], y[inxs])
            self.trained = True

        else:
            # Fit ridge model with given lbd and MKL model
            self.ridge = KernelRidge(alpha=self.lbd,
                                     kernel=self.mkl_model, )

            # Fit ridge on the examples minus the holdout set
            inxs = array(list(set(range(Hs[0].shape[0])) - set(holdout or [])))
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            self.ridge.fit(inxs, y[inxs])
            self.trained = True


    def predict(self, inxs):
        """
        Predict values for data on indices inxs (transductive setting).

        :param inxs: (``list``) Indices of samples to be used for prediction.

        :return: (``numpy.ndarray``) Vector of prediction of regression targets.
        """
        assert self.trained

        if self.low_rank:
            return self.ridge.predict(self.X[inxs])
        else:
            inxs = array(inxs)
            inxs = inxs.reshape((len(inxs), 1)).astype(int)
            return self.ridge.predict(inxs).ravel()
Ejemplo n.º 46
0
def make_CV_models(X, y):
    '''Perform grid searches to find the best model of each type for dataset X, y'''

    model_dict = {
        'KRR':
        grid_search(X,
                    y,
                    KernelRidge(),
                    param_grid={
                        "alpha": np.logspace(-10, 2, 300),
                        "gamma": np.logspace(-10, -1, 100),
                        "kernel": ['rbf']
                    }),
        'SVR':
        grid_search(X,
                    y,
                    SVR(),
                    param_grid={
                        "C": np.logspace(-1, 4, 20),
                        "epsilon": np.logspace(-2, 2, 20)
                    }),
        'Ridge':
        grid_search(X,
                    y,
                    Ridge(),
                    param_grid={"alpha": np.logspace(-6, 6, 150)}),
        'Lasso':
        grid_search(X,
                    y,
                    Lasso(max_iter=20000),
                    param_grid={"alpha": np.logspace(-2, 6, 100)}),
        'BR':
        grid_search(X,
                    y,
                    BayesianRidge(),
                    param_grid={
                        "alpha_1": np.logspace(-13, -5, 10),
                        "alpha_2": np.logspace(-9, -3, 10),
                        "lambda_1": np.logspace(-10, -5, 10),
                        "lambda_2": np.logspace(-11, -4, 10)
                    }),
        'GBoost':
        grid_search(X,
                    y,
                    GradientBoostingRegressor(),
                    param_grid={
                        "n_estimators": np.linspace(5, 350, 100).astype('int')
                    }),
        'RF':
        grid_search(
            X,
            y,
            RandomForestRegressor(),
            param_grid={"n_estimators": np.linspace(5, 100, 50).astype('int')},
        ),
        'kNN':
        grid_search(
            X,
            y,
            KNeighborsRegressor(),
            param_grid={"n_neighbors": np.linspace(2, 20, 18).astype('int')}),
        'mean':
        DummyRegressor(strategy='mean'),
    }

    return model_dict
Ejemplo n.º 47
0
px = []
py = []
with open('/home/redwards/Desktop/genus_species_analysis/pseudo_coverage.txt', 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        px.append(float(p[0]))
        py.append(float(p[1]))

ny = np.array(y)
nx = np.array(x)
pnx = np.array(px)
pny = np.array(py)


kr = KernelRidge(kernel='rbf', gamma=7.5e-5, alpha=0.001)
kr.fit(nx[:, None], ny[:, None])

x_pred = np.linspace(min(x), max(x), 10000)[:, None]
y_pred = kr.predict(x_pred)


kr.fit(pnx[:, None], pny[:, None])
px_pred = np.linspace(min(px), max(px), 10000)[:, None]
py_pred = kr.predict(px_pred)

fig = plt.figure()
ax = fig.add_subplot(111)


"""
Ejemplo n.º 48
0
def ml_size_scan(x_datafile,
                 y_datafile,
                 ids,
                 testfiles,
                 alpha0=1,
                 gamma0=1,
                 kernel0='rbf',
                 learning_rate_init0=0.001,
                 hidden_layer_sizes0=(80, 80, 80),
                 sample_size_list=[
                     0.005, 0.01, 0.03, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
                     0.7, 0.8, 0.9
                 ],
                 ml_method="krr"):
    max_sample_size = max(sample_size_list)
    ax_train, x_test, ay_train, y_test, aids_train, ids_test = load_split_scale_data(
        x_datafile, y_datafile, ids, testfiles, max_sample_size)

    for sample_size in sample_size_list:
        ratio = float(sample_size) / float(max_sample_size)
        if ratio > 0.999:
            x_train = ax_train
            y_train = ay_train
            ids_train = aids_train
        else:
            # reduce set
            x_dump, x_train, y_dump, y_train, ids_dump, ids_train = train_test_split(
                ax_train,
                ay_train,
                aids_train,
                test_size=ratio,
            )

        if ml_method == "krr":
            # Create kernel linear ridge regression object
            learner = KernelRidge(alpha=alpha0,
                                  coef0=1,
                                  degree=3,
                                  gamma=gamma0,
                                  kernel=kernel0,
                                  kernel_params=None)
        elif ml_method == "mlp":
            # Create Multi-Layer Perceptron object
            learner = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes0,
                                   max_iter=1600,
                                   alpha=alpha0,
                                   learning_rate_init=learning_rate_init0)
        else:
            print("ML method unknown. Exiting.")
            exit(1)

        t_ml0 = time.time()
        learner.fit(x_train, y_train)
        t_ml1 = time.time()
        print("ml time", str(t_ml1 - t_ml0))

        mae, mse, y_pred, train_y_pred, learner = predict_and_error(
            learner, x_test, x_train, y_test)

        ### OUTPUT ###
        write_output(learner, sample_size, ml_method, mae, mse, "size",
                     ids_test, y_test, y_pred, ids_train, y_train,
                     train_y_pred)

    return None
Ejemplo n.º 49
0

	#####################################################################
	# --- RUN THE MODEL: FOR A GIVEN SPLIT AND EACH PARAMETER TRIAL --- #
	#####################################################################

	# For each parameter trial
	for i in range(trials):

		# For regression use the Kernel Ridge method
		if model_type == "regression":

			print "\n Starting experiment for trial %d and parameter alpha = %3f\n " % (i, alpha_grid[i])

			# Fit the kernel ridge model
			KR = KernelRidge(kernel = 'precomputed', alpha = alpha_grid[i])
			KR.fit(K_train, y_train)

			# predict on the validation and test set
			y_pred = KR.predict(K_val)
			y_pred_test = KR.predict(K_test)
			
			# adjust predictions: needed because the training targets have been normalized
			y_pred = y_pred * float(y_train_std) + y_train_mean
			y_pred_test = y_pred_test * float(y_train_std) + y_train_mean

			# root mean squared error on validation
			rmse = np.sqrt(mean_squared_error(y_val, y_pred))
			perf_all_val.append(rmse)

			# root mean squared error in test 
Ejemplo n.º 50
0
def create(X, X_column_types, y, y_column_types, arm, **kwargs):
    method = kwargs.get("method", "RFE_rf")
    method = kwargs.get("method", "RFE_Lasso")
    method = kwargs.get("method", "Lasso")
    method = kwargs.get("method", "Boruta")
    method = kwargs.get("method", "Autoencoder")
    method = kwargs.get("method", "Boruta")
    method = kwargs.get("method", "XBG")

    # compute the distance correlation between two feature vectors
    def distcorr(X, Y):
        X = np.atleast_1d(X)
        Y = np.atleast_1d(Y)
        if np.prod(X.shape) == len(X):
            X = X[:, None]
        if np.prod(Y.shape) == len(Y):
            Y = Y[:, None]
        X = np.atleast_2d(X)
        Y = np.atleast_2d(Y)
        n = X.shape[0]
        if Y.shape[0] != X.shape[0]:
            raise ValueError('Number of samples must match')
        a = squareform(pdist(X))
        b = squareform(pdist(Y))
        A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
        B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()
        dcov2_xy = (A * B).sum() / float(n * n)
        dcov2_xx = (A * A).sum() / float(n * n)
        dcov2_yy = (B * B).sum() / float(n * n)
        dcor = np.sqrt(dcov2_xy) / np.sqrt(
            np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy))
        return dcor

    ############# Shuffle the data ###################
    X['target'] = y
    data = shuffle(X)
    X = data.drop(['target'], axis=1)
    y = data['target']

    # AutoLearn will only use the numerical features

    num_X = X.select_dtypes(include=[int, float])

    #################### preprocessing step ###################
    if (y_column_types == "int64"
            or y_column_types == "float64"):  # Regression
        #if method == "mutual_information":
        c, d = num_X.shape
        names = np.arange(d)
        mode = SelectKBest(mutual_info_regression, k='all')
        mode.fit_transform(num_X, y)

        feats = sorted(zip(map(lambda c: round(c, 4), mode.scores_), names),
                       reverse=True)

        finale = []
        for i in range(0, len(feats)):
            r, s = feats[i]
            if (r > 0):
                finale.append(s)

        dataframe = num_X.iloc[:, finale]

    else:  # classification

        c, d = num_X.shape
        names = np.arange(d)
        mode = SelectKBest(mutual_info_classif, k='all')
        mode.fit_transform(num_X, y)

        feats = sorted(zip(map(lambda c: round(c, 4), mode.scores_), names),
                       reverse=True)

        finale = []
        for i in range(0, len(feats)):
            r, s = feats[i]
            if (r > 0):
                finale.append(s)

        dataframe = num_X.iloc[:, finale]

    ############# feature_generation #########################
    Non_linear = pd.DataFrame()
    linear = pd.DataFrame()
    m, n = dataframe.shape
    thrs = 0.7
    for i in range(n):
        for j in range(i + 1, n):
            if (i != j) and (distcorr(dataframe.iloc[:, i],
                                      dataframe.iloc[:, j]) != 0):
                if (distcorr(dataframe.iloc[:, i], dataframe.iloc[:, j]) >
                        0) and (distcorr(dataframe.iloc[:, i],
                                         dataframe.iloc[:, j]) < thrs):
                    non_lin_X, non_lin_y = dataframe.iloc[:, i].to_frame(
                    ), dataframe.iloc[:, j]
                    model = KernelRidge(alpha=1.0,
                                        coef0=1,
                                        degree=3,
                                        gamma=None,
                                        kernel='rbf',
                                        kernel_params=None)
                    model.fit(non_lin_X, non_lin_y)
                    first = model.predict(non_lin_X)
                    first_feat_gen = pd.Series(first)
                    second_feat_gen = (non_lin_y - (first))
                    Non_linear = Non_linear.append(first_feat_gen,
                                                   ignore_index=True)
                    Non_linear = Non_linear.append(second_feat_gen,
                                                   ignore_index=True)
                elif (distcorr(dataframe.iloc[:, i], dataframe.iloc[:, j]) >=
                      thrs) and (distcorr(dataframe.iloc[:, i],
                                          dataframe.iloc[:, j]) <= 1):
                    lin_X, lin_y = dataframe.iloc[:, i].to_frame(
                    ), dataframe.iloc[:, j]
                    model = Ridge(alpha=1.0)
                    model.fit(lin_X, lin_y)
                    first_feat = model.predict(lin_X)
                    lin_first_feat = pd.Series(first_feat)
                    second_feat = (lin_y - (first_feat))
                    linear = linear.append(lin_first_feat, ignore_index=True)
                    linear = linear.append(second_feat, ignore_index=True)

    nonlinear_generated = Non_linear.T
    linear_generated = linear.T
    print("no. of features generated from nonlinear pairs:",
          nonlinear_generated.shape[1])
    print("no. of features generated from linear pairs:",
          linear_generated.shape[1])

    Generated_feats = pd.concat([nonlinear_generated, linear_generated],
                                axis=1)
    print("Total no. of generated features:", Generated_feats.shape[1])

    #################### Feature_selection in 2 steps###################
    if Generated_feats.shape[1] > 0:

        ############ 1st step of selection ##############
        if method == "RFE_rf":
            if (y_column_types == "int64" or y_column_types == "float64"):
                model = RandomForestRegressor()
            else:
                model = RandomForestClassifier()

            rfe = RFECV(model, step=1, cv=5)
            rfe.fit(Generated_feats, y)
            print("Optimal number of features after 1st step : %d" %
                  rfe.n_features_)
            selected_feats_order = np.argsort(rfe.grid_scores_)[::-1]
            Data_X = pd.DataFrame()
            for i in range(rfe.n_features_):
                col = Generated_feats.iloc[:, selected_feats_order[i]]
                Data_X = Data_X.append(col)

            one_featsel = Data_X.transpose()

        elif method == "RFE_Lasso":
            if (y_column_types == "int64" or y_column_types == "float"):
                model = LassoLarsCV()
            else:
                model = LogisticRegressionCV(penalty="l1", solver='liblinear')

            rfe = RFECV(model, step=1, cv=5)
            rfe.fit(Generated_feats, y)

            print("Optimal number of features after 1st step : %d" %
                  rfe.n_features_)

            selected_feats_order = np.argsort(rfe.grid_scores_)[::-1]

            Data_X = pd.DataFrame()
            for i in range(rfe.n_features_):
                col = Generated_feats.iloc[:, selected_feats_order[i]]
                Data_X = Data_X.append(col)

            one_featsel = Data_X.transpose()

        elif method == "Lasso":
            if (y_column_types == "int64" or y_column_types == "float64"):
                model = LassoLarsCV(eps=1e-8)
            else:
                model = LogisticRegressionCV(penalty="l1", solver='liblinear')

            sfm = SelectFromModel(model, threshold=0.7)
            sfm.fit(Generated_feats, y)
            n_features = sfm.transform(Generated_feats).shape[1]
            print("optimal no. of features after lst step:", n_features)
            one_featsel = pd.DataFrame(sfm.transform(Generated_feats))

        elif method == "Boruta":
            if (y_column_types == "int64" or y_column_types == "float"):
                model = XGBRegressor()
            else:
                model = XGBClassifier()

            boruta = BorutaPy(model, n_estimators='auto', verbose=2)
            boruta.fit(Generated_feats.values, y.values)
            sel_index = Generated_feats.columns[boruta.support_]
            one_featsel = Generated_feats.loc[:, sel_index]

        elif method == "XGB":

            if (y_column_types == "int64" or y_column_types == "float64"):
                model = XGBRegressor()
            else:
                model = XGBClassifier()

            sfm = SelectFromModel(model, threshold=0.7)
            sfm.fit(Generated_feats, y)
            n_features = sfm.transform(Generated_feats).shape[1]
            print("optimal no. of features after lst step:", n_features)
            one_featsel = pd.DataFrame(sfm.transform(Generated_feats))

        #elif method == "Autoencoder" :

    ################# 2nd step of selection ###########
        o, p = one_featsel.shape
        Names = np.arange(p)

        if (y_column_types == "int64" or y_column_types == "float"):
            model = SelectKBest(mutual_info_regression, k='all')
        else:
            model = SelectKBest(mutual_info_classif, k='all')

        model.fit_transform(one_featsel, y)

        new_feat = sorted(zip(map(lambda o: round(o, 4), model.scores_),
                              Names),
                          reverse=True)

        sel_finale = []
        for i in range(0, len(new_feat)):
            s, t = new_feat[i]
            if (s > 0):
                sel_finale.append(t)

        second_featsel = one_featsel.iloc[:, sel_finale]

        ################ concat the original features & selected new features

        transformed_X = pd.concat([X, second_featsel], axis=1)

    else:

        transformed_X = X

    return None, transformed_X
Ejemplo n.º 51
0
    # Choose the number of predicted peptides and their length
    n_predictions = 1000
    y_length = 5

    # Max time (seconds) for the branch and bound search
    max_time = 500

    print('String maximization model on BPPs dataset')
    gs_kernel = GenericStringKernel(AminoAcidFile.blosum62_natural, sigma_position, sigma_amino_acid, n,
                                    is_normalized=True)
    alphabet = gs_kernel.alphabet
    dataset = load_bpps_dataset()

    # Use a regression algorithm to learn the weights first
    print('Learning the regression weights ...')
    learner = KernelRidge(alpha, kernel='precomputed')
    gram_matrix = gs_kernel(dataset.X, dataset.X)
    learner.fit(gram_matrix, dataset.y)
    learned_weights = learner.dual_coef_

    # We can then use the string maximization model with the learned weights
    print('Branch and bound search for the top {} peptides of length {} ...'.format(n_predictions, y_length))
    model = StringMaximizationModel(alphabet, n, gs_kernel, max_time)
    model.fit(dataset.X, learned_weights, y_length)
    peptides, bioactivities = model.predict(n_predictions)

    print('\n')
    print('Peptides | Predicted bioactivities')
    for peptide, bioactivity in zip(peptides, bioactivities):
        print(peptide, bioactivity)
Ejemplo n.º 52
0
def train(self):
    final = KernelRidge(alpha=.1, kernel="linear")
    self.csvToArray()
    final.fit(self.feats, self.labels)
    self.model = final
Ejemplo n.º 53
0
class VADEstimator(BaseEstimator):
  def fit( self, x , y , size=1 ):
    self.model = Sequential()
    self.model.add(Dense( int( embeddings_dim / 2.0 ) , input_dim=embeddings_dim , init='uniform' , activation='tanh'))
    self.model.add(Dense( int( embeddings_dim / 4.0 ) , init='uniform' , activation='tanh'))
    self.model.add(Dense(size , init='uniform' ) )
    self.model.compile(loss='mse', optimizer='rmsprop')
    self.model = KernelRidge( kernel='rbf' )
    self.model.fit( x , y )
  def predict( self, x ): 
    if isinstance( self.model , Sequential ): return self.model.predict( x , verbose=0 )[ 0 ]
    return self.model.predict( x )
Ejemplo n.º 54
0
def test_generalization_across_time():
    """Test time generalization decoding
    """
    from sklearn.svm import SVC
    from sklearn.base import is_classifier
    # KernelRidge is used for testing 1) regression analyses 2) n-dimensional
    # predictions.
    from sklearn.kernel_ridge import KernelRidge
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import roc_auc_score, mean_squared_error

    epochs = make_epochs()
    y_4classes = np.hstack((epochs.events[:7, 2], epochs.events[7:, 2] + 1))
    if check_version('sklearn', '0.18'):
        from sklearn.model_selection import (KFold, StratifiedKFold,
                                             ShuffleSplit, LeaveOneLabelOut)
        cv_shuffle = ShuffleSplit()
        cv = LeaveOneLabelOut()
        # XXX we cannot pass any other parameters than X and y to cv.split,
        # so we have to build it beforehand
        cv_lolo = [(train, test) for train, test in cv.split(
            X=y_4classes, y=y_4classes, labels=y_4classes)]

        # With sklearn >= 0.17, `clf` can be identified as a regressor, and
        # the scoring metrics can therefore be automatically assigned.
        scorer_regress = None
    else:
        from sklearn.cross_validation import (KFold, StratifiedKFold,
                                              ShuffleSplit, LeaveOneLabelOut)
        cv_shuffle = ShuffleSplit(len(epochs))
        cv_lolo = LeaveOneLabelOut(y_4classes)

        # With sklearn < 0.17, `clf` cannot be identified as a regressor, and
        # therefore the scoring metrics cannot be automatically assigned.
        scorer_regress = mean_squared_error
    # Test default running
    gat = GeneralizationAcrossTime(picks='foo')
    assert_equal("<GAT | no fit, no prediction, no score>", "%s" % gat)
    assert_raises(ValueError, gat.fit, epochs)
    with warnings.catch_warnings(record=True):
        # check classic fit + check manual picks
        gat.picks = [0]
        gat.fit(epochs)
        # check optional y as array
        gat.picks = None
        gat.fit(epochs, y=epochs.events[:, 2])
        # check optional y as list
        gat.fit(epochs, y=epochs.events[:, 2].tolist())
    assert_equal(len(gat.picks_), len(gat.ch_names), 1)
    assert_equal(
        "<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), no "
        "prediction, no score>", '%s' % gat)
    assert_equal(gat.ch_names, epochs.ch_names)
    # test different predict function:
    gat = GeneralizationAcrossTime(predict_method='decision_function')
    gat.fit(epochs)
    # With classifier, the default cv is StratifiedKFold
    assert_true(gat.cv_.__class__ == StratifiedKFold)
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 15, 14, 1))
    gat.predict_method = 'predict_proba'
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 15, 14, 2))
    gat.predict_method = 'foo'
    assert_raises(NotImplementedError, gat.predict, epochs)
    gat.predict_method = 'predict'
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 15, 14, 1))
    assert_equal(
        "<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
        "predicted 14 epochs, no score>", "%s" % gat)
    gat.score(epochs)
    assert_true(gat.scorer_.__name__ == 'accuracy_score')
    # check clf / predict_method combinations for which the scoring metrics
    # cannot be inferred.
    gat.scorer = None
    gat.predict_method = 'decision_function'
    assert_raises(ValueError, gat.score, epochs)
    # Check specifying y manually
    gat.predict_method = 'predict'
    gat.score(epochs, y=epochs.events[:, 2])
    gat.score(epochs, y=epochs.events[:, 2].tolist())
    assert_equal(
        "<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
        "predicted 14 epochs,\n scored "
        "(accuracy_score)>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=epochs.events[:, 2])

    old_mode = gat.predict_mode
    gat.predict_mode = 'super-foo-mode'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_mode = old_mode

    gat.score(epochs, y=epochs.events[:, 2])
    assert_true("accuracy_score" in '%s' % gat.scorer_)
    epochs2 = epochs.copy()

    # check _DecodingTime class
    assert_equal(
        "<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
        "0.050 (s), length: 0.050 (s), n_time_windows: 15>",
        "%s" % gat.train_times_)
    assert_equal(
        "<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
        "0.050 (s), length: 0.050 (s), n_time_windows: 15 x 15>",
        "%s" % gat.test_times_)

    # the y-check
    gat.predict_mode = 'mean-prediction'
    epochs2.events[:, 2] += 10
    gat_ = copy.deepcopy(gat)
    with use_log_level('error'):
        assert_raises(ValueError, gat_.score, epochs2)
    gat.predict_mode = 'cross-validation'

    # Test basics
    # --- number of trials
    assert_true(gat.y_train_.shape[0] == gat.y_true_.shape[0] == len(
        gat.y_pred_[0][0]) == 14)
    # ---  number of folds
    assert_true(np.shape(gat.estimators_)[1] == gat.cv)
    # ---  length training size
    assert_true(
        len(gat.train_times_['slices']) == 15 == np.shape(gat.estimators_)[0])
    # ---  length testing sizes
    assert_true(
        len(gat.test_times_['slices']) == 15 == np.shape(gat.scores_)[0])
    assert_true(
        len(gat.test_times_['slices'][0]) == 15 == np.shape(gat.scores_)[1])

    # Test score_mode
    gat.score_mode = 'foo'
    assert_raises(ValueError, gat.score, epochs)
    gat.score_mode = 'fold-wise'
    scores = gat.score(epochs)
    assert_array_equal(np.shape(scores), [15, 15, 5])
    gat.score_mode = 'mean-sample-wise'
    scores = gat.score(epochs)
    assert_array_equal(np.shape(scores), [15, 15])
    gat.score_mode = 'mean-fold-wise'
    scores = gat.score(epochs)
    assert_array_equal(np.shape(scores), [15, 15])
    gat.predict_mode = 'mean-prediction'
    with warnings.catch_warnings(record=True) as w:
        gat.score(epochs)
        assert_true(
            any("score_mode changed from " in str(ww.message) for ww in w))

    # Test longer time window
    gat = GeneralizationAcrossTime(train_times={'length': .100})
    with warnings.catch_warnings(record=True):
        gat2 = gat.fit(epochs)
    assert_true(gat is gat2)  # return self
    assert_true(hasattr(gat2, 'cv_'))
    assert_true(gat2.cv_ != gat.cv)
    with warnings.catch_warnings(record=True):  # not vectorizing
        scores = gat.score(epochs)
    assert_true(isinstance(scores, np.ndarray))  # type check
    assert_equal(len(scores[0]), len(scores))  # shape check
    assert_equal(len(gat.test_times_['slices'][0][0]), 2)
    # Decim training steps
    gat = GeneralizationAcrossTime(train_times={'step': .100})
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_true(len(gat.scores_) == len(gat.estimators_) == 8)  # training time
    assert_equal(len(gat.scores_[0]), 15)  # testing time

    # Test start stop training & test cv without n_fold params
    y_4classes = np.hstack((epochs.events[:7, 2], epochs.events[7:, 2] + 1))
    train_times = dict(start=0.090, stop=0.250)
    gat = GeneralizationAcrossTime(cv=cv_lolo, train_times=train_times)
    # predict without fit
    assert_raises(RuntimeError, gat.predict, epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=y_4classes)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 4)
    assert_equal(gat.train_times_['times'][0], epochs.times[6])
    assert_equal(gat.train_times_['times'][-1], epochs.times[9])

    # Test score without passing epochs & Test diagonal decoding
    gat = GeneralizationAcrossTime(test_times='diagonal')
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.fit(epochs)
    assert_raises(RuntimeError, gat.score)
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.predict(epochs)
    scores = gat.score()
    assert_true(scores is gat.scores_)
    assert_equal(np.shape(gat.scores_), (15, 1))
    assert_array_equal(
        [tim for ttime in gat.test_times_['times'] for tim in ttime],
        gat.train_times_['times'])
    # Test generalization across conditions
    gat = GeneralizationAcrossTime(predict_mode='mean-prediction', cv=2)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs[0:6])
    with warnings.catch_warnings(record=True):
        # There are some empty test folds because of n_trials
        gat.predict(epochs[7:])
        gat.score(epochs[7:])

    # Test training time parameters
    gat_ = copy.deepcopy(gat)
    # --- start stop outside time range
    gat_.train_times = dict(start=-999.)
    with use_log_level('error'):
        assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(start=999.)
    assert_raises(ValueError, gat_.fit, epochs)
    # --- impossible slices
    gat_.train_times = dict(step=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=999.)
    assert_raises(ValueError, gat_.fit, epochs)

    # Test testing time parameters
    # --- outside time range
    gat.test_times = dict(start=-999.)
    with warnings.catch_warnings(record=True):  # no epochs in fold
        assert_raises(ValueError, gat.predict, epochs)
    gat.test_times = dict(start=999.)
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat.predict, epochs)
    # --- impossible slices
    gat.test_times = dict(step=.000001)
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat.predict, epochs)
    gat_ = copy.deepcopy(gat)
    gat_.train_times_['length'] = .000001
    gat_.test_times = dict(length=.000001)
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat_.predict, epochs)
    # --- test time region of interest
    gat.test_times = dict(step=.150)
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 5, 14, 1))
    # --- silly value
    gat.test_times = 'foo'
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat.predict, epochs)
    assert_raises(RuntimeError, gat.score)
    # --- unmatched length between training and testing time
    gat.test_times = dict(length=.150)
    assert_raises(ValueError, gat.predict, epochs)
    # --- irregular length training and testing times
    # 2 estimators, the first one is trained on two successive time samples
    # whereas the second one is trained on a single time sample.
    train_times = dict(slices=[[0, 1], [1]])
    # The first estimator is tested once, the second estimator is tested on
    # two successive time samples.
    test_times = dict(slices=[[[0, 1]], [[0], [1]]])
    gat = GeneralizationAcrossTime(train_times=train_times,
                                   test_times=test_times)
    gat.fit(epochs)
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.score(epochs)
    assert_array_equal(np.shape(gat.y_pred_[0]), [1, len(epochs), 1])
    assert_array_equal(np.shape(gat.y_pred_[1]), [2, len(epochs), 1])
    # check we cannot automatically infer testing times for ad hoc training times
    gat.test_times = None
    assert_raises(ValueError, gat.predict, epochs)

    svc = SVC(C=1, kernel='linear', probability=True)
    gat = GeneralizationAcrossTime(clf=svc, predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    # sklearn needs it: c.f.
    # https://github.com/scikit-learn/scikit-learn/issues/2723
    # and http://bit.ly/1u7t8UT
    with use_log_level('error'):
        assert_raises(ValueError, gat.score, epochs2)
        gat.score(epochs)
    assert_true(0.0 <= np.min(scores) <= 1.0)
    assert_true(0.0 <= np.max(scores) <= 1.0)

    # Test that gets error if train on one dataset, test on another, and don't
    # specify appropriate cv:
    gat = GeneralizationAcrossTime(cv=cv_shuffle)
    gat.fit(epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    gat.predict(epochs)
    assert_raises(ValueError, gat.predict, epochs[:10])

    # Make CV with some empty train and test folds:
    # --- empty test fold(s) should warn when gat.predict()
    gat._cv_splits[0] = [gat._cv_splits[0][0], np.empty(0)]
    with warnings.catch_warnings(record=True) as w:
        gat.predict(epochs)
        assert_true(len(w) > 0)
        assert_true(
            any('do not have any test epochs' in str(ww.message) for ww in w))
    # --- empty train fold(s) should raise when gat.fit()
    gat = GeneralizationAcrossTime(cv=[([0], [1]), ([], [0])])
    assert_raises(ValueError, gat.fit, epochs[:2])

    # Check that still works with classifier that output y_pred with
    # shape = (n_trials, 1) instead of (n_trials,)
    if check_version('sklearn', '0.17'):  # no is_regressor before v0.17
        gat = GeneralizationAcrossTime(clf=KernelRidge(), cv=2)
        epochs.crop(None, epochs.times[2])
        gat.fit(epochs)
        # With regression the default cv is KFold and not StratifiedKFold
        assert_true(gat.cv_.__class__ == KFold)
        gat.score(epochs)
        # with regression the default scoring metrics is mean squared error
        assert_true(gat.scorer_.__name__ == 'mean_squared_error')

    # Test combinations of complex scenarios
    # 2 or more distinct classes
    n_classes = [2, 4]  # 4 tested
    # nicely ordered labels or not
    le = LabelEncoder()
    y = le.fit_transform(epochs.events[:, 2])
    y[len(y) // 2:] += 2
    ys = (y, y + 1000)
    # Univariate and multivariate prediction
    svc = SVC(C=1, kernel='linear', probability=True)
    reg = KernelRidge()

    def scorer_proba(y_true, y_pred):
        return roc_auc_score(y_true, y_pred[:, 0])

    # We're testing 3 scenarios: default, classifier + predict_proba, regressor
    scorers = [None, scorer_proba, scorer_regress]
    predict_methods = [None, 'predict_proba', None]
    clfs = [svc, svc, reg]
    # Test all combinations
    for clf, predict_method, scorer in zip(clfs, predict_methods, scorers):
        for y in ys:
            for n_class in n_classes:
                for predict_mode in ['cross-validation', 'mean-prediction']:
                    # Cannot use AUC for n_class > 2
                    if (predict_method == 'predict_proba' and n_class != 2):
                        continue

                    y_ = y % n_class

                    with warnings.catch_warnings(record=True):
                        gat = GeneralizationAcrossTime(
                            cv=2,
                            clf=clf,
                            scorer=scorer,
                            predict_mode=predict_mode)
                        gat.fit(epochs, y=y_)
                        gat.score(epochs, y=y_)

                    # Check that scorer is correctly defined manually and
                    # automatically.
                    scorer_name = gat.scorer_.__name__
                    if scorer is None:
                        if is_classifier(clf):
                            assert_equal(scorer_name, 'accuracy_score')
                        else:
                            assert_equal(scorer_name, 'mean_squared_error')
                    else:
                        assert_equal(scorer_name, scorer.__name__)
f5 = open("f5.csv")
data5 = np.loadtxt(f5, delimiter=',')
dataTemp5 = data5[:, np.newaxis]
index5 = list(np.array(dataTemp5).reshape(-1,))
index5 = [int(i-1) for i in index5]


#Combine the index
index = [index1, index2, index3, index4, index5]
index = np.asarray(index)



#Model:
a = .00139; y = .518  # alpha and gamma for the kernel ridge model
model = KernelRidge(alpha= a, gamma=y, kernel='rbf')



#Loop:
rmse_list = []
num_folds = 5   # data is divided into 5 time slices
Overall_Y_Pred = np.zeros(len(X))
for i in range(1, 5):
    to_exclude = list(range(i))
    folder_train = np.asarray(to_exclude).astype(int)
    #index_train starts with the first folder
    index_train = index[folder_train]
    index_test = [element for i, element in enumerate(index) if i not in to_exclude]
    print(len(index_test))
    #train set starts with the first folder
Ejemplo n.º 56
0
from sklearn.linear_model import Ridge

from sklearn.datasets import load_diabetes, load_boston
from sklearn.preprocessing import MinMaxScaler

# %%

# constants
num_splits = 9
num_seeds = 10

DATASET_LOADER = {"boston": load_boston, "diabetes": load_diabetes}

MODELS = {
    "linear_ridge": Ridge(alpha=5.0),
    "kernel_ridge": KernelRidge(kernel="rbf"),
    # "linear_svr": LinearSVR(max_iter=12000),
    "svr": SVR(kernel="rbf", gamma="scale", C=1.0, epsilon=0.1)
}

# %%


def line(x, y, *args, **kwargs):

    # a = np.minimum(x.min(), y.min())
    # b = np.maximum(x.max(), y.max())
    a = 0.2
    b = 2.0

    u = [a, b]
Ejemplo n.º 57
0
class Learner():

    path = 'matrices/'
    inputF = 'inputs.npy'
    stateF = 'states.npy'
    itrF = 'itr.npy'
    inptFile = os.path.join(path, inputF)
    stateFile = os.path.join(path, stateF)
    itrFile = os.path.join(path, itrF)

    itr = np.array([])

    useSHIV = False
    THRESH = 0.45
    ahqp_solver_g = AHQP(sigma=6)
    ahqp_solver_b = AHQP(sigma=5,nu=1e-3)


    def trainModel(self, s=None, a=None):
        """
        Trains model on given states and actions.
        Uses neural net or SVM based on global
        settings.
        """
        states, actions = self.states[3:], self.actions[3:]
        #print "states.shape"
        #print states.shape
        #print "actions.shape"
        #print actions.shape

        if len(self.itr) == 0:
            self.itr = np.array([states.shape[0]])
        else:
            self.itr = np.hstack((self.itr, states.shape[0]))

        '''if states.shape[0] > 2700.0:
            f = os.path.join(self.path, 'statesToValidate.npy')
            np.save(f, states)
            IPython.embed()'''

        
        fits = []

        #actions = actions.ravel()
        self.clf = KernelRidge(alpha=1.0, kernel='rbf')
        print("SIZE:", states.shape)
        self.clf.fit(states, actions)
        #IPython.embed()
        actions_pred = self.clf.predict(states)
        bad_state = np.zeros(actions_pred.shape[0])
        for i in range(actions_pred.shape[0]):
            fit =  LA.norm(actions_pred[i,:] - actions[i,:])
            fits.append(fit)

        med = np.median(np.array(fits))
        for i, fit in enumerate(fits):
            if fit > med:
                bad_state[i] = 1

        IPython.embed()

        if self.useSHIV:
            self.labels = np.zeros(states.shape[0])+1.0
            self.scaler = preprocessing.StandardScaler().fit(states)
            states_proc = self.scaler.transform(states)
            
            good_labels = bad_state == 0.0         
            states_g = states_proc[good_labels,:] 

            bad_labels = bad_state == 1.0 
            states_b = states_proc[bad_labels,:] 
            #IPython.embed()
            self.ahqp_solver_g.assembleKernel(states_g, np.zeros(states_g.shape[0])+1.0)
            self.ahqp_solver_b.assembleKernel(states_b, np.zeros(states_b.shape[0])+1.0)
            #IPython.embed()
            self.ahqp_solver_g.solveQP()
            self.ahqp_solver_b.solveQP()

            #score = self.clf.score(states, actions)
            #print score
        
        self.plot(fits, states, med)

    def askForHelp(self,state):
        if self.useSHIV:
            state = self.scaler.transform(state)
            if self.ahqp_solver_b.predict(state)==1.0:
                return -1.0
            else:
                return self.ahqp_solver_g.predict(state)
        else:
            return -1

    
    def plot(self, fits, states, threshold):
        index = range(len(states))
        t = np.ones(len(index)) * threshold
        plt.figure(1)
        plt.plot(index, fits, color='b', linewidth=4.0)
        plt.plot(index, t, color='r', linewidth=4.0)
        plt.ylabel('Fit')
        plt.xlabel('Index of State')

        plt.show()


    def getAction(self, state):
        """
        Returns a prediction given the input state.
        Uses neural net or SVM based on global
        settings.
        """
        return self.clf.predict(state)


    def initModel(self, useSHIV):
        self.useSHIV = useSHIV
        try:
            self.states = np.load(self.stateFile)
            self.actions = np.load(self.inptFile)
        except IOError:
            self.states = np.array([-8,8.75,0,-12,22,0,-15,21.13043404,
                                     0,-12,18.52173996,0,-15,14.173913,
                                     0,-12,8.08695698,0,0,0,0,0])
            self.actions = np.array([0,0,0,0])
        #self.trainModel(self.states, self.actions)

    def updateModel(self, s, a):
        self.states = np.vstack((self.states, s))
        self.actions = np.vstack((self.actions, a))
        #self.trainModel(self.states, self.actions)

    def saveModel(self):
        path = 'matrices/oldData/'
        currT = strftime("%Y-%m-%d %H:%M:%S", gmtime())

        inptFileOut = os.path.join(path, 'inputs' + currT + '.npy')
        stateFileOut = os.path.join(path, 'states' + currT + '.npy')

        np.save(stateFileOut, self.states)
        np.save(inptFileOut, self.actions)
        np.save(self.itrFile, self.itr)
y = np.sin(X).ravel()

# Add noise to targets
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))

X_plot = np.linspace(0, 5, 100000)[:, None]

# #############################################################################
# Fit regression model
train_size = 100
svr = GridSearchCV(SVR(kernel='rbf', gamma=0.1),
                   param_grid={"C": [1e0, 1e1, 1e2, 1e3],
                               "gamma": np.logspace(-2, 2, 5)},
                   n_jobs=1)

kr = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1),
                  param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3],
                              "gamma": np.logspace(-2, 2, 5)},
                  n_jobs=1)

t0 = time.time()
svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0
print("SVR complexity and bandwidth selected and model fitted in %.3f s"
      % svr_fit)

t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0
print("KRR complexity and bandwidth selected and model fitted in %.3f s"
      % kr_fit)
Ejemplo n.º 59
0
    df = df.apply(LabelEncoder().fit_transform)
    X = df.drop('Face Rental', axis=1).values
    y = df['Face Rental'].values
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        random_state=1,
                                                        test_size=0.3)
    regressor = [
        SVR(kernel='rbf', gamma=0.7, C=1),
        linear_model.Ridge(alpha=.5),
        linear_model.Lasso(alpha=0.1),
        linear_model.LassoLars(alpha=.1),
        linear_model.BayesianRidge(),
        MLPRegressor(),
        DecisionTreeRegressor(),
        KernelRidge(),
        PassiveAggressiveRegressor(),
        RANSACRegressor(),
        TheilSenRegressor(),
        RandomForestRegressor()
    ]

    result_cols = ["Regressor", "Accuracy"]
    result_frame = pd.DataFrame(columns=result_cols)

    for model in regressor:
        name = model.__class__.__name__
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        error = sqrt(mean_squared_error(y_test, predictions))
        acc = 100 - error
Ejemplo n.º 60
0
def ml_param_size_scan(x_datafile,
                       y_datafile,
                       ids,
                       testfiles,
                       alpha_list=np.logspace(-1, -8, 8),
                       gamma_list=np.logspace(-2, -10, 9),
                       kernel_list=['rbf'],
                       layer_list=[(40, 40, 40)],
                       learning_rate_list=[0.001],
                       sample_size_list=[
                           0.005, 0.01, 0.03, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
                           0.6, 0.7, 0.8, 0.9
                       ],
                       ml_method="krr",
                       paramset_size=0.1):
    print('model ' + ml_method)
    max_sample_size = max(sample_size_list)
    # load, split and scale data
    ax_train, ax_test, ay_train, ay_test, aids_train, aids_test = load_split_scale_data(
        x_datafile, y_datafile, ids, testfiles, max_sample_size)

    # search for optimal learner parameters
    # reduce set
    x_train, x_test, y_train, y_test, ids_train, ids_test = train_test_split(
        ax_train,
        ay_train,
        aids_train,
        test_size=1 - paramset_size,
    )

    if ml_method == "krr":
        # Create kernel linear ridge regression object
        learner = GridSearchCV(KernelRidge(kernel='rbf'),
                               n_jobs=8,
                               cv=5,
                               param_grid={
                                   "alpha": alpha_list,
                                   "gamma": gamma_list,
                                   "kernel": kernel_list
                               },
                               scoring='neg_mean_absolute_error')

    elif ml_method == "mlp":
        # Create Multi-Layer Perceptron object
        learner = GridSearchCV(MLPRegressor(hidden_layer_sizes=(40, 40, 40),
                                            max_iter=1600,
                                            alpha=0.001,
                                            learning_rate_init=0.001),
                               n_jobs=8,
                               cv=5,
                               param_grid={
                                   "alpha": alpha_list,
                                   "learning_rate_init": learning_rate_list,
                                   "hidden_layer_sizes": layer_list
                               },
                               scoring='neg_mean_absolute_error')
    else:
        print("ML method unknown. Exiting.")
        exit(1)
    t_ml0 = time.time()
    learner.fit(x_train, y_train)
    t_ml1 = time.time()
    print("ml time", str(t_ml1 - t_ml0))

    # getting best parameters
    learner_best = learner.best_estimator_

    mae, mse, y_pred, train_y_pred, learner_best = predict_and_error(
        learner_best, x_test, x_train, y_test)

    ### OUTPUT ###
    write_output(learner, max_sample_size * paramset_size, ml_method, mae, mse,
                 "param", ids_test, y_test, y_pred, ids_train, y_train,
                 train_y_pred)

    # use above found best parameters
    x_test = ax_test
    y_test = ay_test
    ids_test = aids_test
    paramlearner = learner

    for sample_size in sample_size_list:
        ratio = float(sample_size) / float(max_sample_size)
        if ratio > 0.999:
            x_train = ax_train
            y_train = ay_train
            ids_train = aids_train
        else:
            # reduce set ("test set" is the part of the training set that is used)
            x_dump, x_train, y_dump, y_train, ids_dump, ids_train = train_test_split(
                ax_train,
                ay_train,
                aids_train,
                test_size=ratio,
            )

        if ml_method == "krr":
            # Create kernel linear ridge regression object

            alpha0 = paramlearner.best_params_["alpha"]
            gamma0 = paramlearner.best_params_["gamma"]
            kernel0 = paramlearner.best_params_["kernel"]

            learner = KernelRidge(alpha=alpha0,
                                  coef0=1,
                                  degree=3,
                                  gamma=gamma0,
                                  kernel=kernel0,
                                  kernel_params=None)
        elif ml_method == "mlp":
            # Create Multi-Layer Perceptron object

            hidden_layer_sizes0 = paramlearner.best_params_[
                "hidden_layer_sizes"]
            alpha0 = paramlearner.best_params_["alpha"]
            learning_rate_init0 = paramlearner.best_params_[
                "learning_rate_init"]

            learner = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes0,
                                   max_iter=1600,
                                   alpha=alpha0,
                                   learning_rate_init=learning_rate_init0)
        else:
            print("ML method unknown. Exiting.")
            exit(1)

        t_ml0 = time.time()
        learner.fit(x_train, y_train)
        t_ml1 = time.time()
        print("ml time", str(t_ml1 - t_ml0))

        mae, mse, y_pred, train_y_pred, learner = predict_and_error(
            learner, x_test, x_train, y_test)

        ### OUTPUT ###
        write_output(learner, sample_size, ml_method, mae, mse, "psize",
                     ids_test, y_test, y_pred, ids_train, y_train,
                     train_y_pred)
    return None