Beispiel #1
0
def OMP(problem, **kwargs):
    r"""Select the data entries that best explain the goal via OMP.

    Each entry of ``problem.data`` contributes its ``['data']['values']``
    sequence as one candidate regressor (one column of the design matrix)
    and ``problem.goal['data']['values']`` is used as the regression
    target.  An ``OrthogonalMatchingPursuit`` model restricted to
    ``kwargs['choose']`` non-zero coefficients is fitted, and the entries
    whose coefficient magnitude exceeds ``kwargs['coef_tolerance']`` are
    reported.

    Parameters
    ----------
    problem : object
        Must expose ``data`` (an indexable sequence of mappings, each with
        ``['data']['values']``) and ``goal`` (a mapping with
        ``['data']['values']``).
    kwargs : dictionary
        kwargs['choose'] must be a positive integer
        kwargs['coef_tolerance'] must be a nonnegative float

    Returns
    -------
    tuple
        ``(optimum, maximum)``: ``optimum`` is the list of selected entries
        of ``problem.data``; ``maximum`` is the value returned by the fitted
        model's ``score`` on the same data it was fitted on.

    """
    # One row per entry, transposed so that every entry becomes a column.
    design = numpy.array(
        [datum['data']['values'] for datum in problem.data]
    ).T
    model = OrthogonalMatchingPursuit(n_nonzero_coefs=kwargs['choose'])
    target = problem.goal['data']['values']
    model.fit(design, target)

    optimum = []
    for index, weight in enumerate(model.coef_):
        # Keep only the entries the model actually relies on.
        if abs(weight) > kwargs['coef_tolerance']:
            optimum.append(problem.data[index])

    maximum = model.score(design, target)
    return (optimum, maximum)
class image_data_repo:
    """Dictionary of image feature vectors with OMP-based novelty checks.

    The repository maps a key (used as a file name under ``~/caffe/`` by
    :meth:`update`) to a feature vector.  :meth:`use_image` checks how well
    a new feature vector can be reconstructed from the stored ones, and
    :meth:`update` evicts one stored image and moves its file away.

    Parameters
    ----------
    name : str
        Stored on the instance; not used by the methods of this class.
    image_feature_dict : dict or None
        Initial ``{key: feature_vector}`` mapping.  It is copied, so the
        caller's mapping is never modified.  ``None`` means "start empty".
    reconsitution_element_nums : int
        ``n_nonzero_coefs`` given to ``OrthogonalMatchingPursuit``.
    error_limit : float
        Threshold on ``1 - score`` used by :meth:`use_image`.
    """

    def __init__(self, name='image_', image_feature_dict=None,
                 reconsitution_element_nums=6, error_limit=0.1):
        self.name = name
        # A None default avoids sharing one mutable dict between calls.
        if image_feature_dict is None:
            self.image_feature_dict = {}
        else:
            self.image_feature_dict = image_feature_dict.copy()
        self.reconsitution_element_nums = reconsitution_element_nums
        self.error_limit = error_limit
        self.omp = OrthogonalMatchingPursuit(
            n_nonzero_coefs=reconsitution_element_nums)

    def image_nums(self):
        """Return the number of stored images."""
        # len() is correct on Python 2 and 3; np.size() on a Python 3
        # dict view reports 1 regardless of the real count.
        return len(self.image_feature_dict)

    def add_element(self, keys, values):
        """Store (or overwrite) the feature vector ``values`` under ``keys``."""
        self.image_feature_dict[keys] = values

    def use_image(self, image_feature):
        """Tell whether ``image_feature`` is poorly explained by the store.

        The stored vectors are used as regressors (one column each) and
        ``image_feature`` as the target of the OMP fit.

        Returns
        -------
        tuple
            ``(flag, err)`` with ``err = 1 - score``.  ``flag`` is ``False``
            when ``err`` is below ``error_limit`` and ``True`` otherwise.
        """
        # list() materialises the dict view so numpy builds a 2-D array.
        data = np.array(list(self.image_feature_dict.values())).T
        self.omp.fit(data, image_feature)
        err = 1 - self.omp.score(data, image_feature)
        return not (err < self.error_limit), err

    def _most_redundant_key(self):
        """Return the key of the image most similar to another stored image.

        Similarity is the plain dot product between (flattened) feature
        vectors.  Returns ``None`` when the repository is empty.
        """
        if not self.image_feature_dict:
            return None
        names = list(self.image_feature_dict)
        data = np.array(
            [self.image_feature_dict[key] for key in names], dtype=float
        ).reshape(len(names), -1)
        # Image-by-image Gram matrix; self-similarity must not win.
        gram = np.dot(data, data.T)
        np.fill_diagonal(gram, -np.inf)
        row = np.unravel_index(np.argmax(gram), gram.shape)[0]
        return names[row]

    def update(self):
        """Evict the most redundant image and move its file to ``~/rubbish/``.

        Does nothing when the repository is empty.  The source file under
        ``~/caffe/`` is removed only after it was copied successfully, so a
        failed copy never loses the file.
        """
        import subprocess

        filename = self._most_redundant_key()
        if filename is None:
            return
        self.image_feature_dict.pop(filename)
        # Argument lists (no shell) keep odd file names from being
        # interpreted as shell syntax.
        source = os.path.expanduser('~/caffe/{}'.format(filename))
        target_dir = os.path.expanduser('~/rubbish/')
        if subprocess.call(['cp', source, target_dir]) == 0:
            subprocess.call(['rm', '-f', source])
        return
def _orthogonal_matching_pursuit(response_mat, diff_vec, opt):
    """Fit an OMP model and return its coefficient vector.

    Args:
        response_mat: regressor matrix; its ``columns.values`` are used to
            name the selected variables in the debug log.
        diff_vec: regression target passed to ``fit`` and ``score``.
        opt: options object; ``opt.n_correctors`` is handed to
            ``OrthogonalMatchingPursuit`` as its first argument.

    Returns:
        The ``coef_`` attribute of the fitted model.

    Raises:
        ValueError: if ``opt.n_correctors`` is ``None``.
    """
    n_correctors = opt.n_correctors
    if n_correctors is None:
        raise ValueError(
            "n_correctors setting needed for orthogonal matching pursuit.")

    fitted = OrthogonalMatchingPursuit(n_correctors).fit(
        response_mat, diff_vec)
    weights = fitted.coef_

    LOG.debug("Orthogonal Matching Pursuit Results:")
    # Columns with a non-zero coefficient are the ones the model selected.
    chosen = response_mat.columns.values[weights.nonzero()]
    LOG.debug("  Chosen variables: {:s}".format(str(chosen)))
    fit_score = fitted.score(response_mat, diff_vec)
    LOG.debug("  Score: {:f}".format(fit_score))

    return weights
    else:
        # Fallback branch: use the training matrix unchanged (the matching
        # `if` and the enclosing loop lie outside this excerpt).
        X = X_train
    print X.shape[1]
    # Candidate sparsity levels: 1 .. (number of columns of X) - 1.
    param_grid={'n_nonzero_coefs':range(1,X.shape[1])}
    # NOTE(review): relies on Python 2, where dict.values() is a list, so
    # grid_values is a 2-D array holding one row of candidates.
    grid_values= np.array(param_grid.values()).astype('int')
    print grid_values
    gridSize=grid_values.shape[1]
    OMP_regr = OrthogonalMatchingPursuit()
    # grid_search_helper is defined outside this excerpt; presumably it
    # returns the mean and std of the CV scores per grid point -- confirm.
    cv_grid_scores_mean,cv_grid_scores_std = grid_search_helper(X, Y_train,OMP_regr, param_grid,gridSize, n_jobs=-1,
                                               n_folds=n_folds, n_repetitions=noRep, scoring=mse_scorer, iid=False)
    # Pick the grid point with the smallest negated mean CV score, refit on
    # the training data, then score on the validation data below.
    opt_ind = np.where(-cv_grid_scores_mean == np.amin(-cv_grid_scores_mean))
    # NOTE(review): np.where returns a tuple of index arrays, so the value
    # passed as n_nonzero_coefs is an array rather than a plain int.
    OMP_regr= OrthogonalMatchingPursuit(n_nonzero_coefs= grid_values[0,opt_ind])
    print X.shape
    OMP_regr.fit(X,Y_train)
    print OMP_regr.score(X,Y_train)

    # For i > 1 the validation features go through pfeat first.
    # NOTE(review): fit_transform re-fits pfeat on the validation data.
    if(i>1):

        X_val_tr = pfeat.fit_transform(X_val)
    else:
        X_val_tr = X_val

    # Store the validation score for iteration i (slots are zero-based).
    r2Poly[i-1]=OMP_regr.score(X_val_tr,Y_val)

# Print all collected validation scores.
print r2Poly




Beispiel #5
0
def task2(data):
    """Fit ten regressors on price data and forecast the adjusted close.

    Four features are derived from ``data`` ('Adj Close', 'Volume', the
    high-low spread in percent and the open-to-close change in percent).
    The label is 'Adj Close' shifted ``forecast_out`` rows into the future,
    where ``forecast_out`` is 1 percent of the rows, rounded up.  The last
    ``forecast_out`` rows have no label and are used as forecast input; the
    rest is split 80/20 into train and test data.

    Every model is fitted, its test ``score`` (times 100) is printed, and
    its forecast is written to a ``Forecast_<model>`` column on
    ``forecast_out`` new rows dated one day apart after the last row.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns 'Adj Close', 'Volume', 'High', 'Low', 'Close' and
        'Open'.  Index entries must support ``+ datetime.timedelta`` and
        ``strftime``.

    Returns
    -------
    tuple
        Twelve lists: the index formatted as '%Y-%m-%d', the 'Adj Close'
        column, then the forecast columns in the order reg, pol2, pol3,
        knn, las, byr, lar, omp, ard, sgd.
    """
    df = data

    # Explicit copy so the new columns never write into a view of `data`.
    dfreg = df.loc[:, ['Adj Close', 'Volume']].copy()
    dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
    dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

    # Replace missing values with an outlier sentinel (rows are kept).
    dfreg.fillna(value=-99999, inplace=True)
    # We want to separate 1 percent of the data to forecast
    forecast_out = int(math.ceil(0.01 * len(dfreg)))
    # Separating the label here, we want to predict the AdjClose
    forecast_col = 'Adj Close'
    dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
    # `columns=` instead of a positional axis, which pandas 2.0 removed.
    X = np.array(dfreg.drop(columns=['label']))
    # Scale the X so that everyone can have the same distribution for linear regression
    X = preprocessing.scale(X)
    # The last rows have no label yet: they are the forecast input.
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]
    # Separate label and identify it as y
    y = np.array(dfreg['label'])
    y = y[:-forecast_out]

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    # (forecast column suffix, label used in the printed report, estimator)
    models = [
        ('reg', 'linear regression', LinearRegression(n_jobs=-1)),
        ('pol2', 'quadratic regression 2',
         make_pipeline(PolynomialFeatures(2), Ridge())),
        ('pol3', 'quadratic regression 3',
         make_pipeline(PolynomialFeatures(3), Ridge())),
        ('knn', 'knn regression', KNeighborsRegressor(n_neighbors=2)),
        ('las', 'lasso regression', Lasso()),
        ('byr', 'Bayesian Ridge regression', BayesianRidge()),
        ('lar', 'Lasso LARS regression', LassoLars(alpha=.1)),
        ('omp', 'OMP regression',
         OrthogonalMatchingPursuit(n_nonzero_coefs=2)),
        ('ard', 'ARD regression', ARDRegression(compute_score=True)),
        ('sgd', 'SGD regression',
         SGDRegressor(random_state=0, max_iter=1000, tol=1e-3)),
    ]

    for _, _, model in models:
        model.fit(X_train, y_train)

    # Report each model's score on the held-out test split.
    for _, label, model in models:
        confidence = model.score(X_test, y_test)
        print('The {} confidence is:'.format(label), confidence * 100)

    forecasts = [('Forecast_' + suffix, model.predict(X_lately))
                 for suffix, _, model in models]

    # All forecasts share the same future dates, derived from the real last
    # row instead of a hard-coded offset from the end of the frame.
    last_date = dfreg.index[-1]
    future_dates = [last_date + datetime.timedelta(days=offset)
                    for offset in range(1, len(X_lately) + 1)]

    for column, _ in forecasts:
        dfreg[column] = np.nan
    for next_date in future_dates:
        dfreg.loc[next_date] = [np.nan] * len(dfreg.columns)
    # A single .loc[rows, column] assignment writes into dfreg itself;
    # chained dfreg[column].loc[...] may only modify a temporary copy.
    for column, values in forecasts:
        dfreg.loc[future_dates, column] = values

    dates = [stamp.strftime('%Y-%m-%d') for stamp in dfreg.index]
    series = [dfreg['Adj Close'].to_list()]
    series.extend(dfreg[column].to_list() for column, _ in forecasts)
    return (dates,) + tuple(series)
 
# PCA + LARS
# Fit LARS on the reduced training features (presumably the PCA projection
# named in the heading -- the projection itself is outside this excerpt),
# then report the model's score and the mean absolute error on the reduced
# test features.
lars = Lars()
lars.fit(reduced_training_features, training_labels)
preds = lars.predict(reduced_testing_features)
score = lars.score(reduced_testing_features,testing_labels)
print 'PCA + LARS Results:'
print 'R2 score:', score
print 'MAE:', mean_absolute_error(testing_labels,preds)
 
# Orthogonal Matching Pursuit
# Same evaluation on the unreduced features with a default-configured model.
# NOTE(review): mid-file import; conventionally it belongs at the top.
from sklearn.linear_model import OrthogonalMatchingPursuit
omp = OrthogonalMatchingPursuit()
omp.fit(training_features, training_labels)
preds = omp.predict(testing_features)
score = omp.score(testing_features,testing_labels)
print 'Orthogonal Matching Pursuit Regression Results:'
print 'R2 score:', score
print 'MAE:', mean_absolute_error(testing_labels,preds), '\n'
 
# PCA + Orthogonal Matching Pursuit
# A fresh model fitted on the reduced features; `omp`, `preds` and `score`
# are rebound, so the results of the previous section are overwritten.
omp = OrthogonalMatchingPursuit()
omp.fit(reduced_training_features, training_labels)
preds = omp.predict(reduced_testing_features)
score = omp.score(reduced_testing_features,testing_labels)
print 'PCA + Orthogonal Matching Pursuit Results:'
print 'R2 score:', score
print 'MAE:', mean_absolute_error(testing_labels,preds)
 
# Bayesian Ridge Regression
from sklearn.linear_model import BayesianRidge
Beispiel #7
0
    print "R^2: ", r2

    print "\n**********测试OrthogonalMatchingPursuit类**********"
    # When constructing OrthogonalMatchingPursuit, set the n_nonzero_coefs
    # parameter; its default value is None.
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=3)
    # Fit on the training set
    omp.fit(train_X, train_Y)
    # Print the model's coefficients and intercept, then the R2 on the
    # training set
    print "系数:", omp.coef_
    print "截距:", omp.intercept_
    print '训练集R2: ', r2_score(train_Y, omp.predict(train_X))

    # For linear regression models, the mean squared error (MSE) or the
    # root mean squared error (RMSE) on the test set is typically used to
    # judge how good the model is.
    test_Y_pred = omp.predict(test_X)
    print "测试集得分:", omp.score(test_X, test_Y)
    print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
    print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
    print "测试集R2:", r2_score(test_Y, test_Y_pred)

    # xss is defined outside this excerpt; its four results are printed
    # below as TSS, RSS, ESS and R^2 for predictions on X against Y.
    tss, rss, ess, r2 = xss(Y, omp.predict(X))
    print "TSS(Total Sum of Squares): ", tss
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2

    print "\n**********测试OrthogonalMatchingPursuitCV类**********"
    ompCV = OrthogonalMatchingPursuitCV(cv=5)
    # Fit on the training set (labels flattened to 1-D with ravel())
    ompCV.fit(train_X, train_Y.values.ravel())
    # Print the best n_nonzero_coefs value