예제 #1
0
def test(p,T):
    """Print the RMSE between ``pder`` from ``data(T)`` and the output of ``bessell``.

    NOTE(review): the ``p`` argument is overwritten by the first value
    returned from ``data(T)`` before it is ever read, so the caller's ``p``
    has no effect.  ``tlimit`` is not defined in this function; it has to
    come from an enclosing/global scope -- confirm.

    This is Python 2 code (``print`` statement).  Returns None.
    """
    # The point-difference tangent is only valid when this value is below 1
    # (NOTE(review): the original comment does not say which value -- confirm).
    # Uses the data to compare the strengths and weaknesses of three methods.
    p,pder=data(T)# load the data points (this replaces the ``p`` argument)
    '''开始实验'''
    # ^ bare string literal used as a section marker; it reads "begin the experiment"
##    fder=fmill(p,tlimit)# disabled: has a problem, needs debugging
##    print error.rmse(pder,fder)# dimensions must be made consistent
    bder=bessell(p,tlimit)
    print error.rmse(pder,bder)
예제 #2
0
파일: cmf.py 프로젝트: IDmy/blockgmf
def matrix_factorization(users,
                         movies,
                         ratings,
                         test_users,
                         test_movies,
                         test_ratings,
                         K=10,
                         steps=10,
                         alpha=0.0002,
                         beta=0.01,
                         delta=0.01):
    """Learn latent factor matrices from rating triples with per-rating SGD.

    Args:
        users: Sequence of integer user indices, one per observed rating.
        movies: Sequence of integer movie indices, aligned with ``users``.
        ratings: Sequence of rating values, aligned with ``users``.  Entries
            that are not strictly positive are skipped.
        test_users, test_movies, test_ratings: Held-out triples; they are
            only handed to ``rmse`` for the per-step progress line.
        K: Number of latent factors (columns of ``P`` and ``Q``).
        steps: Number of full passes over the training triples.
        alpha: Step size of every update.
        beta: Regularisation weight used inside every update.
        delta: Unused here; kept so existing callers keep working.

    Returns:
        Tuple ``(P, Q)``.  ``P`` has ``max(users) + 1`` rows and ``Q`` has
        ``max(movies) + 1`` rows; both have ``K`` columns and start at 0.2.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    t0 = time.perf_counter()

    P = np.ones((np.max(users) + 1, K)) * .2
    Q = np.ones((np.max(movies) + 1, K)) * .2

    for step in range(steps):

        print("Step : ", step)

        for i, j, rating in zip(users, movies, ratings):
            if rating > 0:
                eij = rating - np.dot(P[i, :], Q[j, :])

                # Same arithmetic as updating one factor k at a time: the new
                # P row is computed first and the Q update then uses it.
                p_row = P[i, :] + alpha * (2 * eij * Q[j, :] - beta * P[i, :])
                P[i, :] = p_row
                Q[j, :] = Q[j, :] + alpha * (2 * eij * p_row - beta * Q[j, :])

        print("Time till now :", round(time.perf_counter() - t0, 2),
              "Train error",
              round(rmse(users, movies, ratings, P, Q), 4), "Test error",
              round(rmse(test_users, test_movies, test_ratings, P, Q), 4))

    return P, Q
예제 #3
0
def cross_validate(X,y,model,folds=5,random_seed=42,test_size=.2,model_name='',parameters={},plot=False):
    """Repeatedly split, fit and score ``model``, printing per-fold and mean errors.

    Each fold draws a fresh train/test split through
    ``cross_validation.train_test_split`` with ``random_state`` set to
    ``fold * random_seed`` (so the first fold always uses seed 0), fits
    ``model`` on the training part and scores its predictions with
    ``error.mse``, ``error.rmse`` and ``error.rmsle``.

    Args:
        X: Feature data passed to ``train_test_split``.
        y: Targets passed to ``train_test_split``.
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.
        folds: Number of random splits to evaluate.
        random_seed: Multiplier used to derive each fold's ``random_state``.
        test_size: Forwarded to ``train_test_split``.
        model_name: Label used in the printed header and the plot title.
        parameters: Unused; the default is never read or mutated here.
        plot: When true, ``plot_error`` is called with the per-fold RMSLE.

    Returns:
        None.  All results are printed.
    """
    # Single-argument print(...) calls produce the same text on Python 2 and
    # are valid syntax on Python 3, unlike the print statement.
    print(' ------------------ Cross Validation using %s model -------------------- ' % model_name)
    mses = []
    rmses = []
    rmsles = []

    for fold in range(folds):
        #! create a test and train cv set
        train_cv, test_cv, y_target, y_true = cross_validation.train_test_split(
            X, y, test_size=test_size, random_state=fold*random_seed)

        #! train model and make predictions
        model.fit(train_cv, y_target)
        preds = model.predict(test_cv)

        #! measure the error (difference between the predictions and the actual targets)
        mse = error.mse(y_true, preds)
        rmse = error.rmse(y_true, preds)
        rmsle = error.rmsle(y_true, preds)

        print("(fold %d of %d) MSE : %f | RMSE: %f | RMSLE: %f %s" % (fold + 1, folds, mse, rmse, rmsle, ''))
        mses.append(mse)
        rmses.append(rmse)
        rmsles.append(rmsle)

    print(">>> Mean MSE: %f | Mean RMSE: %f | Mean RMSLE: %f <<<" % (np.mean(mses), np.mean(rmses), np.mean(rmsles)))
    print("______________________________________________________________")

    if plot:
        # list(...) keeps handing the plot helper a real list on Python 3,
        # matching what range() returned on Python 2.
        plot_error(list(range(folds)), rmsles,"Fold", "RMSLE", "Cross Validation using %s model" % model_name, ["RMSLE"])

        



# ---------------------- Scrap Notes ---------------------------------	
# models = [(linear_model.SGDRegressor, ), (linear_model.Ridge, ), ()]
# param_grid = {'alpha': [0.001, 0.01, 0.5]} #,1,5, 10, 100, 1000] }
#clf = linear_model.Ridge(alpha=a)
#clf = linear_model.SGDRegressor(alpha=0.2,n_iter=1000,shuffle=True)
#clf = linear_model.LassoCV(cv=3)
#clf = linear_model.ElasticNet()
#clf = linear_model.BayesianRidge()     
            
#clf = ensemble.RandomForestRegressor(n_estimators=100,random_state=42*idx*10,max_depth=4)
#clf = ensemble.ExtraTreesRegressor(n_estimators=100,random_state=42*idx*10,max_depth=4)
#clf = ensemble.GradientBoostingRegressor(alpha=a,n_estimators=100,random_state=42,max_depth=4)
                                                
def search_forward(X_train, X_test, y_train, y_test):

    """
    implements search forward algorithm, returns features selected

    Starting from an empty feature set, every round tries each feature not
    selected yet, fits ``learn_linreg_NormEq`` on the training columns and
    scores ``lin_reg``'s predictions on the test columns with ``rmse``.
    The feature giving the lowest error is kept if it strictly improves on
    the best error so far; the search stops at the first round in which no
    feature improves it.  Ties go to the lowest column index.

    Keyword arguments:
    X_train - training data set (2-D, indexed as ``X_train[:, columns]``)
    y_train - training target
    X_test - testing data set (same column layout as ``X_train``)
    y_test - testing target

    Returns the list of selected column indices in selection order.
    """
    n_features = X_test.shape[1]

    # inf / None instead of the magic values 2000000 / 100: a large RMSE can
    # still be improved on, and a feature whose index is 100 is no longer
    # mistaken for the "nothing found" marker.
    e_all_best = float('inf')
    V = []

    while True:
        v_best = None
        e_best = e_all_best

        for v in range(n_features):
            if v in V:
                continue

            V_prime = V + [v]

            beta_ = learn_linreg_NormEq(X_train[:, V_prime], y_train)
            y_hat = lin_reg(X_test[:, V_prime], beta_)
            e = rmse(y_test, y_hat)

            if e < e_best:
                e_best = e
                v_best = v

        if v_best is None:
            # no remaining feature lowers the error any further
            break

        V.append(v_best)
        e_all_best = e_best

    return V
예제 #5
0
def factorize(users,
              movies,
              ratings,
              test_users,
              test_movies,
              test_ratings,
              latent=30,
              steps=10,
              gpu_steps=1,
              alpha=0.0002,
              beta=0.02,
              delta=0.01,
              rmse_repeat_count=5,
              debug=1):
    """Run the ``matrixfact`` GPU kernel repeatedly and track train/test RMSE.

    Every step uploads the rating triples and the current factor matrices
    with ``gpuarray.to_gpu``, launches ``matrixfact`` and copies the updated
    factors back.  After each step the test RMSE is printed and the early
    stopping rules below are checked.

    Args:
        users, movies, ratings: Training triples.  They are cast to int32
            before upload, so fractional ratings are truncated
            (NOTE(review): confirm the kernel expects integer ratings).
        test_users, test_movies, test_ratings: Held-out triples for ``rmse``.
        latent: Number of latent factors passed to ``initUV`` and the kernel.
        steps: Maximum number of kernel launches.
        gpu_steps, alpha, beta, delta: Forwarded to the kernel; ``delta`` is
            also the "good enough" RMSE threshold for stopping.
        rmse_repeat_count: Stop threshold compared against the repeat counter.
        debug: ``> 1`` prints timings and writes the ``gpmf-*`` curves;
            ``> 2`` additionally dumps ``U``/``V`` after each step.

    Returns:
        None.  NOTE(review): the learned ``U`` and ``V`` are not returned;
        confirm whether callers need them.
    """
    U, V = initUV(int(np.max(users) + 1), latent, int(np.max(movies) + 1))
    U, V = np.array(U).astype(np.float32), np.array(V).astype(
        np.float32).transpose()

    print("Shape of P,Q : ", U.shape, V.shape)

    # time.clock() was removed in Python 3.8; perf_counter() replaces it for
    # every elapsed-time measurement in this function.
    start_time = time.perf_counter()
    y1, y2 = [], []

    error, count = rmse(test_users, test_movies, test_ratings, U, V.T), 0
    print("Initial test error :", round(error, 4))

    for k in range(steps):

        if debug > 1:
            print("Step : ", k)

        t6 = time.perf_counter()

        uu, mm, rr = np.array(users).astype(np.int32), np.array(movies).astype(
            np.int32), np.array(ratings).astype(np.int32)

        t7 = time.perf_counter()
        tools.clear_context_caches()
        u_gpu = gpuarray.to_gpu(uu)
        v_gpu = gpuarray.to_gpu(mm)
        r_gpu = gpuarray.to_gpu(rr)

        a_gpu = gpuarray.to_gpu(U)
        b_gpu = gpuarray.to_gpu(V)

        if debug > 1:
            print("Length of uu,mm ", len(uu), len(mm), np.max(users),
                  np.max(movies), U.shape, V.shape)

        if (len(uu) != 0 and len(mm) != 0):
            matrixfact(
                u_gpu,
                v_gpu,
                r_gpu,
                a_gpu,
                b_gpu,
                np.int32(np.max(users)),
                np.int32(latent),
                np.int32(np.max(movies)),
                np.int32(len(uu)),
                np.int32(len(mm)),
                np.int32(gpu_steps),
                np.float32(alpha),
                np.float32(beta),
                np.float32(delta),
                block=(16, 16, 1),
                grid=(
                    3, 4
                )  # always keep blockIdx.z as 1 - the kernal expects no threads in z axis
            )
            # copy the updated factors back to the host
            P = a_gpu.get()
            Q = b_gpu.get()
            U, V = np.array(P), np.array(Q)
            t8 = time.perf_counter()

            if debug > 1:
                t9 = time.perf_counter()
                if debug > 2:
                    np.savetxt('U' + str(k), U, fmt='%.4f')
                    np.savetxt('V' + str(k), V, fmt='%.4f')
                print("Timer :", round(t7 - t6, 4), round(t8 - t7, 4),
                      round(t9 - t8, 4))

        t5 = time.perf_counter()
        if debug > 1:
            print("Step time taken : ", round(t5 - t7, 2))
        y1.append(t5 - start_time)
        test_rmse = rmse(test_users, test_movies, test_ratings, U, V.T)
        print("Step test error :", round(test_rmse, 4))

        train_rmse = rmse(users, movies, ratings, U, V.T)
        y2.append([train_rmse, test_rmse])

        step_error = round(test_rmse, 4)

        # Stopping rules, checked in this order.
        # NOTE(review): ``count`` is never reset and the initial ``error`` is
        # unrounded while ``step_error`` is rounded -- confirm this is intended.
        if step_error < delta:
            break
        elif step_error == error:
            count = count + 1
        elif step_error > error:
            break
        elif rmse_repeat_count == count:
            break
        else:
            error = step_error

    if debug > 1:
        np.savetxt('gpmf-' + str(start_time) + '-y1.txt', y1, fmt='%.4f')
        np.savetxt('gpmf-' + str(start_time) + '-y2.txt', y2, fmt='%.4f')
예제 #6
0
파일: cpmf.py 프로젝트: IDmy/blockgmf
def factorize(users,
              movies,
              ratings,
              test_users,
              test_movies,
              test_ratings,
              blocks=1,
              latent=30,
              steps=10,
              block_steps=1,
              alpha=0.00001,
              beta=0.01,
              delta=0.01,
              rmse_repeat_count=3,
              debug=2,
              dataset=''):
    """Block-wise matrix factorization with one worker thread per block.

    The user and movie index ranges are cut into chunks of ``split``
    indices.  For every outer index ``i`` the blocks
    ``((i + j) % us, j)`` are handed to ``block_factorization`` on separate
    threads; the threads of that round are joined before the next round
    starts.  After each step the test RMSE is printed and recorded.

    Args:
        users, movies, ratings: Training triples; also used to build the
            dense rating matrix passed to every worker.
        test_users, test_movies, test_ratings: Held-out triples for ``rmse``.
        blocks: Divisor used to derive the chunk size ``split``.
        latent: Number of latent factors.
        steps: Maximum number of outer steps.
        block_steps: Forwarded to ``block_factorization``.
        alpha, beta, dataset: Not used in this function.
        delta: Stop as soon as the rounded test RMSE falls below this value.
        rmse_repeat_count: Stop once the error repeated more often than this.
        debug: Values above 1 enable progress printing.

    Side effects:
        Rebinds the module globals ``U`` and ``V`` and writes
        ``<blocks*blocks>blocks_y1.txt`` / ``..._y2.txt``.

    Returns:
        None.
    """
    global U, V
    U, V = initUV(np.max(users) + 1, latent, np.max(movies) + 1)
    R = csr_matrix((ratings, (users, movies))).todense()

    # largest valid indices, hoisted out of the loops
    last_user = int(np.max(users))
    last_movie = int(np.max(movies))

    size = max(np.max(users) + 1, np.max(movies) + 1)
    split = int(size / blocks)
    # np.float was removed in NumPy 1.24; it was an alias of builtin float.
    # NOTE(review): ceil(max / split) leaves the last index uncovered when
    # max is an exact multiple of split -- confirm whether max + 1 was meant.
    us = int(math.ceil(float(np.max(users)) / split))
    vs = int(math.ceil(float(np.max(movies)) / split))
    if debug > 1:
        print("Total splits : ", split, us, vs, us * vs)
        print("U, V shapes :", U.shape, V.shape)

    # time.clock() was removed in Python 3.8
    start_time = time.perf_counter()
    y1, y2 = [], []
    count, error = 0, 100

    for k in range(steps):

        if debug > 1:
            print("Step : ", k)

        t4 = time.perf_counter()
        for i in range(us):
            workers = []
            for j in range(vs):
                # shift the user block with j so one round walks a diagonal
                xtemp = int((i + j) % us)

                if debug > 1:
                    print("i, j, ii, jj ", i, j, xtemp, j)

                # block bounds, clamped to the largest existing index
                u1 = min(xtemp * split, last_user)
                u2 = min((xtemp + 1) * split - 1, last_user)
                v1 = min(j * split, last_movie)
                v2 = min((j + 1) * split - 1, last_movie)

                if debug > 1:
                    print("Processing split : ", i, j, u1, u2, v1, v2)

                uu, mm, rr = fetch(u1, u2, v1, v2, users, movies, ratings)
                if debug > 1:
                    print("Shapes of uu,mm,rr :", uu.shape, mm.shape, rr.shape)
                P, Q = U[u1:u2 + 1, 0:latent], V[v1:v2 + 1, 0:latent]
                if debug > 1:
                    print("P Q shapes : ", P.shape, Q.shape)

                if debug > 1:
                    print("Length of uu,mm ", len(uu), len(mm), u2 - u1 + 1,
                          v2 - v1 + 1, P.shape, Q.shape)

                if (len(uu) != 0 and len(mm) != 0):
                    t = threading.Thread(target=block_factorization,
                                         args=(P, Q, R, u1, u2, v1, v2,
                                               block_steps))
                    workers.append(t)
                    t.start()

            # Wait for this round's blocks before starting the next round and
            # before the RMSE below reads U and V.  The earlier isAlive()
            # polling never ran (its pool slot was always empty) and
            # Thread.isAlive() itself was removed in Python 3.9.
            for t in workers:
                t.join()

        t5 = time.perf_counter()
        if debug > 1:
            print(" Step time taken : ", round(t5 - t4, 2))
        y1.append(round(t5 - start_time, 3))
        test_rmse = rmse(test_users, test_movies, test_ratings, U, V)
        print("Step error :", round(test_rmse, 3))
        y2.append(round(test_rmse, 3))

        step_error = round(test_rmse, 4)

        # stop when good enough, when the error got worse, or when it has
        # repeated too often; otherwise remember it for the next step
        if step_error < delta:
            break
        elif error < step_error:
            break
        elif rmse_repeat_count < count:
            break
        elif error == step_error:
            count = count + 1
        else:
            count = 0
        error = step_error

    np.savetxt(str(blocks * blocks) + 'blocks_y2.txt', y2, fmt='%.3f')
    np.savetxt(str(blocks * blocks) + 'blocks_y1.txt', y1, fmt='%.3f')
예제 #7
0
파일: bgmf.py 프로젝트: IDmy/blockgmf
def factorize(users, movies, ratings, test_users, test_movies, test_ratings, blocks=1, latent=10, steps=10, gpu_steps=2, alpha=0.0002, beta=0.01, delta=0.01, rmse_repeat_count=3, debug=2, dataset=''):
    """Block-wise matrix factorization that batches one round of blocks per call.

    The user and movie index ranges are cut into chunks of ``split``
    indices.  For every outer index ``i`` the rating triples of the blocks
    ``((i + j) % us, j)`` are collected with ``fetch`` and merged with
    ``pack``; the packed batch is then handed to ``matrix_factorization``
    together with the current ``U`` and ``V``.  After each step train and
    test RMSE are printed and the test RMSE is recorded.

    Args:
        users, movies, ratings: Training triples.
        test_users, test_movies, test_ratings: Held-out triples for ``rmse``.
        blocks: Divisor used to derive the chunk size ``split``.
        latent: Number of latent factors passed to ``initUV``.
        steps: Maximum number of outer steps.
        gpu_steps: Only used here to name the output files.
        alpha, beta, dataset: Not used in this function.
        delta: Stop as soon as the rounded test RMSE falls below this value.
        rmse_repeat_count: Stop once the error repeated more often than this.
        debug: Values above 1 enable the extra progress lines.

    Side effects:
        Writes ``blocks_<gpu_steps>iterations_y1.txt`` / ``..._y2.txt``.

    Returns:
        None.
    """
    # smallest / largest indices, hoisted out of the loops
    first_user = np.min(users)
    first_movie = np.min(movies)
    last_user = int(np.max(users))
    last_movie = int(np.max(movies))

    U, V = initUV( np.max(users)-first_user+1, latent, np.max(movies)-first_movie+1)
    U = np.array(U)
    V = np.array(V)

    size = max(np.max(users)+1, np.max(movies)+1)
    split = int(size/blocks)
    # np.float was removed in NumPy 1.24; it was an alias of builtin float.
    # NOTE(review): ceil(max / split) leaves the last index uncovered when
    # max is an exact multiple of split -- confirm whether max + 1 was meant.
    us = int(math.ceil( float(np.max(users))/split ) )
    vs = int(math.ceil( float(np.max(movies))/split ) )
    if debug>1:
        print("Total splits : ",split, us, vs, us*vs)
        print("U, V shapes :", U.shape, V.shape)

    # time.clock() was removed in Python 3.8
    start_time = time.perf_counter()
    y1, y2 = [], []
    count, error = 0, 100

    for k in range(steps):

        if debug>1:
            print("Step : ", k)

        t4 = time.perf_counter()

        for i in range(us):
            UU, MM, RR = [], [], []
            ulimits = [0]

            for j in range(vs):
                # shift the user block with j so one round walks a diagonal
                xtemp = int((i+j)%us)

                print("i, j, ii, jj ", i, j, xtemp, j)

                # block bounds, clamped to the largest existing index
                u1 = min(xtemp*split, last_user)
                u2 = min((xtemp+1)*split - 1, last_user)
                v1 = min(j*split, last_movie)
                v2 = min((j+1)*split - 1, last_movie)

                print("Processing split : " , i , j, u1, u2, v1, v2)

                uu, mm, rr = fetch(u1,u2, v1,v2, users,movies,ratings)

                # empty blocks contribute nothing to the packed batch
                if(len(uu)!=0 and len(mm)!=0):
                    UU,MM,RR, ulimits = pack(UU,MM,RR, uu,mm,rr, ulimits)

            U, V = matrix_factorization(UU,MM,RR, U,V, ulimits,first_user, first_movie)

        t5 = time.perf_counter()
        if debug>1:
            print(" Step time taken : ", round(t5-t4,2))

        y1.append(round(t5-start_time,3))
        train_rmse = rmse(users, movies, ratings, U, V)
        test_rmse = rmse(test_users, test_movies, test_ratings, U, V)
        print("Train error:", round(train_rmse, 3) , " Test error:", round(test_rmse,3) )
        y2.append(round(test_rmse,3) )

        step_error=round(test_rmse,4)

        # stop when good enough, when the error got worse, or when it has
        # repeated too often; otherwise remember it for the next step
        if step_error < delta:
            break
        elif error<step_error :
            break
        elif rmse_repeat_count<count:
            break
        elif step_error==error:
            count=count+1
        else:
            count = 0
        error=step_error

    np.savetxt('blocks_'+str(gpu_steps)+'iterations_y2.txt', y2, fmt='%.3f')
    np.savetxt('blocks_'+str(gpu_steps)+'iterations_y1.txt', y1, fmt='%.3f')
예제 #8
0
# Script fragment: fit a linear regression, report its test losses, then
# build and train a small neural network on the same training data.

# Column 0 of each data matrix is taken as the target; the remaining columns
# are taken as the inputs.
train_input = train_data[:,1:] 
train_output = train_data[:,0] 
test_input = test_data[:,1:] 
test_output = test_data[:,0]

# Linear Regression
# Prepend a column of ones to the test inputs so they line up with the weight
# vector (presumably whose first entry is the bias term -- TODO confirm).
m=test_input.shape[0]
ones=np.ones((m,1))
test_input1=np.hstack((ones,test_input))

learing_rate =1e-8
max_steps = 100000
obj_linear_regression = linear_regression(train_input,train_output,learing_rate,max_steps,C=0.0)
linear_regression_weights = obj_linear_regression.weights
# NOTE(review): both losses are divided by 100; the reason for this scaling is
# not visible in this fragment -- confirm.
test_loss = mse(prediction = np.dot(test_input1,linear_regression_weights)[:,0], target = test_output)/100
test_loss1 = rmse(prediction = np.dot(test_input1,linear_regression_weights)[:,0], target = test_output)/100
print(f'\nMse Test loss in Linear Regression is \t {test_loss}')
print(f'Rmse Test loss in Linear Regression is \t {test_loss1}\n')



# Neural Network
nn_max_epochs = 1000
nn_batch_size = 128
nn_learning_rate = 5e-8
num_layers = 1
num_units = 32
lamda = 0.00002
network = neural_network(train_input,num_layers,num_units)
# NOTE: this rebinds the name ``optimizer`` from the callable to its result, so
# the callable can no longer be invoked under that name afterwards.
optimizer = optimizer(nn_learning_rate)
train(network, optimizer, lamda, nn_batch_size, nn_max_epochs,train_input, train_output)
예제 #9
0
    'Dses' : forecast.dses(main_df, usr_input[1], usr_input[3]),

    'Hles' : forecast.hles(main_df, usr_input[1], usr_input[3])
}

# Pick the forecast data frame stored in ``forecasts`` under the key given
# by usr_input[2].
f_df = forecasts[usr_input[2]]

# Derive the validation data frame from the forecast data frame.
v_df = error.val_period(f_df, today)

# Compute every error measure on the validation data frame.
# NOTE: the names below are plain module-level variables; ``rmse`` would hide
# any function of the same name defined earlier in the module.
mae = error.mae(v_df)
avg_err = error.avg_err(v_df)
mape = error.mape(v_df)
rmse = error.rmse(v_df)

# Print each error name together with its value.
print('MAE = ' + str(mae) + '\nAverage Error = ' + str(avg_err) +\
    '\nMAPE = ' + str(mape) + '%\nRMSE = ' + str(rmse))

# Build a basic plot; it is not displayed until plt.show() below.
plottin.b_plot(f_df, usr_input[1], usr_input[3], usr_input[0], \
    usr_input[2], today)

# Display the plot that was built in plottin.
plt.show()

# TODO: see whether a timer can be put on this.