Coulomb_df = scaler.transform(Coulomb_df) X_train_scaled, X_test_scaled, y_train, y_test = train_test_split( Coulomb_df, Output_df, test_size=.2, random_state=None) reports_df = pd.DataFrame( columns=['Name', 'MARE', 'MSE', 'R2']) for regr_choice in range(5): regr_names = ['RF', 'SVM', 'RVM', 'Huber', 'XGBOOST'] regr_objects = [RandomForestRegressor(n_estimators=400, max_depth=1000, random_state=0), svm.SVR(kernel='rbf', epsilon=0.1, verbose=True), RVR(kernel='rbf', n_iter=10000, tol=0.0001, verbose=True), linear_model.HuberRegressor( epsilon=1.35, max_iter=100, alpha=0.0001, warm_start=False, fit_intercept=True, tol=1e-05), XGBRegressor(objective='reg:linear', colsample_bytree=0.3, learning_rate=0.1, max_depth=400, alpha=10, n_estimators=400) ] regr = regr_objects[regr_choice] regr_name = regr_names[regr_choice] if reusingModels: regr = joblib.load('SavedModels_'+regr_name+'.pkl') else: regr.fit(X_train_scaled, y_train) if 'XGB' in regr_name: X_scaled_df_XGB = X_test_scaled # .as_matrix() y_predicted = regr.predict(X_scaled_df_XGB)
classification(svm.LinearSVC(random_state=RANDOM_SEED)), classification_binary(svm.LinearSVC(random_state=RANDOM_SEED)), # SVM regression(svm.SVR(kernel='rbf')), classification_binary(svm.SVC(kernel='rbf', random_state=RANDOM_SEED)), classification_binary( svm.SVC(kernel='linear', random_state=RANDOM_SEED)), classification_binary( svm.SVC(kernel='poly', degree=2, random_state=RANDOM_SEED)), classification_binary( svm.SVC(kernel='sigmoid', random_state=RANDOM_SEED)), # Linear Regression regression(linear_model.LinearRegression()), regression(linear_model.HuberRegressor()), regression(linear_model.ElasticNet(random_state=RANDOM_SEED)), regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)), regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)), regression(linear_model.Lars()), regression(linear_model.LarsCV()), regression(linear_model.Lasso(random_state=RANDOM_SEED)), regression(linear_model.LassoCV(random_state=RANDOM_SEED)), regression(linear_model.LassoLars()), regression(linear_model.LassoLarsIC()), regression(linear_model.OrthogonalMatchingPursuit()), regression(linear_model.OrthogonalMatchingPursuitCV()), regression(linear_model.Ridge(random_state=RANDOM_SEED)), regression(linear_model.RidgeCV()), regression(linear_model.BayesianRidge()), regression(linear_model.ARDRegression()),
def loopModels(tsize=30, rep=300):
    '''
    Benchmark a fixed set of regressors with repeated random splits.

    The numeric features and target come from getNumericXy(). Every model
    is refit rep times; each run draws a fresh train/test split that holds
    out tsize percent of the rows and records the RMSE on the held-out part.
    For each model the mean and median RMSE are printed and a histogram of
    the collected RMSE values is drawn in a new figure.

    A single split (or a 5 fold cross validation) gives one or a handful of
    error numbers; repeating the split e.g. 300 times gives a distribution
    of errors, which is more informative during exploration.

    tsize : size of the held-out test set, in percent.
    rep   : number of random splits evaluated per model.

    Returns a dictionary {"model name" : RMSE list} covering all models.
    It can be converted directly to pd.DataFrame and saved as csv.
    '''
    X, y = getNumericXy()

    # OverallQual is the feature with highest correlation with the target.
    # Uncomment to test how that single feature does by itself.
    #X = data[["OverallQual"]]

    regressors = {
        #"ARDRegression":linear_model.ARDRegression(),
        "BayesianRidge": linear_model.BayesianRidge(),
        "ElasticNet": linear_model.ElasticNet(),
        "HuberRegressor": linear_model.HuberRegressor(),
        "Lars": linear_model.Lars(),
        "Lasso": linear_model.Lasso(),
        "LassoLars": linear_model.LassoLars(),
        "RANSACRegressor": linear_model.RANSACRegressor(),
        "DecisionTree": tree.DecisionTreeRegressor(),
        "RandomForest20": ensemble.RandomForestRegressor(n_estimators=20),
        "RandomForest100": ensemble.RandomForestRegressor(n_estimators=100),
        "RandomForest200": ensemble.RandomForestRegressor(n_estimators=200),
        "XGBRegressor": XGBRegressor(),
        "XGBRegressor_n1000_r05": XGBRegressor(n_estimators=1000,
                                               learning_rate=0.05),
    }

    def rmse_of_one_split(estimator):
        # One repetition: fresh random split, refit, score on the test part.
        X_fit, X_eval, y_fit, y_eval = train_test_split(
            X, y, test_size=tsize / 100.0)
        fitted = estimator.fit(X_fit, y_fit)
        return np.sqrt(mean_squared_error(y_eval, fitted.predict(X_eval)))

    results = {}
    for label, estimator in regressors.items():
        print("%s\n%s" % ("-" * 80, label))
        errors = [rmse_of_one_split(estimator) for _ in range(rep)]

        mean_err = np.mean(errors)
        median_err = np.median(errors)
        print("\tMean RMSE = %f, Median RMSE = %f" % (mean_err, median_err))
        results[label] = errors

        # One histogram per model; bin count grows with sqrt(rep), minimum 10.
        plt.figure()
        plt.hist(errors, max(int(np.sqrt(rep) * 1.5), 10))
        #plt.hist(rmselist,np.linspace(0.01,0.07,num=150))
        plt.title("%s mn=%.4f,md=%.4f (tsize=%d,rep=%d)" %
                  (label, mean_err, median_err, tsize, rep))

    return results
print(i) clf_xgb = XGBRegressor(#objective='reg:linear', learning_rate=0.1, #so called `eta` value max_depth=4, min_child_weight=1, silent=1, subsample=0.8, colsample_bytree=1, n_estimators=450, seed = 123) clf_rf = RandomForestRegressor(max_depth=4, random_state=0,n_estimators=1000) clf_glm = linear_model.LinearRegression() clf_ada = AdaBoostRegressor() clf_huber = linear_model.HuberRegressor() clf_gbm = GradientBoostingRegressor(n_estimators=900, min_samples_split=6, min_samples_leaf=6, max_features='sqrt', max_depth=15, loss='lad') clf_SVM = svm.SVR() clf_ridge= linear_model.Ridge(alpha=.5) print("#------------ GBM-------------------#") r2,rmse= fitAndPrint(clf_gbm, X_train, y_train,X_test,y_test) print("#------------ XGB -------------------#") fitAndPrint(clf_xgb, X_train, y_train,X_test,y_test) # print("#------------ RF -------------------#") # fitAndPrint(clf_rf, X_train, y_train,X_test,y_test) print("#------------ GLM -------------------#") fitAndPrint(clf_glm, X_train, y_train,X_test,y_test) # print("#------------ ADA -------------------#") # fitAndPrint(clf_ada, X_train, y_train,X_test,y_test) # print("#------------ HUBER-------------------#")
# XGBoost xgb_params = { 'gamma': Integer(1, 10), 'learning_rate': Real(10**-5, 0.99, prior="log-uniform"), 'max_depth': Integer(3, 10), 'reg_alpha': Real(10**-5, 1, prior="log-uniform"), 'reg_lambda': Real(10**-5, 1, prior="log-uniform"), 'max_delta_step': Integer(0, 10), } """### 1d.3. Store everything in lists so we can iterate the code in a for loop We'll run all steps as a single for loop. So we need to save the initial model structures, the hyperparameters, and the names of the pickled files in lists. """ models = [ lm.HuberRegressor(max_iter=1000), lm.Ridge(), lm.Lasso(), lm.ElasticNet(), SVR(), RandomForestRegressor(), GradientBoostingRegressor(), ExtraTreesRegressor(), xgb.XGBRegressor() ] params = [ robust_params, ridge_params, lasso_params, en_params, svm_params, rf_params, gb_params, et_params, xgb_params ] # Names stored on local
def _fit_gumbel_chart(self, outlier_detect, plot_diagnosis):
    '''Fit a Gumbel distribution to the maxima via a Gumbel chart

    The maxima are paired with y = -ln(-ln(F)), where F is their plotting
    position, and a straight line y = k * x + b is fitted through the
    points. For a Gumbel distribution y = (x - loc) / scale, hence
    scale = 1 / k and loc = -b / k.

    Parameters
    ----------
        outlier_detect : None, False or str
            Selects the regression used for the straight-line fit:
            None, False or 'None'  -> ordinary least squares, all maxima
                                      are treated as inliers
            'RANSAC Regression'    -> RANSAC, inliers are those RANSAC
                                      selected
            'Huber Regression'     -> Huber regression (epsilon=1.35), all
                                      maxima are treated as inliers
        plot_diagnosis: bool
            Whether to generate diagnostic plot (drawn on self.diag_fig).

    Raises
    ------
        ValueError
            If outlier_detect is none of the values listed above.

    Variables added
    ---------------
        self.maxima_inlier_mask: Mask indicating inliers
        self.maxima_dist: Probability distribution for the maxima
        self.threshold: Threshold of X between bulk and tail, minimum is
            constrained to be no lower than 5 percentile of F_maxima
    '''

    def _gumbel_y(F):
        ''' Calculate y coordinates on the Gumbel chart from CDF '''
        return -np.log(-np.log(F))

    x = self.maxima
    x_col = x.reshape(-1, 1)  # sklearn expects a 2-D feature matrix
    F = util.plotting_position(x, method='unbiased')
    y = _gumbel_y(F)

    # False is accepted alongside None/'None': it is the documented way to
    # switch outlier detection off and used to fall through to ValueError.
    if (outlier_detect is None or outlier_detect is False
            or outlier_detect == 'None'):
        mdl = linear_model.LinearRegression().fit(x_col, y)
        self.maxima_inlier_mask = np.ones(len(self.maxima), dtype=bool)
    elif outlier_detect == 'RANSAC Regression':
        ransac = linear_model.RANSACRegressor(random_state=1).fit(x_col, y)
        self.maxima_inlier_mask = ransac.inlier_mask_
        mdl = ransac.estimator_  # the linear model fitted on the inliers
    elif outlier_detect == 'Huber Regression':
        mdl = linear_model.HuberRegressor(epsilon=1.35).fit(x_col, y)
        self.maxima_inlier_mask = np.ones(len(self.maxima), dtype=bool)
    else:
        raise ValueError('Unrecognized outlier_detect keyword')
    k, b = mdl.coef_[0], mdl.intercept_

    if plot_diagnosis:
        inliers = self.maxima_inlier_mask
        ax = self.diag_fig.add_subplot(1, 3, 1, label=self.label)
        ax.plot(x[inliers], y[inliers],
                'b.', markersize=10, label='Maxima(inliers)')
        ax.plot(x[~inliers], y[~inliers],
                'r.', markersize=10, label='Maxima(outliers)')
        # Freeze the axis limits set by the data points so the fitted line
        # (drawn over the whole sample range) does not rescale the chart.
        xlm, ylm = ax.get_xlim(), ax.get_ylim()
        ax.plot(self.sample_coor,
                mdl.predict(self.sample_coor.reshape(-1, 1)),
                'r--', label='Linear fitting')
        ax.set_xlim(xlm)
        ax.set_ylim(ylm)
        ax.set_xlabel('Maxima data')
        ax.set_ylabel('$-ln(-ln(F))$')
        ax.set_title(f'Gumbel chart ({self.label} tail)')
        ax.grid(True)
        # ax.legend(loc='best')

    self.maxima_dist = stats.gumbel_r(loc=-b / k, scale=1 / k)
    # Maxima below the 5th percentile of the fitted distribution are dropped
    # from the inliers, so the threshold cannot fall below that percentile.
    self.maxima_inlier_mask[
        self.maxima < self.maxima_dist.ppf(0.05)] = False
    self.threshold = self.maxima[self.maxima_inlier_mask].min()
def main():
    """Compare six weight estimators on the perturbed regression data.

    Reads the samples, labels, perturbation signs, optimal weight vector
    and the HARD parameter from text files under data_root, adds Gaussian
    noise to the perturbed labels, and runs `fold` random 50/50 splits.
    On every split SZMO, OLS, HARD, Ridge, Lasso and Huber are fitted on
    the perturbed training labels; for each the distance of the estimated
    weights to the optimal weights and the prediction error against the
    unperturbed test labels are recorded. Mean weight loss, mean label
    loss and the (x1000) variance of the label loss are printed at the end.
    """
    d = 600  # dim of data,m,no-changeable
    n = 3000  # number of sample changeable
    sigma = 1  # changeable
    fold = 5

    def load(file_name):
        return np.loadtxt(data_root + file_name, dtype=np.float32)

    # x samples
    x_data = load('sample.txt')[0:n, 0:d]

    # perturbed labels: y = y* + epsilon + b
    y_perturbation_data = load('label_b.txt')[0:n]
    y_perturbation_data = y_perturbation_data + np.random.normal(
        loc=0, scale=sigma, size=n)

    # clean labels: y = y*
    y_standard_data = load('label.txt')[0:n]

    # sign information of perturbation b
    perturbation = load('s_b.txt')[0:n]

    # optimal weight
    w_optimal = load('w_s.txt')

    # k for hard
    k = load('k_hard.txt')

    # One row per sample: [perturbed label, clean label, perturbation sign],
    # so a single split keeps the three aligned.
    y_data = np.array(
        [y_perturbation_data, y_standard_data, perturbation]).T

    def sklearn_weights(model, x, y):
        # Fit a scikit-learn linear model and hand back its coefficients.
        model.fit(x, y)
        return model.coef_

    # Column order of the result tables: SZMO, OLS, HARD, Ridge, LASSO, Huber.
    solvers = [
        lambda x, y, s: SZMO(x, y, s),
        lambda x, y, s: OLS(x, y),
        lambda x, y, s: HARD(x, y, k),
        lambda x, y, s: sklearn_weights(Ridge(alpha=0.3), x, y),
        lambda x, y, s: sklearn_weights(linear_model.Lasso(alpha=0.1), x, y),
        lambda x, y, s: sklearn_weights(linear_model.HuberRegressor(), x, y),
    ]

    w_loss = np.zeros([fold, len(solvers)])
    y_loss = np.zeros([fold, len(solvers)])

    for ii in range(fold):
        x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                            y_data,
                                                            test_size=0.5)
        l, _ = np.shape(x_train)
        print("fold:%d" % ii)

        s_train = y_train[:, 2]  # sign information of perturbation b
        y_train = y_train[:, 0]  # training labels carry the perturbation
        y_test = y_test[:, 1]  # evaluation uses the clean labels

        for col, solve in enumerate(solvers):
            w_est = solve(x_train, y_train, s_train)
            w_loss[ii, col] = np.linalg.norm(w_optimal - w_est, ord=2)
            y_predict = np.dot(x_test, w_est)
            y_loss[ii, col] = np.linalg.norm(y_predict - y_test, ord=2) / l

    def print_row(label, values):
        # "<label> v1 v2 ... vN " followed by a newline.
        print(label, *("%.4f" % v for v in values), end=' \n')

    mean_w_loss = w_loss.mean(axis=0)
    print("m:%d,n:%d,sigma:%d,alpha:50" % (d, n / 2, sigma))
    print(
        "attention:[alpha should be Manually changed based on generated files]"
    )
    print(" SZMO OLS HARD Ridge LASSO Huber ")
    print_row("mean_w_loss:", mean_w_loss)

    mean_y_loss = y_loss.mean(axis=0)
    var_y_loss = y_loss.var(axis=0)
    print_row("mean_y_loss:", mean_y_loss)
    print_row("var_y_loss :", var_y_loss * 1000)
def _anim_from_solution(skeleton, solution, num_frames, num_joints):
    '''
    Forward kinematics setup: turn a flat solution vector into an animation.

    The vector is read as num_frames rows; the first 3 entries of a row are
    the root position and the rest are per-joint euler angles ('xyz' order,
    world=True). Returns a copy of skeleton carrying those values.
    '''
    x = np.reshape(solution, [num_frames, -1])
    root = x[:, :3]
    angles = x[:, 3:]
    anim = skeleton.copy()
    anim.orients.qs = skeleton.orients.qs.copy()
    anim.offsets = skeleton.offsets.copy()
    anim.positions = skeleton.positions.repeat(num_frames, axis=0)
    anim.rotations = Quaternions.from_euler(angles.reshape(
        (num_frames, num_joints, 3)),
                                            order='xyz',
                                            world=True)
    anim.positions[:, 0] = root
    return anim


def optimize_trajectory(poses2D,
                        joint_conf_2d,
                        poses3D,
                        root_pos,
                        joint_angles,
                        skeleton,
                        names,
                        ppx,
                        ppy,
                        camFocal,
                        velConstraints,
                        save_dir='./',
                        plane_normal=None,
                        plane_point=None):
    '''
    Runs kinematic optimization

    Stages, in order:
      1. fit the template skeleton to the initial 3D pose and run IK to get
         initial joint angles,
      2. least-squares optimization without a floor term,
      3. if no floor was given: fit a floor plane to the foot contact
         positions with Huber regression and drop contact labels the
         regression marks as outliers,
      4. least-squares optimization including the floor term.
    The final animation is written to <save_dir>/final_test.bvh.

    Parameters
    ----------
    poses2D : array indexed [frame, joint, 0/1], 2D joint pixel coordinates.
    joint_conf_2d : array indexed [frame, joint], 2D detection confidence.
    poses3D : array indexed [frame, joint, xyz], root-relative 3D joints
        (root_pos is added to them here). Must have as many joints as
        poses2D.
    root_pos : array indexed [frame], root position per frame
        (presumably 3 values per frame - TODO confirm with caller).
    joint_angles : array whose axis 2 holds an axis-angle vector per
        [frame, joint]; used to initialize the rotations.
    skeleton : template animation object; bone lengths are refit via
        update_skeleton.
    names : joint names, forwarded to update_skeleton and BVH.save.
    ppx, ppy : camera center in pixels.
    camFocal : indexable (fx, fy) focal length.
    velConstraints : array indexed [frame, joint] where 1 marks a contact.
        NOTE: modified in place when a floor is fitted (outlier contacts
        are set to 0).
    save_dir : directory for the final BVH file.
    plane_normal, plane_point : floor plane; when either is None a floor is
        fitted from the foot contacts.

    Returns
    -------
    (anim, newPose3D, projPose2D, plane_normal, plane_point, velConstraints)
    or None when the 2D and 3D inputs disagree on the number of joints.
    '''
    given_floor = True
    if plane_normal is None or plane_point is None:
        given_floor = False
        # Placeholder plane for the first stage, where floorWeight is 0.
        # Builtin float replaces the np.float alias removed in NumPy 1.24.
        plane_normal = np.zeros(3, dtype=float)
        plane_point = np.zeros(3, dtype=float)

    num_frames = poses2D.shape[0]
    num_joints = poses2D.shape[1]
    if num_joints != poses3D.shape[1]:
        print('2D and 3D data must have the same number of joints!')
        print('2D: ' + str(num_joints))
        print('3D: ' + str(poses3D.shape[1]))
        return None

    # Fit the initial motion to the template skeleton
    # target joint positions must be in order of the skeleton to fit to
    targets = np.zeros((num_frames, len(FORWARD_MAPPING), 3))
    for fr in range(num_frames):
        for j in range(targets.shape[1]):
            targets[fr, j, :] = poses3D[fr, FORWARD_MAPPING[j], :] + root_pos[fr]

    # match bone lengths to initial 3D pose
    skeleton = update_skeleton(skeleton, targets, names)

    # calculate normalized 2D coordinates for projection loss
    # (regular projection with focal length and camera center removed)
    # also find projection weights to be used in optimization
    joints_2d_normalized = poses2D.copy()
    proj_weights = np.ones(
        (num_frames, num_joints))  # weights for the re-projection term
    data_weights = np.ones(
        (num_frames, num_joints))  # weights for the data term
    cam_center = np.array([ppx, ppy])
    for frame_idx in range(num_frames):
        cur_2d_joints = poses2D[frame_idx, :]
        for joint_idx in range(num_joints):
            if joint_idx < 25:
                # only for joints that have 2D correspondence
                proj_weights[frame_idx, joint_idx] = proj_weights[
                    frame_idx, joint_idx] * joint_conf_2d[
                        frame_idx, joint_idx] * PROJ_WEIGHTS[joint_idx]
                # still need all joints to be used for data term, but some more than others
                data_weights[frame_idx,
                             joint_idx] = (data_weights[frame_idx, joint_idx] +
                                           joint_conf_2d[frame_idx, joint_idx]
                                           ) * DATA_WEIGHTS[joint_idx]
                # calc normalized projection
                joints_2d_normalized[frame_idx, joint_idx,
                                     0] = (cur_2d_joints[joint_idx, 0] -
                                           cam_center[0]) / camFocal[0]
                joints_2d_normalized[frame_idx, joint_idx,
                                     1] = (cur_2d_joints[joint_idx, 1] -
                                           cam_center[1]) / camFocal[1]
            else:
                proj_weights[frame_idx, joint_idx] = 0
                # still want data term on spine joints (0.4 is arbitrary)
                data_weights[frame_idx, joint_idx] = (
                    data_weights[frame_idx, joint_idx] +
                    0.4) * DATA_WEIGHTS[joint_idx]

    init_root_sol = root_pos.copy()

    #
    # perform IK to get initial joint angle estimates.
    #

    # Initialize the animation with fitted skeleton
    anim = skeleton.copy()
    anim.orients.qs = skeleton.orients.qs.copy()
    anim.offsets = skeleton.offsets.copy()
    anim.positions = skeleton.positions.repeat(num_frames, axis=0)
    anim.rotations.qs = anim.rotations.qs.repeat(num_frames, axis=0)
    anim.positions[:, 0] = init_root_sol

    # initialize with smpl prediction
    angle_init = np.linalg.norm(joint_angles, axis=2)
    # 1e-10 guards against division by zero for zero rotations
    axis_init = joint_angles / np.expand_dims(angle_init + 1e-10, axis=2)
    axis_init[:, :, 0] *= -1.0
    axis_init[:, :, 1] *= -1.0
    axis_init[:, :, 2] *= -1.0
    init_transforms = Quaternions.from_angle_axis(angle_init, axis_init)
    align_transform = Quaternions.from_angle_axis(0.0,
                                                  np.array([1.0, 0.0, 0.0]))
    for i in range(num_frames):
        for j in range(1):
            init_transforms[i, j] *= align_transform

    anim.rotations = init_transforms

    # BVH.save(os.path.join(save_dir, 'pre_ik.bvh'), anim, names)

    # Set up end effector constraints with target positions
    targetmap = {}
    for ee_idx in range(targets.shape[1]):
        # no IK on spine
        if ee_idx not in SKEL_SPINE_IDX:
            targetmap[ee_idx] = targets[:, ee_idx]

    # Solve IK for joint angles
    ik = JacobianInverseKinematicsCK(anim,
                                     targetmap,
                                     translate=False,
                                     iterations=200,
                                     smoothness=0.0,
                                     damping=7,
                                     silent=False)
    ik()

    # BVH.save(os.path.join(save_dir, 'init_test.bvh'), anim, names)

    init_root_sol = anim.positions[:, 0]
    init_positions = Animation.positions_global(anim)

    #
    # Perform the kinematic optimization
    #

    #
    # For first stage, don't care about floor
    # Weights of various optimization terms for each optim step.
    #
    projWeight = [1000.0]  # projection term
    smoothWeightVel = [0.1]  # velocity smoothness
    smoothWeightAcc = [0.5]  # acceleration smoothness
    dataWeight = [0.3]  # keep 3d pose close to initialization
    velWeight = [
        10.0
    ]  # velocity at contact frames should be 0 (e.g., foot contact)
    floorWeight = [0.0]  # at contact frames, feet must be on floor

    # Compute the initial projection residual
    init_angles = np.reshape(anim.rotations.euler(), [num_frames, -1])
    print(init_angles.shape)
    # flat vector of per-frame [root position, joint angles]
    init_sol = np.reshape(
        np.concatenate((init_root_sol, init_angles), axis=1), [-1])
    init_res = np.zeros(num_frames * num_joints * 2)
    count = 0
    for fr in range(num_frames):
        current_root = init_root_sol[fr, :]
        current_pose = init_positions[fr, :] - current_root
        for j in range(num_joints):
            joint_idx = BACKWARD_MAPPING[j]
            if (proj_weights[fr, j] > 0):
                proj_x = (current_pose[joint_idx, 0] + current_root[0]) / (
                    current_pose[joint_idx, 2] + current_root[2])
                proj_y = (current_pose[joint_idx, 1] + current_root[1]) / (
                    current_pose[joint_idx, 2] + current_root[2])
                init_res[count + j * 2 +
                         0] = projWeight[-1] * proj_weights[fr, j] * (
                             proj_x - joints_2d_normalized[fr, j, 0])
                init_res[count + j * 2 +
                         1] = projWeight[-1] * proj_weights[fr, j] * (
                             proj_y - joints_2d_normalized[fr, j, 1])
        count = count + num_joints * 2
    print('Error init:%f' % np.linalg.norm(init_res))

    # run stepwise optimization
    for step_idx in range(len(projWeight)):
        cur_sol = least_squares(
            fun_anim_for_projection,
            init_sol,
            max_nfev=50,
            verbose=2,
            jac=jac_anim_for_projection_sparse,
            gtol=1e-12,
            bounds=[-np.inf, np.inf],
            tr_solver='lsmr',
            args=(skeleton, poses3D, root_pos, joints_2d_normalized,
                  plane_normal, plane_point, proj_weights, data_weights,
                  np.arange(num_joints), np.arange(num_joints),
                  SMOOTH_WEIGHTS, velConstraints, projWeight[step_idx],
                  smoothWeightVel[step_idx], smoothWeightAcc[step_idx],
                  dataWeight[step_idx], velWeight[step_idx],
                  floorWeight[step_idx]))
        print('solution at step ' + str(step_idx) + ':')
        print(cur_sol.cost)
        # The per-step animation was only needed for the (disabled) debug
        # export below, so it is no longer rebuilt here.
        # BVH.save(os.path.join(save_dir, 'step' + str(step_idx) + '_test.bvh'), anim, names)
        init_sol = cur_sol.x

    #
    # fit floor
    #

    # collect foot positions at contact points
    anim = _anim_from_solution(skeleton, cur_sol.x, num_frames, num_joints)
    final_pos = Animation.positions_global(anim)

    feet_pos = final_pos[:, FEET_IDX, :]
    feet_contact = np.array(
        [FORWARD_MAPPING[foot_idx] for foot_idx in FEET_IDX])
    # boolean indexing orders the contact positions frame by frame, and by
    # foot joint within a frame
    feet_pos = feet_pos[velConstraints[:, feet_contact] == 1]
    print(str(feet_pos.shape[0]) + ' contacts for floor fit...')

    # if we weren't given a floor to use, fit one
    if not given_floor:
        # First fit for floor (ignore more outliers)
        # Regress height (y) on the horizontal coordinates (x, z).
        huber = linear_model.HuberRegressor(epsilon=1.5)
        huber.fit(feet_pos[:, [0, 2]], feet_pos[:, 1])
        print('Floor fit after ' + str(huber.n_iter_) + ' LBFGS iters!')
        coeff = huber.coef_
        intercept = huber.intercept_
        print('Coeffs + Intercept = (%f, %f, %f)' %
              (coeff[0], coeff[1], intercept))

        # Three points on the fitted plane; their heights come from the
        # regression. One vectorized predict avoids storing a length-1
        # array into a scalar slot (deprecated since NumPy 1.25).
        plane_verts = np.array([[0.0, -1.0, 0.0], [0.0, -1.0, 100.0],
                                [100.0, -1.0, 0.0]])
        plane_verts[:, 1] = huber.predict(plane_verts[:, [0, 2]])
        # print(plane_verts)
        plane_normal = np.cross(plane_verts[2, :] - plane_verts[0, :],
                                plane_verts[1, :] - plane_verts[2, :])
        plane_normal /= np.linalg.norm(plane_normal)
        plane_point = plane_verts[0]
        print('Number of outliers: %d' % (np.sum(huber.outliers_)))
        print('Normal: (%f, %f, %f)' %
              (plane_normal[0], plane_normal[1], plane_normal[2]))
        print('Point: (%f, %f, %f)' %
              (plane_point[0], plane_point[1], plane_point[2]))

    # update contacts based on floor fit only if we fit a floor
    if not given_floor:
        # second fit to find spurious contacts
        huber = linear_model.HuberRegressor(epsilon=2.2)
        huber.fit(feet_pos[:, [0, 2]], feet_pos[:, 1])
        print('Floor fit (for contact refinement) after ' +
              str(huber.n_iter_) + ' LBFGS iters!')
        coeff = huber.coef_
        intercept = huber.intercept_
        print('Coeffs + Intercept = (%f, %f, %f)' %
              (coeff[0], coeff[1], intercept))
        print('Number of outliers: %d' % (np.sum(huber.outliers_)))

        # go through and figure out which contact labels were outliers
        # (same frame-major order in which feet_pos was collected)
        feet_vel_constraints = velConstraints[:, feet_contact]
        fit_pts_cnt = 0
        for frame_idx in range(feet_vel_constraints.shape[0]):
            for foot_joint_idx in range(feet_vel_constraints.shape[1]):
                if feet_vel_constraints[frame_idx, foot_joint_idx] == 1:
                    # check if marked an outlier
                    if huber.outliers_[fit_pts_cnt]:
                        # set to out of contact
                        feet_vel_constraints[frame_idx, foot_joint_idx] = 0
                    fit_pts_cnt += 1
        velConstraints[:, feet_contact] = feet_vel_constraints

    #
    # Final stage optimize for foot placements
    #
    print('Now optimizing for foot placement...')
    projWeight = 1000.0
    smoothWeightVel = 0.1
    smoothWeightAcc = 0.5
    dataWeight = 0.3
    velWeight = 10.0
    floorWeight = 10.0
    cur_sol = least_squares(
        fun_anim_for_projection,
        init_sol,
        max_nfev=50,
        verbose=2,
        jac=jac_anim_for_projection_sparse,
        gtol=1e-12,
        bounds=[-np.inf, np.inf],
        tr_solver='lsmr',
        args=(skeleton, poses3D, root_pos, joints_2d_normalized, plane_normal,
              plane_point, proj_weights, data_weights, np.arange(num_joints),
              np.arange(num_joints), SMOOTH_WEIGHTS, velConstraints,
              projWeight, smoothWeightVel, smoothWeightAcc, dataWeight,
              velWeight, floorWeight))

    # output for all frames
    anim = _anim_from_solution(skeleton, cur_sol.x, num_frames, num_joints)

    # save final animation
    BVH.save(os.path.join(save_dir, 'final_test.bvh'), anim, names)

    # get final 3d joints, reordered back to the input joint order
    final_pos = Animation.positions_global(anim)
    newPose3D = 0 * final_pos
    for j in range(final_pos.shape[1]):
        newPose3D[:, j, :] = final_pos[:, BACKWARD_MAPPING[j], :]

    # get final 2d joints (pinhole projection of the optimized 3D joints)
    projPose2D = []
    for frameNum in range(num_frames):
        projPt = np.zeros(poses2D[frameNum, :].shape)
        for j in range(0, num_joints):
            projPt[j, 0] = camFocal[0] * (
                newPose3D[frameNum, j, 0] /
                newPose3D[frameNum, j, 2]) + cam_center[0]
            projPt[j, 1] = camFocal[1] * (
                newPose3D[frameNum, j, 1] /
                newPose3D[frameNum, j, 2]) + cam_center[1]
        projPose2D.append(projPt)
    projPose2D = np.array(projPose2D)

    return (anim, newPose3D, projPose2D, plane_normal, plane_point,
            velConstraints)
################################################## ## Huber Regression ## ################################################## ''' The HuberRegressor is different to Ridge because it applies a linear loss to samples that are classified as outliers. A sample is classified as an inlier if the absolute error of that sample is lesser than a certain threshold. It differs from TheilSenRegressor and RANSACRegressor because it does not ignore the effect of the outliers but gives a lesser weight to them. Example: http://scikit-learn.org/stable/auto_examples/linear_model/plot_huber_vs_ridge.html#sphx-glr-auto-examples-linear-model-plot-huber-vs-ridge-py ''' try: huber = linear_model.HuberRegressor(fit_intercept=True, alpha=0.0, max_iter=100) huber.fit(X_train, y_train) predictions = cross_val_predict(huber, X_test, y_test, cv=6) f=open('huber.pickle','wb') pickle.dump(huber,f) f.close() except: print('error - HUBER') # get stats modeltypes.append('huber regression') explained_variances, mean_absolute_errors, mean_squared_errors, mean_squared_log_errors, median_absolute_errors, r2_scores = update_list(explained_variances, mean_absolute_errors, mean_squared_errors, mean_squared_log_errors, median_absolute_errors, r2_scores) ################################################## ## Polynomial Regression ## ##################################################
train = all_data.loc[all_data.Dataset == 'train'] test = all_data.loc[all_data.Dataset == 'test'] submission = all_data.loc[all_data.Dataset == 'submission'] """And take a look at the features that will be used to train the model.""" train[feature_columns].head() """## Model As I mentioned at the start of the notebook I didn't intend to experiment too much with model selection in this notebook so I have stuck with a simple Linear Regressor from SKLearn. While these non-deep models don't usually produce as accurate results as deep models they are super fast to train and easy to evaluate. This is great for notebooks focussing primarily on features. """ model = linear_model.HuberRegressor( max_iter=200) """With the model loaded, insert the features and labels for training.""" model.fit(train[feature_columns], train['FVC']) """Make predictions""" predictions = model.predict(train[feature_columns]) """## Evaluate Let's begin by having a look at the models weights. This gives us a good indication of what features are driving the models predictions. """ plt.bar(train[feature_columns].columns.values, model.coef_)
poly3coefs = poly.polyfit(x, y, 3) poly3fit = poly.polyval(x_new, poly3coefs) fit_dic['poly3'] = poly3fit if 'spline' in fits: spline_params = splrep(x, y, s=s, k=3) splinefit = splev(x_new, spline_params) fit_dic['spline'] = splinefit return fit_dic modeldict = { 'ardregression': lm.ARDRegression(), 'bayesianridge': lm.BayesianRidge(), 'elasticnet': lm.ElasticNet(), 'elasticnetcv': lm.ElasticNetCV(), 'huberregression': lm.HuberRegressor(), 'lars': lm.Lars(), 'larscv': lm.LarsCV(), 'lasso': lm.Lasso(), 'lassocv': lm.LassoCV(), 'lassolars': lm.LassoLars(), 'lassolarscv': lm.LassoLarsCV(), 'lassolarsic': lm.LassoLarsIC(), 'linearregression': lm.LinearRegression(), 'orthogonalmatchingpursuit': lm.OrthogonalMatchingPursuit(), 'orthogonalmatchingpursuitcv': lm.OrthogonalMatchingPursuitCV(), 'passiveagressiveregressor': lm.PassiveAggressiveRegressor(), 'ridge': lm.Ridge(), 'ridgecv': lm.RidgeCV(), 'sgdregressor': lm.SGDRegressor(), 'theilsenregressor': lm.TheilSenRegressor(),
KR.fit(X_train, y_train) KR.score(X_test, y_test) KR_test_pred = KR.predict(X_test) RSS_KR = np.mean(pow((KR_test_pred - y_test),2)) #neural network from sklearn.neural_network import MLPRegressor NN = MLPRegressor(hidden_layer_sizes = 20) NN.fit(X_train, y_train) NN.score(X_test, y_test) NN_test_pred = NN.predict(X_test) RSS_NN = np.mean(pow((NN_test_pred - y_test),2)) #Huber Regression Hub = linear_model.HuberRegressor(epsilon = 1) Hub.fit(X_train, y_train) Hub.score(X_test, y_test) Hub_test_pred = Hub.predict(X_test) RSS_Hub = np.mean(pow((Hub_test_pred - y_test),2)) #SGDRegressor SGD = linear_model.SGDRegressor(loss = 'epsilon_insensitive') SGD.fit(X_train, y_train) SGD.score(X_test, y_test) SGD_test_pred = SGD.predict(X_test) RSS_SGD = np.mean(pow((SGD_test_pred - y_test),2)) #GRADIENT BOOSTER from sklearn.ensemble import GradientBoostingRegressor
def trained_pipeline(self):
    """Build a PCA + Huber regression pipeline and fit it.

    The pipeline has two steps, 'pca' (PCA with default settings) followed
    by 'regressor' (HuberRegressor with default settings), and is trained
    on self.X_train / self.y_train.

    Returns the fitted Pipeline.
    """
    stages = [
        ('pca', PCA()),
        ('regressor', linear_model.HuberRegressor()),
    ]
    fitted = Pipeline(steps=stages)
    fitted.fit(self.X_train, self.y_train)
    return fitted
beta_star = np.zeros(d) beta_star[:5] = [5, -10, 0, 0, 3] adaptiveCV = AdaptiveHuberCV({ 'c_tau': np.arange(start=0.5, stop=1.5, step=.5), 'c_lamb': np.arange(start=0.005, stop=0.03, step=.005) }) adaptive = AdaptiveHuber(c_lamb=0.005, c_tau=0.5) N = 100 algos = { "OLS": linear_model.LinearRegression(fit_intercept=False), "LassoCV": LassoCV(cv=3), "Huber": linear_model.HuberRegressor(), #"MedianReg" : QuantRegScikit(q = 0.5), "AdaptiveCV": adaptiveCV } tails = { "normal": stats.norm(loc=0, scale=4), "student": stats.t(df=1.5), "lognormal": stats.lognorm(1, loc=0, scale=4) } errors = get_errors_for(algos, tails, N, d, n, beta_star) #errors.to_pickle("{}_{}_errors.pickle".format(n, d)) # the table of the paper table = errors.groupby(["algo", "tail"]).l2_error.describe()[["mean", "std"]]
import sklearn.metrics as sm
from sklearn import linear_model

if __name__ == '__main__':
    # Load a comma separated table: every column but the last is a feature,
    # the last column is the target.
    input_file = "mydata.txt"
    data = np.loadtxt(input_file, delimiter=",")
    X = data[:, :-1]
    y = data[:, -1]

    # First 90% of the rows train the model, the remainder is held out.
    num_training = int(0.9 * len(X))
    X_train, X_test = X[:num_training], X[num_training:]
    y_train, y_test = y[:num_training], y[num_training:]

    regressor = linear_model.HuberRegressor()
    regressor.fit(X_train, y_train)
    y_test_pred = regressor.predict(X_test)

    # Persist the trained model, then read it back from disk.
    with open("nauczonymodel.pkl", 'wb') as model_file:
        pickle.dump(regressor, model_file)
    with open("nauczonymodel.pkl", 'rb') as model_file:
        regressor_model = pickle.load(model_file)

    mae = sm.mean_absolute_error(y_test, y_test_pred)
    print("Mean absolute error: ", mae)

    # Held-out points in red, model predictions in black.
    plt.scatter(X_test, y_test, color='red')
    plt.plot(X_test, y_test_pred, color='black')
    plt.show()
b3 = A[train_index, :] @ x3 E3[jj] = np.linalg.norm(b[train_index, :] - b3, ord=2) / np.linalg.norm( b[train_index, :], ord=2) regr4 = linear_model.ElasticNet(alpha=0.8, copy_X=True, l1_ratio=lam, max_iter=10**5, random_state=0) regr4.fit(A[train_index, :], b[train_index, :]) x4 = np.transpose(regr4.coef_) b4 = A[train_index, :] @ x4 E4[jj] = np.linalg.norm(b[train_index, :] - b4, ord=2) / np.linalg.norm( b[train_index, :], ord=2) regr5 = MultiOutputRegressor(linear_model.HuberRegressor(), n_jobs=-1) huber = regr5.fit( A[train_index, :], b[train_index, :] ) # matlab's robustfit() does not have an exact sklearn analogue x5 = np.empty([m, p]) for i in range(0, len(huber.estimators_)): x5[:, i] = huber.estimators_[i].coef_ b5 = A[train_index, :] @ x5 E5[jj] = np.linalg.norm(b[train_index, :] - b5, ord=2) / np.linalg.norm( b[train_index, :], ord=2) ridge = linear_model.Ridge(alpha=1.0).fit(A[train_index, :], b[train_index, :]) x6 = np.transpose(ridge.coef_)
def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    """Fit one scikit-learn model by name and return its test accuracy.

    The estimator named by ``model_type`` is built with default arguments
    (plus ``class_weight`` for the classifiers that accept it), fitted on the
    training data, and used to predict the train and test sets. Predictions
    are rounded with ``np.round`` before ``accuracy_score`` is computed.

    Args:
        train_x, train_y: Training features and targets, passed to ``fit``.
        dev_x, dev_y: Accepted for interface compatibility; not used here.
        test_x, test_y: Held-out features and targets used for the score.
        model_type: Name of a ``sklearn.linear_model`` estimator class, or
            ``'LinearSVC'`` / ``'SVC'``.
        out_dir: If given, the rounded test predictions are written to
            ``<out_dir>/preds/best_test_pred.txt``. If ``None``, nothing is
            written.
        class_weight: Forwarded to the classifiers that take a
            ``class_weight`` argument; ignored for every other model.

    Returns:
        ``accuracy_score(test_y, rounded_test_predictions)``.

    Raises:
        NotImplementedError: If ``model_type`` is unknown, names a solver
            function rather than an estimator, or names an estimator that
            the installed scikit-learn does not provide.
    """
    import os

    from sklearn import linear_model, svm

    # linear_model estimators constructed with default arguments only.
    plain_estimators = frozenset((
        'ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV',
        'HuberRegressor', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars',
        'LassoLarsCV', 'LassoLarsIC', 'LinearRegression', 'MultiTaskLasso',
        'MultiTaskElasticNet', 'MultiTaskLassoCV', 'MultiTaskElasticNetCV',
        'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveRegressor', 'RandomizedLasso',
        'RandomizedLogisticRegression', 'RANSACRegressor', 'Ridge', 'RidgeCV',
        'SGDRegressor', 'TheilSenRegressor',
    ))
    # linear_model classifiers that additionally receive class_weight.
    weighted_estimators = frozenset((
        'LogisticRegression', 'LogisticRegressionCV',
        'PassiveAggressiveClassifier', 'Perceptron', 'RidgeClassifier',
        'RidgeClassifierCV', 'SGDClassifier',
    ))
    # Module-level solver functions: they are not estimators, so calling
    # them without data and then calling .fit() on the result cannot work.
    solver_functions = frozenset((
        'lars_path', 'lasso_path', 'lasso_stability_path',
        'logistic_regression_path', 'orthogonal_mp', 'orthogonal_mp_gram',
    ))

    start_train_time = time()
    logger.info("Start training...")
    if model_type in plain_estimators or model_type in weighted_estimators:
        estimator_cls = getattr(linear_model, model_type, None)
        if estimator_cls is None:
            # Some of the listed classes were removed from newer releases.
            raise NotImplementedError(
                "Model '%s' is not available in the installed scikit-learn"
                % model_type)
        if model_type in weighted_estimators:
            estimator = estimator_cls(class_weight=class_weight)
        else:
            estimator = estimator_cls()
        model = estimator.fit(train_x, train_y)
    elif model_type == 'LinearSVC':
        model = svm.LinearSVC(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'SVC':
        model = svm.SVC(class_weight=class_weight, degree=3).fit(train_x, train_y)
    elif model_type in solver_functions:
        raise NotImplementedError(
            "'%s' is a solver function, not an estimator" % model_type)
    else:
        raise NotImplementedError('Model not implemented')
    logger.info("Finished training.")
    end_train_time = time()
    logger.info("Training time : %d seconds", end_train_time - start_train_time)

    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    end_test_time = time()
    # Covers both prediction passes (train and test), as before.
    logger.info("Testing time : %d seconds", end_test_time - end_train_time)

    # Round so regressor outputs can be scored as discrete labels.
    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    # out_dir defaults to None; only dump predictions when a directory was
    # actually supplied instead of failing on None + str.
    if out_dir is not None:
        preds_dir = os.path.join(out_dir, 'preds')
        os.makedirs(preds_dir, exist_ok=True)
        np.savetxt(os.path.join(preds_dir, 'best_test_pred.txt'),
                   test_pred_y, fmt='%i')

    test_accuracy = accuracy_score(test_y, test_pred_y)
    logger.info('[TRAIN] Acc: %.3f', accuracy_score(train_y, train_pred_y))
    logger.info('[TEST] Acc: %.3f', test_accuracy)
    return test_accuracy