Example #1

# Imports assumed by this snippet (the excerpt starts mid-script; `scaler`,
# `Coulomb_df`, `Output_df` and `reusingModels` come from elided code):
import joblib
import numpy as np
import pandas as pd
from sklearn import linear_model, svm
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from skrvm import RVR  # third-party relevance vector machine package
from xgboost import XGBRegressor
Coulomb_df = scaler.transform(Coulomb_df)

X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(
        Coulomb_df, Output_df, test_size=.2, random_state=None)

reports_df = pd.DataFrame(
    columns=['Name', 'MARE', 'MSE', 'R2'])

# Build the candidate models once, outside the loop
regr_names = ['RF', 'SVM', 'RVM', 'Huber', 'XGBOOST']
regr_objects = [RandomForestRegressor(n_estimators=400, max_depth=1000, random_state=0),
                svm.SVR(kernel='rbf', epsilon=0.1, verbose=True),
                RVR(kernel='rbf', n_iter=10000, tol=0.0001, verbose=True),
                linear_model.HuberRegressor(
                    epsilon=1.35, max_iter=100, alpha=0.0001, warm_start=False,
                    fit_intercept=True, tol=1e-05),
                XGBRegressor(objective='reg:squarederror',  # 'reg:linear' is deprecated
                             colsample_bytree=0.3, learning_rate=0.1,
                             max_depth=400, alpha=10, n_estimators=400)
                ]

for regr_choice in range(5):
    regr = regr_objects[regr_choice]
    regr_name = regr_names[regr_choice]

    if reusingModels:
        regr = joblib.load('SavedModels_'+regr_name+'.pkl')
    else:
        regr.fit(X_train_scaled, y_train)

    if 'XGB' in regr_name:
        X_scaled_df_XGB = X_test_scaled  # .as_matrix() was removed from pandas; use .values if a raw array is needed
        y_predicted = regr.predict(X_scaled_df_XGB)
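    # The excerpt ends before reports_df is populated. A minimal sketch of the
    # missing evaluation step (assumes numpy and sklearn.metrics are imported
    # as above, and that y_predicted is also computed in the elided non-XGB
    # branch); 'MARE' is read here as mean absolute relative error, a guess
    # from the column name rather than the original code:
    mare = float(np.mean(np.abs((y_test - y_predicted) / y_test)))
    mse = mean_squared_error(y_test, y_predicted)
    r2 = r2_score(y_test, y_predicted)
    reports_df.loc[len(reports_df)] = [regr_name, mare, mse, r2]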
Example #2

# Fragment of a model test matrix; the regression/classification/
# classification_binary wrappers and RANDOM_SEED are defined in elided code.
# Assumes: from sklearn import linear_model, svm.
        classification(svm.LinearSVC(random_state=RANDOM_SEED)),
        classification_binary(svm.LinearSVC(random_state=RANDOM_SEED)),

        # SVM
        regression(svm.SVR(kernel='rbf')),
        classification_binary(svm.SVC(kernel='rbf', random_state=RANDOM_SEED)),
        classification_binary(
            svm.SVC(kernel='linear', random_state=RANDOM_SEED)),
        classification_binary(
            svm.SVC(kernel='poly', degree=2, random_state=RANDOM_SEED)),
        classification_binary(
            svm.SVC(kernel='sigmoid', random_state=RANDOM_SEED)),

        # Linear Regression
        regression(linear_model.LinearRegression()),
        regression(linear_model.HuberRegressor()),
        regression(linear_model.ElasticNet(random_state=RANDOM_SEED)),
        regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.Lars()),
        regression(linear_model.LarsCV()),
        regression(linear_model.Lasso(random_state=RANDOM_SEED)),
        regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
        regression(linear_model.LassoLars()),
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.BayesianRidge()),
        regression(linear_model.ARDRegression()),
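
Example #3

# Imports assumed by this snippet; getNumericXy() and `data` are defined in
# elided parts of the original script:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import ensemble, linear_model, tree
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor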
def loopModels(tsize=30, rep=300):
    '''
    Loop through a number of predefined regression models and report their performance.
    Uses numeric X, y.
    Prints the mean and median RMSE for each model.
    Plots the distribution of RMSE for each model.
    Returns a dictionary of {"model name": RMSE list} that includes all models.
    This dictionary can easily be converted to a pd.DataFrame and saved as CSV.

    Each model is run rep times to obtain a distribution of RMSE instead of a single number.
    Usually a single error estimate is computed, or cross-validation (e.g. 5-fold) is used,
    but a distribution of errors from e.g. 300 runs gives interesting results, useful in exploration.
    '''
    X, y = getNumericXy()

    # OverallQual is the feature most correlated with the target.
    # Wanted to see how the single most correlated feature does by itself.
    # Uncomment if you want to test.
    #X = data[["OverallQual"]]

    models = {
        #"ARDRegression": linear_model.ARDRegression(),
        "BayesianRidge": linear_model.BayesianRidge(),
        "ElasticNet": linear_model.ElasticNet(),
        "HuberRegressor": linear_model.HuberRegressor(),
        "Lars": linear_model.Lars(),
        "Lasso": linear_model.Lasso(),
        "LassoLars": linear_model.LassoLars(),
        "RANSACRegressor": linear_model.RANSACRegressor(),
        "DecisionTree": tree.DecisionTreeRegressor(),
        "RandomForest20": ensemble.RandomForestRegressor(n_estimators=20),
        "RandomForest100": ensemble.RandomForestRegressor(n_estimators=100),
        "RandomForest200": ensemble.RandomForestRegressor(n_estimators=200),
        "XGBRegressor": XGBRegressor(),
        "XGBRegressor_n1000_r05": XGBRegressor(n_estimators=1000, learning_rate=0.05),
    }
    models_rmselist = {}
    for mname, mdl in models.items():
        print("%s\n%s" % ("-" * 80, mname))
        rmselist = []
        for r in range(rep):
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=tsize / 100.0)
            model = mdl.fit(X_train, y_train)
            predictions = model.predict(X_test)
            rmse = np.sqrt(mean_squared_error(y_test, predictions))
            rmselist.append(rmse)
        print("\tMean RMSE = %f, Median RMSE = %f" %
              (np.mean(rmselist), np.median(rmselist)))
        models_rmselist[mname] = rmselist
        plt.figure()
        plt.hist(rmselist, max(int(np.sqrt(rep) * 1.5), 10))
        #plt.hist(rmselist,np.linspace(0.01,0.07,num=150))
        plt.title("%s mn=%.4f,md=%.4f (tsize=%d,rep=%d)" %
                  (mname, np.mean(rmselist), np.median(rmselist), tsize, rep))
    return models_rmselist
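# Usage sketch (hypothetical), matching the docstring's CSV suggestion:
# import pandas as pd
# results = loopModels(tsize=30, rep=300)
# pd.DataFrame(results).to_csv('rmse_distributions.csv', index=False)

Example #4

# This snippet sits inside an elided function; fitAndPrint() and the
# train/test splits are defined elsewhere. Assumes: from sklearn import
# linear_model, svm; from sklearn.ensemble import AdaBoostRegressor,
# GradientBoostingRegressor, RandomForestRegressor; from xgboost import
# XGBRegressor.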
    clf_xgb = XGBRegressor(  # objective='reg:linear' is deprecated; newer xgboost uses 'reg:squarederror'
                  learning_rate=0.1,  # the so-called `eta` value
                  max_depth=4,
                  min_child_weight=1,
                  silent=1,  # renamed `verbosity` in newer xgboost
                  subsample=0.8,
                  colsample_bytree=1,
                  n_estimators=450,
                  seed=123)  # renamed `random_state` in newer xgboost
    
    clf_rf = RandomForestRegressor(max_depth=4, random_state=0,n_estimators=1000)
    clf_glm = linear_model.LinearRegression()
    clf_ada = AdaBoostRegressor()
    clf_huber = linear_model.HuberRegressor()
    clf_gbm = GradientBoostingRegressor(n_estimators=900, min_samples_split=6, min_samples_leaf=6,
                                        max_features='sqrt', max_depth=15,
                                        loss='lad')  # 'lad' was renamed 'absolute_error' in scikit-learn 1.0
    clf_SVM = svm.SVR()
    clf_ridge= linear_model.Ridge(alpha=.5)
    
    print("#------------ GBM-------------------#")
    r2,rmse= fitAndPrint(clf_gbm, X_train, y_train,X_test,y_test)
    print("#------------ XGB -------------------#")
    fitAndPrint(clf_xgb, X_train, y_train,X_test,y_test)
#    print("#------------ RF -------------------#")
#    fitAndPrint(clf_rf, X_train, y_train,X_test,y_test)
    print("#------------ GLM -------------------#")
    fitAndPrint(clf_glm, X_train, y_train,X_test,y_test)
#    print("#------------ ADA -------------------#")
#    fitAndPrint(clf_ada, X_train, y_train,X_test,y_test)
#    print("#------------ HUBER-------------------#")
Example #5

# Imports assumed by this snippet; robust_params, ridge_params, lasso_params,
# en_params, svm_params, rf_params, gb_params and et_params are defined in
# elided cells above:
import sklearn.linear_model as lm
import xgboost as xgb
from skopt.space import Integer, Real
from sklearn.svm import SVR
from sklearn.ensemble import (ExtraTreesRegressor, GradientBoostingRegressor,
                              RandomForestRegressor)
# XGBoost
xgb_params = {
    'gamma': Integer(1, 10),
    'learning_rate': Real(10**-5, 0.99, prior="log-uniform"),
    'max_depth': Integer(3, 10),
    'reg_alpha': Real(10**-5, 1, prior="log-uniform"),
    'reg_lambda': Real(10**-5, 1, prior="log-uniform"),
    'max_delta_step': Integer(0, 10),
}
"""### 1d.3. Store everything in lists so we can iterate the code in a for loop
We'll run all steps as a single for loop. So we need to save the initial model structures, the hyperparameters, and the names of the pickled files in lists.
"""

models = [
    lm.HuberRegressor(max_iter=1000),
    lm.Ridge(),
    lm.Lasso(),
    lm.ElasticNet(),
    SVR(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    ExtraTreesRegressor(),
    xgb.XGBRegressor()
]
params = [
    robust_params, ridge_params, lasso_params, en_params, svm_params,
    rf_params, gb_params, et_params, xgb_params
]

# Names stored on local
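
# A hedged sketch of the tuning loop described in the markdown above, assuming
# skopt's BayesSearchCV as the tuner. The original names list and the
# X_train/y_train split live in elided cells, so `names` here is hypothetical:
from skopt import BayesSearchCV
import joblib

names = ['model_%d.pkl' % i for i in range(len(models))]  # hypothetical names
for model, space, fname in zip(models, params, names):
    search = BayesSearchCV(model, space, n_iter=32, cv=5, n_jobs=-1)
    search.fit(X_train, y_train)
    joblib.dump(search.best_estimator_, fname)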
Example #6

# A method from a larger distribution-fitting class; assumes numpy (np),
# scipy.stats (stats), sklearn's linear_model and a project-local `util`
# module are imported elsewhere.
    def _fit_gumbel_chart(self, outlier_detect, plot_diagnosis):
        '''Fit a Gumbel distribution via the Gumbel chart method

        Parameters
        ----------
            outlier_detect : str or None
                Outlier-detection strategy: None (or 'None') for plain OLS,
                'RANSAC Regression', or 'Huber Regression'.
            plot_diagnosis : bool
                Whether to generate a diagnostic plot.

        Variables added
        ---------------
            self.maxima_inlier_mask: Mask indicating inliers
            self.maxima_dist: Probability distribution for the maxima
            self.threshold: Threshold of X between bulk and tail; the minimum
                is constrained to be no lower than the 5th percentile of the
                fitted maxima distribution
        '''
        def _gumbel_y(F):
            ''' Calculate y coordinates on the Gumbel chart from CDF '''
            return -np.log(-np.log(F))

        x = self.maxima
        F = util.plotting_position(x, method='unbiased')
        y = _gumbel_y(F)
        if outlier_detect is None or outlier_detect == 'None':
            mdl = linear_model.LinearRegression().fit(x.reshape(-1, 1), y)
            self.maxima_inlier_mask = np.array(
                [True] * len(self.maxima))  # Create mask manually
        elif outlier_detect == 'RANSAC Regression':
            mdl = linear_model.RANSACRegressor(random_state=1).fit(
                x.reshape(-1, 1), y)
            self.maxima_inlier_mask = mdl.inlier_mask_
            mdl = mdl.estimator_
        elif outlier_detect == 'Huber Regression':
            mdl = linear_model.HuberRegressor(epsilon=1.35).fit(
                x.reshape(-1, 1), y)
            self.maxima_inlier_mask = np.array(
                [True] * len(self.maxima))  # Create mask manually
        else:
            raise ValueError('Unrecognized outlier_detect keyword')
        k, b = mdl.coef_[0], mdl.intercept_

        if plot_diagnosis:
            ax = self.diag_fig.add_subplot(1, 3, 1, label=self.label)
            ax.plot(x[self.maxima_inlier_mask],
                    y[self.maxima_inlier_mask],
                    'b.',
                    markersize=10,
                    label='Maxima(inliers)')
            ax.plot(x[~self.maxima_inlier_mask],
                    y[~self.maxima_inlier_mask],
                    'r.',
                    markersize=10,
                    label='Maxima(outliers)')
            xlm, ylm = ax.get_xlim(), ax.get_ylim()
            ax.plot(self.sample_coor,
                    mdl.predict(self.sample_coor.reshape(-1, 1)),
                    'r--',
                    label='Linear fitting')
            ax.set_xlim(xlm)
            ax.set_ylim(ylm)
            ax.set_xlabel('Maxima data')
            ax.set_ylabel('$-ln(-ln(F))$')
            ax.set_title(f'Gumbel chart ({self.label} tail)')
            ax.grid(True)
            # ax.legend(loc='best')

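        # On a Gumbel chart, y = -ln(-ln(F)) = (x - loc)/scale, so the fitted
        # slope k and intercept b give loc = -b/k and scale = 1/k: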
        self.maxima_dist = stats.gumbel_r(loc=-b / k, scale=1 / k)
        self.maxima_inlier_mask[
            self.maxima < self.maxima_dist.ppf(0.05)] = False
        self.threshold = self.maxima[self.maxima_inlier_mask].min()
Example #7

# Imports assumed by this snippet; SZMO, OLS, HARD and `data_root` are defined
# in elided parts of the original script:
import numpy as np
from sklearn import linear_model
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
def main():

    d = 600  # data dimension m; not changeable
    n = 3000  # number of samples; changeable
    sigma = 1  # changeable
    fold = 5

    # load data
    #x samples
    x_data = np.loadtxt(data_root + 'sample.txt', dtype=np.float32)
    x_data = x_data[0:n, 0:d]

    #perturbation data
    #y = y* + \epsilon + b
    y_perturbation_data = np.loadtxt(data_root + 'label_b.txt',
                                     dtype=np.float32)
    y_perturbation_data = y_perturbation_data[0:n]
    err = np.random.normal(loc=0, scale=sigma, size=n)
    y_perturbation_data = y_perturbation_data + err

    #y = y*
    y_standard_data = np.loadtxt(data_root + 'label.txt', dtype=np.float32)
    y_standard_data = y_standard_data[0:n]

    #sign information of perturbation b
    perturbation = np.loadtxt(data_root + 's_b.txt', dtype=np.float32)
    perturbation = perturbation[0:n]
    #optimal weight
    w_optimal = np.loadtxt(data_root + 'w_s.txt', dtype=np.float32)

    #k for hard
    k = np.loadtxt(data_root + 'k_hard.txt', dtype=np.float32)

    y_data = [y_perturbation_data, y_standard_data, perturbation]
    y_data = np.array(y_data)
    y_data = y_data.T

    w_loss = np.zeros([5, 6])
    y_loss = np.zeros([5, 6])

    for ii in range(fold):
        x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                            y_data,
                                                            test_size=0.5)
        l, m = np.shape(x_train)
        #print("l=", l)
        print("fold:%d" % ii)

        #sign information of perturbation b
        s_train = y_train[:, 2]
        #training data label with perturbation b
        y_train = y_train[:, 0]
        y_test = y_test[:, 1]
        mean = np.mean(y_test)

        # SZMO
        w_szmo = SZMO(x_train, y_train, s_train)
        #print "szmo:{0:.4}".format(np.linalg.norm(w_optimal-w_szmo,ord=2))
        w_loss[ii, 0] = np.linalg.norm(w_optimal - w_szmo, ord=2)
        y_predict = np.dot(x_test, w_szmo)
        y_loss[ii, 0] = np.linalg.norm(y_predict - y_test, ord=2) / l

        #ols
        w_ols = OLS(x_train, y_train)
        #print "ols:{0:.4}".format(np.linalg.norm(w_optimal-w_ols,ord=2))
        w_loss[ii, 1] = np.linalg.norm(w_optimal - w_ols, ord=2)
        y_predict = np.dot(x_test, w_ols)
        y_loss[ii, 1] = np.linalg.norm(y_predict - y_test, ord=2) / l

        #HARD
        w_hard = HARD(x_train, y_train, k)
        #print "hard:{0:.4}".format(np.linalg.norm(w_optimal-w_hard,ord=2))
        w_loss[ii, 2] = np.linalg.norm(w_optimal - w_hard, ord=2)
        y_predict = np.dot(x_test, w_hard)
        y_loss[ii, 2] = np.linalg.norm(y_predict - y_test, ord=2) / l

        # Ridge
        clf = Ridge(alpha=0.3)
        clf.fit(x_train, y_train)
        w_r = clf.coef_
        #print "Ridge:{0:.4}".format(np.linalg.norm(w_optimal-w_r,ord=2))
        w_loss[ii, 3] = np.linalg.norm(w_optimal - w_r, ord=2)
        y_predict = np.dot(x_test, w_r)
        y_loss[ii, 3] = np.linalg.norm(y_predict - y_test, ord=2) / l

        # Lasso
        reg = linear_model.Lasso(alpha=0.1)
        reg.fit(x_train, y_train)
        w_lasso = reg.coef_
        #print "lasso:{0:.4}".format(np.linalg.norm(w_optimal-w_lasso,ord=2))
        w_loss[ii, 4] = np.linalg.norm(w_optimal - w_lasso, ord=2)
        y_predict = np.dot(x_test, w_lasso)
        y_loss[ii, 4] = np.linalg.norm(y_predict - y_test, ord=2) / l

        # Huber
        huber = linear_model.HuberRegressor()
        huber.fit(x_train, y_train)
        w_huber = huber.coef_
        #print "huber:{0:.4}".format(np.linalg.norm(w_optimal-w_huber,ord=2))
        w_loss[ii, 5] = np.linalg.norm(w_optimal - w_huber, ord=2)
        y_predict = np.dot(x_test, w_huber)
        y_loss[ii, 5] = np.linalg.norm(y_predict - y_test, ord=2) / l

        #     k = np.loadtxt('k_hard.txt',dtype = np.float32)
    #     num = int( k[0] * l)
    #     print("num= ", num)
    #Assign the sign value of perturbation b according to b values.
    #with shape[5,6]
    #w_diff = np.linalg.norm(w_optimal - w_predict,ord = 2,axis = 2)
    mean_w_loss = w_loss.mean(axis=0)
    print("m:%d,n:%d,sigma:%d,alpha:50" % (d, n / 2, sigma))
    print(
        "attention:[alpha should be Manually changed based on generated files]"
    )
    print("              SZMO    OLS   HARD Ridge LASSO Huber     ")
    print("mean_w_loss:", end=' ')
    for i in mean_w_loss:
        print("%.4f" % i, end=' ')
    print("")
    #y_diff = np.linalg.norm(w_optimal - w_predict,ord = 2,axis = 2)
    mean_y_loss = y_loss.mean(axis=0)
    var_y_loss = y_loss.var(axis=0)
    print("mean_y_loss:", end=' ')
    for i in mean_y_loss:
        print("%.4f" % i, end=' ')
    print("")
    print("var_y_loss :", end=' ')
    var_y_loss = var_y_loss * 1000
    for i in var_y_loss:
        print("%.4f" % i, end=' ')
    print("")
Example #8

# Imports assumed by this snippet; Animation, BVH, Quaternions,
# JacobianInverseKinematicsCK and the *_MAPPING/*_WEIGHTS constants come from
# elided parts of the original project:
import os
from copy import deepcopy
import numpy as np
from scipy.optimize import least_squares
from sklearn import linear_model
def optimize_trajectory(poses2D,
                        joint_conf_2d,
                        poses3D,
                        root_pos,
                        joint_angles,
                        skeleton,
                        names,
                        ppx,
                        ppy,
                        camFocal,
                        velConstraints,
                        save_dir='./',
                        plane_normal=None,
                        plane_point=None):
    '''
    Runs kinematic optimization
    '''
    given_floor = True
    if plane_normal is None or plane_point is None:
        given_floor = False
        plane_normal = np.zeros((3), dtype=float)  # np.float was removed in NumPy 1.24
        plane_point = np.zeros((3), dtype=float)

    num_frames = poses2D.shape[0]
    num_joints = poses2D.shape[1]
    if num_joints != poses3D.shape[1]:
        print('2D and 3D data must have the same number of joints!')
        print('2D: ' + str(num_joints))
        print('3D: ' + str(poses3D.shape[1]))
        return None

    # Fit the initial motion to the template skeleton
    # target joint positions must be in order of the skeleton to fit to
    targets = np.zeros((num_frames, len(FORWARD_MAPPING), 3))
    for fr in range(num_frames):
        for j in range(targets.shape[1]):
            targets[fr,
                    j, :] = poses3D[fr, FORWARD_MAPPING[j], :] + root_pos[fr]
    # match bone lengths to initial 3D pose
    skeleton = update_skeleton(skeleton, targets, names)

    # calculate normalized 2D coordinates for projection loss
    # (regular projection with focal length and camera center removed)
    # also find projection weights to be used in optimization
    joints_2d_normalized = poses2D.copy()
    proj_weights = np.ones(
        (num_frames, num_joints))  # weights for the re-projection term
    data_weights = np.ones(
        (num_frames, num_joints))  # weights for the data term
    cam_center = np.array([ppx, ppy])
    for frame_idx in range(num_frames):
        cur_2d_joints = poses2D[frame_idx, :]
        for joint_idx in range(num_joints):
            if joint_idx < 25:  # only for joints that have 2D correspondence
                proj_weights[frame_idx, joint_idx] = proj_weights[
                    frame_idx, joint_idx] * joint_conf_2d[
                        frame_idx, joint_idx] * PROJ_WEIGHTS[joint_idx]
                # still need all joints to be used for data term, but some more than others
                data_weights[frame_idx,
                             joint_idx] = (data_weights[frame_idx, joint_idx] +
                                           joint_conf_2d[frame_idx, joint_idx]
                                           ) * DATA_WEIGHTS[joint_idx]
                # calc normalized projection
                joints_2d_normalized[frame_idx, joint_idx,
                                     0] = (cur_2d_joints[joint_idx, 0] -
                                           cam_center[0]) / camFocal[0]
                joints_2d_normalized[frame_idx, joint_idx,
                                     1] = (cur_2d_joints[joint_idx, 1] -
                                           cam_center[1]) / camFocal[1]
            else:
                proj_weights[frame_idx, joint_idx] = 0
                data_weights[frame_idx, joint_idx] = (
                    data_weights[frame_idx, joint_idx] + 0.4
                ) * DATA_WEIGHTS[
                    joint_idx]  # still want data term on spine joints (0.4 is arbitrary)

    init_root_sol = root_pos.copy()

    #
    # perform IK to get initial joint angle estimates.
    #

    # Initialize the animation with fitted skeleton
    anim = skeleton.copy()
    anim.orients.qs = skeleton.orients.qs.copy()
    anim.offsets = skeleton.offsets.copy()
    anim.positions = skeleton.positions.repeat(num_frames, axis=0)
    anim.rotations.qs = anim.rotations.qs.repeat(num_frames, axis=0)
    anim.positions[:, 0] = init_root_sol

    # initialize with smpl prediction
    angle_init = np.linalg.norm(joint_angles, axis=2)
    axis_init = joint_angles / np.expand_dims(angle_init + 1e-10, axis=2)
    axis_init[:, :, 0] *= -1.0
    axis_init[:, :, 1] *= -1.0
    axis_init[:, :, 2] *= -1.0
    init_transforms = Quaternions.from_angle_axis(angle_init, axis_init)
    align_transform = Quaternions.from_angle_axis(0.0,
                                                  np.array([1.0, 0.0, 0.0]))
    for i in range(num_frames):
        init_transforms[i, 0] *= align_transform  # align the root joint only

    anim.rotations = init_transforms

    # BVH.save(os.path.join(save_dir, 'pre_ik.bvh'), anim, names)

    # Set up end effector constraints with target positions
    targetmap = {}
    for ee_idx in range(targets.shape[1]):
        # no IK on spine
        if ee_idx not in SKEL_SPINE_IDX:
            targetmap[ee_idx] = targets[:, ee_idx]
    # Solve IK for joint angles
    ik = JacobianInverseKinematicsCK(anim,
                                     targetmap,
                                     translate=False,
                                     iterations=200,
                                     smoothness=0.0,
                                     damping=7,
                                     silent=False)
    ik()
    # BVH.save(os.path.join(save_dir, 'init_test.bvh'), anim, names)
    init_root_sol = anim.positions[:, 0]
    init_positions = Animation.positions_global(anim)

    #
    # Perform the kinematic optimization
    #

    #
    # For first stage, don't care about floor
    # Weights of various optimization terms for each optim step.
    #
    projWeight = [1000.0]  # projection term
    smoothWeightVel = [0.1]  # velocity smoothness
    smoothWeightAcc = [0.5]  # acceleration smoothness
    dataWeight = [0.3]  # keep 3d pose close to initialization
    velWeight = [
        10.0
    ]  # velocity at contact frames should be 0 (e.g., foot contact)
    floorWeight = [0.0]  # at contact frames, feet must be on floor

    # Compute the initial projection residual
    init_angles = np.reshape(anim.rotations.euler(), [num_frames, -1])
    print(init_angles.shape)
    init_sol = deepcopy(np.concatenate((init_root_sol, init_angles), axis=1))
    init_res = np.zeros(num_frames * num_joints * 2)
    count = 0
    for fr in range(num_frames):
        current_root = init_root_sol[fr, :]
        current_pose = init_positions[fr, :] - current_root
        for j in range(num_joints):
            joint_idx = BACKWARD_MAPPING[j]
            if (proj_weights[fr, j] > 0):
                proj_x = (current_pose[joint_idx, 0] + current_root[0]) / (
                    current_pose[joint_idx, 2] + current_root[2])
                proj_y = (current_pose[joint_idx, 1] + current_root[1]) / (
                    current_pose[joint_idx, 2] + current_root[2])
                init_res[count + j * 2 +
                         0] = projWeight[-1] * proj_weights[fr, j] * (
                             proj_x - joints_2d_normalized[fr, j, 0])
                init_res[count + j * 2 +
                         1] = projWeight[-1] * proj_weights[fr, j] * (
                             proj_y - joints_2d_normalized[fr, j, 1])
        count = count + num_joints * 2
    print('Error init:%f' % np.linalg.norm(init_res))

    init_sol = np.reshape(init_sol, [-1])

    # run stepwise optimization
    for step_idx in range(len(projWeight)):
        cur_sol = least_squares(
            fun_anim_for_projection,
            init_sol,
            max_nfev=50,
            verbose=2,
            jac=jac_anim_for_projection_sparse,
            gtol=1e-12,
            bounds=[-np.inf, np.inf],
            tr_solver='lsmr',
            args=(skeleton, poses3D, root_pos, joints_2d_normalized,
                  plane_normal, plane_point, proj_weights, data_weights,
                  np.arange(num_joints), np.arange(num_joints), SMOOTH_WEIGHTS,
                  velConstraints, projWeight[step_idx],
                  smoothWeightVel[step_idx], smoothWeightAcc[step_idx],
                  dataWeight[step_idx], velWeight[step_idx],
                  floorWeight[step_idx]))
        print('solution at step ' + str(step_idx) + ':')
        print(cur_sol.cost)

        #forward kinematics
        x = np.reshape(cur_sol.x, [num_frames, -1])
        root = x[:, :3]
        angles = x[:, 3:]
        anim = skeleton.copy()
        anim.orients.qs = skeleton.orients.qs.copy()
        anim.offsets = skeleton.offsets.copy()
        anim.positions = skeleton.positions.repeat(num_frames, axis=0)
        anim.rotations = Quaternions.from_euler(angles.reshape(
            (num_frames, num_joints, 3)),
                                                order='xyz',
                                                world=True)
        anim.positions[:, 0] = root
        # save final animation
        # BVH.save(os.path.join(save_dir, 'step' + str(step_idx) + '_test.bvh'), anim, names)

        init_sol = cur_sol.x

    #
    # fit floor
    #

    # collect foot positions at contact points
    #forward kinematics
    x = np.reshape(cur_sol.x, [num_frames, -1])
    root = x[:, :3]
    angles = x[:, 3:]
    anim = skeleton.copy()
    anim.orients.qs = skeleton.orients.qs.copy()
    anim.offsets = skeleton.offsets.copy()
    anim.positions = skeleton.positions.repeat(num_frames, axis=0)
    anim.rotations = Quaternions.from_euler(angles.reshape(
        (num_frames, num_joints, 3)),
                                            order='xyz',
                                            world=True)
    anim.positions[:, 0] = root

    final_pos = Animation.positions_global(anim)
    feet_pos = final_pos[:, FEET_IDX, :]
    feet_contact = np.array(
        [FORWARD_MAPPING[foot_idx] for foot_idx in FEET_IDX])
    # boolean indexing flattens frame-major: positions are ordered by frame,
    # then by foot joint within each frame
    feet_pos = feet_pos[velConstraints[:, feet_contact] == 1]
    print(str(feet_pos.shape[0]) + ' contacts for floor fit...')

    # if we weren't given a floor to use, fit one
    if not given_floor:
        # floor normal and point won't be used now
        plane_normal = np.zeros((3), dtype=float)
        plane_point = np.zeros((3), dtype=float)

        # First fit for floor (ignore more outliers)
        huber = linear_model.HuberRegressor(epsilon=1.5)
        huber.fit(feet_pos[:, [0, 2]], feet_pos[:, 1])

        print('Floor fit after ' + str(huber.n_iter_) + ' LBFGS iters!')
        coeff = huber.coef_
        intercept = huber.intercept_
        print('Coeffs + Intercept = (%f, %f, %f)' %
              (coeff[0], coeff[1], intercept))

        plane_verts = np.array([[0.0, -1.0, 0.0], [0.0, -1.0, 100.0],
                                [100.0, -1.0, 0.0]])
        for i in range(plane_verts.shape[0]):
            plane_verts[i,
                        1] = huber.predict(np.array([plane_verts[i, [0, 2]]]))
        # print(plane_verts)
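        # For the fitted plane y = c0*x + c1*z + b, an (unnormalized) normal is
        # proportional to (c0, -1, c1); the cross product below computes the
        # same direction from three points sampled on the plane.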
        plane_normal = np.cross(plane_verts[2, :] - plane_verts[0, :],
                                plane_verts[1, :] - plane_verts[2, :])
        plane_normal /= np.linalg.norm(plane_normal)
        plane_point = plane_verts[0]

        print('Number of outliers: %d' % (np.sum(huber.outliers_)))

    print('Normal: (%f, %f, %f)' %
          (plane_normal[0], plane_normal[1], plane_normal[2]))
    print('Point: (%f, %f, %f)' %
          (plane_point[0], plane_point[1], plane_point[2]))

    # update contacts based on floor fit only if we fit a floor
    if not given_floor:
        # second fit to find spurious contacts
        huber = linear_model.HuberRegressor(epsilon=2.2)
        huber.fit(feet_pos[:, [0, 2]], feet_pos[:, 1])

        print('Floor fit (for contact refinement) after ' +
              str(huber.n_iter_) + ' LBFGS iters!')
        coeff = huber.coef_
        intercept = huber.intercept_
        print('Coeffs + Intercept = (%f, %f, %f)' %
              (coeff[0], coeff[1], intercept))
        print('Number of outliers: %d' % (np.sum(huber.outliers_)))

        # print(velConstraints[:, feet_contact])
        og_vel_const = velConstraints[:, feet_contact].copy()

        # go through and figure out which contact labels were outliers
        feet_vel_constraints = velConstraints[:, feet_contact]
        fit_pts_cnt = 0
        for frame_idx in range(feet_vel_constraints.shape[0]):
            for foot_joint_idx in range(feet_vel_constraints.shape[1]):
                if feet_vel_constraints[frame_idx, foot_joint_idx] == 1:
                    # check if marked an outlier
                    if huber.outliers_[fit_pts_cnt]:
                        # set to out of contact
                        feet_vel_constraints[frame_idx, foot_joint_idx] = 0
                    fit_pts_cnt += 1

        velConstraints[:, feet_contact] = feet_vel_constraints

    #
    # Final stage optimize for foot placements
    #
    print('Now optimizing for foot placement...')
    projWeight = 1000.0
    smoothWeightVel = 0.1
    smoothWeightAcc = 0.5
    dataWeight = 0.3
    velWeight = 10.0
    floorWeight = 10.0
    cur_sol = least_squares(
        fun_anim_for_projection,
        init_sol,
        max_nfev=50,
        verbose=2,
        jac=jac_anim_for_projection_sparse,
        gtol=1e-12,
        bounds=[-np.inf, np.inf],
        tr_solver='lsmr',
        args=(skeleton, poses3D, root_pos, joints_2d_normalized, plane_normal,
              plane_point, proj_weights, data_weights, np.arange(num_joints),
              np.arange(num_joints), SMOOTH_WEIGHTS, velConstraints,
              projWeight, smoothWeightVel, smoothWeightAcc, dataWeight,
              velWeight, floorWeight))

    #output for all frames
    newPose3D = []
    projPose2D = []
    allRootPos = []

    #forward kinematics
    x = np.reshape(cur_sol.x, [num_frames, -1])
    root = x[:, :3]
    angles = x[:, 3:]
    anim = skeleton.copy()
    anim.orients.qs = skeleton.orients.qs.copy()
    anim.offsets = skeleton.offsets.copy()
    anim.positions = skeleton.positions.repeat(num_frames, axis=0)
    anim.rotations = Quaternions.from_euler(angles.reshape(
        (num_frames, num_joints, 3)),
                                            order='xyz',
                                            world=True)
    anim.positions[:, 0] = root
    # save final animation
    BVH.save(os.path.join(save_dir, 'final_test.bvh'), anim, names)

    # get final 3d joints
    final_pos = Animation.positions_global(anim)
    newPose3D = np.zeros_like(final_pos)
    for j in range(final_pos.shape[1]):
        newPose3D[:, j, :] = final_pos[:, BACKWARD_MAPPING[j], :]

    # get final 2d joints
    for frameNum in range(num_frames):
        varIndex = frameNum * num_joints * 3

        pts2D = poses2D[frameNum, :]

        projPt = np.zeros(pts2D.shape)
        for j in range(0, num_joints):
            corr2DPt = j
            corr3DPt = j
            projPt[corr2DPt, 0] = camFocal[0] * (
                newPose3D[frameNum, corr3DPt, 0] /
                newPose3D[frameNum, corr3DPt, 2]) + cam_center[0]
            projPt[corr2DPt, 1] = camFocal[1] * (
                newPose3D[frameNum, corr3DPt, 1] /
                newPose3D[frameNum, corr3DPt, 2]) + cam_center[1]

        projPose2D.append(projPt)

    projPose2D = np.array(projPose2D)

    return (anim, newPose3D, projPose2D, plane_normal, plane_point,
            velConstraints)
Example #9

# Assumes: from sklearn import linear_model; from sklearn.model_selection
# import cross_val_predict; import pickle; plus data splits from elided code.
##################################################
##              Huber Regression                ##
##################################################
'''
The HuberRegressor differs from Ridge because it applies a linear loss to
samples that are classified as outliers. A sample is classified as an inlier
if the absolute error of that sample is less than a certain threshold.

It differs from TheilSenRegressor and RANSACRegressor because it does not
ignore outliers; instead, it gives them a smaller weight.

Example:
http://scikit-learn.org/stable/auto_examples/linear_model/plot_huber_vs_ridge.html#sphx-glr-auto-examples-linear-model-plot-huber-vs-ridge-py
'''
try:
	huber = linear_model.HuberRegressor(fit_intercept=True, alpha=0.0, max_iter=100)
	huber.fit(X_train, y_train)
	predictions = cross_val_predict(huber, X_test, y_test, cv=6)
	f = open('huber.pickle', 'wb')
	pickle.dump(huber, f)
	f.close()
except Exception as e:
	print('error - HUBER:', e)

# get stats 
modeltypes.append('huber regression')
explained_variances, mean_absolute_errors, mean_squared_errors, mean_squared_log_errors, median_absolute_errors, r2_scores = update_list(explained_variances, mean_absolute_errors, mean_squared_errors, mean_squared_log_errors, median_absolute_errors, r2_scores)

##################################################
##              Polynomial Regression           ##
##################################################
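
Example #10

# Notebook snippet; `all_data` and `feature_columns` come from earlier cells.
# Assumes: import matplotlib.pyplot as plt; from sklearn import linear_model.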
train = all_data.loc[all_data.Dataset == 'train']
test = all_data.loc[all_data.Dataset == 'test']
submission = all_data.loc[all_data.Dataset == 'submission']


"""And take a look at the features that will be used to train the model."""

train[feature_columns].head()

"""## Model

As I mentioned at the start of the notebook I didn't intend to experiment too much with model selection in this notebook so I have stuck with a simple Linear Regressor from SKLearn. While these non-deep models don't usually produce as accurate results as deep models they are super fast to train and easy to evaluate. This is great for notebooks focussing primarily on features.
"""

model = linear_model.HuberRegressor(max_iter=200)

"""With the model loaded, insert the features and labels for training."""

model.fit(train[feature_columns], train['FVC'])

"""Make predictions"""

predictions = model.predict(train[feature_columns])

"""## Evaluate

Let's begin by having a look at the models weights. This gives us a good indication of what features are driving the models predictions.
"""

plt.bar(train[feature_columns].columns.values, model.coef_)
Example #11

# This snippet starts mid-function. Assumes: import
# numpy.polynomial.polynomial as poly; from scipy.interpolate import splev,
# splrep; import sklearn.linear_model as lm.
        poly3coefs = poly.polyfit(x, y, 3)
        poly3fit = poly.polyval(x_new, poly3coefs)
        fit_dic['poly3'] = poly3fit
    if 'spline' in fits:
        spline_params = splrep(x, y, s=s, k=3)
        splinefit = splev(x_new, spline_params)
        fit_dic['spline'] = splinefit
    return fit_dic


modeldict = {
    'ardregression': lm.ARDRegression(),
    'bayesianridge': lm.BayesianRidge(),
    'elasticnet': lm.ElasticNet(),
    'elasticnetcv': lm.ElasticNetCV(),
    'huberregression': lm.HuberRegressor(),
    'lars': lm.Lars(),
    'larscv': lm.LarsCV(),
    'lasso': lm.Lasso(),
    'lassocv': lm.LassoCV(),
    'lassolars': lm.LassoLars(),
    'lassolarscv': lm.LassoLarsCV(),
    'lassolarsic': lm.LassoLarsIC(),
    'linearregression': lm.LinearRegression(),
    'orthogonalmatchingpursuit': lm.OrthogonalMatchingPursuit(),
    'orthogonalmatchingpursuitcv': lm.OrthogonalMatchingPursuitCV(),
    'passiveaggressiveregressor': lm.PassiveAggressiveRegressor(),
    'ridge': lm.Ridge(),
    'ridgecv': lm.RidgeCV(),
    'sgdregressor': lm.SGDRegressor(),
    'theilsenregressor': lm.TheilSenRegressor(),
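
Example #12

# `KR` is a regressor set up in an elided cell (presumably sklearn's
# KernelRidge); the train/test splits and the numpy import also come from
# earlier code.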
KR.fit(X_train, y_train) 
KR.score(X_test, y_test)
KR_test_pred = KR.predict(X_test)
RSS_KR = np.mean(pow((KR_test_pred - y_test), 2))  # despite the RSS_ prefix, this is the mean squared error

#neural network 
from sklearn.neural_network import MLPRegressor
NN = MLPRegressor(hidden_layer_sizes = 20)
NN.fit(X_train, y_train) 
NN.score(X_test, y_test)
NN_test_pred = NN.predict(X_test)
RSS_NN = np.mean(pow((NN_test_pred - y_test),2))


#Huber Regression
Hub = linear_model.HuberRegressor(epsilon=1)  # epsilon=1.0 is the minimum allowed and most outlier-robust setting
Hub.fit(X_train, y_train) 
Hub.score(X_test, y_test)
Hub_test_pred = Hub.predict(X_test)
RSS_Hub = np.mean(pow((Hub_test_pred - y_test),2))

#SGDRegressor
SGD = linear_model.SGDRegressor(loss='epsilon_insensitive')
SGD.fit(X_train, y_train) 
SGD.score(X_test, y_test)
SGD_test_pred = SGD.predict(X_test)
RSS_SGD = np.mean(pow((SGD_test_pred - y_test),2))


#GRADIENT BOOSTER
from sklearn.ensemble import GradientBoostingRegressor
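
Example #13

# A method from a larger class. Assumes: from sklearn import linear_model;
# from sklearn.decomposition import PCA; from sklearn.pipeline import
# Pipeline; self.X_train/self.y_train are set elsewhere on the class.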
    def trained_pipeline(self):
        pca = PCA()
        lin_reg = linear_model.HuberRegressor()
        pipe = Pipeline(steps=[('pca', pca), ('regressor', lin_reg)])
        pipe.fit(self.X_train, self.y_train)
        return pipe
Example #14

# Imports assumed by this snippet; AdaptiveHuber, AdaptiveHuberCV,
# QuantRegScikit, get_errors_for and the dimensions d, n are defined in
# elided parts of the original project:
import numpy as np
from scipy import stats
from sklearn import linear_model
from sklearn.linear_model import LassoCV
beta_star = np.zeros(d)
beta_star[:5] = [5, -10, 0, 0, 3]

adaptiveCV = AdaptiveHuberCV({
    'c_tau': np.arange(start=0.5, stop=1.5, step=.5),
    'c_lamb': np.arange(start=0.005, stop=0.03, step=.005)
})
adaptive = AdaptiveHuber(c_lamb=0.005, c_tau=0.5)

N = 100
algos = {
    "OLS": linear_model.LinearRegression(fit_intercept=False),
    "LassoCV": LassoCV(cv=3),
    "Huber": linear_model.HuberRegressor(),
    #"MedianReg" : QuantRegScikit(q = 0.5),
    "AdaptiveCV": adaptiveCV
}

tails = {
    "normal": stats.norm(loc=0, scale=4),
    "student": stats.t(df=1.5),
    "lognormal": stats.lognorm(1, loc=0, scale=4)
}

errors = get_errors_for(algos, tails, N, d, n, beta_star)
#errors.to_pickle("{}_{}_errors.pickle".format(n, d))

# the table of the paper
table = errors.groupby(["algo", "tail"]).l2_error.describe()[["mean", "std"]]
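
Example #15

# Additional imports assumed by this snippet (used below but not shown in
# the excerpt):
import pickle
import numpy as np
import matplotlib.pyplot as plt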
import sklearn.metrics as sm
from sklearn import linear_model

if __name__ == '__main__':
    input_file = "mydata.txt"
    data = np.loadtxt(input_file, delimiter=",")

    X, y = data[:, :-1], data[:, -1]

    num_training = int(0.9 * len(X))
    num_test = len(X) - num_training

    X_train, y_train = X[:num_training], y[:num_training]
    X_test, y_test = X[num_training:], y[num_training:]

    regressor = linear_model.HuberRegressor()
    regressor.fit(X_train, y_train)
    y_test_pred = regressor.predict(X_test)

    with open("nauczonymodel.pkl", 'wb') as f:
        pickle.dump(regressor, f)

    with open("nauczonymodel.pkl", 'rb') as f:
        regressor_model = pickle.load(f)

    blad = sm.mean_absolute_error(y_test, y_test_pred)  # 'blad' ('błąd') is Polish for 'error'
    print("Mean absolute error: ", blad)

    plt.scatter(X_test, y_test, color='red')
    plt.plot(X_test, y_test_pred, color='black')
    plt.show()
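
Example #16

# This snippet starts mid-loop; A, b, lam, train_index, E3..E6, m, p and x3
# come from elided parts of the original script. Imports assumed:
# import numpy as np
# from sklearn import linear_model
# from sklearn.multioutput import MultiOutputRegressor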
    b3 = A[train_index, :] @ x3
    E3[jj] = np.linalg.norm(b[train_index, :] - b3, ord=2) / np.linalg.norm(
        b[train_index, :], ord=2)

    regr4 = linear_model.ElasticNet(alpha=0.8,
                                    copy_X=True,
                                    l1_ratio=lam,
                                    max_iter=10**5,
                                    random_state=0)
    regr4.fit(A[train_index, :], b[train_index, :])
    x4 = np.transpose(regr4.coef_)
    b4 = A[train_index, :] @ x4
    E4[jj] = np.linalg.norm(b[train_index, :] - b4, ord=2) / np.linalg.norm(
        b[train_index, :], ord=2)

    regr5 = MultiOutputRegressor(linear_model.HuberRegressor(), n_jobs=-1)
    huber = regr5.fit(
        A[train_index, :], b[train_index, :]
    )  # matlab's robustfit() does not have an exact sklearn analogue

    x5 = np.empty([m, p])
    for i, est in enumerate(huber.estimators_):
        x5[:, i] = est.coef_

    b5 = A[train_index, :] @ x5
    E5[jj] = np.linalg.norm(b[train_index, :] - b5, ord=2) / np.linalg.norm(
        b[train_index, :], ord=2)

    ridge = linear_model.Ridge(alpha=1.0).fit(A[train_index, :],
                                              b[train_index, :])
    x6 = np.transpose(ridge.coef_)
Example #17

# Imports assumed by this snippet; `logger` is configured elsewhere:
from time import time
import numpy as np
from sklearn.metrics import accuracy_score
def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    from sklearn import datasets, neighbors, linear_model, svm

    totalTime = 0

    startTrainTime = time()
    logger.info("Start training...")
    if model_type == 'ARDRegression':
        model = linear_model.ARDRegression().fit(train_x, train_y)
    elif model_type == 'BayesianRidge':
        model = linear_model.BayesianRidge().fit(train_x, train_y)
    elif model_type == 'ElasticNet':
        model = linear_model.ElasticNet().fit(train_x, train_y)
    elif model_type == 'ElasticNetCV':
        model = linear_model.ElasticNetCV().fit(train_x, train_y)
    elif model_type == 'HuberRegressor':
        model = linear_model.HuberRegressor().fit(train_x, train_y)
    elif model_type == 'Lars':
        model = linear_model.Lars().fit(train_x, train_y)
    elif model_type == 'LarsCV':
        model = linear_model.LarsCV().fit(train_x, train_y)
    elif model_type == 'Lasso':
        model = linear_model.Lasso().fit(train_x, train_y)
    elif model_type == 'LassoCV':
        model = linear_model.LassoCV().fit(train_x, train_y)
    elif model_type == 'LassoLars':
        model = linear_model.LassoLars().fit(train_x, train_y)
    elif model_type == 'LassoLarsCV':
        model = linear_model.LassoLarsCV().fit(train_x, train_y)
    elif model_type == 'LassoLarsIC':
        model = linear_model.LassoLarsIC().fit(train_x, train_y)
    elif model_type == 'LinearRegression':
        model = linear_model.LinearRegression().fit(train_x, train_y)
    elif model_type == 'LogisticRegression':
        model = linear_model.LogisticRegression(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'LogisticRegressionCV':
        model = linear_model.LogisticRegressionCV(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'MultiTaskLasso':
        model = linear_model.MultiTaskLasso().fit(train_x, train_y)
    elif model_type == 'MultiTaskElasticNet':
        model = linear_model.MultiTaskElasticNet().fit(train_x, train_y)
    elif model_type == 'MultiTaskLassoCV':
        model = linear_model.MultiTaskLassoCV().fit(train_x, train_y)
    elif model_type == 'MultiTaskElasticNetCV':
        model = linear_model.MultiTaskElasticNetCV().fit(train_x, train_y)
    elif model_type == 'OrthogonalMatchingPursuit':
        model = linear_model.OrthogonalMatchingPursuit().fit(train_x, train_y)
    elif model_type == 'OrthogonalMatchingPursuitCV':
        model = linear_model.OrthogonalMatchingPursuitCV().fit(train_x, train_y)
    elif model_type == 'PassiveAggressiveClassifier':
        model = linear_model.PassiveAggressiveClassifier(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'PassiveAggressiveRegressor':
        model = linear_model.PassiveAggressiveRegressor().fit(train_x, train_y)
    elif model_type == 'Perceptron':
        model = linear_model.Perceptron(class_weight=class_weight).fit(train_x, train_y)
    # NOTE: RandomizedLasso and RandomizedLogisticRegression were removed in
    # scikit-learn 0.21; the two branches below require an older release.
    elif model_type == 'RandomizedLasso':
        model = linear_model.RandomizedLasso().fit(train_x, train_y)
    elif model_type == 'RandomizedLogisticRegression':
        model = linear_model.RandomizedLogisticRegression().fit(train_x, train_y)
    elif model_type == 'RANSACRegressor':
        model = linear_model.RANSACRegressor().fit(train_x, train_y)
    elif model_type == 'Ridge':
        model = linear_model.Ridge().fit(train_x, train_y)
    elif model_type == 'RidgeClassifier':
        model = linear_model.RidgeClassifier(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'RidgeClassifierCV':
        model = linear_model.RidgeClassifierCV(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'RidgeCV':
        model = linear_model.RidgeCV().fit(train_x, train_y)
    elif model_type == 'SGDClassifier':
        model = linear_model.SGDClassifier(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'SGDRegressor':
        model = linear_model.SGDRegressor().fit(train_x, train_y)
    elif model_type == 'TheilSenRegressor':
        model = linear_model.TheilSenRegressor().fit(train_x, train_y)
    # NOTE: lars_path, lasso_path, lasso_stability_path,
    # logistic_regression_path, orthogonal_mp and orthogonal_mp_gram are
    # path/solver functions, not estimator classes, so the branches below
    # raise at call time; they are kept here as in the original.
    elif model_type == 'lars_path':
        model = linear_model.lars_path().fit(train_x, train_y)
    elif model_type == 'lasso_path':
        model = linear_model.lasso_path().fit(train_x, train_y)
    elif model_type == 'lasso_stability_path':
        model = linear_model.lasso_stability_path().fit(train_x, train_y)
    elif model_type == 'logistic_regression_path':
        model = linear_model.logistic_regression_path(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'orthogonal_mp':
        model = linear_model.orthogonal_mp().fit(train_x, train_y)
    elif model_type == 'orthogonal_mp_gram':
        model = linear_model.orthogonal_mp_gram().fit(train_x, train_y)
    elif model_type == 'LinearSVC':
        model = svm.LinearSVC(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'SVC':
        model = svm.SVC(class_weight=class_weight, degree=3).fit(train_x, train_y)
    else:
        raise NotImplementedError('Model not implemented')

    logger.info("Finished training.")
    endTrainTime = time()
    trainTime = endTrainTime - startTrainTime
    logger.info("Training time : %d seconds" % trainTime)


    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    endTestTime = time()
    testTime = endTestTime - endTrainTime
    logger.info("Testing time : %d seconds" % testTime)
    totalTime += trainTime + testTime

    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    np.savetxt(out_dir + '/preds/best_test_pred' + '.txt', test_pred_y, fmt='%i')

    logger.info('[TRAIN] Acc: %.3f' % (accuracy_score(train_y, train_pred_y)))
    logger.info('[TEST]  Acc: %.3f' % (accuracy_score(test_y, test_pred_y)))

    return accuracy_score(test_y, test_pred_y)
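
# A more compact alternative to the if/elif chain above; a sketch, not the
# original author's code. Estimator classes are looked up by name and
# instantiated on demand:
from sklearn import linear_model, svm

MODEL_REGISTRY = {
    'HuberRegressor': linear_model.HuberRegressor,
    'Ridge': linear_model.Ridge,
    'Lasso': linear_model.Lasso,
    'SGDRegressor': linear_model.SGDRegressor,
    'LinearSVC': svm.LinearSVC,
}

def make_model(model_type, **kwargs):
    try:
        return MODEL_REGISTRY[model_type](**kwargs)
    except KeyError:
        raise NotImplementedError('Model not implemented: %s' % model_type)

# Usage: make_model('HuberRegressor').fit(train_x, train_y)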