def trial_standup(df_train, test_data):
    """Predict the 'avg_stand' column of test_data with a random forest.

    The classifier is trained on df_train with the 'avg_stand', 'stand',
    'state' and 'index' columns removed, after the remaining columns have
    been expanded by the module-level ``polynomial_features`` transformer.
    Only the 90% training part of a random 90/10 split is used for fitting;
    the hold-out part is not evaluated here.

    Args:
        df_train: DataFrame containing the feature columns plus the
            'avg_stand', 'stand', 'state' and 'index' columns.
        test_data: DataFrame of features to predict on. It is modified in
            place: an 'avg_stand' column holding the predictions is added.
            Presumably it has the same feature columns as the training
            features -- confirm against the caller.

    Returns:
        test_data, including the new 'avg_stand' column.

    Raises:
        ValueError: if the training features contain NaN values.
    """
    y = df_train['avg_stand'].values
    X = df_train.drop(['avg_stand', 'stand', 'state', 'index'], axis=1)

    # Fail loudly and early: previously this case only printed a message and
    # then crashed on unbound locals when fitting.
    if X.isnull().values.any():
        raise ValueError("Found NaN values")

    rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=8, min_samples_split=4,
            min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=-1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

    X = polynomial_features.fit_transform(X)

    # The hold-out split is kept so the model still trains on 90% of the
    # rows, but it is not used in this function.
    X_train, _, y_train, _ = cross_validation.train_test_split(X, y, test_size=0.1)
    rf.fit(X_train, y_train)

    # Apply the transformer fitted on the training features; the test data
    # must not be used to re-fit it.
    p_test_data = polynomial_features.transform(test_data)
    rf_pred2 = rf.predict(p_test_data)
    print(rf_pred2)

    # Store the estimated values as a new feature on the test frame.
    test_data['avg_stand'] = rf_pred2

    final_prediction = convert_to_words(rf_pred2)
    print_full(final_prediction)
    get_position_stats(final_prediction)
    return test_data
Ejemplo n.º 2
0
def trial(df_train, test_data):
    """Train a random forest on 'state' and report predictions for test_data.

    The classifier is trained on df_train with the 'state' and 'index'
    columns removed, after the remaining columns have been expanded by the
    module-level ``polynomial_features`` transformer. Only the 90% training
    part of a random 90/10 split is used for fitting. The predictions for
    test_data are passed to ``convert_to_words`` and the result is handed to
    ``print_full`` and ``get_position_stats``.

    Args:
        df_train: DataFrame containing the feature columns plus the 'state'
            and 'index' columns.
        test_data: features to predict on. Assumed to be un-expanded and to
            have the same columns as the training features -- confirm
            against the caller.

    Returns:
        None.

    Raises:
        ValueError: if the training features contain NaN values.
    """
    y = df_train['state'].values
    X = df_train.drop(['state', 'index'], axis=1)

    # Fail loudly and early: previously this case only printed a message and
    # then crashed on unbound locals when fitting.
    if X.isnull().values.any():
        raise ValueError("Found NaN values")

    rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=8, min_samples_split=4,
            min_weight_fraction_leaf=0.0, n_estimators=5000, n_jobs=-1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

    X = polynomial_features.fit_transform(X)

    # The hold-out split is kept so the model still trains on 90% of the
    # rows, but it is not used in this function.
    X_train, _, y_train, _ = cross_validation.train_test_split(X, y, test_size=0.1)
    rf.fit(X_train, y_train)

    # The model was fitted on polynomial-expanded features, so the test data
    # has to go through the same (already fitted) transformer before
    # predicting; feeding the raw columns gives a feature-count mismatch.
    p_test_data = polynomial_features.transform(test_data)
    rf_pred2 = rf.predict(p_test_data)

    final_prediction = convert_to_words(rf_pred2)
    print_full(final_prediction)
    get_position_stats(final_prediction)
Ejemplo n.º 3
0
def trial(df_train, test_data):
    """Run 'state' predictions on test data with the module-level ``rf``.

    The estimator ``rf`` is not created here; it is looked up at module
    level and re-fitted on every call. Training uses df_train with the
    'avg_stand', 'stand', 'state' and 'index' columns removed, and only the
    90% training part of a random 90/10 split.

    Args:
        df_train: DataFrame containing the feature columns plus the
            'avg_stand', 'stand', 'state' and 'index' columns.
        test_data: DataFrame of features to predict on. It is modified in
            place: a 'state' column holding the predictions is added.
            Presumably it has the same feature columns as the training
            features -- confirm against the caller.

    Returns:
        test_data, including the new 'state' column.

    Raises:
        ValueError: if the training features contain NaN values.
    """
    y = df_train['state'].values
    X = df_train.drop(['avg_stand', 'stand', 'state', 'index'], axis=1)

    # Fail loudly and early: previously this case only printed a message and
    # then crashed on the unbound training split when fitting.
    if X.isnull().values.any():
        raise ValueError("Found NaN values")

    # Features are used as-is here (no polynomial expansion), for both the
    # training and the test data. The hold-out part of the split is unused.
    X_train, _, y_train, _ = cross_validation.train_test_split(
        X, y, test_size=0.1)

    rf.fit(X_train, y_train)
    rf_pred2 = rf.predict(test_data)
    print(rf_pred2)

    # Store the predicted states on the test frame that is handed back.
    test_data['state'] = rf_pred2

    final_prediction = convert_to_words(rf_pred2)
    print_full(final_prediction)
    get_position_stats(final_prediction)
    return test_data
Ejemplo n.º 4
0
def trial(df_train, test_data):
    """Train a random forest on 'state' and report predictions for test_data.

    The classifier is trained on df_train with the 'state' and 'index'
    columns removed, after the remaining columns have been expanded by the
    module-level ``polynomial_features`` transformer. Only the 90% training
    part of a random 90/10 split is used for fitting. The predictions for
    test_data are passed to ``convert_to_words`` and the result is handed to
    ``print_full`` and ``get_position_stats``.

    Args:
        df_train: DataFrame containing the feature columns plus the 'state'
            and 'index' columns.
        test_data: features to predict on. Assumed to be un-expanded and to
            have the same columns as the training features -- confirm
            against the caller.

    Returns:
        None.

    Raises:
        ValueError: if the training features contain NaN values.
    """
    y = df_train['state'].values
    X = df_train.drop(['state', 'index'], axis=1)

    # Fail loudly and early: previously this case only printed a message and
    # then crashed on unbound locals when fitting.
    if X.isnull().values.any():
        raise ValueError("Found NaN values")

    rf = RandomForestClassifier(bootstrap=True,
                                class_weight=None,
                                criterion='gini',
                                max_depth=None,
                                max_features='auto',
                                max_leaf_nodes=None,
                                min_samples_leaf=8,
                                min_samples_split=4,
                                min_weight_fraction_leaf=0.0,
                                n_estimators=5000,
                                n_jobs=-1,
                                oob_score=False,
                                random_state=None,
                                verbose=0,
                                warm_start=False)

    X = polynomial_features.fit_transform(X)

    # The hold-out split is kept so the model still trains on 90% of the
    # rows, but it is not used in this function.
    X_train, _, y_train, _ = cross_validation.train_test_split(
        X, y, test_size=0.1)
    rf.fit(X_train, y_train)

    # The model was fitted on polynomial-expanded features, so the test data
    # has to go through the same (already fitted) transformer before
    # predicting; feeding the raw columns gives a feature-count mismatch.
    p_test_data = polynomial_features.transform(test_data)
    rf_pred2 = rf.predict(p_test_data)

    final_prediction = convert_to_words(rf_pred2)
    print_full(final_prediction)
    get_position_stats(final_prediction)