import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation


def trial_standup(df_train, test_data):
    """Test 1: 1s followed by 3s.

    Predict the 'avg_stand' value for the test data and attach it to
    test_data as a new feature.
    """
    y = df_train['avg_stand'].values
    X = df_train.drop(['avg_stand', 'stand', 'state', 'index'], axis=1)
    if X.isnull().values.any():
        print "Found NaN values"
        return None
    rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                                max_depth=None, max_features='auto', max_leaf_nodes=None,
                                min_samples_leaf=8, min_samples_split=4,
                                min_weight_fraction_leaf=0.0, n_estimators=500,
                                n_jobs=-1, oob_score=False, random_state=None,
                                verbose=0, warm_start=False)
    # Expand the features with polynomial terms and hold out 10% for validation.
    X = polynomial_features.fit_transform(X)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.1)
    rf.fit(X_train, y_train)
    # Apply the same polynomial expansion to the test data before predicting.
    p_test_data = polynomial_features.fit_transform(test_data)
    rf_pred2 = rf.predict(p_test_data)
    print rf_pred2
    # Now we have the estimated stand-up values, use them as a new feature
    # in the original df.
    test_data['avg_stand'] = rf_pred2
    final_prediction = convert_to_words(rf_pred2)
    print_full(final_prediction)
    get_position_stats(final_prediction)
    return test_data
def trial(df_train, test_data):
    """Test 1: 1s followed by 3s.

    Predict the 'state' label for the test data.
    """
    y = df_train['state'].values
    X = df_train.drop(['state', 'index'], axis=1)
    if X.isnull().values.any():
        print "Found NaN values"
        return
    rf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                                max_depth=None, max_features='auto', max_leaf_nodes=None,
                                min_samples_leaf=8, min_samples_split=4,
                                min_weight_fraction_leaf=0.0, n_estimators=5000,
                                n_jobs=-1, oob_score=False, random_state=None,
                                verbose=0, warm_start=False)
    X = polynomial_features.fit_transform(X)
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.1)
    rf.fit(X_train, y_train)
    # The model is trained on polynomial-expanded features, so the test data
    # must be expanded the same way before predicting.
    rf_pred2 = rf.predict(polynomial_features.fit_transform(test_data))
    final_prediction = convert_to_words(rf_pred2)
    print_full(final_prediction)
    get_position_stats(final_prediction)
def apply_hmm(data):
    """Smooth the predictions from the Random Forest classifier using HMM logic."""
    data_ = np.array(data)
    n_samples = len(data)
    # hmmlearn expects a 2D array of shape (n_samples, n_features).
    raw_data = data_.reshape((n_samples, -1))
    # Decode the most likely hidden-state sequence with the Viterbi algorithm.
    result = model.decode(raw_data, algorithm='viterbi')
    result_words = convert_to_words(result[1])
    print 'result words: {}'.format(result_words)
    # decode() returns (log probability, state sequence); the first element is
    # a log likelihood, not an accuracy.
    print 'result log probability: {}'.format(result[0])
    return result[1]
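# A minimal sketch of the module-level HMM that apply_hmm() relies on, assuming
# hmmlearn's GaussianHMM. The number of hidden states, covariance type and the
# training call below are assumptions, not values from the original code.
from hmmlearn import hmm

model = hmm.GaussianHMM(n_components=3, covariance_type='diag', n_iter=100)
# The model would be fitted on the classifier's predictions for the training
# sequence before apply_hmm() is called on the test predictions, e.g.:
#   model.fit(np.array(train_predictions).reshape(-1, 1))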
def trial(df_train, test_data): """The trial is for running predictions on test data.""" #my_test_data = test_data.drop(['avg_stand'], axis=1) y = df_train['state'].values X = df_train.drop(['avg_stand', 'stand', 'state', 'index'], axis=1) if X.isnull().values.any() == False: #X = polynomial_features.fit_transform(X) X_train, X_test, y_train, y_test = cross_validation.train_test_split( X, y, test_size=0.1) else: print "Found NaN values" rf.fit(X_train, y_train) #polynomial_test_data = polynomial_features.fit_transform(my_test_data) rf_pred2 = rf.predict(test_data) print rf_pred2 test_data['state'] = rf_pred2 final_prediction = convert_to_words(rf_pred2) print_full(final_prediction) get_position_stats(final_prediction) return test_data
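# A minimal sketch of the module-level objects the functions above assume:
# `polynomial_features` for feature expansion and the shared `rf` classifier
# fitted inside the second trial(). The degree and hyperparameter values here
# are assumptions, not taken from the original code.
from sklearn.preprocessing import PolynomialFeatures

polynomial_features = PolynomialFeatures(degree=2, include_bias=False)
rf = RandomForestClassifier(n_estimators=500, min_samples_leaf=8,
                            min_samples_split=4, n_jobs=-1)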