def train_model(self, test_split=0.4):
    """Fit ``self.classifier`` on a training split and store its accuracy.

    The features and lables returned by ``data`` are divided by
    ``train_test_split``; the classifier is fitted on the training part and
    the value returned by ``self.get_accuracy`` for the held-out part is
    stored in ``self.accuracy``.

    Args:
        test_split: Forwarded to ``train_test_split`` as ``test_size``.
    """
    all_features = data.get_features()
    all_lables = data.get_lables()
    split = train_test_split(all_features, all_lables, test_size=test_split)
    train_x, test_x, train_y, test_y = split

    # Both the fit and the evaluation use the 1-D "lable" column, not the frame.
    train_targets = train_y["lable"].values
    self.classifier.fit(train_x, train_targets)

    test_targets = test_y["lable"].values
    self.accuracy = self.get_accuracy(test_x, test_targets)
def get_n_fold_validation_score(self, fold=10):
    """Return the mean accuracy over ``fold`` contiguous validation folds.

    The rows returned by ``data.get_features()`` and ``data.get_lables()``
    are cut, in their existing order, into ``fold`` blocks of
    ``len(features) // fold`` rows each. Every block is used once as the
    test set while a fresh ``GradientBoostingClassifier`` is fitted on all
    remaining rows. Rows left over when the length is not divisible by
    ``fold`` are never tested but always belong to the training set.

    Args:
        fold: Number of folds to evaluate.

    Returns:
        The arithmetic mean of the per-fold ``accuracy_score`` values.

    Raises:
        ValueError: If there are fewer rows than folds, so a fold would
            contain no test rows.
    """
    features = data.get_features()
    lables = data.get_lables()
    length = len(features)

    # Floor division: ``iloc`` slice bounds must be integers, and true
    # division would yield a float on Python 3.
    jump = length // fold
    if jump == 0:
        raise ValueError(
            "cannot build %r folds from %d rows" % (fold, length)
        )

    scores = []
    for k in range(fold):
        start = k * jump
        stop = start + jump

        feature_test = features.iloc[start:stop, :]
        lable_test = lables.iloc[start:stop, :]

        # Train on every row outside [start, stop). Slices are half-open, so
        # no -1/+1 adjustment is needed; the previous bounds silently dropped
        # the rows adjacent to the test block and the final row.
        feature_train = pd.concat(
            [features.iloc[:start, :], features.iloc[stop:, :]]
        )
        lable_train = pd.concat(
            [lables.iloc[:start, :], lables.iloc[stop:, :]]
        )

        classifier = GradientBoostingClassifier()
        classifier.fit(feature_train, lable_train["lable"].values)
        # Score against the same 1-D "lable" column the model was fitted on.
        scores.append(
            accuracy_score(
                lable_test["lable"].values, classifier.predict(feature_test)
            )
        )

    return sum(scores) / float(len(scores))
def get_n_fold_validation_score(self, fold=10):
    """Return the mean accuracy over ``fold`` contiguous validation folds.

    The rows returned by ``data.get_features()`` and ``data.get_lables()``
    are cut, in their existing order, into ``fold`` blocks of
    ``len(features) // fold`` rows each. Every block is used once as the
    test set while a fresh ``GradientBoostingClassifier`` is fitted on all
    remaining rows. Rows left over when the length is not divisible by
    ``fold`` are never tested but always belong to the training set.

    Args:
        fold: Number of folds to evaluate.

    Returns:
        The arithmetic mean of the per-fold ``accuracy_score`` values.

    Raises:
        ValueError: If there are fewer rows than folds, so a fold would
            contain no test rows.
    """
    features = data.get_features()
    lables = data.get_lables()
    length = len(features)

    # Floor division: ``iloc`` slice bounds must be integers, and true
    # division would yield a float on Python 3.
    jump = length // fold
    if jump == 0:
        raise ValueError(
            'cannot build %r folds from %d rows' % (fold, length))

    scores = []
    for k in range(fold):
        start = k * jump
        stop = start + jump

        feature_test = features.iloc[start:stop, :]
        lable_test = lables.iloc[start:stop, :]

        # Train on every row outside [start, stop). Slices are half-open, so
        # no -1/+1 adjustment is needed; the previous bounds silently dropped
        # the rows adjacent to the test block and the final row.
        feature_train = pd.concat(
            [features.iloc[:start, :], features.iloc[stop:, :]])
        lable_train = pd.concat(
            [lables.iloc[:start, :], lables.iloc[stop:, :]])

        classifier = GradientBoostingClassifier()
        classifier.fit(feature_train, lable_train['lable'].values)
        # Score against the same 1-D 'lable' column the model was fitted on.
        scores.append(
            accuracy_score(lable_test['lable'].values,
                           classifier.predict(feature_test)))

    return sum(scores) / float(len(scores))
def train_model(self, test_split=0.4):
    """Train ``self.classifier`` and remember how it scores on held-out rows.

    ``train_test_split`` divides the features and lables supplied by
    ``data``. The training part is used to fit the classifier; the result
    of ``self.get_accuracy`` on the remaining part is kept in
    ``self.accuracy``.

    Args:
        test_split: Passed through to ``train_test_split`` as ``test_size``.
    """
    inputs = data.get_features()
    targets = data.get_lables()
    fit_inputs, held_inputs, fit_targets, held_targets = train_test_split(
        inputs, targets, test_size=test_split)

    # Only the 'lable' column is used as the target, as a plain array.
    self.classifier.fit(fit_inputs, fit_targets['lable'].values)
    held_values = held_targets['lable'].values
    self.accuracy = self.get_accuracy(held_inputs, held_values)
def train_model(self, test_split=0.1):
    """Fit ``self.classifier`` on a training split and dump it to ``MODEL_FILE``.

    The features and lables returned by ``data`` are divided by
    ``train_test_split``; only the training part is used. Progress messages
    are printed before and after fitting, then the fitted classifier is
    written with ``joblib.dump``.

    Args:
        test_split: Forwarded to ``train_test_split`` as ``test_size``.
    """
    # The held-out halves are not used here; the underscore names make
    # that explicit.
    feature_train, _feature_test, lable_train, _lable_test = train_test_split(
        data.get_features(), data.get_lables(), test_size=test_split)

    # print() with a single parenthesised argument produces the same output
    # on Python 2 and 3; the statement form is a SyntaxError on Python 3.
    print("Start - Training Model")
    # NOTE(review): the whole lables frame is passed as the target here;
    # confirm whether a single 1-D column is what the classifier should get.
    self.classifier.fit(feature_train, lable_train)
    print("Done - Training Model")

    joblib.dump(self.classifier, MODEL_FILE)
def get_n_fold_validation_score(self, fold=10):
    """Return the mean accuracy over ``fold`` contiguous validation folds.

    The rows returned by ``data.get_features()`` and ``data.get_lables()``
    are cut, in their existing order, into ``fold`` blocks of
    ``len(features) // fold`` rows each. Every block is used once as the
    test set while a fresh
    ``RandomForestClassifier(criterion="gini", n_estimators=10)`` is fitted
    on all remaining rows. Rows left over when the length is not divisible
    by ``fold`` are never tested but always belong to the training set.

    Args:
        fold: Number of folds to evaluate.

    Returns:
        The arithmetic mean of the per-fold ``accuracy_score`` values.

    Raises:
        ValueError: If there are fewer rows than folds, so a fold would
            contain no test rows.
    """
    features = data.get_features()
    lables = data.get_lables()
    length = len(features)

    # Explicit floor division gives an integer block size on both Python 2
    # and Python 3; ``iloc`` slice bounds must be integers.
    jump = length // fold
    if jump == 0:
        raise ValueError(
            "cannot build %r folds from %d rows" % (fold, length))

    scores = []
    for k in range(fold):
        start = k * jump
        stop = start + jump

        feature_test = features.iloc[start:stop, :]
        lable_test = lables.iloc[start:stop, :]

        # Train on every row outside [start, stop). Slices are half-open, so
        # no -1/+1 adjustment is needed; the previous bounds silently dropped
        # the rows adjacent to the test block and the final row.
        feature_train = pd.concat(
            [features.iloc[:start, :], features.iloc[stop:, :]])
        lable_train = pd.concat(
            [lables.iloc[:start, :], lables.iloc[stop:, :]])

        classifier = RandomForestClassifier(criterion="gini", n_estimators=10)
        classifier.fit(feature_train, lable_train['lable'].values)
        # Score against the same 1-D 'lable' column the model was fitted on.
        scores.append(
            accuracy_score(lable_test['lable'].values,
                           classifier.predict(feature_test)))

    return sum(scores) / float(len(scores))
def predict_activities(self):
    """Build one predicted activity dict per record awaiting prediction.

    Records are fetched with ``db.get_records_to_predict`` using the value
    from ``helper.last_read()``; ``helper.write_last_read()`` is called
    straight after the fetch, before any prediction is made. Each record is
    mutated in place: ``user_id``, ``distance`` and ``start_timestamp`` are
    popped, and what remains is given to ``self.classifier`` as a one-row
    DataFrame.

    Returns:
        A list of dicts with the keys ``user_id``, ``distance``,
        ``start_datetime``, ``end_datetime`` and ``workout_type_id``.
    """
    pending = db.get_records_to_predict(helper.last_read())
    helper.write_last_read()

    def _to_activity(row):
        # The pops must happen before the classifier call so that the
        # popped keys are not part of the frame handed to predict().
        owner = row.pop('user_id')
        distance = round(float(row.pop('distance')), 2)
        started = int(row['start_timestamp'])
        row.pop('start_timestamp')
        # NOTE(review): 5000 is presumably a fixed duration in the
        # timestamp's unit -- confirm.
        ended = started + 5000
        frame = pd.DataFrame(row, index=[0])
        predicted = self.classifier.predict(frame)[0]
        return {
            'user_id': owner,
            'distance': distance,
            'start_datetime': started,
            'end_datetime': ended,
            'workout_type_id': int(float(predicted)),
        }

    return [_to_activity(row) for row in pending]
def fetch_predict_load_activities(self):
    """Insert every activity from ``self.predict_activities()`` via ``db``.

    Each predicted activity is passed, in order, to ``db.insert_activity``.
    """
    predicted = self.predict_activities()
    for activity in predicted:
        db.insert_activity(activity)