def train_model(self, test_split=0.4):
     # split training and test data
     feature_train, feature_test, lable_train, lable_test = train_test_split(
         data.get_features(), data.get_lables(), test_size=test_split
     )
     self.classifier.fit(feature_train, lable_train["lable"].values)
     self.accuracy = self.get_accuracy(feature_test, lable_test["lable"].values)
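If the classes are imbalanced, the same split can be made class-aware. A minimal sketch using train_test_split's stratify parameter, assuming data.get_features() / data.get_lables() return the same frames used above and the label frame has a single "lable" column (the local variable names are illustrative):

from sklearn.model_selection import train_test_split

features = data.get_features()
lables = data.get_lables()
# stratify on the label column so both splits keep the original class proportions
feature_train, feature_test, lable_train, lable_test = train_test_split(
    features, lables, test_size=0.4, stratify=lables["lable"].values
)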
 def get_n_fold_validation_score(self, fold=10):
     features = data.get_features()
     lables = data.get_lables()
     length = len(features)
     jump = length // fold  # rows per fold (integer division so the iloc slices stay integral)
     index = 0
     k = 0
     scores = list()
     while k < fold:
         feature_test = features.iloc[index:(index + jump), :]
         lable_test = lables.iloc[index:(index + jump), :]
         # the training data is everything outside the current test fold;
         # iloc slicing is end-exclusive, so no boundary rows need to be skipped
         feature_train = pd.concat(
             [features.iloc[0:index, :], features.iloc[index + jump:length, :]]
         )
         lable_train = pd.concat(
             [lables.iloc[0:index, :], lables.iloc[index + jump:length, :]]
         )
         index += jump
         k += 1
         classifier = GradientBoostingClassifier()
         classifier.fit(feature_train, lable_train["lable"].values)
         scores.append(
             accuracy_score(lable_test["lable"].values, classifier.predict(feature_test))
         )
     return sum(scores) / float(len(scores))
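The manual fold bookkeeping above can also be delegated to scikit-learn. A minimal sketch using cross_val_score, under the same assumption that data.get_features() / data.get_lables() return the feature and label frames used above (the function name is illustrative):

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score

def get_n_fold_validation_score_sklearn(fold=10):
    # cross_val_score splits, fits and scores a fresh clone of the estimator per fold
    scores = cross_val_score(GradientBoostingClassifier(),
                             data.get_features(),
                             data.get_lables()["lable"].values,
                             cv=fold)
    return scores.mean()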
 def train_model(self, test_split=0.1):
     # split training and test data
     feature_train, feature_test, lable_train, lable_test = train_test_split(
         data.get_features(), data.get_lables(), test_size=test_split
     )
     print("Start - Training Model")
     self.classifier.fit(feature_train, lable_train)
     print("Done - Training Model")
     # persist the fitted classifier so it can be reloaded without retraining
     joblib.dump(self.classifier, MODEL_FILE)
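Once the classifier has been dumped, it can be restored without retraining. A minimal sketch, assuming MODEL_FILE is the same path used in train_model above and that load_model is an illustrative name (older scikit-learn versions expose joblib as sklearn.externals.joblib):

import joblib

def load_model():
    # restore the classifier persisted by joblib.dump in train_model
    return joblib.load(MODEL_FILE)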
Example #6
 def get_n_fold_validation_score(self, fold=10):
     features = data.get_features()
     lables = data.get_lables()
     length = len(features)
     jump = length // fold  # rows per fold (integer division keeps the iloc indices integral)
     index = 0
     k = 0
     scores = list()
     while k < fold:
         feature_test = features.iloc[index:(index + jump), :]
         lable_test = lables.iloc[index: (index + jump), :]
         # the training data is everything outside the current test fold;
         # iloc slicing is end-exclusive, so no boundary rows need to be skipped
         feature_train = pd.concat(
             [features.iloc[0:index, :], features.iloc[index + jump:length, :]]
         )
         lable_train = pd.concat(
             [lables.iloc[0:index, :], lables.iloc[index + jump:length, :]]
         )
         index += jump
         k += 1
         classifier = RandomForestClassifier(criterion="gini", n_estimators=10)
         classifier.fit(feature_train, lable_train['lable'].values)
         scores.append(
             accuracy_score(lable_test['lable'].values, classifier.predict(feature_test))
         )
     return sum(scores)/float(len(scores))
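The explicit index arithmetic can also be handed to scikit-learn's KFold splitter. A minimal sketch under the same assumptions about data.get_features() / data.get_lables() (the function name is illustrative):

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

def get_n_fold_validation_score_kfold(fold=10):
    features = data.get_features()
    lables = data.get_lables()['lable'].values
    scores = []
    # KFold yields integer index arrays for each train/test split
    for train_idx, test_idx in KFold(n_splits=fold).split(features):
        classifier = RandomForestClassifier(criterion="gini", n_estimators=10)
        classifier.fit(features.iloc[train_idx], lables[train_idx])
        scores.append(accuracy_score(lables[test_idx],
                                     classifier.predict(features.iloc[test_idx])))
    return sum(scores) / float(len(scores))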
 def predict_activities(self):
     # fetch records not yet predicted since the last read marker, then advance the marker
     records = db.get_records_to_predict(helper.last_read())
     helper.write_last_read()
     user_activities = []
     for record in records:
         # pop identifier and metadata fields so that only feature columns remain
         # in `record` by the time it is passed to the classifier below
         user_activities.append({
             'user_id': record.pop('user_id'),
             'distance': round(float(record.pop('distance')), 2),
             'start_datetime': int(record['start_timestamp']),
             'end_datetime': int(record.pop('start_timestamp')) + 5000,
             'workout_type_id': int(float(self.classifier.predict(pd.DataFrame(record, index=[0]))[0]))
         })
     return user_activities
 def fetch_predict_load_activities(self):
     user_activities = self.predict_activities()
     for user_activity in user_activities:
         db.insert_activity(user_activity)