def created_data_for_recommendation_boat(self, root_model):
    """Rebuild the boat-recommendation dataset, persist it as JSON under
    root_model, and restart the recommendation service on the new data."""
    print("log refrech data start")
    # Collect every boat with a resolved id, then keep only rows that
    # actually carry a country value.
    all_boats = self.boat_found_all_id()
    boats_with_country = all_boats[all_boats["country"] != ""]
    ranked_boats = self.ranked_boat(boats_with_country)
    Etl_data.writeToJSONFile(root_model, "recommandation_boats",
                             ranked_boats.to_dict("records"))
    # Reload the recommender so it picks up the freshly written file.
    recommender = Recommendation_boats(root_model)
    recommender.restart(root_model)
def create_data_ml_boats(self, root_model):
    """Build the per-boat ML dataset (ids attached, weeks ranged, rows
    ranked) and write it to 'data_Ml_boat' under root_model.

    Returns the literal string "done" on completion.
    """
    # One row per boat name / per generic id before resolving ids.
    unique_boats = self.all_boat.drop_duplicates(subset='boat', keep='first')
    unique_ids = self.boat_id_ww.drop_duplicates(subset='generic', keep='first')
    boats_with_ids = self.boat_found_id(unique_boats, unique_ids)
    # Re-attach the resolved ids to the full (non-deduplicated) boat list.
    merged = pd.merge(pd.DataFrame(self.all_boat),
                      pd.DataFrame(boats_with_ids),
                      on='boat', how='inner')
    merged = self.ranged_week(merged)
    final_boats = self.ranked_boat(merged)
    Etl_data.writeToJSONFile(root_model, 'data_Ml_boat',
                             final_boats.to_dict("records"))
    return "done"
def training_model(self, df_boats_destination, root_model, name_model):
    """Train a RandomForest regressor predicting 'counts' from boat/destination
    features, save the model to root_model/name_model, and persist the
    train/test scores and MSEs to 'score_ml_boats_destination'.

    df_boats_destination -- DataFrame holding the feature columns below plus 'counts'.
    root_model -- directory where the model file and score JSON are written.
    name_model -- file name for the dumped model.
    """
    print("begin of training")
    feature_col_names = [
        'year', 'tremestre', 'id_gen', 'id_location', 'fuel', 'loa', 'beam'
    ]
    predicted_class_names = ['counts']
    X = df_boats_destination[feature_col_names].values
    # .ravel() flattens the (n, 1) column into the 1-D target sklearn expects,
    # silencing a DataConversionWarning without changing the fitted model.
    y = df_boats_destination[predicted_class_names].values.ravel()
    split_test_size = 0.25
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=split_test_size, random_state=23)
    model = RandomForestRegressor(n_estimators=100)
    model.fit(X_train, y_train)
    joblib.dump(model, root_model + "/" + name_model)
    # R^2 on both splits, printed for operator visibility.
    score_train = model.score(X_train, y_train)
    score_test = model.score(X_test, y_test)
    print(score_train, score_test)
    x_pred = model.predict(X_train)
    error_train = mean_squared_error(y_train, x_pred)
    print(error_train)
    y_pred = model.predict(X_test)
    error_test = mean_squared_error(y_test, y_pred)
    print(error_test)
    # Persist the metrics next to the model so later runs can be compared.
    info = [{
        "scoretrai": score_train,
        "errortr": error_train,
        "scoretes": score_test,
        "errorte": error_test
    }]
    Etl_data.writeToJSONFile(root_model, "score_ml_boats_destination", info)
    print("training done !")
def create_data_ML(self, root_model):
    """Assemble the boat/destination training dataset from the three raw
    sources, write the intermediate stats file, then write the final typed
    dataset to 'data_final_boats_destination' under root_model."""
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported equivalent and stacks all three sources
    # in a single pass.
    df_boats = pd.concat([self.data_ww, self.data_crm, self.data_crm2])
    print(df_boats.shape)
    print(df_boats.info())
    # Drop rows missing a boat, a usable month, or a requested destination.
    df_boats = df_boats[df_boats["boat"] != ""]
    df_boats = df_boats[df_boats["month"] != "0"]
    df_boats = df_boats[df_boats["request_destination"] != ""]
    df_boats = self.found_id_for_all_boat(df_boats)
    df_boats = self.found_destination(df_boats)
    # Snapshot of the cleaned data before aggregation, kept for statistics.
    Etl_data.writeToJSONFile(root_model, "data_boat_destination_statestique",
                             df_boats.to_dict('records'))
    df_boats_final = self.tremestre(df_boats)
    df_boats_final = self.rank(df_boats_final)
    # Join in the generic-boat attributes used as model features.
    req_generic = self.boats_generic.loc[:, ['boat_id', 'loa', 'beam', 'fuel']]
    req_generic = req_generic.rename(columns={'boat_id': 'id_gen'})
    Model_boats_final = pd.merge(df_boats_final, req_generic,
                                 on='id_gen', how='inner')
    # Cast every feature to the numeric dtype the training step expects.
    Model_boats_final['loa'] = Model_boats_final.loa.astype(float)
    Model_boats_final['beam'] = Model_boats_final.beam.astype(float)
    Model_boats_final['fuel'] = Model_boats_final.fuel.astype(int)
    Model_boats_final['id_location'] = Model_boats_final.id_location.astype(int)
    Model_boats_final['id_gen'] = Model_boats_final.id_gen.astype(int)
    Model_boats_final['year'] = Model_boats_final.year.astype(int)
    Model_boats_final['tremestre'] = Model_boats_final.tremestre.astype(int)
    Etl_data.writeToJSONFile(root_model, "data_final_boats_destination",
                             Model_boats_final.to_dict('records'))
def training_data_boat(self):
    """Fit the per-day boat-demand regressor on self.boat_data, dump it to
    self.path/self.name_model, restart the prediction service, and persist
    the train/test scores to 'score_ml_boats'."""
    features = ['year', 'day', 'id_gen']
    target = ['counts']
    X = self.boat_data[features].values   # predictor feature columns
    y = self.boat_data[target].values     # predicted class (score) column (1 X m)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=20)
    regressor = RandomForestRegressor(n_estimators=500, random_state=0)
    regressor.fit(X_train, y_train)
    joblib.dump(regressor, self.path + "/" + self.name_model)
    # NOTE(review): fabs hides the sign of a negative R^2 — kept as-is so
    # the persisted score values do not change; confirm this is intended.
    score_train = math.fabs(regressor.score(X_train, y_train))
    score_test = math.fabs(regressor.score(X_test, y_test))
    print(score_train, score_test)
    error_train = mean_squared_error(y_train, regressor.predict(X_train))
    print(error_train)
    error_test = mean_squared_error(y_test, regressor.predict(X_test))
    print(error_test)
    # Reload the prediction service with the freshly dumped model.
    prediction = Prediction(self.path + "/" + self.name_model)
    prediction.restart()
    info = [{
        "scoretrai": score_train,
        "errortr": error_train,
        "scoretes": score_test,
        "errorte": error_test
    }]
    Etl_data.writeToJSONFile(self.path, "score_ml_boats", info)