def all_boats_destination(root_model, api_all_boat, api_all_destination):
    # Build two sorted lists from the cached statistics file: one per generic boat
    # and one per destination (rows with ref_language == "1" only).
    boats_destination = Etl_data.open_json("data_boat_destination_statestique", root_model)

    all_boat = Statestique_bosts.all_boats(api_all_boat)
    all_boat = pd.DataFrame(all_boat)
    df_boats = boats_destination.drop_duplicates(subset="id_gen", keep="first")
    df_boats = pd.merge(df_boats, all_boat, on='id_gen', how='inner')
    df_boats = df_boats.to_dict('records')
    df_boats = sorted(df_boats, key=lambda k: k["name"], reverse=False)

    all_destination = Etl_data.web_service_response(api_all_destination)
    df_destenation = boats_destination.drop_duplicates(subset="id", keep="first")
    all_destination = all_destination[all_destination["ref_language"] == "1"]
    all_destination = all_destination.rename(columns={'location_id': 'id'})
    df_destenation = pd.merge(df_destenation, all_destination, on='id', how='inner')
    df_destenation = df_destenation.to_dict('records')
    df_destenation = sorted(df_destenation, key=lambda k: k["location_name"], reverse=False)

    return df_destenation, df_boats
def destination_boat_statetique(root_model, id_location, api_all_boats):
    # Count how many statistic rows each generic boat has for the given destination,
    # then attach a readable boat name from the boats web service.
    df_boats = Etl_data.open_json("data_boat_destination_statestique", root_model)
    df_boats = df_boats[df_boats["id"] == id_location]
    boat = list(df_boats.id_gen.unique())

    df_score = []
    for i in range(0, len(boat)):
        one_boat = pd.DataFrame(df_boats[df_boats["id_gen"] == boat[i]])
        score = len(one_boat)
        df_score.append({"id_gen": boat[i], "score": score})
    df_score = pd.DataFrame(df_score)

    all_boats = Etl_data.web_service_response(api_all_boats)
    all_boats = all_boats.rename(columns={'generic': 'id_gen'})
    all_boats = all_boats.drop_duplicates(subset="id_gen", keep="first")
    df_boats = pd.merge(df_score, all_boats, on='id_gen', how='inner')

    df_final = []
    for index, row in df_boats.iterrows():
        df_final.append({
            "id_gen": row["id_gen"],
            "name_boat": row["boat_brand"] + " " + row["boat_model"] + " " + row["shipyard_name"],
            "score": row["score"]
        })
    return df_final
def __init__(self, api_data_ww, api_data_crm1, api_data_crm2, api_boats_id_ww):
    self.data_ww = Etl_data.web_service_response(api_data_ww)
    self.data_crm1 = Etl_data.web_service_response(api_data_crm1)
    self.data_crm2 = Etl_data.web_service_response(api_data_crm2)
    self.boat_id_ww = Etl_data.web_service_response(api_boats_id_ww)
    self.all_boat = self.all_boat_int(self.data_ww, self.data_crm1, self.data_crm2)
def __init__(self, api_boats_generic, api_all_destination, root_model, name_model):
    self.boats_generic = Etl_data.web_service_response(api_boats_generic)
    self.all_destination = Etl_data.web_service_response(api_all_destination)
    self.root_model = root_model
    self.name_model = name_model
def created_data_for_recommendation_boat(self, root_model):
    print("log refresh data start")
    boats_data = self.boat_found_all_id()
    boats_data = boats_data[boats_data["country"] != ""]
    boats_data_final = self.ranked_boat(boats_data)
    Etl_data.writeToJSONFile(root_model, "recommandation_boats",
                             boats_data_final.to_dict("records"))
    recommendation = Recommendation_boats(root_model)
    recommendation.restart(root_model)
def statestique_type_boat(self, api_ww, apicrm, country):
    req_ww = Etl_data.web_service_response(api_ww)
    req_crm = Etl_data.web_service_response(apicrm)
    req_ww = req_ww[req_ww["country"] == country.upper()]
    req_crm = req_crm[req_crm["pays"] == country.upper()]
    boats_crm = self.boats_clean_type(req_crm, "type_bateau", "pays", ["3", "1", "2"])
    boats = self.boats_clean_type(req_ww, "boat_type", "country",
                                  ["Motoryacht", "Monohull", "Catamaran"])
    boats = self.somme_boats(boats, boats_crm)
    return boats
def index_country(self, root_model):
    # Replace each country label in the recommendation data with its numeric index.
    boats_data = Etl_data.open_json("recommandation_boats", root_model)
    index_country = Etl_data.open_json("indexed_countryt", root_model)
    unique_country = list(boats_data.country.unique())
    for i in range(0, len(unique_country)):
        one_index = index_country[index_country["label"] == unique_country[i]]
        for index, one_contry in one_index.iterrows():
            boats_data.loc[boats_data['country'] == unique_country[i], ['country']] = one_contry["index"]
    return index_country, boats_data
def create_data_ml_boats(self, root_model):
    boat = self.all_boat.drop_duplicates(subset='boat', keep='first')
    boat_id = self.boat_id_ww.drop_duplicates(subset='generic', keep='first')
    boat_final = self.boat_found_id(boat, boat_id)
    df_boats = pd.merge(pd.DataFrame(self.all_boat), pd.DataFrame(boat_final),
                        on='boat', how='inner')
    df_boats = self.ranged_week(df_boats)
    df_boats_final = self.ranked_boat(df_boats)
    Etl_data.writeToJSONFile(root_model, 'data_Ml_boat', df_boats_final.to_dict("records"))
    return "done"
def training_model(self, df_boats_destination, root_model, name_model):
    print("begin training")
    feature_col_names = ['year', 'tremestre', 'id_gen', 'id_location', 'fuel', 'loa', 'beam']
    predicted_class_names = ['counts']
    X = df_boats_destination[feature_col_names].values      # predictor features
    y = df_boats_destination[predicted_class_names].values  # target: request counts

    split_test_size = 0.25
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=split_test_size, random_state=23)

    model = RandomForestRegressor(n_estimators=100)
    model.fit(X_train, y_train)
    joblib.dump(model, root_model + "/" + name_model)

    # Evaluate on the train and test splits.
    score_train = model.score(X_train, y_train)
    score_test = model.score(X_test, y_test)
    print(score_train, score_test)
    x_pred = model.predict(X_train)
    error_train = mean_squared_error(y_train, x_pred)
    print(error_train)
    y_pred = model.predict(X_test)
    error_test = mean_squared_error(y_test, y_pred)
    print(error_test)

    # Persist the metrics next to the model.
    info = []
    info.append({
        "scoretrai": score_train,
        "errortr": error_train,
        "scoretes": score_test,
        "errorte": error_test
    })
    Etl_data.writeToJSONFile(root_model, "score_ml_boats_destination", info)

    # predection = Predection_distination(root_name + "/" + name_model)
    # predection.restart(root_name + "/" + name_model)

    print("training done !")
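# A minimal usage sketch (not part of the pipeline above): reload the regressor
# that training_model() persists with joblib and score a single feature row.
# The column order must match feature_col_names used at training time. The
# helper name and the example values below are hypothetical, for illustration only.
import joblib
import numpy as np

def predict_destination_counts(model_path, year, tremestre, id_gen, id_location, fuel, loa, beam):
    model = joblib.load(model_path)  # RandomForestRegressor saved by training_model()
    row = np.array([[year, tremestre, id_gen, id_location, fuel, loa, beam]], dtype=float)
    return float(model.predict(row)[0])  # predicted request count

# Example call (hypothetical values):
# predict_destination_counts(root_model + "/" + name_model, 2020, 2, 1234, 21, 1, 12.5, 4.2)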
def scored_day(self, name_model, root_model, api_all_destination):
    # Predict today's score for every country/ocean destination and return the top 20.
    all_destination = Etl_data.web_service_response(api_all_destination)
    predection = Predection_distination(root_model + "/" + name_model)

    date = datetime.datetime.now()
    date = date.strftime('%Y/%m/%d')
    date_in = date.split("/")

    destination = pd.DataFrame(all_destination[all_destination["ref_language"] == "1"])
    rank = []
    for index, request in destination.iterrows():
        if request["location_type"] == "country" or request["location_type"] == "ocean":
            predicit = predection.predict(date_in[0], date_in[1], date_in[2],
                                          request["location_id"])
            rank.append({
                "date": date_in[0] + "/" + date_in[1] + "/" + date_in[2],
                "score": predicit[0],
                'destination': request["location_name"],
                'id': request["location_id"]
            })
    return sorted(rank, key=lambda k: k["score"], reverse=True)[:20]
def found_all_year(root_model):
    df_destination = Etl_data.open_json("data_Ml_destination", root_model)
    df_final = []
    one_year = list(df_destination.year.unique())
    for i in range(0, len(one_year)):
        df_final.append({'year': one_year[i]})
    return sorted(df_final, key=lambda k: k["year"], reverse=False)
def courbe_destination(self, root_model, location=21):
    destination = Etl_data.open_json("data_Ml_destination", root_model)
    destination = destination[destination["id"] == location]
    destination = self.rank(destination)
    destination = pd.DataFrame(destination)
    df_destination_choix = self.reg_time(destination)
    return df_destination_choix.to_dict('records')
def found_tuple(self, root_model, api_all_destination):
    all_destination = Etl_data.web_service_response(api_all_destination)
    destination = Etl_data.open_json("data_Ml_destination", root_model)
    all_destination = all_destination[all_destination["ref_language"] == "1"]
    unique = list(destination.id.unique())
    id_destination_f = []
    for i in range(0, len(unique)):
        id_destination = all_destination[all_destination["location_id"] == unique[i]]
        for index, des in id_destination.iterrows():
            id_destination_f.append({
                "id": des["location_id"],
                "destination": des["location_name"]
            })
    return sorted(id_destination_f, key=lambda k: k["destination"], reverse=False)
def create_data_ML(self, root_model):
    # Concatenate the WW and CRM request data, clean it, and build the final
    # training set for the boat/destination model.
    # NOTE: DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent if the pandas version is ever upgraded.
    df_boats = self.data_ww.append(self.data_crm)
    df_boats = df_boats.append(self.data_crm2)
    print(df_boats.shape)
    print(df_boats.info())

    df_boats = df_boats[df_boats["boat"] != ""]
    df_boats = df_boats[df_boats["month"] != "0"]
    df_boats = df_boats[df_boats["request_destination"] != ""]
    df_boats = self.found_id_for_all_boat(df_boats)
    df_boats = self.found_destination(df_boats)
    Etl_data.writeToJSONFile(root_model, "data_boat_destination_statestique",
                             df_boats.to_dict('records'))

    df_boats_final = self.tremestre(df_boats)
    df_boats_final = self.rank(df_boats_final)

    req_generic = self.boats_generic.loc[:, ['boat_id', 'loa', 'beam', 'fuel']]
    req_generic = req_generic.rename(columns={'boat_id': 'id_gen'})
    Model_boats_final = pd.merge(df_boats_final, req_generic, on='id_gen', how='inner')

    # Cast the feature columns to the numeric types expected by the model.
    Model_boats_final['loa'] = Model_boats_final.loa.astype(float)
    Model_boats_final['beam'] = Model_boats_final.beam.astype(float)
    Model_boats_final['fuel'] = Model_boats_final.fuel.astype(int)
    Model_boats_final['id_location'] = Model_boats_final.id_location.astype(int)
    Model_boats_final['id_gen'] = Model_boats_final.id_gen.astype(int)
    Model_boats_final['year'] = Model_boats_final.year.astype(int)
    Model_boats_final['tremestre'] = Model_boats_final.tremestre.astype(int)

    Etl_data.writeToJSONFile(root_model, "data_final_boats_destination",
                             Model_boats_final.to_dict('records'))
def country_boat_statistique(root_model, code_country):
    df_boats = Etl_data.open_json("recommandation_boats", root_model)
    df_boats = df_boats[df_boats["country"] == code_country]
    boat = list(df_boats.name.unique())
    df_score = []
    for i in range(0, len(boat)):
        one_boat = pd.DataFrame(df_boats[df_boats["name"] == boat[i]])
        for index, bateaux in one_boat.iterrows():
            df_score.append({"counts": bateaux["counts"], 'name': boat[i]})
    df_final = pd.DataFrame(df_score)
    return df_final.to_dict('records')
def training_data_boat(self):
    feature_col_names = ['year', 'day', 'id_gen']
    predicted_class_names = ['counts']
    X = self.boat_data[feature_col_names].values       # predictor feature columns
    y = self.boat_data[predicted_class_names].values   # predicted target column (counts)

    split_test_size = 0.25
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=split_test_size, random_state=20)

    regrossor = RandomForestRegressor(n_estimators=500, random_state=0)
    regrossor.fit(X_train, y_train)
    joblib.dump(regrossor, self.path + "/" + self.name_model)

    # Evaluate on the train and test splits.
    score_train = math.fabs(regrossor.score(X_train, y_train))
    score_test = math.fabs(regrossor.score(X_test, y_test))
    print(score_train, score_test)
    x_pred = regrossor.predict(X_train)
    error_train = mean_squared_error(y_train, x_pred)
    print(error_train)
    y_pred = regrossor.predict(X_test)
    error_test = mean_squared_error(y_test, y_pred)
    print(error_test)

    # Restart the Prediction helper with the newly saved model.
    prediction = Prediction(self.path + "/" + self.name_model)
    prediction.restart()

    # Persist the metrics next to the model.
    info = []
    info.append({
        "scoretrai": score_train,
        "errortr": error_train,
        "scoretes": score_test,
        "errorte": error_test
    })
    Etl_data.writeToJSONFile(self.path, "score_ml_boats", info)
def all_boats(api_all_boat):
    boats = Etl_data.web_service_response(api_all_boat)
    boats = boats.drop_duplicates(subset="generic", keep="first")
    boats_id = []
    for index, boat in boats.iterrows():
        ch = boat["boat_brand"] + " " + boat["boat_model"] + " " + boat["shipyard_name"]
        boats_id.append({"id_gen": boat["generic"], "name": ch})
    boats_id = sorted(boats_id, key=lambda k: k["name"], reverse=False)
    return boats_id
def number_request_allyear_statistique(root_model):
    df_destination = Etl_data.open_json("data_Ml_destination", root_model)
    one_year = list(df_destination.year.unique())
    df_final = []
    for i in range(0, len(one_year)):
        one_boat = pd.DataFrame(df_destination[df_destination["year"] == one_year[i]])
        score = 0
        for index, one_data_frame in one_boat.iterrows():
            score = score + one_data_frame['counts']
        df_final.append({'year': one_year[i], 'counts': score})
    return sorted(df_final, key=lambda k: k["year"], reverse=True)
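# A vectorized sketch of the same yearly totals using a pandas groupby, assuming
# the 'counts' column in "data_Ml_destination" is numeric. This is an alternative
# formulation for illustration, not the function used by the application.
def number_request_allyear_groupby(root_model):
    df_destination = Etl_data.open_json("data_Ml_destination", root_model)
    totals = (df_destination.groupby("year")["counts"]
              .sum()
              .sort_index(ascending=False)   # newest year first, like the loop above
              .reset_index())
    return totals.to_dict('records')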
def country_alldestnation_statistique(root_model, code_country):
    df_destination = Etl_data.open_json("recommendation_destination", root_model)
    indexed_country = Etl_data.open_json("indexed_country", root_model)
    index_c = indexed_country[indexed_country["label"] == code_country.upper()]
    for index, country in index_c.iterrows():
        df_destination = df_destination[df_destination["country"] == country['index']]
    destination = list(df_destination.destination.unique())
    df_score = []
    for i in range(0, len(destination)):
        one_boat = pd.DataFrame(df_destination[df_destination["destination"] == destination[i]])
        for index, des in one_boat.iterrows():
            df_score.append({
                "counts": des["counts"],
                'name': destination[i]
            })
    df_final = pd.DataFrame(df_score)
    return df_final.to_dict('records')
def top_destination_request(root_model, api_all_destination):
    df_destination = Etl_data.open_json("data_Ml_destination", root_model)
    all_destination = Etl_data.web_service_response(api_all_destination)
    all_destination = all_destination[all_destination["ref_language"] == "1"]
    destination = list(df_destination.id.unique())
    df_score = []
    for i in range(0, len(destination)):
        one_destination = pd.DataFrame(df_destination[df_destination["id"] == destination[i]])
        score = 0
        for index, des in one_destination.iterrows():
            score = score + des["counts"]
        df_score.append({"location_id": destination[i], "score": score})
    df_score = pd.DataFrame(sorted(df_score, key=lambda k: k["score"], reverse=True))
    df_destination = pd.merge(df_score, all_destination, on='location_id', how='inner')
    return df_destination.to_dict('records')
def satestique_boat_generique(self, id_generic, root_model):
    boats = Etl_data.open_json("data_boat_destination_statestique", root_model)
    df_boats = boats[boats["id_gen"] == id_generic]
    if len(df_boats) < 1:
        return {"dates": "", "score": 0}
    df_boats = self.rank_boat(df_boats)
    df_boats = pd.DataFrame(df_boats)
    df_boats["dates"] = pd.DatetimeIndex(data=df_boats.dates)
    df_boats = df_boats.sort_values(by='dates')
    df_boats["dates"] = df_boats["dates"].dt.strftime('%Y/%m')
    return df_boats.to_dict('records')
def boat_type_destination(self, root_model, id_location, api_all_boats):
    # Count the boat types requested for a destination: non-monohull hulls count as
    # catamarans, sailing monohulls as monohulls, motorised monohulls as motoryachts.
    df_boats = Etl_data.open_json("data_boat_destination_statestique", root_model)
    df_boats = df_boats[df_boats["id"] == id_location]

    req_generic = Etl_data.web_service_response(api_all_boats)
    req_generic = req_generic.rename(columns={'boat_id': 'id_gen'})
    df_boats = pd.merge(df_boats, req_generic, on='id_gen', how='inner')

    df_boats_type = []
    motoryacht = 0
    monohull = 0
    catamaran = 0
    for index, boat in df_boats.iterrows():
        if boat["hull"].upper() == "MONOHULL":
            if boat["propulsion"].upper() == "SAILING":
                monohull = monohull + 1
            else:
                motoryacht = motoryacht + 1
        else:
            catamaran = catamaran + 1

    df_boats_type.append({'label': 'Catamaran', 'value': catamaran})
    df_boats_type.append({'label': 'Monohull', 'value': monohull})
    df_boats_type.append({'label': 'Motoryacht', 'value': motoryacht})
    return df_boats_type
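# A vectorized sketch of the same hull/propulsion classification, assuming the
# merged frame above with string "hull" and "propulsion" columns. This is an
# alternative formulation for illustration, not the function used by the API.
import numpy as np
import pandas as pd

def count_boat_types(df_boats: pd.DataFrame) -> list:
    hull = df_boats["hull"].str.upper()
    propulsion = df_boats["propulsion"].str.upper()
    label = np.select(
        [hull != "MONOHULL",            # any non-monohull hull counts as a catamaran
         propulsion == "SAILING"],      # sailing monohull
        ["Catamaran", "Monohull"],
        default="Motoryacht",           # motorised monohull
    )
    counts = pd.Series(label).value_counts()
    return [{"label": name, "value": int(counts.get(name, 0))}
            for name in ("Catamaran", "Monohull", "Motoryacht")]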
def __init__(self, api_data_ww, api_data_crm, api_data_crm2, api_boats_id_ww,
             api_boats_generic, api_all_destination):
    self.data_ww = Etl_data.web_service_response(api_data_ww)
    self.data_crm = Etl_data.web_service_response(api_data_crm)
    self.data_crm2 = Etl_data.web_service_response(api_data_crm2)
    self.boats_id_ww = Etl_data.web_service_response(api_boats_id_ww)
    self.boats_generic = Etl_data.web_service_response(api_boats_generic)
    self.all_destination = Etl_data.web_service_response(api_all_destination)
def top_boats(api_all_boats, root_model):
    boats = Etl_data.open_json("data_boat_destination_statestique", root_model)
    all_boat = Statestique_bosts.all_boats(api_all_boats)
    boat = list(boats.id_gen.unique())
    df_score = []
    for i in range(0, len(boat)):
        one_boat = pd.DataFrame(boats[boats["id_gen"] == boat[i]])
        score = len(one_boat)
        df_score.append({"id_gen": boat[i], "score": score})
    df_score = pd.DataFrame(sorted(df_score, key=lambda k: k["score"], reverse=True))
    all_boat = pd.DataFrame(all_boat)
    df_boats = pd.merge(df_score, all_boat, on='id_gen', how='inner')
    return df_boats.to_dict('records')
def country_destinaion_statistique(root_model, code_country, id_destination):
    df_destination = Etl_data.open_json("data_Ml_destination", root_model)
    df_destination = df_destination[df_destination["country"] == code_country]
    df_destination = df_destination[df_destination["id"] == id_destination]
    list_destination = []
    for index, destination in df_destination.iterrows():
        list_destination.append({
            "dates": destination["month"] + "/" + destination["year"],
            "counts": destination["counts"]
        })
    statis = Statestique_all_destination()
    list_destination = pd.DataFrame(list_destination)
    if len(list_destination) > 0:
        list_destination = statis.reg_time(list_destination)
    return list_destination.to_dict('records')
def __init__(self, api_data_ww, api_data_crm, api_boats_id_ww):
    self.data_ww = Etl_data.web_service_response(api_data_ww)
    self.data_crm = Etl_data.web_service_response(api_data_crm)
    self.boats_id_ww = Etl_data.web_service_response(api_boats_id_ww)
def __init__(self, root_model, name_model):
    self.path = root_model
    self.name_model = name_model
    self.boat_data = Etl_data.open_json('data_Ml_boat', self.path)
    self.training_data_boat()
def __init__(self, root_model, name_model):
    self.df_boats_destination = Etl_data.open_json("data_final_boats_destination", root_model)
    self.training_model(self.df_boats_destination, root_model, name_model)
def score_ml_destination(self, root_model):
    # Return the stored train/test scores and errors from the first stored record.
    score = Etl_data.open_json("score_ml_destination", root_model)
    for index, sc in score.iterrows():
        return sc["scoretrai"], sc["scoretes"], sc["errortr"], sc["errorte"]