def fit_model(id, street):
    """Train and persist a classifier for one street of one predictor.

    Loads the processed-actions CSV for ``street``/``id``, encodes the
    ``position`` and ``position_category`` columns, fits an
    ``XGBClassifier`` on a stratified 70/30 train/test split, pickles the
    fitted model, and stores ``classifier.score`` on the held-out split in
    the matching ``<street>_success_rate`` attribute of the ``Predictor``
    row.

    Args:
        id: Predictor identifier; used to format the input/output file
            names and to look up the ``Predictor`` row.
        street: One of ``'pre_flop'``, ``'flop'``, ``'turn'``, ``'river'``.
            Any other value trains and saves a model but records no rate.
    """
    worker_session = Session()
    try:
        print(f"received id to fit: {id} for the street {street}")

        frame = pd.read_csv(PROCESSED_LOGS_FILE_NAME_FORMAT.format(street, id))
        frame = MultiColumnLabelEncoder(
            columns=["position", "position_category"]).fit_transform(frame)

        # 'action' is the prediction target; 'street' is dropped from the
        # feature matrix.
        y = frame['action'].to_numpy()
        X = frame.drop(columns=['action', 'street']).to_numpy()

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.3, random_state=42, stratify=y)

        classifier = XGBClassifier()
        classifier = classifier.fit(X_train, y_train)

        # Use a context manager so the model file is flushed and closed even
        # if pickling fails part-way.
        model_path = TRAINNED_MODEL_FILE_NAME_FORMAT.format(street, id)
        with open(model_path, "wb") as model_file:
            pickle.dump(classifier, model_file)

        score = classifier.score(X_test, y_test)

        predictor = worker_session.query(Predictor).filter(
            Predictor.id == id).first()
        if street in ('pre_flop', 'flop', 'turn', 'river'):
            setattr(predictor, street + '_success_rate', score)
        # NOTE(review): every per-street run sets this status, so it flips to
        # 'finished' as soon as the first street completes - confirm intended.
        predictor.status = 'finished'
        worker_session.commit()
    finally:
        # Always release the session, including on failure.
        worker_session.close()
def process_log_files(id):
    """Extract an uploaded log archive, parse it, and start model training.

    Steps:
      1. Unzip ``LOGS_DIR/<id>.zip`` into ``LOGS_DIR/<id>`` and record the
         number of extracted entries in ``predictor.total_files``.
      2. Dispatch one ``wsgi.process_single_log_file`` task per tournament
         log and wait for each result, updating ``failed_files`` /
         ``finished_files`` on the ``Predictor`` row as results arrive.
      3. Write the accumulated actions of each street to its processed CSV.
      4. Dispatch one ``wsgi.fit_model`` task per street.

    Args:
        id: Predictor identifier (string); also the archive's base name.
    """
    streets = ['pre_flop', 'flop', 'turn', 'river']
    worker_session = Session()
    try:
        print("received prossing :" + id)

        extract_dir = LOGS_DIR + "/" + id
        # os.mkdir raises FileExistsError if the directory already exists.
        os.mkdir(extract_dir)
        with zipfile.ZipFile(extract_dir + ".zip", 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

        predictor = worker_session.query(Predictor).filter(
            Predictor.id == id).first()
        predictor.total_files = len(os.listdir(extract_dir))
        worker_session.commit()

        # Fan out: one parsing task per tournament log.
        tasks = [
            celery.send_task('wsgi.process_single_log_file', kwargs={
                "tournament_log": tournament_log})
            for tournament_log in read_all_tournaments(extract_dir)
        ]

        # Each task result is indexed by street position:
        # 0=pre_flop, 1=flop, 2=turn, 3=river.
        actions_by_street = {street: [] for street in streets}
        for task in tasks:
            try:
                with allow_join_result():
                    tournament = task.get()
                for index, street in enumerate(streets):
                    actions_by_street[street].extend(tournament[index])
            except Exception:
                # A failed log is counted and reported but must not abort
                # the whole batch. KeyboardInterrupt/SystemExit propagate.
                predictor.failed_files = predictor.failed_files + 1
                traceback.print_exc()
            finally:
                # Progress is committed per file, whether it failed or not.
                predictor.finished_files = predictor.finished_files + 1
                worker_session.commit()

        predictor.status = 'training_model'
        worker_session.commit()

        for street in streets:
            pd.DataFrame(actions_by_street[street]).fillna(0).to_csv(
                PROCESSED_LOGS_FILE_NAME_FORMAT.format(street, id),
                index=False, header=True)

        # All CSVs are written before any training task is dispatched.
        predictor_id = str(predictor.id)
        for street in streets:
            celery.send_task('wsgi.fit_model', kwargs={
                "id": predictor_id,
                "street": street
            })
    finally:
        # Always release the session, including on failure.
        worker_session.close()