def train(start, end, label, label_values, model_dir, receipt):
    """Create the train/evaluate CSVs for a date window and train the model.

    ``start`` and ``end`` are date strings in ``'%d-%m-%Y'`` format. The
    training window runs from ``start`` to ``end`` (``end`` is capped at the
    current time); the evaluation window runs from that capped ``end`` to one
    year after the *requested* ``end``.

    The model is only created when the training CSV reports data. When
    ``receipt`` is not None a train receipt is posted at the end, whether or
    not any training happened. Returns None.
    """
    date_format = '%d-%m-%Y'
    train_path = create_train_path()

    start_date = datetime.strptime(start, date_format)
    end_date = datetime.strptime(end, date_format)
    # Derived before the cap below, so it is based on the requested end date.
    next_date = end_date + pd.DateOffset(years=1)
    if end_date > datetime.now():
        end_date = datetime.now()

    learning_cfg = get_learning_cfg(model_dir)

    end_label = end_date.strftime(date_format)
    train_name = "train-players-{}-{}.csv".format(
        start_date.strftime(date_format), end_label)
    evaluate_name = "train-players-{}-{}.csv".format(
        end_label, next_date.strftime(date_format))

    has_data = model_utils.create_csv(
        url=model_utils.EVENT_MODEL_URL,
        filename=local_dir + train_path + train_name,
        start_date=start_date,
        end_date=end_date,
        aws_path=train_path,
    )

    if learning_cfg['evaluate']:
        has_test_data = model_utils.create_csv(
            url=model_utils.EVENT_MODEL_URL,
            filename=local_dir + train_path + evaluate_name,
            start_date=end_date,
            end_date=next_date,
            aws_path=train_path,
        )
        # Explicit comparisons are kept as-is: create_csv's return type is
        # not visible here, so truthiness tests might not be equivalent.
        train_only = has_data == True and has_test_data == False  # noqa: E712
        if train_only:
            evaluate_name = None
        else:
            logger.info('we can evaluate')

    if not has_data:
        logger.info('no data to train')
    else:
        test_filename = None
        if evaluate_name is not None:
            test_filename = train_path + evaluate_name
        match_model.create(
            train=True,
            label=label,
            label_values=label_values,
            model_dir=model_dir,
            train_filename=train_path + train_name,
            test_filename=test_filename,
            init=True,
        )

    if receipt is not None:
        receipt_service.put_receipt(
            receipt_service.TRAIN_RECEIPT_URL, receipt, None)
def predict(data, country, label, label_values, model_dir, receipt):
    """Predict one match for ``country`` and post the result as a receipt.

    ``data`` must provide ``'home'`` and ``'away'`` entries; each value is
    wrapped in a one-element list to build the feature dict passed to
    ``model_utils.predict``. After predicting, the country's model directory
    is handed to ``match_model.tidy_up`` and the response is sent to the
    predict receipt URL. Returns None.

    Raises:
        KeyError: if ``data`` lacks ``'home'`` or ``'away'``.
    """
    # There is no guarantee the predict is on the same day as the train,
    # so we need the history.
    classifier = match_model.create(
        country=country,
        train=False,
        label=label,
        label_values=label_values,
        model_dir=model_dir,
        train_filename='',
        test_filename='',
    )

    # Generate predictions from the model.
    predict_x = {'home': [data['home']], 'away': [data['away']]}
    response = model_utils.predict(
        classifier=classifier,
        predict_x=predict_x,
        label_values=label_values,
    )

    match_model.tidy_up(
        local_dir + '/models/' + model_dir + '/' + country, None, None, None)
    receipt_service.put_receipt(
        receipt_service.PREDICT_RECEIPT_URL, receipt, response)
def predict(data, init, label, label_values, model_dir, receipt):
    """Predict for one player/opponent pairing and post the result.

    ``data`` must provide ``'opponent'``, ``'home'`` and ``'player'``
    entries; each value is sent to ``model_utils.predict`` as a one-element
    list. When ``init`` is truthy the model directory is passed to
    ``match_model.tidy_up`` afterwards. The response is always sent to the
    predict receipt URL. Returns None.
    """
    # There is no guarantee the predict is on the same day as the train,
    # so we need the history.
    classifier = match_model.create(
        train=False,
        label=label,
        label_values=label_values,
        model_dir=model_dir,
        train_filename='',
        test_filename='',
        init=init,
    )

    # Generate predictions from the model.
    opponent = data['opponent']
    home = data['home']
    player = data['player']
    features = {'player': [player], 'opponent': [opponent], 'home': [home]}

    response = model_utils.predict(
        classifier=classifier,
        predict_x=features,
        label_values=label_values,
    )

    if init:
        logger.info('tidying up')
        match_model.tidy_up(
            local_dir + '/models/' + model_dir, None, None, None)

    receipt_service.put_receipt(
        receipt_service.PREDICT_RECEIPT_URL, receipt, response)
def train(data_range, label, label_values, model_dir, train_path, receipt,
          history, history_file):
    """Train the model once per entry of ``data_range`` and record history.

    For every range in ``data_range`` a training CSV is created for that
    range and, when the learning config enables evaluation, an evaluation CSV
    for the next range (as returned by ``get_next_in_range``). The model is
    created only when the training CSV reports data. After each range a
    ``'Success - Partial'`` history entry is written to ``history_file``.

    Once all ranges are processed, a train receipt is posted when ``receipt``
    is not None, and the latest history entry is re-written with status
    ``"Success - Full"``.

    The ``history`` argument is only used when ``data_range`` is empty;
    otherwise it is replaced by the entry created for the last range.
    Returns None.
    """
    for data in data_range:
        learning_cfg = get_learning_cfg(model_dir)
        # Computed once per iteration; it names the evaluation file and, when
        # evaluation is enabled, selects the evaluation data.
        next_range = get_next_in_range(data_range, data)

        train_filename = "train-players" + data.replace('/', '-') + ".csv"
        evaluate_filename = (
            "train-players" + next_range.replace('/', '-') + ".csv")

        train_file_path = local_dir + train_path + train_filename
        evaluate_file_path = local_dir + train_path + evaluate_filename

        has_data = model_utils.create_csv(
            url=model_utils.EVENT_MODEL_URL,
            filename=train_file_path,
            range=data,
            aws_path=train_path,
        )

        if learning_cfg['evaluate']:
            has_test_data = model_utils.create_csv(
                url=model_utils.EVENT_MODEL_URL,
                filename=evaluate_file_path,
                range=next_range,
                aws_path=train_path,
            )
            # Explicit comparisons are kept: create_csv's return type is not
            # visible here, so truthiness tests might not be equivalent.
            if has_data == True and has_test_data == False:  # noqa: E712
                evaluate_filename = None
            else:
                logger.info('we can evaluate')

        if has_data:
            train_filename = train_path + train_filename
            if evaluate_filename is not None:
                evaluate_filename = train_path + evaluate_filename

            match_model.create(
                train=True,
                label=label,
                label_values=label_values,
                model_dir=model_dir,
                train_filename=train_filename,
                test_filename=evaluate_filename,
                init=True,
            )
        else:
            logger.info('no data to train')

        # Write the history for this range.
        (start_day, start_month, start_year,
         end_day, end_month, end_year) = get_range_details(data)
        history = train_history_utils.create_history(
            'Success - Partial',
            start_day, start_month, start_year,
            end_day, end_month, end_year,
        )
        train_history_utils.add_history(history_file, 'default', history)

    if receipt is not None:
        receipt_utils.put_receipt(
            receipt_utils.TRAIN_RECEIPT_URL, receipt, None)

    # With an empty data_range, `history` is still the caller's argument.
    # If that is None there is nothing to mark as complete, so skip the
    # update instead of raising TypeError after the receipt has been sent.
    if history is not None:
        history['status'] = "Success - Full"
        train_history_utils.add_history(history_file, 'default', history)