예제 #1
0
def train(start, end, label, label_values, model_dir, receipt):
    """Build the player CSVs for a date window and train the model on them.

    The training window runs from ``start`` to ``end``; an ``end`` that lies
    in the future is clamped to the current time. When the learning
    configuration has ``evaluate`` set, a second CSV is requested for the
    window from the (clamped) end date to one year after the requested
    ``end``. Training only happens when the training CSV reports data.

    Args:
        start: Window start as a ``'%d-%m-%Y'`` string.
        end: Window end as a ``'%d-%m-%Y'`` string.
        label: Passed through to ``match_model.create``.
        label_values: Passed through to ``match_model.create``.
        model_dir: Model directory; also used to look up the learning config.
        receipt: Receipt to acknowledge once finished; ``None`` skips the
            acknowledgement.

    Raises:
        ValueError: If ``start`` or ``end`` does not match ``'%d-%m-%Y'``.
    """
    date_format = '%d-%m-%Y'
    train_path = create_train_path()

    start_date = datetime.strptime(start, date_format)
    end_date = datetime.strptime(end, date_format)

    # The evaluation window ends one year after the *requested* end date, so
    # it is derived before the clamp below.
    next_date = end_date + pd.DateOffset(years=1)

    # Never request data from the future. The clock is sampled once so the
    # comparison and the assignment use the same instant.
    now = datetime.now()
    if end_date > now:
        end_date = now

    learning_cfg = get_learning_cfg(model_dir)

    train_filename = "train-players-{}-{}.csv".format(
        start_date.strftime(date_format), end_date.strftime(date_format))
    evaluate_filename = "train-players-{}-{}.csv".format(
        end_date.strftime(date_format), next_date.strftime(date_format))
    train_file_path = local_dir + train_path + train_filename
    evaluate_file_path = local_dir + train_path + evaluate_filename

    has_data = model_utils.create_csv(url=model_utils.EVENT_MODEL_URL,
                                      filename=train_file_path,
                                      start_date=start_date,
                                      end_date=end_date,
                                      aws_path=train_path)

    # NOTE(review): when evaluation is disabled the evaluation file name is
    # still handed to match_model.create below although no CSV was requested
    # for it - confirm that the model ignores it in that case.
    if learning_cfg['evaluate']:
        has_test_data = model_utils.create_csv(
            url=model_utils.EVENT_MODEL_URL,
            filename=evaluate_file_path,
            start_date=end_date,
            end_date=next_date,
            aws_path=train_path)

        # Assumes create_csv returns a boolean - TODO confirm.
        if not has_test_data:
            # No evaluation data: train without a test file.
            evaluate_filename = None
        elif has_data:
            # Only announce evaluation when there is something to train on.
            logger.info('we can evaluate')

    if has_data:
        # match_model.create is given names relative to the training path,
        # not the local file paths used to build the CSVs.
        train_filename = train_path + train_filename
        if evaluate_filename is not None:
            evaluate_filename = train_path + evaluate_filename

        match_model.create(train=True,
                           label=label,
                           label_values=label_values,
                           model_dir=model_dir,
                           train_filename=train_filename,
                           test_filename=evaluate_filename,
                           init=True)
    else:
        logger.info('no data to train')

    if receipt is not None:
        receipt_service.put_receipt(receipt_service.TRAIN_RECEIPT_URL, receipt,
                                    None)
def predict(data, country, label, label_values, model_dir, receipt):
    """Predict a single home/away fixture and post the result to a receipt.

    A classifier is created for ``country`` with ``train=False``, fed the
    ``'home'`` and ``'away'`` entries of ``data`` as one-element feature
    lists, and the response is sent to the predict receipt URL. The
    country's model directory is passed to ``match_model.tidy_up`` before
    the receipt is posted.

    Args:
        data: Mapping that provides the ``'home'`` and ``'away'`` values.
        country: Passed to ``match_model.create``; also the last component
            of the model path handed to ``match_model.tidy_up``.
        label: Passed through to ``match_model.create``.
        label_values: Passed to ``match_model.create`` and
            ``model_utils.predict``.
        model_dir: Model directory name below ``local_dir + '/models/'``.
        receipt: Receipt that receives the prediction response.

    Raises:
        KeyError: If ``data`` lacks ``'home'`` or ``'away'``.
    """
    # There is no guarantee that predict runs on the same day as train, so
    # the history is needed.
    classifier = match_model.create(country=country,
                                    train=False,
                                    label=label,
                                    label_values=label_values,
                                    model_dir=model_dir,
                                    train_filename='',
                                    test_filename='')

    # One fixture per call: each feature column holds a single value.
    predict_x = {'home': [data['home']], 'away': [data['away']]}

    response = model_utils.predict(classifier=classifier,
                                   predict_x=predict_x,
                                   label_values=label_values)

    match_model.tidy_up(local_dir + '/models/' + model_dir + '/' + country,
                        None, None, None)
    receipt_service.put_receipt(receipt_service.PREDICT_RECEIPT_URL, receipt,
                                response)
예제 #3
0
def predict(data, init, label, label_values, model_dir, receipt):
    """Predict for one player/opponent/home row and post it to a receipt.

    Args:
        data: Mapping that provides ``'opponent'``, ``'home'`` and
            ``'player'``.
        init: Forwarded to ``match_model.create``; when truthy the model
            directory is also passed to ``match_model.tidy_up`` after the
            prediction.
        label: Passed through to ``match_model.create``.
        label_values: Passed to ``match_model.create`` and
            ``model_utils.predict``.
        model_dir: Model directory name below ``local_dir + '/models/'``.
        receipt: Receipt that receives the prediction response.

    Raises:
        KeyError: If ``data`` lacks one of the three expected keys.
    """
    # There is no guarantee that predict runs on the same day as train, so
    # the history is needed.
    model = match_model.create(
        train=False,
        label=label,
        label_values=label_values,
        model_dir=model_dir,
        train_filename='',
        test_filename='',
        init=init,
    )

    # Read the inputs in a fixed order so a missing key fails predictably.
    opponent_value = data['opponent']
    home_value = data['home']
    player_value = data['player']

    # Each feature column holds exactly one value: a single prediction row.
    features = {
        'player': [player_value],
        'opponent': [opponent_value],
        'home': [home_value],
    }

    prediction = model_utils.predict(
        classifier=model,
        predict_x=features,
        label_values=label_values,
    )

    if init:
        logger.info('tidying up')
        match_model.tidy_up(
            local_dir + '/models/' + model_dir, None, None, None)

    receipt_service.put_receipt(
        receipt_service.PREDICT_RECEIPT_URL, receipt, prediction)
예제 #4
0
def train(data_range, label, label_values, model_dir, train_path, receipt,
          history, history_file):
    """Train the model once per entry of ``data_range`` and record history.

    For every range a training CSV is requested; when the learning
    configuration has ``evaluate`` set, a CSV for the following range (as
    returned by ``get_next_in_range``) is requested as well. The model is
    trained only when the training CSV reports data. A
    ``'Success - Partial'`` history entry is written after every range,
    whether or not it was trained, and the last entry is re-written with
    status ``"Success - Full"`` at the end.

    Args:
        data_range: Iterable of range strings; ``'/'`` is replaced by ``'-'``
            when building file names.
        label: Passed through to ``match_model.create``.
        label_values: Passed through to ``match_model.create``.
        model_dir: Model directory; also used to look up the learning config.
        train_path: Path prefix for the CSV files, appended to ``local_dir``
            locally and passed as ``aws_path``.
        receipt: Receipt to acknowledge once all ranges are processed;
            ``None`` skips the acknowledgement.
        history: History record; replaced by a fresh record for every
            processed range. If ``data_range`` is empty, the caller-supplied
            record is the one marked ``"Success - Full"``.
        history_file: Destination passed to
            ``train_history_utils.add_history``.
    """
    for data in data_range:

        learning_cfg = get_learning_cfg(model_dir)

        # Looked up once and reused for the file name and the CSV request.
        next_data = get_next_in_range(data_range, data)

        train_filename = "train-players" + data.replace('/', '-') + ".csv"
        evaluate_filename = ("train-players" + next_data.replace('/', '-') +
                             ".csv")
        train_file_path = local_dir + train_path + train_filename
        evaluate_file_path = local_dir + train_path + evaluate_filename

        has_data = model_utils.create_csv(url=model_utils.EVENT_MODEL_URL,
                                          filename=train_file_path,
                                          range=data,
                                          aws_path=train_path)

        # NOTE(review): when evaluation is disabled the evaluation file name
        # is still handed to match_model.create below although no CSV was
        # requested for it - confirm that the model ignores it in that case.
        if learning_cfg['evaluate']:
            has_test_data = model_utils.create_csv(
                url=model_utils.EVENT_MODEL_URL,
                filename=evaluate_file_path,
                range=next_data,
                aws_path=train_path)

            # Assumes create_csv returns a boolean - TODO confirm.
            if not has_test_data:
                # No evaluation data: train without a test file.
                evaluate_filename = None
            elif has_data:
                # Only announce evaluation when there is something to train.
                logger.info('we can evaluate')

        if has_data:
            # match_model.create is given names relative to the training
            # path, not the local file paths used to build the CSVs.
            train_filename = train_path + train_filename
            if evaluate_filename is not None:
                evaluate_filename = train_path + evaluate_filename

            match_model.create(train=True,
                               label=label,
                               label_values=label_values,
                               model_dir=model_dir,
                               train_filename=train_filename,
                               test_filename=evaluate_filename,
                               init=True)
        else:
            logger.info('no data to train')

        # Record progress after every range so an interrupted run leaves a
        # partial-success trail.
        (start_day, start_month, start_year, end_day, end_month,
         end_year) = get_range_details(data)
        history = train_history_utils.create_history('Success - Partial',
                                                     start_day, start_month,
                                                     start_year, end_day,
                                                     end_month, end_year)
        train_history_utils.add_history(history_file, 'default', history)

    if receipt is not None:
        receipt_utils.put_receipt(receipt_utils.TRAIN_RECEIPT_URL, receipt,
                                  None)

    # Promote the most recent history record to a full success.
    history['status'] = "Success - Full"
    train_history_utils.add_history(history_file, 'default', history)