type=int,
                        dest='early_stopping_rounds',
                        required=False,
                        help='Number of round for early stopping')

    # Optional string option; the parsed value is stored on args.xgb_params.
    parser.add_argument('--xgbparams',
                        type=str,
                        dest='xgb_params',
                        required=False,
                        help='Parameters of XGBoost constructor')

    args = parser.parse_args()

    # Banner naming the running script file.
    print("#### Started %s ####" % os.path.basename(__file__))

    # Load the training data. The literal 1 is read_csv_dataset's second
    # argument -- presumably the number of dependent (target) columns;
    # TODO confirm against read_csv_dataset.
    head_train, df_independent_train, df_dependent_train = read_csv_dataset(
        args.train_dataset_filename, 1)
    # The evaluation set always starts with the training pair itself.
    eval_set = [(df_independent_train, df_dependent_train)]

    # The validation dataset is optional; when a filename was given, its
    # (independent, dependent) pair is appended as a further evaluation pair.
    if args.val_dataset_filename is not None:
        head_val, df_independent_val, df_dependent_val = read_csv_dataset(
            args.val_dataset_filename, 1)
        eval_set.append((df_independent_val, df_dependent_val))

    # The regressor is constructed from the keyword arguments returned by
    # prepare_kwargs_for_regressor(args).
    xgb_kwargs = prepare_kwargs_for_regressor(args)
    model = xgb.XGBRegressor(**xgb_kwargs)

    # Wall-clock timestamp taken right before the fit call that follows.
    start_time = time.time()
    model.fit(df_independent_train,
              df_dependent_train,
              eval_set=eval_set,
              eval_metric=args.val_metrics,
        type=str,
        dest='dumpout_path',
        required=False,
        help='Dump directory (directory to store metric values)')

    # Optional string option; the parsed value is stored on args.xgb_params.
    parser.add_argument(
        '--xgbparams',
        dest='xgb_params',
        type=str,
        required=False,
        help='Parameters of XGBoost constructor',
    )

    args = parser.parse_args()

    # Resolve the script name once; it is reused by the start/end banners.
    script_name = os.path.basename(__file__)
    print("#### Started %s ####" % script_name)

    # Load the training data; args.num_of_dependent_columns is handed to
    # read_csv_dataset as its second argument.
    csv_parts = read_csv_dataset(
        args.train_dataset_filename,
        args.num_of_dependent_columns,
    )
    head_train, df_independent_train, df_dependent_train = csv_parts

    # Wrap a single XGBoost regressor in a multi-output regressor.
    xgb_kwargs = prepare_kwargs_for_regressor(args)
    base_regressor = xgb.XGBRegressor(**xgb_kwargs)
    model = sklmo.MultiOutputRegressor(base_regressor)

    # Time only the fit call and report it as HH:MM:SS.
    start_time = time.time()
    model.fit(df_independent_train, df_dependent_train)
    elapsed_time = time.time() - start_time
    training_clock = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
    print("Training time:", training_clock)

    # Persist the fitted model to the path given on the command line.
    jl.dump(model, args.model_file)
    print("Generated one-variable function xgboost model '%s'"
          % args.model_file)

    print("#### Terminated %s ####" % script_name)
# Example #3
# 0
    # Zero or more measure names; defaults to an empty list when the option
    # is omitted. Parsed values are stored on args.measures.
    parser.add_argument(
        '--measures',
        dest='measures',
        nargs='+',
        type=str,
        default=[],
        required=False,
        help='List of built-in sklearn regression metrics to compare prediction with input dataset',
    )

    args = parser.parse_args()

    # Resolve the script name once; it is reused by the start/end banners.
    script_name = os.path.basename(__file__)
    print("#### Started %s ####" % script_name)

    # Load the dataset to predict on. The literal 1 is read_csv_dataset's
    # second argument -- presumably the number of dependent columns;
    # TODO confirm against read_csv_dataset.
    csv_parts = read_csv_dataset(args.df_prediction, 1)
    head, df_independent, df_dependent = csv_parts

    # NOTE(review): jl is presumably joblib, whose load() unpickles data --
    # only load model files from trusted sources; confirm the alias.
    model = jl.load(args.model_file)

    # Time only the predict call and report it as HH:MM:SS.
    start_time = time.time()
    prediction = model.predict(df_independent)
    elapsed_time = time.time() - start_time
    predicting_clock = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
    print("Predicting time:", predicting_clock)

    # Hand the results to the measure and output helpers, in this order.
    compute_measures(df_dependent, prediction)
    save_prediction(df_independent, prediction)

    print("#### Terminated %s ####" % script_name)
    # Zero or more measure names; defaults to an empty list when the option
    # is omitted. Parsed values are stored on args.measures.
    parser.add_argument(
        '--measures',
        dest='measures',
        nargs='+',
        type=str,
        default=[],
        required=False,
        help='List of built-in sklearn regression measures to compare prediction with input dataset',
    )

    args = parser.parse_args()

    # Resolve the script name once; it is reused by the start/end banners.
    script_name = os.path.basename(__file__)
    print("#### Started %s ####" % script_name)

    # Load the prediction dataset and remember its column names as a list.
    df_prediction = read_csv_dataset(args.df_prediction)
    column_index = df_prediction.columns
    columns = column_index.tolist()

    final_model = pcr.load_model(args.model_file)

    # Time only the predict_model call and report it as HH:MM:SS.
    start_time = time.time()
    prediction = pcr.predict_model(final_model, data=df_prediction)
    elapsed_time = time.time() - start_time
    predicting_clock = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
    print("Predicting time:", predicting_clock)

    # Hand the results to the measure and output helpers, in this order.
    compute_measures(prediction)
    save_prediction(columns, prediction)

    print("#### Terminated %s ####" % script_name)
    # Zero or more measure names; defaults to an empty list when the option
    # is omitted. Parsed values are stored on args.measures.
    parser.add_argument(
        '--measures',
        dest='measures',
        nargs='+',
        type=str,
        default=[],
        required=False,
        help='List of built-in sklearn regression metrics to compare prediction with input dataset',
    )

    args = parser.parse_args()

    # Resolve the script name once; it is reused by the start/end banners.
    script_name = os.path.basename(__file__)
    print("#### Started %s ####" % script_name)

    # Load the dataset to predict on; args.num_of_dependent_columns is
    # handed to read_csv_dataset as its second argument.
    csv_parts = read_csv_dataset(
        args.df_prediction,
        args.num_of_dependent_columns,
    )
    head, df_independent, df_dependent = csv_parts

    # NOTE(review): jl is presumably joblib, whose load() unpickles data --
    # only load model files from trusted sources; confirm the alias.
    model = jl.load(args.model_file)

    # Time only the predict call and report it as HH:MM:SS.
    start_time = time.time()
    prediction = model.predict(df_independent)
    elapsed_time = time.time() - start_time
    predicting_clock = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
    print("Predicting time:", predicting_clock)

    # Hand the results to the measure and output helpers, in this order.
    compute_measures(df_dependent, prediction)
    save_prediction(df_independent, prediction)

    print("#### Terminated %s ####" % script_name)