Exemple #1
0
def run(dataset: Dataset, config: TaskConfig):
    """Run the Autoxgboost R script on ``dataset`` and return its timings.

    Invokes ``Rscript`` on the ``exec.R`` file located next to this module
    and calls its ``run`` function with the train/test file paths, the
    target column index, the task type, the predictions output file, the
    number of cores, the time budget and the path of a meta results file.
    That meta results file (``meta_results.csv`` inside ``config.output_dir``)
    is then read back to build the returned dict.

    Args:
        dataset: provides ``train.path``, ``test.path`` and ``target.index``.
        config: provides ``framework_version``, ``type``, ``cores``,
            ``max_runtime_seconds``, ``output_dir`` and
            ``output_predictions_file``.

    Returns:
        A dict with the keys ``training_duration`` and ``predict_duration``,
        each obtained from the meta results through ``meta_result``.
    """
    #TODO: use rpy2 instead? not necessary here though as the call is very simple
    log.info(f"\n**** Autoxgboost (R) [{config.framework_version}] ****\n")

    here = dir_of(__file__)

    meta_results_file = os.path.join(config.output_dir, "meta_results.csv")
    run_cmd((
        "Rscript --vanilla -e \""
        "source('{script}'); "
        "run('{train}', '{test}', target.index = {target_index}, '{type}', '{output}', {cores},"
        " time.budget = {time_budget}, meta_results_file='{meta_results}')"
        "\"").format(script=os.path.join(here, 'exec.R'),
                     train=dataset.train.path,
                     test=dataset.test.path,
                     # shifted by one: presumably a 0-based index converted
                     # to R's 1-based indexing -- confirm against exec.R
                     target_index=dataset.target.index + 1,
                     type=config.type,
                     output=config.output_predictions_file,
                     cores=config.cores,
                     time_budget=config.max_runtime_seconds,
                     meta_results=meta_results_file),
            _live_output_=True)

    log.info("Predictions saved to %s", config.output_predictions_file)

    meta_results = read_csv(meta_results_file)
    return dict(training_duration=meta_result(meta_results,
                                              'training_duration'),
                predict_duration=meta_result(meta_results, 'predict_duration'))
Exemple #2
0
def run(dataset: Dataset, config: TaskConfig):
    """Run the Random Forest R script on a classification ``dataset``.

    Saves the run metadata through ``save_metadata``, then invokes
    ``Rscript`` on the ``exec.R`` file located next to this module and calls
    its ``run`` function with the train/test file paths, the predictions
    output file, the number of cores and the path of a meta results file.
    That meta results file (``meta_results.csv`` inside ``config.output_dir``)
    is then read back to build the returned dict.

    Args:
        dataset: provides ``train.path`` and ``test.path``.
        config: provides ``type``, ``cores``, ``output_dir`` and
            ``output_predictions_file``.

    Returns:
        A dict with the keys ``training_duration`` and ``predict_duration``,
        each obtained from the meta results through ``meta_result``.

    Raises:
        ValueError: if ``config.type`` is not ``'classification'``.
    """
    #TODO: use rpy2 instead? not necessary here though as the call is very simple
    log.info("\n**** Random Forest (R) ****\n")
    save_metadata(config)

    is_classification = config.type == 'classification'
    if not is_classification:
        raise ValueError('Regression is not supported.')

    here = dir_of(__file__)
    meta_results_file = os.path.join(config.output_dir, "meta_results.csv")
    # The command is kept on a single line: the R expression needs neither
    # newlines nor indentation, and this avoids passing embedded newlines
    # through the shell inside the quoted -e argument.
    run_cmd((
        "Rscript --vanilla -e \""
        "source('{script}'); "
        "run('{train}', '{test}', '{output}', cores={cores}, meta_results_file='{meta_results}')"
        "\"").format(script=os.path.join(here, 'exec.R'),
                     train=dataset.train.path,
                     test=dataset.test.path,
                     output=config.output_predictions_file,
                     meta_results=meta_results_file,
                     cores=config.cores),
            _live_output_=True)

    log.info("Predictions saved to %s", config.output_predictions_file)

    meta_results = read_csv(meta_results_file)
    return dict(training_duration=meta_result(meta_results,
                                              'training_duration'),
                predict_duration=meta_result(meta_results, 'predict_duration'))
Exemple #3
0
def run(dataset: Dataset, config: TaskConfig):
    """Run the Random Forest R script on ``dataset`` and return its timings.

    Invokes ``Rscript`` on the ``exec.R`` file located next to this module
    and calls its ``run`` function with the train/test file paths, the
    predictions output file, the number of cores, the path of a meta results
    file and the task type.  The meta results file (``meta_results.csv``
    inside ``config.output_dir``) is then read back to build the result.

    Args:
        dataset: provides ``train.path`` and ``test.path``.
        config: provides ``type``, ``cores``, ``output_dir`` and
            ``output_predictions_file``.

    Returns:
        A dict with the keys ``training_duration`` and ``predict_duration``,
        each obtained from the meta results through ``meta_result``.
    """
    #TODO: use rpy2 instead? not necessary here though as the call is very simple
    log.info("\n**** Random Forest (R) ****\n")

    module_dir = dir_of(__file__)
    meta_results_file = os.path.join(config.output_dir, "meta_results.csv")

    # Shell command template: a single R expression handed to Rscript via -e.
    command_template = (
        "Rscript --vanilla -e \""
        "source('{script}'); "
        "run('{train}', '{test}', '{output}', cores={cores}, meta_results_file='{meta_results}', task_type='{task_type}')"
        "\""
    )
    placeholders = {
        'script': os.path.join(module_dir, 'exec.R'),
        'train': dataset.train.path,
        'test': dataset.test.path,
        'output': config.output_predictions_file,
        'meta_results': meta_results_file,
        'task_type': config.type,
        'cores': config.cores,
    }
    run_cmd(command_template.format(**placeholders), _live_output_=True)

    log.info("Predictions saved to %s", config.output_predictions_file)

    # The R script reports its timings through the meta results file.
    meta_results = read_csv(meta_results_file)
    return {
        duration_key: meta_result(meta_results, duration_key)
        for duration_key in ('training_duration', 'predict_duration')
    }
 def load_data(path):
     """Read the file at ``path`` through ``read_csv`` and return the result.

     ``read_csv`` is called with ``as_data_frame=False`` and ``header=False``
     (presumably: no header row, and a raw array instead of a data frame --
     confirm against ``read_csv``).
     """
     loaded = read_csv(path, header=False, as_data_frame=False)
     return loaded