# Example 1 — ingestion main: single train + predict pass
def main():
    """Ingestion entry point.

    Parses the CLI arguments, loads the dataset, trains the user model
    under the dataset's time budget, and writes the resulting
    predictions to the output directory.
    """
    LOGGER.info('===== Start ingestion program.')
    # Parse directories from input arguments
    LOGGER.info('===== Initialize args.')
    args = _parse_args()

    _init_python_path(args)

    # Signals to the scoring side that ingestion has started.
    write_start_file(args.output_dir)

    LOGGER.info('===== Load data.')
    dataset = Dataset(args.dataset_dir)
    # Fetch the metadata dict once instead of calling get_metadata()
    # three times (the original re-queried it per field).
    metadata = dataset.get_metadata()
    time_budget = metadata.get("time_budget")
    # time_budget = time_budget * 2  # used for off-line search
    n_class = metadata.get("n_class")
    schema = metadata.get("schema")

    LOGGER.info(f"Time budget: {time_budget}")

    LOGGER.info("===== import user model")
    umodel = init_usermodel(args.seed)

    LOGGER.info("===== Begin training user model")
    timer = _init_timer(time_budget)
    predictions = _train_predict(umodel, dataset, timer, n_class, schema)
    _write_predict(args.output_dir, predictions)

    _finalize(args, timer)
# Example 2 — standalone prediction phase with persisted model/timer state
def _predict(args):
    """Run one prediction phase over the dataset.

    Restores the timer and user model saved by a previous phase,
    predicts records starting at ``args.idx`` until the dataset is
    exhausted or the model requests an 'update' step, writes the
    predictions, and persists the model and timer state again.

    Returns a dict that always contains 'status' ('success',
    'timeout' or 'failed'); on success it also contains 'idx',
    'is_end' and 'next_step'.
    """
    result = {}
    try:
        timer = Timer.from_file(join(args.temp_dir, 'timer.yaml'))
        LOGGER.info("===== Load test data")
        dataset = Dataset(args.dataset_dir)
        args.time_budget = dataset.get_metadata().get("time_budget")
        # Make the user's model package importable.
        path.append(args.model_dir)
        LOGGER.info('==== Load user model')
        umodel = init_usermodel(dataset)
        with timer.time_limit('load'):
            umodel.load(args.temp_dir, timer.get_all_remain())

        LOGGER.info('==== start predicting')
        idx = args.idx
        y_preds = []
        # BUG FIX: when the dataset is already exhausted the loop body
        # never runs, leaving `next_step` unbound; building `result`
        # below then raised NameError, which the broad except turned
        # into a spurious 'failed' status. Initialize it up front.
        next_step = None
        while not dataset.is_end(idx):
            history = dataset.get_history(idx)
            pred_record = dataset.get_next_pred(idx)
            with timer.time_limit('predict', verbose=False):
                y_pred, next_step = umodel.predict(
                    history, pred_record, timer.get_all_remain())
            y_preds.extend(y_pred)
            idx += 1
            if next_step == 'update':
                result['is_end'] = False
                break
        else:
            # Loop finished without break: the whole dataset was consumed.
            result['is_end'] = True

        # Write predictions to output_dir
        _write_predict(idx, args.output_dir, y_preds)
        result = {
            **result,
            'idx': idx,
            'status': 'success',
            'next_step': next_step,
        }

        with timer.time_limit('save'):
            umodel.save(args.temp_dir, timer.get_all_remain())
        timer.save(join(args.temp_dir, 'timer.yaml'))

    except TimeoutException as ex:
        LOGGER.error(ex, exc_info=True)
        result['status'] = 'timeout'
    except Exception as ex:
        LOGGER.error(ex, exc_info=True)
        result['status'] = 'failed'

    return result
# Example 3 — ingestion main with alive-thread and predict/update loop
def main():
    """Ingestion entry point: start the alive heartbeat, train the user
    model, then alternate predict/update steps until the dataset ends."""
    LOGGER.info('===== Start ingestion program.')
    # Parse directories from input arguments
    LOGGER.info('===== Initialize args.')
    args = _parse_args()
    _init_python_path(args)

    LOGGER.info('===== Set alive_thd')
    # Background heartbeat so the scoring side knows ingestion is alive.
    heartbeat = threading.Thread(target=write_start_file,
                                 name="alive",
                                 args=(args.output_dir, ))
    heartbeat.daemon = True
    heartbeat.start()

    LOGGER.info('===== Load data.')
    dataset = Dataset(args.dataset_dir)
    args.time_budget = dataset.get_metadata().get("time_budget")

    for key, value in args.time_budget.items():
        LOGGER.info(f"Time budget for {key}: {value}")

    LOGGER.info("===== import user model")
    umodel = init_usermodel(dataset)

    LOGGER.info("===== Begin training user model")
    timer = _init_timer(args.time_budget)

    next_step = _train(umodel, dataset, timer)
    idx, n_update = 0, 0
    finished = False
    while not finished:
        if next_step == 'update':
            n_update += 1
            LOGGER.info(f"===== update ({n_update})")
            next_step = _update(umodel, idx, dataset, timer)
        elif next_step == 'predict':
            LOGGER.info("===== predict")
            idx, next_step, finished = _predict(umodel, args, idx, timer)
        else:
            raise IngestionError(f"wrong next_step [{next_step}], "
                                 "should be {predict, update}")

    _finalize(args, timer, n_update)
def main():
    """Offline evaluation entry point.

    Runs the full train/predict pipeline on several local datasets and
    logs the validation/test accuracies and wall-clock time for each.
    """
    LOGGER.info('===== Start ingestion program.')
    # Parse directories from input arguments
    LOGGER.info('===== Initialize args.')
    args = _parse_args()
    _init_python_path(args)

    # Fixed the pervasive "accuarcy" misspelling in locals and log text.
    accuracies = []
    overall_time_spents = []
    valid_accuracies1 = []
    valid_accuracies2 = []
    accuracies1 = []
    accuracies2 = []

    # Dataset 'c' is deliberately excluded from this offline run.
    for name in ['a', 'b', 'd', 'e']:
        LOGGER.info(f'===== Start Dataset {name}')
        root_dir = _here(os.pardir)
        args.dataset_dir = join(root_dir, "data/" + name + "/train.data")

        LOGGER.info('===== Load data.')
        dataset = Dataset(args.dataset_dir)
        # Fetch metadata once (was queried per field); the budget is
        # stretched 10x for offline search.
        metadata = dataset.get_metadata()
        time_budget = metadata.get("time_budget") * 10
        n_class = metadata.get("n_class")
        schema = metadata.get("schema")
        LOGGER.info(f"Time budget: {time_budget}")

        LOGGER.info("===== import user model")
        umodel = init_usermodel()

        LOGGER.info("===== Begin training user model")
        timer = _init_timer(time_budget)
        predictions, valid_acc1, valid_acc2, preds1, preds2 = _train_predict(
            umodel, dataset, timer, n_class, schema)
        valid_accuracies1.append(valid_acc1)
        valid_accuracies2.append(valid_acc2)
        LOGGER.info(f"valid accuracy1:{valid_acc1}")
        LOGGER.info(f"valid accuracy2:{valid_acc2}")

        # Hoist the label array and test size out of the three comparisons.
        labels = dataset.test_label['label'].values
        n_test = predictions.shape[0]
        # NOTE(review): all three rates are normalized by len(predictions);
        # assumes preds1/preds2 have that same length — confirm.
        accuracy1 = (labels == preds1).sum() / n_test
        accuracy2 = (labels == preds2).sum() / n_test
        accuracy = (labels == predictions).sum() / n_test

        accuracies1.append(accuracy1)
        LOGGER.info(f"test accuracy1:{accuracy1}")
        accuracies2.append(accuracy2)
        LOGGER.info(f"test accuracy2:{accuracy2}")
        accuracies.append(accuracy)
        LOGGER.info(f"test accuracy:{accuracy}")

        overall_time_spent = timer.get_overall_duration()
        LOGGER.info(f"time spent:{overall_time_spent}")
        overall_time_spents.append(overall_time_spent)

    LOGGER.info(f"valid accuracies1:{valid_accuracies1}")
    LOGGER.info(f"valid accuracies2:{valid_accuracies2}")
    LOGGER.info(f"test accuracies1:{accuracies1}")
    LOGGER.info(f"test accuracies2:{accuracies2}")
    LOGGER.info(f"test accuracies:{accuracies}")
    LOGGER.info(f"time spents:{overall_time_spents}")