# Example #1
# 0
def to_multidevice(batch_iter, num_trainer):
    """Group batches from ``batch_iter`` into lists of ``num_trainer`` batches.

    Args:
        batch_iter: Zero-argument callable returning an iterable of batches.
        num_trainer: Number of trainer devices; every yielded group contains
            exactly this many batches, one per device.

    Yields:
        list: ``num_trainer`` consecutive batches.

    Note:
        A trailing partial group that cannot fill every device is discarded
        (with a warning), matching the original behavior.
    """
    group = []
    for batch in batch_iter():
        group.append(batch)
        if len(group) == num_trainer:
            yield group
            group = []

    if group:
        # Bug fix: the original concatenated '...(%s)' 'which...' with no
        # separating space, producing e.g. "(2)which will be discarded.".
        # Also pass args lazily so formatting is skipped when the warning
        # level is disabled.
        log.warning(
            "The batch (%s) can't fill all device (%s), "
            "which will be discarded.", len(group), num_trainer)
# Example #2
# 0
def run_epoch(
    py_reader,
    exe,
    program,
    prefix,
    model_dict,
    epoch,
    batch_size,
    log_per_step=100,
    save_per_step=10000,
):
    """Run one epoch, periodically logging metrics and saving checkpoints.

    Args:
        py_reader: Zero-argument callable yielding feed dicts for ``exe.run``.
        exe: Paddle executor used to run ``program``.
        program: Paddle program to execute.
        prefix: Phase tag used in log lines; ``"train"`` also enables the
            incomplete-batch skip below.
        model_dict: Object exposing ``loss`` and ``auc`` fetch variables
            (their ``.name`` attributes are fetched).
        epoch: Epoch index (unused in the body; kept for interface
            compatibility with callers).
        batch_size: Expected row count of ``src_index``; training batches of
            any other size are skipped.
        log_per_step: Log loss/AUC every this many batches.
        save_per_step: Save a checkpoint every this many batches.

    Side effects:
        Writes model parameters to the ``checkpoint`` directory and emits
        log lines; a final checkpoint is always saved at epoch end.
    """
    batch = 0
    for batch_feed_dict in py_reader():
        if prefix == "train":
            src_count = batch_feed_dict["src_index"].shape[0]
            if src_count != batch_size:
                # Bug fix: the original message hard-coded "!= 1024" even
                # though the check compares against ``batch_size``; report
                # the actual values instead (lazy %-args).
                log.warning(
                    'batch_feed_dict["src_index"].shape[0] (%s) != '
                    'batch_size (%s), continue', src_count, batch_size)
                continue
        batch_start = time.time()
        batch += 1
        batch_loss, batch_auc = exe.run(
            program,
            feed=batch_feed_dict,
            fetch_list=[model_dict.loss.name, model_dict.auc.name])

        batch_end = time.time()
        if batch % log_per_step == 0:
            log.info(
                "Batch %s %s-Loss %s \t %s-Auc  %s \t Speed(per batch) %.5lf sec"
                % (batch, prefix, np.mean(batch_loss), prefix,
                   np.mean(batch_auc), batch_end - batch_start))
        # ``batch`` is always >= 1 here, so the original "batch != 0"
        # guard was redundant and has been dropped.
        if batch % save_per_step == 0:
            fluid.io.save_params(exe,
                                 dirname='checkpoint',
                                 main_program=program)
    fluid.io.save_params(exe, dirname='checkpoint', main_program=program)