Example #1
def update_network(queue, nn_update_idx, symbol_filename, params_filename,
                   convert_to_onnx, main_config, train_config: TrainConfig,
                   model_contender_dir):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
     training directory
    :param queue: Queue object used to return items
    :param nn_update_idx: Defines how many updates of the nn has already been done. This index should be incremented
    after every update.
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    Updates the neural network with the newly acquired games from the replay memory
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :param main_config: Dict of the main_config (imported from main_config.py)
    :param train_config: Dict of the train_config (imported from train_config.py)
    :param model_contender_dir: String of the contender directory path
    :return: k_steps_final
    """

    # set the context (CPU or GPU); a GPU is strongly recommended for training
    ctx = mx.gpu(
        train_config.device_id) if train_config.context == "gpu" else mx.cpu()
    # count the available training part files (.zip archives)
    train_config.nb_parts = len(
        glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number of parts for training: %d" % train_config.nb_parts)
    train_objects = TrainObjects()

    if train_config.nb_parts <= 0:
        raise Exception(
            'No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
            ' %s' % main_config["planes_train_dir"])

    _, x_val, y_val_value, y_val_policy, _, _ = load_pgn_dataset(
        dataset_type="val",
        part_id=0,
        normalize=train_config.normalize,
        verbose=False,
        q_value_ratio=train_config.q_value_ratio)
    y_val_policy = prepare_policy(y_val_policy,
                                  train_config.select_policy_from_plane,
                                  train_config.sparse_policy_label,
                                  train_config.is_policy_from_plane_data)
    val_dataset = gluon.data.ArrayDataset(nd.array(x_val),
                                          nd.array(y_val_value),
                                          nd.array(y_val_policy))
    val_data = gluon.data.DataLoader(val_dataset,
                                     train_config.batch_size,
                                     shuffle=False,
                                     num_workers=train_config.cpu_count)

    symbol = mx.sym.load(symbol_filename)

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) *
                       train_config.nb_parts) // train_config.batch_size
    # one iteration is defined by passing 1 batch and doing backprop
    train_config.total_it = int(nb_it_per_epoch *
                                train_config.nb_training_epochs)

    # cosine annealing across roughly 70% of the total iterations...
    train_objects.lr_schedule = CosineAnnealingSchedule(
        train_config.min_lr, train_config.max_lr,
        max(train_config.total_it * .7, 1))
    # ...wrapped with a linear warm-up over the first 25% of iterations
    train_objects.lr_schedule = LinearWarmUp(train_objects.lr_schedule,
                                             start_lr=train_config.min_lr,
                                             length=max(
                                                 train_config.total_it * .25,
                                                 1))
    # derive the momentum schedule from the learning rate schedule
    train_objects.momentum_schedule = MomentumSchedule(
        train_objects.lr_schedule, train_config.min_lr, train_config.max_lr,
        train_config.min_momentum, train_config.max_momentum)

    input_shape = x_val[0].shape
    inputs = mx.sym.var('data', dtype='float32')
    # extract the value and policy heads and group them into a single symbol
    value_out = symbol.get_internals()[main_config['value_output'] + '_output']
    policy_out = symbol.get_internals()[main_config['policy_output'] +
                                        '_output']
    sym = mx.symbol.Group([value_out, policy_out])
    net = mx.gluon.SymbolBlock(sym, inputs)
    net.collect_params().load(params_filename, ctx)

    metrics_gluon = {
        'value_loss':
        metric.MSE(name='value_loss', output_names=['value_output']),
        'value_acc_sign':
        metric.create(acc_sign,
                      name='value_acc_sign',
                      output_names=['value_output'],
                      label_names=['value_label']),
    }

    if train_config.sparse_policy_label:
        print("train with sparse labels")
        # the default cross entropy only supports sparse labels
        metrics_gluon['policy_loss'] = metric.CrossEntropy(
            name='policy_loss',
            output_names=['policy_output'],
            label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.Accuracy(
            axis=1,
            name='policy_acc',
            output_names=['policy_output'],
            label_names=['policy_label'])
    else:
        metrics_gluon['policy_loss'] = metric.create(
            cross_entropy,
            name='policy_loss',
            output_names=['policy_output'],
            label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.create(
            acc_distribution,
            name='policy_acc',
            output_names=['policy_output'],
            label_names=['policy_label'])

    train_objects.metrics = metrics_gluon

    train_config.export_weights = False  # don't save intermediate weights
    train_agent = TrainerAgent(net,
                               val_data,
                               train_config,
                               train_objects,
                               use_rtpt=False)

    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config.k_steps_initial * train_config.batch_steps
    (k_steps_final, val_value_loss_final, val_policy_loss_final,
     val_value_acc_sign_final,
     val_policy_acc_final), _ = train_agent.train(cur_it)

    prefix = "%smodel-%.5f-%.5f-%.3f-%.3f" % (
        model_contender_dir, val_value_loss_final, val_policy_loss_final,
        val_value_acc_sign_final, val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx

    # the export function saves both the architecture and the weights
    net.export(prefix, epoch=nn_update_idx)
    print()
    logging.info("Saved checkpoint to %s-%04d.params", prefix, nn_update_idx)

    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file,
                                    ["value_out_output", "policy_out_output"],
                                    input_shape, [1, 8, 16], False)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
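
Since update_network() returns its result through the queue, it is typically launched in a separate process. A minimal usage sketch (the file names, directory and config objects below are placeholders, not taken from the example itself):

from multiprocessing import Process, Queue

queue = Queue()
process = Process(target=update_network,
                  args=(queue, 0, "model/model-symbol.json", "model/model-0000.params",
                        True, main_config, train_config, "model_contender/"))
process.start()
k_steps_final = queue.get()  # blocks until update_network() puts its result
process.join()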
Example #2
def update_network(queue, nn_update_idx, k_steps_initial, max_lr,
                   symbol_filename, params_filename, cwd, convert_to_onnx):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
     training directory
    :param queue: Queue object used to return items
    :param k_steps_initial: Initial amount of steps of the NN update
    :param nn_update_idx: Defines how many updates of the nn has already been done. This index should be incremented
    after every update.
    :param max_lr: Maximum learning rate used for the learning rate schedule
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    Updates the neural network with the newly acquired games from the replay memory
    :param cwd: Current working directory (must end with "/")
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :return: k_steps_final
    """

    # set the context (CPU or GPU); a GPU is strongly recommended for training
    ctx = mx.gpu(train_config["device_id"]
                 ) if train_config["context"] == "gpu" else mx.cpu()
    # count the available training part files (.zip archives)
    nb_parts = len(glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number of parts: %d" % nb_parts)

    if nb_parts <= 0:
        raise Exception(
            'No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
            ' %s' % main_config["planes_train_dir"])

    _, x_val, y_val_value, y_val_policy, _, _ = load_pgn_dataset(
        dataset_type="val",
        part_id=0,
        normalize=train_config["normalize"],
        verbose=False,
        q_value_ratio=train_config["q_value_ratio"])

    y_val_policy = prepare_policy(y_val_policy,
                                  train_config["select_policy_from_plane"],
                                  train_config["sparse_policy_label"])

    symbol = mx.sym.load(symbol_filename)
    if not train_config["sparse_policy_label"]:
        symbol = add_non_sparse_cross_entropy(
            symbol, train_config["val_loss_factor"],
            train_config["value_output"] + "_output",
            train_config["policy_output"] + "_output")

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) * nb_parts) // train_config["batch_size"]
    # one iteration is defined by passing 1 batch and doing backprop
    total_it = int(nb_it_per_epoch * train_config["nb_epochs"])

    # cosine annealing across roughly 70% of the total iterations...
    lr_schedule = CosineAnnealingSchedule(train_config["min_lr"], max_lr,
                                          max(total_it * .7, 1))
    # ...wrapped with a linear warm-up over the first 25% of iterations
    lr_schedule = LinearWarmUp(lr_schedule,
                               start_lr=train_config["min_lr"],
                               length=max(total_it * .25, 1))
    # derive the momentum schedule from the learning rate schedule
    momentum_schedule = MomentumSchedule(lr_schedule, train_config["min_lr"],
                                         max_lr, train_config["min_momentum"],
                                         train_config["max_momentum"])

    if train_config["select_policy_from_plane"]:
        val_iter = mx.io.NDArrayIter({'data': x_val}, {
            'value_label': y_val_value,
            'policy_label': y_val_policy
        }, train_config["batch_size"])
    else:
        val_iter = mx.io.NDArrayIter({'data': x_val}, {
            'value_label': y_val_value,
            'policy_label': y_val_policy
        }, train_config["batch_size"])

    input_shape = x_val[0].shape
    model = mx.mod.Module(symbol=symbol,
                          context=ctx,
                          label_names=['value_label', 'policy_label'])
    # mx.viz.print_summary(
    #     symbol,
    #     shape={'data': (1, input_shape[0], input_shape[1], input_shape[2])},
    # )
    model.bind(for_training=True,
               data_shapes=[('data',
                             (train_config["batch_size"], input_shape[0],
                              input_shape[1], input_shape[2]))],
               label_shapes=val_iter.provide_label)
    model.load_params(params_filename)

    metrics = [
        mx.metric.MSE(name='value_loss',
                      output_names=['value_output'],
                      label_names=['value_label']),
        mx.metric.create(acc_sign,
                         name='value_acc_sign',
                         output_names=['value_output'],
                         label_names=['value_label']),
    ]

    if train_config["sparse_policy_label"]:
        print("train with sparse labels")
        # the default cross entropy only supports sparse labels
        metrics.append(
            mx.metric.Accuracy(axis=1,
                               name='policy_acc',
                               output_names=['policy_output'],
                               label_names=['policy_label']))
        metrics.append(
            mx.metric.CrossEntropy(name='policy_loss',
                                   output_names=['policy_output'],
                                   label_names=['policy_label']))
    else:
        metrics.append(
            mx.metric.create(acc_distribution,
                             name='policy_acc',
                             output_names=['policy_output'],
                             label_names=['policy_label']))
        metrics.append(
            mx.metric.create(cross_entropy,
                             name='policy_loss',
                             output_names=['policy_output'],
                             label_names=['policy_label']))

    logging.info("Performance pre training")
    logging.info(model.score(val_iter, metrics))

    train_agent = TrainerAgentMXNET(
        model,
        symbol,
        val_iter,
        nb_parts,
        lr_schedule,
        momentum_schedule,
        total_it,
        train_config["optimizer_name"],
        wd=train_config["wd"],
        batch_steps=train_config["batch_steps"],
        k_steps_initial=k_steps_initial,
        cpu_count=train_config["cpu_count"],
        batch_size=train_config["batch_size"],
        normalize=train_config["normalize"],
        export_weights=train_config["export_weights"],
        export_grad_histograms=train_config["export_grad_histograms"],
        log_metrics_to_tensorboard=train_config["log_metrics_to_tensorboard"],
        ctx=ctx,
        metrics=metrics,
        use_spike_recovery=train_config["use_spike_recovery"],
        max_spikes=train_config["max_spikes"],
        spike_thresh=train_config["spike_thresh"],
        seed=None,
        val_loss_factor=train_config["val_loss_factor"],
        policy_loss_factor=train_config["policy_loss_factor"],
        select_policy_from_plane=train_config["select_policy_from_plane"],
        discount=train_config["discount"],
        sparse_policy_label=train_config["sparse_policy_label"],
        q_value_ratio=train_config["q_value_ratio"],
        cwd=cwd)
    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config["k_steps_initial"] * train_config["batch_steps"]
    (k_steps_final, val_value_loss_final, val_policy_loss_final,
     val_value_acc_sign_final,
     val_policy_acc_final), _ = train_agent.train(cur_it)

    if not train_config["sparse_policy_label"]:
        symbol = remove_no_sparse_cross_entropy(
            symbol, train_config["val_loss_factor"],
            train_config["value_output"] + "_output",
            train_config["policy_output"] + "_output")
    prefix = cwd + "model_contender/model-%.5f-%.5f-%.3f-%.3f" % (
        val_value_loss_final, val_policy_loss_final, val_value_acc_sign_final,
        val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx
    symbol.save(sym_file)
    model.save_params(params_file)

    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file,
                                    ["value_out_output", "policy_out_output"],
                                    input_shape, [1, 8, 16], False)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
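
LinearWarmUp and CosineAnnealingSchedule compose: the warm-up ramps the learning rate linearly over the first 25% of the iterations and then hands over to the cosine annealing phase. A minimal sketch of probing the combined schedule, assuming the schedule objects are callable with the current iteration (an assumption; their implementations are not shown in these examples) and using placeholder values:

total_it = 1000
schedule = CosineAnnealingSchedule(0.0001, 0.35, max(total_it * .7, 1))
schedule = LinearWarmUp(schedule, start_lr=0.0001, length=max(total_it * .25, 1))
for it in (0, 125, 250, 500, 950):
    print(it, schedule(it))  # rises linearly until it=250, then decays along a cosine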
Example #3
def update_network(queue, nn_update_idx, symbol_filename, params_filename, tar_filename, convert_to_onnx, main_config, train_config: TrainConfig, model_contender_dir):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
     training directory
    :param queue: Queue object used to return items
    :param nn_update_idx: Defines how many updates of the nn has already been done. This index should be incremented
    after every update.
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    :param tar_filename: Filepath to the model for pytorch
    Updates the neural network with the newly acquired games from the replay memory
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :param main_config: Dict of the main_config (imported from main_config.py)
    :param train_config: Dict of the train_config (imported from train_config.py)
    :param model_contender_dir: String of the contender directory path
    :return: k_steps_final
    """

    # set the context (CPU or GPU); a GPU is strongly recommended for training
    ctx = mx.gpu(train_config.device_id) if train_config.context == "gpu" else mx.cpu()
    # count the available training part files (.zip archives)
    train_config.nb_parts = len(glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number of parts for training: %d" % train_config.nb_parts)
    train_objects = TrainObjects()

    if train_config.nb_parts <= 0:
        raise Exception('No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
                        ' %s' % main_config["planes_train_dir"])

    val_data, x_val = _get_val_loader(train_config)

    input_shape = x_val[0].shape
    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) * train_config.nb_parts) // train_config.batch_size
    # one iteration is defined by passing 1 batch and doing backprop
    train_config.total_it = int(nb_it_per_epoch * train_config.nb_training_epochs)

    # cosine annealing across roughly 70% of the total iterations...
    train_objects.lr_schedule = CosineAnnealingSchedule(train_config.min_lr, train_config.max_lr, max(train_config.total_it * .7, 1))
    # ...wrapped with a linear warm-up over the first 25% of iterations
    train_objects.lr_schedule = LinearWarmUp(train_objects.lr_schedule, start_lr=train_config.min_lr, length=max(train_config.total_it * .25, 1))
    # derive the momentum schedule from the learning rate schedule
    train_objects.momentum_schedule = MomentumSchedule(train_objects.lr_schedule, train_config.min_lr, train_config.max_lr,
                                                       train_config.min_momentum, train_config.max_momentum)

    net = _get_net(ctx, input_shape, main_config, params_filename, symbol_filename, tar_filename, train_config)

    train_objects.metrics = get_metrics(train_config)

    train_config.export_weights = True  # save intermediate results to handle spikes
    if train_config.framework == 'gluon':
        train_agent = TrainerAgentGluon(net, val_data, train_config, train_objects, use_rtpt=False)
    elif train_config.framework == 'pytorch':
        train_agent = TrainerAgentPytorch(net, val_data, train_config, train_objects, use_rtpt=False)
    else:
        raise ValueError("Unsupported framework: %s" % train_config.framework)

    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config.k_steps_initial * train_config.batch_steps
    (k_steps_final, val_value_loss_final, val_policy_loss_final, val_value_acc_sign_final,
     val_policy_acc_final), (_, _) = train_agent.train(cur_it)
    prefix = "%smodel-%.5f-%.5f-%.3f-%.3f" % (model_contender_dir, val_value_loss_final, val_policy_loss_final,
                                                                   val_value_acc_sign_final, val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx

    _export_net(convert_to_onnx, input_shape, k_steps_final, net, nn_update_idx, params_file, prefix, sym_file,
                train_config, model_contender_dir)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
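
All three variants write checkpoints with the same naming scheme: the final validation metrics are encoded in the file prefix and the update index in the params file name. A small illustration with made-up metric values:

prefix = "%smodel-%.5f-%.5f-%.3f-%.3f" % ("model_contender/", 0.51234, 1.23456, 0.641, 0.523)
print(prefix + "-symbol.json")      # model_contender/model-0.51234-1.23456-0.641-0.523-symbol.json
print(prefix + "-%04d.params" % 7)  # model_contender/model-0.51234-1.23456-0.641-0.523-0007.params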