def update_network(queue, nn_update_idx, symbol_filename, params_filename, convert_to_onnx, main_config,
                   train_config: TrainConfig, model_contender_dir):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
    training directory. Updates the neural network with the newly acquired games from the replay memory.
    :param queue: Queue object used to return items
    :param nn_update_idx: Defines how many updates of the NN have already been done. This index should be incremented
    after every update.
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :param main_config: Dict of the main_config (imported from main_config.py)
    :param train_config: TrainConfig object (imported from train_config.py)
    :param model_contender_dir: String of the contender directory path
    :return: k_steps_final
    """
    # set the context on CPU, switch to GPU if there is one available (strongly recommended for training)
    ctx = mx.gpu(train_config.device_id) if train_config.context == "gpu" else mx.cpu()
    # set a specific seed value for reproducibility
    train_config.nb_parts = len(glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number parts for training: %d" % train_config.nb_parts)
    train_objects = TrainObjects()

    if train_config.nb_parts <= 0:
        raise Exception('No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
                        ' %s' % main_config["planes_train_dir"])

    _, x_val, y_val_value, y_val_policy, _, _ = load_pgn_dataset(dataset_type="val", part_id=0,
                                                                 normalize=train_config.normalize,
                                                                 verbose=False,
                                                                 q_value_ratio=train_config.q_value_ratio)
    y_val_policy = prepare_policy(y_val_policy, train_config.select_policy_from_plane,
                                  train_config.sparse_policy_label, train_config.is_policy_from_plane_data)
    val_dataset = gluon.data.ArrayDataset(nd.array(x_val), nd.array(y_val_value), nd.array(y_val_policy))
    val_data = gluon.data.DataLoader(val_dataset, train_config.batch_size, shuffle=False,
                                     num_workers=train_config.cpu_count)

    symbol = mx.sym.load(symbol_filename)

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) * train_config.nb_parts) // train_config.batch_size
    # one iteration is defined by passing 1 batch and doing backprop
    train_config.total_it = int(nb_it_per_epoch * train_config.nb_training_epochs)

    train_objects.lr_schedule = CosineAnnealingSchedule(train_config.min_lr, train_config.max_lr,
                                                        max(train_config.total_it * .7, 1))
    train_objects.lr_schedule = LinearWarmUp(train_objects.lr_schedule, start_lr=train_config.min_lr,
                                             length=max(train_config.total_it * .25, 1))
    train_objects.momentum_schedule = MomentumSchedule(train_objects.lr_schedule, train_config.min_lr,
                                                       train_config.max_lr, train_config.min_momentum,
                                                       train_config.max_momentum)

    input_shape = x_val[0].shape
    inputs = mx.sym.var('data', dtype='float32')
    value_out = symbol.get_internals()[main_config['value_output'] + '_output']
    policy_out = symbol.get_internals()[main_config['policy_output'] + '_output']
    sym = mx.symbol.Group([value_out, policy_out])
    net = mx.gluon.SymbolBlock(sym, inputs)
    net.collect_params().load(params_filename, ctx)

    metrics_gluon = {
        'value_loss': metric.MSE(name='value_loss', output_names=['value_output']),
        'value_acc_sign': metric.create(acc_sign, name='value_acc_sign', output_names=['value_output'],
                                        label_names=['value_label']),
    }

    if train_config.sparse_policy_label:
        print("train with sparse labels")
        # the default cross entropy only supports sparse labels
        metrics_gluon['policy_loss'] = metric.CrossEntropy(name='policy_loss', output_names=['policy_output'],
                                                           label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.Accuracy(axis=1, name='policy_acc', output_names=['policy_output'],
                                                      label_names=['policy_label'])
    else:
        metrics_gluon['policy_loss'] = metric.create(cross_entropy, name='policy_loss',
                                                     output_names=['policy_output'], label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.create(acc_distribution, name='policy_acc',
                                                    output_names=['policy_output'], label_names=['policy_label'])

    train_objects.metrics = metrics_gluon

    train_config.export_weights = False  # don't save intermediate weights
    train_agent = TrainerAgent(net, val_data, train_config, train_objects, use_rtpt=False)

    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config.k_steps_initial * train_config.batch_steps
    (k_steps_final, val_value_loss_final, val_policy_loss_final, val_value_acc_sign_final,
     val_policy_acc_final), _ = train_agent.train(cur_it)

    prefix = "%smodel-%.5f-%.5f-%.3f-%.3f" % (model_contender_dir, val_value_loss_final, val_policy_loss_final,
                                              val_value_acc_sign_final, val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx

    # the export function saves both the architecture and the weights
    net.export(prefix, epoch=nn_update_idx)
    print()
    logging.info("Saved checkpoint to %s-%04d.params", prefix, nn_update_idx)

    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file, ["value_out_output", "policy_out_output"], input_shape,
                                    [1, 8, 16], False)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
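# A hedged sketch of how a caller might drive this Gluon-based variant: the training update can be run in a
# separate process so that GPU memory is released when the update finishes, and k_steps_final is read back
# through the queue. The run_update() wrapper and its argument values are hypothetical and not part of this module.
from multiprocessing import Process, Queue


def run_update(nn_update_idx, symbol_filename, params_filename, main_config, train_config, model_contender_dir):
    """Hypothetical driver: runs update_network() in a child process and returns k_steps_final."""
    queue = Queue()
    proc = Process(target=update_network,
                   args=(queue, nn_update_idx, symbol_filename, params_filename,
                         True,  # convert_to_onnx
                         main_config, train_config, model_contender_dir))
    proc.start()
    k_steps_final = queue.get()  # blocks until update_network() puts its result on the queue
    proc.join()
    return k_steps_final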
def update_network(queue, nn_update_idx, k_steps_initial, max_lr, symbol_filename, params_filename, cwd,
                   convert_to_onnx):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
    training directory. Updates the neural network with the newly acquired games from the replay memory.
    :param queue: Queue object used to return items
    :param nn_update_idx: Defines how many updates of the NN have already been done. This index should be incremented
    after every update.
    :param k_steps_initial: Initial amount of steps of the NN update
    :param max_lr: Maximum learning rate used for the learning rate schedule
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    :param cwd: Current working directory (must end with "/")
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :return: k_steps_final
    """
    # set the context on CPU, switch to GPU if there is one available (strongly recommended for training)
    ctx = mx.gpu(train_config["device_id"]) if train_config["context"] == "gpu" else mx.cpu()
    # set a specific seed value for reproducibility
    nb_parts = len(glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number parts: %d" % nb_parts)

    if nb_parts <= 0:
        raise Exception('No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
                        ' %s' % main_config["planes_train_dir"])

    _, x_val, y_val_value, y_val_policy, _, _ = load_pgn_dataset(dataset_type="val", part_id=0,
                                                                 normalize=train_config["normalize"],
                                                                 verbose=False,
                                                                 q_value_ratio=train_config["q_value_ratio"])
    y_val_policy = prepare_policy(y_val_policy, train_config["select_policy_from_plane"],
                                  train_config["sparse_policy_label"])

    symbol = mx.sym.load(symbol_filename)
    if not train_config["sparse_policy_label"]:
        symbol = add_non_sparse_cross_entropy(symbol, train_config["val_loss_factor"],
                                              train_config["value_output"] + "_output",
                                              train_config["policy_output"] + "_output")

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) * nb_parts) // train_config["batch_size"]
    # one iteration is defined by passing 1 batch and doing backprop
    total_it = int(nb_it_per_epoch * train_config["nb_epochs"])

    lr_schedule = CosineAnnealingSchedule(train_config["min_lr"], max_lr, max(total_it * .7, 1))
    lr_schedule = LinearWarmUp(lr_schedule, start_lr=train_config["min_lr"], length=max(total_it * .25, 1))
    momentum_schedule = MomentumSchedule(lr_schedule, train_config["min_lr"], max_lr,
                                         train_config["min_momentum"], train_config["max_momentum"])

    # validation iterator (identical for plane and flat policy representations)
    val_iter = mx.io.NDArrayIter({'data': x_val},
                                 {'value_label': y_val_value, 'policy_label': y_val_policy},
                                 train_config["batch_size"])

    input_shape = x_val[0].shape
    model = mx.mod.Module(symbol=symbol, context=ctx, label_names=['value_label', 'policy_label'])
    # mx.viz.print_summary(
    #     symbol,
    #     shape={'data': (1, input_shape[0], input_shape[1], input_shape[2])},
    # )
    model.bind(for_training=True,
               data_shapes=[('data', (train_config["batch_size"], input_shape[0], input_shape[1], input_shape[2]))],
               label_shapes=val_iter.provide_label)
    model.load_params(params_filename)

    metrics = [
        mx.metric.MSE(name='value_loss', output_names=['value_output'], label_names=['value_label']),
        mx.metric.create(acc_sign, name='value_acc_sign', output_names=['value_output'],
                         label_names=['value_label']),
    ]

    if train_config["sparse_policy_label"]:
        print("train with sparse labels")
        # the default cross entropy only supports sparse labels
        metrics.append(mx.metric.Accuracy(axis=1, name='policy_acc', output_names=['policy_output'],
                                          label_names=['policy_label']))
        metrics.append(mx.metric.CrossEntropy(name='policy_loss', output_names=['policy_output'],
                                              label_names=['policy_label']))
    else:
        metrics.append(mx.metric.create(acc_distribution, name='policy_acc', output_names=['policy_output'],
                                        label_names=['policy_label']))
        metrics.append(mx.metric.create(cross_entropy, name='policy_loss', output_names=['policy_output'],
                                        label_names=['policy_label']))

    logging.info("Performance pre training")
    logging.info(model.score(val_iter, metrics))

    train_agent = TrainerAgentMXNET(model, symbol, val_iter, nb_parts, lr_schedule, momentum_schedule, total_it,
                                    train_config["optimizer_name"], wd=train_config["wd"],
                                    batch_steps=train_config["batch_steps"], k_steps_initial=k_steps_initial,
                                    cpu_count=train_config["cpu_count"], batch_size=train_config["batch_size"],
                                    normalize=train_config["normalize"],
                                    export_weights=train_config["export_weights"],
                                    export_grad_histograms=train_config["export_grad_histograms"],
                                    log_metrics_to_tensorboard=train_config["log_metrics_to_tensorboard"], ctx=ctx,
                                    metrics=metrics, use_spike_recovery=train_config["use_spike_recovery"],
                                    max_spikes=train_config["max_spikes"],
                                    spike_thresh=train_config["spike_thresh"], seed=None,
                                    val_loss_factor=train_config["val_loss_factor"],
                                    policy_loss_factor=train_config["policy_loss_factor"],
                                    select_policy_from_plane=train_config["select_policy_from_plane"],
                                    discount=train_config["discount"],
                                    sparse_policy_label=train_config["sparse_policy_label"],
                                    q_value_ratio=train_config["q_value_ratio"], cwd=cwd)

    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config["k_steps_initial"] * train_config["batch_steps"]
    (k_steps_final, val_value_loss_final, val_policy_loss_final, val_value_acc_sign_final,
     val_policy_acc_final), _ = train_agent.train(cur_it)

    if not train_config["sparse_policy_label"]:
        symbol = remove_no_sparse_cross_entropy(symbol, train_config["val_loss_factor"],
                                                train_config["value_output"] + "_output",
                                                train_config["policy_output"] + "_output")

    prefix = cwd + "model_contender/model-%.5f-%.5f-%.3f-%.3f" % (val_value_loss_final, val_policy_loss_final,
                                                                  val_value_acc_sign_final, val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx
    symbol.save(sym_file)
    model.save_params(params_file)

    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file, ["value_out_output", "policy_out_output"], input_shape,
                                    [1, 8, 16], False)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
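# The learning-rate schedule above wraps a cosine-annealing cycle in a linear warm-up. The function below is a
# minimal standalone approximation of that composition for inspection or plotting; it is NOT the project's
# CosineAnnealingSchedule/LinearWarmUp classes, whose exact shapes may differ. The 25%/70% fractions mirror the
# lengths passed to LinearWarmUp and CosineAnnealingSchedule above.
import math


def warmup_cosine_lr(it, total_it, min_lr, max_lr):
    """Approximate LinearWarmUp(CosineAnnealingSchedule(...)): linear ramp up, then cosine decay back to min_lr."""
    warmup_len = max(total_it * .25, 1)  # length passed to LinearWarmUp
    cosine_len = max(total_it * .7, 1)   # cycle length passed to CosineAnnealingSchedule
    if it < warmup_len:
        # linear ramp from min_lr towards max_lr during warm-up
        return min_lr + (max_lr - min_lr) * it / warmup_len
    # cosine decay from max_lr back to min_lr over the annealing cycle
    progress = min((it - warmup_len) / cosine_len, 1.0)
    return min_lr + 0.5 * (max_lr - min_lr) * (1 + math.cos(math.pi * progress))


# e.g. sample the schedule to log or plot its shape
lrs = [warmup_cosine_lr(i, total_it=1000, min_lr=1e-5, max_lr=0.1) for i in range(1000)]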
def update_network(queue, nn_update_idx, symbol_filename, params_filename, tar_filename, convert_to_onnx, main_config,
                   train_config: TrainConfig, model_contender_dir):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
    training directory. Updates the neural network with the newly acquired games from the replay memory.
    :param queue: Queue object used to return items
    :param nn_update_idx: Defines how many updates of the NN have already been done. This index should be incremented
    after every update.
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    :param tar_filename: Filepath to the model for pytorch
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :param main_config: Dict of the main_config (imported from main_config.py)
    :param train_config: TrainConfig object (imported from train_config.py)
    :param model_contender_dir: String of the contender directory path
    :return: k_steps_final
    """
    # set the context on CPU, switch to GPU if there is one available (strongly recommended for training)
    ctx = mx.gpu(train_config.device_id) if train_config.context == "gpu" else mx.cpu()
    # set a specific seed value for reproducibility
    train_config.nb_parts = len(glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number parts for training: %d" % train_config.nb_parts)
    train_objects = TrainObjects()

    if train_config.nb_parts <= 0:
        raise Exception('No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
                        ' %s' % main_config["planes_train_dir"])

    val_data, x_val = _get_val_loader(train_config)
    input_shape = x_val[0].shape

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) * train_config.nb_parts) // train_config.batch_size
    # one iteration is defined by passing 1 batch and doing backprop
    train_config.total_it = int(nb_it_per_epoch * train_config.nb_training_epochs)

    train_objects.lr_schedule = CosineAnnealingSchedule(train_config.min_lr, train_config.max_lr,
                                                        max(train_config.total_it * .7, 1))
    train_objects.lr_schedule = LinearWarmUp(train_objects.lr_schedule, start_lr=train_config.min_lr,
                                             length=max(train_config.total_it * .25, 1))
    train_objects.momentum_schedule = MomentumSchedule(train_objects.lr_schedule, train_config.min_lr,
                                                       train_config.max_lr, train_config.min_momentum,
                                                       train_config.max_momentum)

    net = _get_net(ctx, input_shape, main_config, params_filename, symbol_filename, tar_filename, train_config)
    train_objects.metrics = get_metrics(train_config)

    train_config.export_weights = True  # save intermediate results to handle spikes
    if train_config.framework == 'gluon':
        train_agent = TrainerAgentGluon(net, val_data, train_config, train_objects, use_rtpt=False)
    elif train_config.framework == 'pytorch':
        train_agent = TrainerAgentPytorch(net, val_data, train_config, train_objects, use_rtpt=False)
    else:
        raise ValueError('Unsupported framework: %s' % train_config.framework)

    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config.k_steps_initial * train_config.batch_steps
    (k_steps_final, val_value_loss_final, val_policy_loss_final, val_value_acc_sign_final,
     val_policy_acc_final), (_, _) = train_agent.train(cur_it)

    prefix = "%smodel-%.5f-%.5f-%.3f-%.3f" % (model_contender_dir, val_value_loss_final, val_policy_loss_final,
                                              val_value_acc_sign_final, val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx

    _export_net(convert_to_onnx, input_shape, k_steps_final, net, nn_update_idx, params_file, prefix, sym_file,
                train_config, model_contender_dir)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
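# The export step of this variant is delegated to _export_net, which is not shown in this section. Below is a
# hedged sketch of what the Gluon branch of such a helper could look like, mirroring the explicit net.export()
# and ONNX-conversion calls of the earlier variants. The name _export_net_sketch, its signature, and the PyTorch
# omission are assumptions; the real helper's behaviour may differ. logging and convert_mxnet_model_to_onnx are
# assumed to be available from this module's imports.
def _export_net_sketch(convert_to_onnx, input_shape, net, nn_update_idx, params_file, prefix, sym_file):
    """Hypothetical Gluon-only export helper: writes <prefix>-symbol.json and <prefix>-%04d.params."""
    net.export(prefix, epoch=nn_update_idx)  # saves both the architecture and the weights
    logging.info("Saved checkpoint to %s-%04d.params", prefix, nn_update_idx)
    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file, ["value_out_output", "policy_out_output"], input_shape,
                                    [1, 8, 16], False)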