Example #1
def _get_val_loader(train_config):
    """
    Returns the validation data loader and the validation input data (x_val).
    """
    _, x_val, y_val_value, y_val_policy, plys_to_end, _ = load_pgn_dataset(dataset_type="val",
                                                                           part_id=0,
                                                                           normalize=train_config.normalize,
                                                                           verbose=False,
                                                                           q_value_ratio=train_config.q_value_ratio)
    y_val_policy = prepare_policy(y_val_policy, train_config.select_policy_from_plane,
                                  train_config.sparse_policy_label, train_config.is_policy_from_plane_data)
    if train_config.framework == 'gluon':
        if train_config.use_wdl and train_config.use_plys_to_end:
            val_dataset = gluon.data.ArrayDataset(nd.array(x_val), nd.array(y_val_value), nd.array(y_val_policy),
                                                  nd.array(value_to_wdl_label(y_val_value)),
                                                  nd.array(prepare_plys_label(plys_to_end)))
        else:
            val_dataset = gluon.data.ArrayDataset(nd.array(x_val), nd.array(y_val_value), nd.array(y_val_policy))
        val_data = gluon.data.DataLoader(val_dataset, train_config.batch_size, shuffle=False,
                                         num_workers=train_config.cpu_count)
    elif train_config.framework == 'pytorch':
        if train_config.use_wdl and train_config.use_plys_to_end:
            val_dataset = TensorDataset(torch.Tensor(x_val), torch.Tensor(y_val_value),
                                        torch.Tensor(y_val_policy),
                                        torch.Tensor(value_to_wdl_label(y_val_value)),
                                        torch.Tensor(prepare_plys_label(plys_to_end)))
        else:
            val_dataset = TensorDataset(torch.Tensor(x_val), torch.Tensor(y_val_value),
                                        torch.Tensor(y_val_policy))
        val_data = DataLoader(val_dataset, shuffle=False, batch_size=train_config.batch_size,
                              num_workers=train_config.cpu_count)
    return val_data, x_val
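
A minimal, self-contained sketch of how such a PyTorch validation loader is typically consumed; the arrays below are synthetic stand-ins, and their shapes are assumptions rather than the shapes produced by load_pgn_dataset():

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

# synthetic stand-ins for x_val / y_val_value / y_val_policy (shapes are assumptions)
x_val = np.random.rand(256, 34, 8, 8).astype(np.float32)
y_val_value = np.random.uniform(-1, 1, 256).astype(np.float32)
y_val_policy = np.random.rand(256, 4992).astype(np.float32)

val_dataset = TensorDataset(torch.Tensor(x_val), torch.Tensor(y_val_value), torch.Tensor(y_val_policy))
val_data = DataLoader(val_dataset, shuffle=False, batch_size=64, num_workers=0)

for x_batch, value_batch, policy_batch in val_data:
    pass  # a forward pass and metric update would go here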
Example #2
    def _get_train_loader(self, part_id):
        # load one chunk of the dataset from memory
        _, self.x_train, self.yv_train, self.yp_train, self.plys_to_end, _ = load_pgn_dataset(
            dataset_type="train",
            part_id=part_id,
            normalize=self.tc.normalize,
            verbose=False,
            q_value_ratio=self.tc.q_value_ratio)
        self.yp_train = prepare_policy(
            y_policy=self.yp_train,
            select_policy_from_plane=self.tc.select_policy_from_plane,
            sparse_policy_label=self.tc.sparse_policy_label,
            is_policy_from_plane_data=self.tc.is_policy_from_plane_data)

        # update the train_data object
        if self.tc.use_wdl and self.tc.use_plys_to_end:
            train_dataset = TensorDataset(
                torch.Tensor(self.x_train), torch.Tensor(self.yv_train),
                torch.Tensor(self.yp_train),
                torch.Tensor(value_to_wdl_label(self.yv_train)),
                torch.Tensor(prepare_plys_label(self.plys_to_end)))
        else:
            train_dataset = TensorDataset(torch.Tensor(self.x_train),
                                          torch.Tensor(self.yv_train),
                                          torch.Tensor(self.yp_train))
        train_loader = DataLoader(train_dataset,
                                  shuffle=True,
                                  batch_size=self.tc.batch_size,
                                  num_workers=self.tc.cpu_count)
        return train_loader
Example #3
    def convert_all_planes_to_rec(self):
        """
        Converts all part files produced via load_pgn_dataset() into a single .rec file

        :return:
        """

        # we must add '**/*' because we want to go into the time stamp directory
        plane_files = glob(self._import_dir + "**/*")

        # construct the export filepaths
        idx_filepath = "%s%s" % (self._export_dir, self._dataset_type + ".idx")
        rec_filepath = "%s%s" % (self._export_dir, self._dataset_type + ".rec")

        # create both an '.idx' and '.rec' file
        # the '.idx' file stores the indices to the string buffers
        # the '.rec' file stores the planes in a compressed binary string buffer format
        record = mx.recordio.MXIndexedRecordIO(idx_filepath, rec_filepath, "w")

        nb_parts = len(plane_files)
        idx = 0
        for part_id in range(nb_parts):

            t_s = time()

            logging.info("PART: %d", part_id)
            # load one chunk of the dataset from memory
            s_ids_train, x, yv, yp, pgn_datasets = load_pgn_dataset(
                dataset_type=self._dataset_type,
                part_id=part_id,
                print_statistics=True,
                print_parameters=False,
                normalize=False,
            )

            # iterate over all board states, i.e. the data samples in the file
            for position, value in enumerate(x):
                data = value.flatten()
                buf = zlib.compress(data.tobytes())
                # store the value label and the integer index of the highest policy output
                header = mx.recordio.IRHeader(
                    0, [yv[position], yp[position].argmax()], idx, 0)
                s = mx.recordio.pack(header, buf)
                record.write_idx(idx, s)
                idx += 1

            # log the elapsed time for a single dataset part file
            logging.debug("elapsed time %.2fs", (time() - t_s))

        # close the record file
        record.close()

        logging.debug("created %s sucessfully", idx_filepath)
        logging.debug("created %s sucessfully", rec_filepath)
Example #4
    def test_loaded_dataset_black_move(self):
        """
        Loads the dataset file and checks the first move policy vector for black for correctness
        :return:
        """
        _, _, _, yp_val, _ = load_pgn_dataset(dataset_type="test",
                                              part_id=0,
                                              print_statistics=True,
                                              print_parameters=True,
                                              normalize=True)

        board = chess.variant.CrazyhouseBoard()
        # push a dummy move
        board.push_uci("e2e4")

        mv_conv0 = policy_to_move(yp_val[1], is_white_to_move=False)
        mv_conv1, prob = policy_to_best_move(board, yp_val[1])

        self.assertEqual(prob,
                         1,
                         msg="The policy vector has to be one hot encoded.")

        selected_moves, move_probabilities = policy_to_moves(board, yp_val[1])
        mv_conv2 = selected_moves[0]

        self.assertGreater(move_probabilities[0],
                           0,
                           msg="The move probability must be greater 0")
        self.assertEqual(move_probabilities[0],
                         1,
                         msg="The policy vector has to be one hot encoded.")

        converted_moves = [mv_conv0, mv_conv1, mv_conv2]

        for mv_converted in converted_moves:
            mv_converted_is_legal = False

            # check if the move is legal in the starting position
            for move in board.legal_moves:
                if move == mv_converted:
                    mv_converted_is_legal = True

            self.assertTrue(
                mv_converted_is_legal,
                msg=
                "Convert move %s is not a legal move in the starting position for BLACK"
                % mv_converted.uci(),
            )
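
The assertions above rely on the test labels being one-hot policy vectors; a self-contained numpy illustration of that property (the vector length and index are placeholders):

import numpy as np

policy = np.zeros(4992, dtype=np.float32)  # vector length is a placeholder
policy[1234] = 1.0                         # exactly one move receives probability 1

assert policy[policy.argmax()] == 1  # what assertEqual(prob, 1) checks above
assert policy.sum() == 1             # a valid one-hot distribution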
Example #5
    def custom_metric_eval(self):
        """
        Evaluates the model based on the validation set of different variants
        """

        if self.to.variant_metrics is None:
            return

        for part_id, variant_name in enumerate(self.to.variant_metrics):
            # load one chunk of the dataset from memory
            _, x_val, yv_val, yp_val, _, _ = load_pgn_dataset(
                dataset_type="val",
                part_id=part_id,
                normalize=self.tc.normalize,
                verbose=False,
                q_value_ratio=self.tc.q_value_ratio)

            if self.tc.select_policy_from_plane:
                val_iter = mx.io.NDArrayIter({'data': x_val}, {
                    'value_label':
                    yv_val,
                    'policy_label':
                    np.array(FLAT_PLANE_IDX)[yp_val.argmax(axis=1)]
                }, self.tc.batch_size)
            else:
                val_iter = mx.io.NDArrayIter(
                    {'data': x_val}, {
                        'value_label': yv_val,
                        'policy_label': yp_val.argmax(axis=1)
                    }, self.tc.batch_size)

            results = self._model.score(val_iter, self.to.metrics)
            prefix = "val_"

            for entry in results:
                name = variant_name + "_" + entry[0]
                value = entry[1]
                print(" - %s%s: %.4f" % (prefix, name, value), end="")
                # add the metrics to the tensorboard event file
                if self.tc.log_metrics_to_tensorboard:
                    self.sum_writer.add_scalar(
                        name, [prefix.replace("_", ""), value], self.k_steps)
        print()
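
The select_policy_from_plane branch maps each one-hot argmax through a flat lookup table; a minimal sketch of that indexing pattern, with a made-up table in place of FLAT_PLANE_IDX:

import numpy as np

FLAT_PLANE_IDX = np.arange(10)[::-1]           # made-up lookup table
yp_val = np.eye(10, dtype=np.float32)[[2, 7]]  # two one-hot policy rows

sparse_labels = FLAT_PLANE_IDX[yp_val.argmax(axis=1)]
print(sparse_labels)  # [7 2] -- the argmax indices remapped through the table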
Example #6
def update_network(queue, nn_update_idx, k_steps_initial, max_lr,
                   symbol_filename, params_filename, cwd, convert_to_onnx):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
     training directory
    :param queue: Queue object used to return items
    :param k_steps_initial: Initial amount of steps of the NN update
    :param nn_update_idx: Defines how many updates of the nn has already been done. This index should be incremented
    after every update.
    :param max_lr: Maximum learning rate used for the learning rate schedule
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    Updates the neural network with the newly acquired games from the replay memory
    :param cwd: Current working directory (must end with "/")
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :return: k_steps_final
    """

    # set the context on CPU, switch to GPU if there is one available (strongly recommended for training)
    ctx = mx.gpu(train_config["device_id"]
                 ) if train_config["context"] == "gpu" else mx.cpu()
    # count the number of available training part files
    nb_parts = len(glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number of parts: %d", nb_parts)

    if nb_parts <= 0:
        raise Exception(
            'No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
            ' %s' % main_config["planes_train_dir"])

    _, x_val, y_val_value, y_val_policy, _, _ = load_pgn_dataset(
        dataset_type="val",
        part_id=0,
        normalize=train_config["normalize"],
        verbose=False,
        q_value_ratio=train_config["q_value_ratio"])

    y_val_policy = prepare_policy(y_val_policy,
                                  train_config["select_policy_from_plane"],
                                  train_config["sparse_policy_label"])

    symbol = mx.sym.load(symbol_filename)
    if not train_config["sparse_policy_label"]:
        symbol = add_non_sparse_cross_entropy(
            symbol, train_config["val_loss_factor"],
            train_config["value_output"] + "_output",
            train_config["policy_output"] + "_output")

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) * nb_parts) // train_config["batch_size"]
    # one iteration is defined by passing 1 batch and doing backprop
    total_it = int(nb_it_per_epoch * train_config["nb_epochs"])

    lr_schedule = CosineAnnealingSchedule(train_config["min_lr"], max_lr,
                                          max(total_it * .7, 1))
    lr_schedule = LinearWarmUp(lr_schedule,
                               start_lr=train_config["min_lr"],
                               length=max(total_it * .25, 1))
    momentum_schedule = MomentumSchedule(lr_schedule, train_config["min_lr"],
                                         max_lr, train_config["min_momentum"],
                                         train_config["max_momentum"])

    if train_config["select_policy_from_plane"]:
        val_iter = mx.io.NDArrayIter({'data': x_val}, {
            'value_label': y_val_value,
            'policy_label': y_val_policy
        }, train_config["batch_size"])
    else:
        val_iter = mx.io.NDArrayIter({'data': x_val}, {
            'value_label': y_val_value,
            'policy_label': y_val_policy
        }, train_config["batch_size"])

    input_shape = x_val[0].shape
    model = mx.mod.Module(symbol=symbol,
                          context=ctx,
                          label_names=['value_label', 'policy_label'])
    # mx.viz.print_summary(
    #     symbol,
    #     shape={'data': (1, input_shape[0], input_shape[1], input_shape[2])},
    # )
    model.bind(for_training=True,
               data_shapes=[('data',
                             (train_config["batch_size"], input_shape[0],
                              input_shape[1], input_shape[2]))],
               label_shapes=val_iter.provide_label)
    model.load_params(params_filename)

    metrics = [
        mx.metric.MSE(name='value_loss',
                      output_names=['value_output'],
                      label_names=['value_label']),
        mx.metric.create(acc_sign,
                         name='value_acc_sign',
                         output_names=['value_output'],
                         label_names=['value_label']),
    ]

    if train_config["sparse_policy_label"]:
        print("train with sparse labels")
        # the default cross entropy only supports sparse labels
        metrics.append(
            mx.metric.Accuracy(axis=1,
                               name='policy_acc',
                               output_names=['policy_output'],
                               label_names=['policy_label']))
        metrics.append(
            mx.metric.CrossEntropy(name='policy_loss',
                                   output_names=['policy_output'],
                                   label_names=['policy_label']))
    else:
        metrics.append(
            mx.metric.create(acc_distribution,
                             name='policy_acc',
                             output_names=['policy_output'],
                             label_names=['policy_label']))
        metrics.append(
            mx.metric.create(cross_entropy,
                             name='policy_loss',
                             output_names=['policy_output'],
                             label_names=['policy_label']))

    logging.info("Performance pre training")
    logging.info(model.score(val_iter, metrics))

    train_agent = TrainerAgentMXNET(
        model,
        symbol,
        val_iter,
        nb_parts,
        lr_schedule,
        momentum_schedule,
        total_it,
        train_config["optimizer_name"],
        wd=train_config["wd"],
        batch_steps=train_config["batch_steps"],
        k_steps_initial=k_steps_initial,
        cpu_count=train_config["cpu_count"],
        batch_size=train_config["batch_size"],
        normalize=train_config["normalize"],
        export_weights=train_config["export_weights"],
        export_grad_histograms=train_config["export_grad_histograms"],
        log_metrics_to_tensorboard=train_config["log_metrics_to_tensorboard"],
        ctx=ctx,
        metrics=metrics,
        use_spike_recovery=train_config["use_spike_recovery"],
        max_spikes=train_config["max_spikes"],
        spike_thresh=train_config["spike_thresh"],
        seed=None,
        val_loss_factor=train_config["val_loss_factor"],
        policy_loss_factor=train_config["policy_loss_factor"],
        select_policy_from_plane=train_config["select_policy_from_plane"],
        discount=train_config["discount"],
        sparse_policy_label=train_config["sparse_policy_label"],
        q_value_ratio=train_config["q_value_ratio"],
        cwd=cwd)
    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config["k_steps_initial"] * train_config["batch_steps"]
    (k_steps_final, val_value_loss_final, val_policy_loss_final,
     val_value_acc_sign_final,
     val_policy_acc_final), _ = train_agent.train(cur_it)

    if not train_config["sparse_policy_label"]:
        symbol = remove_no_sparse_cross_entropy(
            symbol, train_config["val_loss_factor"],
            train_config["value_output"] + "_output",
            train_config["policy_output"] + "_output")
    prefix = cwd + "model_contender/model-%.5f-%.5f-%.3f-%.3f" % (
        val_value_loss_final, val_policy_loss_final, val_value_acc_sign_final,
        val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx
    symbol.save(sym_file)
    model.save_params(params_file)

    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file,
                                    ["value_out_output", "policy_out_output"],
                                    input_shape, [1, 8, 16], False)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
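
Because the result is returned through the queue, update_network() is presumably meant to run in a separate process; a hedged sketch of one way to invoke it (the file names are placeholders):

from multiprocessing import Process, Queue

queue = Queue()
process = Process(target=update_network,
                  args=(queue, 0, 0, 0.1,
                        "model/model-symbol.json",   # placeholder architecture file
                        "model/model-0000.params",   # placeholder weight file
                        "./", False))
process.start()
k_steps_final = queue.get()  # blocks until update_network() puts its result
process.join()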
Example #7
def run_training(alpha, queue):
    _, x_val, yv_val, yp_val, plys_to_end, _ = load_pgn_dataset(
        dataset_type='val', part_id=0, verbose=True, normalize=tc.normalize)
    if tc.discount != 1:
        yv_val *= tc.discount**plys_to_end

    if tc.select_policy_from_plane:
        val_iter = mx.io.NDArrayIter(
            {'data': x_val}, {
                'value_label': yv_val,
                'policy_label': np.array(FLAT_PLANE_IDX)[yp_val.argmax(axis=1)]
            }, tc.batch_size)
    else:
        val_iter = mx.io.NDArrayIter({'data': x_val}, {
            'value_label': yv_val,
            'policy_label': yp_val.argmax(axis=1)
        }, tc.batch_size)

    tc.nb_parts = len(glob.glob(main_config['planes_train_dir'] + '**/*'))

    nb_it_per_epoch = (
        len(x_val) * tc.nb_parts
    ) // tc.batch_size  # calculate how many iterations per epoch exist
    # one iteration is defined by passing 1 batch and doing backprop
    tc.total_it = int(nb_it_per_epoch * tc.nb_training_epochs)

    ### Define a Learning Rate schedule
    to.lr_schedule = OneCycleSchedule(start_lr=tc.max_lr / 8,
                                      max_lr=tc.max_lr,
                                      cycle_length=tc.total_it * .3,
                                      cooldown_length=tc.total_it * .6,
                                      finish_lr=tc.min_lr)
    to.lr_schedule = LinearWarmUp(to.lr_schedule,
                                  start_lr=tc.min_lr,
                                  length=tc.total_it / 30)

    ### Momentum schedule
    to.momentum_schedule = MomentumSchedule(to.lr_schedule, tc.min_lr,
                                            tc.max_lr, tc.min_momentum,
                                            tc.max_momentum)
    plot_schedule(to.momentum_schedule,
                  iterations=tc.total_it,
                  ylabel='Momentum')

    input_shape = x_val[0].shape

    beta = np.sqrt(2 / alpha)

    print("alpha:", alpha)
    print("beta:", beta)

    depth = int(round(base_depth * alpha))
    channels = int(round(base_channels * beta))

    kernels = [3] * depth
    se_types = [None] * len(kernels)
    channels_reduced = int(round(channels / 4))

    symbol = rise_mobile_v3_symbol(channels=channels,
                                   channels_operating_init=channels_reduced,
                                   act_type='relu',
                                   channels_value_head=8,
                                   value_fc_size=256,
                                   channels_policy_head=NB_POLICY_MAP_CHANNELS,
                                   grad_scale_value=tc.val_loss_factor,
                                   grad_scale_policy=tc.policy_loss_factor,
                                   dropout_rate=tc.dropout_rate,
                                   select_policy_from_plane=True,
                                   kernels=kernels,
                                   se_types=se_types)

    # create a trainable module on compute context
    model = mx.mod.Module(symbol=symbol,
                          context=ctx,
                          label_names=['value_label', 'policy_label'])
    model.bind(for_training=True,
               data_shapes=[('data', (tc.batch_size, input_shape[0],
                                      input_shape[1], input_shape[2]))],
               label_shapes=val_iter.provide_label)
    model.init_params(
        mx.initializer.Xavier(rnd_type='uniform',
                              factor_type='avg',
                              magnitude=2.24))

    metrics_mxnet = [
        metric.MSE(name='value_loss',
                   output_names=['value_output'],
                   label_names=['value_label']),
        metric.CrossEntropy(name='policy_loss',
                            output_names=['policy_output'],
                            label_names=['policy_label']),
        metric.create(acc_sign,
                      name='value_acc_sign',
                      output_names=['value_output'],
                      label_names=['value_label']),
        metric.Accuracy(axis=1,
                        name='policy_acc',
                        output_names=['policy_output'],
                        label_names=['policy_label'])
    ]

    to.metrics = metrics_mxnet
    train_agent = TrainerAgentMXNET(model,
                                    symbol,
                                    val_iter,
                                    tc,
                                    to,
                                    use_rtpt=True)
    print("model.score(val_iter, to.metrics:",
          model.score(val_iter, to.metrics))

    # Start the training process
    _, (k_steps_best, val_metric_values_best) = train_agent.train(cur_it)

    new_row = {
        'alpha': alpha,
        'beta': beta,
        'depth': depth,
        'channels': channels,
        'k_steps_best': k_steps_best,
        'val_loss': val_metric_values_best['loss'],
        'val_value_loss': val_metric_values_best['value_loss'],
        'val_policy_loss': val_metric_values_best['policy_loss'],
        'val_policy_acc': val_metric_values_best['policy_acc'],
        'val_value_acc': val_metric_values_best['value_acc_sign']
    }

    queue.put(new_row)
    print(new_row)
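
The alpha/beta coupling above trades depth against width: since beta = sqrt(2 / alpha), the product depth * channels^2 stays roughly constant across alpha values. A quick standalone illustration of the resulting configurations (the base_depth and base_channels values are assumptions):

import numpy as np

base_depth, base_channels = 13, 256  # assumed base configuration

for alpha in [0.5, 1.0, 2.0]:
    beta = np.sqrt(2 / alpha)
    depth = int(round(base_depth * alpha))
    channels = int(round(base_channels * beta))
    print("alpha=%.1f: depth=%d, channels=%d" % (alpha, depth, channels))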
Example #8
    def __init__(self, part_id=0, *args, **kwargs):
        """
        Constructor
        :param part_id: Part id to choose for file selection. This way you can test different variants one at a time.
        :param args:
        :param kwargs:
        """
        super(FullRoundTripTests, self).__init__(*args, **kwargs)
        logging.info("loading test dataset...")
        self._s_idcs_test, self._x_test, self._yv_test, self._yp_test, _, self._pgn_datasets_test = load_pgn_dataset(
            dataset_type="test", part_id=part_id, verbose=True, normalize=False,
        )
        logging.info("loading test pgn file...")
        self._pgn_filename = self._pgn_datasets_test["parameters/pgn_name"][0].decode("UTF8")
        self._batch_size = self._pgn_datasets_test["parameters/batch_size"][0]
        # self._min_elo_both = self._pgn_datasets_test["parameters/min_elo_both"][0]
        # Rating cap at 90% cumulative rating for all variants
        self._min_elo_both = {
            "Chess": 2200,
            # "Crazyhouse": 2000,
            # "Chess960": 1950,
            # "King of the Hill": 1925,
            # "Three-check": 1900,
            # "Antichess": 1925,
            # "Atomic": 1900,
            # "Horde": 1900,
            # "Racing Kings": 1900
        }
        self._start_indices = self._pgn_datasets_test["start_indices"]

        use_all_games = MODE == MODE_CHESS and VERSION == 2
        converter = PGN2PlanesConverter(
            limit_nb_games_to_analyze=0,
            nb_games_per_file=self._batch_size,
            max_nb_files=1,
            min_elo_both=self._min_elo_both,
            termination_conditions=["Normal"],
            log_lvl=logging.DEBUG,
            compression="lz4",
            clevel=5,
            dataset_type="test",
            first_pgn_to_analyze=self._pgn_filename,
            use_all_games=use_all_games
        )
        self._all_pgn_sel, _, _, _, _ = converter.filter_pgn()
        print(len(self._all_pgn_sel))
Example #9
    def __init__(self, *args, **kwargs):
        super(FullRoundTripTests, self).__init__(*args, **kwargs)
        logging.info("loading test dataset...")
        self._s_idcs_test, self._x_test, self._yv_test, self._yp_test, self._pgn_datasets_test = load_pgn_dataset(
            dataset_type="test", part_id=0, print_statistics=True, normalize=False, print_parameters=True
        )
        logging.info("loading test pgn file...")
        self._pgn_filename = self._pgn_datasets_test["parameters/pgn_name"][0].decode("UTF8")
        self._batch_size = self._pgn_datasets_test["parameters/batch_size"][0]
        self._min_elo_both = self._pgn_datasets_test["parameters/min_elo_both"][0]
        self._start_indices = self._pgn_datasets_test["start_indices"]

        converter = PGN2PlanesConverter(
            limit_nb_games_to_analyze=0,
            nb_games_per_file=self._batch_size,
            max_nb_files=1,
            min_elo_both=self._min_elo_both,
            termination_conditions=["Normal"],
            log_lvl=logging.DEBUG,
            compression="lz4",
            clevel=5,
            dataset_type="test",
        )
        self._all_pgn_sel, _, _, _, _ = converter.filter_pgn()
        print(len(self._all_pgn_sel))
Example #10
def update_network(queue, nn_update_idx, symbol_filename, params_filename,
                   convert_to_onnx, main_config, train_config: TrainConfig,
                   model_contender_dir):
    """
    Creates a new NN checkpoint in the model contender directory after training using the game files stored in the
     training directory
    :param queue: Queue object used to return items
    :param nn_update_idx: Defines how many updates of the nn has already been done. This index should be incremented
    after every update.
    :param symbol_filename: Architecture definition file
    :param params_filename: Weight file which will be loaded before training
    Updates the neural network with the newly acquired games from the replay memory
    :param convert_to_onnx: Boolean indicating if the network shall be exported to ONNX to allow TensorRT inference
    :param main_config: Dict of the main_config (imported from main_config.py)
    :param train_config: Dict of the train_config (imported from train_config.py)
    :param model_contender_dir: String of the contender directory path
    :return: k_steps_final
    """

    # set the context on CPU, switch to GPU if there is one available (strongly recommended for training)
    ctx = mx.gpu(
        train_config.device_id) if train_config.context == "gpu" else mx.cpu()
    # count the number of available training part files
    train_config.nb_parts = len(
        glob.glob(main_config["planes_train_dir"] + '**/*.zip'))
    logging.info("number of parts for training: %d", train_config.nb_parts)
    train_objects = TrainObjects()

    if train_config.nb_parts <= 0:
        raise Exception(
            'No .zip files for training available. Check the path in main_config["planes_train_dir"]:'
            ' %s' % main_config["planes_train_dir"])

    _, x_val, y_val_value, y_val_policy, _, _ = load_pgn_dataset(
        dataset_type="val",
        part_id=0,
        normalize=train_config.normalize,
        verbose=False,
        q_value_ratio=train_config.q_value_ratio)
    y_val_policy = prepare_policy(y_val_policy,
                                  train_config.select_policy_from_plane,
                                  train_config.sparse_policy_label,
                                  train_config.is_policy_from_plane_data)
    val_dataset = gluon.data.ArrayDataset(nd.array(x_val),
                                          nd.array(y_val_value),
                                          nd.array(y_val_policy))
    val_data = gluon.data.DataLoader(val_dataset,
                                     train_config.batch_size,
                                     shuffle=False,
                                     num_workers=train_config.cpu_count)

    symbol = mx.sym.load(symbol_filename)

    # calculate how many iterations per epoch exist
    nb_it_per_epoch = (len(x_val) *
                       train_config.nb_parts) // train_config.batch_size
    # one iteration is defined by passing 1 batch and doing backprop
    train_config.total_it = int(nb_it_per_epoch *
                                train_config.nb_training_epochs)

    train_objects.lr_schedule = CosineAnnealingSchedule(
        train_config.min_lr, train_config.max_lr,
        max(train_config.total_it * .7, 1))
    train_objects.lr_schedule = LinearWarmUp(train_objects.lr_schedule,
                                             start_lr=train_config.min_lr,
                                             length=max(
                                                 train_config.total_it * .25,
                                                 1))
    train_objects.momentum_schedule = MomentumSchedule(
        train_objects.lr_schedule, train_config.min_lr, train_config.max_lr,
        train_config.min_momentum, train_config.max_momentum)

    input_shape = x_val[0].shape
    inputs = mx.sym.var('data', dtype='float32')
    value_out = symbol.get_internals()[main_config['value_output'] + '_output']
    policy_out = symbol.get_internals()[main_config['policy_output'] +
                                        '_output']
    sym = mx.symbol.Group([value_out, policy_out])
    net = mx.gluon.SymbolBlock(sym, inputs)
    net.collect_params().load(params_filename, ctx)

    metrics_gluon = {
        'value_loss':
        metric.MSE(name='value_loss', output_names=['value_output']),
        'value_acc_sign':
        metric.create(acc_sign,
                      name='value_acc_sign',
                      output_names=['value_output'],
                      label_names=['value_label']),
    }

    if train_config.sparse_policy_label:
        print("train with sparse labels")
        # the default cross entropy only supports sparse labels
        metrics_gluon['policy_loss'] = metric.CrossEntropy(
            name='policy_loss',
            output_names=['policy_output'],
            label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.Accuracy(
            axis=1,
            name='policy_acc',
            output_names=['policy_output'],
            label_names=['policy_label'])
    else:
        metrics_gluon['policy_loss'] = metric.create(
            cross_entropy,
            name='policy_loss',
            output_names=['policy_output'],
            label_names=['policy_label'])
        metrics_gluon['policy_acc'] = metric.create(
            acc_distribution,
            name='policy_acc',
            output_names=['policy_output'],
            label_names=['policy_label'])

    train_objects.metrics = metrics_gluon

    train_config.export_weights = False  # don't save intermediate weights
    train_agent = TrainerAgent(net,
                               val_data,
                               train_config,
                               train_objects,
                               use_rtpt=False)

    # iteration counter used for the momentum and learning rate schedule
    cur_it = train_config.k_steps_initial * train_config.batch_steps
    (k_steps_final, val_value_loss_final, val_policy_loss_final,
     val_value_acc_sign_final,
     val_policy_acc_final), _ = train_agent.train(cur_it)

    prefix = "%smodel-%.5f-%.5f-%.3f-%.3f" % (
        model_contender_dir, val_value_loss_final, val_policy_loss_final,
        val_value_acc_sign_final, val_policy_acc_final)

    sym_file = prefix + "-symbol.json"
    params_file = prefix + "-" + "%04d.params" % nn_update_idx

    # the export function saves both the architecture and the weights
    net.export(prefix, epoch=nn_update_idx)
    print()
    logging.info("Saved checkpoint to %s-%04d.params", prefix, nn_update_idx)

    if convert_to_onnx:
        convert_mxnet_model_to_onnx(sym_file, params_file,
                                    ["value_out_output", "policy_out_output"],
                                    input_shape, [1, 8, 16], False)

    logging.info("k_steps_final %d" % k_steps_final)
    queue.put(k_steps_final)
Example #11
    def train(self, cur_it=None):  # Probably needs refactoring
        """
        Training model
        :param cur_it: Current iteration which is used for the learning rate and momentum schedule.
         If set to None it will be initialized
        :return: return_metrics_and_stop_training()
        """
        # Too many local variables (44/15) - Too many branches (18/12) - Too many statements (108/50)
        # set a custom seed for reproducibility
        if self.tc.seed is not None:
            random.seed(self.tc.seed)
        # define and initialize the variables which will be used
        self.t_s = time()
        # track how many batches have been processed in this epoch
        self.patience_cnt = epoch = self.batch_proc_tmp = 0
        self.k_steps = self.tc.k_steps_initial  # counter for thousands steps

        if cur_it is None:
            self.cur_it = self.tc.k_steps_initial * 1000
        else:
            self.cur_it = cur_it
        self.nb_spikes = 0  # count the number of spikes that have been detected
        # initialize the loss to compare with, with a very high value
        self.old_val_loss = 9000
        self.graph_exported = False  # create a state variable to check if the net architecture has been reported yet
        self.continue_training = True
        self.optimizer.lr = self.to.lr_schedule(self.cur_it)
        if self.tc.optimizer_name == "nag":
            self.optimizer.momentum = self.to.momentum_schedule(self.cur_it)

        if not self.ordering:  # safety check to prevent eternal loop
            raise Exception(
                "You must have at least one part file in your planes-dataset directory!"
            )

        if self.use_rtpt:
            # Start the RTPT tracking
            self.rtpt.start()

        while self.continue_training:  # Too many nested blocks (7/5)
            # reshuffle the ordering of the training game batches (shuffle works in place)
            random.shuffle(self.ordering)

            epoch += 1
            logging.info("EPOCH %d", epoch)
            logging.info("=========================")
            self.t_s_steps = time()
            self._model.init_optimizer(optimizer=self.optimizer)

            if self._augment:
                # stores part ids that were not augmented yet
                parts_not_augmented = list(set(self.ordering.copy()))
                # stores part ids that were loaded before but not augmented
                parts_to_augment = []

            for part_id in tqdm_notebook(self.ordering):

                if MODE == MODE_XIANGQI:
                    _, self.x_train, self.yv_train, self.yp_train, _ = load_xiangqi_dataset(
                        dataset_type="train",
                        part_id=part_id,
                        normalize=self.tc.normalize,
                        verbose=False)
                    if self._augment:
                        # check whether the current part should be augmented
                        if part_id in parts_to_augment:
                            augment(self.x_train, self.yp_train)
                            logging.debug(
                                "Using augmented part with id {}".format(
                                    part_id))
                        elif part_id in parts_not_augmented:
                            if random.randint(0, 1):
                                augment(self.x_train, self.yp_train)
                                parts_not_augmented.remove(part_id)
                                logging.debug(
                                    "Using augmented part with id {}".format(
                                        part_id))
                            else:
                                parts_to_augment.append(part_id)
                                logging.debug(
                                    "Using unaugmented part with id {}".format(
                                        part_id))
                else:
                    # load one chunk of the dataset from memory
                    _, self.x_train, self.yv_train, self.yp_train, plys_to_end, _ = load_pgn_dataset(
                        dataset_type="train",
                        part_id=part_id,
                        normalize=self.tc.normalize,
                        verbose=False,
                        q_value_ratio=self.tc.q_value_ratio)
                # fill_up_batch if there aren't enough games
                if len(self.yv_train) < self.tc.batch_size:
                    logging.info("filling up batch with too few samples %d" %
                                 len(self.yv_train))
                    self.x_train = fill_up_batch(self.x_train,
                                                 self.tc.batch_size)
                    self.yv_train = fill_up_batch(self.yv_train,
                                                  self.tc.batch_size)
                    self.yp_train = fill_up_batch(self.yp_train,
                                                  self.tc.batch_size)
                    if MODE != MODE_XIANGQI:
                        if plys_to_end is not None:
                            plys_to_end = fill_up_batch(
                                plys_to_end, self.tc.batch_size)

                if MODE != MODE_XIANGQI:
                    if self.tc.discount != 1:
                        self.yv_train *= self.tc.discount**plys_to_end
                self.yp_train = prepare_policy(
                    self.yp_train, self.tc.select_policy_from_plane,
                    self.tc.sparse_policy_label,
                    self.tc.is_policy_from_plane_data)

                if self.tc.use_wdl and self.tc.use_plys_to_end:
                    self._train_iter = mx.io.NDArrayIter(
                        {'data': self.x_train}, {
                            'value_label': self.yv_train,
                            'policy_label': self.yp_train,
                            'wdl_label': value_to_wdl_label(self.yv_train),
                            'plys_to_end_label':
                            prepare_plys_label(plys_to_end)
                        },
                        self.tc.batch_size,
                        shuffle=True)
                else:
                    self._train_iter = mx.io.NDArrayIter(
                        {'data': self.x_train}, {
                            'value_label': self.yv_train,
                            'policy_label': self.yp_train
                        },
                        self.tc.batch_size,
                        shuffle=True)

                # avoid memory leaks by adding synchronization
                mx.nd.waitall()

                reset_metrics(self.to.metrics)
                for batch in self._train_iter:
                    self._model.forward(batch,
                                        is_train=True)  # compute predictions
                    for metric in self.to.metrics:  # update the metrics
                        self._model.update_metric(metric, batch.label)

                    self._model.backward()  # compute gradients
                    self._model.update()  # update parameters
                    self.batch_callback()

                    if not self.continue_training:
                        logging.info('Elapsed time for training(hh:mm:ss): ' +
                                     str(
                                         datetime.timedelta(
                                             seconds=round(time() -
                                                           self.t_s))))

                        return return_metrics_and_stop_training(
                            self.k_steps, self.val_metric_values,
                            self.k_steps_best, self.val_metric_values_best)

                # add the graph representation of the network to the tensorboard log file
                if not self.graph_exported and self.tc.log_metrics_to_tensorboard:
                    # self.sum_writer.add_graph(self._symbol)
                    self.graph_exported = True
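
fill_up_batch() used above is a project-specific helper; a plausible minimal version (an assumption, not the repository's implementation) that repeats samples along the first axis until the requested batch size is reached:

import numpy as np

def fill_up_batch(arr, batch_size):
    """Tile samples along axis 0 until at least batch_size entries exist."""
    reps = int(np.ceil(batch_size / len(arr)))
    return np.tile(arr, (reps,) + (1,) * (arr.ndim - 1))[:batch_size]

x = np.arange(6).reshape(3, 2)
print(fill_up_batch(x, 5).shape)  # (5, 2)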
Example #12
    def train(self, cur_it=None):  # Probably needs refactoring
        """
        Training model
        :param cur_it: Current iteration which is used for the learning rate and momentum schedule.
         If set to None it will be initialized
        """
        # Too many local variables (44/15) - Too many branches (18/12) - Too many statements (108/50)
        # set a custom seed for reproducibility
        random.seed(self.tc.seed)
        # define and initialize the variables which will be used
        t_s = time()
        # predefine the local variables that will be used in the training loop
        val_loss_best = val_p_acc_best = k_steps_best = val_metric_values_best = old_label = value_out = None
        patience_cnt = epoch = batch_proc_tmp = 0  # track how many batches have been processed in this epoch
        k_steps = self.tc.k_steps_initial  # counter for thousands steps
        # calculate how many log states will be processed
        k_steps_end = round(self.tc.total_it / self.tc.batch_steps)
        # we use k-steps instead of epochs here
        if k_steps_end == 0:
            k_steps_end = 1

        if self.use_rtpt:
            self.rtpt = RTPT(name_initials=self.tc.name_initials,
                             experiment_name='crazyara',
                             max_iterations=k_steps_end -
                             self.tc.k_steps_initial)
        if cur_it is None:
            cur_it = self.tc.k_steps_initial * 1000
        nb_spikes = 0  # count the number of spikes that have been detected
        # initialize the loss to compare with, with a very high value
        old_val_loss = np.inf
        graph_exported = False  # create a state variable to check if the net architecture has been reported yet

        if not self.ordering:  # safety check to prevent eternal loop
            raise Exception(
                "You must have at least one part file in your planes-dataset directory!"
            )

        if self.use_rtpt:
            # Start the RTPT tracking
            self.rtpt.start()

        while True:  # Too many nested blocks (7/5)
            # reshuffle the ordering of the training game batches (shuffle works in place)
            random.shuffle(self.ordering)

            epoch += 1
            logging.info("EPOCH %d", epoch)
            logging.info("=========================")
            t_s_steps = time()

            for part_id in tqdm_notebook(self.ordering):
                # load one chunk of the dataset from memory
                _, x_train, yv_train, yp_train, _, _ = load_pgn_dataset(
                    dataset_type="train",
                    part_id=part_id,
                    normalize=self.tc.normalize,
                    verbose=False,
                    q_value_ratio=self.tc.q_value_ratio)

                yp_train = prepare_policy(
                    y_policy=yp_train,
                    select_policy_from_plane=self.tc.select_policy_from_plane,
                    sparse_policy_label=self.tc.sparse_policy_label,
                    is_policy_from_plane_data=self.tc.is_policy_from_plane_data
                )

                # update the train_data object
                train_dataset = gluon.data.ArrayDataset(
                    nd.array(x_train), nd.array(yv_train), nd.array(yp_train))
                train_data = gluon.data.DataLoader(
                    train_dataset,
                    batch_size=self.tc.batch_size,
                    shuffle=True,
                    num_workers=self.tc.cpu_count)

                for _, (data, value_label,
                        policy_label) in enumerate(train_data):
                    data = data.as_in_context(self._ctx)
                    value_label = value_label.as_in_context(self._ctx)
                    policy_label = policy_label.as_in_context(self._ctx)

                    # update a dummy metric to see a proper progress bar
                    #  (the metrics will get evaluated at the end of 100k steps)
                    if batch_proc_tmp > 0:
                        self.to.metrics["value_loss"].update(
                            old_label, value_out)

                    old_label = value_label
                    with autograd.record():
                        [value_out, policy_out] = self._net(data)
                        value_loss = self._l2_loss(value_out, value_label)
                        policy_loss = self._softmax_cross_entropy(
                            policy_out, policy_label)
                        # weight the components of the combined loss
                        combined_loss = (
                            self.tc.val_loss_factor * value_loss +
                            self.tc.policy_loss_factor * policy_loss)
                        # update a dummy metric to see a proper progress bar
                        # self._metrics['value_loss'].update(preds=value_out, labels=value_label)

                    combined_loss.backward()
                    learning_rate = self.to.lr_schedule(
                        cur_it)  # update the learning rate
                    self._trainer.set_learning_rate(learning_rate)
                    momentum = self.to.momentum_schedule(
                        cur_it)  # update the momentum
                    self._trainer._optimizer.momentum = momentum
                    self._trainer.step(data.shape[0])
                    cur_it += 1
                    batch_proc_tmp += 1
                    # add the graph representation of the network to the tensorboard log file
                    if not graph_exported and self.tc.log_metrics_to_tensorboard:
                        self.sum_writer.add_graph(self._net)
                        graph_exported = True

                    if batch_proc_tmp >= self.tc.batch_steps:  # show metrics every batch_steps steps
                        # update the batch_proc_tmp counter by subtracting the batch_steps
                        batch_proc_tmp = batch_proc_tmp - self.tc.batch_steps
                        ms_step = (
                            (time() - t_s_steps) /
                            self.tc.batch_steps) * 1000  # measure elapsed time
                        # update the counters
                        k_steps += 1
                        patience_cnt += 1
                        logging.info("Step %dK/%dK - %dms/step", k_steps,
                                     k_steps_end, ms_step)
                        logging.info("-------------------------")
                        logging.debug("Iteration %d/%d", cur_it,
                                      self.tc.total_it)
                        logging.debug("lr: %.7f - momentum: %.7f",
                                      learning_rate, momentum)
                        train_metric_values = evaluate_metrics(
                            self.to.metrics,
                            train_data,
                            self._net,
                            nb_batches=10,
                            ctx=self._ctx,
                            sparse_policy_label=self.tc.sparse_policy_label,
                            apply_select_policy_from_plane=self.tc.
                            select_policy_from_plane
                            and not self.tc.is_policy_from_plane_data)
                        val_metric_values = evaluate_metrics(
                            self.to.metrics,
                            self._val_data,
                            self._net,
                            nb_batches=None,
                            ctx=self._ctx,
                            sparse_policy_label=self.tc.sparse_policy_label,
                            apply_select_policy_from_plane=self.tc.
                            select_policy_from_plane
                            and not self.tc.is_policy_from_plane_data)
                        if self.use_rtpt:
                            # update process title according to loss
                            self.rtpt.step(
                                subtitle=
                                f"loss={val_metric_values['loss']:2.2f}")
                        if self.tc.use_spike_recovery and (
                                old_val_loss * self.tc.spike_thresh <
                                val_metric_values["loss"]
                                or np.isnan(val_metric_values["loss"])
                        ):  # check for spikes
                            nb_spikes += 1
                            logging.warning(
                                "Spike %d/%d occurred - val_loss: %.3f",
                                nb_spikes,
                                self.tc.max_spikes,
                                val_metric_values["loss"],
                            )
                            if nb_spikes >= self.tc.max_spikes:
                                val_loss = val_metric_values["loss"]
                                val_p_acc = val_metric_values["policy_acc"]
                                logging.debug(
                                    "The maximum number of spikes has been reached. Stop training."
                                )
                                # stop training because the maximum number of spikes has been reached
                                print()
                                print("Elapsed time for training(hh:mm:ss): " +
                                      str(
                                          datetime.timedelta(
                                              seconds=round(time() - t_s))))

                                if self.tc.log_metrics_to_tensorboard:
                                    self.sum_writer.close()
                                return return_metrics_and_stop_training(
                                    k_steps, val_metric_values, k_steps_best,
                                    val_metric_values_best)

                            logging.debug("Recover to latest checkpoint")
                            model_path = self.tc.export_dir + "weights/model-%.5f-%.3f-%04d.params" % (
                                val_loss_best,
                                val_p_acc_best,
                                k_steps_best,
                            )  # Load the best model once again
                            logging.debug("load current best model:%s",
                                          model_path)
                            self._net.load_parameters(model_path,
                                                      ctx=self._ctx)
                            k_steps = k_steps_best
                            logging.debug("k_step is back at %d", k_steps_best)
                            # print the elapsed time
                            t_delta = time() - t_s_steps
                            print(" - %.ds" % t_delta)
                            t_s_steps = time()
                        else:
                            # update the val_loss_value to compare with using spike recovery
                            old_val_loss = val_metric_values["loss"]
                            # log the metric values to tensorboard
                            self._log_metrics(train_metric_values,
                                              global_step=k_steps,
                                              prefix="train_")
                            self._log_metrics(val_metric_values,
                                              global_step=k_steps,
                                              prefix="val_")

                            if self.tc.export_grad_histograms:
                                grads = []
                                # logging the gradients of parameters for checking convergence
                                for _, name in enumerate(self._param_names):
                                    if "bn" not in name and "batch" not in name and name != "policy_flat_plane_idx":
                                        grads.append(self._params[name].grad())
                                        self.sum_writer.add_histogram(
                                            tag=name,
                                            values=grads[-1],
                                            global_step=k_steps,
                                            bins=20)

                            # check if a new checkpoint shall be created
                            if val_loss_best is None or val_metric_values[
                                    "loss"] < val_loss_best:
                                # update val_loss_best
                                val_loss_best = val_metric_values["loss"]
                                val_p_acc_best = val_metric_values[
                                    "policy_acc"]
                                val_metric_values_best = val_metric_values
                                k_steps_best = k_steps

                                if self.tc.export_weights:
                                    prefix = self.tc.export_dir + "weights/model-%.5f-%.3f" \
                                             % (val_loss_best, val_p_acc_best)
                                    # the export function saves both the architecture and the weights
                                    self._net.export(prefix,
                                                     epoch=k_steps_best)
                                    print()
                                    logging.info(
                                        "Saved checkpoint to %s-%04d.params",
                                        prefix, k_steps_best)

                                patience_cnt = 0  # reset the patience counter
                            # print the elapsed time
                            t_delta = time() - t_s_steps
                            print(" - %.ds" % t_delta)
                            t_s_steps = time()

                            # log the samples per second metric to tensorboard
                            self.sum_writer.add_scalar(
                                tag="samples_per_second",
                                value={
                                    "hybrid_sync":
                                    data.shape[0] * self.tc.batch_steps /
                                    t_delta
                                },
                                global_step=k_steps,
                            )

                            # log the current learning rate
                            self.sum_writer.add_scalar(
                                tag="lr",
                                value=self.to.lr_schedule(cur_it),
                                global_step=k_steps)
                            # log the current momentum value
                            self.sum_writer.add_scalar(
                                tag="momentum",
                                value=self.to.momentum_schedule(cur_it),
                                global_step=k_steps)

                            if cur_it >= self.tc.total_it:

                                val_loss = val_metric_values["loss"]
                                val_p_acc = val_metric_values["policy_acc"]
                                logging.debug(
                                    "The number of given iterations has been reached"
                                )
                                # finally stop training because the given number of iterations has been reached
                                print()
                                print("Elapsed time for training (hh:mm:ss): " +
                                      str(
                                          datetime.timedelta(
                                              seconds=round(time() - t_s))))

                                if self.tc.log_metrics_to_tensorboard:
                                    self.sum_writer.close()

                                return return_metrics_and_stop_training(
                                    k_steps, val_metric_values, k_steps_best,
                                    val_metric_values_best)
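The checkpoint branch above only exports the network when the validation loss improves on the previous best value. Below is a minimal, framework-agnostic sketch of the same bookkeeping; export_fn is a hypothetical callback standing in for self._net.export(prefix, epoch=k_steps) and is not part of the library:

def update_best_checkpoint(val_metric_values, best, k_steps, export_fn=None):
    """Track the best validation loss and export a checkpoint on improvement.

    best is either None (first call) or a dict with the keys
    'loss', 'policy_acc' and 'k_steps'. Returns the updated best
    and whether the patience counter should be reset.
    """
    if best is None or val_metric_values["loss"] < best["loss"]:
        best = {"loss": val_metric_values["loss"],
                "policy_acc": val_metric_values["policy_acc"],
                "k_steps": k_steps}
        if export_fn is not None:
            # mirrors the "model-%.5f-%.3f" naming convention used above
            prefix = "./weights/model-%.5f-%.3f" % (best["loss"], best["policy_acc"])
            export_fn(prefix, epoch=k_steps)
        return best, True  # improvement: reset the patience counter
    return best, False  # no improvement: keep counting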
Example #13
0
    def __init__(self, *args, **kwargs):
        super(FullRoundTripTests, self).__init__(*args, **kwargs)

        logging.info('loading test dataset...')
        self._s_idcs_test, self._x_test, self._yv_test, self._yp_test, self._pgn_datasets_test = load_pgn_dataset(
            dataset_type='test',
            part_id=0,
            print_statistics=True,
            normalize=False,
            print_parameters=True)

        logging.info('loading test pgn file...')
        self._pgn_filename = self._pgn_datasets_test['parameters/pgn_name'][
            0].decode('UTF8')
        self._batch_size = self._pgn_datasets_test['parameters/batch_size'][0]

        self._min_elo_both = self._pgn_datasets_test[
            'parameters/min_elo_both'][0]
        self._start_indices = self._pgn_datasets_test['start_indices']

        converter = PGN2PlanesConverter(limit_nb_games_to_analyze=0,
                                        nb_games_per_file=self._batch_size,
                                        max_nb_files=1,
                                        min_elo_both=self._min_elo_both,
                                        termination_conditions=["Normal"],
                                        log_lvl=logging.DEBUG,
                                        compression='lz4',
                                        clevel=5,
                                        dataset_type='test')
        self._all_pgn_sel, nb_games_sel, batch_white_won, batch_black_won, batch_draw = converter.filter_pgn()
        print(len(self._all_pgn_sel))
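filter_pgn() additionally returns per-outcome game counts. As a quick plausibility check they can be turned into a result distribution; this sketch assumes the three outcome values are plain integer counts (if they are per-batch arrays, sum them first):

def result_distribution(nb_white_won, nb_black_won, nb_draw):
    """Turn the outcome counts returned by PGN2PlanesConverter.filter_pgn()
    into relative frequencies."""
    total = nb_white_won + nb_black_won + nb_draw
    if total == 0:
        return {}
    return {"white_won": nb_white_won / total,
            "black_won": nb_black_won / total,
            "draw": nb_draw / total}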
Example #14
0
    def train(self):
        """

        :param net: Gluon network object
        :param val_data: Gluon dataloader object
        :param nb_parts: Sets how many different part files exist in the train directory
        :param lr: Initial learning rate
        :param momentum:
        :param wd:
        :param nb_k_steps: Number of steps in after which to drop the learning rate (assuming the patience counter
        early dropping hasn't activated beforehand)

        :param patience: Number of batches to wait until no progress on validation loss has been achieved.
        If the no progress has been done the learning rate is multiplied by the drop factor.
        :param nb_lr_drops: Number of time to drop the learning rate in total. This defines the end of the train loop
        :param batch_steps: Number of batches after which the validation loss is evaluated
        :param k_steps_initial: Initial starting point of the network in terms of process k batches (default 0)
        :param lr_drop_fac: Dropping factor to the learning rate to apply
        :param cpu_count: How many cpu threads on the current are available
        :param batch_size: Batch size to train the network with
        :param normalize: Weather to use data normalization after loading the data (recommend to set to True)
        :param export_weights: Sets if network checkpoints should be exported
        :param export_grad_histograms: Sets if the gradient updates of the weights should be logged to tensorboard
        :return:
        """

        # set a custom seed for reproducibility
        random.seed(self._seed)

        # define and initialize the variables which will be used
        t_s = time()

        # predefine the local variables that will be used in the training loop
        val_loss_best = None
        val_p_acc_best = None
        k_steps_best = None
        patience_cnt = 0

        epoch = 0
        # keep track of how many batches have been processed in this epoch so far
        batch_proc_tmp = 0
        # counter for thousand-step units (k steps)
        k_steps = self._k_steps_initial
        # calculate how many log states will be processed
        k_steps_end = self._total_it / self._batch_steps

        cur_it = 0

        # count the number of spikes that have been detected
        nb_spikes = 0
        # initialize the validation loss to compare against with a very high value
        old_val_loss = 9000

        # self._lr = self._lr_warmup_init
        # logging.info('Warmup-Schedule')
        # logging.info('Initial learning rate: lr = %.5f', self._lr)
        # logging.info('=========================================')

        # set initial lr
        # self._trainer.set_learning_rate(self._lr)
        # log the current learning rate
        # self.sw.add_scalar(tag='lr', value=self._lr, global_step=k_steps)

        # create a state variable to check if the net architecture has been reported yet
        graph_exported = False

        old_label = None
        value_out = None

        # safety check to prevent an infinite loop
        if not self.ordering:
            raise Exception(
                "You must have at least one part file in your planes-dataset directory!"
            )

        while True:
            # reshuffle the ordering of the training game batches (shuffle works in place)
            random.shuffle(self.ordering)

            epoch += 1
            logging.info("EPOCH %d", epoch)
            logging.info("=========================")
            t_s_steps = time()

            for part_id in tqdm_notebook(self.ordering):

                # load one chunk of the dataset from memory
                s_idcs_train, x_train, yv_train, yp_train, pgn_datasets_train = load_pgn_dataset(
                    dataset_type="train",
                    part_id=part_id,
                    normalize=self._normalize,
                    verbose=False)
                # update the train_data object
                train_dataset = gluon.data.ArrayDataset(
                    nd.array(x_train), nd.array(yv_train),
                    nd.array(yp_train.argmax(axis=1)))
                train_data = gluon.data.DataLoader(train_dataset,
                                                   batch_size=self._batch_size,
                                                   shuffle=True,
                                                   num_workers=self._cpu_count)
                # batch_proc_tmp, dummy = self._process_on_data_plane_file(train_data, batch_proc_tmp)

                for i, (data, value_label,
                        policy_label) in enumerate(train_data):
                    data = data.as_in_context(self._ctx)
                    value_label = value_label.as_in_context(self._ctx)
                    policy_label = policy_label.as_in_context(self._ctx)

                    # update a dummy metric to see a proper progress bar
                    #  (the metrics will get evaluated at the end of 100k steps)
                    if batch_proc_tmp > 0:
                        self._metrics["value_loss"].update(
                            old_label, value_out)

                    old_label = value_label
                    with autograd.record():
                        [value_out, policy_out] = self._net(data)
                        value_loss = self._l2_loss(value_out, value_label)
                        policy_loss = self._softmax_cross_entropy(
                            policy_out, policy_label)
                        # weight the components of the combined loss
                        combined_loss = (
                            self._val_loss_factor * value_loss.sum() +
                            self._policy_loss_factor * policy_loss.sum())

                        # update a dummy metric to see a proper progress bar
                        # self._metrics['value_loss'].update(preds=value_out, labels=value_label)

                    combined_loss.backward()

                    # update the learning rate
                    lr = self._lr_schedule(cur_it)
                    self._trainer.set_learning_rate(lr)

                    # update the momentum
                    momentum = self._momentum_schedule(cur_it)
                    self._trainer._optimizer.momentum = momentum

                    self._trainer.step(data.shape[0])
                    cur_it += 1
                    batch_proc_tmp += 1

                    # add the graph representation of the network to the tensorboard log file
                    if not graph_exported and self._log_metrics_to_tensorboard:
                        self.sw.add_graph(self._net)
                        graph_exported = True

                    # show metrics after every batch_steps batches
                    if batch_proc_tmp >= self._batch_steps:

                        # if k_steps < self._warmup_k_steps:
                        # update the learning rate
                        # self._lr *= k_steps * ((self._lr_first - self._lr_warmup_init) / self._warmup_k_steps) + self._lr_warmup_init #self._lr_drop_fac
                        # self._trainer.set_learning_rate(self._lr)
                        # logging.info('Learning rate update: lr = %.5f', self._lr)
                        # logging.info('=========================================')

                        # log the current learning rate

                        # update batch_proc_tmp counter by subtracting the batch_steps
                        batch_proc_tmp = batch_proc_tmp - self._batch_steps

                        # measure elapsed time
                        ms_step = (
                            (time() - t_s_steps) / self._batch_steps) * 1000
                        # update the counters
                        k_steps += 1
                        patience_cnt += 1

                        logging.info("Step %dK/%dK - %dms/step", k_steps,
                                     k_steps_end, ms_step)
                        logging.info("-------------------------")
                        logging.debug("Iteration %d/%d", cur_it,
                                      self._total_it)
                        logging.debug("lr: %.7f - momentum: %.7f", lr,
                                      momentum)

                        # evaluate the metrics on a 25-batch subsample of the current training chunk
                        train_metric_values = evaluate_metrics(self._metrics,
                                                               train_data,
                                                               self._net,
                                                               nb_batches=25,
                                                               ctx=self._ctx)

                        # evaluate the metrics on the full validation set (nb_batches=None)
                        val_metric_values = evaluate_metrics(self._metrics,
                                                             self._val_data,
                                                             self._net,
                                                             nb_batches=None,
                                                             ctx=self._ctx)

                        # spike_detected = False
                        # spike_detected = old_val_loss * 1.5 < val_metric_values['loss']
                        # if np.isnan(val_metric_values['loss']):
                        #    spike_detected = True

                        # check for spikes
                        if self._use_spike_recovery and (
                                old_val_loss * self._spike_thresh <
                                val_metric_values["loss"]
                                or np.isnan(val_metric_values["loss"])):
                            nb_spikes += 1
                            logging.warning(
                                "Spike %d/%d occurred - val_loss: %.3f",
                                nb_spikes,
                                self._max_spikes,
                                val_metric_values["loss"],
                            )
                            if nb_spikes >= self._max_spikes:

                                val_loss = val_metric_values["loss"]
                                val_p_acc = val_metric_values["policy_acc"]

                                logging.debug(
                                    "The maximum number of spikes has been reached. Stop training."
                                )
                                # finally stop training because the maximum number of spikes has been reached
                                print()
                                print("Elapsed time for training (hh:mm:ss): " +
                                      str(
                                          datetime.timedelta(
                                              seconds=round(time() - t_s))))

                                if self._log_metrics_to_tensorboard:
                                    self.sw.close()

                                return (k_steps, val_loss,
                                        val_p_acc), (k_steps_best,
                                                     val_loss_best,
                                                     val_p_acc_best)

                            logging.debug("Recover to latest checkpoint")
                            # load the best model once again
                            model_path = "./weights/model-%.5f-%.3f-%04d.params" % (
                                val_loss_best,
                                val_p_acc_best,
                                k_steps_best,
                            )
                            logging.debug("load current best model:%s" %
                                          model_path)
                            self._net.load_parameters(model_path,
                                                      ctx=self._ctx)
                            k_steps = k_steps_best
                            logging.debug("k_step is back at %d", k_steps_best)

                            # print the elapsed time
                            t_delta = time() - t_s_steps
                            print(" - %.ds" % t_delta)
                            t_s_steps = time()

                        else:

                            # update the validation loss to compare against for spike detection
                            old_val_loss = val_metric_values["loss"]

                            # log the metric values to tensorboard
                            self._log_metrics(train_metric_values,
                                              global_step=k_steps,
                                              prefix="train_")
                            self._log_metrics(val_metric_values,
                                              global_step=k_steps,
                                              prefix="val_")

                            if self._export_grad_histograms:
                                grads = []
                                # logging the gradients of parameters for checking convergence
                                for name in self._param_names:
                                    if "bn" not in name and "batch" not in name:
                                        grads.append(self._params[name].grad())
                                        self.sw.add_histogram(
                                            tag=name,
                                            values=grads[-1],
                                            global_step=k_steps,
                                            bins=20)

                            # check if a new checkpoint shall be created
                            if val_loss_best is None or val_metric_values[
                                    "loss"] < val_loss_best:
                                # update val_loss_best
                                val_loss_best = val_metric_values["loss"]
                                val_p_acc_best = val_metric_values[
                                    "policy_acc"]
                                k_steps_best = k_steps

                                if self._export_weights:
                                    prefix = "./weights/model-%.5f-%.3f" % (
                                        val_loss_best, val_p_acc_best)
                                    # the export function saves both the architecture and the weights
                                    self._net.export(prefix,
                                                     epoch=k_steps_best)
                                    print()
                                    logging.info(
                                        "Saved checkpoint to %s-%04d.params",
                                        prefix, k_steps_best)

                                # reset the patience counter
                                patience_cnt = 0

                            # print the elapsed time
                            t_delta = time() - t_s_steps
                            print(" - %.ds" % t_delta)
                            t_s_steps = time()

                            # log the samples per second metric to tensorboard
                            self.sw.add_scalar(
                                tag="samples_per_second",
                                value={
                                    "hybrid_sync":
                                    data.shape[0] * self._batch_steps / t_delta
                                },
                                global_step=k_steps,
                            )

                            # log the current learning rate
                            self.sw.add_scalar(tag="lr",
                                               value=self._lr_schedule(cur_it),
                                               global_step=k_steps)

                            # log the current momentum value
                            self.sw.add_scalar(
                                tag="momentum",
                                value=self._momentum_schedule(cur_it),
                                global_step=k_steps)

                            if cur_it >= self._total_it:

                                val_loss = val_metric_values["loss"]
                                val_p_acc = val_metric_values["policy_acc"]

                                logging.debug(
                                    "The number of given iterations has been reached"
                                )
                                # finally stop training because the given number of iterations has been reached
                                print()
                                print("Elapsed time for training (hh:mm:ss): " +
                                      str(
                                          datetime.timedelta(
                                              seconds=round(time() - t_s))))

                                if self._log_metrics_to_tensorboard is True:
                                    self.sw.close()

                                return (k_steps, val_loss,
                                        val_p_acc), (k_steps_best,
                                                     val_loss_best,
                                                     val_p_acc_best)
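The spike-recovery branch above reloads the last best checkpoint whenever the validation loss exceeds spike_thresh times the previous value (or becomes NaN) and aborts training after max_spikes occurrences. A condensed sketch of that rule; load_best_fn is a hypothetical callback standing in for self._net.load_parameters(model_path, ctx=self._ctx):

import math

def check_spike(old_val_loss, val_loss, spike_thresh, nb_spikes, max_spikes, load_best_fn):
    """Return the updated spike counter and whether training should stop."""
    if math.isnan(val_loss) or old_val_loss * spike_thresh < val_loss:
        nb_spikes += 1
        if nb_spikes >= max_spikes:
            return nb_spikes, True  # too many spikes: abort training
        load_best_fn()  # recover to the latest best checkpoint
    return nb_spikes, False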
                            """
Example #15
0
def main():
    # config
    batch_size = 32
    logger = logging.getLogger('logger')
    logger.setLevel(logging.DEBUG)
    ctx = mx.cpu(0)
    calib_mode = 'entropy'
    excluded_sym_names = ['stem_conv0']
    num_calib_batches = 128
    quantized_dtype = 'int8'

    symbol_path = glob.glob(main_config["model_architecture_dir"] + "*")[0]
    params_path = glob.glob(main_config["model_weights_dir"] + "*")[0]
    print("symbol_path:", symbol_path)
    print("params_path:", params_path)

    # the epoch number is encoded in the parameter file name suffix "-NNNN.params"
    epoch = int(params_path[-11:-7])
    print(epoch)

    # load calibration dataset
    _, x_train, yv_train, yp_train, plys_to_end, _ = load_pgn_dataset(
        normalize=True)
    calib_data = mx.io.NDArrayIter({'data': x_train}, {},
                                   batch_size,
                                   shuffle=True)

    # construct the model name based on the parameter file
    prefix = symbol_path.split("/")[-1].replace("-symbol.json", "")
    sym = mx.sym.load(symbol_path)
    sym = remove_labels(sym, main_config['value_output'] + '_output',
                        main_config['policy_output'] + '_output')

    # https://github.com/apache/incubator-mxnet/issues/6951
    save_dict = mx.nd.load(params_path)
    arg_params = {}
    aux_params = {}
    for key, val in save_dict.items():
        param_type, name = key.split(":", 1)
        if param_type == "arg":
            arg_params[name] = val
        if param_type == "aux":
            aux_params[name] = val

    # quantize model
    sym = sym.get_backend_symbol('MKLDNN_QUANTIZE')
    label_names = []
    qsym, qarg_params, aux_params = quantize_model(
        sym=sym,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=ctx,
        excluded_sym_names=excluded_sym_names,
        excluded_op_names=excluded_sym_names,
        calib_mode=calib_mode,
        calib_data=calib_data,
        num_calib_examples=num_calib_batches * batch_size,
        quantized_dtype=quantized_dtype,
        quantize_mode='smart',
        label_names=label_names,
        logger=logger)

    sym_name = '%s-symbol.json' % (prefix + '-int8')
    save_symbol(sym_name, qsym, logger)
    param_name = '%s-%04d.params' % (prefix + '-int8', epoch)
    save_params(param_name, qarg_params, aux_params, logger)
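Once exported, the int8 model can be loaded back like any regular MXNet checkpoint (running it requires an MKL-DNN enabled MXNet build). The following is a minimal inference sketch continuing from main() above; the (34, 8, 8) plane shape is an assumption and must match the input representation the network was trained with:

import mxnet as mx
import numpy as np

# load the quantized symbol and parameters that were saved above
sym_int8, arg_params_int8, aux_params_int8 = mx.model.load_checkpoint(prefix + '-int8', epoch)

mod = mx.mod.Module(symbol=sym_int8, data_names=['data'], label_names=None, context=mx.cpu())
# NOTE: (34, 8, 8) is an assumed plane shape; adjust it to your input representation
mod.bind(for_training=False, data_shapes=[('data', (batch_size, 34, 8, 8))])
mod.set_params(arg_params_int8, aux_params_int8)

# run a dummy forward pass and fetch both network heads
batch = mx.io.DataBatch([mx.nd.array(np.random.rand(batch_size, 34, 8, 8))])
mod.forward(batch)
value_out, policy_out = mod.get_outputs()  # output order depends on the exported symbol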