Example #1
# Parametrized over the valid modes plus one invalid value to exercise the ValueError branch.
@pytest.mark.parametrize("mode", ["min", "max", "foo"])
def test_sort_epochs_and_values(mode):
    reporter = Reporter()
    key1 = uuid.uuid4().hex
    stats_list = [{"aa": 0.3}, {"aa": 0.5}, {"aa": 0.2}]
    for e in range(len(stats_list)):
        reporter.set_epoch(e + 1)
        with reporter.observe(key1) as sub:
            sub.register(stats_list[e])
    if mode not in ("min", "max"):
        with pytest.raises(ValueError):
            reporter.sort_epochs_and_values(key1, "aa", mode)
        return
    else:
        sort_values = reporter.sort_epochs_and_values(key1, "aa", mode)

    if mode == "min":
        sign = 1
    else:
        sign = -1
    desired = sorted(
        [(e + 1, stats_list[e]["aa"]) for e in range(len(stats_list))],
        key=lambda x: sign * x[1],
    )

    for e in range(len(stats_list)):
        assert sort_values[e] == desired[e]
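
A minimal sketch of the ordering the test above expects: (epoch, value) pairs sorted by the observed value, ascending for "min" and descending for "max". The helper name sort_epoch_value_pairs is hypothetical and only mirrors what Reporter.sort_epochs_and_values returns for a single key.

# Hypothetical stand-in for Reporter.sort_epochs_and_values(key, "aa", mode),
# shown only to illustrate the expected ordering.
from typing import List, Tuple


def sort_epoch_value_pairs(
    pairs: List[Tuple[int, float]], mode: str
) -> List[Tuple[int, float]]:
    if mode not in ("min", "max"):
        raise ValueError(f"mode must be 'min' or 'max': {mode}")
    # "min": smallest value first; "max": largest value first
    return sorted(pairs, key=lambda x: x[1], reverse=(mode == "max"))


pairs = [(1, 0.3), (2, 0.5), (3, 0.2)]
assert sort_epoch_value_pairs(pairs, "min") == [(3, 0.2), (1, 0.3), (2, 0.5)]
assert sort_epoch_value_pairs(pairs, "max") == [(2, 0.5), (1, 0.3), (3, 0.2)]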
Example #2
def test_sort_epochs_and_values_no_key():
    reporter = Reporter()
    key1 = uuid.uuid4().hex
    stats_list = [{"aa": 0.3}, {"aa": 0.5}, {"aa": 0.2}]
    for e in range(len(stats_list)):
        reporter.set_epoch(e + 1)
        with reporter.observe(key1) as sub:
            sub.register(stats_list[e])
    with pytest.raises(KeyError):
        reporter.sort_epochs_and_values("foo", "bar", "min")
Example #3
def average_nbest_models(
    output_dir: Path,
    reporter: Reporter,
    best_model_criterion: Sequence[Sequence[str]],
    nbest: int,
) -> None:
    assert check_argument_types()
    # 1. Get nbests: List[Tuple[str, str, List[Tuple[epoch, value]]]]
    nbest_epochs = [(ph, k, reporter.sort_epochs_and_values(ph, k, m)[:nbest])
                    for ph, k, m in best_model_criterion
                    if reporter.has(ph, k)]

    _loaded = {}
    for ph, cr, epoch_and_values in nbest_epochs:
        # Note that len(epoch_and_values) doesn't always equal nbest.
        op = output_dir / f"{ph}.{cr}.ave_{len(epoch_and_values)}best.pth"
        logging.info(f"Averaging {len(epoch_and_values)}best models: "
                     f'criterion="{ph}.{cr}": {op}')

        if len(epoch_and_values) == 0:
            continue
        elif len(epoch_and_values) == 1:
            # The averaged model is same as the best model
            e, _ = epoch_and_values[0]
            op = output_dir / f"{e}epoch.pth"
            for sym_op in [
                output_dir / f"{ph}.{cr}.ave.pth",
                output_dir / f"{ph}.{cr}.ave_{len(epoch_and_values)}best.pth",
            ]:
                if sym_op.is_symlink() or sym_op.exists():
                    sym_op.unlink()
                sym_op.symlink_to(op.name)
        else:
            avg = None
            # 2.a Averaging model
            for e, _ in epoch_and_values:
                if e not in _loaded:
                    _loaded[e] = torch.load(
                        output_dir / f"{e}epoch.pth",
                        map_location="cpu",
                    )
                states = _loaded[e]

                if avg is None:
                    avg = states
                else:
                    # Accumulated
                    for k in avg:
                        avg[k] += states[k]
            for k in avg:
                avg[k] /= len(epoch_and_values)

            # 2.b Save the ave model and create a symlink
            torch.save(avg, op)
            sym_op = output_dir / f"{ph}.{cr}.ave.pth"
            if sym_op.is_symlink() or sym_op.exists():
                sym_op.unlink()
            sym_op.symlink_to(op.name)
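
A hedged usage sketch for the variant above, assuming a Reporter that was populated during training and an experiment directory that already contains one {epoch}epoch.pth checkpoint per epoch (the layout the function body reads from). The directory name and the reporter variable are placeholders, not values taken from this page.

from pathlib import Path

output_dir = Path("exp/train_asr")  # placeholder directory holding {e}epoch.pth files
best_model_criterion = [
    ("valid", "loss", "min"),  # average the epochs with the lowest validation loss
    ("train", "acc", "max"),   # and, separately, the epochs with the highest training accuracy
]
average_nbest_models(
    output_dir=output_dir,
    reporter=reporter,  # Reporter instance filled in during training (assumed to exist)
    best_model_criterion=best_model_criterion,
    nbest=5,
)
# The function then writes e.g. exp/train_asr/valid.loss.ave_5best.pth and
# points a valid.loss.ave.pth symlink at it.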
Example #4
def average_nbest_models(
    output_dir: Path,
    reporter: Reporter,
    best_model_criterion: Sequence[Sequence[str]],
    nbest: Union[Collection[int], int],
    suffix: Optional[str] = None,
) -> None:
    """Generate averaged model from n-best models

    Args:
        output_dir: The directory containing the model file for each epoch
        reporter: Reporter instance
        best_model_criterion: Criteria used to decide the best model.
            e.g. [("valid", "loss", "min"), ("train", "acc", "max")]
        nbest: Number of best model files to be averaged
        suffix: A suffix added to the averaged model file name
    """
    assert check_argument_types()
    if isinstance(nbest, int):
        nbests = [nbest]
    else:
        nbests = list(nbest)
    if len(nbests) == 0:
        warnings.warn("At least 1 nbest value is required")
        nbests = [1]
    if suffix is not None:
        suffix = suffix + "."
    else:
        suffix = ""

    # 1. Get nbests: List[Tuple[str, str, List[Tuple[epoch, value]]]]
    nbest_epochs = [
        (ph, k, reporter.sort_epochs_and_values(ph, k, m)[: max(nbests)])
        for ph, k, m in best_model_criterion
        if reporter.has(ph, k)
    ]

    _loaded = {}
    for ph, cr, epoch_and_values in nbest_epochs:
        _nbests = [i for i in nbests if i <= len(epoch_and_values)]
        if len(_nbests) == 0:
            _nbests = [1]

        for n in _nbests:
            if n == 0:
                continue
            elif n == 1:
                # The averaged model is same as the best model
                e, _ = epoch_and_values[0]
                op = output_dir / f"{e}epoch.pth"
                sym_op = output_dir / f"{ph}.{cr}.ave_1best.{suffix}pth"
                if sym_op.is_symlink() or sym_op.exists():
                    sym_op.unlink()
                sym_op.symlink_to(op.name)
            else:
                op = output_dir / f"{ph}.{cr}.ave_{n}best.{suffix}pth"
                logging.info(f"Averaging {n}best models: "
                             f'criterion="{ph}.{cr}": {op}')

                avg = None
                # 2.a. Averaging model
                for e, _ in epoch_and_values[:n]:
                    if e not in _loaded:
                        _loaded[e] = torch.load(
                            output_dir / f"{e}epoch.pth",
                            map_location="cpu",
                        )
                    states = _loaded[e]

                    if avg is None:
                        avg = states
                    else:
                        # Accumulated
                        for k in avg:
                            avg[k] = avg[k] + states[k]
                for k in avg:
                    if str(avg[k].dtype).startswith("torch.int"):
                        # For int type, not averaged, but only accumulated.
                        # e.g. BatchNorm.num_batches_tracked
                        # (If there is any case that requires averaging
                        #  or another reduction method, e.g. max/min, for integer types,
                        #  please report it.)
                        pass
                    else:
                        avg[k] = avg[k] / n

                # 2.b. Save the ave model and create a symlink
                torch.save(avg, op)

        # 3. *.*.ave.pth is a symlink to the max ave model
        op = output_dir / f"{ph}.{cr}.ave_{max(_nbests)}best.{suffix}pth"
        sym_op = output_dir / f"{ph}.{cr}.ave.{suffix}pth"
        if sym_op.is_symlink() or sym_op.exists():
            sym_op.unlink()
        sym_op.symlink_to(op.name)
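
A minimal sketch of calling the Collection-valued variant above: a single pass writes one averaged checkpoint per requested n (for n == 1 it just symlinks the best epoch's checkpoint) and finally points the plain *.ave.* symlink at the largest average. The output directory, reporter, and suffix below are placeholders.

from pathlib import Path

average_nbest_models(
    output_dir=Path("exp/train_asr"),
    reporter=reporter,  # Reporter instance filled in during training (assumed to exist)
    best_model_criterion=[("valid", "acc", "max")],
    nbest=[1, 5, 10],
    suffix="till40epoch",  # yields e.g. valid.acc.ave_10best.till40epoch.pth
)
# valid.acc.ave.till40epoch.pth ends up as a symlink to the largest available average.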
Example #5
def average_nbest_models(
    output_dir: Path,
    reporter: Reporter,
    best_model_criterion: Sequence[Sequence[str]],
    nbest: int,
) -> None:
    """Generate averaged model from n-best models

    Args:
        output_dir: The directory containing the model file for each epoch
        reporter: Reporter instance
        best_model_criterion: Criteria used to decide the best model.
            e.g. [("valid", "loss", "min"), ("train", "acc", "max")]
        nbest: Number of best model files to be averaged
    """
    assert check_argument_types()
    # 1. Get nbests: List[Tuple[str, str, List[Tuple[epoch, value]]]]
    nbest_epochs = [
        (ph, k, reporter.sort_epochs_and_values(ph, k, m)[:nbest])
        for ph, k, m in best_model_criterion
        if reporter.has(ph, k)
    ]

    _loaded = {}
    for ph, cr, epoch_and_values in nbest_epochs:
        # Note that len(epoch_and_values) doesn't always equal nbest.
        op = output_dir / f"{ph}.{cr}.ave_{len(epoch_and_values)}best.pth"
        logging.info(
            f"Averaging {len(epoch_and_values)}best models: "
            f'criterion="{ph}.{cr}": {op}'
        )

        if len(epoch_and_values) == 0:
            continue
        elif len(epoch_and_values) == 1:
            # The averaged model is same as the best model
            e, _ = epoch_and_values[0]
            op = output_dir / f"{e}epoch.pth"
            for sym_op in [
                output_dir / f"{ph}.{cr}.ave.pth",
                output_dir / f"{ph}.{cr}.ave_{len(epoch_and_values)}best.pth",
            ]:
                if sym_op.is_symlink() or sym_op.exists():
                    sym_op.unlink()
                sym_op.symlink_to(op.name)
        else:
            avg = None
            # 2.a Averaging model
            for e, _ in epoch_and_values:
                if e not in _loaded:
                    _loaded[e] = torch.load(
                        output_dir / f"{e}epoch.pth", map_location="cpu",
                    )
                states = _loaded[e]

                if avg is None:
                    avg = states
                else:
                    # Accumulated
                    for k in avg:
                        avg[k] += states[k]
            for k in avg:
                if str(avg[k].dtype).startswith("torch.int"):
                    # For int type, not averaged, but only accumulated.
                    # e.g. BatchNorm.num_batches_tracked
                    # (If there is any case that requires averaging
                    #  or another reduction method, e.g. max/min, for integer types,
                    #  please report it.)
                    pass
                else:
                    avg[k] /= len(epoch_and_values)

            # 2.b Save the ave model and create a symlink
            torch.save(avg, op)
            sym_op = output_dir / f"{ph}.{cr}.ave.pth"
            if sym_op.is_symlink() or sym_op.exists():
                sym_op.unlink()
            sym_op.symlink_to(op.name)
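
A self-contained sketch of the state-dict averaging rule used by the two variants above that special-case integer tensors: floating-point tensors are averaged, while integer tensors (e.g. BatchNorm's num_batches_tracked) are only accumulated. This is an illustration of the rule, not the ESPnet implementation itself.

import torch


def average_state_dicts(state_dicts):
    # Clone the first state dict so the accumulation never mutates the inputs.
    avg = {k: v.clone() for k, v in state_dicts[0].items()}
    for states in state_dicts[1:]:
        for k in avg:
            avg[k] = avg[k] + states[k]
    for k in avg:
        if not str(avg[k].dtype).startswith("torch.int"):
            avg[k] = avg[k] / len(state_dicts)
    return avg


d1 = {"w": torch.tensor([1.0, 3.0]), "n": torch.tensor(2, dtype=torch.int64)}
d2 = {"w": torch.tensor([3.0, 5.0]), "n": torch.tensor(4, dtype=torch.int64)}
avg = average_state_dicts([d1, d2])
assert torch.allclose(avg["w"], torch.tensor([2.0, 4.0]))
assert avg["n"].item() == 6  # int tensors are accumulated, not averaged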