Example #1
def test_faster_rcnn_train_all(mock_loss, mock_train_one_epoch, config,
                               dataset):
    """test train on all epochs."""
    loss_val = 0.1
    mock_loss.return_value = loss_val
    log_dir = os.path.join(tmp_name, "train")
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    writer = MagicMock()

    # XXX This is just a hot fix to prevent a mysterious folder such as
    # <MagicMock name='mock.logdir' id='140420520377936'> from showing up
    # after running this test.
    writer.logdir = tmp_name

    kfp_writer = MagicMock()

    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        checkpoint_dir=log_dir,
        distributed=False,
    )

    estimator = FasterRCNN(
        config=config,
        writer=writer,
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
        logdir="/tmp",
    )
    estimator.writer = writer
    estimator.kfp_writer = kfp_writer
    estimator.checkpointer = checkpointer

    estimator.device = torch.device("cpu")
    checkpointer.save = MagicMock()
    train_dataset = dataset
    val_dataset = dataset
    label_mappings = train_dataset.label_mappings
    is_distributed = False
    train_sampler = FasterRCNN.create_sampler(is_distributed=is_distributed,
                                              dataset=train_dataset,
                                              is_train=True)
    val_sampler = FasterRCNN.create_sampler(is_distributed=is_distributed,
                                            dataset=val_dataset,
                                            is_train=False)

    train_loader = dataloader_creator(config, train_dataset, train_sampler,
                                      TRAIN, is_distributed)
    val_loader = dataloader_creator(config, val_dataset, val_sampler, VAL,
                                    is_distributed)
    epoch = 0
    estimator.train_loop(
        train_dataloader=train_loader,
        label_mappings=label_mappings,
        val_dataloader=val_loader,
        train_sampler=train_sampler,
    )
    writer.add_scalar.assert_called_with("val/loss", loss_val, epoch)
    mock_train_one_epoch.assert_called_once()
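Note: the mock_loss and mock_train_one_epoch parameters above are injected by @patch decorators that this listing omits. Since @patch decorators stack bottom-up, the decorator closest to the function supplies the first mock argument. A plausible reconstruction is sketched below; the dotted patch targets are assumptions, not taken from the source.

from unittest.mock import patch

# Hypothetical decorators for the test above; the patch targets are
# assumptions about where the patched helpers live.
@patch("datasetinsights.estimators.faster_rcnn.train_one_epoch")
@patch("datasetinsights.estimators.faster_rcnn.FasterRCNN.evaluate_per_epoch")
def test_faster_rcnn_train_all(mock_loss, mock_train_one_epoch, config, dataset):
    ...  # body as in Example #1 above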
Example #2
def test_faster_rcnn_save(config):
    """test save model."""

    log_dir = tmp_name + "/train/"
    kfp_writer = MagicMock()
    writer = MagicMock()

    # XXX This is just a hot fix to prevent a mysterious folder such as
    # <MagicMock name='mock.logdir' id='140420520377936'> from showing up
    # after running this test.
    writer.logdir = tmp_name

    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        checkpoint_dir=log_dir,
        distributed=False,
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
        logdir="/tmp",
    )
    estimator.writer = writer
    estimator.kfp_writer = kfp_writer
    estimator.checkpointer = checkpointer
    estimator.device = torch.device("cpu")
    estimator.save(log_dir + "FasterRCNN.estimator")

    assert any([
        name.startswith("FasterRCNN.estimator") for name in os.listdir(log_dir)
    ])
Example #3
def test_faster_rcnn_load(config):
    """test load model."""

    ckpt_dir = tmp_name + "/train/FasterRCNN.estimator"
    config.checkpoint_file = ckpt_dir
    log_dir = tmp_name + "/load/"
    config.logdir = log_dir
    kfp_writer = MagicMock()
    writer = SummaryWriter(config.logdir, write_to_disk=True)
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        checkpoint_dir=log_dir,
        distributed=False,
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
        logdir="/tmp",
    )
    estimator.writer = writer
    estimator.kfp_writer = kfp_writer
    estimator.checkpointer = checkpointer

    estimator.device = torch.device("cpu")
    estimator.load(ckpt_dir)
    assert os.listdir(log_dir)[0].startswith("events.out.tfevents")
Example #4
def test_faster_rcnn_predict(mock_create, config, dataset):
    """test predict."""
    mock_create.return_value = dataset
    ckpt_dir = tmp_name + "/train/FasterRCNN.estimator"

    config.checkpoint_file = ckpt_dir
    kfp_writer = MagicMock()
    writer = MagicMock()
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        log_dir=config.system.logdir,
        distributed=config.system["distributed"],
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        device=torch.device("cpu"),
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
    )
    image_size = (256, 256)
    image = Image.fromarray(np.random.random(image_size), "L")
    image = torchvision.transforms.functional.to_tensor(image)
    result = estimator.predict(image)
    assert result == []
Example #5
def test_create_writer_when_checkpoint_dir_gcs():
    mock_gcs_writer = Mock()
    with patch(
        "datasetinsights.io.checkpoint.GCSEstimatorWriter",
        MagicMock(return_value=mock_gcs_writer),
    ):
        writer = EstimatorCheckpoint._create_writer("gs://abucket/path", "def")

        assert writer == mock_gcs_writer
Example #6
def test_create_writer():
    mock_local_writer = Mock()
    with patch(
            "datasetinsights.io.checkpoint.LocalEstimatorWriter",
            MagicMock(return_value=mock_local_writer),
    ):
        writer = EstimatorCheckpoint._create_writer("/path/to/folder", "abc")

        assert writer == mock_local_writer

    mock_gcs_writer = Mock()
    with patch(
            "datasetinsights.io.checkpoint.GCSEstimatorWriter",
            MagicMock(return_value=mock_gcs_writer),
    ):
        writer = EstimatorCheckpoint._create_writer("gs://abucket/path", "def")

        assert writer == mock_gcs_writer
Example #7
def test_create_writer_when_checkpoint_dir_local():
    mock_local_writer = Mock()
    with patch(
        "datasetinsights.io.checkpoint.LocalEstimatorWriter",
        MagicMock(return_value=mock_local_writer),
    ):
        with tempfile.TemporaryDirectory() as tmp:
            writer = EstimatorCheckpoint._create_writer(tmp, "abc")

            assert writer == mock_local_writer
Example #8
def test_create_writer_when_checkpoint_dir_none():
    mock_local_writer = Mock()
    with patch(
        "datasetinsights.io.checkpoint.LocalEstimatorWriter",
        MagicMock(return_value=mock_local_writer),
    ):
        writer = EstimatorCheckpoint._create_writer(
            checkpoint_dir=None, estimator_name="abc"
        )

        assert writer == mock_local_writer
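Together, the _create_writer tests pin down the factory's contract: gs:// paths yield a GCSEstimatorWriter, local paths and None yield a LocalEstimatorWriter, and http(s):// paths raise ValueError (see test_create_raises_value_error further down). Below is a minimal sketch of a factory honoring that contract; the stub classes and the fallback directory are assumptions, not the library's implementation.

class LocalEstimatorWriter:
    """Stub standing in for the real local checkpoint writer."""

    def __init__(self, dirname, prefix):
        self.dirname, self.prefix = dirname, prefix


class GCSEstimatorWriter:
    """Stub standing in for the real GCS checkpoint writer."""

    def __init__(self, cloud_path, prefix):
        self.cloud_path, self.prefix = cloud_path, prefix


def create_writer(checkpoint_dir, estimator_name):
    if checkpoint_dir and checkpoint_dir.startswith("gs://"):
        return GCSEstimatorWriter(checkpoint_dir, estimator_name)
    if checkpoint_dir and checkpoint_dir.startswith(("http://", "https://")):
        # http(s) checkpoints can be loaded but not written.
        raise ValueError(f"Can not write checkpoints to {checkpoint_dir}")
    # checkpoint_dir=None falls back to a default local directory.
    return LocalEstimatorWriter(checkpoint_dir or "/tmp/checkpoints",
                                estimator_name)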
Example #9
def test_faster_rcnn_train_all(
    mock_create, mock_loss, mock_train_one_epoch, config, dataset
):
    """test train on all epochs."""
    loss_val = 0.1
    mock_create.return_value = dataset
    mock_loss.return_value = loss_val
    log_dir = tmp_name + "/train/"
    config.system.logdir = log_dir
    writer = MagicMock()
    kfp_writer = MagicMock()

    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        log_dir=log_dir,
        distributed=config.system["distributed"],
    )

    estimator = FasterRCNN(
        config=config,
        writer=writer,
        device=torch.device("cpu"),
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
    )
    train_dataset = create_dataset(config, TRAIN)
    val_dataset = create_dataset(config, VAL)
    label_mappings = train_dataset.label_mappings
    is_distributed = config.system.distributed
    train_sampler = FasterRCNN.create_sampler(
        is_distributed=is_distributed, dataset=train_dataset, is_train=True
    )
    val_sampler = FasterRCNN.create_sampler(
        is_distributed=is_distributed, dataset=val_dataset, is_train=False
    )

    train_loader = dataloader_creator(
        config, train_dataset, train_sampler, TRAIN
    )
    val_loader = dataloader_creator(config, val_dataset, val_sampler, VAL)
    epoch = 0
    estimator.train_loop(
        train_dataloader=train_loader,
        label_mappings=label_mappings,
        val_dataloader=val_loader,
        train_sampler=train_sampler,
    )
    writer.add_scalar.assert_called_with("val/loss", loss_val, epoch)
    mock_train_one_epoch.assert_called_once()
Example #10
def create_estimator(
    name,
    config,
    *,
    tb_log_dir=None,
    no_cuda=None,
    checkpoint_dir=None,
    kfp_metrics_dir=const.DEFAULT_KFP_METRICS_DIR,
    kfp_metrics_filename=const.DEFAULT_KFP_METRICS_FILENAME,
    no_val=None,
    **kwargs,
):
    """Create a new instance of the estimators subclass

    Args:
        name (str): unique identifier for a estimators subclass
        config (dict): parameters specific to each estimators subclass
            used to create a estimators instance

    Returns:
        an instance of the specified estimators subclass
    """

    estimators_cls = _find_estimator(name)

    # TODO: this makes it so that we lose the tensorboard writer of
    # non-master processes, which could make debugging harder.

    writer = SummaryWriter(tb_log_dir)
    kfp_writer = KubeflowPipelineWriter(
        filename=kfp_metrics_filename, filepath=kfp_metrics_dir,
    )
    checkpointer = EstimatorCheckpoint(
        estimator_name=name, checkpoint_dir=checkpoint_dir, distributed=False,
    )

    return estimators_cls(
        config=config,
        writer=writer,
        kfp_writer=kfp_writer,
        checkpointer=checkpointer,
        logdir=tb_log_dir,
        no_cuda=no_cuda,
        no_val=no_val,
        kfp_metrics_dir=kfp_metrics_dir,
        kfp_metrics_filename=kfp_metrics_filename,
        **kwargs,
    )
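A call to this factory might look as follows; the estimator name, the config object, and the paths are placeholders whose valid values depend on the registered estimator subclasses.

# Hypothetical usage of the factory above; `config` is assumed to be a
# parsed configuration object, and all paths are placeholders.
estimator = create_estimator(
    "FasterRCNN",
    config,
    tb_log_dir="/tmp/tb",
    checkpoint_dir="/tmp/checkpoints",
    no_cuda=True,
)
estimator.train()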
Example #11
def test_get_loader_from_path():
    loader = EstimatorCheckpoint._get_loader_from_path("gs://some/path")
    assert loader == load_from_gcs

    loader = EstimatorCheckpoint._get_loader_from_path("http://some/path")
    assert loader == load_from_http

    loader = EstimatorCheckpoint._get_loader_from_path("https://some/path")
    assert loader == load_from_http

    loader = EstimatorCheckpoint._get_loader_from_path("/path/to/folder")
    assert loader == load_local

    with pytest.raises(ValueError, match=r"Given path:"):
        EstimatorCheckpoint._get_loader_from_path("dfdge")
Example #12
def test_faster_rcnn_load(mock_create, config, dataset):
    """test load model."""
    mock_create.return_value = dataset
    ckpt_dir = tmp_name + "/train/FasterRCNN.estimator"
    config.checkpoint_file = ckpt_dir
    log_dir = tmp_name + "/load/"
    config.system.logdir = log_dir
    kfp_writer = MagicMock()
    writer = SummaryWriter(config.system.logdir, write_to_disk=True)
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        log_dir=log_dir,
        distributed=config.system["distributed"],
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        device=torch.device("cpu"),
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
    )
    estimator.load(ckpt_dir)
    assert os.listdir(log_dir)[0].startswith("events.out.tfevents")
Example #13
def test_faster_rcnn_predict(config, dataset):
    """test predict."""

    checkpoint_file = tmp_name + "/train/FasterRCNN.estimator"
    kfp_writer = MagicMock()
    writer = MagicMock()

    # XXX This is just a hot fix to prevent a mysterious folder such as
    # <MagicMock name='mock.logdir' id='140420520377936'> from showing up
    # after running this test.
    writer.logdir = tmp_name

    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        checkpoint_dir="/tmp",
        distributed=False,
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
        checkpoint_file=checkpoint_file,
        logdir="/tmp",
        no_cuda=True,
    )
    estimator.writer = writer
    estimator.kfp_writer = kfp_writer
    estimator.checkpointer = checkpointer

    estimator.device = torch.device("cpu")
    image_size = (256, 256)
    image = Image.fromarray(np.random.random(image_size), "L")

    result = estimator.predict(image)
    assert result == []
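One caveat in this example: handing Image.fromarray a float64 array together with an explicit "L" mode makes Pillow reinterpret the raw float bytes as 8-bit pixels, which yields garbage pixel values (harmless here, since the test only needs a syntactically valid image). An explicitly typed construction avoids the reinterpretation:

import numpy as np
from PIL import Image

# Build the same 256x256 random grayscale test image from uint8 data.
array = (np.random.random((256, 256)) * 255).astype(np.uint8)
image = Image.fromarray(array, "L")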
Example #14
def test_faster_rcnn_save(mock_create, config, dataset):
    """test save model."""
    mock_create.return_value = dataset
    log_dir = tmp_name + "/test_save/"
    config.system.logdir = log_dir
    kfp_writer = MagicMock()
    writer = MagicMock()
    checkpointer = EstimatorCheckpoint(
        estimator_name=config.estimator,
        log_dir=log_dir,
        distributed=config.system["distributed"],
    )
    estimator = FasterRCNN(
        config=config,
        writer=writer,
        device=torch.device("cpu"),
        checkpointer=checkpointer,
        kfp_writer=kfp_writer,
    )
    estimator.save(log_dir + "FasterRCNN_test")

    assert any(
        [name.startswith("FasterRCNN_test") for name in os.listdir(log_dir)]
    )
Example #15
def test_get_gcs_loader_from_path():
    loader = EstimatorCheckpoint._get_loader_from_path("gs://some/path")
    assert loader == load_from_gcs
Example #16
def test_get_loader_raises_error():
    filepath = "some/wrong/path"
    with pytest.raises(ValueError, match=r"Given path:"):
        EstimatorCheckpoint._get_loader_from_path(filepath)
Example #17
def test_create_raises_value_error():
    incorrect_checkpoint_dir = "http://some/path"

    with pytest.raises(ValueError):
        EstimatorCheckpoint._create_writer(incorrect_checkpoint_dir, "abc")
Example #18
def test_get_http_loader_from_path(filepath):
    loader = EstimatorCheckpoint._get_loader_from_path(filepath)
    assert loader == load_from_http
Example #19
def test_get_local_loader_from_path():
    file_name = "FasterRCNN.estimator"
    with tempfile.TemporaryDirectory() as tmp:
        with open(os.path.join(tmp, file_name), "w") as f:
            loader = EstimatorCheckpoint._get_loader_from_path(f.name)
            assert loader == load_local
Example #20
def run(command, cfg):
    if cfg.system.verbose:
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.DEBUG)

    logger.info("Run command: %s with config: %s\n", command, cfg)

    if torch.cuda.is_available() and not cfg.system.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    logdir = cfg.system.logdir
    if logdir == const.NULL_STRING:
        # Use logdir=None to force using SummaryWriter default logdir,
        # which points to ./runs/<model>_<timestamp>
        logdir = None

    # TODO: this makes it so that we lose the tensorboard writer of
    # non-master processes, which could make debugging harder.
    writer = SummaryWriter(logdir, write_to_disk=is_master())
    kfp_writer = KubeflowPipelineWriter(filename=cfg.system.metricsfilename,
                                        filepath=cfg.system.metricsdir)
    checkpointer = EstimatorCheckpoint(
        estimator_name=cfg.estimator,
        log_dir=writer.logdir,
        distributed=cfg.system.distributed,
    )
    estimator = Estimator.create(
        cfg.estimator,
        config=cfg,
        writer=writer,
        kfp_writer=kfp_writer,
        device=device,
        checkpointer=checkpointer,
        gpu=args.gpu,
        rank=args.rank,
    )

    if command == "train":
        estimator.train()
    elif command == "evaluate":
        estimator.evaluate()
    elif command == "download-train":
        # TODO (YC)
        # We should remove references to auth-token in various places to
        # enable downloading the synthetic dataset. Usim is working on a
        # solution that will let customers specify a cloud storage path
        # to store simulations. In the future, we should simply rely on
        # GCS service accounts to access simulation data for a given
        # run execution id.
        Dataset.create(
            cfg.train.dataset.name,
            data_root=cfg.system.data_root,
            auth_token=cfg.system.auth_token,  # XXX(YC) This should be removed
            **cfg.train.dataset.args,
        )
        Dataset.create(
            cfg.val.dataset.name,
            data_root=cfg.system.data_root,
            auth_token=cfg.system.auth_token,  # XXX(YC) This should be removed
            **cfg.val.dataset.args,
        )
    elif command == "download-evaluate":
        Dataset.create(
            cfg.test.dataset.name,
            data_root=cfg.system.data_root,
            auth_token=cfg.system.auth_token,  # XXX(YC) This should be removed
            **cfg.test.dataset.args,
        )

    writer.close()
    kfp_writer.write_metric()
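The final write_metric() call flushes the metrics that KubeflowPipelineWriter collected during the run. Kubeflow Pipelines conventionally picks metrics up from a JSON file; below is a toy stand-in illustrating the idea (not the library's implementation, and the method names are assumptions).

import json
import os


class MinimalKFPWriter:
    """Toy stand-in for KubeflowPipelineWriter (illustration only)."""

    def __init__(self, filename, filepath):
        self.filename = filename
        self.filepath = filepath
        self.metrics = {}

    def add_metric(self, name, val):
        self.metrics[name] = val

    def write_metric(self):
        # Emit the metrics in the JSON shape Kubeflow Pipelines expects.
        os.makedirs(self.filepath, exist_ok=True)
        payload = {
            "metrics": [
                {"name": name, "numberValue": value}
                for name, value in self.metrics.items()
            ]
        }
        with open(os.path.join(self.filepath, self.filename), "w") as f:
            json.dump(payload, f)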