Example #1
0
        def _pipelines(model_class):
            """ Build a pair of prediction pipelines (save, load) for `model_class`.

            Both pipelines convert the batch to arrays, run `predict_model` on the
            model named 'model' and append the fetched predictions to the
            'predictions' pipeline variable. Only the first pipeline initialises the
            model; the second one expects the model to be provided by other means.

            Backend-specific settings (data format, array dtype and the way images are
            passed to `predict_model`) are chosen for TFModel and TorchModel subclasses.
            """
            data_format = predict_args = predict_kwargs = None
            config = {}

            if issubclass(model_class, TFModel):
                # images are passed by keyword
                data_format = 'channels_last'
                config['dtype'] = None
                predict_args, predict_kwargs = (), {'images': B('images')}
            elif issubclass(model_class, TorchModel):
                # images are passed positionally
                data_format = 'channels_first'
                config['dtype'] = 'float32'
                predict_args, predict_kwargs = (B('images'),), {}

            dataset, model_config = model_setup_images_clf(data_format)
            config['model_class'] = model_class
            config['model_config'] = model_config

            def _with_prediction(ppl):
                # shared tail of both pipelines: array conversion + prediction gathering
                return (ppl
                        .to_array(dtype=C('dtype'))
                        .predict_model('model', *predict_args,
                                       fetches='predictions', save_to=V('predictions', mode='a'),
                                       **predict_kwargs))

            save_pipeline = _with_prediction(
                Pipeline()
                .init_variable('predictions', default=[])
                .init_model('dynamic', C('model_class'), 'model', C('model_config')))
            load_pipeline = _with_prediction(
                Pipeline()
                .init_variable('predictions', default=[]))

            # bind every pipeline to the dataset first and to the config afterwards
            return (save_pipeline << dataset) << config, (load_pipeline << dataset) << config
Example #2
0
    def test_run(self, save_path, pipelines, model_class):
        """
        Check that a model saved at every iteration can be loaded back at the same iteration

        save_pipeline initialises a model and then, batch by batch:
            stores predictions;
            saves the current model state;
            trains the model.
        load_pipeline then goes through the same dataset and, batch by batch:
            loads the model saved at the corresponding iteration;
            stores predictions.

        Predictions collected by both pipelines are expected to be equal
        """
        save_pipeline, load_pipeline = pipelines(model_class)
        train_args, train_kwargs = self.train_args(model_class)

        def iteration_path():
            # path depends on the current iteration number of the running pipeline
            return save_path + I("current").str()

        # predictions -> save model -> train
        saver = Pipeline().save_model('model', path=iteration_path())
        saver = saver.train_model('model', *train_args, **train_kwargs)
        save_pipeline = save_pipeline + saver
        save_pipeline.run(BATCH_SIZE, n_epochs=1, bar=True)
        saved_predictions = save_pipeline.get_variable('predictions')

        # load model -> predictions
        loader = Pipeline().load_model('dynamic', C('model_class'), 'model', path=iteration_path())
        load_pipeline = loader + load_pipeline
        load_pipeline.run(BATCH_SIZE, n_epochs=1, bar=True)
        loaded_predictions = load_pipeline.get_variable('predictions')

        expected = np.concatenate(saved_predictions)
        actual = np.concatenate(loaded_predictions)
        assert (expected == actual).all()
Example #3
0
def test_d(size, n_splits):
    """Test checks for behaviour of D expression in `set_dataset` action.

    For every fold the pipeline is run twice over the train part of the
    cross-validation split selected by the 'fold' config key, and the collected
    indices are compared with all indices except one contiguous chunk.

    size
        size of the dataset.
    n_splits
        the number of cv folds.
    """
    dataset = Dataset(size)
    dataset.cv_split(n_splits=n_splits)

    collector = Pipeline().init_variable('indices', default=[])
    collector = collector.update(V('indices', mode='a'), B('indices')[0])
    pipeline = collector << dataset.CV(C('fold')).train

    all_indices = list(range(size))

    for fold in range(n_splits):
        pipeline.set_config({'fold': fold})

        # boundaries of the chunk expected to be missing from the train part
        fold_size = size // n_splits
        start, end = fold * fold_size, (fold + 1) * fold_size
        expected = all_indices[:start] + all_indices[end:]

        # two runs: the second one checks the pipeline after variables are reset
        for _ in range(2):
            pipeline.reset('vars')
            pipeline.run(1)

            assert pipeline.v('indices') == expected
Example #4
0
def pipeline():
    """ Build a pipeline template that initialises and trains a model taken from the config.

    Notes
    -----
    The template reads the following keys from the pipeline config:

    model_class : TFModel
        Architecture of model. List of available models is defined at 'AVAILABLE_MODELS'.

    model_config : Config
       Model parameters.

    Returns
    -------
    Pipeline
        A pipeline with model initialization and a training step whose fetched
        'loss' is saved to the 'current_loss' variable.
    """
    template = Pipeline().init_variable('current_loss')
    template = template.init_model('dynamic', C('model_class'), 'model', C('model_config'))
    template = template.to_array()
    template = template.train_model('model',
                                    fetches='loss',
                                    images=B('images'),
                                    labels=B('labels'),
                                    save_to=V('current_loss'))
    return template
Example #5
0
def model_pipeline():
    """ Build a pipeline template that initialises and trains a model named 'TestModel'.

    Notes
    -----
    The function takes no arguments; the template reads these keys from the
    pipeline config:

    model_class : subclass of TFModel
        Architecture of model. List of available models is defined at 'AVAILABLE_MODELS'.

    model_config : dict
        Dictionary with parameters of model.

    Returns
    -------
    Pipeline
        Test pipeline with model initialization and a training step whose fetched
        'loss' is saved to the 'current_loss' variable.
    """
    model_name = 'TestModel'

    template = Pipeline().init_variable('current_loss')
    template = template.init_model('dynamic', C('model_class'), model_name, C('model_config'))
    template = template.to_array()
    template = template.train_model(model_name,
                                    fetches='loss',
                                    images=B('images'),
                                    labels=B('labels'),
                                    save_to=V('current_loss'))
    return template
Example #6
0
    def test_save_to_c(self):
        """ Saving to a C expression sets the new config key and keeps the existing one. """
        ppl = Pipeline(config=Config(some=100))

        save_data_to(data=200, dst=C('value'), pipeline=ppl)

        for key, expected in (('some', 100), ('value', 200)):
            assert ppl.config[key] == expected
Example #7
0
    def test_save_to_list(self):
        """ Saving a list of items to a list of destinations (an array and a C expression). """
        target_array = np.zeros(3)
        ppl = Pipeline(config=Config(some=100))

        items = [[1, 2, 3], 200]
        destinations = [target_array, C('value')]
        save_data_to(data=items, dst=destinations, pipeline=ppl)

        assert (target_array == [1, 2, 3]).all()
        assert ppl.config['value'] == 200
def get_batch(data,
              pipeline,
              index=DATASET_SIZE,
              batch_class=Batch,
              skip=2,
              dst=False):
    """ Create a dataset and return the batch that follows `skip` skipped batches.

    Parameters
    ----------
    data
        data to use
    pipeline : bool or Pipeline
        whether to get a batch from a dataset (False) or from a pipeline;
        a Pipeline instance is prepended to the internal template pipeline

    index
        index passed to Dataset, DATASET_SIZE by default

    batch_class : type
        batch class passed to Dataset

    skip : int
        how many batches to skip

    dst : bool or list of str
        preload data when False or load to components given

    Returns
    -------
    the batch of size 10 obtained after `skip` batches were skipped
    """
    dataset_kwargs = {'batch_class': batch_class}
    if dst is False:
        # data is handed over to the dataset only in the preloading mode
        dataset_kwargs['preloaded'] = data
    dataset = Dataset(index, **dataset_kwargs)

    # touch batch data to fire preloading
    template_pipeline = Pipeline().init_variable('dummy')
    template_pipeline = template_pipeline.update(V('dummy'), B.data)

    if isinstance(pipeline, Pipeline):
        template_pipeline = pipeline + template_pipeline

    if pipeline is not False:
        source = dataset >> template_pipeline
    else:
        source = dataset

    # skip K batches, keep the one right after them
    n_batches = skip + 1
    for _ in range(n_batches):
        batch = source.next_batch(10)

    if dst is False:
        return batch
    return batch.load(src=data, dst=dst)
    def test_none(self, dst):
        """ Components that got no data, including the one added by the pipeline, are None. """
        labels = np.arange(DATASET_SIZE)
        # every image is filled with the value of its own label
        per_item_scale = labels.reshape(-1, 1, 1)
        images = np.ones((DATASET_SIZE, ) + IMAGE_SHAPE) * per_item_scale
        data = {'images': images, 'labels': labels + 1000}

        ppl = Pipeline().add_components('new')
        batch = get_batch(data, ppl, batch_class=MyBatch4, skip=2, dst=dst)

        # two batches of 10 items are skipped, so items 20..29 are expected
        expected_items = np.arange(20, 30)
        expected_labels = np.arange(1020, 1030)
        assert (batch.images[:, 0, 0] == expected_items).all()
        assert (batch.labels == expected_labels).all()

        assert batch.nodata1 is None
        assert batch.nodata2 is None
        assert batch.new is None
Example #10
0
def test_rebatch(batch_size, rebatch_size):
    """ checks that batch sizes before and after rebatch match the requested ones (and nothing crashes)"""
    rows = [np.array([i, i]) for i in range(DATASET_SIZE)]
    preloaded = (np.vstack(rows), )
    dataset = Dataset(index=DATASET_SIZE, batch_class=MyBatch, preloaded=preloaded)

    # workaround for pipeline variables getting lost after rebatch
    batch_lengths = {'before': [], 'after': []}

    def get_batch_len(batch, stage):
        # remember the size of each batch seen at the given stage
        batch_lengths[stage].append(batch.size)

    template = Pipeline().call(get_batch_len, 'before')
    template = template.rebatch(rebatch_size)
    template = template.call(get_batch_len, 'after')
    p = template << dataset

    p.run(batch_size=batch_size, n_epochs=1, bar=True)

    for stage, expected_size in (('before', batch_size), ('after', rebatch_size)):
        check_batch_lengths(batch_lengths[stage], expected_size)
Example #11
0
    'body/block/layout': 'cna',
    'device': C('device')  # it's technical parameter for TFModel
}

# Root pipelines over the train and test parts of MNIST.
# NOTE(review): lazy=True presumably defers execution until batches are requested,
# and n_epochs=None presumably means no limit on epochs - confirm against batchflow docs.
mnist = MNIST()
train_root = mnist.train.p.run(BATCH_SIZE,
                               shuffle=True,
                               n_epochs=None,
                               lazy=True)
test_root = mnist.test.p.run(BATCH_SIZE, shuffle=True, n_epochs=1, lazy=True)

# Training template: declares 'loss' and 'accuracy' variables, initialises a dynamic
# VGG16 model named 'conv' with model_config, converts batch data to arrays and
# trains the model, writing the fetched 'loss' to the 'loss' variable (mode='w').
# The 'accuracy' variable is only declared here; nothing in this template writes to it.
train_template = (Pipeline().init_variable(
    'loss', init_on_each_run=list).init_variable(
        'accuracy', init_on_each_run=list).init_model(
            'dynamic', VGG16, 'conv',
            config=model_config).to_array().train_model('conv',
                                                        images=B('images'),
                                                        labels=B('labels'),
                                                        fetches='loss',
                                                        save_to=V('loss',
                                                                  mode='w')))

test_template = (Pipeline().init_variable('predictions').init_variable(
    'metrics', init_on_each_run=None).import_model(
        'conv', C('import_from')).to_array().predict_model(
            'conv',
            images=B('images'),
            fetches='predictions',
            save_to=V('predictions')).gather_metrics(
                'class',
                targets=B('labels'),
                predictions=V('predictions'),