def _pipelines(model_class):
    """ Build a pair of save/load pipelines for the given model class. """
    config = {}
    data_format = predict_args = predict_kwargs = None
    if issubclass(model_class, TFModel):
        data_format = 'channels_last'
        config.update({'dtype': None})
        predict_args = ()
        predict_kwargs = dict(images=B('images'))
    elif issubclass(model_class, TorchModel):
        data_format = 'channels_first'
        config.update({'dtype': 'float32'})
        predict_args = (B('images'),)
        predict_kwargs = dict()

    dataset, model_config = model_setup_images_clf(data_format)
    config.update({'model_class': model_class, 'model_config': model_config})

    save_pipeline = (Pipeline()
                     .init_variable('predictions', default=[])
                     .init_model('dynamic', C('model_class'), 'model', C('model_config'))
                     .to_array(dtype=C('dtype'))
                     .predict_model('model', *predict_args, fetches='predictions',
                                    save_to=V('predictions', mode='a'), **predict_kwargs))
    load_pipeline = (Pipeline()
                     .init_variable('predictions', default=[])
                     .to_array(dtype=C('dtype'))
                     .predict_model('model', *predict_args, fetches='predictions',
                                    save_to=V('predictions', mode='a'), **predict_kwargs))

    save_pipeline = (save_pipeline << dataset) << config
    load_pipeline = (load_pipeline << dataset) << config
    return save_pipeline, load_pipeline
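# The `<<` chaining above is batchflow's way to bind first a dataset and then
# a config to a pipeline template. A minimal sketch of the same pattern,
# assuming the usual top-level batchflow exports; the names and numbers here
# are illustrative only, not part of the original tests.
from batchflow import Dataset, Pipeline, C, V

template = (Pipeline()
            .init_variable('step', default=None)
            .update(V('step'), C('step')))

# `<<` returns a new pipeline, so one template can be reused
# with different datasets and configs
bound = (template << Dataset(10)) << {'step': 2}
bound.run(2, n_epochs=1)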
def test_run(self, save_path, pipelines, model_class):
    """ Check model saving and loading during pipeline iterations.

    A model is initialised in save_pipeline, then for each batch:
    predictions are obtained and saved, the current model state is saved,
    and the model is trained.

    After that, load_pipeline runs over the same dataset and, for each batch,
    loads the model from the corresponding iteration and obtains and saves
    its predictions.

    Predictions from save_pipeline and load_pipeline should be equal.
    """
    save_pipeline, load_pipeline = pipelines(model_class)
    train_args, train_kwargs = self.train_args(model_class)

    save_tmpl = (Pipeline()
                 .save_model('model', path=save_path + I("current").str())
                 .train_model('model', *train_args, **train_kwargs))
    save_pipeline = save_pipeline + save_tmpl
    save_pipeline.run(BATCH_SIZE, n_epochs=1, bar=True)
    saved_predictions = save_pipeline.get_variable('predictions')

    load_tmpl = (Pipeline()
                 .load_model('dynamic', C('model_class'), 'model',
                             path=save_path + I("current").str()))
    load_pipeline = load_tmpl + load_pipeline
    load_pipeline.run(BATCH_SIZE, n_epochs=1, bar=True)
    loaded_predictions = load_pipeline.get_variable('predictions')

    assert (np.concatenate(saved_predictions) == np.concatenate(loaded_predictions)).all()
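# A note on the checkpoint naming above: I("current") is the named expression
# for the current iteration number, so with a hypothetical
# save_path = '/tmp/model_' the checkpoints land at '/tmp/model_0',
# '/tmp/model_1', and so on -- one file per training iteration, which is what
# lets load_pipeline restore the exact state seen by each batch.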
def test_d(size, n_splits):
    """ Check the behaviour of the D expression in the `set_dataset` action.

    Parameters
    ----------
    size : int
        Size of the dataset.
    n_splits : int
        The number of cv folds.
    """
    dataset = Dataset(size)
    dataset.cv_split(n_splits=n_splits)

    pipeline = (Pipeline()
                .init_variable('indices', default=[])
                .update(V('indices', mode='a'), B('indices')[0])
                ) << dataset.CV(C('fold')).train

    result = list(range(size))
    for fold in range(n_splits):
        pipeline.set_config({'fold': fold})
        start = fold * (size // n_splits)
        end = (fold + 1) * (size // n_splits)

        for _ in range(2):
            pipeline.reset('vars')
            pipeline.run(1)
            assert pipeline.v('indices') == result[:start] + result[end:]
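# A worked example of the fold arithmetic checked above, assuming cv_split
# keeps the original index order (no shuffling): with size=10 and n_splits=5,
# fold 1 gives start=2, end=4, so the expected train indices are
# result[:2] + result[4:] == [0, 1, 4, 5, 6, 7, 8, 9].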
def pipeline():
    """ Creates a pipeline configured to use a given model with a specified configuration.

    Notes
    -----
    The pipeline can be executed only if its config contains the following parameters:

    model_class : TFModel
        Architecture of the model. The list of available models is defined in 'AVAILABLE_MODELS'.
    model_config : Config
        Model parameters.

    Returns
    -------
    Pipeline
        A pipeline that contains model initialization and training with a given config.
    """
    test_pipeline = (Pipeline()
                     .init_variable('current_loss')
                     .init_model('dynamic', C('model_class'), 'model', C('model_config'))
                     .to_array()
                     .train_model('model',
                                  fetches='loss',
                                  images=B('images'),
                                  labels=B('labels'),
                                  save_to=V('current_loss')))
    return test_pipeline
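# A hedged usage sketch for the fixture above; ResNet18 is an arbitrary choice
# and `image_dataset` is a placeholder for any dataset whose batches expose
# 'images' and 'labels' components (neither is part of the original tests).
from batchflow.models.tf import ResNet18

config = {'model_class': ResNet18, 'model_config': {'initial_block/inputs': 'images'}}
train_pipeline = (pipeline() << image_dataset) << config
train_pipeline.run(16, n_epochs=1)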
def model_pipeline():
    """ Creates a pipeline configured to use a given model with the passed parameters.

    The pipeline expects the following config parameters:

    model_class : subclass of TFModel
        Architecture of the model. The list of available models is defined in 'AVAILABLE_MODELS'.
    model_config : dict
        Dictionary of model parameters.

    Returns
    -------
    Pipeline
        Test pipeline that initializes a model and prepares it for training with a given config.
    """
    test_pipeline = (Pipeline()
                     .init_variable('current_loss')
                     .init_model('dynamic', C('model_class'), 'TestModel', C('model_config'))
                     .to_array()
                     .train_model('TestModel',
                                  fetches='loss',
                                  images=B('images'),
                                  labels=B('labels'),
                                  save_to=V('current_loss')))
    return test_pipeline
def test_save_to_c(self):
    pipeline = Pipeline(config=Config(some=100))
    save_data_to(data=200, dst=C('value'), pipeline=pipeline)
    assert pipeline.config['some'] == 100
    assert pipeline.config['value'] == 200
def test_save_to_list(self):
    arr = np.zeros(3)
    pipeline = Pipeline(config=Config(some=100))
    save_data_to(data=[[1, 2, 3], 200], dst=[arr, C('value')], pipeline=pipeline)
    assert (arr == [1, 2, 3]).all()
    assert pipeline.config['value'] == 200
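# Taken together, the two tests above pin down save_data_to's dispatch on the
# destination type: a numpy array destination is filled in place, while a
# C(...) named expression writes a new key into the pipeline's config without
# disturbing the keys that were already there.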
def get_batch(data, pipeline, index=DATASET_SIZE, batch_class=Batch, skip=2, dst=False):
    """
    Parameters
    ----------
    data
        data to use
    pipeline : bool or Pipeline
        whether to get a batch from a dataset or a pipeline
    index : DatasetIndex or int
        index (or size) of the dataset
    batch_class : type
        class of the batch
    skip : int
        how many batches to skip
    dst : bool or list of str
        preload data when False, or load it into the given components
    """
    if dst is False:
        dataset = Dataset(index, preloaded=data, batch_class=batch_class)
    else:
        dataset = Dataset(index, batch_class=batch_class)

    template_pipeline = (
        Pipeline()
        .init_variable('dummy')
        .update(V('dummy'), B.data)   # touch batch data to fire preloading
    )

    if isinstance(pipeline, Pipeline):
        template_pipeline = pipeline + template_pipeline

    source = (dataset >> template_pipeline) if pipeline is not False else dataset

    # skip `skip` batches and take the next one
    for _ in range(skip + 1):
        batch = source.next_batch(10)

    if dst is not False:
        batch = batch.load(src=data, dst=dst)
    return batch
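# Example use of the helper above, mirroring test_none below: with the default
# skip=2 and the internal batch size of 10, the returned batch holds items
# 20..29. The data values are hypothetical; MyBatch4, DATASET_SIZE and
# IMAGE_SHAPE come from this test module.
example = dict(images=np.zeros((DATASET_SIZE,) + IMAGE_SHAPE),
               labels=np.arange(DATASET_SIZE))
example_batch = get_batch(example, pipeline=False, batch_class=MyBatch4)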
def test_none(self, dst):
    labels = np.arange(DATASET_SIZE)
    images = np.ones((DATASET_SIZE,) + IMAGE_SHAPE) * labels.reshape(-1, 1, 1)
    data = dict(images=images, labels=labels + 1000)

    pipeline = Pipeline().add_components('new')
    batch = get_batch(data, pipeline, batch_class=MyBatch4, skip=2, dst=dst)

    # with skip=2 and batch size 10, the batch holds items 20..29
    assert (batch.images[:, 0, 0] == np.arange(20, 30)).all()
    assert (batch.labels == np.arange(1020, 1030)).all()
    assert batch.nodata1 is None
    assert batch.nodata2 is None
    assert batch.new is None
def test_rebatch(batch_size, rebatch_size):
    """ Checks that rebatch produces batches of expected lengths (and doesn't crash). """
    data = np.vstack([np.array([i, i]) for i in range(DATASET_SIZE)])
    data = (data,)
    dataset = Dataset(index=DATASET_SIZE, batch_class=MyBatch, preloaded=data)

    # workaround for pipeline variables getting lost after rebatch
    batch_lengths = {'before': [], 'after': []}

    def get_batch_len(batch, dump):
        batch_lengths[dump].append(batch.size)

    p = (Pipeline()
         .call(get_batch_len, 'before')
         .rebatch(rebatch_size)
         .call(get_batch_len, 'after')) << dataset
    p.run(batch_size=batch_size, n_epochs=1, bar=True)

    check_batch_lengths(batch_lengths['before'], batch_size)
    check_batch_lengths(batch_lengths['after'], rebatch_size)
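# Worked example of the length check above (DATASET_SIZE=100 is hypothetical):
# with batch_size=30 and rebatch_size=45, the 'before' lengths are
# 30, 30, 30, 10 and the 'after' lengths are 45, 45, 10 -- rebatch gathers
# incoming batches and re-slices them to the new size, so only the last batch
# of an epoch may come up short.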
    'body/block/layout': 'cna',
    'device': C('device')  # a technical parameter for TFModel
}

mnist = MNIST()
train_root = mnist.train.p.run(BATCH_SIZE, shuffle=True, n_epochs=None, lazy=True)
test_root = mnist.test.p.run(BATCH_SIZE, shuffle=True, n_epochs=1, lazy=True)

train_template = (Pipeline()
                  .init_variable('loss', init_on_each_run=list)
                  .init_variable('accuracy', init_on_each_run=list)
                  .init_model('dynamic', VGG16, 'conv', config=model_config)
                  .to_array()
                  .train_model('conv',
                               images=B('images'), labels=B('labels'),
                               fetches='loss', save_to=V('loss', mode='w')))

test_template = (Pipeline()
                 .init_variable('predictions')
                 .init_variable('metrics', init_on_each_run=None)
                 .import_model('conv', C('import_from'))
                 .to_array()
                 .predict_model('conv',
                                images=B('images'),
                                fetches='predictions',
                                save_to=V('predictions'))
                 .gather_metrics('class',
                                 targets=B('labels'),
                                 predictions=V('predictions'),