def _pipelines(model_class):
    """ Create save and load pipelines for the given model class. """
    config = {}
    data_format = predict_args = predict_kwargs = None

    if issubclass(model_class, TFModel):
        data_format = 'channels_last'
        config.update({'dtype': None})
        predict_args = ()
        predict_kwargs = dict(images=B('images'))
    elif issubclass(model_class, TorchModel):
        data_format = 'channels_first'
        config.update({'dtype': 'float32'})
        predict_args = (B('images'),)
        predict_kwargs = dict()

    dataset, model_config = model_setup_images_clf(data_format)
    config.update({'model_class': model_class, 'model_config': model_config})

    save_pipeline = (Pipeline()
                     .init_variable('predictions', default=[])
                     .init_model('dynamic', C('model_class'), 'model', C('model_config'))
                     .to_array(dtype=C('dtype'))
                     .predict_model('model', *predict_args, fetches='predictions',
                                    save_to=V('predictions', mode='a'), **predict_kwargs))

    load_pipeline = (Pipeline()
                     .init_variable('predictions', default=[])
                     .to_array(dtype=C('dtype'))
                     .predict_model('model', *predict_args, fetches='predictions',
                                    save_to=V('predictions', mode='a'), **predict_kwargs))

    save_pipeline = (save_pipeline << dataset) << config
    load_pipeline = (load_pipeline << dataset) << config
    return save_pipeline, load_pipeline
def test_d(size, n_splits):
    """ Check the behaviour of the D expression in the `set_dataset` action.

    size
        Size of the dataset.
    n_splits
        Number of cross-validation folds.
    """
    dataset = Dataset(size)
    dataset.cv_split(n_splits=n_splits)

    pipeline = (Pipeline()
                .init_variable('indices', default=[])
                .update(V('indices', mode='a'), B('indices')[0])
                ) << dataset.CV(C('fold')).train

    result = list(range(size))
    for fold in range(n_splits):
        pipeline.set_config({'fold': fold})
        start = fold * (size // n_splits)
        end = (fold + 1) * (size // n_splits)

        for _ in range(2):
            pipeline.reset('vars')
            pipeline.run(1)
            assert pipeline.v('indices') == result[:start] + result[end:]
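# A minimal usage sketch (not part of the test above) of the cross-validation API it
# exercises; the dataset size, fold count and variable name below are illustrative.
#
#     ds = Dataset(30)
#     ds.cv_split(n_splits=3)
#     ppl = (Pipeline()
#            .init_variable('seen', default=[])
#            .update(V('seen', mode='a'), B('indices'))
#            ) << ds.CV(C('fold')).train
#     ppl.set_config({'fold': 0})   # the C('fold') expression picks the fold at run time
#     ppl.run(10)                   # iterates over the train part of fold 0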
def pipeline():
    """ Creates a pipeline configured to use a given model with a specified configuration.

    Notes
    -----
    The pipeline can be executed only if its config contains the following parameters:

    model_class : TFModel
        Architecture of the model. The list of available models is defined in 'AVAILABLE_MODELS'.

    model_config : Config
        Model parameters.

    Returns
    -------
    Pipeline
        A pipeline that contains model initialization and training with a given config.
    """
    test_pipeline = (Pipeline()
                     .init_variable('current_loss')
                     .init_model('dynamic', C('model_class'), 'model', C('model_config'))
                     .to_array()
                     .train_model('model',
                                  fetches='loss',
                                  images=B('images'),
                                  labels=B('labels'),
                                  save_to=V('current_loss')))
    return test_pipeline
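# Hypothetical usage sketch (the model and dataset names are illustrative, not from this
# file): the fixture's pipeline becomes runnable only after a dataset and a config
# supplying 'model_class' and 'model_config' are attached.
#
#     config = {'model_class': SomeModel, 'model_config': {'loss': 'ce'}}
#     ppl = (pipeline() << some_dataset) << config
#     ppl.run(16, n_epochs=1)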
def model_pipeline():
    """ Creates an instance of Pipeline configured to use a given model with the passed parameters.

    Parameters
    ----------
    model_class : subclass of TFModel
        Architecture of the model. The list of available models is defined in 'AVAILABLE_MODELS'.

    model_config : dict
        Dictionary with model parameters.

    Returns
    -------
    Pipeline
        Test pipeline that consists of model initialization and preparation for training
        with the given config.
    """
    test_pipeline = (Pipeline()
                     .init_variable('current_loss')
                     .init_model('dynamic', C('model_class'), 'TestModel', C('model_config'))
                     .to_array()
                     .train_model('TestModel',
                                  fetches='loss',
                                  images=B('images'),
                                  labels=B('labels'),
                                  save_to=V('current_loss')))
    return test_pipeline
def get_batch(data, pipeline, index=DATASET_SIZE, batch_class=Batch, skip=2, dst=False):
    """ Fetch a batch from a dataset or a pipeline.

    Parameters
    ----------
    data
        Data to use.
    pipeline : bool or Pipeline
        Whether to get a batch from a dataset or a pipeline.
    index : DatasetIndex
    batch_class : type
    skip : int
        How many batches to skip.
    dst : bool or list of str
        Preload the data when False, or load it into the given components.
    """
    if dst is False:
        dataset = Dataset(index, preloaded=data, batch_class=batch_class)
    else:
        dataset = Dataset(index, batch_class=batch_class)

    template_pipeline = (Pipeline()
                         .init_variable('dummy')
                         .update(V('dummy'), B.data)   # touch batch data to fire preloading
                         )

    if isinstance(pipeline, Pipeline):
        template_pipeline = pipeline + template_pipeline

    source = (dataset >> template_pipeline) if pipeline is not False else dataset

    # skip the first `skip` batches and keep the next one
    for _ in range(skip + 1):
        batch = source.next_batch(10)

    if dst is not False:
        batch = batch.load(src=data, dst=dst)

    return batch
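# Illustrative call (the data array is a stand-in, not from the original file): build a
# preloaded dataset from `data` and fetch the batch that follows the first `skip` batches.
#
#     data = np.arange(DATASET_SIZE)
#     batch = get_batch(data, pipeline=False, skip=2)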
def main():
    device_id = 4
    print("Used: ", get_mem_info(device_id))

    dataset = Imagenette160(bar=True)

    model_config = dict(model=UNet)
    model_config['device'] = f'cuda:{device_id}'
    model_config['loss'] = 'mse'

    train_pipeline = (dataset.train.p
                      .crop(shape=(160, 160), origin='center')
                      .init_variable('loss_history', [])
                      .to_array(channels='first', dtype=np.float32)
                      .multiply(1. / 255)
                      .init_model('dynamic', UNet, 'unet', config=model_config)
                      .train_model('unet', B.images, B.images,
                                   fetches='loss',
                                   save_to=V('loss_history', mode='a'),
                                   use_lock=True))

    # init_batch_size = 2
    # n_iters = 50
    # batch_size = init_batch_size
    # torch.cuda.empty_cache()
    # first_run_memory = get_run_mem(dataset, device_id, model_config, train_pipeline,
    #                                batch_size=batch_size, n_iters=n_iters)
    # torch.cuda.empty_cache()
    # for i in range(1, 6):
    #     init_batch_size = pow(2, (i-1)) * batch_size
    #     second_run_memory = get_run_mem(dataset, device_id, model_config, train_pipeline,
    #                                     batch_size=pow(2, i) * batch_size, n_iters=n_iters)
    #     print("Batches: ", pow(2, (i-1)) * batch_size, pow(2, i) * batch_size)
    #     print(first_run_memory, second_run_memory)
    #     print("Max batch size:",
    #           init_batch_size * (100 - 2 * first_run_memory + second_run_memory)
    #           / (second_run_memory - first_run_memory))
    #     first_run_memory = second_run_memory

    n_iters = 50
    batch_size = 78
    second_run_memory = get_run_mem(dataset, device_id, model_config, train_pipeline,
                                    batch_size=batch_size, n_iters=n_iters)
    print(second_run_memory)
def test_i(name, expectation, limit_name, limit_value, result):
    """ Check the behaviour of I under different pipeline configurations.

    name
        Name of I, defines its output.
    expectation
        The test is expected to raise an error when the name requires calculation of the
        total number of iterations (e.g. for 'm') and this number is not defined in the
        pipeline (limit_value is None).
    limit_name
        'n_epochs' or 'n_iters'.
    limit_value
        Total number of epochs or iterations to run.
    result
        Expected output of I. If None, I is expected to raise an error.
    """
    kwargs = {'batch_size': 2, limit_name: limit_value, 'lazy': True}

    pipeline = (Dataset(10).pipeline()
                .init_variable('var', -1)
                .update(V('var', mode='w'), I(name))
                .run(**kwargs))

    with expectation:
        _ = pipeline.next_batch()
        assert pipeline.get_variable('var') == result
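# Reference sketch for the I expression exercised above, assuming the conventional
# batchflow names: I('c') yields the current iteration, I('m') the total number of
# iterations (only computable when 'n_epochs' or 'n_iters' is set), and I('r') the
# completed fraction, e.g.
#
#     (Dataset(10).pipeline()
#      .init_variable('progress')
#      .update(V('progress', mode='w'), I('r'))
#      .run(2, n_epochs=1, lazy=True))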
import numpy as np

sys.path.append('..')
from batchflow import B, C, D, F, L, V, R, P, I, Dataset, Pipeline, Batch, \
                      apply_parallel, inbatch_parallel, action


#--------------------
#       COMMON
#--------------------

@pytest.mark.parametrize('named_expr', [
    C('option'),
    C('not defined', default=10),
    B('size'),
    D('size'),
    V('var'),
    R('normal', 0, 1),
    R('normal', 0, 1, size=B.size),
    F(lambda batch: 0),
    L(lambda: 0),
])
def test_general_get(named_expr):
    pipeline = (Dataset(10).pipeline({'option': 0})
                .init_variable('var')
                .do_nothing(named_expr)
                .run(2, lazy=True))

    failed = False
    try:
        _ = pipeline.next_batch()
    except KeyError:
        failed = True
    'body/block/layout': 'cna',
    'device': 'gpu:2'
}

mnist = MNIST()

train_ppl = (mnist.train.p
             .init_variable('loss', init_on_each_run=list)
             .init_variable('accuracy', init_on_each_run=list)
             .init_model('dynamic', VGG16, 'conv', config=model_config)
             .to_array(channels='first', dtype='float32')
             .train_model('conv', B('images'), B('labels'),
                          fetches='loss',
                          save_to=V('loss', mode='w'))
             .run(BATCH_SIZE, shuffle=True, n_epochs=1, lazy=True))

test_ppl = (mnist.test.p
            .init_variable('predictions')
            .init_variable('metrics', init_on_each_run=None)
            .import_model('conv', train_ppl)
            .to_array(channels='first', dtype='float32')
            .predict_model('conv', B('images'),
                           targets=B('labels'),
                           fetches='predictions',
                           save_to=V('predictions'))
            .gather_metrics('class',
                            targets=B('labels'),
                            predictions=V('predictions'),
# pylint: disable=redefined-outer-name, missing-docstring, bad-continuation
import sys
from contextlib import ExitStack as does_not_raise

import pytest

sys.path.append('..')
from batchflow import B, C, D, F, L, V, R, P, I, Dataset


@pytest.mark.parametrize('named_expr', [
    C('option'),
    B('size'),
    D('size'),
    V('var'),
    R('normal', 0, 1),
    P('normal', 0, 1),
    F(lambda batch: 0),
    L(lambda: 0),
])
def test_general_get(named_expr):
    pipeline = (Dataset(10).pipeline({'option': 0})
                .init_variable('var')
                .do_nothing(named_expr)
                .run(2, lazy=True))

    failed = False
    try:
        _ = pipeline.next_batch()
    except KeyError:
        failed = True
    if failed:
mnist = MNIST()

train_root = mnist.train.p.run(BATCH_SIZE, shuffle=True, n_epochs=None, lazy=True)
test_root = mnist.test.p.run(BATCH_SIZE, shuffle=True, n_epochs=1, lazy=True)

train_template = (Pipeline()
                  .init_variable('loss', init_on_each_run=list)
                  .init_variable('accuracy', init_on_each_run=list)
                  .init_model('dynamic', VGG16, 'conv', config=model_config)
                  .to_array()
                  .train_model('conv',
                               images=B('images'),
                               labels=B('labels'),
                               fetches='loss',
                               save_to=V('loss', mode='w')))

test_template = (Pipeline()
                 .init_variable('predictions')
                 .init_variable('metrics', init_on_each_run=None)
                 .import_model('conv', C('import_from'))
                 .to_array()
                 .predict_model('conv',
                                images=B('images'),
                                fetches='predictions',
                                save_to=V('predictions'))
                 .gather_metrics('class',
                                 targets=B('labels'),
                                 predictions=V('predictions'),
                                 fmt='logits',
                                 axis=-1,
                                 save_to=V('metrics', mode='a')))