Example #1
def test_sequential_scheduler_with_exception(dev_str, call):
    schedule_filepath = os.path.join(THIS_DIR, 'schedule_with_exception.json')
    scheduler = SequentialScheduler(schedule_filepath, num_attempts=2)
    helpers.remove_dirs()
    scheduler_len = len(scheduler.run())
    helpers.remove_dirs()
    assert scheduler_len == 1
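
A toy sketch of the retry semantics this test exercises, assuming (as the assertion implies) that run() returns only the tasks that completed successfully and that a task failing on every one of its num_attempts tries is dropped; this is an illustration, not the library's implementation:

def run_with_retries(tasks, num_attempts=2):
    # Toy stand-in for SequentialScheduler.run(): retry each task up to
    # num_attempts times and return the names of the tasks that succeeded.
    completed = []
    for name, task_fn in tasks:
        for _ in range(num_attempts):
            try:
                task_fn()
                completed.append(name)
                break
            except Exception:
                continue
    return completed

# One passing task plus one always-failing task leaves a single completed
# task, mirroring the `scheduler_len == 1` assertion above.
assert len(run_with_retries([('ok', lambda: None),
                             ('bad', lambda: 1 / 0)])) == 1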
Example #2
def test_visualizing(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()

    # currently only PyTorch supports graph compilation
    compile_mode = (compile_mode
                    if ivy.current_framework_str() == 'torch' else False)

    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1, 'dev_strs': [dev_str]}
    trainer_spec_args = {
        'total_iterations': 10,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode
    }
    trainer = builder.build_trainer(
        ExampleDataLoaderMin,
        ExampleNetworkMin,
        ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    try:
        trainer.visualize()
    except OSError:
        # tolerate environments where the visualization backend cannot
        # write output (e.g. a missing system dependency)
        pass
    trainer.close()
    builder_helpers.remove_dirs()
Example #3
def test_simple_multi_dev_trainers(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()

    if call is not helpers.torch_call:
        # ToDo: add multi-dev support for all backends, not just torch
        pytest.skip()

    # currently only PyTorch supports graph compilation
    compile_mode = (compile_mode
                    if ivy.current_framework_str() == 'torch' else False)

    # devices
    dev_strs = list()
    dev_str0 = dev_str
    dev_strs.append(dev_str0)
    if 'gpu' in dev_str:
        idx = ivy.num_gpus() - 1
        dev_str1 = dev_str[:-1] + str(idx)
        dev_strs.append(dev_str1)

    # test
    builder_helpers.remove_dirs()
    simple_example.main(compile_mode=compile_mode, dev_strs=dev_strs)
    builder_helpers.remove_dirs()
Example #4
def test_sequential_scheduler(dev_str, call):
    schedule_filepath = os.path.join(THIS_DIR, 'schedule.json')
    scheduler = SequentialScheduler(schedule_filepath)
    helpers.remove_dirs()
    scheduler_len = len(scheduler.run())
    helpers.remove_dirs()
    assert scheduler_len == 2
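
For reference, the schedule file consumed by SequentialScheduler maps each task name to an [entry_point, argument_string] pair; Example #9 below constructs an entry in exactly this shape. A minimal sketch of a two-task schedule.json, where the task names and the second entry point are assumptions:

import json

# Hypothetical two-task schedule; 'demos.simple_example.main' is the entry
# point used in Example #9, the second entry point is assumed.
schedule_dict = {
    'simple': ['demos.simple_example.main', ''],
    'full': ['demos.full_example.main', '']
}
with open('schedule.json', 'w') as schedule_file:
    json.dump(schedule_dict, schedule_file, indent=2, separators=(',', ': '))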
Example #5
def test_checkpoint_save_and_restore_via_public_trainer_methods(
        dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()

    # currently only PyTorch supports graph compilation
    compile_mode = (compile_mode
                    if ivy.current_framework_str() == 'torch' else False)

    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1, 'dev_strs': [dev_str]}
    trainer_spec_args = {
        'total_iterations': 0,
        'ld_chkpt': False,
        'compile_mode': compile_mode
    }
    trainer = builder.build_trainer(
        ExampleDataLoaderMin,
        ExampleNetworkMin,
        ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    chkpt0_path = os.path.join('chkpt/', 'test_chkpt0.hdf5')
    trainer.save(chkpt0_path)
    assert os.path.exists(chkpt0_path)
    trainer.train()
    chkpt1_path = os.path.join('chkpt/', 'test_chkpt1.hdf5')
    trainer.save(chkpt1_path)
    trainer.close()
    assert os.path.exists(chkpt1_path)

    data_loader_spec_args = {'batch_size': 1, 'dev_strs': [dev_str]}
    trainer_spec_args = {
        'total_iterations': 10,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode
    }
    trainer = builder.build_trainer(
        ExampleDataLoaderMin,
        ExampleNetworkMin,
        ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.restore(chkpt0_path)
    trainer.train()
    chkpt3_path = os.path.join('chkpt/', 'test_chkpt3.hdf5')
    trainer.save(chkpt3_path)
    trainer.close()
    assert os.path.exists(chkpt3_path)
    builder_helpers.remove_dirs()
Example #6
def test_simple_trainers(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    # currently only PyTorch supports graph compilation
    compile_mode = (compile_mode
                    if ivy.current_framework_str() == 'torch' else False)
    # test
    builder_helpers.remove_dirs()
    simple_example.main(compile_mode=compile_mode)
    builder_helpers.remove_dirs()
Example #7
def test_full_trainers(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    if call is helpers.jnp_call and ivy.wrapped_mode():
        # Jax does not support ivy.Array instances when calling _jax.grad()
        pytest.skip()
    # currently only PyTorch supports graph compilation
    compile_mode = (compile_mode
                    if ivy.current_framework_str() == 'torch' else False)
    # test
    builder_helpers.remove_dirs()
    full_example.main(compile_mode=compile_mode)
    builder_helpers.remove_dirs()
Example #8
def test_checkpoint_loading(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()

    # currently only PyTorch supports graph compilation
    compile_mode = (compile_mode
                    if ivy.current_framework_str() == 'torch' else False)

    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1, 'dev_strs': [dev_str]}
    trainer_spec_args = {
        'total_iterations': 10,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode
    }
    trainer = builder.build_trainer(
        ExampleDataLoaderMin,
        ExampleNetworkMin,
        ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    trainer.close()
    trainer_spec_args = {
        'total_iterations': 20,
        'ld_chkpt': True,
        'save_freq': 1,
        'compile_mode': compile_mode
    }
    trainer = builder.build_trainer(
        ExampleDataLoaderMin,
        ExampleNetworkMin,
        ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    trainer.close()
    checkpoint_nums = [
        int(fname.split('-')[-1].split('.')[0])
        for fname in os.listdir('log/chkpts')
    ]
    assert max(checkpoint_nums) == 19
    builder_helpers.remove_dirs()
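
The final assertion relies on a checkpoint naming convention in which the step index sits between the last '-' and the first '.' of the filename, e.g. chkpt-19.hdf5 (the prefix and extension are inferred, not confirmed by this snippet); with a save_freq of 1 and 20 total iterations, the last zero-indexed step is 19:

# Assumed filename shape; only the split positions are guaranteed by the
# parsing code above.
fname = 'chkpt-19.hdf5'
assert int(fname.split('-')[-1].split('.')[0]) == 19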
Example #9
def test_sequential_scheduler_with_dynamic_schedule_file_edit(dev_str, call):
    schedule_filepath = os.path.join(THIS_DIR, 'schedule.json')
    scheduler = SequentialScheduler(schedule_filepath)
    helpers.remove_dirs()

    with open(schedule_filepath) as file:
        original_schedule_dict = json.load(file)
    new_schedule_dict = {
        'minimal_again': ['demos.simple_example.main', ''],
        **original_schedule_dict
    }

    success = [False]

    def write_to_file():
        with open(schedule_filepath, 'w') as schedule_file:
            json.dump(new_schedule_dict, schedule_file, indent=2, separators=(',', ': '))

    def run_scheduler(success_list):
        time.sleep(0.2)
        num_completed_tasks = len(scheduler.run())
        print('found length: {}'.format(num_completed_tasks))
        if num_completed_tasks == 3:
            print('returning true!')
            success_list[0] = True
        else:
            raise Exception(
                'Expected 3 tasks to run, but {} actually ran.'.format(
                    num_completed_tasks))

    file_write_thread = threading.Thread(target=write_to_file)
    schedule_thread = threading.Thread(target=run_scheduler, args=(success,))
    schedule_thread.start()
    file_write_thread.start()
    schedule_thread.join()
    file_write_thread.join()

    with open(schedule_filepath, 'w') as file:
        json.dump(original_schedule_dict, file, indent=2, separators=(',', ': '))

    helpers.remove_dirs()
    assert success[0]
Example #10
def test_tune_numeric_spec(dev_str, call):
    if call is not helpers.torch_call:
        # ToDo: work out why the backend framework is fixed for tune after the first call,
        #  and include other frameworks in test once this is fixed
        pytest.skip()

    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1}
    trainer_spec_args = {
        'total_iterations': 10,
        'ld_chkpt': False,
        'log_freq': 1,
        'log_dir': os.path.join(THIS_DIR, 'log')
    }
    tuner_spec_args = {
        'framework': ivy.current_framework_str(),
        'train_steps_per_tune_step': 2,
        'trainer_spec': {
            'initial_learning_rate': {
                'min': 10**-6,
                'max': 10**-3,
                'exponent': 10
            }
        },
        'name': 'tune',
        'num_samples': 5,
        'parallel_trials': 1,
        'grace_period': 1,
        'checkpoint_freq': 0
    }
    tuner = builder.build_tuner(ExampleDataLoader,
                                ExampleNetwork,
                                ExampleTrainer,
                                data_loader_spec_args=data_loader_spec_args,
                                trainer_spec_args=trainer_spec_args,
                                tuner_spec_args=tuner_spec_args)
    tuner.tune()
    builder_helpers.remove_dirs()
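
The min/max/exponent keys of initial_learning_rate suggest a log-scale sweep between 1e-6 and 1e-3; the exact semantics live in ivy_builder's tuner spec, but a purely illustrative log-uniform sampler over that range (not the library's implementation) might look as follows:

import math
import random

def sample_log_uniform(lo=10**-6, hi=10**-3, base=10):
    # Draw the exponent uniformly, then exponentiate, so samples are spread
    # evenly across orders of magnitude rather than linearly.
    return base ** random.uniform(math.log(lo, base), math.log(hi, base))

samples = [sample_log_uniform() for _ in range(5)]  # cf. 'num_samples': 5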
Example #11
def test_reduced_cost_after_checkpoint_load(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    if call is helpers.jnp_call and ivy.wrapped_mode():
        # Jax does not support ivy.Array instances when calling _jax.grad()
        pytest.skip()

    example_dir = os.path.relpath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     '../ivy_builder_demos'))

    # currently only PyTorch supports graph compilation
    compile_mode = (compile_mode
                    if ivy.current_framework_str() == 'torch' else False)

    # dataset dirs specification
    dataset_dirs_args = dict()

    # dataset specification
    dataset_spec_filepath = os.path.join(example_dir, 'json_specs',
                                         'dataset_spec.json.example')
    dataset_spec_args = builder.parse_json_to_cont(dataset_spec_filepath)

    # data loader specification
    data_loader_spec_filepath = os.path.join(example_dir, 'json_specs',
                                             'data_loader_spec.json.example')
    data_loader_spec_args = builder.parse_json_to_cont(
        data_loader_spec_filepath)

    # network specification
    network_spec_filepath = os.path.join(example_dir, 'json_specs',
                                         'network_spec.json.example')
    network_spec_args = builder.parse_json_to_cont(network_spec_filepath)

    builder_helpers.remove_dirs()

    ivy.seed(0)
    trainer_spec_args = {
        'total_iterations': 1,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode
    }
    trainer = builder.build_trainer(
        ExampleDataLoader,
        ExampleNetwork,
        ExampleTrainer,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=ExampleDatasetDirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=ExampleDatasetSpec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=ExampleDataLoaderSpec,
        network_spec_args=network_spec_args,
        network_spec_class=ExampleNetworkSpec,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    initial_cost = trainer._total_cost
    assert trainer._global_step == 1
    trainer.close()

    ivy.seed(0)
    steps_to_take_first = 10
    trainer_spec_args = {
        'total_iterations': steps_to_take_first,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode
    }
    trainer = builder.build_trainer(
        ExampleDataLoader,
        ExampleNetwork,
        ExampleTrainer,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=ExampleDatasetDirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=ExampleDatasetSpec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=ExampleDataLoaderSpec,
        network_spec_args=network_spec_args,
        network_spec_class=ExampleNetworkSpec,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    ten_step_cost = trainer._total_cost
    assert trainer._global_step == steps_to_take_first
    trainer.close()
    assert initial_cost > ten_step_cost

    steps_to_take_second = 20
    trainer_spec_args = {
        'total_iterations': steps_to_take_second,
        'ld_chkpt': True,
        'save_freq': 1,
        'compile_mode': compile_mode
    }
    trainer = builder.build_trainer(
        ExampleDataLoader,
        ExampleNetwork,
        ExampleTrainer,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=ExampleDatasetDirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=ExampleDatasetSpec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=ExampleDataLoaderSpec,
        network_spec_args=network_spec_args,
        network_spec_class=ExampleNetworkSpec,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    twenty_step_cost = trainer._total_cost
    assert trainer._global_step == steps_to_take_second
    trainer.close()
    assert ten_step_cost > twenty_step_cost
    builder_helpers.remove_dirs()
Example #12
def test_tune_general_spec(dev_str, call):
    if call is not helpers.torch_call:
        # ToDo: work out why the backend framework is fixed for tune after the first call,
        #  and include other frameworks in test once this is fixed
        pytest.skip()

    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1}
    trainer_spec_args = {
        'total_iterations': 2,
        'ld_chkpt': False,
        'log_freq': 1,
        'log_dir': os.path.join(THIS_DIR, 'log')
    }
    tuner_spec_args = {
        'framework': ivy.current_framework_str(),
        'train_steps_per_tune_step': 1,
        'network_spec': {
            'spec_a': {
                "configs": [{
                    'param_0': True
                }, {
                    'param_1': False
                }],
                "grid": True
            },
            'spec_b': {
                "configs": [{
                    'param_0': True
                }, {
                    'param_0': False
                }]
            },
            'spec_c': {
                "spec_c_a": {
                    "configs": [1, 2],
                    "grid": True
                },
                "spec_c_b": {
                    "configs": ['100', '200']
                },
            },
            'spec_d_AND_spec_e_AND_spec_f': {
                "configs": [(False, False, False), (False, True, False),
                            (True, False, True), (False, True, True)],
                "grid":
                True
            }
        },
        'name': 'tune',
        'num_samples': 1,
        'parallel_trials': 1,
        'grace_period': 1,
        'checkpoint_freq': 0
    }
    tuner = builder.build_tuner(ExampleDataLoader,
                                ExampleNetwork,
                                ExampleTrainer,
                                data_loader_spec_args=data_loader_spec_args,
                                trainer_spec_args=trainer_spec_args,
                                tuner_spec_args=tuner_spec_args)
    tuner.tune()
    builder_helpers.remove_dirs()
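
A hypothetical illustration of how the 'grid': True axes above could expand: taking the Cartesian product of every grid-marked config list gives 2 x 2 x 4 = 16 grid combinations, with the non-grid specs (spec_b, spec_c_b) presumably sampled per trial:

import itertools

spec_a_configs = [{'param_0': True}, {'param_1': False}]
spec_c_a_configs = [1, 2]
spec_def_configs = [(False, False, False), (False, True, False),
                    (True, False, True), (False, True, True)]

# Cartesian product over the grid axes only; this mirrors the structure of
# the spec above but is not ivy_builder's own expansion logic.
grid_points = list(itertools.product(
    spec_a_configs, spec_c_a_configs, spec_def_configs))
assert len(grid_points) == 2 * 2 * 4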
Example #13
def test_tune_resume_training(dev_str, call):
    if call is not helpers.torch_call:
        # ToDo: work out why the backend framework is fixed for tune after the first call,
        #  and include other frameworks in test once this is fixed
        pytest.skip()
    if ivy.wrapped_mode():
        # this test fails when running all tests for some reason, need to further investigate
        pytest.skip()

    builder_helpers.remove_dirs()

    # tuner spec args
    train_steps_per_tune_step = 2
    data_loader_spec_args = {'batch_size': 1}
    tuner_spec_args = {
        'framework': ivy.current_framework_str(),
        'train_steps_per_tune_step': train_steps_per_tune_step,
        'trainer_spec': {
            'initial_learning_rate': {
                'min': 10**-5,
                'max': 10**-4,
                'num_grid_samples': 2,
                'grid': True
            }
        },
        'name': 'tune',
        'num_samples': 1,
        'parallel_trials': 1,
        'grace_period': -1,
        'checkpoint_freq': 0
    }

    # first run
    total_iterations = 5
    trainer_spec_args = {
        'total_iterations': total_iterations,
        'ld_chkpt': False,
        'log_freq': 1,
        'log_dir': os.path.join(THIS_DIR, 'log'),
        'save_freq': 1
    }
    tuner = builder.build_tuner(ExampleDataLoader,
                                ExampleNetwork,
                                ExampleTrainer,
                                data_loader_spec_args=data_loader_spec_args,
                                trainer_spec_args=trainer_spec_args,
                                tuner_spec_args=tuner_spec_args)
    first_results = ivy.Container(tuner.tune().results)
    first_losses = first_results.at_keys('cost').to_flat_list()

    # second run
    trainer_spec_args = {
        'total_iterations': total_iterations * 2,
        'ld_chkpt': True,
        'log_freq': 1,
        'log_dir': os.path.join(THIS_DIR, 'log'),
        'save_freq': 1
    }
    tuner = builder.build_tuner(ExampleDataLoader,
                                ExampleNetwork,
                                ExampleTrainer,
                                data_loader_spec_args=data_loader_spec_args,
                                trainer_spec_args=trainer_spec_args,
                                tuner_spec_args=tuner_spec_args)
    second_results = ivy.Container(tuner.tune().results)
    second_losses = second_results.at_keys('cost').to_flat_list()

    # assertion

    # first session ends training at ceil(5/2)=3 timesteps
    first_timestep = int(
        math.ceil(total_iterations / train_steps_per_tune_step))
    assert all(
        fts == first_timestep
        for fts in first_results.at_keys('timestep').to_flat_list())

    # second session ends training at ceil(10/2)=5 timesteps
    second_timestep = int(
        math.ceil(total_iterations * 2 / train_steps_per_tune_step))
    assert all(
        sts == second_timestep
        for sts in second_results.at_keys('timestep').to_flat_list())

    # both sessions trained for ceil(5/2)=3 training iterations
    training_iteration = int(
        math.ceil(total_iterations / train_steps_per_tune_step))
    assert all(
        fti == sti == training_iteration for fti, sti in zip(
            first_results.at_keys('training_iteration').to_flat_list(),
            second_results.at_keys('training_iteration').to_flat_list()))

    # the loss is lower for the second session, after the checkpoint load from the first
    assert all(
        second_loss < first_loss
        for first_loss, second_loss in zip(first_losses, second_losses))

    # end
    builder_helpers.remove_dirs()