def test_visualizing(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    # currently only PyTorch supports graph compilation
    compile_mode = compile_mode if ivy.current_framework_str() == 'torch' else False
    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1, 'dev_strs': [dev_str]}
    trainer_spec_args = {
        'total_iterations': 10,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode}
    trainer = builder.build_trainer(
        ExampleDataLoaderMin, ExampleNetworkMin, ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    try:
        trainer.visualize()
    except OSError:
        pass
    trainer.close()
    builder_helpers.remove_dirs()
def test_simple_multi_dev_trainers(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    if call is not helpers.torch_call:
        # ToDo: add multi-dev support for all backends, not just torch
        pytest.skip()
    # currently only PyTorch supports graph compilation
    compile_mode = compile_mode if ivy.current_framework_str() == 'torch' else False

    # devices
    dev_strs = list()
    dev_str0 = dev_str
    dev_strs.append(dev_str0)
    if 'gpu' in dev_str:
        # device strings are assumed of the form 'gpu:<idx>' with a single-digit
        # index; swap in the last available gpu as the second device
        idx = ivy.num_gpus() - 1
        dev_str1 = dev_str[:-1] + str(idx)
        dev_strs.append(dev_str1)

    # test
    builder_helpers.remove_dirs()
    simple_example.main(compile_mode=compile_mode, dev_strs=dev_strs)
    builder_helpers.remove_dirs()
def test_simple_trainers(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    # currently only PyTorch supports graph compilation
    compile_mode = compile_mode if ivy.current_framework_str() == 'torch' else False

    # test
    builder_helpers.remove_dirs()
    simple_example.main(compile_mode=compile_mode)
    builder_helpers.remove_dirs()
def test_checkpoint_save_and_restore_via_public_trainer_methods(
        dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    # currently only PyTorch supports graph compilation
    compile_mode = compile_mode if ivy.current_framework_str() == 'torch' else False
    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1, 'dev_strs': [dev_str]}
    trainer_spec_args = {
        'total_iterations': 0,
        'ld_chkpt': False,
        'compile_mode': compile_mode}
    trainer = builder.build_trainer(
        ExampleDataLoaderMin, ExampleNetworkMin, ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    chkpt0_path = os.path.join('chkpt/', 'test_chkpt0.hdf5')
    trainer.save(chkpt0_path)
    assert os.path.exists(chkpt0_path)
    trainer.train()
    chkpt1_path = os.path.join('chkpt/', 'test_chkpt1.hdf5')
    trainer.save(chkpt1_path)
    trainer.close()
    assert os.path.exists(chkpt1_path)

    data_loader_spec_args = {'batch_size': 1, 'dev_strs': [dev_str]}
    trainer_spec_args = {
        'total_iterations': 10,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode}
    trainer = builder.build_trainer(
        ExampleDataLoaderMin, ExampleNetworkMin, ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.restore(chkpt0_path)
    trainer.train()
    chkpt3_path = os.path.join('chkpt/', 'test_chkpt3.hdf5')
    trainer.save(chkpt3_path)
    trainer.close()
    assert os.path.exists(chkpt3_path)
    builder_helpers.remove_dirs()
def test_full_trainers(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    if call is helpers.jnp_call and ivy.wrapped_mode():
        # Jax does not support ivy.Array instances when calling _jax.grad()
        pytest.skip()
    # currently only PyTorch supports graph compilation
    compile_mode = compile_mode if ivy.current_framework_str() == 'torch' else False

    # test
    builder_helpers.remove_dirs()
    full_example.main(compile_mode=compile_mode)
    builder_helpers.remove_dirs()
def test_checkpoint_loading(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    # currently only PyTorch supports graph compilation
    compile_mode = compile_mode if ivy.current_framework_str() == 'torch' else False
    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1, 'dev_strs': [dev_str]}
    trainer_spec_args = {
        'total_iterations': 10,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode}
    trainer = builder.build_trainer(
        ExampleDataLoaderMin, ExampleNetworkMin, ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    trainer.close()
    trainer_spec_args = {
        'total_iterations': 20,
        'ld_chkpt': True,
        'save_freq': 1,
        'compile_mode': compile_mode}
    trainer = builder.build_trainer(
        ExampleDataLoaderMin, ExampleNetworkMin, ExampleTrainerMin,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    trainer.close()
    checkpoint_nums = [
        int(fname.split('-')[-1].split('.')[0])
        for fname in os.listdir('log/chkpts')]
    assert max(checkpoint_nums) == 19
    builder_helpers.remove_dirs()
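
# The final assertion above depends on the checkpoint filename convention
# `chkpt-<global_step>.hdf5`, with zero-indexed steps (so 20 iterations with
# save_freq=1 end at checkpoint 19). A minimal sketch of that parsing logic,
# assuming the convention holds; `_latest_checkpoint_step` is an illustrative
# helper, not part of the trainer API:
def _latest_checkpoint_step(chkpt_dir):
    # extract the integer step from each `chkpt-<step>.hdf5` filename
    steps = [int(fname.split('-')[-1].split('.')[0])
             for fname in os.listdir(chkpt_dir)]
    return max(steps)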
def test_tune_numeric_spec(dev_str, call):
    if call is not helpers.torch_call:
        # ToDo: work out why the backend framework is fixed for tune after the
        #  first call, and include other frameworks in test once this is fixed
        pytest.skip()
    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1}
    trainer_spec_args = {
        'total_iterations': 10,
        'ld_chkpt': False,
        'log_freq': 1,
        'log_dir': os.path.join(THIS_DIR, 'log')}
    tuner_spec_args = {
        'framework': ivy.current_framework_str(),
        'train_steps_per_tune_step': 2,
        'trainer_spec': {
            'initial_learning_rate': {
                'min': 10**-6,
                'max': 10**-3,
                'exponent': 10}},
        'name': 'tune',
        'num_samples': 5,
        'parallel_trials': 1,
        'grace_period': 1,
        'checkpoint_freq': 0}
    tuner = builder.build_tuner(
        ExampleDataLoader, ExampleNetwork, ExampleTrainer,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args,
        tuner_spec_args=tuner_spec_args)
    tuner.tune()
    builder_helpers.remove_dirs()
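
# The {min, max, exponent} spec above defines a log-scale search range for the
# learning rate. A minimal sketch of how such a spec could be sampled, assuming
# log-uniform behaviour; `_sample_log_uniform` is illustrative only and not the
# tuner's actual implementation:
def _sample_log_uniform(spec):
    import math
    import random
    # sample the exponent uniformly, e.g. in [-6, -3] for min=1e-6, max=1e-3
    log_min = math.log(spec['min'], spec['exponent'])
    log_max = math.log(spec['max'], spec['exponent'])
    return spec['exponent'] ** random.uniform(log_min, log_max)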
def test_reduced_cost_after_checkpoint_load(dev_str, call, compile_mode):
    if call is helpers.np_call:
        # numpy does not support gradients, required for training
        pytest.skip()
    if call is helpers.jnp_call and ivy.wrapped_mode():
        # Jax does not support ivy.Array instances when calling _jax.grad()
        pytest.skip()
    example_dir = os.path.relpath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)),
                     '../ivy_builder_demos'))

    # currently only PyTorch supports graph compilation
    compile_mode = compile_mode if ivy.current_framework_str() == 'torch' else False

    # dataset dirs specification
    dataset_dirs_args = dict()

    # dataset specification
    dataset_spec_filepath = os.path.join(
        example_dir, 'json_specs', 'dataset_spec.json.example')
    dataset_spec_args = builder.parse_json_to_cont(dataset_spec_filepath)

    # data loader specification
    data_loader_spec_filepath = os.path.join(
        example_dir, 'json_specs', 'data_loader_spec.json.example')
    data_loader_spec_args = builder.parse_json_to_cont(data_loader_spec_filepath)

    # network specification
    network_spec_filepath = os.path.join(
        example_dir, 'json_specs', 'network_spec.json.example')
    network_spec_args = builder.parse_json_to_cont(network_spec_filepath)

    builder_helpers.remove_dirs()

    # train for a single iteration from scratch, with a fixed seed
    ivy.seed(0)
    trainer_spec_args = {
        'total_iterations': 1,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode}
    trainer = builder.build_trainer(
        ExampleDataLoader, ExampleNetwork, ExampleTrainer,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=ExampleDatasetDirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=ExampleDatasetSpec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=ExampleDataLoaderSpec,
        network_spec_args=network_spec_args,
        network_spec_class=ExampleNetworkSpec,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    initial_cost = trainer._total_cost
    assert trainer._global_step == 1
    trainer.close()

    # train for ten iterations from scratch, with the same seed
    ivy.seed(0)
    steps_to_take_first = 10
    trainer_spec_args = {
        'total_iterations': steps_to_take_first,
        'ld_chkpt': False,
        'save_freq': 1,
        'compile_mode': compile_mode}
    trainer = builder.build_trainer(
        ExampleDataLoader, ExampleNetwork, ExampleTrainer,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=ExampleDatasetDirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=ExampleDatasetSpec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=ExampleDataLoaderSpec,
        network_spec_args=network_spec_args,
        network_spec_class=ExampleNetworkSpec,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    ten_step_cost = trainer._total_cost
    assert trainer._global_step == steps_to_take_first
    trainer.close()
    assert initial_cost > ten_step_cost

    # resume from the latest checkpoint and train for ten further iterations
    steps_to_take_second = 20
    trainer_spec_args = {
        'total_iterations': steps_to_take_second,
        'ld_chkpt': True,
        'save_freq': 1,
        'compile_mode': compile_mode}
    trainer = builder.build_trainer(
        ExampleDataLoader, ExampleNetwork, ExampleTrainer,
        dataset_dirs_args=dataset_dirs_args,
        dataset_dirs_class=ExampleDatasetDirs,
        dataset_spec_args=dataset_spec_args,
        dataset_spec_class=ExampleDatasetSpec,
        data_loader_spec_args=data_loader_spec_args,
        data_loader_spec_class=ExampleDataLoaderSpec,
        network_spec_args=network_spec_args,
        network_spec_class=ExampleNetworkSpec,
        trainer_spec_args=trainer_spec_args)
    trainer.setup()
    trainer.train()
    twenty_step_cost = trainer._total_cost
    assert trainer._global_step == steps_to_take_second
    trainer.close()
    assert ten_step_cost > twenty_step_cost
    builder_helpers.remove_dirs()
def test_tune_general_spec(dev_str, call):
    if call is not helpers.torch_call:
        # ToDo: work out why the backend framework is fixed for tune after the
        #  first call, and include other frameworks in test once this is fixed
        pytest.skip()
    builder_helpers.remove_dirs()
    data_loader_spec_args = {'batch_size': 1}
    trainer_spec_args = {
        'total_iterations': 2,
        'ld_chkpt': False,
        'log_freq': 1,
        'log_dir': os.path.join(THIS_DIR, 'log')}
    tuner_spec_args = {
        'framework': ivy.current_framework_str(),
        'train_steps_per_tune_step': 1,
        'network_spec': {
            'spec_a': {
                'configs': [{'param_0': True}, {'param_1': False}],
                'grid': True},
            'spec_b': {
                'configs': [{'param_0': True}, {'param_0': False}]},
            'spec_c': {
                'spec_c_a': {'configs': [1, 2], 'grid': True},
                'spec_c_b': {'configs': ['100', '200']}},
            'spec_d_AND_spec_e_AND_spec_f': {
                'configs': [(False, False, False), (False, True, False),
                            (True, False, True), (False, True, True)],
                'grid': True}},
        'name': 'tune',
        'num_samples': 1,
        'parallel_trials': 1,
        'grace_period': 1,
        'checkpoint_freq': 0}
    tuner = builder.build_tuner(
        ExampleDataLoader, ExampleNetwork, ExampleTrainer,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args,
        tuner_spec_args=tuner_spec_args)
    tuner.tune()
    builder_helpers.remove_dirs()
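
# Keys joined with '_AND_' above vary several spec entries jointly: each entry
# in 'configs' supplies one value per joined key. A minimal sketch of how such
# a key could be expanded, assuming that convention; `_expand_and_key` is
# illustrative, not the tuner's actual implementation:
def _expand_and_key(key, config):
    # e.g. ('spec_d_AND_spec_e', (True, False)) -> {'spec_d': True, 'spec_e': False}
    return dict(zip(key.split('_AND_'), config))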
def test_tune_resume_training(dev_str, call):
    if call is not helpers.torch_call:
        # ToDo: work out why the backend framework is fixed for tune after the
        #  first call, and include other frameworks in test once this is fixed
        pytest.skip()
    if ivy.wrapped_mode():
        # this test fails when running all tests for some reason, need to further investigate
        pytest.skip()
    builder_helpers.remove_dirs()

    # tuner spec args
    train_steps_per_tune_step = 2
    data_loader_spec_args = {'batch_size': 1}
    tuner_spec_args = {
        'framework': ivy.current_framework_str(),
        'train_steps_per_tune_step': train_steps_per_tune_step,
        'trainer_spec': {
            'initial_learning_rate': {
                'min': 10**-5,
                'max': 10**-4,
                'num_grid_samples': 2,
                'grid': True}},
        'name': 'tune',
        'num_samples': 1,
        'parallel_trials': 1,
        'grace_period': -1,
        'checkpoint_freq': 0}

    # first run
    total_iterations = 5
    trainer_spec_args = {
        'total_iterations': total_iterations,
        'ld_chkpt': False,
        'log_freq': 1,
        'log_dir': os.path.join(THIS_DIR, 'log'),
        'save_freq': 1}
    tuner = builder.build_tuner(
        ExampleDataLoader, ExampleNetwork, ExampleTrainer,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args,
        tuner_spec_args=tuner_spec_args)
    first_results = ivy.Container(tuner.tune().results)
    first_losses = first_results.at_keys('cost').to_flat_list()

    # second run
    trainer_spec_args = {
        'total_iterations': total_iterations * 2,
        'ld_chkpt': True,
        'log_freq': 1,
        'log_dir': os.path.join(THIS_DIR, 'log'),
        'save_freq': 1}
    tuner = builder.build_tuner(
        ExampleDataLoader, ExampleNetwork, ExampleTrainer,
        data_loader_spec_args=data_loader_spec_args,
        trainer_spec_args=trainer_spec_args,
        tuner_spec_args=tuner_spec_args)
    second_results = ivy.Container(tuner.tune().results)
    second_losses = second_results.at_keys('cost').to_flat_list()

    # assertion

    # first session ends training at ceil(5/2)=3 timesteps
    first_timestep = int(math.ceil(total_iterations / train_steps_per_tune_step))
    assert min([fts == first_timestep
                for fts in first_results.at_keys('timestep').to_flat_list()])

    # second session ends training at ceil(10/2)=5 timesteps
    second_timestep = int(math.ceil(total_iterations * 2 / train_steps_per_tune_step))
    assert min([sts == second_timestep
                for sts in second_results.at_keys('timestep').to_flat_list()])

    # both sessions trained for ceil(5/2)=3 training iterations
    training_iteration = int(math.ceil(total_iterations / train_steps_per_tune_step))
    assert min([fti == sti == training_iteration
                for fti, sti in zip(
                    first_results.at_keys('training_iteration').to_flat_list(),
                    second_results.at_keys('training_iteration').to_flat_list())])

    # the loss is lower for the second session, after the checkpoint load from the first
    assert min([second_loss < first_loss
                for first_loss, second_loss in zip(first_losses, second_losses)])

    # end
    builder_helpers.remove_dirs()
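
# The timestep assertions above follow from simple ceiling arithmetic: one tune
# step covers `train_steps_per_tune_step` training iterations, so a session of
# `n` iterations reports ceil(n / train_steps_per_tune_step) timesteps. A
# minimal sketch of that arithmetic (`_expected_tune_timesteps` is illustrative
# only, assuming `math` is imported at module level):
def _expected_tune_timesteps(total_iterations, train_steps_per_tune_step):
    # ceil(5/2) == 3 for the first session, ceil(10/2) == 5 for the second
    return int(math.ceil(total_iterations / train_steps_per_tune_step))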