def bn_inception_train_and_eval(train_data, test_data, mean_data, num_quantization_bits=32, epoch_size=1281167,
                                max_epochs=300, minibatch_size=None, restore=True, log_to_file=None,
                                num_mbs_per_log=100, gen_heartbeat=False, scale_up=False, profiling=False):
    """Build a BN-Inception network and run distributed training plus a test-set sweep.

    Wires together the network, trainer, progress logging, and the train/test
    minibatch sources, then delegates to train_and_test. When minibatch_size is
    not given, it defaults to 32 per worker, optionally multiplied by the number
    of distributed workers (scale_up=True).
    """
    _cntk_py.set_computation_network_trace_level(0)

    # NOTE: scaling up minibatch_size increases sample throughput. In 8-GPU machine,
    # ResNet110 samples-per-second is ~7x of single GPU, comparing to ~3x without scaling
    # up. However, bigger minibatch size on the same number of samples means less updates,
    # thus leads to higher training error. This is a trade-off of speed and accuracy
    if minibatch_size is not None:
        mb_size = minibatch_size
    else:
        mb_size = 32 * (Communicator.num_workers() if scale_up else 1)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_bn_inception()
    trainer = create_trainer(network, epoch_size, max_epochs, mb_size, num_quantization_bits, progress_printer)

    # Train reader supplies exactly max_epochs worth of samples; test reader sweeps the data once.
    train_source = create_image_mb_source(train_data, mean_data, True, total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, False, total_number_of_samples=FULL_DATA_SWEEP)

    train_and_test(network, trainer, train_source, test_source, mb_size, epoch_size, restore, profiling)
def bn_inception_train_and_eval(train_data, test_data, mean_data, num_quantization_bits=32, epoch_size=50000,
                                max_epochs=200, minibatch_size=None, restore=True, log_to_file=None,
                                num_mbs_per_log=100, gen_heartbeat=False, scale_up=False, profiling=False):
    """Train and evaluate BN-Inception (50k-sample dataset variant, e.g. CIFAR-sized).

    Constructs the network, trainer, and minibatch sources and hands them to
    train_and_test. Without an explicit minibatch_size, uses 128 samples per
    worker, scaled by the worker count when scale_up is enabled.
    """
    _cntk_py.set_computation_network_trace_level(0)

    # NOTE: scaling up minibatch_size increases sample throughput. In 8-GPU machine,
    # ResNet110 samples-per-second is ~7x of single GPU, comparing to ~3x without scaling
    # up. However, bigger minibatch size on the same number of samples means less updates,
    # thus leads to higher training error. This is a trade-off of speed and accuracy
    if minibatch_size is None:
        per_worker = 128
        mb_size = per_worker * (Communicator.num_workers() if scale_up else 1)
    else:
        mb_size = minibatch_size

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_bn_inception()
    trainer = create_trainer(network, epoch_size, max_epochs, mb_size, num_quantization_bits, progress_printer)

    # Training source is sized for the full run; the test source does a single full sweep.
    train_source = create_image_mb_source(train_data, mean_data, True, total_number_of_samples=max_epochs * epoch_size)
    test_source = create_image_mb_source(test_data, mean_data, False, total_number_of_samples=FULL_DATA_SWEEP)

    train_and_test(network, trainer, train_source, test_source, max_epochs, mb_size, epoch_size, restore, profiling)
def sequence_to_sequence_translator(train_data, test_data, epoch_size=908241,
                                    num_quantization_bits=default_quantization_bits, block_size=3200, warm_up=0,
                                    minibatch_size=72, max_epochs=10, randomize_data=False, log_to_file=None,
                                    num_mbs_per_log=10, gen_heartbeat=False):
    """Run distributed training and evaluation of the sequence-to-sequence model.

    Creates the model and train/test readers, configures progress reporting
    (including per-sync reporting when block_size is set), and delegates to
    train_and_test.
    """
    cntk.debugging.set_computation_network_trace_level(0)

    # Pin the RNG so runs are reproducible.
    from _cntk_py import set_fixed_random_seed
    set_fixed_random_seed(1)

    from Sequence2Sequence import create_model

    # When block_size is set (presumably block-momentum SGD — confirm), report on every
    # distributed sync; otherwise leave distributed reporting disabled.
    distributed_sync_report_freq = 1 if block_size is not None else None

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs,
        distributed_freq=distributed_sync_report_freq)

    # create inputs and create model
    model = create_model()

    train_reader = create_reader(train_data, randomize_data, size=max_epochs * epoch_size)
    test_reader = create_reader(test_data, False, size=max_epochs * epoch_size * 10)

    train_and_test(model, train_reader, test_reader, block_size, num_quantization_bits, max_epochs,
                   epoch_size, minibatch_size, progress_printer, warm_up)
def sequence_to_sequence_translator(train_data, test_data, epoch_size=908241,
                                    num_quantization_bits=default_quantization_bits, block_size=3200, warm_up=0,
                                    minibatch_size=72, max_epochs=10, randomize_data=False, log_to_file=None,
                                    num_mbs_per_log=10, gen_heartbeat=False):
    """Train the sequence-to-sequence model in a distributed setting, then evaluate it.

    Sets a fixed random seed for reproducibility, builds the model and data
    readers, and forwards everything to train_and_test.
    """
    cntk.debugging.set_computation_network_trace_level(0)

    from _cntk_py import set_fixed_random_seed
    set_fixed_random_seed(1)

    from Sequence2Sequence import create_model

    # A non-None block_size enables per-sync distributed progress reports
    # (looks tied to block-wise distributed training — verify against create_trainer).
    distributed_sync_report_freq = None
    if block_size is not None:
        distributed_sync_report_freq = 1

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs,
        distributed_freq=distributed_sync_report_freq)

    # create inputs and create model
    model = create_model()

    total_training_samples = max_epochs * epoch_size
    train_reader = create_reader(train_data, randomize_data, size=total_training_samples)
    test_reader = create_reader(test_data, False, size=total_training_samples * 10)

    train_and_test(model, train_reader, test_reader, block_size, num_quantization_bits, max_epochs,
                   epoch_size, minibatch_size, progress_printer, warm_up)
def vgg19_train_and_eval(train_data, test_data, num_quantization_bits=32, minibatch_size=128,
                         epoch_size=1281167, max_epochs=80, restore=True, log_to_file=None,
                         num_mbs_per_log=None, gen_heartbeat=False, testing=False):
    """Build VGG-19, train it over max_epochs, and evaluate on the test set.

    With testing=True the validation source is capped instead of doing a full
    data sweep.
    """
    _cntk_py.set_computation_network_trace_level(0)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_vgg19()
    trainer = create_trainer(network, epoch_size, num_quantization_bits, progress_printer)
    train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)

    # reduce number of samples for validation when testing
    num_of_validation_samples = max_epochs * epoch_size * 10 if testing else FULL_DATA_SWEEP
    test_source = create_image_mb_source(test_data, False, total_number_of_samples=num_of_validation_samples)

    train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore)
def vgg19_train_and_eval(train_data, test_data, num_quantization_bits=32, minibatch_size=128,
                         epoch_size=1281167, max_epochs=80, restore=True, log_to_file=None,
                         num_mbs_per_log=None, gen_heartbeat=False, testing=False):
    """Train a VGG-19 network and evaluate it.

    Sets up progress logging, the trainer, and the image minibatch sources,
    then delegates the loop to train_and_test. The testing flag swaps the full
    validation sweep for a bounded sample count.
    """
    _cntk_py.set_computation_network_trace_level(0)

    progress_printer = ProgressPrinter(
        freq=num_mbs_per_log,
        tag='Training',
        log_to_file=log_to_file,
        rank=Communicator.rank(),
        gen_heartbeat=gen_heartbeat,
        num_epochs=max_epochs)

    network = create_vgg19()
    trainer = create_trainer(network, epoch_size, num_quantization_bits, progress_printer)
    train_source = create_image_mb_source(train_data, True, total_number_of_samples=max_epochs * epoch_size)

    if testing:
        # reduce number of samples for validation when testing
        num_of_validation_samples = max_epochs * epoch_size * 10
    else:
        num_of_validation_samples = FULL_DATA_SWEEP
    test_source = create_image_mb_source(test_data, False, total_number_of_samples=num_of_validation_samples)

    train_and_test(network, trainer, train_source, test_source, minibatch_size, epoch_size, restore)