def main(epochs, buffer_size, batch_size, train_mode, distribution_strategy,
         num_gpus, workers, w_type, w_index):
    strategy = get_distribution_strategy(strategy=distribution_strategy,
                                         num_gpus=num_gpus, workers=workers,
                                         typ=w_type, index=w_index)
    print_msg('Number of devices: {}'.format(strategy.num_replicas_in_sync), 'info')

    # Build the input pipeline; pass the caller's batch size through instead
    # of hard-coding 128.
    data_obj = Dataset(batch_size=batch_size)
    train_dataset, test_dataset = data_obj.create_dataset()
    steps_per_epoch = data_obj.get_buffer_size() // batch_size

    train_obj = Benchmark(epochs, steps_per_epoch, 'resnet56')
    with strategy.scope():
        # Create and compile the model within the strategy scope so its
        # variables are placed and mirrored correctly on every replica.
        train_obj.create_model('resnet56')
        train_obj.compile_model()

    print_msg('Training...', 'info')
    train_obj.run(train_dataset, test_dataset, train_mode)
    print_msg('Training Done.', 'succ')
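The helper get_distribution_strategy is referenced but not defined in this snippet. A minimal sketch of what such a helper might look like, assuming it only has to choose among tf.distribute's stock strategies; the strategy names and the mapping of workers/typ/index onto TF_CONFIG are assumptions, not the repo's actual implementation:

import json
import os

import tensorflow as tf

def get_distribution_strategy(strategy, num_gpus, workers, typ, index):
    # Hypothetical reconstruction; the real helper lives elsewhere in the repo.
    if strategy == 'mirrored':
        # Single machine, synchronous replication across local GPUs.
        devices = ['/gpu:{}'.format(i) for i in range(num_gpus)] if num_gpus > 1 else None
        return tf.distribute.MirroredStrategy(devices=devices)
    if strategy == 'multi_worker_mirrored':
        # Multi-worker training reads the cluster layout from TF_CONFIG.
        os.environ['TF_CONFIG'] = json.dumps({
            'cluster': {'worker': workers},         # e.g. ['host1:port', 'host2:port']
            'task': {'type': typ, 'index': index},  # this process's role in the cluster
        })
        return tf.distribute.MultiWorkerMirroredStrategy()
    # Default: run everything on a single device.
    return tf.distribute.OneDeviceStrategy(device='/gpu:0' if num_gpus else '/cpu:0')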
import os

def main(epochs, buffer_size, batch_size, train_mode, display_every,
         distribution_strategy, num_gpus, workers, w_type, w_index,
         setup_cluster, verbose):
    # TF_CPP_MIN_LOG_LEVEL controls TensorFlow's C++ logging
    # (higher values suppress more messages).
    if verbose:
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(verbose)

    strategy = get_distribution_strategy(strategy=distribution_strategy,
                                         train_mode=train_mode,
                                         num_gpus=num_gpus, workers=workers,
                                         typ=w_type, index=w_index,
                                         setup=setup_cluster)
    # When a single GPU was requested, report the replica count the strategy
    # actually resolved to.
    if num_gpus == 1:
        num_gpus = strategy.num_replicas_in_sync
    print_msg('Number of devices: {}'.format(num_gpus), 'info')

    data_obj = Dataset(batch_size)
    train_dataset, test_dataset = data_obj.create_dataset()
    steps_per_epoch = data_obj.get_buffer_size() // batch_size

    train_obj = Benchmark(epochs, steps_per_epoch, batch_size, display_every,
                          num_gpus, 'resnet56', strategy)
    print_msg('Training...', 'info')
    train_obj.run(train_dataset, test_dataset, train_mode)
    print_msg('Training Done.', 'succ')
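Wired to a CLI, a multi-worker run launches the same script once per node, varying only w_index. A hypothetical two-worker invocation; the addresses, mode label, and buffer size are placeholders, not values from the repo:

# Worker 0; the second machine runs the identical call with w_index=1.
main(epochs=10,
     buffer_size=50000,       # placeholder; e.g. the CIFAR-10 training-set size
     batch_size=128,
     train_mode='keras_fit',  # assumed mode label; whatever Benchmark.run expects
     display_every=100,
     distribution_strategy='multi_worker_mirrored',
     num_gpus=1,
     workers=['10.0.0.1:2222', '10.0.0.2:2222'],  # placeholder worker addresses
     w_type='worker',
     w_index=0,
     setup_cluster=True,
     verbose=0)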
import argparse

import tensorflow as tf
import yaml

parser = argparse.ArgumentParser()
# argparse's type=bool treats any non-empty string (including "False") as
# True, so parse the value explicitly.
parser.add_argument("train", help="training (true/false)",
                    type=lambda s: s.lower() in ("true", "1", "yes"))
parser.add_argument("config", help="config file path", type=str)
args = parser.parse_args()

with open(args.config) as f:
    # yaml.load without an explicit Loader is deprecated and unsafe.
    config = yaml.safe_load(f)

config = config["train"] if args.train else config["test"]

if args.train:
    dataset = Dataset(config["source_data_path"], config["target_data_path"])
    en, ko = dataset.create_dataset()
    en_tensor, en_tokenizer, ko_tensor, ko_tokenizer = dataset.load_dataset(
        config["num_words"])

    # Vocabulary sizes; +1 reserves index 0 for padding.
    en_words_count = len(en_tokenizer.word_index) + 1
    ko_words_count = len(ko_tokenizer.word_index) + 1

    train_ds = tf.data.Dataset.from_tensor_slices(
        (en_tensor, ko_tensor)).shuffle(10000).batch(
            config["batch_size"]).prefetch(1024)

    model = Seq2seq(source_words_count=en_words_count,
                    target_words_count=ko_words_count,
                    sos=ko_tokenizer.word_index["<start>"],
                    eos=ko_tokenizer.word_index["<end>"])

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam()
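The snippet stops before the training loop. Because the batched tensors are padded, the cross-entropy is usually masked so padding positions (token id 0) do not contribute to the average. A sketch of that pattern, assuming the loss object is rebuilt with reduction='none' so per-token losses survive (the default instance above reduces straight to a scalar):

import tensorflow as tf

# Per-token losses are needed for masking, so disable the default reduction.
# from_logits should match the Seq2seq output (assumed here to be softmax
# probabilities, matching the default above).
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(reduction='none')

def loss_function(real, pred):
    # Zero out the loss wherever the reference sequence is padding (id 0),
    # then average over the real tokens only.
    mask = tf.cast(tf.math.not_equal(real, 0), pred.dtype)
    per_token = loss_object(real, pred)  # shape: (batch, seq_len)
    per_token *= mask
    return tf.reduce_sum(per_token) / tf.reduce_sum(mask)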