def main():
    """Fit a TFTrainer on a synthetic identity dataset and print f(0.5).

    Every sample is the pair ``[x, x]``; column 0 is used as the feature
    and column 1 as the label when the dataset is converted for TensorFlow.
    """
    batch_size = 32
    steps_per_epoch = 100
    num_replicas = 2

    # One epoch's worth of samples for every replica.
    num_points = batch_size * steps_per_epoch * num_replicas
    data = [i * (1 / num_points) for i in range(num_points)]

    paired_it = parallel_it.from_items(data, 2, False).for_each(
        lambda value: [value, value])
    # this will create MLDataset with column RangeIndex(range(2))
    ml_dataset = ml_data.from_parallel_iter(
        paired_it, True, batch_size=batch_size, repeated=False)
    tf_dataset = ml_dataset.to_tf(feature_columns=[0], label_column=1)

    trainer_config = {
        "batch_size": batch_size,
        "fit_config": {
            "steps_per_epoch": steps_per_epoch,
        },
    }
    trainer = TFTrainer(
        model_creator=model_creator,
        data_creator=make_data_creator(tf_dataset),
        num_replicas=num_replicas,
        config=trainer_config,
    )

    # Ten rounds of trainer.train(); the returned stats are not used here.
    for _ in range(10):
        trainer.train()

    fitted_model = trainer.get_model()
    prediction = float(fitted_model.predict([0.5]))
    print("f(0.5)=", prediction)
def main(smoke_test, num_replicas, use_gpu=False, augment_data=False, batch_size=32):
    """Train with ``TFTrainer``, then fit the returned model once more locally.

    Args:
        smoke_test: When truthy, run one epoch with 10 train and 10 eval
            steps instead of the full step counts.
        num_replicas: Forwarded to ``TFTrainer`` as ``num_replicas``.
        use_gpu: Forwarded to ``TFTrainer`` as ``use_gpu``.
        augment_data: When truthy, the trainer uses
            ``data_augmentation_creator`` instead of ``data_creator``.
        batch_size: Batch size placed in the trainer config and used to
            derive the number of steps per epoch.
    """
    data_size = 60000
    test_size = 10000
    num_train_steps = 10 if smoke_test else data_size // batch_size
    num_eval_steps = 10 if smoke_test else test_size // batch_size

    trainer = TFTrainer(
        model_creator=create_model,
        data_creator=(data_augmentation_creator
                      if augment_data else data_creator),
        num_replicas=num_replicas,
        use_gpu=use_gpu,
        verbose=True,
        config={
            "batch_size": batch_size,
            "fit_config": {
                "steps_per_epoch": num_train_steps,
            },
            "evaluate_config": {
                "steps": num_eval_steps,
            },
        },
    )

    # Shut the trainer down even when training raises.
    try:
        training_start = time.time()
        num_epochs = 1 if smoke_test else 3
        for i in range(num_epochs):
            # Train one epoch and merge in the validation stats.
            train_stats1 = trainer.train()
            train_stats1.update(trainer.validate())
            print(f"iter {i}:", train_stats1)

        # Average over the epochs actually run; the previous hard-coded 3
        # under-reported the per-epoch time in smoke-test mode (1 epoch).
        dt = (time.time() - training_start) / num_epochs
        print(f"Training on workers takes: {dt:.3f} seconds/epoch")

        model = trainer.get_model()
    finally:
        trainer.shutdown()

    # Re-fit the retrieved model in this process for a timing comparison.
    dataset, test_dataset = data_augmentation_creator(
        dict(batch_size=batch_size))

    training_start = time.time()
    model.fit(dataset, steps_per_epoch=num_train_steps, epochs=1)
    dt = time.time() - training_start
    print(f"Training on workers takes: {dt:.3f} seconds/epoch")

    scores = model.evaluate(test_dataset, steps=num_eval_steps)
    print("Test loss:", scores[0])
    print("Test accuracy:", scores[1])
# Keras callbacks used by the local ssd_model.fit() call further down.
tensorboard_callback = TensorBoard(log_dir=ssd_log_path)
learning_rate_callback = LearningRateScheduler(train_utils.scheduler,
                                               verbose=0)

# Distributed phase: one trainer.train() call per epoch.
training_start = time.time()
num_epochs = 1 if args.smoke_test else args.epochs
for i in range(num_epochs):
    train_stats1 = trainer.train()
    # Validation stats are intentionally not merged in here.
    print(f"iter {i}:", train_stats1)

# max(..., 1) avoids ZeroDivisionError when zero epochs were requested.
dt = (time.time() - training_start) / max(num_epochs, 1)
print(f"Training on workers takes: {dt:.3f} seconds/epoch")

model = trainer.get_model()
trainer.shutdown()

# Local phase: fit ssd_model in this process for opt.epochs epochs.
training_start = time.time()
ssd_model.fit(ssd_train_feed,
              steps_per_epoch=step_size_train,
              validation_data=ssd_val_feed,
              validation_steps=step_size_val,
              epochs=opt.epochs,
              callbacks=[
                  checkpoint_callback, tensorboard_callback,
                  learning_rate_callback
              ])
# The message reports seconds/epoch, so average the total wall time over
# the epochs that fit() ran instead of printing the total.
dt = (time.time() - training_start) / max(opt.epochs, 1)
print(f"Training on workers takes: {dt:.3f} seconds/epoch")