def test_json(monkeypatch, ray_start_4_cpus, make_temp_dir, workers_to_log,
              detailed, filename):
    """Check the JSON log written by ``JsonLoggerCallback`` during a run.

    The training function reports ``index=i`` for five iterations on four
    workers. Afterwards the log file is loaded and checked for:

    * the expected file name (the callback default when ``filename`` is None),
    * one entry per iteration, each holding one record per logged worker,
    * ``index`` being one less than ``TRAINING_ITERATION`` in every record,
    * presence of all ``BASIC_AUTOFILLED_KEYS``, and presence (``detailed``)
      or absence (otherwise) of the ``DETAILED_AUTOFILLED_KEYS``.
    """
    if detailed:
        monkeypatch.setenv(ENABLE_DETAILED_AUTOFILLED_METRICS_ENV, "1")

    config = TestConfig()
    total_iterations = 5
    worker_count = 4

    # None logs every worker, a single int logs exactly one worker, and any
    # other value is treated as a sized collection of worker indices.
    if isinstance(workers_to_log, int):
        expected_logged_workers = 1
    elif workers_to_log is None:
        expected_logged_workers = worker_count
    else:
        expected_logged_workers = len(workers_to_log)

    def train_func():
        for step in range(total_iterations):
            train.report(index=step)
        return 1

    # Only pass ``filename`` when one was given so that the callback falls
    # back to its own default otherwise.
    callback_kwargs = {"workers_to_log": workers_to_log}
    if filename is not None:
        callback_kwargs["filename"] = filename
    callback = JsonLoggerCallback(**callback_kwargs)

    trainer = Trainer(config, num_workers=worker_count, logdir=make_temp_dir)
    trainer.start()
    trainer.run(train_func, callbacks=[callback])

    expected_name = (JsonLoggerCallback._default_filename
                     if filename is None else filename)
    assert str(callback.log_path.name) == expected_name

    with open(callback.log_path, "r") as log_file:
        log = json.load(log_file)
    print(log)

    # Shape: one entry per iteration, same number of workers in each entry.
    assert len(log) == total_iterations
    workers_per_entry = len(log[0])
    assert workers_per_entry == expected_logged_workers
    assert all(len(entry) == workers_per_entry for entry in log)

    # The remaining checks apply to every worker record of every iteration,
    # so flatten the nesting once.
    records = [record for entry in log for record in entry]

    assert all(record["index"] == record[TRAINING_ITERATION] - 1
               for record in records)
    assert all(key in record
               for record in records
               for key in BASIC_AUTOFILLED_KEYS)

    if detailed:
        assert all(key in record
                   for record in records
                   for key in DETAILED_AUTOFILLED_KEYS)
    else:
        assert not any(key in record
                       for record in records
                       for key in DETAILED_AUTOFILLED_KEYS)
def train_linear(num_workers=2, use_gpu=False, epochs=3):
    """Run ``train_func`` on a torch-backed ``Trainer`` and return its results.

    Args:
        num_workers: Forwarded to ``Trainer`` as ``num_workers``.
        use_gpu: Forwarded to ``Trainer`` as ``use_gpu``.
        epochs: Stored under the ``"epochs"`` key of the training config.

    Returns:
        Whatever ``trainer.run`` returns for this run.
    """
    trainer = Trainer(backend="torch", num_workers=num_workers, use_gpu=use_gpu)
    config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": epochs}

    trainer.start()
    try:
        results = trainer.run(
            train_func,
            config,
            callbacks=[JsonLoggerCallback(), TBXLoggerCallback()],
        )
    finally:
        # Shut the started trainer down even when the run raises, so that a
        # failed run does not leave it running.
        trainer.shutdown()

    print(results)
    return results
def train_linear(num_workers=2):
    """Run ``train_func`` on a gloo-backed torch ``Trainer`` and return results.

    Args:
        num_workers: Forwarded to ``Trainer`` as ``num_workers``.

    Returns:
        Whatever ``trainer.run`` returns for this run.
    """
    trainer = Trainer(TorchConfig(backend="gloo"), num_workers=num_workers)
    config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": 3}

    trainer.start()
    try:
        results = trainer.run(
            train_func,
            config,
            callbacks=[JsonLoggerCallback(), TBXLoggerCallback()],
        )
    finally:
        # Shut the started trainer down even when the run raises, so that a
        # failed run does not leave it running.
        trainer.shutdown()

    print(results)
    return results
def train_fashion_mnist(num_workers=2, use_gpu=False):
    """Run ``train_func`` on a torch-backed ``Trainer`` and print the result.

    Args:
        num_workers: Forwarded to ``Trainer`` as ``num_workers``.
        use_gpu: Forwarded to ``Trainer`` as ``use_gpu``.

    Returns:
        None. The value returned by ``trainer.run`` is only printed.
    """
    trainer = Trainer(
        backend="torch", num_workers=num_workers, use_gpu=use_gpu)

    trainer.start()
    try:
        result = trainer.run(
            train_func=train_func,
            config={
                "lr": 1e-3,
                "batch_size": 64,
                "epochs": 4
            },
            callbacks=[JsonLoggerCallback()])
    finally:
        # Shut the started trainer down even when the run raises, so that a
        # failed run does not leave it running.
        trainer.shutdown()

    print(f"Loss results: {result}")
def train_linear(num_workers=2, use_gpu=False):
    """Run ``train_func`` over ``get_datasets()`` and return the results.

    Args:
        num_workers: Forwarded to ``Trainer`` as ``num_workers``.
        use_gpu: Forwarded to ``Trainer`` as ``use_gpu``.

    Returns:
        Whatever ``trainer.run`` returns for this run.
    """
    datasets = get_datasets()
    trainer = Trainer("torch", num_workers=num_workers, use_gpu=use_gpu)
    config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": 3}

    trainer.start()
    try:
        results = trainer.run(
            train_func,
            config,
            dataset=datasets,
            callbacks=[JsonLoggerCallback(), TBXLoggerCallback()],
        )
    finally:
        # Shut the started trainer down even when the run raises, so that a
        # failed run does not leave it running.
        trainer.shutdown()

    print(results)
    return results