def set_watcher(self, filename):
    """Create a tensorwatch ``Watcher`` for ``filename`` and store it on ``self.watcher``.

    Returns ``False`` without creating anything when tensorwatch is not
    available, or when ``self.distributed`` is truthy and
    ``self.local_rank`` is greater than 0. Otherwise the watcher is created,
    ``make_notebook()`` is called on it, and ``True`` is returned.
    """
    # The availability flag is checked first, so the instance attributes are
    # only read when tensorwatch can actually be used.
    if not _TENSORWATCH_AVAILABLE or (self.distributed and self.local_rank > 0):
        return False

    self.watcher = tensorwatch.Watcher(filename=filename)
    self.watcher.make_notebook()
    return True
def dynamic_hist():
    """Write 100 batches of 100 random values in [0, 10) to a 6-bin histogram.

    The visualizer is created with ``clear_after_each=True``.
    """
    watcher = tw.Watcher()
    stream = watcher.create_stream()

    histogram = tw.Visualizer(
        stream,
        vis_type='histogram',
        bins=6,
        clear_after_each=True,
    )
    histogram.show()

    for _ in range(100):
        batch = [random.random() * 10 for _ in range(100)]
        stream.write(batch)
        # NOTE(review): the source was whitespace-collapsed; plt_loop is
        # assumed to run once per batch (inside the loop) - confirm.
        tw.plt_loop(count=3)
def set_watcher(self, filename, port=0):
    """Create a tensorwatch ``Watcher`` for ``filename``/``port`` on ``self.watcher``.

    Returns ``False`` without creating anything when tensorwatch is not
    available, or when ``self.distributed`` is truthy and
    ``self.local_rank`` is greater than 0. Otherwise the watcher is created,
    ``self._default_streams()`` and the watcher's ``make_notebook()`` are
    called, and ``True`` is returned.
    """
    # The availability flag is checked first, so the instance attributes are
    # only read when tensorwatch can actually be used.
    if not _TENSORWATCH_AVAILABLE or (self.distributed and self.local_rank > 0):
        return False

    self.watcher = tensorwatch.Watcher(filename=filename, port=port)

    # default streams
    self._default_streams()

    self.watcher.make_notebook()
    return True
def main():
    """Create three streams on a fresh watcher, then generate its notebook.

    The streams are: an unnamed default stream, an ``'accuracy'`` stream with
    explicit line-plot ``VisArgs``, and a ``'loss'`` stream defined by an
    expression string.
    """
    watcher = tw.Watcher()

    default_stream = watcher.create_stream()

    line_args = tw.VisArgs(
        vis_type='line',
        xtitle='X-Axis',
        clear_after_each=False,
        history_len=2,
    )
    accuracy_stream = watcher.create_stream(name='accuracy', vis_args=line_args)

    loss_stream = watcher.create_stream(name='loss', expr='lambda d:d.loss')

    watcher.make_notebook()
def dynamic_bar():
    """Write 100 batches of ten ``(label, value)`` pairs to a bar visualizer.

    Labels are ``'a0'`` .. ``'a9'``; values are random in [0, 10). The
    visualizer is created with ``clear_after_each=True``.
    """
    watcher = tw.Watcher()
    stream = watcher.create_stream()

    bars = tw.Visualizer(stream, vis_type='bar', clear_after_each=True)
    bars.show()

    for _ in range(100):
        batch = [('a' + str(k), random.random() * 10) for k in range(10)]
        stream.write(batch)
        # NOTE(review): the source was whitespace-collapsed; plt_loop is
        # assumed to run once per batch (inside the loop) - confirm.
        tw.plt_loop(count=3)
def writer():
    """Write three points each to the ``'metric1'`` and ``'metric2'`` streams.

    ``'metric1'`` receives ``(i, i squared)`` and ``'metric2'`` receives
    ``(i, i cubed)`` for ``i`` in 0..2. Both streams come from one
    file-backed watcher created with ``port=None``.
    """
    watcher = tw.Watcher(filename=r'c:\temp\test.log', port=None)

    # NOTE(review): the two `with` blocks are assumed to be sequential (not
    # nested) in the whitespace-collapsed source - confirm.
    for stream_name, exponent in (('metric1', 2), ('metric2', 3)):
        with watcher.create_stream(stream_name) as stream:
            for i in range(3):
                stream.write((i, i ** exponent))
def reader1():
    """Open ``'metric1'`` then ``'metric2'``, enable console debug, and load each."""
    print('---------------------------reader1---------------------------')
    watcher = tw.Watcher(filename=r'c:\temp\test.log', port=None)

    for stream_name in ('metric1', 'metric2'):
        stream = watcher.open_stream(stream_name)
        stream.console_debug = True
        stream.load()
def dynamic_pie():
    """Write 100 batches of twelve 4-tuples to a pie visualizer.

    Each tuple is ``('label<k>', random value in [0, 10), None, k * 0.01)``.
    The visualizer is created with ``bins=6`` and ``clear_after_each=True``.
    """
    watcher = tw.Watcher()
    stream = watcher.create_stream()

    pie = tw.Visualizer(stream, vis_type='pie', bins=6, clear_after_each=True)
    pie.show()

    for _ in range(100):
        slices = [
            ('label' + str(k), random.random() * 10, None, k * 0.01)
            for k in range(12)
        ]
        stream.write(slices)
        # NOTE(review): the source was whitespace-collapsed; plt_loop is
        # assumed to run once per batch (inside the loop) - confirm.
        tw.plt_loop(count=3)
def static_hist():
    """Write 100 single random values in [0, 10) to a 6-bin histogram visualizer."""
    watcher = tw.Watcher()
    stream = watcher.create_stream()

    histogram = tw.Visualizer(stream, vis_type='histogram', bins=6)
    histogram.show()

    remaining = 100
    while remaining > 0:
        stream.write(random.random() * 10)
        remaining -= 1

    # NOTE(review): the source was whitespace-collapsed; plt_loop is assumed
    # to run once, after all values are written - confirm.
    tw.plt_loop()
def __init__(self, exp_name, run_name, epoch_config, model, callbacks, metrics, log_config, port):
    """Initialise the base probe, then set up a tensorwatch watcher on ``port``.

    All arguments except ``port`` are forwarded unchanged to the parent
    constructor. The watcher is stored on ``self.tw`` and is given ``model``
    and ``metrics.stats`` as globals.
    """
    super(TensorWatchProbe, self).__init__(
        exp_name,
        run_name,
        epoch_config,
        model,
        callbacks,
        metrics,
        log_config,
    )

    tw.utils.set_debug_verbosity(log_config.debug_verbosity)

    self.tw = tw.Watcher(port=port)
    self.metrics = metrics

    watcher_globals = {'model': model, 'metrics': metrics.stats}
    self.tw.set_globals(**watcher_globals)
def static_bar():
    """Write ten random integers in 0..9 to a bar visualizer."""
    watcher = tw.Watcher()
    stream = watcher.create_stream()

    bars = tw.Visualizer(stream, vis_type='bar')
    bars.show()

    for _ in range(10):
        value = int(random.random() * 10)
        stream.write(value)

    # NOTE(review): the source was whitespace-collapsed; plt_loop is assumed
    # to run once, after all values are written - confirm.
    tw.plt_loop()
def dynamic_line3d():
    """Write 100 batches of 100 ``(x, value, z)`` points to a line3d visualizer.

    ``x`` and ``z`` each range over 0..9 and ``value`` is random in [0, 10).
    The visualizer is created with ``clear_after_each=True``.
    """
    watcher = tw.Watcher()
    stream = watcher.create_stream()

    plot = tw.Visualizer(stream, vis_type='line3d', clear_after_each=True)
    plot.show()

    for _ in range(100):
        points = [
            (x, random.random() * 10, z)
            for x in range(10)
            for z in range(10)
        ]
        stream.write(points)
        # NOTE(review): the source was whitespace-collapsed; plt_loop is
        # assumed to run once per batch (inside the loop) - confirm.
        tw.plt_loop(count=3)
def reader2():
    """Print every item read from ``'metric1'``, then every item from ``'metric2'``."""
    print('---------------------------reader2---------------------------')
    watcher = tw.Watcher(filename=r'c:\temp\test.log', port=None)

    for stream_name in ('metric1', 'metric2'):
        stream = watcher.open_stream(stream_name)
        for record in stream.read_all():
            print(record)
def static_pie():
    """Write six 4-tuples to a pie visualizer created with ``bins=6``.

    Each tuple is ``('label<k>', random value in [0, 10), None, fourth)``,
    where ``fourth`` is 0.5 for ``k == 3`` and 0 otherwise.
    """
    watcher = tw.Watcher()
    stream = watcher.create_stream()

    pie = tw.Visualizer(stream, vis_type='pie', bins=6)
    pie.show()

    for k in range(6):
        if k == 3:
            fourth = 0.5
        else:
            fourth = 0
        stream.write(('label' + str(k), random.random() * 10, None, fourth))

    # NOTE(review): the source was whitespace-collapsed; plt_loop is assumed
    # to run once, after all slices are written - confirm.
    tw.plt_loop()
def reader3():
    """Show line visualizers for ``'metric1'`` and ``'metric2'``.

    The second visualizer is created with the first as its ``host``; only
    the first has ``show()`` called on it.
    """
    print('---------------------------reader3---------------------------')
    watcher = tw.Watcher(filename=r'c:\temp\test.log', port=None)

    metric1 = watcher.open_stream('metric1')
    metric2 = watcher.open_stream('metric2')

    host_vis = tw.Visualizer(metric1, vis_type='line')
    guest_vis = tw.Visualizer(metric2, vis_type='line', host=host_vis)

    host_vis.show()
    tw.plt_loop()
def run_episode(env: gym.Env, agent: BaseAgent, render=False):
    """Run ``agent`` in ``env`` until the episode ends and return the total reward.

    After every step the pair ``(step index, cumulative reward)`` is written
    to a ``'reward'`` stream of a watcher backed by ``random_agent.log``.
    When ``render`` is truthy, ``env.render(mode='human')`` is called after
    each step. Start time and final reward/elapsed time are printed.
    """
    start_time = time.time()
    print('Started', start_time)

    watcher = tw.Watcher(filename='random_agent.log')
    logger = watcher.create_stream(name='reward')
    watcher.make_notebook()

    obs = env.reset()
    agent.reset(env)

    reward = 0.0
    env_done = False
    step = 0
    total_r = 0.0
    while not env_done:
        action = agent.act(obs, reward, env_done)
        obs, reward, env_done, _ = env.step(action=action)
        if render:
            env.render(mode='human')

        total_r += reward
        logger.write((step, total_r))
        step += 1

    print('Done: reward, time', total_r, time.time() - start_time)
    return total_r
import gym import ray import numpy as np from ray.rllib.agents.dqn import DQNTrainer, DEFAULT_CONFIG from ray.tune.logger import pretty_print from ray.tune.registry import register_env from podworld.envs import PodWorldEnv import tensorwatch as tw import time start_time = time.time() print('Started', start_time) watcher = tw.Watcher(filename='dqn_agent.log') logger = watcher.create_stream(name='reward') watcher.make_notebook() ray.init(num_gpus=1) np.seterr(all='raise') config = DEFAULT_CONFIG.copy() config.update({ "gamma": 0.99, "lr": 0.0001, "learning_starts": 10000, "buffer_size": 50000, "sample_batch_size": 4, "train_batch_size": 320, "schedule_max_timesteps": 2000000, "exploration_final_eps": 0.01, "exploration_fraction": 0.1,
import random
import time

import tensorwatch as tw

# Create the watcher object as usual.
watcher = tw.Watcher()

weights = None
for _step in range(10000):
    weights = [random.random() for _ in range(5)]

    # Let the watcher observe the variables we have;
    # this has almost no performance cost.
    watcher.observe(weights=weights)

    time.sleep(1)
import time

import tensorwatch as tw

# File-backed watcher with one named stream.
watcher = tw.Watcher(filename='test.log')
metric_stream = watcher.create_stream(name='my_metric')
# watcher.make_notebook()

# Write one (i, i squared) point per second.
for i in range(1000):
    point = (i, i * i)
    metric_stream.write(point)
    time.sleep(1)
import time

import tensorwatch as tw

srv = tw.Watcher()

# Running total of the loop index. It is still reported to the watcher under
# the keyword name `sum`, so observers see the same variable name as before.
running_total = 0
for i in range(10000):
    running_total += i
    srv.observe(i=i, sum=running_total)
    # print(i, running_total)
    time.sleep(1)
import gym import ray import numpy as np from ray.rllib.agents.ppo import PPOTrainer, DEFAULT_CONFIG from ray.tune.logger import pretty_print from ray.tune.registry import register_env from podworld.envs import PodWorldEnv import tensorwatch as tw import time start_time = time.time() print('Started', start_time) watcher = tw.Watcher(filename='ppo_agent.log') logger = watcher.create_stream(name='reward') watcher.make_notebook() ray.init(num_gpus=1) np.seterr(all='raise') config = DEFAULT_CONFIG.copy() config.update({ "lambda": 0.95, "kl_coeff": 0.5, "clip_rewards": True, "clip_param": 0.1, "vf_clip_param": 10, #10 "entropy_coeff": 0.01, "batch_mode": "truncate_episodes", "observation_filter": "NoFilter", "vf_share_layers": True,
def main(**args):
    """Run the workflow selected by ``args["mode"]``: evaluate, train or profile.

    Keys read from ``args`` in this function (some only in certain modes or
    when ``USE_CUDA`` is set): ``verbosity``, ``gpu_id``, ``manual_seed``,
    ``checkpoint``, ``dataset``, ``workers``, ``train_batch``,
    ``test_batch``, ``arch``, ``lr``, ``momentum``, ``weight_decay``,
    ``tensorwatch_log``, ``mode``, ``start_epoch``, ``epochs``, ``schedule``
    and ``gamma``. The whole mapping is also forwarded to
    ``initialize_model``.
    """
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source. Confirm the extent of the `if USE_CUDA:` / `if seed is None:`
    # bodies, of the `finally` body, and the position of the final
    # `logging.info(prof)`.

    # Log to the console and to a temporary file; in train mode that file is
    # copied into the checkpoint directory at the end.
    t_logfile = NamedTemporaryFile(mode="w+", suffix=".log")
    logging.basicConfig(
        level=args["verbosity"],
        format="%(asctime)s: %(message)s",
        handlers=[
            logging.StreamHandler(),
            logging.FileHandler(t_logfile.name)
        ],
    )
    logging.info("Execution options: %s", pformat(args))

    # Preliminary Setup
    if USE_CUDA:
        os.environ["CUDA_VISIBLE_DEVICES"] = args["gpu_id"]
        logging.info("• CUDA is enabled")
        # gpu_id is split on whitespace; each piece is logged by device name.
        for device_id in args["gpu_id"].split():
            device_id = int(device_id)
            logging.info("%s", torch.cuda.get_device_name(device_id))
    else:
        logging.info("• CPU only (no CUDA)")

    # Fall back to a random seed in [1, 10000] when none was supplied.
    seed = args["manual_seed"]
    if seed is None:
        seed = random.randint(1, 10000)
    logging.info("• Random Seed: %d", seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if USE_CUDA:
        torch.cuda.manual_seed_all(seed)

    if not os.path.isdir(args["checkpoint"]):
        os.makedirs(args["checkpoint"], exist_ok=True)

    # Data
    logging.info("• Preparing '%(dataset)s' dataset", args)
    num_classes, trainloader, testloader = initialize_dataloaders(
        args["dataset"],
        workers=args["workers"],
        train_batch=args["train_batch"],
        test_batch=args["test_batch"],
    )

    # Model & Architecture
    arch = args["arch"]
    logging.info("• Initializing '%s' architecture", arch)
    model = initialize_model(arch, num_classes, **args)
    logging.info("%s", model)
    model = torch.nn.DataParallel(model)
    if USE_CUDA:
        model = model.cuda()
        torch.backends.cudnn.benchmark = True

    # Count all parameters and, separately, those with requires_grad set.
    num_params = sum([p.numel() for p in model.parameters()])
    num_learnable = sum(
        [p.numel() for p in model.parameters() if p.requires_grad])
    logging.info(
        "• Number of parameters: %(params)d (%(learnable)d learnable)",
        {
            "params": num_params,
            "learnable": num_learnable
        },
    )

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args["lr"],
        momentum=args["momentum"],
        weight_decay=args["weight_decay"],
    )

    # Tensorwatch Initialization
    # One stream per tracked quantity; they are written once per epoch in
    # train mode.
    w = tw.Watcher(filename=args["tensorwatch_log"])
    loss_stream = w.create_stream(name="train_loss")
    acc_stream = w.create_stream(name="train_acc")
    test_loss_stream = w.create_stream(name="test_loss")
    test_acc_stream = w.create_stream(name="test_acc")
    lr_stream = w.create_stream(name="lr")

    if args["mode"] == "evaluate":
        logging.info("Only evaluation")
        with torch.no_grad():
            test_loss, test_acc = test(testloader, model, criterion)
        logging.info(
            "Test Loss: %(loss).8f, Test Acc: %(acc).2f",
            {
                "loss": test_loss,
                "acc": test_acc
            },
        )
    elif args["mode"] == "train":
        best_acc = 0
        start_epoch = args["start_epoch"]
        title = args["dataset"] + "-" + arch
        scribe = Scribe(os.path.join(args["checkpoint"], "progress.txt"),
                        title=title)
        scribe.set_names([
            "Learning Rate",
            "Train Loss",
            "Valid Loss",
            "Train Acc.",
            "Valid Acc.",
        ])
        lr = args["lr"]
        interrupted = False
        for epoch in range(start_epoch, args["epochs"]):
            # Placeholder metrics, so an epoch interrupted before train/test
            # finish can still be recorded below.
            train_loss, train_acc, test_loss, test_acc = 0, -1, 0, -1
            try:
                lr = update_learning_rate(lr, args["schedule"], args["gamma"],
                                          optimizer, epoch)
                logging.info(
                    "Epoch %(cur_epoch)d/%(epochs)d | LR: %(lr)f",
                    {
                        "cur_epoch": epoch + 1,
                        "epochs": args["epochs"],
                        "lr": lr
                    },
                )
                train_loss, train_acc = train(trainloader, model, criterion,
                                              optimizer)
                with torch.no_grad():
                    test_loss, test_acc = test(testloader, model, criterion)
            except KeyboardInterrupt:
                # Do not abort immediately: record this epoch first, then
                # leave the loop via the `interrupted` flag.
                logging.warning("Caught Keyboard Interrupt at epoch %d",
                                epoch + 1)
                interrupted = True
            finally:
                # append model progress
                scribe.append((lr, train_loss, test_loss, train_acc, test_acc))
                loss_stream.write((epoch, train_loss))
                acc_stream.write((epoch, train_acc))
                test_loss_stream.write((epoch, test_loss))
                test_acc_stream.write((epoch, test_acc))
                lr_stream.write((epoch, lr))
                # save the model
                is_best = test_acc > best_acc
                best_acc = max(test_acc, best_acc)
                save_checkpoint(
                    {
                        "epoch": epoch + 1,
                        "state_dict": model.state_dict(),
                        "acc": test_acc,
                        "best_acc": best_acc,
                        "optimizer": optimizer.state_dict(),
                    },
                    is_best,
                    checkpoint=args["checkpoint"],
                )
            if interrupted:
                break
        # Close the progress log and save accuracy and loss plots next to
        # the checkpoints.
        scribe.close()
        scribe.plot(
            plot_title="Training Accuracy Progress",
            names=["Train Acc.", "Valid Acc."],
            xlabel="Epoch",
            ylabel="Accuracy",
        )
        scribe.savefig(os.path.join(args["checkpoint"], "progress_acc.eps"))
        scribe.plot(
            plot_title="Training Loss Progress",
            names=["Train Loss", "Valid Loss"],
            xlabel="Epoch",
            ylabel="Cross Entropy Loss",
        )
        scribe.savefig(os.path.join(args["checkpoint"], "progress_loss.eps"))
        logging.info("Best evaluation accuracy: %f", best_acc)
        logging.info("Results saved to %s", args["checkpoint"])
        # Keep a copy of the run log alongside the results; the temporary
        # file is only closed in this (train) branch.
        shutil.copy(t_logfile.name, args["checkpoint"])
        t_logfile.close()
    elif args["mode"] == "profile":
        logging.info("Only profiling one pass, one input")
        # Take just the first batch from the test loader.
        for (inputs, _) in testloader:
            break
        logging.info("Input Size: %s", inputs.size())
        with torch.no_grad():
            if USE_CUDA:
                # NOTE(review): confirm what `prof` is bound to by this
                # context manager before relying on the log line below.
                with torch.cuda.profiler.profile() as prof:
                    # warmup the CUDA memory allocator and profiler
                    # model(inputs)
                    with torch.autograd.profiler.emit_nvtx(enabled=USE_CUDA):
                        model(inputs)
            else:
                with torch.autograd.profiler.profile(
                        use_cuda=USE_CUDA) as prof:
                    model(inputs)
        logging.info(prof)
import time

import tensorwatch as tw
from tensorwatch import utils

utils.set_debug_verbosity(4)

srv = tw.Watcher(filename=r'c:\temp\sum.log')

# Two expression streams over the observed variables `i` and `sum`.
s1 = srv.create_stream('sum', expr='lambda v:(v.i, v.sum)')
s2 = srv.create_stream('sum_2', expr='lambda v:(v.i, v.sum/2)')

# Running total of the loop index. It is still reported under the keyword
# name `sum`, which the stream expressions above refer to as `v.sum`.
running_total = 0
for i in range(10000):
    running_total += i
    srv.observe(i=i, sum=running_total)
    # print(i, running_total)
    time.sleep(1)