def main():
    # Initialize variables
    device = "/gpu:0" if Constants.USE_GPU else "/cpu:0"
    learning_rate_input = tf.placeholder("float")
    initial_learning_rate = log_uniform(Constants.ALPHA.LOW,
                                        Constants.ALPHA.HIGH,
                                        Constants.ALPHA.LOG_RATE)

    # Initialize network
    global_network, grad_applier = init_network(device, learning_rate_input)

    # Load our saver class
    saver = Saver()

    # Initialize threads
    Data.a3c_threads = create_a3c_threads(global_network, learning_rate_input,
                                          initial_learning_rate, grad_applier,
                                          saver.data, device)

    # Initialize TF
    session = init_tf()
    summary = init_tf_summary(session)

    # Initialize / load an existing checkpoint
    saver.load(session)

    # Install Ctrl+C signal handler
    def signal_handler(sig, frame):
        print('CTRL+C was pressed, attempting to stop and save.')
        Data.stop_requested = True

    signal.signal(signal.SIGINT, signal_handler)

    # Create and start the threads
    threads = run_system_threads(session, Data, summary, saver)
    print('Press Ctrl+C to stop')

    # Finish and save results
    finish(threads)
    saver.save(session)
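# `log_uniform` is not defined in this excerpt. In common A3C reference code it
# interpolates log-uniformly between a low and a high value; a minimal sketch
# under that assumption:
import math


def log_uniform(lo, hi, rate):
    # Interpolate between log(lo) and log(hi), then map back; rate in [0, 1].
    log_lo = math.log(lo)
    log_hi = math.log(hi)
    v = log_lo * (1 - rate) + log_hi * rate
    return math.exp(v)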
device = torch.device("cuda:{}".format(gpu) if torch.cuda.is_available() else "cpu")
model = SegNet(sigma=opt.sigma).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Recover weights, if required
if opt.recover:
    ckpt_file = os.path.join(model_dir, opt.reco_type + '_weights.pth')
    ckpt = torch.load(ckpt_file, map_location=device)
    model.load_state_dict(ckpt['model_state_dict'])
    epoch = ckpt['iter_nb'] + 1
    print('Model recovered from {}.'.format(ckpt_file))
    if 'optimizer_state_dict' in ckpt:
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        print('Optimizer recovered from {}.'.format(ckpt_file))
    saver.load()
else:
    epoch = 0

# Metrics
metrics = [
    'SEMANTIC_LOSS', 'IOU_0', 'IOU_1', 'IOU_2', 'IOU_3', 'IOU_4', 'IOU_5',
    'IOU_6', 'MEAN_IOU', 'PIX_ACC', 'DEPTH_LOSS', 'ABS_ERR', 'REL_ERR'
]
saver.add_metrics(metrics)

# Create datasets
dataset_path = opt.dataroot
city_train_set = Cityscapes(root=dataset_path, train=True)
city_test_set = Cityscapes(root=dataset_path, train=False)
if __name__ == '__main__':
    map = Map(133, 33, '#')
    start = Start(133, 33)
    finish = GameOver(133, 33)
    paconator = Paco(1, 15, 133, 33)
    getKey = GetKey()
    save = Saver()
    pigeons = []
    lasers = []
    bones = []
    play_game = False
    game_over = False
    ammo = 20
    points = 0
    x = save.load()
    record = int(x)
    WindowSize()

    # ====== START SCREEN ======
    while True:
        key = getKey()
        for y in range(11):
            ClearConsole()
            if y < 10:
                for x in range(87):
                    map.set_point(start.screen[y][x].x,
                                  start.screen[y][x].y,
                                  start.screen[y][x].character)
                    map.set_point(start.screen[y + 12][x].x,
                                  start.screen[y + 12][x].y,
                                  start.screen[y + 12][x].character)
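# `GetKey` is not defined in this excerpt. Given the Windows console helpers
# used above (WindowSize, ClearConsole), a plausible Windows-only sketch of a
# non-blocking key reader with the same call style (hypothetical implementation):
import msvcrt


class GetKey:
    def __call__(self):
        # Return the pressed key as a one-character string, or None when no
        # key is waiting, so the game loop never blocks.
        if msvcrt.kbhit():
            return msvcrt.getwch()
        return None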
# Saver manages data and TF model saving
saver = Saver(data)

# Testing env for playing at the end
env_test = Environment(gym.make(Constants.ENV),
                       Agent(master_network, Constants.EPS_START,
                             Constants.EPS_STOP, Constants.EPS_STEPS),
                       summary, saver, render=True, eps_start=0., eps_end=0.)

# Load the last TF checkpoint
saver.load(master_network.session)

# Don't train if replay mode is set
if not Constants.REPLAY_MODE:
    # Initialize threads
    envs = [
        Environment(gym.make(Constants.ENV),
                    Agent(master_network, Constants.EPS_START,
                          Constants.EPS_STOP, Constants.EPS_STEPS),
                    summary, saver) for i in range(Constants.THREADS - 1)
    ]

    # Create cvshow thread
    envs.append(
        Environment(
class Experiment:
    def __init__(self, opt):
        assert opt.save_dir is not None, "No directory given for saving the model."

        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu

        self.opt = opt
        self.device = torch.device("cpu" if opt.gpu == "" or not torch.cuda.is_available() else "cuda")

        self.train_set = Chembl("train", kekulize=opt.kekulize, random_order=opt.random_order)
        self.valid_set = Chembl("valid", kekulize=opt.kekulize, random_order=opt.random_order)
        self.test_set = Chembl("test", kekulize=opt.kekulize, random_order=opt.random_order)

        self.train_loader = torch.utils.data.DataLoader(
            self.train_set, batch_size=opt.batch_size, shuffle=True,
            collate_fn=Chembl.collate, num_workers=1, pin_memory=True)
        self.valid_loader = torch.utils.data.DataLoader(
            self.valid_set, batch_size=256, shuffle=False,
            collate_fn=Chembl.collate, num_workers=1, pin_memory=True)
        self.test_loader = torch.utils.data.DataLoader(
            self.test_set, batch_size=256, shuffle=False,
            collate_fn=Chembl.collate, num_workers=1, pin_memory=True)

        self.model = GraphGen(self.train_set.n_node_types(),
                              self.train_set.n_edge_types(),
                              opt.state_size, n_max_nodes=30,
                              n_max_edges=2 * self.train_set.get_max_bonds(),
                              dropout=opt.dropout)
        self.model = self.model.to(self.device)

        if opt.optimizer == "adam":
            self.optimizer = torch.optim.Adam(self.model.parameters(), lr=opt.lr,
                                              weight_decay=opt.wd, eps=1e-10)
        elif opt.optimizer == "sgd":
            self.optimizer = torch.optim.SGD(self.model.parameters(), lr=opt.lr,
                                             momentum=0.99, nesterov=True)
        else:
            assert False, "Invalid optimizer: %s" % opt.optimizer

        self.loss_plot = Plot2D("loss", 10, xlabel="iter", ylabel="loss")
        self.valid_loss_plot = Plot2D("Validation loss", 1, xlabel="iter", ylabel="loss")
        self.percent_valid = Plot2D("Valid molecules", 1, xlabel="iter", ylabel="%")
        self.mol_images = Image("Molecules")

        self.iteration = 0
        self.epoch = 0
        self.best_loss = float("inf")
        self.best_loss_iteration = 0
        self.best_loss_epoch = 0
        self.test_loss = None
        self.patience = 5

        self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, opt.lr_milestones, opt.lr_gamma) \
            if opt.lr_milestones else None

        self.saver = Saver(self, os.path.join(opt.save_dir, "save"))
        self.saver.load()

    def _move_to_device(self, d):
        # Recursively move nested tuples/lists/dicts of tensors to self.device.
        if isinstance(d, tuple):
            return tuple(self._move_to_device(list(d)))
        elif isinstance(d, list):
            return [self._move_to_device(v) for v in d]
        elif isinstance(d, dict):
            return {k: self._move_to_device(v) for k, v in d.items()}
        elif torch.is_tensor(d):
            return d.to(self.device)
        else:
            return d

    def state_dict(self):
        return {
            "model": self.model.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "loss_plot": self.loss_plot.state_dict(),
            "valid_loss_plot": self.valid_loss_plot.state_dict(),
            "percent_valid": self.percent_valid.state_dict(),
            "iteration": self.iteration,
            "epoch": self.epoch,
            "best_loss": self.best_loss,
            "best_loss_iteration": self.best_loss_iteration,
            "best_loss_epoch": self.best_loss_epoch,
            "test_loss": self.test_loss,
            "lr_scheduler": self.lr_scheduler.state_dict() if self.lr_scheduler is not None else None
        }

    def load_state_dict(self, state_dict):
        self.model.load_state_dict(state_dict["model"])
        self.optimizer.load_state_dict(state_dict["optimizer"])
        self.valid_loss_plot.load_state_dict(state_dict["valid_loss_plot"])
        self.loss_plot.load_state_dict(state_dict["loss_plot"])
        self.percent_valid.load_state_dict(state_dict["percent_valid"])
        self.iteration = state_dict["iteration"]
        self.epoch = state_dict["epoch"]
        self.best_loss = state_dict["best_loss"]
        self.best_loss_iteration = state_dict["best_loss_iteration"]
        self.best_loss_epoch = state_dict["best_loss_epoch"]
        self.test_loss = state_dict["test_loss"]

        if self.lr_scheduler is not None:
            s = state_dict.get("lr_scheduler")
            if s is not None:
                self.lr_scheduler.load_state_dict(s)

    def test(self, loader=None):
        self.model.eval()
        loss_sum = 0
        cnt = 0
        with torch.no_grad():
            for d in tqdm(loader):
                d = self._move_to_device(d)
                _, loss = self.model(d)
                cnt += d[0].shape[0]
                loss_sum += loss.item() * d[0].shape[0]
        loss = loss_sum / cnt
        return loss

    def load_the_best(self):
        self.saver.load(self.best_loss_iteration)

    def do_final_test(self):
        self.load_the_best()
        self.test_loss = self.test(self.test_loader)
        print("----------------------------------------------")
        print("Training done.")
        print(" Validation loss:", self.best_loss)
        print(" Test loss:", self.test_loss)
        print(" Epoch:", self.best_loss_epoch)
        print(" Iteration:", self.best_loss_iteration)
        self.saver.save("best")

    def display_generated(self):
        self.model.eval()
        graphs = []
        with torch.no_grad():
            for i in range(1):
                g = self.model.generate(32, self.device)
                g = g.get_final_graph()
                graphs.append(g)
        self.model.train()
        img = self.train_set.draw_molecules(graphs)
        self.mol_images.draw(img)

    def train(self):
        running = True
        while running:
            print("Epoch %d" % self.epoch)
            self.model.train()
            for d in tqdm(self.train_loader):
                d = self._move_to_device(d)
                g, loss = self.model(d)
                assert torch.isfinite(loss), "Loss is %s" % loss.item()
                self.loss_plot.add_point(self.iteration, loss.item())

                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.25)
                self.optimizer.step()

                self.iteration += 1
                if self.iteration % 30 == 0:
                    self.display_generated()

            # Do a validation step
            validation_loss = self.test(self.valid_loader)
            self.valid_loss_plot.add_point(self.iteration, validation_loss)

            g = self.generate(10000)
            self.percent_valid.add_point(self.iteration, g["ratio_ok"] * 100)

            # Early stopping
            if validation_loss <= self.best_loss:
                self.best_loss = validation_loss
                self.best_loss_iteration = self.iteration
                self.best_loss_epoch = self.epoch
            elif (self.epoch - self.best_loss_epoch) > self.patience:
                running = not self.opt.early_stop

            if self.lr_scheduler is not None:
                self.lr_scheduler.step()

            # Save the model
            self.saver.save(self.iteration)
            self.epoch += 1

    def train_done(self):
        return self.test_loss is not None

    def generate(self, n_test=100000):
        v = self.train_set.start_verification()
        bsize = 200
        self.model.eval()
        with torch.no_grad():
            for i in tqdm(range(n_test // bsize)):
                res = self.model.generate(bsize, self.device)
                self.train_set.verify(v, res.get_final_graph())
                gc.collect()
        return self.train_set.get_verification_results(v)
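# A hypothetical driver for the Experiment class above. The option names and
# defaults are assumptions inferred from the attributes that __init__ and
# train() read (save_dir, gpu, lr, early_stop, ...), not the project's real CLI.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--save_dir", required=True)
    parser.add_argument("--gpu", default="0")
    parser.add_argument("--kekulize", action="store_true")
    parser.add_argument("--random_order", action="store_true")
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--state_size", type=int, default=512)
    parser.add_argument("--dropout", type=float, default=0.0)
    parser.add_argument("--optimizer", default="adam")
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument("--wd", type=float, default=0.0)
    parser.add_argument("--lr_milestones", type=int, nargs="*", default=None)
    parser.add_argument("--lr_gamma", type=float, default=0.1)
    parser.add_argument("--early_stop", action="store_true")
    opt = parser.parse_args()

    experiment = Experiment(opt)     # Saver.load() in __init__ resumes from a checkpoint if one exists
    if not experiment.train_done():  # test_loss is only set once training has finished
        experiment.train()
        experiment.do_final_test()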
    shuffle=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = YOLO()
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
model.to(device)

criterion = YOLO_loss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
yolo_saver = Saver(model, join(save_root, r'yolo', r'save'), "yolo", max_to_keep=20)
loaded_index = yolo_saver.load()

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(data_loader):
        # Move the batch to the same device as the model (works on CPU too)
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
ModelClass = load_class(config.model._class_)
net: nn.Module = ModelClass(
    **{k: v for k, v in config.model.items() if k != '_class_'})
net.to(config.opts.device)

OptimizerClass = load_class(config.optimizer._class_)
optimizer: optim.Optimizer = OptimizerClass(
    params=net.parameters(),
    **{k: v for k, v in config.optimizer.items() if k != '_class_'})

if config.training.restore:
    train_state = saver.load(model=net, optimizer=optimizer,
                             device=config.training.device)
else:
    train_state = Munch(epochs=0, samples=0)

if config.opts.log:
    with open(folder_run / 'config.yml', mode='w') as f:
        f.write(config.toYAML())
    logger.add_text('Config', textwrap.indent(config.toYAML(), ' '),
                    global_step=train_state.samples)


def make_dataloader(dataset, shuffle) -> data.DataLoader:
    return data.DataLoader(dataset,
                           batch_size=config.training.batch_size,
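# None of the Saver classes used in these snippets are shown in full, and
# their signatures differ from snippet to snippet. Below is a minimal,
# self-contained sketch of the save/load pattern the PyTorch snippets share;
# the class name, file layout, and return convention are illustrative
# assumptions, not any snippet's real API.
import os

import torch


class CheckpointSaver:
    def __init__(self, directory, prefix="ckpt"):
        self.directory = directory
        self.prefix = prefix
        os.makedirs(directory, exist_ok=True)

    def _path(self, tag):
        return os.path.join(self.directory, "{}_{}.pth".format(self.prefix, tag))

    def save(self, model, optimizer, epoch, tag="last"):
        # Persist model and optimizer state together so training can resume.
        torch.save({"model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "epoch": epoch}, self._path(tag))

    def load(self, model, optimizer=None, tag="last", device="cpu"):
        # Return the next epoch to run, or 0 when no checkpoint exists.
        path = self._path(tag)
        if not os.path.isfile(path):
            return 0
        ckpt = torch.load(path, map_location=device)
        model.load_state_dict(ckpt["model_state_dict"])
        if optimizer is not None and "optimizer_state_dict" in ckpt:
            optimizer.load_state_dict(ckpt["optimizer_state_dict"])
        return ckpt["epoch"] + 1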