def init(args):
    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')
    if not os.path.exists('checkpoints/' + args.exp_name):
        os.makedirs('checkpoints/' + args.exp_name)
    os.system('cp ./mixup.yml ./checkpoints/' + args.exp_name)
    io = IOStream('checkpoints/' + args.exp_name + '/run.log')
    return io
def init(args, configpath):
    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')
    if not os.path.exists('checkpoints/' + args.exp_name):
        os.makedirs('checkpoints/' + args.exp_name)
    os.system('cp ' + configpath + ' ./checkpoints/' + args.exp_name)
    io = IOStream('checkpoints/' + args.exp_name + '/run.log')
    return io
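# Note: the snippets in this file assume an IOStream helper that mirrors log lines to a
# file; its definition is not included here. A minimal sketch of such a logger (the real
# class may differ) could look like this:
class IOStream:
    def __init__(self, path):
        # open in append mode so restarted runs keep the previous log
        self.f = open(path, 'a')

    def cprint(self, text):
        # print to stdout and persist the same line to the log file
        print(text)
        self.f.write(text + '\n')
        self.f.flush()

    def close(self):
        self.f.close()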
def _init_(args):
    # initialize parameters
    path = 'results/' + args.exp_name + str(args.nFold)
    if not os.path.exists(path):
        os.mkdir(path)
    args.resume = path + '/checkpoint.pth.tar'
    args.best = path + '/bestmodel.pth.tar'
    args.io = IOStream(path + '/run.log')
    args.start_epoch = 0
    args.best_prec1 = 0
    args.device = device
def voting(net, testloader, device, args):
    name = '/evaluate_voting' + str(
        datetime.datetime.now().strftime('-%Y%m%d%H%M%S')) + '.log'
    io = IOStream(args.checkpoint + name)
    io.cprint(str(args))

    net.eval()
    best_acc = 0
    best_mean_acc = 0
    # set the range of scaling
    pointscale = PointcloudScale(scale_low=0.8, scale_high=1.18)

    for i in range(args.NUM_PEPEAT):  # number of repeated voting rounds
        test_true = []
        test_pred = []
        for batch_idx, (data, label) in enumerate(testloader):
            data, label = data.to(device), label.to(device).squeeze()
            pred = 0
            for v in range(args.NUM_VOTE):
                new_data = data
                # batch_size = data.size()[0]
                if v > 0:
                    new_data.data = pointscale(new_data.data)
                with torch.no_grad():
                    pred += F.softmax(net(new_data.permute(0, 2, 1)), dim=1)  # sum the vote predictions
            pred /= args.NUM_VOTE  # average the predictions
            label = label.view(-1)
            pred_choice = pred.max(dim=1)[1]
            test_true.append(label.cpu().numpy())
            test_pred.append(pred_choice.detach().cpu().numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = 100. * metrics.accuracy_score(test_true, test_pred)
        test_mean_acc = 100. * metrics.balanced_accuracy_score(test_true, test_pred)
        if test_acc > best_acc:
            best_acc = test_acc
        if test_mean_acc > best_mean_acc:
            best_mean_acc = test_mean_acc
        outstr = 'Voting %d, test acc: %.3f, test mean acc: %.3f, [current best(mean_acc: %.3f all_acc: %.3f)]' % \
                 (i, test_acc, test_mean_acc, best_mean_acc, best_acc)
        io.cprint(outstr)

    # best_acc is already a percentage (scaled by 100 above), so it is not rescaled here
    final_outstr = 'Final voting test acc: %.6f' % best_acc
    io.cprint(final_outstr)
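# PointcloudScale is not defined in this snippet; below is a minimal sketch of a compatible
# transform, assuming a single uniform random scale factor per cloud (the original may
# instead scale each axis independently):
import torch


class PointcloudScale:
    def __init__(self, scale_low=0.8, scale_high=1.18):
        self.scale_low = scale_low
        self.scale_high = scale_high

    def __call__(self, points):
        # points: (batch, num_points, 3); draw one scale per cloud and broadcast it
        scales = torch.empty(points.size(0), 1, 1, device=points.device).uniform_(
            self.scale_low, self.scale_high)
        return points * scales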
                    default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--dropout', type=float, default=0.0,
                    help='dropout rate')
parser.add_argument('--model_path', type=str, default='', metavar='N',
                    help='Pretrained model path')
args = parser.parse_args()

config = Config.from_json_file('config.json')
args.feat_dim = config.n_address

_init_()

io = IOStream('checkpoints/' + args.exp_name + '/run.log')
io.cprint(str(args))

args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    io.cprint('Using GPU')
    torch.cuda.manual_seed(args.seed)
else:
    torch.manual_seed(args.seed)
    io.cprint('Using CPU')

train(args, config, io)
                    help='Dimension of embeddings')
parser.add_argument('--k', type=int, default=20, metavar='N',
                    help='Num of nearest neighbors to use')
parser.add_argument('--model_path', type=str, default='', metavar='N',
                    help='Pretrained model path')
args = parser.parse_args()

_init_()

io = IOStream('checkpoints/' + args.exp_name + '/run.log')
io.cprint(str(args))

args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    io.cprint('Using GPU : ' + str(torch.cuda.current_device()) +
              ' from ' + str(torch.cuda.device_count()) + ' devices')
    torch.cuda.manual_seed(args.seed)
else:
    io.cprint('Using CPU')

if not args.eval:
    train(args, io)
else:
    test(args, io)
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--manualSeed', type=int, help='manual seed')
    parser.add_argument('--batch_size', type=int, default=30)
    parser.add_argument('--epochs', type=int, default=241)
    parser.add_argument('--workers', type=int, default=6,
                        help='num of workers to load data for each DataLoader')
    parser.add_argument('--checkpoints_dir', '-CDIR', default='experiments_deco',
                        help='Folder where all experiments get stored')
    parser.add_argument('--exp_name', '-EXP', default='exp',
                        help='will create an exp_name folder under checkpoints_dir')
    parser.add_argument('--config', '-C', required=True,
                        help='path to valid configuration file')
    parser.add_argument('--parallel', action='store_true',
                        help="Multi-GPU Training")
    parser.add_argument('--it_test', type=int, default=10,
                        help='at each it_test epoch: perform test and checkpoint')
    parser.add_argument('--restart_from', default='',
                        help='restart interrupted training from checkpoint')
    parser.add_argument(
        '--class_choice',
        default="Airplane,Bag,Cap,Car,Chair,Guitar,Lamp,Laptop,Motorbike,Mug,Pistol,Skateboard,Table",
        help='Classes to train on: default is 13 classes used in PF-Net')
    parser.add_argument(
        '--data_root',
        default="/home/antonioa/data/shapenetcore_partanno_segmentation_benchmark_v0")

    # crop params
    parser.add_argument('--crop_point_num', type=int, default=512,
                        help='number of points to crop')
    parser.add_argument('--context_point_num', type=int, default=512,
                        help='number of points of the frame region')
    parser.add_argument('--num_holes', type=int, default=1,
                        help='number of crop_point_num holes')
    parser.add_argument('--pool1_points', '-P1', type=int, default=1280,
                        help='points selected at pooling layer 1, we use 1280 in all experiments')
    parser.add_argument('--pool2_points', '-P2', type=int, default=512,
                        help='points selected at pooling layer 2, should match crop_point_num i.e. 512')
    # parser.add_argument('--fps_centroids', '-FPS', action='store_true', help='different crop logic than pfnet')
    parser.add_argument('--raw_weight', '-RW', type=float, default=1,
                        help='weights the intermediate pred (frame reg.) loss, use 0 to disable regularization')

    args = parser.parse_args()
    args.fps_centroids = False

    # make experiment dirs
    args.save_dir = os.path.join(args.checkpoints_dir, args.exp_name)
    args.models_dir = os.path.join(args.save_dir, 'models')
    args.vis_dir = os.path.join(args.save_dir, 'train_visz')
    safe_make_dirs([args.save_dir, args.models_dir, args.vis_dir,
                    os.path.join(args.save_dir, 'backup_code')])

    # instantiate loggers
    io_logger = IOStream(os.path.join(args.save_dir, 'log.txt'))
    tb_logger = SummaryWriter(logdir=args.save_dir)
    return args, io_logger, tb_logger
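# safe_make_dirs is used by get_args() and the training scripts below but is not shown;
# a minimal sketch, assuming it simply wraps os.makedirs for a list of paths:
import os


def safe_make_dirs(paths):
    for path in paths:
        os.makedirs(path, exist_ok=True)  # no-op if the directory already exists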
parser.add_argument('--tau', type=float, default=1e2,
                    help='balancing weight for loss function [default: 1e2]')
args = parser.parse_args()

args.adj_lr = {
    'steps': [int(temp) for temp in args.step],
    'decay_rates': [float(temp) for temp in args.dr]
}
args.feature_transform, args.augment = bool(args.feature_transform), bool(args.augment)

### Set random seed
args.seed = args.seed if args.seed > 0 else random.randint(1, 10000)

if not os.path.exists('checkpoints/' + args.lggan):
    os.mkdir('checkpoints/' + args.lggan)
io = IOStream('checkpoints/' + args.lggan + '/run.log')
io.cprint(str(args))

TAU = args.tau
ITERATION = 100

# create adversarial example path
ADV_PATH = args.adv_path
if not os.path.exists('results'):
    os.mkdir('results')
ADV_PATH = os.path.join('results', ADV_PATH)
if not os.path.exists(ADV_PATH):
    os.mkdir(ADV_PATH)
ADV_PATH = os.path.join(ADV_PATH, 'test')

NUM_CLASSES = 40


def write_h5(data, data_orig, label, label_orig, num_batches):
    os.makedirs(os.path.join(save_dir, 'models'))
if not os.path.exists(point_netG_saving):
    os.makedirs(point_netG_saving)
if not os.path.exists(point_netD_saving):
    os.makedirs(point_netD_saving)
if not os.path.exists(os.path.join(save_dir, 'backup-code')):
    os.makedirs(os.path.join(save_dir, 'backup-code'))
if not os.path.exists(os.path.join(save_dir, "train_visz")):
    os.makedirs(os.path.join(save_dir, "train_visz"))

filename = os.path.abspath(__file__).split('/')[-1]
os.system('cp {} {}'.format(
    os.path.abspath(__file__),
    os.path.join(save_dir, 'backup-code', '{}.backup'.format(filename))))

io = IOStream(os.path.join(save_dir, 'log.txt'))
tb = SummaryWriter(logdir=save_dir)
io.cprint("PFNet training -\n num holes: %d, cropped points around each: %d"
          % (opt.num_holes, opt.crop_point_num))
io.cprint('-' * 30)
io.cprint('Arguments: ')
io.cprint(str(opt) + '\n')

USE_CUDA = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
point_netG = _netG(opt.num_scales, opt.each_scales_size, opt.point_scales_list,
                   opt.crop_point_num * opt.num_holes)
if opt.D_choose == 1:
    point_netD = _netlocalD(opt.crop_point_num * opt.num_holes)

resume_epoch = 0
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("Conv1d") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)
    elif classname.find("BatchNorm1d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


args = parse_args()
exp_dir = os.path.join(args.checkpoints_dir, args.exp_name + '_' + str(int(time.time())))
tb_dir, models_dir = osp.join(exp_dir, "tb_logs"), osp.join(exp_dir, "models")
safe_make_dirs([tb_dir, models_dir])
io = IOStream(osp.join(exp_dir, "log.txt"))
io.cprint(f"Arguments: {str(args)} \n")
tb_writer = SummaryWriter(logdir=tb_dir)

centroids = np.asarray([[1, 0, 0], [0, 0, 1], [1, 0, 1], [-1, 0, 0], [-1, 1, 0]])  # same as PFNet

if args.num_positive_samples > 2:
    criterion = SupConLoss(temperature=args.temp, base_temperature=1, contrast_mode='all')
else:
    criterion = SimCLRLoss(temperature=args.temp)

io.cprint("Contrastive learning params: ")
io.cprint(f"criterion: {str(criterion)}")
io.cprint(f"num positive samples: {args.num_positive_samples}")
io.cprint(f"centroids cropping: {str(centroids)}")

train_transforms = transforms.Compose(
def main(opt):
    exp_dir = osp.join(opt.checkpoints_dir, opt.exp_name)
    tb_dir, models_dir = osp.join(exp_dir, "tb_logs"), osp.join(exp_dir, "models")
    safe_make_dirs([tb_dir, models_dir])
    io = IOStream(osp.join(exp_dir, "log.txt"))
    tb_logger = SummaryWriter(logdir=tb_dir)

    assert os.path.exists(opt.config), "wrong config path"
    with open(opt.config) as cf:
        config = json.load(cf)

    io.cprint(f"Arguments: {str(opt)}")
    io.cprint(f"Config: {str(config)} \n")

    if len(opt.class_choice) > 0:
        class_choice = ''.join(opt.class_choice.split()).split(",")  # sanitize + split(",")
        io.cprint("Class choice: {}".format(str(class_choice)))
    else:
        class_choice = None

    train_dataset = PretextDataset(root=opt.data_root, task='denoise', class_choice=class_choice,
                                   npoints=config["num_points"], split='train', normalize=True,
                                   noise_mean=config["noise_mean"], noise_std=config["noise_std"])
    test_dataset = PretextDataset(root=opt.data_root, task='denoise', class_choice=class_choice,
                                  npoints=config["num_points"], split='test', normalize=True,
                                  noise_mean=config["noise_mean"], noise_std=config["noise_std"])
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True,
                              drop_last=True, num_workers=opt.workers)
    test_loader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False,
                             drop_last=False, num_workers=opt.workers)

    criterion = nn.MSELoss()  # loss function for denoising
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # MODEL
    model = GPDLocalFE(config)
    if opt.parallel:
        io.cprint(f"DataParallel training with {torch.cuda.device_count()} GPUs")
        model = nn.DataParallel(model)
    model = model.to(device)
    io.cprint(f'model: {str(model)}')

    # OPTIMIZER + SCHEDULER
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    train_start = time.time()
    for epoch in range(opt.epochs):
        # TRAIN
        # We compute both MSE and Chamfer Distance between the cleaned pointcloud and the clean GT,
        # where cleaned = model(noised). MSE is the loss function; Chamfer Distance is only an
        # additional metric.
        ep_start = time.time()
        train_mse, train_cd = train_one_epoch(train_loader, model, optimizer, criterion, device)
        train_time = time.strftime("%M:%S", time.gmtime(time.time() - ep_start))
        io.cprint("Train %d, time: %s, MSE (loss): %.6f, CD (dist): %.6f"
                  % (epoch, train_time, train_mse, train_cd))
        tb_logger.add_scalar("Train/MSE_loss", train_mse, epoch)
        tb_logger.add_scalar("Train/CD_dist", train_cd, epoch)

        # TEST
        mse_test, cd_test = test(test_loader, model, criterion, device)
        io.cprint("Test %d, MSE (loss): %.6f, CD (dist): %.6f" % (epoch, mse_test, cd_test))
        tb_logger.add_scalar("Test/MSE", mse_test, epoch)
        tb_logger.add_scalar("Test/CD", cd_test, epoch)

        # LR SCHEDULING
        scheduler.step()

        if epoch % 10 == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict() if not opt.parallel else model.module.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                },
                osp.join(models_dir, "local_denoise_{}.pth".format(epoch)))

    hours, rem = divmod(time.time() - train_start, 3600)
    minutes, seconds = divmod(rem, 60)
    io.cprint("Training ended in {:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds))
class MCTSnet:
    def __init__(self, actions, calculate_reward, get_legal_actions, transition,
                 version=0, load_model=True, load_memories=False, best=False,
                 trainer=True, memories=[]):
        create_folders()
        if memories != []:
            self.memories = memories
        else:
            self.memories = []
        self.load_model = load_model
        self.load_memories = load_memories
        self.actions = actions
        self.get_legal_actions = get_legal_actions
        self.calculate_reward = calculate_reward
        self.transition = transition
        self.best = best
        self.io = IOStream("checkpoints/run.log")
        self.cuda = False
        self.models = setup_models(self.io, load_model, self.cuda, trainer)
        self.optims = setup_optims(self.models, self.cuda)
        self.version = version
        if not best:
            # compare the string by equality, not identity
            if load_memories and version != "best" and memories == []:
                print("Loading Memories...")
                try:
                    self.memories = pickle.load(open("checkpoints/memories.p", "rb"))
                except FileNotFoundError:
                    print("Memories not found, making new memories.")
            print("Loading History...")
            try:
                self.history = pickle.load(open("checkpoints/history.p", "rb"))
            except FileNotFoundError:
                print("Loss history not found, starting new history.")
                self.history = {
                    "readout": [],
                    "policy": [],
                    "value": [],
                    "total": []
                }
            self.best_net = MCTSnet(self.actions, self.calculate_reward,
                                    self.get_legal_actions, self.transition,
                                    self.version, self.load_model,
                                    self.load_memories, best=True, trainer=False)

    def choose_row(self):
        while True:
            try:
                inp = int(input("Pick a row, 1-7: "))
                inp -= 1
                return inp
            except Exception as e:
                print("Invalid choice.")

    def play_cpu(self, root_state, curr_player=0):
        eval_mode(self.models)
        root_state = np.array(root_state, dtype="float32")
        joint_state = [np.copy(root_state), np.copy(root_state)]
        results = dict()
        results["player_one"] = 0
        results["player_two"] = 0
        results["draw"] = 0
        np.set_printoptions(precision=3)
        # if (curr_player==0):
        #     first_player=True
        # else:
        #     first_player=False
        game_over = False
        joint = np.copy(joint_state)
        while not game_over:
            legal_actions = self.get_legal_actions(joint)
            if len(legal_actions) == 0:
                results["draw"] += 1
                break
            if curr_player == 0:
                joint_copy = np.copy(joint)
                blank = [["_" for _ in range(7)] for _ in range(6)]
                # dsp = np.array(blank, dtype=object)
                m1 = np.ma.masked_where(joint_copy[0] > 0, blank)
                np.ma.set_fill_value(m1, "O")
                m1 = m1.filled()
                m2 = np.ma.masked_where(joint_copy[1] > 0, m1)
                np.ma.set_fill_value(m2, "X")
                m2 = m2.filled()
                print(m2)
                row = self.choose_row()
                idx = legal_actions[row]
                action = self.actions[idx]
            else:
                pi, _ = self.run_simulations(joint, curr_player, 0)
                print(pi)
                pi = self.apply_temp_to_policy(pi, 0, T=0)
                idx = np.random.choice(len(self.actions), p=pi)
                action = self.actions[idx]
            joint[curr_player] = self.transition(joint[curr_player], action)
            reward, game_over = self.calculate_reward(joint)
            if game_over:
                if reward == -1:
                    results["player_two"] += 1
                elif reward == 1:
                    results["player_one"] += 1
            else:
                curr_player += 1
                curr_player = curr_player % 2
        print(results)

    def do_round(self, results, joint_state, curr_player, T=config.TAU,
                 record_memories=True):
        if record_memories:
            memories = []
        game_over = False
        joint = np.copy(joint_state)
        turn = 0
        while not game_over:
            turn += 1
            legal_actions = self.get_legal_actions(joint)
            if len(legal_actions) == 0:
                results["draw"] += 1
                break
            if curr_player == 0:
                pi, memory = self.run_simulations(joint, curr_player, turn)
            else:
                pi, memory = self.best_net.run_simulations(joint, curr_player, turn)
            pre_temp_idx = np.random.choice(len(self.actions), p=pi)
            pi = self.apply_temp_to_policy(pi, turn, T)
            idx = np.random.choice(len(self.actions),
                                   p=pi)
            memory["readout"]["output"] = F.log_softmax(
                memory["readout"]["output"], dim=0)[pre_temp_idx]
            if record_memories:
                memories.extend([memory])
            action = self.actions[idx]
            joint[curr_player] = self.transition(joint[curr_player], action)
            reward, game_over = self.calculate_reward(joint)
            if game_over:
                if reward == -1:
                    results["player_two"] += 1
                elif reward == 1:
                    results["player_one"] += 1
            else:
                curr_player += 1
                curr_player = curr_player % 2
        if record_memories:
            for memory in memories:
                if memory["curr_player"] == 0:
                    memory["result"] = reward
                else:
                    memory["result"] = -1 * reward
            self.memories.extend(memories)

    def self_play(self, root_state, curr_player=0, save_model=True, T=config.TAU,
                  record_memories=True):
        # Consider separating the network evaluation from the games, since
        # the network evaluation will be through deterministic games.
        # So we want a stochastic policy since it will see more states and be more robust,
        # but we need to save the best model according to what the best deterministic policy is,
        # since that is ultimately what we want.
        eval_mode(self.models)
        root_state = np.array(root_state, dtype="float32")
        joint_state = [np.copy(root_state), np.copy(root_state)]
        results = dict()
        results["player_one"] = 0
        results["player_two"] = 0
        results["draw"] = 0
        np.set_printoptions(precision=3)
        for _ in tqdm(range(config.EPISODES)):
            self.do_round(results, joint_state, curr_player, T=T,
                          record_memories=record_memories)
        # results["player_one"] = 0
        # results["player_two"] = 0
        # results["draw"] = 0
        # for _ in tqdm(range(config.EVALUATION_EPISODES)):
        #     self.do_round(results, joint_state, curr_player,
        #                   T=0, record_memories=False)
        # print("Deterministic Results: ", results)
        if T == 0:
            name = "Deterministic"
        else:
            name = "Stochastic"
        print("{} Results: ".format(name), results)
        if save_model:
            if results["player_one"] > results["player_two"] * config.SCORING_THRESHOLD:
                self.save_best_model()
                self.best_net.models = setup_models(self.best_net.io,
                                                    self.best_net.load_model,
                                                    self.best_net.cuda,
                                                    trainer=False)
                self.best_net.optims = setup_optims(self.best_net.models,
                                                    self.best_net.cuda)
            elif results["player_two"] > results["player_one"] * config.SCORING_THRESHOLD:
                # load best model to training model
                self.models = setup_models(self.io, self.load_model, self.cuda,
                                           trainer=False)
                self.optims = setup_optims(self.models, self.cuda)
                # self.save_training_model()
        # self.memories = self.memories[-config.MAX_MEMORIES:]
        print("Num memories: {}".format(len(self.memories)))
        # Note, I am loading old memories from a bad version.
        # It will eventually get overwritten, but it is a little inefficient to reference those.
        return self.memories

    def save_best_model(self):
        self.io.cprint("Saving best model")
        for name, model in self.models.items():
            torch.save(model, "checkpoints/models/%s.t7" % (name + "_best"))

    def save_training_model(self):
        self.io.cprint("Saving training model")
        for name, model in self.models.items():
            torch.save(model, "checkpoints/models/%s.t7" % (name + "_training"))

    def load_training_model(self):
        self.models = setup_models(self.io, self.load_model, self.cuda, trainer=True)
        self.optims = setup_optims(self.models, self.cuda)

    def save_memories(self):
        print("Saving Memories...")
        pickle.dump(self.memories, open("checkpoints/memories.p", "wb"))

    def plot_losses(self):
        plt.plot(self.history["readout"], "r")
        plt.plot(self.history["policy"], "m")
        plt.plot(self.history["value"], "c")
        plt.plot(self.history["total"], "y")
        plt.show()

    def run_simulations(self, joint_states, curr_player, turn):
        self.embeddings = dict()
        S = dict()
        A = dict()
        R = dict()
        H = dict()
        N = dict()
        game_over = False
        memory = {
            "curr_player": curr_player,
            "result": None,
            "policy": {
                "output": []
            },
            "readout": {
                "output": None
            },
            "value": {
                "output": None
            }
        }
        root_state = np.concatenate(
            (np.expand_dims(joint_states[0], 0),
             np.expand_dims(joint_states[1], 0),
             np.zeros(shape=np.expand_dims(joint_states[1], 0).shape) + curr_player),
            axis=0)

        def convert_to_pytorch_state(state):
            channel_one = cast_to_torch(state[0], self.cuda).unsqueeze(0)
            channel_two = cast_to_torch(state[1], self.cuda).unsqueeze(0)
            channel_three = cast_to_torch(state[2], self.cuda).unsqueeze(0)
            return torch.cat([channel_one, channel_two, channel_three], 0).unsqueeze(0)

        def get_state_mask(state, legal_actions):
            flattened = state[:2].flatten()
            flattened[legal_actions] = 1
            return flattened.reshape(state[0].shape)

        input_state = convert_to_pytorch_state(state)
        memory = torch.tensor(root_state.shape)
        memory = 0
        set_trace()
        for _ in range(config.MCTS_SIMS + 1):
            # consider adding a probas to do another sim and tradeoff between number of sims
            # vs performance, i.e. maximize perf minimize sims
            (exploratory_state, strongest_transition, updated_memory,
             input_state_value) = mcts(input_state, memory)
            input_state = exploratory_state
            memory = updated_memory
            # if sim < config.MCTS_SIMS:
            #     memory["strongest_transitions"].append(strongest_transition)
        # So basically I want to accumulate a bunch of moves from running the network
        legal_actions = self.get_legal_actions(root_state[:2])
        view = root_state[legal_actions]
        probas = F.softmax(view, dim=0)
        idx = np.random.choice(probas.data.numpy(), p=probas)
        log_probas = F.log_softmax(view, dim=0)
        memory["final_transition"] = strongest_transition
        memory["log_probas"] = log_probas
        memory["value"] = input_state_value
        new_state = np.copy(root_state) * get_state_mask(root_state, legal_actions)
        new_state[legal_actions[idx]] = 1
        return new_state

        # NOTE: the early return above makes everything below unreachable as written
        # (it appears to be left over from debugging / an in-progress refactor).
        t = 0
        # +1 sims since the first is used to expand the embedding
        for sim in range(config.MCTS_SIMS + 1):
            while True:
                try:
                    N[hashed] += 1
                except:
                    N[hashed] = 0
                break
            legal_actions = self.get_legal_actions(S[t][:2])
            reward, game_over = self.calculate_reward(S[t][:2])
            R[t] = reward
            if len(legal_actions) == 0 or game_over:
                game_over = True
                break
            # consider moving the value head here and using it in the backups
            action = self.simulate(self.embeddings[hashed], S[t], sim, memory)
            A[t] = action
            new_state = self.transition(np.copy(S[t][:2][curr_player]), A[t])
            S[t + 1] = np.copy(S[t])
            S[t + 1][curr_player] = np.copy(new_state)
            t += 1
            curr_player += 1
            curr_player = curr_player % 2
            S[t][2] = curr_player
            S[t].flags.writeable = False
            hashed = hash(S[t].data.tobytes())
            S[t].flags.writeable = True
            if not game_over and len(legal_actions) > 0:
                state_one = cast_to_torch(S[t][0], self.cuda).unsqueeze(0)
                state_two = cast_to_torch(S[t][1], self.cuda).unsqueeze(0)
                state_three = cast_to_torch(S[t][2], self.cuda).unsqueeze(0)
                state = torch.cat([state_one, state_two, state_three], 0).unsqueeze(0)
                self.models["emb"].eval()
                H[t] = self.embeddings[hashed] = self.models["emb"](state)
            if t > 0:
                H = self.backup(H, R, S, t, memory)
                t = 0
        self.models["readout"].eval()
        logits = self.models["readout"](H[0])
        memory["readout"]["output"] = logits
        pi = self.correct_policy(logits, joint_states, is_root=False)
        return pi, memory

    def apply_temp_to_policy(self, pi, turn, T=config.TAU):
        if turn == config.TURNS_UNTIL_TAU0 or T == 0:
            temp = np.zeros(shape=pi.shape)
            temp[np.argmax(pi)] = 1
            pi = temp
        else:
            return pi
        # T = T - ((1 / config.TURNS_UNTIL_TAU0) * (turn+1))
        # if T <= .1:
        #     T = 0
        #     temp = np.zeros(shape=pi.shape)
        #     temp[np.argmax(pi)] = 1
        #     pi = temp
        # else:
        #     pi = pi**(1 / T)
        # pol_sum = (np.sum(pi) * 1.0)
        # if pol_sum != 0:
        #     pi = pi / pol_sum
        return pi

    def simulate(self, emb, joint_state, sim, memory):
        emb = emb.view(1, 1, 8, 16)
        self.models["policy"].eval()
        logits, value = self.models["policy"](emb)
        if sim == 1:
            is_root = True
        else:
            is_root = False
        # might want to use uncorrected policy, idk
        pi = self.correct_policy(logits, joint_state, is_root=is_root)
        # if sim == 1:
        # I think I actually want this to be the last sim since I want the most recent
        # output from the policy net
        idx = np.random.choice(len(self.actions), p=pi)
        action = self.actions[idx]
        memory["policy"]["output"].append({
            "log_action_prob": F.log_softmax(logits, dim=0)[idx],
            "value": value,
            "is_root": is_root
        })
        return action

    def backup(self, H, R, S, _t, memory, is_for_inp=False):
        for t in reversed(range(_t)):
            reward = cast_to_torch([R[t]], self.cuda)
            comb_state_1 = S[t + 1][0] + S[t + 1][1]
            comb_state_2 = S[t][0] + S[t][1]
            action = comb_state_1 - comb_state_2
            action = cast_to_torch(action, self.cuda).view(-1)
            inp = torch.cat([H[t], H[t + 1], reward, action], 0)
            self.models["backup"].eval()
            H[t] = self.models["backup"](inp, H[t])
        return H

    def correct_policy(self, logits, joint_state, is_root):
        odds = np.exp(logits.data.numpy())
        policy = odds / np.sum(odds)
        if is_root:
            nu = np.random.dirichlet([config.ALPHA] * len(self.actions))
            policy = policy * (1 - config.EPSILON) + nu * config.EPSILON
        mask = np.zeros(policy.shape)
        legal_actions = self.get_legal_actions(joint_state[:2])
        mask[legal_actions] = 1
        policy = policy * mask
        pol_sum = (np.sum(policy) * 1.0)
        if pol_sum == 0:
            return policy
        else:
            return policy / pol_sum

    def zero_grad(self):
        for _, optim in self.optims.items():
            optim.zero_grad()

    def optim_step(self):
        for _, optim in self.optims.items():
            optim.step()

    # todo: update model to use CLR and stuff
    # https://github.com/fastai/fastai/blob/master/fastai/learner.py
    # def save(self):
    #     for name, model in self.models.items():
    #         torch.save(model, "checkpoints/%s.t7" % (name + "_tmp"))

    # def load(self):
    #     for name, model in self.models.items():
    #         torch.load(model, "checkpoints/%s.t7" % (name + "_tmp"))

    # def find_lr(self):
    #     self.save()
    #     layer_opt = self.get_layer_opt

    def train(self, minibatches, last_loop=False):
        for e in range(config.EPOCHS):
            last_epoch = (e == (config.EPOCHS - 1))
            if e > 0:
                shuffle(minibatches)
            read_loss_data = 0
            pol_loss_data = 0
            val_loss_data = 0
            total_loss_data = 0
            for mb in minibatches:
                self.zero_grad()
                pol_loss = 0
                val_loss = 0
                read_loss = 0
                weights = [1, 1, 1]
                num_val_losses = 0
                num_pol_losses = 0
                num_read_losses = 0
                for i, memory in enumerate(mb):
                    result = memory["result"]
                    pol_trajectories = memory["policy"]["output"]
                    for action in pol_trajectories:
                        if action["is_root"]:
                            root_value = action["value"]
                            root_log_action_prob = action["log_action_prob"]
                        else:
                            pol_loss += -action["log_action_prob"] * (result - action["value"])
                            num_pol_losses += 1
                            val_loss += F.mse_loss(
                                action["value"],
                                Variable(torch.FloatTensor(np.array([result])), volatile=True))
                            num_val_losses += 1
                    val_loss += F.mse_loss(
                        root_value,
                        Variable(torch.FloatTensor(np.array([result])), volatile=True))
                    pol_loss += -root_log_action_prob * (result - root_value)
                    read_loss += -memory["readout"]["output"] * (result - root_value)
                    num_pol_losses += 1
                    num_val_losses += 1
                    num_read_losses += 1
                val_loss = val_loss / (len(mb))
                pol_loss = pol_loss / (len(mb))
                read_loss = read_loss / (len(mb))
                total_loss = (read_loss * weights[0] + pol_loss * weights[1] +
                              val_loss * weights[2])
                read_loss_data += read_loss.data.numpy()[0] * weights[0]
                pol_loss_data += pol_loss.data.numpy()[0] * weights[1]
                val_loss_data += val_loss.data.numpy()[0] * weights[2]
                total_loss_data += total_loss.data.numpy()[0]
                # if (last_epoch):
                #     total_loss.backward(retain_graph=False)
                # else:
                #     total_loss.backward(retain_graph=True)
                total_loss.backward()
                assert (root_value.grad is not None
                        and root_log_action_prob.grad is not None
                        and mb[0]["readout"]["output"].grad is not None)
                set_trace()
                # orig_params = {}
                # for name, model in self.models.items():
                #     orig_params[name] = []
                #     for parameters in model.parameters():
                #         orig_params[name].extend([np.copy(parameters.detach().data.numpy())])
                self.optim_step()
                # for name, model in self.models.items():
                #     for i, parameters in enumerate(model.parameters()):
                #         if not (orig_params[name][i] == parameters.detach().data.numpy()).all():
                #             print(name)
                #             set_trace()
                #             test = "hi"
            read_loss_data /= len(minibatches)
            pol_loss_data /= len(minibatches)
            val_loss_data /= len(minibatches)
            total_loss_data /= len(minibatches)
            if len(self.history["readout"]) == 0:
                self.history["readout"].extend([read_loss_data])
                self.history["policy"].extend([pol_loss_data])
                self.history["value"].extend([val_loss_data])
                self.history["total"].extend([total_loss_data])
                pickle.dump(self.history, open("checkpoints/history.p", "wb"))
            elif last_loop and last_epoch and len(self.history["readout"]) > 0:
                prev_readout = self.history["readout"][-1]
                prev_policy = self.history["policy"][-1]
                prev_value = self.history["value"][-1]
                prev_total = self.history["total"][-1]
                r_sign = "" if prev_readout > read_loss_data else "+"
                p_sign = "" if prev_policy > pol_loss_data else "+"
                v_sign = "" if prev_value > val_loss_data else "+"
                t_sign = "" if prev_total > total_loss_data else "+"
                r_diff = ((read_loss_data - prev_readout) / prev_readout) * 100
                p_diff = ((pol_loss_data - prev_policy) / prev_policy) * 100
                v_diff = ((val_loss_data - prev_value) / prev_value) * 100
                t_diff = ((total_loss_data - prev_total) / prev_total) * 100
                print("readout loss: {} ({}{}%)".format(
                    np.round(read_loss_data, 4), r_sign, r_diff))
                print("policy loss: {} ({}{}%)".format(
                    np.round(pol_loss_data, 4), p_sign, p_diff))
                print("value loss: {} ({}{}%)".format(
                    np.round(val_loss_data, 4), v_sign, v_diff))
                print("total loss: {} ({}{}%)".format(
                    np.round(total_loss_data, 4), t_sign, t_diff))
                self.history["readout"].extend([read_loss_data])
                self.history["policy"].extend([pol_loss_data])
                self.history["value"].extend([val_loss_data])
                self.history["total"].extend([total_loss_data])
                pickle.dump(self.history, open("checkpoints/history.p", "wb"))

    def train_memories(self):
        train_mode(self.models)
        self.io.cprint("Training memories")
        # add a test here that takes the oldest memories, creates minibatches with them,
        # and runs one test to see if the loss is changing all of the parts of the network,
        # i.e. the parameters before and after the update are different.
        # https://blog.slavv.com/37-reasons-why-your-neural-network-is-not-working-4020854bd607
        # gives some hints.
        # param update magnitudes should be 1e-3
        # if len(self.memories) > config.MIN_MEMORIES:
        # num_samples = config.NUM_SAMPLES - (config.NUM_SAMPLES % config.BATCH_SIZE)
        for i in tqdm(range(config.TRAINING_LOOPS)):
            last_loop = (i == (config.TRAINING_LOOPS - 1))
            shuffle(self.memories)
            minibatches = [self.memories[:config.BATCH_SIZE]]
            # minibatches = [
            #     data[x:x + config.BATCH_SIZE]
            #     for x in range(0, len(data), config.BATCH_SIZE)
            # ]
            self.train(minibatches, last_loop)
    bD = [sp.csr_matrix(s[0, ...]) for s in bD]
    bU = [sp.csr_matrix(s[0, ...]) for s in bU]
    with open(os.path.join(args['downsample_directory'], 'pai_matrices.pkl'), 'wb') as fp:
        pickle.dump([Adj, sizes, bD, bU], fp)
else:
    print("Loading adj Matrices ..")
    with open(os.path.join(args['downsample_directory'], 'pai_matrices.pkl'), 'rb') as fp:
        [Adj, sizes, bD, bU] = pickle.load(fp)

tD = [sparse_mx_to_torch_sparse_tensor(s) for s in bD]
tU = [sparse_mx_to_torch_sparse_tensor(s) for s in bU]

#%%
torch.manual_seed(args['seed'])
print(device)

io = IOStream(os.path.join(args['results_folder']) + '/run.log')
io.cprint(str(args))

#%%
# Building model, optimizer, and loss function
dataset_train = autoencoder_dataset(
    root_dir=args['data'],
    points_dataset='train',
    shapedata=shapedata,
    normalization=args['normalization'])

dataloader_train = DataLoader(
    dataset_train,
    batch_size=args['batch_size'],
    shuffle=args['shuffle'],
    num_workers=args['num_workers'])
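# sparse_mx_to_torch_sparse_tensor is not shown above; the following is a common minimal
# sketch of this scipy-to-torch conversion helper (assumed, not taken from this repo):
import numpy as np
import torch


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    # convert a scipy sparse matrix to a torch sparse COO tensor
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse_coo_tensor(indices, values, shape)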