def main(args):
    # Load the arguments.
    model_dir = os.path.dirname(args.model_path)
    params = Dict2Obj(
        json.load(open(os.path.join(model_dir, "args.json"), "r")))

    # Configure logging.
    log_format = '%(levelname)-8s %(message)s'
    logfile = os.path.join(model_dir, 'eval.log')
    logging.basicConfig(filename=logfile, level=logging.INFO, format=log_format)
    logging.getLogger().addHandler(logging.StreamHandler())
    logging.info(json.dumps(args.__dict__))

    # Load the vocabulary wrapper.
    vocab = load_vocab(params.vocab_path)

    # Load the GloVe embedding.
    if params.use_glove:
        embedding = get_glove_embedding(params.embedding_name, 300, vocab)
    else:
        embedding = None

    # Process the input text.
    logging.info("Processing input text...")
    text, length = process_text(args.text, vocab, max_length=20)
    d_text = text
    logging.info("Done")

    # Build the model.
    logging.info('Creating IQ model...')
    model = Classifier(len(vocab),
                       embedding_dim=params.embedding_dim,
                       embedding=embedding,
                       hidden_dim=params.num_hidden_nodes,
                       output_dim=params.num_output_nodes,
                       num_layers=params.num_layers,
                       bidirectional=params.bidirectional,
                       dropout=params.dropout,
                       rnn_cell=params.rnn_cell)
    logging.info("Done")

    logging.info("Loading model.")
    model.load_state_dict(
        torch.load(args.model_path + "model-tf-" + args.state + ".pkl"))

    # Set up GPUs.
    if torch.cuda.is_available():
        logging.info("Using available GPU...")
        model.cuda()

    predict(model, d_text)
def classification_accuracy(dataset, cls_path, cls_checkpoint, layer,
                            fully_supervised):
    """Given a trained classifier, return the classification accuracy on the
    CIFAR10 test data."""
    # GPU setup. Note: is_available() must be called; the bare function object
    # is always truthy.
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
        batch_size = BATCH_SIZE * torch.cuda.device_count()
    else:
        device = torch.device('cpu')
        batch_size = BATCH_SIZE

    # Load classifier.
    cls_file = get_cls_checkpoint(cls_path, cls_checkpoint)
    print("Checkpoint to be loaded:", cls_file)
    cls_file = checkpoint_parser(cls_file)
    fully_supervised = (layer == '')
    classifier = Classifier(eval=True, layer=layer,
                            fully_supervised=fully_supervised)
    classifier.load_state_dict(cls_file)
    classifier = nn.DataParallel(classifier).to(device)

    # Load test data.
    _, _, test_batches, _ = data_loader(dataset, batch_size)

    # Accuracy evaluation.
    acc = []
    for batch, test_label in tqdm(test_batches):
        batch = batch.to(device)
        y = classifier(batch)
        _, predict_label = y.max(1)
        # From tensor to numpy array.
        predict_label = predict_label.cpu().numpy()
        test_label = test_label.numpy()
        batch_acc = (predict_label == test_label)
        acc += batch_acc.tolist()
    print(np.mean(acc))
    return np.mean(acc)
class Solver(object):
    def __init__(self, config):
        # Configuration
        self.config = config
        # Build the models
        self.build_models()

    def build_models(self):
        # Model
        self.net = Classifier().to(self.config['device'])
        # Optimizer (resolved by name, e.g. 'Adam' -> torch.optim.Adam)
        self.optimizer = getattr(torch.optim, self.config['optimizer'])(
            self.net.parameters(),
            lr=self.config['lr'],
        )
        # Criterion ('reduce=False' is deprecated; use reduction='none')
        self.criterion = nn.CrossEntropyLoss(reduction='none')
        # Record
        logging.info(self.net)

    def save_model(self, filename):
        save_path = os.path.join(self.config['save_path'], f'{filename}')
        try:
            torch.save(self.net.state_dict(), save_path,
                       _use_new_zipfile_serialization=False)
            logging.info(
                f'Saved best neural network checkpoint into {save_path}')
        except Exception:
            logging.error(f'Error saving weights to {save_path}')

    def restore_model(self, filename):
        weight_path = os.path.join(self.config['save_path'], f'{filename}')
        try:
            logging.info(f'Loading the trained Extractor from {weight_path}')
            self.net.load_state_dict(
                torch.load(weight_path,
                           map_location=lambda storage, loc: storage))
        except Exception:
            logging.error(f'Error loading weights from {weight_path}')
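# A minimal usage sketch for the Solver above (not from the original source);
# the config keys mirror the ones the class reads, the filename is illustrative.
config = {
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'optimizer': 'Adam',   # resolved via getattr(torch.optim, ...)
    'lr': 1e-3,
    'save_path': './checkpoints',
}
solver = Solver(config)
solver.save_model('classifier_init.pth')
solver.restore_model('classifier_init.pth')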
def main():
    args = parser.parse_args()

    # classifier
    if args.classifier is not None:
        snapshot = torch.load(args.classifier, map_location=lambda s, _: s)
        classifier = Classifier(snapshot['channels'])
        classifier.load_state_dict(snapshot['model'])
    else:
        classifier = None

    # dataset
    raw_loader = torch.utils.data.DataLoader(
        Dataset(os.path.join(DATA_DIR, 'raw')),
        batch_size=args.batch, shuffle=True, drop_last=True)
    noised_loader = torch.utils.data.DataLoader(
        Dataset(os.path.join(DATA_DIR, 'noised_tgt')),
        batch_size=args.batch, shuffle=True, drop_last=True)

    # model
    generator_f = Generator(args.channels)
    generator_r = Generator(args.channels)
    discriminator_f = Discriminator(args.channels)
    discriminator_r = Discriminator(args.channels)

    # train
    trainer = Trainer(generator_f, generator_r, discriminator_f,
                      discriminator_r, classifier, args.gpu)
    for epoch in range(args.epoch):
        trainer.train(noised_loader, raw_loader, epoch < args.epoch // 10)
        print('[{}] {}'.format(epoch, trainer), flush=True)
        # Save atomically: write to a temp file, then rename.
        snapshot = {
            'channels': args.channels,
            'model': generator_f.state_dict()
        }
        torch.save(snapshot, '{}.tmp'.format(args.file))
        os.rename('{}.tmp'.format(args.file), args.file)
def __init__(self, net_cfgs, test_dataloader, validate_thresh):
    self.test_dataloader = test_dataloader
    self.validate_thresh = validate_thresh
    self.net_list = []
    for cfg in net_cfgs:
        backbone_cfg = cfg.copy()
        backbone_type = backbone_cfg.pop('type')
        checkpoint = backbone_cfg.pop('checkpoint')
        if backbone_type == 'ResNet':
            backbone = ResNet(**backbone_cfg)
        elif backbone_type == 'ResNeXt':
            backbone = ResNeXt(**backbone_cfg)
        elif backbone_type == 'DenseNet':
            backbone = DenseNet(**backbone_cfg)
        else:
            raise ValueError(f'Unknown backbone type: {backbone_type}')
        classifier = Classifier(backbone, backbone.out_feat_dim).cuda()
        assert os.path.exists(checkpoint)
        state_dict = torch.load(checkpoint)
        classifier.load_state_dict(state_dict['model_params'])
        classifier.eval()
        self.net_list.append(classifier)
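# The constructor above only loads the networks; a plausible (hypothetical)
# companion method would average softmax scores over self.net_list and apply
# self.validate_thresh. Assumes `import torch.nn.functional as F`.
def predict(self, images):
    images = images.cuda()
    with torch.no_grad():
        # [num_nets, batch, classes] -> mean over the ensemble.
        probs = torch.stack(
            [F.softmax(net(images), dim=1) for net in self.net_list]).mean(0)
    scores, labels = probs.max(dim=1)
    labels[scores < self.validate_thresh] = -1  # mark low-confidence samples
    return labels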
    return flags


if __name__ == '__main__':
    flags = FLAGS()

    test_dataset = NCaltech101(flags.test_dataset)

    # construct loader, responsible for streaming data to gpu
    test_loader = Loader(test_dataset, flags, flags.device)

    # model, load and put to device
    model = Classifier()
    ckpt = torch.load(flags.checkpoint)
    model.load_state_dict(ckpt["state_dict"])
    model = model.to(flags.device)
    model = model.eval()

    sum_accuracy = 0
    sum_loss = 0

    print("Test step")
    for events, labels in tqdm.tqdm(test_loader):
        with torch.no_grad():
            pred_labels, _ = model(events)
            loss, accuracy = cross_entropy_loss_and_accuracy(
                pred_labels, labels)
        sum_accuracy += accuracy
        sum_loss += loss
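    # The loop above sums per-batch metrics but the snippet never reports
    # them; a plausible closing step (assumes Loader supports len()).
    print("Test Loss: {:.4f}  Test Accuracy: {:.4f}".format(
        sum_loss / len(test_loader), sum_accuracy / len(test_loader)))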
data_gen = dict()
loader_name = loader_config.pop('name')
for stage in stages:
    data_loader = DataLoader(loader_name, **loader_config)
    if data_list[stage] is not None:
        data_loader.set_data_list(data_list[stage])
    data_gen[stage] = DataGenerator(data_loader, generator_config[stage])

# - GPUs
os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpus'])
torch.backends.cudnn.enabled = True

# - model
model = Classifier(out_channels=2)
if args.checkpoint is not None:
    model.load_state_dict(torch.load(args.checkpoint))
    print('Load checkpoint:', args.checkpoint)
if torch.cuda.device_count() > 0:
    model = model.cuda()
model.zero_grad()

# - optimizer
optim = Optimizer(config['optimizer'])(model)
optim.zero_grad()

# - criterion with per-class weights
weight = torch.tensor([0.1, 0.99])
if torch.cuda.device_count() > 0:
    weight = weight.cuda()
criterion = torch.nn.CrossEntropyLoss(weight)
def main(args):
    # Load the arguments.
    model_dir = os.path.dirname(args.model_path)
    params = Dict2Obj(
        json.load(open(os.path.join(model_dir, "args.json"), "r")))

    # Configure logging.
    log_format = '%(levelname)-8s %(message)s'
    logfile = os.path.join(model_dir, 'eval.log')
    logging.basicConfig(filename=logfile, level=logging.INFO, format=log_format)
    logging.getLogger().addHandler(logging.StreamHandler())
    logging.info(json.dumps(args.__dict__))

    # Load the vocabulary wrapper.
    vocab = load_vocab(params.vocab_path)

    # Load the GloVe embedding.
    if params.use_glove:
        embedding = get_glove_embedding(params.embedding_name, 300, vocab)
    else:
        embedding = None

    # Build the data loader.
    logging.info("Building data loader...")
    data_loader = get_loader(args.dataset, args.batch_size,
                             shuffle=False, num_workers=args.num_workers,
                             max_examples=args.max_examples)
    logging.info("Done")

    # Build the model.
    logging.info('Creating a multi-class classification model...')
    model = Classifier(len(vocab),
                       embedding_dim=params.embedding_dim,
                       embedding=embedding,
                       hidden_dim=params.num_hidden_nodes,
                       output_dim=params.num_output_nodes,
                       num_layers=params.num_layers,
                       bidirectional=params.bidirectional,
                       dropout=params.dropout,
                       rnn_cell=params.rnn_cell)
    logging.info("Done")

    logging.info("Loading model.")
    model.load_state_dict(
        torch.load(args.model_path + "model-tf-" + args.state + ".pkl"))

    # Set up GPUs.
    if torch.cuda.is_available():
        logging.info("Using available GPU...")
        model.cuda()

    scores, gts, preds = evaluate(model, data_loader, vocab, args, params)

    # Print and save the scores.
    print(scores)
    with open(os.path.join(model_dir, args.results_path), 'w') as results_file:
        json.dump(scores, results_file)
    with open(os.path.join(model_dir, args.preds_path), 'w') as preds_file:
        json.dump(preds, preds_file)
    with open(os.path.join(model_dir, args.gts_path), 'w') as gts_file:
        json.dump(gts, gts_file)
with open("values.json", "r") as v: values = json.load(v) random.seed() roomWidth = 9 roomHeight = 6 classes = values["classes"] models = [] PATH = values["classifier"] classifier = Classifier() classifier.load_state_dict(torch.load(PATH, map_location=device)) PATH = values["comparator"] identifier = Comparator() identifier.load_state_dict(torch.load(PATH, map_location=device)) transform = transforms.Compose([ transforms.Grayscale(), transforms.Resize(227), transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )) ]) transform2 = transforms.Compose([ transforms.Grayscale(), transforms.Resize((96, 96)), transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )) ])
print("Classifier Accuracy %f | Adversarial in-bounds accuracy %f" % (total_correct/total, total_incorrect_inbds/total)) except OSError as e: # print(e) # print(total, total_correct, total_inbds, total_correct_inbds) return total, total_correct/total, total_incorrect_inbds/total except Exception as e: print("Error", e) if __name__ == "__main__": norms = [1.0, 2.5, 3.0, 3.5] methods = ['vanilla', 'classification', 'proxi_dist', 'combined', 'identity'] cla = Classifier(args) classifier_pt = torch.load('classifier.pt') cla.load_state_dict(classifier_pt) cla.eval() for method in methods: print(method) model = MADVAE(args) model_pt = torch.load( f'../pretrained_model/{method}/params.pt') model.load_state_dict(model_pt) model.eval() if torch.cuda.is_available(): print("Using CUDA") model = model.cuda() cla = cla.cuda()
class Agent():
    def __init__(self, state_size, action_size, config):
        self.seed = config["seed"]
        torch.manual_seed(self.seed)
        np.random.seed(seed=self.seed)
        random.seed(self.seed)
        self.env = gym.make(config["env_name"])
        self.env.seed(self.seed)
        self.state_size = state_size
        self.action_size = action_size
        self.clip = config["clip"]
        self.device = 'cuda'
        print("Clip ", self.clip)
        print("cuda ", torch.cuda.is_available())
        self.double_dqn = config["DDQN"]
        print("Use double dqn", self.double_dqn)
        self.lr_pre = config["lr_pre"]
        self.batch_size = config["batch_size"]
        self.lr = config["lr"]
        self.tau = config["tau"]
        print("self tau", self.tau)
        self.gamma = 0.99
        # Target entropy of -|A| (the original built an uninitialized tensor).
        self.target_entropy = -torch.prod(torch.Tensor([action_size]).to(self.device)).item()
        self.fc1 = config["fc1_units"]
        self.fc2 = config["fc2_units"]

        # Learned entropy temperature.
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha = self.log_alpha.exp()
        self.alpha_optim = optim.Adam([self.log_alpha], lr=config["lr_alpha"])

        self.policy = SACActor(state_size, action_size, self.seed).to(self.device)
        self.policy_optim = optim.Adam(self.policy.parameters(), lr=config["lr_policy"])

        # Twin Q-networks with slowly tracking targets.
        self.qnetwork_local = QNetwork(state_size, action_size, self.seed, self.fc1, self.fc2).to(self.device)
        self.qnetwork_target = QNetwork(state_size, action_size, self.seed, self.fc1, self.fc2).to(self.device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=self.lr)
        self.soft_update(self.qnetwork_local, self.qnetwork_target, 1)

        self.q_shift_local = SQNetwork(state_size, action_size, self.seed, self.fc1, self.fc2).to(self.device)
        self.q_shift_target = SQNetwork(state_size, action_size, self.seed, self.fc1, self.fc2).to(self.device)
        self.optimizer_shift = optim.Adam(self.q_shift_local.parameters(), lr=self.lr)
        self.soft_update(self.q_shift_local, self.q_shift_target, 1)

        # Learned reward function.
        self.R_local = SQNetwork(state_size, action_size, self.seed, self.fc1, self.fc2).to(self.device)
        self.R_target = SQNetwork(state_size, action_size, self.seed, self.fc1, self.fc2).to(self.device)
        self.optimizer_r = optim.Adam(self.R_local.parameters(), lr=self.lr)
        self.soft_update(self.R_local, self.R_target, 1)

        self.steps = 0
        # State-action frequency classifier.
        self.predicter = Classifier(state_size, action_size, self.seed, 256, 256).to(self.device)
        self.optimizer_pre = optim.Adam(self.predicter.parameters(), lr=self.lr_pre)

        pathname = "lr_{}_batch_size_{}_fc1_{}_fc2_{}_seed_{}".format(
            self.lr, self.batch_size, self.fc1, self.fc2, self.seed)
        pathname += "_clip_{}".format(config["clip"])
        pathname += "_tau_{}".format(config["tau"])
        now = datetime.now()
        dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
        pathname += dt_string
        tensorboard_name = str(config["locexp"]) + '/runs/' + pathname
        self.vid_path = str(config["locexp"]) + '/vid'
        self.writer = SummaryWriter(tensorboard_name)
        print("summary writer ", tensorboard_name)

        self.average_prediction = deque(maxlen=100)
        self.average_same_action = deque(maxlen=100)
        self.all_actions = []
        for a in range(self.action_size):
            action = torch.Tensor(1) * 0 + a
            self.all_actions.append(action.to(self.device))

    def learn(self, memory_ex, memory_all):
        self.steps += 1
        logging.debug("--------------------------New update-----------------------------------------------")
        states, next_states, actions, dones = memory_ex.expert_policy(self.batch_size)
        self.state_action_frq(states, actions)
        states, next_states, actions, dones = memory_all.expert_policy(self.batch_size)
        self.compute_shift_function(states, next_states, actions, dones)
        self.compute_r_function(states, actions)
        self.compute_q_function(states, next_states, actions, dones)
        self.soft_update(self.R_local, self.R_target, self.tau)
        self.soft_update(self.q_shift_local, self.q_shift_target, self.tau)
        self.soft_update(self.qnetwork_local, self.qnetwork_target, self.tau)
        return

    def compute_q_function(self, states, next_states, actions, dones):
        """Update the twin Q-networks, the policy, and alpha from a batch."""
        qf1, qf2 = self.qnetwork_local(states)
        q_value1 = qf1.gather(1, actions)
        q_value2 = qf2.gather(1, actions)
        with torch.no_grad():
            q1_target, q2_target = self.qnetwork_target(next_states)
            min_q_target = torch.min(q1_target, q2_target)
            next_action_prob, next_action_log_prob = self.policy(next_states)
            # Soft value of the next state under the current policy.
            next_q_target = (next_action_prob * (min_q_target - self.alpha * next_action_log_prob)).sum(dim=1, keepdim=True)
        rewards = self.R_target(states).detach().gather(1, actions.detach()).squeeze(0)
        Q_targets = rewards + ((1 - dones) * self.gamma * next_q_target)
        loss = F.mse_loss(q_value1, Q_targets.detach()) + F.mse_loss(q_value2, Q_targets.detach())
        self.writer.add_scalar('loss/q_loss', loss, self.steps)
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(self.qnetwork_local.parameters(), 1)
        self.optimizer.step()

        # -------------------------- update policy --------------------------
        action_prob, log_action_prob = self.policy(states)
        with torch.no_grad():
            q_pi1, q_pi2 = self.qnetwork_local(states)
            min_q_values = torch.min(q_pi1, q_pi2)
        # policy_loss = (action_prob * ((self.alpha * log_action_prob) - min_q_values).detach()).sum(dim=1).mean()
        policy_loss = (action_prob * ((self.alpha * log_action_prob) - min_q_values)).sum(dim=1).mean()
        self.policy_optim.zero_grad()
        policy_loss.backward()
        self.policy_optim.step()
        self.writer.add_scalar('loss/policy', policy_loss, self.steps)

        # -------------------------- update alpha ---------------------------
        alpha_loss = (action_prob.detach() * (-self.log_alpha * (log_action_prob + self.target_entropy).detach())).sum(dim=1).mean()
        self.alpha_optim.zero_grad()
        alpha_loss.backward()
        self.alpha_optim.step()
        self.writer.add_scalar('loss/alpha', alpha_loss, self.steps)
        self.alpha = self.log_alpha.exp()

    def compute_shift_function(self, states, next_states, actions, dones):
        """Update the shift network from a batch of experience tuples."""
        actions = actions.type(torch.int64)
        with torch.no_grad():
            # Double-DQN style target: pick next actions with the local twin,
            # evaluate them with the target twin.
            qt1, qt2 = self.qnetwork_local(next_states)
            q_min = torch.min(qt1, qt2)
            max_q, max_actions = q_min.max(1)
            Q_targets_next1, Q_targets_next2 = self.qnetwork_target(next_states)
            Q_targets_next = torch.min(Q_targets_next1, Q_targets_next2)
            Q_targets_next = Q_targets_next.gather(1, max_actions.type(torch.int64).unsqueeze(1))
            # Compute Q targets for current states
            Q_targets = self.gamma * Q_targets_next * (dones)
        # Get expected Q values from local model
        Q_expected = self.q_shift_local(states).gather(1, actions)
        loss = F.mse_loss(Q_expected, Q_targets.detach())
        # Minimize the loss
        self.optimizer_shift.zero_grad()
        loss.backward()
        self.writer.add_scalar('Shift_loss', loss, self.steps)
        self.optimizer_shift.step()

    def compute_r_function(self, states, actions, debug=False, log=False):
        actions = actions.type(torch.int64)
        size = states.shape[0]
        idx = 0
        all_zeros = [1 for i in range(actions.shape[0])]
        zeros = False
        y_shift = self.q_shift_target(states).gather(1, actions).detach()
        log_a = self.get_action_prob(states, actions).detach()
        y_r_part1 = log_a - y_shift
        y_r_part2 = torch.empty((size, 1), dtype=torch.float32).to(self.device)
        # Sum over all actions other than the expert action.
        for a, s in zip(actions, states):
            y_h = 0
            taken_actions = 0
            for b in self.all_actions:
                b = b.type(torch.int64).unsqueeze(1)
                n_b = self.get_action_prob(s.unsqueeze(0), b)
                if torch.eq(a, b) or n_b is None:
                    continue
                taken_actions += 1
                y_s = self.q_shift_target(s.unsqueeze(0)).detach().gather(1, b).item()
                n_b = n_b.data.item() - y_s
                r_hat = self.R_target(s.unsqueeze(0)).gather(1, b).item()
                y_h += (r_hat - n_b)
                if log:
                    text = "a {} r_hat {:.2f} - n_b {:.2f} | sh {:.2f} ".format(b.item(), r_hat, n_b, y_s)
                    logging.debug(text)
            if taken_actions == 0:
                all_zeros[idx] = 0
                zeros = True
                y_r_part2[idx] = 0.0
            else:
                y_r_part2[idx] = (1. / taken_actions) * y_h
            idx += 1
        y_r = y_r_part1 + y_r_part2
        # If a row got no update, mask it out of states, actions and targets.
        if zeros:
            mask = torch.BoolTensor(all_zeros)
            states = states[mask]
            actions = actions[mask]
            y_r = y_r[mask]
        y = self.R_local(states).gather(1, actions)
        if log:
            text = "Action {:.2f} r target {:.2f} = n_a {:.2f} + n_b {:.2f} y {:.2f}".format(
                actions[0].item(), y_r[0].item(), y_r_part1[0].item(), y_r_part2[0].item(), y[0].item())
            logging.debug(text)
        r_loss = F.mse_loss(y, y_r.detach())
        # Minimize the loss
        self.optimizer_r.zero_grad()
        r_loss.backward()
        # torch.nn.utils.clip_grad_norm_(self.R_local.parameters(), 5)
        self.optimizer_r.step()
        self.writer.add_scalar('Reward_loss', r_loss, self.steps)

    def get_action_prob(self, states, actions):
        """Clamped log-probability of the given actions under the classifier."""
        actions = actions.type(torch.long)
        output = self.predicter(states)
        output = F.softmax(output, dim=1)
        action_prob = output.gather(1, actions)
        action_prob = action_prob + torch.finfo(torch.float32).eps
        # Treat a near-zero probability for a single action as "no estimate".
        if action_prob.shape[0] == 1:
            if action_prob.cpu().detach().numpy()[0][0] < 1e-4:
                return None
        action_prob = torch.log(action_prob)
        action_prob = torch.clamp(action_prob, min=self.clip, max=0)
        return action_prob

    def state_action_frq(self, states, action):
        """Train the classifier to estimate state-action frequencies."""
        self.predicter.train()
        output = self.predicter(states, train=True)
        output = output.squeeze(0)
        y = action.type(torch.long).squeeze(1)
        loss = nn.CrossEntropyLoss()(output, y)
        self.optimizer_pre.zero_grad()
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(self.predicter.parameters(), 1)
        self.optimizer_pre.step()
        self.writer.add_scalar('Predict_loss', loss, self.steps)
        self.predicter.eval()

    def test_predicter(self, memory):
        """Report how often the classifier predicts the expert action."""
        self.predicter.eval()
        same_state_predition = 0
        for i in range(memory.idx):
            states = memory.obses[i]
            actions = memory.actions[i]
            states = torch.as_tensor(states, device=self.device).unsqueeze(0)
            actions = torch.as_tensor(actions, device=self.device)
            output = self.predicter(states)
            output = F.softmax(output, dim=1)
            # compare predicted action against the expert action
            y = actions.type(torch.long).item()
            p = torch.argmax(output.data).item()
            text = "r {}".format(self.R_local(states.detach()).detach())
            if y == p:
                same_state_predition += 1
        text = "Same prediction {} of {} ".format(same_state_predition, memory.idx)
        print(text)
        logging.debug(text)

    def soft_update(self, local_model, target_model, tau=1.0):
        """Soft update model parameters: θ_target = τ*θ_local + (1 - τ)*θ_target.

        Params
        ======
            local_model (PyTorch model): weights will be copied from
            target_model (PyTorch model): weights will be copied to
            tau (float): interpolation parameter (the original default of 4
                lies outside the valid [0, 1] range)
        """
        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
            target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)

    def load(self, filename):
        self.predicter.load_state_dict(torch.load(filename + "_predicter.pth"))
        self.optimizer_pre.load_state_dict(torch.load(filename + "_predicter_optimizer.pth"))
        self.R_local.load_state_dict(torch.load(filename + "_r_net.pth"))
        self.qnetwork_local.load_state_dict(torch.load(filename + "_q_net.pth"))
        print("Loaded models from {}".format(filename))

    def save(self, filename):
        mkdir("", filename)
        torch.save(self.predicter.state_dict(), filename + "_predicter.pth")
        torch.save(self.optimizer_pre.state_dict(), filename + "_predicter_optimizer.pth")
        torch.save(self.qnetwork_local.state_dict(), filename + "_q_net.pth")
        torch.save(self.optimizer.state_dict(), filename + "_q_net_optimizer.pth")
        torch.save(self.R_local.state_dict(), filename + "_r_net.pth")
        torch.save(self.q_shift_local.state_dict(), filename + "_q_shift_net.pth")
        print("save models to {}".format(filename))

    def test_q_value(self, memory):
        test_elements = memory.idx
        all_diff = 0
        error = True
        used_elements_r = 0
        used_elements_q = 0
        r_error = 0
        q_error = 0
        for i in range(test_elements):
            states = memory.obses[i]
            actions = memory.actions[i]
            states = torch.as_tensor(states, device=self.device).unsqueeze(0)
            actions = torch.as_tensor(actions, device=self.device)
            one_hot = torch.zeros(self.action_size)
            one_hot[actions.item()] = 1
            with torch.no_grad():
                r_values = self.R_local(states)
                q_values1, q_values2 = self.qnetwork_local(states)
                q_values = torch.min(q_values1, q_values2)
                soft_r = F.softmax(r_values, dim=1).to("cpu")
                soft_q = F.softmax(q_values, dim=1).to("cpu")
                actions = actions.type(torch.int64)
                kl_q = F.kl_div(soft_q.log(), one_hot, reduction='sum')
                kl_r = F.kl_div(soft_r.log(), one_hot, reduction='sum')
                if kl_r != float("inf"):
                    r_error += kl_r
                    used_elements_r += 1
                if kl_q != float("inf"):
                    q_error += kl_q
                    used_elements_q += 1
        average_q_kl = q_error / used_elements_q
        average_r_kl = r_error / used_elements_r
        text = "KL div of reward {} over {} elements".format(average_r_kl, used_elements_r)
        print(text)
        text = "KL div of Q-values {} over {} elements".format(average_q_kl, used_elements_q)
        print(text)
        self.writer.add_scalar('KL_reward', average_r_kl, self.steps)
        self.writer.add_scalar('KL_q_values', average_q_kl, self.steps)

    def act(self, state):
        with torch.no_grad():
            state = torch.FloatTensor(state).to(self.device).unsqueeze(0)
            action_prob, _ = self.policy(state)
            action = torch.argmax(action_prob)
            action = action.cpu().numpy()
        return action

    def eval_policy(self, record=False, eval_episodes=4):
        if record:
            env = wrappers.Monitor(self.env,
                                   str(self.vid_path) + "/{}".format(self.steps),
                                   video_callable=lambda episode_id: True,
                                   force=True)
        else:
            env = self.env
        average_reward = 0
        scores_window = deque(maxlen=100)
        s = 0
        for i_episode in range(eval_episodes):
            episode_reward = 0
            state = env.reset()
            while True:
                s += 1
                action = self.act(state)
                state, reward, done, _ = env.step(action)
                episode_reward += reward
                if done:
                    break
            scores_window.append(episode_reward)
        if record:
            return
        average_reward = np.mean(scores_window)
        print("Eval over {} episodes: average reward {}".format(eval_episodes, average_reward))
        self.writer.add_scalar('Eval_reward', average_reward, self.steps)
def run_infer(opt):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    n_classes = 6
    weight_path = opt.weight_path
    tst_preds = []

    TEST_DIR = opt.test_dir
    test = pd.DataFrame()
    # test['image_id'] = sorted(list(os.listdir(TEST_DIR)))
    content = pd.read_csv('/content/test_aug.csv')
    test_image = list(content["image_id"])
    test['image_id'] = sorted(test_image)

    # ViT checkpoints here expect 384x384 inputs; other architectures use opt.img_size.
    if "vit" in opt.model_arch:
        testset = ICDARDataset(test, TEST_DIR,
                               transforms=get_inference_transforms(384))
    else:
        testset = ICDARDataset(test, TEST_DIR,
                               transforms=get_inference_transforms(opt.img_size))

    tst_loader = DataLoader(testset,
                            batch_size=opt.valid_bs,
                            num_workers=opt.num_workers,
                            shuffle=False,
                            pin_memory=False)

    print("[INFO] Found {} folds in weight path".format(
        len(os.listdir(weight_path))))
    print(os.listdir(weight_path))
    print("[INFO] Start inference ...")

    # for fold in os.listdir(weight_path):
    #     print(fold)
    #     model = Classifier(opt.model_arch, n_classes).to(device)
    #     model_path = os.path.join(weight_path, fold, "best.pt")
    #     model.load_state_dict(torch.load(model_path)['model'])
    #     with torch.no_grad():
    #         for _ in range(opt.tta):
    #             tst_preds += [inference_one_epoch(model, tst_loader, device)]
    #     del model

    model = Classifier(opt.model_arch, n_classes).to(device)
    model_path = '/content/drive/MyDrive/Emotion_Speech_Recognition/Models/Classify_image/resnet/best.pt'
    model.load_state_dict(torch.load(model_path)['model'])
    with torch.no_grad():
        for _ in range(opt.tta):
            tst_preds += [inference_one_epoch(model, tst_loader, device)]
    del model

    # Average the TTA predictions.
    avg_tst_preds = np.mean(tst_preds, axis=0)

    if not os.path.isdir(opt.work_dir):
        os.mkdir(opt.work_dir)
    # np.save(os.path.join(opt.work_dir, "{}.npy".format(opt.model_arch)), avg_tst_preds)

    test['predict'] = np.argmax(avg_tst_preds, axis=1)
    gt = [i.split('/')[0] for i in content["image_id"]]
    test['gt'] = gt
    test.to_csv('submission.csv', index=False)
    torch.cuda.empty_cache()
# load data
transform = transforms.Compose(
    [transforms.CenterCrop(args.image_size),
     transforms.ToTensor()])
testset = datasets.MNIST('./data', train=False, download=True,
                         transform=transform)
dataloader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size,
                                         shuffle=True, num_workers=1)

model = Classifier(args)
model_pt = torch.load(args.save_path)
model.load_state_dict(model_pt)
model.eval()
if torch.cuda.is_available():
    # Move the model to the GPU once, outside the batch loop.
    model = model.cuda()

correct = 0
total = 0
for data, label in dataloader:
    if torch.cuda.is_available():
        data = data.cuda()
        label = label.cuda()
    outputs = model(data)
    _, predicted = torch.max(outputs.data, 1)
    total += label.size(0)
    correct += (predicted == label).sum().item()
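# The loop above only accumulates counts; a natural closing step (assumed,
# not part of the original snippet) is to report top-1 accuracy:
print('Test accuracy: {:.2f}%'.format(100.0 * correct / total))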
n_frames = 30
n_vid_features = 3600
n_aud_features = 1
n_head = 3
n_layers = 3
dim_feedforward = 128

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

autoencoder = Autoencoder()
autoencoder.load_state_dict(torch.load(AUTOENCODER))
autoencoder.to(device)
autoencoder.eval()

model = Classifier(n_vid_features, n_aud_features, n_head, n_layers,
                   dim_feedforward)
model.load_state_dict(torch.load(CLASSIFIER))
model = model.to(device)
model.eval()

start_time = datetime.datetime.now()
print(f'start time: {str(start_time)}')
print(f'using device: {device}')

count = 0
count_wrong = 0

val_dataset = EncodedDeepfakeDataset(VAL_FOLDERS, autoencoder.encoder,
                                     n_frames=n_frames, n_audio_reads=576,
                                     device=device,
                                     cache_folder="encode_cache")
dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                         shuffle=True)
for i, batch in enumerate(dataloader):
    if i * batch_size >= test_size:
        break
def train_sentiment(opts):
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    glove_loader = GloveLoader(
        os.path.join(opts.data_dir, 'glove', opts.glove_emb_file))

    train_loader = DataLoader(
        RottenTomatoesReviewDataset(opts.data_dir, 'train', glove_loader, opts.maxlen),
        batch_size=opts.bsize, shuffle=True, num_workers=opts.nworkers)
    valid_loader = DataLoader(
        RottenTomatoesReviewDataset(opts.data_dir, 'val', glove_loader, opts.maxlen),
        batch_size=opts.bsize, shuffle=False, num_workers=opts.nworkers)

    model = Classifier(opts.hidden_size, opts.dropout_p, glove_loader, opts.enc_arch)

    if opts.optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr,
                                     weight_decay=opts.wd)
    else:
        raise NotImplementedError("Unknown optim type")

    criterion = nn.CrossEntropyLoss()
    start_n_iter = 0
    # for choosing the best model
    best_val_acc = 0.0

    model_path = os.path.join(opts.save_path, 'model_latest.net')
    if opts.resume and os.path.exists(model_path):
        # restoring training from save_state
        print('====> Resuming training from previous checkpoint')
        save_state = torch.load(model_path, map_location='cpu')
        model.load_state_dict(save_state['state_dict'])
        start_n_iter = save_state['n_iter']
        best_val_acc = save_state['best_val_acc']
        opts = save_state['opts']
        opts.start_epoch = save_state['epoch'] + 1

    model = model.to(device)

    # for logging
    logger = TensorboardXLogger(opts.start_epoch, opts.log_iter, opts.log_dir)
    logger.set(['acc', 'loss'])
    logger.n_iter = start_n_iter

    for epoch in range(opts.start_epoch, opts.epochs):
        model.train()
        logger.step()

        for batch_idx, data in enumerate(train_loader):
            acc, loss = run_iter(opts, data, model, criterion, device)

            # optimizer step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), opts.max_norm)
            optimizer.step()

            logger.update(acc, loss)

        val_loss, val_acc, time_taken = evaluate(opts, model, valid_loader,
                                                 criterion, device)
        # log the validation losses
        logger.log_valid(time_taken, val_acc, val_loss)
        print('')

        # Save the best model to disk.
        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            save_state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'n_iter': logger.n_iter,
                'opts': opts,
                'val_acc': val_acc,
                'best_val_acc': best_val_acc
            }
            model_path = os.path.join(opts.save_path, 'model_best.net')
            torch.save(save_state, model_path)

        # Always save the latest model.
        save_state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'n_iter': logger.n_iter,
            'opts': opts,
            'val_acc': val_acc,
            'best_val_acc': best_val_acc
        }
        model_path = os.path.join(opts.save_path, 'model_latest.net')
        torch.save(save_state, model_path)
        'optimizer': optimizer.state_dict(),
        'global_count': global_count
    }
    torch.save(state, osp.join(args.save_path, filename))
    if is_best:
        shutil.copyfile(osp.join(args.save_path, filename),
                        osp.join(args.save_path, 'model_best.pth.tar'))


if args.resume:
    # load checkpoint
    state = torch.load(osp.join(args.save_path, 'model_best.pth.tar'))
    init_epoch = state['epoch']
    resumed_state = state['state_dict']
    # resumed_state = {'module.'+k: v for k, v in resumed_state.items()}
    model.load_state_dict(resumed_state)
    trlog = state['trlog']
    optimizer.load_state_dict(state['optimizer'])
    initial_lr = optimizer.param_groups[0]['lr']
    global_count = state['global_count']
else:
    init_epoch = 1
    trlog = {}
    trlog['args'] = vars(args)
    trlog['train_loss'] = []
    trlog['val_loss_dist'] = []
    trlog['val_loss_sim'] = []
    trlog['train_acc'] = []
    trlog['val_acc_sim'] = []
    trlog['val_acc_dist'] = []
    trlog['max_acc_dist'] = 0.0
class Agent():
    def __init__(self, state_size, action_size, config):
        self.env_name = config["env_name"]
        self.state_size = state_size
        self.action_size = action_size
        self.seed = config["seed"]
        self.clip = config["clip"]
        self.device = 'cuda'
        print("Clip ", self.clip)
        print("cuda ", torch.cuda.is_available())
        self.double_dqn = config["DDQN"]
        print("Use double dqn", self.double_dqn)
        self.lr_pre = config["lr_pre"]
        self.batch_size = config["batch_size"]
        self.lr = config["lr"]
        self.tau = config["tau"]
        print("self tau", self.tau)
        self.gamma = 0.99
        self.fc1 = config["fc1_units"]
        self.fc2 = config["fc2_units"]
        self.fc3 = config["fc3_units"]

        self.qnetwork_local = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.qnetwork_target = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=self.lr)
        self.soft_update(self.qnetwork_local, self.qnetwork_target, 1)

        self.q_shift_local = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.q_shift_target = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.optimizer_shift = optim.Adam(self.q_shift_local.parameters(), lr=self.lr)
        self.soft_update(self.q_shift_local, self.q_shift_target, 1)

        self.R_local = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.R_target = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.optimizer_r = optim.Adam(self.R_local.parameters(), lr=self.lr)
        self.soft_update(self.R_local, self.R_target, 1)

        # Pretrained expert Q-network used as a reference.
        self.expert_q = DQNetwork(state_size, action_size, seed=self.seed).to(self.device)
        self.expert_q.load_state_dict(torch.load('checkpoint.pth'))
        self.memory = Memory(action_size, config["buffer_size"], self.batch_size, self.seed, self.device)
        self.t_step = 0
        self.steps = 0
        self.predicter = Classifier(state_size, action_size, self.seed).to(self.device)
        self.optimizer_pre = optim.Adam(self.predicter.parameters(), lr=self.lr_pre)

        pathname = "lr_{}_batch_size_{}_fc1_{}_fc2_{}_fc3_{}_seed_{}".format(
            self.lr, self.batch_size, self.fc1, self.fc2, self.fc3, self.seed)
        pathname += "_clip_{}".format(config["clip"])
        pathname += "_tau_{}".format(config["tau"])
        now = datetime.now()
        dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
        pathname += dt_string
        tensorboard_name = str(config["locexp"]) + '/runs/' + pathname
        self.writer = SummaryWriter(tensorboard_name)
        print("summary writer ", tensorboard_name)

        self.average_prediction = deque(maxlen=100)
        self.average_same_action = deque(maxlen=100)
        self.all_actions = []
        for a in range(self.action_size):
            action = torch.Tensor(1) * 0 + a
            self.all_actions.append(action.to(self.device))

    def learn(self, memory):
        logging.debug("--------------------------New episode-----------------------------------------------")
        states, next_states, actions, dones = memory.expert_policy(self.batch_size)
        self.steps += 1
        self.state_action_frq(states, actions)
        self.compute_shift_function(states, next_states, actions, dones)
        for i in range(1):
            for a in range(self.action_size):
                action = torch.ones([self.batch_size, 1], device=self.device) * a
                self.compute_r_function(states, action)
            self.compute_q_function(states, next_states, actions, dones)
        self.soft_update(self.q_shift_local, self.q_shift_target, self.tau)
        self.soft_update(self.R_local, self.R_target, self.tau)
        self.soft_update(self.qnetwork_local, self.qnetwork_target, self.tau)
        return

    def learn_predicter(self, memory):
        states, next_states, actions, dones = memory.expert_policy(self.batch_size)
        self.state_action_frq(states, actions)

    def state_action_frq(self, states, action):
        """Train the classifier to estimate state-action frequencies."""
        self.predicter.train()
        output = self.predicter(states, train=True)
        output = output.squeeze(0)
        y = action.type(torch.long).squeeze(1)
        loss = nn.CrossEntropyLoss()(output, y)
        self.optimizer_pre.zero_grad()
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(self.predicter.parameters(), 1)
        self.optimizer_pre.step()
        self.writer.add_scalar('Predict_loss', loss, self.steps)
        self.predicter.eval()

    def test_predicter(self, memory):
        """Report how often the classifier predicts the expert action."""
        self.predicter.eval()
        same_state_predition = 0
        for i in range(memory.idx):
            states = memory.obses[i]
            actions = memory.actions[i]
            states = torch.as_tensor(states, device=self.device).unsqueeze(0)
            actions = torch.as_tensor(actions, device=self.device)
            output = self.predicter(states)
            output = F.softmax(output, dim=1)
            y = actions.type(torch.long).item()
            p = torch.argmax(output.data).item()
            if y == p:
                same_state_predition += 1
        # self.average_prediction.append(same_state_predition)
        # average_pred = np.mean(self.average_prediction)
        # self.writer.add_scalar('Average prediction acc', average_pred, self.steps)
        text = "Same prediction {} of {} ".format(same_state_predition, memory.idx)
        print(text)
        # self.writer.add_scalar('Action prediction acc', same_state_predition, self.steps)
        self.predicter.train()

    def get_action_prob(self, states, actions):
        """Clamped log-probability of the given actions under the classifier."""
        actions = actions.type(torch.long)
        output = self.predicter(states)
        output = F.softmax(output, dim=1)
        # output = output.squeeze(0)
        action_prob = output.gather(1, actions)
        action_prob = action_prob + torch.finfo(torch.float32).eps
        # Treat a near-zero probability for a single action as "no estimate".
        if action_prob.shape[0] == 1:
            if action_prob.cpu().detach().numpy()[0][0] < 1e-4:
                return None
        action_prob = torch.log(action_prob)
        action_prob = torch.clamp(action_prob, min=self.clip, max=0)
        return action_prob

    def compute_shift_function(self, states, next_states, actions, dones):
        """Update the shift network from a batch of experience tuples."""
        actions = actions.type(torch.int64)
        with torch.no_grad():
            # Get max predicted Q values (for next states) from target model
            if self.double_dqn:
                qt = self.q_shift_local(next_states)
                max_q, max_actions = qt.max(1)
                Q_targets_next = self.qnetwork_target(next_states).gather(1, max_actions.unsqueeze(1))
            else:
                Q_targets_next = self.qnetwork_target(next_states).max(1)[0].unsqueeze(1)
            # Compute Q targets for current states
            Q_targets = self.gamma * Q_targets_next * (dones)
        # Get expected Q values from local model
        Q_expected = self.q_shift_local(states).gather(1, actions)
        loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.optimizer_shift.zero_grad()
        loss.backward()
        self.writer.add_scalar('Shift_loss', loss, self.steps)
        self.optimizer_shift.step()

    def compute_r_function(self, states, actions, debug=False, log=False):
        actions = actions.type(torch.int64)
        size = states.shape[0]
        idx = 0
        all_zeros = []
        with torch.no_grad():
            y_shift = self.q_shift_target(states).gather(1, actions)
            log_a = self.get_action_prob(states, actions)
            index_list = index_None_value(log_a)
            if index_list is None:
                return
        y_r_part1 = log_a - y_shift
        y_r_part2 = torch.empty((size, 1), dtype=torch.float32).to(self.device)
        # Sum over all actions other than the chosen action.
        for a, s in zip(actions, states):
            y_h = 0
            taken_actions = 0
            for b in self.all_actions:
                b = b.type(torch.int64).unsqueeze(1)
                n_b = self.get_action_prob(s.unsqueeze(0), b)
                if torch.eq(a, b) or n_b is None:
                    logging.debug("best action {} ".format(a))
                    logging.debug("n_b action {} ".format(b))
                    logging.debug("n_b {} ".format(n_b))
                    continue
                taken_actions += 1
                r_hat = self.R_target(s.unsqueeze(0)).gather(1, b)
                y_s = self.q_shift_target(s.unsqueeze(0)).gather(1, b)
                n_b = n_b - y_s
                y_h += (r_hat - n_b)
                if debug:
                    print("action", b.item())
                    print("r_pre {:.3f}".format(r_hat.item()))
                    print("n_b {:.3f}".format(n_b.item()))
            if taken_actions == 0:
                all_zeros.append(idx)
            else:
                y_r_part2[idx] = (1. / taken_actions) * y_h
            idx += 1
        # print(y_r_part2, y_r_part1)
        y_r = y_r_part1 + y_r_part2
        # print("r update zeros ", len(all_zeros))
        if len(index_list) > 0:
            print("none list", index_list)
        y = self.R_local(states).gather(1, actions)
        if log:
            text = "Action {:.2f} y target {:.2f} = n_a {:.2f} + {:.2f} and pre {:.2f}".format(
                actions.item(), y_r.item(), y_r_part1.item(), y_r_part2.item(), y.item())
            logging.debug(text)
        if debug:
            print("expert action ", actions.item())
            print("Correct action p {:.3f} ".format(y.item()))
            print("Correct action target {:.3f} ".format(y_r.item()))
            print("part1 correct action {:.2f} ".format(y_r_part1.item()))
            print("part2 incorrect action {:.2f} ".format(y_r_part2.item()))
        r_loss = F.mse_loss(y, y_r)
        # Minimize the loss
        self.optimizer_r.zero_grad()
        r_loss.backward()
        # torch.nn.utils.clip_grad_norm_(self.R_local.parameters(), 5)
        self.optimizer_r.step()
        self.writer.add_scalar('Reward_loss', r_loss, self.steps)
        if debug:
            print("after update r pre ", self.R_local(states).gather(1, actions).item())
            print("after update r target ", self.R_target(states).gather(1, actions).item())
        # ------------------- update target network ------------------- #
        # self.soft_update(self.R_local, self.R_target, 5e-3)
        if debug:
            print("after soft update r target ", self.R_target(states).gather(1, actions).item())

    def compute_q_function(self, states, next_states, actions, dones, debug=False, log=False):
        """Update Q-network parameters from a batch of experience tuples."""
        actions = actions.type(torch.int64)
        if debug:
            print("---------------q_update------------------")
            print("expert action ", actions.item())
            print("state ", states)
        with torch.no_grad():
            # Get max predicted Q values (for next states) from target model
            if self.double_dqn:
                qt = self.qnetwork_local(next_states)
                max_q, max_actions = qt.max(1)
                Q_targets_next = self.qnetwork_target(next_states).gather(1, max_actions.unsqueeze(1))
            else:
                Q_targets_next = self.qnetwork_target(next_states).max(1)[0].unsqueeze(1)
            # Compute Q targets for current states
            rewards = self.R_target(states).gather(1, actions)
            Q_targets = rewards + (self.gamma * Q_targets_next * (dones))
            if debug:
                print("reward {}".format(rewards.item()))
                print("Q target next {}".format(Q_targets_next.item()))
                print("Q_target {}".format(Q_targets.item()))
        # Get expected Q values from local model
        Q_expected = self.qnetwork_local(states).gather(1, actions)
        if log:
            text = "Action {:.2f} q target {:.2f} = r_a {:.2f} + target {:.2f} and pre {:.2f}".format(
                actions.item(), Q_targets.item(), rewards.item(), Q_targets_next.item(), Q_expected.item())
            logging.debug(text)
        if debug:
            print("q for a {}".format(Q_expected))
        loss = F.mse_loss(Q_expected, Q_targets)
        self.writer.add_scalar('Q_loss', loss, self.steps)
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        if debug:
            print("q after update {}".format(self.qnetwork_local(states)))
            print("q loss {}".format(loss.item()))

    def dqn_train(self, n_episodes=2000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.995):
        env = gym.make('LunarLander-v2')
        scores = []                        # scores from each episode
        scores_window = deque(maxlen=100)  # last 100 scores
        eps = eps_start
        for i_episode in range(1, n_episodes + 1):
            state = env.reset()
            score = 0
            for t in range(max_t):
                self.t_step += 1
                action = self.dqn_act(state, eps)
                next_state, reward, done, _ = env.step(action)
                self.step(state, action, reward, next_state, done)
                state = next_state
                score += reward
                if done:
                    self.test_q()
                    break
            scores_window.append(score)          # save most recent score
            scores.append(score)                 # save most recent score
            eps = max(eps_end, eps_decay * eps)  # decrease epsilon
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)), end="")
            if i_episode % 100 == 0:
                print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
            if np.mean(scores_window) >= 200.0:
                print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
                    i_episode - 100, np.mean(scores_window)))
                break

    def test_policy(self):
        env = gym.make('LunarLander-v2')
        logging.debug("new episode")
        average_score = []
        average_steps = []
        average_action = []
        for i in range(5):
            state = env.reset()
            score = 0
            same_action = 0
            logging.debug("new episode")
            for t in range(200):
                state = torch.from_numpy(state).float().unsqueeze(0).to(self.device)
                q_expert = self.expert_q(state)
                q_values = self.qnetwork_local(state)
                logging.debug("q expert a0: {:.2f} a1: {:.2f} a2: {:.2f} a3: {:.2f}".format(
                    q_expert.data[0][0], q_expert.data[0][1], q_expert.data[0][2], q_expert.data[0][3]))
                logging.debug("q values a0: {:.2f} a1: {:.2f} a2: {:.2f} a3: {:.2f}".format(
                    q_values.data[0][0], q_values.data[0][1], q_values.data[0][2], q_values.data[0][3]))
                action = torch.argmax(q_values).item()
                action_e = torch.argmax(q_expert).item()
                if action == action_e:
                    same_action += 1
                next_state, reward, done, _ = env.step(action)
                state = next_state
                score += reward
                if done:
                    average_score.append(score)
                    average_steps.append(t)
                    average_action.append(same_action)
                    break
        mean_steps = np.mean(average_steps)
        mean_score = np.mean(average_score)
        mean_action = np.mean(average_action)
        self.writer.add_scalar('Ave_epsiode_length', mean_steps, self.steps)
        self.writer.add_scalar('Ave_same_action', mean_action, self.steps)
        self.writer.add_scalar('Ave_score', mean_score, self.steps)

    def step(self, state, action, reward, next_state, done):
        # Save experience in replay memory
        self.memory.add(state, action, reward, next_state, done)
        # Learn every UPDATE_EVERY time steps.
        self.t_step = (self.t_step + 1) % 4
        if self.t_step == 0:
            # If enough samples are available in memory, get random subset and learn
            if len(self.memory) > self.batch_size:
                experiences = self.memory.sample()
                self.update_q(experiences)

    def dqn_act(self, state, eps=0.):
        """Returns actions for given state as per current policy.

        Params
        ======
            state (array_like): current state
            eps (float): epsilon, for epsilon-greedy action selection
        """
        state = torch.from_numpy(state).float().unsqueeze(0).to(self.device)
        self.qnetwork_local.eval()
        with torch.no_grad():
            action_values = self.qnetwork_local(state)
        self.qnetwork_local.train()
        # Epsilon-greedy action selection
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))

    def update_q(self, experiences, debug=False):
        """Update value parameters using given batch of experience tuples."""
        states, actions, rewards, next_states, dones = experiences
        # Get max predicted Q values (for next states) from target model
        with torch.no_grad():
            Q_targets_next = self.qnetwork_target(next_states).max(1)[0].unsqueeze(1)
            # Compute Q targets for current states
            Q_targets = rewards + (self.gamma * Q_targets_next * (1 - dones))
        # Get expected Q values from local model
        Q_expected = self.qnetwork_local(states).gather(1, actions)
        if debug:
            print("----------------------")
            print("Q target", Q_targets)
            print("pre", Q_expected)
            print("all local", self.qnetwork_local(states))
        loss = F.mse_loss(Q_expected, Q_targets)
        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        # ------------------- update target network ------------------- #
        self.soft_update(self.qnetwork_local, self.qnetwork_target, self.tau)

    def test_q(self):
        experiences = self.memory.test_sample()
        self.update_q(experiences, True)

    def test_q_value(self, memory):
        same_action = 0
        test_elements = memory.idx
        all_diff = 0
        error = True
        self.predicter.eval()
        for i in range(test_elements):
            states = memory.obses[i]
            next_states = memory.next_obses[i]
            actions = memory.actions[i]
            dones = memory.not_dones[i]
            states = torch.as_tensor(states, device=self.device).unsqueeze(0)
            next_states = torch.as_tensor(next_states, device=self.device)
            actions = torch.as_tensor(actions, device=self.device)
            dones = torch.as_tensor(dones, device=self.device)
            with torch.no_grad():
                output = self.predicter(states)
                output = F.softmax(output, dim=1)
                q_values = self.qnetwork_local(states)
                expert_values = self.expert_q(states)
                print("q values ", q_values)
                print("ex values ", expert_values)
                best_action = torch.argmax(q_values).item()
                actions = actions.type(torch.int64)
                q_max = q_values.max(1)
                q = q_values[0][actions.item()].item()
                max_q = q_max[0].data.item()
                diff = max_q - q
                all_diff += diff
            if actions.item() != best_action:
                r = self.R_local(states)
                rt = self.R_target(states)
                qt = self.qnetwork_target(states)
                logging.debug("------------------false action --------------------------------")
                logging.debug("expert action {})".format(actions.item()))
                logging.debug("out predicter a0: {:.2f} a1: {:.2f} a2: {:.2f} a3: {:.2f} )".format(
                    output.data[0][0], output.data[0][1], output.data[0][2], output.data[0][3]))
                logging.debug("q values a0: {:.2f} a1: {:.2f} a2: {:.2f} a3: {:.2f} )".format(
                    q_values.data[0][0], q_values.data[0][1], q_values.data[0][2], q_values.data[0][3]))
                logging.debug("q target a0: {:.2f} a1: {:.2f} a2: {:.2f} a3: {:.2f} )".format(
                    qt.data[0][0], qt.data[0][1], qt.data[0][2], qt.data[0][3]))
                logging.debug("rewards a0: {:.2f} a1: {:.2f} a2: {:.2f} a3: {:.2f} )".format(
                    r.data[0][0], r.data[0][1], r.data[0][2], r.data[0][3]))
                logging.debug("re target a0: {:.2f} a1: {:.2f} a2: {:.2f} a3: {:.2f} )".format(
                    rt.data[0][0], rt.data[0][1], rt.data[0][2], rt.data[0][3]))
                """
                logging.debug("---------Reward Function------------")
                action = torch.Tensor(1) * 0 + 0
                self.compute_r_function(states, action.unsqueeze(0).to(self.device), log=True)
                action = torch.Tensor(1) * 0 + 1
                self.compute_r_function(states, action.unsqueeze(0).to(self.device), log=True)
                action = torch.Tensor(1) * 0 + 2
                self.compute_r_function(states, action.unsqueeze(0).to(self.device), log=True)
                action = torch.Tensor(1) * 0 + 3
                self.compute_r_function(states, action.unsqueeze(0).to(self.device), log=True)
                logging.debug("------------------Q Function --------------------------------")
                action = torch.Tensor(1) * 0 + 0
                self.compute_q_function(states, next_states.unsqueeze(0), action.unsqueeze(0).to(self.device), dones, log=True)
                action = torch.Tensor(1) * 0 + 1
                self.compute_q_function(states, next_states.unsqueeze(0), action.unsqueeze(0).to(self.device), dones, log=True)
                action = torch.Tensor(1) * 0 + 2
                self.compute_q_function(states, next_states.unsqueeze(0), action.unsqueeze(0).to(self.device), dones, log=True)
                action = torch.Tensor(1) * 0 + 3
                self.compute_q_function(states, next_states.unsqueeze(0), action.unsqueeze(0).to(self.device), dones, log=True)
                """
            if actions.item() == best_action:
                same_action += 1
                continue
                # dead debug code kept from the original (after the continue)
                print("-------------------------------------------------------------------------------")
                print("state ", i)
                print("expert ", actions)
                print("q values", q_values.data)
                print("action prob predicter ", output.data)
                self.compute_r_function(states, actions.unsqueeze(0), True)
                self.compute_q_function(states, next_states.unsqueeze(0), actions.unsqueeze(0), dones, True)
            else:
                if error:
                    continue
                print("-------------------------------------------------------------------------------")
                print("expert action ", actions.item())
                print("best action q ", best_action)
                print(i)
                error = False
                continue
                # logging.debug("experte action {} q fun {}".format(actions.item(), q_values))
                print("-------------------------------------------------------------------------------")
                print("state ", i)
                print("expert ", actions)
                print("q values", q_values.data)
                print("action prob predicter ", output.data)
                self.compute_r_function(states, actions.unsqueeze(0), True)
                self.compute_q_function(states, next_states.unsqueeze(0), actions.unsqueeze(0), dones, True)
        self.writer.add_scalar('diff', all_diff, self.steps)
        self.average_same_action.append(same_action)
        av_action = np.mean(self.average_same_action)
        self.writer.add_scalar('Same_action', same_action, self.steps)
        print("Same actions {} of {}".format(same_action, test_elements))
        self.predicter.train()

    def soft_update(self, local_model, target_model, tau=1.0):
        """Soft update model parameters: θ_target = τ*θ_local + (1 - τ)*θ_target.

        Params
        ======
            local_model (PyTorch model): weights will be copied from
            target_model (PyTorch model): weights will be copied to
            tau (float): interpolation parameter (the original default of 4
                lies outside the valid [0, 1] range)
        """
        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
            target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)

    def save(self, filename):
        mkdir("", filename)
        torch.save(self.predicter.state_dict(), filename + "_predicter.pth")
        torch.save(self.optimizer_pre.state_dict(), filename + "_predicter_optimizer.pth")
        torch.save(self.qnetwork_local.state_dict(), filename + "_q_net.pth")
        """
        torch.save(self.optimizer_q.state_dict(), filename + "_q_net_optimizer.pth")
        torch.save(self.q_shift_local.state_dict(), filename + "_q_shift_net.pth")
        torch.save(self.optimizer_q_shift.state_dict(), filename + "_q_shift_net_optimizer.pth")
        """
        print("save models to {}".format(filename))

    def load(self, filename):
        self.predicter.load_state_dict(torch.load(filename + "_predicter.pth"))
        self.optimizer_pre.load_state_dict(torch.load(filename + "_predicter_optimizer.pth"))
        print("Loaded models from {}".format(filename))
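# A hypothetical config for the Agent above. The keys match what __init__
# reads; the values are illustrative guesses, not from the original source.
config = {
    "env_name": "LunarLander-v2",
    "seed": 0,
    "clip": -2.0,       # lower bound for the clamped log-probabilities
    "DDQN": True,
    "lr_pre": 1e-3,
    "lr": 1e-4,
    "tau": 0.005,
    "batch_size": 64,
    "fc1_units": 256,
    "fc2_units": 256,
    "fc3_units": 256,
    "buffer_size": 100000,
    "locexp": "experiments",
}
agent = Agent(state_size=8, action_size=4, config=config)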
bin1 = 0
bin2 = 0
bin3 = 0
bin4 = 0
tot1 = 0
tot2 = 0
tot3 = 0
tot4 = 0

# model = LSTM_main(config).to(device)
# PATH = bi_path
model = Classifier(config).to(device)
PATH = base_path
model.load_state_dict(torch.load(PATH, map_location=device))
model = model.to(device)
print(model)

for i in range(len(labels)):
    s1_embed, s1_len = get_batch_from_idx(s1[i].split(), embeddings, config)
    s2_embed, s2_len = get_batch_from_idx(s2[i].split(), embeddings, config)
    u = torch.sum(s1_embed, 0).to(device)
    # v is the second sentence representation (the original summed s1_embed twice)
    v = torch.sum(s2_embed, 0).to(device)
    feats = torch.cat((u, v, torch.abs(u - v), u * v), 0).to(device)
    with torch.no_grad():
        out = model.forward(feats).to(device)
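# Toy illustration of the feature composition used above: for sentence
# embeddings u, v in R^d the classifier input is [u; v; |u-v|; u*v] in R^{4d}.
import torch

u = torch.tensor([1.0, 2.0])
v = torch.tensor([0.5, -1.0])
feats = torch.cat((u, v, torch.abs(u - v), u * v), 0)
print(feats)  # tensor([ 1.0, 2.0, 0.5, -1.0, 0.5, 3.0, 0.5, -2.0])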
def run_training(opt):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    work_dir, epochs, train_batch, valid_batch, weights = \
        opt.work_dir, opt.epochs, opt.train_bs, opt.valid_bs, opt.weights

    # Checkpoint paths
    last = os.path.join(work_dir, 'last.pt')
    best = os.path.join(work_dir, 'best.pt')

    # --------------------------------------
    # Setup train and validation set
    # --------------------------------------
    data = pd.read_csv(opt.train_csv)
    images_path = opt.data_dir
    n_classes = 6  # number of classes is hard-coded for this dataset
    data['class'] = data.apply(lambda row: categ[row["class"]], axis=1)

    train_loader, val_loader = prepare_dataloader(data, opt.fold, train_batch,
                                                  valid_batch, opt.img_size,
                                                  opt.num_workers,
                                                  data_root=images_path)

    if not opt.ovr_val:
        # Per-domain validation loaders; this block was commented out, but
        # valid_one_epoch below requires both loaders, so it is restored here.
        handwritten_data = pd.read_csv(opt.handwritten_csv)
        printed_data = pd.read_csv(opt.printed_csv)
        handwritten_data['class'] = handwritten_data.apply(
            lambda row: categ[row["class"]], axis=1)
        printed_data['class'] = printed_data.apply(
            lambda row: categ[row["class"]], axis=1)
        _, handwritten_val_loader = prepare_dataloader(
            handwritten_data, opt.fold, train_batch, valid_batch,
            opt.img_size, opt.num_workers, data_root=images_path)
        _, printed_val_loader = prepare_dataloader(
            printed_data, opt.fold, train_batch, valid_batch,
            opt.img_size, opt.num_workers, data_root=images_path)

    # --------------------------------------
    # Models
    # --------------------------------------
    model = Classifier(model_name=opt.model_name, n_classes=n_classes,
                       pretrained=True).to(device)
    if opt.weights is not None:
        cp = torch.load(opt.weights, map_location=device)
        model.load_state_dict(cp['model'])

    # -------------------------------------------
    # Setup optimizer, scheduler, criterion loss
    # -------------------------------------------
    optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-6)
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1,
                                            eta_min=1e-6, last_epoch=-1)
    scaler = GradScaler()
    loss_tr = nn.CrossEntropyLoss().to(device)
    loss_fn = nn.CrossEntropyLoss().to(device)

    # --------------------------------------
    # Setup training
    # --------------------------------------
    os.makedirs(work_dir, exist_ok=True)
    best_loss = 1e5
    start_epoch = 0
    best_epoch = 0  # for early stopping
    if opt.resume:
        checkpoint = torch.load(last, map_location=device)
        start_epoch = checkpoint["epoch"]
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint["scheduler"])
        best_loss = checkpoint["best_loss"]

    # --------------------------------------
    # Start training
    # --------------------------------------
    print("[INFO] Start training...")
    for epoch in range(start_epoch, epochs):
        train_one_epoch(epoch, model, loss_tr, optimizer, train_loader,
                        device, scheduler=scheduler, scaler=scaler)

        with torch.no_grad():
            if opt.ovr_val:
                val_loss = valid_one_epoch_overall(epoch, model, loss_fn,
                                                   val_loader, device,
                                                   scheduler=None)
            else:
                val_loss = valid_one_epoch(epoch, model, loss_fn,
                                           handwritten_val_loader,
                                           printed_val_loader, device,
                                           scheduler=None)

        if val_loss < best_loss:
            best_loss = val_loss
            best_epoch = epoch
            torch.save(
                {
                    'epoch': epoch,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'best_loss': best_loss
                }, best)
            print('best model found for epoch {}'.format(epoch + 1))

        torch.save(
            {
                'epoch': epoch,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'best_loss': best_loss
            }, last)

        # Early stopping
        if epoch - best_epoch > opt.patience:
            print("Early stop achieved at", epoch + 1)
            break

    del model, optimizer, train_loader, val_loader, scheduler, scaler
    torch.cuda.empty_cache()
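# A minimal sketch (not from the original repo) of driving run_training with
# an argparse namespace. The flag names mirror the opt attributes the function
# reads; the default values here are illustrative assumptions only.
import argparse

def build_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--work_dir', default='runs/exp0')
    parser.add_argument('--train_csv', default='train.csv')
    parser.add_argument('--data_dir', default='data/images')
    parser.add_argument('--model_name', default='resnet18')
    parser.add_argument('--epochs', type=int, default=30)
    parser.add_argument('--train_bs', type=int, default=32)
    parser.add_argument('--valid_bs', type=int, default=64)
    parser.add_argument('--img_size', type=int, default=224)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--fold', type=int, default=0)
    parser.add_argument('--patience', type=int, default=5)
    parser.add_argument('--weights', default=None)
    parser.add_argument('--resume', action='store_true')
    parser.add_argument('--ovr_val', action='store_true')
    parser.add_argument('--handwritten_csv', default='handwritten.csv')
    parser.add_argument('--printed_csv', default='printed.csv')
    return parser.parse_args()

if __name__ == '__main__':
    run_training(build_opt())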
def train(seed=0, dataset='grid',
          samplers=(UniformDatasetSampler, UniformLatentSampler),
          latent_dim=2, model_dim=256, device='cuda', conditional=False,
          learning_rate=2e-4, betas=(0.5, 0.9), batch_size=256,
          iterations=400, n_critic=5, objective='gan', gp_lambda=10,
          output_dir='results', plot=False, spec_norm=True):
    experiment_name = [
        seed, dataset, samplers[0].__name__, samplers[1].__name__,
        latent_dim, model_dim, device, conditional, learning_rate, betas[0],
        betas[1], batch_size, iterations, n_critic, objective, gp_lambda,
        plot, spec_norm
    ]
    experiment_name = '_'.join([str(p) for p in experiment_name])

    results_dir = os.path.join(output_dir, experiment_name)
    network_dir = os.path.join(results_dir, 'networks')
    eval_log = os.path.join(results_dir, 'eval.log')
    os.makedirs(results_dir, exist_ok=True)
    os.makedirs(network_dir, exist_ok=True)
    eval_file = open(eval_log, 'w')

    if plot:
        samples_dir = os.path.join(results_dir, 'samples')
        os.makedirs(samples_dir, exist_ok=True)

    # Seed everything for reproducibility.
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    data, labels = load_data(dataset)
    data_dim, num_classes = data.shape[1], len(set(labels))

    data_sampler = samplers[0](
        torch.tensor(data).float(),
        torch.tensor(labels).long()) if conditional else samplers[0](
            torch.tensor(data).float())
    noise_sampler = samplers[1](
        latent_dim, labels) if conditional else samplers[1](latent_dim)

    if conditional:
        test_data, test_labels = load_data(dataset, split='test')
        test_dataset = TensorDataset(
            torch.tensor(test_data).to(device).float(),
            torch.tensor(test_labels).to(device).long())
        test_dataloader = DataLoader(test_dataset, batch_size=4096)

        G = Generator(latent_dim + num_classes, model_dim,
                      data_dim).to(device).train()  # removed duplicated .train()
        D = Discriminator(model_dim, data_dim + num_classes,
                          spec_norm=spec_norm).to(device).train()

        # Twin classifiers: one trained on real data, one on generated data,
        # initialized with identical weights.
        C_real = Classifier(model_dim, data_dim, num_classes).to(device).train()
        C_fake = Classifier(model_dim, data_dim, num_classes).to(device).train()
        C_fake.load_state_dict(deepcopy(C_real.state_dict()))
        C_real_optimizer = optim.Adam(C_real.parameters(), lr=2 * learning_rate)
        C_fake_optimizer = optim.Adam(C_fake.parameters(), lr=2 * learning_rate)
        C_crit = nn.CrossEntropyLoss()
    else:
        G = Generator(latent_dim, model_dim, data_dim).to(device).train()
        D = Discriminator(model_dim, data_dim,
                          spec_norm=spec_norm).to(device).train()

    D_optimizer = optim.Adam(D.parameters(), lr=learning_rate, betas=betas)
    G_optimizer = optim.Adam(G.parameters(), lr=learning_rate, betas=betas)

    if objective == 'gan':
        fake_target = torch.zeros(batch_size, 1).to(device)
        real_target = torch.ones(batch_size, 1).to(device)
    elif objective == 'wgan':
        grad_target = torch.ones(batch_size, 1).to(device)
    elif objective == 'hinge':
        bound = torch.zeros(batch_size, 1).to(device)
        sub = torch.ones(batch_size, 1).to(device)

    stats = {'D': [], 'G': [], 'C_it': [], 'C_real': [], 'C_fake': []}

    if plot:
        fixed_latent_batch = noise_sampler.get_batch(20000)
        sample_figure = plt.figure(num=0, figsize=(5, 5))
        loss_figure = plt.figure(num=1, figsize=(10, 5))
        if conditional:
            accuracy_figure = plt.figure(num=2, figsize=(10, 5))

    for it in range(iterations + 1):
        # Train Discriminator
        data_batch = data_sampler.get_batch(batch_size)
        latent_batch = noise_sampler.get_batch(batch_size)

        if conditional:
            x_real, y_real = data_batch[0].to(device), data_batch[1].to(device)
            real_sample = torch.cat([x_real, y_real], dim=1)
            z_fake, y_fake = latent_batch[0].to(device), latent_batch[1].to(device)
            x_fake = G(torch.cat([z_fake, y_fake], dim=1)).detach()
            fake_sample = torch.cat([x_fake, y_fake], dim=1)
        else:
            x_real = data_batch.to(device)
            real_sample = x_real
            z_fake = latent_batch.to(device)
            x_fake = G(z_fake).detach()
            fake_sample = x_fake

        D.zero_grad()
        real_pred = D(real_sample)
        fake_pred = D(fake_sample)

        if is_recorded(data_sampler):
            data_sampler.record(real_pred.detach().cpu().numpy())

        if is_weighted(data_sampler):
            weights = torch.tensor(
                data_sampler.get_weights()).to(device).float().view(
                    real_pred.shape)
        else:
            weights = torch.ones_like(real_pred).to(device)

        if objective == 'gan':
            # bug fix: per-sample weighting requires reduction='none';
            # the default 'mean' reduction collapses the loss to a scalar
            # before the weights are applied.
            D_loss = F.binary_cross_entropy(fake_pred, fake_target) + (
                weights * F.binary_cross_entropy(
                    real_pred, real_target, reduction='none')).mean()
            stats['D'].append(D_loss.item())
        elif objective == 'wgan':
            # WGAN-GP: gradient penalty on interpolates between real and fake.
            alpha = torch.rand(batch_size, 1).expand(real_sample.size()).to(device)
            interpolate = (alpha * real_sample +
                           (1 - alpha) * fake_sample).requires_grad_(True)
            gradients = torch.autograd.grad(outputs=D(interpolate),
                                            inputs=interpolate,
                                            grad_outputs=grad_target,
                                            create_graph=True,
                                            retain_graph=True,
                                            only_inputs=True)[0]
            gradient_penalty = (gradients.norm(2, dim=1) -
                                1).pow(2).mean() * gp_lambda
            D_loss = fake_pred.mean() - (real_pred * weights).mean()
            stats['D'].append(-D_loss.item())
            D_loss += gradient_penalty
        elif objective == 'hinge':
            D_loss = -(torch.min(real_pred - sub, bound) * weights).mean() \
                     - torch.min(-fake_pred - sub, bound).mean()
            stats['D'].append(D_loss.item())

        D_loss.backward()
        D_optimizer.step()

        # Train Generator (every n_critic discriminator updates)
        if it % n_critic == 0:
            G.zero_grad()
            latent_batch = noise_sampler.get_batch(batch_size)
            if conditional:
                z_fake, y_fake = latent_batch[0].to(device), latent_batch[1].to(device)
                x_fake = G(torch.cat([z_fake, y_fake], dim=1))
                fake_pred = D(torch.cat([x_fake, y_fake], dim=1))
            else:
                z_fake = latent_batch.to(device)
                x_fake = G(z_fake)
                fake_pred = D(x_fake)

            if objective == 'gan':
                G_loss = F.binary_cross_entropy(fake_pred, real_target)
                stats['G'].extend([G_loss.item()] * n_critic)
            elif objective == 'wgan':
                G_loss = -fake_pred.mean()
                stats['G'].extend([-G_loss.item()] * n_critic)
            elif objective == 'hinge':
                G_loss = -fake_pred.mean()
                stats['G'].extend([-G_loss.item()] * n_critic)

            G_loss.backward()
            G_optimizer.step()

            if conditional:
                # Train fake classifier on generated samples
                C_fake.train()
                C_fake.zero_grad()
                C_fake_loss = C_crit(C_fake(x_fake.detach()), y_fake.argmax(1))
                C_fake_loss.backward()
                C_fake_optimizer.step()

                # Train real classifier on real samples
                C_real.train()
                C_real.zero_grad()
                C_real_loss = C_crit(C_real(x_real), y_real.argmax(1))
                C_real_loss.backward()
                C_real_optimizer.step()

                # Evaluate both classifiers on held-out test data
                if it % 5 == 0:
                    C_real.eval()
                    C_fake.eval()
                    real_correct, fake_correct, total = 0.0, 0.0, 0.0
                    for idx, (sample, label) in enumerate(test_dataloader):
                        real_correct += (
                            C_real(sample).argmax(1).view(-1) == label).sum()
                        fake_correct += (
                            C_fake(sample).argmax(1).view(-1) == label).sum()
                        total += sample.shape[0]
                    stats['C_it'].append(it)
                    stats['C_real'].append(real_correct.item() / total)
                    stats['C_fake'].append(fake_correct.item() / total)

            line = f"{it}\t{stats['D'][-1]:.3f}\t{stats['G'][-1]:.3f}"
            if conditional:
                line += f"\t{stats['C_real'][-1]*100:.3f}\t{stats['C_fake'][-1]*100:.3f}"
            print(line, file=eval_file)  # bug fix: was print(line, eval_file)

            if plot:
                if conditional:
                    z_fake, y_fake = fixed_latent_batch[0].to(device), \
                                     fixed_latent_batch[1].to(device)
                    x_fake = G(torch.cat([z_fake, y_fake], dim=1))
                else:
                    z_fake = fixed_latent_batch.to(device)
                    x_fake = G(z_fake)

                generated = x_fake.detach().cpu().numpy()
                plt.figure(0)
                plt.clf()
                plt.scatter(generated[:, 0], generated[:, 1], marker='.',
                            color=(0, 1, 0, 0.01))
                plt.axis('equal')
                plt.xlim(-1, 1)
                plt.ylim(-1, 1)
                plt.savefig(os.path.join(samples_dir, f'{it}.png'))

                plt.figure(1)
                plt.clf()
                plt.plot(stats['G'], label='Generator')
                plt.plot(stats['D'], label='Discriminator')
                plt.legend()
                plt.savefig(os.path.join(results_dir, 'loss.png'))

                if conditional:
                    plt.figure(2)
                    plt.clf()
                    plt.plot(stats['C_it'], stats['C_real'], label='Real')
                    plt.plot(stats['C_it'], stats['C_fake'], label='Fake')
                    plt.legend()
                    plt.savefig(os.path.join(results_dir, 'accuracy.png'))

    save_model(G, os.path.join(network_dir, 'G_trained.pth'))
    save_model(D, os.path.join(network_dir, 'D_trained.pth'))
    save_stats(stats, os.path.join(results_dir, 'stats.pth'))
    if conditional:
        save_model(C_real, os.path.join(network_dir, 'C_real_trained.pth'))
        save_model(C_fake, os.path.join(network_dir, 'C_fake_trained.pth'))
    eval_file.close()
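# A self-contained sketch of the WGAN-GP gradient penalty used in the 'wgan'
# branch above, with a toy linear critic so it runs on its own; the critic,
# batch size, and data dimension here are illustrative assumptions, not the
# original models.
import torch
import torch.nn as nn

critic = nn.Sequential(nn.Linear(2, 64), nn.ReLU(), nn.Linear(64, 1))
real = torch.randn(8, 2)
fake = torch.randn(8, 2)

# Interpolate between real and fake samples ...
alpha = torch.rand(8, 1).expand(real.size())
interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)

# ... and penalize deviations of the critic's gradient norm from 1.
grads = torch.autograd.grad(outputs=critic(interp), inputs=interp,
                            grad_outputs=torch.ones(8, 1),
                            create_graph=True)[0]
gp = (grads.norm(2, dim=1) - 1).pow(2).mean()
print(gp.item())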