def __init__(self, memory, nb_status, nb_actions, action_noise=None, gamma=0.99, tau=0.001,
             normalize_observations=True, batch_size=128, observation_range=(-5., 5.),
             action_range=(-1., 1.), actor_lr=1e-4, critic_lr=1e-3):
    """Set up a DDPG-style agent: actor/critic networks with target copies,
    their Adam optimizers, a replay buffer, and optional observation
    normalisation statistics.

    Args:
        memory: externally-constructed replay buffer.
        nb_status: dimensionality of the observation/state vector.
        nb_actions: dimensionality of the action vector.
        action_noise: optional exploration-noise process (stored as-is).
        gamma: discount factor (stored as ``self.discount``).
        tau: soft target-update coefficient.
        normalize_observations: when True, a RunningMeanStd tracker is created.
        batch_size: minibatch size for updates.
        observation_range / action_range: clipping ranges (stored only).
        actor_lr / critic_lr: Adam learning rates for actor and critic.
    """
    self.nb_status = nb_status
    self.nb_actions = nb_actions
    self.action_range = action_range
    self.observation_range = observation_range
    self.normalize_observations = normalize_observations
    # Online networks and their target twins (same architecture).
    # NOTE(review): the targets are never explicitly synced to the online
    # weights here (no hard_update call, unlike sibling agents in this file)
    # — confirm callers initialise them, or the targets start at random init.
    self.actor = Actor(self.nb_status, self.nb_actions)
    self.actor_target = Actor(self.nb_status, self.nb_actions)
    self.actor_optim = Adam(self.actor.parameters(), lr=actor_lr)
    self.critic = Critic(self.nb_status, self.nb_actions)
    self.critic_target = Critic(self.nb_status, self.nb_actions)
    self.critic_optim = Adam(self.critic.parameters(), lr=critic_lr)
    # Create replay buffer
    self.memory = memory  # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
    self.action_noise = action_noise
    # Hyper-parameters
    self.batch_size = batch_size
    self.tau = tau
    self.discount = gamma
    # Running mean/std tracker used to normalise observations when requested.
    if self.normalize_observations:
        self.obs_rms = RunningMeanStd()
    else:
        self.obs_rms = None
def pretrain(self, train_data, corrupter, tester):
    """Pretrain ``self.mdl`` with a margin/pair loss over corrupted triples.

    Each epoch: shuffle the (src, rel, dst) triples, generate corrupted
    head/tail entities via ``corrupter``, then minibatch-optimise the pair
    loss. Periodically evaluates with ``tester()`` and checkpoints the best
    model. Returns the best test performance seen.

    NOTE: uses pre-0.4 PyTorch idioms (``Variable``, ``loss.data[0]``) —
    this code assumes an old torch version.
    """
    src, rel, dst = train_data
    n_train = len(src)
    optimizer = Adam(self.mdl.parameters())
    #optimizer = SGD(self.mdl.parameters(), lr=1e-4)
    n_epoch = self.config.n_epoch
    n_batch = self.config.n_batch
    best_perf = 0
    for epoch in range(n_epoch):
        epoch_loss = 0
        # Shuffle the training triples each epoch.
        rand_idx = t.randperm(n_train)
        src = src[rand_idx]
        rel = rel[rand_idx]
        dst = dst[rand_idx]
        # Negative sampling: corrupted heads/tails for the same relations.
        src_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
        src_cuda = src.cuda()
        rel_cuda = rel.cuda()
        dst_cuda = dst.cuda()
        src_corrupted = src_corrupted.cuda()
        dst_corrupted = dst_corrupted.cuda()
        for s0, r, t0, s1, t1 in batch_by_num(n_batch, src_cuda, rel_cuda, dst_cuda, src_corrupted,
                                              dst_corrupted, n_sample=n_train):
            self.mdl.zero_grad()
            # Pair loss compares true (s0, r, t0) against corrupted (s1, r, t1).
            loss = t.sum(self.mdl.pair_loss(Variable(s0), Variable(r), Variable(t0),
                                            Variable(s1), Variable(t1)))
            loss.backward()
            optimizer.step()
            # Project embeddings back onto the model's constraint set.
            self.mdl.constraint()
            epoch_loss += loss.data[0]
        logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
        # Periodic evaluation; keep the best-performing checkpoint.
        if (epoch + 1) % self.config.epoch_per_test == 0:
            test_perf = tester()
            if test_perf > best_perf:
                self.save(os.path.join(config().task.dir, self.config.model_file))
                best_perf = test_perf
    return best_perf
def pretrain(self, train_data, corrupter, tester):
    """Pretrain ``self.mdl`` with a softmax (listwise) loss over corrupted
    candidate lists.

    Every ``sample_freq`` epochs the triples are reshuffled and recorrupted;
    the label 0 marks the true triple's position in each candidate list
    (grounded by ``label = t.zeros(...)``). Periodically evaluates with
    ``tester()`` and checkpoints the best model. Returns the best test
    performance.

    NOTE: pre-0.4 PyTorch idioms (``Variable``, ``loss.data[0]``).
    """
    src, rel, dst = train_data
    n_train = len(src)
    n_epoch = self.config.n_epoch
    n_batch = self.config.n_batch
    optimizer = Adam(self.mdl.parameters(), weight_decay=self.weight_decay)
    best_perf = 0
    for epoch in range(n_epoch):
        epoch_loss = 0
        # Only resample corrupted triples every `sample_freq` epochs;
        # otherwise the previous epoch's corrupted batch is reused.
        if epoch % self.config.sample_freq == 0:
            rand_idx = t.randperm(n_train)
            src = src[rand_idx]
            rel = rel[rand_idx]
            dst = dst[rand_idx]
            src_corrupted, rel_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
            src_corrupted = src_corrupted.cuda()
            rel_corrupted = rel_corrupted.cuda()
            dst_corrupted = dst_corrupted.cuda()
        for ss, rs, ts in batch_by_num(n_batch, src_corrupted, rel_corrupted, dst_corrupted,
                                       n_sample=n_train):
            self.mdl.zero_grad()
            # Index 0 is the positive example within each candidate list.
            label = t.zeros(len(ss)).type(t.LongTensor).cuda()
            loss = t.sum(self.mdl.softmax_loss(Variable(ss), Variable(rs), Variable(ts), label))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.data[0]
        logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
        if (epoch + 1) % self.config.epoch_per_test == 0:
            test_perf = tester()
            if test_perf > best_perf:
                self.save(os.path.join(config().task.dir, self.config.model_file))
                best_perf = test_perf
    return best_perf
def _init_optimizers(self):
    """Lazily create Adam optimizers for the generator and the critic.

    Both optimizers are (re)built whenever either one is missing, using
    lr=1e-4 and betas=(0, 0.9), and covering only parameters that still
    require gradients. Calling this again once both exist is a no-op.
    """
    if self.generator_optim is not None and self.critic_optim is not None:
        return
    from torch.optim import Adam

    def trainable(module):
        # Skip frozen parameters (requires_grad == False).
        return (p for p in module.parameters() if p.requires_grad)

    self.generator_optim = Adam(trainable(self.generator), lr=0.0001, betas=(0, 0.9))
    self.critic_optim = Adam(trainable(self.critic), lr=0.0001, betas=(0, 0.9))
def __init__(self, nb_status, nb_actions, args, writer):
    """DDPG-style agent built from parsed ``args``: actor/critic with target
    copies, optional CNN front-end for pixel input, replay memory, and
    exploration-noise bookkeeping.

    Args:
        nb_status: raw state dimensionality (multiplied by window_length).
        nb_actions: action dimensionality.
        args: parsed hyper-parameters (hidden sizes, lrs, sizes, flags...).
        writer: logging/summary writer, stored as-is.
    """
    self.clip_actor_grad = args.clip_actor_grad
    self.nb_status = nb_status * args.window_length
    self.nb_actions = nb_actions
    self.discrete = args.discrete
    self.pic = args.pic
    self.writer = writer
    self.select_time = 0
    # Pixel input: the CNN produces a fixed-size feature vector instead.
    if self.pic:
        self.nb_status = args.pic_status
    # Create Actor and Critic Network
    net_cfg = {
        'hidden1': args.hidden1,
        'hidden2': args.hidden2,
        'use_bn': args.bn,
        'init_method': args.init_method
    }
    if args.pic:
        self.cnn = CNN(1, args.pic_status)
        self.cnn_target = CNN(1, args.pic_status)
        self.cnn_optim = Adam(self.cnn.parameters(), lr=args.crate)
    self.actor = Actor(self.nb_status, self.nb_actions, **net_cfg)
    self.actor_target = Actor(self.nb_status, self.nb_actions, **net_cfg)
    self.actor_optim = Adam(self.actor.parameters(), lr=args.prate)
    self.critic = Critic(self.nb_status, self.nb_actions, **net_cfg)
    self.critic_target = Critic(self.nb_status, self.nb_actions, **net_cfg)
    self.critic_optim = Adam(self.critic.parameters(), lr=args.rate)
    hard_update(self.actor_target, self.actor)  # Make sure target is with the same weight
    hard_update(self.critic_target, self.critic)
    if args.pic:
        hard_update(self.cnn_target, self.cnn)
    # Create replay buffer
    self.memory = rpm(args.rmsize)  # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
    self.random_process = Myrandom(size=nb_actions)
    # Hyper-parameters
    self.batch_size = args.batch_size
    self.tau = args.tau
    self.discount = args.discount
    # Epsilon decays linearly by `depsilon` per step, starting from 1.0.
    self.depsilon = 1.0 / args.epsilon
    #
    self.epsilon = 1.0
    self.s_t = None  # Most recent state
    self.a_t = None  # Most recent action
    self.use_cuda = args.cuda
    #
    if self.use_cuda: self.cuda()
def __init__(self, nb_status, nb_actions, args):
    """Multi-actor DDPG-style agent: three actor networks (with targets)
    sharing a single critic, plus replay memory and noise bookkeeping.

    Args:
        nb_status: raw state dimensionality (multiplied by window_length).
        nb_actions: action dimensionality.
        args: parsed hyper-parameters.
    """
    # Fixed ensemble size of three actors.
    self.num_actor = 3
    self.nb_status = nb_status * args.window_length
    self.nb_actions = nb_actions
    self.discrete = args.discrete
    self.pic = args.pic
    # Pixel input: the CNN produces a fixed-size feature vector instead.
    if self.pic:
        self.nb_status = args.pic_status
    # Create Actor and Critic Network
    net_cfg = {
        'hidden1': args.hidden1,
        'hidden2': args.hidden2,
        'use_bn': args.bn
    }
    if args.pic:
        self.cnn = CNN(3, args.pic_status)
        self.cnn_optim = Adam(self.cnn.parameters(), lr=args.crate)
    # NOTE(review): unlike the critic, the actors are built WITHOUT net_cfg —
    # confirm this asymmetry is intentional.
    self.actors = [Actor(self.nb_status, self.nb_actions) for _ in range(self.num_actor)]
    self.actor_targets = [Actor(self.nb_status, self.nb_actions) for _ in range(self.num_actor)]
    self.actor_optims = [Adam(self.actors[i].parameters(), lr=args.prate) for i in range(self.num_actor)]
    self.critic = Critic(self.nb_status, self.nb_actions, **net_cfg)
    self.critic_target = Critic(self.nb_status, self.nb_actions, **net_cfg)
    self.critic_optim = Adam(self.critic.parameters(), lr=args.rate)
    for i in range(self.num_actor):
        hard_update(self.actor_targets[i], self.actors[i])  # Make sure target is with the same weight
    hard_update(self.critic_target, self.critic)
    # Create replay buffer
    self.memory = rpm(args.rmsize)  # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
    self.random_process = Myrandom(size=nb_actions)
    # Hyper-parameters
    self.batch_size = args.batch_size
    self.tau = args.tau
    self.discount = args.discount
    # Epsilon decays linearly by `depsilon` per step, starting from 1.0.
    self.depsilon = 1.0 / args.epsilon
    #
    self.epsilon = 1.0
    self.s_t = None  # Most recent state
    self.a_t = None  # Most recent action
    self.use_cuda = args.cuda
    #
    if self.use_cuda: self.cuda()
def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):
    """Actor-critic agent setup: online actor/critic, target copies
    initialised to identical weights, and per-network Adam optimizers."""
    self.num_inputs = num_inputs
    self.action_space = action_space
    self.gamma = gamma
    self.tau = tau

    # Online networks and their target twins share constructor arguments.
    self.actor = Actor(hidden_size, self.num_inputs, self.action_space)
    self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space)
    self.critic = Critic(hidden_size, self.num_inputs, self.action_space)
    self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space)

    # Actor learns more slowly (1e-4) than the critic (1e-3).
    self.actor_optim = Adam(self.actor.parameters(), lr=1e-4)
    self.critic_optim = Adam(self.critic.parameters(), lr=1e-3)

    # Start each target as an exact copy of its online network.
    hard_update(self.actor_target, self.actor)
    hard_update(self.critic_target, self.critic)
def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):
    """Agent setup: a single policy network, a target copy initialised to
    identical weights, and one Adam optimizer (lr=1e-3)."""
    self.gamma = gamma
    self.tau = tau
    self.num_inputs = num_inputs
    self.action_space = action_space

    # Online network and its target twin share constructor arguments.
    self.model = Policy(hidden_size, num_inputs, action_space)
    self.target_model = Policy(hidden_size, num_inputs, action_space)
    self.optimizer = Adam(self.model.parameters(), lr=1e-3)

    # Start the target as an exact copy of the online network.
    hard_update(self.target_model, self.model)
def train(self, data):
    """Train ``self.network`` with a triplet-style loss.

    Per batch: sample anchors and negatives from ``data``, build positives
    by applying a randomly chosen deformation to the anchors, embed all
    three through the network, and step the optimizer on
    ``self.loss.forward(anchor, positive, negative)``. Returns the trained
    network.

    Args:
        data: dict-like; the array under key ``'data'`` is the training set.
    """
    data = data['data']
    self.network.train()
    optimizer = Adam(trainable_parameters(self.network), lr=1e-5)
    for epoch, batch in self._driver(data):
        self.network.zero_grad()

        # choose a batch of anchors
        indices, anchor = self._select_batch(data)
        anchor_v = self._variable(anchor)
        a = self._apply_network_and_normalize(anchor_v)

        # choose negative examples
        negative_indices, negative = self._select_batch(data)
        negative_v = self._variable(negative)
        n = self._apply_network_and_normalize(negative_v)

        # choose a deformation for this batch and apply it to produce the
        # positive examples
        deformation = choice(self.deformations)
        positive = deformation(anchor, data[indices, ...]) \
            .astype(np.float32)
        positive_v = self._variable(positive)
        p = self._apply_network_and_normalize(positive_v)

        error = self.loss.forward(a, p, n)
        error.backward()
        optimizer.step()
        # Report progress, including which deformation was used this batch.
        self.on_batch_complete(
            epoch=epoch,
            batch=batch,
            error=float(error.data.cpu().numpy().squeeze()),
            deformation=deformation.__name__)
    return self.network
def learn(learning_rate, iterations, x, y, validation=None, stop_early=False, run_comment=''):
    """Train a single-layer logistic-regression model with Adam, logging to
    TensorBoard, with optional validation-based early stopping.

    Args:
        learning_rate: Adam learning rate.
        iterations: number of full-batch gradient steps.
        x, y: training inputs and binary targets (full-batch tensors).
        validation: optional (inputs, outputs) pair for validation loss.
        stop_early: when True (and validation given), stop and restore the
            previous weights as soon as validation loss rises.
        run_comment: suffix for the TensorBoard run directory.

    Returns:
        The trained model.
    """
    # Define a neural network using high-level modules.
    writer = SummaryWriter(comment=run_comment)
    model = Sequential(
        Linear(len(x[0]), len(y[0]), bias=True)  # n inputs -> 1 output
    )
    loss_fn = BCEWithLogitsLoss(reduction='sum')  # reduction=mean converges slower.
    # TODO: Add an option to twiddle pos_weight, which lets us trade off precision and recall. Maybe also graph using add_pr_curve(), which can show how that tradeoff is going.
    optimizer = Adam(model.parameters(), lr=learning_rate)
    if validation:
        validation_ins, validation_outs = validation
        previous_validation_loss = None
    with progressbar(range(iterations)) as bar:
        for t in bar:
            y_pred = model(x)  # Make predictions.
            loss = loss_fn(y_pred, y)
            writer.add_scalar('loss', loss, t)
            if validation:
                validation_loss = loss_fn(model(validation_ins), validation_outs)
                if stop_early:
                    # Roll back one step: the *previous* weights had the
                    # lower validation loss.
                    if previous_validation_loss is not None and previous_validation_loss < validation_loss:
                        print('Stopping early at iteration {t} because validation error rose.'.format(t=t))
                        model.load_state_dict(previous_model)
                        break
                    else:
                        previous_validation_loss = validation_loss
                        previous_model = model.state_dict()
                writer.add_scalar('validation_loss', validation_loss, t)
            writer.add_scalar('training_accuracy_per_tag', accuracy_per_tag(model, x, y), t)
            optimizer.zero_grad()  # Zero the gradients.
            loss.backward()  # Compute gradients.
            optimizer.step()

            # Horizontal axis is what confidence. Vertical is how many samples were that confidence.
            writer.add_histogram('confidence', confidences(model, x), t)
    writer.close()
    return model
def train(self, training_data: TrainingData) -> None:
    """Train the ELMo-based classifier on ``training_data``.

    Preprocesses and batchifies the dataset, builds the model, then runs an
    open-ended epoch loop driven by ``TrainingManager`` callbacks (early
    stopping, NaN termination, checkpointing on best test accuracy).

    Note: only the classifier head and the ELMo scalar-mix weights are
    optimised (grounded by the parameter list built below).
    """
    x_train, y_train, x_val, y_val, vocab, class_to_i, i_to_class = preprocess_dataset(training_data)
    self.class_to_i = class_to_i
    self.i_to_class = i_to_class
    log.info('Batchifying data')
    train_batches = batchify(x_train, y_train, shuffle=True)
    val_batches = batchify(x_val, y_val, shuffle=False)
    self.model = ElmoModel(len(i_to_class), dropout=self.dropout)
    if CUDA:
        self.model = self.model.cuda()
    log.info(f'Parameters:\n{self.parameters()}')
    log.info(f'Model:\n{self.model}')
    # Optimise the classifier plus ELMo's scalar mixture weights only;
    # the rest of ELMo stays fixed.
    parameters = list(self.model.classifier.parameters())
    for mix in self.model.elmo._scalar_mixes:
        parameters.extend(list(mix.parameters()))
    self.optimizer = Adam(parameters)
    self.criterion = nn.CrossEntropyLoss()
    # Scheduler watches test accuracy (mode='max').
    self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=5, verbose=True, mode='max')
    temp_prefix = get_tmp_filename()
    self.model_file = f'{temp_prefix}.pt'
    manager = TrainingManager([
        BaseLogger(log_func=log.info), TerminateOnNaN(),
        EarlyStopping(monitor='test_acc', patience=10, verbose=1), MaxEpochStopping(100),
        ModelCheckpoint(create_save_model(self.model), self.model_file, monitor='test_acc')
    ])
    log.info('Starting training')
    epoch = 0
    while True:
        self.model.train()
        train_acc, train_loss, train_time = self.run_epoch(train_batches)
        random.shuffle(train_batches)
        self.model.eval()
        test_acc, test_loss, test_time = self.run_epoch(val_batches, train=False)
        # The manager decides when to stop (early stopping / NaN / max epochs).
        stop_training, reasons = manager.instruct(
            train_time, train_loss, train_acc,
            test_time, test_loss, test_acc
        )
        if stop_training:
            log.info(' '.join(reasons))
            break
        else:
            self.scheduler.step(test_acc)
        epoch += 1
def __init__(self, args, exp_model, logging_func):
    """Distributional DQN agent setup: experience replay, online and target
    networks, and the Adam optimizer, all configured from ``args``."""
    self.args = args

    # Exploration Model
    self.exp_model = exp_model
    self.log = logging_func["log"]

    # Experience Replay
    self.replay = ExpReplay(args.exp_replay_size, args.stale_limit, exp_model, args,
                            priority=self.args.prioritized)

    # DQN and Target DQN (same architecture, separate weights).
    model = get_models(args.model)
    self.dqn = model(actions=args.actions, atoms=args.atoms)
    self.target_dqn = model(actions=args.actions, atoms=args.atoms)

    # Total trainable parameter count, for logging.
    dqn_params = sum(weight.numel() for weight in self.dqn.parameters())
    print("Distrib DQN has {:,} parameters.".format(dqn_params))

    # The target network is only ever used for inference.
    self.target_dqn.eval()

    if args.gpu:
        print("Moving models to GPU.")
        self.dqn.cuda()
        self.target_dqn.cuda()

    # Optimizer
    self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)
    # self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)

    self.T = 0
    self.target_sync_T = -self.args.t_max
class BaseModel(object):
    """Base wrapper around a knowledge-graph embedding module ``self.mdl``.

    The wrapped module is expected to expose ``prob_logit``, ``pair_loss``,
    ``softmax_loss``-style scoring, ``score`` and ``constraint`` (grounded by
    the calls below). Provides save/load, adversarial generator/discriminator
    steps, and filtered link-prediction evaluation.

    NOTE: written for pre-0.4 PyTorch (``Variable``, ``volatile=True``).
    """

    def __init__(self):
        # NOTE(review): ``self.mdl`` is None here, so DataParallel(None) would
        # fail if this runs as-is — presumably subclasses assign a real module
        # first; confirm the intended initialisation order.
        self.mdl = None  # type: BaseModule
        self.mdl = nn.DataParallel(self.mdl)  # type: BaseModule
        self.mdl = self.mdl.cuda()
        self.weight_decay = 0

    def save(self, filename):
        """Serialise the wrapped module's weights to ``filename``."""
        torch.save(self.mdl.state_dict(), filename)

    def load(self, filename):
        """Load weights from ``filename``, remapping storage onto the GPU."""
        self.mdl.load_state_dict(torch.load(filename, map_location=lambda storage, location: storage.cuda()))

    def gen_step(self, src, rel, dst, n_sample=1, temperature=1.0, train=True):
        """Generator step (coroutine): sample negative triples, then train on
        rewards sent back by the caller.

        Usage: the caller receives ``(sample_srcs, sample_dsts)`` from the
        first ``yield`` and sends a reward tensor back via ``.send(...)``;
        when ``train`` is True a REINFORCE update is applied.
        """
        # Lazily create the optimizer on first use.
        if not hasattr(self, 'opt'):
            self.opt = Adam(self.mdl.parameters(), weight_decay=self.weight_decay)
        n, m = dst.size()
        rel_var = Variable(rel.cuda())
        src_var = Variable(src.cuda())
        dst_var = Variable(dst.cuda())
        # Temperature-scaled logits over each row's candidate entities.
        logits = self.mdl.module.prob_logit(src_var, rel_var, dst_var) / temperature
        probs = nnf.softmax(logits)
        row_idx = torch.arange(0, n).type(torch.LongTensor).unsqueeze(1).expand(n, n_sample)
        sample_idx = torch.multinomial(probs, n_sample, replacement=True)
        sample_srcs = src[row_idx, sample_idx.data.cpu()]
        sample_dsts = dst[row_idx, sample_idx.data.cpu()]
        # Hand the sampled negatives to the caller; receive rewards in return.
        rewards = yield sample_srcs, sample_dsts
        if train:
            self.mdl.zero_grad()
            log_probs = nnf.log_softmax(logits)
            # REINFORCE: maximise reward-weighted log-probability of samples.
            reinforce_loss = -torch.sum(Variable(rewards) * log_probs[row_idx.cuda(), sample_idx.data])
            reinforce_loss.backward()
            self.opt.step()
            self.mdl.module.constraint()
        yield None

    def dis_step(self, src, rel, dst, src_fake, dst_fake, train=True):
        """Discriminator step: pair loss of true vs. fake triples.

        Returns ``(losses.data, -fake_scores.data)``; the negated fake scores
        serve as the generator's reward signal.
        """
        # Lazily create the optimizer on first use.
        if not hasattr(self, 'opt'):
            self.opt = Adam(self.mdl.parameters(), weight_decay=self.weight_decay)
        src_var = Variable(src.cuda())
        rel_var = Variable(rel.cuda())
        dst_var = Variable(dst.cuda())
        src_fake_var = Variable(src_fake.cuda())
        dst_fake_var = Variable(dst_fake.cuda())
        losses = self.mdl.module.pair_loss(src_var, rel_var, dst_var, src_fake_var, dst_fake_var)
        fake_scores = self.mdl.module.score(src_fake_var, rel_var, dst_fake_var)
        if train:
            self.mdl.zero_grad()
            torch.sum(losses).backward()
            self.opt.step()
            self.mdl.module.constraint()
        return losses.data, -fake_scores.data

    def test_link(self, test_data, n_ent, heads, tails, filt=True):
        """Filtered link-prediction evaluation; logs and returns MRR.

        For each test triple, scores all ``n_ent`` candidate heads and tails;
        with ``filt`` True, known true triples (from ``heads``/``tails``
        sparse lookups) are pushed out of the ranking, keeping only the
        test entity's own score.
        """
        # self.mdl = nn.DataParallel(self.mdl)
        # self.mdl = self.mdl.cuda()
        mrr_tot = 0
        mr_tot = 0
        hit10_tot = 0
        hit1_tot = 0
        count = 0
        for batch_s, batch_r, batch_t in batch_by_size(config().test_batch_size, *test_data):
            batch_size = batch_s.size(0)
            rel_var = Variable(batch_r.unsqueeze(1).expand(batch_size, n_ent).cuda())
            src_var = Variable(batch_s.unsqueeze(1).expand(batch_size, n_ent).cuda())
            dst_var = Variable(batch_t.unsqueeze(1).expand(batch_size, n_ent).cuda())
            all_var = Variable(torch.arange(0, n_ent).unsqueeze(0).expand(batch_size, n_ent)
                               .type(torch.LongTensor).cuda(), volatile=True)
            # Score every candidate tail (dst) and head (src) per test triple.
            batch_dst_scores = self.mdl.module.score(src_var, rel_var, all_var).data
            batch_src_scores = self.mdl.module.score(all_var, rel_var, dst_var).data
            for s, r, t, dst_scores, src_scores in zip(batch_s, batch_r, batch_t, batch_dst_scores, batch_src_scores):
                if filt:
                    # Filtered setting: de-rank other known-true entities by
                    # adding a huge penalty, preserving the gold entity's score.
                    if tails[(s.item(), r.item())]._nnz() > 1:
                        tmp = dst_scores[t].item()
                        dst_scores += tails[(s.item(), r.item())].cuda() * 1e30
                        dst_scores[t] = tmp
                    if heads[(t.item(), r.item())]._nnz() > 1:
                        tmp = src_scores[s].item()
                        src_scores += heads[(t.item(), r.item())].cuda() * 1e30
                        src_scores[s] = tmp
                mrr, mr, hit1, hit10 = mrr_mr_hitk2(dst_scores, t)
                mrr_tot += mrr
                mr_tot += mr
                hit1_tot += hit1
                hit10_tot += hit10
                mrr, mr, hit1, hit10 = mrr_mr_hitk2(src_scores, s)
                mrr_tot += mrr
                mr_tot += mr
                hit1_tot += hit1
                hit10_tot += hit10
                count += 2
        logging.info('Test_MRR=%f, Test_MR=%f, Test_H@1=%f, Test_H@10=%f',
                     mrr_tot.item() / count, mr_tot / count, hit1_tot / count, hit10_tot / count)
        return mrr_tot.item() / count

    def eval_link(self, test_data, n_ent, heads, tails, filt=True):
        """Same filtered link-prediction evaluation as ``test_link`` but
        accumulates metrics as plain numbers (no ``.item()`` on the total)."""
        mrr_tot = 0
        mr_tot = 0
        hit10_tot = 0
        hit1_tot = 0
        count = 0
        for batch_s, batch_r, batch_t in batch_by_size(config().test_batch_size, *test_data):
            batch_size = batch_s.size(0)
            rel_var = Variable(batch_r.unsqueeze(1).expand(batch_size, n_ent).cuda())
            src_var = Variable(batch_s.unsqueeze(1).expand(batch_size, n_ent).cuda())
            dst_var = Variable(batch_t.unsqueeze(1).expand(batch_size, n_ent).cuda())
            all_var = Variable(torch.arange(0, n_ent).unsqueeze(0).expand(batch_size, n_ent)
                               .type(torch.LongTensor).cuda(), volatile=True)
            batch_dst_scores = self.mdl.module.score(src_var, rel_var, all_var).data
            batch_src_scores = self.mdl.module.score(all_var, rel_var, dst_var).data
            for s, r, t, dst_scores, src_scores in zip(batch_s, batch_r, batch_t, batch_dst_scores, batch_src_scores):
                if filt:
                    # Filtered setting: see test_link.
                    if tails[(s.item(), r.item())]._nnz() > 1:
                        tmp = dst_scores[t].item()
                        dst_scores += tails[(s.item(), r.item())].cuda() * 1e30
                        dst_scores[t] = tmp
                    if heads[(t.item(), r.item())]._nnz() > 1:
                        tmp = src_scores[s].item()
                        src_scores += heads[(t.item(), r.item())].cuda() * 1e30
                        src_scores[s] = tmp
                mrr, mr, hit1, hit10 = mrr_mr_hitk2(dst_scores, t)
                mrr_tot += mrr
                mr_tot += mr
                hit1_tot += hit1
                hit10_tot += hit10
                mrr, mr, hit1, hit10 = mrr_mr_hitk2(src_scores, s)
                mrr_tot += mrr
                mr_tot += mr
                hit1_tot += hit1
                hit10_tot += hit10
                count += 2
        logging.info('Test_MRR=%f, Test_MR=%f, Test_H@1=%f, Test_H@10=%f',
                     mrr_tot / count, mr_tot / count, hit1_tot / count, hit10_tot / count)
        return mrr_tot / count
class RnnGuesser(AbstractGuesser):
    """RNN-based quiz-bowl guesser: trains on QuizBowl data, ranks answer
    pages for question text, and exposes a small Flask web API (including a
    gradient-based evidence-highlighting endpoint).

    NOTE: mixes pre-0.4 PyTorch idioms (``Variable``, ``.data[0]``,
    ``volatile``) with f-strings; assumes an old torch version.
    """

    def __init__(self, config_num):
        """Load hyper-parameters from the global ``conf`` for ``config_num``
        (skipped when None, e.g. when restoring via ``load``)."""
        super(RnnGuesser, self).__init__(config_num)
        if self.config_num is not None:
            guesser_conf = conf['guessers']['qanta.guesser.rnn.RnnGuesser'][self.config_num]
            self.gradient_clip = guesser_conf['gradient_clip']
            self.n_hidden_units = guesser_conf['n_hidden_units']
            self.n_hidden_layers = guesser_conf['n_hidden_layers']
            self.nn_dropout = guesser_conf['dropout']
            self.batch_size = guesser_conf['batch_size']
            self.use_wiki = guesser_conf['use_wiki']
            self.n_wiki_sentences = guesser_conf['n_wiki_sentences']
            self.wiki_title_replace_token = guesser_conf['wiki_title_replace_token']
            self.lowercase = guesser_conf['lowercase']
            self.random_seed = guesser_conf['random_seed']
        # Populated during train()/load().
        self.page_field: Optional[Field] = None
        self.qanta_id_field: Optional[Field] = None
        self.text_field: Optional[Field] = None
        self.n_classes = None
        self.emb_dim = None
        self.model_file = None
        self.model: Optional[RnnModel] = None
        self.optimizer = None
        self.criterion = None
        self.scheduler = None

    @property
    def ans_to_i(self):
        """Answer-page -> class-index mapping (from the page vocab)."""
        return self.page_field.vocab.stoi

    @property
    def i_to_ans(self):
        """Class-index -> answer-page mapping (from the page vocab)."""
        return self.page_field.vocab.itos

    def parameters(self):
        """Return this guesser's hyper-parameter dict from the global conf."""
        return conf['guessers']['qanta.guesser.rnn.RnnGuesser'][self.config_num]

    def train(self, training_data):
        """Train the RNN model; loops until the TrainingManager stops it
        (early stopping on test accuracy, NaN, or max 100 epochs)."""
        log.info('Loading Quiz Bowl dataset')
        train_iter, val_iter, dev_iter = QuizBowl.iters(
            batch_size=self.batch_size, lower=self.lowercase,
            use_wiki=self.use_wiki, n_wiki_sentences=self.n_wiki_sentences,
            replace_title_mentions=self.wiki_title_replace_token,
            sort_within_batch=True
        )
        log.info(f'Training Data={len(training_data[0])}')
        log.info(f'N Train={len(train_iter.dataset.examples)}')
        log.info(f'N Test={len(val_iter.dataset.examples)}')
        fields: Dict[str, Field] = train_iter.dataset.fields
        self.page_field = fields['page']
        self.n_classes = len(self.ans_to_i)
        self.qanta_id_field = fields['qanta_id']
        self.emb_dim = 300
        self.text_field = fields['text']
        log.info(f'Text Vocab={len(self.text_field.vocab)}')
        log.info('Initializing Model')
        self.model = RnnModel(
            self.n_classes,
            text_field=self.text_field,
            emb_dim=self.emb_dim,
            n_hidden_units=self.n_hidden_units,
            n_hidden_layers=self.n_hidden_layers,
            nn_dropout=self.nn_dropout
        )
        if CUDA:
            self.model = self.model.cuda()
        log.info(f'Parameters:\n{self.parameters()}')
        log.info(f'Model:\n{self.model}')
        self.optimizer = Adam(self.model.parameters())
        self.criterion = nn.CrossEntropyLoss()
        # Scheduler watches test accuracy (mode='max').
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=5, verbose=True, mode='max')
        temp_prefix = get_tmp_filename()
        self.model_file = f'{temp_prefix}.pt'
        manager = TrainingManager([
            BaseLogger(log_func=log.info), TerminateOnNaN(),
            EarlyStopping(monitor='test_acc', patience=10, verbose=1), MaxEpochStopping(100),
            ModelCheckpoint(create_save_model(self.model), self.model_file, monitor='test_acc')
        ])
        log.info('Starting training')
        epoch = 0
        while True:
            self.model.train()
            train_acc, train_loss, train_time = self.run_epoch(train_iter)
            self.model.eval()
            test_acc, test_loss, test_time = self.run_epoch(val_iter)
            stop_training, reasons = manager.instruct(
                train_time, train_loss, train_acc,
                test_time, test_loss, test_acc
            )
            if stop_training:
                log.info(' '.join(reasons))
                break
            else:
                self.scheduler.step(test_acc)
            epoch += 1

    def run_epoch(self, iterator: Iterator):
        """Run one epoch over ``iterator``; trains iff ``iterator.train``.

        Returns (mean accuracy, mean loss, elapsed seconds).
        """
        is_train = iterator.train
        batch_accuracies = []
        batch_losses = []
        batch_size = self.batch_size
        hidden_init = self.model.init_hidden(batch_size)
        epoch_start = time.time()
        for batch in iterator:
            text, lengths = batch.text
            lengths = list(lengths.cpu().numpy())
            # The final batch may be smaller; resize the hidden state to match.
            if len(lengths) != batch_size:
                batch_size = len(lengths)
                hidden_init = self.model.init_hidden(batch_size)
            page = batch.page
            qanta_ids = batch.qanta_id.cuda()
            if is_train:
                self.model.zero_grad()
            out, hidden = self.model(
                text, lengths, hidden_init, qanta_ids
            )
            _, preds = torch.max(out, 1)
            accuracy = torch.mean(torch.eq(preds, page).float()).data[0]
            batch_loss = self.criterion(out, page)
            if is_train:
                batch_loss.backward()
                torch.nn.utils.clip_grad_norm(self.model.parameters(), self.gradient_clip)
                self.optimizer.step()
            batch_accuracies.append(accuracy)
            batch_losses.append(batch_loss.data[0])
        epoch_end = time.time()
        return np.mean(batch_accuracies), np.mean(batch_losses), epoch_end - epoch_start

    def guess(self, questions: List[QuestionText], max_n_guesses: Optional[int]):
        """Return up to ``max_n_guesses`` (answer, probability) pairs per
        question, batching large inputs 128 questions at a time."""
        if len(questions) == 0:
            return []
        batch_size = 128
        if len(questions) < batch_size:
            return self._guess_batch(questions, max_n_guesses)
        else:
            all_guesses = []
            for i in range(0, len(questions), batch_size):
                batch_questions = questions[i:i + batch_size]
                guesses = self._guess_batch(batch_questions, max_n_guesses)
                all_guesses.extend(guesses)
            return all_guesses

    def _guess_batch(self, questions: List[QuestionText], max_n_guesses: Optional[int]):
        """Score one batch of questions and rank answers by softmax
        probability. Questions are sorted by length for the RNN, then results
        are unsorted back to the input order via ``rev_order``."""
        if len(questions) == 0:
            return []
        examples = [self.text_field.preprocess(q) for q in questions]
        padded_examples, lengths = self.text_field.pad(examples)
        padded_examples = np.array(padded_examples, dtype=np.object)
        lengths = np.array(lengths)
        # Sort by descending length; rev_order restores the original order.
        order = np.argsort(-lengths)
        rev_order = np.argsort(order)
        ordered_examples = padded_examples[order]
        ordered_lengths = lengths[order]
        text, lengths = self.text_field.numericalize((ordered_examples, ordered_lengths), device=None, train=False)
        lengths = list(lengths.cpu().numpy())
        # Dummy qanta ids — inference does not need real ones.
        qanta_ids = self.qanta_id_field.process([0 for _ in questions]).cuda()
        guesses = []
        hidden_init = self.model.init_hidden(len(questions))
        out, _ = self.model(text, lengths, hidden_init, qanta_ids)
        ordered_probs = F.softmax(out).data.cpu().numpy()
        probs = ordered_probs[rev_order]
        n_examples = probs.shape[0]
        preds = np.argsort(-probs, axis=1)
        for i in range(n_examples):
            guesses.append([])
            for p in preds[i][:max_n_guesses]:
                guesses[-1].append((self.i_to_ans[p], probs[i][p]))
        return guesses

    def save(self, directory: str):
        """Move the checkpointed weights into ``directory`` and pickle the
        fields/hyper-parameters needed to restore the guesser."""
        shutil.copyfile(self.model_file, os.path.join(directory, 'rnn.pt'))
        shell(f'rm -f {self.model_file}')
        with open(os.path.join(directory, 'rnn.pkl'), 'wb') as f:
            cloudpickle.dump({
                'page_field': self.page_field,
                'text_field': self.text_field,
                'qanta_id_field': self.qanta_id_field,
                'n_classes': self.n_classes,
                'gradient_clip': self.gradient_clip,
                'n_hidden_units': self.n_hidden_units,
                'n_hidden_layers': self.n_hidden_layers,
                'nn_dropout': self.nn_dropout,
                'batch_size': self.batch_size,
                'use_wiki': self.use_wiki,
                'n_wiki_sentences': self.n_wiki_sentences,
                'wiki_title_replace_token': self.wiki_title_replace_token,
                'lowercase': self.lowercase,
                'random_seed': self.random_seed,
                'config_num': self.config_num
            }, f)

    @classmethod
    def load(cls, directory: str):
        """Restore a guesser (fields, hyper-parameters, weights) saved by
        ``save``; returns the model in eval mode."""
        with open(os.path.join(directory, 'rnn.pkl'), 'rb') as f:
            params = cloudpickle.load(f)
        guesser = RnnGuesser(params['config_num'])
        guesser.page_field = params['page_field']
        guesser.qanta_id_field = params['qanta_id_field']
        guesser.text_field = params['text_field']
        guesser.n_classes = params['n_classes']
        guesser.gradient_clip = params['gradient_clip']
        guesser.n_hidden_units = params['n_hidden_units']
        guesser.n_hidden_layers = params['n_hidden_layers']
        guesser.nn_dropout = params['nn_dropout']
        guesser.use_wiki = params['use_wiki']
        guesser.n_wiki_sentences = params['n_wiki_sentences']
        guesser.wiki_title_replace_token = params['wiki_title_replace_token']
        guesser.lowercase = params['lowercase']
        guesser.random_seed = params['random_seed']
        guesser.model = RnnModel(
            guesser.n_classes,
            text_field=guesser.text_field,
            init_embeddings=False, emb_dim=300,
            n_hidden_layers=guesser.n_hidden_layers,
            n_hidden_units=guesser.n_hidden_units
        )
        guesser.model.load_state_dict(torch.load(
            os.path.join(directory, 'rnn.pt'), map_location=lambda storage, loc: storage
        ))
        guesser.model.eval()
        if CUDA:
            guesser.model = guesser.model.cuda()
        return guesser

    @classmethod
    def targets(cls):
        """Filenames this guesser writes when saved."""
        return ['rnn.pt', 'rnn.pkl']

    def web_api(self, host='0.0.0.0', port=6000, debug=False):
        """Serve answering and gradient-based evidence-highlighting endpoints
        over HTTP via Flask (blocks in ``app.run``)."""
        from flask import Flask, jsonify, request
        app = Flask(__name__)

        @app.route('/api/answer_question', methods=['POST'])
        def answer_question_base():
            # Top-1 guess for the posted question text.
            text = request.form['text']
            guess, score = self.guess([text], 1)[0][0]
            return jsonify({'guess': guess, 'score': float(score)})

        @app.route('/api/interface_get_highlights', methods=['POST'])
        def get_highlights():
            # Word-level saliency via input-embedding gradients for the
            # requested guess (parsed out of the posted HTML snippet).
            questions = [request.form['text']]
            examples = [self.text_field.preprocess(q) for q in questions]
            padded_examples, lengths = self.text_field.pad(examples)
            padded_examples = np.array(padded_examples, dtype=np.object)
            lengths = np.array(lengths)
            order = np.argsort(-lengths)
            # rev_order = np.argsort(order)
            ordered_examples = padded_examples[order]
            ordered_lengths = lengths[order]
            text, lengths = self.text_field.numericalize((ordered_examples, ordered_lengths), device=-1, train=False)
            lengths = list(lengths.cpu().numpy())
            qanta_ids = self.qanta_id_field.process([0 for _ in questions])  # .cuda()
            hidden_init = self.model.init_hidden(len(questions))
            # Re-wrap so gradients flow to the inputs; hook captures the
            # embedding-layer gradient.
            text = Variable(text.data, volatile=False)
            out, _ = self.model(text, lengths, hidden_init, qanta_ids, extract_grad_hook('embed'))
            # Extract the requested guess string from the posted anchor tag.
            guessForEvidence = request.form['guessForEvidence']
            guessForEvidence = guessForEvidence.split("style=\"color:blue\">")[1].split("</a>")[0].lower()
            indicator = -1
            guess = str(guessForEvidence)
            # Find the requested guess's rank among the top 500 guesses
            # (matching on the first 25 chars with underscores spaced).
            guesses = self.guess([request.form['text']], 500)[0]
            for index, (g, s) in enumerate(guesses):
                print(g.lower().replace("_", " ")[0:25])
                print(guessForEvidence)
                if g.lower().replace("_", " ")[0:25] == guessForEvidence:
                    print("INDICATOR SET")
                    indicator = index
                    guess = g.lower().replace("_", " ")[0:25]
                    break
            if indicator == -1:
                highlights = {
                    'wiki': ['No Evidence', 'No Evidence'],
                    'qb': ['No Evidence', 'No Evidence'],
                    'guess': guess,
                    'visual': 'No Evidence'
                }
                return jsonify(highlights)
            # label = torch.max(out,1)[1]
            # Use the class id of the matched guess as the backprop target.
            label = torch.topk(out, k=500, dim=1)
            label = label[1][0][indicator]  # [0]
            criterion = nn.CrossEntropyLoss()
            loss = criterion(out, label)
            self.model.zero_grad()
            loss.backward()
            # Per-word saliency = summed embedding gradient per token.
            grads = extracted_grads['embed'].transpose(0, 1)
            grads = grads.data.cpu()
            scores = grads.sum(dim=2).numpy()
            grads = grads.numpy()
            text = text.transpose(0, 1).data.cpu().numpy()
            scores = scores.tolist()
            normalized_scores = scores
            # normalize scores across the words, doing positive and negatives seperately
            # final scores should be in range [0,1] 0 is dark red, 1 is dark blue. 0.5 is no highlight
            total_score_pos = 1e-6  # 1e-6 for case where all positive/neg scores are 0
            total_score_neg = 1e-6
            for idx, s in enumerate(normalized_scores):
                s[0] = s[0] * s[0] * s[0] / 5
                if s[0] < 0:
                    total_score_neg = total_score_neg + math.fabs(s[0])
                else:
                    total_score_pos = total_score_pos + s[0]
            for idx, s in enumerate(normalized_scores):
                if s[0] < 0:
                    normalized_scores[idx] = (s[0] / total_score_neg) / 2  # / by 2 to get max of -0.5
                else:
                    normalized_scores[idx] = 0.0
            normalized_scores = [0.5 + n for n in normalized_scores]  # center scores
            returnVal = ""
            for s in normalized_scores:
                returnVal = returnVal + ' ' + str(s)
            # Re-tokenise locally to pair words with their scores for display.
            localPreprocess = create_qb_tokenizer()
            examples = [localPreprocess(q) for q in questions]
            words = []
            for t in examples[0]:
                words.append(str(t))
            visual = colorize(words, normalized_scores, colors='RdBu')
            print("Guess", guess)
            highlights = {
                'wiki': [returnVal, returnVal],
                'qb': [returnVal, returnVal],
                'guess': guess,
                'visual': visual
            }
            return jsonify(highlights)

        @app.route('/api/interface_answer_question', methods=['POST'])
        def answer_question():
            # Top-5 guesses (softmax-renormalised with temperature 3), plus
            # the posted answer's rank if it is outside the top 5.
            text = request.form['text']
            answer = request.form['answer']
            answer = answer.replace(" ", "_").lower()
            guesses = self.guess([text], 20)[0]
            score_fn = []
            sum_normalize = 0.0
            for (g, s) in guesses:
                exp = np.exp(3*float(s))
                score_fn.append(exp)
                sum_normalize += exp
            for index, (g, s) in enumerate(guesses):
                guesses[index] = (g, score_fn[index] / sum_normalize)
            guess = []
            score = []
            answer_found = False
            num = 0
            for index, (g, s) in enumerate(guesses):
                if index >= 5:
                    break
                guess.append(g)
                score.append(float(s))
            # num == -1: answer already in top 5; num > 0: its rank below that.
            for gue in guess:
                if (gue.lower() == answer.lower()):
                    answer_found = True
                    num = -1
            if (not answer_found):
                for index, (g, s) in enumerate(guesses):
                    if (g.lower() == answer.lower()):
                        guess.append(g)
                        score.append(float(s))
                        num = index + 1
            if (num == 0):
                print("num was 0")
                if (request.form['bell'] == 'true'):
                    return "Num0"
            guess = [g.replace("_", " ") for g in guess]
            return jsonify({'guess': guess, 'score': score, 'num': num})

        app.run(host=host, port=port, debug=debug)
def train(args, model, enc=False):
    """Train `model` on the avlane binary lane-segmentation dataset.

    Args:
        args: parsed CLI namespace (datadir, cuda, batch_size, num_epochs,
            resume, savedir, iouTrain/iouVal, visualize/steps_plot, ...).
        model: network; called as ``model(inputs, only_encode=enc)`` and
            expected to emit single-channel logits.
        enc: when True, train only the encoder branch.

    Returns:
        The trained model (convenience for encoder-decoder training).
    """
    best_acc = 0

    # Positive-class weight for BCE; assume background:lane = 0.6:0.4.
    pos_weight = torch.FloatTensor([1.5])

    assert os.path.exists(
        args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(enc, augment=True, height=args.height)  # 1024
    co_transform_val = MyCoTransform(enc, augment=False, height=args.height)  # 1024
    dataset_train = avlane(args.datadir, co_transform, 'train')
    dataset_val = avlane(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train, num_workers=args.num_workers,
                        batch_size=args.batch_size, shuffle=True)
    loader_val = DataLoader(dataset_val, num_workers=args.num_workers,
                            batch_size=args.batch_size, shuffle=False)

    if args.cuda:
        pos_weight = pos_weight.cuda()
    # Binary segmentation: the model outputs raw logits.
    criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    savedir = f'../save/{args.savedir}'
    if enc:
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    if not os.path.exists(automated_log_path):  # dont add first line if it exists
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),
                     eps=1e-08, weight_decay=1e-4)  ## scheduler 2

    start_epoch = 1
    if args.resume:
        # Must load weights, optimizer, epoch and best value.
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'
        assert os.path.exists(filenameCheckpoint), \
            "Error: resume option was used but checkpoint was not found in folder"
        checkpoint = torch.load(filenameCheckpoint)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_acc = checkpoint['best_acc']
        # FIX: removed the stray ')' that was inside the message.
        print("=> Loaded checkpoint at epoch {}".format(checkpoint['epoch']))

    # Polynomial LR decay over the full run ("scheduler 2").
    lambda1 = lambda epoch: pow((1 - ((epoch - 1) / args.num_epochs)), 0.9)
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

    if args.visualize and args.steps_plot > 0:
        board = Dashboard(args.port)

    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")
        scheduler.step(epoch)  ## scheduler 2

        epoch_loss = []
        time_train = []

        doIouTrain = args.iouTrain
        doIouVal = args.iouVal

        if doIouTrain:
            iouEvalTrain = iouEval_binary(NUM_CLASSES)

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels) in enumerate(loader):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)

            optimizer.zero_grad()
            # Single-channel logits vs. float {0,1} lane mask.
            loss = criterion(outputs.squeeze(), targets[:, 0].float())
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data)
            time_train.append(time.time() - start_time)

            if doIouTrain:
                # FIX: outputs are logits (BCEWithLogitsLoss); probability 0.5
                # corresponds to logit 0, so threshold at 0 rather than 0.5.
                preds = torch.where(outputs > 0,
                                    torch.ones([1], dtype=torch.long).cuda(),
                                    torch.zeros([1], dtype=torch.long).cuda())
                iouEvalTrain.addBatch(preds.squeeze(), targets[:, 0])  # no_grad handles it already

            if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
                start_time_plot = time.time()
                image = inputs[0].cpu().data
                board.image(image, f'input (epoch: {epoch}, step: {step})')
                if isinstance(outputs, list):  # merge gpu tensors
                    board.image(
                        color_transform(outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                        f'output (epoch: {epoch}, step: {step})')
                else:
                    board.image(
                        color_transform(outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                        f'output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                            f'target (epoch: {epoch}, step: {step})')
                print("Time to paint images: ", time.time() - start_time_plot)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                      "// Avg time/img: %.4f s" %
                      (sum(time_train) / len(time_train) / args.batch_size))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)

        iouTrain = 0
        if doIouTrain:
            # FIX: this branch previously read `iouEvalVal`, which is undefined
            # here on the first epoch (NameError) and is the *validation*
            # evaluator anyway; the train evaluator filled above is the right
            # one.  Also '{0.2f}' was missing the ':' in the format spec.
            iouTrain = iouEvalTrain.getIoU()
            iouStr = '{:0.2f}'.format(iouTrain.item() * 100)
            print("EPOCH IoU on TRAIN set: ", iouStr, "%")

        # Validate on the val split after each epoch of training.
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        if doIouVal:
            iouEvalVal = iouEval_binary(NUM_CLASSES)

        with torch.no_grad():
            for step, (images, labels) in enumerate(loader_val):
                start_time = time.time()
                if args.cuda:
                    images = images.cuda()
                    labels = labels.cuda()
                inputs = images
                targets = labels
                outputs = model(inputs, only_encode=enc)

                loss = criterion(outputs.squeeze(), targets[:, 0].float())
                epoch_loss_val.append(loss.data)
                time_val.append(time.time() - start_time)

                # Add batch to calculate TP, FP and FN for iou estimation.
                if doIouVal:
                    # FIX: threshold logits at 0 (see train loop).
                    preds = torch.where(outputs > 0,
                                        torch.ones([1], dtype=torch.long).cuda(),
                                        torch.zeros([1], dtype=torch.long).cuda())
                    iouEvalVal.addBatch(preds.squeeze(), targets[:, 0])  # no_grad handles it already

                if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
                    start_time_plot = time.time()
                    image = inputs[0].cpu().data
                    board.image(image, f'VAL input (epoch: {epoch}, step: {step})')
                    if isinstance(outputs, list):  # merge gpu tensors
                        board.image(
                            color_transform(outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                            f'VAL output (epoch: {epoch}, step: {step})')
                    else:
                        board.image(
                            color_transform(outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                            f'VAL output (epoch: {epoch}, step: {step})')
                    board.image(color_transform(targets[0].cpu().data),
                                f'VAL target (epoch: {epoch}, step: {step})')
                    print("Time to paint images: ", time.time() - start_time_plot)

                if args.steps_loss > 0 and step % args.steps_loss == 0:
                    average = sum(epoch_loss_val) / len(epoch_loss_val)
                    print(f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})',
                          "// Avg time/img: %.4f s" %
                          (sum(time_val) / len(time_val) / args.batch_size))

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)

        iouVal = 0
        if doIouVal:
            iouVal = iouEvalVal.getIoU()
            iouStr = '{:0.2f}'.format(iouVal.item() * 100)
            print("EPOCH IoU on VAL set: ", iouStr, "%")

        # Remember best val IoU (fall back to -val-loss when IoU is off)
        # and save checkpoint.
        if iouVal == 0:
            current_acc = -average_epoch_loss_val
        else:
            current_acc = iouVal
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
            filenameBest = savedir + '/model_best_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'
            filenameBest = savedir + '/model_best.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, filenameCheckpoint, filenameBest)

        # SAVE MODEL AFTER EPOCH
        if enc:
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best.pth'
        # FIX: periodic save is per-EPOCH — the condition previously tested the
        # leftover batch index `step` against args.epochs_save; also print the
        # actual filename instead of a broken placeholder.
        if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        if is_best:
            torch.save(model.state_dict(), filenamebest)
            print(f'save: {filenamebest} (epoch: {epoch})')
            if not enc:
                with open(savedir + "/best.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))
            else:
                with open(savedir + "/best_encoder.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))

        # Append one row per epoch:
        # Epoch  Train-loss  Test-loss  Train-IoU  Test-IoU  learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train, average_epoch_loss_val,
                          iouTrain, iouVal, usedLr))

    return (model)  # return model (convenience for encoder-decoder training)
def __call__(self, config):
    """Run the full tagging experiment: preprocess, train, and evaluate.

    Loads train/dev/test corpora, builds (or reloads) the vocabulary,
    trains a Tagger with early stopping on the dev metric, then restores
    the best checkpoint and reports the final test score.

    Args:
        config: experiment configuration object exposing file paths
            (ftrain/fdev/ftest/fembed/vocab/model), hyper-parameters
            (batch_size, lr, epochs, patience, device, punct) and an
            ``update`` method for derived fields.
    """
    print("Preprocess the data")
    train = Corpus.load(config.ftrain)
    dev = Corpus.load(config.fdev)
    test = Corpus.load(config.ftest)
    # Build the vocab from the train corpus (and cache it) unless a cached
    # one already exists and preprocessing was not explicitly requested.
    if config.preprocess or not os.path.exists(config.vocab):
        vocab = Vocab.from_corpus(corpus=train, min_freq=2)
        vocab.read_embeddings(Embedding.load(config.fembed, config.unk))
        torch.save(vocab, config.vocab)
    else:
        vocab = torch.load(config.vocab)
    # Expose vocab-derived sizes/indices to the rest of the config.
    config.update({
        'n_words': vocab.n_init,
        'n_chars': vocab.n_chars,
        'n_labels': vocab.n_labels,
        'pad_index': vocab.pad_index,
        'unk_index': vocab.unk_index
    })
    print(vocab)

    print("Load the dataset")
    trainset = TextDataset(vocab.numericalize(train))
    devset = TextDataset(vocab.numericalize(dev))
    testset = TextDataset(vocab.numericalize(test))
    # set the data loaders (only the train loader shuffles)
    train_loader = batchify(trainset, config.batch_size, True)
    dev_loader = batchify(devset, config.batch_size)
    test_loader = batchify(testset, config.batch_size)
    print(f"{'train:':6} {len(trainset):5} sentences in total, "
          f"{len(train_loader):3} batches provided")
    print(f"{'dev:':6} {len(devset):5} sentences in total, "
          f"{len(dev_loader):3} batches provided")
    print(f"{'test:':6} {len(testset):5} sentences in total, "
          f"{len(test_loader):3} batches provided")

    print("Create the model")
    tagger = Tagger(config, vocab.embed).to(config.device)
    print(f"{tagger}\n")

    optimizer = Adam(tagger.parameters(), config.lr)
    model = Model(config, vocab, tagger, optimizer)

    total_time = timedelta()
    # best_metric starts as a fresh (zero-score) SpanF1Method so any real
    # dev result on epoch 1 beats it.
    best_e, best_metric = 1, SpanF1Method(vocab)

    for epoch in range(1, config.epochs + 1):
        start = datetime.now()
        # train one epoch and update the parameters
        model.train(train_loader)

        print(f"Epoch {epoch} / {config.epochs}:")
        loss, train_metric = model.evaluate(train_loader)
        print(f"{'train:':6} Loss: {loss:.4f} {train_metric}")
        loss, dev_metric = model.evaluate(dev_loader)
        print(f"{'dev:':6} Loss: {loss:.4f} {dev_metric}")
        loss, test_metric = model.evaluate(test_loader)
        print(f"{'test:':6} Loss: {loss:.4f} {test_metric}")
        t = datetime.now() - start

        # save the model if it is the best so far
        if dev_metric > best_metric:
            best_e, best_metric = epoch, dev_metric
            model.tagger.save(config.model)
            print(f"{t}s elapsed (saved)\n")
        else:
            print(f"{t}s elapsed\n")
        total_time += t
        # Early stopping on dev: quit after `patience` epochs without
        # improvement.
        if epoch - best_e >= config.patience:
            break

    # Restore the best checkpoint and report final test performance.
    model.tagger = Tagger.load(config.model)
    loss, metric = model.evaluate(test_loader, config.punct)

    print(f"max score of dev is {best_metric.score:.2%} at epoch {best_e}")
    print(f"the score of test at epoch {best_e} is {metric.score:.2%}")
    print(f"average time of each epoch is {total_time / epoch}s")
    print(f"{total_time}s elapsed")
class Sarsa_Agent:
    """SARSA-style agent with a DQN function approximator, a target network,
    (optionally prioritized) experience replay and count-based exploration
    bonuses.

    Uses the legacy (pre-0.4) PyTorch API throughout: ``Variable``,
    ``volatile=True``, ``.data[0]`` — presumably pinned to an old torch
    version; do not mix with modern tensors without porting.
    """

    def __init__(self, args, exp_model, logging_func):
        # args: parsed hyper-parameter namespace (actions, lr, gamma, gpu, ...)
        self.args = args
        # Exploration Model
        self.exp_model = exp_model
        # Experience Replay
        self.replay = ExpReplay(args.exp_replay_size, args.stale_limit, exp_model, args, priority=self.args.prioritized)
        # DQN and Target DQN
        model = get_models(args.model)
        self.dqn = model(actions=args.actions)
        self.target_dqn = model(actions=args.actions)
        # Count parameters by multiplying out each weight tensor's shape.
        dqn_params = 0
        for weight in self.dqn.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            dqn_params += weight_params
        print("DQN has {:,} parameters.".format(dqn_params))
        # Target network is never trained directly.
        self.target_dqn.eval()
        if args.gpu:
            print("Moving models to GPU.")
            self.dqn.cuda()
            self.target_dqn.cuda()
        # Optimizer
        self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)
        # T: global environment-step counter; train_T / target_sync_T track
        # when we last trained / synced the target network.
        self.T = 0
        self.target_sync_T = -self.args.t_max
        self.prev_exp = None  # pending (s, a, r, s', pseudo_r) awaiting a' (SARSA)
        self.train_T = 0

    def sync_target_network(self):
        """Copy online-network weights into the target network.

        NOTE(review): assigning ``target.data = source.data`` aliases the
        underlying storage rather than copying it — presumably intentional
        here, but confirm; ``copy_`` would be the non-aliasing form.
        """
        for target, source in zip(self.target_dqn.parameters(), self.dqn.parameters()):
            target.data = source.data

    def act(self, state, epsilon, exp_model):
        """Pick an action epsilon-greedily, optionally with optimism bonuses.

        Returns ``(action, extra_info)`` where extra_info carries Q-values
        and any forced-action bookkeeping.
        """
        # self.T += 1
        self.dqn.eval()
        # Keep the most recent frame for the exploration model's bonus;
        # assumes state is (H, W, frames) — TODO confirm against caller.
        orig_state = state[:, :, -1:]
        state = torch.from_numpy(state).float().transpose_(0, 2).unsqueeze(0)
        q_values = self.dqn(Variable(state, volatile=True)).cpu().data[0]
        q_values_numpy = q_values.numpy()

        extra_info = {}
        extra_info["Q_Values"] = q_values_numpy

        if self.args.optimistic_init:
            if not self.args.ucb:
                # Optimism bonus ~ 1/sqrt(pseudo-count) per action.
                for a in range(self.args.actions):
                    _, info = exp_model.bonus(orig_state, a, dont_remember=True)
                    action_pseudo_count = info["Pseudo_Count"]
                    # TODO: Log the optimism bonuses
                    q_values[a] += self.args.optimistic_scaler / np.sqrt(action_pseudo_count + 0.01)
            else:
                # UCB-style bonus using total pseudo-counts over all actions.
                action_counts = []
                for a in range(self.args.actions):
                    _, info = exp_model.bonus(orig_state, a, dont_remember=True)
                    action_pseudo_count = info["Pseudo_Count"]
                    action_counts.append(action_pseudo_count)
                total_count = sum(action_counts)
                for ai, a in enumerate(action_counts):
                    # TODO: Log the optimism bonuses
                    q_values[ai] += self.args.optimistic_scaler * np.sqrt(2 * np.log(max(1, total_count)) / (a + 0.01))

        # Epsilon-greedy over the (possibly bonus-augmented) Q-values.
        if np.random.random() < epsilon:
            action = np.random.randint(low=0, high=self.args.actions)
        else:
            action = q_values.max(0)[1][0]  # Torch...

        if self.args.force_low_count_action:
            # Calculate the counts for each actions
            for a in range(self.args.actions):
                _, info = exp_model.bonus(orig_state, a, dont_remember=True)
                action_pseudo_count = info["Pseudo_Count"]
                # Pick the first one out of simplicity
                if action_pseudo_count < self.args.min_action_count:
                    action = a
                    extra_info["Forced_Action"] = a
                    break

        extra_info["Action"] = action
        return action, extra_info

    def experience(self, state, action, reward, state_next, steps, terminated, pseudo_reward=0, density=1, exploring=False):
        """Buffer one transition; SARSA needs the *next* action, so the
        previous transition is only committed to replay once the current
        action is known."""
        if not exploring:
            self.T += 1
        if self.prev_exp is not None:
            s = self.prev_exp[0]
            a = self.prev_exp[1]
            r = self.prev_exp[2]
            sn = self.prev_exp[3]
            an = action  # the action actually taken from sn (SARSA)
            steps = 1
            tt = terminated
            pr = self.prev_exp[4]
            self.replay.Add_Exp(s, a, r, sn, an, steps, tt, pr, 1)
        self.prev_exp = (state, action, reward, state_next, pseudo_reward)

    def end_of_trajectory(self):
        """Flush per-episode replay state; drops the dangling transition."""
        self.replay.end_of_trajectory()
        self.prev_exp = None
        # self.replay.Clear()

    def train(self):
        """Run `sarsa_train` gradient steps (once enough steps elapsed),
        then clear the replay buffer. Returns a dict of diagnostics."""
        # Periodically refresh the target network.
        if self.T - self.target_sync_T > self.args.target:
            self.sync_target_network()
            self.target_sync_T = self.T

        info = {}

        if self.T - self.train_T >= self.args.sarsa_train:
            for _ in range(self.args.sarsa_train):
                self.train_T = self.T
                self.dqn.eval()

                # TODO: Use a named tuple for experience replay
                n_step_sample = self.args.n_step
                batch, indices, is_weights = self.replay.Sample_N(self.args.batch_size, n_step_sample, self.args.gamma)
                columns = list(zip(*batch))

                # Column layout: 0=state, 1=action, 2=reward, 3=next state,
                # 4=steps, 5=terminal flag, 6=next action — presumably; verify
                # against ExpReplay.Sample_N.
                states = Variable(torch.from_numpy(np.array(columns[0])).float().transpose_(1, 3))
                actions = Variable(torch.LongTensor(columns[1]))
                terminal_states = Variable(torch.FloatTensor(columns[5]))
                rewards = Variable(torch.FloatTensor(columns[2]))
                actions_next = Variable(torch.LongTensor(columns[6]))
                # Have to clip rewards for DQN
                rewards = torch.clamp(rewards, -1, 1)
                steps = Variable(torch.FloatTensor(columns[4]))
                new_states = Variable(torch.from_numpy(np.array(columns[3])).float().transpose_(1, 3))

                target_dqn_qvals = self.target_dqn(new_states).cpu()
                # Make a new variable with those values so that these are treated as constants
                target_dqn_qvals_data = Variable(target_dqn_qvals.data)

                # SARSA target: r + gamma^steps * Q_target(s', a'), zeroed at
                # terminal states via the (1 - terminal) mask.
                q_value_targets = (Variable(torch.ones(terminal_states.size()[0])) - terminal_states)
                inter = Variable(torch.ones(terminal_states.size()[0]) * self.args.gamma)
                # print(steps)
                q_value_targets = q_value_targets * torch.pow(inter, steps)
                q_value_targets = q_value_targets * target_dqn_qvals_data.gather(1, actions_next.view(-1, 1))
                q_value_targets = q_value_targets + rewards

                self.dqn.train()
                if self.args.gpu:
                    actions = actions.cuda()
                    q_value_targets = q_value_targets.cuda()
                model_predictions = self.dqn(states).gather(1, actions.view(-1, 1))

                # info = {}
                td_error = model_predictions - q_value_targets
                info["TD_Error"] = td_error.mean().data[0]

                # Update the priorities
                if not self.args.density_priority:
                    self.replay.Update_Indices(indices, td_error.cpu().data.numpy(), no_pseudo_in_priority=self.args.count_td_priority)

                # If using prioritised we need to weight the td_error
                if self.args.prioritized and self.args.prioritized_is:
                    # print(td_error)
                    weights_tensor = torch.from_numpy(is_weights).float()
                    weights_tensor = Variable(weights_tensor)
                    if self.args.gpu:
                        weights_tensor = weights_tensor.cuda()
                    # print(weights_tensor)
                    td_error = td_error * weights_tensor

                l2_loss = (td_error).pow(2).mean()
                info["Loss"] = l2_loss.data[0]

                # Update
                self.optimizer.zero_grad()
                l2_loss.backward()

                # Taken from pytorch clip_grad_norm
                # Remove once the pip version it up to date with source
                gradient_norm = clip_grad_norm(self.dqn.parameters(), self.args.clip_value)
                if gradient_norm is not None:
                    info["Norm"] = gradient_norm

                self.optimizer.step()

                # Accumulate the states trained on across inner iterations.
                if "States" in info:
                    states_trained = info["States"]
                    info["States"] = states_trained + columns[0]
                else:
                    info["States"] = columns[0]

            self.replay.Clear()

        return info
from torchfusion.gan.learners import * from torchfusion.gan.applications import StandardGenerator, StandardProjectionDiscriminator from torch.optim import Adam from torchfusion.datasets import mnist_loader import torch.cuda as cuda import torch.nn as nn G = StandardGenerator(output_size=(1, 32, 32), latent_size=128, num_classes=10) D = StandardProjectionDiscriminator(input_size=(1, 32, 32), num_classes=10) if cuda.is_available(): G = nn.DataParallel(G.cuda()) D = nn.DataParallel(D.cuda()) g_optim = Adam(G.parameters(), lr=0.0002, betas=(0.5, 0.999)) d_optim = Adam(D.parameters(), lr=0.0002, betas=(0.5, 0.999)) dataset = mnist_loader(size=32, batch_size=64) learner = StandardGanLearner(G, D) if __name__ == "__main__": learner.train(dataset, gen_optimizer=g_optim, disc_optimizer=d_optim, num_classes=10, model_dir="./mnist-gan", latent_size=128, batch_log=False)
train_df = pd.read_csv(f"data/{args.dataset}/preprocessed_data_train.csv", sep="\t") test_df = pd.read_csv(f"data/{args.dataset}/preprocessed_data_test.csv", sep="\t") # Student-wise train-val-test split user_ids = X[:, 0].toarray().flatten() users_test = test_df["user_id"].unique() users_train_val = train_df["user_id"].unique() split = int(0.8 * len(users_train_val)) users_train, users_val = users_train_val[:split], users_train_val[split:] train = X[np.where(np.isin(user_ids, users_train))] val = X[np.where(np.isin(user_ids, users_val))] test = X[np.where(np.isin(user_ids, users_test))] model = FeedForward(train.shape[1] - 5, args.hid_size, args.drop_prob).cuda() optimizer = Adam(model.parameters(), lr=args.lr) # Train param_str = f"{args.dataset}, features={features_suffix}" logger = Logger(os.path.join(args.logdir, param_str)) saver = Saver(args.savedir, param_str) train_ffw( train, val, model, optimizer, logger, saver, args.num_epochs, args.batch_size ) logger.close() model = saver.load() model.eval() pred_test = np.zeros(len(test_df)) for k in range(0, test.shape[0], args.batch_size): inputs, labels = get_tensors(test[k : k + args.batch_size])
OPTIM = Adam(params=[ { "params": MODEL.fpn.stem.parameters(), 'lr': 0.0001 }, { "params": MODEL.fpn.low_features.parameters(), 'lr': 0.00015 }, { "params": MODEL.fpn.mid_features.parameters(), 'lr': 0.00015 }, { "params": MODEL.fpn.top_features.parameters(), 'lr': 0.002 }, { "params": MODEL.cls_head.parameters(), 'lr': 0.0022 }, { "params": MODEL.base_classifier.parameters(), 'lr': 0.0025 }, { "params": MODEL.boxes_classifier.parameters(), 'lr': 0.0025 }, { "params": MODEL.final_classifier.parameters(), 'lr': 0.004 }, ])
class ModelPlain4(ModelBase):
    """Train with pixel loss.

    Plain single-generator model: netG maps (L, k, sf, sigma) — low-quality
    image, blur kernel, scale factor, noise level — to an estimate E of the
    high-quality image H, trained with a pixel-wise loss.
    """

    def __init__(self, opt):
        super(ModelPlain4, self).__init__(opt)
        # ------------------------------------
        # define network
        # ------------------------------------
        self.netG = define_G(opt)
        self.netG = self.model_to_device(self.netG)

    """
    # ----------------------------------------
    # Preparation before training with data
    # Save model during training
    # ----------------------------------------
    """

    # ----------------------------------------
    # initialize training
    # ----------------------------------------
    def init_train(self):
        self.opt_train = self.opt['train']  # training option
        self.load()  # load model
        self.netG.train()  # set training mode,for BN
        self.define_loss()  # define loss
        self.define_optimizer()  # define optimizer
        self.define_scheduler()  # define scheduler
        self.log_dict = OrderedDict()  # log

    # ----------------------------------------
    # load pre-trained G model
    # ----------------------------------------
    def load(self):
        load_path_G = self.opt['path']['pretrained_netG']
        if load_path_G is not None:
            print('Loading model for G [{:s}] ...'.format(load_path_G))
            self.load_network(load_path_G, self.netG)

    # ----------------------------------------
    # save model
    # ----------------------------------------
    def save(self, iter_label):
        self.save_network(self.save_dir, self.netG, 'G', iter_label)

    # ----------------------------------------
    # define loss
    # ----------------------------------------
    def define_loss(self):
        # Select the pixel loss from config: l1 | l2 | l2sum | ssim.
        G_lossfn_type = self.opt_train['G_lossfn_type']
        if G_lossfn_type == 'l1':
            self.G_lossfn = nn.L1Loss().to(self.device)
        elif G_lossfn_type == 'l2':
            self.G_lossfn = nn.MSELoss().to(self.device)
        elif G_lossfn_type == 'l2sum':
            self.G_lossfn = nn.MSELoss(reduction='sum').to(self.device)
        elif G_lossfn_type == 'ssim':
            self.G_lossfn = SSIMLoss().to(self.device)
        else:
            raise NotImplementedError(
                'Loss type [{:s}] is not found.'.format(G_lossfn_type))
        self.G_lossfn_weight = self.opt_train['G_lossfn_weight']

    # ----------------------------------------
    # define optimizer
    # ----------------------------------------
    def define_optimizer(self):
        # Only parameters with requires_grad are optimized.
        G_optim_params = []
        for k, v in self.netG.named_parameters():
            if v.requires_grad:
                G_optim_params.append(v)
            else:
                print('Params [{:s}] will not optimize.'.format(k))
        self.G_optimizer = Adam(G_optim_params,
                                lr=self.opt_train['G_optimizer_lr'],
                                weight_decay=0)

    # ----------------------------------------
    # define scheduler, only "MultiStepLR"
    # ----------------------------------------
    def define_scheduler(self):
        self.schedulers.append(
            lr_scheduler.MultiStepLR(self.G_optimizer,
                                     self.opt_train['G_scheduler_milestones'],
                                     self.opt_train['G_scheduler_gamma']))

    """
    # ----------------------------------------
    # Optimization during training with data
    # Testing/evaluation
    # ----------------------------------------
    """

    # ----------------------------------------
    # feed L/H data
    # ----------------------------------------
    def feed_data(self, data, need_H=True):
        self.L = data['L'].to(self.device)  # low-quality image
        self.k = data['k'].to(self.device)  # blur kernel
        # FIX: `np.int` is a deprecated alias removed in NumPy >= 1.24;
        # the builtin int() is the drop-in replacement.
        self.sf = int(
            data['sf'][0, ...].squeeze().cpu().numpy())  # scale factor
        self.sigma = data['sigma'].to(self.device)  # noise level
        if need_H:
            self.H = data['H'].to(self.device)  # H

    # ----------------------------------------
    # update parameters and get loss
    # ----------------------------------------
    def optimize_parameters(self, current_step):
        self.G_optimizer.zero_grad()
        self.E = self.netG(self.L, self.k, self.sf, self.sigma)
        G_loss = self.G_lossfn_weight * self.G_lossfn(self.E, self.H)
        G_loss.backward()

        # ------------------------------------
        # clip_grad
        # ------------------------------------
        # `clip_grad_norm` helps prevent the exploding gradient problem.
        G_optimizer_clipgrad = self.opt_train[
            'G_optimizer_clipgrad'] if self.opt_train[
                'G_optimizer_clipgrad'] else 0
        if G_optimizer_clipgrad > 0:
            # NOTE(review): clips `self.parameters()` (the ModelBase wrapper);
            # confirm ModelBase forwards netG's parameters — otherwise
            # `self.netG.parameters()` is the intended target.
            torch.nn.utils.clip_grad_norm_(
                self.parameters(),
                max_norm=self.opt_train['G_optimizer_clipgrad'],
                norm_type=2)

        self.G_optimizer.step()

        # ------------------------------------
        # regularizer
        # ------------------------------------
        # Periodic orthogonality / clipping regularization of netG's weights,
        # skipped on checkpoint-save steps.
        G_regularizer_orthstep = self.opt_train[
            'G_regularizer_orthstep'] if self.opt_train[
                'G_regularizer_orthstep'] else 0
        if G_regularizer_orthstep > 0 and current_step % G_regularizer_orthstep == 0 and current_step % self.opt[
                'train']['checkpoint_save'] != 0:
            self.netG.apply(regularizer_orth)
        G_regularizer_clipstep = self.opt_train[
            'G_regularizer_clipstep'] if self.opt_train[
                'G_regularizer_clipstep'] else 0
        if G_regularizer_clipstep > 0 and current_step % G_regularizer_clipstep == 0 and current_step % self.opt[
                'train']['checkpoint_save'] != 0:
            self.netG.apply(regularizer_clip)

        self.log_dict['G_loss'] = G_loss.item()  # /self.E.size()[0]

    # ----------------------------------------
    # test / inference
    # ----------------------------------------
    def test(self):
        self.netG.eval()
        with torch.no_grad():
            self.E = self.netG(self.L, self.k, self.sf, self.sigma)
        self.netG.train()

    # ----------------------------------------
    # get log_dict
    # ----------------------------------------
    def current_log(self):
        return self.log_dict

    # ----------------------------------------
    # get L, E, H image (first sample of the batch, on CPU)
    # ----------------------------------------
    def current_visuals(self, need_H=True):
        out_dict = OrderedDict()
        out_dict['L'] = self.L.detach()[0].float().cpu()
        out_dict['E'] = self.E.detach()[0].float().cpu()
        if need_H:
            out_dict['H'] = self.H.detach()[0].float().cpu()
        return out_dict

    # ----------------------------------------
    # get L, E, H batch images
    # ----------------------------------------
    def current_results(self, need_H=True):
        out_dict = OrderedDict()
        out_dict['L'] = self.L.detach().float().cpu()
        out_dict['E'] = self.E.detach().float().cpu()
        if need_H:
            out_dict['H'] = self.H.detach().float().cpu()
        return out_dict

    """
    # ----------------------------------------
    # Information of netG
    # ----------------------------------------
    """

    # ----------------------------------------
    # print network
    # ----------------------------------------
    def print_network(self):
        msg = self.describe_network(self.netG)
        print(msg)

    # ----------------------------------------
    # print params
    # ----------------------------------------
    def print_params(self):
        msg = self.describe_params(self.netG)
        print(msg)

    # ----------------------------------------
    # network information
    # ----------------------------------------
    def info_network(self):
        msg = self.describe_network(self.netG)
        return msg

    # ----------------------------------------
    # params information
    # ----------------------------------------
    def info_params(self):
        msg = self.describe_params(self.netG)
        return msg
def project_unseen_tensor(model, test_data, num_visits, num_feats, pos_prior, reg_weight, smooth_weight, lr, seed, batch_size, smooth_shape, iters, num_workers=5): if seed is not None: torch.manual_seed(seed) # set up the projector projector = LogisticPARAFAC2(num_visits, num_feats, model.rank, alpha=model.alpha, gamma=model.gamma, is_projector=True) projector.V.data = model.V.data.clone() projector.V.requires_grad = False projector.Phi.data = model.Phi.data.clone() projector.cuda() smoothness = SmoothnessConstraint(beta=smooth_shape) tf_loss_func = PULoss(prior=pos_prior) optimizer = Adam([projector.U, projector.S], lr=lr) collator_train = PaddedDenseTensor(test_data, num_feats, subset='train') data_loader = DataLoader(TensorDataset(torch.arange(len(num_visits))), shuffle=True, num_workers=num_workers, batch_size=batch_size, collate_fn=collator_train) pbar = tqdm(total=iters, desc=f'Projecting unseen test tensor onto factor matrices') for epoch in range(iters): epoch_tf_loss = AverageMeter() epoch_uni_reg = AverageMeter() for pids, Xdense, masks, deltas in data_loader: num_visits_batch = masks.squeeze(-1).sum(dim=1).cuda() num_visits_batch, pt_idx = num_visits_batch.sort(descending=True) pids = pids[pt_idx].cuda() Xdense = Xdense[pt_idx].cuda() masks = masks[pt_idx].cuda() deltas = deltas[pt_idx].cuda() / 7 optimizer.zero_grad() output = projector(pids) loss, out = tf_loss_func(output, Xdense, masks=masks) uni_reg = projector.uniqueness_regularization(pids) out = out + reg_weight * uni_reg smoothness_reg = smoothness(projector.U[pids], num_visits_batch, deltas) out = out + smooth_weight * smoothness_reg out.backward() optimizer.step() projector.projection() epoch_tf_loss.update(loss.item(), n=masks.sum()) epoch_uni_reg.update(uni_reg.item(), n=pids.shape[0]) pbar.update() pbar.set_description(f'Projection done.') pbar.close() return projector.cpu()
def train_logistic_parafac2(indata, num_visits, num_feats, log_path, pos_prior, reg_weight, smooth_weight, rank, weight_decay, alpha, gamma, lr, seed, batch_size, smooth_shape, iters, patience, num_workers=5):
    """Fit a LogisticPARAFAC2 tensor factorization with alternating updates.

    Each batch alternates two optimization phases: (1) update the patient
    factors U and S with V frozen, including uniqueness and smoothness
    regularizers; (2) update the phenotype factor V alone on the plain PU
    loss. Validation PR-AUC drives LR scheduling and early stopping; metrics
    go to TensorBoard. Runs on CUDA.

    Returns:
        (best model on CPU, best validation PR-AUC, test PR-AUC).
    """
    if seed is not None:
        torch.manual_seed(seed)
    model = LogisticPARAFAC2(num_visits, num_feats, rank, alpha=alpha, gamma=gamma).cuda()
    smoothness = SmoothnessConstraint(beta=smooth_shape)
    tf_loss_func = PULoss(prior=pos_prior)
    # Separate optimizers/schedulers for the two alternating phases.
    optimizer_pt_reps = Adam([model.U, model.S], lr=lr, weight_decay=weight_decay)
    optimizer_phenotypes = Adam([model.V], lr=lr, weight_decay=weight_decay)
    # mode='max' because the monitored quantity is validation PR-AUC.
    lr_scheduler_pt_reps = ReduceLROnPlateau(optimizer_pt_reps, mode='max', cooldown=10, min_lr=1e-6)
    lr_scheduler_phenotypes = ReduceLROnPlateau(optimizer_phenotypes, mode='max', cooldown=10, min_lr=1e-6)
    writer = SummaryWriter(log_path)
    # One loader per subset; datasets yield patient indices that the
    # collator expands into padded dense slices.
    collators = [
        PaddedDenseTensor(indata, num_feats, subset=subset)
        for subset in ('train', 'validation', 'test')
    ]
    loaders = [
        DataLoader(TensorDataset(torch.arange(len(num_visits))), shuffle=True, num_workers=num_workers, batch_size=batch_size, collate_fn=collator)
        for collator in collators
    ]
    train_loader, valid_loader, test_loader = loaders
    early_stopping = EarlyStopping(patience=patience)
    for epoch in range(iters):
        epoch_tf_loss = AverageMeter()
        epoch_uni_reg = AverageMeter()
        pbar = tqdm(total=len(train_loader), desc=f'Epoch {epoch+1}')
        lr = optimizer_pt_reps.param_groups[0]['lr']
        for pids, Xdense, masks, deltas in train_loader:
            # Sort the batch by true visit count (descending) and move to GPU.
            num_visits_batch = masks.squeeze(-1).sum(dim=1).cuda()
            num_visits_batch, pt_idx = num_visits_batch.sort(descending=True)
            pids = pids[pt_idx].cuda()
            Xdense = Xdense[pt_idx].cuda()
            masks = masks[pt_idx].cuda()
            deltas = deltas[pt_idx].cuda() / 7  # transform days to weeks
            # update U & S (phenotypes V frozen)
            model.S.requires_grad = True
            model.U.requires_grad = True
            model.V.requires_grad = False
            optimizer_pt_reps.zero_grad()
            output = model(pids)
            loss, out = tf_loss_func(output, Xdense, masks=masks)
            uni_reg = model.uniqueness_regularization(pids)
            out = out + reg_weight * uni_reg
            smoothness_reg = smoothness(model.U[pids], num_visits_batch, deltas=deltas)
            out = out + smooth_weight * smoothness_reg
            out.backward()
            optimizer_pt_reps.step()
            model.projection()  # project factors back onto the feasible set
            epoch_uni_reg.update(uni_reg.item(), n=pids.shape[0])
            # update V (patient factors frozen, plain PU loss)
            model.S.requires_grad = False
            model.U.requires_grad = False
            model.V.requires_grad = True
            optimizer_phenotypes.zero_grad()
            output = model(pids)
            loss, out = tf_loss_func(output, Xdense, masks=masks)
            out.backward()
            optimizer_phenotypes.step()
            model.projection()
            epoch_tf_loss.update(loss.item(), n=masks.sum())
            pbar.update()
        model.update_phi()
        # Validation PR-AUC drives both LR schedulers.
        ap_valid = validate(model, valid_loader)
        lr_scheduler_pt_reps.step(ap_valid)
        lr_scheduler_phenotypes.step(ap_valid)
        pbar.set_description(f'Epoch {epoch+1}: loss={epoch_tf_loss.avg:.5e}, '
                             f'uni_reg={epoch_uni_reg.avg:.5e}'
                             f', lr={lr:.2e}'
                             f', completion@valid: PR-AUC={ap_valid:.3f}')
        pbar.close()
        writer.add_scalar('training/loss', epoch_tf_loss.avg, epoch + 1)
        writer.add_scalar('training/uniqueness_regularization', epoch_uni_reg.avg, epoch + 1)
        writer.add_scalar('validation/completion-AP', ap_valid, epoch + 1)
        # early stopping
        if early_stopping(ap_valid, model):
            print('Early Stopped.')
            break
    # Restore the best checkpoint tracked by the early stopper and report.
    model = early_stopping.best_model
    print('Model with best validation performance is restored.')
    ap_valid = validate(model, valid_loader)
    print(f'Best PR-AUC for completion@validation set: {ap_valid:.3f}')
    ap_test = validate(model, test_loader)
    print(f'PR-AUC for completion@test set: {ap_test:.3f}\n\n')
    return model.cpu(), ap_valid, ap_test
def experiment(exp_specs):
    """Train an AttentiveVAE on a 2-digit multi-MNIST dataset.

    Parameters
    ----------
    exp_specs : dict
        Experiment configuration. Reads 'use_gpu', 'exp_id', 'exp_name',
        'seed', 'vae_specs' (architecture sub-dicts), 'model_lr', 'model_wd',
        'epochs', 'batch_size' and 'freq_val'.

    Side effects: sets up the project logger, trains the model, and
    periodically writes reconstruction/mask images under `path`.
    """
    ptu.set_gpu_mode(exp_specs['use_gpu'])

    # Set up logging ----------------------------------------------------------
    exp_id = exp_specs['exp_id']
    exp_prefix = exp_specs['exp_name']
    seed = exp_specs['seed']
    set_seed(seed)
    setup_logger(exp_prefix=exp_prefix, exp_id=exp_id, variant=exp_specs)

    # Prep the data -----------------------------------------------------------
    path = 'junk_vis/debug_att_vae_shallower_48_64_dim_0p1_kl_stronger_seg_conv'
    (X_train, Y_train), (X_test, Y_test) = multi_mnist(path, max_digits=2, canvas_size=48, seed=42, use_max=False)

    # Encode "how many digits are on the canvas" (0/1/2) as a 2-slot
    # presence mask fed to the model alongside the image.
    convert_dict = {0: [0., 0.], 1: [1., 0.], 2: [1., 1.]}
    Num_train = np.array([convert_dict[a.shape[0]] for a in Y_train])
    Num_test = np.array([convert_dict[a.shape[0]] for a in Y_test])

    X_train = X_train[:, None, ...]  # add channel dimension
    X_test = X_test[:, None, ...]
    X_train, X_test = torch.FloatTensor(X_train) / 255.0, torch.FloatTensor(
        X_test) / 255.0
    mask_train, mask_test = torch.FloatTensor(Num_train), torch.FloatTensor(
        Num_test)

    # BUG FIX: TensorDataset requires tensors; the original passed the raw
    # numpy arrays (Num_train / Num_test) while the FloatTensor copies
    # (mask_train / mask_test) were built and then never used.
    train_ds = TensorDataset(X_train, mask_train)
    val_ds = TensorDataset(X_test, mask_test)

    # Model Definition --------------------------------------------------------
    model = AttentiveVAE([1, 48, 48], exp_specs['vae_specs']['z_dim'],
                         exp_specs['vae_specs']['x_encoder_specs'],
                         exp_specs['vae_specs']['z_seg_conv_specs'],
                         exp_specs['vae_specs']['z_seg_fc_specs'],
                         exp_specs['vae_specs']['z_obj_conv_specs'],
                         exp_specs['vae_specs']['z_obj_fc_specs'],
                         exp_specs['vae_specs']['z_seg_recon_fc_specs'],
                         exp_specs['vae_specs']['z_seg_recon_upconv_specs'],
                         exp_specs['vae_specs']['z_obj_recon_fc_specs'],
                         exp_specs['vae_specs']['z_obj_recon_upconv_specs'],
                         exp_specs['vae_specs']['recon_upconv_part_specs'])
    if ptu.gpu_enabled():
        model.cuda()

    # Optimizer ---------------------------------------------------------------
    model_optim = Adam(model.parameters(),
                       lr=float(exp_specs['model_lr']),
                       weight_decay=float(exp_specs['model_wd']))

    # -------------------------------------------------------------------------
    global_iter = 0
    for epoch in range(exp_specs['epochs']):
        train_loader = DataLoader(train_ds,
                                  batch_size=exp_specs['batch_size'],
                                  shuffle=True,
                                  num_workers=4,
                                  pin_memory=False,
                                  drop_last=True)
        for iter_num, img_batch in enumerate(train_loader):
            img_batch, num_batch = img_batch[0], img_batch[1]
            if ptu.gpu_enabled():
                img_batch = img_batch.cuda()

            what_means, what_log_covs, where_means, where_log_covs, masks, recon_mean, recon_log_cov = model(
                img_batch, num_batch)
            elbo, KL = model.compute_ELBO(what_means + where_means,
                                          what_log_covs + where_log_covs,
                                          recon_mean,
                                          recon_log_cov,
                                          img_batch,
                                          average_over_batch=True)
            loss = -1. * elbo
            # Mask sparsity penalty added to the negative ELBO.
            loss = loss + 1. * sum([m.mean() for m in masks])

            # BUG FIX: gradients were never cleared, so each step applied the
            # accumulated sum of all previous iterations' gradients.
            model_optim.zero_grad()
            loss.backward()
            model_optim.step()

            if global_iter % exp_specs['freq_val'] == 0:
                with torch.no_grad():
                    print('\nValidating Iter %d...' % global_iter)
                    model.eval()
                    idxs = np.random.choice(int(X_test.size(0)),
                                            size=exp_specs['batch_size'],
                                            replace=False)
                    # Use mask_test so the validation batch has the same
                    # tensor type as the training batches above.
                    img_batch, num_batch = X_test[idxs], mask_test[idxs]
                    if ptu.gpu_enabled():
                        img_batch = img_batch.cuda()
                    what_means, what_log_covs, where_means, where_log_covs, masks, recon_mean, recon_log_cov = model(
                        img_batch, num_batch)
                    elbo, KL = model.compute_ELBO(what_means + where_means,
                                                  what_log_covs + where_log_covs,
                                                  recon_mean,
                                                  recon_log_cov,
                                                  img_batch,
                                                  average_over_batch=True)
                    mse = ((recon_mean - img_batch)**2).mean()
                    print('ELBO:\t%.4f' % elbo)
                    print('MSE:\t%.4f' % mse)
                    print('KL:\t%.4f' % KL)
                    # Dump one example image / reconstruction / mask triple.
                    for i in range(1):
                        save_pytorch_tensor_as_img(
                            img_batch[i].data.cpu(),
                            os.path.join(path, '%d_%d_img.png' % (global_iter, i)))
                        save_pytorch_tensor_as_img(
                            recon_mean[i].data.cpu(),
                            os.path.join(path, '%d_%d_recon.png' % (global_iter, i)))
                        save_pytorch_tensor_as_img(
                            masks[0][i].data.cpu(),
                            os.path.join(path, '%d_%d_mask_0.png' % (global_iter, i)))
                        # save_pytorch_tensor_as_img(masks[1][i].data.cpu(), os.path.join(path, '%d_%d_mask_1.png'%(global_iter, i)))
                    model.train()
            global_iter += 1
test_set = datasets.ImageFolder(data_dir2, transform = test_transformations) test_loader = DataLoader(test_set, batch_size = batch_size, shuffle = False, num_workers = 4) #check gpu support is available cuda_avail =torch.cuda.is_available() #create optimizer, model and lossfunction model = Custom_class(num_classes = 2) if cuda_avail: model.cuda() optimizer = Adam(model.parameters(), lr = 0.001, weight_decay = 0.0001) loss_fn = nn.CrossEntropyLoss() def lr_rate(epoch): lr = 0.001 if(epoch > 5): lr = lr/5 elif(epoch > 8): lr = lr/8 for param_group in optimizer.param_groups: param_group["lr"] = lr def save_models(epoch): torch.save(model.state_dict, "orangemodel_{}.model".format(epoch)) print("checkpoint saved")
def main():
    """Train (or evaluate) a GAMENet medication-recommendation model.

    Loads the pickled EHR records and vocabularies, splits patients 2/3
    train and the remainder half test / half eval, then trains with a
    combined BCE + multilabel-margin loss. When `args.ddi` is set, a
    simulated-annealing rule swaps in the model's DDI (negative) loss
    whenever the predicted DDI rate exceeds TARGET_DDI.
    """
    if not os.path.exists(os.path.join("saved", model_name)):
        os.makedirs(os.path.join("saved", model_name))

    # Pickled inputs: patient visit records, vocabularies, and the EHR
    # co-occurrence / DDI adjacency matrices used by the graph memory.
    data_path = 'Graph/GAMENet_master/data/records_final.pkl'
    voc_path = 'Graph/GAMENet_master/data/voc_final.pkl'
    ehr_adj_path = 'Graph/GAMENet_master/data/ehr_adj_final.pkl'
    ddi_adj_path = 'Graph/GAMENet_master/data/ddi_A_final.pkl'
    device = torch.device('cuda:0')

    ehr_adj = dill.load(open(ehr_adj_path, 'rb'))
    ddi_adj = dill.load(open(ddi_adj_path, 'rb'))
    data = dill.load(open(data_path, 'rb'))
    voc = dill.load(open(voc_path, 'rb'))
    diag_voc, pro_voc, med_voc = voc['diag_voc'], voc['pro_voc'], voc['med_voc']

    # 2/3 of patients for training; the rest split evenly into test/eval.
    split_point = int(len(data) * 2 / 3)
    data_train = data[:split_point]
    eval_len = int(len(data[split_point:]) / 2)
    data_test = data[split_point:split_point + eval_len]
    data_eval = data[split_point+eval_len:]

    EPOCH = 40
    LR = 0.0002
    TEST = args.eval          # evaluation-only mode
    Neg_Loss = args.ddi       # enable the DDI negative-loss annealing rule
    DDI_IN_MEM = args.ddi
    TARGET_DDI = 0.05         # acceptable DDI rate in predictions
    T = 0.5                   # annealing temperature
    decay_weight = 0.85       # per-epoch temperature decay

    voc_size = (len(diag_voc.idx2word), len(pro_voc.idx2word), len(med_voc.idx2word))

    model = GAMENet(voc_size, ehr_adj, ddi_adj, emb_dim=64, device=device, ddi_in_memory=DDI_IN_MEM)
    if TEST:
        model.load_state_dict(torch.load(open(resume_name, 'rb')))
    model.to(device=device)
    print('parameters', get_n_params(model))
    optimizer = Adam(list(model.parameters()), lr=LR)

    if TEST:
        # NOTE: `eval` here is the project's evaluation helper (shadows the builtin).
        eval(model, data_test, voc_size, 0)
    else:
        history = defaultdict(list)
        best_epoch = 0
        best_ja = 0
        for epoch in range(EPOCH):
            loss_record1 = []
            start_time = time.time()
            model.train()
            prediction_loss_cnt = 0
            neg_loss_cnt = 0
            for step, input in enumerate(data_train):
                # One optimization step per admission, conditioning on the
                # patient's visit history up to and including that admission.
                for idx, adm in enumerate(input):
                    seq_input = input[:idx+1]
                    # Multi-hot target over medications for BCE loss.
                    loss1_target = np.zeros((1, voc_size[2]))
                    loss1_target[:, adm[2]] = 1
                    # Margin-loss target: label indices packed at the front, -1 padding.
                    loss3_target = np.full((1, voc_size[2]), -1)
                    for idx, item in enumerate(adm[2]):
                        loss3_target[0][idx] = item
                    target_output1, batch_neg_loss = model(seq_input)
                    loss1 = F.binary_cross_entropy_with_logits(target_output1, torch.FloatTensor(loss1_target).to(device))
                    loss3 = F.multilabel_margin_loss(F.sigmoid(target_output1), torch.LongTensor(loss3_target).to(device))
                    if Neg_Loss:
                        # Threshold predictions to measure the current DDI rate.
                        target_output1 = F.sigmoid(target_output1).detach().cpu().numpy()[0]
                        target_output1[target_output1 >= 0.5] = 1
                        target_output1[target_output1 < 0.5] = 0
                        y_label = np.where(target_output1 == 1)[0]
                        current_ddi_rate = ddi_rate_score([[y_label]])
                        if current_ddi_rate <= TARGET_DDI:
                            loss = 0.9 * loss1 + 0.01 * loss3
                            prediction_loss_cnt += 1
                        else:
                            # Simulated annealing: with probability exp(-Δ/T),
                            # optimize the DDI negative loss instead.
                            rnd = np.exp((TARGET_DDI - current_ddi_rate)/T)
                            if np.random.rand(1) < rnd:
                                loss = batch_neg_loss
                                neg_loss_cnt += 1
                            else:
                                loss = 0.9 * loss1 + 0.01 * loss3
                                prediction_loss_cnt += 1
                    else:
                        loss = 0.9 * loss1 + 0.01 * loss3
                    optimizer.zero_grad()
                    loss.backward(retain_graph=True)
                    optimizer.step()
                    loss_record1.append(loss.item())
                llprint('\rTrain--Epoch: %d, Step: %d/%d, L_p cnt: %d, L_neg cnt: %d' % (epoch, step, len(data_train), prediction_loss_cnt, neg_loss_cnt))
            # annealing
            T *= decay_weight

            ddi_rate, ja, prauc, avg_p, avg_r, avg_f1 = eval(model, data_eval, voc_size, epoch)
            history['ja'].append(ja)
            history['ddi_rate'].append(ddi_rate)
            history['avg_p'].append(avg_p)
            history['avg_r'].append(avg_r)
            history['avg_f1'].append(avg_f1)
            history['prauc'].append(prauc)

            end_time = time.time()
            elapsed_time = (end_time - start_time) / 60
            llprint('\tEpoch: %d, Loss: %.4f, One Epoch Time: %.2fm, Appro Left Time: %.2fh\n' % (epoch,
                                                                                                  np.mean(loss_record1),
                                                                                                  elapsed_time,
                                                                                                  elapsed_time * (
                                                                                                          EPOCH - epoch - 1)/60))
            # Checkpoint every epoch, named after its eval metrics.
            torch.save(model.state_dict(), open(
                os.path.join('saved', model_name, 'Epoch_%d_JA_%.4f_DDI_%.4f.model' % (epoch, ja, ddi_rate)), 'wb'))
            print('')
            if epoch != 0 and best_ja < ja:
                best_epoch = epoch
                best_ja = ja

        dill.dump(history, open(os.path.join('saved', model_name, 'history.pkl'), 'wb'))

        # test
        torch.save(model.state_dict(), open(
            os.path.join('saved', model_name, 'final.model'), 'wb'))

        print('best_epoch:', best_epoch)
class BaseDDPGAgent(BaseAgent):
    """DDPG agent: deterministic actor + Q critic with Polyak-averaged targets.

    Exploration uses Ornstein-Uhlenbeck noise whose schedule advances with
    the internal step counter `_step`.
    """

    def __init__(self,
                 observation_space,
                 action_space,
                 actor_lr=1e-4,
                 critic_lr=1e-3,
                 gamma=0.99,
                 tau=1e-2):
        super(BaseDDPGAgent, self).__init__(observation_space, action_space)
        # Online networks and their frozen target copies (256 hidden units).
        self.actor = DDPGActorNet(observation_space.shape[0],
                                  action_space.shape[0], 256)
        self.target_actor = deepcopy(self.actor)
        self.actor_optim = Adam(self.actor.parameters(), lr=actor_lr)
        self.critic = DDPGCriticNet(observation_space.shape[0],
                                    action_space.shape[0], 256)
        self.target_critic = deepcopy(self.critic)
        self.critic_optim = Adam(self.critic.parameters(), lr=critic_lr)
        self.gamma = gamma  # discount factor
        self.tau = tau      # Polyak averaging coefficient for target updates
        self.noise = OUNoise(self.action_space)
        # Internal vars
        self._step = 0  # env steps taken; drives the noise schedule

    @property
    def models(self):
        """All networks, e.g. for moving to a device or saving."""
        return [
            self.actor, self.target_actor, self.critic, self.target_critic
        ]

    @property
    def checkpoint(self):
        """Serializable state (online networks only; targets are rebuilt)."""
        return {
            'actor': self.actor.state_dict(),
            'critic': self.critic.state_dict(),
        }

    @checkpoint.setter
    def checkpoint(self, cp):
        # Restore online nets and re-sync targets to exact copies.
        self.actor.load_state_dict(cp['actor'])
        self.critic.load_state_dict(cp['critic'])
        self.target_actor = deepcopy(self.actor)
        self.target_critic = deepcopy(self.critic)

    def act(self, obs, **kwargs):
        """Select a noisy, clipped action for `obs` and advance the step count."""
        obs_tensor = self.obs_to_tensor(obs)
        action = self.actor(obs_tensor)
        action = action.cpu().detach().numpy()
        action = self.noise.get_action(action, self._step)
        action = self.clip_action(action)
        self._step += 1
        return np.expand_dims(action, axis=1)

    def clip_action(self, action: np.ndarray):
        """Rescale an action from [-1, 1] into the action-space bounds and clip."""
        low_bound = self.action_space.low
        upper_bound = self.action_space.high

        action = low_bound + (action + 1.0) * 0.5 * (upper_bound - low_bound)
        action = np.clip(action, low_bound, upper_bound)

        return action

    def learn(self, obs, action, reward, next_obs, done, **kwargs):
        """One DDPG update; returns (actor_loss, critic_loss) as floats.

        The actor loss is computed from the critic *before* the critic is
        stepped; keep this ordering.
        """
        actor_loss = - self.critic(obs, self.actor(obs)).mean()

        # TD target from the frozen target networks; `done` masks bootstrapping.
        next_action = self.target_actor(next_obs).detach()
        current_q = self.critic(obs, action)
        target_q = reward + (1.0 - done.float()) * self.gamma * self.target_critic(next_obs, next_action)  # pylint: disable=line-too-long
        critic_loss = F.mse_loss(current_q, target_q.detach())

        self.actor_optim.zero_grad()
        actor_loss.backward()
        self.actor_optim.step()

        self.critic_optim.zero_grad()
        critic_loss.backward()
        self.critic_optim.step()

        # Slowly track the online networks with the targets.
        polyak_average_(self.actor, self.target_actor, self.tau)
        polyak_average_(self.critic, self.target_critic, self.tau)

        return actor_loss.detach().cpu().item(), \
            critic_loss.detach().cpu().item()

    def reset(self):
        """Reset the exploration noise and step counter at episode start."""
        self.noise.reset()
        self._step = 0
cond_mask[0,:,(object_y-object_h):(object_y+object_h),(object_x-object_w):(object_x+object_w)] = 1 flow_mask = 1-cond_mask a_old = toCuda(torch.zeros(1,1,h,w)) p_old = toCuda(torch.zeros(1,1,h,w)) return v_cond,cond_mask,flow_mask,a_old,p_old # initialize flow_v (this is the variable, we want to optimize such that we reach the target frequency) start_v = 0.3 if params.cuda: flow_v = torch.ones(1,1,1,1,requires_grad=True,device="cuda") else: flow_v = torch.ones(1,1,1,1,requires_grad=True) # initialize optimizer optim = Adam([flow_v],lr=0.2) E_fft_ys = [] # to obtain smoother gradients, we scale the v_y(t) curve with a gaussian velocity_y_curve_scaler = torch.exp(-((toCuda(torch.arange(n_time_steps).unsqueeze(1))-n_time_steps/2)/n_time_steps*4)**2) # optimization loop: for epoch in range(200): v_cond,cond_mask,flow_mask,a_old,p_old = get_problem(w,h) v_cond = normal2staggered(v_cond) cond_mask_mac = (normal2staggered(cond_mask.repeat(1,2,1,1))==1).float() flow_mask_mac = (normal2staggered(flow_mask.repeat(1,2,1,1))>=0.5).float() # warm up simulation with n_warmup_time_steps with torch.no_grad():
def train(args):
    """Train a fast neural-style TransformerNet on a folder of content images.

    Perceptual training in the style of Johnson et al.: content loss on VGG16
    relu2_2 activations, style loss on Gram matrices of all tapped VGG layers.
    Logs running losses every `args.log_interval` batches, optionally writes
    intermediate checkpoints, and saves the final model to
    `args.save_model_dir`.
    """
    device = torch.device("cuda" if args.cuda else "cpu")

    # Seed both RNGs for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Content pipeline: resize, center-crop, and keep pixel values in [0, 255].
    content_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.mul(255)),
    ])
    content_dataset = datasets.ImageFolder(args.dataset, content_transform)
    content_loader = DataLoader(content_dataset, batch_size=args.batch_size)

    transformer = TransformerNet().to(device)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse = torch.nn.MSELoss()
    vgg = Vgg16(requires_grad=False).to(device)  # frozen feature extractor

    # Style target: load once, tile across the batch, precompute Gram matrices.
    style_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.mul(255)),
    ])
    style_img = utils.load_image(args.style_image, size=args.style_size)
    style_img = style_transform(style_img)
    style_img = style_img.repeat(args.batch_size, 1, 1, 1).to(device)
    style_features = vgg(utils.normalize_batch(style_img))
    style_grams = [utils.gram_matrix(f) for f in style_features]

    for epoch in range(args.epochs):
        transformer.train()
        content_loss_sum = 0.
        style_loss_sum = 0.
        seen = 0
        for batch_id, (batch, _) in enumerate(content_loader):
            batch_size = len(batch)
            seen += batch_size
            optimizer.zero_grad()

            batch = batch.to(device)
            output = utils.normalize_batch(transformer(batch))
            batch = utils.normalize_batch(batch)

            output_features = vgg(output)
            batch_features = vgg(batch)

            # Content: match relu2_2 activations of input and output.
            content_loss = args.content_weight * mse(output_features.relu2_2,
                                                     batch_features.relu2_2)

            # Style: match Gram matrices at every tapped VGG layer.
            style_loss = 0.
            for out_f, gram_target in zip(output_features, style_grams):
                style_loss += mse(utils.gram_matrix(out_f),
                                  gram_target[:batch_size, :, :])
            style_loss *= args.style_weight

            (content_loss + style_loss).backward()
            optimizer.step()

            content_loss_sum += content_loss.item()
            style_loss_sum += style_loss.item()

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                    time.ctime(), epoch + 1, seen, len(content_dataset),
                    content_loss_sum / (batch_id + 1),
                    style_loss_sum / (batch_id + 1),
                    (content_loss_sum + style_loss_sum) / (batch_id + 1)
                )
                print(mesg)

            if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0:
                # Snapshot on CPU in eval mode, then restore training state.
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(epoch) + "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()

    # save model
    transformer.eval().cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
        args.content_weight) + "_" + str(args.style_weight) + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)

    print("\nDone, trained model saved at", save_model_path)
'n_layers': args.n_layers, 'emb_size': emb_size, 'dim_m': args.model_dim, 'n_heads': args.n_heads, 'dim_i': args.inner_dim, 'dropout': args.dropout, 'embedding_weights': embeddings } model = TransformerSummarizer(**m_args).to(device) m_args['embedding_weights'] = None optimizer = Adam(model.learnable_parameters(), lr=args.learning_rate, amsgrad=True, betas=[0.9, 0.98], eps=1e-9) logging.info('Start training') for i in range(args.iters): try: train_batch = loader.next_batch(args.train_bs, 'train', device) loss, seq = model.train_step(train_batch, optimizer) if i % args.train_interval == 0: logging.info('Iteration %d; Loss: %f', i, loss) writer.add_scalar('Loss', loss, i) if i % args.train_sample_interval == 0: text = loader.decode_raw(train_batch.src)[0]
class Trainer:
    """Training/evaluation harness for a capsule-network model.

    Two operating modes:
    * capsule mode (``spw_mode=False``): the model returns reconstruction,
      capsule outputs and embeddings; loss = margin + reconstruction, and
      validation collects routing weights / embeddings for plotting.
    * SPW mode (``spw_mode=True``): plain classifier head; loss = model loss
      plus an elastic-net (l1/l2) penalty on the module inputs.
    """

    def __init__(self,
                 model,
                 validation_dataloader,
                 n_epochs,
                 lr,
                 n_primary,
                 custom_loss,
                 gamma2,
                 class_balance=False,
                 spw_mode=False,
                 l1=0.,
                 l2=0.):
        # model: the capsule/SPW network to optimize.
        # validation_dataloader: held-out loader used after each train epoch.
        # n_primary: number of primary capsules (used for plot labelling).
        # custom_loss: 'none' or 'cox'; gamma2 weights it (currently unused path).
        self.model = model
        self.validation_dataloader = validation_dataloader
        self.lr = lr
        self.optimizer = Adam(self.model.parameters(), self.lr)
        # self.model, self.optimizer = amp.initialize(self.model, self.optimizer, opt_level='O0', loss_scale=1.0)#'dynamic'
        self.scheduler = CosineAnnealingLR(self.optimizer,
                                           T_max=10,
                                           eta_min=0,
                                           last_epoch=-1)
        self.n_epochs = n_epochs
        self.module_names = self.validation_dataloader.dataset.module_names
        self.n_primary = n_primary
        self.custom_loss = custom_loss
        self.gamma2 = gamma2
        self.custom_loss_fn = dict(none=None, cox=CoxLoss())[self.custom_loss]
        self.class_balance = class_balance
        self.SPWMode = spw_mode
        self.l1 = l1  # elastic-net L1 weight (SPW mode)
        self.l2 = l2  # elastic-net L2 weight (SPW mode)
        self.construct_plots = False

    def compute_custom_loss(self, y_pred_caps, y_true, y_true_orig):
        """Return the optional extra loss (0. when custom_loss == 'none')."""
        if self.custom_loss == 'none':
            return 0.
        else:
            loss = self.custom_loss_fn(y_pred_caps, y_true, y_true_orig)
            return loss

    def initialize_dirs(self):
        """Create the figure/result output directories used by validation."""
        for d in [
                'figures/{}'.format(x) for x in [
                    'embedding_primarycaps_aligned', 'embedding_primarycaps',
                    'embedding_primarycaps_cat', 'embedding_outputcaps'
                ]
        ]:
            os.makedirs(d, exist_ok=True)
        os.makedirs('results/routing_weights', exist_ok=True)

    #@pysnooper.snoop('fit_model.log')
    def fit(self, dataloader):
        """Train for n_epochs, keep the best model by validation loss; returns self."""
        if not self.SPWMode:
            self.initialize_dirs()
        if self.class_balance:
            # Per-class weights from the label distribution of the train set.
            self.weights = torch.tensor(compute_class_weight(
                'balanced',
                np.arange(len(dataloader.dataset.binarizer.classes_)),
                np.argmax(dataloader.dataset.y, axis=1)),
                                        dtype=torch.float)
            if not self.SPWMode:
                # Replicate per-sample for the batched margin loss.
                self.weights = torch.vstack([self.weights] * dataloader.batch_size)
            if torch.cuda.is_available():
                self.weights = self.weights.cuda()
        else:
            self.weights = 1.
        self.losses = dict(train=[], val=[])
        best_model = self.model
        self.val_losses = []
        for epoch in range(self.n_epochs):
            self.epoch = epoch
            self.losses['train'].append(self.train_loop(dataloader))
            val_loss = self.val_test_loop(self.validation_dataloader)[0]
            # val_loss is a 3-vector [total, margin, recon]; track component 0.
            self.val_losses.append(val_loss[0])
            self.losses['val'].append(val_loss)
            if val_loss[0] <= min(self.val_losses):
                best_model = copy.deepcopy(self.model)
        self.model = best_model
        return self

    def predict(self, dataloader):
        """Run a labelled evaluation pass and return the collected outputs dict."""
        self.initialize_dirs()
        self.epoch = 'Test'
        test_loss, Y = self.val_test_loop(dataloader)
        return Y

    #@pysnooper.snoop('train_loop.log')
    def train_loop(self, dataloader):
        """One training epoch; returns the mean margin loss over batches."""
        self.model.train(True)
        running_loss = 0.
        Y = {'true': [], 'pred': []}
        n_batch = (len(dataloader.dataset.y_orig) // dataloader.batch_size)
        for i, batch in enumerate(dataloader):
            # batch layout: [x_orig, module_x..., y_true]
            x_orig = batch[0]
            #print(x_orig)
            y_true = batch[-1]  #[-2]
            #y_true_orig=batch[-1]
            module_x = batch[1:-1]  #2]
            if torch.cuda.is_available():
                x_orig = x_orig.cuda()
                y_true = y_true.cuda()
                #y_true_orig=y_true_orig.cuda()
                # SPW mode takes a single module tensor; capsule mode a list.
                module_x = [mod.cuda() for mod in module_x
                            ] if not self.SPWMode else module_x[0].cuda()
            if not self.SPWMode:
                x_orig, x_hat, y_pred, embedding, primary_caps_out = self.model(
                    x_orig, module_x)
                loss, margin_loss, recon_loss = self.model.calculate_loss(
                    x_orig, x_hat, y_pred, y_true, weights=self.weights)
            else:
                y_true = y_true.argmax(1)  # one-hot -> class index
                y_pred, _ = self.model(x_orig, module_x)
                loss = self.model.calculate_loss(y_pred, y_true)
                margin_loss = loss
                loss = loss + self.model.calc_elastic_norm_loss(
                    self.l1, self.l2, module_x)
            #loss=loss+self.gamma2*self.compute_custom_loss(y_pred, y_true, y_true_orig)
            self.optimizer.zero_grad()
            loss.backward()
            # with amp.scale_loss(loss,self.optimizer) as scaled_loss, detect_anomaly():
            #     scaled_loss.backward()
            #loss.backward()
            self.optimizer.step()
            if not self.SPWMode:
                Y['true'].extend(
                    y_true.argmax(1).detach().cpu().numpy().flatten().astype(
                        int).tolist())
                # Capsule prediction = argmax of capsule vector lengths.
                Y['pred'].extend(
                    F.softmax(torch.sqrt((y_pred**2).sum(2))).argmax(1).detach(
                    ).cpu().numpy().astype(int).flatten().tolist())
            else:
                Y['true'].extend(
                    y_true.detach().cpu().numpy().flatten().astype(
                        int).tolist())
                Y['pred'].extend(
                    y_pred.argmax(1).detach().cpu().numpy().flatten().astype(
                        int).tolist())
            # Only the margin component is tracked as the epoch's train loss.
            train_loss = margin_loss.item()
            #print(loss)
            print('Epoch {} [{}/{}]: Train Loss {}'.format(
                self.epoch, i, n_batch, train_loss))
            running_loss += train_loss
        #y_true,y_pred=Y['true'],Y['pred']
        running_loss /= (i + 1)
        print('Epoch {}: Train Loss {}, Train R2: {}, Train MAE: {}'.format(
            self.epoch, running_loss, r2_score(Y['true'], Y['pred']),
            mean_absolute_error(Y['true'], Y['pred'])))
        print(classification_report(Y['true'], Y['pred']))
        #print(capsnet.primary_caps.get_weights())
        self.scheduler.step()
        return running_loss

    # @pysnooper.snoop('val_loop.log')
    def val_test_loop(self, dataloader):
        """Evaluation pass without gradients.

        Returns (running_loss, Y) where running_loss is the per-batch mean of
        [total, margin, recon] losses and Y collects predictions plus (in
        capsule mode) routing weights and embeddings packed as xarray/pandas.
        """
        self.model.train(False)
        running_loss = np.zeros((3, )).astype(float)
        n_batch = int(
            np.ceil(len(dataloader.dataset.y_orig) / dataloader.batch_size))
        Y = {
            'true': [],
            'pred': [],
            'embedding_primarycaps_aligned': [],
            'embedding_primarycaps': [],
            'embedding_primarycaps_cat': [],
            'embedding_outputcaps': [],
            'routing_weights': [],
            'z': []
        }
        with torch.no_grad():
            for i, batch in enumerate(dataloader):
                x_orig = batch[0]
                y_true = batch[-1]  #2
                #y_true_orig=batch[-1]
                module_x = batch[1:-1]  #2
                if torch.cuda.is_available():
                    x_orig = x_orig.cuda()
                    y_true = y_true.cuda()
                    #y_true_orig=y_true_orig.cuda()
                    module_x = [mod.cuda() for mod in module_x
                                ] if not self.SPWMode else module_x[0].cuda()
                if not self.SPWMode:
                    x_orig, x_hat, y_pred, embedding, primary_caps_out = self.model(
                        x_orig, module_x)
                    loss, margin_loss, recon_loss = self.model.calculate_loss(
                        x_orig, x_hat, y_pred, y_true, weights=self.weights)
                else:
                    y_true = y_true.argmax(1)
                    y_pred, Z = self.model(x_orig, module_x)
                    loss = self.model.calculate_loss(y_pred, y_true)
                    margin_loss = loss
                    # In SPW mode the "recon" slot holds the elastic-net penalty.
                    recon_loss = self.model.calc_elastic_norm_loss(
                        self.l1, self.l2, module_x)
                    loss = loss + recon_loss
                #loss=loss+self.gamma2*self.compute_custom_loss(y_pred, y_true, y_true_orig)
                val_loss = margin_loss.item()
                #print(loss)
                print('Epoch {} [{}/{}]: Val Loss {}, Recon/Elastic Loss {}'.
                      format(self.epoch, i, n_batch, val_loss, recon_loss))
                running_loss = running_loss + np.array(
                    [loss.item(
                    ), margin_loss.item(
                    ), recon_loss.item()] if not self.SPWMode else
                    [val_loss, margin_loss.item(
                    ), recon_loss.item()])
                if not self.SPWMode:
                    # Collect routing coefficients and all embedding views.
                    routing_coefs = self.model.caps_output_layer.return_routing_coef(
                    ).detach().cpu().numpy()
                    #print(routing_coefs.shape)
                    routing_coefs = routing_coefs[..., 0, 0]
                    #print(routing_coefs.shape)
                    Y['routing_weights'].append(
                        routing_coefs
                    )  #pd.DataFrame(routing_coefs.T,index=dataloader.dataset.binarizer.classes_,columns=dataloader.dataset.module_names)
                    Y['embedding_primarycaps'].append(
                        torch.cat([
                            primary_caps_out[i] for i in range(x_orig.size(0))
                        ],
                                  dim=0).detach().cpu().numpy())
                    # Flatten capsules for the concatenated embedding view.
                    primary_caps_out = primary_caps_out.view(
                        primary_caps_out.size(0),
                        primary_caps_out.size(1) * primary_caps_out.size(2))
                    Y['embedding_outputcaps'].append(
                        embedding.detach().cpu().numpy())
                    Y['embedding_primarycaps_cat'].append(
                        primary_caps_out.detach().cpu().numpy())
                    primary_caps_aligned = self.model.caps_output_layer.return_embedding_previous_layer(
                    )
                    Y['embedding_primarycaps_aligned'].append(
                        primary_caps_aligned.detach().cpu().numpy()
                    )  # [...,0,:] torch.cat([primary_caps_aligned[i] for i in range(x_orig.size(0))],dim=0)
                    Y['true'].extend(
                        y_true.argmax(1).detach().cpu().numpy().astype(
                            int).flatten().tolist())
                    Y['pred'].extend((y_pred**2).sum(2).argmax(1).detach().cpu(
                    ).numpy().astype(int).flatten().tolist())
                else:
                    Y['true'].extend(
                        y_true.detach().cpu().numpy().flatten().astype(
                            int).tolist())
                    Y['pred'].extend(
                        y_pred.argmax(1).detach().cpu().numpy().flatten().
                        astype(int).tolist())
                    Y['z'].append(Z.detach().cpu().numpy())
        running_loss /= (i + 1)
        if not self.SPWMode:
            #Y['routing_weights'].iloc[:,:]=Y['routing_weights'].values/(i+1)
            rw = np.concatenate(Y['routing_weights'], axis=0)
            #print(rw.shape)
            # Pack routing weights as a labelled (sample, primary, output) array.
            Y['routing_weights'] = xr.DataArray(
                rw,
                coords={
                    'sample': dataloader.dataset.sample_names,
                    'primary_capsules': dataloader.dataset.module_names,
                    'output_capsules': dataloader.dataset.binarizer.classes_
                },
                dims={
                    'sample': len(dataloader.dataset.sample_names),
                    'primary_capsules': len(dataloader.dataset.module_names),
                    'output_capsules':
                    len(dataloader.dataset.binarizer.classes_)
                })
            Y['embedding_primarycaps_aligned'] = np.concatenate(
                Y['embedding_primarycaps_aligned'], axis=0)
            print(Y['embedding_primarycaps_aligned'].shape)
            Y['pred'] = np.array(Y['pred']).astype(str)
            Y['true'] = np.array(Y['true']).astype(str)
            print(
                'Epoch {}: Val Loss {}, Margin Loss {}, Recon Loss {}, Val R2: {}, Val MAE: {}'
                .format(
                    self.epoch, running_loss[0], running_loss[1],
                    running_loss[2],
                    r2_score(Y['true'].astype(float), Y['pred'].astype(float)),
                    mean_absolute_error(Y['true'].astype(float),
                                        Y['pred'].astype(float))))
            print(classification_report(Y['true'], Y['pred']))
            # Plot on a deep copy so Y itself keeps the raw arrays.
            Y_plot = copy.deepcopy(Y)
            Y_plot['embedding_primarycaps_aligned'] = np.concatenate(
                [
                    Y_plot['embedding_primarycaps_aligned'][i, :, 0, :]
                    for i in range(
                        Y_plot['embedding_primarycaps_aligned'].shape[0])
                ],
                axis=0)
            #print(Y_plot['embedding_primarycaps_aligned'])
            self.make_plots(Y_plot, dataloader)
            self.save_routing_weights(Y)
            Y['embedding_primarycaps_aligned'] = xr.DataArray(
                Y['embedding_primarycaps_aligned'],
                coords={
                    'sample': dataloader.dataset.sample_names,
                    'primary_capsules': dataloader.dataset.module_names,
                    'output_capsules': dataloader.dataset.binarizer.classes_,
                    'z_primary':
                    np.arange(Y['embedding_primarycaps_aligned'].shape[3])
                },
                dims={
                    'sample': len(dataloader.dataset.sample_names),
                    'primary_capsules': len(dataloader.dataset.module_names),
                    'output_capsules':
                    len(dataloader.dataset.binarizer.classes_),
                    'z_primary': Y['embedding_primarycaps_aligned'].shape[3]
                })
        else:
            Y['pred'] = np.array(Y['pred']).astype(str)
            Y['true'] = np.array(Y['true']).astype(str)
            Y['z'] = pd.DataFrame(np.vstack(Y['z']),
                                  index=dataloader.dataset.sample_names,
                                  columns=dataloader.dataset.module_names)
            print(
                'Epoch {}: Val Loss {}, Margin Loss {}, Recon Loss {}, Val R2: {}, Val MAE: {}'
                .format(
                    self.epoch, running_loss[0], running_loss[1],
                    running_loss[2],
                    r2_score(Y['true'].astype(float), Y['pred'].astype(float)),
                    mean_absolute_error(Y['true'].astype(float),
                                        Y['pred'].astype(float))))
            print(classification_report(Y['true'], Y['pred']))
        return running_loss, Y

    #@pysnooper.snoop('plots.log')
    def make_plots(self, Y, dataloader):
        """Write interactive 2D PCA scatter plots of each embedding view to HTML."""
        for k in ['embedding_primarycaps', 'embedding_primarycaps_aligned']:
            Y[k] = pd.DataFrame(PCA(n_components=2).fit_transform(
                np.vstack(Y[k])),
                                columns=['x', 'y'])
            # One row per (sample, primary capsule); label by capsule and by class.
            Y[k]['pos'] = self.module_names * dataloader.dataset.y.shape[
                0]  #ma_v.beta.shape[0]#Y['true']
            Y[k]['true'] = list(
                reduce(lambda x, y: x + y,
                       [[i] * self.n_primary for i in Y['true']]))
            for k2 in ['pos', 'true']:
                fig = px.scatter(Y[k], x="x", y="y", color=k2)  #, text='color')
                py.plot(fig,
                        filename='figures/{0}/{0}.{1}.{2}.html'.format(
                            k, self.epoch, k2),
                        auto_open=False)
        for k in ['embedding_outputcaps', 'embedding_primarycaps_cat']:
            Y[k] = pd.DataFrame(PCA(n_components=2).fit_transform(
                np.vstack(Y[k])),
                                columns=['x', 'y'])
            for k2 in ['true', 'pred']:
                Y[k]['color'] = Y[k2]
                fig = px.scatter(Y[k], x="x", y="y", color="color")
                py.plot(fig,
                        filename='figures/{0}/{0}.{1}.{2}.html'.format(
                            k, self.epoch, k2),
                        auto_open=False)

    def save_routing_weights(self, Y):
        """Pickle the routing-weight DataArray for the current epoch."""
        pickle.dump(
            Y['routing_weights'],
            open(
                'results/routing_weights/routing_weights.{}.p'.format(
                    self.epoch), 'wb'))
    def train(self, training_data):
        """Train the DAN guesser on the Quiz Bowl dataset.

        Builds the train/val iterators, captures the vocab fields the model
        needs, constructs the DanModel, and trains until the TrainingManager
        (early stopping on `test_acc`, NaN detection, max-epoch cap) says stop.
        The best checkpoint is written to a temp `.pt` file (`self.model_file`).
        NOTE: `training_data` is not read here — data comes from QuizBowl.iters;
        presumably kept for interface compatibility.
        """
        log.info('Loading Quiz Bowl dataset')
        train_iter, val_iter, dev_iter = QuizBowl.iters(
            batch_size=self.batch_size, lower=self.lowercase,
            use_wiki=self.use_wiki, n_wiki_sentences=self.n_wiki_sentences,
            replace_title_mentions=self.wiki_title_replace_token,
            combined_ngrams=self.combined_ngrams, unigrams=self.unigrams,
            bigrams=self.bigrams, trigrams=self.trigrams,
            combined_max_vocab_size=self.combined_max_vocab_size,
            unigram_max_vocab_size=self.unigram_max_vocab_size,
            bigram_max_vocab_size=self.bigram_max_vocab_size,
            trigram_max_vocab_size=self.trigram_max_vocab_size
        )
        log.info(f'N Train={len(train_iter.dataset.examples)}')
        log.info(f'N Test={len(val_iter.dataset.examples)}')
        fields: Dict[str, Field] = train_iter.dataset.fields
        self.page_field = fields['page']
        self.n_classes = len(self.ans_to_i)
        self.qanta_id_field = fields['qanta_id']
        self.emb_dim = 300

        # Only the n-gram fields actually present in the dataset are kept;
        # the others stay at their default (presumably None) values.
        if 'text' in fields:
            self.text_field = fields['text']
            log.info(f'Text Vocab={len(self.text_field.vocab)}')
        if 'unigram' in fields:
            self.unigram_field = fields['unigram']
            log.info(f'Unigram Vocab={len(self.unigram_field.vocab)}')
        if 'bigram' in fields:
            self.bigram_field = fields['bigram']
            log.info(f'Bigram Vocab={len(self.bigram_field.vocab)}')
        if 'trigram' in fields:
            self.trigram_field = fields['trigram']
            log.info(f'Trigram Vocab={len(self.trigram_field.vocab)}')

        log.info('Initializing Model')
        self.model = DanModel(
            self.n_classes,
            text_field=self.text_field,
            unigram_field=self.unigram_field,
            bigram_field=self.bigram_field,
            trigram_field=self.trigram_field,
            emb_dim=self.emb_dim,
            n_hidden_units=self.n_hidden_units,
            n_hidden_layers=self.n_hidden_layers,
            nn_dropout=self.nn_dropout,
            pooling=self.pooling
        )
        if CUDA:
            self.model = self.model.cuda()
        log.info(f'Parameters:\n{self.parameters()}')
        log.info(f'Model:\n{self.model}')
        self.optimizer = Adam(self.model.parameters())
        self.criterion = nn.CrossEntropyLoss()
        # mode='max' because the scheduler tracks validation accuracy.
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=5,
                                                        verbose=True, mode='max')

        temp_prefix = get_tmp_filename()
        self.model_file = f'{temp_prefix}.pt'
        manager = TrainingManager([
            BaseLogger(log_func=log.info), TerminateOnNaN(),
            EarlyStopping(monitor='test_acc', patience=10, verbose=1),
            MaxEpochStopping(100),
            ModelCheckpoint(create_save_model(self.model), self.model_file,
                            monitor='test_acc')
        ])

        log.info('Starting training')

        epoch = 0
        while True:
            self.model.train()
            train_acc, train_loss, train_time = self.run_epoch(train_iter)

            self.model.eval()
            test_acc, test_loss, test_time = self.run_epoch(val_iter)

            # The manager decides whether any stopping criterion fired.
            stop_training, reasons = manager.instruct(
                train_time, train_loss, train_acc,
                test_time, test_loss, test_acc
            )

            if stop_training:
                log.info(' '.join(reasons))
                break
            else:
                # Step the LR scheduler only on epochs that continue training.
                self.scheduler.step(test_acc)
            epoch += 1
actions = torch.stack([step.action for step in batch]) rewards = Variable(Tensor([step.reward for step in batch])) succ_states = torch.stack([step.succ_state for step in batch]) dones = Variable(Tensor([step.done for step in batch])) return states, actions, rewards, succ_states, dones def get_critic_train_data(succ_states, rewards, dones): # r + Q(s, pi(s')) Q_succ = critic_target(succ_states, actor_target(succ_states)).squeeze() td_estimate = rewards + ((1 - dones) * hparams.discount * Q_succ) return td_estimate.detach() actor_opt = Adam(actor.parameters()) critic_opt = Adam(critic.parameters()) buffer = ReplayBuffer(hparams.buffer_size) s, rews = np_to_var(env.reset()), [] for hparam in hparams.trials(5): exp.add_argparse_meta(hparam) for timestep in range(hparam.num_steps): noise = Normal( mean=Variable(torch.zeros(A)), std=hparam.noise_factor * Variable(torch.ones(A)), ) if timestep % 1000 == 0: hparam.noise_factor /= 2
class DDPG(object):
    """Single-actor DDPG agent (old-style PyTorch: Variable/volatile API).

    Holds actor/critic networks plus their target copies, and performs
    one critic + one actor update per call to ``update_parameters``.
    """

    def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):
        self.num_inputs = num_inputs
        self.action_space = action_space
        self.actor = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_optim = Adam(self.actor.parameters(), lr=1e-4)
        self.critic = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-3)
        self.gamma = gamma  # discount factor
        self.tau = tau      # soft target-update rate
        hard_update(self.actor_target, self.actor)  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)

    def select_action(self, state, exploration=None):
        """Return the deterministic policy action, optionally with noise,
        clamped to [-1, 1]. Temporarily switches the actor to eval mode."""
        self.actor.eval()
        mu = self.actor((Variable(state, volatile=True)))
        self.actor.train()
        mu = mu.data
        if exploration is not None:
            mu += torch.Tensor(exploration.noise())
        return mu.clamp(-1, 1)

    def update_parameters(self, batch):
        """One DDPG gradient step on a transition batch, then soft target updates."""
        state_batch = Variable(torch.cat(batch.state))
        next_state_batch = Variable(torch.cat(batch.next_state), volatile=True)
        action_batch = Variable(torch.cat(batch.action))
        reward_batch = Variable(torch.cat(batch.reward))
        # NOTE(review): mask_batch is built but never used below -- standard
        # DDPG multiplies the bootstrap term by the terminal mask
        # (r + gamma * mask * Q'); verify whether its omission is intentional.
        mask_batch = Variable(torch.cat(batch.mask))
        next_action_batch = self.actor_target(next_state_batch)
        next_state_action_values = self.critic_target(next_state_batch, next_action_batch)
        reward_batch = torch.unsqueeze(reward_batch, 1)
        expected_state_action_batch = reward_batch + (self.gamma * next_state_action_values)
        # -- Critic update --
        self.critic_optim.zero_grad()
        state_action_batch = self.critic((state_batch), (action_batch))
        # NOTE(review): assumes `MSELoss` is a callable loss bound elsewhere
        # (e.g. F.mse_loss or an nn.MSELoss() instance); if it is the
        # nn.MSELoss *class*, this constructs a module instead of computing
        # a loss -- confirm at the import site.
        value_loss = MSELoss(state_action_batch, expected_state_action_batch)
        value_loss.backward()
        self.critic_optim.step()
        # -- Actor update: maximize Q(s, pi(s)) --
        self.actor_optim.zero_grad()
        policy_loss = -self.critic((state_batch), self.actor((state_batch)))
        policy_loss = policy_loss.mean()
        policy_loss.backward()
        self.actor_optim.step()
        # Polyak-average the target networks toward the live ones.
        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)
class ElmoGuesser(AbstractGuesser):
    """Quiz Bowl guesser backed by an ELMo encoder + linear classifier."""

    def __init__(self, config_num):
        super(ElmoGuesser, self).__init__(config_num)
        # config_num selects one hyper-parameter set from the global conf;
        # None means "bare instance" (e.g. during load()).
        if config_num is not None:
            guesser_conf = conf['guessers']['qanta.guesser.elmo.ElmoGuesser'][self.config_num]
            self.random_seed = guesser_conf['random_seed']
            self.dropout = guesser_conf['dropout']
        else:
            self.random_seed = None
            self.dropout = None
        self.model = None
        self.i_to_class = None
        self.class_to_i = None
        self.optimizer = None
        self.criterion = None
        self.scheduler = None
        self.model_file = None

    def parameters(self):
        """Return this guesser's hyper-parameter dict (for logging/reporting)."""
        return conf['guessers']['qanta.guesser.elmo.ElmoGuesser'][self.config_num]

    def train(self, training_data: TrainingData) -> None:
        """Preprocess, batch, and train until the TrainingManager stops us."""
        x_train, y_train, x_val, y_val, vocab, class_to_i, i_to_class = preprocess_dataset(training_data)
        self.class_to_i = class_to_i
        self.i_to_class = i_to_class
        log.info('Batchifying data')
        train_batches = batchify(x_train, y_train, shuffle=True)
        val_batches = batchify(x_val, y_val, shuffle=False)
        self.model = ElmoModel(len(i_to_class), dropout=self.dropout)
        if CUDA:
            self.model = self.model.cuda()
        log.info(f'Parameters:\n{self.parameters()}')
        log.info(f'Model:\n{self.model}')
        # Only train the classifier head and the ELMo scalar-mix weights;
        # the ELMo LM itself stays frozen.
        parameters = list(self.model.classifier.parameters())
        for mix in self.model.elmo._scalar_mixes:
            parameters.extend(list(mix.parameters()))
        self.optimizer = Adam(parameters)
        self.criterion = nn.CrossEntropyLoss()
        # mode='max' because the scheduler is stepped with test accuracy.
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=5,
                                                        verbose=True, mode='max')
        temp_prefix = get_tmp_filename()
        self.model_file = f'{temp_prefix}.pt'
        manager = TrainingManager([
            BaseLogger(log_func=log.info),
            TerminateOnNaN(),
            EarlyStopping(monitor='test_acc', patience=10, verbose=1),
            MaxEpochStopping(100),
            ModelCheckpoint(create_save_model(self.model), self.model_file, monitor='test_acc')
        ])
        log.info('Starting training')
        epoch = 0
        while True:
            self.model.train()
            train_acc, train_loss, train_time = self.run_epoch(train_batches)
            # Reshuffle between epochs (batchify fixed the initial order).
            random.shuffle(train_batches)
            self.model.eval()
            test_acc, test_loss, test_time = self.run_epoch(val_batches, train=False)
            stop_training, reasons = manager.instruct(
                train_time, train_loss, train_acc,
                test_time, test_loss, test_acc
            )
            if stop_training:
                log.info(' '.join(reasons))
                break
            else:
                self.scheduler.step(test_acc)
                epoch += 1

    def run_epoch(self, batches, train=True):
        """Run one pass over `batches`; returns (mean_acc, mean_loss, seconds).

        Gradients/optimizer steps happen only when train=True.
        NOTE(review): `.cuda()` is called unconditionally on the inputs here,
        unlike the `if CUDA` guard used elsewhere in this class -- this will
        fail on CPU-only hosts; confirm CUDA is assumed for training.
        """
        batch_accuracies = []
        batch_losses = []
        epoch_start = time.time()
        for x_batch, y_batch, length_batch in batches:
            if train:
                self.model.zero_grad()
            out = self.model(x_batch.cuda(), length_batch.cuda())
            _, preds = torch.max(out, 1)
            accuracy = torch.mean(torch.eq(preds, y_batch).float()).data[0]
            batch_loss = self.criterion(out, y_batch)
            if train:
                batch_loss.backward()
                torch.nn.utils.clip_grad_norm(self.model.parameters(), .25)
                self.optimizer.step()
            batch_accuracies.append(accuracy)
            batch_losses.append(batch_loss.data[0])
        epoch_end = time.time()
        return np.mean(batch_accuracies), np.mean(batch_losses), epoch_end - epoch_start

    def guess(self, questions: List[QuestionText],
              max_n_guesses: Optional[int]) -> List[List[Tuple[Page, float]]]:
        """Return, per question, the top `max_n_guesses` (page, prob) pairs."""
        # Dummy labels: batchify expects y but predictions ignore it.
        y_data = np.zeros((len(questions)))
        x_data = [tokenize_question(q) for q in questions]
        batches = batchify(x_data, y_data, shuffle=False, batch_size=32)
        guesses = []
        for x_batch, y_batch, length_batch in batches:
            out = self.model(x_batch.cuda(), length_batch.cuda())
            probs = F.softmax(out).data.cpu().numpy()
            preds = np.argsort(-probs, axis=1)  # classes by descending prob
            n_examples = probs.shape[0]
            for i in range(n_examples):
                example_guesses = []
                for p in preds[i][:max_n_guesses]:
                    example_guesses.append((self.i_to_class[p], probs[i][p]))
                guesses.append(example_guesses)
        return guesses

    @classmethod
    def targets(cls) -> List[str]:
        """Files produced by save() that identify a trained guesser."""
        return ['elmo.pt', 'elmo.pkl']

    @classmethod
    def load(cls, directory: str):
        """Reconstruct a guesser from save()'s pickle + state-dict pair."""
        with open(os.path.join(directory, 'elmo.pkl'), 'rb') as f:
            params = cloudpickle.load(f)
        guesser = ElmoGuesser(params['config_num'])
        guesser.class_to_i = params['class_to_i']
        guesser.i_to_class = params['i_to_class']
        guesser.random_seed = params['random_seed']
        guesser.dropout = params['dropout']
        guesser.model = ElmoModel(len(guesser.i_to_class))
        # map_location keeps CPU loading possible even for GPU checkpoints.
        guesser.model.load_state_dict(torch.load(
            os.path.join(directory, 'elmo.pt'),
            map_location=lambda storage, loc: storage
        ))
        guesser.model.eval()
        if CUDA:
            guesser.model = guesser.model.cuda()
        return guesser

    def save(self, directory: str) -> None:
        """Move the checkpoint into `directory` and pickle the metadata."""
        shutil.copyfile(self.model_file, os.path.join(directory, 'elmo.pt'))
        shell(f'rm -f {self.model_file}')
        with open(os.path.join(directory, 'elmo.pkl'), 'wb') as f:
            cloudpickle.dump({
                'class_to_i': self.class_to_i,
                'i_to_class': self.i_to_class,
                'config_num': self.config_num,
                'random_seed': self.random_seed,
                'dropout': self.dropout
            }, f)
def main(args, path_to_candidate_bonds):
    """Train the WLN candidate-product ranking model.

    Args:
        args: dict of hyper-parameters / paths (batch size, lr, device, ...).
        path_to_candidate_bonds: dict with 'train'/'val' candidate-bond files.

    Periodically prints progress, decays the LR, checkpoints, and runs
    candidate-ranking evaluation on the validation loader.
    """
    # Fall back to the bundled USPTO split when no explicit path is given.
    if args['train_path'] is None:
        train_set = USPTORank(
            subset='train',
            candidate_bond_path=path_to_candidate_bonds['train'],
            max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_train'],
            num_processes=args['num_processes'])
    else:
        train_set = WLNRankDataset(
            path_to_reaction_file=args['train_path'],
            candidate_bond_path=path_to_candidate_bonds['train'],
            mode='train',
            max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_train'],
            num_processes=args['num_processes'])
    train_set.ignore_large()
    if args['val_path'] is None:
        val_set = USPTORank(
            subset='val',
            candidate_bond_path=path_to_candidate_bonds['val'],
            max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_eval'],
            num_processes=args['num_processes'])
    else:
        val_set = WLNRankDataset(
            path_to_reaction_file=args['val_path'],
            candidate_bond_path=path_to_candidate_bonds['val'],
            mode='val',
            max_num_change_combos_per_reaction=args['max_num_change_combos_per_reaction_eval'],
            num_processes=args['num_processes'])
    # file_system sharing avoids fd exhaustion with many workers.
    if args['num_workers'] > 1:
        torch.multiprocessing.set_sharing_strategy('file_system')
    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_rank_train, shuffle=True,
                              num_workers=args['num_workers'])
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_rank_eval, shuffle=False,
                            num_workers=args['num_workers'])
    model = WLNReactionRanking(
        node_in_feats=args['node_in_feats'],
        edge_in_feats=args['edge_in_feats'],
        node_hidden_feats=args['hidden_size'],
        num_encode_gnn_layers=args['num_encode_gnn_layers']).to(args['device'])
    criterion = CrossEntropyLoss(reduction='sum')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    # Wrap the raw Adam in the project Optimizer (grad clipping + lr decay).
    from utils import Optimizer
    optimizer = Optimizer(model, args['lr'], optimizer, max_grad_norm=args['max_norm'])
    acc_sum = 0
    grad_norm_sum = 0
    dur = []
    total_samples = 0
    for epoch in range(args['num_epochs']):
        t0 = time.time()
        model.train()
        for batch_id, batch_data in enumerate(train_loader):
            batch_reactant_graphs, batch_product_graphs, \
                batch_combo_scores, batch_labels, batch_num_candidate_products = batch_data
            batch_combo_scores = batch_combo_scores.to(args['device'])
            batch_labels = batch_labels.to(args['device'])
            reactant_node_feats = batch_reactant_graphs.ndata.pop('hv').to(args['device'])
            reactant_edge_feats = batch_reactant_graphs.edata.pop('he').to(args['device'])
            product_node_feats = batch_product_graphs.ndata.pop('hv').to(args['device'])
            product_edge_feats = batch_product_graphs.edata.pop('he').to(args['device'])
            pred = model(reactant_graph=batch_reactant_graphs,
                         reactant_node_feats=reactant_node_feats,
                         reactant_edge_feats=reactant_edge_feats,
                         product_graphs=batch_product_graphs,
                         product_node_feats=product_node_feats,
                         product_edge_feats=product_edge_feats,
                         candidate_scores=batch_combo_scores,
                         batch_num_candidate_products=batch_num_candidate_products)
            # Check if the ground truth candidate has the highest score
            # (ground truth is candidate 0 within each reaction's slice).
            batch_loss = 0
            product_graph_start = 0
            for i in range(len(batch_num_candidate_products)):
                product_graph_end = product_graph_start + batch_num_candidate_products[i]
                reaction_pred = pred[product_graph_start:product_graph_end, :]
                acc_sum += float(reaction_pred.max(dim=0)[1].detach().cpu().data.item() == 0)
                batch_loss += criterion(reaction_pred.reshape(1, -1), batch_labels[i, :])
                product_graph_start = product_graph_end
            grad_norm_sum += optimizer.backward_and_step(batch_loss)
            total_samples += args['batch_size']
            if total_samples % args['print_every'] == 0:
                progress = 'Epoch {:d}/{:d}, iter {:d}/{:d} | time {:.4f} | ' \
                           'accuracy {:.4f} | grad norm {:.4f}'.format(
                               epoch + 1, args['num_epochs'],
                               (batch_id + 1) * args['batch_size'] // args['print_every'],
                               len(train_set) // args['print_every'],
                               (sum(dur) + time.time() - t0) / total_samples * args['print_every'],
                               acc_sum / args['print_every'],
                               grad_norm_sum / args['print_every'])
                print(progress)
                acc_sum = 0
                grad_norm_sum = 0
            # Periodic LR decay + checkpoint + validation eval.
            if total_samples % args['decay_every'] == 0:
                dur.append(time.time() - t0)
                old_lr = optimizer.lr
                optimizer.decay_lr(args['lr_decay_factor'])
                new_lr = optimizer.lr
                print('Learning rate decayed from {:.4f} to {:.4f}'.format(old_lr, new_lr))
                torch.save({'model_state_dict': model.state_dict()},
                           args['result_path'] + '/model_{:d}.pkl'.format(total_samples))
                prediction_summary = 'total samples {:d}, (epoch {:d}/{:d}, iter {:d}/{:d})\n'.format(
                    total_samples, epoch + 1, args['num_epochs'],
                    (batch_id + 1) * args['batch_size'] // args['print_every'],
                    len(train_set) // args['print_every']) + candidate_ranking_eval(args, model, val_loader)
                print(prediction_summary)
                with open(args['result_path'] + '/val_eval.txt', 'a') as f:
                    f.write(prediction_summary)
                t0 = time.time()
                # candidate_ranking_eval switched the model to eval mode.
                model.train()
# --- Neural style-transfer training setup (script-level) ---
# NOTE(review): dataset_path, batch_size, learning_rate and style_image are
# bound elsewhere in the file -- presumably CLI args; verify before reuse.
image_size = 256
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Fixed seeds for reproducible batching/initialization.
np.random.seed(42)
torch.manual_seed(42)
# VGG-style preprocessing: images kept in the [0, 255] range.
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.mul(255))
])
train_dataset = datasets.ImageFolder(dataset_path, transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
transformer = TransformerNet().to(device)  # this is the network that will require training
optimizer = Adam(transformer.parameters(), learning_rate)
mse_loss = torch.nn.MSELoss()
vgg = Vgg16(requires_grad=False).to(device)  # this network is used to obtain the feature maps
style_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.mul(255))
])
style = utils.load_image(style_image, size=None)  # The size for 'mosaic.jpg' style image is 1024 x 1024 x 3
style = style_transform(style)  # the shape will be [3, style_image_size, style_image_size]
# Replicate the single style image across the batch dimension.
style = style.repeat(batch_size, 1, 1, 1).to(device)  # the size here will be [batch_size, 3, style_image_size, style_image_size]
class DDPG(object):
    """Ensemble DDPG: several actors share one critic; at action time the
    critic scores each actor's proposal and the best one is executed.

    Uses old-style PyTorch (Variable/volatile) throughout.
    """

    def __init__(self, nb_status, nb_actions, args):
        self.num_actor = 3
        self.nb_status = nb_status * args.window_length
        self.nb_actions = nb_actions
        self.discrete = args.discrete
        # Create Actor and Critic Network
        net_cfg = {
            'hidden1': args.hidden1,
            'hidden2': args.hidden2,
            'use_bn': args.bn
        }
        self.actors = [Actor(self.nb_status, self.nb_actions) for _ in range(self.num_actor)]
        self.actor_targets = [Actor(self.nb_status, self.nb_actions)
                              for _ in range(self.num_actor)]
        self.actor_optims = [Adam(self.actors[i].parameters(), lr=args.prate)
                             for i in range(self.num_actor)]
        self.critic = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_target = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_optim = Adam(self.critic.parameters(), lr=args.rate)
        for i in range(self.num_actor):
            hard_update(self.actor_targets[i], self.actors[i])  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)
        # Create replay buffer
        self.memory = rpm(args.rmsize)  # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
        self.random_process = Myrandom(size=nb_actions)
        # Hyper-parameters
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.discount = args.discount
        self.depsilon = 1.0 / args.epsilon
        #
        # Exploration noise scale, linearly annealed in select_action.
        self.epsilon = 1.0
        self.s_t = None  # Most recent state
        self.a_t = None  # Most recent action
        self.use_cuda = args.cuda
        #
        if self.use_cuda: self.cuda()

    def update_policy(self, train_actor=True):
        """One critic step plus one step per actor; returns (mean -policy_loss, value_loss)."""
        # Sample batch
        state_batch, action_batch, reward_batch, \
            next_state_batch, terminal_batch = self.memory.sample_batch(self.batch_size)
        # Prepare for the target q batch: average the target critic's value
        # of every target actor's proposed next action.
        next_q_values = 0
        for i in range(self.num_actor):
            next_q_values = next_q_values + self.critic_target([
                to_tensor(next_state_batch, volatile=True),
                self.actor_targets[i](to_tensor(next_state_batch, volatile=True)),
            ])
        # print('batch of picture is ok')
        next_q_values = next_q_values / self.num_actor
        next_q_values.volatile = False
        target_q_batch = to_tensor(reward_batch) + \
            self.discount * to_tensor((1 - terminal_batch.astype(np.float))) * next_q_values
        # Critic update
        self.critic.zero_grad()
        q_batch = self.critic([to_tensor(state_batch), to_tensor(action_batch)])
        # print(reward_batch, next_q_values*self.discount, target_q_batch, terminal_batch.astype(np.float))
        # NOTE(review): `criterion` is a module-level loss bound elsewhere.
        value_loss = criterion(q_batch, target_q_batch)
        value_loss.backward()
        self.critic_optim.step()
        sum_policy_loss = 0
        for i in range(self.num_actor):
            self.actors[i].zero_grad()
            policy_loss = -self.critic([
                to_tensor(state_batch),
                self.actors[i](to_tensor(state_batch))
            ])
            policy_loss = policy_loss.mean()
            policy_loss.backward()
            if train_actor:
                self.actor_optims[i].step()
            sum_policy_loss += policy_loss
            # Target update
            soft_update(self.actor_targets[i], self.actors[i], self.tau)
        soft_update(self.critic_target, self.critic, self.tau)
        return -sum_policy_loss / self.num_actor, value_loss

    def cuda(self):
        """Move every network to the GPU."""
        for i in range(self.num_actor):
            self.actors[i].cuda()
            self.actor_targets[i].cuda()
        self.critic.cuda()
        self.critic_target.cuda()

    def observe(self, r_t, s_t1, done):
        """Append the latest transition to the replay buffer and advance s_t."""
        self.memory.append([self.s_t, self.a_t, r_t, s_t1, done])
        self.s_t = s_t1

    def random_action(self):
        """Uniform random action (argmax index when the space is discrete)."""
        action = np.random.uniform(-1., 1., self.nb_actions)
        self.a_t = action
        if self.discrete:
            return action.argmax()
        else:
            return action

    def select_action(self, s_t, decay_epsilon=True, return_fix=False, noise_level=0):
        """Let each actor propose a (noised) action; execute the one the
        critic scores highest. Epsilon decays once per call when requested."""
        actions = []
        status = []
        tot_score = []
        for i in range(self.num_actor):
            action = to_numpy(
                self.actors[i](to_tensor(np.array([s_t]), volatile=True))).squeeze(0)
            noise_level = noise_level * max(self.epsilon, 0)
            action = action + self.random_process.sample() * noise_level
            status.append(s_t)
            actions.append(action)
            tot_score.append(0.)
        scores = self.critic([to_tensor(np.array(status), volatile=True),
                              to_tensor(np.array(actions), volatile=True)])
        for j in range(self.num_actor):
            tot_score[j] += scores.data[j][0]
        best = np.array(tot_score).argmax()
        if decay_epsilon:
            self.epsilon -= self.depsilon
        self.a_t = actions[best]
        return actions[best]

    def reset(self, obs):
        """Start a new episode from observation `obs`."""
        self.s_t = obs
        self.random_process.reset_status()

    def load_weights(self, output, num=0):
        """Load actor/critic checkpoints `num` from directory `output`
        into both the live and target networks."""
        if output is None: return
        for i in range(self.num_actor):
            actor = self.actors[i]
            actor_target = self.actor_targets[i]
            actor.load_state_dict(
                torch.load('{}/actor{}_{}.pkl'.format(output, num, i))
            )
            actor_target.load_state_dict(
                torch.load('{}/actor{}_{}.pkl'.format(output, num, i))
            )
        self.critic.load_state_dict(
            torch.load('{}/critic{}.pkl'.format(output, num))
        )
        self.critic_target.load_state_dict(
            torch.load('{}/critic{}.pkl'.format(output, num))
        )

    def save_model(self, output, num):
        """Checkpoint actors and critic (state dicts saved from CPU)."""
        if self.use_cuda:
            for i in range(self.num_actor):
                self.actors[i].cpu()
            self.critic.cpu()
        for i in range(self.num_actor):
            torch.save(
                self.actors[i].state_dict(),
                '{}/actor{}_{}.pkl'.format(output, num, i)
            )
        torch.save(
            self.critic.state_dict(),
            '{}/critic{}.pkl'.format(output, num)
        )
        if self.use_cuda:
            for i in range(self.num_actor):
                self.actors[i].cuda()
            self.critic.cuda()
class DanGuesser(AbstractGuesser):
    """Deep Averaging Network guesser over combined/uni/bi/trigram fields.

    Fix vs. original: device moves for qanta_id tensors in run_epoch() and
    _guess_batch() are now guarded by the module-level CUDA flag, matching
    every other `.cuda()` call in this class; the originals called `.cuda()`
    unconditionally and crashed on CPU-only hosts.
    """

    def __init__(self, config_num):
        super(DanGuesser, self).__init__(config_num)
        # config_num selects one hyper-parameter set from the global conf.
        if self.config_num is not None:
            guesser_conf = conf['guessers']['qanta.guesser.dan.DanGuesser'][self.config_num]
            self.gradient_clip = guesser_conf['gradient_clip']
            self.n_hidden_units = guesser_conf['n_hidden_units']
            self.n_hidden_layers = guesser_conf['n_hidden_layers']
            self.nn_dropout = guesser_conf['nn_dropout']
            self.batch_size = guesser_conf['batch_size']
            self.use_wiki = guesser_conf['use_wiki']
            self.n_wiki_sentences = guesser_conf['n_wiki_sentences']
            self.wiki_title_replace_token = guesser_conf['wiki_title_replace_token']
            self.lowercase = guesser_conf['lowercase']
            self.combined_ngrams = guesser_conf['combined_ngrams']
            self.unigrams = guesser_conf['unigrams']
            self.bigrams = guesser_conf['bigrams']
            self.trigrams = guesser_conf['trigrams']
            self.combined_max_vocab_size = guesser_conf['combined_max_vocab_size']
            self.unigram_max_vocab_size = guesser_conf['unigram_max_vocab_size']
            self.bigram_max_vocab_size = guesser_conf['bigram_max_vocab_size']
            self.trigram_max_vocab_size = guesser_conf['trigram_max_vocab_size']
            self.pooling = guesser_conf['pooling']
            self.random_seed = guesser_conf['random_seed']
        # Torchtext fields; populated by train() or load().
        self.page_field: Optional[Field] = None
        self.qanta_id_field: Optional[Field] = None
        self.text_field: Optional[Field] = None
        self.unigram_field: Optional[Field] = None
        self.bigram_field: Optional[Field] = None
        self.trigram_field: Optional[Field] = None
        self.n_classes = None
        self.emb_dim = None
        self.model_file = None
        self.model = None
        self.optimizer = None
        self.criterion = None
        self.scheduler = None

    @property
    def ans_to_i(self):
        """Answer page -> class index mapping."""
        return self.page_field.vocab.stoi

    @property
    def i_to_ans(self):
        """Class index -> answer page mapping."""
        return self.page_field.vocab.itos

    def parameters(self):
        """Return this guesser's hyper-parameter dict (for logging/reporting)."""
        return conf['guessers']['qanta.guesser.dan.DanGuesser'][self.config_num]

    def train(self, training_data):
        """Build iterators, construct DanModel, and train until the
        TrainingManager callbacks stop us.

        `training_data` is unused: data comes from QuizBowl.iters (kept for
        the AbstractGuesser interface).
        """
        log.info('Loading Quiz Bowl dataset')
        train_iter, val_iter, dev_iter = QuizBowl.iters(
            batch_size=self.batch_size, lower=self.lowercase,
            use_wiki=self.use_wiki, n_wiki_sentences=self.n_wiki_sentences,
            replace_title_mentions=self.wiki_title_replace_token,
            combined_ngrams=self.combined_ngrams, unigrams=self.unigrams,
            bigrams=self.bigrams, trigrams=self.trigrams,
            combined_max_vocab_size=self.combined_max_vocab_size,
            unigram_max_vocab_size=self.unigram_max_vocab_size,
            bigram_max_vocab_size=self.bigram_max_vocab_size,
            trigram_max_vocab_size=self.trigram_max_vocab_size
        )
        log.info(f'N Train={len(train_iter.dataset.examples)}')
        log.info(f'N Test={len(val_iter.dataset.examples)}')
        # Keep the fields: they hold the vocabs needed at guess/save time.
        fields: Dict[str, Field] = train_iter.dataset.fields
        self.page_field = fields['page']
        self.n_classes = len(self.ans_to_i)
        self.qanta_id_field = fields['qanta_id']
        self.emb_dim = 300
        # Only the n-gram fields that were enabled are present.
        if 'text' in fields:
            self.text_field = fields['text']
            log.info(f'Text Vocab={len(self.text_field.vocab)}')
        if 'unigram' in fields:
            self.unigram_field = fields['unigram']
            log.info(f'Unigram Vocab={len(self.unigram_field.vocab)}')
        if 'bigram' in fields:
            self.bigram_field = fields['bigram']
            log.info(f'Bigram Vocab={len(self.bigram_field.vocab)}')
        if 'trigram' in fields:
            self.trigram_field = fields['trigram']
            log.info(f'Trigram Vocab={len(self.trigram_field.vocab)}')
        log.info('Initializing Model')
        self.model = DanModel(
            self.n_classes,
            text_field=self.text_field,
            unigram_field=self.unigram_field,
            bigram_field=self.bigram_field,
            trigram_field=self.trigram_field,
            emb_dim=self.emb_dim,
            n_hidden_units=self.n_hidden_units,
            n_hidden_layers=self.n_hidden_layers,
            nn_dropout=self.nn_dropout,
            pooling=self.pooling
        )
        if CUDA:
            self.model = self.model.cuda()
        log.info(f'Parameters:\n{self.parameters()}')
        log.info(f'Model:\n{self.model}')
        self.optimizer = Adam(self.model.parameters())
        self.criterion = nn.CrossEntropyLoss()
        # mode='max': the scheduler is stepped with test accuracy.
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=5,
                                                        verbose=True, mode='max')
        temp_prefix = get_tmp_filename()
        self.model_file = f'{temp_prefix}.pt'
        manager = TrainingManager([
            BaseLogger(log_func=log.info),
            TerminateOnNaN(),
            EarlyStopping(monitor='test_acc', patience=10, verbose=1),
            MaxEpochStopping(100),
            ModelCheckpoint(create_save_model(self.model), self.model_file, monitor='test_acc')
        ])
        log.info('Starting training')
        epoch = 0
        while True:
            self.model.train()
            train_acc, train_loss, train_time = self.run_epoch(train_iter)
            self.model.eval()
            test_acc, test_loss, test_time = self.run_epoch(val_iter)
            stop_training, reasons = manager.instruct(
                train_time, train_loss, train_acc,
                test_time, test_loss, test_acc
            )
            if stop_training:
                log.info(' '.join(reasons))
                break
            else:
                self.scheduler.step(test_acc)
                epoch += 1

    def run_epoch(self, iterator: Iterator):
        """One pass over `iterator`; returns (mean_acc, mean_loss, seconds).

        Gradient steps happen only when the iterator is the training one.
        """
        is_train = iterator.train
        batch_accuracies = []
        batch_losses = []
        epoch_start = time.time()
        for batch in iterator:
            input_dict = {}
            lengths_dict = {}
            # Gather whichever n-gram representations this run enabled.
            if hasattr(batch, 'text'):
                text, lengths = batch.text
                input_dict['text'] = text
                lengths_dict['text'] = lengths
            if hasattr(batch, 'unigram'):
                text, lengths = batch.unigram
                input_dict['unigram'] = text
                lengths_dict['unigram'] = lengths
            if hasattr(batch, 'bigram'):
                text, lengths = batch.bigram
                input_dict['bigram'] = text
                lengths_dict['bigram'] = lengths
            if hasattr(batch, 'trigram'):
                text, lengths = batch.trigram
                input_dict['trigram'] = text
                lengths_dict['trigram'] = lengths
            page = batch.page
            # Fix: only move to GPU when CUDA is available/enabled.
            qanta_ids = batch.qanta_id.cuda() if CUDA else batch.qanta_id
            if is_train:
                self.model.zero_grad()
            out = self.model(input_dict, lengths_dict, qanta_ids)
            _, preds = torch.max(out, 1)
            accuracy = torch.mean(torch.eq(preds, page).float()).data[0]
            batch_loss = self.criterion(out, page)
            if is_train:
                batch_loss.backward()
                torch.nn.utils.clip_grad_norm(self.model.parameters(), self.gradient_clip)
                self.optimizer.step()
            batch_accuracies.append(accuracy)
            batch_losses.append(batch_loss.data[0])
        epoch_end = time.time()
        return np.mean(batch_accuracies), np.mean(batch_losses), epoch_end - epoch_start

    def guess(self, questions: List[QuestionText], max_n_guesses: Optional[int]):
        """Return, per question, the top `max_n_guesses` (answer, prob) pairs,
        chunking large requests into batches of 500."""
        if len(questions) == 0:
            return []
        batch_size = 500
        if len(questions) < batch_size:
            return self._guess_batch(questions, max_n_guesses)
        else:
            all_guesses = []
            for i in range(0, len(questions), batch_size):
                batch_questions = questions[i:i + batch_size]
                guesses = self._guess_batch(batch_questions, max_n_guesses)
                all_guesses.extend(guesses)
            return all_guesses

    def _guess_batch(self, questions: List[QuestionText], max_n_guesses: Optional[int]):
        """Score one batch of raw question texts through the model."""
        if len(questions) == 0:
            return []
        input_dict = {}
        lengths_dict = {}
        if self.text_field is not None:
            examples = [self.text_field.preprocess(q) for q in questions]
            text, lengths = self.text_field.process(examples, None, False)
            input_dict['text'] = text
            lengths_dict['text'] = lengths
        if self.unigram_field is not None:
            examples = [self.unigram_field.preprocess(q) for q in questions]
            text, lengths = self.unigram_field.process(examples, None, False)
            input_dict['unigram'] = text
            lengths_dict['unigram'] = lengths
        if self.bigram_field is not None:
            examples = [self.bigram_field.preprocess(q) for q in questions]
            text, lengths = self.bigram_field.process(examples, None, False)
            input_dict['bigram'] = text
            lengths_dict['bigram'] = lengths
        if self.trigram_field is not None:
            examples = [self.trigram_field.preprocess(q) for q in questions]
            text, lengths = self.trigram_field.process(examples, None, False)
            input_dict['trigram'] = text
            lengths_dict['trigram'] = lengths
        # Dummy qanta ids -- the model requires the tensor but guessing does
        # not depend on real ids. Fix: guard the device move with CUDA.
        qanta_ids = self.qanta_id_field.process([0 for _ in questions])
        if CUDA:
            qanta_ids = qanta_ids.cuda()
        guesses = []
        out = self.model(input_dict, lengths_dict, qanta_ids)
        probs = F.softmax(out).data.cpu().numpy()
        n_examples = probs.shape[0]
        preds = np.argsort(-probs, axis=1)  # classes by descending prob
        for i in range(n_examples):
            guesses.append([])
            for p in preds[i][:max_n_guesses]:
                guesses[-1].append((self.i_to_ans[p], probs[i][p]))
        return guesses

    def save(self, directory: str):
        """Move the checkpoint into `directory` and pickle fields + config."""
        shutil.copyfile(self.model_file, os.path.join(directory, 'dan.pt'))
        shell(f'rm -f {self.model_file}')
        with open(os.path.join(directory, 'dan.pkl'), 'wb') as f:
            cloudpickle.dump({
                'page_field': self.page_field,
                'combined_text_field': self.text_field,
                'unigram_text_field': self.unigram_field,
                'bigram_text_field': self.bigram_field,
                'trigram_text_field': self.trigram_field,
                'combined_ngrams': self.combined_ngrams,
                'unigrams': self.unigrams,
                'bigrams': self.bigrams,
                'trigrams': self.trigrams,
                'combined_max_vocab_size': self.combined_max_vocab_size,
                'unigram_max_vocab_size': self.unigram_max_vocab_size,
                'bigram_max_vocab_size': self.bigram_max_vocab_size,
                'trigram_max_vocab_size': self.trigram_max_vocab_size,
                'qanta_id_field': self.qanta_id_field,
                'n_classes': self.n_classes,
                'gradient_clip': self.gradient_clip,
                'n_hidden_units': self.n_hidden_units,
                'n_hidden_layers': self.n_hidden_layers,
                'nn_dropout': self.nn_dropout,
                'batch_size': self.batch_size,
                'use_wiki': self.use_wiki,
                'n_wiki_sentences': self.n_wiki_sentences,
                'wiki_title_replace_token': self.wiki_title_replace_token,
                'lowercase': self.lowercase,
                'pooling': self.pooling,
                'random_seed': self.random_seed,
                'config_num': self.config_num
            }, f)

    @classmethod
    def load(cls, directory: str):
        """Reconstruct a guesser from save()'s pickle + state-dict pair."""
        with open(os.path.join(directory, 'dan.pkl'), 'rb') as f:
            params = cloudpickle.load(f)
        guesser = DanGuesser(params['config_num'])
        guesser.page_field = params['page_field']
        guesser.qanta_id_field = params['qanta_id_field']
        guesser.text_field = params['combined_text_field']
        guesser.unigram_field = params['unigram_text_field']
        guesser.bigram_field = params['bigram_text_field']
        guesser.trigram_field = params['trigram_text_field']
        guesser.combined_ngrams = params['combined_ngrams']
        guesser.unigrams = params['unigrams']
        guesser.bigrams = params['bigrams']
        guesser.trigrams = params['trigrams']
        guesser.combined_max_vocab_size = params['combined_max_vocab_size']
        guesser.unigram_max_vocab_size = params['unigram_max_vocab_size']
        guesser.bigram_max_vocab_size = params['bigram_max_vocab_size']
        guesser.trigram_max_vocab_size = params['trigram_max_vocab_size']
        guesser.n_classes = params['n_classes']
        guesser.gradient_clip = params['gradient_clip']
        guesser.n_hidden_units = params['n_hidden_units']
        guesser.n_hidden_layers = params['n_hidden_layers']
        guesser.nn_dropout = params['nn_dropout']
        guesser.use_wiki = params['use_wiki']
        guesser.n_wiki_sentences = params['n_wiki_sentences']
        guesser.wiki_title_replace_token = params['wiki_title_replace_token']
        guesser.lowercase = params['lowercase']
        guesser.pooling = params['pooling']
        guesser.random_seed = params['random_seed']
        # init_embeddings=False: the state dict below supplies all weights.
        guesser.model = DanModel(
            guesser.n_classes,
            text_field=guesser.text_field,
            unigram_field=guesser.unigram_field,
            bigram_field=guesser.bigram_field,
            trigram_field=guesser.trigram_field,
            init_embeddings=False,
            emb_dim=300,
            n_hidden_layers=guesser.n_hidden_layers,
            n_hidden_units=guesser.n_hidden_units,
            pooling=guesser.pooling
        )
        # map_location keeps CPU loading possible even for GPU checkpoints.
        guesser.model.load_state_dict(torch.load(
            os.path.join(directory, 'dan.pt'),
            map_location=lambda storage, loc: storage
        ))
        guesser.model.eval()
        if CUDA:
            guesser.model = guesser.model.cuda()
        return guesser

    @classmethod
    def targets(cls):
        """Files produced by save() that identify a trained guesser."""
        return ['dan.pt', 'dan.pkl']
def __init__(self, env, test_env, log_dir, num_steps=5 * (10**7), batch_size=32, c=0, sensitive=False, N=200, kappa=1.0, lr=5e-5, memory_size=10**6, gamma=0.99, multi_step=1, update_interval=4, target_update_interval=10000, start_steps=50000, epsilon_train=0.01, epsilon_eval=0.001, epsilon_decay_steps=250000, double_q_learning=False, log_interval=100, eval_interval=250000, num_eval_steps=125000, max_episode_steps=27000, grad_cliping=None, cuda=True, seed=0): super(QRDQNAgent, self).__init__(env, test_env, log_dir, num_steps, batch_size, memory_size, gamma, multi_step, update_interval, target_update_interval, start_steps, epsilon_train, epsilon_eval, epsilon_decay_steps, double_q_learning, log_interval, eval_interval, num_eval_steps, max_episode_steps, grad_cliping, cuda, seed) # Online network. self.online_net = QRDQN(num_states=self.env.nrow * self.env.ncol, num_actions=self.num_actions, N=N, sensitive=sensitive, c=c).to(self.device) # Target network. self.target_net = QRDQN(num_states=self.env.nrow * self.env.ncol, num_actions=self.num_actions, N=N, sensitive=sensitive, c=c).to(self.device) # Copy parameters of the learning network to the target network. self.update_target() # Disable calculations of gradients of the target network. disable_gradients(self.target_net) self.optim = Adam(self.online_net.parameters(), lr=lr, eps=1e-2 / batch_size) # Fixed fractions. taus = torch.arange(0, N + 1, device=self.device, dtype=torch.float32) / N self.tau_hats = ((taus[1:] + taus[:-1]) / 2.0).view(1, N) self.N = N self.kappa = kappa self.c = c self.sensitive = sensitive self.num_cvar = int(np.ceil(self.N * self.c))
def fit():
    """Train an RNN to predict architecture token sequences, weighting the
    per-token NLL by each architecture's validation accuracy (reward).

    Fixes vs. original:
      * `r = r.cuda()` and `ind = ...long().cuda()` were unconditional,
        crashing (or mixing devices) when `use_cuda` is False -- both are
        now guarded by `use_cuda`, matching every other device move here.
      * The eval loop repeated `r` by `O_train.shape[1]` instead of
        `O_test.shape[1]` -- correct only by accident of shared padding;
        it now uses the test sequence length.
    """
    epochs = 50000
    hidden_size = 128
    emb_size = 128
    resample = False
    gamma = 0.99  # EMA factor for the running train loss/precision
    lr = 1e-4
    batch_size = 64
    use_cuda = True
    random_state = 42
    num_layers = 1
    # data
    db = load_db()
    # success
    jobs = db.jobs_with(state='success')
    #jobs = db.all_jobs()
    jobs = list(jobs)
    #jobs = [j for j in jobs if j['content']['info']['max_depth'] == 5]
    X = [j['content']['info']['architecture'] for j in jobs]
    # Reward: best validation accuracy, or a small penalty for failed jobs.
    R = [max(j['stats']['valid']['accuracy']) if j['state'] == 'success' else -0.1
         for j in jobs]
    #threshold = 0.8
    #X = [x for x, r in zip(X, R) if r > threshold]
    #R = [1 for r in R if r > threshold]
    R = np.array(R)
    # Vectorize token sequences (padded) and prepend a start token (0).
    vect = Vectorizer(grammar, pad=True)
    X = vect.transform(X)
    X = [[0] + x for x in X]
    X = np.array(X).astype('int32')
    print(X.shape)
    X, R = shuffle(X, R, random_state=random_state)
    n_train = int(len(X) * 0.8)
    X_train = X[0:n_train]
    R_train = R[0:n_train]
    X_test = X[n_train:]
    R_test = R[n_train:]
    if resample:
        X_train, R_train = _resample(X_train, R_train, nb=10)
    print('Number of training data : {}'.format(len(X_train)))
    # model
    vocab_size = len(vect.tok_to_id)
    model = RnnModel(
        vocab_size=vocab_size,
        emb_size=emb_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        use_cuda=use_cuda,
    )
    model.vect = vect
    model.grammar = grammar
    model.apply(_weights_init)
    if use_cuda:
        model = model.cuda()
    optim = Adam(model.parameters(), lr=lr)
    # Training: inputs are tokens [0..n-1], targets are tokens [1..n]
    # (next-token prediction).
    I_train = X_train[:, 0:-1]
    O_train = X_train[:, 1:]
    I_test = X_test[:, 0:-1]
    O_test = X_test[:, 1:]
    avg_loss = 0.
    avg_precision = 0.
    nupdates = 0
    best_loss = float('inf')
    last_epoch_annealing = 0
    last_epoch_improving = 0
    for i in range(epochs):
        model.train()
        for j in range(0, len(I_train), batch_size):
            inp = I_train[j:j+batch_size]
            out = O_train[j:j+batch_size]
            r = R_train[j:j+batch_size]
            out = out.flatten()
            inp = torch.from_numpy(inp).long()
            inp = Variable(inp)
            out = torch.from_numpy(out).long()
            out = Variable(out)
            # Broadcast each sequence's reward over its time steps so the
            # per-token loss is reward-weighted.
            r = torch.from_numpy(r).float()
            r = r.repeat(1, O_train.shape[1])
            r = r.view(-1, 1)
            r = Variable(r)
            if use_cuda:
                r = r.cuda()
                inp = inp.cuda()
                out = out.cuda()
            model.zero_grad()
            y = model(inp)
            true = out.data
            pred = y.data
            # Ignore padding positions (token id 0).
            ind = torch.arange(0, true.size(0)).long()
            if use_cuda:
                ind = ind.cuda()
            ind = ind[true != 0]
            loss = nn.functional.nll_loss(
                r[ind] * nn.functional.log_softmax(y[ind]), out[ind])
            precision = acc(pred[ind], true[ind])
            loss.backward()
            optim.step()
            avg_loss = avg_loss * gamma + loss.data[0] * (1 - gamma)
            avg_precision = avg_precision * gamma + precision * (1 - gamma)
            nupdates += 1
        print('Epoch : {:05d}, Train loss : {:.6f}, Train Precision : {:.6f}'.format(
            i, avg_loss, avg_precision))
        # Evaluation pass (no gradient steps).
        precisions = []
        losses = []
        model.eval()
        for j in range(0, len(I_test), batch_size):
            inp = I_test[j:j+batch_size]
            out = O_test[j:j+batch_size]
            r = R_test[j:j+batch_size]
            out = out.flatten()
            inp = torch.from_numpy(inp).long()
            inp = Variable(inp)
            out = torch.from_numpy(out).long()
            out = Variable(out)
            r = torch.from_numpy(r).float()
            r = r.repeat(1, O_test.shape[1])
            r = r.view(-1, 1)
            r = Variable(r)
            if use_cuda:
                r = r.cuda()
                inp = inp.cuda()
                out = out.cuda()
            y = model(inp)
            true = out.data
            pred = y.data
            ind = torch.arange(0, true.size(0)).long()
            if use_cuda:
                ind = ind.cuda()
            ind = ind[true != 0]
            loss = nn.functional.nll_loss(
                r[ind] * nn.functional.log_softmax(y[ind]), out[ind])
            precision = acc(pred[ind], true[ind])
            precisions.append(precision)
            losses.append(loss.data[0])
        mean_precision = np.mean(precisions)
        mean_loss = np.mean(losses)
        print('Epoch : {:05d}, Test loss : {:.6f}, Test precision : {:.6f}'.format(
            i, mean_loss, mean_precision))
        if mean_loss < best_loss:
            best_loss = mean_loss
            print('Improved score, saving the model.')
            torch.save(model, 'rnn.th')
            last_epoch_improving = i
        else:
            print('No improvements.')
        # Anneal the LR by 10x after 100 stagnant epochs (at most once per
        # 100 epochs).
        if i - last_epoch_improving >= 100 and i - last_epoch_annealing >= 100:
            last_epoch_annealing = i
            print('Annealing learning rate.')
            for param_group in optim.param_groups:
                param_group['lr'] *= 0.1
class DDPG(object):
    """Deep Deterministic Policy Gradient agent.

    Holds actor/critic networks plus their target copies, a replay
    memory (``rpm``), and an epsilon-blended exploration noise process.
    Helpers ``hard_update``/``soft_update``/``to_tensor``/``to_numpy``
    and the ``Actor``/``Critic``/``rpm``/``Myrandom`` classes are
    defined elsewhere in this project.
    """

    def __init__(self, nb_status, nb_actions, args, writer):
        # Optional gradient-norm clip for the actor (None disables it).
        self.clip_actor_grad = args.clip_actor_grad
        # Observation is a stack of `window_length` frames.
        self.nb_status = nb_status * args.window_length
        self.nb_actions = nb_actions
        self.writer = writer  # TensorBoard-style writer (may be None)
        self.select_time = 0
        # Create Actor and Critic Network
        net_cfg = {
            'hidden1': args.hidden1,
            'hidden2': args.hidden2,
            'init_method': args.init_method
        }
        self.actor = Actor(self.nb_status, self.nb_actions, **net_cfg)
        self.actor_target = Actor(self.nb_status, self.nb_actions, **net_cfg)
        self.actor_optim = Adam(self.actor.parameters(), lr=args.prate)

        self.critic = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_target = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_optim = Adam(self.critic.parameters(), lr=args.rate)

        hard_update(self.actor_target, self.actor)  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)

        # Create replay buffer
        self.memory = rpm(args.rmsize)
        self.random_process = Myrandom(size=nb_actions)

        # Hyper-parameters
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.discount = args.discount
        # Per-step decay of epsilon so it reaches 0 after args.epsilon steps.
        self.depsilon = 1.0 / args.epsilon
        #
        self.epsilon = 1.0
        self.s_t = None  # Most recent state
        self.a_t = None  # Most recent action
        self.use_cuda = args.cuda
        #
        if self.use_cuda: self.cuda()

    def update_policy(self, train_actor=True):
        """One DDPG update: critic step, actor step, target soft-update.

        Returns (−policy_loss, value_loss); note the first element is
        the (negated) mean Q of the actor's actions.
        """
        # Sample batch
        state_batch, action_batch, reward_batch, \
            next_state_batch, terminal_batch = self.memory.sample_batch(self.batch_size)

        # Prepare for the target q batch
        next_q_values = self.critic_target([
            to_tensor(next_state_batch, volatile=True),
            self.actor_target(to_tensor(next_state_batch, volatile=True)),
        ])
        # print('batch of picture is ok')
        # Re-enable grad flow so the Bellman target participates in the
        # critic-loss graph (pre-0.4 PyTorch volatile semantics).
        next_q_values.volatile = False

        # NOTE(review): np.float is removed in NumPy >= 1.24; this needs
        # `float` (or np.float64) on modern NumPy.
        target_q_batch = to_tensor(reward_batch) + \
            self.discount * to_tensor((1 - terminal_batch.astype(np.float))) * next_q_values

        # Critic update
        self.critic.zero_grad()
        q_batch = self.critic([to_tensor(state_batch), to_tensor(action_batch)])
        # print(reward_batch, next_q_values*self.discount, target_q_batch, terminal_batch.astype(np.float))
        value_loss = nn.MSELoss()(q_batch, target_q_batch)
        value_loss.backward()
        self.critic_optim.step()

        # Actor update: maximize critic's Q of the actor's own actions.
        self.actor.zero_grad()
        policy_loss = -self.critic([
            to_tensor(state_batch),
            self.actor(to_tensor(state_batch))
        ])
        policy_loss = policy_loss.mean()
        policy_loss.backward()

        if self.clip_actor_grad is not None:
            # NOTE(review): clip_grad_norm is deprecated in favor of
            # clip_grad_norm_ in PyTorch >= 0.4.
            torch.nn.utils.clip_grad_norm(self.actor.parameters(), float(self.clip_actor_grad))
            if self.writer != None:
                # Log the mean per-parameter gradient norm of the actor.
                mean_policy_grad = np.array(np.mean([np.linalg.norm(p.grad.data.cpu().numpy().ravel()) for p in self.actor.parameters()]))
                # print(mean_policy_grad)
                self.writer.add_scalar('train/mean_policy_grad', mean_policy_grad, self.select_time)

        if train_actor:
            self.actor_optim.step()

        # Target update
        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)

        return -policy_loss, value_loss

    def eval(self):
        """Switch all four networks to eval mode."""
        self.actor.eval()
        self.actor_target.eval()
        self.critic.eval()
        self.critic_target.eval()

    def train(self):
        """Switch all four networks back to train mode."""
        self.actor.train()
        self.actor_target.train()
        self.critic.train()
        self.critic_target.train()

    def cuda(self):
        """Move all four networks to the GPU."""
        self.actor.cuda()
        self.actor_target.cuda()
        self.critic.cuda()
        self.critic_target.cuda()

    def observe(self, r_t, s_t1, done):
        """Store the last transition and advance the current state."""
        self.memory.append([self.s_t, self.a_t, r_t, s_t1, done])
        self.s_t = s_t1

    def random_action(self):
        """Uniform random action in [-1, 1]; also recorded as a_t."""
        action = np.random.uniform(-1., 1., self.nb_actions)
        self.a_t = action
        return action

    def select_action(self, s_t, decay_epsilon=True, return_fix=False, noise_level=0):
        """Greedy actor action blended with exploration noise.

        The blend weight is noise_level scaled by the current epsilon
        (clamped at 0), so exploration fades as epsilon decays.
        """
        self.eval()
        # print(s_t.shape)
        action = to_numpy(
            self.actor(to_tensor(np.array([s_t])))
        ).squeeze(0)
        self.train()
        noise_level = noise_level * max(self.epsilon, 0)
        # Convex combination of the policy action and the noise sample.
        action = action * (1 - noise_level) + (self.random_process.sample() * noise_level)
        action = np.clip(action, -1., 1.)

        if decay_epsilon:
            self.epsilon -= self.depsilon

        self.a_t = action
        return action

    def reset(self, obs):
        """Start a new episode from observation `obs`."""
        self.s_t = obs
        self.random_process.reset_status()

    def load_weights(self, output, num=1):
        """Load actor/critic weights (targets get the same weights)."""
        if output is None: return
        self.actor.load_state_dict(
            torch.load('{}/actor{}.pkl'.format(output, num))
        )
        self.actor_target.load_state_dict(
            torch.load('{}/actor{}.pkl'.format(output, num))
        )
        self.critic.load_state_dict(
            torch.load('{}/critic{}.pkl'.format(output, num))
        )
        self.critic_target.load_state_dict(
            torch.load('{}/critic{}.pkl'.format(output, num))
        )

    def save_model(self, output, num):
        """Save actor/critic weights, round-tripping through CPU so the
        checkpoint is loadable on CPU-only machines."""
        if self.use_cuda:
            self.actor.cpu()
            self.critic.cpu()
        torch.save(
            self.actor.state_dict(),
            '{}/actor{}.pkl'.format(output, num)
        )
        torch.save(
            self.critic.state_dict(),
            '{}/critic{}.pkl'.format(output, num)
        )
        if self.use_cuda:
            self.actor.cuda()
            self.critic.cuda()
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) dataset = ImageFolder(opt.data_path, transform=transform) dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, drop_last=True) netd = NetD(opt) netg = NetG(opt) if opt.netd_path: netd.load_state_dict(torch.load(opt.netd_path, map_location=lambda storage, loc: storage)) if opt.netg_path: netg.load_state_dict(torch.load(opt.netg_path, map_location=lambda storage, loc: storage)) optimizer_g = Adam(netg.parameters(), opt.lr1, betas=(opt.beta1, 0.999)) optimizer_d = Adam(netd.parameters(), opt.lr2, betas=(opt.beta1, 0.999)) criterion = nn.BCELoss() true_labels = Variable(torch.ones(opt.batch_size)) fake_labels = Variable(torch.zeros(opt.batch_size)) fix_noises = Variable(torch.randn(opt.batch_size, opt.nz, 1, 1)) noises = Variable(torch.randn(opt.batch_size, opt.nz, 1, 1)) if opt.use_gpu: netd.cuda() netg.cuda() criterion.cuda() true_labels, fake_labels = true_labels.cuda(), fake_labels.cuda() fix_noises, noises = fix_noises.cuda(), noises.cuda()
def main(stdscr):
    """End-to-end experiment driver: generate synthetic multivariate
    time-series data, optionally pre-filter anomalies with an LOF-based
    chunk filter, train an autoencoder with early stopping, and report
    test AUROC.

    Progress is rendered both to a curses screen (`stdscr`) and to a
    visdom-style dashboard (`Custom_Vis`). Relies on module-level
    `args` and helpers (Generator, Chunk_Filter, Dataset, Model, train,
    valid, get_score, ...) defined elsewhere in this project.
    """
    # about synthetic data
    anomaly_ratio = args.anomaly_ratio
    anomaly_type = args.anomaly_type
    # about chunk filter
    filtering = args.filtering
    n_neighbors = args.n_neighbors
    iqr_multiplier = args.iqr_multiplier
    filter_size = args.filter_size
    normalization = args.normalization
    # about autoencoder
    model_name = args.model_name
    lr = args.lr
    batch_size = args.batch_size
    epoch = args.epoch
    patience = args.patience
    retrain = args.retrain
    time_step = args.time_step
    gamma = args.gamma

    random_state = 42
    data_len = 300000
    num_feature = 25
    # Split boundaries: 30% train, 20% valid, 50% test.
    t_idx = int(data_len * 0.3)
    v_idx = int(data_len * 0.5)
    data_name = \
        f'{anomaly_type}_ratio_{str(anomaly_ratio)[2:]:<03s}'
    model_name += f'_{data_name}_filtering_{filtering}'

    torch.manual_seed(random_state)
    torch.cuda.manual_seed(random_state)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    vis = Custom_Vis()

    # Generate synthetic data and split train, valid and test set
    generator = Generator(random_state=random_state)
    generator.generate(shape=(data_len, num_feature),
                       anomaly_ratio=anomaly_ratio,
                       split_ratio=[0.3, 0.2, 0.5],
                       min_feature_ratio=0.2,
                       max_feature_ratio=0.5,
                       min_len=20,
                       max_len=200,
                       anomaly_type=anomaly_type)
    data = generator.data.copy()
    label = generator.label.copy()
    ano_set = generator.ano_set
    ano_feature = generator.ano_features
    train_valid_data, train_valid_label = \
        data[:v_idx], label[:v_idx]
    test_data, test_label = data[v_idx:], label[v_idx:]

    # Scale, then cut the series into overlapping chunks for the filter.
    transformed_data = MinMaxScaler().fit_transform(train_valid_data)
    chunk_data = seq2chunk(data=transformed_data, win_size=filter_size)

    # apply filtering or not
    if filtering:
        model_name += f'_iqr_multiplier{iqr_multiplier}'
        start = time.time()
        mts_filter = Chunk_Filter(data=chunk_data)
        # filter_pred: per-sample anomaly flags (1 = filtered out).
        filter_pred = mts_filter.fit(matrix_type='nng',
                                     n_neighbors=n_neighbors,
                                     iqr_multiplier=iqr_multiplier,
                                     normalization=normalization)
        # print filtering result
        acc, recall, precision, f1 = \
            mts_filter.get_metric(train_valid_label)
        filter_result = {
            'accuracy': acc,
            'recall': recall,
            'precision': precision,
            'f1': f1,
            'time': time.time() - start
        }
        vis.print_params(env=model_name, params=filter_result,
                         title='Filtering Result', clear=True)
        vis.lof_score(env=model_name, lof_score=mts_filter.lof_score)
        clear = False
    else:
        acc, recall, precision, f1 = None, None, None, None
        iqr_multiplier = 0
        # No filtering: nothing flagged.
        filter_pred = np.zeros(shape=train_valid_data.shape[0])
        clear = True

    # initialize curses module
    curses.use_default_colors()
    curses.init_pair(1, curses.COLOR_WHITE, curses.COLOR_BLACK)
    curses.init_pair(2, curses.COLOR_GREEN, curses.COLOR_BLACK)
    curses.init_pair(3, curses.COLOR_YELLOW, curses.COLOR_BLACK)
    curses.init_pair(4, curses.COLOR_RED, curses.COLOR_BLACK)
    stdscr = curses.initscr()
    stdscr.addstr(0, 0, f'{"-"*10}Data and Filtering Information{"-"*10}',
                  curses.color_pair(4) | curses.A_BOLD)
    stdscr.addstr(1, 0, f'data: {data_name}',
                  curses.color_pair(3) | curses.A_BOLD)
    stdscr.addstr(2, 0, f'seed: {random_state}',
                  curses.color_pair(3) | curses.A_BOLD)
    stdscr.addstr(3, 0, f'model: {model_name}',
                  curses.color_pair(3) | curses.A_BOLD)
    stdscr.addstr(4, 0, f'filtering accuracy: {acc}',
                  curses.color_pair(3) | curses.A_BOLD)
    stdscr.addstr(5, 0, f'filtering recall: {recall}',
                  curses.color_pair(3) | curses.A_BOLD)
    stdscr.addstr(6, 0, f'filtering precision: {precision}',
                  curses.color_pair(3) | curses.A_BOLD)
    stdscr.addstr(7, 0, f'filtering f1: {f1}',
                  curses.color_pair(3) | curses.A_BOLD)
    # NOTE(review): duplicated addstr at row 7 — harmless (overwrites the
    # same cell with the same text) but one of the two can be removed.
    stdscr.addstr(7, 0, f'filtering f1: {f1}',
                  curses.color_pair(3) | curses.A_BOLD)
    stdscr.addstr(8, 0, f'{"-"*10}Model Training Information{"-"*10}',
                  curses.color_pair(4) | curses.A_BOLD)

    train_data, train_label = \
        train_valid_data[:t_idx], train_valid_label[:t_idx]
    valid_data, valid_label = \
        train_valid_data[t_idx:], train_valid_label[t_idx:]
    train_pred = filter_pred[:t_idx]
    valid_pred = filter_pred[t_idx:v_idx]

    # normalize train data
    # Scaler fitted only on samples the filter kept (train_pred == 0).
    scaler = MinMaxScaler().fit(train_data[train_pred == 0])
    train_data = scaler.transform(train_data)
    valid_data = scaler.transform(valid_data)
    test_data = scaler.transform(test_data)

    print_params = {
        'data length': data_len,
        'anomaly ratio': anomaly_ratio,
        'anomaly type': anomaly_type,
        '# of anomaly in train set': int(train_label.sum()),
        '# of anomaly in validation set': int(valid_label.sum()),
        '# of anomaly in test set': int(test_label.sum()),
        '# of filtered in trainset': int(train_data[train_pred == 1].shape[0]),
        '# of filtered in validset': int(valid_data[valid_pred == 1].shape[0]),
        'filtering': filtering,
        'n_neighbors': n_neighbors,
        'train ratio': 0.3,
        'valid ratio': 0.2,
        'test ratio': 0.5
    }
    vis.print_params(env=model_name, params=print_params,
                     title='Dataset Info', clear=clear)
    vis.data_plot(env=f'{data_name}', data=data,
                  ano_set=generator.ano_set,
                  ano_features=generator.ano_features, clear=True)

    train_dataset = Dataset(data=train_data, filtering=filtering,
                            y_pred=train_pred, time_step=time_step)
    valid_dataset = Dataset(data=valid_data, filtering=filtering,
                            y_pred=valid_pred, time_step=time_step)
    valid_label = valid_dataset.get_label(valid_label)

    # # update epoch
    # Rescale the epoch budget by how many sequences survived filtering.
    n_seq, n_f_seq = train_dataset.n_seq, train_dataset.n_f_seq
    epoch = update_epoch(epoch=epoch, n_seq=n_seq, n_f_seq=n_f_seq,
                         batch_size=batch_size)

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                              shuffle=True, drop_last=True, pin_memory=True)
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=batch_size,
                              shuffle=False, pin_memory=True)
    if filtering:
        # Loader over the samples the filter kept, used to monitor their
        # reconstruction scores during training.
        train_valid_data = np.concatenate((train_data, valid_data))
        filtered_dataset = Dataset(data=train_valid_data, filtering=filtering,
                                   y_pred=(filter_pred == 0), time_step=1)
        filtered_loader = DataLoader(dataset=filtered_dataset,
                                     batch_size=batch_size,
                                     shuffle=False, pin_memory=True)

    model = Model(num_feature=num_feature).to(device)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model).to(device)
    checkpoint = Checkpoint(model_name=model_name)
    early_stopping = Early_stopping(patience=patience)
    parameters = list(model.parameters())
    optimizer = Adam(parameters, lr=lr, weight_decay=1e-2)
    scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=gamma)

    training_time = 0
    if retrain:
        # Resume from the saved checkpoint and continue its epoch count.
        model, optimizer = checkpoint.load_checkpoint(model, optimizer)
        checkpoint.load_log()
        e = checkpoint.epoch_list[-1]
    else:
        e = 0

    while e < epoch:
        e += 1
        batch_time = time.time()
        # train model at each epoch
        t_loss, t_loss_list, batch_list = train(model=model,
                                                train_loader=train_loader,
                                                optimizer=optimizer,
                                                epoch=e)
        v_loss, valid_score = valid(model=model, valid_loader=valid_loader)
        # apply scheduler
        scheduler.step()
        # record training time
        iter_time = time.time() - batch_time
        training_time += iter_time
        # print process
        stdscr.addstr(9, 0, f'Epoch: ', curses.color_pair(1) | curses.A_BOLD)
        stdscr.addstr(f'{e}/{epoch}', curses.color_pair(2) | curses.A_BOLD)
        stdscr.addstr(10, 0, f'train time: ', curses.color_pair(1) | curses.A_BOLD)
        stdscr.addstr(f'{int(training_time//60):2d}m {training_time%60:5.2f}s',
                      curses.color_pair(2) | curses.A_BOLD)
        stdscr.addstr(11, 0, f'iteration time: ', curses.color_pair(1) | curses.A_BOLD)
        stdscr.addstr(f'{int(iter_time//60):2d}m {iter_time%60:5.2f}s',
                      curses.color_pair(2) | curses.A_BOLD)
        stdscr.addstr(12, 0, f'train loss: ', curses.color_pair(1) | curses.A_BOLD)
        stdscr.addstr(f'{t_loss}', curses.color_pair(2) | curses.A_BOLD)
        stdscr.addstr(13, 0, f'valid_loss: ', curses.color_pair(1) | curses.A_BOLD)
        stdscr.addstr(f'{v_loss}', curses.color_pair(2) | curses.A_BOLD)
        # count for early stop and save log
        early_stop, is_best = early_stopping(score=v_loss, curses=curses,
                                             stdscr=stdscr, lower_best=True)
        checkpoint.save_log(batch_list=batch_list, epoch=e,
                            train_loss_list_per_batch=t_loss_list,
                            train_loss_per_epoch=t_loss, valid_loss=v_loss)
        checkpoint.save_checkpoint(model=model, optimizer=optimizer,
                                   is_best=is_best)
        stdscr.refresh()
        # get filtered score at each epoch
        if filtering:
            filtered_score = get_score(model=model, data_loader=filtered_loader)
        else:
            filtered_score = None
        # visualization for training process
        vis.print_training(env=model_name, EPOCH=epoch, epoch=e,
                           training_time=training_time, iter_time=iter_time,
                           avg_train_loss=t_loss, valid_loss=v_loss,
                           patience=patience, counter=early_stopping.counter)
        vis.loss_plot(env=model_name, checkpoint=checkpoint)
        if anomaly_ratio > 0:
            vis.score_distribution(env=model_name,
                                   anomaly_label=valid_label,
                                   anomaly_score=valid_score,
                                   filtered_score=filtered_score)
            vis.ROC_curve(env=model_name, anomaly_label=valid_label,
                          anomaly_score=valid_score)
        if early_stop:
            break

    # Final evaluation on the held-out test split using the best model.
    test_dataset = Dataset(data=test_data, filtering=False,
                           y_pred=None, time_step=1)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size,
                             shuffle=False, pin_memory=True)
    test_label = test_dataset.get_label(test_label)
    model = checkpoint.load_model(model)
    anomaly_score = get_score(model=model, data_loader=test_loader)
    if filtering:
        filtered_score = get_score(model=model, data_loader=filtered_loader)
    else:
        filtered_score = None
    checkpoint.save_anomaly_score(anomaly_label=test_label,
                                  anomaly_score=anomaly_score,
                                  filtered_score=filtered_score)
    vis.score_distribution(env=model_name, anomaly_label=test_label,
                           anomaly_score=anomaly_score,
                           filtered_score=filtered_score)
    auroc = vis.ROC_curve(env=model_name, anomaly_label=test_label,
                          anomaly_score=anomaly_score)
    # Append one summary row per run to a shared CSV.
    with open(f'./{args.model_name}.csv', 'a') as f:
        f.write(f'{random_state},{anomaly_type},{anomaly_ratio}, '
                f'{iqr_multiplier},{model_name.split("_")[0]},{auroc}\n')
    print('-' * 50)
class WassersteinGanTrainer(Trainer):
    """
    Args:
        network (nn.Module): the network to train
        latent_dimension (tuple): A tuple that defines the shape of the
            latent dimension (noise) that is the generator's input
        n_critic_iterations (int): The number of minibatches the critic sees
            for every minibatch the generator sees
        epochs: The total number of passes over the training set
        batch_size: The size of a minibatch
        preprocess_minibatch (function): function that takes the current
            epoch, and a minibatch, and mutates the minibatch
        kwargs_factory (callable): function that takes the current epoch and
            outputs args to pass to the generator and discriminator

    Note:
        Implements WGAN with gradient penalty (WGAN-GP). Helpers such as
        ``self._tensor``, ``self._variable``, ``self._current_epoch``,
        ``self.use_cuda`` and ``self.on_batch_complete`` come from the
        ``Trainer`` base class, which is outside this chunk.
    """

    def __init__(
            self,
            network,
            latent_dimension,
            n_critic_iterations,
            epochs,
            batch_size,
            preprocess_minibatch=None,
            kwargs_factory=None,
            debug_gradient=False,
            checkpoint_epochs=1):

        super(WassersteinGanTrainer, self).__init__(epochs, batch_size)
        self.checkpoint_epochs = checkpoint_epochs
        self.debug_gradient = debug_gradient
        self.arg_maker = kwargs_factory
        self.preprocess = preprocess_minibatch
        self.n_critic_iterations = n_critic_iterations
        self.latent_dimension = latent_dimension
        self.network = network
        self.critic = network.discriminator
        self.generator = network.generator
        self.samples = None  # most recent generator output (for callbacks)
        self.register_batch_complete_callback(self._log)
        # Optimizers are created lazily in _init_optimizers().
        self.generator_optim = None
        self.critic_optim = None

    def _log(self, *args, **kwargs):
        """Print a progress line every 10th batch."""
        if kwargs['batch'] % 10:
            return
        msg = 'Epoch {epoch}, batch {batch}, generator {generator_score}, ' \
              'real {real_score}, critic {critic_loss}'
        print(msg.format(**kwargs))

    def _minibatch(self, data):
        """Sample a random minibatch (with replacement) from `data`."""
        indices = np.random.randint(0, len(data), self.batch_size)
        return data[indices, ...]

    def _gradient_penalty(self, real_samples, fake_samples, kwargs):
        """
        Compute the norm of the gradients for each sample in a
        batch, and penalize anything on either side of unit norm
        """
        import torch
        from torch.autograd import Variable, grad

        real_samples = real_samples.view(fake_samples.shape)

        subset_size = real_samples.shape[0]

        real_samples = real_samples[:subset_size]
        fake_samples = fake_samples[:subset_size]

        # One interpolation coefficient per sample, broadcast over all
        # remaining dimensions.
        alpha = torch.rand(subset_size)
        if self.use_cuda:
            alpha = alpha.cuda()
        alpha = alpha.view((-1,) + ((1,) * (real_samples.dim() - 1)))

        interpolates = alpha * real_samples + ((1 - alpha) * fake_samples)
        interpolates = Variable(interpolates, requires_grad=True)
        if self.use_cuda:
            interpolates = interpolates.cuda()

        d_output = self.critic(interpolates, **kwargs)

        grad_ouputs = torch.ones(d_output.size())
        if self.use_cuda:
            grad_ouputs = grad_ouputs.cuda()

        gradients = grad(
            outputs=d_output,
            inputs=interpolates,
            grad_outputs=grad_ouputs,
            create_graph=True,
            retain_graph=True,
            only_inputs=True)[0]

        # Two-sided penalty toward unit gradient norm, weighted by 10.
        return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * 10

    def freeze_generator(self):
        """Stop gradient flow into the generator's parameters."""
        for p in self.generator.parameters():
            p.requires_grad = False

    def unfreeze_generator(self):
        """Re-enable gradient flow into the generator's parameters."""
        for p in self.generator.parameters():
            p.requires_grad = True

    def freeze_discriminator(self):
        """Stop gradient flow into the critic's parameters."""
        for p in self.critic.parameters():
            p.requires_grad = False

    def unfreeze_discriminator(self):
        """Re-enable gradient flow into the critic's parameters."""
        for p in self.critic.parameters():
            p.requires_grad = True

    def _debug_network_gradient(self, network):
        """If enabled, dump per-parameter gradient stats before zeroing."""
        if not self.debug_gradient:
            return
        for n, p in network.named_parameters():
            g = p.grad
            if g is not None:
                print((n, g.min().data[0], g.max().data[0], g.mean().data[0]))

    def zero_generator_gradients(self):
        self._debug_network_gradient(self.generator)
        self.generator.zero_grad()

    def zero_discriminator_gradients(self):
        self._debug_network_gradient(self.critic)
        self.critic.zero_grad()

    def _init_optimizers(self):
        """Lazily build Adam optimizers (lr=1e-4, betas=(0, 0.9)) over
        the currently-trainable parameters of each sub-network."""
        if self.generator_optim is None or self.critic_optim is None:
            from torch.optim import Adam
            trainable_generator_params = (
                p for p in self.generator.parameters() if p.requires_grad)
            trainable_critic_params = (
                p for p in self.critic.parameters() if p.requires_grad)

            self.generator_optim = Adam(
                trainable_generator_params, lr=0.0001, betas=(0, 0.9))
            self.critic_optim = Adam(
                trainable_critic_params, lr=0.0001, betas=(0, 0.9))

    def _cuda(self, device=None):
        self.generator = self.generator.cuda()
        self.critic = self.critic.cuda()

    def train(self, data):
        """Run up to `checkpoint_epochs` epochs of WGAN-GP training over
        `data` and return the (trained) network.

        Per generator step the critic is updated `n_critic_iterations`
        times on (real, fake) pairs with the gradient penalty.
        """
        self.network.train()
        self.unfreeze_discriminator()
        self.unfreeze_generator()

        data = data.astype(np.float32)

        noise_shape = (self.batch_size,) + self.latent_dimension
        noise = self._tensor(noise_shape)

        self._init_optimizers()

        start = self._current_epoch
        stop = self._current_epoch + self.checkpoint_epochs

        for epoch in range(start, stop):
            if epoch >= self.epochs:
                break

            if self.arg_maker:
                kwargs = self.arg_maker(epoch)
            else:
                kwargs = dict()

            for i in range(0, len(data), self.batch_size):
                self.zero_generator_gradients()
                self.zero_discriminator_gradients()

                self.freeze_generator()
                self.unfreeze_discriminator()

                # -- critic updates -------------------------------------
                for c in range(self.n_critic_iterations):

                    self.zero_discriminator_gradients()

                    input_v = self._variable(self._minibatch(data))

                    if self.preprocess:
                        input_v = self.preprocess(epoch, input_v)

                    d_real = self.critic.forward(input_v, **kwargs)

                    # train discriminator on fake data
                    noise.normal_(0, 1)
                    noise_v = Variable(noise, volatile=True)

                    # Detach from the generator graph: critic sees the
                    # fake batch as constants.
                    fake = Variable(
                        self.generator.forward(noise_v, **kwargs).data)

                    if self.preprocess:
                        fake = self.preprocess(epoch, fake)

                    d_fake = self.critic.forward(fake, **kwargs)

                    real_mean = torch.mean(d_real)
                    fake_mean = torch.mean(d_fake)
                    gp = self._gradient_penalty(input_v.data, fake.data, kwargs)
                    d_loss = (fake_mean - real_mean) + gp
                    d_loss.backward()
                    self.critic_optim.step()

                self.zero_discriminator_gradients()
                self.zero_generator_gradients()

                self.unfreeze_generator()
                self.freeze_discriminator()

                # train generator
                noise.normal_(0, 1)
                noise_v = Variable(noise)
                fake = self.generator.forward(noise_v, **kwargs)

                if self.preprocess:
                    fake = self.preprocess(epoch, fake)

                self.samples = fake

                d_fake = self.critic.forward(fake, **kwargs)
                g_loss = -torch.mean(d_fake)
                g_loss.backward()
                self.generator_optim.step()

                gl = g_loss.data.item()
                dl = d_loss.data.item()
                rl = real_mean.data.item()

                self.on_batch_complete(
                    epoch=epoch,
                    batch=i,
                    generator_score=gl,
                    real_score=rl,
                    critic_loss=dl,
                    samples=self.samples,
                    network=self.network)

            self._current_epoch += 1

        return self.network
config.dev_path, 'test': config.test_path }) return bundle data_bundle = cache() print(data_bundle) model = Model(data_bundle.get_vocab(Const.INPUTS(0)), config) print(model) loss = SoftmaxLoss() metric = CRMetric() optim = Adam(model.parameters(), lr=config.lr) lr_decay_callback = LRCallback(optim.param_groups, config.lr_decay) trainer = Trainer( model=model, train_data=data_bundle.datasets["train"], dev_data=data_bundle.datasets["dev"], loss=loss, metrics=metric, check_code_level=-1, sampler=None, batch_size=1, device=torch.device("cuda:" + config.cuda) if torch.cuda.is_available() else None, metric_key='f',
class DQN_Distribution_Agent:
    """Distributional (categorical / C51-style) DQN agent.

    The network outputs, per action, a probability distribution over
    `args.atoms` fixed support values in [args.v_min, args.v_max].
    Helpers (`ExpReplay`, `get_models`, `clip_grad_norm`) are defined
    elsewhere in this project.
    """

    def __init__(self, args, exp_model, logging_func):
        self.args = args

        # Exploration Model
        self.exp_model = exp_model

        self.log = logging_func["log"]

        # Experience Replay
        self.replay = ExpReplay(args.exp_replay_size, args.stale_limit, exp_model, args, priority=self.args.prioritized)

        # DQN and Target DQN
        model = get_models(args.model)
        self.dqn = model(actions=args.actions, atoms=args.atoms)
        self.target_dqn = model(actions=args.actions, atoms=args.atoms)

        # Count and report trainable parameters.
        dqn_params = 0
        for weight in self.dqn.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            dqn_params += weight_params
        print("Distrib DQN has {:,} parameters.".format(dqn_params))

        self.target_dqn.eval()

        if args.gpu:
            print("Moving models to GPU.")
            self.dqn.cuda()
            self.target_dqn.cuda()

        # Optimizer
        self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)
        # self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)

        self.T = 0  # environment steps taken (excluding pure exploration)
        self.target_sync_T = -self.args.t_max  # forces an initial sync

    def sync_target_network(self):
        """Hard-copy online-network weights into the target network."""
        for target, source in zip(self.target_dqn.parameters(), self.dqn.parameters()):
            target.data = source.data

    def act(self, state, epsilon, exp_model, evaluation=False):
        """Epsilon-greedy action over expected Q-values derived from the
        predicted value distributions. Returns (action, extra_info)."""
        # self.T += 1
        self.dqn.eval()
        orig_state = state[:, :, -1:]
        state = torch.from_numpy(state).float().transpose_(0, 2).unsqueeze(0)
        q_values_distributions = self.dqn(Variable(state, volatile=True)).cpu().data[0]
        # TODO: Log Q-Value distributions
        # print(q_values_distributions)

        # Expected Q per action = sum over atoms of p(atom) * atom value.
        values = torch.linspace(self.args.v_min, self.args.v_max, steps=self.args.atoms)
        values = values.view(1, self.args.atoms)
        values = values.expand(self.args.actions, self.args.atoms)
        # print(values, q_values_distributions, torch.sum(q_values_distributions * values, dim=1))

        q_value_expectations = torch.sum(q_values_distributions * values, dim=1)
        q_values_numpy = q_value_expectations.numpy()

        extra_info = {}

        if self.args.optimistic_init and not evaluation:
            raise NotImplementedError
            # NOTE(review): everything below this raise is dead code, and
            # it references an undefined name `q_values` (presumably meant
            # to be `q_values_numpy`) — it would NameError if ever revived.
            q_values_pre_bonus = np.copy(q_values_numpy)
            if not self.args.ucb:
                for a in range(self.args.actions):
                    _, info = exp_model.bonus(orig_state, a, dont_remember=True)
                    action_pseudo_count = info["Pseudo_Count"]
                    # TODO: Log the optimism bonuses
                    optimism_bonus = self.args.optimistic_scaler / np.sqrt(action_pseudo_count + 0.01)
                    self.log("Bandit/Action_{}".format(a), optimism_bonus, step=self.T)
                    q_values[a] += optimism_bonus
            else:
                action_counts = []
                for a in range(self.args.actions):
                    _, info = exp_model.bonus(orig_state, a, dont_remember=True)
                    action_pseudo_count = info["Pseudo_Count"]
                    action_counts.append(action_pseudo_count)
                total_count = sum(action_counts)
                for ai, a in enumerate(action_counts):
                    # TODO: Log the optimism bonuses
                    optimisim_bonus = self.args.optimistic_scaler * np.sqrt(2 * np.log(max(1, total_count)) / (a + 0.01))
                    self.log("Bandit/UCB/Action_{}".format(ai), optimisim_bonus, step=self.T)
                    q_values[ai] += optimisim_bonus
            extra_info["Action_Bonus"] = q_values_numpy - q_values_pre_bonus

        extra_info["Q_Values"] = q_values_numpy

        if np.random.random() < epsilon:
            action = np.random.randint(low=0, high=self.args.actions)
        else:
            action = int(np.argmax(q_values_numpy))
            # action = q_values.max(0)[1][0]  # Torch...

        extra_info["Action"] = action

        return action, extra_info

    def experience(self, state, action, reward, state_next, steps, terminated, pseudo_reward=0, density=1, exploring=False):
        """Store one transition; the step counter only advances once the
        agent is past its pure-exploration phase."""
        if not exploring:
            self.T += 1
        self.replay.Add_Exp(state, action, reward, state_next, steps, terminated, pseudo_reward, density)

    def end_of_trajectory(self):
        self.replay.end_of_trajectory()

    def train(self):
        """Run `args.iters` categorical-DQN updates and return an info
        dict (gradient norm, trained states)."""
        if self.T - self.target_sync_T > self.args.target:
            self.sync_target_network()
            self.target_sync_T = self.T

        info = {}

        for _ in range(self.args.iters):
            self.dqn.eval()

            batch, indices, is_weights = self.replay.Sample_N(self.args.batch_size, self.args.n_step, self.args.gamma)
            columns = list(zip(*batch))

            states = Variable(torch.from_numpy(np.array(columns[0])).float().transpose_(1, 3))
            actions = Variable(torch.LongTensor(columns[1]))
            terminal_states = Variable(torch.FloatTensor(columns[5]))
            rewards = Variable(torch.FloatTensor(columns[2]))
            # Have to clip rewards for DQN
            rewards = torch.clamp(rewards, -1, 1)
            steps = Variable(torch.FloatTensor(columns[4]))
            new_states = Variable(torch.from_numpy(np.array(columns[3])).float().transpose_(1, 3))

            target_dqn_qvals = self.target_dqn(new_states).cpu()
            # Make a new variable with those values so that these are treated as constants
            target_dqn_qvals_data = Variable(target_dqn_qvals.data)

            # Effective discount: gamma^steps, zeroed for terminal states.
            q_value_gammas = (Variable(torch.ones(terminal_states.size()[0])) - terminal_states)
            inter = Variable(torch.ones(terminal_states.size()[0]) * self.args.gamma)
            # print(steps)
            q_value_gammas = q_value_gammas * torch.pow(inter, steps)

            values = torch.linspace(self.args.v_min, self.args.v_max, steps=self.args.atoms)
            values = Variable(values)
            values = values.view(1, 1, self.args.atoms)
            values = values.expand(self.args.batch_size, self.args.actions, self.args.atoms)
            # print(values)

            q_value_gammas = q_value_gammas.view(self.args.batch_size, 1, 1)
            q_value_gammas = q_value_gammas.expand(self.args.batch_size, self.args.actions, self.args.atoms)
            # print(q_value_gammas)
            gamma_values = q_value_gammas * values
            # print(gamma_values)
            rewards = rewards.view(self.args.batch_size, 1, 1)
            rewards = rewards.expand(self.args.batch_size, self.args.actions, self.args.atoms)
            # print(rewards)
            # Bellman-shifted support, clamped back into [v_min, v_max].
            operator_q_values = rewards + gamma_values
            # print(operator_q_values)

            clipped_operator_q_values = torch.clamp(operator_q_values, self.args.v_min, self.args.v_max)

            delta_z = (self.args.v_max - self.args.v_min) / (self.args.atoms - 1)
            # Using the notation from the categorical paper
            b_j = (clipped_operator_q_values - self.args.v_min) / delta_z
            # print(b_j)

            lower_bounds = torch.floor(b_j)
            upper_bounds = torch.ceil(b_j)

            # Work out the max action
            atom_values = Variable(torch.linspace(self.args.v_min, self.args.v_max, steps=self.args.atoms))
            atom_values = atom_values.view(1, 1, self.args.atoms)
            atom_values = atom_values.expand(self.args.batch_size, self.args.actions, self.args.atoms)

            # Sum over the atoms dimension
            target_expected_qvalues = torch.sum(target_dqn_qvals_data * atom_values, dim=2)
            # Get the maximum actions index across the batch size
            max_actions = target_expected_qvalues.max(dim=1)[1].view(-1)

            # Project back onto the original support for the max actions
            q_value_distribution_targets = torch.zeros(self.args.batch_size, self.args.atoms)

            # Distributions for the max actions
            # print(target_dqn_qvals_data, max_actions)
            q_value_max_actions_distribs = target_dqn_qvals_data.index_select(dim=1, index=max_actions)[:,0,:]
            # print(q_value_max_actions_distribs)

            # Lower_bounds_actions
            lower_bounds_actions = lower_bounds.index_select(dim=1, index=max_actions)[:,0,:]
            upper_bounds_actions = upper_bounds.index_select(dim=1, index=max_actions)[:,0,:]
            b_j_actions = b_j.index_select(dim=1, index=max_actions)[:,0,:]

            # Split each shifted atom's probability mass between its two
            # neighbouring support atoms, proportional to distance.
            lower_bound_values_to_add = q_value_max_actions_distribs * (upper_bounds_actions - b_j_actions)
            upper_bound_values_to_add = q_value_max_actions_distribs * (b_j_actions - lower_bounds_actions)
            # print(lower_bounds_actions)
            # print(lower_bound_values_to_add)

            # Naive looping
            for b in range(self.args.batch_size):
                for l, pj in zip(lower_bounds_actions.data.type(torch.LongTensor)[b], lower_bound_values_to_add[b].data):
                    q_value_distribution_targets[b][l] += pj
                for u, pj in zip(upper_bounds_actions.data.type(torch.LongTensor)[b], upper_bound_values_to_add[b].data):
                    q_value_distribution_targets[b][u] += pj

            self.dqn.train()
            if self.args.gpu:
                actions = actions.cuda()
                # q_value_targets = q_value_targets.cuda()
                q_value_distribution_targets = q_value_distribution_targets.cuda()
            model_predictions = self.dqn(states).index_select(1, actions.view(-1))[:,0,:]
            q_value_distribution_targets = Variable(q_value_distribution_targets)
            # print(q_value_distribution_targets)
            # print(model_predictions)

            # Cross entropy loss
            ce_loss = -torch.sum(q_value_distribution_targets * torch.log(model_predictions), dim=1)
            ce_batch_loss = ce_loss.mean()

            info = {}

            self.log("DQN/X_Entropy_Loss", ce_batch_loss.data[0], step=self.T)

            # Update
            self.optimizer.zero_grad()
            ce_batch_loss.backward()

            # Taken from pytorch clip_grad_norm
            # Remove once the pip version it up to date with source
            gradient_norm = clip_grad_norm(self.dqn.parameters(), self.args.clip_value)
            if gradient_norm is not None:
                info["Norm"] = gradient_norm

            self.optimizer.step()

            if "States" in info:
                states_trained = info["States"]
                info["States"] = states_trained + columns[0]
            else:
                info["States"] = columns[0]

        # Pad out the states to be of size batch_size
        if len(info["States"]) < self.args.batch_size:
            old_states = info["States"]
            # NOTE(review): `old_states[0] * k` replicates only the FIRST
            # state k times and drops the rest — presumably this was meant
            # to be `old_states + old_states[0:1] * k`; verify against the
            # consumer of info["States"].
            new_states = old_states[0] * (self.args.batch_size - len(old_states))
            info["States"] = new_states

        return info
def train(
        hyp,  # path/to/hyp.yaml or hyp dictionary
        opt,
        device,
):
    """Run a full YOLOv5-style training session.

    Builds the model, optimizer and LR scheduler from ``hyp``/``opt``,
    optionally resumes from a ``.pt`` checkpoint, trains for ``opt.epochs``
    epochs with warmup, EMA, AMP and (optionally) DDP, validates on rank 0,
    writes last/best checkpoints, and returns the final validation
    ``results`` tuple.

    Args:
        hyp: path to a hyperparameter YAML file, or an already-loaded dict.
        opt: argparse-style namespace with all run options (save_dir, epochs,
            batch_size, weights, ...).
        device: torch.device to train on; DP/DDP branches key off its type.
    """
    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
        opt.resume, opt.noval, opt.nosave, opt.workers

    # Directories
    w = save_dir / 'weights'  # weights dir
    w.mkdir(parents=True, exist_ok=True)  # make dir
    last, best, results_file = w / 'last.pt', w / 'best.pt', save_dir / 'results.txt'

    # Hyperparameters
    if isinstance(hyp, str):
        with open(hyp) as f:
            hyp = yaml.safe_load(f)  # load hyps dict
    LOGGER.info(
        colorstr('hyperparameters: ') +
        ', '.join(f'{k}={v}' for k, v in hyp.items()))

    # Save run settings (so the run is reproducible from save_dir alone)
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.safe_dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.safe_dump(vars(opt), f, sort_keys=False)

    # Configure
    plots = not evolve  # create plots
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)
    with open(data) as f:
        data_dict = yaml.safe_load(f)  # data dict

    # Loggers (TensorBoard + Weights & Biases), rank 0 only
    loggers = {'wandb': None, 'tb': None}  # loggers dict
    if RANK in [-1, 0]:
        # TensorBoard
        if plots:
            prefix = colorstr('tensorboard: ')
            LOGGER.info(
                f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/"
            )
            loggers['tb'] = SummaryWriter(str(save_dir))

        # W&B
        opt.hyp = hyp  # add hyperparameters
        # Reuse the wandb run id stored inside the checkpoint only when resuming
        run_id = torch.load(weights).get('wandb_id') if weights.endswith(
            '.pt') and os.path.isfile(weights) else None
        run_id = run_id if opt.resume else None  # start fresh run if transfer learning
        wandb_logger = WandbLogger(opt, save_dir.stem, run_id, data_dict)
        loggers['wandb'] = wandb_logger.wandb
        if loggers['wandb']:
            data_dict = wandb_logger.data_dict
            weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp  # may update values if resuming

    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if single_cls and len(
        data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(
        names
    ) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
    is_coco = data.endswith('coco.yaml') and nc == 80  # COCO dataset

    # Model
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(RANK):
            weights = attempt_download(
                weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        model = Model(cfg or ckpt['model'].yaml,
                      ch=3,
                      nc=nc,
                      anchors=hyp.get('anchors')).to(device)  # create
        # Drop checkpoint anchors when the user supplies their own cfg/anchors
        exclude = [
            'anchor'
        ] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
        csd = ckpt['model'].float().state_dict(
        )  # checkpoint state_dict as FP32
        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(csd, strict=False)  # load
        LOGGER.info(
            f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}'
        )  # report
    else:
        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
    with torch_distributed_zero_first(RANK):
        check_dataset(data_dict)  # check
    train_path, val_path = data_dict['train'], data_dict['val']

    # Freeze
    freeze = []  # parameter names to freeze (full or partial)
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print(f'freezing {k}')
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size),
                     1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    g0, g1, g2 = [], [], []  # optimizer parameter groups
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
            g2.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # BN weight -> g0 (no weight decay applied below)
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(
                v.weight, nn.Parameter):  # other weights -> g1 (decayed via add_param_group)
            g1.append(v.weight)

    if opt.adam:
        optimizer = Adam(g0, lr=hyp['lr0'],
                         betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
    optimizer.add_param_group({
        'params': g1,
        'weight_decay': hyp['weight_decay']
    })  # add g1 with weight_decay
    optimizer.add_param_group({'params': g2})  # add g2 (biases)
    # NOTE(review): the log message below labels g1 "(no decay)", but g1 is the
    # group that add_param_group gives weight_decay; g0 is the undecayed group.
    # Message kept byte-identical; the wording appears swapped — confirm upstream.
    LOGGER.info(
        f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
        f"{len(g0)} weight, {len(g1)} weight (no decay), {len(g2)} bias")
    del g0, g1, g2

    # Scheduler
    if opt.linear_lr:
        lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp[
            'lrf']  # linear
    else:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA (exponential moving average of weights, rank 0 only)
    ema = ModelEMA(model) if RANK in [-1, 0] else None

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # EMA
        if ema and ckpt.get('ema'):
            ema.ema.load_state_dict(ckpt['ema'].float().state_dict())
            ema.updates = ckpt['updates']

        # Results
        if ckpt.get('training_results') is not None:
            results_file.write_text(
                ckpt['training_results'])  # write results.txt

        # Epochs
        start_epoch = ckpt['epoch'] + 1
        if resume:
            assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.'
        if epochs < start_epoch:
            LOGGER.info(
                f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs."
            )
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, csd

    # Image sizes
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    nl = model.model[
        -1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        logging.warning(
            'DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
            'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.'
        )
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Trainloader
    train_loader, dataset = create_dataloader(train_path,
                                              imgsz,
                                              batch_size // WORLD_SIZE,
                                              gs,
                                              single_cls,
                                              hyp=hyp,
                                              augment=True,
                                              cache=opt.cache_images,
                                              rect=opt.rect,
                                              rank=RANK,
                                              workers=workers,
                                              image_weights=opt.image_weights,
                                              quad=opt.quad,
                                              prefix=colorstr('train: '))
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(train_loader)  # number of batches
    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'

    # Process 0
    if RANK in [-1, 0]:
        val_loader = create_dataloader(val_path,
                                       imgsz,
                                       batch_size // WORLD_SIZE * 2,
                                       gs,
                                       single_cls,
                                       hyp=hyp,
                                       cache=opt.cache_images and not noval,
                                       rect=True,
                                       rank=-1,
                                       workers=workers,
                                       pad=0.5,
                                       prefix=colorstr('val: '))[0]

        if not resume:
            labels = np.concatenate(dataset.labels, 0)
            # c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labels(labels, names, save_dir, loggers)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
            model.half().float()  # pre-reduce anchor precision

    # DDP mode
    if cuda and RANK != -1:
        model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)

    # Model parameters
    hyp['box'] *= 3. / nl  # scale to layers
    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
    hyp['obj'] *= (imgsz / 640)**2 * 3. / nl  # scale to image size and layers
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.class_weights = labels_to_class_weights(
        dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb),
             1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    last_opt_step = -1
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0
               )  # P, R, [email protected], [email protected], val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    compute_loss = ComputeLoss(model)  # init loss class
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                f'Using {train_loader.num_workers} dataloader workers\n'
                f'Logging results to {save_dir}\n'
                f'Starting training for {epochs} epochs...')
    for epoch in range(
            start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if RANK in [-1, 0]:
                cw = model.class_weights.cpu().numpy() * (
                    1 - maps)**2 / nc  # class weights
                iw = labels_to_image_weights(dataset.labels,
                                             nc=nc,
                                             class_weights=cw)  # image weights
                dataset.indices = random.choices(
                    range(dataset.n), weights=iw,
                    k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if RANK != -1:
                indices = (torch.tensor(dataset.indices)
                           if RANK == 0 else torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if RANK != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(
            ('\n' + '%10s' * 8) %
            ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'labels', 'img_size'))
        if RANK in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (
                imgs, targets, paths, _
        ) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float(
            ) / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(
                    1,
                    np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [
                        hyp['warmup_bias_lr'] if j == 2 else 0.0,
                        x['initial_lr'] * lf(epoch)
                    ])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(
                            ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5,
                                      imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs
                          for x in imgs.shape[2:]
                          ]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs,
                                                     size=ns,
                                                     mode='bilinear',
                                                     align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(
                    pred, targets.to(device))  # loss scaled by batch_size
                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize (only every `accumulate` batches — gradient accumulation)
            if ni - last_opt_step >= accumulate:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Print
            if RANK in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1
                                                    )  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                s = ('%10s' * 2 +
                     '%10.4g' * 6) % (f'{epoch}/{epochs - 1}', mem, *mloss,
                                      targets.shape[0], imgs.shape[-1])
                pbar.set_description(s)

                # Plot
                if plots and ni < 3:
                    f = save_dir / f'train_batch{ni}.jpg'  # filename
                    Thread(target=plot_images,
                           args=(imgs, targets, paths, f),
                           daemon=True).start()
                    if loggers['tb'] and ni == 0:  # TensorBoard
                        with warnings.catch_warnings():
                            warnings.simplefilter(
                                'ignore')  # suppress jit trace warning
                            loggers['tb'].add_graph(
                                torch.jit.trace(de_parallel(model),
                                                imgs[0:1],
                                                strict=False), [])
                elif plots and ni == 10 and loggers['wandb']:
                    wandb_logger.log({
                        'Mosaics': [
                            loggers['wandb'].Image(str(x), caption=x.name)
                            for x in save_dir.glob('train*.jpg') if x.exists()
                        ]
                    })
            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
        scheduler.step()

        # DDP process 0 or single-GPU
        if RANK in [-1, 0]:
            # mAP
            ema.update_attr(model,
                            include=[
                                'yaml', 'nc', 'hyp', 'names', 'stride',
                                'class_weights'
                            ])
            final_epoch = epoch + 1 == epochs
            if not noval or final_epoch:  # Calculate mAP
                wandb_logger.current_epoch = epoch + 1
                results, maps, _ = val.run(data_dict,
                                           batch_size=batch_size // WORLD_SIZE * 2,
                                           imgsz=imgsz,
                                           model=ema.ema,
                                           single_cls=single_cls,
                                           dataloader=val_loader,
                                           save_dir=save_dir,
                                           save_json=is_coco and final_epoch,
                                           verbose=nc < 50 and final_epoch,
                                           plots=plots and final_epoch,
                                           wandb_logger=wandb_logger,
                                           compute_loss=compute_loss)

            # Write
            with open(results_file, 'a') as f:
                f.write(s + '%10.4g' * 7 % results + '\n')  # append metrics, val_loss

            # Log
            tags = [
                'train/box_loss',
                'train/obj_loss',
                'train/cls_loss',  # train loss
                'metrics/precision',
                'metrics/recall',
                'metrics/mAP_0.5',
                'metrics/mAP_0.5:0.95',
                'val/box_loss',
                'val/obj_loss',
                'val/cls_loss',  # val loss
                'x/lr0',
                'x/lr1',
                'x/lr2'
            ]  # params
            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                if loggers['tb']:
                    loggers['tb'].add_scalar(tag, x, epoch)  # TensorBoard
                if loggers['wandb']:
                    wandb_logger.log({tag: x})  # W&B

            # Update best mAP
            fi = fitness(np.array(results).reshape(
                1, -1))  # weighted combination of [P, R, [email protected], [email protected]]
            if fi > best_fitness:
                best_fitness = fi
            wandb_logger.end_epoch(best_result=best_fitness == fi)

            # Save model
            if (not nosave) or (final_epoch and not evolve):  # if save
                ckpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'training_results': results_file.read_text(),
                    'model': deepcopy(de_parallel(model)).half(),
                    'ema': deepcopy(ema.ema).half(),
                    'updates': ema.updates,
                    'optimizer': optimizer.state_dict(),
                    'wandb_id': wandb_logger.wandb_run.id if loggers['wandb'] else None
                }

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                if loggers['wandb']:
                    if ((epoch + 1) % opt.save_period == 0
                            and not final_epoch) and opt.save_period != -1:
                        wandb_logger.log_model(last.parent,
                                               opt,
                                               epoch,
                                               fi,
                                               best_model=best_fitness == fi)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training -----------------------------------------------------------------------------------------------------
    if RANK in [-1, 0]:
        LOGGER.info(
            f'{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.\n'
        )
        if plots:
            plot_results(save_dir=save_dir)  # save as results.png
            if loggers['wandb']:
                files = [
                    'results.png', 'confusion_matrix.png',
                    *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]
                ]
                wandb_logger.log({
                    "Results": [
                        loggers['wandb'].Image(str(save_dir / f), caption=f)
                        for f in files if (save_dir / f).exists()
                    ]
                })

        if not evolve:
            if is_coco:  # COCO dataset
                for m in [last, best
                          ] if best.exists() else [last]:  # speed, mAP tests
                    results, _, _ = val.run(
                        data_dict,
                        batch_size=batch_size // WORLD_SIZE * 2,
                        imgsz=imgsz,
                        model=attempt_load(m, device).half(),
                        iou_thres=
                        0.7,  # NMS IoU threshold for best pycocotools results
                        single_cls=single_cls,
                        dataloader=val_loader,
                        save_dir=save_dir,
                        save_json=True,
                        plots=False)

            # Strip optimizers
            for f in last, best:
                if f.exists():
                    strip_optimizer(f)  # strip optimizers
            if loggers['wandb']:  # Log the stripped model
                loggers['wandb'].log_artifact(
                    str(best if best.exists() else last),
                    type='model',
                    name='run_' + wandb_logger.wandb_run.id + '_model',
                    aliases=['latest', 'best', 'stripped'])
        wandb_logger.finish_run()

    torch.cuda.empty_cache()
    return results
class DDPG(object):
    """Deep Deterministic Policy Gradient agent with parameter-space noise.

    Maintains an actor/critic pair, Polyak-averaged target copies of each
    (mixed in with rate ``tau``), and a third "perturbed" actor whose weights
    are the current actor's plus Gaussian parameter noise, used for
    exploration.
    """

    def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):
        self.num_inputs = num_inputs
        self.action_space = action_space

        self.actor = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_perturbed = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_optim = Adam(self.actor.parameters(), lr=1e-4)

        self.critic = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-3)

        self.gamma = gamma  # discount factor
        self.tau = tau  # soft-update mixing rate

        # Start targets identical to the online networks
        hard_update(self.actor_target, self.actor)  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)

    def select_action(self, state, action_noise=None, param_noise=None):
        """Return a clipped action for ``state``.

        Uses the perturbed actor when ``param_noise`` is given, otherwise the
        plain actor; optional additive ``action_noise`` is applied afterwards.
        """
        self.actor.eval()  # disable train-mode layers for action selection
        if param_noise is not None:
            mu = self.actor_perturbed((Variable(state)))
        else:
            mu = self.actor((Variable(state)))

        self.actor.train()
        mu = mu.data

        if action_noise is not None:
            mu += torch.Tensor(action_noise.noise())

        return mu.clamp(-1, 1)

    def update_parameters(self, batch):
        """One DDPG update step from a batch of transitions.

        Returns (critic_loss, actor_loss) as Python floats.
        """
        state_batch = Variable(torch.cat(batch.state))
        action_batch = Variable(torch.cat(batch.action))
        reward_batch = Variable(torch.cat(batch.reward))
        mask_batch = Variable(torch.cat(batch.mask))
        next_state_batch = Variable(torch.cat(batch.next_state))

        # Bootstrapped target: r + gamma * mask * Q'(s', pi'(s'))
        next_action_batch = self.actor_target(next_state_batch)
        next_state_action_values = self.critic_target(next_state_batch, next_action_batch)

        reward_batch = reward_batch.unsqueeze(1)
        mask_batch = mask_batch.unsqueeze(1)
        expected_state_action_batch = reward_batch + (self.gamma * mask_batch * next_state_action_values)

        # Critic update
        self.critic_optim.zero_grad()
        state_action_batch = self.critic((state_batch), (action_batch))
        value_loss = F.mse_loss(state_action_batch, expected_state_action_batch)
        value_loss.backward()
        self.critic_optim.step()

        # Actor update: ascend Q(s, pi(s))
        self.actor_optim.zero_grad()
        policy_loss = -self.critic((state_batch), self.actor((state_batch)))
        policy_loss = policy_loss.mean()
        policy_loss.backward()
        self.actor_optim.step()

        # Soft-update both targets
        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)

        return value_loss.item(), policy_loss.item()

    def perturb_actor_parameters(self, param_noise):
        """Apply parameter noise to actor model, for exploration."""
        hard_update(self.actor_perturbed, self.actor)
        params = self.actor_perturbed.state_dict()
        for name in params:
            if 'ln' in name:
                # Skip layer-norm parameters — parameter-space noise should
                # only perturb the actual weights (Plappert et al., 2017).
                # BUG FIX: the original used `pass` here, which is a no-op and
                # fell through to perturb these parameters anyway.
                continue
            param = params[name]
            param += torch.randn(param.shape) * param_noise.current_stddev

    def save_model(self, env_name, suffix="", actor_path=None, critic_path=None):
        """Persist actor/critic weights under models/ (paths auto-derived if None)."""
        if not os.path.exists('models/'):
            os.makedirs('models/')

        if actor_path is None:
            actor_path = "models/ddpg_actor_{}_{}".format(env_name, suffix)
        if critic_path is None:
            critic_path = "models/ddpg_critic_{}_{}".format(env_name, suffix)
        print('Saving models to {} and {}'.format(actor_path, critic_path))
        torch.save(self.actor.state_dict(), actor_path)
        torch.save(self.critic.state_dict(), critic_path)

    def load_model(self, actor_path, critic_path):
        """Load actor/critic weights; either path may be None to skip it."""
        print('Loading models from {} and {}'.format(actor_path, critic_path))
        if actor_path is not None:
            self.actor.load_state_dict(torch.load(actor_path))
        if critic_path is not None:
            self.critic.load_state_dict(torch.load(critic_path))
def experiment(exp_specs):
    """Train a RecurrentModel to reconstruct partially-observed grid frames.

    Runs truncated BPTT: loss is accumulated for ``freq_bptt`` iterations,
    then backpropagated in one step and the LSTM state is detached. Hidden
    state is re-initialised every ``episode_length`` iterations. Periodically
    dumps reconstruction images and prints train/val MSE.

    Args:
        exp_specs: dict of experiment settings (use_gpu, exp_id, exp_name,
            seed, episode_length, batch_size, model_lr, model_wd, freq_bptt,
            max_iters, freq_val).
    """
    ptu.set_gpu_mode(exp_specs['use_gpu'])

    # Set up logging ----------------------------------------------------------
    exp_id = exp_specs['exp_id']
    exp_prefix = exp_specs['exp_name']
    seed = exp_specs['seed']
    set_seed(seed)
    setup_logger(exp_prefix=exp_prefix, exp_id=exp_id, variant=exp_specs)

    # Prep the data -----------------------------------------------------------
    env_specs = {
        'flat_repr': False,
        'one_hot_repr': False,
        'maze_h': 9,
        'maze_w': 9,
        'obs_h': 5,
        'obs_w': 5,
        'scale': 4,
        'num_objs': 10
    }
    maze_constructor = lambda: PartiallyObservedGrid(env_specs)
    # Separate loaders for train and validation so their episode streams are independent
    data_loader = VerySpecificOnTheFLyDataLoader(maze_constructor,
                                                 exp_specs['episode_length'],
                                                 exp_specs['batch_size'],
                                                 use_gpu=ptu.gpu_enabled())
    val_data_loader = VerySpecificOnTheFLyDataLoader(
        maze_constructor,
        exp_specs['episode_length'],
        exp_specs['batch_size'],
        use_gpu=ptu.gpu_enabled())

    # Model Definition --------------------------------------------------------
    model = RecurrentModel()
    if ptu.gpu_enabled():
        model.cuda()

    # Optimizer ---------------------------------------------------------------
    model_optim = Adam(model.parameters(),
                       lr=float(exp_specs['model_lr']),
                       weight_decay=float(exp_specs['model_wd']))

    # -------------------------------------------------------------------------
    freq_bptt = exp_specs['freq_bptt']
    episode_length = exp_specs['episode_length']
    losses = []
    for iter_num in range(int(float(exp_specs['max_iters']))):
        # Truncated-BPTT boundary: flush the accumulated loss and cut the graph
        if iter_num % freq_bptt == 0:
            if iter_num > 0:
                # loss = loss / freq_bptt
                # NOTE(review): model_optim.step() is called without any
                # zero_grad() anywhere in this function, so gradients appear
                # to accumulate across BPTT windows — confirm intent.
                loss.backward()
                model_optim.step()
                prev_h_batch = prev_h_batch.detach()
                prev_c_batch = prev_c_batch.detach()
            loss = 0
        # Episode boundary: reset the LSTM state and the running loss printout
        if iter_num % episode_length == 0:
            prev_h_batch = Variable(
                torch.zeros(exp_specs['batch_size'], model.lstm_dim))
            prev_c_batch = Variable(
                torch.zeros(exp_specs['batch_size'], model.lstm_dim))
            if ptu.gpu_enabled():
                prev_h_batch = prev_h_batch.cuda()
                prev_c_batch = prev_c_batch.cuda()

            # NOTE(review): `losses` is also reused by the validation block
            # below, so the train printout after a validation pass may mix
            # val and train entries — verify against intended reporting.
            train_loss_print = '\t'.join(losses)
            losses = []

        obs_batch, act_batch = data_loader.get_next_batch()
        recon, log_cov, prev_h_batch, prev_c_batch = model.forward(
            obs_batch, act_batch, prev_h_batch, prev_c_batch)

        losses.append('%.4f' % ((obs_batch - recon)**2).mean())
        # Skip the loss on the first frame of each episode (state just reset)
        if iter_num % episode_length != 0:
            # temp = (obs_batch - recon)**2 / 4.
            # temp[:,:,1:4,1:4] = temp[:,:,1:4,1:4] * 4.
            temp = (obs_batch - recon)**2
            loss = loss + temp.sum() / float(
                exp_specs['batch_size']) + model.reg_loss
            # loss = loss - compute_diag_log_prob(recon, log_cov, obs_batch)/float(exp_specs['batch_size'])

        # Dump reconstruction snapshots for the first two episodes of every
        # 500-episode period.  NOTE(review): output directory is hard-coded.
        if iter_num % (500 * episode_length) in range(2 * episode_length):
            save_pytorch_tensor_as_img(
                recon[0].data.cpu(),
                'junk_vis/recurrent_deconv_stronger_2/rnn_recon_%d.png' % iter_num)
            save_pytorch_tensor_as_img(
                obs_batch[0].data.cpu(),
                'junk_vis/recurrent_deconv_stronger_2/rnn_obs_%d.png' % iter_num)

        if iter_num % exp_specs['freq_val'] == 0:
            model.eval()
            # print(mask[0], torch.mean(mask, 1), torch.std(mask, 1), torch.min(mask, 1), torch.max(mask, 1))
            print('\nValidating Iter %d...' % iter_num)
            # Fresh LSTM state for the validation rollout
            val_prev_h_batch = Variable(
                torch.zeros(exp_specs['batch_size'], model.lstm_dim))
            val_prev_c_batch = Variable(
                torch.zeros(exp_specs['batch_size'], model.lstm_dim))
            if ptu.gpu_enabled():
                val_prev_h_batch = val_prev_h_batch.cuda()
                val_prev_c_batch = val_prev_c_batch.cuda()

            losses = []
            for i in range(episode_length):
                obs_batch, act_batch = val_data_loader.get_next_batch()
                recon, log_cov, val_prev_h_batch, val_prev_c_batch = model.forward(
                    obs_batch, act_batch, val_prev_h_batch, val_prev_c_batch)
                # val_loss = compute_diag_log_prob(recon, log_cov, obs_batch)/float(exp_specs['batch_size'])
                losses.append('%.4f' % ((obs_batch - recon)**2).mean())

            loss_print = '\t'.join(losses)
            print('Val MSE:\t' + loss_print)
            print('Train MSE:\t' + train_loss_print)
            model.train()
def _run(data_processor, data_file_name, dataset, data_generator, num_batches,
         vocabulary_size, number_examples, context_size, num_noise_words,
         vec_dim, num_epochs, batch_size, lr, model_ver, vec_combine_method,
         save_all):
    '''Train a Distributed Memory doc2vec model with negative sampling.

    Iterates ``num_epochs`` x ``num_batches`` over ``data_generator``,
    updating with Adam, tracking mean epoch loss, and checkpointing either
    every epoch (``save_all``) or only on a new best loss (replacing the
    previous best file).

    Performance notes from the original author:

    Averagely, the time consumption:
        max_generated_batches = 5
        CPU:
            backward time: 600~650 ms
            sampling time: 1 ms
            forward time: 5~7 ms
        GPU:
            backward time: 3 ms
            sampling time: 72 ms
            forward time: 1~2 ms
        Should rewrite sampling to speed up on GPU

    DocTag2Vec on CPU: 121882 words/s, 8 workers
        processing one document time = 650~850 ms
        training on 173403030 raw words (68590824 effective words) took 646.2s, 106138 effective words/s

    Data Generation, the major bottleneck is still generation, maybe due to the lock:
        GPU (Desktop)
            generating batch time: 1200~2001 ms, (1508425387839, 1508425389840)
            transfer batch to Torch: 1 ms, (1508425389840, 1508425389841)
            #worker = 1: 300~600 words/s
            #worker = 8: 600~4000 words/s (around 2500 often)
            After changing to torch.sampler, getting worse, data-prepare time is not stable

        CPU (Mac) #worker = 8:
            generating batch time: 1200~1527 ms, (1508424953768, 1508424955295)
            transfer batch to Torch: 1 ms, (1508424955295, 1508424955296)
            Generating one example time: 2~5 ms, (1508458881118, 1508458881122)
            Generating one document time: 50~400 ms, (1508458881118, 1508458881122)
            Generating one batch time: 650~700 ms, (1508458880690, 1508458881122)
            After changing to torch.sampler
            Generating one example time: 4~7 ms
            Generating one batch time: 900~1200 ms
    '''
    model = DistributedMemory(
        vec_dim, num_docs=len(dataset), num_words=vocabulary_size)

    cost_func = NegativeSampling()
    optimizer = Adam(params=model.parameters(), lr=lr)
    logger = logging.getLogger('root')

    if torch.cuda.is_available():
        model.cuda()
        logger.info("Running on GPU - CUDA")
    else:
        logger.info("Running on CPU")

    logger.info("Dataset comprised of {:d} documents.".format(len(dataset)))
    logger.info("Vocabulary size is {:d}.\n".format(vocabulary_size))
    logger.info("Training started.")

    best_loss = float_info.max
    prev_model_file_path = ""  # best-so-far checkpoint, replaced on improvement

    progbar = Progbar(num_batches, batch_size=batch_size,
                      total_examples = number_examples)

    for epoch_i in range(num_epochs):
        epoch_start_time = time.time()
        loss = []  # per-batch losses for this epoch

        for batch_i in range(num_batches):
            # Timing prints below are deliberate debug instrumentation for the
            # data-pipeline bottleneck analysis documented in the docstring.
            start_time = current_milli_time()
            batch = next(data_generator)
            current_time = current_milli_time()
            print('data-prepare time: %d ms' % (round(current_time - start_time)))

            start_time = current_milli_time()
            x = model.forward(
                batch.context_ids,
                batch.doc_ids,
                batch.target_noise_ids)
            x = cost_func.forward(x)

            loss.append(x.data[0])  # legacy PyTorch (<0.4) scalar access
            print('forward time: %d ms' % round(current_milli_time() - start_time))

            start_time = current_milli_time()
            model.zero_grad()
            x.backward()
            optimizer.step()
            print('backward time: %d ms' % round(current_milli_time() - start_time))
            progbar.update(epoch_i, batch_i, )
            # _print_progress(epoch_i, batch_i, num_batches)

        # end of epoch
        loss = torch.mean(torch.FloatTensor(loss))  # mean epoch loss
        is_best_loss = loss < best_loss
        best_loss = min(loss, best_loss)
        progbar.update(epoch_i, batch_i,
                       [('loss', loss), ('best_loss', best_loss)])

        # Checkpoint file name encodes all salient hyperparameters
        model_file_name = MODEL_NAME.format(
            data_file_name[:-4],
            model_ver,
            vec_combine_method,
            context_size,
            num_noise_words,
            vec_dim,
            batch_size,
            lr,
            epoch_i + 1,
            loss)
        model_file_path = join(MODELS_DIR, model_file_name)
        if not os.path.exists(MODELS_DIR):
            os.makedirs(MODELS_DIR)
        state = {
            'epoch': epoch_i + 1,
            'model_state_dict': model.state_dict(),
            'best_loss': best_loss,
            'optimizer_state_dict': optimizer.state_dict()
        }

        if save_all:
            torch.save(state, model_file_path)
        elif is_best_loss:
            # Keep only the best checkpoint: drop the previous one first
            try:
                remove(prev_model_file_path)
            except FileNotFoundError:
                pass
            torch.save(state, model_file_path)
            prev_model_file_path = model_file_path

        epoch_total_time = round(time.time() - epoch_start_time)
        logger.info(" ({:d}s) - loss: {:.4f}".format(epoch_total_time, loss))
def main(args):
    """Train a U-Net segmentation model from model/dataset config files.

    Loads the two TOML/YAML-style configs, builds the network, optimizer and
    loss, optionally restores a checkpoint, then runs the train/validate loop
    for the configured number of epochs, logging metrics and saving a history
    plot plus a checkpoint each epoch.

    Args:
        args: parsed CLI namespace with .model, .dataset, .checkpoint,
            .resume and .workers.
    """
    model = load_config(args.model)
    dataset = load_config(args.dataset)

    device = torch.device("cuda" if model["common"]["cuda"] else "cpu")

    if model["common"]["cuda"] and not torch.cuda.is_available():
        sys.exit("Error: CUDA requested but not available")

    # if args.batch_size < 2:
    #     sys.exit('Error: PSPNet requires more than one image for BatchNorm in Pyramid Pooling')

    os.makedirs(model["common"]["checkpoint"], exist_ok=True)

    num_classes = len(dataset["common"]["classes"])

    net = UNet(num_classes)
    net = DataParallel(net)
    net = net.to(device)

    if model["common"]["cuda"]:
        torch.backends.cudnn.benchmark = True

    # `weight` stays unbound if the dataset has no weights AND the loss
    # doesn't need them (Lovasz); the `"weight" in locals()` check below and
    # the loss branches account for that.
    try:
        weight = torch.Tensor(dataset["weights"]["values"])
    except KeyError:
        if model["opt"]["loss"] in ("CrossEntropy", "mIoU", "Focal"):
            sys.exit(
                "Error: The loss function used, need dataset weights values")

    optimizer = Adam(net.parameters(),
                     lr=model["opt"]["lr"],
                     weight_decay=model["opt"]["decay"])

    resume = 0
    if args.checkpoint:

        # Remap checkpoint tensors onto the configured device at load time
        def map_location(storage, _):
            return storage.cuda() if model["common"]["cuda"] else storage.cpu()

        # https://github.com/pytorch/pytorch/issues/7178
        chkpt = torch.load(args.checkpoint, map_location=map_location)
        net.load_state_dict(chkpt["state_dict"])

        if args.resume:
            optimizer.load_state_dict(chkpt["optimizer"])
            resume = chkpt["epoch"]

    if model["opt"]["loss"] == "CrossEntropy":
        criterion = CrossEntropyLoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "mIoU":
        criterion = mIoULoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "Focal":
        criterion = FocalLoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "Lovasz":
        criterion = LovaszLoss2d().to(device)
    else:
        sys.exit("Error: Unknown [opt][loss] value !")

    train_loader, val_loader = get_dataset_loaders(model, dataset, args.workers)

    num_epochs = model["opt"]["epochs"]
    if resume >= num_epochs:
        sys.exit(
            "Error: Epoch {} set in {} already reached by the checkpoint provided"
            .format(num_epochs, args.model))

    history = collections.defaultdict(list)
    log = Log(os.path.join(model["common"]["checkpoint"], "log"))

    log.log("--- Hyper Parameters on Dataset: {} ---".format(
        dataset["common"]["dataset"]))
    log.log("Batch Size:\t {}".format(model["common"]["batch_size"]))
    log.log("Image Size:\t {}".format(model["common"]["image_size"]))
    log.log("Learning Rate:\t {}".format(model["opt"]["lr"]))
    log.log("Weight Decay:\t {}".format(model["opt"]["decay"]))
    log.log("Loss function:\t {}".format(model["opt"]["loss"]))
    if "weight" in locals():
        log.log("Weights :\t {}".format(dataset["weights"]["values"]))
    log.log("---")

    for epoch in range(resume, num_epochs):
        log.log("Epoch: {}/{}".format(epoch + 1, num_epochs))

        train_hist = train(train_loader, num_classes, device, net, optimizer,
                           criterion)
        log.log(
            "Train loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}".
            format(
                train_hist["loss"],
                train_hist["miou"],
                dataset["common"]["classes"][1],
                train_hist["fg_iou"],
                train_hist["mcc"],
            ))

        for k, v in train_hist.items():
            history["train " + k].append(v)

        val_hist = validate(val_loader, num_classes, device, net, criterion)
        log.log(
            "Validate loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}".
            format(val_hist["loss"], val_hist["miou"],
                   dataset["common"]["classes"][1], val_hist["fg_iou"],
                   val_hist["mcc"]))

        for k, v in val_hist.items():
            history["val " + k].append(v)

        # One history plot and one checkpoint per finished epoch
        visual = "history-{:05d}-of-{:05d}.png".format(epoch + 1, num_epochs)
        plot(os.path.join(model["common"]["checkpoint"], visual), history)

        checkpoint = "checkpoint-{:05d}-of-{:05d}.pth".format(
            epoch + 1, num_epochs)
        states = {
            "epoch": epoch + 1,
            "state_dict": net.state_dict(),
            "optimizer": optimizer.state_dict()
        }
        torch.save(states,
                   os.path.join(model["common"]["checkpoint"], checkpoint))
class DDPG(object):
    """Deep Deterministic Policy Gradient agent with a replay buffer and
    optional additive action noise and running observation normalisation.

    Written against a legacy (<0.4) PyTorch API: uses ``Variable``-era
    helpers (``to_tensor(..., volatile=...)``, ``.data[0]``) consistently
    with the rest of this file.
    """

    def __init__(self, memory, nb_status, nb_actions, action_noise=None,
                 gamma=0.99, tau=0.001, normalize_observations=True,
                 batch_size=128, observation_range=(-5., 5.), action_range=(-1., 1.),
                 actor_lr=1e-4, critic_lr=1e-3):
        self.nb_status = nb_status
        self.nb_actions = nb_actions
        self.action_range = action_range
        self.observation_range = observation_range
        self.normalize_observations = normalize_observations

        self.actor = Actor(self.nb_status, self.nb_actions)
        self.actor_target = Actor(self.nb_status, self.nb_actions)
        self.actor_optim = Adam(self.actor.parameters(), lr=actor_lr)

        self.critic = Critic(self.nb_status, self.nb_actions)
        self.critic_target = Critic(self.nb_status, self.nb_actions)
        self.critic_optim = Adam(self.critic.parameters(), lr=critic_lr)

        # Create replay buffer
        self.memory = memory  # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
        self.action_noise = action_noise

        # Hyper-parameters
        self.batch_size = batch_size
        self.tau = tau
        self.discount = gamma

        if self.normalize_observations:
            self.obs_rms = RunningMeanStd()
        else:
            self.obs_rms = None

    def pi(self, obs, apply_noise=True, compute_Q=True):
        """Select an action for a single observation.

        Returns (action, q) where q is the critic's value of the chosen
        action, or None when ``compute_Q`` is False.
        """
        obs = np.array([obs])
        action = to_numpy(self.actor(to_tensor(obs))).squeeze(0)
        if compute_Q:
            q = self.critic([to_tensor(obs), to_tensor(action)]).cpu().data
        else:
            q = None
        if self.action_noise is not None and apply_noise:
            noise = self.action_noise()
            assert noise.shape == action.shape
            action += noise
        action = np.clip(action, self.action_range[0], self.action_range[1])
        # BUG FIX: the original returned q[0][0] unconditionally, which raised
        # TypeError whenever compute_Q=False left q as None.
        return action, (q[0][0] if q is not None else None)

    def store_transition(self, obs0, action, reward, obs1, terminal1):
        """Append one transition to the replay buffer and update obs statistics."""
        self.memory.append(obs0, action, reward, obs1, terminal1)
        if self.normalize_observations:
            self.obs_rms.update(np.array([obs0]))

    def train(self):
        """One critic + actor update from a sampled minibatch.

        Returns (value_loss, policy_loss) as Python numbers.
        NOTE(review): relies on a module-level ``criterion`` (not defined in
        this class) for the critic loss — confirm it is set by the caller.
        """
        # Get a batch.
        batch = self.memory.sample(batch_size=self.batch_size)

        # Target Q: r + gamma * (1 - done) * Q'(s', pi'(s'))
        next_q_values = self.critic_target([
            to_tensor(batch['obs1'], volatile=True),
            self.actor_target(to_tensor(batch['obs1'], volatile=True))])
        next_q_values.volatile = False  # re-enable grad flow through the target values

        target_q_batch = to_tensor(batch['rewards']) + \
            self.discount * to_tensor(1 - batch['terminals1'].astype('float32')) * next_q_values

        # Critic update
        self.critic.zero_grad()
        q_batch = self.critic([to_tensor(batch['obs0']), to_tensor(batch['actions'])])
        value_loss = criterion(q_batch, target_q_batch)
        value_loss.backward()
        self.critic_optim.step()

        # Actor update: ascend Q(s, pi(s))
        self.actor.zero_grad()
        policy_loss = -self.critic([
            to_tensor(batch['obs0']),
            self.actor(to_tensor(batch['obs0']))]).mean()
        policy_loss.backward()
        self.actor_optim.step()

        # Target update
        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)

        return value_loss.cpu().data[0], policy_loss.cpu().data[0]

    def initialize(self):
        """Copy online weights into the targets (call once after construction)."""
        hard_update(self.actor_target, self.actor)  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)

    def update_target_net(self):
        """Polyak-average both target networks toward the online networks."""
        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)

    def reset(self):
        """Reset the exploration noise process (e.g. at episode start)."""
        if self.action_noise is not None:
            self.action_noise.reset()

    def cuda(self):
        """Move all four networks to the GPU."""
        self.actor.cuda()
        self.actor_target.cuda()
        self.critic.cuda()
        self.critic_target.cuda()
def train(self, dataset, style_img, save_model_dir, checkpoint_model_dir,
          epochs=2, batch_size=4, image_size=256, seed=42, arg_cuda=0,
          content_weight=1e5, style_weight=1e10, lr=1e-3,
          log_interval=500, checkpoint_interval=2000):
    """Train a fast-neural-style TransformerNet on one style image.

    Parameters (translated from the original Russian comments)
    ----------
    dataset : path to the training data (a directory of images).
    style_img : the style image (PIL-like; transformed below).
    save_model_dir : directory name where the final model file is saved.
    checkpoint_model_dir : directory for intermediate checkpoints (or None).
    epochs : number of epochs.
    batch_size : batch size.
    image_size : size to which content/style images are resized/cropped.
    seed : seed for random number generation.
    arg_cuda : whether a GPU is used (default: not used).
    content_weight : weight of the content term in the total loss.
    style_weight : weight of the style term in the total loss.
    lr : learning rate.
    log_interval : log every this many iterations.
    checkpoint_interval : save model parameters every this many iterations.

    Returns the final model state_dict.
    """
    device = torch.device("cuda" if arg_cuda else "cpu")
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Content images are resized, center-cropped and scaled to [0, 255].
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    # Load the dataset batches.
    train_dataset = datasets.ImageFolder(dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size)
    print('Train dataset is loaded')
    transformer = TransformerNet().to(device)
    optimizer = Adam(transformer.parameters(), lr)
    mse_loss = torch.nn.MSELoss()
    # Frozen VGG16 used only as a perceptual-feature extractor.
    vgg = Vgg16(requires_grad=False, layers_to_unfreeze=5).to(device)
    style_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Lambda(lambda x: x.mul(255))])
    # style = utils.load_image(args.style_image, size= None)
    style = style_img
    style = style_transform(style)
    # Repeat the single style image across the batch dimension.
    style = style.repeat(batch_size, 1, 1, 1).to(device)
    features_style = vgg(utils.normalize_batch(style))
    # Gram matrices of the style features are fixed targets for the whole run.
    gram_style = [utils.gram_matrix(y) for y in features_style]
    print('Epochs = ', epochs)
    # Train the model.
    for e in range(epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            # print('Epoch=', e+1, 'Batch_id=', batch_id)
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()
            x = x.to(device)
            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            features_y = vgg(y)
            features_x = vgg(x)
            # Content loss: distance between relu2_2 feature maps.
            content_loss = content_weight * mse_loss(
                features_y.relu2_2, features_x.relu2_2)
            # Style loss: distance between Gram matrices per layer; the
            # style Gram is sliced to n_batch for the (smaller) last batch.
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = utils.gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= style_weight
            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()
            agg_content_loss += content_loss.item()
            agg_style_loss += style_loss.item()
            if (batch_id + 1) % log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1))
                print(mesg)
            if checkpoint_model_dir is not None and (
                    batch_id + 1) % checkpoint_interval == 0:
                # Checkpoint on CPU in eval mode, then restore device/mode.
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(
                    e) + "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(checkpoint_model_dir, ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()
    # save model
    transformer.eval().cpu()
    save_model_filename = "epoch_" + str(epochs) + "_" + str(
        time.ctime()).replace(' ', '_') + "_" + str(
        content_weight) + "_" + str(style_weight) + ".model"
    save_model_path = os.path.join(save_model_dir, save_model_filename)
    # torch.save(transformer.state_dict(), save_model_path) #NNN
    # torch.save(transformer.state_dict(), 'neural_style/save_model_dir/StyleTransTan.model') #NNN
    fin_model_dict = transformer.state_dict()
    # NOTE(review): the model is saved to this hardcoded path, while the
    # message below prints the unused save_model_path — confirm intended.
    torch.save(fin_model_dict, 'neural_style/save_model_dir/StyleTransTan.pth')  # NNN
    print("\nDone, trained model saved at", save_model_path)
    return fin_model_dict
class DDPG(object):
    """DDPG agent driven by an ``args`` namespace, with an optional CNN
    front-end (``args.pic``) that embeds pixel observations before the
    actor/critic, epsilon-decayed exploration and soft target updates.

    Uses the legacy pre-0.4 PyTorch ``volatile`` API throughout.
    """

    def __init__(self, nb_status, nb_actions, args, writer):
        # writer: TensorBoard-style scalar logger, or None to disable logging.
        self.clip_actor_grad = args.clip_actor_grad
        self.nb_status = nb_status * args.window_length
        self.nb_actions = nb_actions
        self.discrete = args.discrete
        self.pic = args.pic
        self.writer = writer
        self.select_time = 0
        if self.pic:
            # Pixel mode: actor/critic consume the CNN embedding instead.
            self.nb_status = args.pic_status

        # Create Actor and Critic Network
        net_cfg = {
            'hidden1': args.hidden1,
            'hidden2': args.hidden2,
            'use_bn': args.bn,
            'init_method': args.init_method
        }
        if args.pic:
            self.cnn = CNN(1, args.pic_status)
            self.cnn_target = CNN(1, args.pic_status)
            self.cnn_optim = Adam(self.cnn.parameters(), lr=args.crate)
        self.actor = Actor(self.nb_status, self.nb_actions, **net_cfg)
        self.actor_target = Actor(self.nb_status, self.nb_actions, **net_cfg)
        self.actor_optim = Adam(self.actor.parameters(), lr=args.prate)
        self.critic = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_target = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_optim = Adam(self.critic.parameters(), lr=args.rate)
        hard_update(self.actor_target, self.actor)  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)
        if args.pic:
            hard_update(self.cnn_target, self.cnn)
        # Create replay buffer
        self.memory = rpm(args.rmsize)  # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
        self.random_process = Myrandom(size=nb_actions)
        # Hyper-parameters
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.discount = args.discount
        self.depsilon = 1.0 / args.epsilon
        self.epsilon = 1.0
        self.s_t = None  # Most recent state
        self.a_t = None  # Most recent action
        self.use_cuda = args.cuda
        # if self.use_cuda: self.cuda()

    def normalize(self, pic):
        # Reorder image axes HWC -> CHW for the CNN input.
        pic = pic.swapaxes(0, 2).swapaxes(1, 2)
        return pic

    def update_policy(self):
        """One critic + actor (+ CNN, in pic mode) optimization step.

        Returns ``(-policy_loss, value_loss)``.
        """
        # Sample batch
        state_batch, action_batch, reward_batch, \
            next_state_batch, terminal_batch = self.memory.sample_batch(self.batch_size)

        # Prepare for the target q batch
        if self.pic:
            state_batch = np.array([self.normalize(x) for x in state_batch])
            state_batch = to_tensor(state_batch, volatile=True)
            state_batch = self.cnn(state_batch)
            next_state_batch = np.array([self.normalize(x) for x in next_state_batch])
            next_state_batch = to_tensor(next_state_batch, volatile=True)
            next_state_batch = self.cnn_target(next_state_batch)
            next_q_values = self.critic_target([
                next_state_batch,
                self.actor_target(next_state_batch)
            ])
        else:
            next_q_values = self.critic_target([
                to_tensor(next_state_batch, volatile=True),
                self.actor_target(to_tensor(next_state_batch, volatile=True)),
            ])
        # print('batch of picture is ok')
        # Re-enable grad tracking so target_q can participate in the loss.
        next_q_values.volatile = False

        target_q_batch = to_tensor(reward_batch) + \
            self.discount * to_tensor((1 - terminal_batch.astype(np.float))) * next_q_values

        # Critic update
        self.critic.zero_grad()
        if self.pic:
            self.cnn.zero_grad()
        if self.pic:
            state_batch.volatile = False
            q_batch = self.critic([state_batch, to_tensor(action_batch)])
        else:
            q_batch = self.critic([to_tensor(state_batch), to_tensor(action_batch)])
        # print(reward_batch, next_q_values*self.discount, target_q_batch, terminal_batch.astype(np.float))
        value_loss = criterion(q_batch, target_q_batch)
        value_loss.backward()
        self.critic_optim.step()
        if self.pic:
            self.cnn_optim.step()

        # Actor update: maximize Q(s, pi(s)).
        self.actor.zero_grad()
        if self.pic:
            self.cnn.zero_grad()
        if self.pic:
            state_batch.volatile = False
            policy_loss = -self.critic([
                state_batch,
                self.actor(state_batch)
            ])
        else:
            policy_loss = -self.critic([
                to_tensor(state_batch),
                self.actor(to_tensor(state_batch))
            ])
        policy_loss = policy_loss.mean()
        policy_loss.backward()
        if self.clip_actor_grad is not None:
            torch.nn.utils.clip_grad_norm(self.actor.parameters(), float(self.clip_actor_grad))
        # NOTE(review): reconstructed from collapsed source — the writer
        # logging below may originally have been nested under the gradient
        # clipping branch; confirm against the original file.
        if self.writer != None:
            mean_policy_grad = np.array(np.mean([np.linalg.norm(p.grad.data.cpu().numpy().ravel()) for p in self.actor.parameters()]))
            # print(mean_policy_grad)
            self.writer.add_scalar('train/mean_policy_grad', mean_policy_grad, self.select_time)
        self.actor_optim.step()
        if self.pic:
            self.cnn_optim.step()

        # Target update
        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)
        if self.pic:
            soft_update(self.cnn_target, self.cnn, self.tau)

        return -policy_loss, value_loss

    def eval(self):
        # Switch all networks to evaluation mode.
        self.actor.eval()
        self.actor_target.eval()
        self.critic.eval()
        self.critic_target.eval()
        if (self.pic):
            self.cnn.eval()
            self.cnn_target.eval()

    def train(self):
        # Switch all networks back to training mode.
        self.actor.train()
        self.actor_target.train()
        self.critic.train()
        self.critic_target.train()
        if (self.pic):
            self.cnn.train()
            self.cnn_target.train()

    def cuda(self):
        # NOTE(review): moves self.cnn unconditionally; self.cnn only exists
        # when args.pic is True — confirm this is only called in pic mode.
        self.cnn.cuda()
        self.cnn_target.cuda()
        self.actor.cuda()
        self.actor_target.cuda()
        self.critic.cuda()
        self.critic_target.cuda()

    def observe(self, r_t, s_t1, done):
        # Store the latest transition and advance the current state.
        self.memory.append([self.s_t, self.a_t, r_t, s_t1, done])
        self.s_t = s_t1

    def random_action(self, fix=False):
        # Uniform random action; argmax'd for discrete spaces unless fixed.
        action = np.random.uniform(-1., 1., self.nb_actions)
        self.a_t = action
        if self.discrete and fix == False:
            action = action.argmax()
        # if self.pic:
        #     action = np.concatenate((softmax(action[:16]), softmax(action[16:])))
        return action

    def select_action(self, s_t, decay_epsilon=True, return_fix=False, noise_level=0):
        """Greedy action with epsilon-scaled random exploration."""
        self.eval()
        if self.pic:
            s_t = self.normalize(s_t)
            s_t = self.cnn(to_tensor(np.array([s_t])))
        if self.pic:
            # NOTE(review): pic mode queries the *target* actor here while the
            # non-pic branch uses the online actor — confirm intended.
            action = to_numpy(
                self.actor_target(s_t)
            ).squeeze(0)
        else:
            action = to_numpy(
                self.actor(to_tensor(np.array([s_t])))
            ).squeeze(0)
        self.train()
        noise_level = noise_level * max(self.epsilon, 0)
        if np.random.uniform(0, 1) < noise_level:
            action = self.random_action(fix=True)  # episilon greedy
        if decay_epsilon:
            self.epsilon -= self.depsilon
        self.a_t = action
        if return_fix:
            return action
        if self.discrete:
            return action.argmax()
        else:
            return action

    def reset(self, obs):
        # Start of a new episode.
        self.s_t = obs
        self.random_process.reset_status()

    def load_weights(self, output, num=1):
        """Load actor/critic weights (targets get the same files).

        NOTE(review): CNN weights are not loaded here even in pic mode.
        """
        if output is None: return
        self.actor.load_state_dict(
            torch.load('{}/actor{}.pkl'.format(output, num))
        )
        self.actor_target.load_state_dict(
            torch.load('{}/actor{}.pkl'.format(output, num))
        )
        self.critic.load_state_dict(
            torch.load('{}/critic{}.pkl'.format(output, num))
        )
        self.critic_target.load_state_dict(
            torch.load('{}/critic{}.pkl'.format(output, num))
        )

    def save_model(self, output, num):
        """Save actor/critic weights, round-tripping them through the CPU."""
        if self.use_cuda:
            # NOTE(review): touches self.cnn unconditionally — would fail when
            # cuda is on but pic mode is off; confirm usage.
            self.cnn.cpu()
            self.actor.cpu()
            self.critic.cpu()
        torch.save(
            self.actor.state_dict(),
            '{}/actor{}.pkl'.format(output, num)
        )
        torch.save(
            self.critic.state_dict(),
            '{}/critic{}.pkl'.format(output, num)
        )
        if self.use_cuda:
            self.cnn.cuda()
            self.actor.cuda()
            self.critic.cuda()
class PPO:
    """ Proximal Policy Optimization (by clipping), with early stopping based on approximate KL """

    def __init__(self, env, actor_critic: AbstractActorCritic, params: PpoParams, buf: PpoBuffer, wandb=None) -> None:
        # env: gym-style environment; buf: rollout buffer filled by collect_data();
        # wandb: optional experiment logger (may be None).
        self.env = env
        self.params = params
        self.wandb = wandb
        self.ac = actor_critic
        self.buf = buf
        # Random seed
        # this should be done outside
        # torch.manual_seed(params.seed)
        # np.random.seed(params.seed)
        # Set up optimizers for policy and value function
        # self.pi_optimizer = Adam(self.ac.get_policy_params(), lr=params.pi_lr)
        # self.vf_optimizer = Adam(self.ac.get_value_params(), lr=params.vf_lr)
        # Single optimizer over both policy and value parameters.
        self.optimizer = Adam(
            list(self.ac.get_policy_params()) + list(self.ac.get_value_params()),
            lr=params.lr,
            eps=params.eps
        )
        self.obs = None
        # Sliding windows used to smooth the logged episode stats.
        self.ep_rew_mean = deque(maxlen=params.rew_smooth_len)
        self.ep_len_mean = deque(maxlen=params.rew_smooth_len)
        self.total_steps = 0
        dateTimeObj = datetime.now()
        self.timestamp = dateTimeObj.strftime("%Y%m%d_%H%M%S")

    def send_timers(self, timers):
        # Log per-phase timing lists as mean and sum scalars.
        # NOTE(review): uses self.wandb unguarded, unlike collect_data() which
        # checks for None — confirm timed runs always supply a logger.
        out = {k + "_mean": np.mean(v) for k, v in timers.items()}
        out2 = {k + "_sum": np.sum(v) for k, v in timers.items()}
        self.wandb.log({**out, **out2})

    def compute_loss_pi(self, data):
        """ Computes policy loss """
        obs, act, adv, logp_old = data["obs"], data["act"], data["adv"], data["logp"]
        # Policy loss: clipped-surrogate PPO objective.
        pi, logp = self.ac.pi(obs, act)
        ratio = torch.exp(logp - logp_old)
        clip_adv = torch.clamp(ratio, 1 - self.params.clip_ratio, 1 + self.params.clip_ratio) * adv
        loss_pi = -(torch.min(ratio * adv, clip_adv)).mean()
        # Useful extra info
        approx_kl = (logp_old - logp).mean().item()
        ent = pi.entropy().mean().item()
        clipped = ratio.gt(1 + self.params.clip_ratio) | ratio.lt(1 - self.params.clip_ratio)
        clipfrac = torch.as_tensor(clipped, dtype=torch.float32).mean().item()
        pi_info = dict(kl=approx_kl, ent=ent, cf=clipfrac)
        return loss_pi, pi_info

    def compute_loss_v(self, data):
        """ Computes value loss """
        obs, ret = data["obs"], data["ret"]
        # Mean squared error against the empirical returns.
        return ((self.ac.v(obs) - ret) ** 2).mean()

    def update(self, timed=False):
        """ Updates the policy and value function based on the latest replay buffer """
        timers = {"get_buf": [], "train_pi": [], "train_v": []}
        start = time.time()
        data = self.buf.get()
        timers["get_buf"].append(time.time() - start)
        ## this is only used for debugging - compute the old loss of policy and value function
        # pi_l_old, pi_info_old = self.compute_loss_pi(data)
        # pi_l_old = pi_l_old.item()
        # v_l_old = self.compute_loss_v(data).item()
        # Train policy with multiple steps of gradient descent
        if self.params.verbose:
            tqdm.write("Training pi")
        self.ac.train()
        for i in range(self.params.train_iters):
            # policy loss
            start_pi = time.time()
            self.optimizer.zero_grad()
            loss_pi, pi_info = self.compute_loss_pi(data)
            kl = np.mean(pi_info["kl"])
            # Early-stop the epoch when the policy drifts too far.
            if kl > 1.5 * self.params.target_kl:
                if self.params.verbose:
                    tqdm.write(f"Early stopping at step {i}/{self.params.train_iters} due to reaching max kl.")
                break
            # value loss (joint backward through the shared optimizer)
            start_val = time.time()
            loss_v = self.compute_loss_v(data)
            (loss_v * self.params.val_loss_coef + loss_pi).backward()
            self.optimizer.step()
            timers["train_v"].append(time.time() - start_val)
            timers["train_pi"].append(time.time() - start_pi)
        # NOTE(review): unguarded self.wandb use — crashes if wandb is None.
        self.wandb.log(
            {"logstd mean": self.ac.pi.log_std.detach().cpu().numpy().mean()}
        )  # FIXME: this only works with the gaussian policy
        ## Log changes from update
        # kl, ent, cf = pi_info["kl"], pi_info_old["ent"], pi_info["cf"]
        # logger.store(
        #     LossPi=pi_l_old,
        #     LossV=v_l_old,
        #     KL=kl,
        #     Entropy=ent,
        #     ClipFrac=cf,
        #     DeltaLossPi=(loss_pi.item() - pi_l_old),
        #     DeltaLossV=(loss_v.item() - v_l_old),
        # )
        if timed:
            self.send_timers(timers)

    def train_loop(self, timed=False):
        """ Automatic training loop for PPO that trains for prespecified number of epochs """
        # Main loop: collect experience in env and update/log each epoch
        for epoch in trange(self.params.epochs):
            if self.params.verbose:
                tqdm.write("Collecting data")
            self.collect_data(timed)
            # Save model
            if (epoch % self.params.save_freq == 0) or (epoch == self.params.epochs - 1):
                # logger.save_state({"env": env}, None)
                self.ac.save(self.params.policy_dir,
                             f"{self.params.env_name}-s_{self.params.seed}-t_{self.timestamp}")
            # Perform PPO update!
            if self.params.verbose:
                tqdm.write("Updating PPO")
            self.update(timed)
            if self.params.lr_decay:
                lr_decay(self.optimizer, epoch, self.params.epochs, self.params.lr)
            if self.params.env_name.startswith("2Pupper"):
                self.env.send_wandb_video()
            # Log info about epoch
            # logger.log_tabular("Epoch", epoch)
            # logger.log_tabular("EpRet", with_min_and_max=True)
            # logger.log_tabular("EpLen", average_only=True)
            # logger.log_tabular("VVals", with_min_and_max=True)
            # logger.log_tabular("TotalEnvInteracts", (epoch + 1) * steps_per_epoch)
            # logger.log_tabular("LossPi", average_only=True)
            # logger.log_tabular("LossV", average_only=True)
            # logger.log_tabular("DeltaLossPi", average_only=True)
            # logger.log_tabular("DeltaLossV", average_only=True)
            # logger.log_tabular("Entropy", average_only=True)
            # logger.log_tabular("KL", average_only=True)
            # logger.log_tabular("ClipFrac", average_only=True)
            # logger.log_tabular("StopIter", average_only=True)
            # logger.log_tabular("Time", time.time() - start_time)
            # logger.dump_tabular()

    def collect_data(self, timed=False):
        """ Fill up the replay buffer with fresh rollouts based on the current policy """
        if self.obs is None:
            # First call ever: start a fresh episode.
            self.obs, self.ep_ret, self.ep_len = self.env.reset(), 0, 0
        episode_counter = 0
        self.ac.eval()
        timers = {"ac_step": [], "env_step": [], "buf_store": [], "buf_finish": []}
        for t in trange(self.params.steps_per_epoch):
            start = time.time()
            self.act, self.val, self.logp = self.ac.step(
                torch.as_tensor(self.obs, dtype=torch.float32).to(self.params.device)
            )
            timers["ac_step"].append(time.time() - start)
            start = time.time()
            self.next_obs, self.rew, self.done, misc = self.env.step(self.act)
            timers["env_step"].append(time.time() - start)
            self.total_steps += 1
            self.ep_ret += self.rew
            self.ep_len += 1
            start = time.time()
            buf_objs = (self.obs, self.act, self.rew, self.val, self.logp)
            self.buf.store(*buf_objs)
            timers["buf_store"].append(time.time() - start)
            # logger.store(VVals=v)
            # Update obs (critical!)
            self.obs = self.next_obs
            timeout = self.ep_len == self.params.max_ep_len
            terminal = self.done or timeout
            epoch_ended = t == self.params.steps_per_epoch - 1
            if terminal or epoch_ended:
                episode_counter += 1
                if epoch_ended and not terminal and self.params.verbose:
                    tqdm.write(f"Warning: trajectory cut off by epoch at {self.ep_len} steps.")
                # if trajectory didn't reach terminal state, bootstrap value target
                if timeout or epoch_ended:
                    _, self.val, _ = self.ac.step(
                        torch.as_tensor(self.obs, dtype=torch.float32).to(self.params.device))
                else:
                    self.val = 0
                self.ep_rew_mean.append(self.ep_ret)
                self.ep_len_mean.append(self.ep_len)
                if episode_counter % self.params.log_ep_freq == 0:
                    if self.wandb is not None:
                        self.wandb.log(
                            {
                                "Reward Mean": np.mean(self.ep_rew_mean),
                                "Episode Length Mean": np.mean(self.ep_len_mean),
                            },
                            step=self.total_steps,
                        )
                start = time.time()
                self.buf.finish_path(self.val)
                timers["buf_finish"].append(time.time() - start)
                if terminal:
                    # only save EpRet / EpLen if trajectory finished
                    # logger.store(EpRet=ep_ret, EpLen=ep_len)
                    pass
                self.obs, self.ep_ret, self.ep_len = self.env.reset(), 0, 0
        if timed:
            self.send_timers(timers)

    def play(self, episodes=3):
        """ play n episodes with the current policy and return the observations, rewards, and actions """
        obs = self.env.reset()
        episode_counter = 0
        obs_buf = []
        rew_buf = []
        act_buf = []
        while True:
            with torch.no_grad():
                obs_buf.append(np.copy(obs))
                act, _, _ = self.ac.step(torch.as_tensor(obs, dtype=torch.float32).to(self.params.device))
                obs, rew, done, misc = self.env.step(act)
                rew_buf.append(np.copy(rew))
                act_buf.append(np.copy(act))
                if "state" in misc:
                    # Prefer the env-provided ground-truth state over the raw obs.
                    obs_buf.pop(-1)
                    obs_buf.append(misc["state"])
                if done:
                    episode_counter += 1
                    if episode_counter == episodes:
                        break
                    else:
                        obs = self.env.reset()
        self.obs, self.ep_ret, self.ep_len = self.env.reset(), 0, 0
        return obs_buf, rew_buf, act_buf
test_flag = False # test mode flag, adopted epsilon = 1 is test_flag = True Agent_id_list = ['INT_01', 'INT_11', 'INT_21'] # id list of intersections n_agents = len(Agent_id_list) # number of agent print( "Episode Num: %d\r\n Step_Num per epi: %d \r\n buffer size: %d\r\n epsilon: %f\r\n" % (total_epi, step_p_epi, replay_buffer_size, epsilon)) # initialize the replay buffer replay_buffer = ReplayBuffer(replay_buffer_size) # initialize the Mixing network qmixer = QMixer(n_agents, state_shape, mixing_embed_dim, obs_dim, action_dim).to(device) q_optimizer = Adam(qmixer.parameters(), lr=learning_rate) target_qmixer = copy.deepcopy(qmixer) action_dict = {} # dictionary of storing action for each agent print("Start Experiment") for episode in range(total_epi): episode_reward = 0 Env = Traffic_Env() #lack of input Env.reset() env_state = Env.get_state() # state of whole environment env_obs = Env.get_obs() # observation of whole environment for step in range(step_p_epi): # +++++++++++++++++++++++Done in fog node++++++++++++++++++++++ # for agent_id in Agent_id_list: # get observation of each agent agent_obs = Env.get_agent_obs(
# Transformer training driver (flat statements; relies on names defined
# earlier in the original file: val, args, model, train_iter, criterion,
# batch_size_fn, tgt_pad_idx, NoamOpt, run_epoch, rebatch, MultiGPULossCompute).
val_iter = Iterator(val, batch_size=args.batch_size, device=0, repeat=False,
                    sort_key=lambda x: (len(x.src), len(x.trg)),
                    batch_size_fn=batch_size_fn, train=False)
# Wrap the model for multi-GPU data parallelism.
model_prl = nn.DataParallel(model, device_ids=args.devices)
if args.load_model and os.path.exists(args.model_path):
    # NOTE(review): the existence check uses args.model_path but the weights
    # are loaded from the hardcoded 'models/params.pkl' — confirm intended.
    model.load_state_dict(torch.load('models/params.pkl'))
else:
    # Noam learning-rate schedule wrapped around Adam (lr is driven by the
    # schedule, hence lr=0 here).
    model_opt = NoamOpt(
        model.src_embed[0].d_model, args.factor, args.warm_up,
        Adam(model.parameters(), lr=0, betas=args.betas, eps=args.eps))
    for epoch_index in range(args.num_epochs):
        print(f"Epoch {epoch_index+1}:")
        model_prl.train()
        run_epoch((rebatch(tgt_pad_idx, b) for b in train_iter), model_prl,
                  MultiGPULossCompute(model.generator, criterion,
                                      devices=args.devices, opt=model_opt))
        model_prl.eval()
        # Validation pass: opt=None disables parameter updates.
        loss = run_epoch((rebatch(tgt_pad_idx, b) for b in val_iter), model_prl,
                         MultiGPULossCompute(model.generator, criterion,
                                             devices=args.devices, opt=None))
def fit(self, model, feature_extraction, protocol, log_dir, subset='train',
        epochs=1000, restart=0, gpu=False):
    """Train the embedding `model` with a triplet loss plus a per-variant
    auxiliary "confidence" loss (self.variant selects the formulation),
    logging losses, histograms and EER to TensorBoard and checkpointing
    model/optimizer (and norm batch-norm) every epoch.

    Parameters
    ----------
    model : embedding network to train.
    feature_extraction : feature extractor passed to the batch generator.
    protocol : dataset protocol providing the training files.
    log_dir : directory for TensorBoard logs and checkpoints.
    subset : protocol subset to train on (default 'train').
    epochs : last epoch index to run (inclusive).
    restart : if > 0, resume from this epoch's checkpoint.
    gpu : move model, auxiliary BN layers and batches to CUDA when True.

    Uses the legacy `Variable`/`F.sigmoid` PyTorch API.
    """
    import tensorboardX
    writer = tensorboardX.SummaryWriter(log_dir=log_dir)
    checkpoint = Checkpoint(log_dir=log_dir, restart=restart > 0)
    batch_generator = SpeechSegmentGenerator(
        feature_extraction, per_label=self.per_label, per_fold=self.per_fold,
        duration=self.duration, parallel=self.parallel)
    batches = batch_generator(protocol, subset=subset)
    batch = next(batches)  # prime the generator before reading batches_per_epoch
    batches_per_epoch = batch_generator.batches_per_epoch
    if restart > 0:
        weights_pt = checkpoint.WEIGHTS_PT.format(
            log_dir=log_dir, epoch=restart)
        model.load_state_dict(torch.load(weights_pt))
    if gpu:
        model = model.cuda()
    model.internal = False
    parameters = list(model.parameters())
    if self.variant in [2, 3, 4, 5, 6, 7, 8]:
        # norm batch-normalization (learnable affine)
        self.norm_bn = nn.BatchNorm1d(
            1, eps=1e-5, momentum=0.1, affine=True)
        if gpu:
            self.norm_bn = self.norm_bn.cuda()
        parameters += list(self.norm_bn.parameters())
    if self.variant in [9]:
        # norm batch-normalization (no affine parameters)
        self.norm_bn = nn.BatchNorm1d(
            1, eps=1e-5, momentum=0.1, affine=False)
        if gpu:
            self.norm_bn = self.norm_bn.cuda()
        parameters += list(self.norm_bn.parameters())
    if self.variant in [5, 6, 7]:
        # separate normalizers for positive / negative pair distances
        self.positive_bn = nn.BatchNorm1d(
            1, eps=1e-5, momentum=0.1, affine=False)
        self.negative_bn = nn.BatchNorm1d(
            1, eps=1e-5, momentum=0.1, affine=False)
        if gpu:
            self.positive_bn = self.positive_bn.cuda()
            self.negative_bn = self.negative_bn.cuda()
        parameters += list(self.positive_bn.parameters())
        parameters += list(self.negative_bn.parameters())
    if self.variant in [8, 9]:
        # normalizer for the triplet deltas d(a,p) - d(a,n)
        self.delta_bn = nn.BatchNorm1d(
            1, eps=1e-5, momentum=0.1, affine=False)
        if gpu:
            self.delta_bn = self.delta_bn.cuda()
        parameters += list(self.delta_bn.parameters())
    optimizer = Adam(parameters)
    if restart > 0:
        optimizer_pt = checkpoint.OPTIMIZER_PT.format(
            log_dir=log_dir, epoch=restart)
        optimizer.load_state_dict(torch.load(optimizer_pt))
        if gpu:
            # move restored optimizer state tensors back onto the GPU
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()
    epoch = restart if restart > 0 else -1
    while True:
        epoch += 1
        if epoch > epochs:
            break
        loss_avg, tloss_avg, closs_avg = 0., 0., 0.
        if epoch % 5 == 0:
            # every 5th epoch: collect distances/norms for histograms and EER
            log_positive = []
            log_negative = []
            log_delta = []
            log_norm = []
        desc = 'Epoch #{0}'.format(epoch)
        for i in tqdm(range(batches_per_epoch), desc=desc):
            model.zero_grad()
            batch = next(batches)
            X = batch['X']
            if not getattr(model, 'batch_first', True):
                # sequence-first models expect (seq, batch, feat)
                X = np.rollaxis(X, 0, 2)
            X = np.array(X, dtype=np.float32)
            X = Variable(torch.from_numpy(X))
            if gpu:
                X = X.cuda()
            fX = model(X)
            # pre-compute pairwise distances
            distances = self.pdist(fX)
            # sample triplets (strategy chosen by self.sampling)
            triplets = getattr(self, 'batch_{0}'.format(self.sampling))
            anchors, positives, negatives = triplets(batch['y'], distances)
            # compute triplet loss
            tlosses, deltas, pos_index, neg_index = self.triplet_loss(
                distances, anchors, positives, negatives,
                return_delta=True)
            tloss = torch.mean(tlosses)
            if self.variant == 1:
                closses = F.sigmoid(
                    F.softsign(deltas) * torch.norm(fX[anchors], 2, 1, keepdim=True))
                # if d(a, p) < d(a, n) (i.e. good case)
                # --> sign(delta) < 0
                # --> loss decreases when norm increases.
                # i.e. encourages longer anchor
                # if d(a, p) > d(a, n) (i.e. bad case)
                # --> sign(delta) > 0
                # --> loss increases when norm increases
                # i.e. encourages shorter anchor
            elif self.variant == 2:
                norms_ = torch.norm(fX, 2, 1, keepdim=True)
                norms_ = F.sigmoid(self.norm_bn(norms_))
                confidence = (norms_[anchors] + norms_[positives] + norms_[negatives]) / 3
                # if |x| is average --> normalized |x| = 0 --> confidence = 0.5
                # if |x| is bigger than average --> normalized |x| >> 0 --> confidence = 1
                # if |x| is smaller than average --> normalized |x| << 0 --> confidence = 0
                correctness = F.sigmoid(-deltas / np.pi * 6)
                # if d(a, p) = d(a, n) (i.e. uncertain case) --> correctness = 0.5
                # if d(a, p) - d(a, n) = -pi (i.e. best possible case) --> correctness = 1
                # if d(a, p) - d(a, n) = +pi (i.e. worst possible case) --> correctness = 0
                closses = torch.abs(confidence - correctness)
                # small if (and only if) confidence & correctness agree
            elif self.variant == 3:
                norms_ = torch.norm(fX, 2, 1, keepdim=True)
                norms_ = F.sigmoid(self.norm_bn(norms_))
                confidence = (norms_[anchors] * norms_[positives] * norms_[negatives]) / 3
                correctness = F.sigmoid(-(deltas + np.pi / 4) / np.pi * 6)
                # correctness = 0.5 at delta == -pi/4
                # correctness = 1 for delta == -pi
                # correctness = 0 for delta < 0
                closses = torch.abs(confidence - correctness)
            elif self.variant == 4:
                norms_ = torch.norm(fX, 2, 1, keepdim=True)
                norms_ = F.sigmoid(self.norm_bn(norms_))
                # NOTE(review): due to precedence, `** 1/3` computes
                # (x ** 1) / 3, not a cube root — confirm intended.
                confidence = (norms_[anchors] * norms_[positives] * norms_[negatives]) ** 1/3
                correctness = F.sigmoid(-(deltas + np.pi / 4) / np.pi * 6)
                # correctness = 0.5 at delta == -pi/4
                # correctness = 1 for delta == -pi
                # correctness = 0 for delta < 0
                # delta = pos - neg ... should be < 0
                closses = torch.abs(confidence - correctness)
            elif self.variant == 5:
                norms_ = torch.norm(fX, 2, 1, keepdim=True)
                confidence = F.sigmoid(self.norm_bn(norms_))
                confidence_pos = .5 * (confidence[anchors] + confidence[positives])
                # low positive distance == high correctness
                correctness_pos = F.sigmoid(
                    -self.positive_bn(distances[pos_index].view(-1, 1)))
                confidence_neg = .5 * (confidence[anchors] + confidence[negatives])
                # high negative distance == high correctness
                correctness_neg = F.sigmoid(
                    self.negative_bn(distances[neg_index].view(-1, 1)))
                closses = .5 * (torch.abs(confidence_pos - correctness_pos) \
                                + torch.abs(confidence_neg - correctness_neg))
            elif self.variant == 6:
                norms_ = torch.norm(fX, 2, 1, keepdim=True)
                confidence = F.sigmoid(self.norm_bn(norms_))
                confidence_pos = .5 * (confidence[anchors] + confidence[positives])
                # low positive distance == high correctness
                correctness_pos = F.sigmoid(
                    -self.positive_bn(distances[pos_index].view(-1, 1)))
                closses = torch.abs(confidence_pos - correctness_pos)
            elif self.variant == 7:
                norms_ = torch.norm(fX, 2, 1, keepdim=True)
                confidence = F.sigmoid(self.norm_bn(norms_))
                confidence_neg = .5 * (confidence[anchors] + confidence[negatives])
                # high negative distance == high correctness
                correctness_neg = F.sigmoid(
                    self.negative_bn(distances[neg_index].view(-1, 1)))
                closses = torch.abs(confidence_neg - correctness_neg)
            elif self.variant in [8, 9]:
                norms_ = torch.norm(fX, 2, 1, keepdim=True)
                norms_ = F.sigmoid(self.norm_bn(norms_))
                confidence = (norms_[anchors] * norms_[positives] * norms_[negatives]) / 3
                correctness = F.sigmoid(-self.delta_bn(deltas))
                closses = torch.abs(confidence - correctness)
            closs = torch.mean(closses)
            if epoch % 5 == 0:
                # stash raw numbers for the epoch-end histograms / EER
                if gpu:
                    fX_npy = fX.data.cpu().numpy()
                    pdist_npy = distances.data.cpu().numpy()
                    delta_npy = deltas.data.cpu().numpy()
                else:
                    fX_npy = fX.data.numpy()
                    pdist_npy = distances.data.numpy()
                    delta_npy = deltas.data.numpy()
                log_norm.append(np.linalg.norm(fX_npy, axis=1))
                # chebyshev distance < 1 between integer labels means "same speaker"
                same_speaker = pdist(batch['y'].reshape((-1, 1)), metric='chebyshev') < 1
                log_positive.append(pdist_npy[np.where(same_speaker)])
                log_negative.append(pdist_npy[np.where(~same_speaker)])
                log_delta.append(delta_npy)
            # log loss
            if gpu:
                tloss_ = float(tloss.data.cpu().numpy())
                closs_ = float(closs.data.cpu().numpy())
            else:
                tloss_ = float(tloss.data.numpy())
                closs_ = float(closs.data.numpy())
            tloss_avg += tloss_
            closs_avg += closs_
            loss_avg += tloss_ + closs_
            # combined objective: triplet loss + confidence loss
            loss = tloss + closs
            loss.backward()
            optimizer.step()
        tloss_avg /= batches_per_epoch
        writer.add_scalar('tloss', tloss_avg, global_step=epoch)
        closs_avg /= batches_per_epoch
        writer.add_scalar('closs', closs_avg, global_step=epoch)
        loss_avg /= batches_per_epoch
        writer.add_scalar('loss', loss_avg, global_step=epoch)
        if epoch % 5 == 0:
            log_positive = np.hstack(log_positive)
            writer.add_histogram(
                'embedding/pairwise_distance/positive', log_positive,
                global_step=epoch, bins=np.linspace(0, np.pi, 50))
            log_negative = np.hstack(log_negative)
            writer.add_histogram(
                'embedding/pairwise_distance/negative', log_negative,
                global_step=epoch, bins=np.linspace(0, np.pi, 50))
            # equal-error-rate over the collected positive/negative distances
            _, _, _, eer = det_curve(
                np.hstack([np.ones(len(log_positive)), np.zeros(len(log_negative))]),
                np.hstack([log_positive, log_negative]), distances=True)
            writer.add_scalar('eer', eer, global_step=epoch)
            log_norm = np.hstack(log_norm)
            writer.add_histogram(
                'norm', log_norm,
                global_step=epoch, bins='doane')
            log_delta = np.vstack(log_delta)
            writer.add_histogram(
                'delta', log_delta,
                global_step=epoch, bins='doane')
        checkpoint.on_epoch_end(epoch, model, optimizer)
        if hasattr(self, 'norm_bn'):
            # also checkpoint the auxiliary confidence batch-norm layer
            confidence_pt = self.CONFIDENCE_PT.format(
                log_dir=log_dir, epoch=epoch)
            torch.save(self.norm_bn.state_dict(), confidence_pt)
def TrainModel(model, saving_name, criterion, epochs, interval, batch_size, device, save = True):
    """Train ``model`` on CIFAR-10, evaluating loss/accuracy on the full
    train and test sets after every epoch and writing a CSV history.

    Parameters
    ----------
    model : network to train (moved to the selected device).
    saving_name : prefix for checkpoint (.pkl) and history (.csv) files.
    criterion : loss module (e.g. CrossEntropyLoss).
    epochs : number of training epochs.
    interval : save a checkpoint every ``interval`` epochs (when ``save``).
    batch_size : minibatch size for all loaders.
    device : CUDA device index, or a negative value for CPU.
    save : also save checkpoints and the final model when True.

    Fixes vs. the original: the model is switched to eval() for the metric
    passes (in train mode, BatchNorm running statistics were being mutated
    by the evaluation forward passes even under ``no_grad``) and back to
    train() afterwards; the history file is written with a context manager;
    the loop variable no longer shadows the builtin ``iter``.
    """
    if device < 0:
        env = torch.device('cpu')
        print('Envirnment setting done, using device: cpu')
    else:
        torch.backends.cudnn.benchmark = True
        cuda.set_device(device)
        env = torch.device('cuda:' + str(device))
        print('Envirnment setting done, using device: CUDA_' + str(device))
    model.float().to(env)
    criterion.to(env)
    optim = Adam(model.parameters())
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    train_set = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
    train_eval_set = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
    test_set = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
    # NOTE(review): shuffle=False on the *training* loader is unusual for SGD;
    # kept as-is to preserve the original training order.
    train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = False)
    train_eval_loader = DataLoader(train_eval_set, batch_size = batch_size, shuffle = False)
    test_loader = DataLoader(test_set, batch_size = batch_size, shuffle = False)
    print('Model Structure:')
    print(model)
    print('Model Parameter numbers: ', sum(p.numel() for p in model.parameters() if p.requires_grad))
    history = ['Epoch,Grad_norm,Train Loss,Train Acc.,Test Loss,Test Acc.\n']
    for epoch in range(epochs):
        print('Start training epoch: ', epoch + 1)
        model.train()
        for batch_idx, data in enumerate(train_loader):
            train_x, train_y = data
            train_x = train_x.float().to(env)
            train_y = train_y.long().to(env)
            optim.zero_grad()
            out = model(train_x)
            loss = criterion(out, train_y)
            loss.backward()
            optim.step()
            if batch_idx != 0 and batch_idx % 10 == 0:
                print('Iter: ', batch_idx, ' | Loss: %6f' % loss.detach())
        grad_norm = Acquire_grad_norm(model)
        # Metric passes in eval mode so BatchNorm/Dropout are frozen and the
        # model's running statistics are not perturbed by evaluation.
        model.eval()
        train_loss = []
        train_total = []
        train_correct = 0
        with torch.no_grad():
            for train_x, train_y in train_eval_loader:
                train_x = train_x.float().to(env)
                train_y = train_y.long().to(env)
                train_out = model(train_x)
                _, prediction = torch.max(train_out.data, 1)
                train_total.append(train_y.size(0))
                train_correct += (prediction == train_y).sum().item()
                train_loss.append(criterion(train_out, train_y).detach())
        train_loss = torch.tensor(train_loss)
        train_total = torch.tensor(train_total)
        test_loss = []
        test_total = []
        test_correct = 0
        with torch.no_grad():
            for test_x, test_y in test_loader:
                test_x = test_x.float().to(env)
                test_y = test_y.long().to(env)
                test_out = model(test_x)
                _, prediction = torch.max(test_out.data, 1)
                test_total.append(test_y.size(0))
                test_correct += (prediction == test_y).sum().item()
                test_loss.append(criterion(test_out, test_y).detach())
        test_loss = torch.tensor(test_loss)
        test_total = torch.tensor(test_total)
        model.train()
        # Batch-size-weighted averages (the last batch may be smaller).
        train_loss = float((torch.sum(torch.mul(train_loss, train_total.float())) / torch.sum(train_total)).detach())
        train_acc = float((100 * train_correct / torch.sum(train_total)).detach())
        test_loss = float((torch.sum(torch.mul(test_loss, test_total.float())) / torch.sum(test_total)).detach())
        test_acc = float((100 * test_correct / torch.sum(test_total)).detach())
        history.append(str(epoch + 1) + ',' + str(grad_norm) + ',' + str(train_loss) + ',' +
                       str(train_acc) + ',' + str(test_loss) + ',' + str(test_acc) + '\n')
        print('\nEpoch: ', epoch + 1, '| Grad_norm: %6f' % grad_norm,
              '| Train loss: %6f' % train_loss, '| Train Acc.: %2f' % train_acc,
              '| Test loss: %6f' % test_loss, '| Test Acc.: %2f' % test_acc, '\n')
        if (epoch + 1) % interval == 0 and save:
            torch.save(model, saving_name + '_E' + str(epoch + 1) + '.pkl')
    # Write the per-epoch history CSV (context manager guarantees the file
    # is closed even if writelines raises).
    with open(saving_name + '.csv', 'w') as f:
        f.writelines(history)
    if save:
        torch.save(model, saving_name + '.pkl')
    print('All process done.')