def __init__(self, env, results_path, tok, episode_len=20):
    """Build the encoder, decoder and critic networks plus their optimizers.

    Args:
        env: Forwarded to the base-class constructor. ``feature_size`` is
            later read from ``self.env`` (presumably the same object,
            stored by the base class -- confirm against the base class).
        results_path: Forwarded unchanged to the base-class constructor.
        tok: Object exposing ``vocab_size()``; used to size the encoder's
            vocabulary.
        episode_len: Stored as ``self.episode_len``.

    Every network is moved to the GPU with ``.cuda()``. Hyper-parameters
    are read from the module-level ``args`` object.
    """
    super(Seq2SeqAgent, self).__init__(env, results_path)
    self.tok = tok
    self.episode_len = episode_len
    self.feature_size = self.env.feature_size

    # Models
    # A bidirectional encoder gets half the hidden size per direction.
    if args.bidir:
        enc_hidden_size = args.rnn_dim // 2
    else:
        enc_hidden_size = args.rnn_dim
    self.encoder = model.EncoderLSTM(
        tok.vocab_size(),
        args.wemb,
        enc_hidden_size,
        padding_idx,
        args.dropout,
        bidirectional=args.bidir,
    ).cuda()
    self.decoder = model.AttnDecoderLSTM(
        args.aemb,
        args.rnn_dim,
        args.dropout,
        feature_size=self.feature_size + args.angle_feat_size,
    ).cuda()
    self.critic = model.Critic().cuda()
    self.models = (self.encoder, self.decoder, self.critic)

    # Optimizers: one per network, all sharing the same learning rate.
    self.encoder_optimizer = args.optimizer(self.encoder.parameters(), lr=args.lr)
    self.decoder_optimizer = args.optimizer(self.decoder.parameters(), lr=args.lr)
    self.critic_optimizer = args.optimizer(self.critic.parameters(), lr=args.lr)
    self.optimizers = (
        self.encoder_optimizer,
        self.decoder_optimizer,
        self.critic_optimizer,
    )

    # Evaluations
    self.losses = []
    # reduction='sum' is the supported spelling of the deprecated
    # size_average=False: per-element losses are summed, not averaged.
    self.criterion = nn.CrossEntropyLoss(
        ignore_index=args.ignoreid, reduction='sum'
    )

    # Logs
    sys.stdout.flush()
    self.logs = defaultdict(list)
def __init__(self, env, results_path, tok, episode_len=20):
    """Build the encoder, (policy) decoder and critic plus their optimizers.

    Args:
        env: Forwarded to the base-class constructor. ``feature_size`` is
            later read from ``self.env`` (presumably the same object,
            stored by the base class -- confirm against the base class).
        results_path: Forwarded unchanged to the base-class constructor.
        tok: Object exposing ``vocab_size()``; used to size the encoder's
            vocabulary.
        episode_len: Stored as ``self.episode_len``.

    The decoder class is selected by ``args.original_decoder``. Every
    network is moved to the GPU with ``.cuda()``. Hyper-parameters are
    read from the module-level ``args`` object.
    """
    super(Seq2PolicyAgent, self).__init__(env, results_path)
    self._iter = 0
    self.tok = tok
    self.episode_len = episode_len
    self.feature_size = self.env.feature_size

    # Models
    # A bidirectional encoder gets half the hidden size per direction.
    if args.bidir:
        enc_hidden_size = args.rnn_dim // 2
    else:
        enc_hidden_size = args.rnn_dim
    self.encoder = model.EncoderLSTM(
        tok.vocab_size(),
        args.wemb,
        enc_hidden_size,
        padding_idx,
        args.dropout,
        bidirectional=args.bidir,
    ).cuda()

    decoder_feature_size = self.feature_size + args.angle_feat_size
    if args.original_decoder:
        self.decoder = model.AttnDecoderLSTM(
            args.aemb,
            args.rnn_dim,
            args.dropout,
            feature_size=decoder_feature_size,
        ).cuda()
    else:
        self.decoder = model.AttnPolicyLSTM(
            args.aemb,
            args.rnn_dim,
            args.dropout,
            feature_size=decoder_feature_size,
            latent_dim=args.vae_latent_dim,
        ).cuda()
        # NOTE(review): the freeze is kept inside this branch because it
        # touches ``self.decoder.policy``, which is presumably only
        # defined on AttnPolicyLSTM -- confirm against the model module.
        if args.fix_vae:
            print("fix the parameters in sub policy")
            for param in self.decoder.policy.parameters():
                param.requires_grad = False

    self.critic = model.Critic().cuda()
    self.models = (self.encoder, self.decoder, self.critic)

    # Optimizers: one per network, all sharing the same learning rate.
    self.encoder_optimizer = args.optimizer(self.encoder.parameters(), lr=args.lr)
    self.decoder_optimizer = args.optimizer(self.decoder.parameters(), lr=args.lr)
    self.critic_optimizer = args.optimizer(self.critic.parameters(), lr=args.lr)
    self.optimizers = (
        self.encoder_optimizer,
        self.decoder_optimizer,
        self.critic_optimizer,
    )

    # Evaluations
    self.losses = []
    # reduction='sum' is the supported spelling of the deprecated
    # size_average=False: per-element losses are summed, not averaged.
    self.criterion = nn.CrossEntropyLoss(
        ignore_index=args.ignoreid, reduction='sum'
    )

    # Logs
    sys.stdout.flush()
    self.logs = defaultdict(list)