def __init__(self, maze_dims, action_dim, act_proc_dim, z_dim,
             x_encoder_specs, pre_gru_specs, gru_specs, prior_part_specs,
             inference_part_specs, decoder_part_specs, masked_latent):
    """Build the modules of a GRU-based sequential VAE over maze images.

    ``maze_dims`` is indexed as (channels, height, ...); images are treated
    as square.  ``masked_latent`` is accepted for interface compatibility
    but is not used in this constructor.
    """
    super().__init__()

    # action preprocessing
    self.act_proc_dim = act_proc_dim
    self.action_fc = nn.Linear(action_dim, self.act_proc_dim, bias=True)

    channels, side = maze_dims[0], maze_dims[1]

    # convolutional image encoder
    self.x_encoder, enc_ch, enc_h = make_conv_net(channels, side, x_encoder_specs)
    self.x_enc_ch = enc_ch
    self.x_enc_h = enc_h
    flat_enc_dim = enc_ch * enc_h * enc_h

    h_size = gru_specs['hidden_size']

    # prior p(z | h, a)
    self.prior_fc_seq, fc_dim = make_fc_net(
        self.act_proc_dim + h_size, prior_part_specs)
    self.prior_mean_fc = nn.Linear(fc_dim, z_dim, bias=True)
    self.prior_log_cov_fc = nn.Linear(fc_dim, z_dim, bias=True)

    # posterior q(z | h, a, x)
    self.posterior_fc_seq, fc_dim = make_fc_net(
        self.act_proc_dim + h_size + flat_enc_dim, inference_part_specs)
    self.posterior_mean_fc = nn.Linear(fc_dim, z_dim, bias=True)
    self.posterior_log_cov_fc = nn.Linear(fc_dim, z_dim, bias=True)

    # recurrence: processed (action, x-encoding, z) features feed the GRU
    self.pre_gru_seq, fc_dim = make_fc_net(
        self.act_proc_dim + flat_enc_dim + z_dim, pre_gru_specs)
    self.gru_cell = nn.GRUCell(fc_dim, h_size, bias=True)
    self.h_dim = [h_size]

    # decoder / generation: fc part must re-expand exactly to the flattened
    # conv feature map before the upconv stack
    self.recon_fc_seq, dec_h = make_fc_net(
        z_dim + self.h_dim[0], decoder_part_specs['fc_part'])
    assert dec_h == enc_h * enc_h * enc_ch
    # just for convenience we use these dims
    self.recon_upconv_seq, dec_ch, dec_h = make_upconv_net(
        enc_ch, enc_h, decoder_part_specs['conv_part'])
    self.recon_mean_conv = nn.Conv2d(dec_ch, 3, 3, stride=1, padding=1, bias=True)
    self.recon_log_cov_conv = nn.Conv2d(dec_ch, 3, 3, stride=1, padding=1, bias=True)
    assert dec_h == maze_dims[1]
def __init__(self, maze_dims, action_dim, act_proc_dim, z_dim,
             x_encoder_specs, pre_gru_specs, gru_specs, prior_part_specs,
             inference_part_specs, decoder_part_specs, masked_latent):
    """Fully-connected variant of the GRU sequential VAE, plus a small
    auxiliary bottleneck autoencoder.

    ``masked_latent`` is accepted for interface compatibility but is not
    used in this constructor.
    """
    super().__init__()

    # action preprocessing
    self.act_proc_dim = act_proc_dim
    self.action_fc = nn.Linear(action_dim, self.act_proc_dim, bias=True)

    channels, side = maze_dims[0], maze_dims[1]
    maze_flat_dim = channels * side * side
    self.maze_dims = maze_dims

    # fully-connected image encoder over the flattened maze
    self.x_encoder, enc_dim = make_fc_net(side * side * channels, x_encoder_specs)
    self.x_enc_h = enc_dim

    h_size = gru_specs['hidden_size']

    # prior p(z | h, a)
    self.prior_fc_seq, fc_dim = make_fc_net(
        self.act_proc_dim + h_size, prior_part_specs)
    self.prior_mean_fc = nn.Linear(fc_dim, z_dim, bias=True)
    self.prior_log_cov_fc = nn.Linear(fc_dim, z_dim, bias=True)

    # posterior q(z | h, a, x)
    self.posterior_fc_seq, fc_dim = make_fc_net(
        self.act_proc_dim + h_size + self.x_enc_h, inference_part_specs)
    self.posterior_mean_fc = nn.Linear(fc_dim, z_dim, bias=True)
    self.posterior_log_cov_fc = nn.Linear(fc_dim, z_dim, bias=True)

    # recurrence
    self.pre_gru_seq, fc_dim = make_fc_net(
        self.act_proc_dim + self.x_enc_h + z_dim, pre_gru_specs)
    self.gru_cell = nn.GRUCell(fc_dim, h_size, bias=True)
    self.h_dim = [h_size]

    # decoder / generation back to the flat maze
    self.recon_fc_seq, dec_dim = make_fc_net(
        z_dim + self.h_dim[0], decoder_part_specs['fc_part'])
    self.recon_mean_fc = nn.Linear(dec_dim, maze_flat_dim, bias=True)
    self.recon_log_cov_fc = nn.Linear(dec_dim, maze_flat_dim, bias=True)

    # auxiliary bottleneck autoencoder over 48-dim inputs.
    # NOTE(review): 48 is hard-coded — presumably tied to a specific maze
    # size; confirm against callers before changing maze_dims.
    ae_dim = 256
    self.autoencoder = nn.Sequential(
        nn.Linear(48, ae_dim, bias=False), nn.BatchNorm1d(ae_dim), nn.ReLU(),
        nn.Linear(ae_dim, ae_dim, bias=False), nn.BatchNorm1d(ae_dim), nn.ReLU(),
        nn.Linear(ae_dim, ae_dim, bias=False), nn.BatchNorm1d(ae_dim), nn.ReLU(),
    )
    self.fc = nn.Linear(ae_dim, 48, bias=True)
def __init__(
    self,
    maze_dims,
    action_proc_dim,
    z_dim,
    x_encoder_specs,
    pre_lstm_dim,
    lstm_dim,
    prior_part_specs,
    inference_part_specs,
    decoder_part_specs,
):
    # LSTM-based sequential VAE with an attention gate over the hidden state.
    #
    # NOTE(review): this constructor is broken as written — it references
    # `gru_specs` and `self.h_dim`, neither of which is defined here (there is
    # no `gru_specs` parameter and `self.h_dim` is never assigned), so running
    # it raises NameError.  The offending lines are flagged below.
    super().__init__()
    in_ch = maze_dims[0]
    in_h = maze_dims[1]
    # convolutional image encoder
    self.x_encoder, out_ch, out_h = make_conv_net(in_ch, in_h, x_encoder_specs)
    x_enc_channels = out_ch
    x_enc_h = out_h
    # separate action embeddings for each consumer; the fixed input size of 4
    # is presumably a one-hot over four maze actions — TODO confirm with callers
    self.prior_action_fc = nn.Linear(4, action_proc_dim, bias=True)
    self.post_action_fc = nn.Linear(4, action_proc_dim, bias=True)
    self.recon_action_fc = nn.Linear(4, action_proc_dim, bias=True)
    self.pre_lstm_action_fc = nn.Linear(4, action_proc_dim, bias=True)
    self.lstm = nn.LSTMCell(
        pre_lstm_dim,
        lstm_dim,
        bias=True
    )
    # attention over the LSTM hidden state conditioned on the action embedding;
    # note the final activation was left unchosen (Sigmoid/Softmax commented out)
    self.attention_seq = nn.Sequential(
        nn.Linear(lstm_dim + action_proc_dim, lstm_dim, bias=False),
        nn.BatchNorm1d(lstm_dim),
        nn.ReLU(),
        nn.Linear(lstm_dim, lstm_dim),
        # nn.Sigmoid()
        # nn.Softmax()
    )
    # prior p(z | h, a)
    self.prior_fc_seq, hidden_dim = make_fc_net(lstm_dim + action_proc_dim, prior_part_specs)
    self.prior_mean_fc = nn.Linear(hidden_dim, z_dim, bias=True)
    self.prior_log_cov_fc = nn.Linear(hidden_dim, z_dim, bias=True)
    # BUG: `gru_specs` is not defined in this scope -> NameError at runtime.
    # The assignment is also dead: out_ch is reassigned before being read.
    out_ch = gru_specs['num_channels']
    # models for the posterior
    self.posterior_fc_seq, hidden_dim = make_fc_net(lstm_dim + x_enc_channels*x_enc_h*x_enc_h + action_proc_dim, inference_part_specs)
    self.posterior_mean_fc = nn.Linear(hidden_dim, z_dim, bias=True)
    self.posterior_log_cov_fc = nn.Linear(hidden_dim, z_dim, bias=True)
    # models for the decoding/generation
    self.recon_fc_seq, out_h = make_fc_net(z_dim + lstm_dim + action_proc_dim, decoder_part_specs['fc_part_specs'])
    # BUG: references undefined `gru_specs` and never-assigned `self.h_dim`;
    # the upconv input dimensions cannot be resolved as written.
    self.recon_upconv_seq, out_ch, out_h = make_upconv_net(gru_specs['num_channels'] + z_dim, self.h_dim[1], decoder_part_specs['upconv_part_specs'])
    self.recon_mean_conv = nn.Conv2d(out_ch, 3, 3, stride=1, padding=1, bias=True)
    self.recon_log_cov_conv = nn.Conv2d(out_ch, 3, 3, stride=1, padding=1, bias=True)
    assert out_h == maze_dims[1]
def __init__(
    self,
    maze_dims,
    z_dim,
    encoder_specs,
    decoder_specs
):
    """Build a conv VAE with an auxiliary latent-mask pathway.

    The mask pathway compresses the conv features to a 128-dim code and
    regenerates a single-channel spatial mask from it.
    """
    super().__init__()

    channels, side = maze_dims[0], maze_dims[1]

    # convolutional encoder
    self.encoder_conv_seq, enc_ch, enc_h = make_conv_net(
        channels, side, encoder_specs['conv_part_specs'])
    self.x_enc_ch = enc_ch
    self.x_enc_h = enc_h
    flat_inter_img_dim = enc_ch * enc_h * enc_h

    # mask pathway: conv features -> 128-dim mask code -> spatial mask
    mask_conv_specs = {
        'kernel_sizes': [3],
        'num_channels': [64],
        'strides': [1],
        'paddings': [1],
        'use_bn': True
    }
    self.z_mask_conv_seq, _, _ = make_conv_net(enc_ch, enc_h, mask_conv_specs)
    self.z_mask_fc_seq, _ = make_fc_net(
        64 * enc_h * enc_h, {'hidden_sizes': [1024], 'use_bn': True})
    self.z_mask_fc = nn.Linear(1024, 128, bias=True)
    self.z_mask_gen_fc_seq, _ = make_fc_net(
        128, {'hidden_sizes': [1024, 4 * enc_h * enc_h], 'use_bn': True})
    self.z_mask_gen_conv = nn.Conv2d(4, 1, 3, stride=1, padding=1, bias=True)

    # latent heads
    self.encoder_fc_seq, fc_dim = make_fc_net(
        flat_inter_img_dim, encoder_specs['fc_part_specs'])
    self.z_mean_fc = nn.Linear(fc_dim, z_dim, bias=True)
    self.z_log_cov_fc = nn.Linear(fc_dim, z_dim, bias=True)

    # decoder (fc output size is intentionally not asserted here)
    self.decoder_fc_seq, fc_dim = make_fc_net(
        z_dim, decoder_specs['fc_part_specs'])
    self.decoder_upconv_seq, dec_ch, dec_h = make_upconv_net(
        enc_ch, enc_h, decoder_specs['upconv_part_specs'])
    self.recon_mean_conv = nn.Conv2d(dec_ch, 1, 1, stride=1, padding=0, bias=True)
    self.recon_log_cov_conv = nn.Conv2d(dec_ch, 1, 1, stride=1, padding=0, bias=True)
    assert dec_h == maze_dims[1], str(dec_h) + ' != ' + str(maze_dims[1])
def __init__(self, maze_dims, action_dim, act_proc_dim, z_dim,
             x_encoder_specs, pre_gru_specs, gru_specs, prior_part_specs,
             inference_part_specs, decoder_part_specs, masked_latent):
    """Fully-connected GRU sequential VAE over flattened maze images.

    ``masked_latent`` is accepted for interface compatibility but is not
    used in this constructor.
    """
    super().__init__()

    # action preprocessing
    self.act_proc_dim = act_proc_dim
    self.action_fc = nn.Linear(action_dim, self.act_proc_dim, bias=True)

    channels, side = maze_dims[0], maze_dims[1]
    maze_flat_dim = channels * side * side
    self.maze_dims = maze_dims

    # fully-connected image encoder over the flattened maze
    self.x_encoder, enc_dim = make_fc_net(side * side * channels, x_encoder_specs)
    self.x_enc_h = enc_dim

    h_size = gru_specs['hidden_size']

    # prior p(z | h, a)
    self.prior_fc_seq, fc_dim = make_fc_net(
        self.act_proc_dim + h_size, prior_part_specs)
    self.prior_mean_fc = nn.Linear(fc_dim, z_dim, bias=True)
    self.prior_log_cov_fc = nn.Linear(fc_dim, z_dim, bias=True)

    # posterior q(z | h, a, x)
    self.posterior_fc_seq, fc_dim = make_fc_net(
        self.act_proc_dim + h_size + self.x_enc_h, inference_part_specs)
    self.posterior_mean_fc = nn.Linear(fc_dim, z_dim, bias=True)
    self.posterior_log_cov_fc = nn.Linear(fc_dim, z_dim, bias=True)

    # recurrence
    self.pre_gru_seq, fc_dim = make_fc_net(
        self.act_proc_dim + self.x_enc_h + z_dim, pre_gru_specs)
    self.gru_cell = nn.GRUCell(fc_dim, h_size, bias=True)
    self.h_dim = [h_size]

    # decoder / generation back to the flat maze
    self.recon_fc_seq, dec_dim = make_fc_net(
        z_dim + self.h_dim[0], decoder_part_specs['fc_part'])
    self.recon_mean_fc = nn.Linear(dec_dim, maze_flat_dim, bias=True)
    self.recon_log_cov_fc = nn.Linear(dec_dim, maze_flat_dim, bias=True)
def __init__(self, maze_dims, z_dim, encoder_specs, decoder_specs):
    """Standard conv VAE: conv+fc encoder to (mean, log-cov) latent heads,
    fc+upconv decoder back to a one-channel maze image."""
    super().__init__()

    channels, side = maze_dims[0], maze_dims[1]

    # encoder
    self.encoder_conv_seq, enc_ch, enc_h = make_conv_net(
        channels, side, encoder_specs['conv_part_specs'])
    self.x_enc_ch = enc_ch
    self.x_enc_h = enc_h
    flat_inter_img_dim = enc_ch * enc_h * enc_h

    self.encoder_fc_seq, fc_dim = make_fc_net(
        flat_inter_img_dim, encoder_specs['fc_part_specs'])
    self.z_mean_fc = nn.Linear(fc_dim, z_dim, bias=True)
    self.z_log_cov_fc = nn.Linear(fc_dim, z_dim, bias=True)

    # decoder: the fc part must re-expand exactly to the flattened conv map
    self.decoder_fc_seq, fc_dim = make_fc_net(
        z_dim, decoder_specs['fc_part_specs'])
    assert fc_dim == flat_inter_img_dim
    self.decoder_upconv_seq, dec_ch, dec_h = make_upconv_net(
        enc_ch, enc_h, decoder_specs['upconv_part_specs'])
    self.recon_mean_conv = nn.Conv2d(dec_ch, 1, 1, stride=1, padding=0, bias=True)
    self.recon_log_cov_conv = nn.Conv2d(dec_ch, 1, 1, stride=1, padding=0, bias=True)
    assert dec_h == maze_dims[1], str(dec_h) + ' != ' + str(maze_dims[1])
def __init__(self, maze_dims, z_dim, encoder_specs, decoder_specs):
    """Conv VAE with a mask head and a constant (x, y) coordinate meshgrid
    intended to be concatenated to the decoder input."""
    super().__init__()

    channels, side = maze_dims[0], maze_dims[1]

    # encoder
    self.encoder_conv_seq, enc_ch, enc_h = make_conv_net(
        channels, side, encoder_specs['conv_part_specs'])
    self.x_enc_ch = enc_ch
    self.x_enc_h = enc_h
    flat_inter_img_dim = enc_ch * enc_h * enc_h

    # mask head over the conv features: stride-2 conv then 1x1 to one channel
    self.enc_mask_seq, _, _ = make_conv_net(
        enc_ch, enc_h,
        {
            'kernel_sizes': [3],
            'num_channels': [64],
            'strides': [2],
            'paddings': [1],
            'use_bn': True
        })
    self.enc_mask_conv = nn.Conv2d(64, 1, 1, stride=1, padding=0, bias=True)

    # constant coordinate grid in [-1, 1], shape (1, 2, enc_h, enc_h)
    grid_x, grid_y = np.meshgrid(np.linspace(-1., 1., enc_h),
                                 np.linspace(-1., 1., enc_h))
    grid_x = torch.FloatTensor(grid_x[None, None, ...])
    grid_y = torch.FloatTensor(grid_y[None, None, ...])
    self.mesh = torch.cat([grid_x, grid_y], 1)
    # NOTE(review): hard-coded .cuda() — as written this module requires a GPU
    self.mesh = Variable(self.mesh, requires_grad=False).cuda()

    # latent heads
    self.encoder_fc_seq, fc_dim = make_fc_net(
        flat_inter_img_dim, encoder_specs['fc_part_specs'])
    self.z_mean_fc = nn.Linear(fc_dim, z_dim, bias=True)
    self.z_log_cov_fc = nn.Linear(fc_dim, z_dim, bias=True)

    # decoder; upconv input channels hard-coded to 130 — presumably
    # enc_ch (128) + the 2 mesh channels; confirm against forward()
    self.decoder_fc_seq, fc_dim = make_fc_net(
        z_dim, decoder_specs['fc_part_specs'])
    self.decoder_upconv_seq, dec_ch, dec_h = make_upconv_net(
        130, enc_h, decoder_specs['upconv_part_specs'])
    self.recon_mean_conv = nn.Conv2d(dec_ch, 1, 1, stride=1, padding=0, bias=True)
    self.recon_log_cov_conv = nn.Conv2d(dec_ch, 1, 1, stride=1, padding=0, bias=True)
    assert dec_h == maze_dims[1], str(dec_h) + ' != ' + str(maze_dims[1])
def __init__(self, maze_dims, z_dim, x_encoder_specs, z_seg_conv_specs,
             z_seg_fc_specs, z_obj_conv_specs, z_obj_fc_specs,
             z_seg_recon_fc_specs, z_seg_recon_upconv_specs,
             z_obj_recon_fc_specs, z_obj_recon_upconv_specs,
             recon_upconv_part_specs):
    """Build a seg/obj factored VAE over maze images.

    Two latent inference pathways are constructed: a "seg" path over the
    conv features plus one extra channel, and an "obj" path over the flat
    conv features.  ``z_obj_conv_specs`` and ``z_obj_recon_upconv_specs``
    are currently unused but kept for interface compatibility.

    Fix: removed leftover debug ``print(out_h)`` statements that wrote to
    stdout on every construction.
    """
    super().__init__()

    in_ch = maze_dims[0]
    in_h = maze_dims[1]

    # shared convolutional encoder
    self.x_encoder, x_enc_ch, x_enc_h = make_conv_net(
        in_ch, in_h, x_encoder_specs)
    self.x_enc_ch = x_enc_ch
    self.x_enc_h = x_enc_h
    flat_inter_img_dim = x_enc_ch * x_enc_h * x_enc_h

    # seg-latent inference path; the +1 input channel is presumably a
    # segmentation mask appended to the conv features — confirm in forward()
    self.z_seg_conv_seq, out_ch, out_h = make_conv_net(
        x_enc_ch + 1, x_enc_h, z_seg_conv_specs)
    self.z_seg_fc_seq, out_h = make_fc_net(out_ch * out_h * out_h,
                                           z_seg_fc_specs)
    self.z_seg_mean_fc = nn.Linear(out_h, z_dim, bias=True)
    self.z_seg_log_cov_fc = nn.Linear(out_h, z_dim, bias=True)

    # obj-latent inference path over the flat conv features
    self.z_obj_fc_seq, out_h = make_fc_net(flat_inter_img_dim, z_obj_fc_specs)
    self.z_obj_mean_fc = nn.Linear(out_h, z_dim, bias=True)
    self.z_obj_log_cov_fc = nn.Linear(out_h, z_dim, bias=True)

    # seg-mask generation from z_seg
    self.z_seg_mask_fc_seq, out_h = make_fc_net(z_dim, z_seg_recon_fc_specs)
    self.z_seg_mask_upconv_seq, out_ch, out_h = make_upconv_net(
        x_enc_ch, x_enc_h, z_seg_recon_upconv_specs)
    self.z_seg_mask_conv = nn.Conv2d(out_ch, 1, 3, stride=1, padding=1,
                                     bias=True)

    # obj reconstruction features and final image reconstruction
    self.z_obj_recon_fc_seq, z_recon_dim = make_fc_net(
        z_dim, z_obj_recon_fc_specs)
    self.recon_upconv_seq, out_ch, out_h = make_upconv_net(
        x_enc_ch, x_enc_h, recon_upconv_part_specs)
    self.recon_mean_conv = nn.Conv2d(out_ch, 1, 1, stride=1,
                                     padding=0, bias=True)
    self.recon_log_cov_conv = nn.Conv2d(out_ch, 1, 1, stride=1,
                                        padding=0, bias=True)
    assert out_h == maze_dims[1], str(out_h) + ' != ' + str(maze_dims[1])