def __init__(self, input_dim, hidden_rnn_dim, rnn_layers, rnn_dropout,
             learn_initial_state=True):
    """Bidirectional GRU with an optionally learned initial hidden state.

    Args:
        input_dim: size of each input feature vector.
        hidden_rnn_dim: GRU hidden size (per direction).
        rnn_layers: number of stacked GRU layers.
        rnn_dropout: dropout applied between stacked layers.
        learn_initial_state: if True, build a small Gaussian network that
            produces a (noisy) initial hidden state instead of zeros.
    """
    super(BiRnn, self).__init__()

    self.input_dim = input_dim
    self.hidden_rnn_dim = hidden_rnn_dim

    # NOTE(review): batch_first is left at its default (False), so this GRU
    # expects inputs shaped (seq, batch, feature). The original comment
    # claimed batch_first=True -- confirm against the callers.
    self.f_rnn = nn.GRU(self.input_dim,
                        self.hidden_rnn_dim,
                        num_layers=rnn_layers,
                        dropout=rnn_dropout,
                        bidirectional=True)

    # Learned (noisy) initial hidden state.
    # Note: only implemented for GRU right now.
    self.learn_init = learn_initial_state
    if self.learn_init:
        # Initial state shape: (num_layers * num_directions, batch, hidden_size)
        self.init_network = Gaussian(input_dim=1,
                                     output_dim=self.f_rnn.hidden_size,
                                     hidden_units=[256])
def __init__(self,
             feature_shape,
             action_shape,
             output_dim,  # typically mode_dim
             hidden_rnn_dim,
             hidden_units,
             rnn_layers,
             rnn_dropout):
    """Encode feature and action sequences into a mode distribution."""
    super(ModeEncoder, self).__init__()

    # One bidirectional RNN per input stream.
    self.f_rnn_features = BiRnn(feature_shape,
                                hidden_rnn_dim=hidden_rnn_dim,
                                rnn_layers=rnn_layers,
                                rnn_dropout=rnn_dropout)
    self.f_rnn_actions = BiRnn(action_shape,
                               hidden_rnn_dim=hidden_rnn_dim,
                               rnn_layers=rnn_layers,
                               rnn_dropout=rnn_dropout)

    # Each BiRnn emits 2 * hidden_rnn_dim (both directions); concatenating
    # the feature and action encodings gives 4 * hidden_rnn_dim inputs.
    self.f_dist = Gaussian(input_dim=4 * hidden_rnn_dim,
                           output_dim=output_dim,
                           hidden_units=hidden_units)
def __init__(self, feature_dim, mode_dim, hidden_rnn_dim, hidden_units,
             rnn_dropout, num_rnn_layers):
    """Map a feature sequence to a Gaussian over the mode variable."""
    super(BaseNetwork, self).__init__()

    # Bidirectional recurrent encoder over the feature sequence.
    self.rnn = BiRnn(input_dim=feature_dim,
                     hidden_rnn_dim=hidden_rnn_dim,
                     rnn_layers=num_rnn_layers,
                     rnn_dropout=rnn_dropout)

    # The BiRnn is bidirectional, hence 2 * hidden_rnn_dim inputs.
    self.mode_dist = Gaussian(input_dim=2 * hidden_rnn_dim,
                              output_dim=mode_dim,
                              hidden_units=hidden_units)
def __init__(self, state_rep_dim, mode_dim, action_dim, hidden_units,
             leaky_slope, std=None):
    """Decode an action distribution from a state representation and mode.

    Args:
        state_rep_dim: size of the state representation input.
        mode_dim: size of the mode vector concatenated to the state.
        action_dim: dimensionality of the decoded action.
        hidden_units: hidden layer sizes of the Gaussian network.
        leaky_slope: negative slope of the leaky-ReLU activations.
        std: fixed standard deviation, or None to learn it.
    """
    super(ActionDecoder, self).__init__()

    self.net = Gaussian(input_dim=state_rep_dim + mode_dim,
                        output_dim=action_dim,
                        hidden_units=hidden_units,
                        std=std,
                        leaky_slope=leaky_slope)
def __init__(self, latent1_dim, latent2_dim, mode_dim, action_dim,
             hidden_units, leaky_slope, action_normalized, std=None):
    """Decode an action distribution from both latents plus the mode.

    Args:
        latent1_dim: size of the first latent variable.
        latent2_dim: size of the second latent variable.
        mode_dim: size of the mode vector.
        action_dim: dimensionality of the decoded action.
        hidden_units: hidden layer sizes of the Gaussian network.
        leaky_slope: negative slope of the leaky-ReLU activations.
        action_normalized: whether decoded actions are treated as normalized.
        std: fixed standard deviation, or None to learn it.
    """
    super(ActionDecoderNormal, self).__init__()

    self.action_normalized = action_normalized
    self.net = Gaussian(input_dim=latent1_dim + latent2_dim + mode_dim,
                        output_dim=action_dim,
                        hidden_units=hidden_units,
                        std=std,
                        leaky_slope=leaky_slope)
def __init__(self,
             feature_shape,
             action_dim,
             output_dim,  # typically mode_dim
             hidden_rnn_dim,
             hidden_units,
             rnn_dropout,
             rnn_layers):
    """Map a feature sequence to a Gaussian over the mode variable.

    NOTE(review): action_dim is accepted but currently unused -- an earlier
    version fed feature_shape + action_dim into the RNN; confirm whether
    actions should be part of the input.
    """
    super(BaseNetwork, self).__init__()

    # Bidirectional recurrent encoder over the feature sequence only.
    self.rnn = BiRnn(feature_shape,
                     hidden_rnn_dim=hidden_rnn_dim,
                     rnn_layers=rnn_layers,
                     rnn_dropout=rnn_dropout)

    # The BiRnn is bidirectional, hence 2 * hidden_rnn_dim inputs.
    self.mode_dist = Gaussian(input_dim=2 * hidden_rnn_dim,
                              output_dim=output_dim,
                              hidden_units=hidden_units)
def __init__(self, latent1_dim, latent2_dim, mode_dim, action_dim,
             hidden_units, leaky_slope, action_normalized, std=None):
    """Action decoder that tiles the mode vector before concatenation.

    The mode is repeated so it is not drowned out when the combined latent
    is much larger than the mode vector.
    """
    super(ActionDecoderModeRepeat, self).__init__()

    latent_dim = latent1_dim + latent2_dim
    # Heuristic repeat factor: roughly ten times the latent/mode size
    # ratio (integer division); 1 when the mode is already at least as
    # large as the latent.
    if latent_dim > mode_dim:
        self.mode_repeat = 10 * latent_dim // mode_dim
    else:
        self.mode_repeat = 1

    self.action_normalized = action_normalized
    self.net = Gaussian(latent_dim + self.mode_repeat * mode_dim,
                        action_dim,
                        hidden_units=hidden_units,
                        leaky_slope=leaky_slope,
                        std=std)
def __init__(self, observation_shape, action_shape, feature_dim,
             latent1_dim, latent2_dim, hidden_units, hidden_units_encoder,
             hidden_units_decoder, std_decoder, device, leaky_slope,
             state_rep):
    """Two-layer sequential latent-variable model (priors, posteriors,
    encoder, decoder).

    Args:
        observation_shape: shape of raw observations; index 0 is used as
            the channel / state dimension.
        action_shape: shape of actions; index 0 is the action dimension.
        feature_dim: dimensionality of the deterministic feature encoding.
        latent1_dim, latent2_dim: sizes of the two stochastic latents.
        hidden_units: hidden layer sizes of the latent Gaussian networks.
        hidden_units_encoder: hidden layer sizes of the state-rep encoder.
        hidden_units_decoder: hidden layer sizes of the state-rep decoder.
        std_decoder: fixed standard deviation of the observation decoder.
        device: torch device the model will run on.
        leaky_slope: negative slope of the leaky-ReLU activations.
        state_rep: if True, use MLP encoder/decoder on state vectors;
            otherwise use conv nets for pixel observations.
    """
    super(DynLatentNetwork, self).__init__()
    self.device = device

    # ---- Priors ----
    # p(z1(0)) = N(0, I)
    self.latent1_init_prior = ConstantGaussian(latent1_dim)
    # p(z2(0) | z1(0))
    self.latent2_init_prior = Gaussian(input_dim=latent1_dim,
                                       output_dim=latent2_dim,
                                       hidden_units=hidden_units,
                                       leaky_slope=leaky_slope)
    # p(z1(t+1) | z2(t), a(t), x_gt(t-1))
    self.latent1_prior = Gaussian(
        input_dim=latent2_dim + action_shape[0] + feature_dim,
        output_dim=latent1_dim,
        hidden_units=hidden_units,
        leaky_slope=leaky_slope)
    # p(z2(t+1) | z1(t+1), z2(t), a(t), x_gt(t-1))
    self.latent2_prior = Gaussian(
        input_dim=latent1_dim + latent2_dim + action_shape[0],
        output_dim=latent2_dim,
        hidden_units=hidden_units,
        leaky_slope=leaky_slope)

    # ---- Posteriors ----
    # q(z1(0) | feat(0))
    self.latent1_init_posterior = Gaussian(input_dim=feature_dim,
                                           output_dim=latent1_dim,
                                           hidden_units=hidden_units,
                                           leaky_slope=leaky_slope)
    # q(z2(0) | z1(0)) shares weights with p(z2(0) | z1(0)).
    self.latent2_init_posterior = self.latent2_init_prior
    # q(z1(t+1) | feat(t+1), z2(t), a(t))
    self.latent1_posterior = Gaussian(
        input_dim=latent2_dim + action_shape[0] + feature_dim,
        output_dim=latent1_dim,
        hidden_units=hidden_units,
        leaky_slope=leaky_slope)
    # q(z2(t+1) | z1(t+1), z2(t), a(t)) shares weights with the prior.
    self.latent2_posterior = self.latent2_prior

    # ---- Deterministic feature encoder feat(t) = f(x(t)) ----
    if state_rep:
        # NOTE(review): output dim is observation_shape[0] here, not
        # feature_dim -- confirm this asymmetry with the conv encoder
        # below is intentional.
        self.encoder = EncoderStateRep(observation_shape[0],
                                       observation_shape[0],
                                       hidden_units=hidden_units_encoder)
    else:
        # Conv net for pixel observations.
        self.encoder = Encoder(observation_shape[0],
                               feature_dim,
                               leaky_slope=leaky_slope)

    # ---- Observation decoder p(x(t) | z1(t), z2(t)) ----
    if state_rep:
        self.decoder = Gaussian(latent1_dim + latent2_dim,
                                observation_shape[0],
                                std=std_decoder,
                                hidden_units=hidden_units_decoder,
                                leaky_slope=leaky_slope)
    else:
        self.decoder = Decoder(latent1_dim + latent2_dim,
                               observation_shape[0],
                               std=std_decoder,
                               leaky_slope=leaky_slope)

    # Latent dimensionalities kept for later sampling/reshaping.
    self.latent1_dim = latent1_dim
    self.latent2_dim = latent2_dim