def test_networkbody_vector():
    """A NetworkBody fed one vector observation can be trained toward ones."""
    torch.manual_seed(0)
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,)]
    networkbody = NetworkBody(
        create_observation_specs_with_shapes(obs_shapes),
        network_settings,
        encoded_act_size=2,
    )
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = 0.1 * torch.ones((1, obs_size))
    sample_act = 0.1 * torch.ones((1, 2))

    for _ in range(300):
        encoded, _ = networkbody([sample_obs], sample_act)
        assert encoded.shape == (1, network_settings.hidden_units)
        # Drive every encoded value toward 1 via an MSE objective.
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # After training, the encoding should sit close to the all-ones target.
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
def test_gaussian_distribution(conditional_sigma, tanh_squash):
    """GaussianDistribution yields the right instance type and supports backprop."""
    torch.manual_seed(0)
    hidden_size = 16
    act_size = 4
    sample_embedding = torch.ones((1, 16))
    gauss_dist = GaussianDistribution(
        hidden_size,
        act_size,
        conditional_sigma=conditional_sigma,
        tanh_squash=tanh_squash,
    )
    # Train the distribution's log-prob of a fixed action toward -2.
    force_action = torch.zeros((1, act_size))
    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)
    for _ in range(50):
        dist_inst = gauss_dist(sample_embedding)
        expected_cls = (
            TanhGaussianDistInstance if tanh_squash else GaussianDistInstance
        )
        assert isinstance(dist_inst, expected_cls)
        log_prob = dist_inst.log_prob(force_action)
        loss = torch.nn.functional.mse_loss(log_prob, -2 * torch.ones(log_prob.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # The trained log-probabilities should be near the -2 target.
    for prob in log_prob.flatten().tolist():
        assert prob == pytest.approx(-2, abs=0.1)
def test_sac_optimizer_update(dummy_config, rnn, visual, discrete):
    """SAC optimizer `update` reports every required training statistic."""
    torch.manual_seed(0)
    # Build the optimizer/policy for the requested configuration.
    optimizer = create_sac_optimizer_mock(
        dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    # Simulate a rollout to populate the update buffer.
    update_buffer = mb.simulate_rollout(
        BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec, memory_size=24
    )
    # Mock out reward signal eval by copying environment rewards.
    update_buffer["extrinsic_rewards"] = update_buffer["environment_rewards"]
    return_stats = optimizer.update(
        update_buffer,
        num_sequences=update_buffer.num_experiences
        // optimizer.policy.sequence_length,
    )
    # Every stat SAC is expected to emit must be present.
    required_stats = [
        "Losses/Policy Loss",
        "Losses/Value Loss",
        "Losses/Q1 Loss",
        "Losses/Q2 Loss",
        "Policy/Entropy Coeff",
        "Policy/Learning Rate",
    ]
    for stat in required_stats:
        assert stat in return_stats.keys()
def test_multinetworkbody_num_agents(with_actions):
    """The last hidden unit of MultiAgentNetworkBody tracks the agent count."""
    torch.manual_seed(0)
    act_size = 2
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,)]
    action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
    networkbody = MultiAgentNetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings, action_spec
    )
    sample_obs = [[0.1 * torch.ones((1, obs_size))]]
    # simulate baseline in POCA
    sample_act = [
        AgentAction(
            0.1 * torch.ones((1, 2)), [0.1 * torch.ones(1) for _ in range(act_size)]
        )
    ]
    # Pairs of (current number of agents, running maximum seen so far).
    for n_agent, max_so_far in [(1, 1), (5, 5), (4, 5), (10, 10), (5, 10), (1, 10)]:
        if with_actions:
            encoded, _ = networkbody(
                obs_only=sample_obs * (n_agent - 1), obs=sample_obs, actions=sample_act
            )
        else:
            encoded, _ = networkbody(obs_only=sample_obs * n_agent, obs=[], actions=[])
        # The count is normalized by the max seen so far and rescaled into [-1, 1].
        target = (n_agent * 1.0 / max_so_far) * 2 - 1
        assert abs(encoded[0, -1].item() - target) < 1e-6
        assert encoded[0, -1].item() <= 1
        assert encoded[0, -1].item() >= -1
def test_networkbody_lstm():
    """A NetworkBody configured with LSTM memory can be trained toward ones."""
    torch.manual_seed(0)
    obs_size = 4
    seq_len = 16
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings(sequence_length=seq_len, memory_size=12)
    )
    obs_shapes = [(obs_size,)]
    networkbody = NetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings
    )
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
    sample_obs = torch.ones((1, seq_len, obs_size))

    for _ in range(200):
        encoded, _ = networkbody([sample_obs], memories=torch.ones(1, seq_len, 12))
        # Push the encoding toward an all-ones target.
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # After training, every encoded value should be near 1.
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
def test_networkbody_visual():
    """A NetworkBody with vector + visual observations converges toward ones."""
    torch.manual_seed(1)
    vec_obs_size = 4
    obs_size = (84, 84, 3)
    network_settings = NetworkSettings()
    obs_shapes = [(vec_obs_size,), obs_size]
    networkbody = NetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings
    )
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = 0.1 * torch.ones((1, 84, 84, 3), dtype=torch.float32)
    sample_vec_obs = torch.ones((1, vec_obs_size), dtype=torch.float32)
    obs = [sample_vec_obs] + [sample_obs]
    loss = 1
    step = 0
    # Train until the loss is tiny or a step budget runs out.
    while loss > 1e-6 and step < 1e3:
        encoded, _ = networkbody(obs)
        assert encoded.shape == (1, network_settings.hidden_units)
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step += 1
    # After training, every encoded value should be near 1.
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
def test_valuenetwork():
    """ValueNetwork trains each named stream toward a constant target.

    Fix: the final check now asserts on EVERY output of each stream
    (flattened), not just the first element — the original iterated
    ``value.tolist()`` on a ``(1, num_outputs)`` tensor and only inspected
    ``_out[0]``, leaving the remaining outputs unverified. The constant
    observation is also built once instead of on every iteration.
    """
    torch.manual_seed(0)
    obs_size = 4
    num_outputs = 2
    network_settings = NetworkSettings()
    obs_spec = create_observation_specs_with_shapes([(obs_size,)])
    stream_names = [f"stream_name{n}" for n in range(4)]
    value_net = ValueNetwork(
        stream_names, obs_spec, network_settings, outputs_per_stream=num_outputs
    )
    optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)
    # The observation is constant, so hoist it out of the training loop.
    sample_obs = torch.ones((1, obs_size))
    for _ in range(50):
        values, _ = value_net([sample_obs])
        loss = 0
        for s_name in stream_names:
            assert values[s_name].shape == (1, num_outputs)
            # Try to force every output of the stream to 1.
            loss += torch.nn.functional.mse_loss(
                values[s_name], torch.ones((1, num_outputs))
            )
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # After training, ALL outputs of every stream should be close to 1.
    for value in values.values():
        for _out in value.flatten().tolist():
            assert _out == pytest.approx(1.0, abs=0.1)
def test_reward_decreases(
    demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
) -> None:
    """GAIL: expert reward rises and policy reward falls as training proceeds."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    buffer_expert = create_agent_buffer(behavior_spec, 1000)
    buffer_policy = create_agent_buffer(behavior_spec, 1000)
    demo_to_buffer.return_value = None, buffer_expert
    gail_settings = GAILSettings(
        demo_path="", learning_rate=0.005, use_vail=False, use_actions=use_actions
    )
    gail_rp = create_reward_provider(
        RewardSignalType.GAIL, behavior_spec, gail_settings
    )
    # Record the rewards before any discriminator training.
    init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
    init_reward_policy = gail_rp.evaluate(buffer_policy)[0]
    for _ in range(20):
        gail_rp.update(buffer_policy)
        reward_expert = gail_rp.evaluate(buffer_expert)[0]
        reward_policy = gail_rp.evaluate(buffer_policy)[0]
        assert reward_expert >= 0  # GAIL / VAIL reward always positive
        assert reward_policy >= 0
    reward_expert = gail_rp.evaluate(buffer_expert)[0]
    reward_policy = gail_rp.evaluate(buffer_policy)[0]
    # Expert reward greater than non-expert reward
    assert reward_expert > reward_policy
    # Expert reward getting better as network trains
    assert reward_expert > init_reward_expert
    # Non-expert reward getting worse as network trains
    assert reward_policy < init_reward_policy
def test_reward_decreases_vail(
    demo_to_buffer: Any, use_actions: bool, behavior_spec: BehaviorSpec, seed: int
) -> None:
    """VAIL variant of GAIL: expert reward ends above the policy reward."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    buffer_expert = create_agent_buffer(behavior_spec, 1000)
    buffer_policy = create_agent_buffer(behavior_spec, 1000)
    demo_to_buffer.return_value = None, buffer_expert
    gail_settings = GAILSettings(
        demo_path="", learning_rate=0.005, use_vail=True, use_actions=use_actions
    )
    # we must set the initial value of beta to 0 for testing
    # If we do not, the kl-loss will dominate early and will block the estimator
    DiscriminatorNetwork.initial_beta = 0.0
    gail_rp = create_reward_provider(
        RewardSignalType.GAIL, behavior_spec, gail_settings
    )
    for _ in range(20):
        gail_rp.update(buffer_policy)
        reward_expert = gail_rp.evaluate(buffer_expert)[0]
        reward_policy = gail_rp.evaluate(buffer_policy)[0]
        assert reward_expert >= 0  # GAIL / VAIL reward always positive
        assert reward_policy >= 0
    reward_expert = gail_rp.evaluate(buffer_expert)[0]
    reward_policy = gail_rp.evaluate(buffer_policy)[0]
    # Expert reward greater than non-expert reward
    assert reward_expert > reward_policy
def test_initialization_layer():
    """linear_layer honors Initialization.Zero for both kernel and bias."""
    torch.manual_seed(0)
    # Test Zero
    layer = linear_layer(
        3, 4, kernel_init=Initialization.Zero, bias_init=Initialization.Zero
    )
    assert torch.all(
        torch.eq(layer.weight.data, torch.zeros_like(layer.weight.data))
    )
    assert torch.all(torch.eq(layer.bias.data, torch.zeros_like(layer.bias.data)))
def test_tanh_gaussian_dist_instance():
    """Samples from a tanh-squashed Gaussian stay strictly inside (-1, 1)."""
    torch.manual_seed(0)
    act_size = 4
    dist_instance = TanhGaussianDistInstance(
        torch.zeros(1, act_size), torch.ones(1, act_size)
    )
    for _ in range(10):
        action = dist_instance.sample()
        assert action.shape == (1, act_size)
        # tanh squashing must bound every component to the open interval.
        assert torch.max(action) < 1.0 and torch.min(action) > -1.0
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
    """RND reward shrinks as the predictor trains on a fixed buffer.

    NOTE(review): several same-named `test_reward_decreases` functions appear
    in this source; if they share one module, later definitions shadow earlier
    ones — confirm they belong to separate test files.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    rnd_settings = RNDSettings(32, 0.01)
    rnd_rp = RNDRewardProvider(behavior_spec, rnd_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    rnd_rp.update(buffer)
    reward_old = rnd_rp.evaluate(buffer)[0]
    for _ in range(100):
        rnd_rp.update(buffer)
        reward_new = rnd_rp.evaluate(buffer)[0]
    assert reward_new < reward_old
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    """Curiosity forward model learns to predict the next state accurately."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.1)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(100):
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_next_state(buffer)[0]
    target = curiosity_rp._network.get_next_state(buffer)[0]
    # Mean squared prediction error should be tiny after training.
    error = float(ModelUtils.to_numpy(torch.mean((prediction - target) ** 2)))
    assert error < 0.001
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
    """Curiosity reward drops as the module trains on the same buffer.

    NOTE(review): duplicate name — other `test_reward_decreases` functions
    exist in this source; confirm they live in separate modules.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.01)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    curiosity_rp.update(buffer)
    reward_old = curiosity_rp.evaluate(buffer)[0]
    for _ in range(20):
        curiosity_rp.update(buffer)
        reward_new = curiosity_rp.evaluate(buffer)[0]
    assert reward_new < reward_old
def test_lstm_layer():
    """lstm_layer zero-init: weights all zero, bias elements 4:8 forced to one."""
    torch.manual_seed(0)
    # Test zero for LSTM
    layer = lstm_layer(
        4, 4, kernel_init=Initialization.Zero, bias_init=Initialization.Zero
    )
    for name, param in layer.named_parameters():
        if "weight" in name:
            assert torch.all(torch.eq(param.data, torch.zeros_like(param.data)))
        elif "bias" in name:
            # Elements 4:8 of the bias are expected to be 1 (presumably the
            # forget-gate slice set by lstm_layer — confirm in its source).
            assert torch.all(
                torch.eq(param.data[4:8], torch.ones_like(param.data[4:8]))
            )
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    """Curiosity inverse model learns to reproduce the continuous action."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.1)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(200):
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_action(buffer)[0]
    target = torch.tensor(buffer["continuous_action"][0])
    # Mean squared error between predicted and stored action should be tiny.
    error = torch.mean((prediction - target) ** 2).item()
    assert error < 0.001
def test_multinetworkbody_lstm(with_actions):
    """MultiAgentNetworkBody with LSTM memories trains toward an all-ones encoding."""
    torch.manual_seed(0)
    obs_size = 4
    act_size = 2
    seq_len = 16
    n_agents = 3
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings(sequence_length=seq_len, memory_size=12)
    )
    obs_shapes = [(obs_size,)]
    action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
    networkbody = MultiAgentNetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings, action_spec
    )
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
    sample_obs = [[0.1 * torch.ones((seq_len, obs_size))] for _ in range(n_agents)]
    # simulate baseline in POCA
    sample_act = [
        AgentAction(
            0.1 * torch.ones((seq_len, 2)),
            [0.1 * torch.ones(seq_len) for _ in range(act_size)],
        )
        for _ in range(n_agents - 1)
    ]
    for _ in range(300):
        if with_actions:
            encoded, _ = networkbody(
                obs_only=sample_obs[:1],
                obs=sample_obs[1:],
                actions=sample_act,
                memories=torch.ones(1, 1, 12),
                sequence_length=seq_len,
            )
        else:
            encoded, _ = networkbody(
                obs_only=sample_obs,
                obs=[],
                actions=[],
                memories=torch.ones(1, 1, 12),
                sequence_length=seq_len,
            )
        # Push the encoding toward an all-ones target.
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # After training, every encoded value should be near 1.
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
def test_gaussian_dist_instance():
    """Standard-normal GaussianDistInstance has the textbook log-prob and entropy."""
    torch.manual_seed(0)
    act_size = 4
    dist_instance = GaussianDistInstance(
        torch.zeros(1, act_size), torch.ones(1, act_size)
    )
    action = dist_instance.sample()
    assert action.shape == (1, act_size)
    for log_prob in dist_instance.log_prob(torch.zeros((1, act_size))).flatten():
        # Log prob of standard normal at 0
        assert log_prob == pytest.approx(-0.919, abs=0.01)
    for ent in dist_instance.entropy().flatten():
        # entropy of standard normal at 0, based on 1/2 + ln(sqrt(2pi)sigma)
        assert ent == pytest.approx(1.42, abs=0.01)
def test_lstm_class():
    """LSTM wrapper reports its memory size and produces the expected shapes."""
    torch.manual_seed(0)
    input_size = 12
    memory_size = 64
    batch_size = 8
    seq_len = 16
    lstm = LSTM(input_size, memory_size)
    assert lstm.memory_size == memory_size

    sample_input = torch.ones((batch_size, seq_len, input_size))
    sample_memories = torch.ones((1, batch_size, memory_size))
    out, mem = lstm(sample_input, sample_memories)
    # Hidden size should be half of memory_size
    assert out.shape == (batch_size, seq_len, memory_size // 2)
    assert mem.shape == (1, batch_size, memory_size)
def test_predict_minimum_training():
    """Attention stack learns to pick the index of the minimum of up to 5 numbers.

    Fix: replaced deprecated ``torch.range`` with ``torch.arange`` using an
    explicit float dtype, which produces the same values (0..n_k-1 as floats)
    without the deprecation warning.
    """
    # of 5 numbers, predict index of min
    np.random.seed(1336)
    torch.manual_seed(1336)
    n_k = 5
    size = n_k + 1
    embedding_size = 64
    entity_embeddings = EntityEmbeddings(
        size, [size], embedding_size, [n_k], concat_self=False
    )
    transformer = ResidualSelfAttention(embedding_size)
    l_layer = LinearEncoder(embedding_size, 2, n_k)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        list(entity_embeddings.parameters())
        + list(transformer.parameters())
        + list(l_layer.parameters()),
        lr=0.001,
        weight_decay=1e-6,
    )
    batch_size = 200
    # One-hot positional tags for the n_k entities (torch.range is deprecated;
    # arange with float dtype yields the identical tensor).
    onehots = ModelUtils.actions_to_onehot(
        torch.arange(0, n_k, dtype=torch.float32).unsqueeze(1), [n_k]
    )[0]
    onehots = onehots.expand((batch_size, -1, -1))
    losses = []
    for _ in range(400):
        num = np.random.randint(0, n_k)
        inp = torch.rand((batch_size, num + 1, 1))
        with torch.no_grad():
            # create the target : The minimum
            argmin = torch.argmin(inp, dim=1)
            argmin = argmin.squeeze()
            argmin = argmin.detach()
        sliced_oh = onehots[:, : num + 1]
        inp = torch.cat([inp, sliced_oh], dim=2)
        embeddings = entity_embeddings(inp, [inp])
        masks = EntityEmbeddings.get_masks([inp])
        prediction = transformer(embeddings, masks)
        prediction = l_layer(prediction)
        ce = loss(prediction, argmin)
        losses.append(ce.item())
        print(ce.item())
        optimizer.zero_grad()
        ce.backward()
        optimizer.step()
    # The loss should have converged over the final iterations.
    assert np.array(losses[-20:]).mean() < 0.1
def test_multi_categorical_distribution():
    """MultiCategoricalDistribution trains toward target log-probs and honors masks."""
    torch.manual_seed(0)
    hidden_size = 16
    act_size = [3, 3, 4]
    sample_embedding = torch.ones((1, 16))
    gauss_dist = MultiCategoricalDistribution(hidden_size, act_size)
    # Make sure backprop works
    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)

    def create_test_prob(size: int) -> torch.Tensor:
        # High prob for first action
        test_prob = torch.tensor([[1.0 - 0.01 * (size - 1)] + [0.01] * (size - 1)])
        return test_prob.log()

    for _ in range(100):
        dist_insts = gauss_dist(sample_embedding, masks=torch.ones((1, sum(act_size))))
        loss = 0
        for i, dist_inst in enumerate(dist_insts):
            assert isinstance(dist_inst, CategoricalDistInstance)
            log_prob = dist_inst.all_log_prob()
            test_log_prob = create_test_prob(act_size[i])
            # Force log_probs to match the high probability for the first action
            # generated by create_test_prob
            loss += torch.nn.functional.mse_loss(log_prob, test_log_prob)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    for dist_inst, size in zip(dist_insts, act_size):
        # Check that the log probs are close to the fake ones that we generated.
        test_log_probs = create_test_prob(size)
        for _prob, _test_prob in zip(
            dist_inst.all_log_prob().flatten().tolist(),
            test_log_probs.flatten().tolist(),
        ):
            assert _prob == pytest.approx(_test_prob, abs=0.1)
    # Test masks: only the last action of each branch stays available.
    masks = []
    for branch in act_size:
        masks += [0] * (branch - 1) + [1]
    masks = torch.tensor([masks])
    dist_insts = gauss_dist(sample_embedding, masks=masks)
    for dist_inst in dist_insts:
        log_prob = dist_inst.all_log_prob()
        # The unmasked action should carry all probability (log prob ~ 0).
        assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001)
def test_categorical_dist_instance():
    """CategoricalDistInstance samples valid indices and ranks probs correctly."""
    torch.manual_seed(0)
    act_size = 4
    # High prob for first action
    test_prob = torch.tensor(
        [[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
    )
    dist_instance = CategoricalDistInstance(test_prob)

    for _ in range(10):
        action = dist_instance.sample()
        assert action.shape == (1, 1)
        assert action < act_size

    # Make sure the first action as higher probability than the others.
    prob_first_action = dist_instance.log_prob(torch.tensor([0]))
    for i in range(1, act_size):
        assert dist_instance.log_prob(torch.tensor([i])) < prob_first_action
def test_simple_transformer_training():
    """Self-attention learns to output the key nearest to the query point."""
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_k = 3, 5
    embedding_size = 64
    entity_embeddings = EntityEmbeddings(size, [size], [n_k], embedding_size)
    transformer = ResidualSelfAttention(embedding_size, [n_k])
    l_layer = linear_layer(embedding_size, size)
    # NOTE(review): the embedding module's parameters are not included in the
    # optimizer (sibling tests in this source do include them) — confirm this
    # is intentional for this API revision.
    optimizer = torch.optim.Adam(
        list(transformer.parameters()) + list(l_layer.parameters()), lr=0.001
    )
    batch_size = 200
    point_range = 3
    init_error = -1.0
    for _ in range(250):
        center = torch.rand((batch_size, size)) * point_range * 2 - point_range
        key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
        with torch.no_grad():
            # create the target : The key closest to the query in euclidean distance
            distance = torch.sum(
                (center.reshape((batch_size, 1, size)) - key) ** 2, dim=2
            )
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()
        embeddings = entity_embeddings(center, [key])
        masks = EntityEmbeddings.get_masks([key])
        prediction = transformer.forward(embeddings, masks)
        prediction = l_layer(prediction)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target) ** 2, dim=1)
        error = torch.mean(error) / 2
        if init_error == -1.0:
            init_error = error.item()
        else:
            # Error must improve monotonically relative to the first step.
            assert error.item() < init_error
        print(error.item())
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
    assert error.item() < 0.3
def test_predict_closest_training():
    """EntityEmbedding + attention learn to return the key closest to the query."""
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_k = 3, 5
    embedding_size = 64
    entity_embeddings = EntityEmbedding(size, n_k, embedding_size)
    entity_embeddings.add_self_embedding(size)
    transformer = ResidualSelfAttention(embedding_size, n_k)
    l_layer = linear_layer(embedding_size, size)
    optimizer = torch.optim.Adam(
        list(entity_embeddings.parameters())
        + list(transformer.parameters())
        + list(l_layer.parameters()),
        lr=0.001,
        weight_decay=1e-6,
    )
    batch_size = 200
    for _ in range(200):
        center = torch.rand((batch_size, size))
        key = torch.rand((batch_size, n_k, size))
        with torch.no_grad():
            # create the target : The key closest to the query in euclidean distance
            distance = torch.sum(
                (center.reshape((batch_size, 1, size)) - key) ** 2, dim=2
            )
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()
        embeddings = entity_embeddings(center, key)
        masks = get_zero_entities_mask([key])
        prediction = transformer.forward(embeddings, masks)
        prediction = l_layer(prediction)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target) ** 2, dim=1)
        error = torch.mean(error) / 2
        print(error.item())
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
    assert error.item() < 0.02
def test_all_masking(mask_value):
    """An all-zeros or all-ones attention mask must not raise during training."""
    # We make sure that a mask of all zeros or all ones will not trigger an error
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_k = 3, 5
    embedding_size = 64
    entity_embeddings = EntityEmbedding(size, n_k, embedding_size)
    entity_embeddings.add_self_embedding(size)
    transformer = ResidualSelfAttention(embedding_size, n_k)
    l_layer = linear_layer(embedding_size, size)
    optimizer = torch.optim.Adam(
        list(entity_embeddings.parameters())
        + list(transformer.parameters())
        + list(l_layer.parameters()),
        lr=0.001,
        weight_decay=1e-6,
    )
    batch_size = 20
    for _ in range(5):
        center = torch.rand((batch_size, size))
        key = torch.rand((batch_size, n_k, size))
        with torch.no_grad():
            # create the target : The key closest to the query in euclidean distance
            distance = torch.sum(
                (center.reshape((batch_size, 1, size)) - key) ** 2, dim=2
            )
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()
        embeddings = entity_embeddings(center, key)
        # Constant mask (all mask_value) — the code path under test.
        masks = [torch.ones_like(key[:, :, 0]) * mask_value]
        prediction = transformer.forward(embeddings, masks)
        prediction = l_layer(prediction)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target) ** 2, dim=1)
        error = torch.mean(error) / 2
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
def test_visual_encoder_trains(vis_class, size):
    """A visual encoder can learn to separate all-zero from all-one images."""
    torch.manual_seed(0)
    image_size = (size, size, 1)
    batch = 100
    # First half of the batch is zeros, second half is ones.
    inputs = torch.cat(
        [torch.zeros((batch,) + image_size), torch.ones((batch,) + image_size)],
        dim=0,
    )
    target = torch.cat([torch.zeros((batch,)), torch.ones((batch,))], dim=0)
    enc = vis_class(image_size[0], image_size[1], image_size[2], 1)
    optimizer = torch.optim.Adam(enc.parameters(), lr=0.001)
    for _ in range(15):
        prediction = enc(inputs)[:, 0]
        loss = torch.mean((target - prediction) ** 2)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    assert loss.item() < 0.05
def test_multinetworkbody_visual(with_actions):
    """MultiAgentNetworkBody with vector + visual obs trains toward ones."""
    torch.manual_seed(0)
    act_size = 2
    n_agents = 3
    obs_size = 4
    vis_obs_size = (84, 84, 3)
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,), vis_obs_size]
    action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
    networkbody = MultiAgentNetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings, action_spec
    )
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = [
        [0.1 * torch.ones((1, obs_size))] + [0.1 * torch.ones((1, 84, 84, 3))]
        for _ in range(n_agents)
    ]
    # simulate baseline in POCA
    sample_act = [
        AgentAction(
            0.1 * torch.ones((1, 2)), [0.1 * torch.ones(1) for _ in range(act_size)]
        )
        for _ in range(n_agents - 1)
    ]
    for _ in range(300):
        if with_actions:
            encoded, _ = networkbody(
                obs_only=sample_obs[:1], obs=sample_obs[1:], actions=sample_act
            )
        else:
            encoded, _ = networkbody(obs_only=sample_obs, obs=[], actions=[])
        assert encoded.shape == (1, network_settings.hidden_units)
        # Push the encoding toward an all-ones target.
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # After training, every encoded value should be near 1.
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
def test_multi_head_attention_training():
    """MultiHeadAttention learns to output the key closest to the query."""
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_h, n_k, n_q = 3, 10, 5, 1
    embedding_size = 64
    mha = MultiHeadAttention(size, size, size, size, n_h, embedding_size)
    optimizer = torch.optim.Adam(mha.parameters(), lr=0.001)
    batch_size = 200
    point_range = 3
    init_error = -1.0
    for _ in range(50):
        query = torch.rand((batch_size, n_q, size)) * point_range * 2 - point_range
        key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
        value = key
        with torch.no_grad():
            # create the target : The key closest to the query in euclidean distance
            distance = torch.sum((query - key) ** 2, dim=2)
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()
        prediction, _ = mha.forward(query, key, value)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target) ** 2, dim=1)
        error = torch.mean(error) / 2
        if init_error == -1.0:
            init_error = error.item()
        else:
            # Error must stay below the very first measurement.
            assert error.item() < init_error
        print(error.item())
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
    assert error.item() < 0.5
def test_networkbody_visual():
    """Train a NetworkBody on one vector + one visual observation toward ones.

    NOTE(review): a same-named test appears earlier in this source using a
    newer NetworkBody signature (observation specs instead of raw shapes).
    If both live in one module, the later definition shadows the earlier one —
    confirm which API revision this file targets.
    """
    torch.manual_seed(0)
    vec_obs_size = 4
    obs_size = (84, 84, 3)
    network_settings = NetworkSettings()
    obs_shapes = [(vec_obs_size,), obs_size]
    networkbody = NetworkBody(obs_shapes, network_settings)
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = 0.1 * torch.ones((1, 84, 84, 3))
    sample_vec_obs = torch.ones((1, vec_obs_size))
    for _ in range(150):
        encoded, _ = networkbody([sample_vec_obs], [sample_obs])
        assert encoded.shape == (1, network_settings.hidden_units)
        # Push the encoding toward an all-ones target.
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # After training, every encoded value should be near 1.
    for _enc in encoded.flatten():
        assert _enc == pytest.approx(1.0, abs=0.1)
def test_layer_norm():
    """Custom LayerNorm matches torch.nn.LayerNorm (no affine) on 1-D/2-D/3-D input."""
    torch.manual_seed(0)
    torch_ln = torch.nn.LayerNorm(10, elementwise_affine=False)
    cust_ln = LayerNorm()
    # Same three input shapes as before; rand calls occur in the same order,
    # so RNG consumption is unchanged.
    for shape in [(10,), (4, 10), (7, 6, 10)]:
        sample_input = torch.rand(shape)
        assert torch.all(
            torch.isclose(
                torch_ln(sample_input), cust_ln(sample_input), atol=1e-5, rtol=0.0
            )
        )