def test_seq2slate_transformer_simulation_hard_tsp(self):
    """
    Traveling Salesman Problem, harder variant: data comes from
    multiple sets of cities (``diverse_input`` is on).

    Drives ``run_seq2slate_tsp`` with the transformer model and the
    SIMULATION learning method on a CUDA device.
    """
    # NOTE(review): a CUDA device is requested unconditionally; presumably a
    # skip guard for GPU-less hosts lives outside this method -- confirm.
    target_device = torch.device("cuda")
    training_mode = SIMULATION

    # Arguments are passed positionally, in the order the helper is called
    # with throughout this test; each literal is labelled with its role.
    run_seq2slate_tsp(
        MODEL_TRANSFORMER,
        4096,  # batch_size
        8,  # epochs
        6,  # num_candidates
        300,  # num_batches
        32,  # hidden_size
        True,  # diverse_input
        0.001,  # learning_rate
        1.02,  # expect_reward_threshold
        training_mode,  # learning_method
        1,  # policy_gradient_interval
        target_device,
    )
def test_seq2slate_transformer_off_policy_simple_tsp(self):
    """
    Traveling Salesman Problem, simple variant: data comes from one
    set of nodes (cities), i.e. ``diverse_input`` is off.

    Drives ``run_seq2slate_tsp`` with the transformer model and the
    OFF_POLICY learning method on the CPU.
    """
    target_device = torch.device("cpu")
    training_mode = OFF_POLICY

    # Arguments are passed positionally, in the order the helper is called
    # with throughout this test; each literal is labelled with its role.
    run_seq2slate_tsp(
        MODEL_TRANSFORMER,
        4096,  # batch_size
        1,  # epochs
        6,  # num_candidates
        100,  # num_batches
        32,  # hidden_size
        False,  # diverse_input
        0.001,  # learning_rate
        1.02,  # expect_reward_threshold
        training_mode,  # learning_method
        1,  # policy_gradient_interval
        target_device,
    )