Example #1
    def step(self):
        self.nnet.eval()

        # Select the open state with the lowest f = h + g
        lowest_cost_state: State = min(
            self.opened, key=lambda s: self.opened[s][0] + self.opened[s][1])
        self.curr_state = lowest_cost_state
        curr_g: float = self.opened[lowest_cost_state][1]

        # Move the expanded state from the open set to the closed set
        self.closed[lowest_cost_state] = self.opened.pop(lowest_cost_state)

        # Expand the current state and score its children with the heuristic network
        children = self.env.expand([lowest_cost_state])[0][0]
        children_output = self.nnet(states_nnet_to_pytorch_input(
            self.env.state_to_nnet_input(children), 'cpu').float()).cpu().data.numpy()
        for i, child in enumerate(children):
            adjusted_heur: float = h_prime(children_output[i][0], self.o_c_max, self.cutoffs)
            child_g: float = curr_g + 1.0
            if child in self.closed:
                # Reopen the child if this path to it is cheaper
                if adjusted_heur + child_g < self.closed[child][0] + self.closed[child][1]:
                    self.opened[child] = adjusted_heur, child_g
                    del self.closed[child]
            elif child in self.opened:
                if adjusted_heur + child_g < self.opened[child][0] + self.opened[child][1]:
                    self.opened[child] = adjusted_heur, child_g
            else:
                self.opened[child] = adjusted_heur, child_g
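step() performs one expansion of a best-first search. A minimal driver loop might look like the sketch below; the searcher parameter and the env.is_solved goal test are assumptions for illustration, not part of the original code.

# Hypothetical driver loop for the step() method above.
# `searcher` is an instance of the class that owns step();
# `env.is_solved` is an assumed goal test and may differ
# from the real Environment API.
def run_search(searcher, max_steps: int = 10000) -> bool:
    for _ in range(max_steps):
        if not searcher.opened:
            return False  # open set exhausted: no path found
        searcher.step()
        if searcher.env.is_solved(searcher.curr_state):
            return True  # goal reached
    return False  # step budget exhausted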
Example #2
def main():
    # get environment
    env: Environment = env_utils.get_environment("puzzle8")

    # get nnet model
    nnet: nn.Module = get_nnet_model()
    device = torch.device('cpu')
    batch_size: int = 100
    num_itrs: int = 10000

    # get data
    print("Preparing Data\n")
    with open("data/data.pkl", "rb") as data_file:
        data = pickle.load(data_file)

    states_nnet, outputs = sample_training_data(data['states'], data['output'], env, batch_size*num_itrs)

    # train with supervised learning
    print("Training DNN\n")
    nnet.train()
    train_nnet(nnet, states_nnet, outputs, batch_size, num_itrs, 0)

    # get performance
    print("Evaluating DNN\n")
    nnet.eval()
    for cost_to_go in np.unique(data["output"]):
        idxs_targ: np.ndarray = np.where(data["output"] == cost_to_go)[0]
        states_targ: List[State] = [data["states"][idx] for idx in idxs_targ]
        states_targ_nnet: np.ndarray = env.state_to_nnet_input(states_targ)

        out_nnet = nnet(states_nnet_to_pytorch_input(states_targ_nnet, device).float()).cpu().data.numpy()

        mse = float(np.mean((out_nnet - cost_to_go) ** 2))
        print("Cost-To-Go: %i, Ave DNN Output: %f, MSE: %f" % (cost_to_go, float(np.mean(out_nnet)), mse))
Example #3
def evaluate_cost_to_go(nnet, device, env: Environment, states: List[State], outputs: np.ndarray, f):
    for cost_to_go in np.unique(outputs):
        idxs_targ: np.ndarray = np.where(outputs == cost_to_go)[0]
        states_targ: List[State] = [states[idx] for idx in idxs_targ]
        states_targ_nnet: np.ndarray = env.state_to_nnet_input(states_targ)

        out_nnet = nnet(states_nnet_to_pytorch_input(
            states_targ_nnet, device).float()).cpu().data.numpy()

        mse = float(np.mean((out_nnet - cost_to_go) ** 2))
        f.write("Cost-To-Go: %i, Ave DNN Output: %f, MSE: %f \n" %
                (cost_to_go, float(np.mean(out_nnet)), mse))
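evaluate_cost_to_go writes one line per distinct cost-to-go value to the open file handle f. A typical call, assuming the same pickle layout as the other examples (the output path is illustrative), might be:

# Hypothetical usage; the output path is illustrative and the
# pickle layout mirrors the other examples.
with open("sample_outputs/eval.txt", "w") as f:
    with open("data/data.pkl", "rb") as data_file:
        data = pickle.load(data_file)
    evaluate_cost_to_go(nnet, torch.device('cpu'), env,
                        data["states"], data["output"], f)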
Example #4
def main():
    torch.set_num_threads(1)

    # get environment
    env: Environment = env_utils.get_environment("puzzle8")

    # get nnet model
    nnet: nn.Module = get_nnet_model()
    # get optimizer and lr scheduler
    optimizer = torch.optim.Adam(nnet.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=100,
                                                gamma=0.996)
    criterion = nn.MSELoss()

    device = torch.device('cpu')
    batch_size: int = 100
    num_itrs: int = 10000
    with open("sample_outputs/exercise_1_akash.txt", 'w') as f:
        # get data
        f.write("Preparing Data\n")
        with open("data/data.pkl", "rb") as data_file:
            data = pickle.load(data_file)

        states_nnet, outputs = sample_training_data(data['states'],
                                                    data['output'], env,
                                                    batch_size * num_itrs)

        # train with supervised learning
        f.write("Training DNN\n")
        nnet.train()
        train_nnet(nnet, states_nnet, outputs, batch_size, num_itrs, 0,
                   criterion, optimizer, scheduler, f)

        # get performance
        f.write("Evaluating DNN\n")
        nnet.eval()
        for cost_to_go in np.unique(data["output"]):
            idxs_targ: np.ndarray = np.where(data["output"] == cost_to_go)[0]
            states_targ: List[State] = [
                data["states"][idx] for idx in idxs_targ
            ]
            states_targ_nnet: np.ndarray = env.state_to_nnet_input(states_targ)

            out_nnet = nnet(
                states_nnet_to_pytorch_input(
                    states_targ_nnet, device).float()).cpu().data.numpy()

            mse = float(np.mean((out_nnet - cost_to_go)**2))
            f.write("Cost-To-Go: %i, Ave DNN Output: %f, MSE: %f \n" %
                    (cost_to_go, float(np.mean(out_nnet)), mse))
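With step_size=100 and gamma=0.996, StepLR multiplies the learning rate by 0.996 once every 100 scheduler steps. Assuming train_nnet calls scheduler.step() once per iteration (an assumption about its internals), the rate at the end of the 10000-iteration run works out to roughly:

# Rough final learning rate under the StepLR schedule above,
# assuming one scheduler.step() per training iteration.
lr0, gamma, step_size, num_itrs = 1e-3, 0.996, 100, 10000
final_lr = lr0 * gamma ** (num_itrs // step_size)
print(final_lr)  # ~6.7e-4, i.e. about a 33% decay over the run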
Example #5
def generate_plot(nnet: nn.Module, device, env: Environment, states: List[State], outputs: np.ndarray):
    nnet.eval()

    states_targ_nnet: np.ndarray = env.state_to_nnet_input(states)
    out_nnet = nnet(states_nnet_to_pytorch_input(states_targ_nnet, device).float()).cpu().data.numpy()
    out_nnet, _ = flatten(out_nnet)
    outputs, _ = flatten(outputs)

    out_nnet_array = np.array(out_nnet)
    outputs_array = np.array(outputs)

    # Sample 100 random (expected, predicted) pairs for plotting
    random_idxs = list(range(len(out_nnet_array)))
    random.shuffle(random_idxs)

    random_states: List[State] = []
    sample_expected: List[float] = []
    sample_outputs: List[float] = []

    for i in range(100):
        random_states.append(states[random_idxs[i]])
        sample_expected.append(outputs_array[random_idxs[i]])
        sample_outputs.append(out_nnet_array[random_idxs[i]])

    h_new: np.ndarray = approx_admissible_conv(env, nnet, out_nnet_array, outputs_array,
                                               states, random_states, sample_outputs, sample_expected)

    # Scatter of raw network output vs. expected cost-to-go,
    # with the line y = x for reference
    plt.scatter(sample_expected, sample_outputs, c='#000000', linewidths=0.1)
    plt.axline((0, 0), (30, 30), linewidth=3, c='g')
    plt.ylabel('NNet output')
    plt.xlabel('Expected value')
    plt.title("Output vs Expected")
    plt.show()

    # Scatter of the converted (approximately admissible) output
    # vs. expected cost-to-go
    plt.scatter(sample_expected, h_new, c='#000000', linewidths=0.1)
    plt.axline((0, 0), (30, 30), linewidth=3, c='g')
    plt.ylabel('Converted output')
    plt.xlabel('Expected value')
    plt.title("Converted Output vs Expected")
    plt.show()
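To keep a copy of each figure on disk in addition to (or instead of) the interactive window, plt.savefig can be called before plt.show(); the filename below is an illustrative choice.

# Persist the current figure before displaying it;
# the filename is an illustrative choice.
plt.savefig("converted_output_vs_expected.pdf", bbox_inches="tight")
plt.show()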