def SIN(max_epochs, learning_rate, no_hidden):
    np.random.seed(213)
    inputs = []
    outputs = []
    # 200 random 4-component input vectors drawn uniformly from [-1, 1)
    for i in range(200):
        inputs.append(list(np.random.uniform(-1.0, 1.0, 4)))
    inputs = np.array(inputs)

    # target for each vector is sin(x0 - x1 + x2 - x3)
    for i in range(200):
        outputs.append(np.sin([inputs[i][0] - inputs[i][1] + inputs[i][2] - inputs[i][3]]))

    no_in = 4
    no_out = 1
    NN = MLP(no_in, no_hidden, no_out)
    NN.randomise()
    print('\nMax Epoch:\n' + str(max_epochs), file=log)
    print('\nLearning Rate:\n' + str(learning_rate), file=log)
    print('\nBefore Training:\n', file=log)

    for i in range(150):
        NN.forward(inputs[i],'tanh')
        print('Target:\t{}\t Output:\t {}'.format(str(outputs[i]),str(NN.O)), file=log)
    print('Training:\n', file=log)
    
    
    # training process
    for i in range(max_epochs):
        NN.forward(inputs[:150], 'tanh')
        error = NN.backward(inputs[:150], outputs[:150], 'tanh')
        NN.updateWeights(learning_rate)
        # print the error every 5% of the epochs
        if (i + 1) % (max_epochs / 20) == 0:
            print(' Error at Epoch:\t' + str(i + 1) + '\t  is \t' + str(error), file=log)
    
    difference = 0.0
    print('\n Testing :\n', file=log)
    for i in range(150, len(inputs)):
        NN.forward(inputs[i], 'tanh')
        print('Target:\t{}\t Output:\t {}'.format(str(outputs[i]), str(NN.O)), file=log)
        difference += np.abs(outputs[i][0] - NN.O[0])

    accuracy = 1 - (difference / 50)   # 1 minus the mean absolute error over the 50 test samples
    accuracylist.append(accuracy)
    print('\nAccuracy:{}'.format(accuracy), file=log)
    print('\ntestError:{}'.format(difference / 50), file=log)
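
A minimal driver for this example, assuming it is appended to the same script that defines the MLP class, numpy as np, and the module-level log handle and accuracylist that SIN() writes to (the hyperparameter values are arbitrary):

log = open('sin_log.txt', 'w')       # all reports from SIN() go to this handle
accuracylist = []                    # SIN() appends one accuracy value per run
SIN(max_epochs=1000, learning_rate=0.05, no_hidden=5)   # illustrative hyperparameters
log.close()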
Example #2
def XOR(max_epochs, learning_rate):
    np.random.seed(1)
    inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    outputs = np.array([[0], [1], [1], [0]])

    NI = 2
    NH = 4
    NO = 1
    NN = MLP(NI, NH, NO)

    NN.randomise()
    print('\nMax Epoch:\n' + str(max_epochs), file=log)
    print('\nLearning Rate:\n' + str(learning_rate), file=log)
    print('\nBefore Training:\n', file=log)
    for i in range(len(inputs)):
        NN.forward(inputs[i], 'sigmoid')
        print('Target:\t {}  Output:\t {}'.format(str(outputs[i]), str(NN.O)),
              file=log)
    print('\nTraining:\n', file=log)

    for i in range(0, max_epochs):
        NN.forward(inputs, 'sigmoid')
        error = NN.backward(inputs, outputs, 'sigmoid')
        NN.updateWeights(learning_rate)

        if (i + 1) % (max_epochs / 20) == 0:
            print(' Error at Epoch:\t' + str(i + 1) + '\t\t  is \t\t' +
                  str(error),
                  file=log)

    print('\n After Training :\n', file=log)

    # per-pattern score is 1 - |target - output|, averaged over the four XOR cases below
    accuracy = 0.0
    for i in range(len(inputs)):
        NN.forward(inputs[i], 'sigmoid')
        print('Target:\t {}  Output:\t {}'.format(str(outputs[i]), str(NN.O)),
              file=log)
        if (outputs[i][0] == 0):
            accuracy += 1 - NN.O[0]
        elif (outputs[i][0] == 1):
            accuracy += NN.O[0]
    print('\nAccuracy:{}'.format(accuracy / 4), file=log)
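
A similar driver works for the XOR example; only the MLP class and the module-level log handle are needed, and the epoch count and learning rate below are placeholders:

log = open('xor_log.txt', 'w')
XOR(max_epochs=5000, learning_rate=0.2)   # 2-4-1 sigmoid network, sizes fixed inside XOR
log.close()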
Example #3
# desired output for each vector is the sine of the sum of its components
summed_vector_comps = numpy.sum(sin_inputs)   # note: this sums the whole array, not a single vector
for i in range(50):
    sin_desired_output.append([numpy.sin(numpy.sum(sin_inputs[i]))])

sin_desired_output = numpy.array(sin_desired_output)

# saves output to file
with open(
        'test_output/sin_output/sin_output_size_(' + str(INPUTS) + ', ' +
        str(HIDDEN) + ', ' + str(OUTPUTS) + ')_learning_rate_' +
        str(LEARNING_RATE) + '_epochs_' + str(MAX_EPOCHS) + '.txt', 'w') as f:

    print("\nPreTraining Testing:\n")
    f.write('\nPreTraining Testing:\n')
    for i in range(len(sin_inputs) - 10, len(sin_inputs)):
        mlp.forward(sin_inputs[i], True)
        print("Target:\t" + str(sin_desired_output[i]) + "\t\tOutput:\t" +
              str(mlp.o) + "\n")
        f.write('Target:\t' + str(sin_desired_output[i]) + '\t\tOutput:\t' +
                str(mlp.o) + '\n')

    f.write('MLP Size\t\t\t(' + str(INPUTS) + ', ' + str(HIDDEN) + ', ' +
            str(OUTPUTS) + ')\n')
    f.write('Epochs:\t\t\t\t' + str(MAX_EPOCHS) + '\n')
    f.write('Learning Rate:\t\t' + str(LEARNING_RATE) + '\n\n')

    print("Training:\n")
    f.write('Training:\n')
    for i in range(0, MAX_EPOCHS):
        error = 0
        mlp.forward(sin_inputs[:len(sin_inputs) - 10], True)
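
The excerpt above stops inside the training loop. For orientation only, here is a self-contained sketch of the per-epoch pattern these sine examples follow (forward pass, backpropagation, weight update); it uses a tiny hand-rolled tanh network instead of the repository's MLP class, so every name in it is illustrative:

import numpy as np

rng = np.random.default_rng(0)
X = rng.uniform(-1.0, 1.0, (40, 4))               # 40 training vectors with 4 components
T = np.sin(X.sum(axis=1, keepdims=True))          # target: sine of the component sum

W1 = rng.normal(scale=0.5, size=(4, 5))           # input -> hidden weights
W2 = rng.normal(scale=0.5, size=(5, 1))           # hidden -> output weights
lr = 0.05

for epoch in range(2000):
    H = np.tanh(X @ W1)                           # forward pass, tanh hidden layer
    O = H @ W2                                    # linear output
    err = O - T
    dW2 = H.T @ err / len(X)                      # gradient of 0.5 * mean squared error
    dH = (err @ W2.T) * (1.0 - H ** 2)            # backpropagate through tanh
    dW1 = X.T @ dH / len(X)
    W2 -= lr * dW2                                # gradient-descent weight update
    W1 -= lr * dW1
    if (epoch + 1) % 500 == 0:
        print('epoch', epoch + 1, 'mse', float((err ** 2).mean()))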
Example #4
mlp.randomise()
# print(mlp)

xor_inputs = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])
xor_desired_output = numpy.array([[0], [1], [1], [0]])

# saves output to file
with open(
        'test_output/xor_output/xor_output_size_(' + str(INPUTS) + ', ' +
        str(HIDDEN) + ', ' + str(OUTPUTS) + ')_learning_rate_' +
        str(LEARNING_RATE) + '_epochs_' + str(MAX_EPOCHS) + '.txt', 'w') as f:

    print("\nPreTraining Testing:\n")
    f.write('\nPreTraining Testing:\n')
    for i in range(len(xor_inputs)):
        mlp.forward(xor_inputs[i], False)
        print("Target:\t" + str(xor_desired_output[i]) + "\t\tOutput:\t" +
              str(mlp.o) + "\n")
        f.write('Target:\t' + str(xor_desired_output[i]) + '\t\tOutput:\t' +
                str(mlp.o) + '\n')

    f.write('MLP Size\t(' + str(INPUTS) + ', ' + str(HIDDEN) + ', ' +
            str(OUTPUTS) + ')\n\n')
    f.write('Epochs:\t\t' + str(MAX_EPOCHS) + '\n')
    f.write('Learning Rate:\t' + str(LEARNING_RATE) + '\n\n')

    print("Training:\n")
    f.write('Training:\n')
    for i in range(0, MAX_EPOCHS):
        error = 0
        mlp.forward(xor_inputs, False)
Example #5
class DQN():
    def __init__(self, env, alpha, gamma, episode_num, target_reward,
                 step_count, minbatch, memory_size, flag):
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.episode_num = episode_num
        self.target_reward = target_reward
        self.step_count = step_count
        # self.test_step=test_step
        self.minbatch = minbatch
        self.memory_size = memory_size
        self.flag = flag
        self.Q = MLP()
        self.state_dim = env.observation_space.shape[0]
        self.action_dim = env.action_space.spaces[
            0].n * env.action_space.spaces[1].n
        # self.action_dim = env.action_space.n

        self.Q.creat2(self.state_dim, env.action_space.spaces[0].n,
                      env.action_space.spaces[1].n)

        self.memory_num = 0
        # each replay row stores [state, action1, action2, reward, done, next_state]
        self.memory = np.zeros((memory_size, self.state_dim * 2 + 4))
        self.optimizer = torch.optim.Adam(self.Q.parameters(), lr=alpha)
        self.loss_func = nn.MSELoss()

    # unpack a container of legal action2 values (from the external 'example' module) into a Python list
    def action2set(self, action2s):
        actionLen = example.action2Len(action2s)
        action2Selected = []
        for index in range(actionLen):
            action2Selected.append(example.get_action2(action2s, index))
        return action2Selected

    def getAction1(self, act1Set, action1_value):
        # greedy action1 choice restricted to the indices marked legal in act1Set
        nonzeroind = np.nonzero(act1Set)[0]
        index = torch.LongTensor([nonzeroind])
        action1_values = torch.gather(action1_value.data, 1, index)
        action1 = torch.max(Variable(action1_values), 1)[1].data.numpy()[0]
        action1 = nonzeroind[action1]
        return action1

    def getAction1_random(self, act1Set):
        act1Set1 = np.nonzero(act1Set)
        action1 = choice(act1Set1[0])
        return action1

    def getAction2(self, candidate, action1, action2_value):
        # greedy action2 choice restricted to the values that are legal given action1
        action2 = example.getLegalAction2(candidate, action1)
        action2set_ = self.action2set(action2)
        action2_ = [0] * 50              # binary mask over the 50 possible action2 values
        for a in action2set_:
            action2_[a] = 1
        nonzeroind2 = np.nonzero(action2_)[0]
        index2 = torch.LongTensor([nonzeroind2])
        action2_values = torch.gather(action2_value.data, 1, index2)
        action2 = torch.max(Variable(action2_values), 1)[1].data.numpy()[0]
        action2 = nonzeroind2[action2]
        return action2

    def getAction2_random(self, candidate, action1):
        action2_ = example.getLegalAction2(candidate, action1)
        action2set = self.action2set(action2_)
        action2 = choice(action2set)
        return action2

    def getAction(self, action1, action2):
        action = []
        action.append(action1)
        action.append(action2)
        action = tuple(action)
        return action

    def choose_action(self, state, episode, act1Set, candidate):
        # epsilon = 0.5 * (0.993) ** episode
        epsilon = 0.8 * (0.993)**episode
        if epsilon < 0.3:
            epsilon = 0.3
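        # epsilon-greedy exploration: a random action is taken with probability epsilon, which
        # decays from 0.8 to the 0.3 floor at roughly episode 140, since 0.8 * 0.993**e < 0.3
        # once e > log(0.3 / 0.8) / log(0.993) ≈ 139.6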
        state = Variable(torch.unsqueeze(torch.FloatTensor(state), 0))

        if np.random.uniform() > epsilon:
            # print("action by rl")
            action1_value, action2_value = self.Q.forward(state)
            #actions_value = self.Q.forward(state)
            action1 = torch.max(action1_value, 1)[1].data.numpy()[0]
            action2 = torch.max(action2_value, 1)[1].data.numpy()[0]
            # action1 = self.getAction1(act1Set, action1_value)
            # action2 = self.getAction2(candidate, action1, action2_value)
            action = self.getAction(action1, action2)
        else:
            # print("action randomly")
            action1 = random.randint(0, 34)
            action2 = random.randint(0, 49)
            # action1 = self.getAction1_random(act1Set)
            # action2 = self.getAction2_random(candidate, action1)
            action = self.getAction(action1, action2)

        return action

    def select_action(self, state, act1Set, candidate):
        # greedy (no-exploration) action selection restricted to the legal action sets
        # action = np.random.randint(0, self.action_dim)
        state = Variable(torch.unsqueeze(torch.FloatTensor(state), 0))
        action1_value, action2_value = self.Q.forward(state)
        action1 = self.getAction1(act1Set, action1_value)
        action2 = self.getAction2(candidate, action1, action2_value)
        action = self.getAction(action1, action2)
        return action

    def store_transition(self, state, action0, action1, reward, done,
                         next_state):
        transition = np.hstack((state, [action0, action1, reward,
                                        done], next_state))
        index = self.memory_num % self.memory_size
        self.memory[index, :] = transition
        self.memory_num += 1

    def learn(self):

        sample = np.random.choice(self.memory_size, self.minbatch)
        batch = self.memory[sample, :]
        state_batch = Variable(torch.FloatTensor(batch[:, :self.state_dim]))
        action1_batch = Variable(
            torch.LongTensor(batch[:, self.state_dim:self.state_dim +
                                   1].astype(int)))
        action2_batch = Variable(
            torch.LongTensor(batch[:, self.state_dim + 1:self.state_dim +
                                   2].astype(int)))
        reward_batch = Variable(
            torch.FloatTensor(batch[:, self.state_dim + 2:self.state_dim + 3]))
        done_batch = Variable(
            torch.FloatTensor(batch[:, self.state_dim + 3:self.state_dim +
                                    4].astype(int)))
        next_state_batch = Variable(
            torch.FloatTensor(batch[:, -self.state_dim:]))

        # q = self.Q(state_batch).gather(1, action_batch)
        q1 = self.Q(state_batch)[0].gather(1, action1_batch)
        q2 = self.Q(state_batch)[1].gather(1, action2_batch)
        q1_next = self.Q(next_state_batch)[0].detach()
        q2_next = self.Q(next_state_batch)[1].detach()
        q1_val = q1_next.max(1)[0].view(self.minbatch, 1)
        q2_val = q2_next.max(1)[0].view(self.minbatch, 1)
        if self.flag == 0:
            # terminal transitions contribute no bootstrapped value to the first head
            for i in range(len(done_batch)):
                if done_batch[i].data[0] == 1:
                    q1_val[i] = 0
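        # one-step TD targets for the two heads:
        #     y1 = reward + gamma * max_a Q1(next_state, a)   (q1_val zeroed at terminal states when flag == 0)
        #     y2 = reward + gamma * max_a Q2(next_state, a)
        # and the training loss is MSE(Q1(s)[a1], y1) + MSE(Q2(s)[a2], y2)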
        y1 = reward_batch + self.gamma * q1_val
        loss1 = self.loss_func(q1, y1)

        y2 = reward_batch + self.gamma * q2_val
        loss2 = self.loss_func(q2, y2)
        loss = loss1 + loss2
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        result = loss.data[0]
        return result

    def train_CartPole(self, label):
        loss = []
        buf = StringIO.StringIO()
        # buf2 = StringIO.StringIO()
        buf3 = StringIO.StringIO()
        state_init, info_init = self.env.reset()
        for i_episode in range(self.episode_num):
            ep_r = 0
            #if i_episode % 500 == 0:
            state_init, info_init = self.env.reset()
            state = state_init
            info_ = info_init
            print("new episode")
            actIndex = astEncoder.setAction1s(info_)
            loss_num = 0
            epr = 0
            buf2 = StringIO.StringIO()
            # buf2.write("episode: %d" % (i_episode))
            for t in range(self.step_count):
                # env.render()

                action = self.choose_action(state, i_episode, actIndex,
                                            info_.candidate)
                next_state, reward, done, info_ = self.env.step(action)
                #if info_.fitness > 69:
                #state_init = next_state
                #info_init = info_
                actIndex = astEncoder.setAction1s(info_)
                # spin_reward = example.spin_(info_.candidate)
                if done and example.get_fitness(info_.candidate) > 78.4:
                    reward = self.target_reward
                    print("i_ep" + str(i_episode) + " step:" + str(t) +
                          " fitness" +
                          str(example.get_fitness(info_.candidate)))
                    spin_reward = example.spin_(info_.candidate)
                    # buf2.write("program at i_ep %d: step:%d  fitnessValue:%d\n" % (i_episode, t, example.get_fitness(info_.candidate)))
                    if spin_reward == 20:
                        buf.write("correct program at i_ep %d: step:%d \n" %
                                  (i_episode, t))
                        fo = open("./correctProg.txt", "a+")
                        fo.write(buf.getvalue())
                        fo.close()
                    if spin_reward == 5:
                        print("liveness")
                    if spin_reward == 10:
                        print("safety")
                    reward = reward + spin_reward

                self.store_transition(state, action[0], action[1], reward,
                                      done, next_state)
                epr += reward
                if self.memory_num > self.memory_size:
                    loss_num += self.learn()

                if done:
                    loss.append(loss_num / (t + 1))
                state = next_state
                if t % 100 == 0:
                    print("i_ep: ", i_episode, "step: ", t, "reward: ", epr)
                    #buf2.write("i_ep %d: step:%d reward: %f\n" % (i_episode, t, ep_r))
            fou = open("./rewards.txt", "a+")
            fou.write(buf2.getvalue())
            fou.close()
            # fou1 = open("/Users/zhuang/workspace-gp/testSwig2/record.txt", "a+")
            # fou1.write(buf3.getvalue())
            # fou1.close()

        # fo = open("/Users/zhuang/workspace-gp/testSwig2/foo.txt", "a+")
        # fo.write('state:\n' + str(state) + '\n' + 'action:\n' + str(action) + '\n' + 'reward:\n' + str(
        # reward) + '\n' + 'nextState:\n' + str(next_state) + '\n')
        # fo.close()

    def test(self, label):
        total_step = 0
        x = []
        y = []

        total_reward = 0
        rlist = []

        for i_episode in range(1000):
            if i_episode == 9999:
                self.env = wrappers.Monitor(self.env, './video/DQN/' + label)
            state = self.env.reset()
            i_reward = 0
            x.append(i_episode)
            for t in range(self.test_step):   # note: test_step is never set in __init__ above
                # self.env.render()
                action = self.select_action(state)   # note: select_action also expects act1Set and candidate

                next_state, reward, done, info = self.env.step(action)

                i_reward += reward

                if t == (self.test_step - 1):
                    total_step += t + 1
                    y.append(i_reward)

                    break

                if done:
                    y.append(i_reward)
                    total_step += t + 1
                    break
                state = next_state
            rlist.append(i_reward)
            total_reward += i_reward
            print('%d Episode finished after %f time steps' %
                  (i_episode, t + 1))
            ar = total_reward / (i_episode + 1)
            print('average reward:', ar)
            # standard deviation of the per-episode rewards collected so far
            sq_sum = 0
            for count in range(len(rlist)):
                sq_sum += (rlist[count] - ar) ** 2
            sr = math.sqrt(sq_sum / len(rlist))
            print('standard deviation:', sr)
        self.pic(x, y, label, 'Reward')
Example #6
def letter(max_epochs, learning_rate):
    np.random.seed(1)

    inputs = []
    outputs = []
    doutput = []
    columns = [
        "letter", "x-box", "y-box", "width", "height", "onpix", "x-bar",
        "y-bar", "x2bar", "y2bar", "xybar", "x2ybr", "xy2br", "x-ege", "xegvy",
        "y-ege", "yegvx"
    ]

    df = pd.read_csv("letter-recognition.data", names=columns)
    doutput = df["letter"]

    for i in range(len(doutput)):
        outputs.append(ord(str(doutput[i])) - ord('A'))

    inputs = df.drop(["letter"], axis=1)
    inputs = np.array(inputs)
    inputs = inputs / 15  # features are integers in 0-15, so this scales them to [0, 1]

    # train set: first 16000 samples, labels one-hot encoded over 26 classes
    inputs_train = inputs[:16000]
    categorical_y = np.zeros((16000, 26))
    for i, l in enumerate(outputs[:16000]):
        categorical_y[i][l] = 1
    outputs_train = categorical_y

    #test set
    inputs_test = inputs[16000:]
    #    categorical_y = np.zeros((4000, 26))
    #    for i, l in enumerate(outputs[16000:]):
    #        categorical_y[i][l] = 1
    #    outputs_test=categorical_y

    #training process
    no_in = 16
    no_hidden = 10
    no_out = 26

    NN = MLP(no_in, no_hidden, no_out)
    NN.randomise()
    print('\nMax Epoch:\n' + str(max_epochs), file=log)
    print('\nLearning Rate:\n' + str(learning_rate), file=log)
    print('\nTraining Process:\n', file=log)

    for i in range(0, max_epochs):
        NN.forward(inputs_train, 'tanh')
        error = NN.backward(inputs_train, outputs_train, 'tanh')
        NN.updateWeights(learning_rate)

        if (i + 1) % (max_epochs / 20) == 0:
            print(' Error at Epoch:\t' + str(i + 1) + '\t  is \t' + str(error),
                  file=log)

    # testing process
    def to_character0(outputvector):
        # the index of the largest output unit maps back to a letter
        listov = list(outputvector)
        a = listov.index(max(listov))
        return chr(a + ord('A'))

    prediction = []
    for i in range(4000):
        NN.forward(inputs_test[i], 'tanh')
        #    print('Target:\t{}\t Output:\t{}'.format(str(outputs_test[i]),str(NN.O)))
        #    print('Target:\t{}\t Output:\t{}'.format(str(doutput[16000+i]),str(to_character0(NN.O))))
        prediction.append(to_character0(NN.O))

    def to_character(n):
        return chr(int(n) + ord('A'))

    correct = {to_character(i): 0 for i in range(26)}
    letter_num = {to_character(i): 0 for i in range(26)}

    print('==' * 30, file=log)
    for i, _ in enumerate(doutput[16000:]):
        letter_num[doutput[16000 + i]] += 1
        # Print some predictions
        if i % 300 == 0:
            print('Expected: {} | Output: {}'.format(doutput[16000 + i],
                                                     prediction[i]),
                  file=log)
        if doutput[16000 + i] == prediction[i]:
            correct[prediction[i]] += 1

    print('==' * 30, file=log)
    # Calculate the accuracy
    accuracy = sum(correct.values()) / len(prediction)
    print('Test sample size: {} | Correctly predicted sample size: {}'.format(
        len(prediction), sum(correct.values())),
          file=log)
    print('Accuracy: %.3f' % accuracy, file=log)

    # Performance on each class
    print('==' * 30, file=log)
    for k, v in letter_num.items():
        print('{} => Sample Number: {} | Correct Number: {} | Accuracy: {}'.
              format(k, v, correct[k], correct[k] / v),
              file=log)