Example #1
def dqn(n_episodes=30000, max_t=40000, eps_start=1.0, eps_end=0.01, eps_decay=0.9995):
    """Deep Q-Learning.
    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps (frames) per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
    """
    scores = []  # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start  # initialize epsilon
    for i_episode in range(1, n_episodes + 1):
        obs = env.reset()
        obs = pre_process(obs)
        state = init_state(obs)

        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            next_state, reward, done, _ = env.step(action)
            # last three frames and current frame as the next state
            next_state = np.stack((state[1], state[2], state[3], pre_process(next_state)), axis=0)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)  # save most recent score
        scores.append(score)  # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        print('\rEpisode {}\tAverage Score: {:.2f}\tEpsilon: {:.2f}'.format(
            i_episode, np.mean(scores_window), eps), end="")
        if i_episode % 1000 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
            print('\rEpisode {}\tThe length of replay buffer now: {}'.format(i_episode, len(agent.memory)))

        if np.mean(scores_window) >= 50.0:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode - 100,
                                                                                         np.mean(scores_window)))
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint/dqn_checkpoint_solved.pth')
            break

    torch.save(agent.qnetwork_local.state_dict(), 'checkpoint/dqn_checkpoint_8.pth')
    return scores
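Example #1 relies on pre_process and init_state helpers that are not shown. Below is a minimal sketch of what they might look like for Atari Breakout frames (grayscale, resized to 84x84, stacked four deep, consistent with the 32x4x84x84 tensor used in Example #6); the exact resize and interpolation choices are assumptions, not the original code.

import cv2
import numpy as np

def pre_process(obs):
    # Convert the raw RGB frame to grayscale and shrink it to 84x84,
    # the input size used by the standard DQN convolutional stack.
    gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)

def init_state(obs):
    # Repeat the first processed frame four times so the state always
    # holds a stack of the four most recent frames.
    return np.stack((obs, obs, obs, obs), axis=0)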
Example #2
def predict(im):
    # pre_process splits the input image into four 20x20 character images.
    im1, im2, im3, im4 = pre_process(im)
    # Note: reloading the model on every call is slow; in practice load it once and reuse it.
    model = load_model("Veri.h5")
    # Stack the four sub-images into a single batch, scaled roughly to [0, 1).
    x = np.empty((4, 20, 20, 1))
    for i, sub in enumerate((im1, im2, im3, im4)):
        x[i] = (np.asarray(sub, dtype='float64') / 256).reshape(20, 20, 1)

    pre = ''
    for c in model.predict_classes(x):
        pre = pre + label2word(c)

    return pre
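Note: model.predict_classes only exists on Sequential models in older Keras versions and has been removed from recent TensorFlow/Keras releases. With a softmax output, an equivalent (assuming the same model and batch x as above) is:

import numpy as np
classes = np.argmax(model.predict(x), axis=-1)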
Example #3
def create_network():
    # Wrap the neural network in the scope named 'network'.
    # Create new variables during training, and re-use during testing.
    with tf.variable_scope('network'):
        # x is the input placeholder; build the TensorFlow graph for pre-processing.
        # TODO: why is this DISTORTED (training=True)?
        images = tools.pre_process(images=x,
                                   training=True,
                                   img_size_cropped=24,
                                   num_channels=3)

        # Create TensorFlow graph for the main processing.
        y_pred, loss = main_network(images=images)

    return y_pred, loss
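tools.pre_process is not shown in this example. In CIFAR-10-style tutorials such a function typically applies random crops and distortions during training and a fixed central crop at test time. The sketch below is written under that assumption for the TF1 API used above; the specific distortions are illustrative, not the original code.

import tensorflow as tf

def pre_process(images, training, img_size_cropped, num_channels):
    def _one(image):
        if training:
            # Randomly crop and distort each training image.
            image = tf.random_crop(image, size=[img_size_cropped, img_size_cropped, num_channels])
            image = tf.image.random_flip_left_right(image)
            image = tf.image.random_brightness(image, max_delta=0.2)
        else:
            # Use a deterministic central crop at test time.
            image = tf.image.resize_image_with_crop_or_pad(image,
                                                           target_height=img_size_cropped,
                                                           target_width=img_size_cropped)
        return image
    # Apply the per-image transformation to the whole batch.
    return tf.map_fn(_one, images)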
Example #4
# Crop each detected box out of the image, convert it to grayscale and
# upscale it so the OCR engine has enough pixels to work with.
ROI = []
for (X1, Y1), (X2, Y2) in boxes:
    roi = image[Y1:Y2, X1:X2]
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    (h, w) = roi.shape[:2]
    roi = cv2.resize(roi, (w * window_maximizer, h * window_maximizer),
                     interpolation=cv2.INTER_CUBIC)
    ROI.append(roi)

# Preprocessing the images
logging.info(f"""Pre-processing the regions with method {args["method"]}...""")
ROI = pre_process(ROI, visu, args["method"])

# Hand the regions to the OCR engine to be classified
logging.info("Applying OCR to the regions...")
# --oem 0 selects the legacy engine and --psm 6 assumes a single uniform block of text;
# see Tesseract's documentation for more info (command line: $ tesseract --help-oem ; $ tesseract --help-psm)
custom_config = r"--oem 0 --psm 6"
numbers = []
NbError = 0
for region in ROI:
    number = pytesseract.image_to_string(region, config=custom_config)

    # Test whether the recognised string can be cast to a float.
    isInt = True
    try:
        number = float(number)
    except ValueError:
        isInt = False
        NbError += 1
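Because only numeric results are kept, the legacy engine selected by --oem 0 can additionally be restricted to digits with a character whitelist. A possible config (an assumption, not part of the original script) would be:

custom_config = r"--oem 0 --psm 6 -c tessedit_char_whitelist=0123456789."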
Example #5
        # plot the scores
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.plot(np.arange(len(scores)), scores)
        plt.ylabel('Score')
        plt.xlabel('Episode #')
        plt.show()

    else:
        # load the weights from file
        agent.qnetwork_local.load_state_dict(torch.load('checkpoint/dqn_checkpoint_8.pth'))
        rewards = []
        for i in range(10):  # episodes, play ten times
            total_reward = 0
            obs = env.reset()
            obs = pre_process(obs)
            state = init_state(obs)
            for j in range(10000):  # frame cap, in case the agent gets stuck on one frame
                action = agent.act(state)
                env.render()
                next_state, reward, done, _ = env.step(action)
                obs = pre_process(next_state)
                # keep the last three frames and append the newest processed frame
                state = np.stack((state[1], state[2], state[3], obs), axis=0)
                cv2.imshow('Breakout', obs)
                cv2.waitKey(1)  # let the OpenCV window refresh
                total_reward += reward

                # time.sleep(0.01)
                if done:
                    rewards.append(total_reward)
                    break
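A small follow-up the snippet stops short of: reporting the average reward over the ten evaluation episodes, for example:

        print('Average reward over {} episodes: {:.2f}'.format(len(rewards), np.mean(rewards)))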
Example #6
            nn.ReLU(),
            nn.Linear(512, action_size)
        )

    def forward(self, state):
        """Build a network that maps state -> action values."""
        conv_out = self.conv(state).view(state.size()[0], -1)
        return self.fc(conv_out)

if __name__ == '__main__':
    env = gym.make('Breakout-v0')
    print('State shape: ', env.observation_space.shape)
    print('Number of actions: ', env.action_space.n)

    obs = env.reset()
    img = pre_process(obs)
    state = init_state(img)
    print(np.shape(state[0]))

    # plt.imshow(img, cmap='gray')  # not working here
    # display the processed frame with the cv2 module instead
    cv2.imshow('Breakout', img)
    cv2.waitKey(0)

    state = torch.randn(32, 4, 84, 84)  # (batch_size, 4 stacked frames, img_height, img_width)
    state_size = state.size()

    cnn_model = QNetwork(state_size, action_size=4, seed=1)
    outputs = cnn_model(state)
    print(outputs)
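Example #6 only shows the tail of the network definition. A sketch of the full QNetwork it implies is given below, using the standard Nature-DQN convolutional stack feeding the nn.Linear(512, action_size) head shown above; the exact layer sizes are assumptions that happen to match a 4x84x84 input.

import torch
import torch.nn as nn

class QNetwork(nn.Module):
    def __init__(self, state_size, action_size, seed):
        super().__init__()
        torch.manual_seed(seed)
        # Convolutional feature extractor over the 4 stacked 84x84 frames.
        self.conv = nn.Sequential(
            nn.Conv2d(4, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU()
        )
        # Fully connected head mapping the flattened 64x7x7 features to action values.
        self.fc = nn.Sequential(
            nn.Linear(64 * 7 * 7, 512),
            nn.ReLU(),
            nn.Linear(512, action_size)
        )

    def forward(self, state):
        """Build a network that maps state -> action values."""
        conv_out = self.conv(state).view(state.size()[0], -1)
        return self.fc(conv_out)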