def dqn(n_episodes=30000, max_t=40000, eps_start=1.0, eps_end=0.01, eps_decay=0.9995):
    """Deep Q-Learning.

    Params
    ======
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps (frames) per episode
        eps_start (float): starting value of epsilon, for epsilon-greedy action selection
        eps_end (float): minimum value of epsilon
        eps_decay (float): multiplicative factor (per episode) for decreasing epsilon
    """
    scores = []                        # list containing scores from each episode
    scores_window = deque(maxlen=100)  # last 100 scores
    eps = eps_start                    # initialize epsilon
    for i_episode in range(1, n_episodes + 1):
        obs = env.reset()
        obs = pre_process(obs)
        state = init_state(obs)
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            next_obs, reward, done, _ = env.step(action)
            # stack the last three frames and the current frame as the next state
            next_state = np.stack((state[1], state[2], state[3], pre_process(next_obs)), axis=0)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                break
        scores_window.append(score)  # save most recent score
        scores.append(score)         # save most recent score
        eps = max(eps_end, eps_decay * eps)  # decrease epsilon
        print('\tEpsilon now : {:.2f}'.format(eps))
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)), end="")
        if i_episode % 1000 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_window)))
            print('\rEpisode {}\tThe length of replay buffer now: {}'.format(i_episode, len(agent.memory)))
        if np.mean(scores_window) >= 50.0:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(
                i_episode - 100, np.mean(scores_window)))
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint/dqn_checkpoint_solved.pth')
            break
    torch.save(agent.qnetwork_local.state_dict(), 'checkpoint/dqn_checkpoint_8.pth')
    return scores
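# The training loop above relies on `pre_process` and `init_state` helpers that are not
# shown in this snippet. A minimal sketch of what they might look like, assuming
# 84x84 grayscale frames and a 4-frame stacked state (as suggested by the
# torch.randn(32, 4, 84, 84) test input later in this file); the actual helpers in the
# project may differ:
def pre_process(obs):
    """Convert an RGB Atari frame to an 84x84 grayscale image (sketch)."""
    gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)


def init_state(frame):
    """Build the initial state by repeating the first frame four times (sketch)."""
    return np.stack((frame, frame, frame, frame), axis=0)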
def predict(im):
    # split the input image into four 20x20 character crops
    im1, im2, im3, im4 = pre_process(im)
    model = load_model("Veri.h5")
    x = np.empty((4, 20, 20, 1))
    x[0] = (np.asarray(im1, dtype='float64') / 256).reshape(20, 20, 1)
    x[1] = (np.asarray(im2, dtype='float64') / 256).reshape(20, 20, 1)
    x[2] = (np.asarray(im3, dtype='float64') / 256).reshape(20, 20, 1)
    x[3] = (np.asarray(im4, dtype='float64') / 256).reshape(20, 20, 1)
    # map each predicted class index back to its character and concatenate
    pre = ''
    for c in model.predict_classes(x):
        pre = pre + label2word(c)
    return pre
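# Note: `model.predict_classes` is only available on Sequential models and was removed
# in newer Keras / TensorFlow releases. If `predict` raises an AttributeError, an
# equivalent (assuming the model ends in a softmax over the character classes) is:
#
#     for c in np.argmax(model.predict(x), axis=-1):
#         pre = pre + label2word(c)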
def create_network():
    # Wrap the neural network in the scope named 'network'.
    # Create new variables during training, and re-use during testing.
    with tf.variable_scope('network'):
        # Just rename the input placeholder variable for convenience.
        # Create TensorFlow graph for the pre-processing.
        # TODO: why is this distorted (training=True)?
        images = tools.pre_process(images=x, training=True, img_size_cropped=24, num_channels=3)

        # Create TensorFlow graph for the main processing.
        y_pred, loss = main_network(images=images)

    return y_pred, loss
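# Note: `tf.variable_scope` is TensorFlow 1.x graph-building API. Under TensorFlow 2.x
# this function would only run via the compatibility layer
# (`import tensorflow.compat.v1 as tf` plus `tf.disable_v2_behavior()`).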
ROI = []
for i in range(len(boxes)):
    # box corners: boxes[i][0] is the top-left point, boxes[i][1] the bottom-right point
    X1 = boxes[i][0][0]
    X2 = boxes[i][1][0]
    Y1 = boxes[i][0][1]
    Y2 = boxes[i][1][1]
    roi = image[Y1:Y2, X1:X2]
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    (h, w) = roi.shape[:2]
    roi = cv2.resize(roi, (w * window_maximizer, h * window_maximizer), interpolation=cv2.INTER_CUBIC)
    ROI.append(roi)

# Pre-process the regions of interest
logging.info(f"""Pre-processing the regions with method {args["method"]}...""")
ROI = pre_process(ROI, visu, args["method"])

# Pass the regions to the OCR model for classification
logging.info("Applying OCR to the regions...")
custom_config = r"--oem 0 --psm 6"  # see Tesseract's documentation ($ tesseract --help-oem ; $ tesseract --help-psm)
numbers = []
NbError = 0
for i in range(len(ROI)):
    region = ROI[i]
    number = pytesseract.image_to_string(region, config=custom_config)
    # test whether the string can be cast to a float
    isInt = True
    try:
        number = float(number)
    # plot the scores
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.plot(np.arange(len(scores)), scores)
    plt.ylabel('Score')
    plt.xlabel('Episode #')
    plt.show()
else:
    # load the trained weights from file
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint/dqn_checkpoint_8.pth'))

    rewards = []
    for i in range(10):  # episodes, play ten times
        total_reward = 0
        obs = env.reset()
        obs = pre_process(obs)
        state = init_state(obs)
        for j in range(10000):  # frames, in case the agent gets stuck in one frame
            action = agent.act(state)
            env.render()
            next_state, reward, done, _ = env.step(action)
            obs = pre_process(next_state)
            state = np.stack((state[1], state[2], state[3], obs), axis=0)
            cv2.imshow('Breakout', obs)
            total_reward += reward
            # time.sleep(0.01)
            if done:
                rewards.append(total_reward)
                break
            nn.ReLU(),
            nn.Linear(512, action_size)
        )

    def forward(self, state):
        """Build a network that maps state -> action values."""
        conv_out = self.conv(state).view(state.size()[0], -1)
        return self.fc(conv_out)


if __name__ == '__main__':
    env = gym.make('Breakout-v0')
    print('State shape: ', env.observation_space.shape)
    print('Number of actions: ', env.action_space.n)

    obs = env.reset()
    img = pre_process(obs)
    state = init_state(img)
    print(np.shape(state[0]))

    # plt.imshow(img, cmap='gray')  # not working
    # display using the cv2 module instead
    cv2.imshow('Breakout', img)
    cv2.waitKey(0)

    state = torch.randn(32, 4, 84, 84)  # (batch_size, 4 frames, img_height, img_width)
    state_size = state.size()
    cnn_model = QNetwork(state_size, action_size=4, seed=1)
    outputs = cnn_model(state)
    print(outputs)
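# The snippet above only shows the tail of QNetwork. A minimal sketch of the layers
# that `self.conv` and `self.fc` presumably refer to, using the classic DQN
# convolutional stack for 4x84x84 inputs; the exact layer sizes are an assumption,
# not taken from this snippet:
#
#     class QNetwork(nn.Module):
#         def __init__(self, state_size, action_size, seed):
#             super().__init__()
#             torch.manual_seed(seed)
#             self.conv = nn.Sequential(
#                 nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.ReLU(),
#                 nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
#                 nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),
#             )
#             self.fc = nn.Sequential(
#                 nn.Linear(64 * 7 * 7, 512),  # 84x84 input -> 7x7x64 feature map
#                 nn.ReLU(),
#                 nn.Linear(512, action_size)
#             )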