# plot if (arg.show_pre_image): # img = s_[-1] if (arg.show_orig_img): img = pre_process_screen(img, ret_type=arg.img_ret_type) add_rects(img=img, point=position_, env=env) # img = cv2.resize(img, (800, 600), interpolation=cv2.INTER_AREA) else: img = s_[-1] if (arg.show_resize_img): img = cv2.resize(img, (800, 600), interpolation=cv2.INTER_AREA) if (cv_img(img)): break_flag = 1 break if (model.memory_counter > MEMORY_CAPACITY and steps % arg.LEARN_TMP_STEP == 0): model.learn() # print('----------------------------- learn ------------------', model.learn_step_counter) # if done: # print('Ep: ', i_episode, # '| Ep_r: ', round(ep_r, 2)) # ---------- important data ---------------------- # ------------- r_d and r_p # print('r: {:5.1f}, steps: {:3}, r_d: {:5.2f}, r_p: {:5.2f}, d/p: {:-8.3f}'.format( r, steps, r_d, r_p, d_p))
# Step the environment once per entry in `actions` and push the resulting
# transition into `memory`.
# NOTE(review): this loop arrived collapsed onto one physical line, so
# everything after the first `#` was a comment and the `for` had no body.
# The nesting below is reconstructed from statement order (a statement that
# directly follows `break` must sit one level further out) -- confirm it
# matches the intended layout.
for i in range(len(actions)):  # batch times
    # Stop request: set break_flag and leave the loop.
    if tt.stop_alt():
        break_flag = 1
        break
    steps += 1
    a = actions[i]
    s_, r, done, info = env.step(a)
    if not done:
        position_, press_shift, pos_passed = info
        s_ = preprocess_state(s_, position_, env)
        # plot
        if arg.show_pre_image and cv_img(s_[-1]):
            break_flag = 1
            break
    else:
        # Terminal step: the reward is overridden with arg.reward_done and
        # the position passed to preprocess_state is fixed at (0, 0).
        r = arg.reward_done
        position_ = (0, 0)
        s_ = preprocess_state(s_, position_, env)
        if arg.show_pre_image and cv_img(s_[-1]):
            break_flag = 1
            break
        tt.sleep(0.5)
        # NOTE(review): this break leaves the loop before memory.push below,
        # so the terminal transition (with r = arg.reward_done) is not stored
        # by this loop -- verify that is intended.
        break
    # memory.push(s, r, done, info, s_)
    memory.push(s, a, r, info, s_)
point=env.end, ptype=arg.ptype_dest, pcolor=arg.pcolor_dest, ww=20, hh=20) # if (cv_img(img)): break_flag = 1; break # plt_img(img) # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) img.shape img.dtype # img = cv2.resize(img, (600, 400), interpolation=cv2.INTER_AREA) # img = cv2.resize(img, (800, 600), interpolation=cv2.INTER_AREA) # cv2.rectangle(img, xy0, xy1, (0, 0, 0), -1) if (arg.show_pre_image and cv_img(img)): break_flag = 1 break position = position_ rewards.append(r) s = s_ if (arg.debug_reward_d_p): print('{:5.2}, {:5.3f}, {:5},'.format( rewards[-1], env.rewards_d[-1], env.rewards_p[-1])) # print('{:5.2f}, {:5.3f}, {:5.3f},'.format( sum(rewards), sum(env.rewards_d), sum(env.rewards_p) ) ) if (done or step >= MAX_STEP - 1): r = arg.reward_done
death_n = 0 results = ((0, 0), (100, 100)) print('--- death! ---') # tt.sleep(1) for j in range(10): #plot death tt.sleep(0.01) screen0 = wind.grab_screen() # screen = screen0 #screen = pre_process_screen(screen0) screen = cv2.cvtColor(screen0, cv2.COLOR_BGR2RGB) # RGB if (cv_img(screen)): break wind.key_dp(vk.r, tmp) continue xy0, xy1 = results[0], results[1] # xy0, xy1 = (xy0[0] - 10, xy0[0] - 10), (xy1[0] + 10, xy1[0] + 10) position = np.array((np.array(xy0) + np.array(xy1)) / 2, dtype=int) # print(results) print('第{:4}次, position: {}'.format(i, position)) cv2.rectangle(screen, xy0, xy1, (255, 220, 0), -1) #screen = cv2.cvtColor(screen0, cv2.COLOR_BGR2GRAY)