# Build the worker pool; every worker is handed the same GLOBAL_AC object.
for i in range(N_WORKERS):
    i_name = 'W_%i' % i  # worker name
    workers.append(Worker(i_name, GLOBAL_AC))

# Only the first worker is run, directly in the current thread.
worker = workers[0]
worker.env.reset()
# worker.env.render()
# worker.env.close()
worker.work()

# Report the largest value recorded in GLOBAL_RUNNING_R and the index at which
# it first occurs. The maximum is computed once, and the empty case is handled
# explicitly because max() raises ValueError on an empty sequence.
if GLOBAL_RUNNING_R:
    best_reward = max(GLOBAL_RUNNING_R)
    best_episode = GLOBAL_RUNNING_R.index(best_reward)
    print('----- parameters: {}, \n----- episode: {}, Reward:{:.1f} '.format(
        (LR_A, LR_C), best_episode, best_reward))
    print('# ({}, {}) - {:.1f};'.format(LR_A, LR_C, best_reward))
else:
    print('----- parameters: {}, no reward recorded'.format((LR_A, LR_C)))

# Disabled debugging snippet: pushes a random (100, N_S) batch through a fresh
# ActorCriticNet and prints the result.
if 0:
    a_net = ActorCriticNet(n_input=N_S, n_out=N_A)
    n_input, n_out = N_S, N_A
    x = torch.randn([100, n_input])
    print(a_net.forward(x))

# Polls tt.stop_alt() up to 100 times with tt.sleep(0.1) between polls and
# leaves early as soon as it returns a truthy value.
# NOTE(review): the original indentation was lost; this loop is assumed to be
# at top level rather than inside the disabled block above -- confirm.
for i in range(100):
    tt.sleep(0.1)
    if tt.stop_alt():
        print('break!')
        break
# Episode loop: at most MAX_EPISODE episodes of at most MAX_STEP steps each.
# i_episode = 0
for i_episode in range(MAX_EPISODE):
    # break_flag is raised by the step loop below when a stop is requested.
    # It may not be defined yet on the first pass, so only the NameError from
    # the missing name is handled here; a bare `except:` would also swallow
    # KeyboardInterrupt and genuine errors.
    try:
        if break_flag:
            break
    except NameError:
        break_flag = 0

    # Start a new episode and convert the raw observation into model input.
    s, position = env.reset(return_s_pos=1)
    s = preprocess_state(s, position, env)
    # actions = [0, 0, 0, 1, 1, 1, -1]
    t1 = Time()
    ep_r = 0
    for i in range(MAX_STEP):
        # Stop request (presumably a hotkey check -- confirm against tt):
        # record it so the episode loop also stops at its next check.
        if tt.stop_alt('s'):
            print('----- break! -----')
            break_flag = 1
            break
        steps += 1
        # a = actions[i] # len(actions)
        a = model.choose_action(s)
        s_, r, done, info = env.step(a)
        position_, press_shift, pos_passed = info
        img = s_[-1]  # last element of the raw next state, taken before preprocessing
        s_ = preprocess_state(s_, position_, env)
        # On the last allowed step, or when the environment reports done,
        # the step reward is replaced by arg.reward_done.
        if i == MAX_STEP - 1 or done:
            r = arg.reward_done
# Scratch sequence that drives the window object `wind`:
#   * moves the window to (0, 0), opens its process with
#     win32api.OpenProcess(0x1F0FFF, ...) and loads kernel32.dll via ctypes;
#   * sends vk.r, then for up to ten iterations alternates vk.left (odd i) and
#     vk.right (even i) through wind.key_dp(..., 0.5), leaving early when
#     tt.stop_alt('s') is truthy;
#   * sends vk.r again and enters a loop gated on t0.during(10).
# NOTE(review): 0x1F0FFF is presumably PROCESS_ALL_ACCESS -- confirm.
# NOTE(review): ReadProcessMemory is called with only the process handle and
# GetModuleHandle with no arguments; both look like unfinished experiments --
# confirm before relying on them.
# NOTE(review): the statements below have lost their line breaks and the final
# `if(1):` has no body; the layout must be restored before this can run.
wind.hwnd wind.process_id wind.move_to(0,0) process_handle = win32api.OpenProcess(0x1F0FFF, False, wind.process_id) kernel32 = ctypes.windll.LoadLibrary(r'C:\Windows\System32\kernel32.dll') kernel32.ReadProcessMemory(int(process_handle), ) int(process_handle) win32api.GetModuleHandle() wind.key_dp(vk.r, 0.5) for i in range(10): if(tt.stop_alt('s')): print('--- break ---'); break tt.sleep(0.1) if(i % 2): wind.key_dp(vk.left, 0.5) else: wind.key_dp(vk.right, 0.5) press_t = 0.5 wind.key_dp(vk.r, 0.5) t0 = Time(); i = 0; while(t0.during(10)): #t0.sleep(0.1) if(1): # control_parameters