def main():
    """Run the colour-steering / optical-flow experiment in the Doom simulator.

    The function builds the simulator and agent configuration dictionaries,
    starts a ``DoomSimulator`` and an ``Agent``, and then steps the game until
    the simulator reports termination:

    * for the first ``history_length`` steps an all-zero action is sent and
      the image / measurement / action ring buffers are filled;
    * afterwards Lucas-Kanade optical flow is computed on the two most recent
      buffered frames for a left and a right mask, the feed-forward net is
      invoked through ``ag.act_ffnet`` and action slot 6 is set to
      ``diff_theta``, which accumulates a term proportional to the horizontal
      offset of the position reported by ``getMaxColourPos``.

    When the loop ends the game is closed and the agent is saved.

    Relies on names defined outside this function: ``widthIn``, ``heightIn``,
    ``width``, ``height``, ``edge``, ``getMaxColourPos``, ``DoomSimulator``,
    ``Agent``, ``np``, ``tf`` and ``cv2``.

    NOTE(review): a second ``main`` is defined later in this file and rebinds
    the name, so this definition is only reachable if that one is renamed.
    NOTE(review): the nesting below was reconstructed from a flattened source;
    confirm the placement of the action construction against the original.
    """

    def positive_part(value):
        # Half-wave rectification: negative values are clamped to 0.
        return value if value > 0. else 0.

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing: map pixel values from [0, 255] to [-0.5, 0.5]
    preprocess_input_images = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    # NOTE(review): not referenced again in this function.
    pred_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([8., 40., 1.]), 1) * np.ones(
            (1, agent_args['num_future_steps']))).flatten(), 0)
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)

    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['conv_params'] = np.array(
        [(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels
    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])
    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] *
                                   agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    # Indices into the flattened (history x measurement) vector.
    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(historyLen):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen *
                                               simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    # Ring buffers indexed by ``curr_step % historyLen``.
    img_buffer = np.zeros((historyLen, simulator.resolution[1],
                           simulator.resolution[0], num_channels),
                          dtype='uint8')
    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    act_buffer_ico = np.zeros((agent_args['history_length_ico'], 7))
    curr_step = 0
    old_step = -1
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    ag = Agent(sess, agent_args)

    diff_y = 0
    diff_x = 0
    diff_z = 0
    diff_theta = 0
    iteration = 1  # only used to name the saved checkpoint
    epoch = 200
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50
    skipImage = 1
    skipImageICO = 5
    reflexGain = 0.01
    oldHealth = 0.

    # Create masks for the left and right visual fields.
    # NOTE(review): the original comment says these cover only the upper half
    # of the image (to keep the tracker off the floor pattern), but the slice
    # ``half_height:`` selects rows from half_height to the end - confirm the
    # image orientation.
    half_height = round(height / 2)
    half_width = round(width / 2)
    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    lk_params = dict(winSize=(15, 15),
                     maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                               10, 0.03))
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.03,
                          minDistance=7,
                          blockSize=7)

    imgCentre = np.array([
        simulator_args['resolution'][0] / 2,
        simulator_args['resolution'][1] / 2
    ])
    print("Image centre: ", imgCentre)

    simpleInputs1 = np.zeros((width, height))
    simpleInputs2 = np.zeros((width, height))
    input_buff = np.zeros((1, width * height))
    target_buff = np.zeros((1, 1))
    meas_buff = np.zeros((1, simulator.num_meas))
    netOut = 0.
    netErr = np.zeros((width, height))
    delta = 0.

    while not term:
        if curr_step < historyLen:
            # Warm-up: send a zero action until the history buffers are full.
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ",
                  np.amin(img))

            if curr_step == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]
        else:
            img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
            img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
            state = simulator._game.get_state()
            stateImg = state.screen_buffer

            greyImg1 = np.sum(img1, axis=0)
            greyImg2 = cv2.resize(stateImg, (width, height))
            greyImg2 = np.array(np.sum(greyImg2, axis=2) / 3, dtype='uint8')

            # Periodically re-detect the tracked feature points (``img`` is
            # the frame returned by the previous step).
            if curr_step % updatePtsFreq == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            p1Left, st, err = cv2.calcOpticalFlowPyrLK(
                img1[:, :, 0], img2[:, :, 0], p0Left, None, **lk_params)
            p1Right, st, err = cv2.calcOpticalFlowPyrLK(
                img1[:, :, 0], img2[:, :, 0], p0Right, None, **lk_params)
            flowLeft = (p1Left - p0Left)[:, 0, :]
            flowRight = (p1Right - p0Right)[:, 0, :]

            # Mean projection of each flow vector onto the vector from the
            # image centre to its feature point.
            radialFlowTmpLeft = 0
            radialFlowTmpRight = 0
            for i in range(0, len(p0Left)):
                radialFlowTmpLeft += ((p0Left[i, 0, :] - imgCentre)).dot(
                    flowLeft[i, :]) / float(len(p0Left))
            for i in range(0, len(p0Right)):
                radialFlowTmpRight += ((p0Right[i, 0, :] - imgCentre)).dot(
                    flowRight[i, :]) / float(len(p0Right))

            rotation = act_buffer[(curr_step - 1) % historyLen][6]
            forward = act_buffer[(curr_step - 1) % historyLen][3]

            # keep separate radial errors for left and right fields
            radialFlowLeft = radialFlowLeft + radialFlowInertia * (
                radialFlowTmpLeft - radialFlowLeft)
            radialFlowRight = radialFlowRight + radialFlowInertia * (
                radialFlowTmpRight - radialFlowRight)
            expectFlowLeft = radialGain * forward + (
                rotationGain * rotation if rotation < 0. else 0.)
            expectFlowRight = radialGain * forward - (
                rotationGain * rotation if rotation > 0. else 0.)

            flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorRight = forward * (expectFlowRight - radialFlowRight) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorLeft = positive_part(flowErrorLeft)
            flowErrorRight = positive_part(flowErrorRight)
            icoSteer = 0.

            if curr_step > 100:
                health = meas[1]

                # Don't run any networks when the player is dead!
                if (health < 101. and health > 0.):
                    # Thresholded flow errors scaled by 1 / reflexGain.  The
                    # original wrote ``x if x > 0. else 0. / reflexGain``,
                    # which divides only the else-branch; the rectified value
                    # is now divided as a whole.
                    # NOTE(review): these three values are not consumed below.
                    icoInLeft = positive_part(flowErrorLeft -
                                              errorThresh) / reflexGain
                    icoInRight = positive_part(flowErrorRight -
                                               errorThresh) / reflexGain
                    icoInSteer = icoInRight - icoInLeft

                    centre, bottomLeft, topRight, colourStrength = \
                        getMaxColourPos(stateImg, [255, 0, 0])
                    colourSteer = imgCentre[0]
                    # NOTE(review): colourSteer equals imgCentre[0] at this
                    # point, so delta is always 0 - confirm whether it should
                    # be computed after the bounding-box update below.
                    delta = (colourSteer - imgCentre[0]) / width

                    # Only steer towards the detection when its bounding box
                    # is narrower than width / 3 and shorter than height / 2.
                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(
                            0.5 * (topRight[0] - bottomLeft[0]))

                    # get the setpoint in the -.9/+.9 range
                    simpleInputs1[:, :] = 0.1 * np.random.rand(width, height)
                    simpleInputs2[:, :] = 0.1 * np.random.rand(width, height)

                    greyImg2 = cv2.filter2D(greyImg2, -1, edge)
                    input_buff[0, :] = np.ndarray.flatten(
                        preprocess_input_images(greyImg2))
                    target_buff[...] = delta + netOut
                    meas_buff[0, :] = meas

                    # NOTE(review): ``meas`` is passed here although
                    # ``meas_buff`` was just filled - confirm which is meant.
                    ag.act_ffnet(input_buff, meas, target_buff)
                    netOut = ag.ext_ffnet_output[0]

                    netErr[:, :] = 0.

                    # ``colourStrength2`` in the original is never defined;
                    # the strength returned by getMaxColourPos is used.
                    diff_theta = diff_theta + 0.01 * colourStrength * (
                        colourSteer - imgCentre[0]) / width

            # Build the action: everything zero except slot 6.
            curr_act = np.zeros(7).tolist()
            curr_act[0] = 0
            curr_act[1] = 0
            curr_act[2] = 0
            curr_act[3] = 0.
            curr_act[4] = 0
            curr_act[5] = 0
            curr_act[6] = curr_act[6] + diff_theta

            img, meas, rwrd, term = simulator.step(curr_act)
            # Cap the first measurement at 30.
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

        if not term:
            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]
        curr_step += 1

    simulator.close_game()
    ag.save(
        '/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/' +
        'hack-' + str(iteration))
def main():
    """Run the random-action / feed-forward-net experiment in the simulator.

    The function builds the simulator and agent configuration dictionaries,
    starts a ``DoomSimulator`` (160x120, ``'GRAY'`` colour mode), creates an
    ``Agent`` and loads it from ``'./checkpoints'``.  It then steps the game
    until the simulator reports termination:

    * for the first ``history_length`` steps a random action from
      ``ag.random_actions`` is sent;
    * afterwards a random action is drawn (replaced by ``replacement_act``
      when its first six entries match ``acts_to_replace``), entries 2 and 3
      are overridden, the game is stepped and ``ag.act_ffnet`` is called with
      the flattened history and the preprocessed new frame.  Every ``epoch``
      steps the net output, target and ``diff_image`` are written with
      ``np.save``.

    When the loop ends the game is closed and the agent is saved.

    Relies on names defined outside this function: ``DoomSimulator``,
    ``Agent``, ``np``, ``tf`` and ``os``.

    NOTE(review): an earlier ``main`` in this file is shadowed by this
    definition.
    NOTE(review): the nesting below was reconstructed from a flattened source;
    confirm the placement of the ``np.random.normal`` resets.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (160, 120)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'GRAY'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing: map pixel values from [0, 255] to [-0.5, 0.5]
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    pred_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([8., 40., 1.]), 1) * np.ones(
            (1, agent_args['num_future_steps']))).flatten(), 0)
    agent_args['postprocess_predictions'] = lambda x: x * pred_scale_coeffs
    agent_args['discrete_controls_manual'] = range(6, 12)
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['opposite_button_pairs'] = [(0, 1), (2, 3)]

    # net parameters
    agent_args['conv_params'] = np.array(
        [(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['test_checkpoint'] = 'model'
    history_length = agent_args['history_length']

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    print('started simulator')

    agent_args['discrete_controls'] = simulator.discrete_controls
    agent_args['continuous_controls'] = simulator.continuous_controls
    agent_args['state_imgs_shape'] = (history_length * simulator.num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])
    agent_args['n_ffnet_hidden'] = np.array([50, 50])

    # Indices into the flattened (history x measurement) vector.
    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(history_length):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(history_length *
                                               simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (history_length - 1)
            for i in agent_args['meas_for_manual_init']
        ])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))

    # Ring buffers indexed by ``curr_step % history_length``.
    img_buffer = np.zeros((history_length, simulator.num_channels,
                           simulator.resolution[1], simulator.resolution[0]))
    meas_buffer = np.zeros((history_length, simulator.num_meas))
    act_buffer = np.zeros((history_length, 6))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)
    agent_args['n_ffnet_meas'] = len(np.ndarray.flatten(meas_buffer))
    agent_args['n_ffnet_act'] = len(np.ndarray.flatten(act_buffer))

    ag = Agent(sess, agent_args)
    ag.load('./checkpoints')

    # Six-element action prefixes that get swapped for ``replacement_act``.
    acts_to_replace = [
        a + b + d + e for a in [[0, 0], [1, 1]] for b in [[0, 0], [1, 1]]
        for d in [[0]] for e in [[0], [1]]
    ]
    print(acts_to_replace)
    replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    # MOVE_FORWARD MOVE_BACKWARD TURN_LEFT TURN_RIGHT ATTACK SPEED
    # SELECT_WEAPON2 SELECT_WEAPON3 SELECT_WEAPON4 SELECT_WEAPON5
    # SELECT_WEAPON6 SELECT_WEAPON7

    diff_y = 0
    diff_x = 0
    diff_z = 0
    inertia = 0.5
    iteration = 1  # only used to name the saved checkpoint
    epoch = 200
    # NOTE(review): computed but unused; the saves below write to the
    # hard-coded '/home/paul' directory instead.
    userdoc = os.path.join(os.path.expanduser("~"), "Documents")

    while not term:
        if curr_step < history_length:
            # Warm-up: random actions until the history buffers are full.
            curr_act = np.squeeze(ag.random_actions(1)).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
        else:
            # Buffer slots of the last ``history_length`` steps, oldest first.
            recent = np.arange(curr_step - history_length,
                               curr_step) % history_length
            state_imgs = np.transpose(
                np.reshape(img_buffer[recent],
                           (1, ) + agent_args['state_imgs_shape']),
                [0, 2, 3, 1])
            state_meas = np.reshape(
                meas_buffer[recent],
                (1, history_length * simulator.num_meas))

            curr_act = np.squeeze(ag.random_actions(1)[0]).tolist()
            if curr_act[:6] in acts_to_replace:
                # Copy: curr_act is modified below and must not alter the
                # shared ``replacement_act`` template.
                curr_act = list(replacement_act)

            # NOTE(review): ``hack`` is filled in but never passed to the
            # simulator; ``curr_act`` is what gets stepped.
            hack = [0] * len(curr_act)
            hack[6] = diff_x
            hack[8] = -diff_y * 0.2
            hack[3] = 0  # diff_z

            curr_act[2] = 0
            curr_act[3] = 10
            img, meas, rwrd, term = simulator.step(curr_act)
            # Cap the first measurement at 30.
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

            if img is not None:
                ag.act_ffnet(
                    np.ndarray.flatten(state_imgs),
                    np.ndarray.flatten(state_meas),
                    np.array(np.ndarray.flatten(act_buffer), dtype='float64'),
                    np.ndarray.flatten(ag.preprocess_input_images(img)))

                # NOTE(review): the first two assignments are overwritten by
                # the third; only the preprocessed frame is used below.
                diff_image = np.absolute(
                    np.reshape(np.array(ag.ext_ffnet_output),
                               [img.shape[0], img.shape[1]]) -
                    ag.preprocess_input_images(img))
                diff_image = np.absolute(
                    ag.preprocess_input_images(
                        img_buffer[(curr_step - 1) % history_length] -
                        ag.preprocess_input_images(img)))
                diff_image = ag.preprocess_input_images(img)

                # Low-pass filtered, centred position of the strongest column
                # and row of diff_image.
                diff_x = diff_x + inertia * (
                    (np.argmax(diff_image.sum(axis=0)) /
                     float(diff_image.shape[1])) - 0.5 - diff_x)
                # The original started this update from diff_x (copy-paste
                # slip); the filter for diff_y must start from diff_y.
                diff_y = diff_y + inertia * (
                    (np.argmax(diff_image.sum(axis=1)) /
                     float(diff_image.shape[0])) - 0.5 - diff_y)

                if curr_step % epoch == 0:
                    print("saving...")
                    np.save(
                        os.path.join('/home/paul', "hack"),
                        np.reshape(np.array(ag.ext_ffnet_output),
                                   [img.shape[0], img.shape[1]]))
                    np.save(os.path.join('/home/paul', "target"),
                            ag.preprocess_input_images(img))
                    np.save(os.path.join('/home/paul', "diff"), diff_image)

                    # NOTE(review): placement inside the periodic block is a
                    # reconstruction - confirm against the original.
                    diff_x = np.random.normal(0, 2)
                    diff_z = np.random.normal(10, 2)

        if not term:
            img_buffer[curr_step % history_length] = img
            meas_buffer[curr_step % history_length] = meas
            act_buffer[curr_step % history_length] = curr_act[:6]
        curr_step += 1

    simulator.close_game()
    ag.save(
        '/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/' +
        'hack-' + str(iteration))