def main():
    """Run the ICO / back-prop steering agent in the Doom simulator until the episode ends.

    Builds the simulator and Agent configuration, restores a checkpoint when
    ``checkpoints/checkpoint`` exists, then loops:

    * the first ``historyLen`` steps send an all-zero action and fill the
      image / measurement / action ring buffers;
    * every later step estimates the mean radial optical flow in the left and
      right visual fields and derives clamped flow errors from it;
    * once ``curr_step > 100`` and the player is alive, the red target is
      located with ``getMaxColourPos``, the normalised frame is fed to the
      Agent, and the turn command is reflex + ``netGain * netOut``.  A
      checkpoint is written whenever ``curr_step`` is a multiple of ``epoch``.

    Relies on module-level names not defined in this function: ``widthIn``,
    ``heightIn``, ``width``, ``height``, ``DoomSimulator``, ``Agent``,
    ``getMaxColourPos``, ``cv2`` and ``tf``.

    NOTE(review): the source this was restored from had lost all line breaks
    and indentation; block nesting was reconstructed from syntax and should be
    confirmed against version control.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)

    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['net_type'] = "fc"
    # agent_args['net_type'] = "conv"
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
                                         dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128,)], dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128,), (128,), (128,)], dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256,), (256,), (-1,)], dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]), np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels
    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])
    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] * agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    # Indices (into the stacked measurement history) that are fed to the net.
    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = np.array([
            i + simulator.num_meas * ns
            for ns in range(historyLen)
            for i in agent_args['meas_for_net_init']
        ])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen * simulator.num_meas)

    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']),)

    # Ring buffers holding the last historyLen frames / measurements / actions.
    img_buffer = np.zeros((historyLen, simulator.resolution[1], simulator.resolution[0], num_channels),
                          dtype='uint8')
    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ", agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    ag = Agent(sess, agent_args)

    # NOTE(review): existence is tested on a path relative to the working
    # directory while the load uses an absolute path - confirm this is intended.
    if os.path.isfile("checkpoints/checkpoint"):
        ag.load('/home/paul/Dev/GameAI/vizdoom_cig2017/icodoom/ICO1/checkpoints/')
        print("model loaded..")
    else:
        print("No model file, initialising...")

    diff_z = 0
    diff_theta = 0
    epoch = 200
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50
    reflexGain = 1E-4
    flowGain = 0.  # with a zero gain the optical-flow term contributes nothing to steering
    netGain = 10.

    # create masks for left and right visual fields
    # NOTE(review): the original comment says the masks cover only the upper
    # half of the image (to avoid the floor pattern), but rows half_height:
    # are selected here - confirm which half is intended.
    half_height = round(height / 2)
    half_width = round(width / 2)
    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1

    lk_params = dict(winSize=(15, 15), maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    feature_params = dict(maxCorners=500, qualityLevel=0.03, minDistance=7, blockSize=7)

    imgCentre = np.array([int(simulator_args['resolution'][0] / 2),
                          int(simulator_args['resolution'][1] / 2)])
    print("Image centre: ", imgCentre)

    cheatInputs = np.zeros((width, height))
    input_buff = np.zeros((1, width * height))
    target_buff = np.zeros((1, 1))
    netOut = 0.
    delta = 0.
    stepsAlive = 0  # steps since the player was last seen dead; gates the delayed reflex

    def meanRadialFlow(prevGrey, nextGrey, points):
        """Return the mean radial (away-from-centre) flow component of the tracked points.

        Returns 0. when there is nothing to track: cv2.goodFeaturesToTrack can
        come back as None when no corner passes the quality threshold.
        """
        if points is None or len(points) == 0:
            return 0.
        nextPoints, _status, _err = cv2.calcOpticalFlowPyrLK(prevGrey, nextGrey, points, None, **lk_params)
        flow = (nextPoints - points)[:, 0, :]
        offsets = points[:, 0, :] - imgCentre
        # dot(offset, flow) per point, averaged over all points
        return float(np.mean(np.sum(offsets * flow, axis=1)))

    while not term:
        if curr_step < historyLen:
            # Warm-up: stand still until the ring buffers are full.
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ", np.amin(img))

            if curr_step == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskLeft, **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskRight, **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]
        else:
            img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
            img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
            state = simulator._game.get_state()
            stateImg = state.screen_buffer

            # Periodically re-seed the tracked corners from the latest frame.
            if curr_step % updatePtsFreq == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskLeft, **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskRight, **feature_params)

            radialFlowTmpLeft = meanRadialFlow(img1[:, :, 0], img2[:, :, 0], p0Left)
            radialFlowTmpRight = meanRadialFlow(img1[:, :, 0], img2[:, :, 0], p0Right)

            rotation = act_buffer[(curr_step - 1) % historyLen][6]
            forward = act_buffer[(curr_step - 1) % historyLen][3]

            # keep separate radial errors for left and right fields (low-pass filtered)
            radialFlowLeft = radialFlowLeft + radialFlowInertia * (radialFlowTmpLeft - radialFlowLeft)
            radialFlowRight = radialFlowRight + radialFlowInertia * (radialFlowTmpRight - radialFlowRight)
            expectFlowLeft = radialGain * forward + (rotationGain * rotation if rotation < 0. else 0.)
            expectFlowRight = radialGain * forward - (rotationGain * rotation if rotation > 0. else 0.)

            flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (1. + rotationGain * np.abs(rotation))
            flowErrorRight = forward * (expectFlowRight - radialFlowRight) / (1. + rotationGain * np.abs(rotation))
            flowErrorLeft = flowErrorLeft if flowErrorLeft > 0. else 0.
            flowErrorRight = flowErrorRight if flowErrorRight > 0. else 0.

            if curr_step > 100:
                health = meas[1]

                if health < 0.1:
                    stepsAlive = 0

                # Don't run any networks when the player is dead!
                if (health < 101. and health > 0.):
                    # Difference of the thresholded right/left flow errors.  The
                    # original chained conditional expressions bound so that the
                    # left term was only reached when the right test failed.
                    icoInSteer = flowGain * (max(flowErrorRight - errorThresh, 0.)
                                             - max(flowErrorLeft - errorThresh, 0.))

                    centre, bottomLeft, topRight, colourStrength = getMaxColourPos(stateImg, [255, 0, 0])
                    colourSteer = imgCentre[0]
                    cheatInputs = stateImg * 1.

                    # Accept the detection only if its bounding box is small enough.
                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(0.5 * (topRight[0] - bottomLeft[0]))
                        # cv2.imwrite("/home/paul/tmp/Backup/rect-" + str(curr_step) + ".jpg", cheatInputs)

                    # Draw the steering setpoint into the frame the net sees.
                    cv2.arrowedLine(cheatInputs, (colourSteer, imgCentre[1] + 10), (colourSteer, imgCentre[1]),
                                    color=(255, 255, 255), thickness=2)

                    cheatInputs = np.array(np.sum(cheatInputs, axis=2) / 3)
                    # cv2.imwrite("/home/paul/tmp/Backup/cheat-" + str(curr_step) + ".jpg", cheatInputs)

                    # Zero-mean / unit-variance input; skip the scaling for a
                    # constant frame so the net is never fed NaNs.
                    input_buff[0, :] = np.ndarray.flatten(cheatInputs)
                    input_buff = input_buff - np.mean(input_buff)
                    inputStd = np.sqrt(np.var(input_buff))
                    if inputStd > 0.:
                        input_buff = input_buff / inputStd

                    # We want the reflex to be delayed with respect to the image input.
                    # Otherwise the learning can never reduce the error to zero no matter
                    # how good the controller.
                    if stepsAlive > 2:
                        delta = (float(colourSteer) - float(imgCentre[0])) / float(width)

                        target_buff[...] = delta + netOut
                        # target_buff[...] = delta
                        # target_buff[...] = 0.2

                        ag.act(input_buff, meas, target_buff)
                        if ag.net_type == 'conv':
                            netOut = np.ndarray.flatten(ag.ext_covnet_output)[0].flatten()[0]
                        elif ag.net_type == 'fc':
                            netOut = np.ndarray.flatten(ag.ext_fcnet_output)[0].flatten()[0]

                        print(" *** ", delta, delta + netOut, netGain * netOut, ag.learning_rate)
                    else:
                        delta = 0

                    diff_theta = 0.6 * max(min((icoInSteer), 5.), -5.)
                    diff_theta = diff_theta + reflexGain * colourStrength * delta

                    curr_act = np.zeros(7).tolist()
                    curr_act[0] = 0
                    curr_act[1] = 0
                    curr_act[2] = 0  # shooting is disabled in this script
                    curr_act[3] = curr_act[3] + diff_z
                    curr_act[4] = 0
                    curr_act[5] = 0.
                    curr_act[6] = diff_theta + netGain * netOut
                    stepsAlive += 1

                    if curr_step % epoch == 0:
                        ag.save('/home/paul/Dev/GameAI/vizdoom_cig2017/icodoom/ICO1/checkpoints/BP', curr_step)

            img, meas, rwrd, term = simulator.step(curr_act)
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]

        curr_step += 1

    simulator.close_game()
def main():
    """Replay a trained IntelAct agent in the Doom simulator until the episode terminates.

    Configures the simulator (160x120, grayscale, frame skip 2) and the Agent,
    loads weights from ``./checkpoints`` and then steps the game: random
    actions while the history buffers fill, network actions afterwards.  Any
    network action whose first six buttons match an entry of
    ``acts_to_replace`` is swapped for ``replacement_act``.

    Relies on module-level ``DoomSimulator``, ``Agent``, ``np`` and ``tf``.

    NOTE(review): the source this was restored from had lost all line breaks
    and indentation; block nesting was reconstructed from syntax and should be
    confirmed against version control.
    """
    # ---- simulator settings ----
    simulator_args = {
        'config': 'config/config.cfg',
        'resolution': (160, 120),
        'frame_skip': 2,
        'color_mode': 'GRAY',
        'game_args': "+name IntelAct +colorset 7",
    }

    # ---- agent settings ----
    num_future_steps = 6
    # One scale factor per predicted measurement, repeated for each future
    # step and laid out as a single row of 3 * num_future_steps values.
    per_meas_scale = np.expand_dims(np.array([8., 40., 1.]), 1)
    pred_scale_coeffs = np.expand_dims(
        (per_meas_scale * np.ones((1, num_future_steps))).flatten(), 0)
    meas_for_net_init = range(3)
    fc_dtype = [('out_dims', int)]

    agent_args = {
        # preprocessing
        'preprocess_input_images': lambda x: x / 255. - 0.5,
        'preprocess_input_measurements': lambda x: x / 100. - 0.5,
        'num_future_steps': num_future_steps,
        'postprocess_predictions': lambda x: x * pred_scale_coeffs,
        'discrete_controls_manual': range(6, 12),
        'meas_for_net_init': meas_for_net_init,
        'meas_for_manual_init': range(3, 16),
        'opposite_button_pairs': [(0, 1), (2, 3)],
        # net parameters
        'conv_params': np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
                                dtype=[('out_channels', int), ('kernel', int), ('stride', int)]),
        'fc_img_params': np.array([(128, )], dtype=fc_dtype),
        'fc_meas_params': np.array([(128, ), (128, ), (128, )], dtype=fc_dtype),
        'fc_joint_params': np.array([(256, ), (256, ), (-1, )], dtype=fc_dtype),
        'target_dim': num_future_steps * len(meas_for_net_init),
        # experiment arguments
        'test_objective_params': (np.array([5, 11, 17]), np.array([1., 1., 1.])),
        'history_length': 1,
        'test_checkpoint': 'model',
    }

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    print('started simulator')

    hist_len = agent_args['history_length']
    num_meas = simulator.num_meas

    agent_args['discrete_controls'] = simulator.discrete_controls
    agent_args['continuous_controls'] = simulator.continuous_controls
    agent_args['state_imgs_shape'] = (hist_len * simulator.num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])

    # Positions of the net-visible measurements inside the stacked history.
    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = np.array([
            i + num_meas * ns
            for ns in range(hist_len)
            for i in agent_args['meas_for_net_init']
        ])
    else:
        agent_args['meas_for_net'] = np.arange(hist_len * num_meas)

    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        newest_offset = num_meas * (hist_len - 1)
        agent_args['meas_for_manual'] = np.array(
            [i + newest_offset for i in agent_args['meas_for_manual_init']])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    # ---- network ----
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    session_config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
    sess = tf.Session(config=session_config)
    ag = Agent(sess, agent_args)
    ag.load('./checkpoints')

    # ---- history ring buffers ----
    img_buffer = np.zeros((hist_len, simulator.num_channels,
                           simulator.resolution[1], simulator.resolution[0]))
    meas_buffer = np.zeros((hist_len, num_meas))
    curr_step = 0
    term = False

    # Six-button patterns that get overridden by replacement_act.
    paired = [[0, 0], [1, 1]]
    acts_to_replace = [
        a + b + d + e
        for a in paired
        for b in paired
        for d in [[0]]
        for e in [[0], [1]]
    ]
    print(acts_to_replace)
    # MOVE_FORWARD MOVE_BACKWARD TURN_LEFT TURN_RIGHT ATTACK SPEED
    # SELECT_WEAPON2 SELECT_WEAPON3 SELECT_WEAPON4 SELECT_WEAPON5 SELECT_WEAPON6 SELECT_WEAPON7
    replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]

    while not term:
        if curr_step >= hist_len:
            # Ring-buffer slots of the last hist_len steps, oldest first.
            slots = np.arange(curr_step - hist_len, curr_step) % hist_len
            stacked_imgs = np.reshape(img_buffer[slots], (1, ) + agent_args['state_imgs_shape'])
            state_imgs = np.transpose(stacked_imgs, [0, 2, 3, 1])
            state_meas = np.reshape(meas_buffer[slots], (1, hist_len * num_meas))

            chosen = ag.act(state_imgs, state_meas, agent_args['test_objective_params'])
            curr_act = np.squeeze(chosen[0]).tolist()
            if curr_act[:6] in acts_to_replace:
                curr_act = replacement_act

            img, meas, rwrd, term = simulator.step(curr_act)
            # Cap the first measurement at 30.
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.
        else:
            # History not full yet: act randomly.
            warmup_act = np.squeeze(ag.random_actions(1)).tolist()
            img, meas, rwrd, term = simulator.step(warmup_act)

        if not term:
            slot = curr_step % hist_len
            img_buffer[slot] = img
            meas_buffer[slot] = meas
        curr_step += 1

    simulator.close_game()
def main():
    """Run the ICO feed-forward-net steering agent in the Doom simulator and save it on exit.

    Builds the simulator and Agent configuration, then loops:

    * the first ``historyLen`` steps send an all-zero action and fill the
      image / measurement / action ring buffers;
    * every later step estimates the mean radial optical flow in the left and
      right visual fields;
    * once ``curr_step > 100`` and the player is alive, the red target is
      located with ``getMaxColourPos``, the edge-filtered grey frame is fed to
      ``ag.act_ffnet`` with target ``delta + netOut``, and the turn command is
      the running sum of the colour reflex.

    After the episode the game is closed and the Agent is saved as ``hack-1``.

    Fixes relative to the previous version: the reflex used the undefined name
    ``colourStrength2``; ``delta`` was computed before the target position was
    known and was therefore always 0; the thresholded flow terms were
    mis-grouped by conditional-expression precedence.

    Relies on module-level names not defined in this function: ``widthIn``,
    ``heightIn``, ``width``, ``height``, ``edge``, ``DoomSimulator``,
    ``Agent``, ``getMaxColourPos``, ``cv2`` and ``tf``.

    NOTE(review): the source this was restored from had lost all line breaks
    and indentation; block nesting was reconstructed from syntax and should be
    confirmed against version control.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    preprocess_input_images = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)

    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
                                         dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )], dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )], dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )], dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]), np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels
    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])
    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] * agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    # Indices (into the stacked measurement history) that are fed to the net.
    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = np.array([
            i + simulator.num_meas * ns
            for ns in range(historyLen)
            for i in agent_args['meas_for_net_init']
        ])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen * simulator.num_meas)

    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    # Ring buffers holding the last historyLen frames / measurements / actions.
    img_buffer = np.zeros((historyLen, simulator.resolution[1], simulator.resolution[0], num_channels),
                          dtype='uint8')
    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ", agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    ag = Agent(sess, agent_args)

    diff_theta = 0
    checkpointIndex = 1  # suffix of the checkpoint written when the episode ends
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50
    reflexGain = 0.01

    # create masks for left and right visual fields
    # NOTE(review): the original comment says the masks cover only the upper
    # half of the image (to avoid the floor pattern), but rows half_height:
    # are selected here - confirm which half is intended.
    half_height = round(height / 2)
    half_width = round(width / 2)
    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1

    lk_params = dict(winSize=(15, 15), maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    feature_params = dict(maxCorners=500, qualityLevel=0.03, minDistance=7, blockSize=7)

    imgCentre = np.array([simulator_args['resolution'][0] / 2,
                          simulator_args['resolution'][1] / 2])
    print("Image centre: ", imgCentre)

    input_buff = np.zeros((1, width * height))
    target_buff = np.zeros((1, 1))
    netOut = 0.
    delta = 0.

    def meanRadialFlow(prevGrey, nextGrey, points):
        """Return the mean radial (away-from-centre) flow component of the tracked points.

        Returns 0. when there is nothing to track: cv2.goodFeaturesToTrack can
        come back as None when no corner passes the quality threshold.
        """
        if points is None or len(points) == 0:
            return 0.
        nextPoints, _status, _err = cv2.calcOpticalFlowPyrLK(prevGrey, nextGrey, points, None, **lk_params)
        flow = (nextPoints - points)[:, 0, :]
        offsets = points[:, 0, :] - imgCentre
        # dot(offset, flow) per point, averaged over all points
        return float(np.mean(np.sum(offsets * flow, axis=1)))

    while not term:
        if curr_step < historyLen:
            # Warm-up: stand still until the ring buffers are full.
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ", np.amin(img))

            if curr_step == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskLeft, **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskRight, **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]
        else:
            img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
            img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
            state = simulator._game.get_state()
            stateImg = state.screen_buffer

            # Grey-scale copy of the current screen at the network's resolution.
            greyImg2 = cv2.resize(stateImg, (width, height))
            greyImg2 = np.array(np.sum(greyImg2, axis=2) / 3, dtype='uint8')

            # Periodically re-seed the tracked corners from the latest frame.
            if curr_step % updatePtsFreq == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskLeft, **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskRight, **feature_params)

            radialFlowTmpLeft = meanRadialFlow(img1[:, :, 0], img2[:, :, 0], p0Left)
            radialFlowTmpRight = meanRadialFlow(img1[:, :, 0], img2[:, :, 0], p0Right)

            rotation = act_buffer[(curr_step - 1) % historyLen][6]
            forward = act_buffer[(curr_step - 1) % historyLen][3]

            # keep separate radial errors for left and right fields (low-pass filtered)
            radialFlowLeft = radialFlowLeft + radialFlowInertia * (radialFlowTmpLeft - radialFlowLeft)
            radialFlowRight = radialFlowRight + radialFlowInertia * (radialFlowTmpRight - radialFlowRight)
            expectFlowLeft = radialGain * forward + (rotationGain * rotation if rotation < 0. else 0.)
            expectFlowRight = radialGain * forward - (rotationGain * rotation if rotation > 0. else 0.)

            flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (1. + rotationGain * np.abs(rotation))
            flowErrorRight = forward * (expectFlowRight - radialFlowRight) / (1. + rotationGain * np.abs(rotation))
            flowErrorLeft = flowErrorLeft if flowErrorLeft > 0. else 0.
            flowErrorRight = flowErrorRight if flowErrorRight > 0. else 0.

            if curr_step > 100:
                health = meas[1]

                # Don't run any networks when the player is dead!
                if (health < 101. and health > 0.):
                    # Thresholded flow errors scaled by 1 / reflexGain.  The
                    # original conditional expressions divided only the 0.
                    # fallback and chained left/right incorrectly.
                    icoInLeft = max(flowErrorLeft - errorThresh, 0.) / reflexGain
                    icoInRight = max(flowErrorRight - errorThresh, 0.) / reflexGain
                    # NOTE(review): icoInSteer is computed but not applied to
                    # the action in this script - confirm that is intended.
                    icoInSteer = icoInRight - icoInLeft

                    centre, bottomLeft, topRight, colourStrength = getMaxColourPos(stateImg, [255, 0, 0])
                    colourSteer = imgCentre[0]

                    # Accept the detection only if its bounding box is small enough.
                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(0.5 * (topRight[0] - bottomLeft[0]))

                    # Horizontal offset of the target from the image centre as a
                    # fraction of the width.  Must be computed after colourSteer
                    # has been updated, otherwise it is always zero.
                    delta = (colourSteer - imgCentre[0]) / width

                    greyImg2 = cv2.filter2D(greyImg2, -1, edge)
                    input_buff[0, :] = np.ndarray.flatten(preprocess_input_images(greyImg2))
                    target_buff[...] = delta + netOut

                    ag.act_ffnet(input_buff, meas, target_buff)
                    netOut = ag.ext_ffnet_output[0]

                    # Colour reflex, accumulated over steps.
                    diff_theta = diff_theta + reflexGain * colourStrength * delta

                    curr_act = np.zeros(7).tolist()
                    curr_act[0] = 0
                    curr_act[1] = 0
                    curr_act[2] = 0
                    curr_act[3] = 0.  # curr_act[3] + diff_z
                    curr_act[4] = 0
                    curr_act[5] = 0
                    curr_act[6] = curr_act[6] + diff_theta

            img, meas, rwrd, term = simulator.step(curr_act)
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]

        curr_step += 1

    simulator.close_game()
    ag.save('/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/' + 'hack-' + str(checkpointIndex))
def main(learning_rate_):
    """Run the FCL (``FCLNet``) steering agent in the Doom simulator until the episode ends.

    After ``historyLen`` warm-up steps with an all-zero action, and once
    ``curr_step > 100`` and the player is alive, each step:

    * locates the target colour ``[0, 0, 255]`` with ``getMaxColourPos``;
    * feeds the normalised grey frame to ``FCLNet`` twice - first with the
      learning rate set to 0 to read the outputs, then with ``learningRate``
      and an error that also includes ``reflexReduceGain * netGain *
      (netOut - netOut1)``;
    * logs one line to ``outFile`` and the per-layer weight distances to
      ``wtdistFile``;
    * turns by ``reflexGain * colourStrength * delta + netGain * (netOut - netOut1)``
      and shoots while ``shoot`` is 1.

    Every transition to "player dead" appends a line to ``KD.txt``.  On exit
    the game and both log files are closed.

    Args:
        learning_rate_: learning rate for ``FCLNet``; anything ``float()`` accepts.

    Relies on module-level names not defined in this function: ``widthIn``,
    ``heightIn``, ``width``, ``height``, ``neuronsPerLayer``, ``FCLNet``,
    ``outFile``, ``wtdistFile``, ``DoomSimulator``, ``getMaxColourPos`` and
    ``saveImage``.

    NOTE(review): the source this was restored from had lost all line breaks
    and indentation; block nesting was reconstructed from syntax and should be
    confirmed against version control.
    """
    learningRate = float(learning_rate_)
    FCLNet.setLearningRate(learningRate)
    print("learning rate ", learningRate, file=outFile)

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name FCL +colorset 7"

    historyLen = 3
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels
    print('started simulator')

    # Ring buffers holding the last historyLen frames / measurements / actions.
    img_buffer = np.zeros((historyLen, simulator.resolution[1], simulator.resolution[0], num_channels),
                          dtype='uint8')
    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    diff_z = 0
    epoch = 200
    reflexGain = 1E-3
    netGain = 40.
    reflexReduceGain = -0.05

    imgCentre = np.array([int(simulator_args['resolution'][0] / 2),
                          int(simulator_args['resolution'][1] / 2)])
    print("Image centre: ", imgCentre)

    input_buff = np.zeros((width * height))
    netOut = 0.
    # netOut1 is read when the action is built even on steps where the network
    # has not run yet, so it needs a defined starting value just like netOut.
    netOut1 = 0.
    netErr = np.zeros(neuronsPerLayer[0])
    delta = 0.
    shoot = 0
    wtDist = np.zeros(FCLNet.getNumLayers())
    stepsAlive = 0  # steps since the player was last seen dead; gates the delayed reflex
    killed = False

    # FCLNet.saveModel("Models/hack.txt")

    while not term:
        if curr_step < historyLen:
            # Warm-up: stand still until the ring buffers are full.
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ", np.amin(img))

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]
        else:
            state = simulator._game.get_state()
            stateImg = state.screen_buffer

            if curr_step > 100:
                health = meas[1]

                if health < 0.1:
                    stepsAlive = 0

                # Log each death exactly once (on the alive -> dead transition).
                if simulator._game.is_player_dead() and not killed:
                    with open("KD.txt", "a") as killLog:
                        killLog.write("0 " + str(curr_step) + " " + str(datetime.now().timestamp()) + "\n")
                    killed = True
                    print("KILLED")

                if not simulator._game.is_player_dead():
                    killed = False

                # Don't run any networks when the player is dead!
                if (health < 101. and health > 0.):
                    icoInSteer = 0.

                    saveImage(curr_step, stateImg)
                    centre, bottomLeft, topRight, colourStrength = getMaxColourPos(
                        stateImg, [0, 0, 255], curr_step)
                    colourSteer = imgCentre[0]

                    # Accept the detection only if its bounding box is small enough.
                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(0.5 * (topRight[0] - bottomLeft[0]))
                        shoot = 1

                    # Zero-mean / unit-variance grey input; skip the scaling for
                    # a constant frame so the net is never fed NaNs.
                    rawInputs = np.array(np.sum(stateImg, axis=2) / 3)
                    input_buff[:] = np.ndarray.flatten(rawInputs)
                    input_buff = input_buff - np.mean(input_buff)
                    inputStd = np.sqrt(np.var(input_buff))
                    if inputStd > 0.:
                        input_buff = input_buff / inputStd

                    # We want the reflex to be delayed with respect to the image input.
                    # Otherwise the learning can never reduce the error to zero no matter
                    # how good the controller.
                    if stepsAlive > 2:
                        delta = (float(colourSteer) - float(imgCentre[0])) / float(width)

                        # Only shoot when the target is (almost) centred.
                        if np.abs(delta) > 0.01:
                            shoot = 0

                        netErr[:] = delta

                        # First pass with learning switched off, to read the outputs.
                        FCLNet.setLearningRate(0.)
                        FCLNet.doStep(input_buff, netErr)
                        netOut = FCLNet.getOutput(0) + 0.3 * FCLNet.getOutput(1) + 0.1 * FCLNet.getOutput(2)
                        netOut1 = FCLNet.getOutput(3) + 0.3 * FCLNet.getOutput(4) + 0.1 * FCLNet.getOutput(5)

                        # Second pass learns from the reflex error plus a term
                        # proportional to the network's own steering output.
                        netErr += reflexReduceGain * netGain * (netOut - netOut1)
                        FCLNet.setLearningRate(learningRate)
                        FCLNet.doStep(input_buff, netErr)
                        netOut = FCLNet.getOutput(0) + 0.3 * FCLNet.getOutput(1) + 0.1 * FCLNet.getOutput(2)
                        netOut1 = FCLNet.getOutput(3) + 0.3 * FCLNet.getOutput(4) + 0.1 * FCLNet.getOutput(5)

                        for i in range(FCLNet.getNumLayers()):
                            wtDist[i] = FCLNet.getLayer(i).getWeightDistanceFromInitialWeights()

                        print(curr_step, delta, netErr[0], netOut - netOut1, health, file=outFile)
                        print(' '.join(map(str, wtDist)), file=wtdistFile)
                    else:
                        delta = 0

                    diff_theta = 0.6 * max(min((icoInSteer), 5.), -5.)
                    netErr[:] = 0.
                    diff_theta = diff_theta + reflexGain * colourStrength * delta

                    curr_act = np.zeros(7).tolist()
                    curr_act[0] = 0
                    curr_act[1] = 0
                    curr_act[2] = shoot
                    curr_act[3] = curr_act[3] + diff_z
                    curr_act[4] = 0
                    curr_act[5] = 0.
                    curr_act[6] = diff_theta + netGain * (netOut - netOut1)
                    stepsAlive += 1

                    if curr_step % epoch == 0:
                        # uncomment to write models to file
                        # if not os.path.exists("Models"):
                        #     os.makedirs("Models")
                        # FCLNet.saveModel("Models/BP-" + str(curr_step) + ".txt")
                        # with open("Models/checkpoint", 'w') as checkpointFile:
                        #     checkpointFile.write("Models/BP-" + str(curr_step) + ".txt")
                        pass

            img, meas, rwrd, term = simulator.step(curr_act)
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]

        curr_step += 1

    simulator.close_game()
    outFile.close()
    wtdistFile.close()
def main():
    """Run the optic-flow / colour-tracking steering experiment in Doom.

    Starts a ``DoomSimulator`` at ``(widthIn, heightIn)``, launches the
    module-level ``plotWeights`` on a background thread and then steps the
    game until the simulator reports termination:

    * During the first ``historyLen`` steps an all-zero action is sent and
      the image / measurement / action ring buffers are filled.
    * Afterwards ``cv2.calcOpticalFlowPyrLK`` tracks feature points inside a
      left and a right image mask, the per-side radial flow is compared with
      the flow expected from the previous action, the colour ``[255, 0, 0]``
      is located with ``getMaxColourPos`` and the edge-filtered grey frame is
      fed to the module-level ``deepBP`` network together with the colour
      error.  Once ``curr_step`` exceeds 100 and the health measurement lies
      strictly between 0 and 101, the turn command (action index 6) is
      rebuilt from those three signals; otherwise the previous action is
      re-sent.

    The simulator is closed when the loop ends, also when it ends with an
    exception.

    Relies on names defined elsewhere in this file: ``widthIn``,
    ``heightIn``, ``width``, ``height``, ``DoomSimulator``,
    ``getMaxColourPos``, ``deepBP``, ``edge`` and ``plotWeights``.
    """

    def preprocess_input_images(x):
        """Map 0..255 pixel values onto -0.5..0.5."""
        return x / 255. - 0.5

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    # NOTE(review): agent_args is only read back locally (history length);
    # no agent object is constructed from it in this function.
    agent_args = {}

    # preprocessing
    agent_args['preprocess_input_images'] = preprocess_input_images
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)

    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['conv_params'] = np.array(
        [(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels
    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])
    agent_args['n_ffnet_inputs'] = 2 * (agent_args['resolution'][0] *
                                        agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_outputs'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(historyLen):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen *
                                               simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    # Ring buffers indexed by ``curr_step % historyLen``.
    img_buffer = np.zeros((historyLen, simulator.resolution[1],
                           simulator.resolution[0], num_channels),
                          dtype='uint8')
    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    diff_theta = 0
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50

    # Masks restricting feature detection to the left / right visual field.
    # NOTE(review): the original comment says the masks cover the *upper*
    # half of the image, but the slice below selects rows from half_height
    # downwards -- confirm which half is intended.
    half_height = round(height / 2)
    half_width = round(width / 2)
    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    netErr = np.zeros((width, height))

    lk_params = dict(winSize=(15, 15),
                     maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS
                               | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.03,
                          minDistance=7,
                          blockSize=7)

    imgCentre = np.array([
        simulator_args['resolution'][0] / 2,
        simulator_args['resolution'][1] / 2
    ])
    print("Image centre: ", imgCentre)

    # NOTE(review): simpleInputs1/2, input_buff and target_buff are filled
    # below but never read back inside this function.
    simpleInputs1 = np.zeros((width, height))
    simpleInputs2 = np.zeros((width, height))
    input_buff = np.zeros((1, width * height))
    target_buff = np.zeros((1, 1))

    t = threading.Thread(target=plotWeights)
    t.start()

    try:
        while not term:
            if curr_step < historyLen:
                # Warm-up: stand still until the history buffers are primed.
                curr_act = np.zeros(7).tolist()
                img, meas, rwrd, term = simulator.step(curr_act)
                print("Image: ", img.shape, " max: ", np.amax(img), " min: ",
                      np.amin(img))

                if curr_step == 0:
                    p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                     mask=maskLeft,
                                                     **feature_params)
                    p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                      mask=maskRight,
                                                      **feature_params)

                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]
            else:
                img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
                img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
                state = simulator._game.get_state()
                stateImg = state.screen_buffer

                greyImg2 = cv2.resize(stateImg, (width, height))
                greyImg2 = np.array(np.sum(greyImg2, axis=2) / 3,
                                    dtype='uint8')

                # Periodically re-seed the tracked feature points.
                if curr_step % updatePtsFreq == 0:
                    p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                     mask=maskLeft,
                                                     **feature_params)
                    p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                      mask=maskRight,
                                                      **feature_params)

                p1Left, st, err = cv2.calcOpticalFlowPyrLK(
                    img1[:, :, 0], img2[:, :, 0], p0Left, None, **lk_params)
                p1Right, st, err = cv2.calcOpticalFlowPyrLK(
                    img1[:, :, 0], img2[:, :, 0], p0Right, None, **lk_params)
                flowLeft = (p1Left - p0Left)[:, 0, :]
                flowRight = (p1Right - p0Right)[:, 0, :]

                # Mean projection of each flow vector onto the vector from
                # the image centre to its feature point.
                radialFlowTmpLeft = 0
                for point, flow in zip(p0Left[:, 0, :], flowLeft):
                    radialFlowTmpLeft += (point - imgCentre).dot(flow) / float(
                        len(p0Left))
                radialFlowTmpRight = 0
                for point, flow in zip(p0Right[:, 0, :], flowRight):
                    radialFlowTmpRight += (point - imgCentre).dot(
                        flow) / float(len(p0Right))

                rotation = act_buffer[(curr_step - 1) % historyLen][6]
                forward = act_buffer[(curr_step - 1) % historyLen][3]

                # keep separate radial errors for left and right fields
                radialFlowLeft = radialFlowLeft + radialFlowInertia * (
                    radialFlowTmpLeft - radialFlowLeft)
                radialFlowRight = radialFlowRight + radialFlowInertia * (
                    radialFlowTmpRight - radialFlowRight)
                expectFlowLeft = radialGain * forward + (
                    rotationGain * rotation if rotation < 0. else 0.)
                expectFlowRight = radialGain * forward - (
                    rotationGain * rotation if rotation > 0. else 0.)

                flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (
                    1. + rotationGain * np.abs(rotation))
                flowErrorRight = forward * (
                    expectFlowRight - radialFlowRight) / (
                        1. + rotationGain * np.abs(rotation))
                flowErrorLeft = flowErrorLeft if flowErrorLeft > 0. else 0.
                flowErrorRight = flowErrorRight if flowErrorRight > 0. else 0.
                icoSteer = 0.

                if curr_step > 100:
                    health = meas[1]

                    # Don't run any networks when the player is dead!
                    if health < 101. and health > 0.:
                        # Rectified, thresholded flow error per side.  The
                        # original one-line expression chained two
                        # conditional expressions without parentheses, so the
                        # left term was ignored whenever the right term was
                        # positive; its trailing "/ reflexGain" only ever
                        # divided the literal 0., so the scale is left
                        # undivided here.
                        excessRight = flowErrorRight - errorThresh
                        excessLeft = flowErrorLeft - errorThresh
                        excessRight = excessRight if excessRight > 0. else 0.
                        excessLeft = excessLeft if excessLeft > 0. else 0.
                        icoInSteer = excessRight - excessLeft

                        centre1, bottomLeft1, topRight1, colourStrength1 = getMaxColourPos(
                            img1, [255, 0, 0])
                        centre2, bottomLeft2, topRight2, colourStrength2 = getMaxColourPos(
                            img2, [255, 0, 0])
                        colourSteer = centre2[0]

                        simpleInputs1[:, :] = 0.1 * np.random.rand(
                            width, height)
                        simpleInputs2[:, :] = 0.1 * np.random.rand(
                            width, height)
                        print("ColourSteer: ", colourSteer,
                              " ColourStrength: ", colourStrength2)

                        if colourStrength2 > 150.:
                            simpleInputs2[bottomLeft2[0]:topRight2[0],
                                          bottomLeft2[1]:topRight2[1]] = 1.
                        else:
                            # Too weak a detection: drop the colour reflex.
                            colourStrength2 = 0.
                        if colourStrength1 > 150.:
                            simpleInputs1[bottomLeft1[0]:topRight1[0],
                                          bottomLeft1[1]:topRight1[1]] = 1.

                        # Colour error: horizontal offset of the detection
                        # from the image centre, weighted by its strength.
                        delta = 0.06 * colourStrength2 * (
                            colourSteer - imgCentre[0]) / width

                        greyImg2 = cv2.filter2D(greyImg2, -1, edge)
                        input_buff[0, :] = np.ndarray.flatten(
                            preprocess_input_images(greyImg2))
                        target_buff[0, 0] = delta

                        netErr[:, :] = delta
                        deepBP.doStep(
                            preprocess_input_images(greyImg2.flatten()),
                            netErr.flatten())
                        icoSteer = deepBP.getOutput(0)

                        print(" ** ", curr_step, icoSteer, " ", delta, " ",
                              colourStrength2)

                        # Turn command = clamped flow reflex + colour reflex
                        # + network output.
                        diff_theta = 0.6 * max(min((icoInSteer), 5.), -5.)
                        diff_theta = diff_theta + 0.01 * colourStrength2 * (
                            colourSteer - imgCentre[0]) / width
                        diff_theta = diff_theta + 10. * icoSteer

                        # Only the turn slot (index 6) is driven; forward
                        # motion (index 3) is held at 0.
                        curr_act = [0, 0, 0, 0., 0, 0, 0. + diff_theta]

                img, meas, rwrd, term = simulator.step(curr_act)
                if (meas is not None) and meas[0] > 30.:
                    meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]
            curr_step += 1
    finally:
        # Release the game even if a step above raised.
        simulator.close_game()
def main():
    """Run the random-action "hack" experiment with the TensorFlow agent.

    Starts a ``DoomSimulator`` at 160x120 in ``'GRAY'`` colour mode, builds
    an ``Agent`` on a TensorFlow session, loads it from ``'./checkpoints'``
    and then steps the game until the simulator reports termination.  Every
    step draws a random action from the agent; after the first
    ``history_length`` steps action slot 2 is forced to 0 and slot 3 to 10
    before the step, and ``ag.act_ffnet`` is called with the flattened
    image / measurement / action history and the new frame.  Every ``epoch``
    steps three arrays are written with ``np.save`` under ``/home/paul``.
    After the loop the game is closed (also on an exception) and the agent
    is saved.

    Relies on names defined elsewhere in this file: ``DoomSimulator``,
    ``Agent`` and ``tf``.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (160, 120)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'GRAY'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    pred_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([8., 40., 1.]), 1) * np.ones(
            (1, agent_args['num_future_steps']))).flatten(), 0)
    agent_args['postprocess_predictions'] = lambda x: x * pred_scale_coeffs
    agent_args['discrete_controls_manual'] = range(6, 12)
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['opposite_button_pairs'] = [(0, 1), (2, 3)]

    # net parameters
    agent_args['conv_params'] = np.array(
        [(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['test_checkpoint'] = 'model'
    history_length = agent_args['history_length']

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    print('started simulator')

    agent_args['discrete_controls'] = simulator.discrete_controls
    agent_args['continuous_controls'] = simulator.continuous_controls
    agent_args['state_imgs_shape'] = (history_length * simulator.num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])
    agent_args['n_ffnet_hidden'] = np.array([50, 50])

    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(history_length):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(history_length *
                                               simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (history_length - 1)
            for i in agent_args['meas_for_manual_init']
        ])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))

    # Ring buffers indexed by ``curr_step % history_length``.
    img_buffer = np.zeros((history_length, simulator.num_channels,
                           simulator.resolution[1], simulator.resolution[0]))
    meas_buffer = np.zeros((history_length, simulator.num_meas))
    act_buffer = np.zeros((history_length, 6))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)
    agent_args['n_ffnet_meas'] = len(np.ndarray.flatten(meas_buffer))
    agent_args['n_ffnet_act'] = len(np.ndarray.flatten(act_buffer))

    ag = Agent(sess, agent_args)
    ag.load('./checkpoints')

    # Six-element action prefixes that get swapped for replacement_act.
    acts_to_replace = [
        a + b + d + e for a in [[0, 0], [1, 1]] for b in [[0, 0], [1, 1]]
        for d in [[0]] for e in [[0], [1]]
    ]
    print(acts_to_replace)
    # MOVE_FORWARD MOVE_BACKWARD TURN_LEFT TURN_RIGHT ATTACK SPEED
    # SELECT_WEAPON2 SELECT_WEAPON3 SELECT_WEAPON4 SELECT_WEAPON5
    # SELECT_WEAPON6 SELECT_WEAPON7
    replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]

    diff_y = 0
    diff_x = 0
    diff_z = 0
    inertia = 0.5
    # Suffix of the file name written by ag.save at the end; never advanced.
    save_index = 1
    epoch = 200

    try:
        while not term:
            if curr_step < history_length:
                curr_act = np.squeeze(ag.random_actions(1)).tolist()
                img, meas, rwrd, term = simulator.step(curr_act)
            else:
                # Oldest-to-newest slots of the ring buffers.
                recent = np.arange(curr_step - history_length,
                                   curr_step) % history_length
                state_imgs = np.transpose(
                    np.reshape(img_buffer[recent],
                               (1, ) + agent_args['state_imgs_shape']),
                    [0, 2, 3, 1])
                state_meas = np.reshape(
                    meas_buffer[recent],
                    (1, history_length * simulator.num_meas))

                curr_act = np.squeeze(ag.random_actions(1)[0]).tolist()
                if curr_act[:6] in acts_to_replace:
                    # Copy the template: the in-place edits below must not
                    # leak back into replacement_act.
                    curr_act = list(replacement_act)

                # NOTE(review): hack is assembled from the diff_* values but
                # is not what gets sent to the simulator.
                hack = [0] * len(curr_act)
                hack[6] = diff_x
                hack[8] = -diff_y * 0.2
                hack[3] = 0  # diff_z

                curr_act[2] = 0
                curr_act[3] = 10

                img, meas, rwrd, term = simulator.step(curr_act)
                if (meas is not None) and meas[0] > 30.:
                    meas[0] = 30.

                if img is not None:
                    ag.act_ffnet(
                        np.ndarray.flatten(state_imgs),
                        np.ndarray.flatten(state_meas),
                        np.array(np.ndarray.flatten(act_buffer),
                                 dtype='float64'),
                        np.ndarray.flatten(ag.preprocess_input_images(img)))

                    # NOTE(review): the original computed two difference
                    # images first and then overwrote them with the plain
                    # preprocessed frame; only that last value was ever
                    # used, so it is assigned directly here.
                    diff_image = ag.preprocess_input_images(img)

                    # Low-pass the normalised column / row of the strongest
                    # response; each filter tracks its own previous value.
                    diff_x = diff_x + inertia * (
                        (np.argmax(diff_image.sum(axis=0)) /
                         float(diff_image.shape[1])) - 0.5 - diff_x)
                    diff_y = diff_y + inertia * (
                        (np.argmax(diff_image.sum(axis=1)) /
                         float(diff_image.shape[0])) - 0.5 - diff_y)

                    if curr_step % epoch == 0:
                        print("saving...")
                        np.save(
                            os.path.join('/home/paul', "hack"),
                            np.reshape(np.array(ag.ext_ffnet_output),
                                       [img.shape[0], img.shape[1]]))
                        np.save(os.path.join('/home/paul', "target"),
                                ag.preprocess_input_images(img))
                        np.save(os.path.join('/home/paul', "diff"),
                                diff_image)

                    # NOTE(review): the filtered diff_x is replaced by a
                    # random draw straight away; kept as in the original.
                    diff_x = np.random.normal(0, 2)
                    diff_z = np.random.normal(10, 2)

            if not term:
                img_buffer[curr_step % history_length] = img
                meas_buffer[curr_step % history_length] = meas
                act_buffer[curr_step % history_length] = curr_act[:6]
            curr_step += 1
    finally:
        # Release the game even if a step above raised.
        simulator.close_game()

    ag.save(
        '/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/' +
        'hack-' + str(save_index))
def main():
    """Run the episodic colour-steering experiment with the fcnet agent.

    Starts a ``DoomSimulator`` at ``(width, height)``, builds an ``Agent``
    on a TensorFlow session and loads it from the ``checkpoints1`` directory
    when a ``checkpoint`` file exists there.  It then plays ``episodes``
    episodes of at most 500 steps each.  On every step the colour
    ``[255, 0, 0]`` is located with ``getMaxColourPos``; if a sufficiently
    small bounding box is found, an arrow is drawn at its horizontal centre
    into an otherwise empty image, which is passed to ``ag.act_fcnet``.  The
    turn command (action index 6) is ``reflexGain`` times the normalised
    horizontal offset plus ``netGain`` times the network output, and the
    attack slot (index 2) is always 1.  The agent is saved every ``epoch``
    steps and the game is closed at the end, also on an exception.

    Relies on names defined elsewhere in this file: ``width``, ``height``,
    ``DoomSimulator``, ``Agent``, ``tf`` and ``getMaxColourPos``.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (width, height)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)

    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['net_type'] = "fc"
    agent_args['conv_params'] = np.array(
        [(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels
    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])
    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] *
                                   agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-3

    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(historyLen):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen *
                                               simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    ag = Agent(sess, agent_args)

    # One directory for the existence test, the load and the periodic save.
    # The original tested the relative path "checkpoints1/checkpoint" but
    # loaded from and saved to this absolute directory, so the test depended
    # on the current working directory.
    checkpoint_dir = '/home/paul/Dev/GameAI/vizdoom_cig2017/icodoom/ICO1/checkpoints1/'
    if os.path.isfile(os.path.join(checkpoint_dir, "checkpoint")):
        ag.load(checkpoint_dir)
        print("model loaded..")
    else:
        print("No model file, initialising...")

    epoch = 200
    reflexGain = 0.1
    netGain = 0.  # 10.

    imgCentre = np.array([
        int(simulator_args['resolution'][0] / 2),
        int(simulator_args['resolution'][1] / 2)
    ])
    print("Image centre: ", imgCentre)

    input_buff = np.zeros((1, width * height))
    target_buff = np.zeros((1, 1))
    dontshoot = 1
    curr_act = np.zeros(7).tolist()
    episodes = 1000

    simulator._game.init()
    try:
        for _ in range(episodes):
            tc = 0
            simulator._game.new_episode()

            while tc < 500:
                screen_buf, meas, rwrd, term = simulator.step(curr_act)
                if screen_buf is None:
                    break

                # NOTE(review): shoot is throttled here (at most one shot
                # per five steps once tc > 30) but the attack slot of the
                # action below is hard-wired to 1, so this value is unused.
                shoot = 0
                if dontshoot > 1:
                    dontshoot = dontshoot - 1
                elif tc > 30:
                    shoot = 1
                    dontshoot = 5

                centre, bottomLeft, topRight, colourStrength = getMaxColourPos(
                    screen_buf, [255, 0, 0])

                # Default to "straight ahead"; only steer towards a detection
                # whose bounding box is narrower than a third of the width
                # and shorter than half the height.
                colourSteer = imgCentre[0]
                imgRect = np.zeros(screen_buf.shape)
                if (len(bottomLeft) > 0 and len(topRight) > 0
                        and ((topRight[0] - bottomLeft[0]) < width / 3)
                        and ((topRight[1] - bottomLeft[1]) < height / 2)):
                    colourSteer = bottomLeft[0] + int(
                        0.5 * (topRight[0] - bottomLeft[0]))
                    cv2.arrowedLine(imgRect,
                                    (colourSteer, imgCentre[1] + 10),
                                    (colourSteer, imgCentre[1]),
                                    color=(255, 255, 255),
                                    thickness=2)

                # Network input: channel-averaged arrow image, flattened.
                imgRect = np.array(np.sum(imgRect, axis=2) / 3)
                input_buff[0, :] = np.ndarray.flatten(imgRect)

                # Ignore the colour error for the first steps of an episode.
                if tc > 2:
                    delta = (float(colourSteer) -
                             float(imgCentre[0])) / float(width)
                else:
                    delta = 0

                # NOTE(review): the original first stored delta + netOut and
                # then overwrote it with this constant, so the constant is
                # the effective training target -- confirm that is intended.
                target_buff[...] = -0.5
                ag.act_fcnet(input_buff, meas, target_buff)
                netOut = np.ndarray.flatten(
                    ag.ext_fcnet_output)[0].flatten()[0]

                diff_theta = reflexGain * delta
                print(tc, reflexGain * delta, netOut)

                # Always attack (index 2), never move forward (index 3),
                # turn by reflex plus network output (index 6).
                curr_act = [0, 0, 1, 0., 0, 0, diff_theta + netGain * netOut]

                if curr_step % epoch == 0:
                    ag.save(checkpoint_dir + 'BPBasic', curr_step)

                curr_step += 1
                tc += 1
    finally:
        # Release the game even if a step above raised.
        simulator.close_game()
def main():
    """Run the Doom simulator with a colour-tracking reflex and a learning network.

    The function:

    * builds the simulator configuration and starts a ``DoomSimulator``;
    * assembles an ``agent_args`` dictionary (in this function it is only used
      to derive ``historyLen`` and the printed ``state_meas_shape``; it is not
      handed to any other object here);
    * tries to restore a ``deepBP`` model named in ``Models/checkpoint``;
    * loops until the simulator reports ``term``:
        - for the first ``historyLen`` steps it sends an all-zero action and
          fills the image/measurement/action ring buffers;
        - afterwards it tracks features with pyramidal Lucas-Kanade optical
          flow, derives left/right radial flow errors, and (once
          ``curr_step > 100`` and ``0 < health < 101``) computes the horizontal
          offset ``delta`` of the strongest ``[255, 0, 0]`` blob from the image
          centre, runs ``deepBP`` twice (first with learning rate 0, then with
          the current learning rate) and builds the next action;
        - every ``epoch`` steps the model is saved and ``Models/checkpoint``
          is rewritten;
    * closes the game when the loop ends.

    The learning rate is 0 until ``curr_step`` reaches 10000.

    Relies on names defined outside this function: ``widthIn``, ``heightIn``,
    ``width``, ``height``, ``nHidden``, ``deepBP``, ``DeepFeedbackLearning``,
    ``DoomSimulator``, ``getMaxColourPos``, ``cv2``, ``np`` and ``os``.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source. Please confirm in particular (a) that
    ``shoot = 1`` belongs inside the "target found" branch, (b) that only
    ``netErr = netErr[0:1]`` is conditional on the backprop algorithm, and
    (c) that the action is rebuilt only while the player is alive.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)
    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['net_type'] = "fc"
    agent_args['conv_params'] = np.array(
        [(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
        dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels
    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])
    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] *
                                   agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(historyLen):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen * simulator.num_meas)

    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    # Ring buffers holding the last historyLen frames, measurements, actions.
    img_buffer = np.zeros((historyLen, simulator.resolution[1],
                           simulator.resolution[0], num_channels),
                          dtype='uint8')
    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    # Restore the most recent model, if a checkpoint file names one.
    # The file is closed by the `with` block (the previous
    # `checkpointFile.close` lacked parentheses and never closed it).
    try:
        checkpointFile = open("Models/checkpoint")
    except OSError:
        print("No checkpoint found...")
    else:
        with checkpointFile:
            try:
                modelName = checkpointFile.read().splitlines()
                if deepBP.loadModel(modelName[0]):
                    print("loaded from Model file: ", modelName[0])
                else:
                    print("FAILED loading from Model file: ", modelName[0])
            except Exception:
                # Covers an empty checkpoint file (IndexError) and loader
                # failures, without swallowing KeyboardInterrupt/SystemExit.
                print("Checkpoint file contains no valid model")

    kdLogPath = "/home/paul/Dev/GameAI/vizdoom_cig2017/KD.txt"

    diff_z = 0
    epoch = 200  # save the model every `epoch` steps
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50  # re-detect tracked features every this many steps
    reflexGain = 1E-3
    flowGain = 0.
    netGain = 10.
    reflexReduceGain = -0.01

    # create masks for left and right visual fields - note that these only
    # cover the upper half of the image
    # this is to help prevent the tracking getting confused by the floor pattern
    # NOTE(review): the slice `half_height:` selects rows from the middle to
    # the end of the array, which is the lower half if row 0 is the top of the
    # image - confirm against the intent stated above.
    half_height = round(height / 2)
    half_width = round(width / 2)
    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    lk_params = dict(winSize=(15, 15),
                     maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                               10, 0.03))
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.03,
                          minDistance=7,
                          blockSize=7)

    imgCentre = np.array([
        int(simulator_args['resolution'][0] / 2),
        int(simulator_args['resolution'][1] / 2)
    ])
    print("Image centre: ", imgCentre)

    input_buff = np.zeros((width * height))
    target_buff = np.zeros((1, 1))
    meas_buff = np.zeros((1, simulator.num_meas))
    netOut = 0.
    netErr = np.zeros(nHidden[0])
    delta = 0.
    shoot = 0
    # Number of network steps since the last reset; the reflex error is held
    # at zero for the first few of them.
    iterCount = 0
    killed = False

    deepBP.saveModel("Models/hack.txt")

    while not term:
        if curr_step < historyLen:
            # Warm-up: send a null action and fill the history buffers.
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ",
                  np.amin(img))

            if curr_step == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]
        else:
            img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
            img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
            state = simulator._game.get_state()
            stateImg = state.screen_buffer

            if curr_step % updatePtsFreq == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            p1Left, st, err = cv2.calcOpticalFlowPyrLK(
                img1[:, :, 0], img2[:, :, 0], p0Left, None, **lk_params)
            p1Right, st, err = cv2.calcOpticalFlowPyrLK(
                img1[:, :, 0], img2[:, :, 0], p0Right, None, **lk_params)
            flowLeft = (p1Left - p0Left)[:, 0, :]
            flowRight = (p1Right - p0Right)[:, 0, :]

            # Mean projection of each point's flow onto its offset from the
            # image centre, separately for the two visual fields.
            radialFlowTmpLeft = 0
            radialFlowTmpRight = 0
            numLeft = float(len(p0Left))
            numRight = float(len(p0Right))
            for point, flow in zip(p0Left[:, 0, :], flowLeft):
                radialFlowTmpLeft += (point - imgCentre).dot(flow) / numLeft
            for point, flow in zip(p0Right[:, 0, :], flowRight):
                radialFlowTmpRight += (point - imgCentre).dot(flow) / numRight

            rotation = act_buffer[(curr_step - 1) % historyLen][6]
            forward = act_buffer[(curr_step - 1) % historyLen][3]

            # keep separate radial errors for left and right fields
            radialFlowLeft = radialFlowLeft + radialFlowInertia * (
                radialFlowTmpLeft - radialFlowLeft)
            radialFlowRight = radialFlowRight + radialFlowInertia * (
                radialFlowTmpRight - radialFlowRight)
            expectFlowLeft = radialGain * forward + (
                rotationGain * rotation if rotation < 0. else 0.)
            expectFlowRight = radialGain * forward - (
                rotationGain * rotation if rotation > 0. else 0.)

            flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorRight = forward * (expectFlowRight - radialFlowRight) / (
                1. + rotationGain * np.abs(rotation))
            # Only positive flow errors are kept.
            flowErrorLeft = flowErrorLeft if flowErrorLeft > 0. else 0.
            flowErrorRight = flowErrorRight if flowErrorRight > 0. else 0.

            if curr_step > 100:
                health = meas[1]

                if curr_step == 10000:
                    with open(kdLogPath, "a") as kdLog:
                        kdLog.write("Learning on\n")

                # Learning stays switched off for the first 10000 steps.
                if curr_step < 10000:
                    learningRate = 0.
                else:
                    learningRate = 1e-3

                if health < 0.1:
                    iterCount = 0

                # Log each death once, until the player is alive again.
                if simulator._game.is_player_dead() and not killed:
                    with open(kdLogPath, "a") as kdLog:
                        kdLog.write("0\n")
                    killed = True
                    print("KILLED")

                if not simulator._game.is_player_dead():
                    killed = False

                # Don't run any networks when the player is dead!
                if health < 101. and health > 0.:
                    # NOTE(review): Python parses this as
                    #   A if c1 else ((0. - flowGain * B) if c2 else 0.)
                    # which is probably not the intended "right minus left"
                    # difference. Left unchanged because flowGain is 0. here;
                    # confirm the intent before enabling flowGain.
                    icoInSteer = flowGain * (
                        (flowErrorRight - errorThresh) if
                        (flowErrorRight - errorThresh) > 0. else 0. - flowGain *
                        (flowErrorLeft - errorThresh) if
                        (flowErrorLeft - errorThresh) > 0. else 0.)

                    centre, bottomLeft, topRight, colourStrength = \
                        getMaxColourPos(stateImg, [255, 0, 0])
                    colourSteer = imgCentre[0]

                    # Accept the detection only if its bounding box is small
                    # relative to the frame; then steer at its centre and fire.
                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(
                            0.5 * (topRight[0] - bottomLeft[0]))
                        shoot = 1

                    # Channel-averaged frame, then zero mean / unit variance.
                    rawInputs = np.array(np.sum(stateImg, axis=2) / 3)
                    input_buff[:] = np.ndarray.flatten(rawInputs)
                    input_buff = input_buff - np.mean(input_buff)
                    input_buff = input_buff / np.sqrt(np.var(input_buff))

                    # We want the reflex to be delayed with respect to the
                    # image input; otherwise the learning can never reduce the
                    # error to zero, no matter how good the controller is.
                    if iterCount > 2:
                        delta = (float(colourSteer) -
                                 float(imgCentre[0])) / float(width)
                    else:
                        delta = 0

                    # Hold fire while the target is off-centre.
                    if iterCount > 2:
                        if np.abs(delta) > 0.01:
                            shoot = 0

                    netErr[:] = delta
                    target_buff[...] = delta + netOut
                    meas_buff[0, :] = meas

                    if deepBP.getAlgorithm() == DeepFeedbackLearning.backprop:
                        netErr = netErr[0:1]

                    # First pass with learning disabled to read the output,
                    # second pass with the adjusted error and learning enabled.
                    deepBP.setLearningRate(0.)
                    deepBP.doStep(np.ndarray.flatten(input_buff), netErr)
                    netOut = deepBP.getOutput(0)
                    netErr += reflexReduceGain * netGain * netOut
                    deepBP.setLearningRate(learningRate)
                    deepBP.doStep(np.ndarray.flatten(input_buff), netErr)
                    netOut = deepBP.getOutput(0)

                    print(curr_step, delta, netGain * netOut)

                    diff_theta = 0.6 * max(min((icoInSteer), 5.), -5.)
                    netErr[:] = 0.
                    diff_theta = diff_theta + reflexGain * colourStrength * delta

                    curr_act = np.zeros(7).tolist()
                    curr_act[0] = 0
                    curr_act[1] = 0
                    curr_act[2] = shoot
                    curr_act[3] = curr_act[3] + diff_z
                    curr_act[4] = 0
                    curr_act[5] = 0.
                    curr_act[6] = diff_theta + netGain * netOut
                    iterCount += 1

            if curr_step % epoch == 0:
                if not os.path.exists("Models"):
                    os.makedirs("Models")
                deepBP.saveModel("Models/BP-" + str(curr_step) + ".txt")
                with open("Models/checkpoint", 'w') as checkpointOut:
                    checkpointOut.write("Models/BP-" + str(curr_step) + ".txt")

            img, meas, rwrd, term = simulator.step(curr_act)
            # Clamp the first measurement to 30.
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]

        curr_step += 1

    simulator.close_game()