Exemple #1
0
def main():
    """Run one Doom episode with a reflex-plus-network steering controller.

    The function builds the simulator and agent configuration, restores the
    agent when a file named ``checkpoint`` exists in the checkpoint
    directory, and then steps the simulator until it reports termination:

    * For the first ``historyLen`` steps a zero action is sent and the
      image / measurement / action ring buffers are filled.
    * Afterwards, sparse optical flow between the two most recent buffered
      frames is reduced to a smoothed radial-flow value for the left and
      right visual fields and compared with the flow expected from the
      previous action.
    * Once ``curr_step > 100`` and the health measurement lies in
      ``(0, 101)``, the horizontal offset of the region returned by
      ``getMaxColourPos`` becomes the reflex error ``delta``.
      ``delta + netOut`` is handed to ``ag.act`` as the target, and the turn
      command is the sum of the flow term, the reflex term and
      ``netGain * netOut``.

    The agent is saved every ``epoch`` steps and the game is closed on exit.

    Relies on names defined outside this function: ``width``, ``height``,
    ``widthIn``, ``heightIn``, ``getMaxColourPos``, ``DoomSimulator``,
    ``Agent``, ``np``, ``tf``, ``cv2`` and ``os``.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)
    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters ("conv" is the alternative value handled further down)
    agent_args['net_type'] = "fc"
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
                                         dtype=[('out_channels', int), ('kernel', int), ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128,)], dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128,), (128,), (128,)], dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256,), (256,), (-1,)], dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]), np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels

    print('started simulator')

    agent_args['state_imgs_shape'] = (
        historyLen * num_channels, simulator.resolution[1], simulator.resolution[0])

    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] * agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    # Indices into the flattened (history x measurement) vector: the selected
    # measurements are repeated once per history slot.
    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = np.array(
            [i + simulator.num_meas * ns
             for ns in range(historyLen)
             for i in agent_args['meas_for_net_init']])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen * simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array(
            [i + simulator.num_meas * (historyLen - 1)
             for i in agent_args['meas_for_manual_init']])
    else:
        agent_args['meas_for_manual'] = []

    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']),)

    # Ring buffers indexed with ``step % historyLen``.
    img_buffer = np.zeros(
        (historyLen, simulator.resolution[1], simulator.resolution[0], num_channels), dtype='uint8')
    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ", agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    ag = Agent(sess, agent_args)

    # Check for the checkpoint index in the same directory that is loaded
    # from and saved to.  The previous code tested the relative path
    # "checkpoints/checkpoint", which only matched when the working directory
    # happened to be the parent of this directory.
    checkpointDir = '/home/paul/Dev/GameAI/vizdoom_cig2017/icodoom/ICO1/checkpoints/'
    if os.path.isfile(os.path.join(checkpointDir, "checkpoint")):
        ag.load(checkpointDir)
        print("model loaded..")
    else:
        print("No model file, initialising...")

    diff_z = 0
    diff_theta = 0
    epoch = 200                 # save the agent every `epoch` steps
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4     # low-pass factor applied to the radial flow
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50          # re-detect tracked features every N steps
    reflexGain = 1E-4
    flowGain = 0.               # 0 disables the optical-flow steering term
    netGain = 10.

    # Masks selecting the left and right visual fields for feature detection.
    # NOTE(review): the original comment says the masks cover only the UPPER
    # half of the image (to avoid the floor pattern), but the slice
    # ``half_height:`` selects the rows from the middle to the end of the
    # array.  Confirm which half is intended; behaviour is left unchanged.
    half_height = round(height / 2)
    half_width = round(width / 2)

    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    lk_params = dict(winSize=(15, 15), maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    feature_params = dict(maxCorners=500, qualityLevel=0.03, minDistance=7, blockSize=7)

    imgCentre = np.array([int(simulator_args['resolution'][0] / 2),
                          int(simulator_args['resolution'][1] / 2)])
    print("Image centre: ", imgCentre)
    input_buff = np.zeros((1, width * height))
    target_buff = np.zeros((1, 1))
    netOut = 0.
    delta = 0.
    shoot = 0       # computed below but not wired to the attack button yet

    # Number of controller updates since the player was last dead; replaces
    # the former local ``iter``, which shadowed the builtin.
    activeSteps = 0

    def meanRadialFlow(prevGrey, nextGrey, points):
        """Return the mean of (point - imgCentre) . flow over the tracked points.

        Returns 0. when there are no points to track, which happens when
        ``cv2.goodFeaturesToTrack`` finds no corners and returns ``None``.
        """
        if points is None or len(points) == 0:
            return 0.
        tracked, _status, _err = cv2.calcOpticalFlowPyrLK(
            prevGrey, nextGrey, points, None, **lk_params)
        flow = (tracked - points)[:, 0, :]
        offsets = points[:, 0, :] - imgCentre
        return float(np.sum(offsets * flow)) / float(len(points))

    while not term:
        if curr_step < historyLen:
            # Warm-up: send a zero action until the ring buffers are full.
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ", np.amin(img))

            if curr_step == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskLeft, **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskRight, **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]

        else:
            # The two most recently stored frames.
            img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
            img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
            stateImg = simulator._game.get_state().screen_buffer

            if curr_step % updatePtsFreq == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskLeft, **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0], mask=maskRight, **feature_params)

            radialFlowTmpLeft = meanRadialFlow(img1[:, :, 0], img2[:, :, 0], p0Left)
            radialFlowTmpRight = meanRadialFlow(img1[:, :, 0], img2[:, :, 0], p0Right)

            # Components 6 and 3 of the previous action are used as the
            # rotation and forward commands respectively.
            rotation = act_buffer[(curr_step - 1) % historyLen][6]
            forward = act_buffer[(curr_step - 1) % historyLen][3]
            # keep separate radial errors for left and right fields
            radialFlowLeft = radialFlowLeft + radialFlowInertia * (radialFlowTmpLeft - radialFlowLeft)
            radialFlowRight = radialFlowRight + radialFlowInertia * (radialFlowTmpRight - radialFlowRight)
            expectFlowLeft = radialGain * forward + (rotationGain * rotation if rotation < 0. else 0.)
            expectFlowRight = radialGain * forward - (rotationGain * rotation if rotation > 0. else 0.)

            flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (1. + rotationGain * np.abs(rotation))
            flowErrorRight = forward * (expectFlowRight - radialFlowRight) / (1. + rotationGain * np.abs(rotation))
            flowErrorLeft = flowErrorLeft if flowErrorLeft > 0. else 0.
            flowErrorRight = flowErrorRight if flowErrorRight > 0. else 0.

            if curr_step > 100:
                health = meas[1]

                if health < 0.1:
                    # Player is dead: restart the reflex delay counter.
                    activeSteps = 0

                # Don't run any networks when the player is dead!
                if health < 101. and health > 0.:
                    # Flow steering = thresholded right error minus thresholded
                    # left error.  The previous single expression chained two
                    # conditional expressions without parentheses, so it parsed
                    # as ``A if c1 else ((0. - flowGain * B) if c2 else 0.)``.
                    excessRight = flowErrorRight - errorThresh
                    excessRight = excessRight if excessRight > 0. else 0.
                    excessLeft = flowErrorLeft - errorThresh
                    excessLeft = excessLeft if excessLeft > 0. else 0.
                    icoInSteer = flowGain * excessRight - flowGain * excessLeft

                    # NOTE(review): [255, 0, 0] is presumably red, given
                    # color_mode 'RGB24' - confirm against getMaxColourPos.
                    centre, bottomLeft, topRight, colourStrength = getMaxColourPos(stateImg, [255, 0, 0])
                    colourSteer = imgCentre[0]
                    cheatInputs = stateImg * 1.     # float copy to draw on

                    # Accept the detection only when its box is narrower than
                    # a third of the width and shorter than half the height.
                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(0.5 * (topRight[0] - bottomLeft[0]))

                    # Draw the steering position into the network input.
                    # Coordinates are converted to plain ints because OpenCV
                    # does not reliably accept NumPy integer scalars here.
                    cv2.arrowedLine(cheatInputs,
                                    (int(colourSteer), int(imgCentre[1]) + 10),
                                    (int(colourSteer), int(imgCentre[1])),
                                    color=(255, 255, 255), thickness=2)

                    # Average the colour channels to a single grey channel.
                    cheatInputs = np.array(np.sum(cheatInputs, axis=2) / 3)

                    # Zero-mean, unit-variance input; the division is skipped
                    # for a constant image to avoid a divide-by-zero.
                    input_buff[0, :] = np.ndarray.flatten(cheatInputs)
                    input_buff = input_buff - np.mean(input_buff)
                    inputStd = np.sqrt(np.var(input_buff))
                    if inputStd > 0.:
                        input_buff = input_buff / inputStd

                    # Hold the reflex error at zero for the first few active
                    # steps so that the reflex lags the image input.  Original
                    # author's rationale: otherwise learning can never reduce
                    # the error to zero no matter how good the controller.
                    if activeSteps > 2:
                        delta = (float(colourSteer) - float(imgCentre[0])) / float(width)
                        if np.abs(delta) < 0.01:
                            shoot = 1
                    else:
                        delta = 0

                    target_buff[...] = delta + netOut

                    ag.act(input_buff, meas, target_buff)
                    if ag.net_type == 'conv':
                        netOut = np.ndarray.flatten(ag.ext_covnet_output)[0].flatten()[0]
                    elif ag.net_type == 'fc':
                        netOut = np.ndarray.flatten(ag.ext_fcnet_output)[0].flatten()[0]

                    print(" *** ", delta, delta + netOut, netGain * netOut, ag.learning_rate)

                    # Flow term clipped to [-5, 5], plus the colour reflex.
                    diff_theta = 0.6 * max(min((icoInSteer), 5.), -5.)
                    diff_theta = diff_theta + reflexGain * colourStrength * delta

                    curr_act = np.zeros(7).tolist()
                    curr_act[0] = 0
                    curr_act[1] = 0
                    curr_act[2] = 0  # shoot
                    curr_act[3] = curr_act[3] + diff_z
                    curr_act[4] = 0
                    curr_act[5] = 0.
                    curr_act[6] = diff_theta + netGain * netOut

                    activeSteps += 1

            if curr_step % epoch == 0:
                ag.save(checkpointDir + 'BP', curr_step)

            img, meas, rwrd, term = simulator.step(curr_act)
            # Cap the first measurement at 30.
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]
        curr_step += 1

    simulator.close_game()
Exemple #2
0
def main():
    """Play one Doom episode with an agent restored from ``./checkpoints``.

    Builds the simulator and agent configuration dictionaries, creates the
    TensorFlow session and the ``Agent``, loads its weights, and then steps
    the simulator until it reports termination.  The first
    ``history_length`` steps use random actions; afterwards the agent picks
    the action from the buffered images and measurements.  Any chosen action
    whose first six components match one of ``acts_to_replace`` is swapped
    for ``replacement_act``.

    Relies on ``DoomSimulator``, ``Agent``, ``np`` and ``tf`` being available
    at module level.
    """
    ## Simulator
    simulator_args = {
        'config': 'config/config.cfg',
        'resolution': (160, 120),
        'frame_skip': 2,
        'color_mode': 'GRAY',
        'game_args': "+name IntelAct +colorset 7",
    }

    ## Agent
    horizon = 6
    hist_len = 1

    # One scale per predicted measurement, each repeated for every future
    # step, shaped (1, 3 * horizon) so it broadcasts over a batch.
    pred_scale_coeffs = np.expand_dims(
        np.repeat(np.array([8., 40., 1.]), horizon), 0)

    fc_dtype = [('out_dims', int)]
    conv_dtype = [('out_channels', int), ('kernel', int), ('stride', int)]

    agent_args = {
        # preprocessing
        'preprocess_input_images': lambda x: x / 255. - 0.5,
        'preprocess_input_measurements': lambda x: x / 100. - 0.5,
        'num_future_steps': horizon,
        'postprocess_predictions': lambda x: x * pred_scale_coeffs,
        'discrete_controls_manual': range(6, 12),
        'meas_for_net_init': range(3),
        'meas_for_manual_init': range(3, 16),
        'opposite_button_pairs': [(0, 1), (2, 3)],
        # net parameters
        'conv_params': np.array(
            [(16, 5, 4), (32, 3, 2), (64, 3, 2), (128, 3, 2)],
            dtype=conv_dtype),
        'fc_img_params': np.array([(128, )], dtype=fc_dtype),
        'fc_meas_params': np.array([(128, ), (128, ), (128, )],
                                   dtype=fc_dtype),
        'fc_joint_params': np.array([(256, ), (256, ), (-1, )],
                                    dtype=fc_dtype),
    }
    agent_args['target_dim'] = horizon * len(agent_args['meas_for_net_init'])

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = hist_len
    agent_args['test_checkpoint'] = 'model'

    print('starting simulator')

    simulator = DoomSimulator(simulator_args)

    print('started simulator')

    agent_args['discrete_controls'] = simulator.discrete_controls
    agent_args['continuous_controls'] = simulator.continuous_controls

    frame_height = simulator.resolution[1]
    frame_width = simulator.resolution[0]
    agent_args['state_imgs_shape'] = (hist_len * simulator.num_channels,
                                      frame_height, frame_width)

    # Indices into the flattened (history x measurement) vector.
    if 'meas_for_net_init' in agent_args:
        stacked = []
        for slot in range(hist_len):
            for idx in agent_args['meas_for_net_init']:
                stacked.append(idx + simulator.num_meas * slot)
        agent_args['meas_for_net'] = np.array(stacked)
    else:
        agent_args['meas_for_net'] = np.arange(hist_len * simulator.num_meas)

    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        newest_offset = simulator.num_meas * (hist_len - 1)
        agent_args['meas_for_manual'] = np.array(
            [idx + newest_offset for idx in agent_args['meas_for_manual_init']])
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    session_config = tf.ConfigProto(gpu_options=gpu_options,
                                    log_device_placement=False)
    sess = tf.Session(config=session_config)
    ag = Agent(sess, agent_args)
    ag.load('./checkpoints')

    # Ring buffers indexed with ``step % hist_len``.
    img_buffer = np.zeros((hist_len, simulator.num_channels,
                           frame_height, frame_width))
    meas_buffer = np.zeros((hist_len, simulator.num_meas))
    curr_step = 0
    term = False

    # Button order, as listed by the original author:
    # MOVE_FORWARD   MOVE_BACKWARD   TURN_LEFT   TURN_RIGHT  ATTACK  SPEED   SELECT_WEAPON2  SELECT_WEAPON3  SELECT_WEAPON4  SELECT_WEAPON5  SELECT_WEAPON6  SELECT_WEAPON7
    # The patterns below cover the first six buttons: both movement buttons
    # equal, both turn buttons equal, fifth button 0, sixth button either.
    acts_to_replace = []
    for move_pair in ([0, 0], [1, 1]):
        for turn_pair in ([0, 0], [1, 1]):
            for sixth_button in ([0], [1]):
                acts_to_replace.append(move_pair + turn_pair + [0] + sixth_button)
    print(acts_to_replace)
    replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]

    while not term:
        if curr_step < hist_len:
            # Not enough history yet: take a random action.
            warmup_act = np.squeeze(ag.random_actions(1)).tolist()
            img, meas, rwrd, term = simulator.step(warmup_act)
        else:
            # Buffer slots of the last `hist_len` steps, oldest first.
            slots = np.arange(curr_step - hist_len, curr_step) % hist_len

            stacked_imgs = np.reshape(
                img_buffer[slots], (1, ) + agent_args['state_imgs_shape'])
            state_imgs = np.transpose(stacked_imgs, [0, 2, 3, 1])
            state_meas = np.reshape(
                meas_buffer[slots], (1, hist_len * simulator.num_meas))

            chosen = ag.act(state_imgs, state_meas,
                            agent_args['test_objective_params'])[0]
            curr_act = np.squeeze(chosen).tolist()
            if curr_act[:6] in acts_to_replace:
                curr_act = replacement_act

            img, meas, rwrd, term = simulator.step(curr_act)
            # Cap the first measurement at 30.
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

        if term:
            break

        img_buffer[curr_step % hist_len] = img
        meas_buffer[curr_step % hist_len] = meas
        curr_step += 1

    simulator.close_game()
Exemple #3
0
def main():
    """Run one Doom episode training the agent's feed-forward net on a colour reflex.

    The function builds the simulator and agent configuration, creates a
    fresh ``Agent`` (no checkpoint is loaded), and steps the simulator until
    it reports termination:

    * For the first ``historyLen`` steps a zero action is sent and the
      image / measurement / action ring buffers are filled.
    * Afterwards, sparse optical flow between the two most recent buffered
      frames is reduced to smoothed left/right radial-flow errors.  These are
      computed, but in this version they are not fed into the action.
    * Once ``curr_step > 100`` and the health measurement lies in
      ``(0, 101)``, the horizontal offset of the region returned by
      ``getMaxColourPos`` gives the reflex error ``delta``.  The
      edge-filtered grey frame and the target ``delta + netOut`` are passed
      to ``ag.act_ffnet``, and the reflex term is accumulated into the turn
      command.

    After the game is closed the agent is saved once.

    Relies on names defined outside this function: ``width``, ``height``,
    ``widthIn``, ``heightIn``, ``edge``, ``getMaxColourPos``,
    ``DoomSimulator``, ``Agent``, ``np``, ``tf`` and ``cv2``.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    preprocess_input_images = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)
    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2),
                                          (128, 3, 2)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels

    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])

    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] *
                                   agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    # Indices into the flattened (history x measurement) vector: the selected
    # measurements are repeated once per history slot.
    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = np.array(
            [i + simulator.num_meas * ns
             for ns in range(historyLen)
             for i in agent_args['meas_for_net_init']])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen * simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        # current timestep is the last in the stack
        agent_args['meas_for_manual'] = np.array(
            [i + simulator.num_meas * (historyLen - 1)
             for i in agent_args['meas_for_manual_init']])
    else:
        agent_args['meas_for_manual'] = []

    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    # Ring buffers indexed with ``step % historyLen``.
    img_buffer = np.zeros((historyLen, simulator.resolution[1],
                           simulator.resolution[0], num_channels),
                          dtype='uint8')
    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    ag = Agent(sess, agent_args)

    diff_theta = 0
    # Suffix of the file name used for the final save; replaces the former
    # local ``iter``, which shadowed the builtin and was never changed.
    saveIndex = 1
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4     # low-pass factor applied to the radial flow
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50          # re-detect tracked features every N steps
    reflexGain = 0.01

    # Masks selecting the left and right visual fields for feature detection.
    # NOTE(review): the original comment says the masks cover only the UPPER
    # half of the image (to avoid the floor pattern), but the slice
    # ``half_height:`` selects the rows from the middle to the end of the
    # array.  Confirm which half is intended; behaviour is left unchanged.
    half_height = round(height / 2)
    half_width = round(width / 2)

    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    lk_params = dict(winSize=(15, 15),
                     maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                               10, 0.03))
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.03,
                          minDistance=7,
                          blockSize=7)

    imgCentre = np.array([
        simulator_args['resolution'][0] / 2,
        simulator_args['resolution'][1] / 2
    ])
    print("Image centre: ", imgCentre)
    simpleInputs1 = np.zeros((width, height))
    simpleInputs2 = np.zeros((width, height))
    input_buff = np.zeros((1, width * height))
    target_buff = np.zeros((1, 1))
    netOut = 0.
    delta = 0.

    def meanRadialFlow(prevGrey, nextGrey, points):
        """Return the mean of (point - imgCentre) . flow over the tracked points.

        Returns 0. when there are no points to track, which happens when
        ``cv2.goodFeaturesToTrack`` finds no corners and returns ``None``.
        """
        if points is None or len(points) == 0:
            return 0.
        tracked, _status, _err = cv2.calcOpticalFlowPyrLK(
            prevGrey, nextGrey, points, None, **lk_params)
        flow = (tracked - points)[:, 0, :]
        offsets = points[:, 0, :] - imgCentre
        return float(np.sum(offsets * flow)) / float(len(points))

    while not term:
        if curr_step < historyLen:
            # Warm-up: send a zero action until the ring buffers are full.
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ",
                  np.amin(img))

            if curr_step == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]

        else:
            # The two most recently stored frames.
            img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
            img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
            stateImg = simulator._game.get_state().screen_buffer

            # Network input: current screen resized to (width, height) and
            # averaged over the colour channels.
            greyImg2 = cv2.resize(stateImg, (width, height))
            greyImg2 = np.array(np.sum(greyImg2, axis=2) / 3, dtype='uint8')

            if curr_step % updatePtsFreq == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            radialFlowTmpLeft = meanRadialFlow(img1[:, :, 0], img2[:, :, 0],
                                               p0Left)
            radialFlowTmpRight = meanRadialFlow(img1[:, :, 0], img2[:, :, 0],
                                                p0Right)

            # Components 6 and 3 of the previous action are used as the
            # rotation and forward commands respectively.
            rotation = act_buffer[(curr_step - 1) % historyLen][6]
            forward = act_buffer[(curr_step - 1) % historyLen][3]
            # keep separate radial errors for left and right fields
            radialFlowLeft = radialFlowLeft + radialFlowInertia * (
                radialFlowTmpLeft - radialFlowLeft)
            radialFlowRight = radialFlowRight + radialFlowInertia * (
                radialFlowTmpRight - radialFlowRight)
            expectFlowLeft = radialGain * forward + (rotationGain * rotation
                                                     if rotation < 0. else 0.)
            expectFlowRight = radialGain * forward - (rotationGain * rotation
                                                      if rotation > 0. else 0.)

            flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorRight = forward * (expectFlowRight - radialFlowRight) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorLeft = flowErrorLeft if flowErrorLeft > 0. else 0.
            flowErrorRight = flowErrorRight if flowErrorRight > 0. else 0.

            if curr_step > 100:
                health = meas[1]

                # Don't run any networks when the player is dead!
                if health < 101. and health > 0.:
                    # Thresholded flow errors scaled by 1 / reflexGain.  The
                    # previous expressions had no parentheses around the
                    # conditionals, so ``0. / reflexGain`` was the whole else
                    # branch and the right/left difference was mis-parsed.
                    # These values are not yet used by the controller below.
                    excessLeft = flowErrorLeft - errorThresh
                    excessLeft = excessLeft if excessLeft > 0. else 0.
                    excessRight = flowErrorRight - errorThresh
                    excessRight = excessRight if excessRight > 0. else 0.
                    icoInLeft = excessLeft / reflexGain
                    icoInRight = excessRight / reflexGain
                    icoInSteer = icoInRight - icoInLeft

                    # NOTE(review): [255, 0, 0] is presumably red, given
                    # color_mode 'RGB24' - confirm against getMaxColourPos.
                    centre, bottomLeft, topRight, colourStrength = getMaxColourPos(
                        stateImg, [255, 0, 0])
                    colourSteer = imgCentre[0]

                    # Accept the detection only when its box is narrower than
                    # a third of the width and shorter than half the height.
                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(
                            0.5 * (topRight[0] - bottomLeft[0]))

                    # The reflex error must be computed AFTER colourSteer has
                    # been updated.  It used to be computed right after
                    # colourSteer was set to the image centre, so it was
                    # always 0 and the target below reduced to netOut.
                    delta = (colourSteer - imgCentre[0]) / width

                    # These arrays are not read anywhere, but the calls
                    # advance NumPy's global random state, so they are kept to
                    # leave any later random draws unchanged.
                    simpleInputs1[:, :] = 0.1 * np.random.rand(width, height)
                    simpleInputs2[:, :] = 0.1 * np.random.rand(width, height)

                    # NOTE(review): `edge` is not defined in this function; it
                    # is expected to be a module-level filter kernel - confirm.
                    greyImg2 = cv2.filter2D(greyImg2, -1, edge)
                    input_buff[0, :] = np.ndarray.flatten(
                        preprocess_input_images(greyImg2))
                    target_buff[...] = delta + netOut

                    ag.act_ffnet(input_buff, meas, target_buff)
                    netOut = ag.ext_ffnet_output[0]

                    # Accumulate the colour reflex into the turn command.
                    # `colourStrength2` was referenced here but never defined
                    # in this function; the strength returned above is used.
                    diff_theta = diff_theta + reflexGain * colourStrength * (
                        colourSteer - imgCentre[0]) / width

                    curr_act = np.zeros(7).tolist()
                    curr_act[0] = 0
                    curr_act[1] = 0
                    curr_act[2] = 0
                    curr_act[3] = 0.
                    curr_act[4] = 0
                    curr_act[5] = 0
                    curr_act[6] = curr_act[6] + diff_theta

            img, meas, rwrd, term = simulator.step(curr_act)
            # Cap the first measurement at 30.
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]
        curr_step += 1

    simulator.close_game()
    # NOTE(review): called with a single path argument here; confirm that
    # this matches the signature of Agent.save.
    ag.save(
        '/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/' +
        'hack-' + str(saveIndex))
Exemple #4
0
def main(learning_rate_):
    """Steer the Doom player with the FCL network until the simulator terminates.

    The first ``historyLen`` steps send an all-zero action to fill the history
    buffers. Once ``curr_step`` exceeds 100, and while the reported health is
    strictly between 0 and 101, every step:

    * asks ``getMaxColourPos`` for the target's bounding box in the current
      screen buffer and turns its horizontal offset from the image centre
      into the reflex error ``delta`` (held at 0 for the first three such
      steps);
    * feeds the zero-mean, unit-variance grey image to ``FCLNet`` twice:
      once with the learning rate forced to 0 to read the current outputs,
      then with the configured learning rate after an output-dependent term
      has been added to the error;
    * sends an action whose slot 6 is the reflex term plus the scaled network
      output, and whose slot 2 is the ``shoot`` flag.

    Diagnostics are written to the module-level ``outFile`` and
    ``wtdistFile``, both of which are closed before returning; each player
    death appends one line to ``KD.txt``.

    Relies on module-level names defined elsewhere in the file: ``FCLNet``,
    ``DoomSimulator``, ``getMaxColourPos``, ``saveImage``, ``outFile``,
    ``wtdistFile``, ``neuronsPerLayer``, ``width``, ``height``, ``widthIn``
    and ``heightIn``.

    Args:
        learning_rate_: Learning rate handed to ``FCLNet``; any value
            accepted by ``float()``.
    """
    learningRate = float(learning_rate_)
    FCLNet.setLearningRate(learningRate)

    print("learning rate ", learningRate, file=outFile)

    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name FCL +colorset 7"

    historyLen = 3
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels

    print('started simulator')

    # History buffers, written at index `curr_step % historyLen`.
    img_buffer = np.zeros((historyLen, simulator.resolution[1],
                           simulator.resolution[0], num_channels),
                          dtype='uint8')

    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    diff_z = 0
    epoch = 200
    reflexGain = 1E-3
    netGain = 40.
    reflexReduceGain = -0.05

    # create masks for left and right visual fields - note that these only cover the upper half of the image
    # this is to help prevent the tracking getting confused by the floor pattern
    # NOTE(review): the slice `half_height:` selects the rows from the middle
    # of the array downwards, which for a top-origin image is the lower half -
    # confirm whether the comment or the slice is the intended one.
    half_height = round(height / 2)
    half_width = round(width / 2)

    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    feature_params = dict(maxCorners=500,
                          qualityLevel=0.03,
                          minDistance=7,
                          blockSize=7)

    imgCentre = np.array([
        int(simulator_args['resolution'][0] / 2),
        int(simulator_args['resolution'][1] / 2)
    ])
    print("Image centre: ", imgCentre)
    input_buff = np.zeros((width * height))
    # NOTE(review): target_buff and meas_buff are filled every step but never
    # read in this function.
    target_buff = np.zeros((1, 1))
    meas_buff = np.zeros((1, simulator.num_meas))
    netOut = 0.
    netErr = np.zeros(neuronsPerLayer[0])
    delta = 0.
    shoot = 0
    wtDist = np.zeros(FCLNet.getNumLayers())

    # Number of steps the network has run since the start, or since the
    # health last dropped below 0.1.
    active_steps = 0
    killed = False

    while not term:
        if curr_step < historyLen:
            # Warm-up: send zero actions until the history buffers are full.
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ",
                  np.amin(img))

            if curr_step == 0:
                # NOTE(review): the detected features are not used anywhere
                # else in this function.
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]

        else:
            state = simulator._game.get_state()

            stateImg = state.screen_buffer

            if curr_step > 100:
                health = meas[1]

                if (health < 0.1):
                    active_steps = 0

                player_dead = simulator._game.is_player_dead()
                if player_dead and not killed:
                    # Record each death once as "0 <step> <unix timestamp>".
                    # The context manager closes the file even if the write
                    # fails.
                    with open("KD.txt", "a") as kd_file:
                        kd_file.write("0 " + str(curr_step) + " " + str(
                            datetime.now().timestamp()) + "\n")
                    killed = True
                    print("KILLED")
                if not player_dead:
                    killed = False

                # Don't run any networks when the player is dead!
                if (health < 101. and health > 0.):

                    icoInSteer = 0.

                    saveImage(curr_step, stateImg)
                    centre, bottomLeft, topRight, colourStrength = getMaxColourPos(
                        stateImg, [0, 0, 255], curr_step)
                    colourSteer = imgCentre[0]

                    # Only accept a bounding box narrower than a third of the
                    # width and shorter than half of the height; otherwise
                    # keep aiming at the image centre.
                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(
                            0.5 * (topRight[0] - bottomLeft[0]))
                        shoot = 1

                    rawInputs = np.array(np.sum(stateImg, axis=2) / 3)

                    # Normalise the grey image to zero mean and unit variance.
                    input_buff[:] = np.ndarray.flatten(rawInputs)
                    input_buff = input_buff - np.mean(input_buff)
                    input_std = np.std(input_buff)
                    # A uniform frame has zero spread; dividing by it would
                    # hand NaN inputs to FCLNet.doStep while learning is
                    # enabled, so the all-zero input is passed on unchanged.
                    if input_std > 0.:
                        input_buff = input_buff / input_std

                    # The reflex is held at zero for the first few steps so
                    # that it is delayed with respect to the image input.
                    # Otherwise the learning can never reduce the error to
                    # zero no matter how good the controller.
                    if (active_steps > 2):
                        delta = (float(colourSteer) -
                                 float(imgCentre[0])) / float(width)
                        # Only keep firing while the target is centred.
                        if (np.abs(delta) > 0.01):
                            shoot = 0
                    else:
                        delta = 0

                    netErr[:] = delta
                    target_buff[...] = delta + netOut
                    meas_buff[0, :] = meas

                    # First pass with the learning rate at 0: read the outputs
                    # for the current image.
                    FCLNet.setLearningRate(0.)
                    FCLNet.doStep(input_buff, netErr)
                    netOut = FCLNet.getOutput(0) + 0.3 * FCLNet.getOutput(
                        1) + 0.1 * FCLNet.getOutput(2)
                    netOut1 = FCLNet.getOutput(3) + 0.3 * FCLNet.getOutput(
                        4) + 0.1 * FCLNet.getOutput(5)

                    # Second pass with learning enabled, on the error adjusted
                    # by the scaled difference of the two output groups.
                    netErr += reflexReduceGain * netGain * (netOut - netOut1)
                    FCLNet.setLearningRate(learningRate)

                    FCLNet.doStep(input_buff, netErr)
                    netOut = FCLNet.getOutput(0) + 0.3 * FCLNet.getOutput(
                        1) + 0.1 * FCLNet.getOutput(2)
                    netOut1 = FCLNet.getOutput(3) + 0.3 * FCLNet.getOutput(
                        4) + 0.1 * FCLNet.getOutput(5)

                    for i in range(FCLNet.getNumLayers()):
                        wtDist[i] = FCLNet.getLayer(
                            i).getWeightDistanceFromInitialWeights()

                    print(curr_step,
                          delta,
                          netErr[0],
                          netOut - netOut1,
                          health,
                          file=outFile)
                    print(' '.join(map(str, wtDist)), file=wtdistFile)

                    # icoInSteer is fixed at 0 here, so this term is 0.
                    diff_theta = 0.6 * max(min((icoInSteer), 5.), -5.)

                    netErr[:] = 0.
                    diff_theta = diff_theta + reflexGain * colourStrength * delta

                    curr_act = np.zeros(7).tolist()
                    curr_act[0] = 0
                    curr_act[1] = 0
                    curr_act[2] = shoot
                    curr_act[3] = curr_act[3] + diff_z
                    curr_act[4] = 0
                    curr_act[5] = 0.
                    curr_act[6] = diff_theta + netGain * (netOut - netOut1)

                    active_steps += 1

            if (curr_step % epoch == 0):
                # The string literal below is the only statement in this
                # branch, so the branch currently does nothing.

                # uncomment to write models to file
                """
                if not os.path.exists("Models"):
                    os.makedirs("Models")
                FCLNet.saveModel("Models/BP-" + str(curr_step) + ".txt")

                file = open("Models/checkpoint", 'w')
                file.write("Models/BP-" + str(curr_step) + ".txt")
                file.close()
                """

            img, meas, rwrd, term = simulator.step(curr_act)
            # Cap the first measurement at 30.
            if (not (meas is None)) and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]
        curr_step += 1

    simulator.close_game()
    outFile.close()
    wtdistFile.close()
Exemple #5
0
def main():
    """Run the optical-flow and colour-tracking steering loop with ``deepBP``.

    The first ``historyLen`` steps send an all-zero action to fill the history
    buffers. Every later step tracks features in the left and right visual
    fields between the two most recent buffered frames and low-pass filters
    the resulting radial flow. Once ``curr_step`` exceeds 100, and while the
    reported health is strictly between 0 and 101, it also:

    * locates the target colour in the two most recent frames with
      ``getMaxColourPos`` and derives the error ``delta`` from its horizontal
      offset and strength;
    * runs one ``deepBP`` step on the edge-filtered grey image with ``delta``
      as the error and reads the network output;
    * sends an action whose slot 6 combines the clamped flow term, the colour
      reflex and the scaled network output.

    Relies on module-level names defined elsewhere in the file:
    ``DoomSimulator``, ``deepBP``, ``getMaxColourPos``, ``plotWeights``,
    ``edge``, ``width``, ``height``, ``widthIn`` and ``heightIn``.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    # NOTE(review): no agent is constructed in this function. Apart from the
    # entries used to build the dictionary itself, only 'history_length' and
    # the printed 'state_meas_shape' are read further down.
    agent_args = {}

    # preprocessing
    preprocess_input_images = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)
    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2),
                                          (128, 3, 2)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels

    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])

    agent_args['n_ffnet_inputs'] = 2 * (agent_args['resolution'][0] *
                                        agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_outputs'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(historyLen):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen * simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])  # current timestep is the last in the stack
    else:
        agent_args['meas_for_manual'] = []

    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    # History buffers, written at index `curr_step % historyLen`.
    img_buffer = np.zeros((historyLen, simulator.resolution[1],
                           simulator.resolution[0], num_channels),
                          dtype='uint8')

    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50
    reflexGain = 0.01

    # create masks for left and right visual fields - note that these only cover the upper half of the image
    # this is to help prevent the tracking getting confused by the floor pattern
    # NOTE(review): the slice `half_height:` selects the rows from the middle
    # of the array downwards, which for a top-origin image is the lower half -
    # confirm whether the comment or the slice is the intended one.
    half_height = round(height / 2)
    half_width = round(width / 2)

    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    netErr = np.zeros((width, height))

    lk_params = dict(winSize=(15, 15),
                     maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                               10, 0.03))
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.03,
                          minDistance=7,
                          blockSize=7)

    imgCentre = np.array([
        simulator_args['resolution'][0] / 2,
        simulator_args['resolution'][1] / 2
    ])
    print("Image centre: ", imgCentre)
    # NOTE(review): simpleInputs1/2, input_buff and target_buff are filled
    # every active step but never read in this function.
    simpleInputs1 = np.zeros((width, height))
    simpleInputs2 = np.zeros((width, height))
    input_buff = np.zeros((1, width * height))
    target_buff = np.zeros((1, 1))

    # NOTE(review): this thread is started but never joined here; whether the
    # process can exit depends on how plotWeights terminates - confirm.
    t = threading.Thread(target=plotWeights)
    t.start()

    while not term:
        if curr_step < historyLen:
            # Warm-up: send zero actions until the history buffers are full.
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ",
                  np.amin(img))

            if curr_step == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]

        else:
            img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
            img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
            state = simulator._game.get_state()

            stateImg = state.screen_buffer
            # Grey image at network resolution: channel mean of the resized
            # screen buffer.
            greyImg2 = cv2.resize(stateImg, (width, height))
            greyImg2 = np.array(np.sum(greyImg2, axis=2) / 3, dtype='uint8')

            # Re-detect the tracked features every `updatePtsFreq` steps.
            if (curr_step % updatePtsFreq == 0):
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            p1Left, st, err = cv2.calcOpticalFlowPyrLK(img1[:, :, 0],
                                                       img2[:, :, 0], p0Left,
                                                       None, **lk_params)
            p1Right, st, err = cv2.calcOpticalFlowPyrLK(
                img1[:, :, 0], img2[:, :, 0], p0Right, None, **lk_params)
            flowLeft = (p1Left - p0Left)[:, 0, :]
            flowRight = (p1Right - p0Right)[:, 0, :]
            radialFlowTmpLeft = 0
            radialFlowTmpRight = 0

            # Mean projection of each point's flow onto its offset from the
            # image centre, per visual field.
            for i in range(0, len(p0Left)):
                radialFlowTmpLeft += ((p0Left[i, 0, :] - imgCentre)).dot(
                    flowLeft[i, :]) / float(len(p0Left))
            for i in range(0, len(p0Right)):
                radialFlowTmpRight += ((p0Right[i, 0, :] - imgCentre)).dot(
                    flowRight[i, :]) / float(len(p0Right))

            rotation = act_buffer[(curr_step - 1) % historyLen][6]
            forward = act_buffer[(curr_step - 1) % historyLen][3]
            # keep separate radial errors for left and right fields
            radialFlowLeft = radialFlowLeft + radialFlowInertia * (
                radialFlowTmpLeft - radialFlowLeft)
            radialFlowRight = radialFlowRight + radialFlowInertia * (
                radialFlowTmpRight - radialFlowRight)
            expectFlowLeft = radialGain * forward + (rotationGain * rotation
                                                     if rotation < 0. else 0.)
            expectFlowRight = radialGain * forward - (rotationGain * rotation
                                                      if rotation > 0. else 0.)

            flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorRight = forward * (expectFlowRight - radialFlowRight) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorLeft = flowErrorLeft if flowErrorLeft > 0. else 0.
            flowErrorRight = flowErrorRight if flowErrorRight > 0. else 0.

            if curr_step > 100:
                health = meas[1]

                # Don't run any networks when the player is dead!
                if (health < 101. and health > 0.):
                    # Thresholded flow errors, scaled by 1 / reflexGain, and
                    # their right-minus-left difference. The clamp is
                    # parenthesised on purpose: in
                    # `x if x > 0. else 0. / reflexGain` the division applies
                    # only to the literal 0., because a conditional
                    # expression binds more loosely than `/` and `-`.
                    # NOTE(review): every action sent by this function has 0
                    # in slot 3, so `forward` and with it both flow errors
                    # are normally 0 and this term does not contribute.
                    excessLeft = flowErrorLeft - errorThresh
                    excessRight = flowErrorRight - errorThresh
                    icoInLeft = (excessLeft
                                 if excessLeft > 0. else 0.) / reflexGain
                    icoInRight = (excessRight
                                  if excessRight > 0. else 0.) / reflexGain
                    icoInSteer = icoInRight - icoInLeft

                    centre1, bottomLeft1, topRight1, colourStrength1 = getMaxColourPos(
                        img1, [255, 0, 0])
                    centre2, bottomLeft2, topRight2, colourStrength2 = getMaxColourPos(
                        img2, [255, 0, 0])
                    colourSteer = centre2[0]
                    simpleInputs1[:, :] = 0.1 * np.random.rand(width, height)
                    simpleInputs2[:, :] = 0.1 * np.random.rand(width, height)
                    print("ColourSteer: ", colourSteer, " ColourStrength: ",
                          colourStrength2)

                    # Ignore detections whose strength is not above 150.
                    if (colourStrength2 > 150.):
                        simpleInputs2[bottomLeft2[0]:topRight2[0],
                                      bottomLeft2[1]:topRight2[1]] = 1.
                    else:
                        colourStrength2 = 0.
                    if (colourStrength1 > 150.):
                        simpleInputs1[bottomLeft1[0]:topRight1[0],
                                      bottomLeft1[1]:topRight1[1]] = 1.

                    # Error signal: horizontal offset of the target from the
                    # image centre, weighted by the detection strength.
                    delta = 0.06 * colourStrength2 * (colourSteer -
                                                      imgCentre[0]) / width

                    greyImg2 = cv2.filter2D(greyImg2, -1, edge)
                    input_buff[0, :] = np.ndarray.flatten(
                        preprocess_input_images(greyImg2))
                    target_buff[0, 0] = delta

                    netErr[:, :] = delta
                    deepBP.doStep(preprocess_input_images(greyImg2.flatten()),
                                  netErr.flatten())

                    icoSteer = deepBP.getOutput(0)
                    print(" ** ", curr_step, icoSteer, " ", delta, " ",
                          colourStrength2)

                    # Slot 6 of the action: clamped flow term + colour reflex
                    # + scaled network output.
                    diff_theta = 0.6 * max(min((icoInSteer), 5.), -5.)

                    diff_theta = diff_theta + 0.01 * colourStrength2 * (
                        colourSteer - imgCentre[0]) / width
                    diff_theta = diff_theta + 10. * icoSteer
                    curr_act = np.zeros(7).tolist()
                    curr_act[0] = 0
                    curr_act[1] = 0
                    curr_act[2] = 0
                    curr_act[3] = 0.
                    curr_act[4] = 0
                    curr_act[5] = 0
                    curr_act[6] = curr_act[6] + diff_theta

            img, meas, rwrd, term = simulator.step(curr_act)
            # Cap the first measurement at 30.
            if (not (meas is None)) and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]

        curr_step += 1

    simulator.close_game()
Exemple #6
0
def main() -> None:
    """Drive one Doom episode with random actions while exercising the agent's
    feed-forward net.

    The function:

    1. Configures a 160x120 grayscale ``DoomSimulator`` and fills ``agent_args``
       (preprocessing lambdas, net layer parameters, buffer-derived sizes).
    2. Creates a TensorFlow session capped at 10% of GPU memory, builds an
       ``Agent`` and loads it from ``./checkpoints``.
    3. Loops until the simulator reports termination. The first
       ``history_length`` steps only fill the ring buffers with random actions.
       Every later step draws a random action, overrides entries 2 and 3,
       steps the simulator, calls ``ag.act_ffnet`` and updates the smoothed
       offsets ``diff_x`` / ``diff_y`` from the current frame.
    4. Every ``epoch`` steps dumps three ``.npy`` arrays under ``/home/paul``.
    5. Closes the game and saves the agent.

    Relies on names defined elsewhere in the file: ``DoomSimulator``,
    ``Agent``, ``tf``, ``np`` and ``os``.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (160, 120)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'GRAY'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    # Row vector [8, ..., 8, 40, ..., 40, 1, ..., 1] with num_future_steps
    # repeats of each coefficient; captured by the postprocess lambda below.
    pred_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([8., 40., 1.]), 1) * np.ones(
            (1, agent_args['num_future_steps']))).flatten(), 0)
    agent_args['postprocess_predictions'] = lambda x: x * pred_scale_coeffs
    agent_args['discrete_controls_manual'] = range(6, 12)
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['opposite_button_pairs'] = [(0, 1), (2, 3)]

    # net parameters
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2),
                                          (128, 3, 2)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['test_checkpoint'] = 'model'

    print('starting simulator')

    simulator = DoomSimulator(simulator_args)

    print('started simulator')

    agent_args['discrete_controls'] = simulator.discrete_controls
    agent_args['continuous_controls'] = simulator.continuous_controls
    agent_args['state_imgs_shape'] = (agent_args['history_length'] *
                                      simulator.num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])

    agent_args['n_ffnet_hidden'] = np.array([50, 50])

    # Indices into the flattened (history_length * num_meas) measurement
    # stack: the first three measurements of every timestep go to the net.
    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(agent_args['history_length']):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(agent_args['history_length'] *
                                               simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (agent_args['history_length'] - 1)
            for i in agent_args['meas_for_manual_init']
        ])  # current timestep is the last in the stack
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))

    # Ring buffers holding the last history_length frames, measurements and
    # (first six entries of the) actions; indexed with curr_step % length.
    img_buffer = np.zeros(
        (agent_args['history_length'], simulator.num_channels,
         simulator.resolution[1], simulator.resolution[0]))
    meas_buffer = np.zeros((agent_args['history_length'], simulator.num_meas))
    act_buffer = np.zeros((agent_args['history_length'], 6))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)
    agent_args['n_ffnet_meas'] = len(np.ndarray.flatten(meas_buffer))
    agent_args['n_ffnet_act'] = len(np.ndarray.flatten(act_buffer))

    ag = Agent(sess, agent_args)
    ag.load('./checkpoints')

    # Six-element action prefixes in which entries 0/1 are equal, entries 2/3
    # are equal and entry 4 is 0 (entry 5 may be 0 or 1). Per the button order
    # listed below these are draws with no net movement, no net turn and no
    # attack; they are swapped for replacement_act.
    acts_to_replace = [
        a + b + d + e for a in [[0, 0], [1, 1]] for b in [[0, 0], [1, 1]]
        for d in [[0]] for e in [[0], [1]]
    ]
    print(acts_to_replace)
    replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    # MOVE_FORWARD   MOVE_BACKWARD   TURN_LEFT   TURN_RIGHT  ATTACK  SPEED   SELECT_WEAPON2  SELECT_WEAPON3  SELECT_WEAPON4  SELECT_WEAPON5  SELECT_WEAPON6  SELECT_WEAPON7

    diff_y = 0
    diff_x = 0
    diff_z = 0
    inertia = 0.5  # smoothing factor of the diff_x / diff_y low-pass filters
    save_index = 1  # suffix of the checkpoint written after the episode
    epoch = 200  # dump debug arrays every `epoch` steps

    # NOTE(review): userdoc is never read; the dumps below go to the
    # hard-coded '/home/paul' directory instead.
    userdoc = os.path.join(os.path.expanduser("~"), "Documents")

    while not term:
        if curr_step < agent_args['history_length']:
            # Warm-up: act randomly until every ring-buffer slot is filled.
            curr_act = np.squeeze(ag.random_actions(1)).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)

        else:
            # Reorder the ring buffers oldest-to-newest and reshape them into
            # single-sample batches (images end up channels-last).
            state_imgs = np.transpose(
                np.reshape(
                    img_buffer[np.arange(
                        curr_step - agent_args['history_length'], curr_step) %
                               agent_args['history_length']],
                    (1, ) + agent_args['state_imgs_shape']), [0, 2, 3, 1])
            state_meas = np.reshape(
                meas_buffer[np.arange(curr_step - agent_args['history_length'],
                                      curr_step) %
                            agent_args['history_length']],
                (1, agent_args['history_length'] * simulator.num_meas))

            curr_act = np.squeeze(ag.random_actions(1)[0]).tolist()
            if curr_act[:6] in acts_to_replace:
                # Copy: curr_act is modified in place below and must not
                # rewrite the replacement template itself.
                curr_act = list(replacement_act)

            # NOTE(review): `hack` carries the smoothed offsets but is never
            # passed to the simulator; only curr_act is. Kept so the indexing
            # (which needs at least 9 action entries) behaves as before.
            hack = [0] * len(curr_act)
            hack[6] = diff_x
            hack[8] = -diff_y * 0.2
            hack[3] = 0  # diff_z

            # Hard overrides applied to every post-warm-up action.
            curr_act[2] = 0
            curr_act[3] = 10

            img, meas, rwrd, term = simulator.step(curr_act)
            # Clamp the first measurement to at most 30.
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.
            if img is not None:
                ag.act_ffnet(
                    np.ndarray.flatten(state_imgs),
                    np.ndarray.flatten(state_meas),
                    np.array(np.ndarray.flatten(act_buffer), dtype='float64'),
                    np.ndarray.flatten(ag.preprocess_input_images(img)))

                # NOTE(review): the next two diff_image values are overwritten
                # before being read; only the third assignment (the plain
                # preprocessed frame) feeds diff_x / diff_y. They are kept
                # because their reshape/indexing can raise and removing them
                # would silently change failure behaviour.
                diff_image = np.absolute(
                    np.reshape(np.array(ag.ext_ffnet_output),
                               [img.shape[0], img.shape[1]]) -
                    ag.preprocess_input_images(img))
                diff_image = np.absolute(
                    ag.preprocess_input_images(
                        img_buffer[(curr_step - 1) %
                                   agent_args['history_length']] -
                        ag.preprocess_input_images(img)))
                diff_image = ag.preprocess_input_images(img)

                # Exponential smoothing towards (argmax of the axis sum,
                # divided by the axis length, minus 0.5).
                diff_x = diff_x + inertia * (
                    (np.argmax(diff_image.sum(axis=0)) /
                     float(diff_image.shape[1])) - 0.5 - diff_x)
                # Each filter updates from its own previous value; diff_y must
                # not be re-seeded from diff_x.
                diff_y = diff_y + inertia * (
                    (np.argmax(diff_image.sum(axis=1)) /
                     float(diff_image.shape[0])) - 0.5 - diff_y)

                if curr_step % epoch == 0:
                    print("saving...")
                    np.save(
                        os.path.join('/home/paul', "hack"),
                        np.reshape(np.array(ag.ext_ffnet_output),
                                   [img.shape[0], img.shape[1]]))
                    np.save(os.path.join('/home/paul', "target"),
                            ag.preprocess_input_images(img))
                    np.save(os.path.join('/home/paul', "diff"), diff_image)
                    # Re-randomise the offsets after each dump. diff_z is not
                    # read anywhere, but the draw is kept so the NumPy RNG
                    # stream stays the same.
                    diff_x = np.random.normal(0, 2)
                    diff_z = np.random.normal(10, 2)

        if not term:
            # Record this step in the ring buffers (first six action entries
            # only, matching act_buffer's width).
            img_buffer[curr_step % agent_args['history_length']] = img
            meas_buffer[curr_step % agent_args['history_length']] = meas
            act_buffer[curr_step % agent_args['history_length']] = curr_act[:6]
            curr_step += 1

    simulator.close_game()
    ag.save(
        '/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/' +
        'hack-' + str(save_index))
Exemple #7
0
def main() -> None:
    """Run repeated Doom episodes steered by a colour-tracking reflex while
    feeding the agent's fully connected net.

    The function configures an RGB24 ``DoomSimulator`` at the module-level
    ``(width, height)`` resolution, builds ``agent_args``, creates a
    TensorFlow session capped at 10% of GPU memory and an ``Agent``, and
    optionally loads a checkpoint. It then runs ``episodes`` (1000) episodes of
    at most 500 steps each. On every step it:

    * locates the strongest ``[255, 0, 0]`` region via ``getMaxColourPos``,
    * derives ``delta``, the horizontal offset of that region from the image
      centre divided by ``width``,
    * calls ``ag.act_fcnet`` with an arrow-marker image, the measurements and
      a target buffer,
    * sets action entry 6 to ``reflexGain * delta + netGain * netOut``
      (``netGain`` is 0 here, so only the reflex term contributes).

    A checkpoint is written every ``epoch`` (200) steps.

    Relies on names defined elsewhere in the file: ``width``, ``height``,
    ``sharpen``, ``getMaxColourPos``, ``DoomSimulator``, ``Agent``, ``tf``,
    ``cv2``, ``np`` and ``os``.
    """
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (width, height)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    # Local copy of the image normalisation (maps 0..255 to -0.5..0.5); used
    # directly in the step loop below, independent of the Agent's own copy.
    preprocess_input_images = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    # NOTE(review): pred_scale_coeffs is not read again in this function.
    pred_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([8., 40., 1.]), 1) * np.ones(
            (1, agent_args['num_future_steps']))).flatten(), 0)
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)
    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['net_type'] = "fc"
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2),
                                          (128, 3, 2)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels

    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])

    # Net input size is one value per pixel (width * height).
    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] *
                                   agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-3

    # NOTE(review): modelDir is not read again in this function.
    modelDir = os.path.join(os.path.expanduser("~"),
                            "Dev/GameAI/vizdoom_cig2017/icodoom/ICO1/Models")

    # Indices into the flattened (historyLen * num_meas) measurement stack:
    # the first three measurements of every timestep go to the net.
    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(historyLen):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen * simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])  # current timestep is the last in the stack
    else:
        agent_args['meas_for_manual'] = []

    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    #    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    #    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))

    #    agent = Agent(sess, agent_args)
    #    agent.load('/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/ICO-8600')
    #    print("model loaded..")

    #    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    #    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))

    # NOTE(review): img_buffer, meas_buffer, act_buffer and act_buffer_ico are
    # allocated (and their shapes printed) but never written or read in the
    # episode loop below.
    img_buffer = np.zeros((historyLen, simulator.resolution[1],
                           simulator.resolution[0], num_channels),
                          dtype='uint8')

    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    act_buffer_ico = np.zeros((agent_args['history_length_ico'], 7))
    curr_step = 0
    old_step = -1
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    ag = Agent(sess, agent_args)

    # NOTE(review): the existence check uses a path relative to the current
    # working directory, while the load uses an absolute path; the two only
    # agree when the script is started from the ICO1 directory - confirm.
    if (os.path.isfile("checkpoints1/checkpoint")):
        ag.load(
            '/home/paul/Dev/GameAI/vizdoom_cig2017/icodoom/ICO1/checkpoints1/')
        print("model loaded..")
    else:
        print("No model file, initialising...")

    # Controller state and tuning constants. Of these, only epoch,
    # reflexGain, netGain, diff_theta, dontshoot, delta, netOut and iter are
    # used in the loop below; the rest are assigned but not read here.
    diff_y = 0
    diff_x = 0
    diff_z = 0
    diff_theta = 0
    epoch = 200
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50
    skipImage = 1
    skipImageICO = 5
    reflexGain = 0.1
    netGain = 0.  #10.
    oldHealth = 0.

    # create masks for left and right visual fields - note that these only cover the upper half of the image
    # this is to help prevent the tracking getting confused by the floor pattern
    # NOTE(review): the slices below select rows from half_height to the end,
    # i.e. the higher row indices, which does not obviously match "upper half"
    # above - confirm the intended image orientation. The masks, lk_params and
    # feature_params are not used further in this function.
    half_height = round(height / 2)
    half_width = round(width / 2)

    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    lk_params = dict(winSize=(15, 15),
                     maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                               10, 0.03))
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.03,
                          minDistance=7,
                          blockSize=7)

    # Pixel coordinates (x, y) of the image centre.
    imgCentre = np.array([
        int(simulator_args['resolution'][0] / 2),
        int(simulator_args['resolution'][1] / 2)
    ])
    print("Image centre: ", imgCentre)
    simpleInputs = np.zeros((width, height))
    input_buff = np.zeros((1, width * height))  # one flattened frame for the net
    target_buff = np.zeros((1, 1))  # single scalar training target
    meas_buff = np.zeros((1, simulator.num_meas))
    netOut = 0.
    netErr = np.zeros((width, height))
    delta = 0.
    delta2 = 0
    dontshoot = 1
    deltaZeroCtr = 1
    # Initial all-zero action, sent on the very first simulator step.
    curr_act = np.zeros(7).tolist()

    reflexOn = False
    iter = 0
    episodes = 1000
    simulator._game.init()

    for i in range(episodes):
        #        print ("Episode ", i)
        tc = 0  # step counter within the current episode
        simulator._game.new_episode()

        while (tc < 500):
            # Apply the action chosen on the previous iteration. curr_act
            # carries over across episode boundaries.
            screen_buf, meas, rwrd, term = simulator.step(curr_act)
            if (screen_buf is None):
                # No frame returned: abandon this episode and start the next.
                break

            midlinex = int(width / 2)
            midliney = int(height * 0.75)
            crcb = screen_buf
            # Channel 2 of the top 75% of the frame, split into left and right
            # halves, then filtered with the module-level `sharpen` kernel.
            screen_left = screen_buf[0:midliney, 0:midlinex, 2]
            screen_right = screen_buf[0:midliney, midlinex:width, 2]
            screen_left = cv2.filter2D(screen_left, -1, sharpen)
            screen_right = cv2.filter2D(screen_right, -1, sharpen)
            # Channel-averaged, normalised frame.
            # NOTE(review): simpleInputs, screen_diff, lavg, ravg and shoot are
            # computed each step but not read afterwards in this function.
            simpleInputs = preprocess_input_images(
                np.array(np.sum(crcb, axis=2) / 3))

            #            simpleInputs = cv2.filter2D(simpleInputs, -1, edge)
            #            simpleInputs = simpleInputs - np.mean(simpleInputs)
            screen_diff = screen_left - np.fliplr(screen_right)
            screen_diff = cv2.resize(screen_diff, (width, height))
            # cv2.imwrite('/tmp/left.png',screen_left)
            # cv2.imwrite('/tmp/right.png',screen_right)
            #            cv2.imwrite("/home/paul/tmp/Images/diff-" + str(iter) + ".png", screen_diff)
            #            cv2.imwrite("/home/paul/tmp/Images/raw-" + str(iter) + ".png", crcb)

            lavg = np.average(screen_left)
            ravg = np.average(screen_right)

            # Shoot flag with a cool-down: after step 30, set shoot once and
            # then count dontshoot down from 5 before setting it again.
            shoot = 0
            if (dontshoot > 1):
                dontshoot = dontshoot - 1
            else:
                if (tc > 30):
                    shoot = 1
                    dontshoot = 5

            centre, bottomLeft, topRight, colourStrength = getMaxColourPos(
                crcb, [255, 0, 0])
            # Default to the image centre (zero steering offset) when no
            # acceptable region is found.
            colourSteer = imgCentre[0]
            imgRect = np.zeros(crcb.shape)

            # Accept the detected box only if it is narrower than a third of
            # the width and shorter than half the height; steer towards its
            # horizontal midpoint.
            if (len(bottomLeft) > 0 and len(topRight) > 0
                    and ((topRight[0] - bottomLeft[0]) < width / 3)
                    and ((topRight[1] - bottomLeft[1]) < height / 2)):
                colourSteer = bottomLeft[0] + int(
                    0.5 * (topRight[0] - bottomLeft[0]))

            # Draw a white arrow at column colourSteer on an otherwise black
            # image; this marker image becomes the net input below.
            cv2.arrowedLine(imgRect, (colourSteer, imgCentre[1] + 10),
                            (colourSteer, imgCentre[1]),
                            color=(255, 255, 255),
                            thickness=2)
            #            cv2.imwrite("/home/paul/tmp/Images/simple-" + str(iter) + ".jpg", simpleInputs)
            #            cv2.imwrite("/home/paul/tmp/Images/rect-" + str(iter) + ".jpg", imgRect)
            #            cv2.imwrite("/home/paul/tmp/Images/" + str(iter) + ".jpg", crcb)

            #            cv2.imwrite("/home/paul/tmp/Images/Positive/arrow-" + str(iter) + ".jpg", imgRect)
            #            cv2.imwrite("/home/paul/tmp/Images/Positive/" + str(iter) + ".jpg", crcb)

            #            blue = cv2.filter2D(blue, -1, edge)
            #            cv2.imwrite("/home/paul/tmp/Images/Positive/blue-" + str(curr_step) + ".jpg", blue)
            meas_buff[0, :] = meas
            # Collapse the marker image to one channel and flatten it into
            # the single-row input buffer.
            imgRect = np.array(np.sum(imgRect, axis=2) / 3)
            #            input_buff[0,:] = np.ndarray.flatten(imgRect)
            input_buff[0, :] = np.ndarray.flatten(imgRect)

            #            input_buff[0,:] = np.random.normal(0.0, 0.01, size=width*height)
            #            print("mean: ", np.mean(input_buff[0,:]), " var: ", np.var(input_buff[0,:]))

            # Horizontal offset of the target from the image centre as a
            # fraction of the width; forced to 0 for the first three steps of
            # an episode.
            if (tc > 2):
                delta = (float(colourSteer) -
                         float(imgCentre[0])) / float(width)

            else:
                delta = 0

            # NOTE(review): the first target is overwritten immediately, so
            # the net is always given the constant target -0.5 - presumably a
            # debugging setup; confirm before relying on it.
            target_buff[...] = delta + netOut
            target_buff[...] = -0.5

            # NOTE(review): the raw `meas` is passed here, not meas_buff.
            ag.act_fcnet(input_buff, meas, target_buff)
            # First scalar of the net output.
            netOut = np.ndarray.flatten(ag.ext_fcnet_output)[0].flatten()[0]

            netErr[:, :] = 0.
            diff_theta = reflexGain * delta
            #            print(tc, diff_theta, netGain*netOut, target_buff[0,0], delta)
            print(tc, reflexGain * delta, netOut)

            # Build the next action: entry 2 is held at 1, entry 6 carries the
            # steering command, everything else is zero.
            curr_act = np.zeros(7).tolist()
            curr_act[0] = 0
            curr_act[1] = 0
            curr_act[2] = 1
            curr_act[3] = 0.  #curr_act[3] + diff_z
            curr_act[3] = 0.
            curr_act[4] = 0
            curr_act[5] = 0
            curr_act[6] = diff_theta + netGain * netOut

            iter += 1

            # Checkpoint every `epoch` global steps (including step 0).
            if (curr_step % epoch == 0):
                ag.save(
                    '/home/paul/Dev/GameAI/vizdoom_cig2017/icodoom/ICO1/checkpoints1/BPBasic',
                    curr_step)
            curr_step += 1
            # 30 fps
            #            time.sleep(0.03)

            tc += 1

    simulator.close_game()
Exemple #8
0
def main():
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    preprocess_input_images = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    pred_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([8., 40., 1.]), 1) * np.ones(
            (1, agent_args['num_future_steps']))).flatten(), 0)
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)
    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters
    agent_args['net_type'] = "fc"
    #    agent_args['net_type'] = "conv"
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2),
                                          (128, 3, 2)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels

    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])

    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] *
                                   agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    modelDir = os.path.join(os.path.expanduser("~"),
                            "Dev/GameAI/vizdoom_cig2017/icodoom/ICO1/Models")

    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(historyLen):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen * simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])  # current timestep is the last in the stack
    else:
        agent_args['meas_for_manual'] = []

    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    img_buffer = np.zeros((historyLen, simulator.resolution[1],
                           simulator.resolution[0], num_channels),
                          dtype='uint8')

    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    try:
        checkpointFile = open("Models/checkpoint")
        try:
            modelName = checkpointFile.read().splitlines()
            if (deepBP.loadModel(modelName[0])):
                print("loaded from Model file: ", modelName[0])
            else:
                print("FAILED loading from Model file: ", modelName[0])
        except:
            print("Checkpoint file contains no valid model")
        finally:
            checkpointFile.close
    except Exception:
        print("No checkpoint found...")

    diff_z = 0
    iter = 1
    epoch = 200
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50
    reflexGain = 1E-3
    flowGain = 0.
    netGain = 10.
    reflexReduceGain = -0.01

    # create masks for left and right visual fields - note that these only cover the upper half of the image
    # this is to help prevent the tracking getting confused by the floor pattern
    half_height = round(height / 2)
    half_width = round(width / 2)

    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    lk_params = dict(winSize=(15, 15),
                     maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                               10, 0.03))
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.03,
                          minDistance=7,
                          blockSize=7)

    imgCentre = np.array([
        int(simulator_args['resolution'][0] / 2),
        int(simulator_args['resolution'][1] / 2)
    ])
    print("Image centre: ", imgCentre)
    rawInputs = np.zeros((height, width))
    cheatInputs = np.zeros((width, height))
    input_buff = np.zeros((width * height))
    target_buff = np.zeros((1, 1))
    meas_buff = np.zeros((1, simulator.num_meas))
    netOut = 0.
    netErr = np.zeros(nHidden[0])
    delta = 0.
    shoot = 0

    reflexOn = False
    iter = 0
    killed = False
    deepBP.saveModel("Models/hack.txt")

    while not term:
        if curr_step < historyLen:
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ",
                  np.amin(img))

            if curr_step == 0:
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]

        else:
            img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
            img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
            state = simulator._game.get_state()

            stateImg = state.screen_buffer

            if (curr_step % updatePtsFreq == 0):
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            p1Left, st, err = cv2.calcOpticalFlowPyrLK(img1[:, :, 0],
                                                       img2[:, :, 0], p0Left,
                                                       None, **lk_params)
            p1Right, st, err = cv2.calcOpticalFlowPyrLK(
                img1[:, :, 0], img2[:, :, 0], p0Right, None, **lk_params)
            flowLeft = (p1Left - p0Left)[:, 0, :]
            flowRight = (p1Right - p0Right)[:, 0, :]
            radialFlowTmpLeft = 0
            radialFlowTmpRight = 0

            for i in range(0, len(p0Left)):
                radialFlowTmpLeft += ((p0Left[i, 0, :] - imgCentre)).dot(
                    flowLeft[i, :]) / float(len(p0Left))
            for i in range(0, len(p0Right)):
                radialFlowTmpRight += ((p0Right[i, 0, :] - imgCentre)).dot(
                    flowRight[i, :]) / float(len(p0Right))

            rotation = act_buffer[(curr_step - 1) % historyLen][6]
            forward = act_buffer[(curr_step - 1) % historyLen][3]
            # keep separate radial errors for left and right fields
            radialFlowLeft = radialFlowLeft + radialFlowInertia * (
                radialFlowTmpLeft - radialFlowLeft)
            radialFlowRight = radialFlowRight + radialFlowInertia * (
                radialFlowTmpRight - radialFlowRight)
            expectFlowLeft = radialGain * forward + (rotationGain * rotation
                                                     if rotation < 0. else 0.)
            expectFlowRight = radialGain * forward - (rotationGain * rotation
                                                      if rotation > 0. else 0.)

            flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorRight = forward * (expectFlowRight - radialFlowRight) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorLeft = flowErrorLeft if flowErrorLeft > 0. else 0.
            flowErrorRight = flowErrorRight if flowErrorRight > 0. else 0.
            icoSteer = 0.

            if curr_step > 100:
                health = meas[1]
                if curr_step == 10000:
                    g = open("/home/paul/Dev/GameAI/vizdoom_cig2017/KD.txt",
                             "a")
                    g.write("Learning on\n")
                    g.close()

                if curr_step < 10000:
                    learningRate = 0.
                else:
                    learningRate = 1e-3

                if (health < 0.1):
                    reflexOn = False
                    iter = 0

                if (simulator._game.is_player_dead()) and killed == False:
                    g = open("/home/paul/Dev/GameAI/vizdoom_cig2017/KD.txt",
                             "a")
                    g.write("0\n")
                    g.close()
                    killed = True
                    print("KILLED")
                if (not (simulator._game.is_player_dead())):
                    killed = False

                # Don't run any networks when the player is dead!
                if (health < 101. and health > 0.):

                    icoInSteer = flowGain * (
                        (flowErrorRight - errorThresh) if
                        (flowErrorRight - errorThresh) > 0. else 0. -
                        flowGain * (flowErrorLeft - errorThresh) if
                        (flowErrorLeft - errorThresh) > 0. else 0.)

                    centre, bottomLeft, topRight, colourStrength = getMaxColourPos(
                        stateImg, [255, 0, 0])
                    colourSteer = imgCentre[0]

                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(
                            0.5 * (topRight[0] - bottomLeft[0]))
                        shoot = 1


#                        cv2.imwrite("/home/paul/tmp/Backup/rect-" + str(curr_step) + ".jpg", cheatInputs)

                    rawInputs = np.array(np.sum(stateImg, axis=2) / 3)
                    #                    cv2.imwrite("/home/paul/tmp/Backup/raw-" + str(curr_step) + ".jpg", rawInputs)

                    input_buff[:] = np.ndarray.flatten(rawInputs)
                    input_buff = input_buff - np.mean(input_buff)
                    input_buff = input_buff / np.sqrt(np.var(input_buff))

                    # We want the reflex to be delayed with respect to the image input, so that the image
                    # input precedes the reflex. Otherwise the learning can never reduce the error to zero,
                    # no matter how good the controller is.

                    oldDelta = delta
                    if (iter > 2):
                        delta = (float(colourSteer) -
                                 float(imgCentre[0])) / float(width)
                    else:
                        delta = 0

                    deltaDiff = delta - oldDelta
                    if (iter > 2):
                        if (np.abs(delta) > 0.01):
                            shoot = 0

                    netErr[:] = delta
                    target_buff[...] = delta + netOut
                    meas_buff[0, :] = meas

                    if (deepBP.getAlgorithm() == DeepFeedbackLearning.backprop
                        ):
                        netErr = netErr[0:1]

                    deepBP.setLearningRate(0.)
                    deepBP.doStep(np.ndarray.flatten(input_buff), netErr)
                    netOut = deepBP.getOutput(0)
                    netErr += reflexReduceGain * netGain * netOut

                    deepBP.setLearningRate(learningRate)
                    deepBP.doStep(np.ndarray.flatten(input_buff), netErr)
                    netOut = deepBP.getOutput(0)

                    #                    print("%s" % (" SHOOT " if shoot == 1 else "       "), deltaDiff, delta, netOut)
                    print(curr_step, delta, netGain * netOut)

                    diff_theta = 0.6 * max(min((icoInSteer), 5.), -5.)

                    netErr[:] = 0.
                    diff_theta = diff_theta + reflexGain * colourStrength * delta
                    #                    diff_z = -1.

                    curr_act = np.zeros(7).tolist()
                    curr_act[0] = 0
                    curr_act[1] = 0
                    curr_act[2] = shoot
                    curr_act[3] = curr_act[3] + diff_z
                    curr_act[4] = 0
                    curr_act[5] = 0.
                    curr_act[6] = diff_theta + netGain * netOut

                    iter += 1

            if (curr_step % epoch == 0):

                if not os.path.exists("Models"):
                    os.makedirs("Models")
                deepBP.saveModel("Models/BP-" + str(curr_step) + ".txt")

                file = open("Models/checkpoint", 'w')
                file.write("Models/BP-" + str(curr_step) + ".txt")
                file.close()

            img, meas, rwrd, term = simulator.step(curr_act)
            if (not (meas is None)) and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]
        curr_step += 1

    simulator.close_game()