Example No. 1
 def __init__(self, name, debug):
     self.name = name
     self.debug = debug
     self.setup_directory()
     self.score = 0
     self.score_delta = 0
     # setting up jar runner
     self.needs_reset = True
     self.pid = None
     self.done = False
     self.prev_obs = get_initial_obs(1)[0][0]
     self.clock = Stopwatch()
     self.step_num = 0
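All of these examples lean on a small Stopwatch timing helper that is not shown. A minimal sketch matching the interface used throughout (a .delta string, a numeric .deltaT(), .reset(), and a .startime attribute); the exact formatting of .delta is an assumption:

import time

class Stopwatch:
    '''Minimal sketch of the timing helper assumed by these examples.'''
    def __init__(self):
        self.startime = time.time()

    def reset(self):
        self.startime = time.time()

    def deltaT(self):
        # elapsed time in seconds, as a float
        return time.time() - self.startime

    @property
    def delta(self):
        # elapsed time as a human-readable string
        return '{:.3f}s'.format(self.deltaT())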
Example No. 2
    def get_spatial(self, net):
        '''
        Gets the spatial action of the network
        '''
        if self.debug:
            log("getting spatial action")
            s = Stopwatch()
        net = tf.layers.conv2d(self.spatial,
                               32, [3, 3],
                               strides=1,
                               padding='SAME',
                               activation=tf.nn.relu,
                               name="finalConv")
        net = tf.layers.conv2d(net,
                               1, [1, 1],
                               strides=1,
                               padding='SAME',
                               name="conv1x1")

        flat = tf.layers.flatten(net)
        dist = tf.distributions.Categorical(logits=flat)
        sample = dist.sample()

        coords = tf.unravel_index(sample, [self.rows, self.columns // 2])

        if self.debug:
            log("Finished spatial action inference. Took: " + s.delta)
        return coords
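For reference, the flatten/sample/unravel pattern above recovers 2-D board coordinates from a single sampled index over the flattened logits. A small NumPy illustration with toy numbers (not from the original code):

import numpy as np

rows, cols = 4, 8                      # toy grid, not the real map size
flat_index = 13                        # e.g. one sampled categorical index
coords = np.unravel_index(flat_index, (rows, cols))
print(coords)                          # (1, 5): row 1, column 5 of the grid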
Example No. 3
    def add_non_spatial(self, net):
        '''
        Infers the non-spatial action of the network
        '''
        if self.debug:
            log("Getting non-spatial action")
            s = Stopwatch()

        net = custom_layers.add_inception_resnet_B(net, '1a0')
        net = custom_layers.add_inception_resnet_B(net, '2a0')

        net = Layers.Conv2D(8, [1, 1],
                            strides=1,
                            padding='SAME',
                            activation=tf.nn.relu,
                            name="non_spat_conv2")(net)
        flatten = Layers.Flatten()(net)
        non_spatial = Layers.Dense(256,
                                   activation=tf.nn.relu,
                                   name="non_spatial")(flatten)
        a0_logits = Layers.Dense(constants.n_base_actions,
                                 name="a0")(non_spatial)

        a0_sampled = SampleCategoricalLayer()(a0_logits)

        if self.debug:
            log("Finished non-spatial action. Took: " + s.delta)

        return a0_sampled
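SampleCategoricalLayer is one of the project's custom Keras layers (it is also passed to load_model via custom_objects in Example No. 8) and its source is not shown here. A plausible minimal sketch, assuming it simply samples one index from the categorical distribution over its input logits:

import tensorflow as tf

Layers = tf.keras.layers  # matching the Layers alias used in these examples (assumption)

class SampleCategoricalLayer(Layers.Layer):
    '''Sketch: draws one sample index from the categorical distribution
    defined by the input logits (assumed behaviour of the custom layer).'''
    def call(self, logits):
        dist = tf.distributions.Categorical(logits=logits)
        return dist.sample()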
Example No. 4
    def generate_action(self):
        '''
        Scud model estimator
        '''
        if self.debug:
            log("Running conv2d on " + device)
        with tf.device('/' + device + ':0'):
            net = self.add_base()
            #print("state shape: ", net.shape) # (1, 20, 20, 32)

        ## split into non-spatial and spatial action path
        a0 = self.get_non_spatial(net)
        building = int(a0)  # now an int between 0 and 3
        if self.debug:
            log("a0 = " + str(a0))

        coords = self.get_spatial(net)
        x = int(coords[0])
        y = int(coords[1])
        if self.debug:
            log("x, y = " + str(x) + ", " + str(y))

        ## loading the state (for RNN stuffs)
        if self.debug:
            log("Loading state")
            sss = Stopwatch()
        _ = np.load('scudstate.npy')  # takes ~ 0.031s
        if self.debug:
            log("State loaded. Took: " + sss.delta)

        ## saving the state (for RNN stuffs)
        if self.debug:
            log("Saving state")
            ss = Stopwatch()
        new_state = net
        np.save('scudstate.npy', new_state)
        if self.debug:
            log("State saved. Took: " + ss.delta)

        #util.write_action(x,y,building)
        return x, y, building
Example No. 5
    def add_base(self):
        if self.debug:
            log("Adding base")
            s = Stopwatch()
        with tf.name_scope("adding_base") as scope:
            net = self.spatial
            for i in range(2):
                net = tf.layers.conv2d(net,
                                       32, [3, 3],
                                       strides=1,
                                       padding='SAME',
                                       activation=tf.nn.relu,
                                       name="conv" +
                                       str(i))  # ok well this takes 5 seconds

        if self.debug:
            log("Finished adding base. Took: " + s.delta)

        return net
Example No. 6
    def get_non_spatial(self, net):
        '''
        Infers the non-spatial action of the network
        '''
        if self.debug:
            log("Getting non-spatial action")
            s = Stopwatch()
        non_spatial = tf.layers.dense(tf.layers.flatten(net),
                                      256,
                                      activation=tf.nn.relu,
                                      name="non_spatial")
        a0 = tf.layers.dense(non_spatial, n_base_actions, name="a0")

        # TODO: possibly softmax this and then transform it into an int from 0 - 4
        # possibly use tf autoregressive distribution
        dist = tf.distributions.Categorical(logits=a0)
        sample = dist.sample()

        if self.debug:
            log("Finished non-spatial action. Took: " + s.delta)

        return sample
Example No. 7
    def add_spatial(self, net, a0):
        '''
        Gets the spatial action of the network
        '''
        if self.debug:
            log("getting spatial action")
            s = Stopwatch()

        one_hot_a0 = OneHotLayer(constants.n_base_actions)(a0)

        k = net.get_shape().as_list()
        broadcast_stats = Layers.RepeatVector(int(k[1] * k[2]))(one_hot_a0)
        broadcast_stats2 = Layers.Reshape(
            (k[1], k[2], constants.n_base_actions))(broadcast_stats)
        net = Layers.concatenate([net, broadcast_stats2],
                                 axis=-1)  # (?, 8, 8, 38)

        net = Layers.Conv2D(64, [3, 3],
                            strides=1,
                            padding='SAME',
                            activation=tf.nn.relu,
                            name="finalConv")(net)

        net = custom_layers.add_inception_resnet_B(net, '1a1')
        net = custom_layers.add_inception_resnet_B(net, '2a1')

        net = Layers.Conv2D(1, [1, 1],
                            strides=1,
                            padding='SAME',
                            name="conv1x1")(net)

        logits = Layers.Flatten()(net)

        a1_sampled = SampleCategoricalLayer()(logits)

        if self.debug:
            log("Finished spatial action inference. Took: " + s.delta)
        return a1_sampled
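OneHotLayer is likewise a project-specific layer that is not shown. A minimal sketch, assuming it one-hot encodes an integer action index to a fixed depth so it can be broadcast over the spatial grid as above:

import tensorflow as tf

class OneHotLayer(tf.keras.layers.Layer):
    '''Sketch: one-hot encodes integer indices to the given depth
    (assumed behaviour of the project's custom layer).'''
    def __init__(self, depth, **kwargs):
        super(OneHotLayer, self).__init__(**kwargs)
        self.depth = depth

    def call(self, indices):
        return tf.one_hot(tf.cast(indices, tf.int32), depth=self.depth)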
Example No. 8
                  str(path)[-50:])

    def load(self, filepath, savename):
        if savename is None:
            path = os.path.join(filepath, str(self.name) + '.h5')
        else:
            if savename.endswith('.h5') == False:
                path = os.path.join(filepath, str(savename) + '.h5')
            else:
                path = os.path.join(filepath, str(savename))
        self.model = tf.keras.models.load_model(
            path, custom_objects=custom_keras_layers)
        if self.refbot_position != -1:
            print(">> SCUD >> ", self.name, "(refbot pos ",
                  self.refbot_position, ") had model restored from file ",
                  str(path)[-50:])
        else:
            print(">> SCUD >> ", self.name, " had model restored from file ",
                  str(path)[-50:])

    def __str__(self):
        return "SCUD2 [Name: {:20} | Masking: {:3} | Refbot pos: {:2d}]".format(
            self.name, self.mask_output, self.refbot_position)


if __name__ == '__main__':
    k = Stopwatch()
    s = Scud('www', debug=True)
    we = s.get_flat_weights()
    log("Round-time was {}".format(k.delta))
Example No. 9
def main(mode):
    sTime = Stopwatch()
    env_names = ['env' + str(i) for i in range(n_envs)]

    if mode in ['test', 'rank']:
        train = False
    else:
        train = True
    if mode == 'resume':
        resume_training = True
    else:
        resume_training = False

    def make_env(name):
        def env_fn():
            env_inside = Env(name, console_debug)

            return env_inside

        return env_fn

    print('>> manager >> creating envs')
    s = Stopwatch()
    try:
        env = SubprocEnvManager([make_env(env_name) for env_name in env_names])
    except EOFError as e:
        print("caught an EOFError ", e, '\nClosing the env now')
        env.close()
        return
    print('>> manager >> created envs. Took ', s.delta)
    no_act_vec = [constants.no_op_action for _ in range(n_envs)]

    # TODO:
    # obs = np.zeros() # some initial state
    if train:
        try:
            storm.train(env, n_envs, no_act_vec, resume_training)
        except Exception as err:
            try:
                exc_info = sys.exc_info()

            finally:
                traceback.print_exception(*exc_info)
                del exc_info
        finally:
            print('>> manager >> closing env. Total runtime: ', sTime.delta)
            env.close()
            sys.exit(0)
    elif mode == 'rank':
        try:
            print("Getting MMR ranks")
            runner.mmr_from_checkpoints(env)
            print("Finished getting ranks")
        except Exception as err:
            try:
                exc_info = sys.exc_info()

            finally:
                traceback.print_exception(*exc_info)
                del exc_info
        finally:
            print('>> manager >> closing env. Total runtime: ', sTime.delta)
            env.close()
            sys.exit(0)
    else:
        try:
            actions = no_act_vec
            agents = [Scud(name=str(i), debug=False) for i in range(n_envs)]
            print(agents[0].model.count_params())
            # checkpoint_names = os.listdir(util.get_savedir('checkpoints'))
            # checkpoint_names = sorted(checkpoint_names, reverse=True)

            #agents[0].load(util.get_savedir('checkpoints'), 'gen50elite.h5')
            #agents[0].load(util.get_savedir(), 'scudsave')
            #agents[0].save(util.get_savedir(), 'scudsave')

            refbot = Scud('ref', False)
            env.reset()
            obs = util.get_initial_obs(n_envs)
            #print("manager obs shape = ", ob.shape)
            ref_act = None
            for i in range(5):
                ss = Stopwatch()

                #print(rews)
                #print("obs shape", obs.shape)
                #print("obs[:, 1] shape = ", obs[:, 1].shape) # the column of refbot obs
                try:
                    sss = Stopwatch()
                    actions = [
                        agent.step(obs[j][0]) for j, agent in enumerate(agents)
                    ]
                    print("running agents NN :", sss.delta)
                    sss.reset()
                    ref_act = refbot.step(obs[:, 1], batch_predict=True)
                    #ref_act = [refbot.step(obs[i][1]) for i in range(len(agents))]
                    print("running refbot NN :", sss.delta)
                    #ref_act = [StarterBotPrime.step(obs[j][1]) for j in range(n_envs)]
                except TypeError as e:
                    try:
                        exc_info = sys.exc_info()

                    finally:
                        traceback.print_exception(*exc_info)
                        del exc_info
                    print("TypeError!!! ", e)
                    break

                print(
                    ">> manager >> step {}, taking actions: {} and refactions {}"
                    .format(i, actions, ref_act))
                ssss = Stopwatch()
                obs, rews, infos = env.step(actions,
                                            ref_act)  # obs is n_envs x 1
                print("Running env : ", ssss.delta)
                print('>> manager >> just took step {}. Took: {}'.format(
                    i, ss.delta))
                time.sleep(0.1)

            runner.run_battle(agents[0], refbot, env)

        except Exception as err:
            try:
                exc_info = sys.exc_info()

            finally:
                traceback.print_exception(*exc_info)
                del exc_info
        finally:
            print('>> manager >> closing env. Total runtime: ', sTime.delta)
            env.close()
            sys.exit(0)
    # gets all the variables of the model
    # all_variables = agents[0].model.get_weights()

    print('>> manager >> closing env. Total runtime: ', sTime.delta)
    env.close()
    sys.exit(0)
Example No. 10
def train(env, n_envs, no_op_vec, resume_training):
    print(str('=' * 50) + '\n' + 'Initializing agents\n' + str('=' * 50))
    ##############################
    ## Summary buckets
    #failed_episodes = 0
    #early_episodes = 0
    refbot_back_ind = 1
    elite_overthrows = 0
    elite = None
    starting_gen = 0  # default starting generation number; overwritten if resuming
    ## Setting up logs
    writer = summary.create_file_writer(util.get_logdir('train12A'),
                                        flush_millis=10000)
    writer.set_as_default()
    global_step = tf.train.get_or_create_global_step()

    ## TODO: change agent layers to use xavier initializer
    agents = [Scud(name=str(i), debug=scud_debug) for i in range(n_population)]
    total_steps = 0

    elite_moving_average = metrics.MovingAverage(
        elite_score_moving_avg_periods)
    next_generation = [
        Scud(name=str(i) + 'next', debug=scud_debug)
        for i in range(n_population)
    ]

    refbot_queue = [
        Scud(name='refbot' + str(i), debug=scud_debug)
        for i in range(refbot_queue_length)
    ]
    for i, bot in enumerate(refbot_queue):
        bot.refbot_position = i
    refbot = refbot_queue[0]

    ## DOES NOT WORK WITH EAGER EXECUTION
    # with summary.always_record_summaries():
    #     summary.graph(agents[0].model.graph)
    total_s = Stopwatch()
    ########################################
    ## Restoring from last training session
    if resume_training:
        # loading up config from last train finish
        print("Restoring progress config from last run...")
        config_path = os.path.join(util.get_savedir(), 'progress.json')
        conf = json.load(open(config_path, 'r'))

        starting_gen = conf['gen_at_end'] + 1
        elite_overthrows = conf['elite_overthrows']
        total_steps = conf['total_steps']
        total_s.startime = conf['clock_start_time']
        global_step.assign(starting_gen)

        # Loading truncs, elite and refbot
        print(
            str('=' * 50) + '\n' + '>> STORM >> Resuming training.\n' +
            str('=' * 50))
        trunc_names = os.listdir(util.get_savedir('truncFinals'))
        trunc_names = sorted(trunc_names, reverse=True)

        for j in range(trunc_size):
            if j < len(trunc_names):
                agents[j + 1].load(util.get_savedir('truncFinals'),
                                   trunc_names[j])
            else:
                print("Skipping loading trunc agent for j = ", j)

        refbot_names = os.listdir(util.get_savedir('refbots'))
        refbot_names = sorted(refbot_names, reverse=False)
        refbot_q_names = refbot_names[-refbot_queue_length:]
        # sec = 0
        # for i in range(5, refbot_queue_length):
        #     refbot_queue[i].load(util.get_savedir('refbots'), refbot_q_names[sec])
        #     refbot_queue[i].refbot_position = i
        #     sec = sec + 1
        for i in range(refbot_queue_length):
            refbot_queue[i].load(util.get_savedir('refbots'),
                                 refbot_q_names[i])
            refbot_queue[i].refbot_position = i

        elite = agents[0]
        elite.load(util.get_savedir(), 'elite')

        print(">> STORM >> Successfully restored from last checkpoints")

    print(
        str('=' * 50) + '\n' + 'Beginning training (at gen ' +
        str(starting_gen) + ')\n' + str('=' * 50))
    s = Stopwatch()

    #partition_stopwatch = Stopwatch()
    for g in range(starting_gen, starting_gen + n_generations):
        #####################
        ## Hyperparameter annealing
        # gamma = gamma_func((g+1)/n_generations)

        #####################
        ## GA Algorithm
        for i in range(n_population):
            if g == 0:
                break
            else:
                kappa = random.sample(agents[0:trunc_size], 1)
                mutate(kappa[0], next_generation[i], g)
        #partition_stopwatch.lap('mutation')
        # swap agents and the next gen's agents. i.e set next gen agents to be current agents to evaluate
        tmp = agents
        agents = next_generation
        next_generation = tmp

        # evaluate fitness on each agent in population
        try:
            agents, additional_steps, rollout_info = evaluate_fitness(
                env, agents, refbot, debug=False)
        except KeyboardInterrupt as e:
            print(
                "Received keyboard interrupt {}. Saving and then closing env.".
                format(e))
            break
        total_steps += additional_steps

        # sort them based on final discounted reward
        agents = sorted(agents,
                        key=lambda agent: agent.fitness_score,
                        reverse=True)

        #partition_stopwatch.lap('fitness evaluation + sorting')

        ##################################
        ## Summary information
        with summary.always_record_summaries():
            sc_vec = [a.fitness_score for a in agents]
            summary.scalar('rewards/mean', np.mean(sc_vec))
            summary.scalar('rewards/max', agents[0].fitness_score)
            summary.scalar('rewards/min', agents[-1].fitness_score)
            summary.scalar('rewards/var', np.var(sc_vec))
            summary.scalar('rewards/truc_mean', np.mean(sc_vec[:trunc_size]))
            summary.scalar('hyperparameters/gamma', gamma)

            summary.scalar('main_rollout/agentWins', rollout_info['agentWins'])
            summary.scalar('main_rollout/refbotWins',
                           rollout_info['refbotWins'])
            summary.scalar('main_rollout/ties', rollout_info['ties'])
            summary.scalar('main_rollout/early_eps', rollout_info['early_eps'])
            summary.scalar('main_rollout/failed_eps',
                           rollout_info['failed_eps'])

            if len(rollout_info['ep_lengths']) > 0:
                mean_ep_lengg = np.mean(rollout_info['ep_lengths'])
                summary.histogram('main_rollout/ep_lengths',
                                  rollout_info['ep_lengths'])
                summary.scalar('main_rollout/mean_ep_length', mean_ep_lengg)
                print("Mean ep length: ", mean_ep_lengg)

            if len(rollout_info['agent_actions']) > 0:
                summary.histogram('main_rollout/agent_a0',
                                  rollout_info['agent_actions'])
                summary.histogram('main_rollout/agent_a0_first15steps',
                                  rollout_info['agent_early_actions'])

        print("Main stats: agent wins - {} | refbot wins - {} | Early - {}".
              format(rollout_info['agentWins'], rollout_info['refbotWins'],
                     rollout_info['early_eps']))
        for a in agents[:5]:
            print(a.name, " with fitness score: ", a.fitness_score)

        ############################################
        ## Evaluating elite candidates to find elite

        #partition_stopwatch.lap('summaries 1')
        # setup next generation parents / elite agents
        if g == 0:
            if not resume_training:
                elite_candidates = set(agents[0:n_elite_in_royale])
            else:
                elite_candidates = set(agents[0:n_elite_in_royale - 1]) | set([
                    elite,
                ])
        else:
            elite_candidates = set(agents[0:n_elite_in_royale - 1]) | set([
                elite,
            ])
        # finding next elite by battling proposed elite candidates for some additional rounds
        #print("Evaluating elite agent...")
        inds = np.random.randint(0, refbot_queue_length, size=4)
        refbots_for_elite = [refbot_queue[lolno] for lolno in inds]
        elo_ags, additional_steps, rollout_info = evaluate_fitness(
            env,
            elite_candidates,
            refbots_for_elite,
            runs=elite_additional_episodes)
        total_steps += additional_steps
        elo_ags = sorted(elo_ags,
                         key=lambda agent: agent.fitness_score,
                         reverse=True)
        if elite != elo_ags[0]:
            elite_overthrows += 1
        elite = elo_ags[0]

        #partition_stopwatch.lap('elite battle royale')

        try:
            agents.remove(elite)
            agents = [
                elite,
            ] + agents
        except ValueError:
            agents = [
                elite,
            ] + agents[:len(agents) - 1]

        print("Elite stats: agent wins - {} | refbot wins - {} | Early - {}".
              format(rollout_info['agentWins'], rollout_info['refbotWins'],
                     rollout_info['early_eps']))
        for i, a in enumerate(elo_ags):
            print('Elite stats: pos', i, '; name: ', a.name,
                  " ; fitness score: ", a.fitness_score)

        ############################
        ## Summary information 2
        with summary.always_record_summaries():
            elite_moving_average.push(elite.fitness_score)
            summary.scalar('rewards/elite_moving_average',
                           elite_moving_average.value())
            summary.scalar('rewards/elite_score', elite.fitness_score)
            summary.scalar('rewards/stable_mean',
                           np.mean([a.fitness_score for a in elo_ags]))
            summary.scalar('time/wall_clock_time', total_s.deltaT())
            summary.scalar('time/single_gen_time', s.deltaT())
            summary.scalar('time/total_game_steps', total_steps)
            summary.scalar('time/elite_overthrows', elite_overthrows)

            summary.scalar('elite_rollout/agentWins',
                           rollout_info['agentWins'])
            summary.scalar('elite_rollout/refbotWins',
                           rollout_info['refbotWins'])
            summary.scalar('elite_rollout/ties', rollout_info['ties'])
            summary.scalar('elite_rollout/early_eps',
                           rollout_info['early_eps'])
            summary.scalar('elite_rollout/failed_eps',
                           rollout_info['failed_eps'])

            if len(rollout_info['ep_lengths']) > 0:
                mean_ep_lengE = np.mean(rollout_info['ep_lengths'])
                summary.histogram('elite_rollout/ep_lengths',
                                  rollout_info['ep_lengths'])
                summary.scalar('elite_rollout/mean_ep_length', mean_ep_lengE)
                print("Elite mean ep length: ", mean_ep_lengE)

            if len(rollout_info['agent_actions']) > 0:
                summary.histogram('elite_rollout/agent_a0',
                                  rollout_info['agent_actions'])
                summary.histogram('elite_rollout/agent_a0_first15steps',
                                  rollout_info['agent_early_actions'])

            summary.scalar('hyperparameters/refbot_back_ind', refbot_back_ind)

        #################################
        ## Replacing reference bot
        if g % replace_refbot_every == 0:
            toback = refbot
            del refbot_queue[0]

            refbot_back_ind = np.random.randint(0, refbot_queue_length)
            print(
                str('=' * 50) + '\n' +
                '>> STORM >> Upgrading refbot (to pos ' +
                str(refbot_back_ind) + ') now.\n' + str('=' * 50))
            #good_params = agents[trunc_size-1].get_flat_weights()
            good_params = agents[np.random.randint(
                0, trunc_size)].get_flat_weights()
            toback.set_flat_weights(good_params)

            refbot_queue.append(toback)
            #refbot = refbot_queue[0]
            ################
            ## Sampling refbot uniformly from past <refbot_queue_length> generation's agents
            refbot = refbot_queue[refbot_back_ind]

            for meme_review, inner_refbot in enumerate(refbot_queue):
                inner_refbot.refbot_position = meme_review

            #for bot in refbot_queue:
            #    print("Bot ", bot.name, ' now has refbot pos: ', bot.refbot_position)

        #################################
        ## Saving agents periodically
        if g % save_elite_every == 0 and g != 0:
            elite.save(util.get_savedir('checkpoints'),
                       'gen' + str(g) + 'elite')
            if refbot_queue_length < 5:
                for refAgent in refbot_queue:
                    refAgent.save(
                        util.get_savedir('refbots'),
                        'gen' + str(g) + 'pos' + str(refAgent.refbot_position))

            if trunc_size < 5:
                for i, truncAgent in enumerate(agents[:trunc_size]):
                    truncAgent.save(util.get_savedir('truncs'),
                                    'gen' + str(g) + 'agent' + str(i))

        global_step.assign_add(1)

        print(
            str('=' * 50) + '\n' + 'Generation ' + str(g) + '. Took  ' +
            s.delta + '(total: ' + total_s.delta + ')\n' + str('=' * 50))
        s.reset()
        #partition_stopwatch.lap('summaries 2 and updates/saves')

    ###############################
    ## Shutdown behavior

    #print("PARTITION STOPWATCH RESULTS:") # last i checked runtime is *dominated*
    #partition_stopwatch.print_results()
    elite.save(util.get_savedir(), elite_savename)
    summary.flush()
    for i, ag in enumerate(agents[:trunc_size]):
        ag.save(util.get_savedir('truncFinals'), 'finalTrunc' + str(i))

    print("End refbot queue: ", len(refbot_queue))
    for identity, refAgent in enumerate(refbot_queue):
        refAgent.save(util.get_savedir('refbots'),
                      'finalRefbot{:03d}'.format(identity))

    ##########################
    ## Saving progress.config
    conf = {}
    conf['gen_at_end'] = g
    conf['gamma_at_end'] = gamma
    conf['elite_overthrows'] = elite_overthrows
    conf['total_steps'] = total_steps
    conf['clock_start_time'] = total_s.startime
    path = os.path.join(util.get_savedir(), 'progress.json')
    with open(path, 'w') as config_file:
        config_file.write(json.dumps(conf))
    print(">> STORM >> Saved progress.config to: ", path)
Example No. 11
def evaluate_fitness(env, agents, refbot, runs=1, debug=False):
    '''
    Function to run [agents] in the [env] for [runs] number of times each. 
    i.e performs rollouts of each agent [runs] number of times.

    - agents : list of agents on which to evaluate rollouts
    - env : env to run agents through
    - refbot : agent which will be player B for all agents
    - runs : int number of times each agent should play a rollout
    '''
    if type(refbot) == np.ndarray:
        assert (runs * len(agents)) % len(
            refbot) == 0, "Please don't be stupid. refbots={}".format(refbot)

    queue = list(agents)
    queue = runs * queue
    init_length = len(queue)
    n_envs = env.num_envs
    print(">> ROLLOUTS >> Running rollout wave with queue length  ",
          init_length)
    pbar = metrics.ProgressBar(init_length)
    interior_steps = 0
    rollout_info = {
        'early_eps': 0,
        'failed_eps': 0,
        'agentWins': 0,
        'refbotWins': 0,
        'ties': 0,
        'ep_lengths': [],
        'agent_actions': [],
        'agent_early_actions': []
    }

    next_refbot = refbot

    while len(queue) > 0:
        pbar.show(init_length - len(queue))

        if len(queue) >= n_envs:
            cur_playing_agents = [queue.pop() for i in range(n_envs)]
        else:
            cur_playing_agents = [queue.pop() for i in range(len(queue))]

        step = 0
        dummy_actions = [(
            0,
            0,
            3,
        ) for _ in range(n_envs - len(cur_playing_agents))]
        suc = env.reset()
        if all(suc) == False:
            print("something f****d out. Could not reset all envs.")
            return

        #obs = env.get_base_obs()
        obs = util.get_initial_obs(n_envs)

        if type(next_refbot) == np.ndarray or type(next_refbot) == list:
            next_refbot = refbot.pop()

        for a in cur_playing_agents:
            a.fitness_score = 0
            a.mask_output = False

        while step < max_episode_length:
            if debug:
                ss = Stopwatch()
            actions = [
                agent.step(obs[i][0])
                for i, agent in enumerate(cur_playing_agents)
            ]

            #ref_actions = [refbot.step(obs[i][1]) for i in range(len(obs))]
            ref_actions = next_refbot.step(obs[:, 1], batch_predict=True)

            if len(dummy_actions) > 0:
                actions.extend(dummy_actions)

            if len(actions) != len(ref_actions):
                print("LEN OF ACTIONS != LEN OF REF ACTIONS!!!!")
                raise ValueError

            if debug:
                print(">> storm >> taking actions: ", actions,
                      ' and ref actions ', ref_actions)

            obs, rews, ep_infos = env.step(actions, p2_actions=ref_actions)

            interior_steps += n_envs
            ## TODO: loop through obs and check which one is a ControlObj, and stop processing the agents for the rest of that episode
            failure = False
            for i, a in enumerate(cur_playing_agents):
                if type(rews[i][0]) == util.ControlObject:
                    if rews[i][0].code == "EARLY":
                        a.mask_output = True
                        if step == max_episode_length - 1:
                            rollout_info['early_eps'] += 1

                    elif rews[i][0].code == "FAILURE":
                        # redo this whole f*****g batch
                        rollout_info['failed_eps'] += 1
                        failure = True
                        break
                else:
                    inner_rew = rews[i][0]
                    if 'valid' in ep_infos[i].keys():
                        if ep_infos[i]['valid'] == False:
                            if scoring_method == 'binary':
                                inner_rew += invalid_act_penalty_binary
                            else:
                                inner_rew += invalid_act_penalty_dense

                    a.fitness_score = inner_rew + gamma * a.fitness_score
                    _, _, building_act = actions[i]
                    rollout_info['agent_actions'].append(building_act)
                    if step < 15:
                        rollout_info['agent_early_actions'].append(
                            building_act)

                if 'winner' in ep_infos[i].keys():
                    if ep_infos[i]['winner'] == 'A':
                        rollout_info['agentWins'] += 1
                    elif ep_infos[i]['winner'] == 'B':
                        rollout_info['refbotWins'] += 1
                    else:
                        rollout_info['ties'] += 1
                    rollout_info['ep_lengths'].append(ep_infos[i]['n_steps'])

            if failure:
                curQlen = len(queue)
                queue = cur_playing_agents + queue
                print(
                    "Failure detected. Redoing last batch... (len Q before = ",
                    curQlen, ' ; after = ', len(queue), ')')
                break

            if debug:
                print("obs shape = ", obs.shape)
                print("rews shape = ", rews.shape)
                print('>> storm >> just took step {}. Took: {}'.format(
                    step, ss.delta))
            step = step + 1

        for a in cur_playing_agents:
            a.fitness_averaging_list.append(a.fitness_score)

    for a in agents:
        a.squash_fitness_scores()

    pbar.close()

    return agents, interior_steps, rollout_info
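Each rollout's discounted return is appended to the agent's fitness_averaging_list, and squash_fitness_scores (not shown) collapses those values into the single fitness_score used for sorting. A minimal sketch of that agent method, assuming a plain average:

import numpy as np

def squash_fitness_scores(self):
    '''Sketch: collapse the per-rollout scores gathered in
    fitness_averaging_list into one fitness_score (averaging is an
    assumption), then clear the list for the next generation.'''
    if len(self.fitness_averaging_list) > 0:
        self.fitness_score = float(np.mean(self.fitness_averaging_list))
    self.fitness_averaging_list = []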
Example No. 12
class Env():

    def __init__(self, name, debug):
        self.name = name
        self.debug = debug
        self.setup_directory()
        self.score = 0
        self.score_delta = 0
        # setting up jar runner
        self.needs_reset = True
        self.pid = None
        self.done = False
        self.prev_obs = get_initial_obs(1)[0][0]
        self.clock = Stopwatch()
        self.step_num = 0

    def setup_directory(self):
        # creates the dirs responsible for this env, 
        # and moves a copy of the runner and config to that location
        
        print("Setting up file directory for " + self.name + " with pid " + str(os.getpid()))
        basedir = os.path.dirname(os.path.abspath(__file__)) # now in scudstorm dir
        self.run_path = os.path.join(basedir, 'runs', self.name)
        if os.path.isdir(self.run_path):
            shutil.rmtree(self.run_path)
        self.wrapper_path = os.path.join(self.run_path, 'jar_wrapper.py')
        os.makedirs(self.run_path, exist_ok=True)
        jarpath = os.path.join(basedir, jar_name)
        copy2(jarpath, self.run_path)
        config_path = os.path.join(basedir, config_name)
        copy2(config_path, self.run_path)
        wrapper_path = os.path.join(basedir, 'common', 'jar_wrapper.py')
        copy2(wrapper_path, self.run_path)
        botdir = os.path.join(basedir, bot_file_name)
        copy2(botdir, self.run_path)
        copy2(os.path.join(basedir, game_config_name), self.run_path)

        # Copying over reference bot
        self.refbot_path = os.path.join(self.run_path, 'refbot')

        if os.path.isdir(self.refbot_path):
            shutil.rmtree(self.refbot_path)
        refbotdir = os.path.join(basedir, 'refbot')
        shutil.copytree(refbotdir, self.refbot_path)

        self.in_file = os.path.join(self.run_path, wrapper_out_filename)
        self.state_file = os.path.join(self.run_path, state_name)
        self.bot_file = os.path.join(self.run_path, bot_file_name)
        self.proc = None
        self.refenv = RefEnv(self, debug=self.debug)
        
        with open(self.in_file, 'w') as f:
            f.write('0')
        # run path should now have the jar, config and jar wrapper files

    def step(self, action, ref_act):
        #############################
        ## Maintenance on the process
        if self.needs_reset:
            self.reset()

        #######################
        ## Debug messages
        if self.debug:
            with open(os.path.join(self.run_path, 'mylog.txt'), 'a') as f:
                f.write(str(time.time()) + "\t-->Wanting to do op:!!!\t" + str(action) + '\t' + str(ref_act) + '\n')

            with open(os.path.join(self.refbot_path, 'mylog.txt'), 'a') as f:
                f.write(str(time.time()) + "\t-->Wanting to do op:!!!\t" + str(ref_act) + '\n')
        ep_info = {}
        #######################
        ## Writing actions
        x2, y2, build2 = ref_act
        write_prep_action(x2, y2, build2, path=self.refbot_path, debug=self.debug)

        x, y, build = action
        write_prep_action(x, y, build, path=self.run_path, debug=self.debug)

        #######################
        ## Signalling to jar wrappers to begin their running step
        with open(self.in_file, 'w') as f:
            # we want start of a new step
            if self.debug:
                print(">> pyenv {} >> writing 2 to file {}".format(self.name, self.in_file))
            f.write('2')

        with open(self.refenv.in_file, 'w') as f:
            # we want start of a new step
            if self.debug:
                print(">> pyenv {} >> writing 2 to file {}".format(self.refenv.name, self.refenv.in_file))
            f.write('2')

        #######################
        ## Checking if episode ended early
        if self.proc.poll() is not None and self.done:
            # env ended last step, so reset:
            if self.debug:
                print(">> PYENV ", self.name ," >>  Ended early")
            cntrl_obj = ControlObject('EARLY')
            tp = np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1)
            return tp, np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1), ep_info

        #######################
        ## Taking step

        # Vars for Env
        obs = None
        should_load_obs = False
        reward = None
        # Vars for ref env
        ref_obs = None
        should_load_obs2 = False
        # Waiting for responses from the jar wrappers
        stopw = Stopwatch()
        failure = False
        while True:
            if should_load_obs == False:
                with open(self.in_file, 'r') as ff:
                    k = ff.read()
                    try:
                        k = int(k)
                    except ValueError:
                        continue
                    if k == 1:
                        #print("just wrote 0 to the ", self.out_file)
                        # a new turn has just been processed
                        should_load_obs = True

            if should_load_obs2 == False:
                with open(self.refenv.in_file, 'r') as ff:
                    k2 = ff.read()
                    try:
                        k2 = int(k2)
                    except ValueError:
                        continue
                    if k2 == 1:
                        #print("just wrote 0 to the ", self.out_file)
                        # a new turn has just been processed
                        should_load_obs2 = True

            if should_load_obs == True and should_load_obs2 == True:
                break
            
            if self.proc.poll() is not None and not self.done:
                #ep ended early.
                if self.debug:
                    print("PYENV: >> GAME ENDING EARLY FOR THE FIRST TIME")
                self.done = True

                valid, reason = is_valid_action(action, self.prev_obs)
                obs = self.load_state()
                self.prev_obs = obs
                
                ep_info['n_steps'] = self.step_num

                if valid == True:
                    ep_info['valid'] = True
                else:
                    ep_info['valid'] = False

                ref_obs = self.refenv.load_state()
                if obs['players'][0]['playerType'] == 'A':
                    a_hp = obs['players'][0]['health']
                    b_hp = obs['players'][1]['health']
                else:
                    a_hp = obs['players'][1]['health']
                    b_hp = obs['players'][0]['health']
                k = np.asarray([obs,])
                u = np.asarray([ref_obs,])
                return_obs = np.concatenate([k, u], axis=-1)
                if reward_mode == 'dense':
                    win_reward = dense_win_reward
                    lose_reward = -1 * dense_win_reward
                else:
                    win_reward = binary_win_reward
                    lose_reward = -1 * binary_win_reward
                if a_hp > b_hp:
                    # player a wins
                    ep_info['winner'] = 'A'
                    return return_obs, np.concatenate([np.asarray([win_reward,]), np.asarray([lose_reward,])], axis=-1), ep_info
                elif a_hp < b_hp:
                    ep_info['winner'] = 'B'
                    return return_obs, np.concatenate([np.asarray([lose_reward,]), np.asarray([win_reward,])], axis=-1), ep_info
                else:
                    ep_info['winner'] = 'TIE'
                    return return_obs, np.concatenate([np.asarray([0.0,]), np.asarray([0.0,])], axis=-1), ep_info

            if stopw.deltaT() > 3:
                # we have waited more than 3s, game clearly ended
                self.needs_reset = True
                failure = True
                print('pyenv: env ' + str(self.name) + ' with pid ' + str(self.pid) + ' encountered error. (', should_load_obs, ',',should_load_obs2, ')' , time.time())
                break

            time.sleep(0.01)
        # TODO: possibly pre-parse obs here and derive a reward from it?
        
        #########################
        ## Loading the obs if their jar's ended properly
        #ref_obs, _ = self.refenv.step(ref_act)
        if should_load_obs:
            valid, reason = is_valid_action(action, self.prev_obs)
            obs = self.load_state()
            self.prev_obs = obs

            if valid == True:
                ep_info['valid'] = True
            else:
                ep_info['valid'] = False
        if should_load_obs2:
            ref_obs = self.refenv.load_state()

        if obs is None and self.debug == True:
            print(">> PY_ENV >> MAIN OBS IS NONE (", self.name, ")")

        if ref_obs is None:
            print(">> PY_ENV >> REF OBS IS NONE. (", self.name, ")")

        if failure == True:
            cntrl_obj = ControlObject('FAILURE')
            tp = np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1)
            return tp, np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1), ep_info

        # print('-----A------------->', obs['players'][0]['health'])
        # print('-----B------------->', obs['players'][1]['health'])
        self.step_num += 1

        ########################
        ## Forming rewards and packaging the obs into a good numpy form
        if obs is not None:
            # Infer reward:
            #reward = float(obs['players'][0]['score']) - float(obs['players'][1]['score'])
            curS = float(obs['players'][0]['score']) * general_reward_scaling_factor
            self.score_delta = curS - self.score
            reward = self.score_delta + per_step_reward_penalty
            self.score = curS

        if ref_obs is not None:
            curS2 = float(ref_obs['players'][0]['score']) * general_reward_scaling_factor
            self.refenv.score_delta = curS2 - self.refenv.score
            ref_reward = self.refenv.score_delta + per_step_reward_penalty
            self.refenv.score = curS2

        k = np.asarray([obs,])
        u = np.asarray([ref_obs,])
        return_obs = np.concatenate([k, u], axis=-1)
        if reward_mode == 'dense':
            return return_obs, np.concatenate([np.asarray([reward,]), np.asarray([ref_reward,])], axis=-1), ep_info
        elif reward_mode == 'binary':
            return return_obs, np.concatenate([np.asarray([binary_step_penalty,]), np.asarray([binary_step_penalty,])], axis=-1), ep_info


    def load_state(self):
        '''
        Gets the current Game State json file.
        '''
        while os.path.isfile(self.state_file) == False:
            if self.debug:
               print(">> PYENV >> waiting for state file  ", self.state_file, ' to appear')
            time.sleep(0.01)

        flag = False
        while flag == False:
            try:
                k = json.load(open(self.state_file,'r'))
                flag = True
                break
            except json.decoder.JSONDecodeError as e:
                k = None
                if self.debug:
                    print(">> PYENV >> Failed to decode json state! Got error ", e)
                time.sleep(0.01)

        return k

    def get_obs(self):
        this_obs = self.load_state()
        refbot_obs = self.refenv.load_state()
        x = np.asarray([this_obs,])
        y = np.asarray([refbot_obs,])

        return np.concatenate([x, y], axis=-1)

    def reset(self):
        self.step_num = 0
        if self.debug:
            with open(os.path.join(self.run_path, 'mylog.txt'), 'a') as f:
                f.write(str(time.time()) + "\t-->RESETTING!!!\n")

            with open(os.path.join(self.refbot_path, 'mylog.txt'), 'a') as f:
                f.write(str(time.time()) + "\t-->RESETTING!!!\n")

        if self.proc is not None:
            self.proc.terminate()
            self.proc.wait()
        self.needs_reset = False
        self.done = False
        time.sleep(0.01)
        # trying to kill jar wrapper of this env
        pid_file = os.path.join(self.run_path, 'wrapper_pid.txt')
        if os.path.isfile(pid_file):
            flag = False
            while flag == False:
                with open(pid_file, 'r') as f:
                    try:
                        wrapper_pid = int(f.read())
                    except ValueError:
                        continue
                    if wrapper_pid == 0:
                        flag = True
                        return None
                    else:
                        flag = True
                        try:
                            os.kill(wrapper_pid, signal.SIGTERM)
                        except (PermissionError, ProcessLookupError) as e:
                            if self.debug:
                                print(">> PYENV ", self.name, " >> Attempted to close wrapper pid ", wrapper_pid, " but got ERROR ", e)
                        break
        else:
            if self.debug:
                print(">> PYENV >> Attempted to close wrapper pid but the wrapper pid file was not found ")
        ## Trying to prevent reset bugs from propping up
        # if os.path.isdir(self.refbot_path):
        #     shutil.rmtree(self.refbot_path)
        # refbotdir = os.path.join(basedir, 'refbot')
        # shutil.copytree(refbotdir, self.refbot_path)

        ## Trying to kill jar wrapper of ref env
        refpid_file = os.path.join(self.refbot_path, 'wrapper_pid.txt')
        if os.path.isfile(refpid_file):
            flag = False
            while flag == False:
                with open(refpid_file, 'r') as f:
                    try:
                        wrapper_pid2 = int(f.read())
                    except ValueError:
                        continue
                    if wrapper_pid2 == 0:
                        flag = True
                        return None
                    else:
                        flag = True
                        try:
                            os.kill(wrapper_pid2, signal.SIGTERM)
                        except (PermissionError, ProcessLookupError) as e:
                            if self.debug:
                                print(">> PYENV ", self.name, " >> Attempted to close refbot wrapper pid ", wrapper_pid2, " but got ERROR ", e)
        else:
            if self.debug:
                print(">> PYENV >> Attempted to close refbot wrapper pid but the wrapper pid file was not found ")
        time.sleep(0.01)
        
        #######################
        ## Flushing matchlogs folder if env alive for over 30 minutes
        if self.clock.deltaT() >= 1800:
            print(">> PYENV {} >> Env alive for over half an hour, flushing (deleting) matchlogs folder".format(self.name))
            self.cleanup()
            self.clock.reset()
            print("Cleand.")

        command = 'java -jar ' + os.path.join(self.run_path, jar_name)

        if sys.platform == "win32":
            she = False
        else:
            she = True

        if self.debug:
            self.proc = subprocess.Popen(command, shell=she , stdout=subprocess.PIPE, cwd=self.run_path)
            print("Opened process: ", str(command), " with pid ", self.proc.pid)
        else:
            self.proc = subprocess.Popen(command, shell=she, stdout=subprocess.DEVNULL, cwd=self.run_path)
        
        self.pid = self.proc.pid
        time.sleep(0.01)

        return True

    def close(self):
        if self.debug:
            print("Closing env ", self.name)
        # clean up after itself
        
        if self.pid is not None:
            self.needs_reset = True
            self.proc.terminate()
            self.proc.wait()
        else:
            return None

        time.sleep(0.1)
        pid_file = os.path.join(self.run_path, 'wrapper_pid.txt')
        if os.path.isfile(pid_file):
            flag = False
            while flag == False:
                with open(pid_file, 'r') as f:
                    try:
                        wrapper_pid = int(f.read())
                    except ValueError:
                        continue
                    if wrapper_pid == 0:
                        flag = True
                        return None
                    else:
                        flag = True
                        try:
                            os.kill(wrapper_pid, signal.SIGTERM)
                        except (PermissionError, ProcessLookupError) as e:
                            if self.debug:
                                print(">> PYENV ", self.name, " >> Attempted to close wrapper pid ", wrapper_pid, " but got ERROR ", e)
                        break
        else:
            print(">> PYENV >> Attempted to close wrapper pid but the wrapper pid file was not found ")
        time.sleep(0.1)

        refpid_file = os.path.join(self.refbot_path, 'wrapper_pid.txt')
        if os.path.isfile(refpid_file):
            flag = False
            while flag == False:
                with open(refpid_file, 'r') as f:
                    try:
                        wrapper_pid2 = int(f.read())
                    except ValueError:
                        continue
                    if wrapper_pid2 == 0:
                        flag = True
                        return None
                    else:
                        flag = True
                        try:
                            os.kill(wrapper_pid2, signal.SIGTERM)
                        except (PermissionError, ProcessLookupError) as e:
                            if self.debug:
                                print(">> PYENV ", self.name, " >> Attempted to close refbot wrapper pid ", wrapper_pid2, " but got ERROR ", e)
        else:
            if self.debug:
                print(">> PYENV >> Attempted to close refbot wrapper pid but the wrapper pid file was not found ")
        time.sleep(0.1)

        self.pid = None
        return True
        
    def cleanup(self):
        log_path = os.path.join(self.run_path, 'matchlogs')

        if self.debug:
            print("Removing folder: ", log_path)
        try:
            if keep_log_folder_override == False:
                shutil.rmtree(log_path)
            else:
                print(">> PYENV >> OVERRIDE - Keeping log files.")
            time.sleep(0.1)
        except Exception:
            print(">> PYENV >> Exception occured while removing matchlogs folder")
Example No. 13
def fight(env, agent1, agent2, n_fights, max_steps, debug=False):
    '''
    Runs [agent1] against [agent2] in the [env] for [n_fights] episodes of at
    most [max_steps] steps each, and tallies the results.

    - env : env manager to run the matches through
    - agent1 : agent playing as player A in every match
    - agent2 : agent playing as player B in every match
    - n_fights : int number of episodes agent1 should play
    - max_steps : int maximum number of steps per episode
    '''

    queue = list([
        agent1,
    ])
    queue = n_fights * queue
    init_length = len(queue)
    n_envs = env.num_envs
    print(">> ROLLOUTS >> Running rollout wave with queue length  ",
          init_length)
    pbar = metrics.ProgressBar(init_length)
    interior_steps = 0
    early_eps = 0
    failed_eps = 0
    agent1Wins = 0
    agent2Wins = 0
    ties = 0

    while len(queue) > 0:
        # KEEP THIS THERE OTHERWISE SHIT BREAKS
        pbar.show(init_length - len(queue))

        if len(queue) >= n_envs:
            cur_playing_agents = [queue.pop() for i in range(n_envs)]
        else:
            cur_playing_agents = [queue.pop() for i in range(len(queue))]

        step = 0
        dummy_actions = [(
            0,
            0,
            3,
        ) for _ in range(n_envs - len(cur_playing_agents))]
        suc = env.reset()
        if all(suc) == False:
            print("something f****d out. Could not reset all envs.")
            return
        #obs = env.get_base_obs()
        obs = util.get_initial_obs(n_envs)

        for a in cur_playing_agents:
            a.fitness_score = 0
            a.mask_output = False
            agent2.mask_output = False

        ## TODO: Modify this for loop to be able to end early for games which finish early
        while step < max_steps:
            if debug:
                ss = Stopwatch()
            actions = [
                agent.step(obs[i][0])
                for i, agent in enumerate(cur_playing_agents)
            ]
            ref_actions = [agent2.step(obs[i][1]) for i in range(len(obs))]

            if len(dummy_actions) > 0:
                actions.extend(dummy_actions)

            if len(actions) != len(ref_actions):
                print("LEN OF ACTIONS != LEN OF REF ACTIONS!!!!")
                raise ValueError

            if debug:
                print(">> storm >> taking actions: ", actions,
                      ' and ref actions ', ref_actions)

            obs, rews, ep_infos = env.step(actions, p2_actions=ref_actions)
            interior_steps += n_envs
            ## TODO: loop through obs and check which one is a ControlObj, and stop processing the agents for the rest of that episode
            failure = False
            for i, a in enumerate(cur_playing_agents):
                if type(rews[i][0]) == util.ControlObject:
                    if rews[i][0].code == "EARLY":
                        a.mask_output = True
                        if step == max_steps - 1:
                            early_eps += 1
                        #a.fitness_score = a.fitness_score + 1
                    elif rews[i][0].code == "FAILURE":
                        # redo this whole f*****g batch
                        failed_eps += 1
                        failure = True
                        break
                else:
                    #print(rews)
                    # if rews[i][0] >= 0.95:
                    #     agent1Wins += 1
                    # elif rews[i][1] >= 0.95:
                    #     agent2Wins += 1
                    pass
                    #a.fitness_score = rews[i][0] + gamma*a.fitness_score
                if 'winner' in ep_infos[i].keys():
                    if ep_infos[i]['winner'] == 'A':
                        agent1Wins += 1
                    elif ep_infos[i]['winner'] == 'B':
                        agent2Wins += 1
                    elif ep_infos[i]['winner'] == 'TIE':
                        ties += 1

            if failure:
                curQlen = len(queue)
                queue = cur_playing_agents + queue
                print(
                    "Failure detected. Redoing last batch... (len Q before = ",
                    curQlen, ' ; after = ', len(queue), ')')
                break

            if debug:
                print("obs shape = ", obs.shape)
                print("rews shape = ", rews.shape)
                print('>> storm >> just took step {}. Took: {}'.format(
                    step, ss.delta))
            step = step + 1

    pbar.close()

    return agent1Wins, agent2Wins, early_eps, failed_eps, ties
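A hedged usage sketch for fight; the variable names and the n_fights/max_steps values below are illustrative, not from the original code:

# Illustrative only -- 'env', 'elite' and 'refbot' are assumed to exist.
a_wins, b_wins, early, failed, ties = fight(env, elite, refbot,
                                            n_fights=16, max_steps=200)
print("agent1 wins:", a_wins, "| agent2 wins:", b_wins, "| ties:", ties)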
Example No. 14
Net = tf.contrib.eager.Network
'''
Internal agent config
'''
debug = True
n_base_actions = 4  # number of base actions -- 0=NO OP, 1=DEFENSE, 2=OFFENSE, 3=ENERGY...
debug_verbose = False
endpoints = {}
device = 'cpu'
tf.enable_eager_execution()
# let an example map size be 20x40, so each player's building area is 20x20

if debug_verbose and debug:
    log("Testing tensorflow")
    s = Stopwatch()
    print("TensorFlow version: {}".format(tf.VERSION))
    print("Eager execution: {}".format(tf.executing_eagerly()))

    log("Finished, took: " + s.delta)


class Scud(object):
    def __init__(self, obs, name, debug=False):
        '''
        Initialize Bot.
        Load all game state information.
        '''

        self.debug = debug
        try:
Example No. 15
    def step(self, action, ref_act):
        #############################
        ## Maintenance on the process
        if self.needs_reset:
            self.reset()

        #######################
        ## Debug messages
        if self.debug:
            with open(os.path.join(self.run_path, 'mylog.txt'), 'a') as f:
                f.write(str(time.time()) + "\t-->Wanting to do op:!!!\t" + str(action) + '\t' + str(ref_act) + '\n')

            with open(os.path.join(self.refbot_path, 'mylog.txt'), 'a') as f:
                f.write(str(time.time()) + "\t-->Wanting to do op:!!!\t" + str(ref_act) + '\n')
        ep_info = {}
        #######################
        ## Writing actions
        x2, y2, build2 = ref_act
        write_prep_action(x2, y2, build2, path=self.refbot_path, debug=self.debug)

        x, y, build = action
        write_prep_action(x, y, build, path=self.run_path, debug=self.debug)

        #######################
        ## Signalling to jar wrappers to begin their running step
        with open(self.in_file, 'w') as f:
            # we want start of a new step
            if self.debug:
                print(">> pyenv {} >> writing 2 to file {}".format(self.name, self.in_file))
            f.write('2')

        with open(self.refenv.in_file, 'w') as f:
            # we want start of a new step
            if self.debug:
                print(">> pyenv {} >> writing 2 to file {}".format(self.refenv.name, self.refenv.in_file))
            f.write('2')

        #######################
        ## Checking if episode ended early
        if self.proc.poll() is not None and self.done:
            # env ended last step, so reset:
            if self.debug:
                print(">> PYENV ", self.name ," >>  Ended early")
            cntrl_obj = ControlObject('EARLY')
            tp = np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1)
            return tp, np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1), ep_info

        #######################
        ## Taking step

        # Vars for Env
        obs = None
        should_load_obs = False
        reward = None
        # Vars for ref env
        ref_obs = None
        should_load_obs2 = False
        # Waiting for responses from the jar wrappers
        stopw = Stopwatch()
        failure = False
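        # Handshake: each jar wrapper writes '1' back to its in_file once it has processed
        # the turn. We poll both files until both have responded, detect an early game end
        # if the process exits, and give up (flag a failure) after roughly 3 seconds.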
        while True:
            if not should_load_obs:
                with open(self.in_file, 'r') as ff:
                    k = ff.read()
                    try:
                        k = int(k)
                    except ValueError:
                        continue
                    if k == 1:
                        #print("just wrote 0 to the ", self.out_file)
                        # a new turn has just been processed
                        should_load_obs = True

            if not should_load_obs2:
                with open(self.refenv.in_file, 'r') as ff:
                    k2 = ff.read()
                    try:
                        k2 = int(k2)
                    except ValueError:
                        continue
                    if k2 == 1:
                        #print("just wrote 0 to the ", self.out_file)
                        # a new turn has just been processed
                        should_load_obs2 = True

            if should_load_obs and should_load_obs2:
                break
            
            if self.proc.poll() is not None and not self.done:
                #ep ended early.
                if self.debug:
                    print("PYENV: >> GAME ENDING EARLY FOR THE FIRST TIME")
                self.done = True

                valid, reason = is_valid_action(action, self.prev_obs)
                obs = self.load_state()
                self.prev_obs = obs
                
                ep_info['n_steps'] = self.step_num

                ep_info['valid'] = valid

                ref_obs = self.refenv.load_state()
                if obs['players'][0]['playerType'] == 'A':
                    a_hp = obs['players'][0]['health']
                    b_hp = obs['players'][1]['health']
                else:
                    a_hp = obs['players'][1]['health']
                    b_hp = obs['players'][0]['health']
                k = np.asarray([obs,])
                u = np.asarray([ref_obs,])
                return_obs = np.concatenate([k, u], axis=-1)
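                # Terminal rewards: the winner gets +win_reward, the loser gets -win_reward,
                # and a tie gives 0, scaled by dense_win_reward or binary_win_reward
                # depending on reward_mode.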
                if reward_mode == 'dense':
                    win_reward = dense_win_reward
                    lose_reward = -1 * dense_win_reward
                else:
                    win_reward = binary_win_reward
                    lose_reward = -1 * binary_win_reward
                if a_hp > b_hp:
                    # player a wins
                    ep_info['winner'] = 'A'
                    return return_obs, np.concatenate([np.asarray([win_reward,]), np.asarray([lose_reward,])], axis=-1), ep_info
                elif a_hp < b_hp:
                    ep_info['winner'] = 'B'
                    return return_obs, np.concatenate([np.asarray([lose_reward,]), np.asarray([win_reward,])], axis=-1), ep_info
                else:
                    ep_info['winner'] = 'TIE'
                    return return_obs, np.concatenate([np.asarray([0.0,]), np.asarray([0.0,])], axis=-1), ep_info

            if stopw.deltaT() > 3:
                # no response for more than 3s; treat the env as hung and flag a failure
                self.needs_reset = True
                failure = True
                print('pyenv: env ' + str(self.name) + ' with pid ' + str(self.pid) + ' encountered error. (', should_load_obs, ',',should_load_obs2, ')' , time.time())
                break

            time.sleep(0.01)
        # TODO: possibly pre-parse obs here and derive a reward from it?
        
        #########################
        ## Loading the obs if their jar's ended properly
        #ref_obs, _ = self.refenv.step(ref_act)
        if should_load_obs:
            valid, reason = is_valid_action(action, self.prev_obs)
            obs = self.load_state()
            self.prev_obs = obs

            ep_info['valid'] = valid
        if should_load_obs2:
            ref_obs = self.refenv.load_state()

        if obs is None and self.debug:
            print(">> PY_ENV >> MAIN OBS IS NONE (", self.name, ")")

        if ref_obs is None:
            print(">> PY_ENV >> REF OBS IS NONE. (", self.name, ")")

        if failure:
            cntrl_obj = ControlObject('FAILURE')
            tp = np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1)
            return tp, np.concatenate([np.asarray([cntrl_obj,]), np.asarray([cntrl_obj,])], axis=-1), ep_info

        # print('-----A------------->', obs['players'][0]['health'])
        # print('-----B------------->', obs['players'][1]['health'])
        self.step_num += 1

        ########################
        ## Forming rewards and packaging the obs into a good numpy form
        if obs is not None:
            # Infer reward:
            #reward = float(obs['players'][0]['score']) - float(obs['players'][1]['score'])
            curS = float(obs['players'][0]['score']) * general_reward_scaling_factor
            self.score_delta = curS - self.score
            reward = self.score_delta + per_step_reward_penalty
            self.score = curS

        if ref_obs is not None:
            curS2 = float(ref_obs['players'][0]['score']) * general_reward_scaling_factor
            self.refenv.score_delta = curS2 - self.refenv.score
            ref_reward = self.refenv.score_delta + per_step_reward_penalty
            self.refenv.score = curS2

        k = np.asarray([obs,])
        u = np.asarray([ref_obs,])
        return_obs = np.concatenate([k, u], axis=-1)
        if reward_mode == 'dense':
            return return_obs, np.concatenate([np.asarray([reward,]), np.asarray([ref_reward,])], axis=-1), ep_info
        elif reward_mode == 'binary':
            return return_obs, np.concatenate([np.asarray([binary_step_penalty,]), np.asarray([binary_step_penalty,])], axis=-1), ep_info
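Because step() mixes ordinary float rewards with ControlObject sentinels, a caller has to type-check before using the values. A minimal, purely illustrative sketch of a single-env caller follows (the variable names are hypothetical and ControlObject is assumed importable as used above; parallel_fight below is the real vectorised consumer):

# Illustrative only -- assumes `env` is the wrapper whose step() is shown above.
obs_pair, rews, ep_info = env.step((x, y, build), (ref_x, ref_y, ref_build))
if isinstance(rews[0], ControlObject):
    if rews[0].code == 'EARLY':
        agent.mask_output = True   # episode already over; stop acting in this env
    elif rews[0].code == 'FAILURE':
        pass                       # jar wrapper hung; discard and redo this batch
else:
    my_reward, ref_reward = float(rews[0]), float(rews[1])
    if 'winner' in ep_info:        # terminal step; ep_info may also carry 'valid' / 'n_steps'
        print(ep_info['winner'])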
Ejemplo n.º 16
0
    def add_base(self):
        if self.debug:
            log("Adding base")
            s = Stopwatch()
        with tf.name_scope("adding_base") as scope:
            net = self.input

            net = Layers.Conv2D(32, [3, 3],
                                strides=1,
                                padding='SAME',
                                activation=tf.nn.relu,
                                name="baseConv1")(net)
            net = Layers.Conv2D(32, [3, 3],
                                strides=1,
                                padding='SAME',
                                activation=tf.nn.relu,
                                name="baseConv2")(net)
            net = Layers.Conv2D(64, [3, 3],
                                strides=1,
                                padding='SAME',
                                activation=tf.nn.relu,
                                name="baseConv3")(net)

            net_a = Layers.Conv2D(64, [1, 1],
                                  strides=1,
                                  padding='SAME',
                                  activation=tf.nn.relu,
                                  name="baseConv4a")(net)
            net_a = Layers.Conv2D(96, [3, 3],
                                  strides=1,
                                  padding='SAME',
                                  activation=tf.nn.relu,
                                  name="baseConv5a")(net_a)

            net_b = Layers.Conv2D(64, [1, 1],
                                  strides=1,
                                  padding='SAME',
                                  activation=tf.nn.relu,
                                  name="baseConv4b")(net)
            net_b = Layers.Conv2D(64, [8, 1],
                                  strides=1,
                                  padding='SAME',
                                  activation=tf.nn.relu,
                                  name="baseConv5b")(net_b)
            net_b = Layers.Conv2D(64, [1, 8],
                                  strides=1,
                                  padding='SAME',
                                  activation=tf.nn.relu,
                                  name="baseConv6b")(net_b)
            net_b = Layers.Conv2D(96, [3, 3],
                                  strides=1,
                                  padding='SAME',
                                  activation=tf.nn.relu,
                                  name="baseConv7b")(net_b)

            net = custom_layers.add_inception_resnet_A(net, 'A1')
            net = custom_layers.add_inception_resnet_A(net, 'A2')
        if self.debug:
            log("Finished adding base. Took: " + s.delta)

        return net
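The add_inception_resnet_A/B helpers come from custom_layers, which is not included in these snippets. Purely as an assumption of what such a block might resemble (names and channel counts are hypothetical), a minimal residual block in the same Keras-layers style could look like this:

def add_inception_resnet_A(net, block_id):
    # Hypothetical sketch only; the real custom_layers implementation is not shown here.
    branch = Layers.Conv2D(32, [1, 1], strides=1, padding='SAME',
                           activation=tf.nn.relu,
                           name='resA_' + block_id + '_1x1')(net)
    branch = Layers.Conv2D(32, [3, 3], strides=1, padding='SAME',
                           activation=tf.nn.relu,
                           name='resA_' + block_id + '_3x3')(branch)
    # project back to the trunk's channel count so the residual add is shape-compatible
    branch = Layers.Conv2D(int(net.shape[-1]), [1, 1], strides=1, padding='SAME',
                           name='resA_' + block_id + '_proj')(branch)
    return Layers.Add(name='resA_' + block_id + '_add')([net, branch])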
Ejemplo n.º 17
0
    def __init__(self, obs, name, debug=False):
        '''
        Initialize Bot.
        Load all game state information.
        '''

        self.debug = debug
        try:
            self.game_state = obs[0]
        except IOError:
            print("Cannot load Game State")

        self.full_map = self.game_state['gameMap']
        self.rows = self.game_state['gameDetails']['mapHeight']
        self.columns = self.game_state['gameDetails']['mapWidth']
        self.command = ''

        self.player_buildings = self.getPlayerBuildings()
        self.opponent_buildings = self.getOpponentBuildings()
        self.projectiles = self.getProjectiles()

        self.player_info = self.getPlayerInfo('A')
        self.opponent_info = self.getPlayerInfo('B')

        self.round = self.game_state['gameDetails']['round']

        self.prices = {
            "ATTACK":
            self.game_state['gameDetails']['buildingPrices']['ATTACK'],
            "DEFENSE":
            self.game_state['gameDetails']['buildingPrices']['DEFENSE'],
            "ENERGY":
            self.game_state['gameDetails']['buildingPrices']['ENERGY']
        }

        if self.debug and debug_verbose:
            log("rows: " + str(self.rows))
            log("columns: " + str(self.columns))
            log("player_buildings: " + str(self.player_buildings))
            log("opp_buildings: " + str(self.opponent_buildings))
            log("projectiles: " + str(self.projectiles))
            log("player_info: " + str(self.player_info))
            log("opp_info: " + str(self.opponent_info))
            log("Round: " + str(self.round))
            log("Prices: " + str(self.prices))

        # getting inputs
        with tf.name_scope("shaping_inputs") as scope:
            if self.debug:
                log("Shaping inputs...")
                s = Stopwatch()

            pb = tf.one_hot(indices=self.player_buildings,
                            depth=4,
                            axis=-1,
                            name="player_buildings")  # 20x20x4
            ob = tf.one_hot(indices=self.opponent_buildings,
                            depth=4,
                            axis=-1,
                            name="opp_buildings")  # 20x20x4
            proj = tf.one_hot(indices=self.projectiles,
                              depth=3,
                              axis=-1,
                              name='projectiles')  # 20x40x3
            k = proj.get_shape().as_list()
            proj = tf.reshape(proj, [k[0], k[1] // 2, 6])  # 20x20x6. Only works for single missiles

            self.non_spatial = list(self.player_info.values())[1:] + list(
                self.opponent_info.values())[1:] + list(
                    self.prices.values())  # 11x1
            self.non_spatial = tf.cast(self.non_spatial, dtype=tf.float32)
            # broadcasting the non-spatial features to the channel dimension
            broadcast_stats = tf.tile(
                tf.expand_dims(tf.expand_dims(self.non_spatial, axis=0),
                               axis=0), [k[0], k[1] // 2, 1])  # now 20x20x11

            # adding all the inputs together via the channel dimension
            self.spatial = tf.concat([pb, ob, proj, broadcast_stats],
                                     axis=-1)  # 20x20x(14 + 11)
            self.spatial = tf.expand_dims(self.spatial, axis=0)
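            # final network input: 1 x rows x (columns/2) x 25 once the batch dimension is added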

            if self.debug:
                log("Finished shaping inputs. Took " + s.delta)

        return None
Ejemplo n.º 18
0
def parallel_fight(env, matchups, max_steps, debug=False):
    a1s = []
    a2s = []
    for a, b in matchups:
        a1s.append(a)
        a2s.append(b)
    #a1s, a2s = [(e, f,) for e, f in zip(*matchups)]

    n_envs = env.num_envs
    assert len(matchups) == n_envs, "number of matchups must equal number of envs"

    print(
        ">> PARALLEL FIGHT >> Running rollouts with {} games  ".format(n_envs))
    pbar = metrics.ProgressBar(max_steps)
    early_eps = 0
    failed_eps = 0
    games = []
    ties = 0

    step = 0
    suc = env.reset()
    if not all(suc):
        print("Something went wrong: could not reset all envs.")
        return
    #obs = env.get_base_obs()
    obs = util.get_initial_obs(n_envs)

    for aa, bb in matchups:
        aa.mask_output = False
        bb.mask_output = False

    while step < max_steps:
        pbar.show(step)

        if debug:
            ss = Stopwatch()

        actions = [agent.step(obs[i][0]) for i, agent in enumerate(a1s)]
        ref_actions = [agen2.step(obs[i][1]) for i, agen2 in enumerate(a2s)]

        if len(actions) != len(ref_actions):
            print("LEN OF ACTIONS != LEN OF REF ACTIONS!!!!")
            raise ValueError

        if debug:
            print(">> storm >> taking actions: ", actions, ' and ref actions ',
                  ref_actions)

        obs, rews, ep_infos = env.step(actions, p2_actions=ref_actions)

        failure = False
        for i in range(n_envs):
            if isinstance(rews[i][0], util.ControlObject):
                if rews[i][0].code == "EARLY":
                    a1s[i].mask_output = True
                    a2s[i].mask_output = True
                elif rews[i][0].code == "FAILURE":
                    # redo this whole batch
                    failed_eps += 1
                    failure = True
                    break

            if 'winner' in ep_infos[i]:
                early_eps += 1
                if ep_infos[i]['winner'] == 'A':
                    #matchup_dict[a.name] = 'A'
                    games.append(
                        Game(a1s[i].name, a2s[i].name, winner=a1s[i].name))
                elif ep_infos[i]['winner'] == 'B':
                    #matchup_dict[a.name] = 'B'
                    games.append(
                        Game(a1s[i].name, a2s[i].name, winner=a2s[i].name))
                elif ep_infos[i]['winner'] == 'TIE':
                    #matchup_dict[a.name] = 'TIE'
                    games.append(Game(a1s[i].name, a2s[i].name, winner='TIE'))
                    ties += 1

        if failure:
            print("Failure detected. Skipping batch")
            break

        if debug:
            print("obs shape = ", obs.shape)
            print("rews shape = ", rews.shape)
            print('>> storm >> just took step {}. Took: {}'.format(
                step, ss.delta))
        step = step + 1

    pbar.close()

    return games, early_eps, failed_eps, ties
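A hypothetical driver for parallel_fight (the vec_env, agent_pool, and the Game object's attributes are assumptions, not shown in these snippets) might look like this; note that the function returns None when the envs fail to reset, so the result needs a guard:

# Illustrative only -- assumes a vectorised env and a list of agents as used above.
matchups = list(zip(agent_pool[0::2], agent_pool[1::2]))   # pair agents off
result = parallel_fight(vec_env, matchups, max_steps=400, debug=False)
if result is not None:
    games, early_eps, failed_eps, ties = result
    for g in games:
        print(g)   # each Game records both player names and the winner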
Ejemplo n.º 19
0
def parse_obs(game_state):
    full_map = game_state['gameMap']
    rows = game_state['gameDetails']['mapHeight']
    columns = game_state['gameDetails']['mapWidth']

    player_buildings = getPlayerBuildings(full_map, rows, columns)
    opponent_buildings = getOpponentBuildings(full_map, rows, columns)
    projectiles = getProjectiles(full_map, rows, columns)

    player_info = getPlayerInfo('A', game_state)
    opponent_info = getPlayerInfo('B', game_state)

    round_num = game_state['gameDetails']['round']

    # works for jar v1.1.2
    prices = {
        "ATTACK":
        game_state['gameDetails']['buildingsStats']['ATTACK']['price'],
        "DEFENSE":
        game_state['gameDetails']['buildingsStats']['DEFENSE']['price'],
        "ENERGY":
        game_state['gameDetails']['buildingsStats']['ENERGY']['price'],
        "TESLTA":
        game_state['gameDetails']['buildingsStats']['TESLA']['price'],
    }

    with tf.name_scope("shaping_inputs") as scope:
        if debug:
            print("Shaping inputs...")
            s = Stopwatch()

        pb = tf.one_hot(indices=player_buildings,
                        depth=5,
                        axis=-1,
                        name="player_buildings")  # 20x20x5
        ob = tf.one_hot(indices=opponent_buildings,
                        depth=5,
                        axis=-1,
                        name="opp_buildings")  # 20x20x5
        proj = tf.one_hot(indices=projectiles,
                          depth=3,
                          axis=-1,
                          name='projectiles')  # 20x40x3
        k = proj.get_shape().as_list()
        proj = tf.reshape(proj, [int(k[0]), int(k[1] / 2), 6])  # 20x20x6. Only works for single missiles

        non_spatial = list(player_info.values())[1:] + list(
            opponent_info.values())[1:] + list(prices.values())  # 12x1
        non_spatial = tf.cast(non_spatial, dtype=tf.float32)
        # broadcasting the non-spatial features to the channel dimension
        broadcast_stats = tf.tile(
            tf.expand_dims(tf.expand_dims(non_spatial, axis=0), axis=0),
            [int(k[0]), int(k[1] / 2), 1])  # now 20x20x12

        # adding all the inputs together via the channel dimension
        spatial = tf.concat([pb, ob, proj, broadcast_stats],
                            axis=-1)  # 20x20x(16 + 12)

        if debug:
            print("Finished shaping inputs. Took " + s.delta +
                  "\nShape of inputs:" + str(spatial.shape))

        return spatial, rows, columns
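A minimal usage sketch for parse_obs (the state-file name is hypothetical; any dict with the gameMap/gameDetails structure used above will do):

import json

# Illustrative only -- load a raw game-state dict and turn it into the network input.
with open('state.json') as f:
    game_state = json.load(f)
spatial, rows, columns = parse_obs(game_state)
print(spatial.shape)   # e.g. (20, 20, 28) for the example 20x40 map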