Example #1
    def __init__(self,
                 controller,
                 comm_disabled=False,
                 iteration=1000,
                 time=0.1,
                 name='unnamed_trial',
                 preset='M&N, 2003',
                 env_height=270,
                 env_width=270,
                 targets=True,
                 verbose=False):
        """Initialize a trial."""
        super().__init__()
        self.name = name  # name for the trial
        self.preset = preset  # preset name
        self.iteration = iteration  # total number of iterations
        self.step_time = time  # time for each iteration step in seconds
        self.step_fitness = []  # fitness at each timestep
        self.fitness = 0  # total fitness
        self.comm_disabled = comm_disabled
        self.verbose = verbose

        # initialize environment
        self.env = environment(width=env_width,
                               height=env_height,
                               targets=targets)

        # the trial works on its own deep copy of the ANN controller
        self.ann = deepcopy(controller)

        # initialize agents for the trial
        self.env.agents = [
            agent(name=self.name + 'agent0', color='red'),
            agent(name=self.name + 'agent1', color='orange'),
            agent(name=self.name + 'agent2', color='cyan'),
            agent(name=self.name + 'agent3', color='green')
        ]
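
A minimal usage sketch for this constructor, assuming the enclosing class is named trial and that the controller is any deep-copyable ANN object (the class name and the controller factory below are assumptions; only __init__ is shown here):

controller = build_controller()      # hypothetical factory for the ANN controller
t = trial(controller,                # 'trial' is the assumed name of the class above
          comm_disabled=True,        # run the no-communication control condition
          iteration=500,
          time=0.1,
          name='demo_trial',
          verbose=True)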
Example #2
def run_experiments(env_info,
                    agent_info,
                    num_episodes=500,
                    experiment_name=None):
    env = environment()
    age = agent()

    env.env_init(env_info)
    age.agent_init(agent_info)

    for i in range(num_episodes):
        terminal = False

        last_state = env.env_start()
        last_action = age.agent_start(last_state)
        total_reward = 0

        while not terminal:
            (reward, last_state, terminal) = env.env_step(last_action)

            total_reward += reward
            if terminal:
                age.agent_end(reward)
            else:
                last_action = age.agent_step(reward, last_state)

    values = age.agent_values()
    print("VALUE FUNCTION", end="\n\n")

    if experiment_name is not None:
        print(experiment_name)

    for i in range(env_info.get("height", 4)):
        for j in range(env_info.get("width", 12)):
            print("%7.2f" % values[i * env_info.get("width", 12) + j], end=' ')
        print()
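
The grid size and the agent settings are passed in as plain dictionaries. A minimal call sketch using the default 4x12 grid that the printing loop above assumes (the agent_info keys are hypothetical, since the agent class is not shown):

env_info = {"height": 4, "width": 12}            # keys read by the value-table printout above
agent_info = {"step_size": 0.5, "epsilon": 0.1}  # hypothetical keys; depends on the agent implementation
run_experiments(env_info, agent_info,
                num_episodes=500,
                experiment_name="gridworld baseline")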
Example #3
import gym
from Agent import agent
from DQN import DQN
if __name__ == '__main__':
    mainAgent = agent(18)    # Boxing-v0 exposes 18 discrete actions
    targetAgent = agent(18)
    dqn = DQN(mainAgent, targetAgent, 0.99, 0.1, 200, 32, 64, True)
    dqn.train('Boxing-v0', True)
Example #4
import gym
from Agent import agent
from DQN import DQN
if __name__ == '__main__':
    mainAgent = agent(4)
    targetAgent = agent(4)
    dqn = DQN(mainAgent, targetAgent, 0.99, 0.01, 2000, 120, 400, True)
    dqn.train('CarRacing-v0', True)
Example #5
from Environment import Grid
from Cell import Cell
from Agent import agent
import pygame, time

dim = 15
size = width, height = 900, 900
pygame.init()  # the display module must be initialised before set_mode
screen = pygame.display.set_mode(size)
environment = Grid(dim)
agent = agent(environment)
(x, y) = (agent.agent_x, agent.agent_y)

FLAT = 0.1
HILL = 0.3
FOREST = 0.7
CAVE = 0.9


def update_ui():
    for i in range(dim):
        for j in range(dim):
            # print(i, j, (i*(width/dim), j*(height/dim)))
            cell_rect = (j * (width / dim), i * (height / dim),
                         width / dim, height / dim)
            if environment.field[i][j].terrain_type == FLAT:
                # flat terrain: white square
                pygame.draw.rect(screen, (255, 255, 255), cell_rect)
            elif environment.field[i][j].terrain_type == HILL:
                # hill terrain: light green square
                pygame.draw.rect(screen, (130, 227, 2), cell_rect)
            # the FOREST and CAVE branches are cut off in this excerpt
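
A minimal, hypothetical event loop that would call update_ui each frame (standard pygame calls; not part of the original script):

running = True
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:   # close the window cleanly
            running = False
    update_ui()                         # redraw every cell
    pygame.display.flip()               # push the frame to the screen
    time.sleep(0.05)                    # 'time' is imported at the top of the script
pygame.quit()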
Example #6
def train(task_relation="<diedIn>", rootpath=None, epoch=5):
    datapath = {'type2id': rootpath + 'type2id.json',
                'relation2id': rootpath + 'relation2id.json',
                'graph': rootpath + 'graph.pkl',
                'ent2type': rootpath + 'ent2type.json',
                'entity2id': rootpath + 'entity2id.json'}
    Env_a = env(datapath)
    Env_a.init_relation_query_state(task_relation)
    batchsize = 20
    maxlen = 5
    po = Policy_memory(Env_a, 300, 100, Env_a.rel_num)
    pairs = Env_a.filter_query(maxlen, 5000)  # assuming filter_query returns the filtered query pairs
    random.shuffle(pairs)

    training_pairs = pairs
    test_pairs = pairs[:int(len(pairs) * 0.5)]
    reward_record = []
    success_record = []
    path_length = 0
    valid_pairs = pairs[int(len(pairs) * 0.5):int(len(pairs) * 0.6)]
    print('Train pairs:', len(training_pairs))
    print('Valid pairs:', len(valid_pairs))
    print('Test pairs:', len(test_pairs))
    agent_a = agent(po, Env_a, policymethod='GRU')
    if global_device == 'cuda:0':
        po = po.cuda()

    try_count, batch_loss, ave_reward, ave_success = 0, 0, 0, 0
    opt = torch.optim.Adam(list(agent_a.parameters()) + list(Env_a.parameters()),
                           lr=0.001)  # list() so the two parameter collections can be concatenated
    for ep in range(epoch):
        opt.zero_grad()
        random.shuffle(training_pairs)
        for query in training_pairs:
            try:
                e1, e2 = query[0], query[1]
                e1, e2 = Env_a.entity2id[e1], Env_a.entity2id[e2]
                with torch.no_grad():
                    traj, success = agent_a.trajectory(e1, e2, max_len=maxlen)
                try_count += 1
            except KeyError:
                continue

            logger.MARK(Env_a.traj_for_showing(traj))

            traj_loss = 0
            po.zero_history()
            traj_reward = 0

            for i in traj:

                ave_reward += i[4]
                traj_reward += i[4]
                loss = agent_a.update_memory_policy(i)
                loss.backward()
                traj_loss += loss.cpu()
            if success:
                ave_success += 1
                path_length += len(traj) - 1
                success_record.append(1)
            else:
                success_record.append(0)
            reward_record.append(traj_reward)
            batch_loss += traj_loss / len(traj)
            if try_count % batchsize == 0 and try_count > 0:
                opt.step()
                opt.zero_grad()
                logger.info(
                    '|%d epoch|%d episode|Batch_loss:%.4f|Ave_reward:%.3f|Ave_success:%%%.2f|ave path length:%.2f|'
                    % (ep, try_count, batch_loss * 100 / batchsize,
                       ave_reward / batchsize, ave_success * 100 / batchsize,
                       path_length / max(ave_success, 1)))
                batch_loss, ave_reward, ave_success, path_length = 0, 0, 0, 0

            if try_count % (20 * batchsize) == 0 and try_count > 0:
                valid(valid_pairs, Env_a, agent_a, batchsize, maxlen)

        generate_paths(Env_a, agent_a, test_pairs,
                       rootpath + task_relation + '.paths', maxlen)

    success = ave_smooth(success_record, 20)
    reward = ave_smooth(reward_record, 20)

    with open(rootpath + task_relation + 'sucess_record_without.txt',
              'w') as fin:
        wstr = '\n'.join([str(i) for i in success])
        fin.write(wstr)
    with open(rootpath + task_relation + 'reward_record_without.txt',
              'w') as fin:
        wstr = '\n'.join([str(i) for i in reward])
        fin.write(wstr)

    with open(rootpath + task_relation + 'test_positive_pairs', 'w') as fin:
        wstr = []
        for i in test_pairs:
            wstr.append(str(i[0]) + '\t' + str(i[1]))
        wstr = '\n'.join(wstr)
        fin.write(wstr)
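
Since the output paths above are built by plain string concatenation, rootpath should end with a path separator. A minimal call sketch with a hypothetical data directory:

if __name__ == '__main__':
    train(task_relation="<diedIn>",
          rootpath="./data/kg/",  # hypothetical; must contain type2id.json, relation2id.json, graph.pkl, ...
          epoch=5)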
Example #7
def train(task_relation="<diedIn>", rootpath=None, epoch=5):
    datapath = {'type2id': rootpath + 'type2id.json',
                'relation2id': rootpath + 'relation2id.json',
                'graph': rootpath + 'graph.pkl',
                'ent2type': rootpath + 'ent2type.json',
                'entity2id': rootpath + 'entity2id.json'}
    Env_a = env(datapath)
    Env_a.init_relation_query_state(task_relation)
    batchsize = 20
    maxlen = 5
    po = Policy_memory(Env_a, 300, 100, Env_a.rel_num)
    # Env_a.filter_query(maxlen,5000)
    # pairs = Env_a.filter_query
    # random.shuffle(pairs)
    # training_pairs=pairs
    # test_pairs=pairs[:int(len(pairs)*0.5)]
    # valid_paris=pairs[int(len(pairs)*0.5):int(len(pairs)*0.6)]
    train_path = rootpath + '/' + task_relation + 'train_pairs'
    valid_path = rootpath + '/' + task_relation + 'valid_pairs'
    training_pairs = load_pair(train_path)
    valid_pairs = load_pair(valid_path)

    print('Train pairs:', len(training_pairs))
    print('Valid pairs:', len(valid_pairs))
    #print('Test pairs:', len(test_pairs))
    agent_a = agent(po, Env_a, policymethod='GRU')
    if global_device == 'cuda:0':
        po = po.cuda()

    try_count, batch_loss, ave_reward, ave_success = 0, 0, 0, 0
    opt = torch.optim.Adam(list(agent_a.parameters()) + list(Env_a.parameters()),
                           lr=0.001)  # list() so the two parameter collections can be concatenated
    for ep in range(epoch):
        opt.zero_grad()
        random.shuffle(training_pairs)
        for query in training_pairs:
            try:
                e1, e2 = query[0], query[1]
                e1, e2 = Env_a.entity2id[e1], Env_a.entity2id[e2]
                with torch.no_grad():
                    traj, success = agent_a.trajectory(e1, e2, max_len=maxlen)
                try_count += 1
            except KeyError:
                continue
            logger.MARK(Env_a.traj_for_showing(traj))
            traj_loss = 0
            po.zero_history()
            traj_reward = 0

            for i in traj:

                ave_reward += i[4]
                traj_reward += i[4]
                loss = agent_a.update_memory_policy(i)
                loss.backward()
                traj_loss += loss.cpu()
            if success:
                ave_success += 1
            batch_loss += traj_loss / len(traj)
            if try_count % batchsize == 0 and try_count > 0:
                opt.step()
                opt.zero_grad()
                logger.info(
                    '|%d epoch|%d episode|Batch_loss:%.4f|Ave_reward:%.3f|Ave_success:%%%.2f|'
                    % (ep, try_count, batch_loss * 100 / batchsize,
                       ave_reward / batchsize, ave_success * 100 / batchsize))
                batch_loss, ave_reward, ave_success = 0, 0, 0

            if try_count % (20 * batchsize) == 0 and try_count > 0:
                valid(valid_pairs, Env_a, agent_a, batchsize, maxlen)

        # NOTE: test_pairs is not defined in this variant; it would have to be
        # loaded the same way as training_pairs before this call can run.
        generate_paths(Env_a, agent_a, test_pairs,
                       rootpath + task_relation + '.paths', maxlen)
Example #8
    x.create_common_transition("Deterministic")  # alternative: ("Bernoulli", 0.7)

    import Rewards
    sparse_reward = Rewards.Reward(grid, actions)
    sparse_reward.common_reward("sparse")

    discount = 0.2
    policy = np.ones((len(grid.states), len(actions))) * 0.25  # uniform policy

    state_values = iterative_bellman_equation(
        grid, actions, policy,
        discount=discount)  # approximate the true state values

    from Agent import Agent as agent
    episodes = agent(grid, actions, policy).sample_episode(
        1000, terminal_state=16)  # generate the episodes

    initial_states = np.zeros(len(grid.states))  # initial estimates should be a numpy array

    mc_estimates = monte_carlo(initial_states, episodes, discount=discount)

    n_estimates = n_step_td(initial_states,
                            50,
                            episodes,
                            discount=discount,
                            learning_rate=0.001)

    lambda_estimate = td_lambda(initial_states,
                                0,
                                episodes,
Example #9
from Environment import Grid  # imports assumed to match the companion script above
from Agent import agent
from time import time

hill = [[], [], []]
frst = [[], [], []]
cave = [[], [], []]


outer_loop = 15
inner_loop = 30
start = time()
for i in range(outer_loop):
    env = Grid(50)
    sum1 = 0
    sum2 = 0
    sum3 = 0
    print(i)
    for j in range(inner_loop):
        agent1 = agent(env)
        agent2 = agent(env)
        agent3 = agent(env)

        temp1 = None
        temp2 = None
        temp3 = None

        while temp1 is None:
            temp1 = agent1.basic_agent(1)

        while temp2 is None:
            temp2 = agent2.basic_agent(2)

        while temp3 is None:
            temp3 = agent3.advanced_agent(2)