def start():
    # Relies on project helpers defined elsewhere in the repo: Character,
    # Environments, announce and fight, plus colorama's init, math and
    # random.shuffle.
    init(autoreset=True)
    c = Character()
    with open('art/dungeon.txt') as f:
        print(f.read())
    print('')
    print('#' * 80 + '\n#' + ' ' * 78 + '#' +
          '\n#\tWelcome to Command-Line Dungeon Diver! '
          'Begin by selecting a class: #\n' +
          '#' + ' ' * 78 + '#' + '\n' + '#' * 80)
    announce('Mage, Warrior, Fighter, Cleric, Ranger or Paladin?')
    classtoplay = input('>>> ')
    character = dict()
    if classtoplay.lower() == 'mage':
        character['stats'] = c.mage()
    elif classtoplay.lower() == 'warrior':
        character['stats'] = c.warrior()
    elif classtoplay.lower() == 'fighter':
        character['stats'] = c.fighter()
    elif classtoplay.lower() == 'cleric':
        character['stats'] = c.cleric()
    elif classtoplay.lower() == 'ranger':
        character['stats'] = c.ranger()
    elif classtoplay.lower() == 'paladin':
        character['stats'] = c.paladin()
    else:
        # Unknown class: restart the prompt and return so the code below
        # never runs with an empty character dict.
        return start()
    announce('Ah, a {classtoplay}. What shall we call you?'.format(classtoplay=classtoplay))
    character['name'] = input('>>> ')
    # Starting health is vitality minus a flat 15%, rounded down.
    character['stats']['health'] = math.floor(
        character['stats']['vitality'] - .15 * character['stats']['vitality'])
    character['experience'] = 0
    character['level'] = 1
    character['type'] = classtoplay.lower()
    announce('Here are your stats, {name}..\n'.format(name=character['name']))
    for k, v in character['stats'].items():
        announce('\t{stat}: {value}'.format(stat=k, value=v))
    env = Environments(level=character['level'])
    environment = env.dungeon()
    announce("You've entered a {env}! Clear out all of the enemies!".format(env=environment['name']))
    shuffle(environment['mobs'])
    for mob in environment['mobs']:
        announce('Uh-oh! {mobname} attacks!'.format(mobname=mob['name']))
        if mob['name'] == 'Bat':
            with open('art/bat.txt', mode='r') as f:
                print(f.read())
        elif mob['name'] == 'Skeleton':
            with open('art/skeleton.txt', mode='r') as f:
                print(f.read())
        fight(character, mob)
    announce("\nYou've cleared out all of the enemies, now it's time for the boss!")
    if environment['boss']['name'] == 'Ogre':
        with open('art/ogre.txt', mode='r') as f:
            print(f.read())
    fight(character, environment['boss'])
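# Hedged sketch (assumption, not from this file): announce() used above is
# taken to be a thin print() wrapper that styles narration with colorama;
# init(autoreset=True) then clears the styling after every call. The colour
# and brightness chosen here are placeholders.
from colorama import Fore, Style

def announce(message):
    # Print game narration in a bright colour; autoreset restores defaults.
    print(Fore.YELLOW + Style.BRIGHT + message)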
def evaluate(name, environment, n=0, uncertainty=0):
    disablePrint()
    name = name + '-' + str(n)
    # Checkpoints live under outputs/<run name>/Agents/<run name>-<n>.agent.
    with open(f"outputs/{'-'.join(name.split('-')[:-1])}/Agents/{name}.agent", "rb") as f:
        agent = pickle.load(f)
    agent.explore = agent.exploration.greedy
    agent.uncertainty = True
    agent.state_avoidance = False
    agent.uncertainty_weight = uncertainty
    env = Environments(render=False, envs=[environment for _ in range(20)], agent=agent)
    rews, dones = [], []
    for i in range(20000):
        obs, hn, cn = env.start()
        act, obs_old, h0, c0, hn, cn, _, _ = agent.chooseMulti(obs, hn, cn)
        obs, rew, done, info = env.step(act, hn, cn)
        rews.append(sum(rew))
        dones.append(sum(done))
    enablePrint()
    # Average return per finished episode, then rescale with the
    # per-environment rMin/rMax bounds (rounded to two decimals).
    score = sum(rews) / sum(dones)
    print(name.ljust(20, ' '),
          int(100 * (score - rMin[environment]) / (rMax[environment] - rMin[environment])) / 100,
          " ", score)
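# Hedged usage sketch (run names are placeholders): evaluate() appends "-<n>"
# to the run name and loads outputs/<run>/Agents/<run>-<n>.agent, so several
# numbered checkpoints of one run can be scored on the same environment:
# for n in range(3):
#     evaluate('maze-baseline', 'maze', n=n, uncertainty=0)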
def main():
    name, environment, hours, total_agents, done = (
        params['name'], params['environment'], params['hours'], params['total_agents'], None)
    agent = Agent(**params)
    env = Environments(render=False, envs=[environment for _ in range(total_agents)], agent=agent)
    collector = Collector(**params)
    # Train until five minutes before the wall-clock budget runs out.
    tid, f = time() + 3600 * hours - 300, 0
    while time() < tid:
        f += 1
        obs, hn, cn = env.start()
        act, obs_old, h0, c0, hn, cn, before_trace, after_trace = agent.chooseMulti(obs, hn, cn, done=done)
        obs, rew, done, info = env.step(act, hn, cn)
        collector.collect(rew, done, act, agent.onpolicy)
        if not agent.onpolicy and f > 10:
            agent.rememberMulti(obs_old, act, obs, rew, h0, c0, hn, cn, done, before_trace, after_trace)
        agent.learn()
    saveAgent(agent, name)
    saveCollector(collector, name)
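# Hedged sketch (assumption): main() reads a module-level params dict that is
# also unpacked into Agent(**params) and Collector(**params). Only the four
# keys accessed above are known from this file; the values below are
# placeholders, and any further keys a real run needs are omitted.
# params = {
#     'name': 'maze-run-0',    # run/checkpoint name passed to saveAgent
#     'environment': 'maze',   # environment id replicated across the agents
#     'hours': 2,              # wall-clock training budget
#     'total_agents': 20,      # number of parallel environment copies
# }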
        x = self.decoder(x)
        return x


total_agents, display_every = 20, 5000
agent = Agent(memory=50000, discount=0.995, uncertainty=False, update_every=100,
              double=True, use_distribution=False, reward_normalization=True)
env = Environments(render=True, envs=[
    'bigfish', 'bossfight', 'caveflyer', 'chaser', 'climber', 'coinrun',
    'dodgeball', 'fruitbot', 'heist', 'jumper', 'leaper', 'maze',
    'miner', 'ninja', 'plunder', 'starpilot'
], agent=agent)
network = NetWork().to(device)
print("Number of parameters in network:", count_parameters(network))
print("Number of parameters in encoder:", count_parameters(network.encoder))
print("Number of parameters in decoder:", count_parameters(network.decoder))
criterion = MSELoss()
optimizer = Adam(network.parameters(), lr=1e-4, weight_decay=1e-5)
for f in range(0, 10000000):
    obs, hn, cn = env.start()
    act, obs_old, h0, c0, hn, cn = agent.chooseMulti(obs, hn, cn)
    obs, rew, done, info = env.step(act, hn, cn)
    agent.rememberMulti(obs_old, act, obs, rew, h0, c0, hn, cn, done)
    obs, action, obs_next, reward, h0, c0, hn, sn, done = agent.memory.sample(
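# Hedged sketch (assumption, not from this file): count_parameters used above
# is taken to be the usual "total trainable elements" helper for a torch
# nn.Module.
def count_parameters(model):
    # Sum the element counts of every parameter that requires gradients.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)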
    if keyboard.Key.f3 == key:
        save = True


keyboard.Listener(on_press=on_press).start()
total_agents, display_every = 20, 5000
agent = Agent(memory=40000, discount=0.995, uncertainty=False, update_every=100,
              double=True, use_distribution=False, reward_normalization=False)
env = Environments(render=True, envs=['maze' for _ in range(total_agents)], agent=agent)
collector = Collector(calculate_every=500, total_agents=total_agents)
for f in range(10000000):
    obs, hn, cn = env.start()
    act, obs_old, h0, c0, hn, cn = agent.chooseMulti(obs, hn, cn)
    obs, rew, done, info = env.step(act, hn, cn)
    collector.collect(rew, done, act)
    agent.rememberMulti(obs_old, act, obs, rew, h0, c0, hn, cn, done)
    agent.learn()
    if showPrint:
        plt.close('all')
        displayer(obs[0].cpu(), agent, collector)
        fig = plt.figure()
        move_figure(fig, 0, 0)
# env.close()
# obs = clean(env.reset())
# hn = torch.zeros(1, 1, hidden_size, device=device)
# cn = torch.zeros(1, 1, hidden_size, device=device)
# disablePrint()
# t0 = time.time()
# obs, hn, cn = [obs for _ in range(k)], [hn for _ in range(k)], [cn for _ in range(k)]
# for i in range(n // k):
#     agent.chooseMulti(obs, hn, cn)
# t1 = time.time()
# enablePrint()
# print("Choose", t1 - t0)  # 17.568519115447998

agent = Agent()
env = Environments(render=True, envs=['fruitbot' for _ in range(20)])
all_return, all_dones = [], []
update_every = 100
disablePrint()
frames = 1000000
dones, total_rew = 0, 0
for f in range(1, frames + 1):
    obs, hn, cn = env.start()
    act, obs_old, h0, c0, hn, cn = agent.chooseMulti(obs, hn, cn)
    obs, rew, done, info = env.step(act, hn, cn)
    total_rew += sum(rew) / len(rew)
    dones += sum(done) / len(done)
    agent.rememberMulti(obs_old, act, obs, rew, h0, c0, hn, cn, done)
    if f > update_every:
        for _ in range(2):
            agent.learn(double=True)
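# Hedged sketch (assumption, not from this file): the disablePrint/enablePrint
# pair used throughout these scripts is taken to be the common stdout-muting
# idiom.
import os
import sys

def disablePrint():
    # Send print() output to the null device while the tight loops run.
    sys.stdout = open(os.devnull, 'w')

def enablePrint():
    # Restore the interpreter's original stdout.
    sys.stdout = sys.__stdout__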