def main():
    from fireplace import cards
    cards.db.initialize()
    from utils import Agent, play_set_of_games, play_MechaHunterGames
    from hearthstone.enums import CardClass

    Human = Agent("Human", None, myClass=CardClass.MAGE)
    StandardRandom = Agent("Maya", None)  # Defaults to HUNTER when no class is specified

    # Full look-ahead via Monte Carlo
    ## Maya = Agent("Maya", None)

    # Standard vector-evaluation agent; takes a 34-dimensional weight vector as its option
    # from agent_Standard import StandardStep1
    # import random
    # opt = []
    # for i in range(34):
    #     opt.append(random.randint(1, 10))
    # GhostCatPlayer = Agent("GhostCat", StandardStep1, myOption=opt, myClass=CardClass.WARRIOR)

    # Just for fun
    # PIPlayer = Agent("Pi", StandardStep1,
    #                  myOption=[3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3,2,3,8,4,6,2,6,4,3,3,8,3,2,7,9,5,0,2,8],
    #                  myClass=CardClass.PRIEST)

    # Agent that assembles its strategy from verbal rules, by Ahara
    # from agent_word_strategy import WS, agent_word_strategy
    # WSplayer = Agent("WS", agent_word_strategy,
    #                  myOption=[WS.ミニョンで敵ヒーローの体力を削る, WS.呪文を使えるなら呪文, WS.ランダムにプレー],
    #                  myClass=CardClass.PRIEST)

    # AngryCat by Ahara: an algorithm that removes pointless actions from the candidate moves
    # from agent_AngryCat import AngryCatAI
    # AngryCatPlayer = Agent("AngryCat", AngryCatAI)

    # Play the games
    play_set_of_games(Human, StandardRandom, gameNumber=1, debugLog=True)

    # Matches with a Hunter-only deck (Mecha Hunter)
    # play_MechaHunterGames(StandardPlayer, AngryCatPlayer, gameNumber=1, debugLog=True)

    # Round-robin competition
    # from competition import play_round_robin_competition
    # play_round_robin_competition([StandardRandom, PIPlayer, AngryCatPlayer], matchNumber=5)

    # Investigate the synergy between two specific cards (idea by Maya)
    # from card_pair import investigate_card_pair, find_card_pair
    # investigate_card_pair()

    # Broadly search for card pairs with synergy
    # find_card_pair(1)

    # print("test_branch_yamadamaya")
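# Hypothetical entry point (an assumption; the excerpt does not show how main()
# is invoked) so the script above can be run directly.
if __name__ == "__main__":
    main()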
def getTransportistas(agentType, directoryAgent, sender, messageCount):
    # Build the search message for the directory service
    gmess = Graph()
    gmess.bind('foaf', FOAF)
    gmess.bind('dso', DSO)
    ask_obj = agn[sender.name + '-Search']
    gmess.add((ask_obj, RDF.type, DSO.Search))
    gmess.add((ask_obj, DSO.AgentType, agentType))

    gr = send_message(
        build_message(gmess, perf=ACL.request,
                      sender=sender.uri,
                      receiver=directoryAgent.uri,
                      msgcnt=messageCount,
                      content=ask_obj),
        directoryAgent.address)

    dic = get_message_properties(gr)
    content = dic['content']

    # The directory answers with an RDF container; collect every rdf:_n member
    agents = []
    for (s, p, o) in gr.triples((content, None, None)):
        if str(p).startswith('http://www.w3.org/1999/02/22-rdf-syntax-ns#_'):
            address = gr.value(subject=o, predicate=DSO.Address)
            url = gr.value(subject=o, predicate=DSO.Uri)
            name = gr.value(subject=o, predicate=FOAF.name)
            agents.append(Agent(name, url, address, None))
    return agents
class MyForm(BoxLayout):
    # Although the class body looks empty, the GUI defined in my.kv is bound to
    # it, so this is effectively where the form is defined.
    text_input = ObjectProperty()    # Matches the externally referenced name in the kv file; points at the widget with the corresponding id
    label_output = ObjectProperty()  # Same, for the output label widget
    agent = Agent()
    translate_res = None

    # Load font resources (for Chinese text)
    kivy.resources.resource_add_path("./fonts")
    font_zh = kivy.resources.resource_find("msyh.ttc")
    # Register the font via LabelBase
    LabelBase.register("msyh_labelBase", "msyh.ttc")
    kivy.core.text.Label.register("msyh_label", "msyh.ttc")

    def button_act(self, action=None):
        print('Start ...')
        if action is None:
            # Echo the text of the widget that text_input points to
            self.translate_res = self.text_input.text
        elif action == 'Translate':
            t = self.target_lang.text
            if t == 'Target':
                self.translate_res = self.text_input.text
            else:
                self.translate_res = self.agent.translate(text=self.text_input.text, source='auto', target=t)
        elif action == 'Audio':
            text = self.agent.transcribe()
            t = self.target_lang.text
            if t == 'Target':
                t = 'en'  # default
            self.translate_res = self.agent.translate(text=text, source='auto', target=t)
        elif action == 'Speech':
            # Speak the translation result aloud
            try:
                assert self.translate_res is not None
                self.agent.speech(self.translate_res)
                print('Finish speeching ...')
            except Exception:
                print('No speeching content.')
        else:
            raise NotImplementedError

        print(self.translate_res)  # Print the result to the console
        if self.translate_res is not None and action != 'Speech':
            # Show the translation result in the UI
            self.label_output.text = self.translate_res
            print('Finish translating ...')
        return

    def clean_label(self):
        # Clear the label text
        self.label_output.text = ""  # (Waiting ...)
        self.translate_res = None
        return
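# Minimal sketch (an assumption, not part of the excerpt) of how MyForm is
# typically wired into a Kivy application. By Kivy's naming convention an App
# subclass named MyApp automatically loads "my.kv", which is where the widget
# tree referenced by text_input / label_output / target_lang would be defined.
# The Agent class used above is assumed to come from the project's own modules.
from kivy.app import App


class MyApp(App):
    def build(self):
        return MyForm()


if __name__ == "__main__":
    MyApp().run()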
def simulate_hunterschoice_policy_from_func(policy, N=1000):
    grounds = create_small_hunting_environment()
    env = HuntingMDPWrapper(grounds)
    agent = Agent(env, policy)

    successes = 0
    episode_lengths = []
    for i in range(N):
        rewards = run_agent(agent)
        # A rollout counts as a success when its final reward is 1
        if rewards[-1] == 1:
            successes += 1
        episode_lengths.append(len(rewards))
    return successes / N, episode_lengths
def simulate_lake_policy_from_func(policy, N=1000, size="small"):
    if size == "small":
        env = get_small_lake()
    else:
        env = get_large_lake()
    agent = Agent(env, policy)

    successes = 0
    episode_lengths = []
    for i in range(N):
        rewards = run_agent(agent)
        if rewards[-1] == 1:
            successes += 1
        episode_lengths.append(len(rewards))
    return successes / N, episode_lengths
def simulate_hunterschoice_policy(policy_location, N=1000):
    print(f"Simulating Policy {policy_location}")
    with open(policy_location, "rb") as f:
        solver = pickle.load(f)
    grounds = create_small_hunting_environment()
    policy = create_hunterschoice_policy(solver.policy)
    agent = Agent(grounds, policy)

    successes = 0
    episode_lengths = []
    for i in range(N):
        rewards = run_agent(agent)
        if rewards[-1] == 1:
            successes += 1
        episode_lengths.append(len(rewards))
    return successes / N, episode_lengths
def simulate_frozenlake_policy(policy_location, N=1000):
    print(f"Simulating Policy {policy_location}")
    with open(policy_location, "rb") as f:
        solver = pickle.load(f)
    if "small" in policy_location:
        lake = get_small_lake()
    else:
        lake = get_large_lake()
    policy = create_frozenlake_policy(solver.policy)
    agent = Agent(lake, policy)

    successes = 0
    episode_lengths = []
    for i in range(N):
        rewards = run_agent(agent)
        if rewards[-1] == 1:
            successes += 1
        episode_lengths.append(len(rewards))
    return successes / N, episode_lengths
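# The four simulators above all rely on the same Agent(env, policy) /
# run_agent(agent) contract, which is not shown in this excerpt. The sketch
# below is an illustrative stand-in for that contract (the class, function, and
# toy environment are assumptions, not the project's code): the policy maps a
# state to an action, and run_agent returns the per-step rewards of one rollout
# so the callers can check rewards[-1] == 1 and len(rewards).
import random


class _SketchAgent:
    def __init__(self, env, policy):
        self.env = env        # exposes reset() -> state and step(action) -> (state, reward, done)
        self.policy = policy  # callable: state -> action

    def rollout(self, max_steps=200):
        state = self.env.reset()
        rewards = []
        for _ in range(max_steps):
            state, reward, done = self.env.step(self.policy(state))
            rewards.append(reward)
            if done:
                break
        return rewards


def _sketch_run_agent(agent):
    return agent.rollout()


class _CoinFlipEnv:
    """Toy environment: each step terminates with probability 0.5 and pays 1 on termination."""
    def reset(self):
        return 0

    def step(self, action):
        done = random.random() < 0.5
        return 0, (1 if done else 0), done


# Demo of the assumed contract with the toy environment and a trivial policy
print(_sketch_run_agent(_SketchAgent(_CoinFlipEnv(), policy=lambda s: 0)))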
def getAgentInfo(agentType, directoryAgent, sender, messageCount):
    gmess = Graph()
    gmess.bind('foaf', FOAF)
    gmess.bind('dso', DSO)
    ask_obj = agn[sender.name + '-Search']
    gmess.add((ask_obj, RDF.type, DSO.Search))
    gmess.add((ask_obj, DSO.AgentType, agentType))

    gr = send_message(
        build_message(gmess, perf=ACL.request,
                      sender=sender.uri,
                      receiver=directoryAgent.uri,
                      msgcnt=messageCount,
                      content=ask_obj),
        directoryAgent.address)

    dic = get_message_properties(gr)
    content = dic['content']
    address = gr.value(subject=content, predicate=DSO.Address)
    url = gr.value(subject=content, predicate=DSO.Uri)
    name = gr.value(subject=content, predicate=FOAF.name)
    return Agent(name, url, address, None)
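# Hypothetical usage of the two directory-lookup helpers above. The agent
# names, URIs, addresses, and the DSO.TransportAgent / DSO.SellerAgent types
# are placeholders (assumptions), not values taken from the project. The call
# sites assume the same module-level bindings as the helpers themselves:
# Graph, FOAF, DSO, agn, ACL, Agent, and the build_message / send_message /
# get_message_properties utilities.
DirectoryAgent = Agent('DirectoryAgent', agn.Directory,
                       'http://localhost:9000/Register', None)
InfoAgent = Agent('InfoAgent', agn.InfoAgent,
                  'http://localhost:9010/comm', None)

mss_cnt = 0
transportistas = getTransportistas(DSO.TransportAgent, DirectoryAgent, InfoAgent, mss_cnt)
for t in transportistas:
    print(t.name, t.address)

vendedor = getAgentInfo(DSO.SellerAgent, DirectoryAgent, InfoAgent, mss_cnt + 1)
print(vendedor.name, vendedor.address)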
RUN_ID = (
    "cliff-GAMMA(%.2f)-ALPHA(%f)-MEMSIZE(%d)-EPISODES(%d)-BATCHSIZE(%d)-REPLACE_AFTER(%d)-PER(%r)"
    % (GAMMA, ALPHA, MAX_MEMORY_SIZE, NUM_EPISODES, BATCH_SIZE, REPLACE_AFTER, USE_PER))
time_started = datetime.now().strftime("%d.%m.%Y-%H:%M:%S")

if USE_TB:
    writer = SummaryWriter("runs/%s-%s" % (RUN_ID, time_started))

env = CliffWalking1D(9)
agent = Agent(Net=Net, input_size=1, output_size=env.action_space.n, gamma=GAMMA,
              alpha=ALPHA, max_memory_size=MAX_MEMORY_SIZE, num_episodes=NUM_EPISODES,
              replace_after=REPLACE_AFTER, use_per=USE_PER, per_a=1)

# Filling memory
print("Filling replay memory")
state = env.reset()
if USE_TB:
    writer.add_graph(agent.Q_loc, torch.FloatTensor([state]).to(device))
for t in range(200):
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    agent.store_transition(state, action, reward, next_state, done)
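    # Hypothetical continuation (an assumption, not part of the excerpt): advance
    # or reset the state so consecutive stored transitions stay consistent.
    state = env.reset() if done else next_state

# Hypothetical training loop (agent.choose_action and agent.learn are assumed
# method names; the excerpt only shows store_transition).
for episode in range(NUM_EPISODES):
    state, done, episode_reward = env.reset(), False, 0.0
    while not done:
        action = agent.choose_action(state)
        next_state, reward, done, info = env.step(action)
        agent.store_transition(state, action, reward, next_state, done)
        agent.learn(BATCH_SIZE)
        state = next_state
        episode_reward += reward
    if USE_TB:
        writer.add_scalar("episode_reward", episode_reward, episode)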
EPS_DECAY = 200  # tokyo uni value = 200
TARGET_UPDATE = 10  # for updating the target network

# For the experiments bash script:
record_file = "../exp/rl_results_" + args.data_name + ".csv"
# File for recording episode durations - original code:
# record_file = "../exp/rl_results.csv"

# Random seed
for seed in range(1, 101):  # original range (1, 6)
    random.seed(seed)
    torch.manual_seed(0)

    # Initialize agent
    agent = Agent()

    # Get number of actions
    # n_actions = len(agent.action_space)
    n_actions = res_dict["num_words"]
    print(f"n_actions: {n_actions}")

    # Get size of state
    state_size = agent.get_state().size(1)

    # Initialize policy and target networks
    hidden_size = 32
    policy_net = DQN(state_size, n_actions, hidden_size).to(device)
    target_net = DQN(state_size, n_actions, hidden_size).to(device)
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()

    learning_rate = 1e-2  # tokyo uni value = 1e-2
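    # Hypothetical continuation (an assumption, not part of the excerpt): the
    # optimizer and ε-greedy helper that a policy/target-network setup like this
    # typically pairs with. EPS_START and EPS_END are assumed to be defined next
    # to EPS_DECAY above, and math / torch.optim are assumed to be imported.
    optimizer = optim.RMSprop(policy_net.parameters(), lr=learning_rate)

    def select_action(state, steps_done):
        # Exponentially annealed ε-greedy action selection
        eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-steps_done / EPS_DECAY)
        if random.random() > eps_threshold:
            with torch.no_grad():
                return policy_net(state).argmax(dim=1).view(1, 1)  # greedy action
        return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)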
def get_agent_any_type(type_opps, name, policy_type, env):
    if type_opps == "zoo":
        return load_agent(name, policy_type, "zoo_ant_policy_2", env, 1)
    elif type_opps == "const":
        trained_agent = constant_agent_sampler()
        trained_agent.load(name)
        return trained_agent
    elif type_opps == "lstm":
        policy = LSTMPolicy(scope="agent_new", reuse=False,
                            ob_space=env.observation_space.spaces[0],
                            ac_space=env.action_space.spaces[0],
                            hiddens=[128, 128], normalize=True)

        def get_action(observation):
            return policy.act(stochastic=True, observation=observation)[0]

        trained_agent = Agent(get_action, policy.reset)

        # Restore the saved policy weights into the TensorFlow graph
        with open(name, "rb") as file:
            values_from_save = pickle.load(file)
        for key, value in values_from_save.items():
            var = tf.get_default_graph().get_tensor_by_name(key)
            sess.run(tf.assign(var, value))
        return trained_agent
    elif type_opps == "our_mlp":
        # TODO: this branch is an acknowledged hack and should be replaced.
        def make_env(id):
            # TODO: seed (not currently supported)
            # TODO: VecNormalize? (typically good for MuJoCo)
            # TODO: baselines logger?
            # We are loading identical policy weights into different variables;
            # this works around the design choice of Agents having state stored
            # inside of them.
            sess = utils.make_session()
            with sess.as_default():
                multi_env = env
                attacked_agent = constant_agent_sampler(act_dim=8, magnitude=100)
                single_env = Gymify(MultiToSingle(CurryEnv(multi_env, attacked_agent)))
                single_env.spec = gym.envs.registration.EnvSpec('Dummy-v0')
                # TODO: upgrade Gym so we don't have to do this
                single_env.observation_space.dtype = np.dtype(np.float32)
            return single_env
            # TODO: close session?

        denv = SubprocVecEnv([functools.partial(make_env, 0)])
        model = ppo2.learn(network="mlp", env=denv, total_timesteps=1, seed=0,
                           nminibatches=4, log_interval=1, save_interval=1,
                           load_path=name)
        stateful_model = StatefulModel(denv, model)
        trained_agent = utils.Agent(action_selector=stateful_model.get_action,
                                    reseter=stateful_model.reset)
        return trained_agent
    raise Exception('Agent type unrecognized')
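# The Agent wrapper used in the "lstm" and "our_mlp" branches above is not
# shown in this excerpt. The class below is a minimal stand-in consistent with
# how it is constructed (an action selector plus a reset callable); it is an
# assumption about the interface, not the project's implementation.
class _SketchPolicyAgent:
    def __init__(self, action_selector, reseter):
        self.action_selector = action_selector  # callable: observation -> action
        self.reseter = reseter                  # callable: () -> None, clears recurrent state

    def get_action(self, observation):
        return self.action_selector(observation)

    def reset(self):
        return self.reseter()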