from Environment import *
import time

# Seconds to wait between two printed generations.
sleepTime = 0.5

# 10x10 world; the remaining constructor arguments are passed through as in
# the original script.
env = Environment(10, 10, "Universe", True)

# This is a good starting point, because it rarely dies out.
# The cells are switched on in exactly this order.
_SEED_CELLS = (
    (5, 5),
    (6, 5),
    (4, 4),
    (5, 4),
    (5, 3),
)
for _first, _second in _SEED_CELLS:
    env.set_state(_first, _second, True)

# Make this state the environment's initial state.
env.update_initial()

# Run the simulation forever: print the map, advance one step, pause.
while True:
    print("NEXT")
    env.print_map()
    env.step()
    time.sleep(sleepTime)
# Auction settings used by the campaigns below. `lambdas` and `convparams`
# are expected to be defined earlier in the file (not visible here).
a1= Auction(nBidders=5, nslots=5, mu=0.59, sigma=0.2, lambdas=lambdas)
a2= Auction(nBidders=6, nslots=5, mu=0.67, sigma=0.4, lambdas=lambdas)
a3= Auction(nBidders=6, nslots=5, mu=0.47, sigma=0.25, lambdas=lambdas)
a4= Auction(nBidders=5, nslots=5, mu=0.57, sigma=0.39, lambdas=lambdas)

ncampaigns=3
c1 = Campaign(a1, nUsers=1000.0, probClick=0.5, convParams= convparams)
c2 = Campaign(a2, nUsers=1500.0, probClick=0.6, convParams= convparams)
c3 = Campaign(a3, nUsers=1500.0, probClick=0.6, convParams= convparams)
# NOTE(review): c4 and c5 are built but never passed to Environment below;
# only c1..c3 are used, matching ncampaigns == 3.
c4 = Campaign(a2, nUsers=1000.0, probClick=0.5, convParams= convparams)
c5 = Campaign(a4, nUsers=1250.0, probClick=0.4, convParams= convparams)

env = Environment([c1,c2,c3])
nBids=10
nIntervals=10
deadline = 2
maxBudget = 100
agent = Agent(1000, deadline, ncampaigns,nIntervals,nBids,maxBudget)
agent.initGPs()
plotter = Plotter(agent=agent,env=env)

# Build a list holding the oracle matrix of every campaign.
listMatrices = list()
for i in range(0,ncampaigns):
    matrix = plotter.oracleMatrix(indexCamp=i,nsimul=10)
    listMatrices.append(matrix)
    # The first campaign seeds optMatrix with its row-wise maxima.
    if i==0:
        optMatrix = np.array([matrix.max(axis=1)])
class BuildContext(Utils.Context):
    """Hold the state of a build: nodes, environments, task generators, caches.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-flattened source; confirm it against the upstream file.
    """

    def __init__(self):
        """Create an empty build context and publish it as the global ``bld``."""
        # Not a singleton: every new instance rebinds the module-level name.
        global bld
        bld = self

        self.task_manager = Task.TaskManager()

        # Counter and index kept on the context.
        self.id_nodes = 0
        self.idx = {}

        # Maps variant names to environments; filled by load_envs().
        self.all_envs = {}

        # Project build directory; load() and save() look for DBFILE here.
        self.bdir = ''

        # Current directory node; load_envs() rebinds it to the source node.
        self.path = None

        # Manual dependencies.
        self.deps_man = Utils.DefaultDict(list)

        # Cache variables.
        self.cache_node_abspath = {}
        self.cache_scanned_folders = {}

        # Paths collected for uninstall; install() removes their empty
        # parent folders.
        self.uninstall = []

        # Give each of these attributes its own fresh dict
        # (cache_node_abspath is therefore reset a second time here).
        for v in 'cache_node_abspath task_sigs node_deps raw_deps node_sigs'.split(
        ):
            var = {}
            setattr(self, v, var)

        self.cache_dir_contents = {}

        self.all_task_gen = []
        self.task_gen_cache_names = {}
        self.cache_sig_vars = {}
        self.log = None

        self.root = None
        self.srcnode = None
        self.bldnode = None

        # Bind the build context to a per-context Node subclass so nodes
        # can reach it through their class.
        class node_class(Node.Node):
            pass

        self.node_class = node_class
        # The class is renamed to "Node.Nodu"; load() and save() publish it
        # under that name on the Node module around the pickle calls.
        self.node_class.__module__ = "Node"
        self.node_class.__name__ = "Nodu"
        self.node_class.bld = self

        # install() treats a negative value as "uninstall".
        self.is_install = None

    def __copy__(self):
        """Refuse to copy: always raises Utils.WafError."""
        raise Utils.WafError('build contexts are not supposed to be cloned')

    def load(self):
        """Load the configured tools and the pickled build cache from disk.

        Raises Utils.WafError when the stored 'version' is lower than
        HEXVERSION. A missing cache file is tolerated.
        """
        try:
            env = Environment.Environment(
                os.path.join(self.cachedir, 'build.config.py'))
        except (IOError, OSError):
            # No stored configuration: nothing to set up.
            pass
        else:
            if env['version'] < HEXVERSION:
                raise Utils.WafError(
                    'Version mismatch! reconfigure the project')
            for t in env['tools']:
                self.setup(**t)

        try:
            # The garbage collector is switched off while unpickling and
            # re-enabled in the finally clause.
            gc.disable()
            f = data = None

            # Make "Node.Nodu" resolvable for the unpickler.
            Node.Nodu = self.node_class

            try:
                f = open(os.path.join(self.bdir, DBFILE), 'rb')
            except (IOError, EOFError):
                # Handle a missing or empty file.
                pass

            try:
                # NOTE(review): unpickling runs code from the cache file;
                # the file must come from a trusted build directory.
                if f:
                    data = cPickle.load(f)
            except AttributeError:
                # A pickle that references an attribute which no longer
                # exists is ignored unless running very verbosely.
                if Logs.verbose > 1:
                    raise

            if data:
                for x in SAVED_ATTRS:
                    setattr(self, x, data[x])
            else:
                debug('build: Build cache loading failed')

        finally:
            if f:
                f.close()
            gc.enable()

    def save(self):
        """Pickle the SAVED_ATTRS attributes into DBFILE inside ``self.bdir``.

        The data is written to a '.tmp' file first and then renamed over
        the old database.
        """
        gc.disable()
        # Detach the context from the node class while pickling; it is
        # re-attached at the end of this method.
        self.root.__class__.bld = None

        Node.Nodu = self.node_class
        db = os.path.join(self.bdir, DBFILE)
        # NOTE(review): if the dump raises, the file stays open, gc stays
        # disabled and the node class stays detached.
        file = open(db + '.tmp', 'wb')
        data = {}
        for x in SAVED_ATTRS:
            data[x] = getattr(self, x)
        cPickle.dump(data, file, -1)
        file.close()

        # Remove the old database first, then move the new one into place.
        try:
            os.unlink(db)
        except OSError:
            pass
        os.rename(db + '.tmp', db)
        self.root.__class__.bld = self
        gc.enable()

    def clean(self):
        """Delete build files from disk and reset the signature caches."""
        debug('build: clean called')

        # Nodes listed in CFG_FILES of any environment are kept.
        precious = set([])
        for env in self.all_envs.values():
            for x in env[CFG_FILES]:
                node = self.srcnode.find_resource(x)
                if node:
                    precious.add(node.id)

        def clean_rec(node):
            # Iterate over a copy of the keys: entries are deleted below.
            for x in list(node.childs.keys()):
                nd = node.childs[x]

                # The two low bits of the id are compared against the node
                # kinds Node.DIR and Node.BUILD.
                tp = nd.id & 3
                if tp == Node.DIR:
                    clean_rec(nd)
                elif tp == Node.BUILD:
                    if nd.id in precious:
                        continue
                    # Remove the file for every environment; missing files
                    # are ignored.
                    for env in self.all_envs.values():
                        try:
                            os.remove(nd.abspath(env))
                        except OSError:
                            pass
                    node.childs.__delitem__(x)

        clean_rec(self.srcnode)

        for v in 'node_sigs node_deps task_sigs raw_deps cache_node_abspath'.split(
        ):
            setattr(self, v, {})

    def compile(self):
        """Flush pending work and run the tasks from the build directory.

        Raises BuildError when the runner reports an error. The previous
        working directory is always restored.
        """
        debug('build: compile called')

        self.flush()

        self.generator = Runner.Parallel(self, Options.options.jobs)

        def dw(on=True):
            # Toggle the terminal cursor, only when the progress bar is on.
            if Options.options.progress_bar:
                if on:
                    sys.stderr.write(Logs.colors.cursor_on)
                else:
                    sys.stderr.write(Logs.colors.cursor_off)

        debug('build: executor starting')

        back = os.getcwd()
        os.chdir(self.bldnode.abspath())

        try:
            try:
                dw(on=False)
                self.generator.start()
            except KeyboardInterrupt:
                dw()
                # Interrupted: the cache is saved only if
                # Runner.TaskConsumer.consumers is non-empty.
                if Runner.TaskConsumer.consumers:
                    self.save()
                raise
            except Exception:
                dw()
                # Any other failure: do not store anything.
                raise
            else:
                dw()
                if Runner.TaskConsumer.consumers:
                    self.save()

            if self.generator.error:
                raise BuildError(self, self.task_manager.tasks_done)

        finally:
            os.chdir(back)

    def install(self):
        """Flush pending work; when uninstalling, remove emptied folders."""
        debug('build: install called')

        self.flush()

        # A negative is_install means uninstall: remove the folders left
        # empty by the removed files.
        if self.is_install < 0:
            lst = []
            for x in self.uninstall:
                dir = os.path.dirname(x)
                if not dir in lst:
                    lst.append(dir)
            lst.sort()
            lst.reverse()

            nlst = []
            for y in lst:
                x = y
                # Collect the folder and its parents while the path is
                # longer than 4 characters.
                while len(x) > 4:
                    if not x in nlst:
                        nlst.append(x)
                    x = os.path.dirname(x)

            # Reverse-sorted order puts nested folders before their parents.
            nlst.sort()
            nlst.reverse()
            for x in nlst:
                try:
                    os.rmdir(x)
                except OSError:
                    # rmdir failed (for example folder not empty): keep it.
                    pass

    def new_task_gen(self, *k, **kw):
        """Create a task generator bound to this context.

        With no positional argument a plain TaskGen.task_gen is built.
        Otherwise k[0] names a class in TaskGen.task_gen.classes, and
        Utils.WscriptError is raised for an unknown name.
        """
        # Reset the name cache if it holds anything.
        if self.task_gen_cache_names:
            self.task_gen_cache_names = {}

        kw['bld'] = self
        if len(k) == 0:
            ret = TaskGen.task_gen(*k, **kw)
        else:
            cls_name = k[0]

            try:
                cls = TaskGen.task_gen.classes[cls_name]
            except KeyError:
                raise Utils.WscriptError(
                    '%s is not a valid task generator -> %s' %
                    (cls_name, [x for x in TaskGen.task_gen.classes]))
            ret = cls(*k, **kw)
        return ret

    def __call__(self, *k, **kw):
        """Shortcut that always builds a plain TaskGen.task_gen."""
        if self.task_gen_cache_names:
            self.task_gen_cache_names = {}

        kw['bld'] = self
        return TaskGen.task_gen(*k, **kw)

    def load_envs(self):
        """Load every cached environment and hash the configuration files.

        Raises Utils.WafError when the cache directory is missing or empty.
        """
        try:
            lst = Utils.listdir(self.cachedir)
        except OSError, e:
            if e.errno == errno.ENOENT:
                raise Utils.WafError(
                    'The project was not configured: run "waf configure" first!'
                )
            else:
                raise

        if not lst:
            raise Utils.WafError(
                'The cache directory is empty: reconfigure the project')

        # One environment per file ending in CACHE_SUFFIX, keyed by the
        # file name without that suffix.
        for file in lst:
            if file.endswith(CACHE_SUFFIX):
                env = Environment.Environment(os.path.join(
                    self.cachedir, file))
                name = file[:-len(CACHE_SUFFIX)]

                self.all_envs[name] = env

        self.init_variants()

        # Record a signature for each configuration file; SIG_NIL is stored
        # when the file cannot be hashed.
        for env in self.all_envs.values():
            for f in env[CFG_FILES]:
                newnode = self.path.find_or_declare(f)
                try:
                    hash = Utils.h_file(newnode.abspath(env))
                except (IOError, AttributeError):
                    error("cannot find " + f)
                    hash = SIG_NIL
                self.node_sigs[env.variant()][newnode.id] = hash

        # Look the build and source nodes up again from the root by their
        # absolute paths.
        self.bldnode = self.root.find_dir(self.bldnode.abspath())
        self.path = self.srcnode = self.root.find_dir(self.srcnode.abspath())
        self.cwd = self.bldnode.abspath()
This test is meant to be run from the Project folder! """ #sys.path.append('../project') sys.path.append('../tests') sys.path.append('../GuiProject') import mainwindow import Environment import os import XmlLoader import CharSelect import Deck import PlayerCharacter import EnvMap import ChooseListDialog from PyQt4.QtGui import * from PyQt4.QtCore import * """Load Characters""" app = QApplication(sys.argv) info = [0, "new"] mainwindow.DisplayMainMenu(app, info) Environment = Environment.Environment() Environment.NumOfPlayers = info[0] Environment.SetupPhase() #mainwindow.DisplayMainMenu(Environment) EnvMap.ShowMap(app, Environment)
def experiment(k): # Agent initialization np.random.seed() agents = [] agents.append( AgentFactoredExperiment(budgetTot=1000, deadline=deadline, nCampaigns=nCampaigns, nBudget=nIntervals, nBids=nBids, maxBid=maxBid, maxBudget=maxBudget, method="Sampling")) agents.append( AgentFactoredExperiment(budgetTot=1000, deadline=deadline, nCampaigns=nCampaigns, nBudget=nIntervals, nBids=nBids, maxBid=maxBid, maxBudget=maxBudget, method="Mean")) agents.append( AgentFactoredExperiment(budgetTot=1000, deadline=deadline, nCampaigns=nCampaigns, nBudget=nIntervals, nBids=nBids, maxBid=maxBid, maxBudget=maxBudget, method="UCB")) agents.append( AgentPrior(budgetTot=1000, deadline=deadline, nCampaigns=nCampaigns, nBudget=nIntervals, nBids=nBids, maxBid=maxBid, maxBudget=maxBudget, usePrior=False)) results = [] for idxAgent, agent in enumerate(agents): agent.initGPs() print "Experiment : ", k # Set the GPs hyperparameters for c in range(0, nCampaigns): if agentPath[idxAgent] == "3D/": print "AOOO" agent.setGPKernel(c, oracle.gps3D[c].kernel_, oracle.alphasClicksGP[c]) else: print "\n" print "alphaCosts: ", oracle.alphasPotCostsGP print "alphaClicks: ", oracle.alphasPotClicksGP agent.setGPKernel(c, oracle.gpsClicks[c].kernel_, oracle.gpsCosts[c].kernel_, alphaClicks=oracle.alphasPotClicksGP[c], alphaCosts=oracle.alphasPotCostsGP[c]) # Init the Core and execute the experiment envi = Environment(copy.copy(campaigns)) core = Core(agent, copy.copy(envi), deadline) core.runEpisode() ensure_dir(pathSetting + agentPath[idxAgent]) np.save(pathSetting + agentPath[idxAgent] + "policy_" + str(k), [agent.prevBids, agent.prevBudgets]) np.save(pathSetting + agentPath[idxAgent] + "experiment_" + str(k), np.sum(agent.prevConversions, axis=1)) results.append(np.sum(agent.prevConversions, axis=1)) return [results, agents, envi]
def main():
    """Train the actor agent on randomly generated scheduling instances.

    Each outer iteration generates a data set, wraps it in a new Environment,
    schedules flows one by one while collecting experiences, and finally
    calls ``actor_agent.update()``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-flattened source; confirm it against the original file.
    """
    np.random.seed()
    tf.set_random_seed(0)

    # gpu configuration
    config = tf.ConfigProto(
        device_count={'GPU': args.master_num_gpu},
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=args.master_gpu_fraction))

    sess = tf.Session(config=config)
    actor_agent = ActorAgent(sess, args.policy_input_dim, 1028, args.hid_dims,
                             args.output_dim, args.max_depth)
    # Load the already-trained model; the argument is the model location
    # (default: models/zcm).
    main_model = "../models/DRLS_Model/111203/policy/"
    actor_agent.saver_policy.restore(actor_agent.sess, main_model)

    # NOTE(review): node_nums, min_usage and model_count are assigned but
    # never read in this function.
    node_nums = [15 for _ in range(50000)]
    index = 0
    min_usage = 1
    update_number = -1
    max_exps_number = 60000
    total_exps_cnt = 0
    model_count = 0
    # The loop variable is not used below: gene_all is always called with
    # node_num=15.
    for node_num in range(100000):
        index += 1
        print("train number", index, "last updated", update_number)
        data = DataGenerater()
        data.gene_all(node_num=15,
                      eps=0.35,
                      rand_min=5,
                      rand_max=10,
                      tt_num=60000,
                      delay_min=64,
                      delay_max=512,
                      pkt_min=72,
                      pkt_max=1526,
                      hop=1,
                      dynamic=True)
        # data_gene = A380Generater()
        # data_gene.gene_all(rand_min=1000, rand_max=1000, tt_num=60000,
        #                    delay_min=64, delay_max=256, pkt_min=64, pkt_max=1526, dynamic=True)
        # data_gene = LadderGenerater()
        # node_num = 14
        # print("node number", node_num)
        # data_gene.gene_all(node_num=node_num, eps=0.35, rand_min=5, rand_max=10, tt_num=60000,
        #                    delay_min=64, delay_max=512, pkt_min=72, pkt_max=1526, hop=1, dynamic=True)
        actor_agent.env = Environment(data)
        start_time = time.time()
        runtime_threshold = 60  # not read below
        # done: 0 = current flow still in progress, 1 = success branch,
        # -1 = failure branch (see the branches further down).
        done = 0
        flow_number = 0
        per_flow_cnt_total = 0
        per_flow_cnt_valid = 0
        time_record = {}
        manual = False  # !!! be sure to change this back to False

        while True:
            # Experiences collected for the flow currently being scheduled.
            exps = []
            while done == 0:
                per_flow_cnt_total += 1
                per_flow_cnt_valid += 1
                # print("TT_flow: ", flow_cnt, "epoch: ", cnt, "index: ", index)
                policy_inputs, time_inputs, edge, time_slot, edge_selected_mask, cycle, LD_score, flag = actor_agent.invoke_model(
                    manual)
                reward, done, reason = actor_agent.env.step(
                    edge, time_slot, LD_score)
                # print(len(actor_agent.env.schedule.sche))
                # print(" reward: ", reward, "done: ", done)
                if flag:
                    # Index 4 of each experience is the reward slot that is
                    # rewritten below.
                    exps.append([
                        policy_inputs, time_inputs, edge, time_slot, reward,
                        done, edge_selected_mask, cycle,
                        actor_agent.env.graph.reachable_edge_matrix, index
                    ])
                else:
                    # No new experience: overwrite the last stored reward.
                    exps[-1][4] = reward

            # Replace each reward with the output of discount(rewards, 0.8).
            cumulated_reward = np.array([exp[4] for exp in exps])
            cumulated_reward = discount(cumulated_reward, 0.8)
            for i in range(len(exps)):
                exps[-i - 1][4] = cumulated_reward[-i - 1]

            if done == 1:
                # Keep at most max_exps_number successful experiences; once
                # full, a new one replaces a random slot only when the
                # random index falls inside the buffer.
                for exp in exps:
                    total_exps_cnt += 1
                    if len(actor_agent.success_exps) < max_exps_number:
                        actor_agent.success_exps.append(exp)
                    else:
                        k = random.randint(0, total_exps_cnt - 1)
                        if k < max_exps_number:
                            actor_agent.success_exps[k] = exp
            else:
                #if reason == "Visited edge or Not adjacent edge":
                print(reason, actor_agent.env.tt_query_id, flow_number, done)
                actor_agent.fail_exps.extend(exps)
            # print("cumulated reward", cumulated_reward, "done", done)

            cur_time = time.time()
            if done == -1:
                # if exps[-1][4] < -15:
                #     print(exps[-1][0])
                actor_agent.env.enforce_next_query()  # schedule the next flow
                done = 0
                per_flow_cnt_valid = 0
                # actor_agent.env.roll_back(1)
            elif done == 1:  # continue with scheduling the next flow
                delay = actor_agent.env.tt_flow_time_record[
                    -1] - actor_agent.env.tt_flow_time_record[0]
                usage = actor_agent.env.edge_usage()
                print("TT_flow", flow_number, "cycle", cycle, "usage", usage,
                      "use time", cur_time - start_time, "delay", delay,
                      "reward", reward)
                time_record[flow_number] = [
                    flow_number, cycle, per_flow_cnt_total,
                    per_flow_cnt_valid, cur_time - start_time, delay
                ]
                actor_agent.env.enforce_next_query()  # schedule the next flow
                start_time = time.time()
                flow_number += 1
                per_flow_cnt_total = 0
                per_flow_cnt_valid = 0
                done = 0
            # print(actor_agent.env.edge_usage(), len(success_exps), len(fail_exps))

            # Stop this instance once the query id is at least 10 ahead of
            # the number of scheduled flows, or when it reaches 59999.
            if (actor_agent.env.tt_query_id >= flow_number +
                    10) or actor_agent.env.tt_query_id == 59999:
                # actor_agent.env.schedule.show()
                break

        edge_usage = actor_agent.env.edge_usage()
        print(edge_usage, len(actor_agent.success_exps),
              len(actor_agent.fail_exps))
        actor_agent.flow_count_record[index] = 1 - edge_usage
        # back-propagation update
        actor_agent.update()
def prepare_impl(t, cwd, ver, wafdir):
    """Locate the project's wscript, load it as the main module, run main().

    t   -- tool directory, stored as the single entry of Options.tooldir
    cwd -- launch directory; the wscript search climbs up from here
    ver, wafdir -- not used by this function

    The process exits early for '--version', when the gcc-like configure
    call directory has no wscript, and when no wscript is found at all.
    Raises Utils.WafError when the chosen folder cannot be entered.
    """
    Options.tooldir = [t]
    Options.launch_dir = cwd

    # some command-line options can be processed immediately
    if '--version' in sys.argv:
        opt_obj = Options.Handler()
        opt_obj.curdir = cwd
        opt_obj.parse_args()
        sys.exit(0)

    # now find the wscript file
    msg1 = 'Waf: Please run waf from a directory containing a file named "%s" or run distclean' % WSCRIPT_FILE

    # in theory projects can be configured in a gcc manner:
    # mkdir build && cd build && ../waf configure && ../waf
    build_dir_override = None
    candidate = None

    lst = os.listdir(cwd)

    search_for_candidate = True
    if WSCRIPT_FILE in lst:
        candidate = cwd

    elif 'configure' in sys.argv and not WSCRIPT_BUILD_FILE in lst:
        # gcc-like configuration: the wscript lives next to the waf script
        # and the current directory becomes the build directory
        calldir = os.path.abspath(os.path.dirname(sys.argv[0]))
        if WSCRIPT_FILE in os.listdir(calldir):
            candidate = calldir
            search_for_candidate = False
        else:
            error('arg[0] directory does not contain a wscript file')
            sys.exit(1)
        build_dir_override = cwd

    # climb up to find a script if it is not found
    while search_for_candidate:
        if len(cwd) <= 3:
            break  # stop at / or c:
        dirlst = os.listdir(cwd)
        if WSCRIPT_FILE in dirlst:
            candidate = cwd
        if 'configure' in sys.argv and candidate:
            break
        if Options.lockfile in dirlst:
            # A lock file records the folder the project was configured
            # from; use it if it still exists, else use the current folder.
            env = Environment.Environment()
            env.load(os.path.join(cwd, Options.lockfile))
            try:
                os.stat(env['cwd'])
            except:
                # Any failure to stat the recorded folder falls back to cwd.
                candidate = cwd
            else:
                candidate = env['cwd']
            break
        cwd = os.path.dirname(cwd)  # climb up

    if not candidate:
        # check if the user only wanted to display the help
        if '-h' in sys.argv or '--help' in sys.argv:
            warn('No wscript file found: the help message may be incomplete')
            opt_obj = Options.Handler()
            opt_obj.curdir = cwd
            opt_obj.parse_args()
        else:
            error(msg1)
        # NOTE(review): the exit status is 0 even on the error branch.
        sys.exit(0)

    # We have found wscript, but there is no guarantee that it is valid
    try:
        os.chdir(candidate)
    except OSError:
        raise Utils.WafError("the folder %r is unreadable" % candidate)

    # define the main module containing the functions init, shutdown, ..
    Utils.set_main_module(os.path.join(candidate, WSCRIPT_FILE))

    if build_dir_override:
        d = getattr(Utils.g_module, BLDDIR, None)
        if d:
            # test if user has set the blddir in wscript.
            msg = ' Overriding build directory %s with %s' % (
                d, build_dir_override)
            warn(msg)
        Utils.g_module.blddir = build_dir_override

    # bind a few methods and classes by default

    def set_def(obj, name=''):
        # Install obj on the main module unless the wscript already defines
        # something under that name.
        n = name or obj.__name__
        if not n in Utils.g_module.__dict__:
            setattr(Utils.g_module, n, obj)

    for k in [dist, distclean, distcheck, clean, install, uninstall]:
        set_def(k)

    set_def(Configure.ConfigurationContext, 'configure_context')

    for k in ['build', 'clean', 'install', 'uninstall']:
        set_def(Build.BuildContext, k + '_context')

    # now parse the options from the user wscript file
    opt_obj = Options.Handler(Utils.g_module)
    opt_obj.curdir = candidate
    try:
        # The attribute lookup only tests whether the wscript defines
        # set_options; f itself is not used.
        f = Utils.g_module.set_options
    except AttributeError:
        pass
    else:
        opt_obj.sub_options([''])
    opt_obj.parse_args()

    # Provide no-op init/shutdown hooks when the wscript defines none.
    if not 'init' in Utils.g_module.__dict__:
        Utils.g_module.init = Utils.nada
    if not 'shutdown' in Utils.g_module.__dict__:
        Utils.g_module.shutdown = Utils.nada

    main()
def read_txt(self,path):
    """Parse the text file at ``path`` into a list of data_mid objects.

    Layout as read by the code below:
      * optional header line 'Environment', then a 'W H' line (two
        integers), then point lines terminated by a line '0';
      * repeated sections: optional 'People' line followed by an integer
        tag line, a line of space-separated location indexes (or '0' for
        none), then point lines terminated by a line '0'.
    A point line has 7 space-separated fields:
    x y ID state weight gridID t.

    NOTE(review): the nesting was reconstructed from a whitespace-flattened
    source; in particular confirm whether ``temp_envir.locations`` is
    assigned inside the 'Environment' branch.
    """
    # Drop any previously held self.mid.
    if self.mid:
        self.mid=None
    temp_envir=Environment.Environment()
    temp_route=[]
    with open(path,'r') as f:
        temp_str=f.readline()
        temp_str=temp_str.rstrip('\n')
        if(temp_str=='Environment'):
            # Dimension line: two integers.
            temp_str=f.readline()
            temp_str=temp_str.rstrip('\n')
            temp_str=temp_str.split(' ')
            temp_int=[int(temp_str[0]),int(temp_str[1])]
            temp_envir.set_dimenssion(temp_int)
            tag=True
            # Environment points, until a line containing only '0'.
            while(tag):
                temp_str = f.readline()
                temp_str = temp_str.rstrip('\n')
                if(temp_str!=str(0)):
                    temp_str = temp_str.split(' ')
                    tempx = float(temp_str[0])
                    tempy = float(temp_str[1])
                    ID=int(temp_str[2])
                    state=int(temp_str[3])
                    weight=int(temp_str[4])
                    gridID=int(temp_str[5])
                    t=float(temp_str[6])
                    point=Point.Point([tempx,tempy],ID=ID,state=state,weight=weight,gridid=gridID,t=t)
                    temp_route.append(point)
                else:
                    break
            temp_envir.locations=temp_route
        data_mid_list=[]
        temp_str = f.readline()
        # One iteration per section; an empty read (end of file) ends the loop.
        while(temp_str):
            temp_route2 = []
            person_tag = 0
            temp_str = temp_str.rstrip('\n')
            if (temp_str == 'People'):
                person_tag= int(f.readline().rstrip('\n'))
            # Indexes into temp_envir.locations; '0' means none.
            temp_str = f.readline()
            important_loc=[]
            temp = temp_str.rstrip('\n')
            if not(temp=='0'):
                temp = temp.split(" ")
                for i in range(int(len(temp))):
                    index=int(temp[i])
                    point=temp_envir.locations[index]
                    important_loc.append(point)
            # Route points of this section, until a line containing only '0'.
            # NOTE(review): if the terminating '0' is missing, the empty
            # string read at end of file is split and float('') raises
            # ValueError.
            while (True):
                temp_str = f.readline()
                temp_str = temp_str.rstrip('\n')
                if (temp_str != str(0)):
                    temp_str = temp_str.split(' ')
                    tempx = float(temp_str[0])
                    tempy = float(temp_str[1])
                    ID = int(temp_str[2])
                    state = int(temp_str[3])
                    weight = int(temp_str[4])
                    gridID = int(temp_str[5])
                    t=float(temp_str[6])
                    point = Point.Point([tempx, tempy], gridid=gridID, ID=ID, state=state, weight=weight,t=t)
                    temp_route2.append(point)
                else:
                    break
            temp_mid=data_mid.data_mid(temp_envir,person_tag=person_tag,important_loc=important_loc)
            temp_mid.add_location(temp_route2)
            data_mid_list.append(temp_mid)
            temp_str = f.readline()
    return data_mid_list
class Cal_para2():
    """Aggregate statistics over a list of ``data_mid`` records.

    Each record is expected to expose ``route``, ``environment`` and
    ``important_loc`` (the attributes read by the methods below).
    """

    # Class-level fallbacks, used when the constructor receives an empty list.
    Envir=Environment.Environment()
    data_mid_list=[]

    def __init__(self,data_mid_list):
        """Store ``data_mid_list`` and adopt the environment of its first item.

        An empty or None argument leaves the class-level defaults in place.
        """
        if(data_mid_list):
            self.data_mid_list=data_mid_list
            self.Envir=data_mid_list[0].environment

    #return the frequency rank distribution of the simulation
    def get_visit_frequency_disput(self,route=None):
        """Return the averaged visit-frequency distributions, expanded to ranks.

        route -- records to aggregate; ``self.data_mid_list`` is used when it
                 is None or empty.

        Returns ``(data1_1, data2_1)``: for each averaged histogram, rank
        ``i + 1`` is repeated ``int(average[i])`` times. The first histogram
        has 1100 bins (point based), the second 400 bins (raster based).
        """
        data1=[0]*1100
        data2=[0]*400
        if not route:
            route=self.data_mid_list
        for mid in route:
            cal = Cal_para.Cal_para(mid.route, self.Envir)
            temp_data1 = cal.get_visit_frequency_disput()
            temp_data2 = cal.get_visit_frequency_raster_disput()
            for i, item in enumerate(temp_data1):
                data1[i] += item
            for i, item in enumerate(temp_data2):
                data2[i] += item
        # NOTE(review): the divisor is the size of the whole population even
        # when a sub-group is passed as `route`; kept as in the original,
        # confirm whether len(route) was intended.
        total = len(self.data_mid_list)
        data1=[item/total for item in data1]
        data2=[item/total for item in data2]
        data1_1 = [i + 1 for i, item in enumerate(data1) for _ in range(int(item))]
        data2_1 = [i + 1 for i, item in enumerate(data2) for _ in range(int(item))]
        return data1_1,data2_1

    #####################################################################################################################################
    #compute an attribute for each group
    #groups are based on the home-work distance
    def get_group_all_attribute(self, n, attribute_func, dis_func):
        """Split the records into ``n`` distance groups and evaluate each one.

        attribute_func -- callable taking the list of records of one group
        dis_func       -- callable giving the distance between two locations

        Returns a list with ``attribute_func(group)`` for groups 0..n-1.
        """
        group_tag = self.get_homework_dis_group(n, distant=dis_func)
        return [
            self.get_group_attribute(groupid=i, group_tag=group_tag,
                                     attribute_func=attribute_func)
            for i in range(n)
        ]

    def get_homework_dis_group(self,n,distant):
        """Return one group tag per record, aligned with ``self.data_mid_list``.

        The distances between ``important_loc[0]`` and ``important_loc[1]``
        are sorted and cut into ``n`` equally sized rows; the largest
        ``len % n`` distances are left out. A record gets the index of the
        first row containing its distance, or -1 when its distance is in no
        row.

        The original loop used ``continue`` where ``break`` was needed and
        appended nothing for left-out records, so a record could receive
        several tags or none, shifting every later tag onto the wrong record.
        """
        dis_list=[]
        for item in self.data_mid_list:
            dis=distant(item.important_loc[0],item.important_loc[1])
            dis_list.append(dis)
        temp_dis_list=sorted(dis_list)
        # Keep a multiple of n so the array reshapes into n equal rows.
        length=int(len(temp_dis_list)/n)*n
        temp_dis_array=np.array(temp_dis_list[0:length]).reshape((n,-1))
        group_tag=[]    #one tag per entry of data_mid_list
        for item in dis_list:
            tag=-1      # -1: not assigned to any group
            for j,jtem in enumerate(temp_dis_array):
                if(item in jtem):
                    tag=j
                    break
            group_tag.append(tag)
        return group_tag

    def get_group_attribute(self,groupid,group_tag,attribute_func):
        """Apply ``attribute_func`` to the records whose tag equals ``groupid``.

        Records beyond the end of ``group_tag`` are ignored.
        """
        group=[item for item,tag in zip(self.data_mid_list,group_tag)
               if tag == groupid]
        return attribute_func(group)

    ###########################################################################################################################
    #attribute functions, used by get_group_all_attribute
    #to compute an attribute of each distance-based group
    def attribute_func_rog_disput(self,group):
        """Return the element-wise mean of ``get_rog_disput()`` over ``group``.

        Only the first value returned by ``get_rog_disput()`` is used. An
        empty group yields an empty list.
        """
        dis_temp=[]
        for item in group:
            cal = Cal_para.Cal_para(item.route, self.Envir)
            dis,_=cal.get_rog_disput()
            if not dis_temp:
                dis_temp=[i for i in dis]
            else:
                dis_temp=list(map(lambda x,y: x+y,dis_temp,dis))
        dis=[i/len(group) for i in dis_temp]
        return dis

    #get the beta and xmin of the power law of each group
    def attribute_func_frequency_powerlaw(self,group):
        """Return the point-based visit-frequency ranks of ``group``.

        The power-law fit (beta, xmin) is not implemented yet; the raster
        based result is computed but not returned.
        """
        # NOTE(review): this object is never used; the construction is kept in
        # case data_mid() has side effects — confirm and remove.
        temp_data_mid=data_mid.data_mid(self.Envir)
        data_1,data_2=self.get_visit_frequency_disput(route=group)
        ## TODO: compute beta and xmin of data_1 and data_2
        return data_1
NUMBER_TRASH_SOURCES = 4 #Default is 4 SAVED_TIMESTEPS = 3 #Print statements PRINT_OVERVIEW_AGENTS = False PRINT_HISTORY = True PRINT_TRASH_HISTORY = True PRINT_HISTORY_AGENTS = False PRINT_EACH_AGENTS_VIEW = True PRINT_COMPLETE_TRASH = True PRINT_REWARD_LIST = True #Create an environment with the dimensionality dim test_environment = Environment.Environment(dim=DIM, reward_eat_trash=REWARD_EAT_TRASH, reward_invalid_move=REWARD_EAT_TRASH, reward_nothing_happend=REWARD_NOTHING_HAPPEND, trash_appearence_prob=TRASH_APPEARENCE_PROB, number_trash_sources=NUMBER_TRASH_SOURCES, saved_timesteps=SAVED_TIMESTEPS) number_created_agents = 0 #Add the agents for i in range(0, NUMBER_AGENTS): if(test_environment.add_agent(coord=None, capacity=CAPACITY_PER_AGENT)): print("Agent {} is created succesfully".format(i)) number_created_agents += 1 else: print("Agent {} isn't created".format(i)) for step in range(0, NUMBER_SIMULATED_STEPS): print("{}. step started".format(step))
freqUpdate = opt.freqUpdate freqTest = opt.freqTest frameTest = opt.frameTest loadRatio = opt.loadRatio expInit = opt.expInit expFinal = opt.expFinal expTest = opt.expTest noopMax = opt.noopMax lifeReward = opt.lifeReward # initialization np.random.seed(SEED) env = Environment.Environment(opt) n_actions = opt.n_actions = env.n_actions if networkType == "CNN": import NN with tf.device(device): with tf.variable_scope("train") as train_scope: Q_train = NN.NN(opt, trainable=True) with tf.variable_scope("target") as target_scope: Q_target = NN.NN(opt, trainable=False) # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1) config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.InteractiveSession(config=config) sess.run(tf.initialize_all_variables())
def test(actor_agent):
    """Schedule flows with ``actor_agent`` in a new Environment; report results.

    On a failed flow (done == -1) the fallback models are tried one after
    another, starting from the last entry of ``models``; when all have been
    tried the run stops. After flow number ``args.link_failure_pos`` the
    first edge of the graph is deleted.

    Returns a tuple: (tt_query_id, edge usage, result of
    Verify.judge_conflict(), reschedule end time minus start time,
    reschedule_cnt).

    NOTE(review): the nesting was reconstructed from a whitespace-flattened
    source. In particular confirm whether the ``reschedule == 2`` check
    belongs inside the link-failure branch.
    """
    actor_agent.env = Environment()
    start_time = time.time()
    # done: 0 = current flow still in progress, 1 = success branch,
    # -1 = failure branch.
    done = 0
    flow_number = 0
    per_flow_cnt_total = 0
    per_flow_cnt_valid = 0
    time_record = {}
    manual = True
    # Number of fallback models already tried for the current flow.
    try_model_id = 0
    models = [
        "../models/A380_Model/A" + str(model_count) + "/policy/"
        for model_count in range(34)
    ]

    while True:
        while done == 0:
            per_flow_cnt_total += 1
            per_flow_cnt_valid += 1
            policy_inputs, time_inputs, edge, time_slot, edge_selected_mask, cycle, LD_score, flag = actor_agent.invoke_model(
                manual)
            reward, done, reason = actor_agent.env.step(
                edge, time_slot, LD_score)
            # print(edge.id, time_slot, actor_agent.env.time, actor_agent.env.tt_flow_deadline)

        cur_time = time.time()
        if done == -1:
            if try_model_id < len(models):
                actor_agent.env.roll_back(1)
                # print(actor_agent.env.edge_usage())
                # print(models[len(models) - 1 - try_model_id])
                # Fallback models are tried from the end of the list.
                actor_agent.saver_policy.restore(
                    actor_agent.sess, models[len(models) - 1 - try_model_id])
                try_model_id += 1
                actor_agent.env.enforce_next_query()
                done = 0
            else:
                # Every fallback model was tried: give up.
                break
        elif done == 1:  # continue with scheduling the next flow
            delay = actor_agent.env.tt_flow_time_record[
                -1] - actor_agent.env.tt_flow_time_record[0]
            usage = actor_agent.env.edge_usage()
            # print("TT_flow", flow_number, "cycle", cycle, "usage", usage, "use time", cur_time - start_time, "delay", delay, "reward", reward)
            time_record[flow_number] = [
                flow_number, cycle, per_flow_cnt_total, per_flow_cnt_valid,
                cur_time - start_time, delay
            ]

            # After deleting the edge, reschedule the affected flows.
            if flow_number == args.link_failure_pos:
                actor_agent.reschedule_start_time = time.time()
                edge = actor_agent.env.graph.edges[0]
                # Number of flows recorded on that edge before it is deleted.
                actor_agent.reschedule_cnt = len(
                    actor_agent.env.edge_to_tt_flow[(edge.start_node.id,
                                                     edge.end_node.id)])
                # print(actor_agent.env.edge_to_tt_flow[(edge.start_node.id, edge.end_node.id)], actor_agent.reschedule_start_time)
                actor_agent.env.delete_edge(
                    (edge.start_node.id, edge.end_node.id))
            if actor_agent.env.reschedule == 2:
                actor_agent.reschedule_end_time = time.time()
                # print(actor_agent.reschedule_end_time)
            # End of modification.

            actor_agent.env.enforce_next_query()  # schedule the next flow
            start_time = time.time()
            flow_number += 1
            per_flow_cnt_total = 0
            per_flow_cnt_valid = 0
            done = 0
            # A fallback model was in use: switch back to the main model.
            if try_model_id != 0:
                try_model_id = 0
                actor_agent.saver_policy.restore(actor_agent.sess,
                                                 actor_agent.main_model)

        if actor_agent.env.tt_query_id == 59999:
            # actor_agent.env.schedule.show()
            break

    edge_usage = actor_agent.env.edge_usage()
    verify = Verify(actor_agent.env.schedule.sche)
    # NOTE(review): the reschedule_* attributes are assigned only in the
    # branches above; confirm they are initialised elsewhere on the agent.
    print(actor_agent.reschedule_end_time, actor_agent.reschedule_start_time)
    return actor_agent.env.tt_query_id, edge_usage, verify.judge_conflict(), \
        actor_agent.reschedule_end_time - actor_agent.reschedule_start_time, actor_agent.reschedule_cnt
def initialize_normal_dist_environment():
    """Return a new ``env.Environment`` built from the module-level settings.

    The constructor receives ``actions``, ``rounds`` and ``slate_size``, in
    that order.
    """
    return env.Environment(actions, rounds, slate_size)
# get environment state state = env.get_state() agent1.update_state_history(state) agent2.update_state_history(state) if draw: env.draw_board() # update value-function agent1.update(env) agent2.update(env) if __name__ == '__main__': # init env = Environment() agent1 = Agent() agent2 = Agent() state_winner_triples = get_state_hash_and_winner(env) print state_winner_triples Vx = initialVx(env, state_winner_triples) agent1.setV(Vx) agent1.set_symbol(env.x) Vo = initialVo(env, state_winner_triples) agent2.setV(Vo) agent2.set_symbol(env.o) # train T = 10000 for t in xrange(T): if t % 200 == 0:
def __init__(self, screen):
    """Set up the gameplay state on ``screen``.

    Creates a new Environment, clears the level file name and picks one of
    the first two entries of ``SoundEffects.GameTheme`` at random.
    """
    super(Gameplay, self).__init__(screen)
    self._environment = Environment()
    self._level_file_name = ""
    # randint is inclusive on both ends, so the index is 0 or 1.
    theme_index = randint(0,1)
    self.sound = SoundEffects.GameTheme[theme_index]
convParams=convparams)) #campaigns.append(Campaign(a5, nMeanResearch=1450.0, nStdResearch=50.0, probClick=probClick[4], convParams=convparams)) #campaigns.append(Campaign(a6, nMeanResearch=1480.0, nStdResearch=50.0, probClick=probClick[5], convParams=convparams)) #campaigns.append(Campaign(a7, nMeanResearch=1550.0, nStdResearch=50.0, probClick=probClick[6], convParams=convparams)) ncampaigns = len(campaigns) nBids = 10 nIntervals = 10 deadline = 250 maxBudget = 100 agent = AgentFactored(1000, deadline, ncampaigns, nIntervals, nBids, maxBudget, 1.0) agent.initGPs() env = Environment(campaigns) plotter = PlotterFinal(agent=agent, env=env) # mi creo una lista con tutte le matrici dell'oracolo di ogni campagna listMeans = list() listVar = list() for i in range(0, ncampaigns): [trueMeans, trueVar] = plotter.oracleMatrix(indexCamp=i, nsimul=20) listMeans.append(trueMeans) listVar.append(trueVar) if i == 0: optMatrix = np.array([trueMeans.max(axis=1)]) else: maxrow = np.array([trueMeans.max(axis=1)]) optMatrix = np.concatenate((optMatrix, maxrow))
class BuildContext(Utils.Context):
    """Holds the dependency tree and the caches used while building.

    Creating an instance also rebinds the module-level ``bld`` name to it
    (kept for compatibility; the class itself is not a singleton).
    """

    def __init__(self):
        """Initialise empty caches, task bookkeeping and a private node class."""

        # not a singleton, but provided for compatibility
        global bld
        bld = self

        self.task_manager = Task.TaskManager()

        # instead of hashing the nodes, we assign them a unique id when they are created
        self.id_nodes = 0
        self.idx = {}

        # map names to environments, the 'default' must be defined
        self.all_envs = {}

        # ======================================= #
        # code for reading the scripts

        # project build directory - do not reset() from load_dirs()
        self.bdir = ''

        # the current directory from which the code is run
        # the folder changes everytime a wscript is read
        self.path = None

        # Manual dependencies.
        self.deps_man = Utils.DefaultDict(list)

        # ======================================= #
        # cache variables

        # local cache for absolute paths - cache_node_abspath[variant][node]
        self.cache_node_abspath = {}

        # list of folders that are already scanned
        # so that we do not need to stat them one more time
        self.cache_scanned_folders = {}

        # list of targets to uninstall for removing the empty folders after uninstalling
        self.uninstall = []

        # ======================================= #
        # tasks and objects

        # build dir variants (release, debug, ..)
        # each of these attributes starts out as its own empty dict
        # (note: this re-assigns cache_node_abspath, already set above)
        for v in 'cache_node_abspath task_sigs node_deps raw_deps node_sigs'.split():
            var = {}
            setattr(self, v, var)

        self.cache_dir_contents = {}

        self.all_task_gen = []
        self.task_gen_cache_names = {}
        self.cache_sig_vars = {}
        self.log = None

        self.root = None
        self.srcnode = None
        self.bldnode = None

        # bind the build context to the nodes in use
        # this means better encapsulation and no build context singleton
        class node_class(Node.Node):
            pass
        self.node_class = node_class
        # the class is presented as "Node.Nodu"; load() and save() publish it
        # under that name on the Node module before touching the pickled cache
        self.node_class.__module__ = "Node"
        self.node_class.__name__ = "Nodu"
        self.node_class.bld = self

        self.is_install = None

    def __copy__(self):
        """Build contexts must not be cloned: always raises Utils.WafError."""
        raise Utils.WafError('build contexts are not supposed to be cloned')

    def load(self):
        """Load the cache from the disk.

        First reads 'build.config.py' from ``self.cachedir`` (a missing or
        unreadable file is ignored) and calls ``self.setup`` for each
        recorded tool; raises Utils.WafError when the recorded version is
        older than HEXVERSION. Then unpickles DBFILE from ``self.bdir`` and
        copies every attribute named in SAVED_ATTRS onto this context.
        """
        try:
            env = Environment.Environment(os.path.join(self.cachedir, 'build.config.py'))
        except (IOError, OSError):
            pass
        else:
            if env['version'] < HEXVERSION:
                raise Utils.WafError('Version mismatch! reconfigure the project')
            for t in env['tools']:
                self.setup(**t)

        try:
            # the garbage collector is switched off while unpickling and
            # re-enabled in the finally clause below
            gc.disable()
            f = data = None

            # make the node class reachable as Node.Nodu for the unpickler
            Node.Nodu = self.node_class

            try:
                f = open(os.path.join(self.bdir, DBFILE), 'rb')
            except (IOError, EOFError):
                # handle missing file/empty file
                pass

            try:
                # NOTE(review): unpickling executes whatever the cache file
                # describes; the build directory is assumed to be trusted
                if f:
                    data = cPickle.load(f)
            except AttributeError:
                # handle file of an old Waf version
                # that has an attribute which no longer exist
                # (e.g. AttributeError: 'module' object has no attribute 'BuildDTO')
                if Logs.verbose > 1:
                    raise

            if data:
                for x in SAVED_ATTRS:
                    setattr(self, x, data[x])
            else:
                debug('build: Build cache loading failed')

        finally:
            if f:
                f.close()
            gc.enable()

    def save(self):
        """Store the cache on disk, see self.load.

        The attributes listed in SAVED_ATTRS are pickled to DBFILE + '.tmp'
        in ``self.bdir`` and the temporary file is then renamed over DBFILE.
        """
        gc.disable()
        # detach the context from the node class for the duration of the
        # dump; it is re-attached at the end of this method
        self.root.__class__.bld = None

        # some people are very nervous with ctrl+c so we have to make a temporary file
        Node.Nodu = self.node_class
        db = os.path.join(self.bdir, DBFILE)
        file = open(db + '.tmp', 'wb')
        data = {}
        for x in SAVED_ATTRS:
            data[x] = getattr(self, x)
        cPickle.dump(data, file, -1)
        file.close()

        # do not use shutil.move
        try:
            os.unlink(db)
        except OSError:
            pass
        os.rename(db + '.tmp', db)
        self.root.__class__.bld = self
        gc.enable()

    # ======================================= #

    def clean(self):
        """Remove build files from disk and from the node tree, then reset the signature caches.

        Nodes found for the files listed under CFG_FILES in any environment
        are kept.
        """
        debug('build: clean called')

        # does not clean files created during the configuration
        precious = set([])
        for env in self.all_envs.values():
            for x in env[CFG_FILES]:
                node = self.srcnode.find_resource(x)
                if node:
                    precious.add(node.id)

        def clean_rec(node):
            # iterate over a copy of the keys: entries are deleted while walking
            for x in list(node.childs.keys()):
                nd = node.childs[x]

                # the two low bits of the node id are compared against the
                # Node.DIR / Node.BUILD constants
                tp = nd.id & 3
                if tp == Node.DIR:
                    clean_rec(nd)
                elif tp == Node.BUILD:
                    if nd.id in precious:
                        continue
                    for env in self.all_envs.values():
                        try:
                            os.remove(nd.abspath(env))
                        except OSError:
                            pass
                    node.childs.__delitem__(x)

        clean_rec(self.srcnode)

        for v in 'node_sigs node_deps task_sigs raw_deps cache_node_abspath'.split():
            setattr(self, v, {})

    def compile(self):
        """The cache file is not written if nothing was build at all (build is up to date)"""
        debug('build: compile called')

        # the bare string below is disabled profiling code kept for reference
        """ import cProfile, pstats cProfile.run("import Build\nBuild.bld.flush()", 'profi.txt') p = pstats.Stats('profi.txt') p.sort_stats('cumulative').print_stats(80) """
        self.flush()
        #"""

        self.generator = Runner.Parallel(self, Options.options.jobs)

        def dw(on=True):
            # switch the terminal cursor on/off, only when the progress bar is in use
            if Options.options.progress_bar:
                if on:
                    sys.stderr.write(Logs.colors.cursor_on)
                else:
                    sys.stderr.write(Logs.colors.cursor_off)

        debug('build: executor starting')

        # run from the build directory; the previous working directory is
        # restored in the finally clause
        back = os.getcwd()
        os.chdir(self.bldnode.abspath())

        try:
            try:
                dw(on=False)
                self.generator.start()
            except KeyboardInterrupt:
                dw()
                if Runner.TaskConsumer.consumers:
                    self.save()
                raise
            except Exception:
                dw()
                # do not store anything, for something bad happened
                raise
            else:
                dw()
                if Runner.TaskConsumer.consumers:
                    self.save()

            if self.generator.error:
                raise BuildError(self, self.task_manager.tasks_done)

        finally:
            os.chdir(back)

    def install(self):
        """This function is called for both install and uninstall."""
        debug('build: install called')

        self.flush()

        # remove empty folders after uninstalling
        if self.is_install < 0:
            lst = []
            for x in self.uninstall:
                dir = os.path.dirname(x)
                if not dir in lst:
                    lst.append(dir)
            lst.sort()
            lst.reverse()

            # collect each folder together with its parents, as long as the
            # path is more than 4 characters long
            nlst = []
            for y in lst:
                x = y
                while len(x) > 4:
                    if not x in nlst:
                        nlst.append(x)
                    x = os.path.dirname(x)

            # reverse-sorted order puts sub-folders before their parents;
            # rmdir failures (e.g. non-empty folders) are ignored
            nlst.sort()
            nlst.reverse()
            for x in nlst:
                try:
                    os.rmdir(x)
                except OSError:
                    pass

    def new_task_gen(self, *k, **kw):
        """Create a task generator bound to this context.

        Without positional arguments a plain ``TaskGen.task_gen`` is created;
        otherwise the first positional argument names the class to look up in
        ``TaskGen.task_gen.classes``. Raises Utils.WscriptError for an
        unknown class name.
        """
        # creating a generator invalidates the name cache
        if self.task_gen_cache_names:
            self.task_gen_cache_names = {}

        kw['bld'] = self
        if len(k) == 0:
            ret = TaskGen.task_gen(*k, **kw)
        else:
            cls_name = k[0]

            try:
                cls = TaskGen.task_gen.classes[cls_name]
            except KeyError:
                raise Utils.WscriptError('%s is not a valid task generator -> %s' %
                    (cls_name, [x for x in TaskGen.task_gen.classes]))
            ret = cls(*k, **kw)
        return ret

    def __call__(self, *k, **kw):
        """Create a ``TaskGen.task_gen`` bound to this context."""
        if self.task_gen_cache_names:
            self.task_gen_cache_names = {}

        kw['bld'] = self
        return TaskGen.task_gen(*k, **kw)

    def load_envs(self):
        """Load every cached environment from ``self.cachedir`` into ``self.all_envs``.

        Raises Utils.WafError when the cache directory does not exist or is
        empty. Afterwards the signatures of the files listed under CFG_FILES
        are recorded in ``self.node_sigs`` and the build/source nodes are
        looked up again from the root.
        """
        try:
            lst = Utils.listdir(self.cachedir)
        except OSError, e:
            if e.errno == errno.ENOENT:
                raise Utils.WafError('The project was not configured: run "waf configure" first!')
            else:
                raise

        if not lst:
            raise Utils.WafError('The cache directory is empty: reconfigure the project')

        for file in lst:
            if file.endswith(CACHE_SUFFIX):
                env = Environment.Environment(os.path.join(self.cachedir, file))
                # the environment name is the file name without the suffix
                name = file[:-len(CACHE_SUFFIX)]

                self.all_envs[name] = env

        self.init_variants()

        for env in self.all_envs.values():
            for f in env[CFG_FILES]:
                newnode = self.path.find_or_declare(f)
                try:
                    hash = Utils.h_file(newnode.abspath(env))
                except (IOError, AttributeError):
                    # the file cannot be hashed: report it and record SIG_NIL
                    error("cannot find "+f)
                    hash = SIG_NIL
                self.node_sigs[env.variant()][newnode.id] = hash

        # TODO: hmmm, these nodes are removed from the tree when calling rescan()
        self.bldnode = self.root.find_dir(self.bldnode.abspath())
        self.path = self.srcnode = self.root.find_dir(self.srcnode.abspath())
        self.cwd = self.bldnode.abspath()
if __name__ == '__main__':
    # Roll out the saved agent TEST times on a fixed 12x12 maze and write
    # every episode that ends with a non-negative reward to its own video.
    agent = Agent(load_model('agent.h5'))
    trajectory = []

    maze_rows = [
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
        [1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1],
        [1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
        [1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    ]
    matrix = np.asarray(maze_rows)

    for _ in range(TEST):
        outcome = game(Environment(matrix), agent)
        reward, demo = outcome[2], outcome[3]
        if reward < 0:
            continue
        trajectory.append(np.asarray(demo))

    # Output files are numbered from 1: output1.avi, output2.avi, ...
    for number, frames in enumerate(trajectory, 1):
        video_dir = 'output' + str(number) + '.avi'
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        video_writer = cv2.VideoWriter(video_dir, fourcc, 5, (12, 12))
        for pic in frames:
            video_writer.write(pic.reshape(12, 12, 1))
        video_writer.release()

    print("finally generate " + str(len(trajectory)) + " video")
def interactive():
    """Repeatedly list the test movies and rank the ones the user picks.

    Each movie is shown with its position in ``sid`` (the test movies mapped
    through ``environment.id_to_name``), its title and its genre. The ids the
    user types, split on whitespace, are passed to ``rank`` together with
    ``sid``. The loop never returns on its own.
    """
    movies = environment.test_data
    sid = [environment.id_to_name[m] for m in movies]
    prompt = 'Please type in any movie id(s) from the above example list (use space to separate multiple ones): \n'

    while True:
        for movie in movies:
            movie_id = environment.id_to_name[movie]
            position = str(sid.index(movie_id))
            title = str(id_to_movie[movie_id])
            genre = str(id_to_genre[movie_id])
            print(position + ': <' + title + '>, ' + genre)
        user_choices = raw_input(prompt)
        chosen = user_choices.rstrip().split()
        rank(chosen, sid)


if __name__ == '__main__':
    # Load the movie/genre lookup tables used by interactive().
    id_to_movie, movie_to_id, id_to_genre = utils.load_movie_genre(
        args.movie_file, args.genre_files, args.genre_name_file)
    environment = Environment(args)

    tf.reset_default_graph()
    device = str(args.device_id)
    os.environ['CUDA_VISIBLE_DEVICES'] = device
    with tf.device('/gpu:' + str(args.device_id)):
        agent = AutoPath(environment.params, environment)
        saver = tf.train.Saver()
        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            # Restore the trained weights, then hand control to the prompt loop.
            saver.restore(sess, args.model_file)
            interactive()
import copy import Environment import Player import State env = Environment.Environment(4, 4, 4) player = Player.Player('p1', env, [[{'location':j, 'size':4-j} for j in range(4)] for i in range(3)], 0.9, False) human = Player.Player('human', env, [[{'location':j, 'size':4-j} for j in range(4)] for i in range(3)], 0, False) player.load_policy('policy_p2') while True: while True: row = input("Type a row to move to: ") col = input("Type a col to move to: ") size = input("Type a size of piece to use: ") next_state, result = env.update({'destination':(int(row), int(col)), 'size':int(size), 'origin':[0]}, human) env.state.board = next_state.board env.display() if result != None: # the game is over here game_over = True print("RESULT: {}".format(result)) break game_over = False board = copy.deepcopy(env.state) # choose action #negative_board = State.State(list(map(lambda x:list(map(lambda y: -1 * y, x)), board.board)))
def configure(conf):
    """Run the project configuration and write the configure lock file.

    The source and build directories are taken from the command-line options
    first, then from the main wscript module, and finally default to '.' and
    'build'. The build directory is created if needed, the main wscript's
    configure() is run through ``conf.sub_config``, the result is stored, and
    an Environment describing this run is saved as ``Options.lockfile``.

    Raises Utils.WafError when the wscript module sets the build directory
    to '.'.
    """
    # --- source directory ---
    src = getattr(Options.options, SRCDIR, None) or getattr(Utils.g_module, SRCDIR, None)
    src_was_defaulted = not src
    if src_was_defaulted:
        src = '.'
    src = os.path.abspath(src)

    # --- build directory ---
    bld = getattr(Options.options, BLDDIR, None)
    if not bld:
        bld = getattr(Utils.g_module, BLDDIR, None)
        if bld == '.':
            raise Utils.WafError(
                'Setting blddir="." may cause distclean problems')
    bld_was_defaulted = not bld
    if bld_was_defaulted:
        bld = 'build'
    bld = os.path.abspath(bld)

    # The directory may already exist; that is not an error.
    try:
        os.makedirs(bld)
    except OSError:
        pass

    # It is not possible to compile specific targets in the configuration
    # this may cause configuration errors if autoconfig is set
    saved_targets = Options.options.compile_targets
    Options.options.compile_targets = None
    Options.is_install = False

    conf.srcdir = src
    conf.blddir = bld
    conf.post_init()

    # Report the directories only when they were not given explicitly.
    if src_was_defaulted:
        conf.check_message_1('Setting srcdir to')
        conf.check_message_2(src)
    if bld_was_defaulted:
        conf.check_message_1('Setting blddir to')
        conf.check_message_2(bld)

    # calling to main wscript's configure()
    conf.sub_config([''])

    conf.store()

    # this will write a configure lock so that subsequent builds will
    # consider the current path as the root directory (see prepare_impl).
    # to remove: use 'waf distclean'
    lock = Environment.Environment()
    lock[BLDDIR] = bld
    lock[SRCDIR] = src
    lock['argv'] = sys.argv
    lock['commands'] = Options.commands
    lock['options'] = Options.options.__dict__

    # conf.hash & conf.files hold wscript files paths and hash
    # (used only by Configure.autoconfig)
    lock['hash'] = conf.hash
    lock['files'] = conf.files
    lock['environ'] = dict(conf.environ)
    lock['cwd'] = os.path.split(Utils.g_module.root_path)[0]

    root_path = Utils.g_module.root_path
    if root_path != src:
        # in case the source dir is somewhere else
        lock.store(os.path.join(src, Options.lockfile))

    lock.store(Options.lockfile)

    # Put back the targets that were hidden during configuration.
    Options.options.compile_targets = saved_targets
# For every cell type, run 100 trials: build a fresh environment, create an
# agent on it and record the agent's finalSteps next to the cell type.
# NOTE(review): the literal arguments (50, 2) are passed straight to the
# Environment/Agent constructors; their meaning is defined there - confirm.
dataRule2Moving = []
for celltype in typelist:
    for i in range(100):
        env = EnvironmentAnalysis.Environment(50, celltype)
        agent = Agent.Agent(env, 2, 50)
        dataRule2Moving.append([celltype, agent.finalSteps])
        print("Iteration:", i)
Rule2MovingDF = pd.DataFrame(dataRule2Moving, columns=["Type", "NumberOfSteps"])
# mode='a' appends to an existing CSV instead of overwriting it
Rule2MovingDF.to_csv("Rule2Moving.csv", sep=',', encoding='utf-8', mode='a')

# For Question 2:
# 20 trials with a Moving_Target_Agent created with second argument 1
dataRule1 = []
for i in range(20):
    env = Environment.Environment(50)
    agent = Moving_Target.Moving_Target_Agent(env, 1, 50)
    dataRule1.append(agent.finalSteps)
    print("Iteration:", i)
Rule1DF = pd.DataFrame(dataRule1, columns=["NumberOfSteps"])
Rule1DF.to_csv("Rule1MovingTarget.csv", sep=',', encoding='utf-8', mode='a')

# Same experiment with second argument 2
dataRule2 = []
for i in range(20):
    env = Environment.Environment(50)
    agent = Moving_Target.Moving_Target_Agent(env, 2, 50)
    dataRule2.append(agent.finalSteps)
    print("Iteration:", i)
# Campaign setting for c in range(0, nCampaigns): a = AuctionTrueData(nBidders=int(nBidders[c]), nSlots=nSlots, lambdas=lambdas, myClickProb=probClick[c]) campaigns.append( Campaign(a, nMeanResearch=nMeanResearch[c], nStdResearch=sigmaResearch[c], probClick=probClick[c], convParams=convparams[c])) # Environment setting envOracle = Environment(copy.copy(campaigns)) # Baseline computation oracle = Oracle(budgetTot=1000, deadline=deadline, nCampaigns=nCampaigns, nBudget=nIntervals, nBids=nBids, maxBid=maxBid, maxBudget=maxBudget, environment=copy.copy(envOracle)) oracle.generateBidBudgetMatrix(nSimul=nSimul) values = np.ones(nCampaigns) * convparams[:nCampaigns, 0] oracle.updateValuesPerClick(values) [optBud, optBid, optConv] = oracle.chooseAction()
def visit_block_stmt(self, statement : Stmt.Block):
    """Execute the statements of a block in a new environment.

    The new ``Environment.Environment`` is constructed from the current
    ``self.env`` and handed to ``execute_block`` with the block's statements.
    """
    block_env = Environment.Environment(self.env)
    self.execute_block(statement.statements, block_env)
episodes = 500 # how often to report training results trainingReportRate = 100 # play the interactive game? # 0: human does not play # 1: human plays as the bot # 2: human plays as the enemy play = 2 #Max reward received in any iteration maxr = None # Set up environment for initial training gridEnvironment = Environment() gridEnvironment.randomStart = False gridEnvironment.enemyMode = 2 gridEnvironment.verbose = 0 # Set up agent gridAgent = Agent(gridEnvironment) gridAgent.verbose = False # This is where learning happens for i in range(episodes): # Train gridAgent.agent_reset() gridAgent.qLearn(gridAgent.initialObs) # Test gridAgent.agent_reset()
def run_all(): """Transforms the source infiles to a binary outfile. Returns a shell-style exit code: 1 if there were errors, 0 if there were no errors. """ Err.count = 0 Tamagotchi.process(CmdLine.infiles) z = Frontend.parse(CmdLine.infiles) env = Environment.Environment() m = Passes.ExpandMacros() i = Passes.InitLabels() l_basic = Passes.UpdateLabels() l = Passes.FixPoint("label update", [l_basic], lambda: not l_basic.changed) # The instruction selector is a bunch of fixpoints, and which # passes run depends on the command line options a bit. c_basic = Passes.Collapse() c = Passes.FixPoint("instruction selection 1", [l, c_basic], lambda: not c_basic.changed) if CmdLine.enable_branch_extend: b = Passes.ExtendBranches() instruction_select = Passes.FixPoint("instruction selection 2", [c, b], lambda: not b.changed) else: instruction_select = c a = Passes.Assembler() passes = [] passes.append(Passes.DefineMacros()) passes.append(Passes.FixPoint("macro expansion", [m], lambda: not m.changed)) passes.append(Passes.FixPoint("label initialization", [i], lambda: not i.changed)) passes.extend([Passes.CircularityCheck(), Passes.CheckExprs(), Passes.EasyModes()]) passes.append(instruction_select) passes.extend([Passes.NormalizeModes(), Passes.UpdateLabels(), a]) for p in passes: p.go(z, env) if Err.count == 0: try: outfile = CmdLine.outfile if outfile == '-': output = sys.stdout if sys.platform == "win32": # We can't dump our binary in text mode; that would be # disastrous. So, we'll do some platform-specific # things here to force our stdout to binary mode. 
import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) elif outfile is None: output = file('bin', 'wb') else: output = file(outfile, 'wb') f = open("template.txt", "rb") t = f.read() head = t[:0x40000] if (len("".join(map(chr, a.output))) > 0x400): print "too large" return 1 tail = t[(0x40000 + len("".join(map(chr, a.output)))):] output.write(head + "".join(map(chr, a.output)) + tail) output.flush() if outfile != '-': output.close() return 0 except IOError: print>>sys.stderr, "Could not write to " + outfile return 1 else: Err.report() return 1
def main():
    """Train a DQN agent on the project Environment and log evaluation runs.

    Builds separate training and evaluation environments, a Q-network and a
    DqnAgent, seeds a uniform replay buffer with random-policy experience and
    then alternates between collecting steps and training. Every
    ``log_interval`` training steps the greedy policy is evaluated, the moves
    recorded by the evaluation environment are written to a file under
    "eval_data" and a progress line is printed.
    """
    # Separate Python environments for training and evaluation, each wrapped
    # for TensorFlow.
    train_py_env = Environment.Environment()
    train_env = tf_py_environment.TFPyEnvironment(train_py_env)
    eval_py_env = Environment.Environment()
    eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)
    # utils.validate_py_environment(train_py_env, episodes=5)

    # Fully connected layer sizes of the Q-network.
    # (an alternative with conv_layer_params = [(4, 4, 1), (8, 4, 2)] was tried)
    fc_layer_params = (50, 200, 25, 6)

    # The QNetwork predicts Q-values (expected returns) for all actions from
    # an observation of the given environment.
    observation_spec = train_env.observation_spec()
    action_spec = train_env.action_spec()
    q_net = q_network.QNetwork(observation_spec,
                               action_spec,
                               fc_layer_params=fc_layer_params)

    # DQN agent on the training environment: Adam optimizer, element-wise
    # squared TD-error loss and a step counter variable starting at 0.
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
    train_step_counter = tf.Variable(0)

    agent = dqn_agent.DqnAgent(
        train_env.time_step_spec(),
        train_env.action_spec(),
        q_network=q_net,
        optimizer=optimizer,
        epsilon_greedy=0.4,  # TODO tune this
        td_errors_loss_fn=common.element_wise_squared_loss,
        train_step_counter=train_step_counter,
        # boltzmann_temperature=0.1,
        summarize_grads_and_vars=True)
    agent.initialize()

    # Policies: agent.policy is the main policy used for evaluation and
    # deployment; agent.collect_policy is a second policy used for data
    # collection.

    # A random policy picks an action for each time step independently of
    # the agent; its average return is printed as a baseline.
    random_policy = random_tf_policy.RandomTFPolicy(train_env.time_step_spec(),
                                                    train_env.action_spec())
    baseline_return = compute_avg_return(eval_env, random_policy, num_eval_episodes)
    print(baseline_return)

    # Replay buffer holding the data collected from the environment, built
    # from the agent's collect_data_spec.
    replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
        data_spec=agent.collect_data_spec,
        batch_size=1,
        max_length=replay_buffer_max_length,
        dataset_window_shift=1)

    # Dataset pipeline feeding the agent. num_steps=2 samples two adjacent
    # rows per item because the DQN loss needs the current and the next
    # observation.
    dataset = replay_buffer.as_dataset(
        num_parallel_calls=3,
        sample_batch_size=batch_size,
        single_deterministic_pass=False,
        num_steps=2).prefetch(3)
    batches = iter(dataset)

    # Wrap the train step with common.function and reset the step counter.
    agent.train = common.function(agent.train)
    agent.train_step_counter.assign(0)

    # Seed the buffer with random-policy experience before training starts.
    collect_data(train_env, random_policy, replay_buffer, steps=10000)

    for _ in range(num_iterations):
        # Collect a few steps using collect_policy and save to the replay buffer.
        for _ in range(collect_steps_per_iteration):
            collect_step(train_env, agent.collect_policy, replay_buffer)

        # Sample a batch of data from the buffer and update the agent's network.
        experience, unused_info = next(batches)
        train_loss = agent.train(experience).loss

        step = agent.train_step_counter.numpy()
        if step % log_interval != 0:
            continue

        avg_return = compute_avg_return(eval_env, agent.policy, 3)  # TODO
        if not os.path.exists("eval_data"):
            os.makedirs("eval_data")
        file_name = f'Eval_data.step{step // log_interval}.txt'
        path = os.path.join("eval_data", file_name)
        # One recorded move per line; the list is cleared afterwards so the
        # next log file only holds new moves.
        with open(path, 'w') as out:
            for move in eval_py_env.all_moves:
                out.write(str(move) + "\n")
        eval_py_env.all_moves = []
        print('step = {0}: loss = {1}, Average Return: {2}'.format(step, train_loss, avg_return))
import sys import pickle import matplotlib.pyplot as plt import Environment import Player import State from Model import Model ROUNDS = 10000 env = Environment.Environment(3, 3) prefix1 = input("Input the prefix for the first model to be loaded: ") model_name1 = input("Input the name of the model you want: ") if len(sys.argv) != 2 else sys.argv[1] print("Loading model with filename {}{} . . .\n".format(prefix1, model_name1)) player1 = Player.Player('player1', env, 1, 0) player1.load_policy(model_name1, prefix1) prefix2 = input("Input the prefix for the second model to be loaded: ") model_name2 = input("Input the name of the model you want: ") if len(sys.argv) != 2 else sys.argv[2] print("Loading model with filename {}{} . . .\n".format(prefix2, model_name2)) player2 = Player.Player('player2', env, -1, 0) player2.load_policy(model_name2, prefix2) players = [player1, player2] for i in range(ROUNDS): while True: result = None
def prepare_impl(t, cwd, ver, wafdir):
    """Locate the project's wscript, load it as the main module and run main().

    t is stored as the only entry of Options.tooldir and cwd as
    Options.launch_dir; ver and wafdir are not used in this function.
    The search starts in cwd and climbs towards the filesystem root. A
    lock file found on the way decides the project directory. The process
    exits when '--version' is requested or when no wscript can be found.

    Raises Utils.WafError when the selected folder cannot be entered.
    """
    Options.tooldir = [t]
    Options.launch_dir = cwd

    # some command-line options can be processed immediately
    if '--version' in sys.argv:
        opt_obj = Options.Handler()
        opt_obj.curdir = cwd
        opt_obj.parse_args()
        sys.exit(0)

    # now find the wscript file
    msg1 = 'Waf: Please run waf from a directory containing a file named "%s" or run distclean' % WSCRIPT_FILE

    # in theory projects can be configured in an autotool-like manner:
    # when 'configure' is requested from a folder that has no build script,
    # the wscript next to the launched program (sys.argv[0]) is used and
    # the launch directory becomes the build directory
    build_dir_override = None
    candidate = None

    lst = os.listdir(cwd)

    search_for_candidate = True
    if WSCRIPT_FILE in lst:
        candidate = cwd

    elif 'configure' in sys.argv and not WSCRIPT_BUILD_FILE in lst:
        calldir = os.path.abspath(os.path.dirname(sys.argv[0]))
        if WSCRIPT_FILE in os.listdir(calldir):
            candidate = calldir
            search_for_candidate = False
        else:
            error('arg[0] directory does not contain a wscript file')
            sys.exit(1)
        build_dir_override = cwd

    # climb up to find a script if it is not found
    while search_for_candidate:
        # stop on very short paths (3 characters or fewer)
        if len(cwd) <= 3:
            break

        dirlst = os.listdir(cwd)
        if WSCRIPT_FILE in dirlst:
            candidate = cwd
        if 'configure' in sys.argv and candidate:
            break
        if Options.lockfile in dirlst:
            env = Environment.Environment()
            env.load(os.path.join(cwd, Options.lockfile))
            # prefer the directory recorded in the lock file, but only if it
            # can still be stat'ed; any failure falls back to this folder
            # NOTE(review): the bare except also hides non-OSError failures
            # (e.g. a missing or malformed 'cwd' entry) - presumably intended
            try:
                os.stat(env['cwd'])
            except:
                candidate = cwd
            else:
                candidate = env['cwd']
            break
        cwd = os.path.dirname(cwd)  # climb up

    if not candidate:
        # check if the user only wanted to display the help
        if '-h' in sys.argv or '--help' in sys.argv:
            warn('No wscript file found: the help message may be incomplete')
            opt_obj = Options.Handler()
            opt_obj.curdir = cwd
            opt_obj.parse_args()
        else:
            error(msg1)
        # note: both branches exit with status 0
        sys.exit(0)

    # We have found wscript, but there is no guarantee that it is valid
    try:
        os.chdir(candidate)
    except OSError:
        raise Utils.WafError("the folder %r is unreadable" % candidate)

    # define the main module containing the functions init, shutdown, ..
    Utils.set_main_module(os.path.join(candidate, WSCRIPT_FILE))

    if build_dir_override:
        d = getattr(Utils.g_module, BLDDIR, None)
        if d:
            # the wscript already names a build directory: warn before replacing it
            msg = ' Overriding build directory %s with %s' % (
                d, build_dir_override)
            warn(msg)
        Utils.g_module.blddir = build_dir_override

    # bind a few methods and classes by default

    def set_def(obj, name=''):
        # install obj on the main module under the given name (default: its
        # __name__) unless the module already defines that name
        n = name or obj.__name__
        if not n in Utils.g_module.__dict__:
            setattr(Utils.g_module, n, obj)

    for k in [dist, distclean, distcheck, build, clean, install, uninstall]:
        set_def(k)

    set_def(Configure.ConfigurationContext, 'configure_context')

    for k in ['build', 'clean', 'install', 'uninstall']:
        set_def(Build.BuildContext, k + '_context')

    # now parse the options from the user wscript file
    opt_obj = Options.Handler(Utils.g_module)
    opt_obj.curdir = candidate
    try:
        # only the presence of set_options matters here; f itself is unused
        f = Utils.g_module.set_options
    except AttributeError:
        pass
    else:
        opt_obj.sub_options([''])
    opt_obj.parse_args()

    # provide no-op init/shutdown hooks when the wscript defines none
    if not 'init' in Utils.g_module.__dict__:
        Utils.g_module.init = Utils.nada
    if not 'shutdown' in Utils.g_module.__dict__:
        Utils.g_module.shutdown = Utils.nada

    main()
# Solve the MDP with MDPSolver.PI (at most 10 iterations) to obtain the
# reference policy mu_PI and reference Q-values Q_PI used below.
mu_PI, J_collector, Q_PI, iter_counter = MDPSolver.PI(mdp, gamma, mu=None, max_iters=10)
print("mu=\n{}".format(mu_PI))
print("J_collector=\n{}".format(J_collector))
g.show_J_collector(J_collector)

# Q-learning agent playing in an environment built on the same MDP
q_agent = q.SimpleQAgent(X, U, gamma, epsil=0.1, alpha=1e-1, random=random_state)
env = e.Environment(mdp, q_agent)
bs = BasicSampler(variable_names=["Q", "epsilon", "step", "delQ", "del_mu"])

# 100 rounds of play; before each round, record the Frobenius-norm distance
# of the agent's Q-values and policy from the reference solution.
for p in range(100):
    delQ = np.linalg.norm(q_agent.Q - Q_PI, "fro")
    del_mu = np.linalg.norm(q_agent.get_policy() - mu_PI, "fro")
    # the sampler's "step" variable receives the agent's lr attribute
    bs.add(q_agent.Q, q_agent.epsilon, q_agent.lr, delQ, del_mu)
    env.play_round(100)

# Two stacked plots: Q-value distance on top, policy distance below
plt.figure(2)
plt.subplot(211)
plt.plot(bs.get().delQ)
plt.subplot(212)
plt.plot(bs.get().del_mu)
plt.show()