def checkJudge(self, name):
    pTree = self.world.judgeTrees[name][0]
    judge = JudgeDef(pTree)
    judge.name = pTree.val
    judge.envs = [self.strToSym(x, pTree.syntax) for x in pTree.syntax.envs]
    judge.args = [self.strToSym(x, pTree.syntax) for x in pTree.syntax.children]
    freshGen = Env()
    judge.envLabels = [freshGen.freshOld(x) for x in judge.envs]
    judge.argLabels = [freshGen.freshOld(x) for x in judge.args]
    self.world.judges[pTree.val] = judge
    for i in range(len(pTree.children)):
        self.checkJCase(pTree.children[i], judge, i)
    curIndex = len(pTree.children)
    # Add any other ASTs which hold parts of the judgement.
    for oTree in self.world.judgeTrees[name][1:]:
        # Check the shapes match (ignore any latex or label).
        envs = [self.strToSym(x, oTree.syntax) for x in oTree.syntax.envs]
        args = [self.strToSym(x, oTree.syntax) for x in oTree.syntax.children]
        check = all(a == b for a, b in zip(args, judge.args))
        check = check and all(a == b for a, b in zip(envs, judge.envs))
        if not check:
            expectStr = "; ".join(x.name for x in judge.envs) + " |- " + "; ".join(x.name for x in judge.args)
            foundStr = "; ".join(x.name for x in envs) + " |- " + "; ".join(x.name for x in args)
            self.addErr("Shape of repeated judgement does not match original judgement: "
                        "found: " + foundStr + ", expected: " + expectStr, oTree)
        # Add any cases.
        for i in range(len(oTree.children)):
            self.checkJCase(oTree.children[i], judge, curIndex + i)
        curIndex += len(oTree.children)
class Function(Exp):
    def __init__(self, params, cmd):
        self.params = params
        self.cmd = cmd
        self.this = None
        self.env = None

    def eval(self, env):
        if self.env is None:
            self.env = Env(env)
            self.env.declare('this', self.this)
        return self

    def set_this(self, this):
        self.this = this

    def call(self, args, env):
        if len(args) != len(self.params):
            raise Exception("Invalid count of parameters. Should be %s, is %s."
                            % (len(self.params), len(args)))
        new_env = Env(self.env)
        for name, value in zip(self.params, args):
            new_env.declare(name, value)
        return self.cmd.eval(new_env)

    def __str__(self):
        return "Function(%s, %s)" % (self.params, self.cmd)
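A minimal usage sketch for the Function class above, under two assumptions: the interpreter's Env (with declare) shown elsewhere in this collection is importable, and a command object only needs an eval(env) method. ConstCmd is a hypothetical stand-in for a parsed command body, used purely for illustration.

class ConstCmd:
    def eval(self, env):
        return 42  # stand-in for evaluating a parsed command body

outer = Env()                        # Env() / Env(outer) appear elsewhere in this interpreter
f = Function(params=['x'], cmd=ConstCmd())
f.eval(outer)                        # captures the defining environment and binds 'this'
result = f.call([1], outer)          # binds 'x' to 1 in a child Env, evaluates the body -> 42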
def EVAL(ast, env):
    #print("EVAL %s" % printer._pr_str(ast))
    if not types._list_Q(ast):
        return eval_ast(ast, env)

    # apply list
    if len(ast) == 0:
        return ast
    a0 = ast[0]
    if not isinstance(a0, MalSym):
        raise Exception("attempt to apply on non-symbol")

    if u"def!" == a0.value:
        a1, a2 = ast[1], ast[2]
        res = EVAL(a2, env)
        return env.set(a1, res)
    elif u"let*" == a0.value:
        a1, a2 = ast[1], ast[2]
        let_env = Env(env)
        for i in range(0, len(a1), 2):
            let_env.set(a1[i], EVAL(a1[i+1], let_env))
        return EVAL(a2, let_env)
    else:
        el = eval_ast(ast, env)
        f = el.values[0]
        if isinstance(f, MalFunc):
            return f.apply(el.values[1:])
        else:
            raise Exception("%s is not callable" % f)
class Field(ScatterPlane):
    ''' This is the Field which will contain cells. '''

    agent_widget = ObjectProperty(None)
    total_reward = NumericProperty(0)

    def __init__(self, cell_size=25, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cell_size = cell_size
        # At __init__ time the height, width, and consequently the center may
        # not be established yet due to layout logic.
        Clock.schedule_once(self._init_after)
        Clock.schedule_interval(self.update, 0.1)

    def _init_after(self, dt):
        ''' Perform initializations after the layout is finalized. '''
        self.env = Env()  # TODO: Move params to config file
        with open('sarsa.pickle', 'rb') as fd:
            self.sarsa = pickle.load(fd)
        self.grid = Grid(self.canvas, 'line_loop', Color(), self.cell_size,
                         self.to_local(*self.center))
        self.state = self.env.reset(self.grid)
        self._place_agent(self.state.cell)

    def _place_agent(self, cell):
        self.agent_widget.center = self.grid.pixcenter(cell.q, cell.r)
        # FIXME
        for _ in self.grid.neighbors(cell.q, cell.r):
            pass

    def on_touch_down(self, touch):
        super().on_touch_down(touch)
        x, y = self.to_local(touch.x, touch.y)
        q, r = self.grid.pixel_to_hex(x, y)
        if (q, r) in self.grid:
            print("Touched ({}, {}) in {}.".format(q, r, (x, y)))
            print("env tvisited", self.env.tvisited[q, r])
            print("state food", self.state.food)
        else:
            self.grid.init(q, r)
            for _ in self.grid.neighbors(q, r):
                pass
        return True

    # TODO: Shouldn't this fit better in SwarmApp?
    def update(self, dt):
        action = self.sarsa.policy(self.state, explore=False)
        next_state, reward, done = self.env.step(action)
        self.sarsa.adapt_policy(self.state, action, next_state, reward)
        self.state = next_state
        self.total_reward += int(reward)
        self._place_agent(self.state.cell)
def EVAL(ast, env):
    while True:
        #print("EVAL %s" % printer._pr_str(ast))
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        ast = macroexpand(ast, env)
        if not types._list_Q(ast):
            return eval_ast(ast, env)
        if len(ast) == 0:
            return ast
        a0 = ast[0]

        if "def!" == a0:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif "let*" == a0:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            ast = a2
            env = let_env
            # Continue loop (TCO)
        elif "quote" == a0:
            return ast[1]
        elif "quasiquote" == a0:
            ast = quasiquote(ast[1])
            # Continue loop (TCO)
        elif 'defmacro!' == a0:
            func = EVAL(ast[2], env)
            func._ismacro_ = True
            return env.set(ast[1], func)
        elif 'macroexpand' == a0:
            return macroexpand(ast[1], env)
        elif "do" == a0:
            eval_ast(ast[1:-1], env)
            ast = ast[-1]
            # Continue loop (TCO)
        elif "if" == a0:
            a1, a2 = ast[1], ast[2]
            cond = EVAL(a1, env)
            if cond is None or cond is False:
                if len(ast) > 3:
                    ast = ast[3]
                else:
                    ast = None
            else:
                ast = a2
            # Continue loop (TCO)
        elif "fn*" == a0:
            a1, a2 = ast[1], ast[2]
            return types._function(EVAL, Env, a2, env, a1)
        else:
            el = eval_ast(ast, env)
            f = el[0]
            if hasattr(f, '__ast__'):
                ast = f.__ast__
                env = f.__gen_env__(el[1:])
            else:
                return f(*el[1:])
def entry_point(argv):
    repl_env = Env()

    def REP(str, env):
        return PRINT(EVAL(READ(str), env))

    # core.py: defined using python
    for k, v in core.ns.items():
        repl_env.set(_symbol(unicode(k)), MalFunc(v))

    # core.mal: defined using the language itself
    REP("(def! not (fn* (a) (if a false true)))", repl_env)

    while True:
        try:
            line = mal_readline.readline("user> ")
            if line == "":
                continue
            print(REP(line, repl_env))
        except EOFError as e:
            break
        except reader.Blank:
            continue
        except types.MalException as e:
            print(u"Error: %s" % printer._pr_str(e.object, False))
        except Exception as e:
            print("Error: %s" % e)
            #print("".join(traceback.format_exception(*sys.exc_info())))
    return 0
def EVAL(ast, env): while True: if type(ast) == list: if ast[0] == "def!": val = EVAL(ast[2], env) env.set(ast[1], val) return val elif ast[0] == "let*": new_env = Env(env) bindings = ast[1] for i in range(0, len(bindings), 2): val = EVAL(bindings[i+1], new_env) new_env.set(bindings[i], val) # return EVAL(ast[2], new_env) ast = ast[2] env = new_env continue elif ast[0] == "do": # elements = [eval_ast(e, env) for e in ast[1:]] # return elements[-1] [eval_ast(e, env) for e in ast[1:-1]] ast = ast[-1] continue elif ast[0] == "if": cond = EVAL(ast[1], env) if cond != None and cond != False: # cond was true ast = ast[2] else: if len(ast) > 3: ast = ast[3] else: return None continue elif ast[0] == "fn*": # def func(*params): # new_env = Env(env, ast[1], params) # res = EVAL(ast[2], new_env) # return res; return maltypes.Function(ast[2], ast[1], env) else: l = eval_ast(ast, env) f = l[0] if type(f) == maltypes.Function: ast = f.ast new_env = Env(f.env, f.params, l[1:]) env = new_env else: return f(*l[1:]) else: res = eval_ast(ast, env) return res
def EVAL(ast, env): while True: #print("EVAL %s" % printer._pr_str(ast)) if not types._list_Q(ast): return eval_ast(ast, env) # apply list if len(ast) == 0: return ast a0 = ast[0] if isinstance(a0, MalSym): a0sym = a0.value else: a0sym = u"__<*fn*>__" if u"def!" == a0sym: a1, a2 = ast[1], ast[2] res = EVAL(a2, env) return env.set(a1, res) elif u"let*" == a0sym: a1, a2 = ast[1], ast[2] let_env = Env(env) for i in range(0, len(a1), 2): let_env.set(a1[i], EVAL(a1[i+1], let_env)) ast = a2 env = let_env # Continue loop (TCO) elif u"quote" == a0sym: return ast[1] elif u"quasiquote" == a0sym: ast = quasiquote(ast[1]) # Continue loop (TCO) elif u"do" == a0sym: if len(ast) == 0: return nil elif len(ast) > 1: eval_ast(ast.slice2(1, len(ast)-1), env) ast = ast[-1] # Continue loop (TCO) elif u"if" == a0sym: a1, a2 = ast[1], ast[2] cond = EVAL(a1, env) if cond is nil or cond is false: if len(ast) > 3: ast = ast[3] # Continue loop (TCO) else: return nil else: ast = a2 # Continue loop (TCO) elif u"fn*" == a0sym: a1, a2 = ast[1], ast[2] return MalFunc(None, a2, env, a1, EVAL) else: el = eval_ast(ast, env) f = el.values[0] if isinstance(f, MalFunc): if f.ast: ast = f.ast env = f.gen_env(el.rest()) # Continue loop (TCO) else: return f.apply(el.rest()) else: raise Exception("%s is not callable" % f)
def EVAL(ast: MalType, env: Env) -> MalType: while True: if isinstance(ast, MalVector): return MalVector(EVAL(member, env) for member in ast) if isinstance(ast, MalHashmap): return MalHashmap([ast[0], EVAL(ast[1], env)]) if not isinstance(ast, MalList): # not a list return eval_ast(ast, env) if isinstance(ast, MalList): if len(ast) == 0: # an empty list return ast else: # a list if ast[0] == 'def!': return env.set(ast[1], EVAL(ast[2], env)) elif ast[0] == 'let*': let_env = Env(outer=env) param1 = iter(ast[1]) for symbol, value in zip(param1, param1): let_env.set(symbol, EVAL(value, env=let_env)) # return EVAL(ast[2], env=let_env) ast, env = ast[2], let_env continue elif ast[0] == 'do': # value = nil # for element in ast[1:]: # value = EVAL(element, env) # return value for ele in ast[1:-1]: eval_ast(ele, env) ast = ast[-1] continue elif ast[0] == 'if': cond = EVAL(ast[1], env) if cond != nil and MalBool(cond): # return EVAL(ast[2], env) ast = ast[2] continue elif len(ast) == 4: # return EVAL(ast[3], env) ast = ast[3] continue else: return nil elif ast[0] == 'fn*': return MalFunction(ast=ast[2], params=ast[1], env=env, eval_fn=EVAL) else: f, *args = eval_ast(ast, env) if isinstance(f, MalFunction): env = Env(binds=f.params, exprs=args, outer=f.env) ast = f.ast continue else: return f(*args)
def eval(self, env):
    new_env = Env(env)
    # put object in heap
    addr = heap.alloc()
    heap[addr] = self
    for decl in self.decls:
        decl.exp.set_this(addr)
        decl.eval(new_env)
    for key in new_env:
        if new_env.directly_defined(key):
            self.env[key] = new_env[key]
    return addr
def interpret(code, print_ast=False):
    ast = parse(tokenize(code))
    if print_ast:
        print(ast)
    env = Env()
    env.declare("alloc", Alloc())
    env.declare("readline", ReadLine())
    env.declare("true", 1)
    env.declare("false", 0)
    ast.eval(env)
def eval_ast(ast: MalType, env: Env):
    if isinstance(ast, MalSymbol):
        return env.get(ast)
    elif isinstance(ast, MalList):
        return MalList(EVAL(child, env) for child in ast)
    else:
        return ast
def EVAL(mt, env):
    if type(mt) == list:
        if mt[0] == "def!":
            val = EVAL(mt[2], env)
            env.set(mt[1], val)
            return val
        elif mt[0] == "let*":
            new_env = Env(env)
            bindings = mt[1]
            for i in range(0, len(bindings), 2):
                val = EVAL(bindings[i+1], new_env)
                new_env.set(bindings[i], val)
            return EVAL(mt[2], new_env)
        elif mt[0] == "do":
            elements = [eval_ast(e, env) for e in mt[1:]]
            return elements[-1]
        elif mt[0] == "if":
            cond = EVAL(mt[1], env)
            if cond != None and cond != False:
                # cond was true
                res = EVAL(mt[2], env)
            else:
                if len(mt) > 3:
                    res = EVAL(mt[3], env)
                else:
                    res = maltypes.Nil()
            return res
        elif mt[0] == "fn*":
            def func(*params):
                new_env = Env(env, mt[1], params)
                res = EVAL(mt[2], new_env)
                return res
            return func
        else:
            l = eval_ast(mt, env)
            func = l[0]
            return func(*l[1:])
    else:
        res = eval_ast(mt, env)
        return res
def EVAL(ast, env):
    #print("EVAL %s" % printer._pr_str(ast))
    if not types._list_Q(ast):
        return eval_ast(ast, env)

    # apply list
    if len(ast) == 0:
        return ast
    a0 = ast[0]
    if isinstance(a0, MalSym):
        a0sym = a0.value
    else:
        a0sym = u"__<*fn*>__"

    if u"def!" == a0sym:
        a1, a2 = ast[1], ast[2]
        res = EVAL(a2, env)
        return env.set(a1, res)
    elif u"let*" == a0sym:
        a1, a2 = ast[1], ast[2]
        let_env = Env(env)
        for i in range(0, len(a1), 2):
            let_env.set(a1[i], EVAL(a1[i+1], let_env))
        return EVAL(a2, let_env)
    elif u"do" == a0sym:
        el = eval_ast(ast.rest(), env)
        return el.values[-1]
    elif u"if" == a0sym:
        a1, a2 = ast[1], ast[2]
        cond = EVAL(a1, env)
        if cond is nil or cond is false:
            if len(ast) > 3:
                return EVAL(ast[3], env)
            else:
                return nil
        else:
            return EVAL(a2, env)
    elif u"fn*" == a0sym:
        a1, a2 = ast[1], ast[2]
        return MalFunc(None, a2, env, a1, EVAL)
    else:
        el = eval_ast(ast, env)
        f = el.values[0]
        if isinstance(f, MalFunc):
            return f.apply(el.rest())
        else:
            raise Exception("%s is not callable" % f)
def EVAL(ast, env):
    while True:
        #print("EVAL %s" % ast)
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        if len(ast) == 0:
            return ast
        a0 = ast[0]

        if "def!" == a0:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif "let*" == a0:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            return EVAL(a2, let_env)
        elif "do" == a0:
            eval_ast(ast[1:-1], env)
            ast = ast[-1]
            # Continue loop (TCO)
        elif "if" == a0:
            a1, a2 = ast[1], ast[2]
            cond = EVAL(a1, env)
            if cond is None or cond is False:
                if len(ast) > 3:
                    ast = ast[3]
                else:
                    ast = None
            else:
                ast = a2
            # Continue loop (TCO)
        elif "fn*" == a0:
            a1, a2 = ast[1], ast[2]
            return types._function(EVAL, Env, a2, env, a1)
        else:
            el = eval_ast(ast, env)
            f = el[0]
            if hasattr(f, '__ast__'):
                ast = f.__ast__
                env = f.__gen_env__(el[1:])
            else:
                return f(*el[1:])
def EVAL(ast, env):
    if type(ast) == List and len(ast) > 0:
        function = ast[0]
        if function == 'fn*':
            bindings = ast[1]
            body = ast[2]
            return Function(Env, bindings, env, body, EVAL)
        elif function == 'let*':
            scoped_env = Env(env)
            bindings = ast[1]
            for i in range(0, len(bindings), 2):
                symbol = Symbol(bindings[i])
                value = EVAL(bindings[i+1], scoped_env)
                scoped_env.set(symbol, value)
            expression = ast[-1]
            return EVAL(expression, scoped_env)
        elif function == 'def!':
            symbol = Symbol(ast[1])
            value = EVAL(ast[2], env)
            env.set(symbol, value)
            return value
        elif function == 'do':
            return_val = None
            for exp in ast[1:]:
                return_val = EVAL(exp, env)
            return return_val
        elif function == 'if':
            condition = EVAL(ast[1], env)
            if_branch = ast[2]
            if condition is not False and condition is not None:
                return EVAL(if_branch, env)
            else:
                else_branch = None
                try:
                    else_branch = ast[3]
                except IndexError as e:
                    pass
                return EVAL(else_branch, env)
        else:
            evaluated = eval_ast(ast, env)
            return evaluated[0](*evaluated[1:])
    evaluated = eval_ast(ast, env)
    return evaluated
def _mako_render(path, meta):
    makedirs(dirname(path))
    f = open(path, 'w+')
    content = Env.get_template(template).render(**meta)
    f.write(content)
    try:
        print path
    except:
        pass
    f.close()
def provision_env(**optionals):
    vpc_cidr_block = '10.6.0.0/16'
    zone = valid_zones[0]
    private_subnet_cidr_block = '10.6.0.0/19'
    public_subnet_cidr_block = '10.6.32.0/20'
    dev_zones = [valid_zones[0]]

    env = Env(
        env='dev',
        vpc_cidr_block=vpc_cidr_block,
        **optionals
    )
    env.provision_zone(
        zone=zone,
        private_subnet_cidr_block=private_subnet_cidr_block,
        public_subnet_cidr_block=public_subnet_cidr_block
    )
    env.provision_resources(zones=dev_zones,
                            zk_cluster_size=1,
                            flink_num_jobmanagers=1,
                            flink_num_taskmanagers=1)
def EVAL(mt, env):
    if type(mt) == list:
        if mt[0] == "def!":
            val = EVAL(mt[2], env)
            env.set(mt[1], val)
            return val
        elif mt[0] == "let*":
            new_env = Env(env)
            bindings = mt[1]
            for i in range(0, len(bindings), 2):
                val = EVAL(bindings[i+1], new_env)
                new_env.set(bindings[i], val)
            return EVAL(mt[2], new_env)
        else:
            l = eval_ast(mt, env)
            f = l[0]
            return f(*l[1:])
    else:
        return eval_ast(mt, env)
def repl():
    # set up base environment
    env = Env(outer=None)
    for k, v in ns.items():
        env.set(k, v)
    env.set(SYMBOL_EVAL, lambda ast: EVAL(ast, env=env))  # `eval` added to ns
    s = """
    (def! load-file
      (fn* (f)
        (eval (read-string (str "(do " (slurp f) ")")))))
    """
    EVAL(READ(s), env=env)

    # read-eval-print loop
    while True:
        try:
            s = input("=> ")
            rep(s, env)
        except Exception as e:
            print("error: {0}".format(e))
def EVAL(ast, env):
    if type(ast) == List and len(ast) > 0:
        function = ast[0]
        if function == 'let*':
            scoped_env = Env(env)
            bindings = ast[1]
            for i in range(0, len(bindings), 2):
                symbol = Symbol(bindings[i])
                value = EVAL(bindings[i+1], scoped_env)
                scoped_env.set(symbol, value)
            expression = ast[-1]
            return EVAL(expression, scoped_env)
        elif function == 'def!':
            symbol = Symbol(ast[1])
            value = EVAL(ast[2], env)
            env.set(symbol, value)
            return value
        else:
            evaluated = eval_ast(ast, env)
            return evaluated[0](*evaluated[1:])
    evaluated = eval_ast(ast, env)
    return evaluated
def EVAL(ast, env):
    #print("EVAL %s" % printer._pr_str(ast))
    if not types._list_Q(ast):
        return eval_ast(ast, env)

    # apply list
    if len(ast) == 0:
        return ast
    a0 = ast[0]

    if "def!" == a0:
        a1, a2 = ast[1], ast[2]
        res = EVAL(a2, env)
        return env.set(a1, res)
    elif "let*" == a0:
        a1, a2 = ast[1], ast[2]
        let_env = Env(env)
        for i in range(0, len(a1), 2):
            let_env.set(a1[i], EVAL(a1[i+1], let_env))
        return EVAL(a2, let_env)
    else:
        el = eval_ast(ast, env)
        f = el[0]
        return f(*el[1:])
def _get_options(sys, parser):
    options = None
    if len(sys.argv[1:]) == 0:
        options = parser.parse_args(['auto'])
    elif len(sys.argv[1:]) == 1:
        if sys.argv[1] == "_conditioncheck":
            options = condition_check.parse_args(sys.argv[2:])
            env = Env(options)
            env.check_env()
            CheckCondition(modules_pool=None, options=options, env=env).run()
            quit()
        elif sys.argv[1] == "--help" or sys.argv[1] == "-h":
            options = parser.parse_args(sys.argv[1:])
        elif sys.argv[1].startswith('-'):
            options = parser.parse_args(["auto"] + sys.argv[1:])
        else:
            options = parser.parse_args(sys.argv[1:])
    else:
        options = parser.parse_args(sys.argv[1:])
    return options
def EVAL(ast: MalType, env: Env) -> MalType:
    if isinstance(ast, MalVector):
        return MalVector(EVAL(member, env) for member in ast)
    if isinstance(ast, MalHashmap):
        return MalHashmap([ast[0], EVAL(ast[1], env)])
    if not isinstance(ast, MalList):
        # not a list
        return eval_ast(ast, env)
    if isinstance(ast, MalList):
        if len(ast) == 0:
            # an empty list
            return ast
        else:
            # a list
            if ast[0] == 'def!':
                return env.set(ast[1], EVAL(ast[2], env))
            elif ast[0] == 'let*':
                let_env = Env(outer=env)
                param1 = iter(ast[1])
                for symbol, value in zip(param1, param1):
                    let_env.set(symbol, EVAL(value, env=let_env))
                return EVAL(ast[2], env=let_env)
            else:
                f, *args = eval_ast(ast, env)
                return f(*args)
def EVAL(ast, env):
    #print("EVAL %s" % ast)
    if not types._list_Q(ast):
        return eval_ast(ast, env)

    # apply list
    if len(ast) == 0:
        return ast
    a0 = ast[0]

    if "def!" == a0:
        a1, a2 = ast[1], ast[2]
        res = EVAL(a2, env)
        return env.set(a1, res)
    elif "let*" == a0:
        a1, a2 = ast[1], ast[2]
        let_env = Env(env)
        for i in range(0, len(a1), 2):
            let_env.set(a1[i], EVAL(a1[i+1], let_env))
        return EVAL(a2, let_env)
    elif "do" == a0:
        el = eval_ast(ast[1:], env)
        return el[-1]
    elif "if" == a0:
        a1, a2 = ast[1], ast[2]
        cond = EVAL(a1, env)
        if cond is None or cond is False:
            if len(ast) > 3:
                return EVAL(ast[3], env)
            else:
                return None
        else:
            return EVAL(a2, env)
    elif "fn*" == a0:
        a1, a2 = ast[1], ast[2]
        return types._function(EVAL, Env, a2, env, a1)
    else:
        el = eval_ast(ast, env)
        f = el[0]
        return f(*el[1:])
def entry_point(argv): repl_env = Env() def REP(str, env): return PRINT(EVAL(READ(str), env)) # core.py: defined using python for k, v in core.ns.items(): repl_env.set(_symbol(unicode(k)), MalFunc(v)) repl_env.set(types._symbol(u'eval'), MalEval(None, env=repl_env, EvalFunc=EVAL)) mal_args = [] if len(argv) >= 3: for a in argv[2:]: mal_args.append(MalStr(unicode(a))) repl_env.set(_symbol(u'*ARGV*'), MalList(mal_args)) # core.mal: defined using the language itself REP("(def! *host-language* \"rpython\")", repl_env) REP("(def! not (fn* (a) (if a false true)))", repl_env) REP("(def! load-file (fn* (f) (eval (read-string (str \"(do \" (slurp f) \")\")))))", repl_env) REP("(defmacro! cond (fn* (& xs) (if (> (count xs) 0) (list 'if (first xs) (if (> (count xs) 1) (nth xs 1) (throw \"odd number of forms to cond\")) (cons 'cond (rest (rest xs)))))))", repl_env) REP("(def! inc (fn* [x] (+ x 1)))", repl_env) REP("(def! gensym (let* [counter (atom 0)] (fn* [] (symbol (str \"G__\" (swap! counter inc))))))", repl_env) REP("(defmacro! or (fn* (& xs) (if (empty? xs) nil (if (= 1 (count xs)) (first xs) (let* (condvar (gensym)) `(let* (~condvar ~(first xs)) (if ~condvar ~condvar (or ~@(rest xs)))))))))", repl_env) if len(argv) >= 2: REP('(load-file "' + argv[1] + '")', repl_env) return 0 REP("(println (str \"Mal [\" *host-language* \"]\"))", repl_env) while True: try: line = mal_readline.readline("user> ") if line == "": continue print(REP(line, repl_env)) except EOFError as e: break except reader.Blank: continue except types.MalException as e: print(u"Error: %s" % printer._pr_str(e.object, False)) except Exception as e: print("Error: %s" % e) if IS_RPYTHON: llop.debug_print_traceback(lltype.Void) else: print("".join(traceback.format_exception(*sys.exc_info()))) return 0
def entry_point(argv):
    repl_env = Env()

    def REP(str, env):
        return PRINT(EVAL(READ(str), env))

    # core.py: defined using python
    for k, v in core.ns.items():
        repl_env.set(_symbol(unicode(k)), MalFunc(v))
    repl_env.set(types._symbol(u'eval'),
                 MalEval(None, env=repl_env, EvalFunc=EVAL))
    mal_args = []
    if len(argv) >= 3:
        for a in argv[2:]:
            mal_args.append(MalStr(unicode(a)))
    repl_env.set(_symbol(u'*ARGV*'), MalList(mal_args))

    # core.mal: defined using the language itself
    REP("(def! not (fn* (a) (if a false true)))", repl_env)
    REP("(def! load-file (fn* (f) (eval (read-string (str \"(do \" (slurp f) \")\")))))", repl_env)

    if len(argv) >= 2:
        REP('(load-file "' + argv[1] + '")', repl_env)
        return 0

    while True:
        try:
            line = mal_readline.readline("user> ")
            if line == "":
                continue
            print(REP(line, repl_env))
        except EOFError as e:
            break
        except reader.Blank:
            continue
        except types.MalException as e:
            print(u"Error: %s" % printer._pr_str(e.object, False))
        except Exception as e:
            print("Error: %s" % e)
            #print("".join(traceback.format_exception(*sys.exc_info())))
    return 0
def to_call(arg, k):
    return expr.eval(Env(variable, arg, env), k)
from functools import partial

import torch
from torch import autograd, optim
from torch.distributions import Independent, Normal
from torch.distributions.kl import kl_divergence
from torch.nn.utils import parameters_to_vector, vector_to_parameters
from tqdm import tqdm

from env import Env
from hyperparams import (BACKTRACK_COEFF, BACKTRACK_ITERS,
                         ON_POLICY_BATCH_SIZE as BATCH_SIZE,
                         CONJUGATE_GRADIENT_ITERS, DAMPING_COEFF, DISCOUNT,
                         HIDDEN_SIZE, INITIAL_POLICY_LOG_STD_DEV, KL_LIMIT,
                         LEARNING_RATE, MAX_STEPS, TRACE_DECAY, VALUE_EPOCHS)
from models import ActorCritic
from utils import plot

env = Env()
agent = ActorCritic(env.observation_space.shape[0], env.action_space.shape[0],
                    HIDDEN_SIZE, initial_policy_log_std_dev=INITIAL_POLICY_LOG_STD_DEV)
critic_optimiser = optim.Adam(agent.critic.parameters(), lr=LEARNING_RATE)


def hessian_vector_product(d_kl, x):
    g = parameters_to_vector(
        autograd.grad(d_kl, agent.actor.parameters(), create_graph=True))
    return parameters_to_vector(
        autograd.grad((g * x.detach()).sum(), agent.actor.parameters(),
                      retain_graph=True)) + DAMPING_COEFF * x


def conjugate_gradient(Ax, b):
    x = torch.zeros_like(b)
class SingleThread: def __init__(self, sess, thread_index, global_network, initial_learning_rate, grad_applier, max_global_time_step, action_size, env_name, device='/CPU:0'): self.thread_index = thread_index self.global_network = global_network self.initial_learning_rate = initial_learning_rate self.grad_applier = grad_applier self.max_global_time_step = max_global_time_step self.device = device self.action_size = action_size self.env = Env(env_name) # prepare model self.local_network = A3CLSTM(action_size, self.thread_index, self.device) self.local_network.loss_calculate_scaffold() # get gradients for local network v_ref = [v for v in self.local_network.get_vars()] self.gradients = tf.gradients(self.local_network.total_loss, v_ref, colocate_gradients_with_ops=False, gate_gradients=False, aggregation_method=None) # self.apply_gradients = grad_applier.apply_gradient(self.global_network.get_vars(), # self.gradients) self.apply_gradients = tf.train.RMSPropOptimizer( initial_learning_rate).apply_gradients( zip(self.gradients, self.global_network.get_vars())) self.sync = self.local_network.sync_from(self.global_network) # intiialize states self.episode_reward = 0 self.done = False self.state = self.env.reset() def choose_action(self, policy): return np.random.choice(range(len(policy)), p=policy) def _anneal_learning_rate(self, global_time_step): learning_rate = self.initial_learning_rate * ( self.max_global_time_step - global_time_step) / self.max_global_time_step if learning_rate < 0.0: learning_rate = 0.0 return learning_rate def write_summary(self, summary, train_writer, global_step): if self.thread_index == 0 and global_step % 10 == 0: train_writer.add_summary(summary, global_step) def process(self, sess, summary_op, train_writer, score, global_step): states = [] values = [] rewards = [] discounted_rewards = [] actions = [] deltas = [] gaes = [] # first we sync local network with global network sess.run(self.sync) initial_lstm_state = self.local_network.lstm_state_output if self.done: self.state = self.env.reset() self.done = False # now our local network is the same as global network for i in range(0, LOCAL_MAX_STEP): #self.env.render() policy, value = self.local_network.get_policy_value( sess, self.state) action = self.choose_action(policy) states.append(self.state) actions.append(action) self.state, reward, self.done = self.env.step(action) rewards.append(reward) values.append(value[0]) self.episode_reward += reward if self.done: print('Episode reward: {}'.format(self.episode_reward)) self.episode_reward = 0 self.state = self.env.reset() self.local_network.reset_lstm_state() break R = 0.0 gae = 0.0 if self.done is False: _, R = self.local_network.get_policy_value( sess, self.state) # run and get the last value R = R[0] #states.append(self.state) a = [] action_batch = [] for i in reversed(range(len(rewards))): R = R * gamma + rewards[i] #R = R - values[i] # this is temporal difference discounted_rewards.append(R) a = np.zeros(self.action_size) a[actions[i]] = 1 action_batch.append(a) #delta = rewards[i] + gamma * values[i+1] - values[i] #deltas.append(delta) #gae = gamma * tau * gae + delta #gaes.append(gae) #gaes = np.expand_dims(gaes, 1) states.reverse() states = np.array(states).reshape(-1, 47, 47, 1) discounted_rewards = np.array(discounted_rewards).reshape(-1, 1) #rewards.reverse() _, summary = sess.run( [self.apply_gradients, summary_op], feed_dict={ self.local_network.s: states, #self.local_network.rewards: rewards, #self.local_network.values: values, self.local_network.step_size: 
[len(states)], #self.local_network.deltas: deltas, # self.local_network.gaes: gaes, #self.local_network.td: td, self.local_network.a: action_batch, self.local_network.discounted_rewards: discounted_rewards, self.local_network.LSTMState: initial_lstm_state, score: self.episode_reward }) self.write_summary(summary, train_writer, global_step) time.sleep(2)
def fn(*exprs):
    new_env = Env(outer=env, binds=ast[1], exprs=exprs)
    return EVAL(ast[2], new_env)
            el = eval_ast(ast, env)
            f = el[0]
            if hasattr(f, '__ast__'):
                ast = f.__ast__
                env = f.__gen_env__(el[1:])
            else:
                return f(*el[1:])

# print
def PRINT(exp):
    return printer._pr_str(exp)

# repl
repl_env = Env()

def REP(str):
    return PRINT(EVAL(READ(str), repl_env))

# core.py: defined using python
for k, v in core.ns.items():
    repl_env.set(types._symbol(k), v)
repl_env.set(types._symbol('eval'), lambda ast: EVAL(ast, repl_env))
repl_env.set(types._symbol('*ARGV*'), types._list(*sys.argv[2:]))

# core.mal: defined using the language itself
REP("(def! *host-language* \"python\")")
REP("(def! not (fn* (a) (if a false true)))")
def EVAL(ast, env): while True: #print("EVAL %s" % printer._pr_str(ast)) if not types._list_Q(ast): return eval_ast(ast, env) # apply list ast = macroexpand(ast, env) if not types._list_Q(ast): return ast if len(ast) == 0: return ast a0 = ast[0] if isinstance(a0, MalSym): a0sym = a0.value else: a0sym = u"__<*fn*>__" if u"def!" == a0sym: a1, a2 = ast[1], ast[2] res = EVAL(a2, env) return env.set(a1, res) elif u"let*" == a0sym: a1, a2 = ast[1], ast[2] let_env = Env(env) for i in range(0, len(a1), 2): let_env.set(a1[i], EVAL(a1[i+1], let_env)) ast = a2 env = let_env # Continue loop (TCO) elif u"quote" == a0sym: return ast[1] elif u"quasiquote" == a0sym: ast = quasiquote(ast[1]) # Continue loop (TCO) elif u"defmacro!" == a0sym: func = EVAL(ast[2], env) func.ismacro = True return env.set(ast[1], func) elif u"macroexpand" == a0sym: return macroexpand(ast[1], env) elif u"try*" == a0sym: a1, a2 = ast[1], ast[2] a20 = a2[0] if isinstance(a20, MalSym): if a20.value == u"catch*": try: return EVAL(a1, env); except types.MalException as exc: exc = exc.object catch_env = Env(env, _list(a2[1]), _list(exc)) return EVAL(a2[2], catch_env) except Exception as exc: exc = MalStr(unicode("%s" % exc)) catch_env = Env(env, _list(a2[1]), _list(exc)) return EVAL(a2[2], catch_env) return EVAL(a1, env); elif u"do" == a0sym: if len(ast) == 0: return nil elif len(ast) > 1: eval_ast(ast.slice2(1, len(ast)-1), env) ast = ast[-1] # Continue loop (TCO) elif u"if" == a0sym: a1, a2 = ast[1], ast[2] cond = EVAL(a1, env) if cond is nil or cond is false: if len(ast) > 3: ast = ast[3] # Continue loop (TCO) else: return nil else: ast = a2 # Continue loop (TCO) elif u"fn*" == a0sym: a1, a2 = ast[1], ast[2] return MalFunc(None, a2, env, a1, EVAL) else: el = eval_ast(ast, env) f = el.values[0] if isinstance(f, MalFunc): if f.ast: ast = f.ast env = f.gen_env(el.rest()) # Continue loop (TCO) else: return f.apply(el.rest()) else: raise Exception("%s is not callable" % f)
model_dir = os.path.abspath(os.path.join(data_dir, "oak_model/model"))
cont_res_dir = os.path.abspath(os.path.join(data_dir, "eval_result/cont_res_things_15"))

scene_file_name = "7_things_3_same.json"
graph_file_name = "7_things_3_same.pkl"
dataset_name = "7_things_3_same.pt"

cmd_args.graph_file_name = graph_file_name
cmd_args.scene_file_name = scene_file_name
cmd_args.dataset_name = dataset_name
print(cmd_args)

raw_path = os.path.abspath(os.path.join(data_dir, "./processed_dataset/raw"))
scenes_path = os.path.abspath(os.path.join(raw_path, scene_file_name))
graphs_path = os.path.join(raw_path, graph_file_name)

graphs, scene_dataset = create_dataset(data_dir, scenes_path, graphs_path)
embedding_layer = nn.Embedding(len(scene_dataset.attr_encoder.lookup_list), cmd_args.hidden_dim)
gnn = GNNGL(scene_dataset, embedding_layer)

# --- Finished load dataset, construct decoder ---- #
decoder = NodeDecoder()
ref = [0, 1]

dataloader = DataLoader(scene_dataset)
for data_point in dataloader:
    graph = graphs[data_point.graph_id]
    env = Env(data_point, graph, config, scene_dataset.attr_encoder)
    graph_embedding = gnn(data_point)
    probs, clauses = decoder.unroll(graph_embedding, graph, ref, eps=0)
def test(): tf.reset_default_graph() policy_network = PolicyNetwork(scope='supervised_policy') f = open(relationPath) all_data = f.readlines() f.close() test_data = all_data test_num = len(test_data) success = 0 saver = tf.train.Saver() path_found = [] path_relation_found = [] path_set = set() with tf.Session() as sess: saver.restore(sess, 'models/policy_retrained' + relation) print('Model reloaded') if test_num > 500: test_num = 500 for episode in xrange(test_num): print('Test sample %d: %s' % (episode, test_data[episode][:-1])) env = Env(dataPath, test_data[episode]) sample = test_data[episode].split() state_idx = [env.entity2id_[sample[0]], env.entity2id_[sample[1]], 0] transitions = [] for t in count(): state_vec = env.idx_state(state_idx) action_probs = policy_network.predict(state_vec) action_probs = np.squeeze(action_probs) action_chosen = np.random.choice(np.arange(action_space), p=action_probs) reward, new_state, done = env.interact(state_idx, action_chosen) new_state_vec = env.idx_state(new_state) transitions.append( Transition(state=state_vec, action=action_chosen, next_state=new_state_vec, reward=reward)) if done or t == max_steps_test: if done: success += 1 print("Success\n") path = path_clean(' -> '.join(env.path)) path_found.append(path) else: print('Episode ends due to step limit\n') break state_idx = new_state if done: if len(path_set) != 0: path_found_embedding = [env.path_embedding(path.split(' -> ')) for path in path_set] curr_path_embedding = env.path_embedding(env.path_relations) path_found_embedding = np.reshape(path_found_embedding, (-1, embedding_dim)) cos_sim = cosine_similarity(path_found_embedding, curr_path_embedding) diverse_reward = -np.mean(cos_sim) print('diverse_reward', diverse_reward) # total_reward = 0.1*global_reward + 0.8*length_reward + 0.1*diverse_reward state_batch = [] action_batch = [] for t, transition in enumerate(transitions): if transition.reward == 0: state_batch.append(transition.state) action_batch.append(transition.action) policy_network.update(np.reshape(state_batch, (-1, state_dim)), 0.1 * diverse_reward, action_batch) path_set.add(' -> '.join(env.path_relations)) for path in path_found: rel_ent = path.split(' -> ') path_relation = [] for idx, item in enumerate(rel_ent): if idx % 2 == 0: path_relation.append(item) path_relation_found.append(' -> '.join(path_relation)) # path_stats = collections.Counter(path_found).items() relation_path_stats = collections.Counter(path_relation_found).items() relation_path_stats = sorted(relation_path_stats, key=lambda x: x[1], reverse=True) ranking_path = [] for item in relation_path_stats: path = item[0] length = len(path.split(' -> ')) ranking_path.append((path, length)) ranking_path = sorted(ranking_path, key=lambda x: x[1]) print('Success persentage:', success / test_num) f = open(dataPath + 'tasks/' + relation + '/' + 'path_to_use.txt', 'w') for item in ranking_path: f.write(item[0] + '\n') f.close() print('path to use saved') return
        return pickle.load(pickle_file)
    else:
        with bz2.open(memory_path, 'rb') as zipped_pickle_file:
            return pickle.load(zipped_pickle_file)


def save_memory(memory, memory_path, disable_bzip):
    if disable_bzip:
        with open(memory_path, 'wb') as pickle_file:
            pickle.dump(memory, pickle_file)
    else:
        with bz2.open(memory_path, 'wb') as zipped_pickle_file:
            pickle.dump(memory, zipped_pickle_file)


# Environment
env = Env(args)
env.train()
action_space = env.action_space()

# Agent
dqn_list = []
for _ in range(args.num_ensemble):
    dqn = Agent(args, env)
    dqn_list.append(dqn)

# If a model is provided, and evaluate is false, presumably we want to resume, so try to load memory
if args.model is not None and not args.evaluate:
    if not args.memory:
        raise ValueError('Cannot resume training without memory save path. Aborting...')
    elif not os.path.exists(args.memory):
        raise ValueError('Could not find memory file at {path}. Aborting...'.format(path=args.memory))
# Modules
import datetime

# Packages
from flask import Flask, jsonify, request
import pytz

# Files
from env import Env

env = Env('Api')

# __name__ is '__main__' if run as the main program;
# otherwise __name__ will be the file name.
app = Flask(__name__)


@app.route('/', methods=['GET'])
def landing():
    return jsonify({'success': True})


@app.route('/show/<name>', methods=['GET'])
def show(name):
    return jsonify({'machine': name})


# It's as if the interpreter inserts this at the top
# of your module when run as the main program.
if __name__ == '__main__':
    print(
EMBED_TYPE = 'conv1d'
LOG_INTERVAL = 200

#------------------------SET LOGS WRITER--------------------------
time_id = datetime.now().strftime("%d_%m_%Y")
filename = "experiment1"
log_dir = os.path.join('tensorboardLogs', filename)
writer = SummaryWriter(log_dir=log_dir)

#----------------INITIALIZE ENVIRONMENT AND POLICIES----------------
env = Env(seed=SEED,
          batch_size=BATCH_SIZE,
          capacity=CAPACITY,
          n_nodes=N_NODES,
          n_depot=N_DEPOT,
          max_demand=MAX_DEMAND,
          n_agents=N_VEHICLES)
env_test = Env(seed=SEED + 2,
               batch_size=BATCH_SIZE,
               capacity=CAPACITY,
               n_nodes=N_NODES,
               n_depot=N_DEPOT,
               max_demand=MAX_DEMAND,
               n_agents=N_VEHICLES)
policy = [
    PolicyNet(batch_size=BATCH_SIZE,
              n_nodes=N_NODES,
        if sa_value >= best_sa_value:
            pi[s] = action
            best_sa_value = sa_value


def count_rewards():
    rewards_sum = 0
    for m in memories:
        rewards_sum += m['reward']
    return rewards_sum  # print(rewards_sum)


gamma = 1
epsilon = 0.1
env = Env(10)
wins = 0
loses = 0
state_space = [(x, y, z) for x in range(52) for y in range(52) for z in range(52)]
action_space = env.action_space
Q = init_Q(state_space, action_space)
returns = init_returns()
pi = init_pi()
T = 10
best_score = 0

epochs = range(20)
for e in epochs:
    memories = epoch()
    update_returns()
    update_Q()
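A hedged sketch of the epsilon-greedy action selection this training loop relies on; epoch() itself is not shown, so choose_action below is a hypothetical helper written against the module-level epsilon, pi, and action_space defined above (action_space is assumed to be a sequence of actions).

import random

def choose_action(s):
    # With probability epsilon pick a random action (explore),
    # otherwise follow the current greedy policy table pi (exploit).
    if random.random() < epsilon:
        return random.choice(action_space)
    return pi[s]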
def mal_eval(ast, environ): while True: ast = macroexpand(ast, environ) if not isinstance(ast, list): return eval_ast(ast, environ) elif len(ast) == 0: return ast else: if isinstance(ast[0], Symbol): if ast[0].getVal() == 'def!': environ.set(ast[1].getVal(), mal_eval(ast[2], environ)) return environ.get(ast[1].getVal()) elif ast[0].getVal() == 'quote': return ast[1] elif ast[0].getVal() == 'quasiquote': ast = quasiquote(ast[1]) continue elif ast[0].getVal() == 'macroexpand': return macroexpand(ast[1], environ) elif ast[0].getVal() == 'let*': e = Env(environ) update_env(e, ast[1]) environ = e ast = ast[2] continue elif ast[0].getVal() == 'if': b = mal_eval(ast[1], environ) if b != SpecialToken.NIL and b is not False: ast = ast[2] else: if len(ast) < 4: ast = SpecialToken.NIL else: ast = ast[3] continue elif ast[0].getVal() == 'do': for i in ast[1:len(ast) - 1]: mal_eval(i, environ) ast = ast[len(ast) - 1] continue elif ast[0].getVal() == 'fn*': return Function( ast[2], ast[1].getVal(), environ, lambda *x: mal_eval( ast[2], Env(environ, ast[1].getVal(), x))) elif ast[0].getVal() == 'defn': f = Function( ast[3], ast[2].getVal(), environ, lambda *x: mal_eval( ast[3], Env(environ, ast[2].getVal(), x))) environ.set(ast[1].getVal(), f) return f elif ast[0].getVal() == 'defmacro!': f = Function( ast[3], ast[2].getVal(), environ, lambda *x: mal_eval( ast[3], Env(environ, ast[2].getVal(), x[0])), True) environ.set(ast[1].getVal(), f) return f eval_list = eval_ast(ast, environ) if isinstance(eval_list[0], Function): ast = eval_list[0].get_ast_body() environ = Env(eval_list[0].get_env(), eval_list[0].get_params(), eval_list[1:]) continue else: fn = eval_list[0] args = eval_list[1:] return fn(*args)
def main(unused_argv): ''' check path ''' if FLAGS.data_dir == '' or not os.path.exists(FLAGS.data_dir): raise ValueError('invalid data directory {}'.format(FLAGS.data_dir)) if FLAGS.output_dir == '': raise ValueError('invalid output directory {}'.format( FLAGS.output_dir)) elif not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) event_log_dir = os.path.join(FLAGS.output_dir, '') checkpoint_path = os.path.join(FLAGS.output_dir, 'model.ckpt') ''' setup summaries ''' summ = Summaries() ''' setup the game environment ''' filenames_train = glob.glob( os.path.join(FLAGS.data_dir, 'train-{}'.format(FLAGS.sampling_rate), '*.mat')) filenames_val = glob.glob( os.path.join(FLAGS.data_dir, 'val-{}'.format(FLAGS.sampling_rate), '*.mat')) game_env_train = Env(decay=FLAGS.decay) game_env_val = Env(decay=FLAGS.decay) game_actions = list(game_env_train.actions.keys()) ''' setup the transition table for experience replay ''' stateDim = [FLAGS.num_chans, FLAGS.num_points] transition_args = { 'batchSize': FLAGS.batch_size, 'stateDim': stateDim, 'numActions': len(game_actions), 'maxSize': FLAGS.replay_memory, } transitions = TransitionMemory(transition_args) ''' setup agent ''' s_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size] + stateDim, 's_placeholder') s2_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size] + stateDim, 's2_placeholder') a_placeholder = tf.placeholder(tf.int32, [FLAGS.batch_size], 'a_placeholder') r_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size], 'r_placeholder') pcont_t = tf.constant(FLAGS.discount, tf.float32, [FLAGS.batch_size]) network = Model(FLAGS.batch_size, len(game_actions), FLAGS.num_chans, FLAGS.sampling_rate, \ FLAGS.num_filters, FLAGS.num_recurs, FLAGS.pooling_stride, name = "network") target_network = Model(FLAGS.batch_size, len(game_actions), FLAGS.num_chans, FLAGS.sampling_rate,\ FLAGS.num_filters, FLAGS.num_recurs, FLAGS.pooling_stride, name = "target_n") q = network(s_placeholder) q2 = target_network(s2_placeholder) q_selector = network(s2_placeholder) loss, q_learning = trfl.double_qlearning(q, a_placeholder, r_placeholder, pcont_t, q2, q_selector) synchronizer = Synchronizer(network, target_network) sychronize_ops = synchronizer() training_variables = network.variables opt = Adam(FLAGS.learning_rate, lr_decay=FLAGS.lr_decay, lr_decay_steps=FLAGS.lr_decay_steps, lr_decay_factor=FLAGS.lr_decay_factor, clip=True) reduced_loss = tf.reduce_mean(loss) graph_regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) total_regularization_loss = tf.reduce_sum(graph_regularizers) total_loss = reduced_loss + total_regularization_loss update_op = opt(total_loss, var_list=training_variables) summ_loss_op = tf.summary.scalar('loss', total_loss) state_placeholder = tf.placeholder(tf.float32, [1] + stateDim, 'state_placeholder') decayed_ep_placeholder = tf.placeholder(tf.float32, [], 'decayed_ep_placeholder') action_tensor_egreedy = eGreedy(state_placeholder, network, len(game_actions), decayed_ep_placeholder, FLAGS.debug) action_tensor_greedy = greedy(state_placeholder, network) ''' setup the training process ''' episode_reward_placeholder = tf.placeholder(tf.float32, [], "episode_reward_placeholder") average_reward_placeholder = tf.placeholder(tf.float32, [], "average_reward_placeholder") summ.register('train', 'episode_reward_train', episode_reward_placeholder) summ.register('train', 'average_reward_train', average_reward_placeholder) summ.register('val', 'episode_reward_val', episode_reward_placeholder) summ.register('val', 
'average_reward_val', average_reward_placeholder) total_reward_train = 0 average_reward_train = 0 total_reward_val = 0 average_reward_val = 0 ''' gathering summary operators ''' train_summ_op = summ('train') val_summ_op = summ('val') ''' setup the training process ''' transitions.empty() # print("game_actions -> {}".format(game_actions)) writer = tf.summary.FileWriter(event_log_dir, tf.get_default_graph()) saver = tf.train.Saver(training_variables) config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) assert (FLAGS.gpus != ''), 'invalid GPU specification' config.gpu_options.visible_device_list = FLAGS.gpus with tf.Session(config=config) as sess: sess.run([ tf.global_variables_initializer(), tf.local_variables_initializer() ]) val_step = 0 for step in range(FLAGS.steps): print("Iteration: {}".format(step)) game_env_train.reset(filenames_train[np.random.randint( 0, len(filenames_train))]) last_state = None last_state_assigned = False episode_reward = 0 action_index = (len(game_actions) >> 2) for estep in range(FLAGS.eval_steps): # print("Evaluation step: {}".format(estep)) # print("{} - measured RT: {}".format(estep, game_env_train.measured_rt)) # print("{} - predicted RT: {}".format(estep, game_env_train.predicted_rt)) # print("{} - action -> {}".format(estep, game_actions[action])) state, reward, terminal = game_env_train.step( game_actions[action_index]) # game over? if terminal: break episode_reward += reward # Store transition s, a, r, t # if last_state_assigned and reward: if last_state_assigned: # print("reward -> {}".format(reward)) # print("action -> {}".format(game_actions[last_action])) transitions.add(last_state, last_action, reward, last_terminal) # Select action # decayed_ep = FLAGS.testing_ep decayed_ep = max(0.1, (FLAGS.steps - step) / FLAGS.steps * FLAGS.ep) if not terminal: action_index = sess.run(action_tensor_egreedy, feed_dict={ state_placeholder: np.expand_dims(state, axis=0), decayed_ep_placeholder: decayed_ep }) else: action_index = 0 # Do some Q-learning updates if estep > FLAGS.learn_start and estep % FLAGS.update_freq == 0: summ_str = None for _ in range(FLAGS.n_replay): if transitions.size > FLAGS.batch_size: s, a, r, s2 = transitions.sample() summ_str, _ = sess.run( [summ_loss_op, update_op], feed_dict={ s_placeholder: s, a_placeholder: a, r_placeholder: r, s2_placeholder: s2 }) if summ_str: writer.add_summary(summ_str, step * FLAGS.eval_steps + estep) last_state = state last_state_assigned = True last_action = action_index last_terminal = terminal if estep > FLAGS.learn_start and estep % FLAGS.target_q == 0: # print("duplicate model parameters") sess.run(sychronize_ops) total_reward_train += episode_reward average_reward_train = total_reward_train / (step + 1) train_summ_str = sess.run(train_summ_op, feed_dict={ episode_reward_placeholder: episode_reward, average_reward_placeholder: average_reward_train }) writer.add_summary(train_summ_str, step) if FLAGS.validation and step % FLAGS.validation_interval == 0: game_env_val.reset(filenames_val[0]) episode_reward = 0 count = 0 action_index = (len(game_actions) >> 2) while True: # print("Evaluation step: {}".format(count)) # print("action -> {}".format(game_actions[action_index])) state, reward, terminal = game_env_val.step( game_actions[action_index]) # game over? 
if terminal: break episode_reward += reward if not terminal: action_index = sess.run(action_tensor_greedy, feed_dict={ state_placeholder: np.expand_dims(state, axis=0) }) action_index = np.squeeze(action_index) # print('state -> {}'.format(state)) # print('action_index -> {}'.format(action_index)) else: action_index = 0 count += 1 total_reward_val += episode_reward average_reward_val = total_reward_val / (val_step + 1) val_step += 1 val_summ_str = sess.run(val_summ_op, feed_dict={ episode_reward_placeholder: episode_reward, average_reward_placeholder: average_reward_val }) writer.add_summary(val_summ_str, step) tf.logging.info('Saving model.') saver.save(sess, checkpoint_path) tf.logging.info('Training complete') writer.close()
            # RL take action and get next observation and reward
            s_, reward, done = E.step(action)
            print(action, reward)

            # RL learn from this transition
            RL.learn(s, action, reward, s_)

            # swap observation
            s = s_

            # break while loop when end of this episode
            if done:
                #RL.epsilon += 0.001
                break

        if episode % 10 == 0:
            RL.dump_model = copy.copy(RL.model)
            E = Env()
            print("---------------test---------------")
            RL.m.bias_noisy = False
            RL.m.weight_noisy = False
            for i in range(E.final_step):
                q_table = RL.model.predict([i])
                E.step(np.argmax(q_table))
                print(np.argmax(q_table))
            print(E.score)


if __name__ == "__main__":
    env = Env()
    RL = NoisyQ(actions=list(range(env.n_actions)))
    update()
def REINFORCE(training_pairs, policy_nn, num_episodes): train = training_pairs success = 0 # path_found = set() path_found_entity = [] path_relation_found = [] for i_episode in range(num_episodes): start = time.time() print('Episode %d' % i_episode) print('Training sample: ', train[i_episode][:-1]) env = Env(dataPath, train[i_episode]) sample = train[i_episode].split() state_idx = [env.entity2id_[sample[0]], env.entity2id_[sample[1]], 0] episode = [] state_batch_negative = [] action_batch_negative = [] for t in count(): state_vec = env.idx_state(state_idx) action_probs = policy_nn.predict(state_vec) action_chosen = np.random.choice(np.arange(action_space), p=np.squeeze(action_probs)) reward, new_state, done = env.interact(state_idx, action_chosen) if reward == -1: # the action fails for this step state_batch_negative.append(state_vec) action_batch_negative.append(action_chosen) new_state_vec = env.idx_state(new_state) episode.append(Transition(state=state_vec, action=action_chosen, next_state=new_state_vec, reward=reward)) if done or t == max_steps: break state_idx = new_state # Discourage the agent when it choose an invalid step if len(state_batch_negative) != 0: print('Penalty to invalid steps:', len(state_batch_negative)) policy_nn.update(np.reshape(state_batch_negative, (-1, state_dim)), -0.05, action_batch_negative) print('----- FINAL PATH -----') print('\t'.join(env.path)) print('PATH LENGTH', len(env.path)) print('----- FINAL PATH -----') # If the agent success, do one optimization if done == 1: print('Success') path_found_entity.append(path_clean(' -> '.join(env.path))) success += 1 path_length = len(env.path) length_reward = 1 / path_length global_reward = 1 # if len(path_found) != 0: # path_found_embedding = [env.path_embedding(path.split(' -> ')) for path in path_found] # curr_path_embedding = env.path_embedding(env.path_relations) # path_found_embedding = np.reshape(path_found_embedding, (-1,embedding_dim)) # cos_sim = cosine_similarity(path_found_embedding, curr_path_embedding) # diverse_reward = -np.mean(cos_sim) # print 'diverse_reward', diverse_reward # total_reward = 0.1*global_reward + 0.8*length_reward + 0.1*diverse_reward # else: # total_reward = 0.1*global_reward + 0.9*length_reward # path_found.add(' -> '.join(env.path_relations)) total_reward = 0.1 * global_reward + 0.9 * length_reward state_batch = [] action_batch = [] for t, transition in enumerate(episode): if transition.reward == 0: state_batch.append(transition.state) action_batch.append(transition.action) policy_nn.update(np.reshape(state_batch, (-1, state_dim)), total_reward, action_batch) else: global_reward = -0.05 # length_reward = 1/len(env.path) state_batch = [] action_batch = [] total_reward = global_reward for t, transition in enumerate(episode): if transition.reward == 0: state_batch.append(transition.state) action_batch.append(transition.action) policy_nn.update(np.reshape(state_batch, (-1, state_dim)), total_reward, action_batch) print('Failed, Do one teacher guideline') try: good_episodes = teacher(sample[0], sample[1], 1, env, graphpath) for item in good_episodes: teacher_state_batch = [] teacher_action_batch = [] total_reward = 0.0 * 1 + 1 * 1 / len(item) for t, transition in enumerate(item): teacher_state_batch.append(transition.state) teacher_action_batch.append(transition.action) policy_nn.update(np.squeeze(teacher_state_batch), 1, teacher_action_batch) except Exception as e: print('Teacher guideline failed') print('Episode time: ', time.time() - start) print('\n') print('Success percentage:', 
success / num_episodes) for path in path_found_entity: rel_ent = path.split(' -> ') path_relation = [] for idx, item in enumerate(rel_ent): if idx % 2 == 0: path_relation.append(item) path_relation_found.append(' -> '.join(path_relation)) relation_path_stats = collections.Counter(path_relation_found).items() relation_path_stats = sorted(relation_path_stats, key=lambda x: x[1], reverse=True) f = open(dataPath + 'tasks/' + relation + '/' + 'path_stats.txt', 'w') for item in relation_path_stats: f.write(item[0] + '\t' + str(item[1]) + '\n') f.close() print('Path stats saved') return
def test_step8_is_macro(self):
    self.assertEqual(False, MalFunctionCompiled(lambda a: MalInt(1)).is_macro())
    self.assertEqual(
        False,
        MalFunctionRaw(core.ns["+"], MalInt(1), MalList([]), Env(None)).is_macro(),
    )
def EVAL(ast, env): while True: #print("EVAL %s" % printer._pr_str(ast)) if not types._list_Q(ast): return eval_ast(ast, env) # apply list ast = macroexpand(ast, env) if not types._list_Q(ast): return eval_ast(ast, env) if len(ast) == 0: return ast a0 = ast[0] if "def!" == a0: a1, a2 = ast[1], ast[2] res = EVAL(a2, env) return env.set(a1, res) elif "let*" == a0: a1, a2 = ast[1], ast[2] let_env = Env(env) for i in range(0, len(a1), 2): let_env.set(a1[i], EVAL(a1[i + 1], let_env)) ast = a2 env = let_env # Continue loop (TCO) elif "quote" == a0: return ast[1] elif "quasiquote" == a0: ast = quasiquote(ast[1]) # Continue loop (TCO) elif 'defmacro!' == a0: func = EVAL(ast[2], env) func._ismacro_ = True return env.set(ast[1], func) elif 'macroexpand' == a0: return macroexpand(ast[1], env) elif "py!*" == a0: exec(compile(ast[1], '', 'single'), globals()) return None elif "py*" == a0: return types.py_to_mal(eval(ast[1])) elif "." == a0: el = eval_ast(ast[2:], env) f = eval(ast[1]) return f(*el) elif "try*" == a0: if len(ast) < 3: return EVAL(ast[1], env) a1, a2 = ast[1], ast[2] if a2[0] == "catch*": err = None try: return EVAL(a1, env) except types.MalException as exc: err = exc.object except Exception as exc: err = exc.args[0] catch_env = Env(env, [a2[1]], [err]) return EVAL(a2[2], catch_env) else: return EVAL(a1, env) elif "do" == a0: eval_ast(ast[1:-1], env) ast = ast[-1] # Continue loop (TCO) elif "if" == a0: a1, a2 = ast[1], ast[2] cond = EVAL(a1, env) if cond is None or cond is False: if len(ast) > 3: ast = ast[3] else: ast = None else: ast = a2 # Continue loop (TCO) elif "fn*" == a0: a1, a2 = ast[1], ast[2] return types._function(EVAL, Env, a2, env, a1) else: el = eval_ast(ast, env) f = el[0] if hasattr(f, '__ast__'): ast = f.__ast__ env = f.__gen_env__(el[1:]) else: return f(*el[1:])
def test(model, a2c, config, args, **kwargs): env = Env(config.move_range) env.set_param(**kwargs) test_loader = torch.utils.data.DataLoader(dataset=MRIDataset( root=args.root, image_set='test', transform=False), batch_size=config.batch_size, shuffle=False, num_workers=config.workers, pin_memory=False) start = time.time() reward_sum = 0 PSNR_list = [] SSIM_list = [] for i, (ori_image, image) in enumerate(test_loader): ori_image = ori_image.numpy() image = image.numpy() previous_image = image.copy() env.reset(ori_image=ori_image, image=image) for j in range(config.episode_len): image_input = Variable(torch.from_numpy(image).cuda(), volatile=True) pout, vout = model(image_input) actions = a2c.act(pout, deterministic=True) image, reward = env.step(actions) image = np.clip(image, 0, 1) reward_sum += np.mean(reward) for ii in range(image.shape[0]): PSNR_list.append( computePSNR(ori_image[ii, 0], previous_image[ii, 0], image[ii, 0])) SSIM_list.append( computeSSIM(ori_image[ii, 0], previous_image[ii, 0], image[ii, 0])) if i == 100: i += 1 actions = actions.astype(np.uint8) total = actions.size a0 = actions[0] B = image[0, 0].copy() for a in range(config.num_actions): print(a, 'actions', np.sum(actions == a) / total) A = np.zeros((*B.shape, 3)) #print(A, B) A[..., 0] += B * 255 A[..., 1] += B * 255 A[..., 2] += B * 255 A[a0 == a, 0] += 250 cv2.imwrite('actions/' + str(a) + '.jpg', A) break psnr_res = np.mean(np.array(PSNR_list), axis=0) ssim_res = np.mean(np.array(SSIM_list), axis=0) print('PSNR', psnr_res) print('SSIM', ssim_res) avg_reward = reward_sum / i print('test finished: reward ', avg_reward) return avg_reward, psnr_res, ssim_res
device = "gpu" dt = 0.25 seed = 4 #alpha=0.25 net_size = 50 epochs = 10000 bptt_steps = seq_size = 50 le_size = 10 lrate = 0.0001 decay_rate = 1.0 #0.999 forecast_step = 1 env = Env("piano") source_data_file_list = [] for f in sorted(os.listdir(env.dataset())): if f.endswith("sparse_acoustic_data.dump"): print "Considering {} as input".format(f) source_data_file_list.append(env.dataset(f)) data_file_list = source_data_file_list[:] max_t, input_size = 0, None
def train_filter(model, a2c):
    args = parse()
    config = Config('filter_config.yml')
    torch.backends.cudnn.benchmark = True
    #log_dir = os.path.expanduser(args.log_dir)

    env = Env(config.move_range, reward_method=config.reward_method)
    #model = MyFcn(num_actions=config.num_actions)
    #model = torch.nn.DataParallel(model, device_ids=args.gpu).cuda()
    #a2c = PixelWiseA2C(model=None, optimizer=None, t_max=100000, gamma=config.gamma, beta=1e-2)

    filter_model = FilterModel()
    filter_model = filter_model.cuda()
    optimizer = torch.optim.SGD(filter_model.parameters(), config.base_lr, momentum=0)

    train_loader = torch.utils.data.DataLoader(
        dataset=MRIDataset(root=args.root, image_set='train', transform=True),
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.workers,
        pin_memory=False)

    writer = SummaryWriter('./filter_logs')

    #for lp in [0, 0.01, 0.02, 0.08, 0.09, 0.095, 0.1, 0.105, 0.11]:
    #    print('lp', lp)
    #    avg_reward, psnr_res, ssim_res = test(model, a2c, config, args, laplace_param=lp)
    for sobel_v1 in [0, 0.01, 0.02, 0.08, 0.09, 0.095, 0.1, 0.105, 0.11]:
        print('sobel_v1', sobel_v1)
        avg_reward, psnr_res, ssim_res = test(model, a2c, config, args, sobel_v1_param=sobel_v1)

    episodes = 0
    while episodes < config.num_episodes:
        for i, (ori_image, image) in enumerate(train_loader):
            learning_rate = adjust_learning_rate(
                optimizer, episodes, config.base_lr,
                policy=config.lr_policy,
                policy_parameter=config.policy_parameter)

            ori_image_input = Variable(ori_image).cuda()
            ori_image = ori_image.numpy()
            image = image.numpy()
            env.reset(ori_image=ori_image, image=image)

            reward = np.zeros((1))
            loss = Variable(torch.zeros(1)).cuda()
            for j in range(config.episode_len):
                image_input = Variable(torch.from_numpy(image).cuda(), volatile=True)
                #reward_input = Variable(torch.from_numpy(reward).cuda())
                pout, vout = model(image_input)
                actions = a2c.act(pout, deterministic=True)
                #print(actions)

                mask_laplace = (actions == 6)[:, np.newaxis]
                action_mask = Variable(torch.from_numpy(mask_laplace.astype(np.float32))).cuda()
                print(action_mask.mean())

                image_input = Variable(torch.from_numpy(image).cuda())
                output_laplace = filter_model(image_input)
                ll = torch.abs(ori_image_input - output_laplace) * action_mask
                #print(ll.shape)
                loss += ll.mean()

                previous_image = image
                image, reward = env.step(actions)
                #print(ori_image_input.shape, action_mask.shape, actions.shape, output_laplace.shape)
                if i % 40 == 0:
                    print('reward', j, np.mean(reward))
                    print(computeSSIM(ori_image[0, 0], previous_image[0, 0], image[0, 0]))
                    print('diff', (
                        torch.abs(ori_image_input.data - torch.from_numpy(image).cuda())
                        - torch.abs(ori_image_input.data - output_laplace.data) * action_mask.data).mean())
                image = np.where(mask_laplace, output_laplace.cpu().data.numpy(), image)
                image = np.clip(image, 0, 1)

            #loss = a2c.stop_episode_and_compute_loss(reward=Variable(torch.from_numpy(reward).cuda()), done=True) / config.iter_size
            loss.backward()
            if not (episodes % config.iter_size):
                optimizer.step()
                optimizer.zero_grad()
            lw = float(filter_model.state_dict()['conv_laplace.weight'].cpu().numpy())
            print('loss:', ll.mean(), 'weight:', lw)
            writer.add_scalar('weight', lw, episodes)

            episodes += 1
            if episodes % config.display == 0:
                print('episode: ', episodes, 'loss: ', loss.data)

            if not (episodes % config.save_episodes):
                #torch.save(model.module.state_dict(), 'model/' + str(episodes) + '.pth')
                print('model saved')

            if not (episodes % config.test_episodes):
                avg_reward, psnr_res, ssim_res = test(model, a2c, config, args)
                #writer.add_scalar('psnr_ref', psnr_res[0], episodes)
                #writer.add_scalar('psnr', psnr_res[1], episodes)
                #writer.add_scalar('ssim_ref', ssim_res[0], episodes)
                #writer.add_scalar('ssim', ssim_res[1], episodes)

            if episodes == config.num_episodes:
                writer.close()
                break
def sarsa():
    grid_size = 4
    env = Env(grid_size)
    policy = EspionGreedyPolicy(env.actions(), range(grid_size**2))
    Q = defaultdict(float)
    for i in range(5000):
        s0 = env.init()
        if env.is_t(s0):
            continue
        a0 = policy.get_a(s0)
        while not env.is_t(s0):
            s, r = env.step(a0)
            a = policy.get_a(s)
            # SARSA update with learning rate 0.9 and discount factor 0.9
            Q[(s0, a0)] += 0.9 * (r + 0.9 * Q[(s, a)] - Q[(s0, a0)])
            s0 = s
            a0 = a
            # make the greedy action of the new state the policy's preferred action
            mm = [(x, Q[(s0, x)]) for x in env.actions()]
            action = max(mm, key=lambda x: x[1])[0]
            policy.set_max(s0, action)
    Pi = {}
    for i in range(grid_size**2):
        Pi[i] = policy.get_m(i)
    for t in env.get_t():
        Pi[t] = 'ter'
    env.render(Pi)
class MulControl:
    def __init__(self):
        # environment initialization
        self.global_arg = arg.init_global_arg()
        env_arg = arg.init_env_arg(self.global_arg)
        # added a load step for the NK model
        self.main_env = Env(env_arg)
        for model_type in ['st', 'ed']:
            if all_config['checkpoint']['env'][model_type]['enable']:
                self.main_env.nkmodel_load(all_config['checkpoint']['env']['path'], model_type)
            self.main_env.nkmodel_save(all_config["nkmodel_path"][model_type], model_type)

        # agent initialization
        self.agents = []
        csv_head_agent = ['agent_no'] + ['st_state'] + ['st_value'] + ['insight'] + ['xplr'] + ['xplt'] + ['enable']
        moniter.AppendToCsv(csv_head_agent, all_config['agent_csv_path'])
        for i in range(self.global_arg["Nagent"]):
            # random initial position for each agent
            start_st_label = [randint(0, self.main_env.P - 1) for j in range(self.main_env.N)]
            state_start = State(start_st_label)
            self.agents.append(Agent(arg.init_agent_arg(self.global_arg, self.main_env.arg), self.main_env))
            self.agents[i].state_now = deepcopy(state_start)
            self.agents[i].agent_id = i
            # instead of one global area at the start, add a single-point area containing the start state
            start_area = Area(self.agents[i].state_now, [False] * self.main_env.N, 0)
            start_area.info = get_area_sample_distr(env=self.main_env, area=start_area,
                                                    state=self.agents[i].state_now,
                                                    T_stmp=0, sample_num=1, dfs_r=1)
            start_area.sign = Sign(i, 0, 'start')
            self.agents[i].renew_m_info(start_area, 0)
            self.a_plan = None
            logging.info("state:%s, st_value:%s,insight:%.5s ,xplr:%.5s, xplt:%.5s, enable:%.5s" % (
                str(self.agents[i].state_now),
                self.main_env.getValue(self.agents[i].state_now, 0),
                self.agents[i].agent_arg['a']['insight'],
                self.agents[i].agent_arg['a']['xplr'],
                self.agents[i].agent_arg['a']['xplt'],
                self.agents[i].agent_arg['a']['enable']))

            # record agent info
            csv_info_agent = ['agent%d' % i] \
                             + [self.agents[i].state_now] \
                             + [self.main_env.getValue(self.agents[i].state_now, 0)] \
                             + [self.agents[i].agent_arg['a']['insight']] \
                             + [self.agents[i].agent_arg['a']['xplr']] \
                             + [self.agents[i].agent_arg['a']['xplt']] \
                             + [self.agents[i].agent_arg['a']['enable']]
            moniter.AppendToCsv(csv_info_agent, all_config['agent_csv_path'])

        # social network initialization
        soclnet_arg = arg.init_soclnet_arg(self.global_arg, env_arg)
        self.socl_net = SoclNet(soclnet_arg)
        self.socl_net.new_flat_init()  # updated initialization method
        # self.socl_net.flat_init()
        if all_config['checkpoint']['socl_network']['enable']:
            self.socl_net.power_load(all_config['checkpoint']['socl_network']['power'])
            self.socl_net.relat_load(all_config['checkpoint']['socl_network']['relat'])

        self.record = Record()
        self.metric = metrics.register_all_metrics(metrics.Metrics())

    def run_meet_frame(self, Ti, Tfi, meet_name, member, host, up_info):
        # run the meeting identified by meet_name
        logging.debug("m_name:%s, member:%s, host:%s" % (meet_name, member, host))
        self.agents, self.socl_net = meeting.meet_map[meet_name](env=self.main_env,
                                                                 agents=self.agents,
                                                                 member=member,
                                                                 host=host,
                                                                 socl_net=self.socl_net,
                                                                 record=self.record,
                                                                 T=Ti,
                                                                 Tfi=Tfi)

    def run_all_frame(self, Ti, Tfi, meet_req, up_info):
        # copy in each agent's frame arguments from the previous frame
        for i in range(len(self.agents)):
            last_arg = deepcopy(self.agents[i].frame_arg)
            # logging.debug("agent %d, %s" % (i, "{}".format(self.agents[i].frame_arg)))
            self.agents[i].frame_arg = arg.init_frame_arg(
                global_arg=self.global_arg,
                env_arg=self.main_env.arg,
                agent_arg=self.agents[i].agent_arg,
                stage_arg=self.agents[i].stage_arg,
                last_arg=last_arg,
                Tp=Ti,
                PSMfi=self.main_env.getValue(self.agents[i].state_now, Ti)
            )
        logging.debug("agent copy finished")

        # clear each agent's action and meeting records
        for i in range(len(self.agents)):
            self.agents[i].meeting_now = ''
            self.agents[i].policy_now = ''

        # NOTE cid: apply natural decay to the SoclNet relations
        self.socl_net.relat_cd(self.socl_net.arg['re_decr_r'])

        # read the collective actions requested earlier
        all_host = set()
        all_meet_info = {}
        new_meet_req = {}
        # first gather the hosts of every kind of meeting and add them to the corresponding meet_info
        # meet_req roughly has the following structure:
        # meet_req = {
        #     "m_name1": {agent},
        #     "m_name2": {agent},
        # }
        # where m_name is the name of a collective action, e.g. information exchange (xxjl)
        for m_name in meet_req:
            all_host = all_host.union(meet_req[m_name])
            all_meet_info[m_name] = {"member": deepcopy(meet_req[m_name]),
                                     "host": deepcopy(meet_req[m_name])}

        # ask each agent whether it joins
        logging.debug("all host:%s" % (all_host))
        for m_name in all_meet_info:
            logging.debug("before m_name:%s, member:%s, host:%s" % (
                m_name, all_meet_info[m_name]['member'], all_meet_info[m_name]['host']))
        for i in range(len(self.agents)):
            # logging.debug("all_host:{}".format(all_host))
            # skip all hosts
            if i in all_host:
                continue
            # the agent reports whether it joins a collective action: if not, it finishes its
            # individual action; if it does, it takes part in the later run_meet_frame
            if self.global_arg['mul_agent']:
                # logging.info("using mul_act")
                self.agents[i], self.socl_net, meet_info = brain.mul_agent_act(env=self.main_env,
                                                                               soc_net=self.socl_net,
                                                                               agent=self.agents[i],
                                                                               Ti=Ti, Tfi=Tfi,
                                                                               agent_no=i,
                                                                               record=self.record,
                                                                               meet_req=meet_req)
            else:
                self.agents[i], self.socl_net, meet_info = brain.sgl_agent_act(env=self.main_env,
                                                                               soc_net=self.socl_net,
                                                                               agent=self.agents[i],
                                                                               Ti=Ti, Tfi=Tfi,
                                                                               agent_no=i,
                                                                               record=self.record,
                                                                               meet_req=meet_req)
            if meet_info is None:
                continue
            # the agent joins an existing meeting: add it to the member list
            if meet_info['type'] == 'commit':
                all_meet_info[meet_info['name']]["member"].add(i)
            # the agent requests a new meeting
            if meet_info['type'] == 'req':
                if not meet_info['name'] in new_meet_req:
                    new_meet_req[meet_info['name']] = set()
                new_meet_req[meet_info['name']].add(i)

        # once every host has gathered its members, run the meetings one by one
        for m_name in all_meet_info:
            logging.debug("after m_name:%s, member:%s, host:%s" % (
                m_name, all_meet_info[m_name]['member'], all_meet_info[m_name]['host']))
            self.run_meet_frame(Ti, Tfi, m_name, all_meet_info[m_name]['member'],
                                all_meet_info[m_name]['host'], up_info)

        self.metric.calc_metric(['frame'], Ti + Tfi, socl_net=self.socl_net, agents=self.agents, env=self.main_env)
        return new_meet_req

    def run_stage(self, Ti, meet_req, up_info):
        # copy each agent's final state from the previous stage
        for i in range(len(self.agents)):
            last_arg = deepcopy(self.agents[i].stage_arg)
            self.agents[i].stage_arg = arg.init_stage_arg(self.global_arg,
                                                          self.main_env.arg,
                                                          self.agents[i].agent_arg,
                                                          last_arg,
                                                          Ti)
        meet_req = {}
        # NOTE cid: up_info is passed in to avoid traversing the model again
        self.record.add_env_record(self.main_env, Ti, up_info)
        self.record.add_socl_net_record(self.socl_net, Ti)

        for i in range(self.global_arg['Ts']):
            logging.info("frame %3d , Ti:%3d" % (i, Ti))
            self.record.add_agents_record(self.main_env, self.agents, Ti + i)
            # run a frame and record the meeting requests it generates
            meet_req = self.run_all_frame(Ti, i, meet_req, up_info)

            # output detailed info for each agent
            for k in range(self.global_arg["Nagent"]):
                tmp_goal = ''
                tmp_goal_value = ''
                if not self.agents[k].a_plan is None:
                    tmp_goal = self.agents[k].a_plan.goal
                    tmp_goal_value = self.agents[k].a_plan.goal_value
                csv_info_result = [
                    Ti + i,
                    str(self.agents[k].state_now),
                    self.main_env.getValue(self.agents[k].state_now, Ti),
                    self.agents[k].get_max_area().info['max'],
                    str(self.agents[k].get_max_area().center),
                    str(self.agents[k].policy_now) + '&' + str(self.agents[k].meeting_now),
                    str(tmp_goal),
                    tmp_goal_value
                ]
                moniter.AppendToCsv(csv_info_result, all_config['result_csv_path'][k])

            # output current values
            agent_value = [self.main_env.getValue(self.agents[k].state_now, Ti)
                           for k in range(self.global_arg["Nagent"])]
            agent_avg = sum(agent_value) / len(agent_value)
            csv_info_value = [Ti + i] \
                             + agent_value \
                             + [agent_avg, max(agent_value), min(agent_value)] \
                             + [up_info['nkinfo'][key] for key in ['max', 'min', 'avg']] \
                             + [(agent_avg - up_info['nkinfo']['min'])
                                / (up_info['nkinfo']['max'] - up_info['nkinfo']['min'])]
            moniter.AppendToCsv(csv_info_value, all_config['value_csv_path'][-1])

            # output max_area
            agent_max_area = [self.agents[k].get_max_area().info['max']
                              for k in range(self.global_arg["Nagent"])]
            csv_info_area = [Ti + i] \
                            + agent_max_area \
                            + [sum(agent_max_area) / len(agent_max_area)] \
                            + [up_info['nkinfo']['max']]
            moniter.AppendToCsv(csv_info_area, all_config['area_csv_path'])

            # NOTE cid: add act info (with matching variables added to the Agent class)
            act_list = [self.agents[k].policy_now + '&' + self.agents[k].meeting_now
                        for k in range(self.global_arg["Nagent"])]
            csv_info_act = [Ti + i] + act_list
            moniter.AppendToCsv(csv_info_act, all_config['act_csv_path'])

        # per-stage output
        if self.global_arg['mul_agent']:
            # net_title, net_data = self.record.output_socl_net_per_frame(Ti + i)
            power_save_path = os.path.join(all_config['network_csv_path'], "power_%04d.csv" % (Ti))
            relat_save_path = os.path.join(all_config['network_csv_path'], "relat_%04d.csv" % (Ti))
            self.socl_net.power_save(power_save_path)
            self.socl_net.relat_save(relat_save_path)
        # P1-05: add output of the social network results
        self.metric.calc_metric(['stage'], Ti, socl_net=self.socl_net, agents=self.agents, env=self.main_env)
        return meet_req

    def run_exp(self):
        up_info = {}
        # per-agent result tables
        for k in range(self.global_arg["Nagent"]):
            csv_head = ['frame', 'state', 'value', 'area_v', 'area_center', 'act', 'goal', 'goal_value']
            moniter.AppendToCsv(csv_head, all_config['result_csv_path'][k])

        # summary table
        # added agent_max and agent_min columns
        csv_head_value = ['frame'] \
                         + ["agent%d" % (k) for k in range(self.global_arg['Nagent'])] \
                         + ["agent_avg", "agent_max", "agent_min"] \
                         + ['peakmax', 'peakmin', 'peakavg'] \
                         + ['adj_avg']
        moniter.AppendToCsv(csv_head_value, all_config['value_csv_path'][-1])

        csv_head_area = ['frame'] \
                        + ["agent%d" % (k) for k in range(self.global_arg['Nagent'])] \
                        + ["agent_avg"] \
                        + ['nkmax']
        moniter.AppendToCsv(csv_head_area, all_config['area_csv_path'])

        csv_head_act = ['frame'] \
                       + ["agent%d" % (k) for k in range(self.global_arg['Nagent'])]
        moniter.AppendToCsv(csv_head_act, all_config['act_csv_path'])

        stage_num = self.global_arg['T'] // self.global_arg['Ts']

        # self.main_env.getModelDistri()  # for plotting only; call only during testing!!
        up_info['nkinfo'] = self.main_env.getModelPeakDistri()  # use peak values as nkinfo
        # all_peak_value = self.main_env.getAllPeakValue()
        # moniter.DrawHist(all_peak_value, all_config['peak_hist'])

        meet_req = {}
        for i in range(stage_num):
            Ti = i * self.global_arg['Ts'] + 1
            logging.info("stage %3d, Ti:%3d" % (i, Ti))
            self.main_env.T_clock = Ti
            # scan the current model once per stage to obtain its distribution;
            # to reduce computation, only the first frame is used
            # up_info['nkinfo'] = self.main_env.getModelDistri()
            # logging.debug("max_value:{max}".format(**up_info['nkinfo']))
            # run one stage; Ti is the first frame of the stage
            meet_req = self.run_stage(Ti, meet_req, up_info)

        moniter.DumpToJson(self.metric.get_data(), all_config['metrics_json_path'])
        moniter.DumpToJson(leadership_bill.leader_bill.to_json(), all_config['leadership_bill_json_path'])
from env import Env
import random

env = Env()
# For every direction and every starting point that keeps the whole line on the board,
# lay down four stones of a five-in-a-row line, interleaved with random moves that do
# not collide with the line, and check that the game has not ended yet.
for dx in range(-1, 2):
    for dy in range(-1, 2):
        if dx == 0 and dy == 0:
            continue
        for x in range(0, 15):
            for y in range(0, 15):
                if dx < 0 and x < 5:
                    continue
                if dx > 0 and x > 10:
                    continue
                if dy < 0 and y < 5:
                    continue
                if dy > 0 and y > 10:
                    continue
                for m in range(100):
                    env.reset()
                    actions = []
                    for i in range(5):
                        actions.append((x + dx * i, y + dy * i))
                    for i in range(4):
                        action = random.choice(env.actions)
                        while action in actions:
                            action = random.choice(env.actions)
                        env.take_action(actions[i])
                        assert not env.game_over
                        env.take_action(action)
                        assert not env.game_over
import numpy as np
from tqdm import tqdm

from models import Stochastic
from models import RoundRobin
from models import Greedy
from models import learn_local
from models import learn_hierarchical

if __name__ == '__main__':
    logger = get_logger(args.note)
    logger.info(str(args))

    env = Env()
    models = [
        RoundRobin(act_size=args.n_servers),
        Stochastic(act_size=args.n_servers),
        Greedy(act_size=args.n_servers, n_servers=args.n_servers),
        learn_local(
            env=env,
            total_epoches=args.total_epoches,
            n_servers=args.n_servers,
            l_ob_size=args.l_obsize + 1,
            l_act_size=args.l_actsize,
            l_latents=args.l_latents,
import reader
import printer
import mal_types
from env import Env
from core import ns
import sys

repl_env = Env()
repl_env.set('eval', lambda ast: EVAL(ast, repl_env))
for k, v in ns.items():
    repl_env.set(k, v)


def READ(string):
    return reader.read_str(string)


def EVAL(ast, env):
    while True:
        if not isinstance(ast, mal_types.list_types):
            return eval_ast(ast, env)
        elif not ast:
            return ast
        elif isinstance(ast, mal_types.list_types):
            if len(ast) == 0:
                return ast
            if isinstance(ast[0], mal_types.MalSymbol):
                if ast[0].data == 'def!':
                    value = EVAL(ast[2], env)
                    env.set(ast[1].data, value)
                    return value
from keras.optimizers import Adam

from rl.agents import NAFAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from rl.core import Processor


class PendulumProcessor(Processor):
    def process_reward(self, reward):
        # The magnitude of the reward can be important. Since each step yields a relatively
        # high reward, we reduce the magnitude by two orders.
        return reward / 100.


N = 5
env = Env(N)
nb_actions = N

processor = PendulumProcessor()
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
agent = NAFAgent(covariance_mode='diag', nb_actions=nb_actions, V_model=Vmodel(N), L_model=Lmodel(N),
from env import Env
import time

# set the environment
env = Env((8, 8), (130, 90), default_rewards=0)


def game_map_1(environment):
    environment.add_item('yellow_star', (3, 3), pickable=True)
    environment.add_item('yellow_star', (0, 7), pickable=True)
    environment.add_item('red_ball', (5, 6), terminal=True, label="Exit")


# select a game
game_map_1(env)

for _ in range(100):
    action = env.action_space.sample()
    print(action)
    reward, next, end = env.step(action)
    print(reward, next, end)
    time.sleep(0.2)

env.reset()
np.random.seed(1)


#### epsilon-greedy action selection
def epsilon_greedy(Q, state):
    n = np.random.uniform()
    if (n > 1 - EPSILON) or ((Q[state, :] == 0).all()):
        action = np.random.randint(0, 4)  # 0~3
    else:
        action = Q[state, :].argmax()
    return action


e = Env()
#### Q table: Q(s, a)
Q = np.zeros((e.state_num, 4))

for i in range(EPOCH):
    e = Env()
    #### E table: E(s, a)
    ### 1. mainly records the path (state-action pairs) visited so far
    ### 2. the resulting change in reward therefore depends on the whole path taken
    E = np.zeros((e.state_num, 4))
    #### loop while e.is_end != False
    while (e.is_end == 0) and (e.step < MAX_STEP):
        action = epsilon_greedy(Q, e.present_state)
        state = e.present_state
        reward = e.interact(action)
        new_state = e.present_state
    sys.stderr.write(' hash_type = stream | historic\n')
    sys.stderr.write(' hash = the hash\n')
    sys.stderr.write(' name = a friendly name for the subscription\n')
    sys.stderr.write(' key=val = output_type-specific arguments\n')
    sys.stderr.write('\n')
    sys.stderr.write('Example\n')
    sys.stderr.write(' PushFromHash http stream <hash> \"Push Name\" delivery_frequency=10 \\\n')
    sys.stderr.write(' url=http://www.example.com/push_endpoint auth.type=none\n')
    sys.stderr.write('\n')
    sys.stderr.write('\n')
    if exit:
        sys.exit(1)


# Set up the environment
env = Env(sys.argv)

# Make sure we have enough arguments
if env.get_arg_count() < 4:
    usage()

# Get the args
output_type = env.get_arg(0)
hash_type = env.get_arg(1)
hash = env.get_arg(2)
name = env.get_arg(3)

try:
    # Create the Push definition
    pushdef = env.get_user().create_push_definition()
    pushdef.set_output_type(output_type)
        if t > 10 * 60:  # eval takes too long
            self.eval_episode = int(self.eval_episode * 0.94)
        self.trainer.monitors.put_scalar('farmer_win_rate', farmer_win_rate)
        self.trainer.monitors.put_scalar('lord_win_rate', 1 - farmer_win_rate)


if __name__ == '__main__':
    # encoding = np.load('encoding.npy')
    # print(encoding.shape)
    # env = Env()
    # stat = StatCounter()
    # init_cards = np.arange(21)
    # # init_cards = np.append(init_cards[::4], init_cards[1::4])
    # for _ in range(10):
    #     fw = play_one_episode(env, lambda b: np.random.rand(1, 1, 100) if b[1][0] else np.random.rand(1, 1, 21), [100, 21])
    #     stat.feed(int(fw))
    # print('lord win rate: {}'.format(1. - stat.average))
    env = Env()
    stat = StatCounter()
    for i in range(100):
        env.reset()
        print('begin')
        env.prepare()
        r = 0
        while r == 0:
            role = env.get_role_ID()
            intention, r, _ = env.step_auto()
            # print('lord gives' if role == 2 else 'farmer gives', to_char(intention))
        stat.feed(int(r < 0))
    print(stat.average)