def runTest(self):
    """Check ``cg.cg`` against tabulated Clebsch-Gordan coefficients.

    Every table row is ``(j1, j2, m1, m2, j, m, expected)``.  For each row the
    coefficient is computed, printed next to the expected value and compared
    to 14 significant digits.  The comparison is then repeated with the two
    coupled momenta swapped, where the expected value is multiplied by the
    phase ``(-1)**(j - j1 - j2)``.
    """
    # Reference values are taken from the Particle Data Group table:
    # http://pdg.lbl.gov/2015/reviews/rpp2014-rev-clebsch-gordan-coefs.pdf
    reference_table = (
        (1/2, 1/2, 1/2, -1/2, 0, 0, sqrt(1/2)),
        (2, 1/2, 1, -1/2, 3/2, 1/2, sqrt(3/5)),
        (2, 1/2, -2, 1/2, 1.5, -3/2, -sqrt(4/5)),
        (1, 1/2, 0, -1/2, 3/2, -1/2, sqrt(2/3)),
        (3/2, 1/2, 3/2, -1/2, 2, 1, sqrt(1/4)),
        (3/2, 1, 3/2, 0, 5/2, 3/2, sqrt(2/5)),
        (2, 1, 2, -1, 3, 1, sqrt(1/15)),
        (1, 1, 0, 0, 0, 0, -sqrt(1/3)),
        (3/2, 1, -3/2, -1, 5/2, -5/2, 1),
        (2, 3/2, 2, -3/2, 5/2, 1/2, sqrt(6/35)),
        (3/2, 3/2, -1/2, 1/2, 0, 0, sqrt(1/4)),
        (3/2, 3/2, -3/2, 1/2, 1, -1, sqrt(3/10)),
        (2, 3/2, 0, -1/2, 1/2, -1/2, -sqrt(1/5)),
        (2, 2, 2, -2, 4, 0, sqrt(1/70)),
        (2, 2, 1, -2, 4, -1, sqrt(1/14)),
        (2, 2, -2, 1, 3, -1, -sqrt(3/10)),
        (2, 2, -1, -1, 3, -2, 0),
    )
    significant = 14

    for row in reference_table:
        j1, j2, m1, m2, j, m, expected = row
        quantum_numbers = row[:-1]

        computed = cg.cg(j1, j2, m1, m2, j, m)
        print("< %g %g ; %g %g | %g %g > = " % quantum_numbers)
        print(" (calc), (expected) %.16g %.16g" % (computed, expected))
        numpy.testing.assert_approx_equal(expected, computed, significant)

        # Permutation symmetry: swapping the two coupled momenta only
        # changes the coefficient by a sign.
        swapped = cg.cg(j2, j1, m2, m1, j, m)
        phase = (-1) ** (j - j1 - j2)
        numpy.testing.assert_approx_equal(phase * expected, swapped,
                                          significant)
def test5():
    """Lay out a small sample graph and print its ``k`` value.

    A graph is built from a textual description with ``cg.cg``, laid out
    once, rebuilt, and its ``get_k()`` value is printed.  The trailing
    benchmark loop averages ``layout(i, 40)`` over ten fresh graphs for each
    ``i``; it is currently disabled because ``range(2, 2)`` is empty.
    """
    # Larger sample description kept for manual experiments; it is
    # overridden by the small graph assigned right below.
    x = ('A[Mastering complex systems] '
         'B[Enhance requirements engineering methods] B->A '
         'C[Methods evolutivity] C->B '
         'D[Extended enterprise] D->B '
         'E[User-friendliness] E->B '
         'F[Take into account certain NFRs] F->B '
         'G[Enhance architectural design methods] G->A '
         'H[Take zigzags into account (?)] H->G H->B '
         'FLOSS->C '
         'Extendability->C '
         'K[Collaboration support] K->D K->E '
         'L[Viewpoint-based language] L->E L->F '
         '[Requirements evolution]->B '
         '[Validate requirements as early as possible]->B '
         '[Safe method]->B')
    #x = 'A B A->B'
    x = 'A B C D A->B C->D A->C B->D'

    mygraph = cg.cg(x, {})
    mygraph.layout(10, 100)

    mygraph = cg.cg(x, {})
    # Single-argument print call: same output under Python 2 and Python 3.
    print('k=%s' % mygraph.get_k())

    # Benchmark loop, disabled on purpose by the empty range.  Widen the
    # upper bound to enable it.
    for i in range(2, 2):
        runs, total = 0, 0  # 'total' avoids shadowing the builtin sum()
        for _ in range(10):
            runs += 1
            # Build through cg.cg like the rest of this function; the
            # original called the bare name `cg` here.
            mygraph = cg.cg(x, {})
            total += mygraph.layout(i, 40)
        print('%s %s' % (i, total / runs))
def solve(self, prev_positions=None):
    """Solve the accumulated sparse least-squares system via its normal equations.

    The constraint triplets stored in ``self.rows`` / ``self.cols`` /
    ``self.lvals`` (matrix entries) and ``self.rvals`` (right-hand side) are
    stacked into a sparse matrix ``M``; ``M.T * M  x = M.T * rvals`` is then
    handed to ``cg.cg``.

    Parameters
    ----------
    prev_positions :
        Accepted for interface compatibility.  It is not used: the solver is
        always started with ``x0=None``.  The original code flattened this
        argument into an unused ``x0`` vector, which raised for the default
        ``None`` and passed a generator to ``np.hstack``.

    Returns
    -------
    (new_ivals, new_jvals)
        The first and second halves of the solution vector, each reshaped to
        ``self.i_indices.shape``.

    Notes
    -----
    Each call appends one block of damping rows to ``self.rows``,
    ``self.cols``, ``self.lvals`` and ``self.rvals``.
    """
    n_unknowns_per_axis = self.i_indices.size
    Csize = 2 * n_unknowns_per_axis

    # add damping: one extra row per unknown with weight 0.001 and target 0
    self.rows.append(self.base_row_idx + np.arange(Csize))
    self.cols.append(np.arange(Csize))
    self.lvals.append(np.ones(Csize) * 0.001)
    self.rvals.append(np.zeros(Csize))

    # build matrix
    rows = np.hstack([r.ravel() for r in self.rows])
    cols = np.hstack([c.ravel() for c in self.cols])
    lvals = np.hstack([l.ravel() for l in self.lvals])
    rvals = np.hstack([r.ravel() for r in self.rvals])
    Rsize = rows.max() + 1
    M = sparse.coo_matrix((lvals, (rows, cols)), (Rsize, Csize)).tocsr()

    # Normal equations; element 0 of the result is the solution vector.
    V = cg.cg(M.T * M, M.T * rvals.reshape((-1, 1)), x0=None)
    solution = V[0]

    new_ivals = solution[:n_unknowns_per_axis].reshape(self.i_indices.shape)
    new_jvals = solution[n_unknowns_per_axis:].reshape(self.i_indices.shape)
    return new_ivals, new_jvals
def test_connectors():
    """Compare ``get_connectors()`` with the expected values in ``set_connectors``.

    For every key of ``set_connectors`` an item is built with ``cg.cg(key)``
    and its connectors are compared with the stored expectation.  Each
    mismatch is printed, followed by a one-line summary.

    Returns
    -------
    int
        Number of cases that were checked.
    """
    n, failures = 0, 0
    for text, expected in set_connectors.items():
        item = cg.cg(text)
        n += 1
        # Evaluate once so the reported value is the one that was compared.
        computed = item.get_connectors()
        if computed != expected:
            failures += 1
            print('Item %d %s Computed:%s|Expected:%s|'
                  % (n, text, computed, expected))
    # Single-argument print call: same output under Python 2 and Python 3.
    print('Test OK (%d cases)' % n if failures == 0
          else 'Test KO on %d tests' % failures)
    return n
def test_syntax():
    """Compare ``check()`` results with the expected values in ``set_syntax``.

    For every key of ``set_syntax`` an item is built with ``cg.cg(key)`` and
    ``item.check(key)`` is compared with the stored expectation.  Each
    mismatch is printed, followed by a one-line summary.

    Returns
    -------
    int
        Number of cases that were checked.
    """
    n, failures = 0, 0
    for text, expected in set_syntax.items():
        item = cg.cg(text)
        n += 1
        value = item.check(text)
        if value != expected:
            failures += 1
            print('Item %d %s Computed:%s|Expected:%s|'
                  % (n, text, value, expected))
    # Single-argument print call: same output under Python 2 and Python 3.
    print('Test OK (%d cases)' % n if failures == 0
          else 'Test KO on %d tests' % failures)
    return n
def main():
    """Write the ``nascore`` line of every multi-residue group in a cg file.

    Command line: ``cg_file rsa_file``.  Each line of ``cg_file`` is turned
    into a ``cg`` object over the alphabet obtained from
    ``expandVars([AAlist, na.alphabet])``.  For every object whose
    ``AAgroup`` has more than one member, ``nascore(na)`` is written to
    ``<cg_file>.nascore``.
    """
    if len(sys.argv) < 3:
        print("Usage python proc_scoreboard.py cg_file rsa_file")
        return

    AAlist = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
              'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']

    cgfile = sys.argv[1]
    nafile = sys.argv[2]
    outfile = cgfile + '.nascore'

    print('loading %s, %s' % (cgfile, nafile))
    na = naccess(nafile)
    alphabet = expandVars([AAlist, na.alphabet])

    # Context managers close both files even if parsing or scoring raises.
    with open(cgfile) as fin:
        cgs = [cg(line.strip(), alphabet) for line in fin]

    with open(outfile, 'w') as fo:
        for c in cgs:
            if len(c.AAgroup) > 1:
                fo.write(c.nascore(na) + '\n')
def main():
    """Write the ``nvscore`` line of every multi-residue group in a cg file.

    Command line: ``cg_file rsa_file``.  Each line of ``cg_file`` is turned
    into a ``cg`` object (with an empty alphabet argument).  For every object
    whose ``AAgroup`` has more than one member, ``nvscore(na)`` is written to
    ``<cg_file>.nvscore``.
    """
    if len(sys.argv) < 3:
        print("Usage python proc_nvboard.py cg_file rsa_file")
        print("python proc_nvboard.py 1k2p.tip.hcg 1k2p.rsa")
        print("output: 1k2p.tip.hcg.nvscore")
        return

    cgfile = sys.argv[1]
    nafile = sys.argv[2]
    outfile = cgfile + '.nvscore'

    na = naccess(nafile)

    # Context managers close both files even if parsing or scoring raises.
    with open(cgfile) as fin:
        cgs = [cg(line.strip(), '') for line in fin]

    with open(outfile, 'w') as fo:
        for c in cgs:
            if len(c.AAgroup) > 1:
                fo.write(c.nvscore(na) + '\n')

    print('finish writing %s.' % outfile)
def poisson(f_rhs,f_dist,h0,pts,tri,*args,**kwargs):
    """Solve Poisson's equation on a domain D by the FE method.

    Solves
         - Laplacian u = f_rhs
    on D and
         u = 0
    on the boundary of D, on a triangulation described by points pts,
    triangles tri, a mesh scale h0 and a signed distance function f_dist;
    see distmesh2d.py.  See fem_examples.py for examples.

    Parameters
    ----------
    f_rhs : callable
        Right-hand side, evaluated once per node as f_rhs(pts[j], *args).
    f_dist : callable
        Signed distance function, called as f_dist(pts, *args).  Nodes whose
        value is below -0.001*h0 are treated as interior.
    h0 : float
        Mesh scale; also forwarded to cg as h.
    pts : array
        Node coordinates, indexed as pts[node, 0] and pts[node, 1].
    tri : array
        One row of three node indices per triangle.
    *args
        Extra arguments forwarded to f_rhs and f_dist.
    **kwargs
        announce (default False): print progress messages.
        getsys (default False): additionally return the assembled system.

    Returns
    -------
    uh : approximate solution value at pts (zero at non-interior nodes)
    inside : index of interior point (or -1 if not interior)
    A, b : stiffness matrix and load vector, only when getsys is true
    """
    announce = kwargs.get('announce', False)
    getsys = kwargs.get('getsys', False)
    geps = 0.001 * h0
    ii = (f_dist(pts, *args) < -geps)      # boolean array for interior nodes
    Npts = np.shape(pts)[0]                # = number of nodes
    N = ii.sum()                           # = number of *interior* nodes
    if announce:
        print(" poisson: assembling on mesh with %d nodes and %d interior nodes"
              % (Npts, N))

    # index only the interior nodes: consecutive numbers in node order,
    # -1 everywhere else
    inside = np.full(Npts, -1, dtype=np.int32)
    inside[ii] = np.arange(N, dtype=np.int32)

    # eval f_rhs once for each node
    ff = np.zeros(Npts)
    for j in range(Npts):
        ff[j] = f_rhs(pts[j], *args)

    # loop over triangles to set up stiffness matrix A and load vector b
    # NOTE: not using sparse matrices at all
    A = np.zeros((N, N))
    b = np.zeros(N)
    for n in range(np.shape(tri)[0]):
        # indices, coordinates, and Jacobian of triangle
        j, k, l = tri[n, :]
        vj = inside[j]
        vk = inside[k]
        vl = inside[l]
        Jac = np.array([[pts[k, 0] - pts[j, 0], pts[l, 0] - pts[j, 0]],
                        [pts[k, 1] - pts[j, 1], pts[l, 1] - pts[j, 1]]])
        ar = abs(det2(Jac)) / 2.0
        C = ar / 12.0
        Q = inv2(np.dot(Jac.transpose(), Jac))
        fT = np.array([ff[j], ff[k], ff[l]])

        # add triangle's contribution to linear system  A x = b
        if ii[j]:
            A[vj, vj] += ar * np.sum(Q)
            b[vj] += C * np.dot(fT, np.array([2, 1, 1]))
        if ii[k]:
            A[vk, vk] += ar * Q[0, 0]
            b[vk] += C * np.dot(fT, np.array([1, 2, 1]))
        if ii[l]:
            A[vl, vl] += ar * Q[1, 1]
            b[vl] += C * np.dot(fT, np.array([1, 1, 2]))
        # off-diagonal entries: accumulate one triangle half and mirror it
        if ii[j] and ii[k]:
            A[vj, vk] -= ar * np.sum(Q[:, 0])
            A[vk, vj] = A[vj, vk]
        if ii[j] and ii[l]:
            A[vj, vl] -= ar * np.sum(Q[:, 1])
            A[vl, vj] = A[vj, vl]
        if ii[k] and ii[l]:
            A[vk, vl] += ar * Q[0, 1]
            A[vl, vk] = A[vk, vl]

    if announce:
        print(" poisson: solving linear system A uh = b using cg (N=%d unknowns)"
              % N)
    uh = np.zeros(Npts)
    # solve by cg including (weak) test of positive definiteness
    uh[ii], iters, r = cg(A,b,np.zeros(np.shape(b)),tol=1.0e-4,h=h0,test=True)

    # Logical `and` (not bitwise `&`) so any truthy announce value reports.
    if announce and not getsys:
        if np.dot(b, b) > 0.0:
            err = np.sqrt(np.dot(r, r) / np.dot(b, b))
            print(" poisson: cg did %d iterations to get |r|/|b| = %.4e"
                  % (iters, err))
        else:
            print(" poisson: cg did %d iterations" % iters)

    if getsys:
        return uh, inside, A, b
    else:
        return uh, inside
def compute_flow(im1, im2, previous_flow=None, average_derivs=True, flow_iters=10, alpha=1.0):
    """Estimate the flow between two images by iteratively reweighted linear solves.

    See Ce Liu's thesis, appendix A for notation.

    Parameters
    ----------
    im1, im2 :
        Images of identical ``shape``.
    previous_flow :
        Optional earlier flow estimate.  When given, it is resized to
        ``im1.shape`` and used to warp ``im2`` and its derivatives;
        otherwise a fresh ``Flow(im1.shape)`` is used.
    average_derivs : bool
        Average the spatial derivatives of ``im2`` with those of ``im1``.
    flow_iters : int
        Maximum number of outer reweighting iterations.
    alpha : float
        Weight of the spatial term ``L`` in the linear system.

    Returns
    -------
    The updated flow object; its ``u`` and ``v`` fields are incremented by
    the last solved update and then passed through a 5-wide median blur.
    """
    assert im1.shape == im2.shape, "mismatch" + str(im1.shape) + " " + str(im2.shape)

    # compute image derivatives
    Ix, Iy = derivs(im2)

    # warp im2 and derivs by existing flow
    if previous_flow is not None:
        cur_flow = previous_flow.resize(im1.shape)
        I2 = warp(im2, cur_flow)
        Ix = warp(Ix, cur_flow)
        Iy = warp(Iy, cur_flow)
    else:
        cur_flow = Flow(im1.shape)
        I2 = im2

    # Average derivatives
    if average_derivs:
        temp_Ix, temp_Iy = derivs(im1)
        Ix = (Ix + temp_Ix) / 2.0
        Iy = (Iy + temp_Iy) / 2.0

    # temporal derivative
    Iz = I2 - im1
    # Formatted into one string so the output is identical under Python 2
    # and Python 3.
    print("Median Abs error %s" % np.median(np.abs(Iz)))

    # mask nonoverlapping areas
    Iz[np.isnan(Iz)] = 0
    Ix[np.isnan(Ix)] = 0
    Iy[np.isnan(Iy)] = 0

    # setup
    Dx, Dy = deriv_operators(im1.shape)
    Ix = vectorize(Ix)
    Iy = vectorize(Iy)
    Iz = vectorize(Iz)
    U = vectorize(cur_flow.u)
    V = vectorize(cur_flow.v)
    dU = np.zeros_like(U)
    dV = np.zeros_like(V)
    prev_x = None  # warm start for the next linear solve

    for i in range(flow_iters):
        # Compute data and spatial weighting terms
        g = (Dx * (U + dU)) ** 2 + (Dy * (U + dU)) ** 2 + (Dx * (V + dV)) ** 2 + (Dy * (V + dV)) ** 2
        f = (Iz + Ix * dU + Iy * dV) ** 2
        Phi = phi_prime(g)
        Psi = psi_prime(f)

        L = Dx.T * diag(Phi) * Dx + Dy.T * diag(Phi) * Dy

        # 2x2 block system for the stacked unknown (dU; dV)
        UL = diag(Psi * (Ix ** 2)) + alpha * L
        UR = LL = diag(Psi * Ix * Iy)
        LR = diag(Psi * (Iy ** 2)) + alpha * L

        A = s_vstack((s_hstack((UL, UR)), s_hstack((LL, LR)))).tocsc()

        # Diagonal preconditioner.  diagonal() replaces fancy indexing plus
        # the sparse `.A` attribute; zero entries are set to 1 so the
        # reciprocal below is finite.
        di = A.diagonal()
        di[di == 0] = 1.0
        preA = sparse.diags(1.0 / di, 0)

        bU = Psi * Ix * Iz + alpha * L * U
        bL = Psi * Iy * Iz + alpha * L * V
        b = - np.vstack((bU, bL))

        x, st = cg(A, b, x0=prev_x, M=preA, tol=0.05 / np.linalg.norm(b))
        print("%s %s %s" % (i, np.median(np.abs(x)), st))

        n_u = dU.shape[0]
        dU = x[:n_u].reshape(dU.shape)
        dV = x[n_u:].reshape(dU.shape)
        prev_x = x

        # NOTE(review): `st` is whatever cg returns as its second value
        # (presumably a status or iteration count -- confirm against cg).
        if st <= 3 and i > 1:
            break

    cur_flow.u += dU.reshape(cur_flow.u.shape)
    cur_flow.v += dV.reshape(cur_flow.v.shape)
    cur_flow.u = cv2.medianBlur(cur_flow.u, 5)
    cur_flow.v = cv2.medianBlur(cur_flow.v, 5)
    return cur_flow
} } # for every vector ... for pair in b_dict: # ... extract values from dict ... b_value = b_dict[pair]['b_value'] plot_title = b_dict[pair]['title'] save_name = b_dict[pair]['save_name'] print(plot_title) # ... and do some magic t1 = time.monotonic() x_0, R = cg(A, b_value) t2 = time.monotonic() print('{:.4f} seconds with own cg'.format(t2 - t1)) # get the lengths of the residuum and the error R_norm = calc_residuum(R) E = calc_errors(R, A) # plot the results plot_residuum(R_norm, title=plot_title, save_name=save_name) plot_errors(E, title=plot_title, save_name=save_name) plot_residuum_and_errors(R_norm, E, title=plot_title, save_name=save_name)
def learn(env,
          policy_func,
          reward_giver,
          reward_guidance,
          expert_dataset,
          rank,
          pretrained,
          pretrained_weight,
          *,
          g_step,
          d_step,
          entcoeff,
          save_per_iter,
          ckpt_dir,
          log_dir,
          timesteps_per_batch,
          task_name,
          gamma,
          lam,
          algo,
          max_kl,
          cg_iters,
          cg_damping=1e-2,
          vf_stepsize=3e-4,
          d_stepsize=1e-4,
          vf_iters=3,
          max_timesteps=0,
          max_episodes=0,
          max_iters=0,
          loss_percent=0.0,
          callback=None):
    """Alternately train a policy, a reward model and a guidance model under MPI.

    Each outer iteration
      1. runs ``g_step`` policy updates: a step direction from ``cg`` on the
         damped Fisher-vector product, scaled to the ``max_kl`` limit and
         shrunk by a backtracking line search, followed by ``vf_iters``
         passes of Adam on the value-function loss;
      2. updates ``reward_giver`` with ``d_adam`` on minibatches of agent and
         expert data;
      3. updates ``reward_guidance`` with ``guidance_adam`` on expert
         observations selected by ``process_expert(...) >= loss_percent``;
      4. gathers episode statistics across workers and logs them.

    Exactly one of ``max_timesteps``, ``max_episodes`` and ``max_iters`` must
    be positive (asserted below); it determines when the loop stops.  When
    ``ckpt_dir`` is not None the policy is saved with ``pi.save`` under
    ``os.path.join(ckpt_dir, task_name)``.

    Notes on parameters, as far as this body shows:
      * ``rank`` is overwritten with ``MPI.COMM_WORLD.Get_rank()``.
      * ``policy_func``, ``pretrained``, ``d_step``, ``log_dir`` and
        ``save_per_iter`` are not referenced by active code here
        (``save_per_iter`` appears only in a commented-out block).
      * ``pretrained_weight``, when not None, is loaded into the variables of
        ``pi`` via ``U.load_state``.
      * ``callback``, when truthy, is called as
        ``callback(locals(), globals())`` at the top of every iteration.

    Returns None.
    """

    nworkers = MPI.COMM_WORLD.Get_size()
    rank = MPI.COMM_WORLD.Get_rank()
    np.set_printoptions(precision=3)
    # Setup losses and stuff
    # ----------------------------------------
    ob_space = env.observation_space
    ac_space = env.action_space  # not used further in this function

    policy = build_policy(env, 'mlp', value_network='copy')

    # Current ('pi') and previous ('oldpi') policies share one observation
    # placeholder.
    ob = observation_placeholder(ob_space)
    with tf.variable_scope('pi'):
        pi = policy(observ_placeholder=ob)
    with tf.variable_scope('oldpi'):
        oldpi = policy(observ_placeholder=ob)

    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    ac = pi.pdtype.sample_placeholder([None])

    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    entbonus = entcoeff * meanent

    vferr = tf.reduce_mean(tf.square(pi.vf - ret))

    ratio = tf.exp(pi.pd.logp(ac) -
                   oldpi.pd.logp(ac))  # advantage * pnew / pold
    surrgain = tf.reduce_mean(ratio * atarg)

    optimgain = surrgain + entbonus
    losses = [optimgain, meankl, entbonus, surrgain, meanent]
    loss_names = ["optimgain", "meankl", "entloss", "surrgain", "entropy"]

    dist = meankl

    all_var_list = get_trainable_variables('pi')  # not used further below
    # var_list = [v for v in all_var_list if v.name.startswith("pi/pol") or v.name.startswith("pi/logstd")]
    # vf_var_list = [v for v in all_var_list if v.name.startswith("pi/vff")]
    var_list = get_pi_trainable_variables("pi")
    vf_var_list = get_vf_trainable_variables("pi")
    # assert len(var_list) == len(vf_var_list) + 1
    d_adam = MpiAdam(reward_giver.get_trainable_variables())
    guidance_adam = MpiAdam(reward_guidance.get_trainable_variables())
    vfadam = MpiAdam(vf_var_list)

    get_flat = U.GetFlat(var_list)
    set_from_flat = U.SetFromFlat(var_list)
    klgrads = tf.gradients(dist, var_list)
    flat_tangent = tf.placeholder(dtype=tf.float32,
                                  shape=[None],
                                  name="flat_tan")
    # Slice the flat tangent vector into one tensor per policy variable.
    shapes = [var.get_shape().as_list() for var in var_list]
    start = 0
    tangents = []
    for shape in shapes:
        sz = U.intprod(shape)
        tangents.append(tf.reshape(flat_tangent[start:start + sz], shape))
        start += sz
    # Gradient-vector product of the mean KL; its flat gradient (fvp) is what
    # fisher_vector_product evaluates.
    gvp = tf.add_n([
        tf.reduce_sum(g * tangent)
        for (g, tangent) in zipsame(klgrads, tangents)
    ])  # pylint: disable=E1111
    fvp = U.flatgrad(gvp, var_list)

    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv,
                 newv) in zipsame(get_variables('oldpi'), get_variables('pi'))
        ])
    compute_losses = U.function([ob, ac, atarg], losses)
    compute_lossandgrad = U.function([ob, ac, atarg], losses +
                                     [U.flatgrad(optimgain, var_list)])
    compute_fvp = U.function([flat_tangent, ob, ac, atarg], fvp)
    compute_vflossandgrad = U.function([ob, ret],
                                       U.flatgrad(vferr, vf_var_list))

    @contextmanager
    def timed(msg):
        # Print the label and elapsed wall time on rank 0 only; other ranks
        # just run the wrapped block.
        if rank == 0:
            print(colorize(msg, color='magenta'))
            tstart = time.time()
            yield
            print(
                colorize("done in %.3f seconds" % (time.time() - tstart),
                         color='magenta'))
        else:
            yield

    def allmean(x):
        # Element-wise mean of x over all MPI workers (sum, then / nworkers).
        assert isinstance(x, np.ndarray)
        out = np.empty_like(x)
        MPI.COMM_WORLD.Allreduce(x, out, op=MPI.SUM)
        out /= nworkers
        return out

    U.initialize()
    # Broadcast rank 0's flat policy parameters so every worker starts equal.
    th_init = get_flat()
    MPI.COMM_WORLD.Bcast(th_init, root=0)
    set_from_flat(th_init)
    d_adam.sync()
    guidance_adam.sync()
    vfadam.sync()
    if rank == 0:
        print("Init param sum", th_init.sum(), flush=True)

    # Prepare for rollouts
    # ----------------------------------------
    seg_gen = traj_segment_generator(pi,
                                     env,
                                     reward_giver,
                                     reward_guidance,
                                     timesteps_per_batch,
                                     stochastic=True,
                                     algo=algo,
                                     loss_percent=loss_percent)

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=40)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=40)  # rolling buffer for episode rewards
    true_rewbuffer = deque(maxlen=40)

    # Exactly one stopping criterion must be active.
    assert sum([max_iters > 0, max_timesteps > 0, max_episodes > 0]) == 1

    # The three stats objects below are created but not used again here.
    g_loss_stats = stats(loss_names)
    d_loss_stats = stats(reward_giver.loss_name)
    ep_stats = stats(["True_rewards", "Rewards", "Episode_length"])
    # if provide pretrained weight
    if pretrained_weight is not None:
        U.load_state(pretrained_weight, var_list=pi.get_variables())

    while True:
        if callback: callback(locals(), globals())
        if max_timesteps and timesteps_so_far >= max_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break

        # Save model
        # if rank == 0 and iters_so_far % save_per_iter == 0 and ckpt_dir is not None:
        #     fname = os.path.join(ckpt_dir, task_name)
        #     os.makedirs(os.path.dirname(fname), exist_ok=True)
        #     saver = tf.train.Saver()
        #     saver.save(tf.get_default_session(), fname)

        logger.log("********** Iteration %i ************" % iters_so_far)

        # global flag_render
        # if iters_so_far > 0 and iters_so_far % 10 ==0:
        #     flag_render = True
        # else:
        #     flag_render = False

        def fisher_vector_product(p):
            # fvpargs is looked up when this is called; it is assigned inside
            # the policy-update loop below before the first call.
            return allmean(compute_fvp(p, *fvpargs)) + cg_damping * p

        # ------------------ Update G ------------------
        logger.log("Optimizing Policy...")
        for _ in range(g_step):
            with timed("sampling"):
                seg = seg_gen.__next__()
            print('rewards', seg['rew'])
            add_vtarg_and_adv(seg, gamma, lam)
            # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets))
            # From here on ob, ac and atarg hold sampled data rather than the
            # placeholders defined above (the compiled functions are
            # already built).
            ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg[
                "tdlamret"]
            vpredbefore = seg[
                "vpred"]  # predicted value function before update
            atarg = (atarg - atarg.mean()) / atarg.std(
            )  # standardized advantage function estimate

            if hasattr(pi, "ob_rms"):
                pi.ob_rms.update(ob)  # update running mean/std for policy

            args = seg["ob"], seg["ac"], atarg
            # Fisher-vector products use every 5th sample only.
            fvpargs = [arr[::5] for arr in args]

            assign_old_eq_new(
            )  # set old parameter values to new parameter values
            with timed("computegrad"):
                *lossbefore, g = compute_lossandgrad(*args)
            lossbefore = allmean(np.array(lossbefore))
            g = allmean(g)
            if np.allclose(g, 0):
                logger.log("Got zero gradient. not updating")
            else:
                with timed("cg"):
                    stepdir = cg(fisher_vector_product,
                                 g,
                                 cg_iters=cg_iters,
                                 verbose=rank == 0)
                assert np.isfinite(stepdir).all()
                # Scale the step direction by lm = sqrt(shs / max_kl).
                shs = .5 * stepdir.dot(fisher_vector_product(stepdir))
                lm = np.sqrt(shs / max_kl)
                # logger.log("lagrange multiplier:", lm, "gnorm:", np.linalg.norm(g))
                fullstep = stepdir / lm
                expectedimprove = g.dot(fullstep)
                surrbefore = lossbefore[0]
                stepsize = 1.0
                thbefore = get_flat()
                # Backtracking line search: halve the step up to 10 times
                # until losses are finite, KL <= 1.5 * max_kl and the
                # surrogate does not decrease.
                for _ in range(10):
                    thnew = thbefore + fullstep * stepsize
                    set_from_flat(thnew)
                    meanlosses = surr, kl, *_ = allmean(
                        np.array(compute_losses(*args)))
                    improve = surr - surrbefore
                    logger.log("Expected: %.3f Actual: %.3f" %
                               (expectedimprove, improve))
                    if not np.isfinite(meanlosses).all():
                        logger.log("Got non-finite value of losses -- bad!")
                    elif kl > max_kl * 1.5:
                        logger.log("violated KL constraint. shrinking step.")
                    elif improve < 0:
                        logger.log("surrogate didn't improve. shrinking step.")
                    else:
                        logger.log("Stepsize OK!")
                        break
                    stepsize *= .5
                else:
                    # No acceptable step found: restore the old parameters.
                    logger.log("couldn't compute a good step")
                    set_from_flat(thbefore)
                # Every 20th iteration, check that all workers hold the same
                # parameter sums.
                if nworkers > 1 and iters_so_far % 20 == 0:
                    paramsums = MPI.COMM_WORLD.allgather(
                        (thnew.sum(),
                         vfadam.getflat().sum()))  # list of tuples
                    assert all(
                        np.allclose(ps, paramsums[0]) for ps in paramsums[1:])
            with timed("vf"):
                for _ in range(vf_iters):
                    for (mbob, mbret) in dataset.iterbatches(
                        (seg["ob"], seg["tdlamret"]),
                            include_final_partial_batch=False,
                            batch_size=128):
                        if hasattr(pi, "ob_rms"):
                            pi.ob_rms.update(
                                mbob)  # update running mean/std for policy
                        g = allmean(compute_vflossandgrad(mbob, mbret))
                        vfadam.update(g, vf_stepsize)

        # NOTE(review): meanlosses is only assigned inside the line search;
        # if the very first gradient is all-close to zero it would be
        # undefined here -- confirm whether that case can occur.
        g_losses = meanlosses
        for (lossname, lossval) in zip(loss_names, meanlosses):
            logger.record_tabular(lossname, lossval)
        logger.record_tabular("ev_tdlam_before",
                              explained_variance(vpredbefore, tdlamret))
        # ------------------ Update D ------------------
        logger.log("Optimizing Discriminator...")
        logger.log(fmt_row(13, reward_giver.loss_name))
        # This first expert batch is replaced inside the loop below before
        # it is used.
        ob_expert, ac_expert = expert_dataset.get_next_batch(
            batch_size=len(ob))
        batch_size = 128
        d_losses = [
        ]  # list of tuples, each of which gives the loss for a minibatch
        with timed("Discriminator"):
            for (ob_batch, ac_batch) in dataset.iterbatches(
                (ob, ac),
                    include_final_partial_batch=False,
                    batch_size=batch_size):
                ob_expert, ac_expert = expert_dataset.get_next_batch(
                    batch_size=batch_size)
                # update running mean/std for reward_giver
                if hasattr(reward_giver, "obs_rms"):
                    reward_giver.obs_rms.update(
                        np.concatenate((ob_batch, ob_expert), 0))
                *newlosses, g = reward_giver.lossandgrad(ob_batch, ob_expert)
                d_adam.update(allmean(g), d_stepsize)
                d_losses.append(newlosses)
        logger.log(fmt_row(13, np.mean(d_losses, axis=0)))

        # ------------------ Update Guidance ------------
        logger.log("Optimizing Guidance...")
        logger.log(fmt_row(13, reward_guidance.loss_name))
        batch_size = 128
        guidance_losses = [
        ]  # list of tuples, each of which gives the loss for a minibatch
        with timed("Guidance"):
            # The agent minibatch only sets the number of updates; the
            # guidance loss is computed from expert data and policy actions.
            for ob_batch, ac_batch in dataset.iterbatches(
                (ob, ac),
                    include_final_partial_batch=False,
                    batch_size=batch_size):
                ob_expert, ac_expert = expert_dataset.get_next_batch(
                    batch_size=batch_size)

                # Keep only the expert samples whose process_expert value is
                # at least loss_percent.
                idx_condition = process_expert(ob_expert, ac_expert)
                pick_idx = (idx_condition >= loss_percent)
                # pick_idx = idx_condition

                ob_expert_p = ob_expert[pick_idx]
                ac_expert_p = ac_expert[pick_idx]

                # Sample one policy action per selected expert observation.
                ac_batch_p = []
                for each_ob in ob_expert_p:
                    tmp_ac, _, _, _ = pi.step(each_ob, stochastic=True)
                    ac_batch_p.append(tmp_ac)

                # update running mean/std for reward_guidance
                if hasattr(reward_guidance, "obs_rms"):
                    reward_guidance.obs_rms.update(ob_expert_p)
                # reward_guidance.train(expert_s=ob_batch_p, agent_a=ac_batch_p, expert_a=ac_expert_p)
                *newlosses, g = reward_guidance.lossandgrad(
                    ob_expert_p, ac_batch_p, ac_expert_p)
                guidance_adam.update(allmean(g), d_stepsize)
                guidance_losses.append(newlosses)
        logger.log(fmt_row(13, np.mean(guidance_losses, axis=0)))

        # Gather episode statistics of the last sampled segment from all
        # workers.
        lrlocal = (seg["ep_lens"], seg["ep_rets"], seg["ep_true_rets"]
                   )  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews, true_rets = map(flatten_lists, zip(*listoflrpairs))
        true_rewbuffer.extend(true_rets)
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)

        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpTrueRewMean", np.mean(true_rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        # The timestep count of the last segment is multiplied by g_step.
        timesteps_so_far += sum(lens) * g_step
        iters_so_far += 1

        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)

        if rank == 0:
            logger.dump_tabular()

    # NOTE(review): placed after the training loop as a final save; the
    # nesting of this block should be confirmed against the original file.
    if ckpt_dir is not None:
        print('saving...')
        fname = os.path.join(ckpt_dir, task_name)
        os.makedirs(os.path.dirname(fname), exist_ok=True)
        pi.save(fname)
        print('save completely and the path:', fname)
Actor_Learning_rate, Critic_Learning_rate, Tau, trajectory_number=100, update_epoach=50) if args.obj == 'quadratic': obj = Quadratic(dim) elif args.obj == 'logistic': obj = Logistic(dim, X, Y) elif args.obj == 'ackley': obj = Ackley(dim) elif args.obj == 'neural': obj = NeuralNet(dim, X, Y, **kwargs) cg_x, cg_y, _, cg_iter, _ = cg(obj, x0=init_point, maxiter=max_iter) print('CG method:\n optimal point: {0}, optimal value: {1}, iterations {2}'. format(cg_x, cg_y, cg_iter)) sd_x, sd_y, _, sd_iter, _ = sd(obj, x0=init_point, maxiter=max_iter) print('SD method:\n optimal point: {0}, optimal value: {1}, iterations {2}'. format(sd_x, sd_y, sd_iter)) bfgs_x, bfgs_y, _, bfgs_iter, _ = quasiNewton(obj, x0=init_point, maxiter=max_iter) print('bfgs method:\n optimal point: {0}, optimal value: {1}, iterations {2}'. format(bfgs_x, bfgs_y, bfgs_iter)) if args.agent == 'naf': agent = train(naf, max_epoch, max_iter) elif args.agent == 'ddpg': agent = train(ddpg, max_epoch, max_iter)
def poisson(f_rhs, f_dist, h0, pts, tri, *args, **kwargs):
    """Solve Poisson's equation on a domain D by the FE method.

    Solves
         - Laplacian u = f_rhs
    on D and
         u = 0
    on the boundary of D, on a triangulation described by points pts,
    triangles tri, a mesh scale h0 and a signed distance function f_dist;
    see py_distmesh2d.py.  See fem_examples.py for examples.

    Parameters
    ----------
    f_rhs : callable
        Right-hand side, evaluated once per node as f_rhs(pts[j], *args).
    f_dist : callable
        Signed distance function, called as f_dist(pts, *args).  Nodes whose
        value is below -0.001*h0 are treated as interior.
    h0 : float
        Mesh scale; also forwarded to cg as h.
    pts : array
        Node coordinates, indexed as pts[node, 0] and pts[node, 1].
    tri : array
        One row of three node indices per triangle.
    *args
        Extra arguments forwarded to f_rhs and f_dist.
    **kwargs
        announce (default False): print progress messages.
        getsys (default False): additionally return the assembled system.

    Returns
    -------
    uh : approximate solution value at pts (zero at non-interior nodes)
    inside : index of interior point (or -1 if not interior)
    A, b : stiffness matrix and load vector, only when getsys is true
    """
    announce = kwargs.get('announce', False)
    getsys = kwargs.get('getsys', False)
    geps = 0.001 * h0
    ii = (f_dist(pts, *args) < -geps)  # boolean array for interior nodes
    Npts = np.shape(pts)[0]  # = number of nodes
    N = ii.sum()  # = number of *interior* nodes
    if announce:
        print(" poisson: assembling on mesh with %d nodes and %d interior nodes"
              % (Npts, N))

    # index only the interior nodes: consecutive numbers in node order,
    # -1 everywhere else
    inside = np.full(Npts, -1, dtype=np.int32)
    inside[ii] = np.arange(N, dtype=np.int32)

    # eval f_rhs once for each node
    ff = np.zeros(Npts)
    for j in range(Npts):
        ff[j] = f_rhs(pts[j], *args)

    # loop over triangles to set up stiffness matrix A and load vector b
    # NOTE: not using sparse matrices at all
    A = np.zeros((N, N))
    b = np.zeros(N)
    for n in range(np.shape(tri)[0]):
        # indices, coordinates, and Jacobian of triangle
        j, k, l = tri[n, :]
        vj = inside[j]
        vk = inside[k]
        vl = inside[l]
        Jac = np.array([[pts[k, 0] - pts[j, 0], pts[l, 0] - pts[j, 0]],
                        [pts[k, 1] - pts[j, 1], pts[l, 1] - pts[j, 1]]])
        ar = abs(det2(Jac)) / 2.0
        C = ar / 12.0
        Q = inv2(np.dot(Jac.transpose(), Jac))
        fT = np.array([ff[j], ff[k], ff[l]])

        # add triangle's contribution to linear system  A x = b
        if ii[j]:
            A[vj, vj] += ar * np.sum(Q)
            b[vj] += C * np.dot(fT, np.array([2, 1, 1]))
        if ii[k]:
            A[vk, vk] += ar * Q[0, 0]
            b[vk] += C * np.dot(fT, np.array([1, 2, 1]))
        if ii[l]:
            A[vl, vl] += ar * Q[1, 1]
            b[vl] += C * np.dot(fT, np.array([1, 1, 2]))
        # off-diagonal entries: accumulate one triangle half and mirror it
        if ii[j] and ii[k]:
            A[vj, vk] -= ar * np.sum(Q[:, 0])
            A[vk, vj] = A[vj, vk]
        if ii[j] and ii[l]:
            A[vj, vl] -= ar * np.sum(Q[:, 1])
            A[vl, vj] = A[vj, vl]
        if ii[k] and ii[l]:
            A[vk, vl] += ar * Q[0, 1]
            A[vl, vk] = A[vk, vl]

    if announce:
        print(
            " poisson: solving linear system A uh = b using cg (N=%d unknowns)"
            % N)
    uh = np.zeros(Npts)
    # solve by cg including (weak) test of positive definiteness
    uh[ii], iters, r = cg(A,
                          b,
                          np.zeros(np.shape(b)),
                          tol=1.0e-4,
                          h=h0,
                          test=True)

    # Logical `and` (not bitwise `&`) so any truthy announce value reports.
    if announce and not getsys:
        if np.dot(b, b) > 0.0:
            err = np.sqrt(np.dot(r, r) / np.dot(b, b))
            print(" poisson: cg did %d iterations to get |r|/|b| = %.4e" %
                  (iters, err))
        else:
            print(" poisson: cg did %d iterations" % iters)

    if getsys:
        return uh, inside, A, b
    else:
        return uh, inside