def test_get_values(self):
    self.assertEqual(utils.get_values(self.table, 'arg1', [0, 2, 3]),
                     ['left', 'right', 'right'])
    self.assertEqual(utils.get_values(self.table, 'arg3', [1, 3]), ['yes', 'no'])
    self.assertEqual(utils.get_values(self.table, 'arg1', []), [])
    self.assertEqual(utils.get_values(self.table, 'arg1', [9, 12]), [])
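# A minimal sketch of the utils.get_values helper that the test above implies:
# given a table, a column name, and row indices, return that column's values at
# those rows, silently skipping out-of-range indices (so [9, 12] yields []).
# The header-row layout and the index handling are assumptions read off the
# assertions, not the project's confirmed implementation.
def get_values(table, header, indices):
    col = table[0].index(header)  # header row gives the column offset
    rows = table[1:]              # remaining rows hold the data
    return [rows[i][col] for i in indices if 0 <= i < len(rows)]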
def preform_regression(test_num, trials):
    trial = 0
    accuracy = 0
    # Load csv into variable
    data = u.read_csv("final_training_dataset.csv")
    # Get required information and get predictions with regression
    watch_price = u.get_values(data[1:], u.get_index(data, 'price'))
    watch_deal = u.normalize_deal(
        u.get_values(data[1:], u.get_index(data, 'deal_type')))
    # Run exactly `trials` trials
    while trial < trials:
        random_instances = get_random_instances(
            test_num, data[1:])  # Don't include header
        guessed_deals = least_squares_regression(
            watch_price, watch_deal,
            u.get_values(random_instances, u.get_index(data, 'price')), False)
        guessed_deals = u.classify_deal(guessed_deals)
        # Get actual deal_type values of the random instances
        actual_deals = u.get_values(random_instances,
                                    u.get_index(data, 'deal_type'))
        for i in range(len(random_instances)):
            if guessed_deals[i] == actual_deals[i]:
                accuracy += 1
        trial += 1
    return accuracy
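# A hedged sketch of the least_squares_regression helper called above: a plain
# one-variable ordinary-least-squares fit that predicts y for new x values.
# The signature (train_x, train_y, test_x, plot flag) is inferred from the
# call site; the project's real version may differ.
def least_squares_regression(train_x, train_y, test_x, plot=False):
    n = len(train_x)
    mean_x = sum(train_x) / n
    mean_y = sum(train_y) / n
    # slope m = cov(x, y) / var(x); intercept b = mean_y - m * mean_x
    m = (sum((x - mean_x) * (y - mean_y) for x, y in zip(train_x, train_y))
         / sum((x - mean_x) ** 2 for x in train_x))
    b = mean_y - m * mean_x
    return [m * x + b for x in test_x]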
def scatter_plot(myList, attributeX, attributeY, filename, title, xLabel, yLabel):
    plt.figure(figsize=(20, 7))
    data = myList[1:]
    x = u.get_values(data, u.get_index(myList, attributeX))
    y = u.get_values(data, u.get_index(myList, attributeY))
    plt.scatter(x, y, marker='.')
    label_plt(title, xLabel, yLabel)
    plt.savefig(filename)
    plt.close()
def rollout_parallel(rolloutmem, envs, actor, critic, params):
    # parallelization method_1
    episodes_rewards = []
    episode_number = []
    data = ray.get([
        rollout_sim_single_step_parallel.remote(i, envs[i],
                                                copy.deepcopy(actor),
                                                params.policy_params.horizon)
        for i in range(params.policy_params.envs_num)
    ])
    for episode in data:
        old_states, new_states, raw_actions, rewards, dones, log_probs, rollout_reward = \
            torch.Tensor(episode[0]).cuda(), torch.Tensor(episode[1]).cuda(), torch.stack(episode[2]).detach().cuda(), \
            torch.Tensor(episode[3]).cuda(), torch.Tensor(episode[4]).cuda(), torch.stack(episode[5]).detach().cuda(), \
            torch.Tensor([episode[6]]).cuda()
        gae_deltas = critic.gae_delta(old_states, new_states, rewards,
                                      params.policy_params.discount)
        advantages = get_advantage_new(
            gae_deltas, params.policy_params.discount,
            params.policy_params.lambd).detach().cuda()
        values = get_values(rewards, params.policy_params.discount).cuda()
        if len(advantages.shape) == 1:
            advantages = advantages[:, None]
        if len(values.shape) == 1:
            values = values[:, None]
        rolloutmem.append(old_states, new_states, raw_actions, rewards, dones,
                          log_probs, advantages, values)
        episodes_rewards.append(rollout_reward)
        episode_number.append(len((dones == 1).nonzero()))
    return torch.mean(
        torch.Tensor([
            episodes_rewards[i] / max(episode_number[i], 1)
            for i in range(len(envs))
        ]))
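# In the rollout functions here, get_values(rewards, discount) appears to
# compute discounted returns-to-go, V_t = sum_k discount^k * r_{t+k}, with
# time as the last tensor axis. A minimal sketch consistent with the call
# sites above; the repository's actual implementation is not shown.
import torch

def get_values(rewards, discount):
    values = torch.zeros_like(rewards)
    running = torch.zeros_like(rewards[..., 0])
    # walk the time axis backwards, accumulating the discounted sum
    for t in reversed(range(rewards.shape[-1])):
        running = rewards[..., t] + discount * running
        values[..., t] = running
    return values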
def name_isexit():
    while True:
        keys = get_keys()
        for key in keys:
            dict1 = get_values(key)
            write_log(str(dict1))
            # entries may lack any of these fields; a failed lookup or decode
            # just skips that message rather than crashing the loop
            try:
                copywriting = dict1[b'copywriting'].decode('utf-8')
                rooms = [dict1[b'room'].decode('utf-8')]
            except Exception:
                pass
            try:
                photo_path = dict1[b'photo_path'].decode('utf-8')
                if photo_path:
                    send_photo(copywriting, photo_path, rooms)
                    del_key(key)
            except Exception:
                pass
            try:
                text = dict1[b'text'].decode('utf-8')
                if text:
                    send_text(copywriting, text, rooms)
                    del_key(key)
            except Exception:
                pass
        time.sleep(60)
def get_ridgeplot_fig(df, distance='total_distance', nvals='all'):
    clubs, xmin, xmax = utils.get_clubs(df)
    colors = n_colors('rgb(242, 139, 0)', 'rgb(206, 0, 0)', 12, colortype='rgb')
    fig = go.Figure()
    for club, color in zip(clubs, colors):
        name = utils.club_enum[club]
        array = df.groupby('club').get_group(club)[distance].values
        data = utils.get_values(array, nvals)
        fig.add_trace(go.Violin(x=data, name=name, line_color=color))
    fig.update_traces(
        orientation='h',
        side='positive',
        width=3,
        points=False,
    )
    fig.update_layout(
        margin=dict(t=30, r=10, b=10, l=10),
        xaxis_showgrid=True,
        xaxis_zeroline=False,
        showlegend=False,
        xaxis=dict(
            range=[xmin - 10, xmax + 20],
            tickmode='linear',
            tick0=0,
            dtick=10,
        ),
    )
    return fig
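# In the plotting helpers of this collection, utils.get_values(array, nvals)
# seems to subsample a 1-D array of shots: everything when nvals == 'all',
# otherwise only the nvals most recent entries. This reading is an assumption
# based on the 'all' default and the call sites, not confirmed behavior.
def get_values(array, nvals):
    if nvals == 'all':
        return array
    return array[-int(nvals):]  # keep only the last nvals entries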
def compute_healing_length(self):
    threshold = self.healing_threshold
    if self.phi_at_infty == 'vev':
        delta_Phi = self.physics.Vev - self.Phi(0)
    elif self.phi_at_infty == 'zero':
        delta_Phi = -self.Phi(0)
    # get bracket for the healing length:
    # mesh points to the left and right of the true healing length
    r_values, Phi_values = get_values(self.Phi, output_mesh=True)
    idx = np.where(Phi_values > self.Phi(0) + threshold * delta_Phi)[0][0]
    left = r_values[idx - 1]
    right = r_values[idx]
    # now find the healing length in that bracket using the Brent method
    F = lambda r: self.Phi(r) - self.Phi(0) - threshold * delta_Phi
    try:
        r_healing = brentq(F, left, right)
    except ValueError:
        # brentq raises ValueError when F does not change sign on [left, right]
        r_healing = np.nan
    # rescaled and physical
    self.r_healing = r_healing
    self.R_healing = r_healing / self.mn
def parallel_rollout_sim(env_name, env_number, horizon):
    envs = [gym.make(env_name) for _ in range(env_number)]
    actor = gen_actor(env_name, 512)
    critic = gen_critic(env_name, 512)
    rolloutmem = RolloutMemory(env_number * horizon, env_name)
    time_start = time.time()
    episodes_rewards = []
    data = ray.get([
        rollout_sim_single_step_parallel.remote(i, env_name, horizon, None, None)
        for i in range(env_number)
    ])
    time_end = time.time()
    for episode in data:
        old_states, new_states, raw_actions, rewards, dones, log_probs, episode_reward = \
            torch.Tensor(episode[0]).cuda(), torch.Tensor(episode[1]).cuda(), torch.stack(episode[2]).detach().cuda(), \
            torch.Tensor(episode[3]).cuda(), torch.Tensor(episode[4]).cuda(), torch.stack(episode[5]).detach().cuda(), \
            torch.Tensor([episode[6]]).cuda()
        gae_deltas = critic.gae_delta(old_states, new_states, rewards, 0.99)
        advantages = torch.Tensor(get_advantage_new(gae_deltas, 0.99, 0.95)).cuda()
        values = get_values(rewards, 0.99).cuda()
        if len(advantages.shape) == 1:
            advantages = advantages[:, None]
        if len(values.shape) == 1:
            values = values[:, None]
        rolloutmem.append(old_states, new_states, raw_actions, rewards, dones,
                          log_probs, advantages, values)
        episodes_rewards.append(episode_reward)
    time_reformat = time.time()
    print(
        "parallel_time: {}, reformat_time: {:.3f}\nrollout_time: {:.3f}\n"
        "data_len: {}\navgR: {:.3f}\nsaved_step_num: {}\n\n".format(
            time_end - time_start, time_reformat - time_end,
            time_reformat - time_start, len(data),
            torch.mean(torch.Tensor(episodes_rewards)), rolloutmem.offset))
    return torch.mean(torch.Tensor(episodes_rewards)), time_end - time_start
def init_from_params(self, bestfit, nwalkers, pdf='gaussian_diag', seed=None):
    self.logger.info('Setting initial values and errors from parameters.')
    values = utils.get_values(self.fitargs)
    errors = utils.get_errors(self.fitargs)
    self.fitted_values = [values[v] for v in self.fitted]
    self.fitted_errors = [errors[v] for v in self.fitted]
    self.fixed_values = {v: values[v] for v in self.fixed}
    self.reset()
def precalc_logit_moments(mus, sigmas):
    keys = list(itertools.product(mus, sigmas))
    old = utils.get_values(keys, LOGIT_FNAME)
    for mu, sigma in tqdm.tqdm(keys):
        if (mu, sigma) not in old:
            avg, var = logit_norm_moments(mu, sigma)
            old[(mu, sigma)] = (avg, var)
    utils.dump_values(old, LOGIT_FNAME)
    LOGIT_CACHE.update(old)
    return old
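# The logit-normal mean and variance cached above have no closed form, so
# logit_norm_moments presumably evaluates them numerically. A sketch under
# that assumption, using Gauss-Hermite quadrature over the underlying normal;
# the function name matches the call site but the method is a guess.
import numpy as np

def logit_norm_moments(mu, sigma, n_points=200):
    # E[f(sigmoid(Z))] for Z ~ N(mu, sigma^2) via probabilists' Gauss-Hermite
    nodes, weights = np.polynomial.hermite_e.hermegauss(n_points)
    x = mu + sigma * nodes               # map standard-normal nodes to N(mu, sigma^2)
    p = 1.0 / (1.0 + np.exp(-x))         # logistic sigmoid
    w = weights / np.sqrt(2.0 * np.pi)   # normalize weights to a probability measure
    avg = np.sum(w * p)
    var = np.sum(w * (p - avg) ** 2)
    return avg, var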
def main(args):
    numbers = get_values(args[0])
    if len(numbers) <= 0:
        sys.exit(84)
    print(len(numbers), " element", sep="", end="")
    print("s" if len(numbers) > 1 else "")
    selection(numbers[::])
    insertion(numbers[::])
    bubble(numbers[::])
    print_res("Quicksort:",
              0 if len(numbers) <= 1 else quicksort(numbers[::])[1])
    print_res("Merge sort:", merge(numbers[::])[1])
def rollout_serial(rolloutmem, envs, actor, critic, params):
    episodes_rewards = []
    # collect episodes from different environments
    for env in envs:
        old_states, new_states, raw_actions, dones, rewards, log_probs, advantages, episode_reward \
            = [], [], [], [], [], [], [], 0.
        # collect one episode from current env
        old_state = env.reset()
        for step in range(params.policy_params.horizon):
            # act one step in current environment
            action, log_prob, raw_action = actor.gen_action(
                torch.Tensor(old_state).cuda())
            assert (env.action_space.low < np.array(action)).all() and (
                np.array(action) < env.action_space.high).all()
            new_state, reward, done, info = env.step(
                action.cpu() if hasattr(action, 'cpu') else action)
            time.sleep(.002)  # check this issue: https://github.com/openai/mujoco-py/issues/340
            # record trajectory step
            old_states.append(old_state)
            new_states.append(new_state)
            raw_actions.append(raw_action.view(-1))
            rewards.append(reward)
            dones.append(done)
            log_probs.append(log_prob.view(-1))
            episode_reward += reward
            # update old observation
            old_state = new_state
            if done:
                break
        dones[-1] = True
        # reformat trajectory step
        old_states, new_states, raw_actions, rewards, dones, log_probs = \
            torch.Tensor(old_states).cuda(), torch.Tensor(new_states).cuda(), torch.stack(raw_actions).detach().cuda(), \
            torch.Tensor(rewards).cuda(), torch.Tensor(dones).cuda(), torch.stack(log_probs).detach().cuda()
        # compute loss factors
        gae_deltas = critic.gae_delta(old_states, new_states, rewards,
                                      params.policy_params.discount)
        for t in range(len(gae_deltas)):
            advantages.append(
                get_advantage(t, gae_deltas, params.policy_params.discount,
                              params.policy_params.lambd))
        advantages = torch.Tensor(advantages).cuda()
        values = get_values(rewards, params.policy_params.discount).cuda()
        if len(advantages.shape) == 1:
            advantages = advantages[:, None]
        if len(values.shape) == 1:
            values = values[:, None]
        # store epoch
        rolloutmem.append(old_states, new_states, raw_actions, rewards, dones,
                          log_probs, advantages, values)
        # record epoch reward
        episodes_rewards.append(episode_reward)
    return torch.mean(torch.Tensor(episodes_rewards))
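# get_advantage(t, deltas, discount, lambd) above reads like per-step GAE:
# A_t = sum_k (discount * lambd)^k * delta_{t+k}. A sketch on that reading;
# the repository's helper (and its vectorized cousin get_advantage_new) may
# be implemented differently.
def get_advantage(t, gae_deltas, discount, lambd):
    advantage = 0.
    for k in range(t, len(gae_deltas)):
        advantage += (discount * lambd) ** (k - t) * float(gae_deltas[k])
    return advantage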
def parallel_rollout_env(envs, actor, critic, rolloutmem, horizon):
    # interact
    time_start = time.time()
    env_number = len(envs)
    old_states, new_states, raw_actions, dones, rewards, log_probs, advantages, episode_reward \
        = [], [], [], [], [], [], [], [0] * env_number
    old_state = ray.get([env.reset.remote() for env in envs])
    rolloutmem.reset()
    for step in range(horizon):
        # interact
        action, log_prob, raw_action = actor.gen_action(
            torch.Tensor(old_state).cuda())
        step_obs_batch = ray.get([
            envs[i].step.remote(action[i]) for i in range(env_number)
        ])  # new_obs, reward, done, info
        # parse interact results
        new_state = [step_obs[0] for step_obs in step_obs_batch]
        reward = [step_obs[1] for step_obs in step_obs_batch]
        done = [step_obs[2] for step_obs in step_obs_batch]
        # record parsed results
        old_states.append(old_state)
        new_states.append(new_state)
        raw_actions.append(raw_action)
        rewards.append(reward)
        dones.append(done)
        log_probs.append(log_prob)
        episode_reward = [
            float(reward[i]) + episode_reward[i] for i in range(len(reward))
        ]
        # update old observation
        old_state = new_state
        if np.array(done).all():
            break
    dones[-1] = [True] * env_number
    old_states = torch.Tensor(old_states).permute(1, 0, 2).cuda()
    new_states = torch.Tensor(new_states).permute(1, 0, 2).cuda()
    raw_actions = torch.stack(raw_actions).permute(1, 0, 2).detach().cuda()
    rewards = torch.Tensor(rewards).permute(1, 0).cuda()
    dones = torch.Tensor(dones).permute(1, 0).cuda()
    log_probs = torch.stack(log_probs).permute(1, 0, 2).detach().cuda()
    gae_deltas = critic.gae_delta(old_states, new_states, rewards, .99).cuda()
    advantages = get_advantage_new(gae_deltas, .99, .95).cuda()
    values = get_values(rewards, .99).cuda()
    advantages = advantages[:, :, None]
    values = values[:, :, None]
    # rolloutmem.append(old_states, new_states, raw_actions, rewards, dones, log_probs, advantages, values)
    for i in range(env_number):
        # abandon redundant step info
        first_done = (dones[i] > 0).nonzero().min()
        rolloutmem.append(old_states[i][:first_done + 1],
                          new_states[i][:first_done + 1],
                          raw_actions[i][:first_done + 1],
                          rewards[i][:first_done + 1],
                          dones[i][:first_done + 1],
                          log_probs[i][:first_done + 1],
                          advantages[i][:first_done + 1],
                          values[i][:first_done + 1])
    print("    rollout_time: {}".format(time.time() - time_start))
    return torch.mean(torch.Tensor(episode_reward))
def get_boxplot_fig(df, distance='total_distance', axis='yaxis', nvals='all'):
    colors = utils.big_rainbow(len(df.club.unique()))
    # Get an ordered list of clubs, excluding clubs with no data
    clubs = [club for club in utils.club_enum.keys() if club in df.club.unique()]
    clubs.reverse()
    xmin = df.carry_distance.min() - 10
    xmax = df.total_distance.max() + 10
    # clubs, xmin, xmax = utils.get_clubs(df)
    fig = go.Figure()
    for club, color in zip(clubs, colors):
        values = df.groupby('club').get_group(club)[distance].values
        values = utils.get_values(values, nvals)
        name = utils.club_enum[club]
        if axis == 'yaxis':
            fig.add_trace(go.Box(y=values, name=name, marker_color=color))
        else:
            fig.add_trace(go.Box(x=values, name=name, marker_color=color))
    fig.update_layout(
        # height=300,
        margin=dict(t=30, r=10, b=10, l=10),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
        ),
        showlegend=False,
    )
    # Set properties on the selected axis
    axis_kwargs = {'range': [xmin, xmax], 'tickmode': 'linear',
                   'tick0': 0, 'dtick': 10}
    if axis == 'yaxis':
        fig.update_layout(yaxis=axis_kwargs)
    else:
        fig.update_layout(xaxis=axis_kwargs)
    return fig
def compute_derrick(self):
    D = self.physics.D
    r = Expression('x[0]', degree=self.fem.func_degree)
    # r^(D-1)
    rD = Expression('pow(x[0],D-1)', D=D, degree=self.fem.func_degree)
    mu, M = Constant(self.physics.mu), Constant(self.physics.M)
    lam = Constant(self.physics.lam)
    mn = Constant(self.mn)
    r_values, Phi_values = get_values(self.Phi, output_mesh=True)

    # The numerical value of the potential energy goes below the machine
    # precision on its biggest component after some radius (at those radii the
    # biggest component is the vacuum energy). Analytically, we know the
    # integral over that and bigger radii should be close to 0. To avoid
    # integrating numerical noise (and blowing it up by r^D) we restrict
    # integration to the submesh where the potential energy is resolved
    # within machine precision.

    # find radius at which potential energy drops below machine precision
    vacuum_energy = self.physics.mu**4 / (4. * self.physics.lam)
    eV = lam / 4. * self.Phi**4 - mu**2 / 2. * self.Phi**2 + Constant(vacuum_energy)
    eV_values = self.physics.lam / 4. * Phi_values**4 \
        - self.physics.mu**2 / 2. * Phi_values**2 + vacuum_energy
    eV_idx_wrong = np.where(eV_values < d.DOLFIN_EPS * vacuum_energy)[0][0]
    eV_r_wrong = r_values[eV_idx_wrong]

    # define a submesh where the potential energy density is resolved
    class eV_Resolved(SubDomain):
        def inside(self, x, on_boundary):
            return x[0] < eV_r_wrong

    eV_resolved = eV_Resolved()
    eV_subdomain = d.CellFunction('size_t', self.fem.mesh.mesh)
    eV_subdomain.set_all(0)
    eV_resolved.mark(eV_subdomain, 1)
    eV_submesh = d.SubMesh(self.fem.mesh.mesh, eV_subdomain, 1)

    # integrate potential energy density
    E_V = d.assemble(eV * rD * dx(eV_submesh))
    E_V /= self.mn**D  # get physical distances - integral now has mass dimension 4 - D

    # kinetic energy - here we are limited by the machine precision on the
    # gradient; the numerical value of the field is limited by the machine
    # precision on the VEV, which we are going to use as threshold
    eK_idx_wrong = np.where(
        abs(Phi_values - self.physics.Vev) < d.DOLFIN_EPS * self.physics.Vev)[0][0]
    eK_r_wrong = r_values[eK_idx_wrong]

    # define a submesh where the kinetic energy density is resolved
    class eK_Resolved(SubDomain):
        def inside(self, x, on_boundary):
            return x[0] < eK_r_wrong

    eK_resolved = eK_Resolved()
    eK_subdomain = d.CellFunction('size_t', self.fem.mesh.mesh)
    eK_subdomain.set_all(0)
    eK_resolved.mark(eK_subdomain, 1)
    eK_submesh = d.SubMesh(self.fem.mesh.mesh, eK_subdomain, 1)

    # integrate kinetic energy density
    eK = Constant(0.5) * self.grad_Phi**2
    E_K = d.assemble(eK * rD * dx(eK_submesh))
    E_K /= self.mn**D  # get physical distances - integral now has mass dimension 4 - D

    # matter coupling energy
    erho = self.source.rho / M * self.Phi
    E_rho = d.assemble(erho * rD * dx)  # rescaled rho, and so the integral, has mass dimension 4 - D

    # integral terms of Derrick's theorem
    derrick1 = (D - 2.) * E_K + D * (E_V + E_rho)
    derrick4 = 2. * (D - 2.) * E_K
    # non-integral terms of Derrick's theorem - these have mass dimension 4 - D
    derrick2 = self.source.Rho_bar * self.source.Rs**D * \
        self.Phi(self.fem.mesh.rs) / self.physics.M
    derrick3 = self.source.Rho_bar * self.source.Rs**(D + 1.) * \
        self.grad_Phi(self.fem.mesh.rs) / self.physics.M
    self.derrick = [derrick1, derrick2, derrick3, derrick4]
def main():
    team_1 = input("Enter a comma-separated list of players. Please be wary of spelling!: \n")
    team1_players = get_values(team_1)
    pretty_print(get_average(get_stats(team1_players)))
def prepare_dat(varname, lfhist, lfrcp, run_tokeep, bbox, season,
                compute_ano=True, start_ano="1850-01-01T00:00:00",
                end_ano="2100-12-31T23:59:59", first_spatial=True,
                spatial_aggregator="mean", time_aggregator="mean"):
    import os
    run_rcp = [basename(f).split("_")[4] for f in lfrcp]
    run_hist = [basename(f).split("_")[4] for f in lfhist]
    fhist_tokeep = [x for (x, y) in zip(lfhist, run_hist) if y in run_tokeep]
    frcp_tokeep = [x for (x, y) in zip(lfrcp, run_rcp) if y in run_tokeep]
    f_agg = {}
    drun = {}
    file_torm = []
    print(run_tokeep)
    for r in run_tokeep:
        print(r)
        LOGGER.debug('o_O!!!')
        f_onerun_rcp = [x for (x, y) in zip(lfrcp, run_rcp) if y == r]
        f_onerun_hist = [x for (x, y) in zip(lfhist, run_hist) if y == r]
        drun[r] = sorted(f_onerun_hist + f_onerun_rcp)
        LOGGER.debug('cdo merge time')
        cdo.mergetime(input=" ".join(drun[r]), output="tmp1.nc", options="-b F64")
        LOGGER.debug('cdo sellonlatbox')
        cdo.sellonlatbox(bbox, input="tmp1.nc", output="tmp2.nc", options="-b F64")
        LOGGER.debug('cdo selseason')
        cdo.select('season=' + season, input="tmp2.nc", output="tmp1.nc", options="-b F64")
        if compute_ano:
            LOGGER.debug('cdo compute_ano')
            # maybe offer anomalies computation only on a given period
            # with cdo seldate,startdate,enddate
            cdo.ydaysub(input="tmp1.nc" + " -ydaymean -seldate," + start_ano +
                        "," + end_ano + " " + "tmp1.nc",
                        output="tmp2.nc", options="-b F64")
            copyfile("tmp2.nc", "tmp1.nc")
        if first_spatial:
            LOGGER.debug('cdo first_spatial')
            strings_to_spagg(spatial_aggregator)(input="tmp1.nc", output="tmp2.nc", options="-b F64")
            strings_to_tagg(time_aggregator)(input="tmp2.nc", output="tmp1.nc", options="-b F64")
        else:
            LOGGER.debug('cdo first_temporal')
            strings_to_tagg(time_aggregator)(input="tmp1.nc", output="tmp2.nc", options="-b F64")
            strings_to_spagg(spatial_aggregator)(input="tmp2.nc", output="tmp1.nc", options="-b F64")
        f_agg[r] = "agg_" + r + ".nc"
        copyfile("tmp1.nc", f_agg[r])
        LOGGER.debug('cdo run end!!!')
    # print(cdo.sinfo(input=temp_nc))
    lf_agg = [f_agg[r] for r in run_tokeep]
    time = get_time(lf_agg)
    year = [t.year for t in time]
    # add xname and yname as arguments
    if len(run_tokeep) == 1:
        var = get_values(lf_agg[0], variable=varname)
    else:
        # add xname and yname as arguments
        var = get_values(lf_agg, variable=varname)
    # print(file_torm)
    # for f in file_torm:
    #     os.remove(f)
    import pandas
    ps_year = pandas.Series(year)
    counts_year = ps_year.value_counts()
    # keep only years for which every run contributed a value
    var, year = map(
        list,
        zip(*[(x, y) for x, y in zip(var, year)
              if counts_year[y] == len(run_tokeep)]))
    return var, year
def process(self):
    inta = self.prop(self.weights, get_values(self.inputs)) + self.bias
    act = self.act(inta)
    self.output = self.out(act)
def parallel_rollout_env(rolloutmem, envs, actor, critic, params):
    # parallelization method_2
    # interact
    env_number = len(envs)
    env_attributes = ray.get(envs[0].get_attributes.remote())
    old_states, new_states, raw_actions, dones, rewards, log_probs, advantages, rollout_reward, episode_number \
        = [], [], [], [], [], [], [], [0] * env_number, [0] * env_number
    old_state = ray.get([env.reset.remote() for env in envs])
    rolloutmem.reset()
    for step in range(params.policy_params.horizon):
        # data shape: [env_num, data[:, ..., step_i]]
        # interact
        action, log_prob, raw_action = actor.gen_action(
            torch.Tensor(old_state).cuda())
        action = action.cuda()
        assert (env_attributes['action_high'].cuda() >= action).all() and (
            action >= env_attributes['action_low'].cuda()
        ).all(), '>> Error: action value exceeds boundary!'
        action = action.cpu()
        if env_attributes['action_type']['data_type'] is type(int(0)) \
                and not env_attributes['image_obs']:
            action = action.int().tolist()
        step_obs_batch = ray.get([
            envs[i].step.remote(action[i]) for i in range(env_number)
        ])  # new_obs, reward, done, info
        # parse interact results
        new_state = [step_obs[0] for step_obs in step_obs_batch]
        reward = [step_obs[1] for step_obs in step_obs_batch]
        done = [step_obs[2] for step_obs in step_obs_batch]
        # record parsed results
        raw_action = raw_action[:, None] if len(raw_action.shape) < 2 else raw_action
        old_states.append(old_state)
        new_states.append(new_state)
        raw_actions.append(raw_action)
        rewards.append(reward)
        dones.append(done)
        log_probs.append(log_prob.float())
        rollout_reward = [
            rollout_reward[i] + (float(reward[i]) if done[i] is False else 0)
            for i in range(len(reward))
        ]
        episode_number = [
            float(done[i]) + episode_number[i] for i in range(len(done))
        ]
        # update old observation
        old_state = new_state
        if torch.Tensor(done).bool().all():
            break
        # for ind in [int(i) for i in list((torch.Tensor(done) == 1).nonzero())]:
        #     state = ray.get(envs[ind].reset.remote())
    dones[-1] = [True] * env_number
    # reformat collected data to episode-serial order
    if env_attributes['image_obs']:
        old_states = torch.Tensor(old_states).permute(1, 0, 2, 3, 4).cuda()
        new_states = torch.Tensor(new_states).permute(1, 0, 2, 3, 4).cuda()
        raw_actions = torch.stack(raw_actions).permute(1, 0, 2).detach().cuda()
        rewards = torch.Tensor(rewards).permute(1, 0).cuda()
        dones = torch.Tensor(dones).permute(1, 0).cuda()
        log_probs = torch.stack(log_probs).permute(1, 0, 2).detach().double().cuda()
    else:
        old_states = torch.Tensor(old_states).permute(1, 0, 2).cuda()
        new_states = torch.Tensor(new_states).permute(1, 0, 2).cuda()
        raw_actions = torch.stack(raw_actions).permute(1, 0, 2).detach().cuda()
        rewards = torch.Tensor(rewards).permute(1, 0).cuda()
        dones = torch.Tensor(dones).permute(1, 0).cuda()
        log_probs = torch.stack(log_probs).permute(1, 0, 2).detach().double().cuda()
    for i in range(env_number):
        # compute each episode length
        first_done = (dones[i] > 0).nonzero().min()
        gae_deltas = critic.gae_delta(old_states[i][:first_done + 1],
                                      new_states[i][:first_done + 1],
                                      rewards[i][:first_done + 1],
                                      params.policy_params.discount)
        advantages = get_advantage_new(
            gae_deltas, params.policy_params.discount,
            params.policy_params.lambd)[:, None].detach().cuda()
        advantages = advantages[:first_done + 1]
        # normalize advantages; epsilon guards against a zero std
        advantages = (advantages - advantages.mean()) / (torch.std(advantages) + 1e-6)
        values = get_values(rewards[i][:first_done + 1],
                            params.policy_params.discount)[:, None].cuda()
        # abandon redundant step info
        rolloutmem.append(old_states[i][:first_done + 1],
                          new_states[i][:first_done + 1],
                          raw_actions[i][:first_done + 1],
                          rewards[i][:first_done + 1],
                          dones[i][:first_done + 1],
                          log_probs[i][:first_done + 1],
                          advantages[:first_done + 1],
                          values[:first_done + 1])
    return torch.mean(torch.Tensor(rollout_reward))