Example #1
    def test_get_values(self):
        self.assertEqual(utils.get_values(self.table, 'arg1', [0, 2, 3]),
                         ['left', 'right', 'right'])
        self.assertEqual(utils.get_values(self.table, 'arg3', [1, 3]),
                         ['yes', 'no'])
        self.assertEqual(utils.get_values(self.table, 'arg1', []), [])
        self.assertEqual(utils.get_values(self.table, 'arg1', [9, 12]), [])
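The utils module exercised by this test is not shown. A minimal sketch of a get_values helper that would satisfy these assertions, assuming the table's first row holds the column names and out-of-range row indices are silently skipped, might look like this:

def get_values(table, column, row_indices):
    # Hypothetical reconstruction: table[0] is assumed to be a header row of
    # column names, the remaining rows hold the data, and indices that fall
    # outside the data are dropped (matching the [9, 12] -> [] assertion).
    col = table[0].index(column)
    rows = table[1:]
    return [rows[i][col] for i in row_indices if 0 <= i < len(rows)]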
Example #3
def preform_regression(test_num, trials):
    trial = 0
    accuracy = 0
    # Load csv into variable
    data = u.read_csv("final_training_dataset.csv")
    # Get required information and get predictions with regression
    watch_price = u.get_values(data[1:], u.get_index(data, 'price'))
    watch_deal = u.normalize_deal(
        u.get_values(data[1:], u.get_index(data, 'deal_type')))
    # Run Trials
    while trial <= trials:
        random_instances = get_random_instances(
            test_num, data[1:])  # Don't include header
        guessed_deals = least_squares_regression(
            watch_price, watch_deal,
            u.get_values(random_instances, u.get_index(data, 'price')), False)
        guessed_deals = u.classify_deal(guessed_deals)
        # Get actual deal values of the random instances
        actual_deals = u.get_values(random_instances,
                                    u.get_index(data, 'deal_type'))

        for i in range(len(random_instances)):
            if guessed_deals[i] == actual_deals[i]:
                accuracy += 1
        trial += 1
    return accuracy
Example #4
def scatter_plot(myList, attributeX, attributeY, filename, title, xLabel,
                 yLabel):
    plt.figure(figsize=(20, 7))
    data = myList[1:]
    x = u.get_values(data, u.get_index(myList, attributeX))
    y = u.get_values(data, u.get_index(myList, attributeY))
    plt.scatter(x, y, marker='.')
    label_plt(title, xLabel, yLabel)

    plt.savefig(filename)
    plt.close()
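preform_regression and scatter_plot above use a different calling convention: u.get_values(rows, column_index) takes a positional column index obtained from u.get_index(table, name). A rough sketch of that pair, assuming the header row is table[0] (the callers pass data[1:] as the rows), could be:

def get_index(table, column):
    # Position of `column` in the header row, assumed to be table[0].
    return table[0].index(column)

def get_values(rows, col_index):
    # Pull a single column out of a list of data rows.
    return [row[col_index] for row in rows]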
Example #5
def rollout_parallel(rolloutmem, envs, actor, critic, params):
    # parallelization method_1
    episodes_rewards = []
    episode_number = []
    data = ray.get([
        rollout_sim_single_step_parallel.remote(i, envs[i],
                                                copy.deepcopy(actor),
                                                params.policy_params.horizon)
        for i in range(params.policy_params.envs_num)
    ])
    for episode in data:
        old_states, new_states, raw_actions, rewards, dones, log_probs, rollout_reward = \
            torch.Tensor(episode[0]).cuda(), torch.Tensor(episode[1]).cuda(), torch.stack(episode[2]).detach().cuda(), \
            torch.Tensor(episode[3]).cuda(), torch.Tensor(episode[4]).cuda(), torch.stack(episode[5]).detach().cuda(), \
            torch.Tensor([episode[6]]).cuda()
        gae_deltas = critic.gae_delta(old_states, new_states, rewards,
                                      params.policy_params.discount)
        advantages = get_advantage_new(
            gae_deltas, params.policy_params.discount,
            params.policy_params.lambd).detach().cuda()
        values = get_values(rewards, params.policy_params.discount).cuda()
        if len(advantages.shape) == 1: advantages = advantages[:, None]
        if len(values.shape) == 1: values = values[:, None]
        rolloutmem.append(old_states, new_states, raw_actions, rewards, dones,
                          log_probs, advantages, values)
        episodes_rewards.append(rollout_reward)
        episode_number.append(len((dones == 1).nonzero()))
    return torch.mean(
        torch.Tensor([
            episodes_rewards[i] / max(episode_number[i], 1)
            for i in range(len(envs))
        ]))
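In the reinforcement-learning examples, get_values(rewards, discount) plays the role of a per-step discounted return ("reward-to-go"), returned as a tensor so callers can add a trailing dimension with values[:, None]. The real helper is not shown; a sketch for the single-episode, 1-D case would be:

import torch

def get_values(rewards, discount):
    # Discounted reward-to-go: values[t] = rewards[t] + discount * values[t + 1].
    # Assumes `rewards` is a 1-D tensor of per-step rewards for one episode.
    values = torch.zeros_like(rewards)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + discount * running
        values[t] = running
    return values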
Example #6
def name_isexit():
    while True:
        keys = get_keys()
        for key in keys:
            dict1 = get_values(key)
            write_log(str(dict1))
            try:
                copywriting = dict1[b'copywriting'].decode('utf-8')
                rooms = [dict1[b'room'].decode('utf-8')]
            except:
                pass
            try:
                photo_path = dict1[b'photo_path'].decode('utf-8')
                if photo_path:
                    send_photo(copywriting, photo_path, rooms)
                    del_key(key)
            except:
                pass
            try:
                text = dict1[b'text'].decode('utf-8')
                if text:
                    send_text(copywriting, text, rooms)
                    del_key(key)
            except:
                pass
            time.sleep(60)
Example #7
def get_ridgeplot_fig(df, distance='total_distance', nvals='all'):

    clubs, xmin, xmax = utils.get_clubs(df)
    colors = n_colors('rgb(242, 139, 0)', 'rgb(206, 0, 0)', 12, colortype='rgb')
    fig = go.Figure()
    for club, color in zip(clubs, colors):
        name = utils.club_enum[club]
        array = df.groupby('club').get_group(club)[distance].values
        data = utils.get_values(array, nvals)
        fig.add_trace(go.Violin(x=data, name=name, line_color=color))

    fig.update_traces(
        orientation='h',
        side='positive',
        width=3,
        points=False,
    )

    fig.update_layout(
        margin=dict(t=30, r=10, b=10, l=10),
        xaxis_showgrid=True,
        xaxis_zeroline=False,
        showlegend=False,
        xaxis=dict(
            range=[xmin-10, xmax+20],
            tickmode='linear',
            tick0=0,
            dtick=10,
        )
    )

    return fig
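Here utils.get_values(array, nvals) restricts a distance array to the most recent shots, with 'all' keeping everything. The helper itself is not shown; a guess at its behavior, with the "last n values" interpretation as an explicit assumption, is:

import numpy as np

def get_values(values, nvals='all'):
    # Keep the whole array, or only the last `nvals` entries (assumed semantics).
    values = np.asarray(values)
    if nvals == 'all':
        return values
    return values[-int(nvals):]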
Example #8
    def compute_healing_length( self ):

        threshold = self.healing_threshold
        if self.phi_at_infty=='vev':
            delta_Phi = self.physics.Vev - self.Phi(0)
        elif self.phi_at_infty=='zero':
            delta_Phi = - self.Phi(0)
        
        # get bracket for the healing length:
        # mesh points to the left and right of the true healing length
        r_values, Phi_values = get_values( self.Phi, output_mesh=True )
        idx = np.where( Phi_values > self.Phi(0) + threshold * delta_Phi )[0][0]
        left = r_values[idx-1]
        right = r_values[idx]

        # now find the healing length in that bracket using the Brent method
        F = lambda r : self.Phi(r) - self.Phi(0) - threshold * delta_Phi
        try:
            r_healing = brentq( F, left, right )
        except:
            r_healing = np.nan

        # rescaled and physical
        self.r_healing = r_healing
        self.R_healing = r_healing / self.mn
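This example (and Example #17 below) calls get_values(self.Phi, output_mesh=True) on a FEniCS/dolfin field to obtain radius and field-value arrays ordered along the 1-D mesh. The real helper is not shown; one plausible sketch built on standard dolfin calls would be:

import numpy as np

def get_values(func, output_mesh=False):
    # Hypothetical sketch: sample a 1-D dolfin Function at its mesh vertices
    # and return the values sorted by radius, optionally with the radii.
    mesh = func.function_space().mesh()
    r = mesh.coordinates()[:, 0]
    vals = func.compute_vertex_values(mesh)
    order = np.argsort(r)
    return (r[order], vals[order]) if output_mesh else vals[order]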
Example #9
def parallel_rollout_sim(env_name, env_number, horizon):
    envs = [gym.make(env_name) for _ in range(env_number)]
    actor = gen_actor(env_name, 512)
    critic = gen_critic(env_name, 512)
    rolloutmem = RolloutMemory(env_number * horizon, env_name)
    time_start = time.time()
    episodes_rewards = []
    data = ray.get(
        [rollout_sim_single_step_parallel.remote(i, env_name, horizon, None, None) for i in range(env_number)])
    time_end = time.time()
    for episode in data:
        old_states, new_states, raw_actions, rewards, dones, log_probs, episode_reward = \
            torch.Tensor(episode[0]).cuda(), torch.Tensor(episode[1]).cuda(), torch.stack(episode[2]).detach().cuda(), \
            torch.Tensor(episode[3]).cuda(), torch.Tensor(episode[4]).cuda(), torch.stack(episode[5]).detach().cuda(), \
            torch.Tensor([episode[6]]).cuda()
        gae_deltas = critic.gae_delta(old_states, new_states, rewards, 0.99)
        advantages = torch.Tensor(get_advantage_new(gae_deltas, 0.99, 0.95)).cuda()
        values = get_values(rewards, 0.99).cuda()
        if len(advantages.shape) == 1: advantages = advantages[:, None]
        if len(values.shape) == 1: values = values[:, None]
        rolloutmem.append(old_states, new_states, raw_actions, rewards, dones, log_probs, advantages, values)
        episodes_rewards.append(episode_reward)
    time_reformat = time.time()
    print(
        "parallel_time: {}, reformat_time: {:.3f}\nrollout_time: {:.3f}\ndata_len: {}\navgR: {:.3f}\nsaved_step_num: {}\n\n"
            .format(time_end - time_start, time_reformat - time_end, time_reformat - time_start, len(data),
                    torch.mean(torch.Tensor(episodes_rewards)), rolloutmem.offset))
    return torch.mean(torch.Tensor(episodes_rewards)), time_end - time_start
Example #10
	def init_from_params(self,bestfit,nwalkers,pdf='gaussian_diag',seed=None):
		self.logger.info('Setting initial values and errors from parameters.')
		values = utils.get_values(self.fitargs)
		errors = utils.get_errors(self.fitargs)
		self.fitted_values = [values[v] for v in self.fitted]
		self.fitted_errors = [errors[v] for v in self.fitted]
		self.fixed_values = {v:values[v] for v in self.fixed}
		self.reset()
Example #11
def precalc_logit_moments(mus, sigmas):
    keys = list(itertools.product(mus, sigmas))
    old = utils.get_values(keys, LOGIT_FNAME)
    for mu, sigma in tqdm.tqdm(keys):
        if (mu, sigma) not in old:
            avg, var = logit_norm_moments(mu, sigma)
            old[(mu, sigma)] = (avg, var)
    utils.dump_values(old, LOGIT_FNAME)
    LOGIT_CACHE.update(old)
    return old
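Here utils.get_values(keys, LOGIT_FNAME) acts as a loader for a cached mapping from (mu, sigma) pairs to precomputed moments, and utils.dump_values writes it back. A pickle-based sketch, purely as a guess at that interface (the keys argument is kept only for compatibility):

import os
import pickle

def get_values(keys, fname):
    # Hypothetical cache loader: return the whole stored dict, or an empty
    # dict if the cache file does not exist yet.
    if not os.path.exists(fname):
        return {}
    with open(fname, 'rb') as f:
        return pickle.load(f)

def dump_values(values, fname):
    # Persist the cache dict back to disk.
    with open(fname, 'wb') as f:
        pickle.dump(values, f)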
Example #12
def main(args):
    numbers = get_values(args[0])
    if (len(numbers) <= 0):
        sys.exit(84)
    print(len(numbers), " element", sep="", end="")
    print("s" if len(numbers) > 1 else "")
    selection(numbers[::])
    insertion(numbers[::])
    bubble(numbers[::])
    print_res("Quicksort:", 0 if (len(numbers) <= 1) else quicksort(numbers[::])[1])
    print_res("Merge sort:", merge(numbers[::])[1])
Example #13
def rollout_serial(rolloutmem, envs, actor, critic, params):
    episodes_rewards = []
    # collect episodes from different environments
    for env in envs:
        old_states, new_states, raw_actions, dones, rewards, log_probs, advantages, episode_reward \
            = [], [], [], [], [], [], [], 0.
        # collect one episode from current env
        old_state = env.reset()
        for step in range(params.policy_params.horizon):
            # act one step in current environment
            action, log_prob, raw_action = actor.gen_action(
                torch.Tensor(old_state).cuda())
            assert (env.action_space.low < np.array(action)).all() and (
                np.array(action) < env.action_space.high).all()
            new_state, reward, done, info = env.step(
                action.cpu() if hasattr(action, 'cpu') else action)
            time.sleep(
                .002
            )  # check this issue: https://github.com/openai/mujoco-py/issues/340
            # record trajectory step
            old_states.append(old_state)
            new_states.append(new_state)
            raw_actions.append(raw_action.view(-1))
            rewards.append(reward)
            dones.append(done)
            log_probs.append(log_prob.view(-1))
            episode_reward += reward
            # update old observation
            old_state = new_state
            if done:
                break
        dones[-1] = True
        # reformat trajectory step
        old_states, new_states, raw_actions, rewards, dones, log_probs = \
            torch.Tensor(old_states).cuda(), torch.Tensor(new_states).cuda(), torch.stack(raw_actions).detach().cuda(), \
            torch.Tensor(rewards).cuda(), torch.Tensor(dones).cuda(), torch.stack(log_probs).detach().cuda()
        # compute loss factors
        gae_deltas = critic.gae_delta(old_states, new_states, rewards,
                                      params.policy_params.discount)
        for t in range(len(gae_deltas)):
            advantages.append(
                get_advantage(t, gae_deltas, params.policy_params.discount,
                              params.policy_params.lambd))
        advantages = torch.Tensor(advantages).cuda()
        values = get_values(rewards, params.policy_params.discount).cuda()
        if len(advantages.shape) == 1: advantages = advantages[:, None]
        if len(values.shape) == 1: values = values[:, None]
        # store epoch
        rolloutmem.append(old_states, new_states, raw_actions, rewards, dones,
                          log_probs, advantages, values)
        # record epoch reward
        episodes_rewards.append(episode_reward)
    return torch.mean(torch.Tensor(episodes_rewards))
Example #14
def parallel_rollout_env(envs, actor, critic, rolloutmem, horizon):
    # interact
    time_start = time.time()
    env_number = len(envs)
    old_states, new_states, raw_actions, dones, rewards, log_probs, advantages, episode_reward \
        = [], [], [], [], [], [], [], [0] * env_number
    old_state = ray.get([env.reset.remote() for env in envs])
    rolloutmem.reset()
    for step in range(horizon):
        # interact
        action, log_prob, raw_action = actor.gen_action(torch.Tensor(old_state).cuda())
        step_obs_batch = ray.get(
            [envs[i].step.remote(action[i]) for i in range(env_number)])  # new_obs, reward, done, info
        # parse interact results
        new_state = [step_obs[0] for step_obs in step_obs_batch]
        reward = [step_obs[1] for step_obs in step_obs_batch]
        done = [step_obs[2] for step_obs in step_obs_batch]
        # record parsed results
        old_states.append(old_state)
        new_states.append(new_state)
        raw_actions.append(raw_action)
        rewards.append(reward)
        dones.append(done)
        log_probs.append(log_prob)
        episode_reward = [float(reward[i]) + episode_reward[i] for i in range(len(reward))]
        # update old observation
        old_state = new_state
        if np.array(done).all():
            break
    dones[-1] = [True] * env_number
    old_states = torch.Tensor(old_states).permute(1, 0, 2).cuda()
    new_states = torch.Tensor(new_states).permute(1, 0, 2).cuda()
    raw_actions = torch.stack(raw_actions).permute(1, 0, 2).detach().cuda()
    rewards = torch.Tensor(rewards).permute(1, 0).cuda()
    dones = torch.Tensor(dones).permute(1, 0).cuda()
    log_probs = torch.stack(log_probs).permute(1, 0, 2).detach().cuda()
    gae_deltas = critic.gae_delta(old_states, new_states, rewards, .99).cuda()
    advantages = get_advantage_new(gae_deltas, .99, .95).cuda()
    values = get_values(rewards, .99).cuda()
    advantages = advantages[:, :, None]
    values = values[:, :, None]
    # rolloutmem.append(old_states, new_states, raw_actions, rewards, dones, log_probs, advantages, values)
    for i in range(env_number):
        # abandon redundant step info
        first_done = (dones[i] > 0).nonzero().min()
        rolloutmem.append(old_states[i][:first_done + 1], new_states[i][:first_done + 1],
                          raw_actions[i][:first_done + 1], rewards[i][:first_done + 1], dones[i][:first_done + 1],
                          log_probs[i][:first_done + 1], advantages[i][:first_done + 1], values[i][:first_done + 1])
    print("    rollout_time: {}".format(time.time() - time_start))
    return torch.mean(torch.Tensor(episode_reward))
Example #15
def get_boxplot_fig(df, distance='total_distance', axis='yaxis', nvals='all'):

    colors = utils.big_rainbow(len(df.club.unique()))

    # Get an ordered list of clubs, excluding clubs with no data
    clubs = [club for club in utils.club_enum.keys() if club in df.club.unique()]
    clubs.reverse()

    xmin = df.carry_distance.min() - 10
    xmax = df.total_distance.max() + 10

    # clubs, xmin, xmax = utils.get_clubs(df)

    fig = go.Figure()
    for club, color in zip(clubs, colors):
        values = df.groupby('club').get_group(club)[distance].values
        values = utils.get_values(values, nvals)
        name = utils.club_enum[club]
        if axis == 'yaxis':
            fig.add_trace(go.Box(y=values, name=name, marker_color=color))
        else:
            fig.add_trace(go.Box(x=values, name=name, marker_color=color))

    fig.update_layout(
        # height=300,
        margin=dict(t=30, r=10, b=10, l=10),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1 
        ),
        showlegend=False,
    )

    # Set properties to selected axis
    axis_kwargs = {'range': [xmin, xmax], 'tickmode': 'linear', 'tick0': 0, 'dtick': 10}
    if axis == 'yaxis':
        fig.update_layout(yaxis=axis_kwargs)
    else:
        fig.update_layout(xaxis=axis_kwargs)

    return fig
Example #16
    def test_get_values(self):
        self.assertEqual(utils.get_values(self.table, "arg1", [0, 2, 3]), ["left", "right", "right"])
        self.assertEqual(utils.get_values(self.table, "arg3", [1, 3]), ["yes", "no"])
        self.assertEqual(utils.get_values(self.table, "arg1", []), [])
        self.assertEqual(utils.get_values(self.table, "arg1", [9, 12]), [])
Example #17
    def compute_derrick(self):

        D = self.physics.D

        r = Expression('x[0]', degree=self.fem.func_degree)
        # r^(D-1)
        rD = Expression('pow(x[0],D-1)', D=D, degree=self.fem.func_degree)

        mu, M = Constant(self.physics.mu), Constant(self.physics.M)
        lam = Constant(self.physics.lam)
        mn = Constant(self.mn)

        r_values, Phi_values = get_values(self.Phi, output_mesh=True)

        # the numerical value of the potential energy goes below the machine precision on
        # its biggest component after some radius (at those radii the biggest component is the vacuum energy)
        # analytically, we know the integral over that and bigger radii should be close to 0
        # to avoid integrating numerical noise (and blowing it up by r^D) we restrict integration
        # on the submesh where the potential energy is resolved within machine precision

        # find radius at which potential energy drops below machine precision
        vacuum_energy = self.physics.mu**4 / (4. * self.physics.lam)
        eV = lam / 4. * self.Phi**4 - mu**2 / 2. * self.Phi**2 + Constant(
            vacuum_energy)
        eV_values = self.physics.lam / 4. * Phi_values**4 - self.physics.mu**2 / 2. * Phi_values**2 + vacuum_energy
        eV_idx_wrong = np.where(eV_values < d.DOLFIN_EPS * vacuum_energy)[0][0]
        eV_r_wrong = r_values[eV_idx_wrong]

        # define a submesh where the potential energy density is resolved
        class eV_Resolved(SubDomain):
            def inside(self, x, on_boundary):
                return x[0] < eV_r_wrong

        eV_resolved = eV_Resolved()
        eV_subdomain = d.CellFunction('size_t', self.fem.mesh.mesh)
        eV_subdomain.set_all(0)
        eV_resolved.mark(eV_subdomain, 1)
        eV_submesh = d.SubMesh(self.fem.mesh.mesh, eV_subdomain, 1)

        # integrate potential energy density
        E_V = d.assemble(eV * rD * dx(eV_submesh))
        E_V /= self.mn**D  # get physical distances - integral now has mass dimension 4 - D

        # kinetic energy - here we are limited by the machine precision on the gradient
        # the numerical value of the field is limited by the machine precision on the VEV, which
        # we are going to use as threshold
        eK_idx_wrong = np.where(
            abs(Phi_values - self.physics.Vev) < d.DOLFIN_EPS *
            self.physics.Vev)[0][0]
        eK_r_wrong = r_values[eK_idx_wrong]

        # define a submesh where the kinetic energy density is resolved
        class eK_Resolved(SubDomain):
            def inside(self, x, on_boundary):
                return x[0] < eK_r_wrong

        eK_resolved = eK_Resolved()
        eK_subdomain = d.CellFunction('size_t', self.fem.mesh.mesh)
        eK_subdomain.set_all(0)
        eK_resolved.mark(eK_subdomain, 1)
        eK_submesh = d.SubMesh(self.fem.mesh.mesh, eK_subdomain, 1)

        # integrate kinetic energy density
        eK = Constant(0.5) * self.grad_Phi**2
        E_K = d.assemble(eK * rD * dx(eK_submesh))
        E_K /= self.mn**D  # get physical distances - integral now has mass dimension 4 - D

        # matter coupling energy
        erho = self.source.rho / M * self.Phi
        E_rho = d.assemble(
            erho * rD *
            dx)  # rescaled rho, and so the integral, has mass dimension 4 - D

        # integral terms of Derrick's theorem
        derrick1 = (D - 2.) * E_K + D * (E_V + E_rho)
        derrick4 = 2. * (D - 2.) * E_K

        # non-integral terms of Derrick's theorem - these have mass dimension 4 - D
        derrick2 = self.source.Rho_bar * self.source.Rs**D * \
                   self.Phi(self.fem.mesh.rs) / self.physics.M
        derrick3 = self.source.Rho_bar * self.source.Rs**(D+1.) * \
                   self.grad_Phi(self.fem.mesh.rs) / self.physics.M

        self.derrick = [derrick1, derrick2, derrick3, derrick4]
Example #18
def main():
    team_1 = input("Enter a comma-separated list of players. Please be wary of spelling!: \n")
    team1_players = get_values(team_1)
    pretty_print(get_average(get_stats(team1_players)))
Example #19
def prepare_dat(varname,
                lfhist,
                lfrcp,
                run_tokeep,
                bbox,
                season,
                compute_ano=True,
                start_ano="1850-01-01T00:00:00",
                end_ano="2100-12-31T23:59:59",
                first_spatial=True,
                spatial_aggregator="mean",
                time_aggregator="mean"):
    import os
    run_rcp = [basename(f).split("_")[4] for f in lfrcp]
    run_hist = [basename(f).split("_")[4] for f in lfhist]
    fhist_tokeep = [x for (x, y) in zip(lfhist, run_hist) if y in run_tokeep]
    frcp_tokeep = [x for (x, y) in zip(lfrcp, run_rcp) if y in run_tokeep]
    f_agg = {}
    drun = {}
    file_torm = []
    print(run_tokeep)
    for r in run_tokeep:
        print(r)
        LOGGER.debug('o_O!!!')
        f_onerun_rcp = [x for (x, y) in zip(lfrcp, run_rcp) if y == r]
        f_onerun_hist = [x for (x, y) in zip(lfhist, run_hist) if y == r]
        drun[r] = sorted(f_onerun_hist + f_onerun_rcp)
        LOGGER.debug('cdo merge time')
        cdo.mergetime(input=" ".join(drun[r]),
                      output="tmp1.nc",
                      options="-b F64")
        LOGGER.debug('cdo sellonlatbox')
        cdo.sellonlatbox(bbox,
                         input="tmp1.nc",
                         output="tmp2.nc",
                         options="-b F64")
        LOGGER.debug('cdo selseason')
        cdo.select('season=' + season,
                   input="tmp2.nc",
                   output="tmp1.nc",
                   options="-b F64")
        if compute_ano:
            LOGGER.debug('cdo compute_ano')
            # maybe offer anomalies computation only on a given period with cdo seldate,startdate,endate
            cdo.ydaysub(input="tmp1.nc" + " -ydaymean -seldate," + start_ano +
                        "," + end_ano + " " + "tmp1.nc",
                        output="tmp2.nc",
                        options="-b F64")
            copyfile("tmp2.nc", "tmp1.nc")
        if first_spatial:
            LOGGER.debug('cdo first_spatial')
            strings_to_spagg(spatial_aggregator)(input="tmp1.nc",
                                                 output="tmp2.nc",
                                                 options="-b F64")
            strings_to_tagg(time_aggregator)(input="tmp2.nc",
                                             output="tmp1.nc",
                                             options="-b F64")
        else:
            LOGGER.debug('cdo first_temporal')
            strings_to_tagg(time_aggregator)(input="tmp1.nc",
                                             output="tmp2.nc",
                                             options="-b F64")
            strings_to_spagg(spatial_aggregator)(input="tmp2.nc",
                                                 output="tmp1.nc",
                                                 options="-b F64")
        f_agg[r] = "agg_" + r + ".nc"
        copyfile("tmp1.nc", f_agg[r])
        LOGGER.debug('cdo run end!!!')
        # print cdo.sinfo(input = temp_nc)
    lf_agg = [f_agg[r] for r in run_tokeep]
    time = get_time(lf_agg)
    year = [t.year for t in time]
    # add xname and yname as arguments
    if len(run_tokeep) == 1:
        var = get_values(lf_agg[0], variable=varname)
    else:
        # add xname and yname as arguments
        var = get_values(lf_agg, variable=varname)
    #print file_torm
    #for f in file_torm:
    #    os.remove(f)
    import pandas
    ps_year = pandas.Series(year)
    counts_year = ps_year.value_counts()
    var, year = map(
        list,
        zip(*[(x, y) for x, y in zip(var, year)
              if counts_year[y] == len(run_tokeep)]))
    return var, year
Example #20
    def process(self):
        inta = self.prop(self.weights, get_values(self.inputs)) + self.bias
        act = self.act(inta)
        self.output = self.out(act)
        return
Example #21
def parallel_rollout_env(rolloutmem, envs, actor, critic, params):
    # parallelization method_2
    # interact
    env_number = len(envs)
    env_attributes = ray.get(envs[0].get_attributes.remote())
    old_states, new_states, raw_actions, dones, rewards, log_probs, advantages, rollout_reward, episode_number \
        = [], [], [], [], [], [], [], [0] * env_number, [0] * env_number
    old_state = ray.get([env.reset.remote() for env in envs])
    rolloutmem.reset()
    for step in range(params.policy_params.horizon
                      ):  # data shape: [env_num, data[:, ..., step_i]]
        # interact
        action, log_prob, raw_action = actor.gen_action(
            torch.Tensor(old_state).cuda())
        action = action.cuda()
        assert (env_attributes['action_high'].cuda() >= action).all() and (
            action >= env_attributes['action_low'].cuda()
        ).all(), '>> Error: action value exceeds boundary!'
        action = action.cpu()
        if env_attributes['action_type']['data_type'] is type(
                int(0)) and not env_attributes['image_obs']:
            action = action.int().tolist()
        step_obs_batch = ray.get([
            envs[i].step.remote(action[i]) for i in range(env_number)
        ])  # new_obs, reward, done, info
        # parse interact results
        new_state = [step_obs[0] for step_obs in step_obs_batch]
        reward = [step_obs[1] for step_obs in step_obs_batch]
        done = [step_obs[2] for step_obs in step_obs_batch]
        # record parsed results
        raw_action = raw_action[:, None] if len(
            raw_action.shape) < 2 else raw_action
        old_states.append(old_state)
        new_states.append(new_state)
        raw_actions.append(raw_action)
        rewards.append(reward)
        dones.append(done)
        log_probs.append(log_prob.float())
        rollout_reward = [
            rollout_reward[i] + (float(reward[i]) if done[i] is False else 0)
            for i in range(len(reward))
        ]
        episode_number = [
            float(done[i]) + episode_number[i] for i in range(len(done))
        ]
        # update old observation
        old_state = new_state
        if torch.Tensor(done).bool().all():
            break
        # for ind in [int(i) for i in list((torch.Tensor(done) == 1).nonzero())]:
        #     state = ray.get(envs[ind].reset.remote())
    dones[-1] = [True] * env_number
    # reformat collected data to episode-serial order
    if env_attributes['image_obs']:
        old_states = torch.Tensor(old_states).permute(1, 0, 2, 3, 4).cuda()
        new_states = torch.Tensor(new_states).permute(1, 0, 2, 3, 4).cuda()
        raw_actions = torch.stack(raw_actions).permute(1, 0, 2).detach().cuda()
        rewards = torch.Tensor(rewards).permute(1, 0).cuda()
        dones = torch.Tensor(dones).permute(1, 0).cuda()
        log_probs = torch.stack(log_probs).permute(1, 0,
                                                   2).detach().double().cuda()
    else:
        old_states = torch.Tensor(old_states).permute(1, 0, 2).cuda()
        new_states = torch.Tensor(new_states).permute(1, 0, 2).cuda()
        raw_actions = torch.stack(raw_actions).permute(1, 0, 2).detach().cuda()
        rewards = torch.Tensor(rewards).permute(1, 0).cuda()
        dones = torch.Tensor(dones).permute(1, 0).cuda()
        log_probs = torch.stack(log_probs).permute(1, 0,
                                                   2).detach().double().cuda()
    for i in range(env_number):
        # compute each episode length
        first_done = (dones[i] > 0).nonzero().min()
        gae_deltas = critic.gae_delta(old_states[i][:first_done + 1],
                                      new_states[i][:first_done + 1],
                                      rewards[i][:first_done + 1],
                                      params.policy_params.discount)
        advantages = get_advantage_new(
            gae_deltas, params.policy_params.discount,
            params.policy_params.lambd)[:, None].detach().cuda()
        advantages = advantages[:first_done + 1]
        # normalize advantages; add the epsilon outside std() to avoid division by zero
        advantages = (advantages - advantages.mean()) / (
            torch.std(advantages) + 1e-6)
        values = get_values(rewards[i][:first_done + 1],
                            params.policy_params.discount)[:, None].cuda()
        # abandon redundant step info
        rolloutmem.append(
            old_states[i][:first_done + 1], new_states[i][:first_done + 1],
            raw_actions[i][:first_done + 1], rewards[i][:first_done + 1],
            dones[i][:first_done + 1], log_probs[i][:first_done + 1],
            advantages[:first_done + 1], values[:first_done + 1])
    return torch.mean(torch.Tensor(rollout_reward))