Example 1
def main():
    args = get_args()
    args.critic_layers = literal_eval(args.critic_layers)
    args.actor_layers = literal_eval(args.actor_layers)

    if args.prosthetic:
        num_actions = 19
    else:
        num_actions = 22

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    env.change_model(args.modeldim, args.prosthetic, args.difficulty)
    state = env.reset(seed=42, difficulty=0)
    # obs = env.get_observation()
    d = env.get_state_desc()
    state_size = len(env.dict_to_vec(d))
    del env

    model_params = {
        'state_size': state_size,
        'num_act': num_actions,
        'gamma': 0,
        'actor_layers': args.actor_layers,
        'critic_layers': args.critic_layers,
        'actor_lr': 0,
        'critic_lr': 0,
        'layer_norm': args.layer_norm
    }

    test_agent(args, args.episodes, model_params)
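
Example 1 derives state_size by flattening the state description returned by env.get_state_desc() with env.dict_to_vec(). That helper is not included in these snippets; the sketch below shows one plausible way such a flattener could work, assuming the state description is a nested dict of scalars and lists (the real RunEnv2.dict_to_vec may order and filter keys differently).

def _flatten(v):
    # Yield every numeric leaf of a nested dict / list structure.
    # Keys are visited in sorted order so the layout is deterministic.
    if isinstance(v, dict):
        for key in sorted(v):
            yield from _flatten(v[key])
    elif isinstance(v, (list, tuple)):
        for x in v:
            yield from _flatten(x)
    else:
        yield float(v)

def dict_to_vec(d):
    # Flatten a nested state-description dict into a flat list of floats.
    return list(_flatten(d))

With a helper like this, len(dict_to_vec(d)) gives the length of the flattened observation vector that the actor network consumes, which is what state_size is set to above.
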
Example 2
def submit_agent(args, model_params):

    ##########################################################

    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    token = args.token
    client = Client(remote_base)

    # Create environment
    di = client.env_create(token, env_id="ProstheticsEnv")

    stat = []
    ep = 1
    ii = 0
    reward_sum = 0
    print('\n\n#################################################\n\n')
    while True:
        ii += 1
        proj = env.dict_to_vec(di)
        action = actor.act(proj)
        action += np.random.rand(len(action)) / 10.

        [di, reward, done, info] = client.env_step(action.tolist(), True)
        reward_sum += reward
        print('ep: ' + str(ep) + '  >>  step: ' + str(int(ii)) +
              '  >>  reward: ' + format(reward, '.2f') + '  \t' +
              str(int(reward_sum)) + '\t  >>  pelvis X Y Z: \t' +
              format(di['body_pos']['pelvis'][0], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][1], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][2], '.2f'))
        if done:
            print('\n\n#################################################\n\n')
            stat.append([ep, ii, reward_sum])
            di = client.env_reset()
            ep += 1
            ii = 0
            reward_sum = 0
            if not di:
                break
    for e in stat:
        print(e)
    print('\n\nclient.submit()\n\n')
    client.submit()
    ##########################################################
    print('\n\n#################################################\n\n')
    print('DONE\n\n')
Example 3
def test_agent(args, testing, num_test_episodes, model_params, weights,
               best_reward, updates, global_step, save_dir):
    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    test_rewards_all = []
    test_pelvis_X_all = []

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(
        **model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    # if args.weights is not None:
    #     actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=0)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, info = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards_all.append(test_reward)
        test_pelvis_X_all.append(info['pelvis_X'])
    test_reward_mean = np.mean(test_rewards_all)
    mean_pelvis_X = np.mean(test_pelvis_X_all)
    std_reward = np.std(test_rewards_all)

    test_str = 'global step {}; test_reward_mean: {:.2f}, test_rewards_all: {}; mean_pelvis_X: {:.2f}, test_pelvis_X_all: {}'.\
        format(global_step.value, float(test_reward_mean), test_rewards_all, float(mean_pelvis_X), test_pelvis_X_all)

    print(test_str)
    try:
        with open(os.path.join(save_dir, 'test_report.log'), 'a') as f:
            f.write(test_str + '\n')
    except OSError:
        print('#############################################')
        print('failed to write test_report.log')
        print('#############################################')

    if test_reward_mean > best_reward.value or test_reward_mean > 30 * env.reward_mult:
        if test_reward_mean > best_reward.value:
            best_reward.value = test_reward_mean
        fname = os.path.join(
            save_dir,
            'weights_updates_{}_reward_{:.1f}_pelvis_X_{:.1f}.pkl'.format(
                updates.value, test_reward_mean, mean_pelvis_X))
        actor.save(fname)
    testing.value = 0
Example 4
def test_agent(args, testing, state_transform, num_test_episodes, model_params,
               weights, best_reward, updates, global_step, save_dir):
    env = RunEnv2(state_transform,
                  visualize=args.test,
                  integrator_accuracy=args.accuracy,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=1)
    test_rewards = []

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=2)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, _ = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards.append(test_reward)
    mean_reward = np.mean(test_rewards)
    std_reward = np.std(test_rewards)

    test_str = 'global step {}; test reward mean: {:.2f}, std: {:.2f}, all: {}'.\
        format(global_step.value, float(mean_reward), float(std_reward), test_rewards)

    print(test_str)
    with open(os.path.join(save_dir, 'test_report.log'), 'a') as f:
        f.write(test_str + '\n')

    if mean_reward > best_reward.value or mean_reward > 30 * env.reward_mult:
        if mean_reward > best_reward.value:
            best_reward.value = mean_reward
        fname = os.path.join(
            save_dir, 'weights_updates_{}_reward_{:.2f}.pkl'.format(
                updates.value, mean_reward))
        actor.save(fname)
    testing.value = 0
Example 5
def main():
    args = get_args()
    args.critic_layers = literal_eval(args.critic_layers)
    args.actor_layers = literal_eval(args.actor_layers)

    save_dir = 'tests'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    state_transform = NormState(args.prosthetic)
    # state_transform = StateVelCentr(obstacles_mode='standard',
    #                                 exclude_centr=True,
    #                                 vel_states=[])
    env = RunEnv2(state_transform,
                  integrator_accuracy=args.accuracy,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=1)
    env.change_model(args.modeldim, args.prosthetic, args.difficulty)
    num_actions = env.get_action_space_size()
    del env

    model_params = {
        'state_size': state_transform.state_size,
        'num_act': num_actions,
        'gamma': 0,
        'actor_layers': args.actor_layers,
        'critic_layers': args.critic_layers,
        'actor_lr': 0,
        'critic_lr': 0,
        'layer_norm': args.layer_norm
    }
    actor_fn, params_actor, params_crit, actor_lr, critic_lr = \
            build_model_test(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)

    actor.load(args.weights)

    weights = [p.get_value() for p in params_actor]

    global_step = 0
    test_agent(args, state_transform, args.episodes, actor, weights,
               global_step, save_dir)
Example 6
def test_agent(args, num_test_episodes, model_params):
    env = RunEnv2(visualize=True,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    test_rewards = []

    # train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(**model_params)
    # actor_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(**model_params)
    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=0)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            # state = np.concatenate((state,state,state))[:390]  # ndrw tmp
            action = actor.act(state)  # ndrw tmp
            # if args.prosthetic:
            #     action = np.zeros(19)  # ndrw tmp
            # else:
            #     action = np.zeros(22)  # ndrw tmp
            state, reward, terminal, _ = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards.append(test_reward)
    mean_reward = np.mean(test_rewards)
    std_reward = np.std(test_rewards)

    global_step = 0
    test_str = 'global step {}; test reward mean: {:.2f}, std: {:.2f}, all: {}'.\
        format(global_step, float(mean_reward), float(std_reward), test_rewards)

    print(test_str)
    with open('test_report.log', 'a') as f:
        f.write(test_str + '\n')
Example 7
def test_agent(args, state_transform, num_test_episodes, actor, weights,
               global_step, save_dir):
    env = RunEnv2(state_transform,
                  visualize=True,
                  integrator_accuracy=args.accuracy,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=config.skip_frames)
    test_rewards = []

    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=2)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, _ = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards.append(test_reward)
    mean_reward = np.mean(test_rewards)
    std_reward = np.std(test_rewards)

    test_str = 'global step {}; test reward mean: {:.2f}, std: {:.2f}, all: {}'.\
        format(global_step, float(mean_reward), float(std_reward), test_rewards)

    print(test_str)
    with open('test_report.log', 'a') as f:
        f.write(test_str + '\n')
Example 8
def run_agent(model_params, weights, state_transform, data_queue, weights_queue,
              process, global_step, updates, best_reward, param_noise_prob, save_dir,
              max_steps=10000000):

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)

    env = RunEnv2(state_transform, max_obstacles=config.num_obstacles, skip_frame=config.skip_frames)
    random_process = OrnsteinUhlenbeckProcess(theta=.1, mu=0., sigma=.2, size=env.noutput,
                                              sigma_min=0.05, n_steps_annealing=1e6)
    # prepare buffers for data
    states = []
    actions = []
    rewards = []
    terminals = []

    total_episodes = 0
    start = time()
    action_noise = True
    while global_step.value < max_steps:
        seed = random.randrange(2**32-2)
        state = env.reset(seed=seed, difficulty=2)
        random_process.reset_states()

        total_reward = 0.
        total_reward_original = 0.
        terminal = False
        steps = 0
        
        while not terminal:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            if action_noise:
                action += random_process.sample()

            next_state, reward, next_terminal, info = env.step(action)
            total_reward += reward
            total_reward_original += info['original_reward']
            steps += 1
            global_step.value += 1

            # add data to buffers
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            terminals.append(terminal)

            state = next_state
            terminal = next_terminal

            if terminal:
                break

        total_episodes += 1

        # add data to buffers after episode end
        states.append(state)
        actions.append(np.zeros(env.noutput))
        rewards.append(0)
        terminals.append(terminal)

        states_np = np.asarray(states).astype(np.float32)
        data = (states_np,
                np.asarray(actions).astype(np.float32),
                np.asarray(rewards).astype(np.float32),
                np.asarray(terminals),
                )
        weight_send = None
        if total_reward > best_reward.value:
            weight_send = actor.get_actor_weights()
        # send data for training
        data_queue.put((process, data, weight_send, total_reward))

        # receive weights and set params to weights
        weights = weights_queue.get()

        report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len {}, ' \
                     'reward: {:.2f}, original_reward {:.4f}; best reward: {:.2f} noise {}'. \
            format(global_step.value, 1. * global_step.value / (time() - start), updates.value, steps,
                   total_reward, total_reward_original, best_reward.value, 'actions' if action_noise else 'params')
        print(report_str)

        with open(os.path.join(save_dir, 'train_report.log'), 'a') as f:
            f.write(report_str + '\n')

        actor.set_actor_weights(weights)
        action_noise = np.random.rand() < 1 - param_noise_prob
        if not action_noise:
            set_params_noise(actor, states_np, random_process.current_sigma)

        # clear buffers
        del states[:]
        del actions[:]
        del rewards[:]
        del terminals[:]

        if total_episodes % 100 == 0:
            env = RunEnv2(state_transform, max_obstacles=config.num_obstacles, skip_frame=config.skip_frames)
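
The run_agent examples (here and in Example 10) add exploration noise drawn from an OrnsteinUhlenbeckProcess, whose implementation is not part of these snippets. The sketch below is a minimal NumPy version of the standard discretized OU update, x(t+dt) = x(t) + theta*(mu - x(t))*dt + sigma*sqrt(dt)*N(0, 1), with parameter names mirroring the constructor calls above; the project's class additionally anneals sigma towards sigma_min over n_steps_annealing, which is omitted here.

import numpy as np

class OUNoiseSketch:
    # Illustrative Ornstein-Uhlenbeck exploration noise, not the project's actual class.
    def __init__(self, size, theta=0.1, mu=0.0, sigma=0.2, dt=0.01):
        self.size, self.theta, self.mu, self.sigma, self.dt = size, theta, mu, sigma, dt
        self.reset_states()

    def reset_states(self):
        # Restart the process at the mean at the beginning of each episode.
        self.x = np.full(self.size, self.mu)

    def sample(self):
        # Mean-reverting drift plus Gaussian diffusion; remember the new state.
        dx = self.theta * (self.mu - self.x) * self.dt \
            + self.sigma * np.sqrt(self.dt) * np.random.randn(self.size)
        self.x = self.x + dx
        return self.x

It would be used the same way as above: create noise = OUNoiseSketch(size=env.noutput), call noise.reset_states() after each env.reset(), and perturb the policy output with action = actor.act(state) + noise.sample().
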
Example 9
def main():
    args = get_args()
    args.critic_layers = literal_eval(args.critic_layers)
    args.actor_layers = literal_eval(args.actor_layers)

    # create save directory
    save_dir = os.path.join('weights', args.exp_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    else:
        shutil.move(save_dir, save_dir + '.backup')
        os.makedirs(save_dir)

    # state_transform = StateVelCentr(obstacles_mode='standard', exclude_centr=True, vel_states=[])
    # num_actions = 18
    # state_transform = NormState(args.prosthetic)
    if args.prosthetic:
        num_actions = 19
    else:
        num_actions = 22

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    env.change_model(args.modeldim, args.prosthetic, args.difficulty)
    state = env.reset(seed=42, difficulty=0)
    # obs = env.get_observation()
    d = env.get_state_desc()
    state_size = len(env.dict_to_vec(d))
    del env

    # build model
    model_params = {
        'state_size': state_size,
        'num_act': num_actions,
        'gamma': args.gamma,
        'actor_layers': args.actor_layers,
        'critic_layers': args.critic_layers,
        'actor_lr': args.actor_lr,
        'critic_lr': args.critic_lr,
        'layer_norm': args.layer_norm
    }
    print('building model')
    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(
        **model_params)
    actor = Agent(actor_fn, params_actor, params_crit)

    if args.weights is not None:
        actor.load(args.weights)  # set_actor_weights & set_crit_weights

    actor_lr_step = (args.actor_lr - args.actor_lr_end) / args.max_steps
    critic_lr_step = (args.critic_lr - args.critic_lr_end) / args.max_steps

    # build actor
    weights = [p.get_value() for p in params_actor]

    # build replay memory
    memory = ReplayMemory(state_size, num_actions, 5000000)

    # init shared variables
    global_step = Value('i', 0)
    updates = Value('i', 0)
    best_reward = Value('f', -1e8)
    testing = Value('i', 0)

    # init agents
    data_queue = Queue()
    workers = []
    weights_queues = []
    num_agents = args.n_threads - 2
    print('starting {} agents'.format(num_agents))
    for i in range(num_agents):
        w_queue = Queue()
        worker = Process(target=run_agent,
                         args=(args, model_params, weights, data_queue,
                               w_queue, i, global_step, updates, best_reward,
                               args.param_noise_prob, save_dir,
                               args.max_steps))
        worker.daemon = True
        worker.start()
        sleep(args.sleep)
        workers.append(worker)
        weights_queues.append(w_queue)

    prev_steps = 0
    start_save = time()
    start_test = time()
    weights_rew_to_check = []
    while global_step.value < args.max_steps:

        # get all data
        try:
            i, batch, weights_check, reward = data_queue.get_nowait()
            if weights_check is not None:
                weights_rew_to_check.append((weights_check, reward))
            weights_queues[i].put(weights)
            # add data to memory
            memory.add_samples(*batch)
        except queue.Empty:
            pass

        # training step
        # TODO: consider not training during testing model
        if len(memory) > args.start_train_steps:
            batch = memory.random_batch(args.batch_size)

            # if np.random.rand() < args.flip_prob:
            #     states, actions, rewards, terminals, next_states = batch
            #
            #     states_flip = state_transform.flip_states(states)
            #     next_states_flip = state_transform.flip_states(next_states)
            #     actions_flip = np.zeros_like(actions)
            #     actions_flip[:, :num_actions//2] = actions[:, num_actions//2:]
            #     actions_flip[:, num_actions//2:] = actions[:, :num_actions//2]
            #
            #     states_all = np.concatenate((states, states_flip))
            #     actions_all = np.concatenate((actions, actions_flip))
            #     rewards_all = np.tile(rewards.ravel(), 2).reshape(-1, 1)
            #     terminals_all = np.tile(terminals.ravel(), 2).reshape(-1, 1)
            #     next_states_all = np.concatenate((next_states, next_states_flip))
            #     batch = (states_all, actions_all, rewards_all, terminals_all, next_states_all)

            actor_loss, critic_loss = train_fn(*batch)
            updates.value += 1
            if np.isnan(actor_loss):
                raise ValueError('actor loss is nan')
            if np.isnan(critic_loss):
                raise ValueError('critic loss is nan')
            target_update_fn()
            weights = actor.get_actor_weights()

        delta_steps = global_step.value - prev_steps
        prev_steps += delta_steps

        actor_lr.set_value(
            lasagne.utils.floatX(
                max(actor_lr.get_value() - delta_steps * actor_lr_step,
                    args.actor_lr_end)))
        critic_lr.set_value(
            lasagne.utils.floatX(
                max(critic_lr.get_value() - delta_steps * critic_lr_step,
                    args.critic_lr_end)))

        # check if need to save and test
        if (time() - start_save) / 60. > args.save_period_min:
            fname = os.path.join(
                save_dir, 'weights_updates_{}.pkl'.format(updates.value))
            actor.save(fname)
            start_save = time()

        # start new test process
        weights_rew_to_check = [(w, r) for w, r in weights_rew_to_check
                                if r > best_reward.value and r > 0]
        weights_rew_to_check = sorted(weights_rew_to_check, key=lambda x: x[1])
        if ((time() - start_test) / 60. > args.test_period_min
                or len(weights_rew_to_check) > 0) and testing.value == 0:
            testing.value = 1
            print('start test')
            if len(weights_rew_to_check) > 0:
                _weights, _ = weights_rew_to_check.pop()
            else:
                _weights = weights
            worker = Process(target=test_agent,
                             args=(args, testing, args.num_test_episodes,
                                   model_params, _weights, best_reward,
                                   updates, global_step, save_dir))
            worker.daemon = True
            worker.start()
            start_test = time()

    # end all processes
    for w in workers:
        w.join()
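
In Example 9 the trainer and the run_agent workers exchange data through one shared data_queue and one weights queue per worker: each worker pushes (process id, rollout, optional weights, episode reward) and then blocks until the trainer answers with fresh actor weights. The following self-contained sketch reproduces just that hand-shake with dummy payloads in place of rollouts, network weights, and the training step; it illustrates the pattern, not the project's actual code.

import queue
from multiprocessing import Process, Queue, Value
from time import sleep

def worker(idx, data_queue, weights_queue, global_step, episodes=3):
    for _ in range(episodes):
        rollout = [idx] * 5                  # placeholder for (states, actions, rewards, terminals)
        global_step.value += len(rollout)
        data_queue.put((idx, rollout))       # hand the rollout to the trainer
        weights = weights_queue.get()        # block until fresh weights arrive (loaded into the actor in the real code)

def trainer(num_workers=2, episodes_per_worker=3):
    data_queue = Queue()
    weights_queues = [Queue() for _ in range(num_workers)]
    global_step = Value('i', 0)
    workers = [Process(target=worker,
                       args=(i, data_queue, weights_queues[i], global_step, episodes_per_worker))
               for i in range(num_workers)]
    for w in workers:
        w.daemon = True
        w.start()

    weights = 0
    expected = num_workers * episodes_per_worker
    received = 0
    while received < expected:
        try:
            idx, rollout = data_queue.get_nowait()   # drain rollouts as they arrive
            received += 1
            weights += 1                             # placeholder for a training update
            weights_queues[idx].put(weights)         # send updated weights back to that worker
        except queue.Empty:
            sleep(0.01)
    print('finished after', global_step.value, 'environment steps')
    for w in workers:
        w.join()

if __name__ == '__main__':
    trainer()

The non-blocking get_nowait keeps the trainer loop free to interleave replay-buffer training steps and learning-rate updates with data collection, which is exactly how the loop in Example 9 is structured.
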
Example 10
def run_agent(args,
              model_params,
              weights,
              data_queue,
              weights_queue,
              process,
              global_step,
              updates,
              best_reward,
              param_noise_prob,
              save_dir,
              max_steps=10000000):

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(
        **model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=config.skip_frames)
    env.spec.timestep_limit = 3000  # ndrw
    # random_process = OrnsteinUhlenbeckProcess(theta=.1, mu=0., sigma=.3, size=env.noutput, sigma_min=0.05, n_steps_annealing=1e6)

    sigma_rand = random.uniform(0.05, 0.5)
    dt_rand = random.uniform(0.002, 0.02)
    param_noise_prob = random.uniform(param_noise_prob * 0.25,
                                      min(param_noise_prob * 1.5, 1.))

    random_process = OrnsteinUhlenbeckProcess(theta=.1,
                                              mu=0.,
                                              sigma=sigma_rand,
                                              dt=dt_rand,
                                              size=env.noutput,
                                              sigma_min=0.05,
                                              n_steps_annealing=1e6)

    print('OUProcess_sigma = ' + str(sigma_rand) + '    OUProcess_dt = ' +
          str(dt_rand) + '    param_noise_prob = ' + str(param_noise_prob))

    # prepare buffers for data
    states = []
    actions = []
    rewards = []
    terminals = []

    total_episodes = 0
    start = time()
    action_noise = True
    while global_step.value < max_steps:
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=args.difficulty)
        random_process.reset_states()

        total_reward = 0.
        total_reward_original = 0.
        terminal = False
        steps = 0

        while not terminal:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            if action_noise:
                action += random_process.sample()

            next_state, reward, next_terminal, info = env._step(action)
            total_reward += reward
            total_reward_original += info['original_reward']
            steps += 1
            global_step.value += 1

            # add data to buffers
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            terminals.append(terminal)

            state = next_state
            terminal = next_terminal

            if terminal:
                break

        total_episodes += 1

        # add data to buffers after episode end
        states.append(state)
        actions.append(np.zeros(env.noutput))
        rewards.append(0)
        terminals.append(terminal)

        states_np = np.asarray(states).astype(np.float32)
        data = (
            states_np,
            np.asarray(actions).astype(np.float32),
            np.asarray(rewards).astype(np.float32),
            np.asarray(terminals),
        )
        weight_send = None
        if total_reward > best_reward.value:
            weight_send = actor.get_actor_weights()
        # send data for training
        data_queue.put((process, data, weight_send, total_reward))

        # receive weights and set params to weights
        weights = weights_queue.get()

        # report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len: {}, pelvis_X: {:.2f}, reward: {:.2f}, original_reward {:.4f}, best reward: {:.2f}, noise: {}'. \
        #     format(global_step.value, 1. * global_step.value / (time() - start), updates.value, steps, info['pelvis_X'], total_reward, total_reward_original, best_reward.value, 'actions' if action_noise else 'params')
        # report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len: {}, pelvis_X: {:.2f}, reward: {:.2f}, best reward: {:.2f}, noise: {}'. \
        #     format(global_step.value, 1. * global_step.value / (time() - start), updates.value, steps, info['pelvis_X'], total_reward, best_reward.value, 'actions' if action_noise else 'params')
        report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len: {}, pelvis_X: {:.2f}, pelvis_Z: {:.2f}, reward: {:.2f}, best reward: {:.2f}, noise: {}'. \
            format(global_step.value, 1. * global_step.value / (time() - start), updates.value, steps, info['pelvis'][0], info['pelvis'][2], total_reward, best_reward.value, 'actions' if action_noise else 'params')
        print(report_str)

        try:
            with open(os.path.join(save_dir, 'train_report.log'), 'a') as f:
                f.write(report_str + '\n')
        except OSError:
            print('#############################################')
            print('failed to write train_report.log')
            print('#############################################')

        actor.set_actor_weights(weights)
        action_noise = np.random.rand() < 1 - param_noise_prob
        if not action_noise:
            set_params_noise(actor, states_np, random_process.current_sigma)

        # clear buffers
        del states[:]
        del actions[:]
        del rewards[:]
        del terminals[:]

        if total_episodes % 100 == 0:
            env = RunEnv2(model=args.modeldim,
                          prosthetic=args.prosthetic,
                          difficulty=args.difficulty,
                          skip_frame=config.skip_frames)