def test(env, n_episodes, policy, exp, exp_name, agent, render=True):
    for episode in range(n_episodes):
        obs = env.reset()
        state = utils.get_state(obs)
        total_reward = 0.0
        for _ in count():
            action = policy(state.to('cuda')).max(1)[1].view(1, 1)

            if render:
                env.render()
                time.sleep(0.02)

            obs, reward, done, info = env.step(action)

            total_reward += reward

            if not done:
                next_state = utils.get_state(obs)
            else:
                next_state = None

            state = next_state

            if done:
                out_str = "Finished Episode {} (test) with reward {}".format(episode, total_reward)
                exp.log(out_str)
                with open(agent.CONSTANTS.TEST_LOG_FILE_PATH, 'wt') as f:
                    f.write(out_str)
                break
    env.close()
Example #2
def train_model(agent, episode, data, ep_count=100, batch_size=32, window_size=10):
    data_length = len(data) - 1
    total_profit = 0
    agent.inventory = []
    avg_loss = []
    start = clock()
    state = get_state(data, 0, window_size + 1)
    for t in tqdm(range(data_length), total=data_length, leave=True, desc='Episode {}/{}'.format(episode, ep_count)):
        action = agent.act(state)
        # SIT
        next_state = get_state(data, t + 1, window_size + 1)
        reward = 0
        # BUY
        if action == 1:
            agent.inventory.append(data[t])
        # SELL
        elif action == 2 and len(agent.inventory) > 0:
            bought_price = agent.inventory.pop(0)
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price

        done = True if t == data_length - 1 else False
        agent.remember(state, action, reward, next_state, done)
        state = next_state

        if len(agent.memory) > batch_size:
            loss = agent.train_experience_replay(batch_size)
            avg_loss.append(loss)
        if done:
            end = clock() - start

    if episode % 20 == 0:
        agent.save(episode)
    return (episode, ep_count, total_profit, np.mean(np.array(avg_loss)), end)
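Examples #2 and #7 both lean on a get_state(data, t, n_days) helper that the listing does not show. A minimal, self-contained sketch of a common implementation for this kind of trading agent (an assumption, not necessarily the exact helper these projects use) builds the state as the sigmoid of consecutive price differences over an n-day window:

import math

import numpy as np


def sigmoid(x):
    # Numerically safe logistic function.
    try:
        return 1.0 / (1.0 + math.exp(-x))
    except OverflowError:
        return 0.0 if x < 0 else 1.0


def get_state(data, t, n_days):
    # data is assumed to be a plain list of prices; pad with the first price
    # when the window extends past the start of the series.
    d = t - n_days + 1
    block = data[d:t + 1] if d >= 0 else [data[0]] * -d + data[0:t + 1]
    # Feature vector: sigmoid of each consecutive price difference.
    return np.array([[sigmoid(block[i + 1] - block[i]) for i in range(n_days - 1)]])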
Example #3
    def play(self, env):

        print(f'[I] - Set Online Net to evaluation mode ... ', end='')
        self.online_net.eval()
        print('Done.')

        for i in range(5):

            stackedstates = StackedStates()
            env.reset()
            state = get_state(env, stackedstates, self.device)

            for t in count():
                env.render()
                # time.sleep(0.04)

                with torch.no_grad():
                    action = torch.argmax(self.online_net(state), dim=1).item()

                _, reward, done, _ = env.step(action)
                state = get_state(env, stackedstates, self.device)

                if done or t > 1000:
                    env.close()
                    break
Example #4
def create_components(initial, bindings):
    """From bindings list, it creates components and return a component list"""
    components = []


    for binding in bindings:
        #r, p, n = binding.require, binding.provide, binding.arity

        state = get_state(binding.provide_xpath)
        c = get_component(components, binding.provide_xpath)
        s = c.get_state(state)
        if binding.type == 'local':
            arity = 1
        else:
            arity = common.INFINITY
        s.provides.append(Provide(binding.provide_xpath, arity))

        # Secondly, we create components that require something
        state = get_state(binding.require_xpath)
        c = get_component(components, binding.require_xpath)
        s = c.get_state(state)
        s.requires.append(Require(binding.provide_xpath, binding.arity))

    # We manually add the state that contains the provide required by
    # the user. It is not necessarily added via the bindings, because if
    # this state doesn't contain any require, it doesn't appear in the
    # bindings.
    c = get_component(components, initial.xpath)
    s = c.get_state(get_state(initial.xpath))

    return components
Example #5
    def eval(self, env, verbose=False, display=False):
        if verbose:
            print('Start evaluation.')

        rewards = []
        for i_episode in range(self.episode):
            reward = []
            time_step = env.reset()
            state = torch.tensor([utils.get_state(time_step.observation)],
                                 device=self.device)
            while not time_step.last():
                action_ID = self.select_action(state, random_choose=False)
                time_step = env.step(self.action_space[action_ID])
                reward.append(time_step.reward)
                state = torch.tensor([utils.get_state(time_step.observation)],
                                     device=self.device)

            reward = np.mean(reward)
            rewards.append(reward)

            if verbose:
                print('Episode {} average reward: {}'.format(
                    i_episode, reward))

        if verbose:
            print('End evaluation.')

        print('Average reward: {}'.format(np.mean(rewards)))
Example #6
def init_reward_matrix():
    reward = []

    # set the reward of unconnected area to -1000
    for i in range(0, 27):
        row = []
        for j in range(0, 27):
            row.append(-1000)
        reward.append(row)

    # set the reward of connected states to 0
    for state in range(0, 27):  # enumerate state
        pos_s = state % 3
        pos_m = (state // 3) % 3
        pos_l = (state // 9) % 3

        for i in range(0, 3):  # small can move freely
            reward[state][get_state(i, pos_m, pos_l)] = 0

        if pos_m != pos_s:  # if middle can move
            for i in range(0, 3):
                if pos_s != i:
                    reward[state][get_state(pos_s, i, pos_l)] = 0

        if pos_l != pos_m and pos_l != pos_s:  # if large can move
            for i in range(0, 3):
                if pos_m != i and pos_s != i:
                    reward[state][get_state(pos_s, pos_m, i)] = 0

        reward[state][state] = -1000  # prevent loop

    return reward
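The reward matrix above indexes states as base-3 numbers built from the positions of the three discs (state % 3, (state // 3) % 3, (state // 9) % 3). The get_state helper it calls is therefore presumably just the inverse encoding; a minimal sketch, assuming that interpretation:

def get_state(pos_s, pos_m, pos_l):
    # Base-3 encoding of the three disc positions (each in 0..2): the small
    # disc is the least-significant digit, so the result lies in range(27).
    return pos_s + 3 * pos_m + 9 * pos_l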
Example #7
def evaluate_model(agent, data, window_size, debug):
    data_length = len(data) - 1
    state = get_state(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []
    for t in range(data_length):
        action = agent.act(state, is_eval=True)
        # SIT
        next_state = get_state(data, t + 1, window_size + 1)
        reward = 0
        # BUY
        if action == 1:
            agent.inventory.append(data[t])
            if debug:
                logging.debug('Buy at: {}'.format(format_currency(data[t])))
        # SELL
        elif action == 2 and len(agent.inventory) > 0:
            bought_price = agent.inventory.pop(0)
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            if debug:
                logging.debug('Sell at: {} | Position: {}'.format(
                    format_currency(data[t]),
                    format_position(data[t] - bought_price)))

        done = True if t == data_length - 1 else False
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            return total_profit
Example #8
    def exp_rep_pretrain(self, env):
        i = 0

        print('Pretrain Filling Experience Replay Memory ... ', end='')
        while i < self.exp_rep_pretrain_size:

            # Initialize the environment and state
            stackedstates = StackedStates()
            env.reset()
            state = get_state(env, stackedstates, self.device)

            for t in count():

                i += 1
                action = env.action_space.sample()
                _, reward, done, _ = env.step(action)

                reward = torch.tensor([reward], device=self.device)
                done = torch.tensor([done], device=self.device)
                action = torch.tensor([action], device=self.device)

                # Observe new state
                next_state = get_state(env, stackedstates, self.device)

                # Store the transition in memory
                self.memory.push(state, action, next_state, reward, done)

                if done:
                    print("{} ".format(t + 1), end='')
                    break
                else:
                    # Move to the next state
                    state = next_state
        print('Done.')
Example #9
def dashboard():
    if utils.needs_user():
        return flask.redirect(flask.url_for('setup'))
    if ('logged_in' not in flask.session) or (not flask.session['logged_in']):
        return flask.redirect(flask.url_for('login'))
    running = utils.check_pid(Config["pidfile"])
    tstate = utils.get_state("alarm_thread")
    tutime_s = ""
    trunning = False
    if tstate:
        trunning, tutime = tstate
        tutime_s = time.strftime("%c", time.localtime(tutime))

    thread_state = {"running": bool(trunning), "utime": tutime_s}

    utime = time.strftime("%c", time.localtime())
    state_text = "Not Runnning"
    flags = {
        "alarm": False,
        "armed": False,
        "disarmed": False,
        "tripped": False,
        "faulted": False
    }

    state_data = None

    alarm_state_d = utils.get_state("alarm")
    if alarm_state_d:
        alarm_state, state_time_i = alarm_state_d
        if alarm_state is not None:
            utime = time.strftime("%c", time.localtime(state_time_i))
            state_text = Alarm.ALARM_STATES[alarm_state]
            flags["alarm"] = alarm_state == Alarm.ALARMED
            flags["disarmed"] = alarm_state == Alarm.DISARMED
            flags["tripped"] = alarm_state == Alarm.TRIPPED
            flags["faulted"] = alarm_state == Alarm.FAULT
            flags["armed"] = alarm_state == Alarm.ARMED

    states = utils.get_states_not("alarm", "alarm_thread")
    state_data = {
        state['key']: {
            'data': state['data'],
            'time': time.strftime("%c", time.localtime(state['state_time']))
        }
        for state in states
    }

    interfaces = utils.get_interfaces()
    return flask.render_template('dashboard.j2',
                                 flags=flags,
                                 running=running,
                                 thread_state=thread_state,
                                 state_text=state_text,
                                 state_data=state_data,
                                 utime=utime,
                                 interfaces=interfaces,
                                 smbio=smbio)
Example #10
def dashboard():
    if utils.needs_user():
        return flask.redirect(flask.url_for('setup'))
    if ('logged_in' not in flask.session) or (not flask.session['logged_in']):
        return flask.redirect(flask.url_for('login'))
    running = utils.check_pid(Config["pidfile"])
    tstate = utils.get_state("alarm_thread")
    tutime_s = ""
    trunning = False
    if tstate:
        trunning, tutime = tstate
        tutime_s = time.strftime("%c", time.localtime(tutime))

    thread_state = {
        "running": bool(trunning),
        "utime": tutime_s}

    utime = time.strftime("%c", time.localtime())
    state_text = "Not Runnning"
    flags = {
        "alarm": False,
        "armed": False,
        "disarmed": False,
        "tripped": False,
        "faulted": False}

    state_data = None

    alarm_state_d = utils.get_state("alarm")
    if alarm_state_d:
        alarm_state, state_time_i = alarm_state_d
        if alarm_state is not None:
            utime = time.strftime("%c", time.localtime(state_time_i))
            state_text = Alarm.ALARM_STATES[alarm_state]
            flags["alarm"] = alarm_state == Alarm.ALARMED
            flags["disarmed"] = alarm_state == Alarm.DISARMED
            flags["tripped"] = alarm_state == Alarm.TRIPPED
            flags["faulted"] = alarm_state == Alarm.FAULT
            flags["armed"] = alarm_state == Alarm.ARMED

    states = utils.get_states_not("alarm", "alarm_thread")
    state_data = {
        state['key']: {
            'data': state['data'],
            'time': time.strftime("%c", time.localtime(state['state_time']))}
        for state in states}

    interfaces = utils.get_interfaces()
    return flask.render_template(
        'dashboard.j2',
        flags=flags,
        running=running,
        thread_state=thread_state,
        state_text=state_text,
        state_data=state_data,
        utime=utime,
        interfaces=interfaces,
        smbio=smbio)
Example #11
def get_exp_context(code):
    ctx = {
        'user_type': 'researcher',
        'today_date': datetime.now().strftime('%Y-%m-%d'),
        'experiment': Experiment.query.filter_by(code=code).first(),
        'protocols': Protocol.query.filter_by(exp_code=code).all(),
        'dashboard_page': True
    }
    ctx['show_pam'] = utils.get_state("pam", ctx['protocols'])
    ctx['show_survey'] = utils.get_state("push_survey", ctx['protocols'])
    return ctx
Example #12
def experiment_options(code):
    ctx = {
        'user_type': 'researcher',
        'today_date': datetime.now().strftime('%Y-%m-%d'),
        'experiment': Experiment.query.filter_by(code=code).first(),
        'protocols': Protocol.query.filter_by(exp_code=code).all(),
        'experiment_page': True
    }
    ctx['show_pam'] = utils.get_state("pam", ctx['protocols'])
    ctx['show_survey'] = utils.get_state("push_survey", ctx['protocols'])
    return render_template('experiment/create-edit-experiment.html', **ctx)
Example #13
File: deepq.py Project: shukon/SDC
def evaluate(env, load_path='agent.pt'):
    """ Evaluate a trained model and compute your leaderboard scores
	
	NO CHANGES SHOULD BE MADE TO THIS FUNCTION
	
    Parameters
    -------
    env: gym.Env
        environment to evaluate on
    load_path: str
        path to load the model (.pt) from
    """
    episode_rewards = [0.0]
    actions = get_action_set()
    action_size = len(actions)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # These are not the final evaluation seeds, do not overfit on these tracks!
    seeds = [
        22597174, 68545857, 75568192, 91140053, 86018367, 49636746, 66759182,
        91294619, 84274995, 31531469
    ]

    # Build & load network
    policy_net = DQN(action_size, device).to(device)
    checkpoint = torch.load(load_path, map_location=device)
    policy_net.load_state_dict(checkpoint)
    policy_net.eval()

    # Iterate over a number of evaluation episodes
    for i in range(10):
        env.seed(seeds[i])
        obs, done = env.reset(), False
        obs = get_state(obs)
        t = 0

        # Run each episode until episode has terminated or 600 time steps have been reached
        while not done and t < 600:
            env.render()
            action_id = select_greedy_action(obs, policy_net, action_size)
            action = actions[action_id]
            obs, rew, done, _ = env.step(action)
            obs = get_state(obs)
            episode_rewards[-1] += rew
            t += 1
        print('episode %d \t reward %f' % (i, episode_rewards[-1]))
        episode_rewards.append(0.0)

    print('---------------------------')
    print(' total score: %f' % np.mean(np.array(episode_rewards)))
    print('---------------------------')
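The evaluation loop above relies on a select_greedy_action helper that is not part of the snippet. A minimal sketch of what it presumably does (pure greedy action selection over the DQN's Q-values; the signature is taken from the call site, the body is an assumption):

import torch


def select_greedy_action(state, policy_net, action_size):
    # No exploration at evaluation time: return the index of the action with
    # the highest predicted Q-value (an integer in range(action_size)).
    with torch.no_grad():
        q_values = policy_net(state)  # expected shape: (1, action_size)
        return int(q_values.argmax(dim=1).item())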
Example #14
def get_non_local_provide(initial, bindings):
    """Return the list of lifecycle and state that are not local
    provide. They will be used by specification.

    :rtype: [{"component":lifecycle, "state": state}]

    """
    non_local = []
    i = initial.xpath
    non_local.append({"component": get_lifecycle(i), "state": get_state(i)})
    for b in bindings:
        if b.type == "external":
            non_local.append({"component": get_lifecycle(b.provide_xpath), "state": get_state(b.provide_xpath)})
    return non_local
Example #15
    def evaluate(self, env, render=False):
        success_count = 0
        for i_episode in range(self.evaluate_episode):
            obs = env.reset()
            s = get_state(obs)
            for i_step in range(self.max_timestep):
                if render:
                    env.render()
                a = self.select_action(s, train_mode=False)
                obs_, r_e, done, info = env.step(a)
                s_ = get_state(obs_)
                s = s_
            success_count += info['is_success']

        return success_count / self.evaluate_episode
Example #16
def _simulate_truncated(agent, node, frac_occupied=.5):
    # Rollout until (board.size ** 2) * frac_occupied positions are occupied by players' stones
    node.increment_visits()

    board = node.board.copy()
    curr_color = node.color

    while (np.sum(board.scores()) / (board.size ** 2)) < frac_occupied:
        state, valid_positions, valid_positions_mask = get_state(board, curr_color)
 
        if len(valid_positions) == 0:
            break

        action_p, _ = agent(tf.convert_to_tensor([state], dtype=tf.float32))
        action_p = action_p[0].numpy() * valid_positions_mask.reshape((-1,))
        action_idx = np.argmax(action_p)

        position_key = (int(action_idx / board.size), int(action_idx % board.size))
        board.apply_position(curr_color, valid_positions[position_key])
        curr_color = 1 - curr_color

    valid_positions = board.valid_positions(node.color)
    if len(valid_positions) == 0:
        return -_get_value_from_scores(board.scores(), node.color)

    _, v = node.get_p_v(agent)
    return -v
Example #17
def visualisation_plan(workspace_path):
    file_replay = workspace_path + "/" + aeolus.common.FILE_ARMONIC_REPLAY_FILLED
    file_metis_plan = workspace_path + "/" +  aeolus.common.FILE_METIS_PLAN_JSON

    with open(file_replay, 'r') as f:
        replay = json.load(f)
    with open(file_metis_plan, 'r') as f:
        metis_plan = json.load(f)

    plan = aeolus.launcher.metis_to_armonic(metis_plan, replay)

    def is_final_state(jid, cpt, plan):
        # Used to know if a component state change is the last one or not.
        for p in plan:
            if p.type == 'state-goto' and p.jid == jid and utils.get_lifecycle(p.xpath) == cpt:
                return False
        return True

    ret = []

    for idx, action in enumerate(plan):
        if type(action) == StateGoto:
            action.location = action.jid
            action.component_type = utils.get_lifecycle(action.xpath)
            action.state = utils.get_state(action.xpath)
            action.final = is_final_state(action.location, action.component_type, plan[idx+1:])
            action.last_one = (idx == len(plan) - 1)

        else:
            pass

    plan.insert(0, Start(len(plan)))
    plan.append(End())

    return plan
Example #18
File: modes.py Project: joram/steps
def mode_solid_sparkly(key, pixels):
    #####
    # To make John happy (he's worried you'll think he wrote this):
    # This function was written by Caitlin, who has not coded in rather
    # a long time. And who thinks all this formatting bullshit is exactly that.
    # But it means this code is not nearly as elegant as he would have it be.
    # And he can f*****g deal with it. <3
    ######
    state = get_state()
    primary = state.get("primary")
    sparkles = state.get("sparkles")
    primary_color = (
        primary.get("r", 255),
        primary.get("g", 255),
        primary.get("b", 255),
    )
    sparkles_color = (
        sparkles.get("r", 255),
        sparkles.get("g", 255),
        sparkles.get("b", 255),
    )

    pixels.fill(primary_color)
    selected_pixels = random.sample(range(0, 300), 10)

    while key == state_key():
        pixels[selected_pixels[0]] = primary_color
        selected_pixels.pop(0)

        new_pixel = random.randint(0, 299)
        selected_pixels.append(new_pixel)
        pixels[new_pixel] = sparkles_color

        pixels.show()
        time.sleep(.25)
Example #19
File: modes.py Project: joram/steps
def mode_chaos_colors(key, pixels):
    #####
    # A bunch of flashing colors - by Caitlin
    #####
    state = get_state()

    def rand_color():

        rand_1 = random.randint(0, 254)
        rand_2 = random.randint(0, (254 - rand_1))
        rand_3 = 254 - (rand_1 + rand_2)

        return rand_1, rand_2, rand_3

        # red = (255, 0, 0)
        # green = (0, 255, 0)
        # blue = (0, 0, 255)
        # return random.choice([red, green, blue])

    # Init by setting all to random colors
    for i in range(0, len(pixels)):
        pixels[i] = rand_color()
    time.sleep(.01)

    # Each .5s, change 10 pixels
    while key == state_key():
        selected_pixels = random.sample(range(0, 300), 20)
        for i in selected_pixels:
            pixels[i] = rand_color()
        time.sleep(.5)

        pixels.show()
Example #20
def get_component(components, xpath):
    name = get_lifecycle(xpath)
    
    # First, we create components that provide something
    os = [armonic.utils.OsTypeMBS(), armonic.utils.OsTypeDebian()]
    lfms = []
    for o in os:
        l = armonic.serialize.Serialize(os_type=o)
        lf = name
        state = get_state(xpath)
        state_xpath = "%s/%s" % (lf, state)
        path = l.state_goto_path(state_xpath)[0]['paths']
        if len(path) > 1:
            raise Exception("Number of paths to reach %s must not be greather than 1" % state_xpath)
        if len(path) == 1:
            lfms.append(l)

    if len(lfms) > 1:
        logger.error("%s is available the following OS:" % xpath)
        for l in lfms:
            logger.error("  %s" % l.lf_manager.os_type)
        raise Exception("%s is available on several OS and this is not supported (yet)." % xpath)
    lfm = lfms[0]

    c = None
    for i in components:
        if i.name == name:
            c = i
            break
    if c is None:
        c = Component(name, lfm)
        components.append(c)
    return c
Example #21
File: modes.py Project: joram/steps
def mode_fading(key, pixels):
    state = get_state()
    colors = state.get("colors", [
        {
            "r": 255,
            "g": 255,
            "b": 255
        },
        {
            "r": 0,
            "g": 0,
            "b": 0
        },
    ])
    c1 = _color_tuple(colors[0])
    c2 = _color_tuple(colors[1])
    i = 0
    delta = 1
    while key == state_key():
        color = _color_between(c1, c2, float(i) / 100.0)
        print(color)
        pixels.fill(color)
        pixels.show()
        time.sleep(0.01)
        if i >= 100:
            delta = -1
        if i <= 0:
            delta = 1
        i += delta
Example #22
File: main.py Project: joram/steps
def file_view():
    state = request.files
    print(state)
    import pdb
    pdb.set_trace()
    # set_state(state)
    # print(state)
    return jsonify(get_state())
Example #23
def show_table():
    project_id = int(request.args.get('project_id'))
    summary_dict, df = utils.get_state(project_id)
    return render_template("show_table.html",
        summary_dict = summary_dict,
        table=df.to_html(
            index=False,header="true",
            classes="display",table_id="example",border=0),
        )
Example #24
    def train(self, env, logger=None):
        total_step = 0
        loss_pi, loss_q, loss_forward, loss_inverse = 0., 0., 0., 0.
        for i_episode in range(self.max_episode):
            obs = env.reset()
            s = get_state(obs)

            cumulative_r = 0.
            for i_step in range(self.max_timestep):
                a = self.select_action(s)
                obs_, r_e, done, info = env.step(a)
                s_ = get_state(obs_)

                r_i = self.get_intrisic_reward(s, a, s_)
                r = r_e + r_i

                self.memory.store(s, a, r, s_)
                s = s_

                if len(self.memory) > self.batch_size:
                    loss_pi, loss_q, loss_forward, loss_inverse = self.learn()
                cumulative_r += r_e
                total_step += 1

            print(
                'i_episode: {} total step: {} cumulative reward: {:.4f} is_success: {} '
                .format(i_episode, total_step, cumulative_r,
                        info['is_success']))
            if logger is not None and i_episode % self.log_interval == 0:
                logger.add_scalar('Indicator/cumulative reward', cumulative_r,
                                  i_episode)
                logger.add_scalar('Loss/pi_loss', loss_pi, i_episode)
                logger.add_scalar('Loss/q_loss', loss_q, i_episode)
                logger.add_scalar('Loss/forward_loss', loss_forward, i_episode)
                logger.add_scalar('Loss/inverse_loss', loss_inverse, i_episode)
            if i_episode % self.evaluate_interval == 0:
                success_rate = self.evaluate(env)
                if logger is not None:
                    logger.add_scalar('Indicator/success rate', success_rate,
                                      i_episode)

            if i_episode > self.save_model_start and i_episode % self.save_model_interval == 0:
                self.save_model(remarks='{}_{}'.format(env.spec.id, i_episode))
Example #25
File: modes.py Project: joram/steps
def mode_solid(key, pixels):
    state = get_state()
    color = state.get("color")
    print(f"solid: {color}")
    pixels.fill((
        color.get("r", 255),
        color.get("g", 255),
        color.get("b", 255),
    ))
    pixels.show()
    time.sleep(5)
Example #26
def main():
    #time_step = 0.0002 # TODO: context.get_continuous_state_vector() fails
    time_step = 2e-3

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--cfree', action='store_true',
                        help='Disables collisions when planning')
    parser.add_argument('-d', '--deterministic', action='store_true',
                        help='Manually sets the random seeds used by the stream generators')
    parser.add_argument('-s', '--simulate', action='store_true',
                        help='Simulates the system')
    args = parser.parse_args()

    if args.deterministic:
        # TODO: still not fully deterministic
        random.seed(0)
        np.random.seed(0)

    import meshcat
    meshcat_vis = meshcat.Visualizer()
    task, diagram, state_machine = load_station(time_step=time_step)
    print(task)

    plant = task.mbp
    #dump_plant(plant)
    #dump_models(plant)
    RenderSystemWithGraphviz(diagram) # Useful for getting port names
    context = diagram.GetMutableSubsystemContext(plant, task.diagram_context)

    task.publish()
    initial_state = get_state(plant, context)
    trajectories = plan_trajectories(task, context, collisions=not args.cfree)
    if trajectories is None:
        return

    ##################################################

    set_state(plant, context, initial_state)
    if args.simulate:
        from manipulation_station.robot_plans import JointSpacePlan
        splines, gripper_setpoints = convert_splines(plant, task.robot, task.gripper, context, trajectories)
        sim_duration = compute_duration(splines)
        plan_list = [JointSpacePlan(spline) for spline in splines]
        print('Splines: {}\nDuration: {:.3f} seconds'.format(len(splines), sim_duration))

        task, diagram, state_machine = load_station(time_step=time_step, plan=(plan_list, gripper_setpoints))
        task.set_initial()
        #set_state(plant, context, initial_state)
        #state_machine.Load(plan_list, gripper_setpoints)
        simulate_splines(task.diagram, task.diagram_context, sim_duration)
    else:
        step_trajectories(diagram, task.diagram_context, context, trajectories, time_step=0.001)
Example #27
def table_csv():
    project_id = int(request.args.get('project_id'))
    summary_dict, df = utils.get_state(project_id)
    with tempfile.TemporaryDirectory() as tempdir:
        filepath = os.path.join(tempdir,'out.csv')
        df.to_csv(filepath,index=False)
        with open(filepath,'r') as f:
            csv_txt = f.read()

    tstamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    return Response(
        csv_txt,
        mimetype="text/csv",
        headers={"Content-disposition":
                 f"attachment; filename={tstamp}.csv"})
Example #28
    def create_visualisation_attributes(self):
        def is_final_state(jid, cpt, actions):
            # Used to know if a component state change is the last one or not.
            for p in actions:
                if p.type == 'state-goto' and p.jid == jid and utils.get_lifecycle(p.xpath) == cpt:
                    return False
            return True

        for idx, action in enumerate(self.actions):
            if type(action) == StateGoto:
                action.location = action.jid
                action.component_type = utils.get_lifecycle(action.xpath)
                action.state = utils.get_state(action.xpath)
                action.final = is_final_state(action.location, action.component_type, self.actions[idx+1:])
                action.last_one = (idx == len(self.actions) - 1)
            else:
                pass

        # Add header and footer used to inform the interface that deployment has started and ended
        self.actions.insert(0, Start(len(self.actions)))
        self.actions.append(End())
Example #29
File: modes.py Project: joram/steps
def mode_solid_rough(key, pixels):
    state = get_state()
    color = state.get("color")
    variance = int(state.get("variance", 10))
    print(f"solid rough: {color}")

    for i in range(0, len(pixels)):

        r = color.get("r", 255) + random.randint(-variance, variance)
        g = color.get("g", 255) + random.randint(-variance, variance)
        b = color.get("b", 255) + random.randint(-variance, variance)

        r = min(255, max(0, r))
        g = min(255, max(0, g))
        b = min(255, max(0, b))

        pixels[i] = (r, g, b)

    time.sleep(.01)

    pixels.show()
Example #30
    def tianbiao_my(person, n):
        base_row_index = n * 6
        r = 0 + base_row_index
        set_value(r, 2, person['成员姓名'])
        set_value(r, 5, person['性别'])
        set_value(r, 8, '汉')
        set_value(r, 10, get_birth_day(person['身份证号码']))
        set_font_size(r, 10, 9)
        set_value(r, 12, person['文化程度'])

        r = 1 + base_row_index
        set_value(r, 2, str(person['身份证号码']))
        set_value(r, 8, person['兵役状况'])  # 兵役情况

        # set_value(r, 10, phone_numbers[person['成员姓名']])
        set_value(r, 10, person['联系电话'])

        set_value(r, 12, person['与户主关系'])

        r = 2 + base_row_index
        set_value(
            r, 2,
            get_power(person[[
                '土地(共有)使用权', '保留型土地使用权', '承包经营权', '集体资产管理权', '集体收益分配权'
            ]]))
        set_value(r, 7, get_state(person['存在状态']))

        set_align(r, 2, WD_ALIGN_PARAGRAPH.JUSTIFY)
        set_align(r, 7, WD_ALIGN_PARAGRAPH.LEFT)

        r = 3 + base_row_index
        # set_value(r, 7, get_state(person['存在状态']))
        set_value(r, 10, person['现住地址'])

        r = 4 + base_row_index
        # set_value(r, 2, get_power(person[['土地(共有)使用权', '保留型土地使用权', '承包经营权', '集体资产管理权', '集体收益分配权']]))
        set_value(r, 10, str(person['户籍号']))

        r = 5 + base_row_index
        set_value(r, 2, person['婚姻状况'])  # 婚姻状况
Example #31
def _simulate(agent, node):
    # Rollout until a terminal state is reached and return true win/loss value.
    node.increment_visits()

    board = node.board.copy()
    curr_color = node.color

    while True:
        state, valid_positions, valid_positions_mask = get_state(board, curr_color)
 
        if len(valid_positions) == 0:
            break

        action_p, _ = agent(tf.convert_to_tensor([state], dtype=tf.float32))
        action_p = action_p[0].numpy() * valid_positions_mask.reshape((-1,))
        action_idx = np.random.choice(len(action_p), p=action_p / np.sum(action_p))

        position_key = (int(action_idx / board.size), int(action_idx % board.size))
        board.apply_position(curr_color, valid_positions[position_key])

        curr_color = 1 - curr_color

    return -_get_value_from_scores(board.scores(), node.color)
Example #32
    def move(self, board):
        if self.mcts is not None:
            try:
                _, mcts_p, _, _ = self.mcts.search(board, self.color)
                # _, mcts_p, _, _ = mcts(board, self.agent, self.color, n_iter=self._mcts_iter, c=1)
            except TerminalStateException:
                return

            action_idx = np.argmax(mcts_p)
        else:
            state, valid_positions, valid_positions_mask = get_state(
                board, self.color)
            if len(valid_positions) == 0:
                return

            action_p, _ = self.agent(
                tf.convert_to_tensor([state], dtype=tf.float32))
            action_p = action_p[0].numpy() * valid_positions_mask.reshape(
                (-1, ))
            action_idx = np.argmax(action_p)
            # action_idx = np.random.choice(len(action_p), p=action_p / np.sum(action_p))

        return (int(action_idx / board.size), int(action_idx % board.size))
Example #33
File: main.py Project: joram/steps
def drive_leds():
    global done
    key = ""
    while not done:
        new_key = state_key()
        if new_key != key:
            key = new_key
            state = get_state()
            mode = state.get("mode", "solid")
            print(f"change in state!: {state}")
            func = {
                "off": mode_off,
                "solid": mode_solid,
                "solid_rough": mode_solid_rough,
                "fading": mode_fading,
                "solid_rainbow": mode_solid_rainbow,
                "sliding_rainbow": mode_sliding_circle_rainbow,
                "halloween": mode_halloween,
                "per_step": mode_per_step,
                "nyan_cat": mode_nyan_cat,
                "nyan_cats": mode_nyan_cats,
                "solid_sparkly": mode_solid_sparkly,
                "chaos_colors": mode_chaos_colors,
            }.get(mode, mode_solid)

            worker_thread = threading.Thread(target=func, args=(key, pixels))
            worker_thread.daemon = True
            worker_thread.start()

            # kill_thread = threading.Thread(target=delay_off, args=(60*2,))
            # kill_thread.daemon = True
            # kill_thread.start()

        time.sleep(0.1)

    pixels.fill((0, 0, 0))
Example #34
    def think(self, game):
        legal_moves = game.board.get_legal_nearby_moves(2) or [(7, 7)]
        values_dict = {}
        tmp_board = game.board.board
        pattern_array = []
        white_will_win = 0
        black_will_win = 0
        max_point = (-1, -1)
        max_eval_move = (-1, -1)
        if game.current_player.stone_color == 'b':
            max_eval = -10000
        else:
            max_eval = 10000
        occurence = utils.pattern_occurrence(game.board.board, self.load_pattern)
        od_value = sum([a*b for a,b in zip(occurence, self.mul_values)])
        for x, y in legal_moves:
            tmp_board[x][y] = game.current_player.stone_color
            pattern = utils.extract_features(tmp_board, config.pattern_file_name)
            pattern_array.append(pattern)
            state = utils.get_state(tmp_board)
            self_occurence = utils.pattern_occurrence(tmp_board, self.load_pattern)
            self_value = sum([a*b for a,b in zip(self_occurence, self.mul_values)])
            if game.current_player.stone_color == 'b':
                if self_value > max_eval:
                    max_eval = self_value
                    max_eval_move = (x, y)
                elif self_value == max_eval:
                    if random.randint(0,9) >= 4:
                        max_eval_move = (x, y)
            elif game.current_player.stone_color == 'w':
                if self_value < max_eval:
                    max_eval = self_value
                    max_eval_move = (x, y)
                elif self_value == max_eval:
                    if random.randint(0,9) >= 4:
                        max_eval_move = (x, y)

            if state == 1:
                print('b win')
                black_will_win = 1
                max_point = (x, y)
            elif state == 2:
                print('w win')
                white_will_win = 1
                max_point = (x, y)
            tmp_board[x][y] = '.'

        if max_eval_move == (-1, -1):
            max_eval_move = random.choice(legal_moves)

        values = self.CNN.run_value(pattern_array)
        value_set = set()
        for index, (x, y) in enumerate(legal_moves):
            values_dict[(x, y)] = values[index]
            value_set.add(values[index][0])

        if black_will_win == 0 and white_will_win == 0:
            if random.randint(0,9) >= 3 and len(value_set) >= 5:
                #print("set len:", len(value_set))
                if game.current_player.stone_color == 'b':
                    max_point = max(values_dict.items(), key=operator.itemgetter(1))[0]
                else:
                    max_point = min(values_dict.items(), key=operator.itemgetter(1))[0]
            else:
                max_point = max_eval_move
                #max_point = random.choice(legal_moves)
        tmp_board[max_point[0]][max_point[1]] = game.current_player.stone_color
        self._feature = utils.extract_features(game.board.board, config.pattern_file_name)
        new_pattern = utils.extract_features(tmp_board, config.pattern_file_name)
        print(max_point)
        #print(values_dict[max_point])
        #print("new_pattern", new_pattern)
        #reward
        if black_will_win == 1:
            print("learning...reward 1")
            print(self.CNN.run_learning([[1.]], [self._feature], [new_pattern]))
        elif white_will_win == 1:
            print("learning...reward -1")
            print(self.CNN.run_learning([[-1.]], [self._feature], [new_pattern]))
        else:
            new_occurence = utils.pattern_occurrence(tmp_board, self.load_pattern)
            print("new_occur", new_occurence)
            self_occurence = utils.pattern_occurrence(game.board.board, self.load_pattern)
            self_value = sum([a*b for a,b in zip(self_occurence, self.mul_values)])
            new_value = sum([a*b for a,b in zip(new_occurence, self.mul_values)])
            print("self value:", self_value)
            print("new value:", new_value)
            if new_value > self_value:
                print("learning...reward 0.x")
                print(self.CNN.run_learning([[0.00001 * (new_value - self_value)]], [self._feature], [new_pattern]))
            elif new_value < self_value:
                print("learning...reward -0.x")
                print(self.CNN.run_learning([[0.00001 * (new_value - self_value)]], [self._feature], [new_pattern]))
            else:
                print("reward 0")
                print(self.CNN.run_learning([[0.]], [self._feature], [new_pattern]))
        return max_point
Example #35
    def get_state_name(self):
        """Returns the current workflow state name of the object.
        """
        return str(self.current_state or utils.get_state(self))
Example #36
    def current_state(self):
        """Return the state of the current object"""
        return get_state(self.context)
Example #37
File: main.py Project: joram/steps
def state_view():
    if request.method == "POST":
        state = request.json.get("state")
        set_state(state)
        print(state)
    return jsonify(get_state())
Example #38
def single_train(envs, agents, core_env, core_agent, n_episodes, agent_n, exp, exp_name, render=False):
    """
    Training step for single-agent settings.

    Parameters
    ----------
    envs: list of Environment
        List of environments for the multi-agents
    agents: list of Agent
        List of multi-agents used to create candidates for core_agent
    core_env: Environment
        Main environment of this training step
    core_agent: Agent
        Main agent of this training step
    n_episodes: int
        The number of episodes
    agent_n : int
        The number of agents
    exp: Experiment
        The Experiment object used by hyperdash
    exp_name: str
        The name of experiment
    render: boolean, default False
        Flag for whether to render the environment
    """
    print("INFO: Single mode...")
    for episode in range(n_episodes):
        # 0. Initialize the environment, state and agent params
        obs = core_env.reset()
        core_state = utils.get_state(obs)
        core_agent.reset_total_reward()
        core_agent.set_state(core_state)

        for t in count():
            if episode == 0 and t == 0:
                core_agent_action = core_agent.select_action(core_agent.get_state(), is_first=True)
            else:
                core_agent_action = core_agent.select_action(core_agent.get_state())
            core_agent.set_action(core_agent_action)

            core_obs, core_reward, core_done, core_info = core_agent.get_env().step(core_agent.get_action())
            core_agent.set_step_retrun_value(core_obs, core_done, core_info)

            core_agent.set_done_state(core_done)
            core_agent.set_total_reward(core_reward)

            if not core_done:
                core_next_state = utils.get_state(core_obs)
            else:
                core_next_state = None

            core_reward = torch.tensor([core_reward], device=core_agent.CONSTANTS.DEVICE)
            core_agent.memory.push(core_agent.get_state(), core_agent.get_action().to('cpu'), core_next_state, core_reward.to('cpu'))
            core_agent.set_state(core_next_state)

            if core_agent.steps_done > core_agent.CONSTANTS.INITIAL_MEMORY:
                core_agent.optimize_model()

                if core_agent.steps_done % core_agent.CONSTANTS.TARGET_UPDATE == 0:
                    core_agent.target_net.load_state_dict(core_agent.policy_net.state_dict())

            if core_agent.is_done():
                print("\n")
                break

            exp.log("{}: Current core_agent reward: {} | Episode:{}\n".format(t, core_agent.get_total_reward(), episode))
            core_agent.writer.add_scalar("core/reward/all_step", core_agent.get_total_reward(), core_agent.steps_done)
            # print("Current core_agent reward: {}".format(core_agent.get_total_reward()))

        if episode % core_agent.CONSTANTS.MODEL_SAVING_FREQUENCY == 0:
            with open(core_agent.CONSTANTS.OUTPUT_DIRECTORY_PATH + "/model_tmp/{}-policy".format(core_agent.get_name()), 'wb') as f:
                cloudpickle.dump(core_agent.target_net, f)
            with open(core_agent.CONSTANTS.OUTPUT_DIRECTORY_PATH + "/model_tmp/{}-target".format(core_agent.get_name()), 'wb') as f:
                cloudpickle.dump(core_agent.target_net, f)

        t_reward = core_agent.get_total_reward()
        o_reward = core_agent.get_obtained_reward()
        exp.metric("total_reward", t_reward)
        exp.metric("steps", t)
        exp.metric("obtained_reward", o_reward)
        out_str = 'Total steps: {} \t Episode: {}/{} \t Total reward: {}'.format(
            core_agent.steps_done, episode, t, core_agent.get_total_reward())
        if episode % 20 == 0:
            print(out_str)
            out_str = str("\n" + out_str + "\n")
            exp.log(out_str)
        else:
            exp.log(out_str)
        with open(core_agent.CONSTANTS.TRAIN_LOG_FILE_PATH, 'a') as f:
            f.write(str(out_str) + "\n")
        core_agent.writer.add_scalar("core/reward/total", core_agent.get_total_reward(), episode)
        core_agent.writer.add_scalar("core/steps/total", t, episode)
        core_agent.writer.add_scalars("telemetry", {"steps": t,
                                                    "reward": core_agent.get_total_reward()}, episode)
        core_agent.writer.add_scalar("core/obtained_reward/", core_agent.get_obtained_reward(), episode)
    core_env.close()
    core_agent.writer.close()
Example #39
def dashboard(): return render_template("index.html", sensors=get_state(app.sensors_path))


@app.route("/id_pub")
Example #40
def train(envs, agents, core_env, core_agent, n_episodes, agent_n, exp, exp_name, render=False,):
    """
    Training step.

    In this code, the multi-agents are used to create candidates for the
    core agent. The core agent and its environment form the main RL pair.
    In addition, each agent has its own environment and durability. Each
    agent's reward is checked for the specified number of episodes, and if
    an agent is not selected as the best agent, its durability is reduced.

    Parameters
    ----------
    envs: list of Environment
        List of environments for the multi-agents
    agents: list of Agent
        List of multi-agents used to create candidates for core_agent
    core_env: Environment
        Main environment of this training step
    core_agent: Agent
        Main agent of this training step
    n_episodes: int
        The number of episodes
    agent_n : int
        The number of agents
    exp: Experiment
        The Experiment object used by hyperdash
    exp_name: str
        The name of experiment
    render: boolean, default False
        Flag for whether to render the environment
    """
    _count = 0
    for episode in range(n_episodes):
        # 0. Initialize the environment, state and agent params
        obs = core_env.reset()
        core_state = utils.get_state(obs)
        core_agent.reset_total_reward()
        core_agent.set_state(core_state)
        for agent in agents:
            obs = agent.get_env().reset()
            state = utils.get_state(obs)
            agent.set_state(state)
            agent.reset_total_reward()
            # agent.durability = DEFAULT_DURABILITY

        for t in count():
            # if t % 20 != 0:
            #     print(str(t) + " ", end='')
            # else:
            #     print("\n")
            exp.log("agent_durability:{}".format([agent.get_durability() for agent in agents]))
            for agent in agents:
                if agent.get_state() is not None and len(agents) > 1:
                    agent.writer.add_scalar("internal/durability/{}".format(agent.get_name()), agent.get_durability(),
                                            _count)
                    utils.write_csv([_count, agent.get_durability()], core_agent.CONSTANTS.OUTPUT_DIRECTORY_PATH +
                                    "/{}-durability.csv".format(agent.get_name()))
                else:
                    utils.write_csv([_count, 0], core_agent.CONSTANTS.OUTPUT_DIRECTORY_PATH +
                                    "/{}-durability.csv".format(agent.get_name()))
            _count += 1
            #     print(str(t) + " ", end='')

            # 1. Select action from environment of each agent
            for agent in agents:
                if agent.get_state() is not None and len(agents) > 1:
                # if agent.get_state() is not None:
                    # agent.set_env(core_agent.get_env())
                    # agent.set_state(core_agent.get_state())
                    # agent.set_init_state(core_agent.get_state())
                    agent.set_init_state(agent.get_state())
                    if episode == 0 and t == 0:
                        action = agent.select_action(agent.get_state(), is_first=True)
                    else:
                        action = agent.select_action(agent.get_state())
                    agent.set_action(action)

            # 2. Proceed step of each agent
            for agent in agents:
                if agent.get_state() is not None:
                # if agent.get_state() is not None and len(agents) > 1:
                    obs, reward, done, info = agent.get_env().step(agent.get_action())
                    agent.set_step_retrun_value(obs, done, info)

                    agent.set_total_reward(reward)
                    # Agent reward value
                    # print("Agent:{}, Reward:{}, State:{}".format(agent.name, reward, agent.get_state()))
                    # print("Agent:{}, Reward:{}".format(agent.name, reward))

                    if not done:
                        next_state = utils.get_state(obs)
                    else:
                        next_state = None

                    reward = torch.tensor([reward], device=agent.CONSTANTS.DEVICE)
                    agent.memory.push(agent.get_state(), agent.get_action().to('cpu'), next_state, reward.to('cpu'))
                    agent.set_state(next_state)

                    if agent.steps_done > agent.CONSTANTS.INITIAL_MEMORY:
                        agent.optimize_model()

                        if agent.steps_done % agent.CONSTANTS.TARGET_UPDATE == 0:
                            agent.target_net.load_state_dict(agent.policy_net.state_dict())

            # print("\n")
            # print([agent.get_total_reward() for agent in agents])
            exp.log([agent.get_total_reward() for agent in agents])
            # print(str(t) + " ", end='')

            # ---------------
            # Proposal method
            # ---------------

            # 3. Select best agent in this step
            if len(agents) > 1:
                best_agent = utils.select_best_agent(agents, core_agent.CONSTANTS.ROULETTE_MODE,
                                                     max_reward=core_agent.CONSTANTS.MAX_REWARD,
                                                     min_reward=core_agent.CONSTANTS.MIN_REWARD)
                # best_agent.best_counter()
                [agent.best_counter() for agent in agents if agent.get_name() == best_agent.get_name()]
                # for agent in agents:
                #     if agent.get_name() == best_agent.get_name():
                #         agent.best_counter()
                core_agent.memory.push(best_agent.get_init_state(), best_agent.get_action().to('cpu'),
                                       best_agent.get_next_state(),
                                       torch.tensor([best_agent.get_reward()],
                                                    device=best_agent.CONSTANTS.DEVICE).to('cpu'))
                for agent in agents:
                    agent.writer.add_scalar("internal/reward/{}/all_step".format(agent.get_name()),
                                            agent.get_total_reward(), core_agent.steps_done)
                    agent.writer.add_scalar("internal/obtained_reward/{}".format(agent.get_name()),
                                            agent.get_obtained_reward(), episode)
                    # core_agent_action = best_agent.get_action()
                    # best_agent_state = best_agent.get_state()
                    # policy_net_flag = best_agent.get_policy_net_flag()
                    # best_agent_action = best_agent.get_action()

                # 3.5 Only best_agent can heal own durability at specific iteration
                if t % core_agent.CONSTANTS.DURABILITY_HEALING_FREQUENCY == 0 and len(agents) > 1:
                    # best_agent.heal_durability(core_agent.CONSTANTS.DEFAULT_DURABILITY_INCREASED_LEVEL)
                    [agent.heal_durability(core_agent.CONSTANTS.DEFAULT_DURABILITY_INCREASED_LEVEL)
                     for agent in agents if agent.get_name() == best_agent.get_name()]

            # Best_agent information
            # exp.log("{}: Current best agent: {}, Disabilities:{}".format(t, best_agent.name,
            #                                                              [agent.durability() for agent in agents]))
            # print("{}: Current best agent: {}, Reward:{}".format(t, best_agent.name, best_agent.get_total_reward()))
                exp.log("{}: Current best agent: {}, Reward:{}".format(t, best_agent.get_name(),
                                                                       best_agent.get_total_reward()))

            # 4. Check the agent durability in specified step
            if t % core_agent.CONSTANTS.DURABILITY_CHECK_FREQUENCY == 0:
                if len(agents) > 1:
                    # index = [i for i in range(len(agents)) if i not in best_agents]
                    index = [i for i, agent in enumerate(agents) if agent.get_name() != best_agent.get_name()]
                    for i in index:
                        if agents[i].get_state() is not None:
                            agents[i].reduce_durability(core_agent.CONSTANTS.DEFAULT_DURABILITY_DECREASED_LEVEL)

            # 5. Remove agents whose durability is exhausted.
            # Rebuild the list in place: deleting entries while enumerating
            # would skip the element that follows each removed agent.
            if len(agents) > 1:
                agents[:] = [agent for agent in agents if agent.get_durability() > 0]

            # 6. Main step of core agent
            # core_agent_action = core_agent.select_core_action(best_agent_state, policy_net_flag, best_agent_action)
            if episode == 0 and t == 0:
                core_agent_action = core_agent.select_action(core_agent.get_state(), is_first=True)
            else:
                core_agent_action = core_agent.select_action(core_agent.get_state())
            core_agent.set_action(core_agent_action)

            core_obs, core_reward, core_done, core_info = core_agent.get_env().step(core_agent.get_action())
            core_agent.set_step_retrun_value(core_obs, core_done, core_info)

            core_agent.set_done_state(core_done)
            core_agent.set_total_reward(core_reward)

            if not core_done:
                core_next_state = utils.get_state(core_obs)
            else:
                core_next_state = None

            core_reward = torch.tensor([core_reward], device=core_agent.CONSTANTS.DEVICE)
            core_agent.memory.push(core_agent.get_state(), core_agent.get_action().to('cpu'), core_next_state,
                                   core_reward.to('cpu'))
            core_agent.set_state(core_next_state)

            if core_agent.steps_done > core_agent.CONSTANTS.INITIAL_MEMORY:
                core_agent.optimize_model()

                if core_agent.steps_done % core_agent.CONSTANTS.TARGET_UPDATE == 0:
                    core_agent.target_net.load_state_dict(core_agent.policy_net.state_dict())

            if core_agent.is_done():
                print("\n")
                break

            exp.log("{} steps | Current core_agent reward: {} | Episode:{}\n".format(t, core_agent.get_total_reward(),
                                                                                     episode))
            core_agent.writer.add_scalar("core/reward/all_step", core_agent.get_total_reward(), core_agent.steps_done)
            for agent in agents:
                agent.writer.add_scalar("internal/reward/{}/episode".format(agent.get_name()),
                                        agent.get_total_reward(), episode)
            # print("Current core_agent reward: {}".format(core_agent.get_total_reward()))

        # ----------------------
        # End of proposal method
        # ----------------------

        if episode % core_agent.CONSTANTS.MODEL_SAVING_FREQUENCY == 0:
            for agent in agents:
                with open(core_agent.CONSTANTS.OUTPUT_DIRECTORY_PATH + "/model_tmp/{}-policy".format(agent.get_name()), 'wb') as f:
                    cloudpickle.dump(agent.policy_net, f)
                with open(core_agent.CONSTANTS.OUTPUT_DIRECTORY_PATH + "/model_tmp/{}-target".format(agent.get_name()), 'wb') as f:
                    cloudpickle.dump(agent.target_net, f)
                agent.writer.add_scalar("internal/obtained_reward/{}".format(agent.get_name()),
                                        agent.get_obtained_reward(), episode)
            with open(core_agent.CONSTANTS.OUTPUT_DIRECTORY_PATH + "/model_tmp/{}-policy".format(core_agent.get_name()), 'wb') as f:
                cloudpickle.dump(core_agent.target_net, f)
            with open(core_agent.CONSTANTS.OUTPUT_DIRECTORY_PATH + "/model_tmp/{}-target".format(core_agent.get_name()), 'wb') as f:
                cloudpickle.dump(core_agent.target_net, f)

        t_reward = core_agent.get_total_reward()
        o_reward = core_agent.get_obtained_reward()
        exp.metric("total_reward", t_reward)
        exp.metric("steps", t)
        exp.metric("obtained_reward", o_reward)
        out_str = 'Total steps: {} \t Episode: {}/{} \t Total reward: {}'.format(
            core_agent.steps_done, episode, t, core_agent.get_total_reward())
        if episode % 20 == 0:
            print(out_str)
            out_str = str("\n" + out_str + "\n")
            exp.log(out_str)
        else:
            # print(out_str)
            exp.log(out_str)
        with open(core_agent.CONSTANTS.TRAIN_LOG_FILE_PATH, 'a') as f:
            f.write(str(out_str) + "\n")
        core_agent.writer.add_scalar("core/reward/total", core_agent.get_total_reward(), episode)
        core_agent.writer.add_scalar("core/steps/total", t, episode)
        core_agent.writer.add_scalars("telemetry", {"steps": t,
                                                    "reward": core_agent.get_total_reward()}, episode)
        core_agent.writer.add_scalar("core/obtained_reward/", core_agent.get_obtained_reward(), episode)
    core_env.close()
    core_agent.writer.close()
    for agent in agents:
        agent.writer.close()
    for agent in agents:
        agent.get_env().close()
    del best_agent
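The durability mechanism described in the train() docstring (heal the best agent, decay the others, drop agents that hit zero) can be condensed into a few lines. A minimal, self-contained sketch, using illustrative names rather than the project's actual Agent API:

def update_durabilities(agents, best_agent, heal=1, decay=1):
    # Reward the best agent of this step and penalize the others.
    for agent in agents:
        if agent is best_agent:
            agent.durability += heal
        else:
            agent.durability -= decay
    # Rebuild the list in place so existing references stay valid.
    agents[:] = [agent for agent in agents if agent.durability > 0]
    return agents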