from simulator.simulator import Simulator
from simulator.route_manager import RouteManager
from algorithm import AlgorithmManager as algo
# from algorithm import DummyAlgorithmManager as algo
from connection import ConnectionManager as conn
rm = RouteManager("simulator/maps/simple")
#algo = AlgorithmManager
sim = Simulator(
    rm,  #route manager
    algo,  #algorithm module
    conn,  #connection module
    "simulator/maps/simple"  #map folder
)

rm.bind_simulator(sim)
sim.start_simulation()
    def test_run_simulator(self):
        simulator = Simulator("team_vs_goblins")
        simulator.run_until_done()
Example #3
    def augment_label_maps(self, atlases: List[str], feature_list: List[str],
                           label_list: List[str],
                           single_feature_list: List[str],
                           single_label_list: List[str]) -> None:
        if not atlases:
            return

        self.__services.debug.write(
            """Starting Augmentation: [
            atlases: {},
            feature_list: {},
            label_list: {},
            single_feature_list: {},
            single_label_list: {}
        ] 
        """.format(atlases, feature_list, label_list, single_feature_list,
                   single_label_list), DebugLevel.BASIC)

        label_atlas_name = "training_" + "_".join(atlases)

        self.__services.debug.write("Loading maps", DebugLevel.BASIC)
        maps: List[Map] = []
        for name in atlases:
            maps = maps + self.__services.resources.maps_dir.get_atlas(
                name).load_all()

        self.__services.debug.write("Loading atlas", DebugLevel.BASIC)
        t: List[Dict[str,
                     any]] = self.__services.resources.training_data_dir.load(
                         label_atlas_name)

        progress_bar: Progress = self.__services.debug.progress_debug(
            len(t), DebugLevel.BASIC)
        progress_bar.start()

        for i in range(len(t)):
            config = Configuration()
            config.simulator_algorithm_type = AStar
            config.simulator_testing_type = AStarTesting
            config.simulator_initial_map = maps[i]
            services: Services = Services(config)
            simulator: Simulator = Simulator(services)
            testing: AStarTesting = simulator.start()

            if feature_list:
                seq_features = MapProcessing.get_sequential_features(
                    testing.map, feature_list)
                for q in range(len(t[i]["features"])):
                    t[i]["features"][q].update(seq_features[q])

            if label_list:
                seq_labels = MapProcessing.get_sequential_labels(
                    testing.map, label_list)
                for q in range(len(t[i]["labels"])):
                    t[i]["labels"][q].update(seq_labels[q])

            if single_feature_list:
                t[i]["single_features"].update(
                    MapProcessing.get_single_features(maps[i],
                                                      single_feature_list))

            if single_label_list:
                t[i]["single_labels"].update(
                    MapProcessing.get_single_labels(maps[i],
                                                    single_label_list))
            progress_bar.step()

        self.__services.debug.write(
            "Saving atlas augmentation: " + str(label_atlas_name),
            DebugLevel.BASIC)
        self.__services.resources.training_data_dir.save(label_atlas_name, t)
        self.__services.debug.write(
            "Finished atlas augmentation: " + str(label_atlas_name) + "\n",
            DebugLevel.BASIC)
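    # For context, a minimal usage sketch (the enclosing class, its construction, and the atlas,
    # feature and label names below are hypothetical; only the method signature above is taken
    # from this excerpt):
    #   generator = MapLabellingGenerator(services)
    #   generator.augment_label_maps(atlases=["atlas_a"],
    #                                feature_list=["feature_x"], label_list=["label_y"],
    #                                single_feature_list=[], single_label_list=[])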
Example #4
    parser.add_argument('--nolog', action='store_true')
    parser.add_argument('--subsample', default=None, type=float, help='Subsample depth image by provided factor before feeding to network')
    parser.add_argument('--gui', action='store_true')
    parser.add_argument('-e', nargs='+', default=None, type=int, help='epochs to evaluate, if next arg is model, separate with -- ')

    args = parser.parse_args()

    model_fns = glob.glob(args.model + '/*.hdf5')
    assert len(model_fns) > 0
    model_name = model_fns[0].split('/')[-2]

    # Get input size and initialize simulator camera with it
    from keras.models import load_model
    _, height, width, _ = load_model(model_fns[0]).input_shape

    sim = Simulator(gui=args.gui, timeout=4, debug=True, use_egl=False, stop_th=1e-3)
    sim.cam.height = height
    sim.cam.width = width

    scenes = h5py.File(args.scenes, 'r')

    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    model_results_path = os.path.join(args.results, '{}_{}'.format(dt, model_name))
    if not os.path.exists(model_results_path):
        os.makedirs(model_results_path)
    results_fn = model_results_path + '/results.txt'
    results_f = open(results_fn, 'w')
    results_f.write('ARGUMENTS:\n'+''.join(['{}: {}\n'.format(item[0], item[1]) for item in vars(args).items()]))
    results_f.write('---\n')
    print_attrs(scenes, results_f)
Example #5
    def run_graphical_for(self, ticks):
        s = Simulator(self.nodes)
        s.run_graphical(ticks)
Example #6
# EMAdecision, KAMAdecision and SMAdecision are also used below; their imports are assumed to
# follow the same functions.<INDICATOR> pattern as the ones that were already present here.
from functions.EMA import EMAdecision
from functions.KAMA import KAMAdecision
from functions.SMA import SMAdecision
from functions.TEMA import TEMAdecision
from functions.TRIMA import TRIMAdecision
from functions.WMA import WMAdecision
from simulator.simulator import Simulator
import pandas_datareader as web
import pandas as pd
import datetime as dt

start = dt.datetime(2015, 1, 1)
end = dt.datetime(2017, 12, 31)

asset = 'AAPL'
data = web.DataReader(asset, 'morningstar', start=start, end=end)
data.index = data.index.droplevel()

sim = Simulator(data, std_purchase=10)
sim.add_indicator('EMA-20', EMAdecision(data, 20))
sim.add_indicator('KAMA-20', KAMAdecision(data, 20))
sim.add_indicator('SMA-20', SMAdecision(data, 20))
sim.add_indicator('SMA-100', SMAdecision(data, 100))
sim.add_indicator('TEMA-20', TEMAdecision(data, 20))
sim.add_indicator('TRIMA-20', TRIMAdecision(data, 20))
sim.add_indicator('WMA-20', WMAdecision(data, 20))

#print(sim.security.head())
#print(sim.security.tail())

sim.calc_earning()
print('\n Summary')
print('Final capital {}'.format(sim.final_capital))
print('Final shares {}'.format(sim.shares_own))
from simulator.simulator import Simulator
import h5py
import pptk
import numpy as np
import core.evaluate_orthographic_pipeline as ortho_pipeline

SCENE_FN = '/Users/mario/Developer/msc-thesis/data/scenes/200220_1700_manually_generated_scenes.hdf5'
scene = h5py.File(SCENE_FN)['scene'][0]
sim = Simulator(use_egl=False, gui=False)
sim.cam.pos = [.6, 0., .3]
sim.restore(scene, '../data/3d_models/shapenetsem40')
cloud = sim.cam.point_cloud()

cloud = ortho_pipeline.transform_world_to_camera(cloud, sim.cam)

pptk.viewer(cloud)
np.save('../test/horizontal_bottle.npy', cloud)
Example #8
    def __init__(self):
        self.simulator = Simulator()
Example #9
    def test_gripper_autostop(self):
        sim = Simulator(gui=True, use_egl=False)
        sim.add_gripper('../simulator/gripper.urdf')
        result = sim.move_gripper_to([0, 0, 0, 0, 0, 0])
Example #10
def train(params):
    """
    parameters set
    """
    NUM_NODES = params['number of nodes in the cluster']
    env = LraClusterEnv(num_nodes=NUM_NODES)
    batch_size = params['batch_size']
    ckpt_path_1 = "./checkpoint/" + params['path'] + "_1" + "/model.ckpt"
    ckpt_path_2 = "./checkpoint/" + params['path'] + "_2" + "/model.ckpt"
    ckpt_path_3 = "./checkpoint/" + params['path'] + "_3" + "/model.ckpt"
    np_path = "./checkpoint/" + params['path'] + "/optimal_file_name.npz"
    Recover = params['recover']
    nodes_per_group = int(params['nodes per group'])
    replay_size = params['replay size']
    training_times_per_episode = 1
    UseExperienceReplay = False
    """
    Build Network
    """
    n_actions = nodes_per_group  #: 3 nodes per group
    n_features = int(n_actions * env.NUM_APPS + 1 +
                     env.NUM_APPS)  #: 3*7+1+7 = 29
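    # The input to each policy concatenates: the per-app container counts of the 3 candidate
    # groups (n_actions * NUM_APPS values), the id of the container currently being placed
    # (1 value), and the per-app counts still waiting to be placed (NUM_APPS values), which is
    # how observation_first_layer is assembled in the episode loop below.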
    RL_1 = PolicyGradient(n_actions=n_actions,
                          n_features=n_features,
                          learning_rate=params['learning rate'],
                          suffix=str(params['NUM_CONTAINERS_start']) + '1')

    RL_2 = PolicyGradient(n_actions=n_actions,
                          n_features=n_features,
                          learning_rate=params['learning rate'],
                          suffix=str(params['NUM_CONTAINERS_start']) + '2')

    RL_3 = PolicyGradient(n_actions=n_actions,
                          n_features=n_features,
                          learning_rate=params['learning rate'],
                          suffix=str(params['NUM_CONTAINERS_start']) + '3')
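    # The three policies act hierarchically: RL_1 picks one of the 3 coarse groups of nodes,
    # RL_2 picks a sub-group inside that group, and RL_3 picks the node inside the sub-group;
    # the chosen offsets are summed into final_decision further below.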
    sim = Simulator()
    """
    Training
    """
    start_time = time.time()
    global_start_time = start_time

    observation_episode_1, action_episode_1, reward_episode_1 = [], [], []
    observation_episode_2, action_episode_2, reward_episode_2 = [], [], []
    observation_episode_3, action_episode_3, reward_episode_3 = [], [], []
    epoch_i = 0
    entropy_weight = 0.01
    names = locals()
    for i in range(0, 10):
        names['highest_tput_' + str(i)] = 0.1
        names['observation_optimal_1_' + str(i)] = []
        names['action_optimal_1_' + str(i)] = []
        names['reward_optimal_1_' + str(i)] = []
        names['number_optimal_' + str(i)] = []
        names['optimal_range_' + str(i)] = 1.2

    for i in range(0, 10):
        names['observation_optimal_2_' + str(i)] = []
        names['action_optimal_2_' + str(i)] = []
        names['reward_optimal_2_' + str(i)] = []

    for i in range(0, 10):
        names['observation_optimal_3_' + str(i)] = []
        names['action_optimal_3_' + str(i)] = []
        names['reward_optimal_3_' + str(i)] = []

    # TODO: delete this range

    def store_episode_1(observations, actions):
        observation_episode_1.append(observations)
        action_episode_1.append(actions)

    def store_episode_2(observations, actions):
        observation_episode_2.append(observations)
        action_episode_2.append(actions)

    def store_episode_3(observations, actions):
        observation_episode_3.append(observations)
        action_episode_3.append(actions)

    NUM_CONTAINERS_start = params['NUM_CONTAINERS_start']

    while epoch_i < params['epochs']:

        NUM_CONTAINERS = np.random.randint(NUM_CONTAINERS_start + 1,
                                           NUM_CONTAINERS_start + 11)
        tput_origimal_class = int(NUM_CONTAINERS - NUM_CONTAINERS_start - 1)
        source_batch_, index_data = batch_data(
            NUM_CONTAINERS, env.NUM_APPS)  # index_data = [0,1,2,0,1,2]
        observation = env.reset().copy()  # (9,9)
        source_batch = source_batch_.copy()

        for inter_episode_index in range(NUM_CONTAINERS):

            appid = index_data[inter_episode_index]
            observation_ = env.step(inter_episode_index % NUM_NODES,
                                    appid)  # load-balancing
            observation = observation_.copy()  # (9,9)
        tput_state = env.get_tput_total_env()
        tput_baseline = (sim.predict(tput_state.reshape(-1, env.NUM_APPS)) *
                         tput_state).sum() / NUM_CONTAINERS
        """
        Episode
        """
        observation = env.reset().copy()
        for inter_episode_index in range(NUM_CONTAINERS):
            source_batch[index_data[inter_episode_index]] -= 1
            observation, mapping_index = handle_constraint(
                observation.copy(), NUM_NODES)
            assert len(mapping_index) > 0

            observation_first_layer = np.empty([0, env.NUM_APPS], int)
            number_of_first_layer_nodes = int(NUM_NODES / nodes_per_group)  # 9
            for i in range(nodes_per_group):
                observation_new = np.sum(
                    observation[i * number_of_first_layer_nodes:(i + 1) *
                                number_of_first_layer_nodes],
                    0).reshape(1, -1)
                observation_first_layer = np.append(observation_first_layer,
                                                    observation_new, 0)
            observation_first_layer[:, index_data[inter_episode_index]] += 1
            observation_first_layer = np.array(
                observation_first_layer).reshape(1, -1)
            observation_first_layer = np.append(
                observation_first_layer,
                index_data[inter_episode_index]).reshape(1, -1)
            observation_first_layer = np.append(
                observation_first_layer,
                np.array(source_batch)).reshape(1, -1)  # (1,29)
            action_1, prob_weights = RL_1.choose_action(
                observation_first_layer.copy())

            observation_copy = observation.copy()
            observation_copy = observation_copy[action_1 *
                                                number_of_first_layer_nodes:
                                                (action_1 + 1) *
                                                number_of_first_layer_nodes]
            number_of_second_layer_nodes = int(number_of_first_layer_nodes /
                                               nodes_per_group)  # 9/3 = 3
            observation_second_layer = np.empty([0, env.NUM_APPS], int)
            for i in range(nodes_per_group):
                observation_new = np.sum(
                    observation_copy[i * number_of_second_layer_nodes:(i + 1) *
                                     number_of_second_layer_nodes],
                    0).reshape(1, -1)
                observation_second_layer = np.append(observation_second_layer,
                                                     observation_new, 0)
            observation_second_layer[:, index_data[inter_episode_index]] += 1
            observation_second_layer = np.array(
                observation_second_layer).reshape(1, -1)
            observation_second_layer = np.append(
                observation_second_layer,
                index_data[inter_episode_index]).reshape(1, -1)
            observation_second_layer = np.append(
                observation_second_layer,
                np.array(source_batch)).reshape(1, -1)
            action_2, prob_weights = RL_2.choose_action(
                observation_second_layer.copy())

            observation_copy = observation_copy[action_2 *
                                                number_of_second_layer_nodes:
                                                (action_2 + 1) *
                                                number_of_second_layer_nodes]
            number_of_third_layer_nodes = int(number_of_second_layer_nodes /
                                              nodes_per_group)  # 3/3 = 1
            observation_third_layer = np.empty([0, env.NUM_APPS], int)
            for i in range(nodes_per_group):
                observation_new = np.sum(
                    observation_copy[i * number_of_third_layer_nodes:(i + 1) *
                                     number_of_third_layer_nodes],
                    0).reshape(1, -1)
                observation_third_layer = np.append(observation_third_layer,
                                                    observation_new, 0)
            observation_third_layer[:, index_data[inter_episode_index]] += 1
            observation_third_layer = np.array(
                observation_third_layer).reshape(1, -1)
            observation_third_layer = np.append(
                observation_third_layer,
                index_data[inter_episode_index]).reshape(1, -1)
            observation_third_layer = np.append(
                observation_third_layer,
                np.array(source_batch)).reshape(1, -1)
            action_3, prob_weights = RL_3.choose_action(
                observation_third_layer.copy())

            final_decision = action_1 * number_of_first_layer_nodes + action_2 * number_of_second_layer_nodes + action_3 * number_of_third_layer_nodes

            appid = index_data[inter_episode_index]
            observation_ = env.step(mapping_index[final_decision], appid)

            store_episode_1(observation_first_layer, action_1)
            store_episode_2(observation_second_layer, action_2)
            store_episode_3(observation_third_layer, action_3)
            observation = observation_.copy()  # (9,9)
        """
        After an entire allocation, calculate total throughput, reward
        """
        tput_state = env.get_tput_total_env()
        tput = (sim.predict(tput_state.reshape(-1, env.NUM_APPS)) *
                tput_state).sum() / NUM_CONTAINERS

        RL_1.store_tput_per_episode(tput, epoch_i)
        assert (np.sum(env.state, axis=1) <=
                params['container_limitation per node']).all()
        assert sum(sum(env.state)) == NUM_CONTAINERS

        reward_ratio = (tput - tput_baseline)

        reward_episode_1 = [reward_ratio] * len(observation_episode_1)
        reward_episode_2 = [reward_ratio] * len(observation_episode_2)
        reward_episode_3 = [reward_ratio] * len(observation_episode_3)

        RL_1.store_training_samples_per_episode(observation_episode_1,
                                                action_episode_1,
                                                reward_episode_1)
        RL_2.store_training_samples_per_episode(observation_episode_2,
                                                action_episode_2,
                                                reward_episode_2)
        RL_3.store_training_samples_per_episode(observation_episode_3,
                                                action_episode_3,
                                                reward_episode_3)
        """
        check_tput_quality(tput)
        """
        if names['highest_tput_' + str(tput_origimal_class)] < tput:
            highest_tput_original = names['highest_tput_' +
                                          str(tput_origimal_class)]
            optimal_range_original = names['optimal_range_' +
                                           str(tput_origimal_class)]
            names['highest_tput_' + str(tput_origimal_class)] = tput
            names['number_optimal_' + str(tput_origimal_class)] = []
            names['observation_optimal_1_' + str(tput_origimal_class)], names[
                'action_optimal_1_' + str(tput_origimal_class)], names[
                    'reward_optimal_1_' +
                    str(tput_origimal_class)] = [], [], []
            names['observation_optimal_2_' + str(tput_origimal_class)], names[
                'action_optimal_2_' + str(tput_origimal_class)], names[
                    'reward_optimal_2_' +
                    str(tput_origimal_class)] = [], [], []
            names['observation_optimal_3_' + str(tput_origimal_class)], names[
                'action_optimal_3_' + str(tput_origimal_class)], names[
                    'reward_optimal_3_' +
                    str(tput_origimal_class)] = [], [], []
            if UseExperienceReplay:
                names['observation_optimal_1_' +
                      str(tput_origimal_class)].extend(observation_episode_1)
                names['action_optimal_1_' +
                      str(tput_origimal_class)].extend(action_episode_1)
                names['reward_optimal_1_' +
                      str(tput_origimal_class)].extend(reward_episode_1)

                names['observation_optimal_2_' +
                      str(tput_origimal_class)].extend(observation_episode_2)
                names['action_optimal_2_' +
                      str(tput_origimal_class)].extend(action_episode_2)
                names['reward_optimal_2_' +
                      str(tput_origimal_class)].extend(reward_episode_2)

                names['observation_optimal_3_' +
                      str(tput_origimal_class)].extend(observation_episode_3)
                names['action_optimal_3_' +
                      str(tput_origimal_class)].extend(action_episode_3)
                names['reward_optimal_3_' +
                      str(tput_origimal_class)].extend(reward_episode_3)

            names['number_optimal_' +
                  str(tput_origimal_class)].append(NUM_CONTAINERS)
            names['optimal_range_' + str(tput_origimal_class)] = min(
                1.2, tput / (highest_tput_original / optimal_range_original))
        elif names['highest_tput_' + str(tput_origimal_class)] < tput * names[
                'optimal_range_' + str(tput_origimal_class)]:

            if UseExperienceReplay:

                names['observation_optimal_1_' +
                      str(tput_origimal_class)].extend(observation_episode_1)
                names['action_optimal_1_' +
                      str(tput_origimal_class)].extend(action_episode_1)
                names['reward_optimal_1_' +
                      str(tput_origimal_class)].extend(reward_episode_1)

                names['observation_optimal_2_' +
                      str(tput_origimal_class)].extend(observation_episode_2)
                names['action_optimal_2_' +
                      str(tput_origimal_class)].extend(action_episode_2)
                names['reward_optimal_2_' +
                      str(tput_origimal_class)].extend(reward_episode_2)

                names['observation_optimal_3_' +
                      str(tput_origimal_class)].extend(observation_episode_3)
                names['action_optimal_3_' +
                      str(tput_origimal_class)].extend(action_episode_3)
                names['reward_optimal_3_' +
                      str(tput_origimal_class)].extend(reward_episode_3)

            names['number_optimal_' +
                  str(tput_origimal_class)].append(NUM_CONTAINERS)

        observation_episode_1, action_episode_1, reward_episode_1 = [], [], []
        observation_episode_2, action_episode_2, reward_episode_2 = [], [], []
        observation_episode_3, action_episode_3, reward_episode_3 = [], [], []
        """
        Each batch, RL.learn()
        """
        if (epoch_i % batch_size == 0) & (epoch_i > 1):
            if UseExperienceReplay:
                for replay_class in range(0, 10):

                    reward_optimal_1 = names['reward_optimal_1_' +
                                             str(replay_class)]
                    observation_optimal_1 = names['observation_optimal_1_' +
                                                  str(replay_class)]
                    action_optimal_1 = names['action_optimal_1_' +
                                             str(replay_class)]

                    reward_optimal_2 = names['reward_optimal_2_' +
                                             str(replay_class)]
                    observation_optimal_2 = names['observation_optimal_2_' +
                                                  str(replay_class)]
                    action_optimal_2 = names['action_optimal_2_' +
                                             str(replay_class)]

                    reward_optimal_3 = names['reward_optimal_3_' +
                                             str(replay_class)]
                    observation_optimal_3 = names['observation_optimal_3_' +
                                                  str(replay_class)]
                    action_optimal_3 = names['action_optimal_3_' +
                                             str(replay_class)]

                    number_optimal = names['number_optimal_' +
                                           str(replay_class)]

                    buffer_size = int(len(number_optimal))
                    assert sum(
                        number_optimal) * training_times_per_episode == len(
                            action_optimal_1)

                    if buffer_size < replay_size:
                        # TODO: if the number of layers changes, training_times_per_episode should be modified
                        RL_1.ep_obs.extend(observation_optimal_1)
                        RL_1.ep_as.extend(action_optimal_1)
                        RL_1.ep_rs.extend(reward_optimal_1)

                        RL_2.ep_obs.extend(observation_optimal_2)
                        RL_2.ep_as.extend(action_optimal_2)
                        RL_2.ep_rs.extend(reward_optimal_2)

                        RL_3.ep_obs.extend(observation_optimal_3)
                        RL_3.ep_as.extend(action_optimal_3)
                        RL_3.ep_rs.extend(reward_optimal_3)

                    else:
                        replay_index = np.random.choice(range(buffer_size),
                                                        size=replay_size,
                                                        replace=False)
                        for replay_id in range(replay_size):
                            replace_start = replay_index[replay_id]
                            start_location = sum(number_optimal[:replace_start]
                                                 ) * training_times_per_episode
                            stop_location = sum(
                                number_optimal[:replace_start +
                                               1]) * training_times_per_episode

                            RL_1.ep_obs.extend(observation_optimal_1[
                                start_location:stop_location])
                            RL_1.ep_as.extend(
                                action_optimal_1[start_location:stop_location])
                            RL_1.ep_rs.extend(
                                reward_optimal_1[start_location:stop_location])

                            RL_2.ep_obs.extend(observation_optimal_2[
                                start_location:stop_location])
                            RL_2.ep_as.extend(
                                action_optimal_2[start_location:stop_location])
                            RL_2.ep_rs.extend(
                                reward_optimal_2[start_location:stop_location])

                            RL_3.ep_obs.extend(observation_optimal_3[
                                start_location:stop_location])
                            RL_3.ep_as.extend(
                                action_optimal_3[start_location:stop_location])
                            RL_3.ep_rs.extend(
                                reward_optimal_3[start_location:stop_location])

            # entropy_weight=0.1
            RL_1.learn(epoch_i, entropy_weight, True)
            RL_2.learn(epoch_i, entropy_weight, False)
            RL_3.learn(epoch_i, entropy_weight, False)
        """
        checkpoint, per 1000 episodes
        """
        if (epoch_i % 1000 == 0) & (epoch_i > 1):
            highest_value = 0
            for class_replay in range(0, 10):
                highest_value = names['highest_tput_' + str(class_replay)]
                optimal_number = len(names['number_optimal_' +
                                           str(class_replay)])
                print("\n epoch: %d, highest tput: %f, optimal_number: %d" %
                      (epoch_i, highest_value, optimal_number))
            RL_1.save_session(ckpt_path_1)
            RL_2.save_session(ckpt_path_2)
            RL_3.save_session(ckpt_path_3)

            np.savez(np_path,
                     tputs=np.array(RL_1.tput_persisit),
                     candidate=np.array(RL_1.episode))
            """
            optimal range adaptively change
            """
            print(prob_weights)
            print(prob_weights)
            entropy_weight *= 0.5
            entropy_weight = max(entropy_weight, 0.002)

        epoch_i += 1
Example #11
    def test_close_gripper(self):
        sim = Simulator(gui=True, use_egl=False)
        sim.add_gripper('../simulator/gripper.urdf')
        sim.run(epochs=100)
        result = sim.close_gripper()
Example #12
    def test_teleport_to_pose(self):
        sim = Simulator(gui=True, use_egl=False, debug=True)
        sim.add_gripper('../simulator/gripper.urdf')

        sim.run_debug_teleport()
Example #13
parser.add_argument('--gui', action='store_true')
parser.add_argument('--logvideo', action='store_true')

args = parser.parse_args()

scene_ds = h5py.File(args.scenes, 'r')
if args.scene_name is None:
    scene_idx = np.random.randint(0, 39)
else:
    scene_idx = int(np.where(scene_ds['name'][:] == args.scene_name)[0])
scene = scene_ds['scene'][scene_idx]

network = net.Network(model_fn=args.model)
sim = Simulator(gui=args.gui,
                timeout=4,
                debug=False,
                use_egl=False,
                timestep=1e-3)
sim.restore(scene, args.models)
depth = scene_ds['depth'][scene_idx]

position, angle, width = network.predict(depth)

if not args.gui:
    net.plot_output(depth, position, angle, width, 1)

gs = net.get_grasps_from_output(position, angle, width, 1)[0]
# Send grasp to simulator and evaluate
pose, width = sim.cam.compute_grasp(gs.as_bb.points, depth[gs.center])
print(pose, width, gs.angle)
pose = np.concatenate((pose, [0, 0, gs.angle]))
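# Hypothetical continuation (not part of this excerpt): the comment above says the grasp is
# sent to the simulator for evaluation; the gripper helpers shown in the other examples here
# (add_gripper / move_gripper_to / close_gripper) would be the natural way to do that, e.g.:
#   sim.add_gripper('../simulator/gripper.urdf')
#   sim.move_gripper_to(pose)
#   sim.close_gripper()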
Example #14
        'distance_error': {},
    }

    num_re_runs = 1
    num_time_instances = 100

    num_nodes = 50
    communication_radius = 50
    max_width = 500
    max_height = 500
    max_v = 10

    # Initiate
    for k in total_algorithm_results.keys():
        total_algorithm_results[k] = {}
        for algo in Simulator().algorithms:
            total_algorithm_results[k][algo.name()] = np.zeros(
                (len(percentage_are_anchors_list), 1))

    # Run Grid
    for i, percentage_are_anchors in enumerate(percentage_are_anchors_list):
        for re_runs in range(num_re_runs):
            start = time()
            simulator = Simulator()

            # random.seed((i+1) * 5)

            simulator_results, algorithm_results = simulator.run(
                num_time_instances=num_time_instances,
                num_nodes=num_nodes,
                num_anchors=num_nodes * percentage_are_anchors,
                    ).draw()
                elif data["type"] == "line":
                    pyglet.shapes.Line(
                        x=data["x"],
                        y=data["y"],
                        x2=data["x2"],
                        y2=data["y2"],
                        color=data["color"],
                        width=1
                    ).draw()

    def assign_car_to_agent(self, agent):
        if len(self.available_cars) > 0:
            self.agents.append({
                "car_id": self.available_cars.pop(),
                "agent": agent
            })


if __name__ == '__main__':
    simulator = Simulator(mode="pyglet", cars_number=1, allow_human=True)
    window = GameSimulator(size=(600, 600), _simulator=simulator)
    keys = pyglet.window.key.KeyStateHandler()
    window.push_handlers(keys)

    trained_agent = TrainedAgent('models/test-1')
    window.assign_car_to_agent(agent=trained_agent)

    pyglet.clock.schedule_interval(window.run, 1 / 160.0)  # pass the method itself; pyglet invokes it with the elapsed dt
    pyglet.app.run()
Example #16
File: runner.py Project: donallmc/lbp_sim
          "'. Does file exist? And is json formatted correctly?")
    print("Error caused by: ")
    print(e)
    sys.exit()
factory = BuyerFactory()
buyers = factory.buildBuyers(buyer_config)

# the number of iterations is the number of times the pool should adjust weights. In this case it corresponds to 48 iterations, once per hour
iterations = 48

# will print info about each buy and the state of the pool at each iteration. Set to false for less noise
verbose = False

# set to True to plot the final price history (at hourly resolution). Requires matplotlib
show_chart = True

# most of these params are hopefully self-explanatory!
# The pool assumes prices will be listed in USD but you can treat it as anything.
# The init_balance_token is the number of tokens that are available for sale. This will reduce over time.
# The init_balance_usd is the amount of USD added to the pool initially. This will increase over time
# weight_delta is the change in weights (negative for the token, positive for usd)
lbp = LBP(weight_delta=0.9375,
          init_weight_usd=5,
          init_weight_token=95,
          init_balance_usd=650000,
          init_balance_token=13000000,
          verbose=verbose)
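# Quick sanity check on the parameters above (an illustrative sketch, assuming weight_delta is
# applied once per hourly iteration as the comments describe, which is not guaranteed by this
# excerpt): the token weight should walk from 95 down to 50 while the USD weight rises to 50.
w_token, w_usd, delta = 95.0, 5.0, 0.9375
for _ in range(iterations):
    w_token -= delta
    w_usd += delta
print('expected final weights: token {} / usd {}'.format(w_token, w_usd))  # -> token 50.0 / usd 50.0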

simulator = Simulator()
simulator.run(lbp, iterations, buyers, show_chart)
for trial in range(0, repeat_count):
    # Set one manually...
    #seed = "\x61\xcb\x82\x90"

    seed = os.urandom(4)
    print "random.seed() = 0x{}".format(binascii.hexlify(seed))
    random.seed(seed)

    # Set message printing level
    Simulator.EXTRA_VERBOSE = False
    Simulator.VERBOSE = False
    Channel.VERBOSE = False
    Node.EXTRA_VERBOSE = False
    Node.VERBOSE = False

    sim = Simulator()
    loss_rate = 0.60  # loss rate (0.0 to 1.0)
    min_delay = 0.000001  # 1 micro-second minimum delay
    mean_delay = 0.000020  # 20 micro-second mean delay

    delay_generator = ExponentialDelay(min_delay, mean_delay)

    alice_output = Channel(sim, delay_generator, loss_rate)
    alice = Node(sim, "ALICE", True, alice_output)

    bob_output = Channel(sim, delay_generator, loss_rate)
    bob = Node(sim, "BOB  ", False, bob_output)

    alice.set_peer(bob)
    bob.set_peer(alice)
Example #18
    def run_for(self, ticks):
        s = Simulator(self.nodes)
        s.run(ticks)
Example #19
def test01():
    print("Test 01")

    rDescr: RecommenderDescription = RecommenderDescription(
        RecommenderTheMostPopular, {})

    recommenderID: str = "TheMostPopular"
    pDescr: Portfolio1MethDescription = Portfolio1MethDescription(
        recommenderID.title(), recommenderID, rDescr)

    dataset: ADataset = DatasetML.readDatasets()

    history: AHistory = HistoryDF("test")
    p: APortfolio = pDescr.exportPortfolio("jobID", history)
    p.train(history, dataset)

    #    r, rwr = p.recommend(1, DataFrame(), {APortfolio.ARG_NUMBER_OF_AGGR_ITEMS:20})
    #    rItemID1 = r[0]
    #    rItemID2 = r[1]
    #    rItemID3 = r[2]
    #
    #    print(r)
    #    print("rItemID1: " + str(rItemID1))
    #    print("rItemID2: " + str(rItemID2))
    #    print("rItemID3: " + str(rItemID3))

    testRatingsDF: DataFrame = DataFrame(columns=[
        Ratings.COL_USERID, Ratings.COL_MOVIEID, Ratings.COL_RATING,
        Ratings.COL_TIMESTAMP
    ])
    timeStampI: int = 1000

    userID1: int = 1
    userID2: int = 2
    userID3: int = 3
    rItemID1: int = 9001
    rItemID2: int = 9002
    rItemID3: int = 9003
    # training part of dataset
    for i in [i + 0 for i in range(5 * 8)]:
        timeStampI = timeStampI + 1
        testRatingsDF.loc[i] = [userID1] + list([9000, 5, timeStampI])
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID2] + list(
        [rItemID1, 5, timeStampI])
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID2] + list(
        [rItemID2, 5, timeStampI])
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID3] + list(
        [rItemID3, 5, timeStampI])
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID2] + list(
        [rItemID2, 5, timeStampI])
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID2] + list(
        [rItemID2, 5, timeStampI])

    # testing part of dataset
    userID11: int = 11
    userID12: int = 12
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID11] + list(
        [rItemID1, 5, timeStampI])
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID11] + list(
        [rItemID2, 5, timeStampI])
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID11] + list(
        [rItemID3, 5, timeStampI])
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID12] + list(
        [rItemID2, 5, timeStampI])
    timeStampI = timeStampI + 1
    testRatingsDF.loc[len(testRatingsDF)] = [userID11] + list(
        [rItemID2, 5, timeStampI])

    print("len(testRatingsDF): " + str(len(testRatingsDF)))
    print(testRatingsDF.head(20))
    print(testRatingsDF.tail(20))

    datasetMy: ADataset = DatasetML("", testRatingsDF, dataset.usersDF,
                                    dataset.itemsDF)

    behavioursDF: DataFrame = DataFrame(
        columns=[BehavioursML.COL_REPETITION, BehavioursML.COL_BEHAVIOUR])
    for ratingIndexI in range(len(testRatingsDF)):
        for repetitionI in range(5):
            behavioursDF.loc[ratingIndexI * 5 + repetitionI] = list(
                [repetitionI, [True] * 20])
    print(behavioursDF.head(20))

    argsSimulationDict: Dict[str, str] = {
        SimulationML.ARG_WINDOW_SIZE: 5,
        SimulationML.ARG_RECOM_REPETITION_COUNT: 1,
        SimulationML.ARG_NUMBER_OF_RECOMM_ITEMS: 100,
        SimulationML.ARG_NUMBER_OF_AGGR_ITEMS:
        InputSimulatorDefinition.numberOfAggrItems,
        SimulationML.ARG_DIV_DATASET_PERC_SIZE: 90,
        SimulationML.ARG_HISTORY_LENGTH: 10
    }

    # simulation of portfolio
    simulator: Simulator = Simulator("test", SimulationML, argsSimulationDict,
                                     datasetMy, behavioursDF)
    simulator.simulate([pDescr], [DataFrame()], [EToolDoNothing({})],
                       HistoryHierDF)
Example #20
    def __init__(self, start_time, timestep, dispatch_policy, matching_policy):
        self.simulator = Simulator(start_time, timestep)
        self.agent = Agent(dispatch_policy, matching_policy)
        self.last_vehicle_id = 1
        self.vehicle_queue = []
Example #21
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    scenes_ds = os.path.join(args.scenes, dt + '_' + args.name + '.hdf5')
    scenes_ds = h5py.File(scenes_ds, 'w')
    tp = h5py.special_dtype(vlen=bytes)
    scenes_ds.create_dataset('name', (len(obj_fns) * args.nscenes, ), dtype=tp)
    scenes_ds.create_dataset(
        'rgb', (len(obj_fns) * args.nscenes, args.height, args.width, 3),
        dtype='u8')
    scenes_ds.create_dataset(
        'depth', (len(obj_fns) * args.nscenes, args.height, args.width),
        dtype=np.float64)
    scenes_ds.create_dataset('scene', (len(obj_fns) * args.nscenes, ),
                             dtype=tp)

    sim = Simulator(gui=True, use_egl=False, debug=True)
    sim.cam.width = args.width
    sim.cam.height = args.height

    for var in vars(sim.cam).items():
        scenes_ds.attrs[var[0]] = var[1]

    for i, obj_fn in enumerate(obj_fns):
        scene_it = iter(range(args.nscenes))
        scene_n = next(scene_it, None)
        while scene_n is not None:
            print('Processing: {} {} '.format(scene_n, obj_fn))
            idx = i * args.nscenes + scene_n
            if not args.default_scale:
                scale = sim.read_scale(
                    obj_fn.split('/')[-1].replace('.obj', ''), args.models)
Example #22
    def __label_single_maps(self, atlas_name, feature_list: List[str],
                            label_list: List[str],
                            single_feature_list: List[str],
                            single_label_list: List[str],
                            overwrite: bool) -> List[Dict[str, any]]:
        """
        Passed atlas name, feature list, label list, and returns res object with the map features labelled for training
        """
        if not atlas_name:
            return []

        if not overwrite and self.__services.resources.training_data_dir.exists(
                "training_" + atlas_name, ".pickle"):
            self.__services.debug.write(
                "Found in training data. Loading from training data",
                DebugLevel.BASIC)
            return self.__services.resources.training_data_dir.load(
                "training_" + atlas_name)

        self.__services.debug.write("Loading maps", DebugLevel.BASIC)
        maps: List[Map] = self.__services.resources.maps_dir.get_atlas(
            atlas_name).load_all()

        res: List[Dict[str, any]] = []

        progress_bar: Progress = self.__services.debug.progress_debug(
            len(maps), DebugLevel.BASIC)
        progress_bar.start()

        # process atlas
        for m in maps:
            config = Configuration()
            config.simulator_algorithm_type = AStar
            config.simulator_testing_type = AStarTesting
            config.simulator_initial_map = m
            services: Services = Services(config)
            simulator: Simulator = Simulator(services)
            testing: AStarTesting = simulator.start()

            features: Dict[str, any] = {}
            arg: str
            for arg in [
                    "map_obstacles_percentage", "goal_found",
                    "distance_to_goal", "original_distance_to_goal", "trace",
                    "total_steps", "total_distance",
                    "smoothness_of_trajectory", "total_time", "algorithm_type",
                    "fringe", "search_space"
            ]:
                features[arg] = testing.get_results()[arg]

            features["features"] = MapProcessing.get_sequential_features(
                testing.map, feature_list)
            features["labels"] = MapProcessing.get_sequential_labels(
                testing.map, label_list)
            features["single_features"] = MapProcessing.get_single_features(
                m, single_feature_list)
            features["single_labels"] = MapProcessing.get_single_labels(
                m, single_label_list)
            res.append(features)
            progress_bar.step()

        return res
Example #23
    parser.add_argument("--min_delta", default=60, type=int)
    parser.add_argument("--n_state", default=1, type=int)
    parser.add_argument("--epsilon", default=0.1, type=float)
    parser.add_argument("--alpha", default=0.8, type=float)
    parser.add_argument("--gamma", default=0.8, type=float)
    parser.add_argument("--seed", default=2018, type=int)

    args = parser.parse_args()
    no = args.no

    #path = glob.glob(r"/Users/denism/work/train-schedule-optimisation-challenge-starter-kit/problem_instances/" + no + "*")[0]
    path = glob.glob(r"inputs/" + no + "*")[0]

    qtable = QTable()

    sim = Simulator(path=path, qtable=qtable)
    sim.trains = sim.trains
    sim.assign_limit()

    sim.wait_time = args.wait
    sim.max_delta = args.max_delta
    sim.min_delta = args.min_delta
    sim.n_state = args.n_state

    # dijkstra or ..
    sim.late_on_node = False
    sim.with_connections = True
    sim.backward = True

    qtable.epsilon = args.epsilon
    qtable.alpha = args.alpha  # learning rate
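    # The epsilon / alpha / gamma arguments above are standard tabular Q-learning
    # hyperparameters. The QTable class itself is not shown in this excerpt; the helper below
    # is only a generic sketch of the update rule such parameters usually drive, not this
    # project's implementation.
    def q_update_sketch(Q, s, a, reward, s_next, alpha=0.8, gamma=0.8):
        """One tabular Q-learning step on a dict-of-dicts table Q[state][action]."""
        best_next = max(Q[s_next].values()) if Q.get(s_next) else 0.0
        Q.setdefault(s, {}).setdefault(a, 0.0)
        Q[s][a] += alpha * (reward + gamma * best_next - Q[s][a])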
Example #24
                                         batch_size=batch_size,
                                         proba_coeff=proba_coeff,
                                         train_data_size=train_data_size,
                                         mode=None)
 #continue
 ensembles = resnet(tf.Graph(), n_replicas, resnet_size)
 resnet20_names.append(name)
 sim = Simulator(model=None,
                 learning_rate=learning_rate,
                 noise_list=noise_list,
                 noise_type=noise_type,
                 batch_size=batch_size,
                 n_epochs=n_epochs,
                 name=name,
                 ensembles=ensembles,
                 burn_in_period=burn_in_period,
                 swap_step=swap_step,
                 separation_ratio=0,
                 n_simulations=n_simulations,
                 scheduled_noise=scheduled_noise,
                 test_step=test_step,
                 loss_func_name=loss_func_name,
                 proba_coeff=proba_coeff,
                 mode=None)
 os.system('clear')
 sim.train(train_data_size=train_data_size,
           train_data=x_train,
           train_labels=y_train,
           validation_data=x_valid,
           validation_labels=y_valid,
           test_data=x_test,
Example #25
    def setUp(self):
        self.simulator = Simulator({'heroes': [], 'villains': []})
        self.actor = Actor({
            'weapons': ['Knife'],
            'tactics': 'thrust_attack'
        }, 'Heroes', self.simulator)
Example #26
def run_simulation(series=False):
    sim = Simulator("config/simulator_config.yaml", series=series)
    sim.train()
    return sim.config.General.rootdir
Example #27
    # Uncomment to debug a bunch of scenes by their name
    # name_filter = np.isin(scenes_ds['name'][:], ['3_2e228ee528f0a7054212ff51b27f0221', '0_1a4daa4904bb4a0949684e7f0bb99f9c', '1_1a4daa4904bb4a0949684e7f0bb99f9c', '2_1a4daa4904bb4a0949684e7f0bb99f9c', '3_1a4daa4904bb4a0949684e7f0bb99f9c', '4_1a4daa4904bb4a0949684e7f0bb99f9c', '1_2cc3904f7bfc8650ee25380b2e696b36', '0_1a0312faac503f7dc2c1a442b53fa053', '2_1a0312faac503f7dc2c1a442b53fa053', '4_1a0312faac503f7dc2c1a442b53fa053', '0_1f8a542e64756d349628684766da1bb4', '1_1f8a542e64756d349628684766da1bb4', '2_1f8a542e64756d349628684766da1bb4', '4_1f8a542e64756d349628684766da1bb4', '0_1f4e56064de606093e746e5f1700ce1a', '1_1f4e56064de606093e746e5f1700ce1a', '2_1f4e56064de606093e746e5f1700ce1a', '3_1f4e56064de606093e746e5f1700ce1a', '4_1f4e56064de606093e746e5f1700ce1a', '4_1d190c1bb38b29cb7a2fbdd8f7e098f4', '2_3ba7dd61736e7a96270c0e719fe4ed97', '0_1d4a469bdb53d3f77a3f900e0a6f2d83', '1_1d4a469bdb53d3f77a3f900e0a6f2d83', '2_1d4a469bdb53d3f77a3f900e0a6f2d83', '3_1d4a469bdb53d3f77a3f900e0a6f2d83', '4_1d4a469bdb53d3f77a3f900e0a6f2d83', '1_2d89d2b3b6749a9d99fbba385cc0d41d', '4_2d89d2b3b6749a9d99fbba385cc0d41d', '0_2f2f0e72a0088dd0f9b0754354ae88f5', '1_2f2f0e72a0088dd0f9b0754354ae88f5', '2_2f2f0e72a0088dd0f9b0754354ae88f5', '3_2f2f0e72a0088dd0f9b0754354ae88f5', '4_2f2f0e72a0088dd0f9b0754354ae88f5', '0_1be987c137d37f0b7c15f7bdb6fa82dd', '1_1be987c137d37f0b7c15f7bdb6fa82dd', '2_1be987c137d37f0b7c15f7bdb6fa82dd', '3_1be987c137d37f0b7c15f7bdb6fa82dd', '4_1be987c137d37f0b7c15f7bdb6fa82dd', '0_2daedbac8e1ee36f57467549cdfd9eb3', '2_2daedbac8e1ee36f57467549cdfd9eb3', '3_2daedbac8e1ee36f57467549cdfd9eb3', '4_2daedbac8e1ee36f57467549cdfd9eb3', '0_2c38b974e331ff14ec7d0aeaf786ab21', '2_2c38b974e331ff14ec7d0aeaf786ab21', '3_2c38b974e331ff14ec7d0aeaf786ab21', '4_2c38b974e331ff14ec7d0aeaf786ab21', '0_1e700065e92a072b39a22f83a4a90eb', '1_1e700065e92a072b39a22f83a4a90eb', '2_1e700065e92a072b39a22f83a4a90eb', '3_1e700065e92a072b39a22f83a4a90eb', '4_1e700065e92a072b39a22f83a4a90eb', '1_1cfc37465809382edfd1d17b67edb09', '3_1cfc37465809382edfd1d17b67edb09', '4_1cfc37465809382edfd1d17b67edb09', '1_1e227771ef66abdb4212ff51b27f0221', '1_1c5e5f1485ba5db1f879801ae14fa622', '2_1c5e5f1485ba5db1f879801ae14fa622', '3_1c5e5f1485ba5db1f879801ae14fa622', '4_1c5e5f1485ba5db1f879801ae14fa622', '4_1a0710af081df737c50a037462bade42', '2_1d9b04c979dfbddca84874d9f682ce6c', '3_1d9b04c979dfbddca84874d9f682ce6c', '4_1d9b04c979dfbddca84874d9f682ce6c', '0_3b947648cfb77c92dc6e31f308657eca', '1_3b947648cfb77c92dc6e31f308657eca', '2_3b947648cfb77c92dc6e31f308657eca', '3_3b947648cfb77c92dc6e31f308657eca', '4_3b947648cfb77c92dc6e31f308657eca', '3_1e23d88e517b711567ff608a5fbe6aa8'])
    # scenes = scenes[name_filter]

    # Uncomment to debug a range of scenes
    # scenes = scenes[4:9]

    # Uncomment to debug a particular scene by its index
    # scenes = [scenes[5]]

    # Hack to fix pybullet-pylab incompatibility on mac os
    if args.gui:
        import pylab as plt
        plt.figure()

    sim = Simulator(use_egl=False, gui=args.gui)  # Change to no gui
    sim.cam.pos = [
        0.,
        np.cos(np.deg2rad(args.angle)) * args.distance,
        np.sin(np.deg2rad(args.angle)) * args.distance
    ]
    sim.cam.width = args.cam_resolution
    sim.cam.height = args.cam_resolution
    sim.add_gripper(os.environ['GRIPPER_PATH'])

    onet = OrthoNet(model_fn=args.network)

    accuracy = 0

    _global_start = time.time()
    for scene_idx in range(len(scenes)):
Example #28
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '..'))

from simulator.action_history import ActionHistory
from simulator.plotter import Plotter
from simulator.simulator import Simulator
from EKF_known_correspondence import EKF_SLAM

import numpy as np

if __name__ == '__main__':
    dt = 1.0

    # load testcase
    s = Simulator('../testcase/map2.txt', '../testcase/robot1.txt')
    ah = ActionHistory(dt)
    ah.LoadFromFile('../testcase/action_history_loop.txt')

    # set up the SLAM algorithm
    Q = np.diag([0.1, 0.1, 0.1]) # process noise; (x, y, theta). should be over-estimates
    R = np.diag([0.2, 0.2, 0.01]) # measurement noise; (dist, bearing, signature),
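    # Where Q and R enter a standard EKF predict/correct cycle (illustrative notation only,
    # not necessarily this EKF_SLAM class's internals):
    #   predict:  mu = g(mu, u);                 Sigma = G @ Sigma @ G.T + Q
    #   correct:  K = Sigma @ H.T @ inv(H @ Sigma @ H.T + R)
    #             mu = mu + K @ (z - h(mu));     Sigma = (I - K @ H) @ Sigma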
    algo = EKF_SLAM()
    algo.Initialize(Q, R, s.world.GetFeatureCount())
    
    # test the SLAM algorithm with the simulator
    ah.Rewind()
    while (not ah.EOF()):
        t, ra = ah.GetNext()
        m = s.Update(t, ra)
        algo.Update(t, ra, m)
Example #29
def train(params):
    """
    parameters set
    """
    NUM_NODES = params['number of nodes in the cluster']
    NUM_CONTAINERS = params['number of containers']
    env = LraClusterEnv(num_nodes=NUM_NODES)
    batch_size = params['batch_size']
    ckpt_path = "./checkpoint/" + params['path'] + "/model.ckpt"
    np_path = "./checkpoint/" + params['path'] + "/optimal_file_name.npz"
    nodes_per_group = int(params['nodes per group'])
    training_times_per_episode = 1  # TODO: if the number of layers changes, training_times_per_episode should be modified
    sim = Simulator()
    """
    """
    n_actions = nodes_per_group  #: 3 nodes per group
    n_features = int(n_actions * env.NUM_APPS + env.NUM_APPS)  #: 3*7+7 = 28
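    # Input layout implied by the episode loop below: the per-app container counts of the 3
    # candidate groups (n_actions * NUM_APPS values) followed by an embedding of the app being
    # placed, which must contribute the remaining NUM_APPS values for the sizes to match.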
    RL_1 = PolicyGradient(n_actions=n_actions,
                          n_features=n_features,
                          learning_rate=params['learning rate'],
                          suffix='1')
    """
    Training
    """
    start_time = time.time()
    highest_value_time = 0.0
    print("start time!")
    global_start_time = start_time
    observation_episode_1, action_episode_1, reward_episode_1 = [], [], []
    observation_optimal_1, action_optimal_1, reward_optimal_1 = [], [], []
    highest_tput = 0.1
    epoch_i = 0
    optimal_range = 1.02
    allocation_optimal = []

    def store_episode_1(observations, actions):
        observation_episode_1.append(observations)
        action_episode_1.append(actions)

    source_batch, index_data, embedding = batch_data(
    )  # index_data = [0,1,2,0,1,2]

    while epoch_i < params['epochs']:

        observation = env.reset().copy()  # (9,9)
        """
        Episode
        """
        for inter_episode_index in range(NUM_CONTAINERS):
            observation, mapping_index = handle_constraint(
                observation, NUM_NODES)
            assert len(mapping_index) > 0
            """
            first layer
            """
            observation_first_layer = np.empty([0, env.NUM_APPS], int)
            number_of_first_layer_nodes = int(NUM_NODES / nodes_per_group)  # 9
            for i in range(nodes_per_group):
                observation_new = np.sum(
                    observation[i * number_of_first_layer_nodes:(i + 1) *
                                number_of_first_layer_nodes],
                    0).reshape(1, -1)
                observation_first_layer = np.append(observation_first_layer,
                                                    observation_new, 0)
            observation_first_layer[:, index_data[inter_episode_index]] += 1
            observation_first_layer = np.append(
                observation_first_layer,
                embedding[index_data[inter_episode_index]]).reshape(1, -1)
            action_1, prob_weights = RL_1.choose_action(
                observation_first_layer)
            """
            final decision
            """
            final_decision = action_1 * number_of_first_layer_nodes  #+ action_2 * number_of_second_layer_nodes #+ action_3 * number_of_third_layer_nodes + action_4 * number_of_fourth_layer_nodes
            appid = index_data[inter_episode_index]
            observation_ = env.step(mapping_index[final_decision], appid)

            store_episode_1(observation_first_layer, action_1)
            observation = observation_.copy()  # (9,9)
        """
        After an entire allocation, calculate total throughput, reward
        """
        tput = env.get_tput_total_env() / NUM_CONTAINERS
        # print(tput)
        # external, record the tput values under use_external_knowledge, used to terminate use_external_knowledge
        RL_1.store_tput_per_episode(tput, epoch_i,
                                    time.time() - global_start_time)

        assert (np.sum(env.state, axis=1) <=
                params['container_limitation per node']).all()
        assert sum(sum(env.state)) == NUM_CONTAINERS

        reward_ratio = (tput - highest_tput)
        reward_episode_1 = [reward_ratio] * len(observation_episode_1)
        RL_1.store_training_samples_per_episode(observation_episode_1,
                                                action_episode_1,
                                                reward_episode_1)
        """
        check_tput_quality(tput)
        """

        if highest_tput < tput:
            highest_tput_original = highest_tput
            optimal_range_original = optimal_range
            highest_tput = tput
            highest_value_time = time.time() - start_time
            allocation_optimal = env.state
            observation_optimal_1, action_optimal_1, reward_optimal_1 = [], [], []
            observation_optimal_1.extend(observation_episode_1)
            action_optimal_1.extend(action_episode_1)
            reward_optimal_1.extend(reward_episode_1)

            optimal_range = min(
                1.02, highest_tput /
                (highest_tput_original / optimal_range_original))

        observation_episode_1, action_episode_1, reward_episode_1 = [], [], []
        """
        Each batch, RL.learn()
        """
        if (epoch_i % batch_size == 0) & (epoch_i > 1):
            RL_1.learn(epoch_i, 0.0, True)
        """
        checkpoint, per 1000 episodes
        """

        if (epoch_i % 500 == 0) & (epoch_i > 1):
            count_size = (len(reward_optimal_1) /
                          (NUM_CONTAINERS * training_times_per_episode))
            print("\n epoch: %d, highest tput: %f, times: %d" %
                  (epoch_i, highest_tput, count_size))
            print("spending time: %f" % (time.time() - global_start_time))
            RL_1.save_session(ckpt_path)
            np.savez(np_path,
                     tputs=np.array(RL_1.tput_persisit),
                     candidate=np.array(RL_1.episode),
                     time=np.array(RL_1.time_persisit),
                     highest_value_time=highest_value_time,
                     highest_tput=highest_tput,
                     allocation_optimal=allocation_optimal,
                     entropy=np.array(RL_1.entropy_persist))

        epoch_i += 1