Example 1
def puzzle_2(all_ingredients, all_allergens, inert_ingredients):
    possible_ingredients = list(
        set(all_ingredients.keys()) - inert_ingredients)
    possible_allergens = list(set(all_allergens.keys()))

    solver = z3.Solver()

    assignments = z3.IntVector('allergen', len(possible_allergens))
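    # assignments[ii] holds the index (into possible_allergens) of the
    # allergen assigned to possible_ingredients[ii]; the encoding assumes the
    # two lists have equal length, which holds once inert ingredients are removed.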
    for assignment in assignments:
        solver.add(0 <= assignment)
        solver.add(assignment < len(possible_allergens))
    solver.add(z3.Distinct(assignments))

    for ai, allergen in enumerate(possible_allergens):
        conditions = []
        for ii, ingredient in enumerate(possible_ingredients):
            if all_ingredients[ingredient] >= all_allergens[allergen]:
                conditions.append(assignments[ii] == ai)
        solver.add(z3.Or(conditions))

    assert solver.check() == z3.sat
    model = solver.model()

    matches = []
    for ii, _ in enumerate(assignments):
        matches.append(
            (possible_allergens[model.evaluate(assignments[ii]).as_long()],
             possible_ingredients[ii]))

    matches.sort()
    return ','.join(match[1] for match in matches)
Example 2
def part_2_z3():
    foods = get_foods()
    hypoallergenic = get_hypoallergenic_ingredients(foods)

    allergens = set()
    ingredients = set()
    foods_by_allergen = collections.defaultdict(set)
    foods_by_ingredient = collections.defaultdict(set)

    for food in foods:
        for allergen in food.allergens:
            foods_by_allergen[allergen].add(food)
            allergens.add(allergen)

        for ingredient in food.ingredients:
            foods_by_ingredient[ingredient].add(food)
            ingredients.add(ingredient)

    allergens = list(allergens)
    ingredients = list(ingredients - hypoallergenic)

    # List of variables representing possible assignment of ingredient to allergen
    assignments = z3.IntVector('assignment', len(ingredients))
    solver = z3.Solver()

    for assignment in assignments:
        solver.add(0 <= assignment)
        solver.add(assignment < len(allergens))

    solver.add(z3.Distinct(assignments))

    for i, allergen in enumerate(allergens):
        candidates = []

        for j, ingredient in enumerate(ingredients):
            # If set of foods that we know contain allergen_i is a subset of foods containing ingredient_j,
            # then ingredient_j = allergen_i is a possible assignment
            if foods_by_allergen[allergen] <= foods_by_ingredient[ingredient]:
                candidates.append(assignments[j] == i)

        solver.add(z3.Or(candidates))

    assert solver.check() == z3.sat

    model = solver.model()
    matches = []

    for i, assignment in enumerate(assignments):
        assignment = model.evaluate(assignment).as_long()
        matches.append((allergens[assignment], ingredients[i]))

    print(','.join(ingredient for _, ingredient in sorted(matches)))
Example 3
def valid_numbers_z3(optimize_method):
    digits = z3.IntVector('digits', 14)
    optimizer = z3.Optimize()
    number = 0

    for i in range(14):
        optimizer.add(1 <= digits[i], digits[i] <= 9)
        number = 10 * number + digits[i]

    for (i, j), delta in RULES.items():
        optimizer.add(digits[i] - digits[j] == delta)

    getattr(optimizer, optimize_method)(number)
    assert optimizer.check() == z3.sat
    return optimizer.model().eval(number)
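
# Usage sketch (an assumption; RULES and the caller come from the surrounding
# puzzle code): valid_numbers_z3('maximize') and valid_numbers_z3('minimize')
# yield the largest and smallest valid 14-digit numbers.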
Example 4
# •Ten pallets of crottles, each of weight 2500 kg.

# •Twenty pallets of dupples, each of weight 200 kg.

# Skipples need to be cooled; only three of the eight trucks have the facility for cooling skipples.

# Nuzzles are very valuable; to distribute the risk of loss no two pallets of nuzzles may be in the same truck.

# Investigate what is the maximum number of pallets of prittles that can be delivered.

# (Hint: if you do not use the maximize command, you may run the tool several times and do a binary search to find the right value)

# from z3 import IntVector, Sum, Solver
import z3

N = z3.IntVector('N', 8) # Number of Nuzzle pallets on each truck
P = z3.IntVector('P', 8) # Number of Prittle pallets on each truck
S = z3.IntVector('S', 3) # Number of Skipple pallets on each refrigerated truck
C = z3.IntVector('C', 8) # Number of Crottle pallets on each truck
D = z3.IntVector('D', 8) # Number of Dupple pallets on each truck

# Weight restrictions for refrigerated trucks
weight_restriction = [ 800*N[i] + 1100*P[i] + 1000*S[i] + 2500*C[i] + 200*D[i] <= 8000 for i in range(3) ]
# Weight restrictions for non-refrigerated trucks
weight_restriction += [ 800*N[i] + 1100*P[i] + 2500*C[i] + 200*D[i] <= 8000 for i in range(3, 8) ]

# Pallet restrictions for refrigerated trucks
pallet_restriction = [ N[i] + P[i] + S[i] + C[i] + D[i] <= 8 for i in range(3) ]
pallet_restriction += [ N[i] + P[i] + C[i] + D[i] <= 8 for i in range(3,8) ]

# Only one Nuzzle pallet on each truck
# (the snippet is truncated here; this constraint is implied by the comment above)
nuzzle_restriction = [N[i] <= 1 for i in range(8)]
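
# The snippet is truncated here. A minimal sketch (an assumption, not part of
# the original) of how the hint could be realised with z3.Optimize; the
# truncated description does not give the nuzzle/skipple pallet counts, so
# only constraints visible in the snippet are reused:
opt = z3.Optimize()
opt.add(weight_restriction + pallet_restriction + nuzzle_restriction)
opt.add([v >= 0 for v in N + P + C + D + S])  # pallet counts are non-negative
opt.add(z3.Sum(C) == 10)  # ten pallets of crottles (from the description)
opt.add(z3.Sum(D) == 20)  # twenty pallets of dupples (from the description)
opt.maximize(z3.Sum(P))   # maximum number of prittle pallets delivered
assert opt.check() == z3.sat
print(opt.model().eval(z3.Sum(P)))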
Example 5
# Question 1
# Consider the following program:
# a := 1; b := 1;
# for i := 1 to 10 do
# if ? then {a := a+2b; b := b+i} else {b := a+b; a := a+i};
# if b = 600+n then crash
# Here '?' is an unknown test that may yield false or true in any situation.
# Establish for which values of n = 1,2...,10 it is safe, that is, will not reach 'crash'.

import z3

A = z3.IntVector('A', 11)
B = z3.IntVector('B', 11)

Q = z3.BoolVector('?', 11)  # Q for question mark, note Q[0] is not used at all

# Initial conditions
init_conds = [A[0] == 1, B[0] == 1]

# Iteration conditions
iter_conds = []
for i in range(1, 11):
    iter_conds += [A[i] == z3.If(Q[i], A[i - 1] + 2 * B[i - 1], A[i - 1] + i)]
    iter_conds += [B[i] == z3.If(Q[i], B[i - 1] + i, A[i - 1] + B[i - 1])]

conditions = init_conds + iter_conds

solver = z3.Solver()
solver.add(conditions)
solver.push()
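
# The snippet is truncated here. A minimal sketch (an assumption, not part of
# the original) of how the check could be completed: after the loop b = B[10],
# so for each n assert the crash condition; unsat means that n is safe.
for n in range(1, 11):
    solver.push()
    solver.add(B[10] == 600 + n)
    print(n, 'unsafe' if solver.check() == z3.sat else 'safe')
    solver.pop()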
Example 6
def train(params):

    """
    parameters set
    """
    NUM_NODES = params['number of nodes in the cluster']
    node_limit_sum = 120
    node_limit_coex = 20
    NUM_APPS = 7

    batch_size = params['batch_size']
    ckpt_path_1 = "./checkpoint/" + params['path'] + "1/model.ckpt"
    ckpt_path_2 = "./checkpoint/" + params['path'] + "2/model.ckpt"
    ckpt_path_3 = "./checkpoint/" + params['path'] + "3/model.ckpt"
    make_path(params['path'] + "1")
    make_path(params['path'] + "2")
    make_path(params['path'] + "3")

    ckpt_path_recover_1 = "../results/cpo/newhypernode/" + params['path_recover'] + "1/model.ckpt"
    ckpt_path_recover_2 = "../results/cpo/newhypernode/" + params['path_recover'] + "2/model.ckpt"
    ckpt_path_recover_3 = "../results/cpo/newhypernode/" + params['path_recover'] + "3/model.ckpt"

    env = LraClusterEnv(num_nodes=NUM_NODES)

    np_path = "./checkpoint/" + params['path'] + "/optimal_file_name.npz"
    Recover = params['recover']
    nodes_per_group = int(params['nodes per group'])
    replay_size = params['replay size']
    training_times_per_episode = 1  # TODO: if the number of layers changes, training_times_per_episode should be modified
    safety_requirement = 0.05  # 40
    ifUseExternal = True

    """
    Build Network
    """
    n_actions = nodes_per_group  #: 3 nodes per group
    n_features = int(n_actions * (env.NUM_APPS + 1 + env.NUM_APPS) + 1 + env.NUM_APPS)  #: 3*(7+1+7) + 1 + 7 = 53 when NUM_APPS = 7
    RL_1 = PolicyGradient(
        n_actions=n_actions,
        n_features=n_features,
        learning_rate=params['learning rate'],
        suffix='1b',
        safety_requirement=safety_requirement)

    RL_2 = PolicyGradient(
        n_actions=n_actions,
        n_features=n_features,
        learning_rate=params['learning rate'],
        suffix='2b',
        safety_requirement=safety_requirement)

    RL_3 = PolicyGradient(
        n_actions=n_actions,
        n_features=n_features,
        learning_rate=params['learning rate'],
        suffix='3b',
        safety_requirement=safety_requirement)

    # sim = Simulator()

    """
    Training
    """
    start_time = time.time()
    global_start_time = start_time
    number_optimal = []
    observation_episode_1, action_episode_1, reward_episode_1, safety_episode_1 = [], [], [], []
    observation_optimal_1, action_optimal_1, reward_optimal_1, safety_optimal_1 = [], [], [], []

    observation_episode_2, action_episode_2, reward_episode_2, safety_episode_2 = [], [], [], []
    observation_optimal_2, action_optimal_2, reward_optimal_2, safety_optimal_2 = [], [], [], []

    observation_episode_3, action_episode_3, reward_episode_3, safety_episode_3 = [], [], [], []
    observation_optimal_3, action_optimal_3, reward_optimal_3, safety_optimal_3 = [], [], [], []

    epoch_i = 0

    thre_entropy = 0.001
    # TODO: delete this range

    names = locals()
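    # NOTE: writing through locals() is not guaranteed to create real local
    # variables in CPython; names works here only because it is used purely
    # as a dict (names['x0']..['x6'] hold one length-3 z3 IntVector per app).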
    for i in range(7):
        names['x' + str(i)] = z3.IntVector('x' + str(i), 3)
    for i in range(0, 10):
        names['highest_tput_' + str(i)] = 0

        names['observation_optimal_1_' + str(i)] = []
        names['action_optimal_1_' + str(i)] = []
        names['observation_optimal_2_' + str(i)] = []
        names['action_optimal_2_' + str(i)] = []
        names['observation_optimal_3_' + str(i)] = []
        names['action_optimal_3_' + str(i)] = []

        names['reward_optimal_1_' + str(i)] = []
        names['reward_optimal_2_' + str(i)] = []
        names['reward_optimal_3_' + str(i)] = []
        names['safety_optimal_1_' + str(i)] = []
        names['safety_optimal_2_' + str(i)] = []
        names['safety_optimal_3_' + str(i)] = []

        names['number_optimal_' + str(i)] = []
        names['optimal_range_' + str(i)] = 1.05

    def store_episode_1(observations, actions):
        observation_episode_1.append(observations)
        action_episode_1.append(actions)

    def store_episode_2(observations, actions):
        observation_episode_2.append(observations)
        action_episode_2.append(actions)

    def store_episode_3(observations, actions):
        observation_episode_3.append(observations)
        action_episode_3.append(actions)

    def handle_constraint(observation_now, appid_now):
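        # Feasibility probe: for each of the 3 candidate nodes, temporarily
        # assert "one more container of this app on this node" on the shared
        # z3 solver s; sat means the node can still take it. push/pop keeps
        # the probe from polluting the accumulated placement constraints.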

        observation_original = observation_now.copy()

        mapping_index = []
        list_check = []

        t2 = time.time()
        for place in range(3):
            s.push()
            s.add(names['x' + str(appid_now)][place] >= int(observation_now[place][appid_now]) + 1)

            if s.check() == z3.sat:
                list_check.append(False)
            else:
                list_check.append(True)
            s.pop()

        t3 = time.time()
        # print("formulate: ", t2 - t1)
        # print("calculate: ", t3 - t2)
        good_index = np.where(np.array(list_check) == False)[0]
        length = len(good_index)
        if length < 1:
            test = 1
        index_replace = 0
        for node in range(3):
            if list_check[node]:  # bad node
                # index_this_replace = good_index[np.random.randint(length)]
                index_this_replace = good_index[index_replace % length]
                index_replace += 1
                observation_original[node] = observation[index_this_replace]
                mapping_index.append(index_this_replace)
            else:
                mapping_index.append(node)
                observation_original[node] = observation[node]

        return observation_original, mapping_index

    source_batch_a, index_data_a = batch_data()  # index_data = [0,1,2,0,1,2]

    while epoch_i < params['epochs']:
        if Recover:
            RL_1.restore_session(ckpt_path_recover_1)
            RL_2.restore_session(ckpt_path_recover_2)
            RL_3.restore_session(ckpt_path_recover_3)
            Recover = False

        tput_origimal_class = 0
        source_batch_ = source_batch_a.copy()
        index_data = index_data_a.copy()
        NUM_CONTAINERS = sum(source_batch_)
        observation = np.zeros([NUM_NODES, NUM_APPS])  # shape (NUM_NODES, NUM_APPS)
        source_batch = source_batch_.copy()

        """
        Episode
        """
        """
        first layer
        """
        total = source_batch
        limit = (1 * 9 * 27)
        capacity = (8 * 9 * 27)
        s = z3.Solver()
        # app sum == batch
        for i in range(7):
            s.add(z3.Sum(names['x' + str(i)]) == int(total[i]))
        # node capacity
        for node in range(3):
            s.add(z3.Sum([names['x' + str(i)][node] for i in range(7)]) <= int(capacity))
        # >=0
        for i in range(7):
            for node in range(3):
                s.add(names['x' + str(i)][node] >= 0)
        # per app spread
        for i in range(7):
            for node in range(3):
                s.add(names['x' + str(i)][node] <= limit)
        # App 1 and App 2 combined may not exceed the limit on any node
        for node in range(3):
            s.add(names['x' + str(1)][node] + names['x' + str(2)][node] <= limit)

        source_batch_first = source_batch_.copy()
        observation_first_layer = np.zeros([nodes_per_group, env.NUM_APPS], int)
        for inter_episode_index in range(NUM_CONTAINERS):

            appid = index_data[inter_episode_index]
            observation_first_layer_copy, mapping_index = handle_constraint(observation_first_layer, appid)
            assert len(mapping_index) > 0
            source_batch_first[appid] -= 1
            # observation_first_layer_copy = observation_first_layer.copy()
            observation_first_layer_copy[:, appid] += 1

            observation_first_layer_copy = np.append(observation_first_layer_copy, observation_first_layer_copy > 9 * node_limit_coex, axis=1)
            observation_first_layer_copy = np.append(observation_first_layer_copy, observation_first_layer_copy.sum(axis=1).reshape(nodes_per_group, 1), axis=1)
            # observation_first_layer_copy = np.append(observation_first_layer_copy, ((observation_first_layer_copy[:, 2] > 0) * (observation_first_layer_copy[:, 3] > 0)).reshape(nodes_per_group, 1), axis=1)
            observation_first_layer_copy = np.array(observation_first_layer_copy).reshape(1, -1)
            observation_first_layer_copy = np.append(observation_first_layer_copy, appid).reshape(1, -1)
            observation_first_layer_copy = np.append(observation_first_layer_copy, np.array(source_batch_first)).reshape(1, -1)
            if ifUseExternal:
                action_1 = inter_episode_index % 3
                prob_weights = []
            else:
                action_1, prob_weights = RL_1.choose_action(observation_first_layer_copy.copy())

            decision = mapping_index[action_1]
            observation_first_layer[decision, appid] += 1
            s.add(names['x' + str(appid)][decision] >= int(observation_first_layer[decision][appid]))

            store_episode_1(observation_first_layer_copy, action_1)
        assert (np.sum(observation_first_layer, axis=1) <= params['container_limitation per node'] * 9).all()
        assert sum(sum(observation_first_layer)) == NUM_CONTAINERS

        """
        second layer
        """
        observation_second_layer_aggregation = np.empty([0, env.NUM_APPS], int)  # 9*20

        number_cont_second_layer = []

        for second_layer_index in range(nodes_per_group):

            rnd_array = observation_first_layer[second_layer_index].copy()
            total = rnd_array
            limit = (1 * 3 * 27)
            capacity = (8 * 3 * 27)
            s = z3.Solver()
            # app sum == batch
            for i in range(7):
                s.add(z3.Sum(names['x' + str(i)]) == int(total[i]))
            # node capacity
            for node in range(3):
                s.add(z3.Sum([names['x' + str(i)][node] for i in range(7)]) <= int(capacity))
            # >=0
            for i in range(7):
                for node in range(3):
                    s.add(names['x' + str(i)][node] >= 0)
            # per app spread
            for i in range(7):
                for node in range(3):
                    s.add(names['x' + str(i)][node] <= limit)
            # App 1 and App 2 combined may not exceed the limit on any node
            for node in range(3):
                s.add(names['x' + str(1)][node] + names['x' + str(2)][node] <= limit)

            source_batch_second, index_data = batch_data_sub(rnd_array)

            observation_second_layer = np.zeros([nodes_per_group, env.NUM_APPS], int)

            NUM_CONTAINERS_second = sum(source_batch_second)

            number_cont_second_layer.append(NUM_CONTAINERS_second)

            for inter_episode_index in range(NUM_CONTAINERS_second):

                appid = index_data[inter_episode_index]
                observation_second_layer_copy, mapping_index = handle_constraint(observation_second_layer, appid)
                assert len(mapping_index) > 0
                source_batch_second[appid] -= 1
                # observation_second_layer_copy = observation_second_layer.copy()
                observation_second_layer_copy[:, appid] += 1

                observation_second_layer_copy = np.append(observation_second_layer_copy, observation_second_layer_copy > 3 * node_limit_coex, axis=1)
                observation_second_layer_copy = np.append(observation_second_layer_copy, observation_second_layer_copy.sum(axis=1).reshape(nodes_per_group, 1), axis=1)
                # observation_second_layer_copy = np.append(observation_second_layer_copy, ((observation_second_layer_copy[:, 2] > 0) * (observation_second_layer_copy[:, 3] > 0)).reshape(nodes_per_group, 1), axis=1)
                observation_second_layer_copy = np.array(observation_second_layer_copy).reshape(1, -1)
                observation_second_layer_copy = np.append(observation_second_layer_copy, appid).reshape(1, -1)
                observation_second_layer_copy = np.append(observation_second_layer_copy, np.array(source_batch_second)).reshape(1, -1)
                if ifUseExternal:
                    action_2 = inter_episode_index % 3
                    prob_weights = []
                else:
                    action_2, prob_weights = RL_2.choose_action(observation_second_layer_copy.copy())

                decision = mapping_index[action_2]
                observation_second_layer[decision, appid] += 1
                s.add(names['x' + str(appid)][decision] >= int(observation_second_layer[decision][appid]))
                store_episode_2(observation_second_layer_copy, action_2)
            assert (np.sum(observation_second_layer, axis=1) <= params['container_limitation per node'] * 3).all()
            assert sum(sum(observation_second_layer)) == NUM_CONTAINERS_second
            observation_second_layer_aggregation = np.append(observation_second_layer_aggregation, observation_second_layer, 0)

        """
        third layer
        """
        observation_third_layer_aggregation = np.empty([0, env.NUM_APPS], int)  # 9*20
        number_cont_third_layer = []

        for third_layer_index in range(nodes_per_group * nodes_per_group):

            rnd_array = observation_second_layer_aggregation[third_layer_index].copy()
            total = rnd_array
            limit = (1 * 1 * 27)
            capacity = 8 * 27
            s = z3.Solver()
            # app sum == batch
            for i in range(7):
                s.add(z3.Sum(names['x' + str(i)]) == int(total[i]))
            # node capacity
            for node in range(3):
                s.add(z3.Sum([names['x' + str(i)][node] for i in range(7)]) <= int(capacity))
            # >=0
            for i in range(7):
                for node in range(3):
                    s.add(names['x' + str(i)][node] >= 0)
            # per app spread
            for i in range(7):
                for node in range(3):
                    s.add(names['x' + str(i)][node] <= limit)
            # App 1 and App 2 combined may not exceed the limit on any node
            for node in range(3):
                s.add(names['x' + str(1)][node] + names['x' + str(2)][node] <= limit)

            source_batch_third, index_data = batch_data_sub(rnd_array)

            observation_third_layer = np.zeros([nodes_per_group, env.NUM_APPS], int)

            NUM_CONTAINERS_third = sum(source_batch_third)
            number_cont_third_layer.append(NUM_CONTAINERS_third)

            for inter_episode_index in range(NUM_CONTAINERS_third):
                appid = index_data[inter_episode_index]
                observation_third_layer_copy, mapping_index = handle_constraint(observation_third_layer, appid)
                assert len(mapping_index) > 0
                source_batch_third[appid] -= 1
                # observation_third_layer_copy = observation_third_layer.copy()
                observation_third_layer_copy[:, appid] += 1

                observation_third_layer_copy = np.append(observation_third_layer_copy, observation_third_layer_copy > 1 * node_limit_coex, axis=1)
                observation_third_layer_copy = np.append(observation_third_layer_copy, observation_third_layer_copy.sum(axis=1).reshape(nodes_per_group, 1), axis=1)
                # observation_third_layer_copy = np.append(observation_third_layer_copy, ((observation_third_layer_copy[:, 2] > 0) * (observation_third_layer_copy[:, 3] > 0)).reshape(nodes_per_group, 1), axis=1)
                observation_third_layer_copy = np.array(observation_third_layer_copy).reshape(1, -1)
                observation_third_layer_copy = np.append(observation_third_layer_copy, appid).reshape(1, -1)
                observation_third_layer_copy = np.append(observation_third_layer_copy, np.array(source_batch_third)).reshape(1, -1)

                if ifUseExternal:
                    action_3 = inter_episode_index % 3
                    prob_weights = []
                else:

                    action_3, prob_weights = RL_3.choose_action(observation_third_layer_copy.copy())

                decision = mapping_index[action_3]
                observation_third_layer[decision, appid] += 1
                s.add(names['x' + str(appid)][decision] >= int(observation_third_layer[decision][appid]))

                store_episode_3(observation_third_layer_copy, action_3)

            observation_third_layer_aggregation = np.append(observation_third_layer_aggregation, observation_third_layer, 0)
            assert (np.sum(observation_third_layer, axis=1) <= params['container_limitation per node'] * 1).all()
            assert sum(sum(observation_third_layer)) == NUM_CONTAINERS_third
        """
        After an entire allocation, calculate total throughput, reward
        """
        env.state = observation_third_layer_aggregation.copy()
        assert sum(sum(env.state)) == NUM_CONTAINERS
        assert (env.state.sum(0) == source_batch_).all()

        total_tput, list_check_sum, list_check_coex, list_check_per_app, list_check = env.get_tput_total_env()

        tput = total_tput/NUM_CONTAINERS
        list_check = 1.0 * list_check / NUM_CONTAINERS
        reward_ratio = tput

        list_check_ratio = list_check

        list_check_layer_one = 0
        list_check_layer_one_ratio = list_check_layer_one

        safety_episode_1 = [list_check_ratio + list_check_layer_one_ratio * 1.0] * len(observation_episode_1)
        reward_episode_1 = [reward_ratio * 1.0] * len(observation_episode_1)

        safety_episode_2 = [list_check_ratio * 1.0] * len(observation_episode_2)
        reward_episode_2 = [reward_ratio * 1.0] * len(observation_episode_2)

        safety_episode_3 = [list_check_ratio * 1.0] * len(observation_episode_3)
        reward_episode_3 = [reward_ratio * 1.0] * len(observation_episode_3)


        RL_1.store_tput_per_episode(tput, epoch_i, list_check+list_check_layer_one, list_check_per_app, list_check_coex, list_check_sum)
        RL_2.store_tput_per_episode(tput, epoch_i, list_check+list_check_layer_one, list_check_per_app, list_check_coex, list_check_sum)
        RL_3.store_tput_per_episode(tput, epoch_i, list_check+list_check_layer_one, list_check_per_app, list_check_coex, list_check_sum)


        RL_1.store_training_samples_per_episode(observation_episode_1, action_episode_1, reward_episode_1, safety_episode_1)
        RL_2.store_training_samples_per_episode(observation_episode_2, action_episode_2, reward_episode_2, safety_episode_2)
        RL_3.store_training_samples_per_episode(observation_episode_3, action_episode_3, reward_episode_3, safety_episode_3)

        """
        check_tput_quality(tput)
        """
        if list_check <= safety_requirement:
            if names['highest_tput_' + str(tput_origimal_class)] < tput:
                names['highest_tput_' + str(tput_origimal_class)] = tput

                names['observation_optimal_1_' + str(tput_origimal_class)], names['action_optimal_1_' + str(tput_origimal_class)], names['observation_optimal_2_' + str(tput_origimal_class)], names['action_optimal_2_' + str(tput_origimal_class)],\
                names['reward_optimal_1_' + str(tput_origimal_class)],names['reward_optimal_2_' + str(tput_origimal_class)],names['reward_optimal_3_' + str(tput_origimal_class)], \
                names['number_optimal_' + str(tput_origimal_class)],\
                names['safety_optimal_1_' + str(tput_origimal_class)],names['safety_optimal_2_' + str(tput_origimal_class)],names['safety_optimal_3_' + str(tput_origimal_class)]\
                    = [], [], [], [], [], [], [], [], [], [], []
                names['observation_optimal_3_' + str(tput_origimal_class)], names['action_optimal_3_' + str(tput_origimal_class)] = [], []

                names['observation_optimal_1_' + str(tput_origimal_class)].extend(observation_episode_1)
                names['action_optimal_1_' + str(tput_origimal_class)].extend(action_episode_1)
                names['observation_optimal_2_' + str(tput_origimal_class)].extend(observation_episode_2)
                names['action_optimal_2_' + str(tput_origimal_class)].extend(action_episode_2)
                names['observation_optimal_3_' + str(tput_origimal_class)].extend(observation_episode_3)
                names['action_optimal_3_' + str(tput_origimal_class)].extend(action_episode_3)

                names['number_optimal_' + str(tput_origimal_class)].append(NUM_CONTAINERS)

                names['safety_optimal_1_' + str(tput_origimal_class)].extend(safety_episode_1)
                names['safety_optimal_2_' + str(tput_origimal_class)].extend(safety_episode_2)
                names['safety_optimal_3_' + str(tput_origimal_class)].extend(safety_episode_3)
                names['reward_optimal_1_' + str(tput_origimal_class)].extend(reward_episode_1)
                names['reward_optimal_2_' + str(tput_origimal_class)].extend(reward_episode_2)
                names['reward_optimal_3_' + str(tput_origimal_class)].extend(reward_episode_3)

                names['optimal_range_' + str(tput_origimal_class)] = 1.05

            elif names['highest_tput_' + str(tput_origimal_class)] < tput * names['optimal_range_' + str(tput_origimal_class)]:
                names['observation_optimal_1_' + str(tput_origimal_class)].extend(observation_episode_1)
                names['action_optimal_1_' + str(tput_origimal_class)].extend(action_episode_1)
                names['observation_optimal_2_' + str(tput_origimal_class)].extend(observation_episode_2)
                names['action_optimal_2_' + str(tput_origimal_class)].extend(action_episode_2)
                names['observation_optimal_3_' + str(tput_origimal_class)].extend(observation_episode_3)
                names['action_optimal_3_' + str(tput_origimal_class)].extend(action_episode_3)

                names['number_optimal_' + str(tput_origimal_class)].append(NUM_CONTAINERS)

                names['safety_optimal_1_' + str(tput_origimal_class)].extend(safety_episode_1)
                names['safety_optimal_2_' + str(tput_origimal_class)].extend(safety_episode_2)
                names['safety_optimal_3_' + str(tput_origimal_class)].extend(safety_episode_3)
                names['reward_optimal_1_' + str(tput_origimal_class)].extend(reward_episode_1)
                names['reward_optimal_2_' + str(tput_origimal_class)].extend(reward_episode_2)
                names['reward_optimal_3_' + str(tput_origimal_class)].extend(reward_episode_3)

        observation_episode_1, action_episode_1, reward_episode_1, safety_episode_1 = [], [], [], []
        observation_episode_2, action_episode_2, reward_episode_2, safety_episode_2 = [], [], [], []
        observation_episode_3, action_episode_3, reward_episode_3, safety_episode_3 = [], [], [], []

        """
        Each batch, RL.learn()
        """
        if (epoch_i % batch_size == 0) & (epoch_i > 1):
            for replay_class in range(0,10):

                number_optimal = names['number_optimal_' + str(replay_class)]

                reward_optimal_1 = names['reward_optimal_1_' + str(replay_class)]
                reward_optimal_2 = names['reward_optimal_2_' + str(replay_class)]
                reward_optimal_3 = names['reward_optimal_3_' + str(replay_class)]
                safety_optimal_1 = names['safety_optimal_1_' + str(replay_class)]
                safety_optimal_2 = names['safety_optimal_2_' + str(replay_class)]
                safety_optimal_3 = names['safety_optimal_3_' + str(replay_class)]

                observation_optimal_1 = names['observation_optimal_1_' + str(replay_class)]
                action_optimal_1 = names['action_optimal_1_' + str(replay_class)]
                observation_optimal_2 = names['observation_optimal_2_' + str(replay_class)]
                action_optimal_2 = names['action_optimal_2_' + str(replay_class)]
                observation_optimal_3 = names['observation_optimal_3_' + str(replay_class)]
                action_optimal_3 = names['action_optimal_3_' + str(replay_class)]


                buffer_size = int(len(number_optimal))

                if buffer_size < replay_size:
                    # TODO: if the number of layers changes, training_times_per_episode should be modified
                    RL_1.ep_obs.extend(observation_optimal_1)
                    RL_1.ep_as.extend(action_optimal_1)
                    RL_1.ep_rs.extend(reward_optimal_1)
                    RL_1.ep_ss.extend(safety_optimal_1)

                    RL_2.ep_obs.extend(observation_optimal_2)
                    RL_2.ep_as.extend(action_optimal_2)
                    RL_2.ep_rs.extend(reward_optimal_2)
                    RL_2.ep_ss.extend(safety_optimal_2)

                    RL_3.ep_obs.extend(observation_optimal_3)
                    RL_3.ep_as.extend(action_optimal_3)
                    RL_3.ep_rs.extend(reward_optimal_3)
                    RL_3.ep_ss.extend(safety_optimal_3)

                else:
                    replay_index = np.random.choice(range(buffer_size), size=replay_size, replace=False)
                    for replay_id in range(replay_size):
                        replace_start = replay_index[replay_id]
                        start_location = sum(number_optimal[:replace_start])
                        stop_location = sum(number_optimal[:replace_start+1])
                        RL_1.ep_obs.extend(observation_optimal_1[start_location: stop_location])
                        RL_1.ep_as.extend(action_optimal_1[start_location: stop_location])
                        RL_1.ep_rs.extend(reward_optimal_1[start_location: stop_location])
                        RL_1.ep_ss.extend(safety_optimal_1[start_location: stop_location])

                        RL_2.ep_obs.extend(observation_optimal_2[start_location: stop_location])
                        RL_2.ep_as.extend(action_optimal_2[start_location: stop_location])
                        RL_2.ep_rs.extend(reward_optimal_2[start_location: stop_location])
                        RL_2.ep_ss.extend(safety_optimal_2[start_location: stop_location])

                        RL_3.ep_obs.extend(observation_optimal_3[start_location: stop_location])
                        RL_3.ep_as.extend(action_optimal_3[start_location: stop_location])
                        RL_3.ep_rs.extend(reward_optimal_3[start_location: stop_location])
                        RL_3.ep_ss.extend(safety_optimal_3[start_location: stop_location])
            #
            RL_1.learn(epoch_i, thre_entropy, Ifprint=True)
            RL_2.learn(epoch_i, thre_entropy)
            optim_case = RL_3.learn(epoch_i, thre_entropy)

        """
        checkpoint, per 1000 episodes
        """
        if (epoch_i % 200 == 0) & (epoch_i > 1):
            for class_replay in range(0,10):
                highest_value = names['highest_tput_' + str(class_replay)]
                print("\n epoch: %d, highest tput: %f" % (epoch_i, highest_value))

                # lowest_vio_ = names['lowest_vio_' + str(class_replay)]
                # print("\n epoch: %d, lowest_vio: %f" % (epoch_i, lowest_vio_))

            RL_1.save_session(ckpt_path_1)
            RL_2.save_session(ckpt_path_2)
            RL_3.save_session(ckpt_path_3)
            np.savez(np_path, tputs=np.array(RL_1.tput_persisit), candidate=np.array(RL_1.episode), vio_persis=np.array(RL_1.safe_persisit))
            print("epoch:", epoch_i, "mean(sum): ", np.mean(RL_1.sum_persisit), "mean(coex): ", np.mean(RL_1.coex_persisit))
            """
            optimal range adaptively change
            """
            for class_replay in range(0, 10):
                number_optimal = names['number_optimal_' + str(class_replay)]
                count_size = int(len(number_optimal))

                if (count_size > 100):
                    names['optimal_range_' + str(class_replay)] *= 0.99
                    names['optimal_range_' + str(class_replay)] = max(names['optimal_range_' + str(class_replay)], 1.01)

                    start_location = sum(names['number_optimal_' + str(class_replay)][:-10]) * training_times_per_episode

                    names['observation_optimal_1_' + str(class_replay)] = names['observation_optimal_1_' + str(class_replay)][start_location:]
                    names['action_optimal_1_' + str(class_replay)] = names['action_optimal_1_' + str(class_replay)][start_location:]

                    names['observation_optimal_2_' + str(class_replay)] = names['observation_optimal_2_' + str(class_replay)][start_location:]
                    names['action_optimal_2_' + str(class_replay)] = names['action_optimal_2_' + str(class_replay)][start_location:]

                    names['observation_optimal_3_' + str(class_replay)] = names['observation_optimal_3_' + str(class_replay)][start_location:]
                    names['action_optimal_3_' + str(class_replay)] = names['action_optimal_3_' + str(class_replay)][start_location:]

                    names['number_optimal_' + str(class_replay)] = names['number_optimal_' + str(class_replay)][-10:]

                    names['safety_optimal_1_' + str(class_replay)] = names['safety_optimal_1_' + str(class_replay)][start_location:]
                    names['safety_optimal_2_' + str(class_replay)] = names['safety_optimal_2_' + str(class_replay)][start_location:]
                    names['safety_optimal_3_' + str(class_replay)] = names['safety_optimal_3_' + str(class_replay)][start_location:]
                    names['reward_optimal_1_' + str(class_replay)] = names['reward_optimal_1_' + str(class_replay)][start_location:]
                    names['reward_optimal_2_' + str(class_replay)] = names['reward_optimal_2_' + str(class_replay)][start_location:]
                    names['reward_optimal_3_' + str(class_replay)] = names['reward_optimal_3_' + str(class_replay)][start_location:]

                print("optimal_range:", names['optimal_range_' + str(class_replay)])

            print(prob_weights)
            if optim_case > 0:
                thre_entropy *= 0.5
            thre_entropy = max(thre_entropy, 0.001)

        epoch_i += 1
        if epoch_i > 30:
            ifUseExternal = False

Example 7
# NOTE: this snippet begins mid-function; the signature and dtype conversion
# below are reconstructed by analogy with generateFiniteLenFloats (an
# assumption, not from the original).
def generateFiniteLenInts(name, count, tp, ctx):
	tp = np.dtype(tp)
	bitsCount = tp.itemsize * 8
	return [z3.BitVec(name + "__" + str(i), bitsCount) for i in range(count)]


def generateFiniteLenFloats(name, count, tp, ctx):
	tp = np.dtype(tp)
	fpSort = floatTypes[tp.itemsize]
	if isinstance(fpSort, tuple):
		fpSort = z3.FPSort(*fpSort)
	return [z3.FP(name + "__" + str(i), fpSort) for i in range(count)]


typesRemapping = {
	np.bool_: lambda name, count, tp, ctx: z3.BoolVector(name, count, ctx),
	bool: lambda name, count, tp, ctx: z3.BoolVector(name, count, ctx),
	int: lambda name, count, tp, ctx: z3.IntVector(name, count, ctx),
	float: lambda name, count, tp, ctx: z3.RealVector(name, count, ctx),
}

floatTypes = {
	1: (4, 4),
	#2: (5, 11),
	2: z3.FloatHalf(),
	#4: (8, 24),
	4: z3.FloatSingle(),
	#8: (11, 53),
	8: z3.FloatDouble(),
	10: (15, 63),
	#16: (15, 111),
	16: z3.FloatQuadruple(),
	32: (19, 237),
}
Example 8
# Question 1
# Below you see a SUDOKU variant. (See sudoku.jpg)
# Again the numbers 1 to 9 should be filled, in such a way that each number occurs exactly once in every row, every column and every 3x3 block. But now there are no numbers given, only symbols '<' and 'o'. The symbol '<' means that the number left from it should be less than the number right from it.
# The symbol 'o' means that the two numbers on both sides are consecutive: they differ by exactly one. For border lines not containing a symbol '<' or 'o' nothing is known.
# Just like normal sudoku this puzzle has a unique solution (as was figured out by SMT solving). The goal is to find it. Doing this by hand looks quite impossible (you may try!), but solving it by SMT is much more feasible. Can you do this?
# As the answer you should give the 9 digit number formed by the lowest line.

import z3

S = [z3.IntVector(f"S_{i}", 9) for i in range(9)]

# Normal sudoku rules
# All numbers between 1 and 9
bounds = []
for i in range(9):
    bounds += [S[i][j] <= 9 for j in range(9)]
    bounds += [S[i][j] >= 1 for j in range(9)]

# Distinct numbers on each row
row_conditions = []
for i in range(9):
    row_conditions.append(z3.Distinct(S[i]))

# Distinct numbers on each column
col_conditions = []
for j in range(9):
    col_conditions.append(z3.Distinct([S[i][j] for i in range(9)]))

# Distinct numbers in each box
box_conditions = []
for k in range(9):
    # (the snippet is truncated here; the obvious loop body is reconstructed)
    box_conditions.append(z3.Distinct(
        [S[3 * (k // 3) + di][3 * (k % 3) + dj]
         for di in range(3) for dj in range(3)]))
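
# Sketch of how the '<' and 'o' border symbols could be encoded (an
# assumption; the actual cell pairs come from sudoku.jpg, which this excerpt
# does not reproduce):
def less_than(a, b):
    return a < b  # '<': the left number is less than the right one

def consecutive(a, b):
    return z3.Or(a == b + 1, b == a + 1)  # 'o': the numbers differ by exactly one

# e.g. symbol_conditions = [less_than(S[0][0], S[0][1]), consecutive(S[1][4], S[2][4])]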
Example 9
# What is the minimal total running time?

# Question 2
# Take all requirements from Question 1, but now additionally it is required that job 7 should not start earlier than job 8.
# What is the minimal total running time?

# Question 3
# Take all requirements from Question 1 and Question 2, but now additionally it is required that jobs 3, 4 and 5 are never allowed to run at the same time, since they need a special equipment of which only one copy is available.
# What is the minimal total running time?

import z3
from bin_search import binary_search

# Our labelling of jobs starts at 0 and ends at 9 so we need to subtract one
# from job numbers in the description
StartTimes = z3.IntVector('S', 10)
EndTimes = z3.IntVector(
    'E', 10
)  # Probably would have been nicer making this 11 long so the indexing agreed with the question.

# Conditions for job lengths
# • The running time of job i is i + 10, for i = 1, 2, . . . , 10.
joblengths = [EndTimes[i] == StartTimes[i] + 11 + i for i in range(10)]

start_conditions = []
# Jobs start after 0
start_conditions += [StartTimes[i] >= 0 for i in range(10)]
# • Job 3 may only start if jobs 1 and 2 have been finished.
start_conditions.append(StartTimes[3 - 1] >= EndTimes[1 - 1])
start_conditions.append(StartTimes[3 - 1] >= EndTimes[2 - 1])
# • Job 6 may only start if jobs 2 and 4 have been finished.
# (the excerpt is truncated here; these two constraints follow the pattern above)
start_conditions.append(StartTimes[6 - 1] >= EndTimes[2 - 1])
start_conditions.append(StartTimes[6 - 1] >= EndTimes[4 - 1])
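
# A minimal sketch (an assumption; the original continues past this excerpt)
# of how the minimal total running time could be obtained with z3.Optimize
# instead of the imported binary_search helper:
opt = z3.Optimize()
opt.add(joblengths + start_conditions)
makespan = z3.Int('makespan')
opt.add([makespan >= EndTimes[i] for i in range(10)])
opt.minimize(makespan)
assert opt.check() == z3.sat
print(opt.model().eval(makespan))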
Example 10
def solve(foods):
    s = z3.Solver()

    # encode all ingredients and allergens
    ingredients_ = list(set(item for pair in foods for item in pair[0]))
    allergens_ = list(set(item for pair in foods for item in pair[1]))

    ingredients = dict((v, i) for i, v in enumerate(ingredients_))
    allergens = dict((v, i) for i, v in enumerate(allergens_))

    # we want to find out which ingredient is an allergen
    # we treat this as an MxN assignment problem
    # i.e. solve for which ingredient can possibly be assigned as an allergen while satisfying all input constraints
    assignments = z3.IntVector('allergen', len(allergens))

    # program the valid range of our ingredient encodings
    for a in assignments:
        s.add(z3.And(a >= 0, a < len(ingredients)))

    # there can only be one possible ingredient assigned to an allergen
    s.add(z3.Distinct(assignments))

    for i, a in foods:

        # program all possible assignment pairs for this food
        for a_ in a:
            food = []
            for i_ in i:
                # encode the input constraint
                I = ingredients[i_]
                A = assignments[allergens[a_]]

                # one of these (allergen == ingredient) pair could be valid
                food.append(A == I)
            s.add(z3.Or(food))

        # ensure that this food doesn't contain any other types of allergen
        food = []

        not_a = set(allergens.keys()) - set(a)
        for i_, a_ in itertools.product(i, not_a):
            # encode the input constraint
            I = ingredients[i_]
            A = assignments[allergens[a_]]

            # not any other types of allergen
            # print(f"{(i_, a_)=}")
            food.append(I != A)

        s.add(z3.Or(food))

    # are we asking the impossible?
    print(f"{s=}")
    print()

    r = s.check()
    print(f"{r=}")

    if r == z3.sat:
        # constrain satisfied, now we extract the solution from the model
        m = s.model()
        print(f"{m=}")

        # reverse our integer encoding back into strings
        allergen = dict(enumerate(allergens_))
        ingredient = dict(enumerate(ingredients_))

        solution = [{'ingredient': ingredient[m.eval(a).as_long()], 'allergen': allergen[i]}
                    for i, a in enumerate(assignments)]
        return solution
Example 11
blueprint2 = "flag[{}] == {}"

ans = []
for i in exp:
    if i[1] is None:
        continue
        #ans.append(blueprint2.format(i[0], i[2]))
    else:
        ans.append(blueprint.format(i[0], i[1], i[2]))
print(" and ".join(ans))

### proof of solvability
import z3
for i in range(256):
    s = z3.Solver()
    a = z3.IntVector("a", len(flag))
    for _ in a:
        s.add(_ >= 0x20)
        s.add(_ <= 0x7E)
    for e in exp:
        s.add(a[e[0]] - a[e[1]] == e[2])
    s.add(a[7] == i)
    if s.check() == z3.sat:
        m = s.model()
        print("".join(chr(m[q].as_long()) for q in a))
"""
       W SS#]NB"2a"CNAh?b6Z&@EQR
       X!TT$^OC#3b#DOBi@c7['AFRS
       Y"UU%_PD$4c$EPCjAd8\(BGST
       Z#VV&`QE%5d%FQDkBe9])CHTU
       [$WW'aRF&6e&GRElCf:^*DIUV
Example 12
    def IntVector(self, s_str, length):
        return z3.IntVector(s_str, length)
Example 13
# Idea: sum up squares of numbers in the row.
# Say we have 9 numbers, all between 1 and 9 (inclusive), sum of squares is 285.
# Can this sum be achieved any way other than =1+4+9+...+81?
# What if we have simultaneous equations? Eg sum = 45, sum of squares = 285, sum of cubes = 2025?
# What about if we mix in the product?
# Answers:
# Sums of squares and cubes have multiple solutions, even simultaneous ones.
# Product by itself is not enough in this case. It would work if the entries to the rows were primes though.
# Product and sum gives another solution [1,2,4,4,4,5,7,9,9]. Tricky! The three, and the factors of the six and eight get mixed together.
# Product and sum of squares give only the desired solution!!!
# I expect there is some sort of algebraic/arithmetic geometry theorem that would tell you all of this straight away.
# I really should read that book about Groebner bases and stuff!

import z3

X = z3.IntVector('X', 9)
base_conditions = [x >= 1 for x in X]
base_conditions += [x <= 9 for x in X]

# Without loss of generality we put the numbers in non-decreasing order
for i in range(8):
    base_conditions.append(X[i] <= X[i + 1])

# Exclude the solution 1,2,3,...,9
base_conditions.append(z3.Not(z3.And([X[i] == i + 1 for i in range(9)])))

# Set sum to 45
sum_cond = [z3.Sum(X) == 45]
# Set sum of squares to 285
square_conds = [z3.Sum([x**2 for x in X]) == 285]
# Set cubes to 2025
cube_conds = [z3.Sum([x**3 for x in X]) == 2025]
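
# Sketch of how one of the claims above could be checked (an assumption: if
# the comment is right, this prints unsat, i.e. product plus sum of squares
# forces the row to be exactly 1..9):
prod_cond = [z3.Product(X) == 362880]  # 9! = 1*2*...*9
s = z3.Solver()
s.add(base_conditions + prod_cond + square_conds)
print(s.check())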
Example 14
    def get_total_tput(self, rnd_array):

        # assert sum(rnd_array) == 81
        source_batch_, index_data = self.batch_data(
            rnd_array.astype(int))  # index_data = [0,1,2,0,1,2]
        env = LraClusterEnv(num_nodes=self.NUM_NODES)
        ilp_dict = {}
        for i in range(7):
            ilp_dict['x' + str(i)] = z3.IntVector('x' + str(i), 3)
        observation = env.reset().copy()  # (9,9)
        source_batch = source_batch_.copy()
        nodes_per_group = int(params['nodes per group'])
        NUM_CONTAINERS = int(sum(rnd_array))
        """
        Episode
        """
        def handle_constraint(observation_now, appid_now, s):
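            # Same z3 feasibility probe as handle_constraint in train():
            # push a tentative lower bound, check sat, pop.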

            observation_original = observation_now.copy()

            mapping_index = []
            list_check = []

            for place in range(3):
                s.push()
                s.add(ilp_dict['x' + str(appid_now)][place] >=
                      int(observation_now[place][appid_now]) + 1)
                if s.check() == z3.sat:
                    list_check.append(False)
                else:
                    list_check.append(True)
                s.pop()

            good_index = np.where(np.array(list_check) == False)[0]
            length = len(good_index)
            if length < 1:
                test = 1
            index_replace = 0
            for node in range(3):
                if list_check[node]:  # bad node
                    # index_this_replace = good_index[np.random.randint(length)]
                    index_this_replace = good_index[index_replace % length]
                    index_replace += 1
                    observation_original[node] = observation[
                        index_this_replace]
                    mapping_index.append(index_this_replace)
                else:
                    mapping_index.append(node)
                    observation_original[node] = observation[node]

            return observation_original, mapping_index

        """
        first layer
        """

        total = source_batch_.copy()
        limit = (1 * 9)
        capacity = (8 * 9)
        s_first = z3.Solver()
        # app sum == batch
        for i in range(7):
            s_first.add(z3.Sum(ilp_dict['x' + str(i)]) == int(total[i]))
        # node capacity
        for node in range(3):
            s_first.add(
                z3.Sum([ilp_dict['x' + str(i)][node]
                        for i in range(7)]) <= int(capacity))
        # >=0
        for i in range(7):
            for node in range(3):
                s_first.add(ilp_dict['x' + str(i)][node] >= 0)
        # per app spread
        for i in range(7):
            for node in range(3):
                s_first.add(ilp_dict['x' + str(i)][node] <= limit)
        # App 1 and App 2 combined may not exceed the limit on any node
        # for node in range(3):
        #     s_first.add(ilp_dict['x' + str(1)][node] + ilp_dict['x' + str(2)][node] <= limit)

        source_batch_first = source_batch_.copy()
        observation_first_layer = np.zeros([nodes_per_group, env.NUM_APPS],
                                           int)
        for inter_episode_index in range(NUM_CONTAINERS):
            appid = index_data[inter_episode_index]
            observation_first_layer_copy, mapping_index = handle_constraint(
                observation_first_layer, appid, s_first)
            assert len(mapping_index) > 0

            source_batch_first[appid] -= 1
            # observation_first_layer_copy = observation_first_layer.copy()
            observation_first_layer_copy[:, appid] += 1
            observation_first_layer_copy = np.append(
                observation_first_layer_copy,
                observation_first_layer_copy > 9 * 2,
                axis=1)
            observation_first_layer_copy = np.append(
                observation_first_layer_copy,
                observation_first_layer_copy.sum(axis=1).reshape(
                    nodes_per_group, 1),
                axis=1)
            observation_first_layer_copy = np.array(
                observation_first_layer_copy).reshape(1, -1)
            observation_first_layer_copy = np.append(
                observation_first_layer_copy, appid).reshape(1, -1)
            observation_first_layer_copy = np.append(
                observation_first_layer_copy,
                np.array(source_batch_first)).reshape(1, -1)
            action_1, prob_weights = self.RL_1.choose_action(
                observation_first_layer_copy.copy())
            decision = mapping_index[action_1]
            observation_first_layer[decision, appid] += 1
            s_first.add(ilp_dict['x' + str(appid)][decision] >= int(
                observation_first_layer[decision][appid]))
        assert (np.sum(observation_first_layer, axis=1) <=
                params['container_limitation per node'] * 9).all()
        assert sum(sum(observation_first_layer)) == NUM_CONTAINERS
        """
        second layer
        """
        observation_second_layer_aggregation = np.empty([0, env.NUM_APPS],
                                                        int)  # 9*20

        number_cont_second_layer = []

        for second_layer_index in range(nodes_per_group):

            rnd_array = observation_first_layer[second_layer_index].copy()

            total = rnd_array
            limit = (1 * 3)
            capacity = (8 * 3)
            s_second = z3.Solver()
            # app sum == batch
            for i in range(7):
                s_second.add(z3.Sum(ilp_dict['x' + str(i)]) == int(total[i]))
            # node capacity
            for node in range(3):
                s_second.add(
                    z3.Sum([ilp_dict['x' + str(i)][node]
                            for i in range(7)]) <= int(capacity))
            # >=0
            for i in range(7):
                for node in range(3):
                    s_second.add(ilp_dict['x' + str(i)][node] >= 0)
            # per app spread
            for i in range(7):
                for node in range(3):
                    s_second.add(ilp_dict['x' + str(i)][node] <= limit)
            # App 1 and App 2 combined may not exceed the limit on any node
            # for node in range(3):
            #     s_second.add(ilp_dict['x' + str(1)][node] + ilp_dict['x' + str(2)][node] <= limit)

            source_batch_second, index_data = self.batch_data_sub(rnd_array)
            observation_second_layer = np.zeros(
                [nodes_per_group, env.NUM_APPS], int)
            NUM_CONTAINERS_second = sum(source_batch_second)
            number_cont_second_layer.append(NUM_CONTAINERS_second)

            for inter_episode_index in range(NUM_CONTAINERS_second):

                appid = index_data[inter_episode_index]
                observation_second_layer_copy, mapping_index = handle_constraint(
                    observation_second_layer, appid, s_second)
                assert len(mapping_index) > 0

                source_batch_second[appid] -= 1
                # observation_second_layer_copy = observation_second_layer.copy()
                observation_second_layer_copy[:, appid] += 1
                observation_second_layer_copy = np.append(
                    observation_second_layer_copy,
                    observation_second_layer_copy > 3 * 2,
                    axis=1)
                observation_second_layer_copy = np.append(
                    observation_second_layer_copy,
                    observation_second_layer_copy.sum(axis=1).reshape(
                        nodes_per_group, 1),
                    axis=1)
                observation_second_layer_copy = np.array(
                    observation_second_layer_copy).reshape(1, -1)
                observation_second_layer_copy = np.append(
                    observation_second_layer_copy, appid).reshape(1, -1)
                observation_second_layer_copy = np.append(
                    observation_second_layer_copy,
                    np.array(source_batch_second)).reshape(1, -1)

                action_2, prob_weights = self.RL_2.choose_action(
                    observation_second_layer_copy.copy())
                decision = mapping_index[action_2]
                observation_second_layer[decision, appid] += 1
                s_second.add(ilp_dict['x' + str(appid)][decision] >= int(
                    observation_second_layer[decision][appid]))

            observation_second_layer_aggregation = np.append(
                observation_second_layer_aggregation, observation_second_layer,
                0)
            assert (np.sum(observation_second_layer, axis=1) <=
                    params['container_limitation per node'] * 3).all()
            assert sum(sum(observation_second_layer)) == NUM_CONTAINERS_second
        """
        third layer
        """
        observation_third_layer_aggregation = np.empty([0, env.NUM_APPS],
                                                       int)  # 9*20
        number_cont_third_layer = []

        for third_layer_index in range(nodes_per_group * nodes_per_group):
            rnd_array = observation_second_layer_aggregation[
                third_layer_index].copy()

            total = rnd_array
            limit = (1 * 1)
            capacity = 8
            s_third = z3.Solver()
            # app sum == batch
            for i in range(7):
                s_third.add(z3.Sum(ilp_dict['x' + str(i)]) == int(total[i]))
            # node capacity
            for node in range(3):
                s_third.add(
                    z3.Sum([ilp_dict['x' + str(i)][node]
                            for i in range(7)]) <= int(capacity))
            # >=0
            for i in range(7):
                for node in range(3):
                    s_third.add(ilp_dict['x' + str(i)][node] >= 0)
            # per app spread
            for i in range(7):
                for node in range(3):
                    s_third.add(ilp_dict['x' + str(i)][node] <= limit)
            # App 1 and App 2 combined may not exceed the limit on any node
            # for node in range(3):
            #     s_third.add(ilp_dict['x' + str(1)][node] + ilp_dict['x' + str(2)][node] <= limit)

            source_batch_third, index_data = self.batch_data_sub(rnd_array)
            observation_third_layer = np.zeros([nodes_per_group, env.NUM_APPS],
                                               int)
            NUM_CONTAINERS_third = sum(source_batch_third)
            number_cont_third_layer.append(NUM_CONTAINERS_third)

            for inter_episode_index in range(NUM_CONTAINERS_third):
                appid = index_data[inter_episode_index]
                observation_third_layer_copy, mapping_index = handle_constraint(
                    observation_third_layer, appid, s_third)
                assert len(mapping_index) > 0

                source_batch_third[appid] -= 1
                # observation_third_layer_copy = observation_third_layer.copy()
                observation_third_layer_copy[:, appid] += 1

                observation_third_layer_copy = np.append(
                    observation_third_layer_copy,
                    observation_third_layer_copy > 1 * 2,
                    axis=1)
                observation_third_layer_copy = np.append(
                    observation_third_layer_copy,
                    observation_third_layer_copy.sum(axis=1).reshape(
                        nodes_per_group, 1),
                    axis=1)
                observation_third_layer_copy = np.array(
                    observation_third_layer_copy).reshape(1, -1)
                observation_third_layer_copy = np.append(
                    observation_third_layer_copy, appid).reshape(1, -1)
                observation_third_layer_copy = np.append(
                    observation_third_layer_copy,
                    np.array(source_batch_third)).reshape(1, -1)

                action_3, prob_weights = self.RL_3.choose_action(
                    observation_third_layer_copy.copy())
                decision = mapping_index[action_3]
                observation_third_layer[decision, appid] += 1
                s_third.add(ilp_dict['x' + str(appid)][decision] >= int(
                    observation_third_layer[decision][appid]))

            observation_third_layer_aggregation = np.append(
                observation_third_layer_aggregation, observation_third_layer,
                0)
            assert (np.sum(observation_third_layer, axis=1) <=
                    params['container_limitation per node'] * 1).all()
            assert sum(sum(observation_third_layer)) == NUM_CONTAINERS_third

        env.state = observation_third_layer_aggregation.copy()
        assert sum(sum(env.state)) == NUM_CONTAINERS
        assert (env.state.sum(0) == source_batch_).all()
        """
        After an entire allocation, calculate total throughput, reward
        """
        # state = env.state
        # assert sum(sum(self.env.state)) == 81

        return env.state