def puzzle_2(all_ingredients, all_allergens, inert_ingredients):
    """Return the dangerous ingredients, comma-joined and ordered by allergen.

    The pairing is solved as an assignment problem with z3: every non-inert
    ingredient gets one integer variable holding the index of the allergen it
    carries, all variables must differ, and each allergen must be carried by
    an ingredient whose entry in ``all_ingredients`` is a superset of that
    allergen's entry in ``all_allergens``.

    Args:
        all_ingredients: Mapping of ingredient name to a set (compared with
            ``>=``; presumably the foods that list the ingredient).
        all_allergens: Mapping of allergen name to a set of the same kind.
        inert_ingredients: Set of ingredient names to leave out.

    Returns:
        The matched ingredient names joined with ``,``, sorted by the
        allergen each one was paired with.

    Raises:
        z3.Z3Exception: If the constraints cannot be satisfied.
    """
    possible_ingredients = list(set(all_ingredients) - inert_ingredients)
    possible_allergens = list(set(all_allergens))

    solver = z3.Solver()
    # One variable per *ingredient*: the loops below index this vector with
    # the ingredient index, so it must be sized by the ingredient count (the
    # original sized it by the allergen count and broke when they differed).
    assignments = z3.IntVector('allergen', len(possible_ingredients))
    for assignment in assignments:
        solver.add(0 <= assignment, assignment < len(possible_allergens))
    solver.add(z3.Distinct(assignments))

    for ai, allergen in enumerate(possible_allergens):
        conditions = [
            assignments[ii] == ai
            for ii, ingredient in enumerate(possible_ingredients)
            if all_ingredients[ingredient] >= all_allergens[allergen]
        ]
        # z3.Or() rejects an empty argument list; an allergen without any
        # candidate ingredient simply makes the problem unsatisfiable.
        solver.add(z3.Or(conditions) if conditions else z3.BoolVal(False))

    # Check the result explicitly so failure is reported clearly instead of
    # surfacing later as "model is not available".
    if solver.check() != z3.sat:
        raise z3.Z3Exception(
            'no consistent assignment of ingredients to allergens')
    model = solver.model()

    matches = sorted(
        (possible_allergens[model.evaluate(variable).as_long()], ingredient)
        for variable, ingredient in zip(assignments, possible_ingredients)
    )
    return ','.join(ingredient for _, ingredient in matches)
def part_2_z3():
    """Print the dangerous ingredients, comma-joined and ordered by allergen.

    Foods come from ``get_foods()``; ingredients returned by
    ``get_hypoallergenic_ingredients(foods)`` are excluded. Each remaining
    ingredient gets a z3 integer variable holding the index of the allergen
    assigned to it.

    Raises:
        AssertionError: If the solver does not report ``sat``.
    """
    foods = get_foods()
    hypoallergenic = get_hypoallergenic_ingredients(foods)

    allergens = set()
    ingredients = set()
    foods_by_allergen = collections.defaultdict(set)
    foods_by_ingredient = collections.defaultdict(set)
    for food in foods:
        for allergen in food.allergens:
            foods_by_allergen[allergen].add(food)
            allergens.add(allergen)
        for ingredient in food.ingredients:
            foods_by_ingredient[ingredient].add(food)
            ingredients.add(ingredient)

    allergens = list(allergens)
    ingredients = list(ingredients - hypoallergenic)

    # List of variables representing possible assignment of ingredient to allergen
    assignments = z3.IntVector('assignment', len(ingredients))
    solver = z3.Solver()
    for assignment in assignments:
        solver.add(0 <= assignment)
        solver.add(assignment < len(allergens))
    solver.add(z3.Distinct(assignments))

    for i, allergen in enumerate(allergens):
        candidates = []
        for j, ingredient in enumerate(ingredients):
            # If set of foods that we know contain allergen_i is a subset of foods containing ingredient_j,
            # then ingredient_j = allergen_i is a possible assignment
            if foods_by_allergen[allergen] <= foods_by_ingredient[ingredient]:
                candidates.append(assignments[j] == i)
        solver.add(z3.Or(candidates))

    # The check must run outside an ``assert``: under ``python -O`` asserts
    # are stripped, which would skip solving and make ``model()`` fail.
    # AssertionError is kept so the failure type seen by callers is unchanged.
    result = solver.check()
    if result != z3.sat:
        raise AssertionError(f'solver returned {result}, expected sat')
    model = solver.model()

    matches = []
    for i, assignment in enumerate(assignments):
        assignment = model.evaluate(assignment).as_long()
        matches.append((allergens[assignment], ingredients[i]))
    print(','.join(ingredient for _, ingredient in sorted(matches)))
def valid_numbers_z3(optimize_method):
    """Return the optimal 14-digit number that satisfies ``RULES``.

    Each digit is a z3 integer in 1..9. For every ``(i, j): delta`` entry of
    ``RULES`` the constraint ``digits[i] - digits[j] == delta`` is added.

    Args:
        optimize_method: Name of the ``z3.Optimize`` method called with the
            number expression (presumably ``'maximize'`` or ``'minimize'``).

    Returns:
        The value the model assigns to the 14-digit number expression.

    Raises:
        AssertionError: If the optimizer does not report ``sat``.
    """
    digits = z3.IntVector('digits', 14)
    optimizer = z3.Optimize()

    number = 0
    for digit in digits:
        optimizer.add(1 <= digit, digit <= 9)
        # Build the decimal value most-significant digit first.
        number = 10 * number + digit

    for (i, j), delta in RULES.items():
        optimizer.add(digits[i] - digits[j] == delta)

    getattr(optimizer, optimize_method)(number)

    # Run the check outside an ``assert`` so it still happens under
    # ``python -O``; AssertionError is kept as the failure type.
    result = optimizer.check()
    if result != z3.sat:
        raise AssertionError(f'optimizer returned {result}, expected sat')
    return optimizer.model().eval(number)
# •Ten pallets of crottles, each of weight 2500 kg.
# •Twenty pallets of dupples, each of weight 200 kg.
# Skipples need to be cooled; only three of the eight trucks have the facility for cooling skipples.
# Nuzzles are very valuable; to distribute the risk of loss no two pallets of nuzzles may be in the same truck.
# Investigate what is the maximum number of pallets of prittles that can be delivered.
# (Hint: if you do not use the maximize command, you may run the tool several times and do a binary search to find the right value)
# from z3 import IntVector, Sum, Solver
import z3

# Pallet counts per truck. Trucks 0-2 are the refrigerated ones, so only they
# get a Skipple variable.
N = z3.IntVector('N', 8)  # Number of Nuzzle pallets on each truck
P = z3.IntVector('P', 8)  # Number of Prittle pallets on each truck
S = z3.IntVector('S', 3)  # Number of Skipple pallets on each refrigerated truck
C = z3.IntVector('C', 8)  # Number of Crottle pallets on each truck
D = z3.IntVector('D', 8)  # Number of Dupple pallets on each truck

# Weight restrictions (at most 8000 kg per truck): refrigerated trucks first,
# then the non-refrigerated ones, which carry no Skipples.
weight_restriction = [
    800*n + 1100*p + 1000*s + 2500*c + 200*d <= 8000
    for n, p, s, c, d in zip(N[:3], P[:3], S, C[:3], D[:3])
]
weight_restriction.extend(
    800*n + 1100*p + 2500*c + 200*d <= 8000
    for n, p, c, d in zip(N[3:], P[3:], C[3:], D[3:])
)

# Pallet restrictions (at most 8 pallets per truck), in the same truck order.
pallet_restriction = [
    n + p + s + c + d <= 8
    for n, p, s, c, d in zip(N[:3], P[:3], S, C[:3], D[:3])
]
pallet_restriction.extend(
    n + p + c + d <= 8
    for n, p, c, d in zip(N[3:], P[3:], C[3:], D[3:])
)

# Only one Nuzzle pallet on each truck
# Question 1
# Consider the following program:
#   a := 1; b := 1;
#   for i := 1 to 10 do
#     if ? then {a := a+2b; b := b+i} else {b := a+b; a := a+i};
#   if b = 600+n then crash
# Here '?' is an unknown test that may yield false or true in any situation.
# Establish for which values of n = 1,2...,10 it is safe, that is, will not reach 'crash'.
import z3

# A[i] / B[i] hold the values of a / b after i loop iterations.
A = z3.IntVector('A', 11)
B = z3.IntVector('B', 11)
Q = z3.BoolVector('?', 11)  # Q for question mark, note Q[0] is not used at all

# Initial conditions
init_conds = [A[0] == 1, B[0] == 1]

# Iteration conditions: both updates read the values from the previous step,
# and Q[i] selects which branch of the 'if ?' is taken in iteration i.
iter_conds = []
for i in range(1, 11):
    iter_conds.extend([
        A[i] == z3.If(Q[i], A[i - 1] + 2 * B[i - 1], A[i - 1] + i),
        B[i] == z3.If(Q[i], B[i - 1] + i, A[i - 1] + B[i - 1]),
    ])

conditions = [*init_conds, *iter_conds]

solver = z3.Solver()
solver.add(*conditions)
# Open a scope so later constraints can be retracted with pop().
solver.push()
def train(params):
    """Run the three-layer container-placement training loop.

    For every epoch the containers of one batch are placed one at a time on a
    three-level hierarchy of node groups. At each level a z3 solver holds the
    per-app and capacity limits, ``handle_constraint`` remaps the groups the
    solver rules out, and the group is chosen either round-robin (while
    ``ifUseExternal`` is set, i.e. until ``epoch_i`` exceeds 30) or by that
    level's ``PolicyGradient`` agent. After each episode the throughput and
    violation figures from ``env.get_tput_total_env()`` are stored, episodes
    within the safety requirement are kept in replay buffers, and ``RL_2`` /
    ``RL_3`` learn every ``batch_size`` epochs. Checkpoints are written every
    200 epochs.

    Args:
        params: Mapping read with the keys 'number of nodes in the cluster',
            'batch_size', 'path', 'path_recover', 'recover',
            'nodes per group', 'replay size', 'learning rate', 'epochs' and
            'container_limitation per node'.
    """
    # NOTE(review): the block structure below was reconstructed from a
    # whitespace-collapsed source; confirm the indentation against the
    # original file.

    # parameters set
    NUM_NODES = params['number of nodes in the cluster']
    node_limit_sum = 120
    node_limit_coex = 20
    NUM_APPS = 7
    batch_size = params['batch_size']
    ckpt_path_1 = "./checkpoint/" + params['path'] + "1/model.ckpt"
    ckpt_path_2 = "./checkpoint/" + params['path'] + "2/model.ckpt"
    ckpt_path_3 = "./checkpoint/" + params['path'] + "3/model.ckpt"
    make_path(params['path'] + "1")
    make_path(params['path'] + "2")
    make_path(params['path'] + "3")
    ckpt_path_recover_1 = "../results/cpo/newhypernode/" + params['path_recover'] + "1/model.ckpt"
    ckpt_path_recover_2 = "../results/cpo/newhypernode/" + params['path_recover'] + "2/model.ckpt"
    ckpt_path_recover_3 = "../results/cpo/newhypernode/" + params['path_recover'] + "3/model.ckpt"
    env = LraClusterEnv(num_nodes=NUM_NODES)
    np_path = "./checkpoint/" + params['path'] + "/optimal_file_name.npz"
    Recover = params['recover']
    nodes_per_group = int(params['nodes per group'])
    replay_size = params['replay size']
    training_times_per_episode = 1  # TODO: if layers changes, training_times_per_episode should be modified
    safety_requirement = 0.05#40
    # While True, actions are chosen round-robin instead of by the agents.
    ifUseExternal = True

    """ Build Network """
    n_actions = nodes_per_group  #: 3 nodes per group
    n_features = int(n_actions * (env.NUM_APPS + 1 + env.NUM_APPS )+ 1 + env.NUM_APPS)  #: 3*9+1 = 28
    # One policy-gradient agent per layer of the hierarchy.
    RL_1 = PolicyGradient(
        n_actions=n_actions,
        n_features=n_features,
        learning_rate=params['learning rate'],
        suffix='1b',
        safety_requirement=safety_requirement)
    RL_2 = PolicyGradient(
        n_actions=n_actions,
        n_features=n_features,
        learning_rate=params['learning rate'],
        suffix='2b',
        safety_requirement=safety_requirement)
    RL_3 = PolicyGradient(
        n_actions=n_actions,
        n_features=n_features,
        learning_rate=params['learning rate'],
        suffix='3b',
        safety_requirement=safety_requirement)
    # sim = Simulator()

    """ Training """
    start_time = time.time()
    global_start_time = start_time
    number_optimal = []
    observation_episode_1, action_episode_1, reward_episode_1, safety_episode_1 = [], [], [], []
    observation_optimal_1, action_optimal_1, reward_optimal_1, safety_optimal_1 = [], [], [], []
    observation_episode_2, action_episode_2, reward_episode_2, safety_episode_2 = [], [], [], []
    observation_optimal_2, action_optimal_2, reward_optimal_2, safety_optimal_2 = [], [], [], []
    observation_episode_3, action_episode_3, reward_episode_3, safety_episode_3 = [], [], [], []
    observation_optimal_3, action_optimal_3, reward_optimal_3, safety_optimal_3 = [], [], [], []
    epoch_i = 0
    thre_entropy = 0.001
    # TODO: delete this range
    # `names` is used below as a plain string-keyed store for the z3
    # variables and the per-class replay buffers.
    names = locals()
    # x<i> is a vector of 3 z3 integers: containers of app i on each of the
    # 3 places of the current group.
    for i in range(7):
        names['x' + str(i)] = z3.IntVector('x' + str(i), 3)
    # Per-class (0..9) best throughput and replay buffers.
    for i in range(0, 10):
        names['highest_tput_' + str(i)] = 0
        names['observation_optimal_1_' + str(i)] = []
        names['action_optimal_1_' + str(i)] = []
        names['observation_optimal_2_' + str(i)] = []
        names['action_optimal_2_' + str(i)] = []
        names['observation_optimal_3_' + str(i)] = []
        names['action_optimal_3_' + str(i)] = []
        names['reward_optimal_1_' + str(i)] = []
        names['reward_optimal_2_' + str(i)] = []
        names['reward_optimal_3_' + str(i)] = []
        names['safety_optimal_1_' + str(i)] = []
        names['safety_optimal_2_' + str(i)] = []
        names['safety_optimal_3_' + str(i)] = []
        names['number_optimal_' + str(i)] = []
        names['optimal_range_' + str(i)] = 1.05

    def store_episode_1(observations, actions):
        # Record one layer-1 step of the current episode.
        observation_episode_1.append(observations)
        action_episode_1.append(actions)

    def store_episode_2(observations, actions):
        # Record one layer-2 step of the current episode.
        observation_episode_2.append(observations)
        action_episode_2.append(actions)

    def store_episode_3(observations, actions):
        # Record one layer-3 step of the current episode.
        observation_episode_3.append(observations)
        action_episode_3.append(actions)

    def handle_constraint(observation_now, appid_now):
        # For each of the 3 places, ask the enclosing-scope solver `s` whether
        # one more container of `appid_now` could still be placed there
        # (list_check[place] is True when it could NOT). Infeasible places are
        # remapped round-robin onto feasible ones.
        # Returns (observation_original, mapping_index), where mapping_index[k]
        # is the place actually used when action k is chosen.
        observation_original = observation_now.copy()
        mapping_index = []
        list_check = []
        t2 = time.time()
        for place in range(3):
            # push/pop keeps the trial constraint out of the solver state.
            s.push()
            s.add(names['x' + str(appid_now)][place] >= int(observation_now[place][appid_now]) + 1)
            if s.check() == z3.sat:
                list_check.append(False)
            else:
                list_check.append(True)
            s.pop()
        t3 = time.time()
        # print("formulate: ", t2 - t1)
        # print("calculate: ", t3 - t2)
        good_index = np.where(np.array(list_check) == False)[0]
        length = len(good_index)
        # NOTE(review): when no place is feasible `length` is 0 and the modulo
        # below would raise ZeroDivisionError - confirm this cannot happen.
        if length < 1:
            test = 1
        index_replace = 0
        for node in range(3):
            if list_check[node]:  # bad node
                # index_this_replace = good_index[np.random.randint(length)]
                index_this_replace = good_index[index_replace % length]
                index_replace += 1
                # NOTE(review): rows are copied from the enclosing-scope
                # `observation` (the zero array created per epoch in train),
                # not from `observation_now` - confirm this is intended.
                observation_original[node] = observation[index_this_replace]
                mapping_index.append(index_this_replace)
            else:
                mapping_index.append(node)
                observation_original[node] = observation[node]
        return observation_original, mapping_index

    source_batch_a, index_data_a = batch_data()  # index_data = [0,1,2,0,1,2]

    while epoch_i < params['epochs']:
        # Restore saved sessions once, on the first iteration only.
        if Recover:
            RL_1.restore_session(ckpt_path_recover_1)
            RL_2.restore_session(ckpt_path_recover_2)
            RL_3.restore_session(ckpt_path_recover_3)
            Recover = False

        tput_origimal_class = 0
        source_batch_ = source_batch_a.copy()
        index_data = index_data_a.copy()
        NUM_CONTAINERS = sum(source_batch_)
        observation = np.zeros([NUM_NODES, NUM_APPS]).copy()  # (9,9)
        source_batch = source_batch_.copy()

        """ Episode """
        """ first layer """
        total = source_batch
        limit = (1 * 9 * 27)
        capicity = (8 * 9 * 27)  # 3
        # NOTE(review): bare `Solver` (not `z3.Solver`) must be imported
        # elsewhere in the file - confirm.
        s = Solver()
        # app sum == batch
        for i in range(7):
            s.add(z3.Sum(names['x' + str(i)]) == int(total[i]))
        # node capacity
        for node in range(3):
            s.add(z3.Sum([names['x' + str(i)][node] for i in range(7)]) <= int(capicity))
        # >=0
        for i in range(7):
            for node in range(3):
                s.add(names['x' + str(i)][node] >= 0)
        # per app spread
        for i in range(7):
            for node in range(3):
                s.add(names['x' + str(i)][node] <= limit)
        # App1 and App2 not exist
        for node in range(3):
            s.add(names['x' + str(1)][node] + names['x' + str(2)][node] <= limit)

        source_batch_first = source_batch_.copy()
        observation_first_layer = np.zeros([nodes_per_group, env.NUM_APPS], int)
        for inter_episode_index in range(NUM_CONTAINERS):
            appid = index_data[inter_episode_index]
            observation_first_layer_copy, mapping_index = handle_constraint(observation_first_layer, appid)
            assert len(mapping_index) > 0
            source_batch_first[appid] -= 1
            # observation_first_layer_copy = observation_first_layer.copy()
            # Build the flattened feature row: counts, over-limit flags, row
            # sums, the app id and the remaining batch.
            observation_first_layer_copy[:, appid] += 1
            observation_first_layer_copy = np.append(observation_first_layer_copy, observation_first_layer_copy > 9 * node_limit_coex, axis=1)
            observation_first_layer_copy = np.append(observation_first_layer_copy, observation_first_layer_copy.sum(axis=1).reshape(nodes_per_group, 1), axis=1)
            # observation_first_layer_copy = np.append(observation_first_layer_copy, ((observation_first_layer_copy[:, 2] > 0) * (observation_first_layer_copy[:, 3] > 0)).reshape(nodes_per_group, 1), axis=1)
            observation_first_layer_copy = np.array(observation_first_layer_copy).reshape(1, -1)
            observation_first_layer_copy = np.append(observation_first_layer_copy, appid).reshape(1, -1)
            observation_first_layer_copy = np.append(observation_first_layer_copy, np.array(source_batch_first)).reshape(1, -1)
            if ifUseExternal:
                action_1 = inter_episode_index % 3
                prob_weights = []
            else:
                action_1, prob_weights = RL_1.choose_action(observation_first_layer_copy.copy())
            decision = mapping_index[action_1]
            observation_first_layer[decision, appid] += 1
            # Commit the placement so later feasibility checks account for it.
            s.add(names['x' + str(appid)][decision] >= int(observation_first_layer[decision][appid]))
            store_episode_1(observation_first_layer_copy, action_1)
        assert (np.sum(observation_first_layer, axis=1) <= params['container_limitation per node'] * 9).all()
        assert sum(sum(observation_first_layer)) == NUM_CONTAINERS

        """ second layer """
        observation_second_layer_aggregation = np.empty([0, env.NUM_APPS], int)  # 9*20
        number_cont_second_layer = []
        for second_layer_index in range(nodes_per_group):
            # Distribute what layer one put on this group over its sub-groups.
            rnd_array = observation_first_layer[second_layer_index].copy()
            total = rnd_array
            limit = (1 * 3 *27)
            capicity = (8 * 3*27)  # 3
            s = Solver()
            # app sum == batch
            for i in range(7):
                s.add(z3.Sum(names['x' + str(i)]) == int(total[i]))
            # node capacity
            for node in range(3):
                s.add(z3.Sum([names['x' + str(i)][node] for i in range(7)]) <= int(capicity))
            # >=0
            for i in range(7):
                for node in range(3):
                    s.add(names['x' + str(i)][node] >= 0)
            # per app spread
            for i in range(7):
                for node in range(3):
                    s.add(names['x' + str(i)][node] <= limit)
            # App1 and App2 not exist
            for node in range(3):
                s.add(names['x' + str(1)][node] + names['x' + str(2)][node] <= limit)

            source_batch_second, index_data = batch_data_sub(rnd_array)
            observation_second_layer = np.zeros([nodes_per_group, env.NUM_APPS], int)
            NUM_CONTAINERS_second = sum(source_batch_second)
            number_cont_second_layer.append(NUM_CONTAINERS_second)
            for inter_episode_index in range(NUM_CONTAINERS_second):
                appid = index_data[inter_episode_index]
                observation_second_layer_copy, mapping_index = handle_constraint(observation_second_layer, appid)
                assert len(mapping_index) > 0
                source_batch_second[appid] -= 1
                # observation_second_layer_copy = observation_second_layer.copy()
                observation_second_layer_copy[:, appid] += 1
                observation_second_layer_copy = np.append(observation_second_layer_copy, observation_second_layer_copy > 3 * node_limit_coex, axis=1)
                observation_second_layer_copy = np.append(observation_second_layer_copy, observation_second_layer_copy.sum(axis=1).reshape(nodes_per_group, 1), axis=1)
                # observation_second_layer_copy = np.append(observation_second_layer_copy, ((observation_second_layer_copy[:, 2] > 0) * (observation_second_layer_copy[:, 3] > 0)).reshape(nodes_per_group, 1), axis=1)
                observation_second_layer_copy = np.array(observation_second_layer_copy).reshape(1, -1)
                observation_second_layer_copy = np.append(observation_second_layer_copy, appid).reshape(1, -1)
                observation_second_layer_copy = np.append(observation_second_layer_copy, np.array(source_batch_second)).reshape(1, -1)
                if ifUseExternal:
                    action_2 = inter_episode_index % 3
                    prob_weights = []
                else:
                    action_2, prob_weights = RL_2.choose_action(observation_second_layer_copy.copy())
                decision = mapping_index[action_2]
                observation_second_layer[decision, appid] += 1
                s.add(names['x' + str(appid)][decision] >= int(observation_second_layer[decision][appid]))
                store_episode_2(observation_second_layer_copy, action_2)
            assert (np.sum(observation_second_layer, axis=1) <= params['container_limitation per node'] * 3).all()
            assert sum(sum(observation_second_layer)) == NUM_CONTAINERS_second
            observation_second_layer_aggregation = np.append(observation_second_layer_aggregation, observation_second_layer, 0)

        """ third layer """
        observation_third_layer_aggregation = np.empty([0, env.NUM_APPS], int)  # 9*20
        number_cont_third_layer = []
        for third_layer_index in range(nodes_per_group * nodes_per_group):
            rnd_array = observation_second_layer_aggregation[third_layer_index].copy()
            total = rnd_array
            limit = (1 * 1 *27)
            capicity = 8 *27
            s = Solver()
            # app sum == batch
            for i in range(7):
                s.add(z3.Sum(names['x' + str(i)]) == int(total[i]))
            # node capacity
            for node in range(3):
                s.add(z3.Sum([names['x' + str(i)][node] for i in range(7)]) <= int(capicity))
            # >=0
            for i in range(7):
                for node in range(3):
                    s.add(names['x' + str(i)][node] >= 0)
            # per app spread
            for i in range(7):
                for node in range(3):
                    s.add(names['x' + str(i)][node] <= limit)
            # App1 and App2 not exist
            for node in range(3):
                s.add(names['x' + str(1)][node] + names['x' + str(2)][node] <= limit)

            source_batch_third, index_data = batch_data_sub(rnd_array)
            observation_third_layer = np.zeros([nodes_per_group, env.NUM_APPS], int)
            NUM_CONTAINERS_third = sum(source_batch_third)
            number_cont_third_layer.append(NUM_CONTAINERS_third)
            for inter_episode_index in range(NUM_CONTAINERS_third):
                appid = index_data[inter_episode_index]
                observation_third_layer_copy, mapping_index = handle_constraint(observation_third_layer, appid)
                assert len(mapping_index) > 0
                source_batch_third[appid] -= 1
                # observation_third_layer_copy = observation_third_layer.copy()
                observation_third_layer_copy[:, appid] += 1
                observation_third_layer_copy = np.append(observation_third_layer_copy, observation_third_layer_copy > 1 * node_limit_coex, axis=1)
                observation_third_layer_copy = np.append(observation_third_layer_copy, observation_third_layer_copy.sum(axis=1).reshape(nodes_per_group, 1), axis=1)
                # observation_third_layer_copy = np.append(observation_third_layer_copy, ((observation_third_layer_copy[:, 2] > 0) * (observation_third_layer_copy[:, 3] > 0)).reshape(nodes_per_group, 1), axis=1)
                observation_third_layer_copy = np.array(observation_third_layer_copy).reshape(1, -1)
                observation_third_layer_copy = np.append(observation_third_layer_copy, appid).reshape(1, -1)
                observation_third_layer_copy = np.append(observation_third_layer_copy, np.array(source_batch_third)).reshape(1, -1)
                if ifUseExternal:
                    action_3 = inter_episode_index % 3
                    prob_weights = []
                else:
                    action_3, prob_weights = RL_3.choose_action(observation_third_layer_copy.copy())
                decision = mapping_index[action_3]
                observation_third_layer[decision, appid] += 1
                s.add(names['x' + str(appid)][decision] >= int(observation_third_layer[decision][appid]))
                store_episode_3(observation_third_layer_copy, action_3)
            observation_third_layer_aggregation = np.append(observation_third_layer_aggregation, observation_third_layer, 0)
            assert (np.sum(observation_third_layer, axis=1) <= params['container_limitation per node'] * 1).all()
            assert sum(sum(observation_third_layer)) == NUM_CONTAINERS_third

        """ After an entire allocation, calculate total throughput, reward """
        env.state = observation_third_layer_aggregation.copy()
        assert sum(sum(env.state)) == NUM_CONTAINERS
        assert (env.state.sum(0) == source_batch_).all()
        total_tput, list_check_sum, list_check_coex, list_check_per_app, list_check = env.get_tput_total_env()
        # Normalise throughput and violation count by the number of containers.
        tput = total_tput/NUM_CONTAINERS
        list_check = 1.0 * list_check / NUM_CONTAINERS
        reward_ratio = tput
        list_check_ratio = list_check
        list_check_layer_one = 0
        list_check_layer_one_ratio = list_check_layer_one
        # Every step of the episode gets the same reward / safety value.
        safety_episode_1 = [list_check_ratio+ list_check_layer_one_ratio * 1.0] * len(observation_episode_1)
        reward_episode_1 = [reward_ratio * 1.0] * len(observation_episode_1)
        safety_episode_2 = [list_check_ratio * 1.0] * len(observation_episode_2)
        reward_episode_2 = [reward_ratio * 1.0] * len(observation_episode_2)
        safety_episode_3 = [list_check_ratio * 1.0] * len(observation_episode_3)
        reward_episode_3 = [reward_ratio * 1.0] * len(observation_episode_3)
        RL_1.store_tput_per_episode(tput, epoch_i, list_check+list_check_layer_one, list_check_per_app, list_check_coex, list_check_sum)
        RL_2.store_tput_per_episode(tput, epoch_i, list_check+list_check_layer_one, list_check_per_app, list_check_coex, list_check_sum)
        RL_3.store_tput_per_episode(tput, epoch_i, list_check+list_check_layer_one, list_check_per_app, list_check_coex, list_check_sum)
        RL_1.store_training_samples_per_episode(observation_episode_1, action_episode_1, reward_episode_1, safety_episode_1)
        RL_2.store_training_samples_per_episode(observation_episode_2, action_episode_2, reward_episode_2, safety_episode_2)
        RL_3.store_training_samples_per_episode(observation_episode_3, action_episode_3, reward_episode_3, safety_episode_3)

        """ check_tput_quality(tput) """
        if list_check <= safety_requirement:
            if names['highest_tput_' + str(tput_origimal_class)] < tput:
                # New best throughput: reset the class buffers and keep only
                # this episode.
                names['highest_tput_' + str(tput_origimal_class)] = tput
                names['observation_optimal_1_' + str(tput_origimal_class)], names['action_optimal_1_' + str(tput_origimal_class)], names['observation_optimal_2_' + str(tput_origimal_class)], names['action_optimal_2_' + str(tput_origimal_class)],\
                names['reward_optimal_1_' + str(tput_origimal_class)],names['reward_optimal_2_' + str(tput_origimal_class)],names['reward_optimal_3_' + str(tput_origimal_class)], \
                names['number_optimal_' + str(tput_origimal_class)],\
                names['safety_optimal_1_' + str(tput_origimal_class)],names['safety_optimal_2_' + str(tput_origimal_class)],names['safety_optimal_3_' + str(tput_origimal_class)]\
                    = [], [], [], [], [], [], [], [], [], [], []
                names['observation_optimal_3_' + str(tput_origimal_class)], names['action_optimal_3_' + str(tput_origimal_class)] = [], []
                names['observation_optimal_1_' + str(tput_origimal_class)].extend(observation_episode_1)
                names['action_optimal_1_' + str(tput_origimal_class)].extend(action_episode_1)
                names['observation_optimal_2_' + str(tput_origimal_class)].extend(observation_episode_2)
                names['action_optimal_2_' + str(tput_origimal_class)].extend(action_episode_2)
                names['observation_optimal_3_' + str(tput_origimal_class)].extend(observation_episode_3)
                names['action_optimal_3_' + str(tput_origimal_class)].extend(action_episode_3)
                names['number_optimal_' + str(tput_origimal_class)].append(NUM_CONTAINERS)
                names['safety_optimal_1_' + str(tput_origimal_class)].extend(safety_episode_1)
                names['safety_optimal_2_' + str(tput_origimal_class)].extend(safety_episode_2)
                names['safety_optimal_3_' + str(tput_origimal_class)].extend(safety_episode_3)
                names['reward_optimal_1_' + str(tput_origimal_class)].extend(reward_episode_1)
                names['reward_optimal_2_' + str(tput_origimal_class)].extend(reward_episode_2)
                names['reward_optimal_3_' + str(tput_origimal_class)].extend(reward_episode_3)
                names['optimal_range_' + str(tput_origimal_class)] = 1.05
            elif names['highest_tput_' + str(tput_origimal_class)] < tput * names['optimal_range_' + str(tput_origimal_class)]:
                # Within the optimal range of the best: append to the buffers.
                names['observation_optimal_1_' + str(tput_origimal_class)].extend(observation_episode_1)
                names['action_optimal_1_' + str(tput_origimal_class)].extend(action_episode_1)
                names['observation_optimal_2_' + str(tput_origimal_class)].extend(observation_episode_2)
                names['action_optimal_2_' + str(tput_origimal_class)].extend(action_episode_2)
                names['observation_optimal_3_' + str(tput_origimal_class)].extend(observation_episode_3)
                names['action_optimal_3_' + str(tput_origimal_class)].extend(action_episode_3)
                names['number_optimal_' + str(tput_origimal_class)].append(NUM_CONTAINERS)
                names['safety_optimal_1_' + str(tput_origimal_class)].extend(safety_episode_1)
                names['safety_optimal_2_' + str(tput_origimal_class)].extend(safety_episode_2)
                names['safety_optimal_3_' + str(tput_origimal_class)].extend(safety_episode_3)
                names['reward_optimal_1_' + str(tput_origimal_class)].extend(reward_episode_1)
                names['reward_optimal_2_' + str(tput_origimal_class)].extend(reward_episode_2)
                names['reward_optimal_3_' + str(tput_origimal_class)].extend(reward_episode_3)

        # Start the next episode with empty per-episode buffers.
        observation_episode_1, action_episode_1, reward_episode_1, safety_episode_1 = [], [], [], []
        observation_episode_2, action_episode_2, reward_episode_2, safety_episode_2 = [], [], [], []
        observation_episode_3, action_episode_3, reward_episode_3, safety_episode_3 = [], [], [], []

        """ Each batch, RL.learn() """
        if (epoch_i % batch_size == 0) & (epoch_i > 1):
            for replay_class in range(0,10):
                number_optimal = names['number_optimal_' + str(replay_class)]
                reward_optimal_1 = names['reward_optimal_1_' + str(replay_class)]
                reward_optimal_2 = names['reward_optimal_2_' + str(replay_class)]
                reward_optimal_3 = names['reward_optimal_3_' + str(replay_class)]
                safety_optimal_1 = names['safety_optimal_1_' + str(replay_class)]
                safety_optimal_2 = names['safety_optimal_2_' + str(replay_class)]
                safety_optimal_3 = names['safety_optimal_3_' + str(replay_class)]
                observation_optimal_1 = names['observation_optimal_1_' + str(replay_class)]
                action_optimal_1 = names['action_optimal_1_' + str(replay_class)]
                observation_optimal_2 = names['observation_optimal_2_' + str(replay_class)]
                action_optimal_2 = names['action_optimal_2_' + str(replay_class)]
                observation_optimal_3 = names['observation_optimal_3_' + str(replay_class)]
                action_optimal_3 = names['action_optimal_3_' + str(replay_class)]
                buffer_size = int(len(number_optimal))
                if buffer_size < replay_size:
                    # Small buffer: replay every stored episode.
                    # TODO: if layers changes, training_times_per_episode should be modified
                    RL_1.ep_obs.extend(observation_optimal_1)
                    RL_1.ep_as.extend(action_optimal_1)
                    RL_1.ep_rs.extend(reward_optimal_1)
                    RL_1.ep_ss.extend(safety_optimal_1)
                    RL_2.ep_obs.extend(observation_optimal_2)
                    RL_2.ep_as.extend(action_optimal_2)
                    RL_2.ep_rs.extend(reward_optimal_2)
                    RL_2.ep_ss.extend(safety_optimal_2)
                    RL_3.ep_obs.extend(observation_optimal_3)
                    RL_3.ep_as.extend(action_optimal_3)
                    RL_3.ep_rs.extend(reward_optimal_3)
                    RL_3.ep_ss.extend(safety_optimal_3)
                else:
                    # Sample `replay_size` stored episodes without replacement;
                    # number_optimal holds the step count of each episode.
                    replay_index = np.random.choice(range(buffer_size), size=replay_size, replace=False)
                    for replay_id in range(replay_size):
                        replace_start = replay_index[replay_id]
                        start_location = sum(number_optimal[:replace_start])
                        stop_location = sum(number_optimal[:replace_start+1])
                        RL_1.ep_obs.extend(observation_optimal_1[start_location: stop_location])
                        RL_1.ep_as.extend(action_optimal_1[start_location: stop_location])
                        RL_1.ep_rs.extend(reward_optimal_1[start_location: stop_location])
                        RL_1.ep_ss.extend(safety_optimal_1[start_location: stop_location])
                        RL_2.ep_obs.extend(observation_optimal_2[start_location: stop_location])
                        RL_2.ep_as.extend(action_optimal_2[start_location: stop_location])
                        RL_2.ep_rs.extend(reward_optimal_2[start_location: stop_location])
                        RL_2.ep_ss.extend(safety_optimal_2[start_location: stop_location])
                        RL_3.ep_obs.extend(observation_optimal_3[start_location: stop_location])
                        RL_3.ep_as.extend(action_optimal_3[start_location: stop_location])
                        RL_3.ep_rs.extend(reward_optimal_3[start_location: stop_location])
                        RL_3.ep_ss.extend(safety_optimal_3[start_location: stop_location])
            # RL_1.learn(epoch_i, thre_entropy, Ifprint=True)
            RL_2.learn(epoch_i, thre_entropy)
            optim_case = RL_3.learn(epoch_i, thre_entropy)

        """ checkpoint, per 1000 episodes """
        if (epoch_i % 200 == 0) & (epoch_i > 1):
            for class_replay in range(0,10):
                highest_value = names['highest_tput_' + str(class_replay)]
                print("\n epoch: %d, highest tput: %f" % (epoch_i, highest_value))
                # lowest_vio_ = names['lowest_vio_' + str(class_replay)]
                # print("\n epoch: %d, lowest_vio: %f" % (epoch_i, lowest_vio_))
            RL_1.save_session(ckpt_path_1)
            RL_2.save_session(ckpt_path_2)
            RL_3.save_session(ckpt_path_3)
            np.savez(np_path, tputs=np.array(RL_1.tput_persisit), candidate=np.array(RL_1.episode), vio_persis=np.array(RL_1.safe_persisit))
            print("epoch:", epoch_i, "mean(sum): ", np.mean(RL_1.sum_persisit), "mean(coex): ", np.mean(RL_1.coex_persisit))

            """ optimal range adaptively change """
            for class_replay in range(0, 10):
                number_optimal = names['number_optimal_' + str(class_replay)]
                count_size = int(len(number_optimal))
                if (count_size > 100):
                    # Tighten the acceptance range (never below 1.01) and keep
                    # only the 10 most recent stored episodes.
                    names['optimal_range_' + str(class_replay)] *= 0.99
                    names['optimal_range_' + str(class_replay)] = max(names['optimal_range_' + str(class_replay)], 1.01)
                    start_location = sum(names['number_optimal_' + str(class_replay)][:-10]) * training_times_per_episode
                    names['observation_optimal_1_' + str(class_replay)] = names['observation_optimal_1_' + str(class_replay)][start_location:]
                    names['action_optimal_1_' + str(class_replay)] = names['action_optimal_1_' + str(class_replay)][start_location:]
                    names['observation_optimal_2_' + str(class_replay)] = names['observation_optimal_2_' + str(class_replay)][start_location:]
                    names['action_optimal_2_' + str(class_replay)] = names['action_optimal_2_' + str(class_replay)][start_location:]
                    names['observation_optimal_3_' + str(class_replay)] = names['observation_optimal_3_' + str(class_replay)][start_location:]
                    names['action_optimal_3_' + str(class_replay)] = names['action_optimal_3_' + str(class_replay)][start_location:]
                    names['number_optimal_' + str(class_replay)] = names['number_optimal_' + str(class_replay)][-10:]
                    names['safety_optimal_1_' + str(class_replay)] = names['safety_optimal_1_' + str(class_replay)][start_location:]
                    names['safety_optimal_2_' + str(class_replay)] = names['safety_optimal_2_' + str(class_replay)][start_location:]
                    names['safety_optimal_3_' + str(class_replay)] = names['safety_optimal_3_' + str(class_replay)][start_location:]
                    names['reward_optimal_1_' + str(class_replay)] = names['reward_optimal_1_' + str(class_replay)][start_location:]
                    names['reward_optimal_2_' + str(class_replay)] = names['reward_optimal_2_' + str(class_replay)][start_location:]
                    names['reward_optimal_3_' + str(class_replay)] = names['reward_optimal_3_' + str(class_replay)][start_location:]
                print("optimal_range:", names['optimal_range_' + str(class_replay)])
            print(prob_weights)
            # NOTE(review): `optim_case` is only assigned in the learn block
            # above - confirm it is always bound before this point.
            if optim_case > 0:
                thre_entropy *= 0.5
            thre_entropy = max(thre_entropy, 0.001)

        epoch_i += 1
        # After 30 epochs, switch from round-robin placement to the agents.
        if epoch_i>30:
            ifUseExternal = False
bitsCount = tp.itemsize * 8 return [z3.BitVec(name + "__" + str(i), bitsCount) for i in range(count)]


def generateFiniteLenFloats(name, count, tp, ctx):
    """Create ``count`` z3 floating-point constants for the numpy type ``tp``.

    The floating-point sort is looked up in ``floatTypes`` by the byte size of
    ``np.dtype(tp)``; a tuple entry is expanded into ``z3.FPSort(*entry)``.
    The constants are named ``name + "__" + str(i)``.

    Args:
        name: Prefix for the constant names.
        count: Number of constants to create.
        tp: Anything accepted by ``np.dtype``.
        ctx: Not used by this function.

    Returns:
        A list of ``count`` z3 floating-point constants.

    Raises:
        KeyError: If ``floatTypes`` has no entry for the type's byte size.
    """
    tp = np.dtype(tp)
    fpSort = floatTypes[tp.itemsize]
    if isinstance(fpSort, tuple):
        fpSort = z3.FPSort(*fpSort)
    return [z3.FP(name + "__" + str(i), fpSort) for i in range(count)]


# Python / numpy scalar type -> factory with the common signature
# (name, count, tp, ctx) that builds a vector of z3 constants.
typesRemapping = {
    np.bool_: lambda name, count, tp, ctx: z3.BoolVector(name, count, ctx),
    bool: lambda name, count, tp, ctx: z3.BoolVector(name, count, ctx),
    int: lambda name, count, tp, ctx: z3.IntVector(name, count, ctx),
    float: lambda name, count, tp, ctx: z3.RealVector(name, count, ctx),
}

# Byte size of a float type -> z3 floating-point sort, or a tuple of
# arguments for z3.FPSort.
floatTypes = {
    1: (4, 4),
    #2: (5, 11),
    2: z3.FloatHalf(),
    #4: (8, 24),
    4: z3.FloatSingle(),
    #8: (11, 53),
    8: z3.FloatDouble(),
    10: (15, 63),
    #16: (15, 111),
    16: z3.FloatQuadruple(),
    32: (19, 237),
# Question 1 # Below you see a SUDOKU variant. (See sudoku.jpg) # Again the numbers 1 to 9 should be filled, in such a way that each number occurs exactly once in every row, every column and every 3x3 block. But now there are no numbers given, only symbols '<' and 'o'. The symbol '<' means that the number left from it should be less than the number right from it. # The symbol 'o' means that the two numbers on both sides are consecutive: they differ by exactly one. For border lines not containing a symbol '<' or 'o' nothing is known. # Just like normal sudoku this puzzle has a unique solution (as was figured out by SMT solving). The goal is to find it. Doing this by hand looks quite impossible (you may try!), but solving it by SMT is much more feasible. Can you do this? # As the answer you should give the 9 digit number formed by the lowest line. import z3 S = [z3.IntVector(f"S_{i}", 9) for i in range(9)] # Normal sudoku rules # All numbers between 1 and 9 bounds = [] for i in range(9): bounds += [S[i][j] <= 9 for j in range(9)] bounds += [S[i][j] >= 1 for j in range(9)] # Distinct numbers on each row row_conditions = [] for i in range(9): row_conditions.append(z3.Distinct([s for s in S[i]])) # Distinct numbers on each column col_conditions = [] for j in range(9): col_conditions.append(z3.Distinct([S[i][j] for i in range(9)])) # Distinct numbers in each box box_conditions = [] for k in range(9):
# What is the minimal total running time?
# Question 2
# Take all requirements from Question 1, but now additionally it is required that job 7 should not start earlier than job 8.
# What is the minimal total running time?
# Question 3
# Take all requirements from Question 1 and Question 2, but now additionally it is required that jobs 3, 4 and 5 are never allowed to run at the same time, since they need a special equipment of which only one copy is available.
# What is the minimal total running time?
import z3
from bin_search import binary_search

# Our labelling of jobs starts at 0 and ends at 9 so we need to subtract one
# from job numbers in the description
StartTimes = z3.IntVector('S', 10)
# Probably would have been nicer making this 11 long so the indexing agreed with the question.
EndTimes = z3.IntVector('E', 10)

# Conditions for job lengths
# • The running time of job i is i + 10, for i = 1, 2, . . . , 10.
# (index i here is job i + 1, hence the 11)
joblengths = [
    end == start + 11 + i
    for i, (start, end) in enumerate(zip(StartTimes, EndTimes))
]

# Jobs start after 0
start_conditions = [start >= 0 for start in StartTimes]

# • Job 3 may only start if jobs 1 and 2 have been finished.
start_conditions.extend(
    StartTimes[3 - 1] >= EndTimes[job - 1] for job in (1, 2)
)
# • Job 6 may only start if jobs 2 and 4 have been finished.
def solve(foods):
    """Assign one ingredient to every allergen with Z3.

    The task is encoded as an assignment problem: each allergen gets one
    integer variable whose value is the index of the ingredient assigned to
    it, and no two allergens may share an ingredient.

    The solver, the check result and (when satisfiable) the model are
    printed to stdout as debugging output.

    Args:
        foods: Sequence of ``(ingredients, allergens)`` pairs, each element
            being a collection of hashable names. It is traversed several
            times, so it must be re-iterable (a list, not a generator).

    Returns:
        A list of ``{'ingredient': ..., 'allergen': ...}`` dicts, one per
        allergen, when the constraints are satisfiable; ``None`` otherwise.
    """
    s = z3.Solver()

    # Integer-encode every distinct ingredient and allergen name.
    ingredients_ = list({item for pair in foods for item in pair[0]})
    allergens_ = list({item for pair in foods for item in pair[1]})
    ingredients = {name: index for index, name in enumerate(ingredients_)}
    allergens = {name: index for index, name in enumerate(allergens_)}

    # assignments[k] is the index of the ingredient chosen for allergen k.
    assignments = z3.IntVector('allergen', len(allergens))
    for var in assignments:
        s.add(z3.And(var >= 0, var < len(ingredients)))
    # An ingredient can carry at most one allergen. Skipped when there are
    # no allergens at all: there is then nothing to constrain.
    if assignments:
        s.add(z3.Distinct(assignments))

    all_allergens = set(allergens)
    for food_ingredients, food_allergens in foods:
        ingredient_ids = [ingredients[name] for name in food_ingredients]

        # Every allergen listed on the food is carried by one of the
        # ingredients of that food.
        for name in food_allergens:
            var = assignments[allergens[name]]
            s.add(z3.Or([var == idx for idx in ingredient_ids]))

        # At least one (ingredient, unlisted allergen) pair of this food is
        # not a match.
        # NOTE(review): this disjunction is far weaker than "the food has no
        # other allergens"; it is kept as written -- confirm the intent.
        unlisted = all_allergens - set(food_allergens)
        mismatches = [
            idx != assignments[allergens[name]]
            for idx, name in itertools.product(ingredient_ids, unlisted)
        ]
        # An empty Or() is the constant false and would make the whole
        # problem unsatisfiable, e.g. for a food that lists every known
        # allergen. With no pairs there is nothing to state, so skip it.
        if mismatches:
            s.add(z3.Or(mismatches))

    # are we asking the impossible?
    print(f"{s=}")
    print()
    r = s.check()
    print(f"{r=}")
    if r != z3.sat:
        return None

    # Constraints satisfied: decode the integer model back into names.
    m = s.model()
    print(f"{m=}")
    return [
        {
            'ingredient': ingredients_[m.eval(var).as_long()],
            'allergen': allergens_[k],
        }
        for k, var in enumerate(assignments)
    ]
blueprint2 = "flag[{}] == {}" ans = [] for i in exp: if i[1] is None: continue #ans.append(blueprint2.format(i[0], i[2])) else: ans.append(blueprint.format(i[0], i[1], i[2])) print(" and ".join(ans)) ### доказательство решаемости import z3 for i in range(256): s = z3.Solver() a = z3.IntVector("a", len(flag)) for _ in a: s.add(_ >= 0x20) s.add(_ <= 0x7E) for e in exp: s.add(a[e[0]] - a[e[1]] == e[2]) s.add(a[7] == i) if s.check() == z3.sat: m = s.model() print("".join(chr(m[q].as_long()) for q in a)) """ W SS#]NB"2a"CNAh?b6Z&@EQR X!TT$^OC#3b#DOBi@c7['AFRS Y"UU%_PD$4c$EPCjAd8\(BGST Z#VV&`QE%5d%FQDkBe9])CHTU [$WW'aRF&6e&GRElCf:^*DIUV
def IntVector(self, s_str, length):
    """Delegate to ``z3.IntVector(s_str, length)`` and return its result.

    ``self`` is not used; both arguments are forwarded unchanged.
    """
    vector = z3.IntVector(s_str, length)
    return vector
# Idea: sum up squares of numbers in the row.
# Say we have 9 numbers, all between 1 and 9 (inclusive), sum of squares is 285.
# Can this sum be achieved any way other than =1+4+9+...+81?
# What if we have simultaneous equations?
# Eg sum = 45, sum of squares = 285, sum of cubes = 2025?
# What about if we mix in the product?

# Answers:
# Sums of squares and cubes have multiple solutions, even simultaneous ones.
# Product by itself is not enough in this case. It would work if the entries
# to the rows were primes though.
# Product and sum gives another solution [1,2,4,4,4,5,7,9,9]. Tricky!
# The three, and the factors of the six and eight get mixed together.
# Product and sum of squares give only the desired solution!!!

# I expect there is some sort of algebraic/arithmetic geometry theorem that
# would tell you all of this straight away.
# I really should read that book about Groebner bases and stuff!

import z3

X = z3.IntVector('X', 9)

# Every entry lies between 1 and 9 (all lower bounds first, then all upper
# bounds).
base_conditions = [x >= 1 for x in X] + [x <= 9 for x in X]

# Without loss of generality we put the numbers in non-decreasing order
for i in range(8):
    base_conditions.append(X[i] <= X[i + 1])

# Exclude the solution 1,2,3,...,9
is_identity_row = z3.And([X[i] == i + 1 for i in range(9)])
base_conditions.append(z3.Not(is_identity_row))

# Set sum to 45
sum_cond = [z3.Sum(X) == 45]

# Set sum of squares to 285
sum_of_squares = z3.Sum([x**2 for x in X])
square_conds = [sum_of_squares == 285]

# Set cubes to 2025
def get_total_tput(self, rnd_array):
    """Place containers through three nested, solver-guarded layers.

    Each layer spreads its containers over ``nodes_per_group`` rows, one
    container at a time:

    1. a Z3 solver decides which of the three rows can still take one more
       container of the current app (``handle_constraint``);
    2. the layer's agent (``self.RL_1``, ``self.RL_2`` or ``self.RL_3``)
       picks an action, which is mapped onto a feasible row;
    3. the choice is recorded in the layer's observation matrix and added
       to the solver as a new lower bound.

    Every row produced by one layer becomes the demand (``total``) of one
    sub-problem of the next layer.

    ``LraClusterEnv``, ``params``, ``np``, ``z3`` and ``Solver`` are names
    taken from outside this function.

    Args:
        rnd_array: Must support ``.astype(int)`` and ``sum``; presumably the
            per-app container counts as a NumPy array -- TODO confirm
            against ``self.batch_data``.

    Returns:
        ``env.state``, set to a copy of the stacked third-layer observation
        matrices. Despite the method's name, no throughput value is computed
        in this function.

    Raises:
        AssertionError: If one of the consistency checks below fails.
        ZeroDivisionError: From ``handle_constraint`` when the solver
            rejects all three rows (``index_replace % length`` with
            ``length == 0``).
    """
    # assert sum(rnd_array) == 81
    source_batch_, index_data = self.batch_data(
        rnd_array.astype(int))  # index_data = [0,1,2,0,1,2]
    env = LraClusterEnv(num_nodes=self.NUM_NODES)
    # One vector of 3 integer unknowns per app ('x0' .. 'x6'); entry `node`
    # is the number of containers of that app on row `node`. The same
    # variables are reused by the solvers of all three layers.
    ilp_dict = {}
    for i in range(7):
        ilp_dict['x' + str(i)] = z3.IntVector('x' + str(i), 3)
    observation = env.reset().copy()  # (9,9)
    # NOTE(review): source_batch is never read in this function.
    source_batch = source_batch_.copy()
    nodes_per_group = int(params['nodes per group'])
    NUM_CONTAINERS = int(sum(rnd_array))
    """ Episode """

    def handle_constraint(observation_now, appid_now, s):
        # Ask solver `s`, row by row, whether one more container of app
        # `appid_now` can still be placed, and build the action -> row
        # mapping used by the caller.
        # Returns (observation_original, mapping_index); mapping_index has
        # one entry per row 0..2.
        observation_original = observation_now.copy()
        mapping_index = []
        list_check = []  # list_check[row] is True when the row is infeasible
        for place in range(3):
            # Tentative constraint: push/pop keeps the solver unchanged.
            s.push()
            s.add(ilp_dict['x' + str(appid_now)][place] >=
                  int(observation_now[place][appid_now]) + 1)
            if s.check() == z3.sat:
                list_check.append(False)
            else:
                list_check.append(True)
            s.pop()
        good_index = np.where(np.array(list_check) == False)[0]
        length = len(good_index)
        if length < 1:
            # No feasible row. `test` is never read; the modulo below then
            # raises ZeroDivisionError on the first (infeasible) row.
            test = 1
        index_replace = 0
        for node in range(3):
            if list_check[node]:  # bad node
                # Redirect an infeasible row to a feasible one, cycling
                # through the feasible rows in order.
                # index_this_replace = good_index[np.random.randint(length)]
                index_this_replace = good_index[index_replace % length]
                index_replace += 1
                # NOTE(review): `observation` is the enclosing function's
                # env.reset() copy, not `observation_now`. Both branches
                # overwrite the row, so rows 0..2 of the returned array all
                # come from that outer array -- confirm this is intended.
                observation_original[node] = observation[
                    index_this_replace]
                mapping_index.append(index_this_replace)
            else:
                mapping_index.append(node)
                observation_original[node] = observation[node]
        return observation_original, mapping_index

    """ first layer """
    total = source_batch_.copy()
    limit = (1 * 9)  # upper bound for one app on one row
    capicity = (8 * 9)  # 3
    # NOTE(review): unqualified `Solver` while everything else is spelled
    # `z3.<name>`; it must be imported separately -- confirm.
    s_first = Solver()
    # app sum == batch
    for i in range(7):
        s_first.add(z3.Sum(ilp_dict['x' + str(i)]) == int(total[i]))
    # node capacity
    for node in range(3):
        s_first.add(
            z3.Sum([ilp_dict['x' + str(i)][node]
                    for i in range(7)]) <= int(capicity))
    # >=0
    for i in range(7):
        for node in range(3):
            s_first.add(ilp_dict['x' + str(i)][node] >= 0)
    # per app spread
    for i in range(7):
        for node in range(3):
            s_first.add(ilp_dict['x' + str(i)][node] <= limit)
    # App1 and App2 not exist
    # for node in range(3):
    #     s_first.add(ilp_dict['x' + str(1)][node] + ilp_dict['x' + str(2)][node] <= limit)
    source_batch_first = source_batch_.copy()
    # NOTE(review): the solver hard-codes 7 apps and 3 rows, while the
    # matrices use env.NUM_APPS and nodes_per_group -- presumably equal.
    observation_first_layer = np.zeros([nodes_per_group, env.NUM_APPS], int)
    for inter_episode_index in range(NUM_CONTAINERS):
        appid = index_data[inter_episode_index]
        observation_first_layer_copy, mapping_index = handle_constraint(
            observation_first_layer, appid, s_first)
        assert len(mapping_index) > 0
        source_batch_first[appid] -= 1
        # Build the agent's input: the matrix with column `appid` raised by
        # one on every row, the "> 9 * 2" flags of that matrix, the row
        # sums of the array built so far, all flattened, followed by appid
        # and the remaining per-app counts.
        # observation_first_layer_copy = observation_first_layer.copy()
        observation_first_layer_copy[:, appid] += 1
        observation_first_layer_copy = np.append(
            observation_first_layer_copy,
            observation_first_layer_copy > 9 * 2,
            axis=1)
        observation_first_layer_copy = np.append(
            observation_first_layer_copy,
            observation_first_layer_copy.sum(axis=1).reshape(
                nodes_per_group, 1),
            axis=1)
        observation_first_layer_copy = np.array(
            observation_first_layer_copy).reshape(1, -1)
        observation_first_layer_copy = np.append(
            observation_first_layer_copy, appid).reshape(1, -1)
        observation_first_layer_copy = np.append(
            observation_first_layer_copy,
            np.array(source_batch_first)).reshape(1, -1)
        # prob_weights is not used.
        action_1, prob_weights = self.RL_1.choose_action(
            observation_first_layer_copy.copy())
        # Translate the agent's action into a row the solver accepted.
        decision = mapping_index[action_1]
        observation_first_layer[decision, appid] += 1
        # Commit the placement: the solver must keep at least this many
        # containers of the app on the chosen row from now on.
        s_first.add(ilp_dict['x' + str(appid)][decision] >= int(
            observation_first_layer[decision][appid]))
    assert (np.sum(observation_first_layer, axis=1) <=
            params['container_limitation per node'] * 9).all()
    assert sum(sum(observation_first_layer)) == NUM_CONTAINERS
    """ second layer """
    # Each first-layer row is split again over nodes_per_group rows; the
    # results are stacked row-wise.
    observation_second_layer_aggregation = np.empty([0, env.NUM_APPS],
                                                    int)  # 9*20
    number_cont_second_layer = []
    for second_layer_index in range(nodes_per_group):
        # The parameter name rnd_array is reused for this group's demand.
        rnd_array = observation_first_layer[second_layer_index].copy()
        total = rnd_array
        limit = (1 * 3)
        capicity = (8 * 3)  # 3
        s_second = Solver()
        # app sum == batch
        for i in range(7):
            s_second.add(z3.Sum(ilp_dict['x' + str(i)]) == int(total[i]))
        # node capacity
        for node in range(3):
            s_second.add(
                z3.Sum([ilp_dict['x' + str(i)][node]
                        for i in range(7)]) <= int(capicity))
        # >=0
        for i in range(7):
            for node in range(3):
                s_second.add(ilp_dict['x' + str(i)][node] >= 0)
        # per app spread
        for i in range(7):
            for node in range(3):
                s_second.add(ilp_dict['x' + str(i)][node] <= limit)
        # App1 and App2 not exist
        # for node in range(3):
        #     s_second.add(ilp_dict['x' + str(1)][node] + ilp_dict['x' + str(2)][node] <= limit)
        source_batch_second, index_data = self.batch_data_sub(rnd_array)
        observation_second_layer = np.zeros(
            [nodes_per_group, env.NUM_APPS], int)
        NUM_CONTAINERS_second = sum(source_batch_second)
        number_cont_second_layer.append(NUM_CONTAINERS_second)
        for inter_episode_index in range(NUM_CONTAINERS_second):
            appid = index_data[inter_episode_index]
            observation_second_layer_copy, mapping_index = handle_constraint(
                observation_second_layer, appid, s_second)
            assert len(mapping_index) > 0
            source_batch_second[appid] -= 1
            # Same input layout as in the first layer, with the flag
            # threshold "> 3 * 2".
            # observation_second_layer_copy = observation_second_layer.copy()
            observation_second_layer_copy[:, appid] += 1
            observation_second_layer_copy = np.append(
                observation_second_layer_copy,
                observation_second_layer_copy > 3 * 2,
                axis=1)
            observation_second_layer_copy = np.append(
                observation_second_layer_copy,
                observation_second_layer_copy.sum(axis=1).reshape(
                    nodes_per_group, 1),
                axis=1)
            observation_second_layer_copy = np.array(
                observation_second_layer_copy).reshape(1, -1)
            observation_second_layer_copy = np.append(
                observation_second_layer_copy, appid).reshape(1, -1)
            observation_second_layer_copy = np.append(
                observation_second_layer_copy,
                np.array(source_batch_second)).reshape(1, -1)
            action_2, prob_weights = self.RL_2.choose_action(
                observation_second_layer_copy.copy())
            decision = mapping_index[action_2]
            observation_second_layer[decision, appid] += 1
            s_second.add(ilp_dict['x' + str(appid)][decision] >= int(
                observation_second_layer[decision][appid]))
        observation_second_layer_aggregation = np.append(
            observation_second_layer_aggregation,
            observation_second_layer, 0)
        assert (np.sum(observation_second_layer, axis=1) <=
                params['container_limitation per node'] * 3).all()
        assert sum(sum(observation_second_layer)) == NUM_CONTAINERS_second
    """ third layer """
    # Each second-layer row is split once more; the stacked result becomes
    # env.state.
    observation_third_layer_aggregation = np.empty([0, env.NUM_APPS],
                                                   int)  # 9*20
    number_cont_third_layer = []
    for third_layer_index in range(nodes_per_group * nodes_per_group):
        rnd_array = observation_second_layer_aggregation[
            third_layer_index].copy()
        total = rnd_array
        limit = (1 * 1)
        capicity = 8
        s_third = Solver()
        # app sum == batch
        for i in range(7):
            s_third.add(z3.Sum(ilp_dict['x' + str(i)]) == int(total[i]))
        # node capacity
        for node in range(3):
            s_third.add(
                z3.Sum([ilp_dict['x' + str(i)][node]
                        for i in range(7)]) <= int(capicity))
        # >=0
        for i in range(7):
            for node in range(3):
                s_third.add(ilp_dict['x' + str(i)][node] >= 0)
        # per app spread
        for i in range(7):
            for node in range(3):
                s_third.add(ilp_dict['x' + str(i)][node] <= limit)
        # App1 and App2 not exist
        # for node in range(3):
        #     s_third.add(ilp_dict['x' + str(1)][node] + ilp_dict['x' + str(2)][node] <= limit)
        source_batch_third, index_data = self.batch_data_sub(rnd_array)
        observation_third_layer = np.zeros([nodes_per_group, env.NUM_APPS],
                                           int)
        NUM_CONTAINERS_third = sum(source_batch_third)
        number_cont_third_layer.append(NUM_CONTAINERS_third)
        for inter_episode_index in range(NUM_CONTAINERS_third):
            appid = index_data[inter_episode_index]
            observation_third_layer_copy, mapping_index = handle_constraint(
                observation_third_layer, appid, s_third)
            assert len(mapping_index) > 0
            source_batch_third[appid] -= 1
            # Same input layout as in the first layer, with the flag
            # threshold "> 1 * 2".
            # observation_third_layer_copy = observation_third_layer.copy()
            observation_third_layer_copy[:, appid] += 1
            observation_third_layer_copy = np.append(
                observation_third_layer_copy,
                observation_third_layer_copy > 1 * 2,
                axis=1)
            observation_third_layer_copy = np.append(
                observation_third_layer_copy,
                observation_third_layer_copy.sum(axis=1).reshape(
                    nodes_per_group, 1),
                axis=1)
            observation_third_layer_copy = np.array(
                observation_third_layer_copy).reshape(1, -1)
            observation_third_layer_copy = np.append(
                observation_third_layer_copy, appid).reshape(1, -1)
            observation_third_layer_copy = np.append(
                observation_third_layer_copy,
                np.array(source_batch_third)).reshape(1, -1)
            action_3, prob_weights = self.RL_3.choose_action(
                observation_third_layer_copy.copy())
            decision = mapping_index[action_3]
            observation_third_layer[decision, appid] += 1
            s_third.add(ilp_dict['x' + str(appid)][decision] >= int(
                observation_third_layer[decision][appid]))
        observation_third_layer_aggregation = np.append(
            observation_third_layer_aggregation, observation_third_layer, 0)
        assert (np.sum(observation_third_layer, axis=1) <=
                params['container_limitation per node'] * 1).all()
        assert sum(sum(observation_third_layer)) == NUM_CONTAINERS_third
    env.state = observation_third_layer_aggregation.copy()
    # Every container was placed, and the per-app column totals match the
    # batch returned by self.batch_data.
    assert sum(sum(env.state)) == NUM_CONTAINERS
    assert (env.state.sum(0) == source_batch_).all()
    """ After an entire allocation, calculate total throughput, reward """
    # state = env.state
    # assert sum(sum(self.env.state)) == 81
    return env.state