def select_action_from_state(self, state):
    actions = []
    state = copy.deepcopy(state)
    state = np.reshape(flatten(state), (5, 20, 20))
    # Regroup the five 20x20 planes, pairing planes 2 and 3.
    state = [state[0], state[1], [state[2], state[3]], state[4]]
    agent_coord_1 = copy.deepcopy(self.env.agent_coord_1)
    agent_coord_2 = copy.deepcopy(self.env.agent_coord_2)
    init_score = self.env.score_mine
    rewards = []
    states = []
    next_states = []
    for i in range(self.num_agents):
        # _state aliases state, so the agent plane is replaced in place.
        _state = state
        _state[1] = self.env.get_agent_state(_state[1], i)
        _state = flatten(_state)
        act = self.get_exploration_action(
            np.array(_state, dtype=np.float32), i)
        states.append(state)
        # fit_action returns (valid, state, coord, score), as in the
        # sibling select methods below.
        valid, state, agent_coord, score = self.env.fit_action(
            i, state, act, agent_coord_1, agent_coord_2)
        rewards.append(score - init_score)
        init_score = score
        actions.append(act)
        next_states.append(state)
    return states, actions, rewards, next_states
def select_best_actions(self, state):
    actions = [0] * self.num_agents
    state = copy.deepcopy(state)
    state = np.reshape(flatten(state), (5, 20, 20))
    state = [state[0], state[1], [state[2], state[3]], state[4]]
    agent_coord_1 = copy.deepcopy(self.env.agent_coord_1)
    agent_coord_2 = copy.deepcopy(self.env.agent_coord_2)
    init_score = self.env.score_mine - self.env.score_opponent
    rewards = []
    states = []
    next_states = []
    # random.shuffle works in place and returns None, so shuffle a list copy.
    order = list(range(self.num_agents))
    shuffle(order)
    for i in range(self.num_agents):
        agent = order[i]
        # _state aliases state, so the agent plane is replaced in place.
        _state = state
        _state[1] = self.env.get_agent_state(_state[1], agent)
        _state = flatten(_state)
        states.append(state)
        # Simulate all 9 actions on deep copies and score each one.
        scores = [0] * 9
        mn = 1000
        mx = -1000
        valid_states = []
        for act in range(9):
            _state, _agent_coord_1, _agent_coord_2 = copy.deepcopy(
                [state, agent_coord_1, agent_coord_2])
            valid, _state, _agent_coord, _score = self.env.fit_action(
                agent, _state, act, _agent_coord_1, _agent_coord_2)
            scores[act] = _score - init_score
            mn = min(mn, _score - init_score)
            mx = max(mx, _score - init_score)
            valid_states.append(valid)
        # scores[0] -= 2
        # Min-max normalise, then sharpen with a power of 10 so the
        # best-scoring actions dominate the sampling distribution.
        for j in range(len(scores)):
            scores[j] = (scores[j] - mn) / (mx - mn + 0.0001)
            scores[j] **= 10
        total = np.sum(scores) + 0.0001  # avoid shadowing builtin sum
        for j in range(len(scores)):
            scores[j] = scores[j] / total
            if valid_states[j] is False:
                scores[j] = 0
        scores[0] = 0  # action 0 is always excluded from sampling
        act = choices(range(9), scores)[0]
        valid, state, agent_coord, score = self.env.fit_action(
            agent, state, act, agent_coord_1, agent_coord_2)
        rewards.append(score - init_score)
        init_score = score
        actions[agent] = act
        next_states.append(state)
    return states, actions, rewards, next_states
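# A standalone sketch of the score-sharpening step in select_best_actions:
# min-max normalisation followed by raising to the 10th power pushes the
# sampling distribution towards the best-scoring actions. Values here are
# made up for illustration.
raw = [0.0, 1.0, 3.0, 2.0]
mn, mx = min(raw), max(raw)
sharp = [((s - mn) / (mx - mn + 1e-4)) ** 10 for s in raw]
probs = [s / (sum(sharp) + 1e-4) for s in sharp]
print(probs)  # nearly all probability mass lands on the action that scored 3.0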
def learn(self, states_1, actions_1, rewards_1, next_states_1, actions_2,
          BGame, show_screen):
    next_state, reward, done, remaining_turns = self.env.next_frame(
        actions_1, actions_2, BGame, show_screen)
    # Store one transition per agent in its own replay memory.
    for i in range(self.num_agents):
        states_1[i] = flatten(states_1[i])
        next_states_1[i] = flatten(next_states_1[i])
        self.memories[i].store_transition(
            states_1[i], actions_1[i], rewards_1[i], next_states_1[i])
    self.optimize()
    return done
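# A minimal sketch of how these methods could be wired together for one
# game frame, assuming two agent objects exposing the interfaces above.
# `agent_1`, `agent_2`, `env_state`, `BGame` and `show_screen` are
# hypothetical placeholders, not names confirmed by the original code.
states, actions_1, rewards, next_states = agent_1.select_action(
    env_state, epsilon=0.1)
_, actions_2, _, _ = agent_2.select_best_actions(env_state)
done = agent_1.learn(states, actions_1, rewards, next_states,
                     actions_2, BGame, show_screen)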
def scrape_course(subject, div):
    return {
        'course': subject + ' ' + div.attr.id,
        'name': div('h3').text()[9:],  # drop the fixed 9-character prefix
        'description': div('p').eq(0).text(),
        # Skip the description paragraph; the rest hold requirements.
        'requirements': flatten([
            scrape_requirement_paragraph(p)
            for p in div('p').filter(lambda i: i != 0).items()
        ]),
    }
def action_flatten(self, acts):
    _acts = []
    for act in acts:
        # One-hot encode each action over the action space.
        p = [1 if j == act else 0 for j in range(self.action_lim)]
        _acts.append(p)
    # Zero-pad up to the maximum number of agents.
    while len(_acts) < self.num_agent_lim:
        _acts.append([0] * self.action_lim)
    return flatten(_acts)
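# A worked example of the encoding above, with action_lim=4 and
# num_agent_lim=3 chosen purely for illustration: actions [2, 0] become
# two one-hot rows plus one all-zero padding row, flattened to one vector.
action_lim, num_agent_lim = 4, 3
acts = [2, 0]
rows = [[1 if j == a else 0 for j in range(action_lim)] for a in acts]
rows += [[0] * action_lim] * (num_agent_lim - len(rows))
print([v for row in rows for v in row])
# -> [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0]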
def block_mod(n: int, k, mod):
    nk = n // k
    remainder = n % k
    # Every full k-block must have a popcount not divisible by mod.
    assignments = itertools.product(
        [x for x in bitstrings(k) if sum(x) % mod != 0], repeat=nk)
    if remainder > 0:
        # product takes the iterables as separate arguments, not one tuple.
        assignments = itertools.product(
            assignments,
            [x for x in bitstrings(remainder) if sum(x) % mod != 0])
    return [flatten(x) for x in assignments]
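# `bitstrings` is not shown in this section; a minimal sketch consistent
# with how block_mod uses it (all 0/1 tuples of a given length) could be:
import itertools

def bitstrings(length):
    return itertools.product((0, 1), repeat=length)

# e.g. block_mod(5, 2, 2) keeps 5-bit strings whose two 2-bit blocks
# and 1-bit remainder each have odd popcount.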
def get(spreadsheet_id, value_range):
    request = service.spreadsheets().values().get(
        spreadsheetId=spreadsheet_id,
        range=value_range,
    )
    response = request.execute()
    # values.get returns a list of rows; flatten to a single list of cells.
    values = flatten(response.get('values', []))
    return values
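# A usage sketch for the helper above, assuming `service` was built with
# the Google Sheets v4 discovery client; `creds`, the spreadsheet id and
# the range below are placeholders, not values from the original code.
from googleapiclient.discovery import build

service = build('sheets', 'v4', credentials=creds)
cells = get('<spreadsheet-id>', 'Sheet1!A1:C10')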
def select_action(self, state, epsilon):
    actions = []
    state = copy.deepcopy(state)
    state = np.reshape(flatten(state), (5, 20, 20))
    state = [state[0], state[1], [state[2], state[3]], state[4]]
    agent_coord_1 = copy.deepcopy(self.env.agent_coord_1)
    agent_coord_2 = copy.deepcopy(self.env.agent_coord_2)
    init_score = self.env.score_mine - self.env.score_opponent
    rewards = []
    states = []
    next_states = []
    for i in range(self.num_agents):
        act = None
        states.append(state)
        # Epsilon-greedy: explore with a random action, otherwise query
        # the policy network.
        if random() <= epsilon:
            act = randint(0, self.action_lim - 1)
        else:
            _state = state
            _state[1] = self.env.get_agent_state(_state[1], i)
            _state = flatten(_state)
            act = self.get_exploration_action(
                np.array(_state, dtype=np.float32), i)
        # _state, _agent_coord_1, _agent_coord_2 = copy.deepcopy(
        #     [state, agent_coord_1, agent_coord_2])
        valid, state, agent_coord_1, score = self.env.fit_action(
            i, state, act, agent_coord_1, agent_coord_2, False)
        # Penalise invalid moves on top of the score delta.
        punish = 0
        if valid is False:
            punish = 20
        rewards.append(score - init_score - punish)
        actions.append(act)
        next_states.append(state)
        init_score = score
    self.steps_done += 1
    return states, actions, rewards, next_states
def get_places(self, place_id_list):
    # Deduplicate place ids across the nested lists.
    place_set = set(flatten(place_id_list))
    raw_places = {
        place['id']: place
        for place in orion.get_entities(const.FIWARE_SERVICE,
                                        const.DELIVERY_ROBOT_SERVICEPATH,
                                        const.PLACE_TYPE)
    }
    places = {
        place: raw_places[place]['pose']['value']
        for place in place_set
    }
    return places
def create(cls, matches):
    # Validation
    # Need an odd number of matches so someone wins the series.
    if not isodd(len(matches)):
        return
    # Only 2 teams allowed.
    teams = set(flatten((m.winner, m.loser) for m in matches))
    if len(teams) != 2:
        return
    # Series can't already be set.
    num_existing_series = sum(1 if m.series else 0 for m in matches)
    if num_existing_series:
        return
    series = Series.objects.create()
    Match.objects.filter(id__in=[m.id for m in matches]).update(series=series)
def _take_refuge(self, robot_id, waiting_route):
    places = RobotNotificationAPI.waypoint().get_places(
        [flatten([waiting_route['via'], waiting_route['to']])])
    waypoints = RobotNotificationAPI.waypoint().get_waypoints(
        [places[place_id] for place_id in waiting_route['via']],
        [places[waiting_route['to']]])
    navigating_waypoints = {
        'to': waiting_route['to'],
        'destination': self.get_destination_id(robot_id),
        'action': {
            'func': '',
            'token': '',
            'waiting_route': {},
        },
        'waypoints': waypoints,
    }
    self.move_robot(robot_id, waypoints, navigating_waypoints)
    logger.info(
        f'robot({robot_id}) takes refuge in "{waiting_route["to"]}"')
def get_agent_for_step(self, agent_ID, player_ID, agent_coord):
    agent_state = [[], []]
    # Build one MAX_SIZE x MAX_SIZE position plane per (player, agent) pair.
    for ag_id in range(self.max_n_agents):
        # p_id must not shadow the player_ID parameter used for the
        # one-hot encoding below.
        for p_id in range(self.num_players):
            empty_board = []
            for _ in range(self.MAX_SIZE):
                empty_board.append([0] * self.MAX_SIZE)
            agent_state[p_id].append(empty_board)
            if ag_id >= self.n_agents:
                continue
            x, y = agent_coord[p_id][ag_id]
            agent_state[p_id][ag_id][x][y] = 1
    index = agent_state[0][agent_ID]
    onehot_nturns = [0] * self.max_n_turns
    onehot_nturns[self.remaining_turns] = 1
    # One-hot encode the acting player.
    onehot_players = [1, 1]
    if player_ID == 0:
        onehot_players[1] = 0
    else:
        onehot_players[0] = 0
    agent_state = flatten([agent_state, index, onehot_nturns, onehot_players])
    return np.array(agent_state, dtype=np.float32).reshape(
        -1, self.agent_step_dim)
def preprocess_asteroid(asteroid):
    # NeoWs nests estimated diameters under a unit key and close-approach
    # records in a list; kilometers and the first approach assumed here.
    diameter = asteroid['estimated_diameter']['kilometers']
    close_approach = asteroid['close_approach_data'][0]
    velocity = close_approach['relative_velocity']['kilometers_per_hour']
    return {
        'name': asteroid['name'],
        # 'url': asteroid['nasa_jpl_url'],
        'magnitude': asteroid['absolute_magnitude_h'],
        'hazardous': asteroid['is_potentially_hazardous_asteroid'],
        'diameter_min': diameter['estimated_diameter_min'],
        'diameter_max': diameter['estimated_diameter_max'],
        'velocity': velocity,
        'orbiting_body': close_approach['orbiting_body'],
        'close_approach_date': close_approach['close_approach_date'],
        # 'epoch_date': close_approach['epoch_date_close_approach'],
        'miss_distance': float(close_approach['miss_distance']['kilometers']),
    }

response = request('/neo/rest/v1/feed', {'start_date': '2016-01-01'})
# near_earth_objects maps dates to lists of asteroids; merge them.
asteroids = flatten(response['near_earth_objects'].values())
preprocessed = [preprocess_asteroid(asteroid) for asteroid in asteroids]
print(color_print(preprocessed[:5]))

import pandas as pd
from matplotlib.backends.backend_pdf import PdfPages
# from matplotlib import pyplot

df = pd.DataFrame(preprocessed)
# print(df)
# print(df.describe())
# print(df.columns)
# print(df.miss_distance.min())
with PdfPages('figure.pdf') as pdf:
def experiment(model, config, train_data, val_data):
    X_train, y_train = train_data
    X_val, y_val = val_data
    num_x_train = len(X_train)
    train_metrics = {
        "acc": tf.keras.metrics.SparseCategoricalAccuracy(),
        "top10 acc": tf.keras.metrics.SparseTopKCategoricalAccuracy(k=10),
    }
    val_metrics = {
        "top10 acc": tf.keras.metrics.SparseTopKCategoricalAccuracy(k=10),
    }
    test_metrics = {
        "acc": tf.keras.metrics.SparseCategoricalAccuracy(),
        "top5 acc": tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5),
        "top10 acc": tf.keras.metrics.SparseTopKCategoricalAccuracy(k=10),
        "top20 acc": tf.keras.metrics.SparseTopKCategoricalAccuracy(k=20),
        "mrr5": MrrMetric(k=5),
        "mrr10": MrrMetric(k=10),
        "mrr20": MrrMetric(k=20),
    }
    train_metrics_rec = defaultdict(lambda: [])
    val_metrics_rec = defaultdict(lambda: [])
    test_metrics_rec = defaultdict(lambda: [])
    print(f"starting experiment \"{config['expr_name']}\"")
    expr_dir = Path("logs")/config['expr_name']
    train_log_dir = expr_dir/"train"
    val_log_dir = expr_dir/"val"
    test_log_dir = expr_dir/"test"
    os.makedirs(expr_dir)
    for tensorboard_dir in [train_log_dir, val_log_dir, test_log_dir]:
        os.makedirs(tensorboard_dir, exist_ok=True)
    train_summary_writer = tf.summary.create_file_writer(str(train_log_dir))
    val_summary_writer = tf.summary.create_file_writer(str(val_log_dir))
    if config["save_model"]:
        model_saver = utils.TopModelSaver(Path("models")/config['expr_name'],
                                          config)
    else:
        model_saver = utils.TopModelSaver(Path("models")/"last_run_model",
                                          config)
    print(f"batch_size: {config['batch_size']}")
    for epoch in tqdm(list(range(config["epochs"]))):
        print(f"epoch {epoch}")
        # Reset the metrics at the start of the next epoch
        for metric in utils.flatten([train_metrics.values(),
                                     val_metrics.values(),
                                     test_metrics.values()]):
            metric.reset_states()
        # train model
        for i, (X, y) in enumerate(utils.batchify(
                X_train, y_train, shuffle=True,
                batch_size=config["batch_size"])):
            # reversing sessions so most recent item is last
            X = [sess[::-1] for sess in X]
            X, mask = utils.mask_length(X, maskoff_vals=config["maskoff"],
                                        maskon_vals=config["maskon"],
                                        justify="right")
            preds, loss = model.train_step(tf.constant(X), tf.constant(mask),
                                           tf.constant(y),
                                           list(train_metrics.values()))
            with train_summary_writer.as_default():
                tf.summary.scalar(
                    'loss', loss.numpy(),
                    step=epoch*(int(num_x_train/config["batch_size"])+1)+i)
        # test against validation data
        for X, y in utils.batchify(X_val, y_val, shuffle=True,
                                   batch_size=config["batch_size"]):
            X = [sess[::-1] for sess in X]
            X, mask = utils.mask_length(X, maskoff_vals=config["maskoff"],
                                        maskon_vals=config["maskon"],
                                        justify="right")
            model.test_step(tf.constant(X), tf.constant(mask), tf.constant(y),
                            list(val_metrics.values()))
        # with val_summary_writer.as_default():
        #     tf.summary.histogram("preds", preds, step=epoch)
        # intentionally only getting the last preds cause it takes a while
        with train_summary_writer.as_default():
            for label, metric in train_metrics.items():
                train_metrics_rec[label].append(metric.result())
                # print(f"{label} train: {metric.result()}")
                tf.summary.scalar(label, metric.result(), step=epoch)
        with val_summary_writer.as_default():
            for label, metric in val_metrics.items():
                val_metrics_rec[label].append(metric.result())
                # print(f"{label} val: {metric.result()}")
                tf.summary.scalar(label, metric.result(), step=epoch)
        model_saver.save_best(model, val_metrics["top10 acc"].result())
        # cutoff learning if the score is too low at the early cutoff
        if epoch >= config["early_cutoff_epoch"]:
            if val_metrics["top10 acc"].result() < config["early_cutoff_score"]:
                break
    return model, model_saver
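# A usage sketch for experiment(), assuming a model exposing train_step /
# test_step as above. Every config key shown is one the function actually
# reads, but all the values are illustrative, not from the original run.
config = {
    "expr_name": "demo_run",
    "epochs": 20,
    "batch_size": 128,
    "save_model": False,
    "maskoff": 0,
    "maskon": 1,
    "early_cutoff_epoch": 5,
    "early_cutoff_score": 0.05,
}
model, saver = experiment(model, config, (X_train, y_train), (X_val, y_val))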
def get_states_for_step(self, states):
    states = np.array(flatten(states), dtype=np.float32).reshape(
        -1, self.n_inputs, self.MAX_SIZE, self.MAX_SIZE)
    return states
def test_str(self, target, expected):
    assert flatten(target) == expected

def test_dict_set(self, target, expected):
    assert sorted(flatten(target)) == expected

def test_exception(self, target, expected):
    with pytest.raises(expected):
        flatten(target)
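# The flatten helper under test is not shown in this section; a minimal
# sketch consistent with its uses above (arbitrarily nested iterables
# collapsed into one flat list, strings treated as atoms) might look like
# the following. The real helper's handling of strings, dicts and error
# cases is an assumption here, not confirmed by the tests.
def flatten(items):
    result = []
    for item in items:
        if hasattr(item, '__iter__') and not isinstance(item, (str, bytes)):
            result.extend(flatten(item))
        else:
            result.append(item)
    return result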
def estimate_routes(self, shipment_list, robot_id):
    if not ('destination' in shipment_list
            and 'name' in shipment_list['destination']
            and isinstance(shipment_list['destination']['name'], str)
            and 'updated' in shipment_list
            and isinstance(shipment_list['updated'], list)
            and all('place' in v for v in shipment_list['updated'])
            and all(isinstance(v['place'], str)
                    for v in shipment_list['updated'])):
        raise TypeError('invalid shipment_list')
    if not isinstance(robot_id, str):
        raise TypeError('invalid robot_id')
    logger.info(f'shipment_list = {shipment_list}')
    destination = orion.query_entity(
        const.FIWARE_SERVICE, const.DELIVERY_ROBOT_SERVICEPATH,
        const.PLACE_TYPE,
        f'name=={shipment_list["destination"]["name"]}')['id']
    via_name_list = list(set([v['place'] for v in shipment_list['updated']]))
    via_list = [
        orion.query_entity(const.FIWARE_SERVICE,
                           const.DELIVERY_ROBOT_SERVICEPATH,
                           const.PLACE_TYPE, f'name=={v}')['id']
        for v in sorted(via_name_list)
    ]
    via = const.VIA_SEPARATOR.join(sorted(via_list))
    route_plan = orion.query_entity(
        const.FIWARE_SERVICE, const.DELIVERY_ROBOT_SERVICEPATH,
        const.ROUTE_PLAN_TYPE,
        f'destination=={destination};via=={via};robot_id=={robot_id}')
    routes = route_plan['routes']['value']
    source = route_plan['source']['value']
    places = self.get_places([
        flatten([r['from'], r['via'], r['to'], r['destination']])
        for r in routes
    ])
    waypoints_list = []
    for route in routes:
        waypoints = self.get_waypoints(
            [places[place_id] for place_id in route['via']],
            [places[route['to']]])
        waypoints_list.append({
            'to': route['to'],
            'destination': route['destination'],
            'action': route['action'],
            'waypoints': waypoints,
        })
    order = {
        'source': source,
        'via': via_list,
        'destination': destination,
    }
    return routes, waypoints_list, order
def get_literals_in_unit_clauses_from_lists_of_formulas(phis):
    # The inner flatten merges the per-formula clause lists; the outer one
    # unwraps each width-1 clause into its single literal.
    return set(flatten(flatten(
        [phi.clauses_with_width(1) for phi in phis])))
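# A usage sketch, assuming a formula type whose clauses_with_width(n)
# returns its clauses of that width. The Formula class below is a
# hypothetical stand-in, not the class from the original code.
class Formula:
    def __init__(self, clauses):
        self.clauses = clauses

    def clauses_with_width(self, width):
        return [c for c in self.clauses if len(c) == width]

phis = [Formula([[1], [2, 3]]), Formula([[-2]])]
print(get_literals_in_unit_clauses_from_lists_of_formulas(phis))  # {1, -2}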