def set_from_tuple(self, dat):
    """Restore this object's configuration from a serialized tuple.

    Expected layout of ``dat``:
        dat[0] -> ninput, dat[1] -> noutput, dat[2] -> ioutput,
        dat[3] -> per-input tuples for the lag state spaces,
        dat[4] -> tuple for the lag ANN,
        dat[5] -> per-input tuples for the reduced state spaces,
        dat[6] -> tuple for the reduced ANN.
    """
    self.ninput = dat[0]
    self.noutput = dat[1]
    self.ioutput = dat[2]
    self.state_space_lag = [None] * self.ninput
    self.state_space_red = [None] * self.ninput
    # FIX: xrange -> range; xrange does not exist on Python 3 and the
    # surrounding code uses Python 3 idioms. range is equivalent here.
    for i in range(self.ninput):
        self.state_space_lag[i] = StateSpace()
        self.state_space_lag[i].set_from_tuple(dat[3][i])
        self.state_space_red[i] = StateSpace()
        self.state_space_red[i].set_from_tuple(dat[5][i])
    self.ann_lag.set_from_tuple(dat[4])
    self.ann_red.set_from_tuple(dat[6])
def __init__(self):
    """Set up the three-transmitter backscatter environment."""
    # System parameters
    self.nb_ST = 3
    self.state_size = 2 * self.nb_ST
    busy = BackscatterEnv3.BUSY_TIMESLOT
    frame = BackscatterEnv3.TIME_FRAME
    self.nb_actions = (busy + 1) ** 3 * (frame - busy + 1) ** 2
    # Three busy-slot components followed by two frame-remainder components.
    self.action_space = ActionSpace((
        Discrete(busy + 1),
        Discrete(busy + 1),
        Discrete(busy + 1),
        Discrete(frame - busy + 1),
        Discrete(frame - busy + 1),
    ))
    # A (QUEUE, ENERGY) pair for each of the three transmitters.
    self.observation_space = StateSpace((
        Discrete(SecondTransmitor.QUEUE),
        Discrete(SecondTransmitor.ENERGY),
        Discrete(SecondTransmitor.QUEUE),
        Discrete(SecondTransmitor.ENERGY),
        Discrete(SecondTransmitor.QUEUE),
        Discrete(SecondTransmitor.ENERGY),
    ))
    # initialize Second Transmitters
    self.ST1 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
    self.ST2 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
    self.ST3 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    """Set up the environment with three mobile devices."""
    # System parameters
    self.nb_MB = 3
    self.state_size = 2 * self.nb_MB
    self.nb_actions = ((Mobile.MAX_DATA + 1) ** self.nb_MB
                       * (Mobile.MAX_ENERGY + 1) ** self.nb_MB)
    # A (data, energy) action component per mobile.
    self.action_space = ActionSpace((
        Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
        Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
        Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
    ))
    # A (CPU, energy) observation component per mobile.
    self.observation_space = StateSpace((
        Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
        Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
        Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
    ))
    # initialize the mobiles
    self.MB1 = Mobile()
    self.MB2 = Mobile()
    self.MB3 = Mobile()
    # Aggregate limits across all mobiles.
    self.max_data = self.nb_MB * Mobile.MAX_DATA
    self.max_energy = self.nb_MB * Mobile.MAX_ENERGY
    self.max_latency = Mobile.MAX_LATENCY
    # Running totals accumulated during training.
    self.training_time = 0
    self.training_data = 0
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    """Set up the two-transmitter backscatter environment."""
    # System parameters
    self.nb_ST = 2
    self.state_size = 2 * self.nb_ST + 1
    frame = Backscatter02Env.TIME_FRAME
    self.nb_actions = (frame + 1) ** 3
    self.action_space = ActionSpace((
        Discrete(frame + 1),
        Discrete(frame + 1),
        Discrete(frame + 1),
    ))
    # (QUEUE, ENERGY) per transmitter plus one time-frame component.
    self.observation_space = StateSpace((
        Discrete(SecondTransmitor.QUEUE),
        Discrete(SecondTransmitor.ENERGY),
        Discrete(SecondTransmitor.QUEUE),
        Discrete(SecondTransmitor.ENERGY),
        Discrete(frame + 1),
    ))
    # initialize Second Transmitters
    self.ST1 = SecondTransmitor()
    self.ST2 = SecondTransmitor()
    self.busy_slot = None
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def from_dictionary(self, dict_data):
    """Restore this object's configuration from a dictionary.

    Expected keys: 'ninput', 'noutput', 'ioutput',
    'state_space_lag' / 'state_space_red' (lists of per-input dicts),
    'ann_lag' and 'ann_red'.
    """
    self.ninput = dict_data['ninput']
    self.noutput = dict_data['noutput']
    self.ioutput = dict_data['ioutput']
    # FIX: xrange -> range; xrange does not exist on Python 3 and the
    # surrounding code uses Python 3 idioms. range is equivalent here.
    self.state_space_lag = [None] * self.ninput
    for i in range(self.ninput):
        self.state_space_lag[i] = StateSpace()
        self.state_space_lag[i].from_dictionary(
            dict_data['state_space_lag'][i])
    self.state_space_red = [None] * self.ninput
    for i in range(self.ninput):
        self.state_space_red[i] = StateSpace()
        self.state_space_red[i].from_dictionary(
            dict_data['state_space_red'][i])
    self.ann_lag.from_dictionary(dict_data['ann_lag'])
    self.ann_red.from_dictionary(dict_data['ann_red'])
def __init__(self, metric_list, train_data):
    """Build the state space and an empty transition matrix."""
    self.train_data = train_data
    self.metric_list = metric_list
    self.n_mets = len(self.metric_list)
    # state space
    self.X = StateSpace(self.metric_list, self.train_data)
    self.state_map = self.X.state_map
    self.inv_map = self.X.inverse_map
    self.h_map = self.X.h_state_map
    self.state_poss = self.X.state_poss_seq
    self.n_states = len(self.state_map)
    # Rows index history states, columns index successor tuples.
    self.trans_probs = np.zeros((len(self.h_map), len(self.state_poss)))
    # Seed the generated sequence with one uniformly random state index.
    self.markov_data = [int(self.n_states * np.random.random())]
def __init__(self, num_ccy: int, precision: int, gamma: float,
             metric_list: list, train_data: pd.DataFrame):
    """Initialize the agent's action/state spaces and learning tables."""
    self.num_ccy = num_ccy
    self.precision = precision
    self.gamma = gamma  # discount factor
    self.metric_list = metric_list
    self.train_data = train_data
    # Action space built from the currency count and precision.
    self.A = ActionSpace(self.num_ccy, self.precision)
    self.a_space = self.A.actions
    # State space derived from the configured metrics.
    self.X = StateSpace(self.metric_list, self.train_data)
    self.state_map = self.X.state_map
    self.n_states = len(self.state_map)
    self.n_actions = len(self.a_space)
    # Q-values and per-(state, action) counters, both zero-initialized.
    self.q_table = np.zeros((self.n_states, self.n_actions))
    self.lr_table = np.zeros((self.n_states, self.n_actions))
def main():
    """Drive the DDPG agent through the steps of one episode."""
    agent = ddpg
    total_reward = 0
    num_of_states = 2
    num_of_actions = 1
    reward = np.array([0])
    # Main loop of the algorithm for a single episode.
    # (Comment translated from Greek in the original.)
    for t in range(steps):
        s_t = StateSpace.output()
def test_step():
    """A freshly constructed agent must produce a non-None step result."""
    # Empty 4x4 state space with two goals, a forbidden cell, and a wall.
    state_space = StateSpace(
        4, 4,
        goal_1_position=(1, 3),
        goal_2_position=(3, 3),
        forbidden_position=(2, 3),
        wall_position=(2, 2),
    )
    # Agent starts from a fixed cell of that space.
    start_state = state_space.get_state(0, 1)
    agent = Agent(
        state_space=state_space,
        start_state=start_state,
        learning_rate=0.1,
        discount_rate=0.2,
        exploit_prob=0.1,
        living_reward=-0.1,
    )
    # One step should always yield something.
    assert agent.step() is not None
def __init__(self):
    """Set up the blockchain-networking environment."""
    # Channel parameters
    self.nb_channels = 4
    self.idleChannel = 1
    self.prob_switching = 0.9
    self.channelObservation = None
    self.prob_late = BlockchainNetworkingEnv.LATE_PROB
    self.cost_channels = [0.1, 0.1, 0.1, 0.1]
    # Blockchain parameters
    self.mempool = Mempool()
    self.userTransaction = Transaction()
    self.lastBlock = Block()
    self.hashRate = None
    self.doubleSpendSuccess = None
    # System parameters
    self.nb_past_observations = 4
    self.state_size = Mempool.NB_FEE_INTERVALS + 2 * self.nb_past_observations
    self.action_space = ActionSpace(self.nb_channels + 1)
    # Observation: ten fee components (presumably one per fee interval --
    # verify against Mempool.NB_FEE_INTERVALS), then an (action, channel)
    # pair for each past observation.
    fee_part = tuple(Discrete(Mempool.MAX_FEE) for _ in range(10))
    history_part = ()
    for _ in range(self.nb_past_observations):
        history_part += (ActionSpace(self.nb_channels + 1), ChannelSpace())
    self.observation_space = StateSpace(fee_part + history_part)
    # reward define
    self.totalReward = 0
    self.successReward = 0
    self.channelCost = 0
    self.transactionFee = 0
    self.cost = 0
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
class Markov():
    """Markov-chain model over a metric-derived discrete state space.

    `fit` counts empirical transitions in the training data and
    normalizes them into row-stochastic probabilities; `generate`
    then samples a synthetic state-index sequence from the fitted chain.

    NOTE(review): the indentation below was reconstructed from a
    collapsed (single-line) source; the exact nesting of the counter /
    accumulation logic in fit() and generate() is an assumption --
    verify against the original file or VCS history.
    """

    def __init__(self, metric_list, train_data):
        self.train_data = train_data
        self.metric_list = metric_list
        self.n_mets = len(self.metric_list)
        # state space
        self.X = StateSpace(self.metric_list, self.train_data)
        self.state_map = self.X.state_map
        self.inv_map = self.X.inverse_map
        self.h_map = self.X.h_state_map
        self.state_poss = self.X.state_poss_seq
        self.n_states = len(self.state_map)
        # Rows index history states, columns index successor tuples.
        self.trans_probs = np.zeros((len(self.h_map), len(self.state_poss)))
        # initialize data: seed the chain with one random state index
        self.markov_data = []
        self.markov_data.append(int(self.n_states * np.random.random()))

    def fit(self):
        """Accumulate transition counts and normalize them into probabilities."""
        self.state_seq = []
        print('Generating transition probabilities...')
        for i in range(self.X.pts_required,
                       len(self.train_data) - self.X.pts_required):
            self.state_seq.append(
                self.X.get_state(
                    self.train_data.iloc[i - self.X.pts_required:i]))
            state_tuple = self.inv_map[self.state_seq[-1]]
            h_seq = []
            s_seq = []
            ctr = 0
            for metric in self.X.metric_list:
                # Match this metric's state prefix against its possible
                # history sequences.
                for j in range(len(metric.poss_h_seq)):
                    if metric.poss_seq[state_tuple[ctr]][
                            0:metric.markov_mem] == metric.poss_h_seq[j]:
                        h_seq.append(j)
                        s_seq.append(
                            metric.poss_seq[state_tuple[ctr]][metric.markov_mem])
                ctr += 1
                # Once every metric has contributed, record one transition.
                if ctr == self.n_mets:
                    for k in range(len(self.state_poss)):
                        if tuple(s_seq) == self.state_poss[k]:
                            s_idx = k
                    row = self.h_map[tuple(h_seq)]
                    self.trans_probs[row, s_idx] += 1
        # Normalize each non-empty row into probabilities.
        row_cnt = []
        for i in range(len(self.trans_probs)):
            row_cnt.append(np.sum(self.trans_probs[i, :]))
        for i in range(len(self.trans_probs)):
            if row_cnt[i] != 0:
                self.trans_probs[i, :] = self.trans_probs[i, :] / row_cnt[i]
        print('done!')
        #return self.trans_probs

    def generate(self, data_len: int):
        """Sample `data_len` further states from the fitted chain.

        data_len = 93600 is 1Q worth of data.
        """
        print('generating markov data...')
        for i in range(data_len):
            state_tuple = self.inv_map[self.markov_data[-1]]
            seq = []
            h_seq = []
            s_seq = []
            ctr = 0
            for metric in self.X.metric_list:
                for j in range(len(metric.poss_h_seq)):
                    if metric.poss_seq[state_tuple[ctr]][
                            -metric.markov_mem:] == metric.poss_h_seq[j]:
                        h_seq.append(j)
                        seq.append(metric.poss_h_seq[j])
                        s_seq.append(
                            metric.poss_seq[state_tuple[ctr]][metric.markov_mem])
                ctr += 1
            # Draw the next composite state by inverse-CDF sampling over
            # the cumulative transition probabilities of this history row.
            row = self.h_map[tuple(h_seq)]
            rand = np.random.random()
            probs = []
            for k in range(len(self.trans_probs[row, :])):
                probs.append(np.sum(self.trans_probs[row, :k]))
            s_idx = 0
            while rand >= probs[s_idx] and s_idx < len(probs) - 1:
                s_idx += 1
            # Extend each metric's history tuple with the sampled value.
            for l in range(self.n_mets):
                seq[l] += tuple([self.state_poss[s_idx][l]])
            # Map the extended histories back to a composite state index.
            state = []
            ctr = 0
            for metric in self.X.metric_list:
                state.append(metric.markov_dict[seq[ctr]])
                ctr += 1
            s = self.state_map[tuple(state)]
            self.markov_data.append(s)
        print('done!')
        return self.markov_data
class Qagent():
    """Tabular Q-learning agent over a metric-derived state space.

    Actions come from an ActionSpace over `num_ccy` currencies at the
    given `precision`; states come from a StateSpace built from
    `metric_list` and `train_data`. Rewards are log-returns of the
    chosen allocation.
    """

    def __init__(self, num_ccy: int, precision: int, gamma: float,
                 metric_list: list, train_data: pd.DataFrame):
        self.num_ccy = num_ccy
        self.precision = precision
        self.gamma = gamma  # discount factor
        self.metric_list = metric_list
        self.train_data = train_data
        self.A = ActionSpace(self.num_ccy, self.precision)
        self.a_space = self.A.actions
        self.X = StateSpace(self.metric_list, self.train_data)
        self.state_map = self.X.state_map
        self.n_states = len(self.state_map)
        self.n_actions = len(self.a_space)
        # Q-values, plus per-(state, action) visit counts that train()
        # uses as a decaying learning-rate divisor.
        self.q_table = np.zeros((self.n_states, self.n_actions))
        self.lr_table = np.zeros((self.n_states, self.n_actions))

    def train(self, data=0, markov=False):
        """Fit the Q-table on the training data.

        Parameters
        ----------
        data : sequence of int, optional
            Precomputed state indices; used only when ``markov`` is True.
        markov : bool
            When True, states are read from ``data`` instead of being
            derived on the fly from ``self.train_data``.

        Returns
        -------
        tuple of np.ndarray
            ``(q_table, lr_table)`` after training.
        """
        if markov:
            train_start = 0
            train_end = len(data) - 1
        else:
            train_start = self.X.pts_required
            train_end = len(self.train_data) - 1
        print('Training q_table...')
        for i in range(train_start, train_end):
            # Current state index.
            if markov:
                s_idx = data[i]
            else:
                s_idx = self.X.get_state(
                    self.train_data.iloc[i - self.X.pts_required:i])
            # If all actions for a state still have zero reward, pick a
            # random action; otherwise act greedily.
            if np.sum(self.q_table[s_idx, :]) == 0:
                # randint selects ints from 0 up to but not including
                # n_actions.
                a_idx = np.random.randint(0, self.n_actions)
            else:
                a_idx = np.argmax(self.q_table[s_idx, :])
            # get action from action space
            a = self.a_space[a_idx]
            # Reward: log-return of allocating between a unit holding
            # and the asset's price relative.
            price = self.train_data.iloc[i - 1].values[0]
            next_price = self.train_data.iloc[i].values[0]
            b = [1, next_price / price]
            r = np.log(np.dot(a, b))
            # Harmonic learning-rate schedule: alpha = 1 / visit count.
            if self.lr_table[s_idx, a_idx] == 0:
                alpha = 1
            else:
                alpha = 1 / (self.lr_table[s_idx, a_idx])
            # Next state (assumes states are iid, per the original note).
            if markov:
                next_s_idx = data[i + 1]
            else:
                # FIX: was the bare name `train_data`, which is not in
                # scope here and raised NameError at runtime; the
                # training frame is stored on self.
                next_s_idx = self.X.get_state(
                    self.train_data.iloc[i + 1 - self.X.pts_required:i + 1])
            # Q-update for action a taken in state s.
            self.q_table[s_idx, a_idx] += r + alpha * (
                self.gamma * np.max(self.q_table[next_s_idx, :])
                - self.q_table[s_idx, a_idx])
            # update learning rate
            self.lr_table[s_idx, a_idx] += 1
        print('done!')
        return self.q_table, self.lr_table
from robots.circle_robot import CircleRobot

if __name__ == "__main__":
    # Grid resolution in metres.
    resolution_m = 0.01
    # StateSpace can take CircleRobot or RectangleRobot objects.
    robot = RectangleRobot(0.04, 0.02)  # rectangular footprint
    # Environment takes discrete values (continuous values divided by the
    # resolution): length, width, and a 2D array of obstacle parameters
    # [[l1, w1, x1, y1], [l2, w2, x2, y2], ..., [ln, wn, xn, yn]].
    env = Environment(30, 30, [[6, 2, 19, 17], [2, 2, 14, 26]])
    # Resolution (m), number of theta values, robot, and environment.
    state_space = StateSpace(resolution_m, 8, robot, env)
    planner = AStar(state_space)
    path = []
    # Start x, y and goal x, y, all in metres.
    pts = [0.1, 0.1, 0.2, 0.25]
    if planner.set_start(pts[0], pts[1], pi/4) and planner.set_goal(pts[2], pts[3], pi/4):
        # The planner reports whether it succeeded, how many expansions it
        # made, and the time taken (s).
        success, num_expansions, planning_time = planner.plan()
        if success:
            _, path = planner.extract_path()
def run_planner(env_parameters, render=None):
    """Run one A* planning episode for the given fuzzed parameters.

    Writes the parameters to a temp file, builds robot/environment/planner,
    plans, checks a set of bug oracles, optionally renders, logs a CSV row,
    and returns ``(error, success, num_expansions, planning_time)``.

    NOTE(review): indentation reconstructed from a collapsed source; the
    nesting of the oracle checks inside the try block is an assumption --
    verify against the original file.
    """
    global_vars[N_RUNS] += 1
    resolution_m = 0.01
    # Persist the raw parameters so a crash can be reproduced later.
    with open("temp-data-params.txt", "w") as f:
        f.write(",".join(env_parameters.astype(str)))
    obs_params = clamp_obs(env_parameters[:M])
    # Statespace can take a PointRobot, SquareRobot, RectangleRobot objects
    # robot = PointRobot(0, 0)
    # robot = CircleRobot(3)
    # robot = RectangleRobot(4,4)
    # robot = RectangleRobot(3,1)
    robot = RectangleRobot(0.04, 0.02)
    # Takes discrete values, divide continuous values by resolution
    # Parameters: environment length, width, 2D array with obstacle parameters
    # e.g. [[l1, w1, x1, y1], [l2, w2, x2, y2],..., [ln, wn, xn, yn]]
    if ORACLE_L[1]:
        env = Environment(30, 30, [])
    else:
        env = Environment(30, 30, [obs_params[:4], obs_params[4:8]])
    # Parameters: resolution (m), number of theta values, robot object,
    # and environment object
    state_space = StateSpace(resolution_m, 8, robot, env)
    planner = AStar(state_space)
    error = False
    path = ([], [])
    success, num_expansions, planning_time = True, 0, 0.0
    # Input x (m), y (m)
    if len(env_parameters) > 8:
        sx, sy, gx, gy = env_parameters[
            8:12] / 116 + 0.02  # ensure b/w 0.02 and 0.28
    else:
        sx, sy, gx, gy = [0.05, 0.05, 0.25, 0.25]
    # Optimizing orientation
    if len(env_parameters) > 12:
        so, go = env_parameters[12], env_parameters[13]
    else:
        so, go = pi / 4, pi / 4
    if not (planner.set_start(sx, sy, so)):
        success = False  # no expansions, since initial config was invalid
    if not (planner.set_goal(gx, gy, go)):
        success = False  # ditto
    # Planner return whether or not it was successful,
    # the number of expansions, and time taken (s)
    if success:
        try:
            success, num_expansions, planning_time = planner.plan()
            if success:
                print("Extracting path")
                path = planner.extract_path()
                # BUG 3 -- Oracle
                if path is None and ORACLE_L[2]:
                    print("ERROR: Incorrect goal")
                    error = True
                # BUG 4 -- Oracle
                elif (not planner.check_consistency(path[0])) and ORACLE_L[3]:
                    print("ERROR: Path not consistence")
                    error = True
            else:
                # BUG 2 -- Oracle
                if ORACLE_L[1]:
                    print('ERROR: Constrained goal')
                    error = True
            # BUG 1 -- Oracle
            if len(planner.visited) >= state_space.get_max_expansions(
            ) and ORACLE_L[0]:
                print("ERROR: Expanded every node in the environment")
                error = True
            # BUG 6 -- Oracle
            if planning_time >= TIMEOUT:
                print("Planning timed out")
                if ORACLE_L[5]:
                    error = True
        # BUG 5 -- Oracle
        except Exception:
            print("Unexpected error:", sys.exc_info())
            error = True
    # Render on error, or when explicitly requested.
    if error or render:
        filename = None
        if render == True or render is None:
            filename = save_vars[IMG_PATH]
        else:
            filename = render
        vis = Visualizer(env, state_space, robot)
        if path is None:
            path = ([], [])
        vis.visualize(path[1], filename=filename, start_end=[sx, sy, gx, gy])
    else:
        print("    ", end='')
    print("Success:", success, "\tExpansions:", num_expansions, "\tTime:",
          planning_time)
    # Log the run; `not not render` coerces render to a bool.
    save_vars[CSV_WRITER].writerow([
        env_parameters,
        error,
        not not render,
        success,
        num_expansions,
        planning_time,
    ])
    save_vars[DATA_FILE].flush()
    return error, success, num_expansions, planning_time