Example #1
 def set_from_tuple(self, dat):
     self.ninput = dat[0]
     self.noutput = dat[1]
     self.ioutput = dat[2]
     self.state_space_lag = [None] * self.ninput
     self.state_space_red = [None] * self.ninput
     for i in xrange(self.ninput):
         self.state_space_lag[i] = StateSpace()
         self.state_space_lag[i].set_from_tuple(dat[3][i])
         self.state_space_red[i] = StateSpace()
         self.state_space_red[i].set_from_tuple(dat[5][i])
     self.ann_lag.set_from_tuple(dat[4])
     self.ann_red.set_from_tuple(dat[6])
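The indices accessed above imply a fixed layout for dat; sketched below as comments, with the nested entries left as placeholders since their formats are defined by StateSpace.set_from_tuple and the ann objects' own set_from_tuple:

# Layout implied by set_from_tuple (placeholders, not real data):
# dat = (ninput, noutput, ioutput,
#        [lag_tuple_0, ..., lag_tuple_ninput-1],   # dat[3], one per input
#        ann_lag_tuple,                            # dat[4]
#        [red_tuple_0, ..., red_tuple_ninput-1],   # dat[5], one per input
#        ann_red_tuple)                            # dat[6]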
Example #2
    def __init__(self):

        # System parameters
        self.nb_ST = 3
        self.state_size = 2 * self.nb_ST
        self.nb_actions = (BackscatterEnv3.BUSY_TIMESLOT + 1)**3 * (
            BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1)**2

        self.action_space = ActionSpace(
            (Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.TIME_FRAME -
                      BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.TIME_FRAME -
                      BackscatterEnv3.BUSY_TIMESLOT + 1)))

        self.observation_space = StateSpace(
            (Discrete(SecondTransmitor.QUEUE), Discrete(
                SecondTransmitor.ENERGY), Discrete(SecondTransmitor.QUEUE),
             Discrete(SecondTransmitor.ENERGY),
             Discrete(SecondTransmitor.QUEUE),
             Discrete(SecondTransmitor.ENERGY)))

        # initialize Second Transmitters
        self.ST1 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
        self.ST2 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
        self.ST3 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None
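nb_actions above is simply the product of the sizes of the five Discrete components of the action space. A quick arithmetic check with hypothetical values for the class constants (the real BUSY_TIMESLOT and TIME_FRAME live on BackscatterEnv3 and are not shown in this excerpt):

# Hypothetical constants, for illustration only
BUSY_TIMESLOT, TIME_FRAME = 4, 10
nb_actions = (BUSY_TIMESLOT + 1) ** 3 * (TIME_FRAME - BUSY_TIMESLOT + 1) ** 2
# = 5**3 * 7**2 = 6125 joint actions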
Example #3
    def __init__(self):

        # System parameters
        self.nb_MB = 3
        self.state_size = 2 * self.nb_MB
        self.nb_actions = (Mobile.MAX_DATA + 1) ** self.nb_MB * (Mobile.MAX_ENERGY + 1) ** self.nb_MB

        self.action_space = ActionSpace((Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                         Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                         Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1)
                                         ))

        self.observation_space = StateSpace((Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                             Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                             Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY)))

        # initialize Mobiles
        self.MB1 = Mobile()
        self.MB2 = Mobile()
        self.MB3 = Mobile()

        self.max_data = self.nb_MB * Mobile.MAX_DATA
        self.max_energy = self.nb_MB * Mobile.MAX_ENERGY
        self.max_latency = Mobile.MAX_LATENCY

        self.training_time = 0
        self.training_data = 0

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None
Example #4
    def __init__(self):

        # System parameters
        self.nb_ST = 2
        self.state_size = 2 * self.nb_ST + 1
        self.nb_actions = (Backscatter02Env.TIME_FRAME + 1)**3

        self.action_space = ActionSpace(
            (Discrete(Backscatter02Env.TIME_FRAME + 1),
             Discrete(Backscatter02Env.TIME_FRAME + 1),
             Discrete(Backscatter02Env.TIME_FRAME + 1)))

        self.observation_space = StateSpace(
            (Discrete(SecondTransmitor.QUEUE), Discrete(
                SecondTransmitor.ENERGY), Discrete(SecondTransmitor.QUEUE),
             Discrete(SecondTransmitor.ENERGY),
             Discrete(Backscatter02Env.TIME_FRAME + 1)))

        # initialize Second Transmitters
        self.ST1 = SecondTransmitor()
        self.ST2 = SecondTransmitor()
        self.busy_slot = None

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None
Example #5
 def from_dictionary(self, dict_data):
     self.ninput = dict_data['ninput']
     self.noutput = dict_data['noutput']
     self.ioutput = dict_data['ioutput']
     self.state_space_lag = [None] * self.ninput
     for i in xrange(self.ninput):
         self.state_space_lag[i] = StateSpace()
         self.state_space_lag[i].from_dictionary(
             dict_data['state_space_lag'][i])
     self.state_space_red = [None] * self.ninput
     for i in xrange(self.ninput):
         self.state_space_red[i] = StateSpace()
         self.state_space_red[i].from_dictionary(
             dict_data['state_space_red'][i])
     self.ann_lag.from_dictionary(dict_data['ann_lag'])
     self.ann_red.from_dictionary(dict_data['ann_red'])
Example #6
    def __init__(self, metric_list, train_data):

        self.train_data = train_data
        self.metric_list = metric_list
        self.n_mets = len(self.metric_list)

        # state space
        self.X = StateSpace(self.metric_list, self.train_data)
        self.state_map = self.X.state_map
        self.inv_map = self.X.inverse_map
        self.h_map = self.X.h_state_map
        self.state_poss = self.X.state_poss_seq
        self.n_states = len(self.state_map)

        self.trans_probs = np.zeros((len(self.h_map), len(self.state_poss)))

        # initialize data
        self.markov_data = []
        self.markov_data.append(int(self.n_states * np.random.random()))
Example #7
    def __init__(self, num_ccy: int, precision: int, gamma: float,
                 metric_list: list, train_data: pd.DataFrame):

        self.num_ccy = num_ccy
        self.precision = precision
        self.gamma = gamma
        self.metric_list = metric_list
        self.train_data = train_data

        self.A = ActionSpace(self.num_ccy, self.precision)
        self.a_space = self.A.actions
        self.X = StateSpace(self.metric_list, self.train_data)
        self.state_map = self.X.state_map

        self.n_states = len(self.state_map)
        self.n_actions = len(self.a_space)

        self.q_table = np.zeros((self.n_states, self.n_actions))
        self.lr_table = np.zeros((self.n_states, self.n_actions))
Example #8
def main():
    agent = ddpg
    total_reward = 0
    num_of_states = 2
    num_of_actions = 1
    reward = np.array([0])

    # Main loop of the algorithm for a single episode
    for t in range(steps):
        s_t = StateSpace.output()
Example #9
def test_step():
    # create a 4x4 state space
    state_space = StateSpace(4,
                             4,
                             goal_1_position=(1, 3),
                             goal_2_position=(3, 3),
                             forbidden_position=(2, 3),
                             wall_position=(2, 2))

    # start state
    start_state = state_space.get_state(0, 1)

    # create a new agent with this state space
    agent = Agent(
        state_space=state_space,
        start_state=start_state,
        learning_rate=0.1,
        discount_rate=0.2,
        exploit_prob=0.1,
        living_reward=-0.1,
    )
    # step
    assert agent.step() is not None
Example #10
    def __init__(self):
        # Channel parameters
        self.nb_channels = 4
        self.idleChannel = 1
        self.prob_switching = 0.9
        self.channelObservation = None
        self.prob_late = BlockchainNetworkingEnv.LATE_PROB
        self.cost_channels = [0.1, 0.1, 0.1, 0.1]

        # Blockchain parameters
        self.mempool = Mempool()
        self.userTransaction = Transaction()
        self.lastBlock = Block()
        self.hashRate = None
        self.doubleSpendSuccess = None

        # System parameters
        self.nb_past_observations = 4

        self.state_size = Mempool.NB_FEE_INTERVALS + 2 * self.nb_past_observations

        self.action_space = ActionSpace(self.nb_channels + 1)
        self.observation_space = StateSpace(
            (Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             ActionSpace(self.nb_channels + 1), ChannelSpace(),
             ActionSpace(self.nb_channels + 1), ChannelSpace(),
             ActionSpace(self.nb_channels + 1), ChannelSpace(),
             ActionSpace(self.nb_channels + 1), ChannelSpace()))
        # reward define
        self.totalReward = 0
        self.successReward = 0
        self.channelCost = 0
        self.transactionFee = 0
        self.cost = 0

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None
Example #11
class Markov():
    def __init__(self, metric_list, train_data):

        self.train_data = train_data
        self.metric_list = metric_list
        self.n_mets = len(self.metric_list)

        # state space
        self.X = StateSpace(self.metric_list, self.train_data)
        self.state_map = self.X.state_map
        self.inv_map = self.X.inverse_map
        self.h_map = self.X.h_state_map
        self.state_poss = self.X.state_poss_seq
        self.n_states = len(self.state_map)

        self.trans_probs = np.zeros((len(self.h_map), len(self.state_poss)))

        # initialize data
        self.markov_data = []
        self.markov_data.append(int(self.n_states * np.random.random()))

    def fit(self):
        self.state_seq = []
        print('Generating transition probabilities...')
        for i in range(self.X.pts_required,
                       len(self.train_data) - self.X.pts_required):

            self.state_seq.append(
                self.X.get_state(self.train_data.iloc[i -
                                                      self.X.pts_required:i]))
            state_tuple = self.inv_map[self.state_seq[-1]]

            h_seq = []
            s_seq = []
            ctr = 0
            for metric in self.X.metric_list:
                for j in range(len(metric.poss_h_seq)):
                    if metric.poss_seq[state_tuple[ctr]][
                            0:metric.markov_mem] == metric.poss_h_seq[j]:
                        h_seq.append(j)

                s_seq.append(
                    metric.poss_seq[state_tuple[ctr]][metric.markov_mem])
                ctr += 1

                if ctr == self.n_mets:
                    for k in range(len(self.state_poss)):
                        if tuple(s_seq) == self.state_poss[k]:
                            s_idx = k

            row = self.h_map[tuple(h_seq)]
            self.trans_probs[row, s_idx] += 1

        row_cnt = []
        for i in range(len(self.trans_probs)):
            row_cnt.append(np.sum(self.trans_probs[i, :]))

        for i in range(len(self.trans_probs)):
            if row_cnt[i] != 0:
                self.trans_probs[i, :] = self.trans_probs[i, :] / row_cnt[i]

        print('done!')
        #return self.trans_probs

    def generate(self, data_len: int):
        """data_len = 93600 is 1Q worth of data"""

        print('generating markov data...')
        for i in range(data_len):

            state_tuple = self.inv_map[self.markov_data[-1]]

            seq = []
            h_seq = []
            s_seq = []
            ctr = 0
            for metric in self.X.metric_list:
                for j in range(len(metric.poss_h_seq)):
                    if metric.poss_seq[state_tuple[ctr]][
                            -metric.markov_mem:] == metric.poss_h_seq[j]:
                        h_seq.append(j)
                        seq.append(metric.poss_h_seq[j])

                s_seq.append(
                    metric.poss_seq[state_tuple[ctr]][metric.markov_mem])
                ctr += 1

            row = self.h_map[tuple(h_seq)]
            rand = np.random.random()

            probs = []
            for k in range(len(self.trans_probs[row, :])):
                probs.append(np.sum(self.trans_probs[row, :k]))

            s_idx = 0
            while rand >= probs[s_idx] and s_idx < len(probs) - 1:
                s_idx += 1

            for l in range(self.n_mets):
                seq[l] += tuple([self.state_poss[s_idx][l]])

            state = []
            ctr = 0

            for metric in self.X.metric_list:
                state.append(metric.markov_dict[seq[ctr]])
                ctr += 1

            s = self.state_map[tuple(state)]

            self.markov_data.append(s)

        print('done!')
        return self.markov_data
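A minimal usage sketch for this class, assuming metric_list and train_data have already been prepared in whatever format this project's StateSpace expects (both are project-specific and not shown here):

# Hypothetical driver; metric_list / train_data come from the surrounding project
markov = Markov(metric_list, train_data)
markov.fit()                        # estimate trans_probs from train_data
synthetic = markov.generate(93600)  # roughly one quarter's worth of synthetic state indices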
Example #12
class Qagent():
    def __init__(self, num_ccy: int, precision: int, gamma: float,
                 metric_list: list, train_data: pd.DataFrame):

        self.num_ccy = num_ccy
        self.precision = precision
        self.gamma = gamma
        self.metric_list = metric_list
        self.train_data = train_data

        self.A = ActionSpace(self.num_ccy, self.precision)
        self.a_space = self.A.actions
        self.X = StateSpace(self.metric_list, self.train_data)
        self.state_map = self.X.state_map

        self.n_states = len(self.state_map)
        self.n_actions = len(self.a_space)

        self.q_table = np.zeros((self.n_states, self.n_actions))
        self.lr_table = np.zeros((self.n_states, self.n_actions))

    def train(self, data=0, markov=False):

        if markov == True:
            train_start = 0
            train_end = len(data) - 1
        else:
            train_start = self.X.pts_required
            train_end = len(self.train_data) - 1

        print('Training q_table...')
        for i in range(train_start, train_end):

            # initialize state
            if markov == True:
                s_idx = data[i]
            else:
                s_idx = self.X.get_state(
                    self.train_data.iloc[i - self.X.pts_required:i])

            # if all actions for a state have zero reward, randomly select an action
            if np.sum(self.q_table[s_idx, :]) == 0:
                # randint randomly selects ints from 0 up to but not including n_actions
                a_idx = np.random.randint(0, self.n_actions)

            # select the action with largest reward value in state s
            else:
                a_idx = np.argmax(self.q_table[s_idx, :])

            # get action from action space
            a = self.a_space[a_idx]

            # get new state from state space assuming states are iid
            price = self.train_data.iloc[i - 1].values[0]
            next_price = self.train_data.iloc[i].values[0]
            b = [1, next_price / price]
            z = -1
            # calculate reward
            r = np.log(np.dot(a, b))
            #print(r)

            # choose learning rate
            if self.lr_table[s_idx, a_idx] == 0:
                alpha = 1
            else:
                alpha = 1 / (self.lr_table[s_idx, a_idx])

            # update q table for action a taken on state s
            if markov == True:
                next_s_idx = data[i + 1]
            else:
                next_s_idx = self.X.get_state(
                    self.train_data.iloc[i + 1 - self.X.pts_required:i + 1])

            #print(next_s_idx)
            self.q_table[s_idx, a_idx] += r + alpha * (self.gamma * np.max(
                self.q_table[next_s_idx, :]) - self.q_table[s_idx, a_idx])

            # update learning rate
            self.lr_table[s_idx, a_idx] += 1

        print('done!')
        return self.q_table, self.lr_table
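A usage sketch along the same lines, with illustrative constructor arguments (num_ccy=2 and precision=10 are assumptions; the ActionSpace semantics are project-specific) and, optionally, the state sequence produced by the Markov class above:

# Hypothetical driver; argument values are illustrative only
agent = Qagent(num_ccy=2, precision=10, gamma=0.9,
               metric_list=metric_list, train_data=train_data)
q_table, lr_table = agent.train()  # train on the raw data
# or, reusing Markov-generated state indices:
# q_table, lr_table = agent.train(data=markov_data, markov=True)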
Example #13
from robots.circle_robot import CircleRobot

if __name__ == "__main__":
    resolution_m = 0.01

    # Statespace can take CircleRobot or RectangleRobot objects
    robot = RectangleRobot(0.04, 0.02) #Rectangle

    # Takes discrete values, divide continuous values by resolution
    # Parameters: environment length, width, 2D array with obstacle parameters
    # e.g. [[l1, w1, x1, y1], [l2, w2, x2, y2],..., [ln, wn, xn, yn]]
    env = Environment(30, 30, [[6, 2, 19, 17], [2, 2, 14, 26]])

    # Parameters: resolution (m), number of theta values, robot object, 
    # and environment object 
    state_space = StateSpace(resolution_m, 8, robot, env)

    planner = AStar(state_space)

    path = []
    pts = [0.1, 0.1, 0.2, 0.25]
    # Input x (m), y (m)
    if planner.set_start(pts[0], pts[1], pi/4) and planner.set_goal(pts[2], pts[3], pi/4):

        # Planner returns whether or not it was successful,
        # the number of expansions, and time taken (s)
        success, num_expansions, planning_time = planner.plan()

        if success:
            _, path = planner.extract_path()

def run_planner(env_parameters, render=None):
    global_vars[N_RUNS] += 1

    resolution_m = 0.01
    with open("temp-data-params.txt", "w") as f:
        f.write(",".join(env_parameters.astype(str)))
    obs_params = clamp_obs(env_parameters[:M])

    # Statespace can take a PointRobot, SquareRobot, RectangleRobot objects
    # robot = PointRobot(0, 0)
    # robot = CircleRobot(3)
    # robot = RectangleRobot(4,4)
    # robot = RectangleRobot(3,1)
    robot = RectangleRobot(0.04, 0.02)

    # Takes discrete values, divide continuous values by resolution
    # Parameters: environment length, width, 2D array with obstacle parameters
    # e.g. [[l1, w1, x1, y1], [l2, w2, x2, y2],..., [ln, wn, xn, yn]]
    if ORACLE_L[1]:
        env = Environment(30, 30, [])
    else:
        env = Environment(30, 30, [obs_params[:4], obs_params[4:8]])

    # Parameters: resolution (m), number of theta values, robot object,
    # and environment object
    state_space = StateSpace(resolution_m, 8, robot, env)

    planner = AStar(state_space)
    error = False
    path = ([], [])
    success, num_expansions, planning_time = True, 0, 0.0

    # Input x (m), y (m)
    if len(env_parameters) > 8:
        sx, sy, gx, gy = env_parameters[
            8:12] / 116 + 0.02  # ensure b/w 0.02 and 0.28
    else:
        sx, sy, gx, gy = [0.05, 0.05, 0.25, 0.25]

    # Optimizing orientation
    if len(env_parameters) > 12:
        so, go = env_parameters[12], env_parameters[13]
    else:
        so, go = pi / 4, pi / 4

    if not (planner.set_start(sx, sy, so)):
        success = False  # no expansions, since initial config was invalid
    if not (planner.set_goal(gx, gy, go)):
        success = False  # ditto

    # Planner returns whether or not it was successful,
    # the number of expansions, and time taken (s)
    if success:
        try:
            success, num_expansions, planning_time = planner.plan()
            if success:
                print("Extracting path")
                path = planner.extract_path()

                # BUG 3 -- Oracle
                if path is None and ORACLE_L[2]:
                    print("ERROR: Incorrect goal")
                    error = True
                # BUG 4 -- Oracle
                elif (not planner.check_consistency(path[0])) and ORACLE_L[3]:
                    print("ERROR: Path not consistence")
                    error = True

            else:
                # BUG 2 -- Oracle
                if ORACLE_L[1]:
                    print('ERROR: Constrained goal')
                    error = True
                # BUG 1 -- Oracle
                if (len(planner.visited) >= state_space.get_max_expansions()
                        and ORACLE_L[0]):
                    print("ERROR: Expanded every node in the environment")
                    error = True

            # BUG 6 -- Oracle
            if planning_time >= TIMEOUT:
                print("Planning timed out")
                if ORACLE_L[5]:
                    error = True

        # BUG 5 -- Oracle
        except Exception:
            print("Unexpected error:", sys.exc_info())
            error = True

    if error or render:
        filename = None
        if render == True or render is None:
            filename = save_vars[IMG_PATH]
        else:
            filename = render
        vis = Visualizer(env, state_space, robot)
        if path is None:
            path = ([], [])
        vis.visualize(path[1], filename=filename, start_end=[sx, sy, gx, gy])
    else:
        print("    ", end='')

    print("Success:", success, "\tExpansions:", num_expansions, "\tTime:",
          planning_time)
    save_vars[CSV_WRITER].writerow([
        env_parameters,
        error,
        not not render,
        success,
        num_expansions,
        planning_time,
    ])
    save_vars[DATA_FILE].flush()

    return error, success, num_expansions, planning_time
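ORACLE_L is indexed by bug number: ORACLE_L[0] guards the BUG 1 check and ORACLE_L[5] the BUG 6 timeout check. A sketch of a single call, assuming the module-level globals (ORACLE_L, TIMEOUT, M, clamp_obs, global_vars, save_vars) have been configured by the surrounding harness:

# Hypothetical driver; the first eight values are two obstacles as [l, w, x, y]
import numpy as np

env_parameters = np.array([6, 2, 19, 17, 2, 2, 14, 26], dtype=float)
error, success, num_expansions, planning_time = run_planner(env_parameters)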