def __init__(self):
    # System parameters
    self.nb_ST = 2
    self.state_size = 2 * self.nb_ST + 1
    self.nb_actions = (Backscatter02Env.TIME_FRAME + 1) ** 3
    self.action_space = ActionSpace(
        (Discrete(Backscatter02Env.TIME_FRAME + 1),
         Discrete(Backscatter02Env.TIME_FRAME + 1),
         Discrete(Backscatter02Env.TIME_FRAME + 1)))
    self.observation_space = StateSpace(
        (Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
         Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
         Discrete(Backscatter02Env.TIME_FRAME + 1)))

    # initialize Second Transmitters
    self.ST1 = SecondTransmitor()
    self.ST2 = SecondTransmitor()

    self.busy_slot = None
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    # Simulation parameters
    self.nb_nodes = 200
    self.tx_size = 200      # bytes
    self.B_max = 8          # megabytes
    self.Ti_amx = 8         # seconds
    self.K_max = 8          # maximum shard number
    self.sign = 2           # MHz
    self.MAC = 1            # MHz
    self.batchsize = 3
    self.u = 6              # consecutive block confirm
    self.trans_prob = 0.1   # transition probability in the finite Markov chain

    # define action space & observation space
    self.action_space = ActionSpace(64)
    self.observation_space = spaces.Box(low=np.array([0, 1]),
                                        high=np.array([48, 8]),
                                        dtype=np.float32)

    self.seed()
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None

    # Load the state: the return values of ShardDist are assigned to R, c, H, e_prob.
    # Each of these components is updated to build the actual state used in main.
    self.R_transmission = None
    self.c_computing = None
    self.H_history = None
    self.e_prob = None
def __init__(self):
    # System parameters
    self.nb_MB = 3
    self.state_size = 2 * self.nb_MB
    self.nb_actions = (Mobile.MAX_DATA + 1) ** self.nb_MB * (Mobile.MAX_ENERGY + 1) ** self.nb_MB
    self.action_space = ActionSpace((Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                     Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                     Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1)))
    self.observation_space = StateSpace((Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                         Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                         Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY)))

    # initialize mobile devices
    self.MB1 = Mobile()
    self.MB2 = Mobile()
    self.MB3 = Mobile()

    self.max_data = self.nb_MB * Mobile.MAX_DATA
    self.max_energy = self.nb_MB * Mobile.MAX_ENERGY
    self.max_latency = Mobile.MAX_LATENCY
    self.training_time = 0
    self.training_data = 0

    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    # System parameters
    self.nb_ST = 3
    self.state_size = 2 * self.nb_ST
    self.nb_actions = ((BackscatterEnv3.BUSY_TIMESLOT + 1) ** 3
                       * (BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1) ** 2)
    self.action_space = ActionSpace(
        (Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
         Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
         Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
         Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1),
         Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1)))
    self.observation_space = StateSpace(
        (Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
         Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
         Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY)))

    # initialize Second Transmitters
    self.ST1 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
    self.ST2 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
    self.ST3 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)

    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    self.action_space = ActionSpace(3)
    self.observation_space = spaces.Tuple((Discrete(100), Discrete(100), Discrete(100)))
    # self.seed()
    self.viewer = None
    self.state = None
    self.market_value = 100
    self.alpha = -0.05
    self.ob = 0.1
    self.os = 0.15
    self.steps_beyond_done = None
def __init__(self):
    self.max_stake = 99
    self.max_action = 6
    self.nb_actions = 2 * self.max_action - 1
    self.action_space = spaces.Tuple((ActionSpace(self.max_action),
                                      ActionSpace(self.max_action),
                                      ActionSpace(self.max_action)))
    self.observation_space = spaces.Tuple((Discrete(self.max_stake),
                                           Discrete(self.max_stake),
                                           Discrete(self.max_stake)))
    # self.seed()
    self.viewer = None
    self.state = None
    self.alpha = -0.01
    self.alphaX = -0.01
    self.ob = 0.5
    self.os = 0.6
    self.list_v = []
    self.steps_beyond_done = None
    self.marketValue = MarketValue()
def __init__(self):
    self.url = 'http://www.trex-game.skipser.com/'
    self.capture_width = 500
    self.capture_height = 325
    self.sct = mss()
    self.bbox = {'top': 345, 'left': 120, 'width': 580, 'height': 65}
    self.terminal_bbox = {'top': 325, 'left': 310, 'width': 1, 'height': 1}
    self.game_over_sprite = Image.open('assets/G_game_over.png')
    self.action_space = ActionSpace()
    self.actions = self.action_space.actions
    self.state = None
    self.frame_history = deque(maxlen=4)
    self.logger = Logger()
def __init__(self, num_ccy: int, precision: int, gamma: float,
             metric_list: list, train_data: pd.DataFrame):
    self.num_ccy = num_ccy
    self.precision = precision
    self.gamma = gamma
    self.metric_list = metric_list
    self.train_data = train_data
    self.A = ActionSpace(self.num_ccy, self.precision)
    self.a_space = self.A.actions
    self.X = StateSpace(self.metric_list, self.train_data)
    self.state_map = self.X.state_map
    self.n_states = len(self.state_map)
    self.n_actions = len(self.a_space)
    self.q_table = np.zeros((self.n_states, self.n_actions))
    self.lr_table = np.zeros((self.n_states, self.n_actions))
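# A minimal sketch (not part of the original agent) of how the q_table above is
# typically used: epsilon-greedy selection plus a tabular Q-learning update.
# The helper name, the fixed learning rate, and the epsilon value are
# illustrative assumptions.
import numpy as np

def q_update_sketch(agent, s, a, reward, s_next, lr=0.1, epsilon=0.1):
    # choose the next action: explore with probability epsilon, otherwise greedy
    if np.random.rand() < epsilon:
        a_next = np.random.randint(agent.n_actions)
    else:
        a_next = int(np.argmax(agent.q_table[s_next]))
    # standard Q-learning target using the agent's discount factor gamma
    td_target = reward + agent.gamma * np.max(agent.q_table[s_next])
    agent.q_table[s, a] += lr * (td_target - agent.q_table[s, a])
    return a_next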
def __init__(self):
    # Channel parameters
    self.nb_channels = 4
    self.idleChannel = 1
    self.prob_switching = 0.9
    self.channelObservation = None
    self.prob_late = BlockchainNetworkingEnv.LATE_PROB
    self.cost_channels = [0.1, 0.1, 0.1, 0.1]

    # Blockchain parameters
    self.mempool = Mempool()
    self.userTransaction = Transaction()
    self.lastBlock = Block()
    self.hashRate = None
    self.doubleSpendSuccess = None

    # System parameters
    self.nb_past_observations = 4
    self.state_size = Mempool.NB_FEE_INTERVALS + 2 * self.nb_past_observations
    self.action_space = ActionSpace(self.nb_channels + 1)
    self.observation_space = StateSpace(
        (Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         ActionSpace(self.nb_channels + 1), ChannelSpace(),
         ActionSpace(self.nb_channels + 1), ChannelSpace(),
         ActionSpace(self.nb_channels + 1), ChannelSpace(),
         ActionSpace(self.nb_channels + 1), ChannelSpace()))

    # reward define
    self.totalReward = 0
    self.successReward = 0
    self.channelCost = 0
    self.transactionFee = 0
    self.cost = 0

    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
class FederatedLearningEnv(gym.Env):
    TIME_LIMIT = 10000
    DATA_LIMIT = 1500

    def __init__(self):
        # System parameters
        self.nb_MB = 3
        self.state_size = 2 * self.nb_MB
        self.nb_actions = (Mobile.MAX_DATA + 1) ** self.nb_MB * (Mobile.MAX_ENERGY + 1) ** self.nb_MB
        self.action_space = ActionSpace((Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                         Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                         Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1)))
        self.observation_space = StateSpace((Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                             Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                             Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY)))

        # initialize mobile devices
        self.MB1 = Mobile()
        self.MB2 = Mobile()
        self.MB3 = Mobile()

        self.max_data = self.nb_MB * Mobile.MAX_DATA
        self.max_energy = self.nb_MB * Mobile.MAX_ENERGY
        self.max_latency = Mobile.MAX_LATENCY
        self.training_time = 0
        self.training_data = 0

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        data_required1 = action[0]
        energy_required1 = action[1]
        data_required2 = action[2]
        energy_required2 = action[3]
        data_required3 = action[4]
        energy_required3 = action[5]

        data1, latency1, energy_consumption1, fault1 = self.MB1.update(data_required1, energy_required1)
        data2, latency2, energy_consumption2, fault2 = self.MB2.update(data_required2, energy_required2)
        data3, latency3, energy_consumption3, fault3 = self.MB3.update(data_required3, energy_required3)

        data = data1 + data2 + data3
        latency = max(latency1, latency2, latency3)
        energy_consumption = energy_consumption1 + energy_consumption2 + energy_consumption3
        fault = fault1 + fault2 + fault3

        state = [self.MB1.CPU_shared, self.MB1.energy,
                 self.MB2.CPU_shared, self.MB2.energy,
                 self.MB3.CPU_shared, self.MB3.energy]
        # print(state)
        self.state = tuple(state)

        self.training_data += data
        self.training_time += latency

        reward = 10 * (5 * data / self.max_data - latency / self.max_latency
                       - energy_consumption / self.max_energy) + fault

        if (self.training_data > FederatedLearningEnv.DATA_LIMIT):
            done = True
        else:
            done = False

        # if (fault < 0):
        #     print(fault)
        # print(np.array(self.state), action, [reward, data, latency, energy_consumption, fault], done)
        reward /= 10
        return np.array(self.state), [reward, data, latency, energy_consumption, data1, data2, data3], done, {}

    def reset(self):
        self.state = []
        self.MB1.reset()
        self.MB2.reset()
        self.MB3.reset()
        state = [self.MB1.CPU_shared, self.MB1.energy,
                 self.MB2.CPU_shared, self.MB2.energy,
                 self.MB3.CPU_shared, self.MB3.energy]
        self.state = tuple(state)
        self.training_time = 0
        self.training_data = 0
        print(self.state)
        self.steps_beyond_done = None
        return np.array(self.state)

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment how
        to run (such as an address of a remote server, or path to your ImageNet
        data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()


# env = FederatedLearningEnv()
# env.reset()
# for index in range(0, 100):
#     env.step(env.action_space.sample())
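# A hedged usage sketch mirroring the commented-out loop above: drive the
# environment with random allocations until the data limit ends the episode.
# The unpacking follows the list returned by step() in this class.
if __name__ == "__main__":
    env = FederatedLearningEnv()
    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        state, (reward, data, latency, energy, d1, d2, d3), done, _ = env.step(action)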
class Env(object):
    """The abstract environment class that is used by all agents. This class has the exact
    same API that OpenAI Gym uses so that integrating with it is trivial. In contrast to the
    OpenAI Gym implementation, this class only defines the abstract methods without any actual
    implementation.
    """

    def __init__(self):
        self.robot = Robot((84, 84))
        self.token = np.array([14, -5, 0])
        self.previousAction = 0
        self.iteration = 0

    reward_range = (-1, 1)
    action_space = ActionSpace()
    observation_space = ObservationSpace()

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        Accepts an action and returns a tuple (observation, reward, done, info).

        Args:
            action (object): an action provided by the environment

        Returns:
            observation (object): agent's observation of the current environment
            reward (float): amount of reward returned after previous action
            done (boolean): whether the episode has ended, in which case further
                step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for
                debugging, and sometimes learning)
        """
        self.previousAction = action
        pos1 = self.robot.position
        self.robot.executeAction(action)
        pos2 = self.robot.position + self.robot.direction
        self.iteration += 1
        isDone = self.iteration > 50 or np.array_equal(self.robot.position, self.token)

        reward = 0
        if self.iteration > 50:
            reward = -1
        elif isDone == True:
            reward = 1
        elif (np.linalg.norm(self.token - pos1) > np.linalg.norm(self.token - pos2)):
            reward = 0.25
        else:
            reward = -0.25

        self.robot.update()
        observation = getObservation(self.robot, self.token)
        return observation, reward, isDone, {'info': 'test'}

    def reset(self):
        """Resets the state of the environment and returns an initial observation.

        Returns:
            observation (object): the initial observation of the space.
            (Initial reward is assumed to be 0.)
        """
        self.robot.reset()
        self.iteration = 0
        return np.zeros((84, 84, 3), dtype=np.uint8)

    def render(self, mode='human', close=False):
        """Renders the environment.

        The set of supported modes varies per environment. (And some environments
        do not support rendering at all.) By convention, if mode is:

        - human: render to the current display or terminal and return nothing.
          Usually for human consumption.
        - rgb_array: Return an numpy.ndarray with shape (x, y, 3), representing
          RGB values for an x-by-y pixel image, suitable for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines and
          ANSI escape sequences (e.g. for colors).

        Note:
            Make sure that your class's metadata 'render.modes' key includes the
            list of supported modes. It's recommended to call super() in
            implementations to use the functionality of this method.

        Args:
            mode (str): the mode to render with
            close (bool): close all open renderings
        """
        text = str(self.iteration) + " "
        text += "Robot: " + str(self.robot.position) + "; "
        text += "Token " + str(self.token) + "; "
        text += "Action " + str(self.previousAction) + ";"
        sys.stdout.write('\r' + str(text) + ' ' * 20)
        sys.stdout.flush()

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        return 1

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
            number generators. The first value in the list should be the "main"
            seed, or the value which a reproducer should pass to 'seed'. Often,
            the main seed equals the provided 'seed', but this won't be true if
            seed=None, for example.
        """
        return [1]

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment
        how to run (such as an address of a remote server, or path to your
        ImageNet data). It should not affect the semantics of the environment.
        """
        return 1

    def __del__(self):
        self.close()

    def __str__(self):
        return '<{} instance>'.format(type(self).__name__)
class BCnetenv(gym.Env):
    '''
    Actions:
        Type: flat Discrete(512), decoded in step() as a MultiDiscrete([4, 8, 16]):
        1) Block_Size : Discrete 4
           - 2MB[0], 4MB[1], 6MB[2], 8MB[3]
           - params: min: 2, max: 8 (megabytes)
        2) number of shards (K) : Discrete 8
           - 1[0], 2[1], ..., 8[7]
           - params: min: 1, max: 8
        3) Time Interval : Discrete 16
           - 0.5[0], 1.0[1], ..., 8.0[15]
           - params: min: 0.5, max: 8 (seconds)

        The flat index enumerates the three sub-actions:
            0, 0, 0  -> 0
            0, 0, 1  -> 1
            ...
            3, 7, 15 -> 511

    state space:
        Type:
        Num   state                          Min     Max      format
        0     data transmission link         10 MHz  100 MHz  n x n
        1     computing capability           10 GHz  30 GHz   n x 1
        2     consensus history              0       1        n x n
        3     estimated faulty probability   0       1/3      n x 1

        Type: Box(2)
        num   observation            min   max
        0     latency                0     48
        1     required shard limit   1     8
    '''

    def __init__(self):
        # Simulation parameters
        self.nb_nodes = 200
        self.tx_size = 200      # bytes
        self.B_max = 8          # megabytes
        self.Ti_amx = 8         # seconds
        self.K_max = 8          # maximum shard number
        self.sign = 2           # MHz
        self.MAC = 1            # MHz
        self.batchsize = 3
        self.u = 6              # consecutive block confirm
        self.trans_prob = 0.5   # transition probability in the finite Markov chain

        # define action space & observation space
        self.action_space = ActionSpace(512)
        self.observation_space = spaces.Box(low=np.array([0, 1]),
                                            high=np.array([48, 8]),
                                            dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

        # Load the state: the return values of ShardDist are assigned to R, c, H, e_prob.
        # Each of these components is updated to build the actual state used in main.
        self.R_transmission = None
        self.c_computing = None
        self.H_history = None
        self.e_prob = None
        self.reward = 0

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        # update the state space as the step function proceeds
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        state = self.state
        R, C, H, e_prob = state

        a = action // 128                    # block-size index (0~3) -> 2, 4, 6, 8 MB
        b = (action - 128 * a) // 16         # shard index (0~7) -> 1..8 shards
        c = (action - (128 * a) - (16 * b))  # interval index (0~15) -> 0.5..8.0 s
        b_size = 2 * (a + 1)        # block size 2, 4, 6, 8 (4 levels)
        t_interval = 0.5 * (c + 1)  # time interval 0.5, 1, 1.5, ..., 8 (16 levels)
        n_shard = b + 1             # number of shards 1..8 (8 levels)

        # Update R based on the finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                random_number = random.random()
                if (R[i, j] == 10 * (10**6)):
                    if (random_number < self.trans_prob):
                        R[i, j] += 10 * (10**6)
                        R[j, i] = R[i, j]
                elif (R[i, j] == 100 * (10**6)):
                    if (random_number < self.trans_prob):
                        R[i, j] -= 10 * (10**6)
                        R[j, i] = R[i, j]
                else:
                    if (random_number < self.trans_prob):
                        R[i, j] += 10 * (10**6)
                        R[j, i] = R[i, j]
                    elif (self.trans_prob <= random_number < 2 * self.trans_prob):
                        R[i, j] -= 10 * (10**6)
                        R[j, i] = R[i, j]

        # Update C based on the finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            for j in range(0, self.nb_nodes):
                random_number = random.random()
                if (C[i, j] == 10 * (10**9)):
                    if (random_number < self.trans_prob):
                        C[i, j] += 5 * (10**9)
                elif (C[i, j] == 30 * (10**9)):
                    if (random_number < self.trans_prob):
                        C[i, j] -= 5 * (10**9)
                else:
                    if (random_number < self.trans_prob):
                        C[i, j] += 5 * (10**9)
                    elif (self.trans_prob <= random_number < 2 * self.trans_prob):
                        C[i, j] -= 5 * (10**9)

        env2 = ShardDistribute()
        H, e_prob, NodesInShard, Success_ratio, FCP = env2.ShardDist(n_shard)
        self.state = [R, C, H, e_prob]

        e_p = e_prob[0, 0]  # pre-load the variable used in the constraint check

        # For the latency computation, extract max/min values from R and C and
        # compute every latency component.
        # Set the values of M, theta, C_numb, alpha, B, timeout.
        M = 3
        theta = 2 * (10**6)
        alpha = 2 * 10**6
        beta = 10**6
        B = b_size * 8 * 10**6
        timeout = 2.2  # 2.2 / 3.2; range from 0.64 up to 6.4 s, threshold set around the midpoint
        nb_nodes = self.nb_nodes

        ### latency computation (sharding, when n_shard >= 2)
        if (n_shard >= 2):
            C_numb = len(NodesInShard[n_shard])

            # 1) validation time within each intra shard
            T_k_in_val = []
            primary = []
            for K in range(n_shard):
                primary.append(NodesInShard[K][random.randint(0, len(NodesInShard[K]) - 1)])
                T_in_val = []
                for i in NodesInShard[K]:
                    if (i == primary[K]):
                        T_in_val.append(
                            (M * theta + (M * (1 + C_numb) + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
                    else:
                        T_in_val.append(
                            (M * theta + (C_numb * M + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
                T_k_in_val.append(max(T_in_val))
            T_k_in_val = (1 / M) * max(T_k_in_val)

            # 2) propagation time within each intra shard
            T_k_in_prop = []
            for K in range(n_shard):
                T_in_preprepare = []
                T_in_prepare = []
                T_in_commit = []
                for i in NodesInShard[K]:
                    for j in NodesInShard[K]:
                        if (j != i):
                            if (i == primary[K]):
                                T_in_preprepare.append((M * B) / R[i, j])
                            else:
                                T_in_prepare.append((M * B) / R[i, j])
                                T_in_commit.append((M * B) / R[i, j])
                T_k_in_prop.append(min(max(T_in_preprepare), timeout)
                                   + min(max(T_in_prepare), timeout)
                                   + min(max(T_in_commit), timeout))
            T_k_in_prop = (1 / M) * max(T_k_in_prop)

            # 3) validation time in the DC (final shard)
            primary_DC = NodesInShard[n_shard][random.randint(0, len(NodesInShard[n_shard]) - 1)]
            T_k_f_val = []
            for i in NodesInShard[n_shard]:
                if (i == primary_DC):
                    T_k_f_val.append(
                        (n_shard * M * theta
                         + (n_shard * M + 4 * (C_numb - 1) + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
                else:
                    T_k_f_val.append(
                        (n_shard * M * theta
                         + (4 * (C_numb - 1) + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
            T_k_f_val = (1 / M) * max(T_k_f_val)

            # 4) propagation time in the DC (final shard)
            T_k_f_request = []
            T_k_f_preprepare = []
            T_k_f_prepare = []
            T_k_f_commit = []
            T_k_f_reply = []
            for i in primary:
                for j in NodesInShard[n_shard]:
                    T_k_f_request.append((M * B) / R[i, j])
            for i in NodesInShard[n_shard]:
                for j in NodesInShard[n_shard]:
                    if (j != i):
                        if (i == primary_DC):
                            T_k_f_preprepare.append((M * B) / R[i, j])
                        else:
                            T_k_f_prepare.append((M * B) / R[i, j])
                            T_k_f_commit.append((M * B) / R[i, j])
            for i in NodesInShard[n_shard]:
                for j in primary:
                    T_k_f_reply.append((M * B) / R[i, j])
            T_k_f_prop = (1 / M) * (min(max(T_k_f_request), timeout)
                                    + min(max(T_k_f_preprepare), timeout)
                                    + min(max(T_k_f_prepare), timeout)
                                    + min(max(T_k_f_commit), timeout)
                                    + min(max(T_k_f_request), timeout))

            # The final latency is the block interval plus the four times above.
            Tlatency = t_interval + (T_k_in_val + T_k_in_prop + T_k_f_val + T_k_f_prop)

        else:
            # plain PBFT when there is a single shard
            T_V = []
            client = random.randint(0, nb_nodes - 1)
            primary = random.randint(0, nb_nodes - 1)
            while (primary == client):
                primary = random.randint(0, nb_nodes - 1)
            for i in range(nb_nodes):
                if (i == primary):
                    T_V.append((M * alpha + beta * (2 * M + 4 * (nb_nodes - 1))) / C[i][0])
                elif (i != client):
                    T_V.append((M * alpha + beta * (M + 4 * (nb_nodes - 1))) / C[i][0])
            T_V = (1 / M) * max(T_V)

            t1 = min((M * B / R[client, primary]), timeout)
            t2 = []
            for i in range(nb_nodes):
                if ((i != client) & (i != primary)):
                    t2.append(M * B / R[primary, i])
            t2 = min(max(t2), timeout)
            t3 = []
            for i in range(nb_nodes):
                for j in range(nb_nodes):
                    if ((j != i) & (i != client) & (j != client)):
                        t3.append(M * B / R[i, j])
            t3 = min(max(t3), timeout)
            t4 = []
            for i in range(nb_nodes):
                for j in range(nb_nodes):
                    if (j != i):
                        t4.append(M * B / R[i, j])
            t4 = min(max(t4), timeout)
            t5 = []
            for i in range(nb_nodes):
                for j in range(nb_nodes):
                    if (i != client):
                        t5.append(M * B / R[i, client])
            t5 = min(max(t5), timeout)
            T_D = (1 / M) * (t1 + t2 + t3 + t4 + t5)
            Tlatency = t_interval + T_V + T_D

        ### constraint (latency & shard)
        done_t = Tlatency > self.u * t_interval
        constraint = 0

        ### const 1
        if n_shard == 1:
            done_n = False
        else:
            constraint = (self.nb_nodes * (1 - (3 * e_p)) - 1) / (3 * self.nb_nodes * e_p + 1)
            done_n = (n_shard >= constraint)  # lemma 1

        #### const 2
        # constraint = (((2*self.nb_nodes) / (3*(self.nb_nodes * e_p +1))) -1)
        # done_n = n_shard >= (((2*self.nb_nodes) / (3*(self.nb_nodes * e_p +1))) -1)  # lemma 2
        # done_n = False  # no security bound

        done = done_t or done_n
        done = bool(done)

        # successful shards = prob * K
        # If done is 1, i.e. the episode ended because a constraint was violated, the reward is 0.
        # If done is 0, the reward reflects the TPS.
        reward = self.reward
        if not done:
            reward = Success_ratio * M * ((n_shard * (math.floor(
                (b_size / self.tx_size) * 1000 * 1000))) / t_interval)
        elif self.steps_beyond_done is None:
            # step beyond done?
            self.steps_beyond_done = 0
        else:
            # episode is already done
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. "
                    "You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0

        self.reward = float(reward)
        print('reward', reward)
        const = [Tlatency, b_size, t_interval, n_shard, constraint, done_t, done_n, e_p, FCP]
        print(const)

        ##### Apply the state change so R, C, H, e_prob are updated.
        ### The values to return are the updated R, C, H, e_prob after the action is applied.
        return self.state, self.reward, done, const, {}
        ## In CartPole self.state is wrapped in an np.array; here it has to stay unpacked.

    def reset(self):
        # state space -> reset R, c, H, e_prob
        R_transmission = np.zeros((self.nb_nodes, self.nb_nodes))
        c_computing = np.zeros((self.nb_nodes, 1))
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                R_transmission[i, j] = random.randrange(10, 101, 10)
                R_transmission[j, i] = R_transmission[i, j]
        R_transmission = (10**6) * R_transmission  # 200 x 200

        for i in range(0, self.nb_nodes):
            c_computing[i] = random.randrange(10, 31, 5)
        c_computing = (10**9) * c_computing  # 200 x 1
        c_computing = np.kron(c_computing, np.ones((1, self.nb_nodes)))  # expand to 200 x 200 before assigning

        n_shard = random.randrange(1, 9)
        env2 = ShardDistribute()
        H, e_prob, NodesInShard, Success_ratio, FCP = env2.ShardDist(n_shard)  # get H and e_prob via ShardDist

        self.state = [R_transmission, c_computing, H, e_prob]
        return self.state
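# A small illustrative decoder for the flat action used in step() above: a single
# integer in [0, 512) is split into block-size, shard-count, and block-interval
# indices. The helper name is an assumption; the arithmetic mirrors step().
def decode_action_sketch(action):
    a = action // 128               # block-size index: 0..3  -> 2, 4, 6, 8 MB
    b = (action - 128 * a) // 16    # shard index:      0..7  -> 1..8 shards
    c = action - 128 * a - 16 * b   # interval index:   0..15 -> 0.5..8.0 s
    return 2 * (a + 1), b + 1, 0.5 * (c + 1)

# e.g. decode_action_sketch(0) == (2, 1, 0.5); decode_action_sketch(511) == (8, 8, 8.0)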
class BlockchainEnv(gym.Env):

    def __init__(self):
        self.action_space = ActionSpace(3)
        self.observation_space = spaces.Tuple((Discrete(100), Discrete(100), Discrete(100)))
        # self.seed()
        self.viewer = None
        self.state = None
        self.market_value = 100
        self.alpha = -0.05
        self.ob = 0.1
        self.os = 0.15
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        state = self.state
        state_list = list(state)
        action = min(action, state_list[0])
        actions = np.array([action, self.action_space.sample(), self.action_space.sample()])

        for index in range(len(state)):
            win_prob = state[index] * 1.0 / sum(state)
            if (win_prob > np.random.rand(1)):
                state_list[index] = state_list[index] - actions[index] + 1
            else:
                state_list[index] = state_list[index] - actions[index]

        state = tuple(state_list)
        self.state = state
        self.market_value += sum(actions) * self.alpha

        if (action > 0):
            # selling
            reward = action * self.market_value - self.ob
        elif (action < 0):
            reward = action * self.market_value - self.os
        else:
            reward = 0

        done = sum(state) == 0
        done = bool(done)
        return np.array(self.state), reward, done, {}

    def reset(self):
        self.state = self.observation_space.sample()
        print(self.state)
        self.steps_beyond_done = None
        self.market_value = 100
        return np.array(self.state)

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment how
        to run (such as an address of a remote server, or path to your ImageNet
        data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()
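# A hedged usage sketch for BlockchainEnv above: reset, then take a few random
# actions. Assumes ActionSpace exposes sample(), as step() itself does; the
# step budget is arbitrary.
if __name__ == "__main__":
    env = BlockchainEnv()
    state = env.reset()
    for _ in range(10):
        state, reward, done, _ = env.step(env.action_space.sample())
        if done:
            break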
import torch
import torch.optim as optim
import torch.nn as nn

from dnn import NeuralNetwork
import config
from action_space import ActionSpace

# if gpu is to be used
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# Instantiate action space
actionSpace = ActionSpace()

# Shortcuts
Tensor = torch.Tensor
LongTensor = torch.LongTensor


class QNetAgent(object):

    def __init__(self):
        self.nn = NeuralNetwork().to(device)

        self.loss_func = nn.MSELoss()
        # self.loss_func = nn.SmoothL1Loss()

        self.optimizer = optim.Adam(params=self.nn.parameters(), lr=config.learning_rate)
        # self.optimizer = optim.RMSprop(params=mynn.parameters(), lr=learning_rate)

    def select_action(self, state, epsilon):
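        # NOTE: the body of select_action is cut off in this extract. What follows
        # is a minimal epsilon-greedy sketch, not the original implementation; the
        # greedy branch and the use of actionSpace.actions are assumptions.
        if torch.rand(1).item() < epsilon:
            # explore: pick a random action index from the shared action space
            return int(torch.randint(len(actionSpace.actions), (1,)).item())
        with torch.no_grad():
            # exploit: pick the action with the highest predicted Q-value
            q_values = self.nn(Tensor(state).to(device))
        return int(torch.argmax(q_values).item())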
# orderbook_test = Orderbook(extraFeatures=False)
# orderbook_test.loadFromBitfinexFile('orderbook_bitfinex_btcusd_view.tsv')

import pandas as pd

# Load orderbook
cols = ["ts", "seq", "size", "price", "is_bid", "is_trade", "ttype"]
events = pd.read_table('ob-1-small.tsv', sep='\t', names=cols, index_col="seq")
d = Orderbook.generateDictFromEvents(events)
orderbook = Orderbook()
orderbook.loadFromDict(d)

# clean first n states (due to lack of bids and asks)
print("#States: " + str(len(orderbook.states)))
for i in range(100):
    orderbook.states.pop(0)
    del d[list(d.keys())[0]]
orderbook_test = orderbook
# orderbook.plot()

T = [0, 10, 20, 40, 60, 80, 100]       # , 120, 240]
T_test = [0, 10, 20, 40, 60, 80, 100]  # , 120, 240]
I = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

actionSpace = ActionSpace(orderbook, side, T, I, ai, levels)
actionSpace_test = ActionSpace(orderbook_test, side, T_test, I, ai, levels)

# priceReturnCurve(crossval=1)
from agent_utils.ui import UI
UI.animate(run_profit, interval=100)
# UI.animate(run_q_reward, interval=1000)
class BlockchainNetworkingEnv(gym.Env):
    SUCCESS_REWARD = 5
    LATE_PROB = 1
    MAX_ATTACK = 0.1

    def __init__(self):
        # Channel parameters
        self.nb_channels = 4
        self.idleChannel = 1
        self.prob_switching = 0.9
        self.channelObservation = None
        self.prob_late = BlockchainNetworkingEnv.LATE_PROB
        self.cost_channels = [0.1, 0.1, 0.1, 0.1]

        # Blockchain parameters
        self.mempool = Mempool()
        self.userTransaction = Transaction()
        self.lastBlock = Block()
        self.hashRate = None
        self.doubleSpendSuccess = None

        # System parameters
        self.nb_past_observations = 4
        self.state_size = Mempool.NB_FEE_INTERVALS + 2 * self.nb_past_observations
        self.action_space = ActionSpace(self.nb_channels + 1)
        self.observation_space = StateSpace(
            (Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             ActionSpace(self.nb_channels + 1), ChannelSpace(),
             ActionSpace(self.nb_channels + 1), ChannelSpace(),
             ActionSpace(self.nb_channels + 1), ChannelSpace(),
             ActionSpace(self.nb_channels + 1), ChannelSpace()))

        # reward define
        self.totalReward = 0
        self.successReward = 0
        self.channelCost = 0
        self.transactionFee = 0
        self.cost = 0

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))

        # reset the rewards
        self.totalReward = 0
        self.successReward = 0
        self.channelCost = 0
        self.transactionFee = 0
        self.prob_late = None
        self.attacked = False

        state = list(self.state)

        # 1. User's transaction initialization
        self.userTransaction = Transaction()
        if (len(self.lastBlock.blockTransaction) != 0):
            self.userTransaction.estimateFee(self.lastBlock)

        # 2. The channel state changes - single idle channel, round robin switching
        if (np.random.rand() < self.prob_switching):
            self.idleChannel = (self.idleChannel + 1) % self.nb_channels
        # print(self.idleChannel)

        # 3. Mempool updates - some new transactions come
        self.mempool.generateNewTransactions()

        # if user does not submit transaction
        if (action == 0):
            self.totalReward = 0
            self.channelObservation = 2
            # miners mine a block
            self.lastBlock.mineBlock(self.mempool)
        # if user submits transaction
        else:
            self.channelCost = self.cost_channels[action - 1]
            # in case the channel is idle
            if ((action - 1) == self.idleChannel):
                self.prob_late = 0
                self.channelObservation = 1
            # if the channel is busy, the transaction can arrive after the mining process starts
            else:
                self.prob_late = BlockchainNetworkingEnv.LATE_PROB
                self.channelObservation = 0

            # if the transaction comes late
            if (np.random.rand() < self.prob_late):
                # mining process occurs before user's transaction is added
                # 4. Miners start mining; transactions included in the block are removed from the mempool
                self.lastBlock.mineBlock(self.mempool)
                self.mempool.listTransactions.append(self.userTransaction)
                self.transactionFee = self.userTransaction.transactionFee
            else:
                self.mempool.listTransactions.append(self.userTransaction)
                # 4. Miners start mining; transactions included in the block are removed from the mempool
                self.lastBlock.mineBlock(self.mempool)
                self.transactionFee = self.userTransaction.transactionFee

        # 5. Attack process
        self.hashRate = np.random.uniform(0, BlockchainNetworkingEnv.MAX_ATTACK)
        self.doubleSpendSuccess = 2 * self.hashRate
        if (np.random.rand() < self.doubleSpendSuccess):
            self.attacked = True

        # if user's transaction is successfully added into the block -> reward = SUCCESS_REWARD
        if (self.userTransaction in self.lastBlock.blockTransaction and not self.attacked):
            self.successReward = BlockchainNetworkingEnv.SUCCESS_REWARD

        self.totalReward = self.successReward - self.channelCost - self.transactionFee
        self.cost = self.channelCost + self.transactionFee

        # 6. determine new state
        self.mempool.updateMempoolState()
        for index in range(0, Mempool.NB_FEE_INTERVALS):
            state[index] = self.mempool.mempoolState[index]
        state.insert(Mempool.NB_FEE_INTERVALS, action)
        state.insert(Mempool.NB_FEE_INTERVALS + 1, self.channelObservation)
        state.pop()
        state.pop()
        self.state = tuple(state)

        done = False
        # print(np.array(self.state), [self.totalReward, self.cost], done, {})
        return np.array(self.state), [
            self.totalReward, self.channelCost, self.transactionFee, self.cost
        ], done, {}

    def reset(self):
        self.state = []
        self.mempool.resetMempool()
        self.idleChannel = 1
        for index in range(0, len(self.mempool.mempoolState)):
            self.state.append(self.mempool.mempoolState[index])
        for obs_index in range(0, self.nb_past_observations):
            self.state.append(0)
            self.state.append(2)
        print(self.state)
        self.steps_beyond_done = None
        return np.array(self.state)

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment how
        to run (such as an address of a remote server, or path to your ImageNet
        data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()


# env = BlockchainNetworkingEnv()
# env.reset()
# for index in range(0, 50):
#     env.step(np.random.randint(0, env.nb_channels))
class BackscatterEnv3(gym.Env):
    TIME_FRAME = 10
    BUSY_TIMESLOT = 4
    DATA_RATE = 0.3

    def __init__(self):
        # System parameters
        self.nb_ST = 3
        self.state_size = 2 * self.nb_ST
        self.nb_actions = ((BackscatterEnv3.BUSY_TIMESLOT + 1) ** 3
                           * (BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1) ** 2)
        self.action_space = ActionSpace(
            (Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1)))
        self.observation_space = StateSpace(
            (Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
             Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
             Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY)))

        # initialize Second Transmitters
        self.ST1 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
        self.ST2 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
        self.ST3 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        harvest = action[0]
        backscatter_time_1 = action[1]
        backscatter_time_2 = action[2]
        transmit_time_1 = action[3]
        transmit_time_2 = action[4]
        backscatter_time_3 = BackscatterEnv3.BUSY_TIMESLOT - harvest - backscatter_time_1 - backscatter_time_2
        transmit_time_3 = BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT - transmit_time_1 - transmit_time_2

        reward = 0
        if ((backscatter_time_3 >= 0) and (transmit_time_3 >= 0)):
            harvest_time_1 = BackscatterEnv3.BUSY_TIMESLOT - backscatter_time_1
            harvest_time_2 = BackscatterEnv3.BUSY_TIMESLOT - backscatter_time_2
            harvest_time_3 = BackscatterEnv3.BUSY_TIMESLOT - backscatter_time_3

            reward += self.ST1.update(harvest_time_1, backscatter_time_1, transmit_time_1)
            reward += self.ST2.update(harvest_time_2, backscatter_time_2, transmit_time_2)
            reward += self.ST3.update(harvest_time_3, backscatter_time_3, transmit_time_3)
            throughput = reward

            datawaiting_before = self.ST1.queue
            self.ST1.generateData()
            self.ST2.generateData()
            self.ST3.generateData()
            datawaiting = self.ST1.queue

            state = [self.ST1.queue, self.ST1.energy,
                     self.ST2.queue, self.ST2.energy,
                     self.ST3.queue, self.ST3.energy]
            self.state = tuple(state)
        else:
            # in case the time assignment is not feasible
            reward = -10
            throughput = 0
            datawaiting_before = self.ST1.queue
            if (self.ST1.queue == SecondTransmitor.QUEUE and self.ST2.queue == SecondTransmitor.QUEUE
                    and self.ST3.queue == SecondTransmitor.QUEUE):
                self.ST1.reset()
                self.ST2.reset()
                self.ST3.reset()
            else:
                self.ST1.generateData()
                self.ST2.generateData()
                self.ST3.generateData()
            datawaiting = self.ST1.queue
            state = [self.ST1.queue, self.ST1.energy,
                     self.ST2.queue, self.ST2.energy,
                     self.ST3.queue, self.ST3.energy]
            self.state = tuple(state)

        print(np.array(self.state), reward, datawaiting, action)
        done = False
        # print(np.array(self.state), reward, done, {})
        return np.array(self.state), [reward, throughput, datawaiting_before, datawaiting], done, {}

    def reset(self):
        self.state = []
        self.ST1.reset()
        self.ST2.reset()
        self.ST3.reset()
        state = [self.ST1.queue, self.ST1.energy,
                 self.ST2.queue, self.ST2.energy,
                 self.ST3.queue, self.ST3.energy]
        self.state = tuple(state)
        print(self.state)
        self.steps_beyond_done = None
        return np.array(self.state)

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment how
        to run (such as an address of a remote server, or path to your ImageNet
        data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()


# env = BackscatterEnv3()
# env.reset()
# for index in range(0, 1000):
#     env.step(env.action_space.sample())
class BCnetenv(gym.Env):
    '''
    Actions:
        Type: MultiDiscrete form.
        1) Block_Size(shard) : Discrete 4
           - 2MB[0], 4MB[1], 6MB[2], 8MB[3]
           - params: min: 2, max: 8 (megabytes)
        2) Time Interval : Discrete 4
           - 2[0], 4[1], 6[2], 8[3]
           - params: min: 2, max: 8 (seconds)
        3) number of shard (K) : Discrete 4
           - 1[0], 2[1], 4[2], 8[3]
           - params: min: 1, max: 8

        MultiDiscrete([4, 4, 4]) -> we use a flat discrete expression (64)
            0, 0, 0 -> 0
            0, 0, 1 -> 1
            0, 0, 2 -> 2
            ...
            3, 3, 3 -> 63

    state space:
        Type:
        Num   state                          Min     Max      format
        0     data transmission link         10 MHz  100 MHz  n x n
        1     computing capability           10 GHz  30 GHz   n x 1
        2     consensus history              0       1        n x n
        3     estimated faulty probability   0       1/3      n x 1

        Type: Box(2)
        num   observation            min   max
        0     latency                0     48
        1     required shard limit   1     8
    '''

    def __init__(self):
        # Simulation parameters
        self.nb_nodes = 200
        self.tx_size = 200      # bytes
        self.B_max = 8          # megabytes
        self.Ti_amx = 8         # seconds
        self.K_max = 8          # maximum shard number
        self.sign = 2           # MHz
        self.MAC = 1            # MHz
        self.batchsize = 3
        self.u = 6              # consecutive block confirm
        self.trans_prob = 0.1   # transition probability in the finite Markov chain

        # define action space & observation space
        self.action_space = ActionSpace(64)
        self.observation_space = spaces.Box(low=np.array([0, 1]),
                                            high=np.array([48, 8]),
                                            dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

        # Load the state: the return values of ShardDist are assigned to R, c, H, e_prob.
        # Each of these components is updated to build the actual state used in main.
        self.R_transmission = None
        self.c_computing = None
        self.H_history = None
        self.e_prob = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        # update the state space as the step function proceeds
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        state = self.state
        # Copy self.state into the local variable state (self.state must be reassigned later to update it).
        # state consists of R, C, H, e_prob.
        # When an action is taken, read the current state, apply the action, and produce the next state.
        R, C, H, e_prob = state

        ## decode the selected action into local variables
        a = action // 16        # quotient of action divided by 16
        b = (action % 16) // 4  # remainder of action mod 16, divided by 4
        c = action % 4          # remainder of action divided by 4
        b_size = 2 * (a + 1)      # block size 2, 4, 6, 8
        t_interval = 2 * (b + 1)  # time interval 2, 4, 6, 8
        n_shard = 2 ** c          # number of shards 1, 2, 4, 8

        # Update R based on the finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                random_number = random.random()
                if (R[i, j] == 10 * (10**6)):
                    if (random_number < self.trans_prob):
                        R[i, j] += 10 * (10**6)
                        R[j, i] = R[i, j]
                elif (R[i, j] == 100 * (10**6)):
                    if (random_number < self.trans_prob):
                        R[i, j] -= 10 * (10**6)
                        R[j, i] = R[i, j]
                else:
                    if (random_number < self.trans_prob):
                        R[i, j] += 10 * (10**6)
                        R[j, i] = R[i, j]
                    elif (self.trans_prob <= random_number < 2 * self.trans_prob):
                        R[i, j] -= 10 * (10**6)
                        R[j, i] = R[i, j]

        # Update C based on the finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            random_number = random.random()
            if (C[i] == 10 * (10**9)):
                if (random_number < self.trans_prob):
                    C[i] += 5 * (10**9)
            elif (C[i] == 30 * (10**9)):
                if (random_number < self.trans_prob):
                    C[i] -= 5 * (10**9)
            else:
                if (random_number < self.trans_prob):
                    C[i] += 5 * (10**9)
                elif (self.trans_prob <= random_number < 2 * self.trans_prob):
                    C[i] -= 5 * (10**9)

        # Compute H and e_prob via ShardDist.
        env2 = ShardDistribute()
        H, e_prob, NodesInShard = env2.ShardDist(n_shard)
        self.state = [R, C, H, e_prob]

        ## e_prob obtained here is n x 1; take a scalar so it can be used in the constraint below.
        e_p = e_prob[0, 0]  # pre-load the variable used in the constraint check

        ### latency computation
        # For the latency computation, extract max/min values from R and C and
        # compute every latency component.
        # Set the values of M, theta, C_numb, alpha, B, timeout.
        M = 3
        theta = 2 * (10**6)
        C_numb = len(NodesInShard[n_shard])
        alpha = 10**6
        B = b_size
        timeout = 1000000000000000000000000  # no timeout

        # 1) validation time within each intra shard
        T_k_in_val = []
        primary = []
        for K in range(n_shard):
            primary.append(NodesInShard[K][random.randint(0, len(NodesInShard[K]) - 1)])
            T_in_val = []
            for i in NodesInShard[K]:
                if (i == primary[K]):
                    T_in_val.append((M * theta + (M * (1 + C_numb) + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
                else:
                    T_in_val.append((M * theta + (C_numb * M + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
            T_k_in_val.append(max(T_in_val))
        T_k_in_val = (1 / M) * max(T_k_in_val)

        # 2) propagation time within each intra shard
        T_k_in_prop = []
        for K in range(n_shard):
            T_in_preprepare = []
            T_in_prepare = []
            T_in_commit = []
            for i in NodesInShard[K]:
                for j in NodesInShard[K]:
                    if (j != i):
                        if (i == primary[K]):
                            T_in_preprepare.append((M * B) / R[i, j])
                        else:
                            T_in_prepare.append((M * B) / R[i, j])
                            T_in_commit.append((M * B) / R[i, j])
            T_k_in_prop.append(min(max(T_in_preprepare), timeout)
                               + min(max(T_in_prepare), timeout)
                               + min(max(T_in_commit), timeout))
        T_k_in_prop = (1 / M) * max(T_k_in_prop)

        # 3) validation time in the DC (final shard)
        primary_DC = NodesInShard[n_shard][random.randint(0, len(NodesInShard[n_shard]) - 1)]
        T_k_f_val = []
        for i in NodesInShard[n_shard]:
            if (i == primary_DC):
                T_k_f_val.append((n_shard * M * theta
                                  + (n_shard * M + 4 * (C_numb - 1) + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
            else:
                T_k_f_val.append((n_shard * M * theta
                                  + (4 * (C_numb - 1) + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
        T_k_f_val = (1 / M) * max(T_k_f_val)

        # 4) propagation time in the DC (final shard)
        T_k_f_request = []
        T_k_f_preprepare = []
        T_k_f_prepare = []
        T_k_f_commit = []
        T_k_f_reply = []
        for i in primary:
            for j in NodesInShard[n_shard]:
                T_k_f_request.append((M * B) / R[i, j])
        for i in NodesInShard[n_shard]:
            for j in NodesInShard[n_shard]:
                if (j != i):
                    if (i == primary_DC):
                        T_k_f_preprepare.append((M * B) / R[i, j])
                    else:
                        T_k_f_prepare.append((M * B) / R[i, j])
                        T_k_f_commit.append((M * B) / R[i, j])
        for i in NodesInShard[n_shard]:
            for j in primary:
                T_k_f_reply.append((M * B) / R[i, j])
        T_k_f_prop = (1 / M) * (min(max(T_k_f_request), timeout)
                                + min(max(T_k_f_preprepare), timeout)
                                + min(max(T_k_f_prepare), timeout)
                                + min(max(T_k_f_commit), timeout)
                                + min(max(T_k_f_request), timeout))

        # The final latency is the block interval plus the four times above.
        Tlatency = t_interval + (T_k_in_val + T_k_in_prop + T_k_f_val + T_k_f_prop)

        ### constraint (latency & shard)
        done = Tlatency > self.u * t_interval \
            or n_shard >= (self.nb_nodes * (1 - (3 * e_p)) - 1) / (3 * self.nb_nodes * e_p + 1)
        done = bool(done)

        # If done is 1, i.e. the episode ended because a constraint was violated, the reward is 0.
        # If done is 0, the reward reflects the TPS.
        if not done:
            reward = (n_shard * (math.floor((b_size / self.tx_size) * 1024 * 1024))) / t_interval
        elif self.steps_beyond_done is None:
            # step beyond done?
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. "
                            "You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        ##### Apply the state change so R, C, H, e_prob are updated.
        ### The values to return are the updated R, C, H, e_prob after the action is applied.
        return self.state, reward, done, {}
        ## In CartPole self.state is wrapped in an np.array; here it has to stay unpacked,
        ## because R, C, H, e_prob all have different dimensions and np.array would raise a shape error.
        # Through the assignment self.state = [R, C, H, e_prob] above, each element becomes part of the state.
        # state[0], state[1], state[2], state[3] return R, C, H, e_prob respectively.

    def reset(self):
        # state space -> reset R, c, H
        R_transmission = np.zeros((self.nb_nodes, self.nb_nodes))
        c_computing = np.zeros((self.nb_nodes, 1))
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                R_transmission[i, j] = random.randrange(10, 101, 10)
                R_transmission[j, i] = R_transmission[i, j]
        R_transmission = (10**6) * R_transmission

        for i in range(0, self.nb_nodes):
            c_computing[i] = random.randrange(10, 31, 5)
        c_computing = (10**9) * c_computing

        n_shard = 2 ** (random.randrange(1, 5) - 1)
        env2 = ShardDistribute()
        H, e_prob, NodesInShard = env2.ShardDist(n_shard)

        self.state = [R_transmission, c_computing, H, e_prob]
        return self.state
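# A small illustrative decoder for the flat 64-way action described in the class
# docstring: action -> (block size in MB, block interval in s, shard count).
# The helper name is an assumption; the arithmetic mirrors step().
def decode_action64_sketch(action):
    a = action // 16        # block-size index 0..3 -> 2, 4, 6, 8 MB
    b = (action % 16) // 4  # interval index   0..3 -> 2, 4, 6, 8 s
    c = action % 4          # shard index      0..3 -> 1, 2, 4, 8 shards
    return 2 * (a + 1), 2 * (b + 1), 2 ** c

# e.g. decode_action64_sketch(0) == (2, 2, 1); decode_action64_sketch(63) == (8, 8, 8)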
import random
import time

import torch
from torch.utils.tensorboard import SummaryWriter

import config
from action_space import ActionSpace
from experience_replay import ExperienceReplay
from qnet import QNetAgent

# if gpu is to be used
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

Tensor = torch.Tensor
LongTensor = torch.LongTensor

random_seed = 42
torch.manual_seed(random_seed)
random.seed(random_seed)

writer = SummaryWriter()
actionSpace = ActionSpace()
memory = ExperienceReplay(config.replay_mem_size)
qnet_agent = QNetAgent()

steps_total = []
frames_total = 0
solved_after = 0
solved = False

start_time = time.time()

# Main loop
step = 0
total_reward = 0
done = False
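# A minimal, hedged helper of the kind the main loop set up above typically
# needs: a linearly decaying exploration rate. The start/end/decay-step
# constants are illustrative assumptions, not values taken from config.
def calculate_epsilon(steps_done, eps_start=0.9, eps_end=0.05, decay_steps=10000):
    # interpolate from eps_start down to eps_end over decay_steps frames
    fraction = min(float(steps_done) / decay_steps, 1.0)
    return eps_start + fraction * (eps_end - eps_start)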