def __init__(self):
    # System parameters
    self.nb_ST = 2
    self.state_size = 2 * self.nb_ST + 1
    self.nb_actions = (Backscatter02Env.TIME_FRAME + 1) ** 3
    self.action_space = ActionSpace(
        (Discrete(Backscatter02Env.TIME_FRAME + 1),
         Discrete(Backscatter02Env.TIME_FRAME + 1),
         Discrete(Backscatter02Env.TIME_FRAME + 1)))
    self.observation_space = StateSpace(
        (Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
         Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
         Discrete(Backscatter02Env.TIME_FRAME + 1)))

    # initialize Second Transmitters
    self.ST1 = SecondTransmitor()
    self.ST2 = SecondTransmitor()

    self.busy_slot = None
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    # Simulation parameters
    self.nb_nodes = 200
    self.tx_size = 200      # bytes
    self.B_max = 8          # megabytes
    self.Ti_amx = 8         # seconds
    self.K_max = 8          # maximum shard number
    self.sign = 2           # MHz
    self.MAC = 1            # MHz
    self.batchsize = 3
    self.u = 6              # consecutive block confirm
    self.trans_prob = 0.1   # transition probability in the finite Markov chain

    # define action space & observation space
    self.action_space = ActionSpace(64)
    self.observation_space = spaces.Box(low=np.array([0, 1]),
                                        high=np.array([48, 8]),
                                        dtype=np.float32)

    self.seed()
    self.viewer = None
    self.state = None
    self.steps_beyond_done = None

    # Load the state: the return values of ShardDist are assigned to R, c, H, e_prob.
    # Each of these components is updated to build the actual state used in main.
    self.R_transmission = None
    self.c_computing = None
    self.H_history = None
    self.e_prob = None
def __init__(self):
    # System parameters
    self.nb_MB = 3
    self.state_size = 2 * self.nb_MB
    self.nb_actions = (Mobile.MAX_DATA + 1) ** self.nb_MB * (Mobile.MAX_ENERGY + 1) ** self.nb_MB
    self.action_space = ActionSpace((Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                     Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                     Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1)))
    self.observation_space = StateSpace((Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                         Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                         Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY)))

    # initialize mobile devices
    self.MB1 = Mobile()
    self.MB2 = Mobile()
    self.MB3 = Mobile()

    self.max_data = self.nb_MB * Mobile.MAX_DATA
    self.max_energy = self.nb_MB * Mobile.MAX_ENERGY
    self.max_latency = Mobile.MAX_LATENCY
    self.training_time = 0
    self.training_data = 0

    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    # System parameters
    self.nb_ST = 3
    self.state_size = 2 * self.nb_ST
    self.nb_actions = ((BackscatterEnv3.BUSY_TIMESLOT + 1) ** 3
                       * (BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1) ** 2)
    self.action_space = ActionSpace(
        (Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
         Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
         Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
         Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1),
         Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1)))
    self.observation_space = StateSpace(
        (Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
         Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
         Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY)))

    # initialize Second Transmitters
    self.ST1 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
    self.ST2 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
    self.ST3 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)

    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
def __init__(self):
    self.action_space = ActionSpace(3)
    self.observation_space = spaces.Tuple((Discrete(100), Discrete(100), Discrete(100)))
    # self.seed()
    self.viewer = None
    self.state = None
    self.market_value = 100
    self.alpha = -0.05
    self.ob = 0.1
    self.os = 0.15
    self.steps_beyond_done = None
def __init__(self):
    self.max_stake = 99
    self.max_action = 6
    self.nb_actions = 2 * self.max_action - 1
    self.action_space = spaces.Tuple((ActionSpace(self.max_action),
                                      ActionSpace(self.max_action),
                                      ActionSpace(self.max_action)))
    self.observation_space = spaces.Tuple((Discrete(self.max_stake),
                                           Discrete(self.max_stake),
                                           Discrete(self.max_stake)))
    # self.seed()
    self.viewer = None
    self.state = None
    self.alpha = -0.01
    self.alphaX = -0.01
    self.ob = 0.5
    self.os = 0.6
    self.list_v = []
    self.steps_beyond_done = None
    self.marketValue = MarketValue()
def __init__(self):
    self.url = 'http://www.trex-game.skipser.com/'
    self.capture_width = 500
    self.capture_height = 325
    self.sct = mss()
    self.bbox = {'top': 345, 'left': 120, 'width': 580, 'height': 65}
    self.terminal_bbox = {'top': 325, 'left': 310, 'width': 1, 'height': 1}
    self.game_over_sprite = Image.open('assets/G_game_over.png')
    self.action_space = ActionSpace()
    self.actions = self.action_space.actions
    self.state = None
    self.frame_history = deque(maxlen=4)
    self.logger = Logger()
def __init__(self, num_ccy: int, precision: int, gamma: float,
             metric_list: list, train_data: pd.DataFrame):
    self.num_ccy = num_ccy
    self.precision = precision
    self.gamma = gamma
    self.metric_list = metric_list
    self.train_data = train_data
    self.A = ActionSpace(self.num_ccy, self.precision)
    self.a_space = self.A.actions
    self.X = StateSpace(self.metric_list, self.train_data)
    self.state_map = self.X.state_map
    self.n_states = len(self.state_map)
    self.n_actions = len(self.a_space)
    self.q_table = np.zeros((self.n_states, self.n_actions))
    self.lr_table = np.zeros((self.n_states, self.n_actions))
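# A minimal sketch (not part of the original agent) of how the q_table above is
# typically used: epsilon-greedy selection plus a tabular Q-learning update.
# The helper name, the fixed learning rate, and the epsilon value are
# illustrative assumptions.
import numpy as np

def q_update_sketch(agent, s, a, reward, s_next, lr=0.1, epsilon=0.1):
    # choose the next action: explore with probability epsilon, otherwise greedy
    if np.random.rand() < epsilon:
        a_next = np.random.randint(agent.n_actions)
    else:
        a_next = int(np.argmax(agent.q_table[s_next]))
    # standard Q-learning target using the agent's discount factor gamma
    td_target = reward + agent.gamma * np.max(agent.q_table[s_next])
    agent.q_table[s, a] += lr * (td_target - agent.q_table[s, a])
    return a_next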
def __init__(self):
    # Channel parameters
    self.nb_channels = 4
    self.idleChannel = 1
    self.prob_switching = 0.9
    self.channelObservation = None
    self.prob_late = BlockchainNetworkingEnv.LATE_PROB
    self.cost_channels = [0.1, 0.1, 0.1, 0.1]

    # Blockchain parameters
    self.mempool = Mempool()
    self.userTransaction = Transaction()
    self.lastBlock = Block()
    self.hashRate = None
    self.doubleSpendSuccess = None

    # System parameters
    self.nb_past_observations = 4
    self.state_size = Mempool.NB_FEE_INTERVALS + 2 * self.nb_past_observations
    self.action_space = ActionSpace(self.nb_channels + 1)
    self.observation_space = StateSpace(
        (Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
         ActionSpace(self.nb_channels + 1), ChannelSpace(),
         ActionSpace(self.nb_channels + 1), ChannelSpace(),
         ActionSpace(self.nb_channels + 1), ChannelSpace(),
         ActionSpace(self.nb_channels + 1), ChannelSpace()))

    # reward define
    self.totalReward = 0
    self.successReward = 0
    self.channelCost = 0
    self.transactionFee = 0
    self.cost = 0

    self.viewer = None
    self.state = None
    self.steps_beyond_done = None
class FederatedLearningEnv(gym.Env):
    TIME_LIMIT = 10000
    DATA_LIMIT = 1500

    def __init__(self):
        # System parameters
        self.nb_MB = 3
        self.state_size = 2 * self.nb_MB
        self.nb_actions = (Mobile.MAX_DATA + 1) ** self.nb_MB * (Mobile.MAX_ENERGY + 1) ** self.nb_MB
        self.action_space = ActionSpace((Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                         Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1),
                                         Discrete(Mobile.MAX_DATA + 1), Discrete(Mobile.MAX_ENERGY + 1)))
        self.observation_space = StateSpace((Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                             Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY),
                                             Discrete(Mobile.MAX_CPU), Discrete(Mobile.MAX_ENERGY)))

        # initialize mobile devices
        self.MB1 = Mobile()
        self.MB2 = Mobile()
        self.MB3 = Mobile()

        self.max_data = self.nb_MB * Mobile.MAX_DATA
        self.max_energy = self.nb_MB * Mobile.MAX_ENERGY
        self.max_latency = Mobile.MAX_LATENCY
        self.training_time = 0
        self.training_data = 0

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        data_required1 = action[0]
        energy_required1 = action[1]
        data_required2 = action[2]
        energy_required2 = action[3]
        data_required3 = action[4]
        energy_required3 = action[5]

        data1, latency1, energy_consumption1, fault1 = self.MB1.update(data_required1, energy_required1)
        data2, latency2, energy_consumption2, fault2 = self.MB2.update(data_required2, energy_required2)
        data3, latency3, energy_consumption3, fault3 = self.MB3.update(data_required3, energy_required3)

        data = data1 + data2 + data3
        latency = max(latency1, latency2, latency3)
        energy_consumption = energy_consumption1 + energy_consumption2 + energy_consumption3
        fault = fault1 + fault2 + fault3

        state = [self.MB1.CPU_shared, self.MB1.energy,
                 self.MB2.CPU_shared, self.MB2.energy,
                 self.MB3.CPU_shared, self.MB3.energy]
        # print(state)
        self.state = tuple(state)

        self.training_data += data
        self.training_time += latency

        reward = 10 * (5 * data / self.max_data - latency / self.max_latency
                       - energy_consumption / self.max_energy) + fault

        if (self.training_data > FederatedLearningEnv.DATA_LIMIT):
            done = True
        else:
            done = False

        # if (fault < 0):
        #     print(fault)
        # print(np.array(self.state), action, [reward, data, latency, energy_consumption, fault], done)
        reward /= 10
        return np.array(self.state), [reward, data, latency, energy_consumption, data1, data2, data3], done, {}

    def reset(self):
        self.state = []
        self.MB1.reset()
        self.MB2.reset()
        self.MB3.reset()
        state = [self.MB1.CPU_shared, self.MB1.energy,
                 self.MB2.CPU_shared, self.MB2.energy,
                 self.MB3.CPU_shared, self.MB3.energy]
        self.state = tuple(state)
        self.training_time = 0
        self.training_data = 0
        print(self.state)
        self.steps_beyond_done = None
        return np.array(self.state)

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment how
        to run (such as an address of a remote server, or path to your ImageNet
        data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()


# env = FederatedLearningEnv()
# env.reset()
# for index in range(0, 100):
#     env.step(env.action_space.sample())
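# A hedged usage sketch mirroring the commented-out loop above: drive the
# environment with random allocations until the data limit ends the episode.
# The unpacking follows the list returned by step() in this class.
if __name__ == "__main__":
    env = FederatedLearningEnv()
    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        state, (reward, data, latency, energy, d1, d2, d3), done, _ = env.step(action)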
class Env(object):
    """The abstract environment class that is used by all agents. This class has the exact
    same API that OpenAI Gym uses so that integrating with it is trivial. In contrast to the
    OpenAI Gym implementation, this class only defines the abstract methods without any actual
    implementation.
    """

    def __init__(self):
        self.robot = Robot((84, 84))
        self.token = np.array([14, -5, 0])
        self.previousAction = 0
        self.iteration = 0

    reward_range = (-1, 1)
    action_space = ActionSpace()
    observation_space = ObservationSpace()

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        Accepts an action and returns a tuple (observation, reward, done, info).

        Args:
            action (object): an action provided by the environment

        Returns:
            observation (object): agent's observation of the current environment
            reward (float): amount of reward returned after previous action
            done (boolean): whether the episode has ended, in which case further
                step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for
                debugging, and sometimes learning)
        """
        self.previousAction = action
        pos1 = self.robot.position
        self.robot.executeAction(action)
        pos2 = self.robot.position + self.robot.direction
        self.iteration += 1
        isDone = self.iteration > 50 or np.array_equal(self.robot.position, self.token)

        reward = 0
        if self.iteration > 50:
            reward = -1
        elif isDone == True:
            reward = 1
        elif (np.linalg.norm(self.token - pos1) > np.linalg.norm(self.token - pos2)):
            reward = 0.25
        else:
            reward = -0.25

        self.robot.update()
        observation = getObservation(self.robot, self.token)
        return observation, reward, isDone, {'info': 'test'}

    def reset(self):
        """Resets the state of the environment and returns an initial observation.

        Returns:
            observation (object): the initial observation of the space.
            (Initial reward is assumed to be 0.)
        """
        self.robot.reset()
        self.iteration = 0
        return np.zeros((84, 84, 3), dtype=np.uint8)

    def render(self, mode='human', close=False):
        """Renders the environment.

        The set of supported modes varies per environment. (And some environments
        do not support rendering at all.) By convention, if mode is:

        - human: render to the current display or terminal and return nothing.
          Usually for human consumption.
        - rgb_array: Return an numpy.ndarray with shape (x, y, 3), representing
          RGB values for an x-by-y pixel image, suitable for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines and
          ANSI escape sequences (e.g. for colors).

        Note:
            Make sure that your class's metadata 'render.modes' key includes the
            list of supported modes. It's recommended to call super() in
            implementations to use the functionality of this method.

        Args:
            mode (str): the mode to render with
            close (bool): close all open renderings
        """
        text = str(self.iteration) + " "
        text += "Robot: " + str(self.robot.position) + "; "
        text += "Token " + str(self.token) + "; "
        text += "Action " + str(self.previousAction) + ";"
        sys.stdout.write('\r' + str(text) + ' ' * 20)
        sys.stdout.flush()

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        return 1

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
            number generators. The first value in the list should be the "main"
            seed, or the value which a reproducer should pass to 'seed'. Often,
            the main seed equals the provided 'seed', but this won't be true if
            seed=None, for example.
        """
        return [1]

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment
        how to run (such as an address of a remote server, or path to your
        ImageNet data). It should not affect the semantics of the environment.
        """
        return 1

    def __del__(self):
        self.close()

    def __str__(self):
        return '<{} instance>'.format(type(self).__name__)
class BCnetenv(gym.Env):
    '''
    Actions:
        Type: flat Discrete(512), decoded in step() as a MultiDiscrete([4, 8, 16]):
        1) Block_Size : Discrete 4
           - 2MB[0], 4MB[1], 6MB[2], 8MB[3]
           - params: min: 2, max: 8 (megabytes)
        2) number of shards (K) : Discrete 8
           - 1[0], 2[1], ..., 8[7]
           - params: min: 1, max: 8
        3) Time Interval : Discrete 16
           - 0.5[0], 1.0[1], ..., 8.0[15]
           - params: min: 0.5, max: 8 (seconds)

        The flat index enumerates the three sub-actions:
            0, 0, 0  -> 0
            0, 0, 1  -> 1
            ...
            3, 7, 15 -> 511

    state space:
        Type:
        Num   state                          Min     Max      format
        0     data transmission link         10 MHz  100 MHz  n x n
        1     computing capability           10 GHz  30 GHz   n x 1
        2     consensus history              0       1        n x n
        3     estimated faulty probability   0       1/3      n x 1

        Type: Box(2)
        num   observation            min   max
        0     latency                0     48
        1     required shard limit   1     8
    '''

    def __init__(self):
        # Simulation parameters
        self.nb_nodes = 200
        self.tx_size = 200      # bytes
        self.B_max = 8          # megabytes
        self.Ti_amx = 8         # seconds
        self.K_max = 8          # maximum shard number
        self.sign = 2           # MHz
        self.MAC = 1            # MHz
        self.batchsize = 3
        self.u = 6              # consecutive block confirm
        self.trans_prob = 0.5   # transition probability in the finite Markov chain

        # define action space & observation space
        self.action_space = ActionSpace(512)
        self.observation_space = spaces.Box(low=np.array([0, 1]),
                                            high=np.array([48, 8]),
                                            dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

        # Load the state: the return values of ShardDist are assigned to R, c, H, e_prob.
        # Each of these components is updated to build the actual state used in main.
        self.R_transmission = None
        self.c_computing = None
        self.H_history = None
        self.e_prob = None
        self.reward = 0

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        # update the state space as the step function proceeds
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        state = self.state
        R, C, H, e_prob = state

        a = action // 128                    # block-size index (0~3) -> 2, 4, 6, 8 MB
        b = (action - 128 * a) // 16         # shard index (0~7) -> 1..8 shards
        c = (action - (128 * a) - (16 * b))  # interval index (0~15) -> 0.5..8.0 s
        b_size = 2 * (a + 1)        # block size 2, 4, 6, 8 (4 levels)
        t_interval = 0.5 * (c + 1)  # time interval 0.5, 1, 1.5, ..., 8 (16 levels)
        n_shard = b + 1             # number of shards 1..8 (8 levels)

        # Update R based on the finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                random_number = random.random()
                if (R[i, j] == 10 * (10**6)):
                    if (random_number < self.trans_prob):
                        R[i, j] += 10 * (10**6)
                        R[j, i] = R[i, j]
                elif (R[i, j] == 100 * (10**6)):
                    if (random_number < self.trans_prob):
                        R[i, j] -= 10 * (10**6)
                        R[j, i] = R[i, j]
                else:
                    if (random_number < self.trans_prob):
                        R[i, j] += 10 * (10**6)
                        R[j, i] = R[i, j]
                    elif (self.trans_prob <= random_number < 2 * self.trans_prob):
                        R[i, j] -= 10 * (10**6)
                        R[j, i] = R[i, j]

        # Update C based on the finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            for j in range(0, self.nb_nodes):
                random_number = random.random()
                if (C[i, j] == 10 * (10**9)):
                    if (random_number < self.trans_prob):
                        C[i, j] += 5 * (10**9)
                elif (C[i, j] == 30 * (10**9)):
                    if (random_number < self.trans_prob):
                        C[i, j] -= 5 * (10**9)
                else:
                    if (random_number < self.trans_prob):
                        C[i, j] += 5 * (10**9)
                    elif (self.trans_prob <= random_number < 2 * self.trans_prob):
                        C[i, j] -= 5 * (10**9)

        env2 = ShardDistribute()
        H, e_prob, NodesInShard, Success_ratio, FCP = env2.ShardDist(n_shard)
        self.state = [R, C, H, e_prob]

        e_p = e_prob[0, 0]  # pre-load the variable used in the constraint check

        # For the latency computation, extract max/min values from R and C and
        # compute every latency component.
        # Set the values of M, theta, C_numb, alpha, B, timeout.
        M = 3
        theta = 2 * (10**6)
        alpha = 2 * 10**6
        beta = 10**6
        B = b_size * 8 * 10**6
        timeout = 2.2  # 2.2 / 3.2; range from 0.64 up to 6.4 s, threshold set around the midpoint
        nb_nodes = self.nb_nodes

        ### latency computation (sharding, when n_shard >= 2)
        if (n_shard >= 2):
            C_numb = len(NodesInShard[n_shard])

            # 1) validation time within each intra shard
            T_k_in_val = []
            primary = []
            for K in range(n_shard):
                primary.append(NodesInShard[K][random.randint(0, len(NodesInShard[K]) - 1)])
                T_in_val = []
                for i in NodesInShard[K]:
                    if (i == primary[K]):
                        T_in_val.append(
                            (M * theta + (M * (1 + C_numb) + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
                    else:
                        T_in_val.append(
                            (M * theta + (C_numb * M + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
                T_k_in_val.append(max(T_in_val))
            T_k_in_val = (1 / M) * max(T_k_in_val)

            # 2) propagation time within each intra shard
            T_k_in_prop = []
            for K in range(n_shard):
                T_in_preprepare = []
                T_in_prepare = []
                T_in_commit = []
                for i in NodesInShard[K]:
                    for j in NodesInShard[K]:
                        if (j != i):
                            if (i == primary[K]):
                                T_in_preprepare.append((M * B) / R[i, j])
                            else:
                                T_in_prepare.append((M * B) / R[i, j])
                                T_in_commit.append((M * B) / R[i, j])
                T_k_in_prop.append(min(max(T_in_preprepare), timeout)
                                   + min(max(T_in_prepare), timeout)
                                   + min(max(T_in_commit), timeout))
            T_k_in_prop = (1 / M) * max(T_k_in_prop)

            # 3) validation time in the DC (final shard)
            primary_DC = NodesInShard[n_shard][random.randint(0, len(NodesInShard[n_shard]) - 1)]
            T_k_f_val = []
            for i in NodesInShard[n_shard]:
                if (i == primary_DC):
                    T_k_f_val.append(
                        (n_shard * M * theta
                         + (n_shard * M + 4 * (C_numb - 1) + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
                else:
                    T_k_f_val.append(
                        (n_shard * M * theta
                         + (4 * (C_numb - 1) + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
            T_k_f_val = (1 / M) * max(T_k_f_val)

            # 4) propagation time in the DC (final shard)
            T_k_f_request = []
            T_k_f_preprepare = []
            T_k_f_prepare = []
            T_k_f_commit = []
            T_k_f_reply = []
            for i in primary:
                for j in NodesInShard[n_shard]:
                    T_k_f_request.append((M * B) / R[i, j])
            for i in NodesInShard[n_shard]:
                for j in NodesInShard[n_shard]:
                    if (j != i):
                        if (i == primary_DC):
                            T_k_f_preprepare.append((M * B) / R[i, j])
                        else:
                            T_k_f_prepare.append((M * B) / R[i, j])
                            T_k_f_commit.append((M * B) / R[i, j])
            for i in NodesInShard[n_shard]:
                for j in primary:
                    T_k_f_reply.append((M * B) / R[i, j])
            T_k_f_prop = (1 / M) * (min(max(T_k_f_request), timeout)
                                    + min(max(T_k_f_preprepare), timeout)
                                    + min(max(T_k_f_prepare), timeout)
                                    + min(max(T_k_f_commit), timeout)
                                    + min(max(T_k_f_request), timeout))

            # The final latency is the block interval plus the four times above.
            Tlatency = t_interval + (T_k_in_val + T_k_in_prop + T_k_f_val + T_k_f_prop)

        else:
            # plain PBFT when there is a single shard
            T_V = []
            client = random.randint(0, nb_nodes - 1)
            primary = random.randint(0, nb_nodes - 1)
            while (primary == client):
                primary = random.randint(0, nb_nodes - 1)
            for i in range(nb_nodes):
                if (i == primary):
                    T_V.append((M * alpha + beta * (2 * M + 4 * (nb_nodes - 1))) / C[i][0])
                elif (i != client):
                    T_V.append((M * alpha + beta * (M + 4 * (nb_nodes - 1))) / C[i][0])
            T_V = (1 / M) * max(T_V)

            t1 = min((M * B / R[client, primary]), timeout)
            t2 = []
            for i in range(nb_nodes):
                if ((i != client) & (i != primary)):
                    t2.append(M * B / R[primary, i])
            t2 = min(max(t2), timeout)
            t3 = []
            for i in range(nb_nodes):
                for j in range(nb_nodes):
                    if ((j != i) & (i != client) & (j != client)):
                        t3.append(M * B / R[i, j])
            t3 = min(max(t3), timeout)
            t4 = []
            for i in range(nb_nodes):
                for j in range(nb_nodes):
                    if (j != i):
                        t4.append(M * B / R[i, j])
            t4 = min(max(t4), timeout)
            t5 = []
            for i in range(nb_nodes):
                for j in range(nb_nodes):
                    if (i != client):
                        t5.append(M * B / R[i, client])
            t5 = min(max(t5), timeout)
            T_D = (1 / M) * (t1 + t2 + t3 + t4 + t5)
            Tlatency = t_interval + T_V + T_D

        ### constraint (latency & shard)
        done_t = Tlatency > self.u * t_interval
        constraint = 0

        ### const 1
        if n_shard == 1:
            done_n = False
        else:
            constraint = (self.nb_nodes * (1 - (3 * e_p)) - 1) / (3 * self.nb_nodes * e_p + 1)
            done_n = (n_shard >= constraint)  # lemma 1

        #### const 2
        # constraint = (((2*self.nb_nodes) / (3*(self.nb_nodes * e_p +1))) -1)
        # done_n = n_shard >= (((2*self.nb_nodes) / (3*(self.nb_nodes * e_p +1))) -1)  # lemma 2
        # done_n = False  # no security bound

        done = done_t or done_n
        done = bool(done)

        # successful shards = prob * K
        # If done is 1, i.e. the episode ended because a constraint was violated, the reward is 0.
        # If done is 0, the reward reflects the TPS.
        reward = self.reward
        if not done:
            reward = Success_ratio * M * ((n_shard * (math.floor(
                (b_size / self.tx_size) * 1000 * 1000))) / t_interval)
        elif self.steps_beyond_done is None:
            # step beyond done?
            self.steps_beyond_done = 0
        else:
            # episode is already done
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. "
                    "You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0

        self.reward = float(reward)
        print('reward', reward)
        const = [Tlatency, b_size, t_interval, n_shard, constraint, done_t, done_n, e_p, FCP]
        print(const)

        ##### Apply the state change so R, C, H, e_prob are updated.
        ### The values to return are the updated R, C, H, e_prob after the action is applied.
        return self.state, self.reward, done, const, {}
        ## In CartPole self.state is wrapped in an np.array; here it has to stay unpacked.

    def reset(self):
        # state space -> reset R, c, H, e_prob
        R_transmission = np.zeros((self.nb_nodes, self.nb_nodes))
        c_computing = np.zeros((self.nb_nodes, 1))
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                R_transmission[i, j] = random.randrange(10, 101, 10)
                R_transmission[j, i] = R_transmission[i, j]
        R_transmission = (10**6) * R_transmission  # 200 x 200

        for i in range(0, self.nb_nodes):
            c_computing[i] = random.randrange(10, 31, 5)
        c_computing = (10**9) * c_computing  # 200 x 1
        c_computing = np.kron(c_computing, np.ones((1, self.nb_nodes)))  # expand to 200 x 200 before assigning

        n_shard = random.randrange(1, 9)
        env2 = ShardDistribute()
        H, e_prob, NodesInShard, Success_ratio, FCP = env2.ShardDist(n_shard)  # get H and e_prob via ShardDist

        self.state = [R_transmission, c_computing, H, e_prob]
        return self.state
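# A small illustrative decoder for the flat action used in step() above: a single
# integer in [0, 512) is split into block-size, shard-count, and block-interval
# indices. The helper name is an assumption; the arithmetic mirrors step().
def decode_action_sketch(action):
    a = action // 128               # block-size index: 0..3  -> 2, 4, 6, 8 MB
    b = (action - 128 * a) // 16    # shard index:      0..7  -> 1..8 shards
    c = action - 128 * a - 16 * b   # interval index:   0..15 -> 0.5..8.0 s
    return 2 * (a + 1), b + 1, 0.5 * (c + 1)

# e.g. decode_action_sketch(0) == (2, 1, 0.5); decode_action_sketch(511) == (8, 8, 8.0)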
class BlockchainEnv(gym.Env):

    def __init__(self):
        self.action_space = ActionSpace(3)
        self.observation_space = spaces.Tuple((Discrete(100), Discrete(100), Discrete(100)))
        # self.seed()
        self.viewer = None
        self.state = None
        self.market_value = 100
        self.alpha = -0.05
        self.ob = 0.1
        self.os = 0.15
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        state = self.state
        state_list = list(state)
        action = min(action, state_list[0])
        actions = np.array([action, self.action_space.sample(), self.action_space.sample()])

        for index in range(len(state)):
            win_prob = state[index] * 1.0 / sum(state)
            if (win_prob > np.random.rand(1)):
                state_list[index] = state_list[index] - actions[index] + 1
            else:
                state_list[index] = state_list[index] - actions[index]

        state = tuple(state_list)
        self.state = state
        self.market_value += sum(actions) * self.alpha

        if (action > 0):
            # selling
            reward = action * self.market_value - self.ob
        elif (action < 0):
            reward = action * self.market_value - self.os
        else:
            reward = 0

        done = sum(state) == 0
        done = bool(done)
        return np.array(self.state), reward, done, {}

    def reset(self):
        self.state = self.observation_space.sample()
        print(self.state)
        self.steps_beyond_done = None
        self.market_value = 100
        return np.array(self.state)

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment how
        to run (such as an address of a remote server, or path to your ImageNet
        data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()
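# A hedged usage sketch for BlockchainEnv above: reset, then take a few random
# actions. Assumes ActionSpace exposes sample(), as step() itself does; the
# step budget is arbitrary.
if __name__ == "__main__":
    env = BlockchainEnv()
    state = env.reset()
    for _ in range(10):
        state, reward, done, _ = env.step(env.action_space.sample())
        if done:
            break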
import torch
import torch.optim as optim
import torch.nn as nn

from dnn import NeuralNetwork
import config
from action_space import ActionSpace

# if gpu is to be used
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# Instantiate action space
actionSpace = ActionSpace()

# Shortcuts
Tensor = torch.Tensor
LongTensor = torch.LongTensor


class QNetAgent(object):

    def __init__(self):
        self.nn = NeuralNetwork().to(device)

        self.loss_func = nn.MSELoss()
        # self.loss_func = nn.SmoothL1Loss()

        self.optimizer = optim.Adam(params=self.nn.parameters(), lr=config.learning_rate)
        # self.optimizer = optim.RMSprop(params=mynn.parameters(), lr=learning_rate)

    def select_action(self, state, epsilon):
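        # NOTE: the body of select_action is cut off in this extract. What follows
        # is a minimal epsilon-greedy sketch, not the original implementation; the
        # greedy branch and the use of actionSpace.actions are assumptions.
        if torch.rand(1).item() < epsilon:
            # explore: pick a random action index from the shared action space
            return int(torch.randint(len(actionSpace.actions), (1,)).item())
        with torch.no_grad():
            # exploit: pick the action with the highest predicted Q-value
            q_values = self.nn(Tensor(state).to(device))
        return int(torch.argmax(q_values).item())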
# orderbook_test = Orderbook(extraFeatures=False)
# orderbook_test.loadFromBitfinexFile('orderbook_bitfinex_btcusd_view.tsv')

import pandas as pd

# Load orderbook
cols = ["ts", "seq", "size", "price", "is_bid", "is_trade", "ttype"]
events = pd.read_table('ob-1-small.tsv', sep='\t', names=cols, index_col="seq")
d = Orderbook.generateDictFromEvents(events)
orderbook = Orderbook()
orderbook.loadFromDict(d)

# clean first n states (due to lack of bids and asks)
print("#States: " + str(len(orderbook.states)))
for i in range(100):
    orderbook.states.pop(0)
    del d[list(d.keys())[0]]
orderbook_test = orderbook
# orderbook.plot()

T = [0, 10, 20, 40, 60, 80, 100]       # , 120, 240]
T_test = [0, 10, 20, 40, 60, 80, 100]  # , 120, 240]
I = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

actionSpace = ActionSpace(orderbook, side, T, I, ai, levels)
actionSpace_test = ActionSpace(orderbook_test, side, T_test, I, ai, levels)

# priceReturnCurve(crossval=1)
from agent_utils.ui import UI
UI.animate(run_profit, interval=100)
# UI.animate(run_q_reward, interval=1000)
class BlockchainNetworkingEnv(gym.Env):
    SUCCESS_REWARD = 5
    LATE_PROB = 1
    MAX_ATTACK = 0.1

    def __init__(self):
        # Channel parameters
        self.nb_channels = 4
        self.idleChannel = 1
        self.prob_switching = 0.9
        self.channelObservation = None
        self.prob_late = BlockchainNetworkingEnv.LATE_PROB
        self.cost_channels = [0.1, 0.1, 0.1, 0.1]

        # Blockchain parameters
        self.mempool = Mempool()
        self.userTransaction = Transaction()
        self.lastBlock = Block()
        self.hashRate = None
        self.doubleSpendSuccess = None

        # System parameters
        self.nb_past_observations = 4
        self.state_size = Mempool.NB_FEE_INTERVALS + 2 * self.nb_past_observations
        self.action_space = ActionSpace(self.nb_channels + 1)
        self.observation_space = StateSpace(
            (Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
             ActionSpace(self.nb_channels + 1), ChannelSpace(),
             ActionSpace(self.nb_channels + 1), ChannelSpace(),
             ActionSpace(self.nb_channels + 1), ChannelSpace(),
             ActionSpace(self.nb_channels + 1), ChannelSpace()))

        # reward define
        self.totalReward = 0
        self.successReward = 0
        self.channelCost = 0
        self.transactionFee = 0
        self.cost = 0

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))

        # reset the rewards
        self.totalReward = 0
        self.successReward = 0
        self.channelCost = 0
        self.transactionFee = 0
        self.prob_late = None
        self.attacked = False

        state = list(self.state)

        # 1. User's transaction initialization
        self.userTransaction = Transaction()
        if (len(self.lastBlock.blockTransaction) != 0):
            self.userTransaction.estimateFee(self.lastBlock)

        # 2. The channel state changes - single idle channel, round robin switching
        if (np.random.rand() < self.prob_switching):
            self.idleChannel = (self.idleChannel + 1) % self.nb_channels
        # print(self.idleChannel)

        # 3. Mempool updates - some new transactions come
        self.mempool.generateNewTransactions()

        # if user does not submit transaction
        if (action == 0):
            self.totalReward = 0
            self.channelObservation = 2
            # miners mine a block
            self.lastBlock.mineBlock(self.mempool)
        # if user submits transaction
        else:
            self.channelCost = self.cost_channels[action - 1]
            # in case the channel is idle
            if ((action - 1) == self.idleChannel):
                self.prob_late = 0
                self.channelObservation = 1
            # if the channel is busy, the transaction can arrive after the mining process starts
            else:
                self.prob_late = BlockchainNetworkingEnv.LATE_PROB
                self.channelObservation = 0

            # if the transaction comes late
            if (np.random.rand() < self.prob_late):
                # mining process occurs before user's transaction is added
                # 4. Miners start mining; transactions included in the block are removed from the mempool
                self.lastBlock.mineBlock(self.mempool)
                self.mempool.listTransactions.append(self.userTransaction)
                self.transactionFee = self.userTransaction.transactionFee
            else:
                self.mempool.listTransactions.append(self.userTransaction)
                # 4. Miners start mining; transactions included in the block are removed from the mempool
                self.lastBlock.mineBlock(self.mempool)
                self.transactionFee = self.userTransaction.transactionFee

        # 5. Attack process
        self.hashRate = np.random.uniform(0, BlockchainNetworkingEnv.MAX_ATTACK)
        self.doubleSpendSuccess = 2 * self.hashRate
        if (np.random.rand() < self.doubleSpendSuccess):
            self.attacked = True

        # if user's transaction is successfully added into the block -> reward = SUCCESS_REWARD
        if (self.userTransaction in self.lastBlock.blockTransaction and not self.attacked):
            self.successReward = BlockchainNetworkingEnv.SUCCESS_REWARD

        self.totalReward = self.successReward - self.channelCost - self.transactionFee
        self.cost = self.channelCost + self.transactionFee

        # 6. determine new state
        self.mempool.updateMempoolState()
        for index in range(0, Mempool.NB_FEE_INTERVALS):
            state[index] = self.mempool.mempoolState[index]
        state.insert(Mempool.NB_FEE_INTERVALS, action)
        state.insert(Mempool.NB_FEE_INTERVALS + 1, self.channelObservation)
        state.pop()
        state.pop()
        self.state = tuple(state)

        done = False
        # print(np.array(self.state), [self.totalReward, self.cost], done, {})
        return np.array(self.state), [
            self.totalReward, self.channelCost, self.transactionFee, self.cost
        ], done, {}

    def reset(self):
        self.state = []
        self.mempool.resetMempool()
        self.idleChannel = 1
        for index in range(0, len(self.mempool.mempoolState)):
            self.state.append(self.mempool.mempoolState[index])
        for obs_index in range(0, self.nb_past_observations):
            self.state.append(0)
            self.state.append(2)
        print(self.state)
        self.steps_beyond_done = None
        return np.array(self.state)

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment how
        to run (such as an address of a remote server, or path to your ImageNet
        data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()


# env = BlockchainNetworkingEnv()
# env.reset()
# for index in range(0, 50):
#     env.step(np.random.randint(0, env.nb_channels))
class BackscatterEnv3(gym.Env):
    TIME_FRAME = 10
    BUSY_TIMESLOT = 4
    DATA_RATE = 0.3

    def __init__(self):
        # System parameters
        self.nb_ST = 3
        self.state_size = 2 * self.nb_ST
        self.nb_actions = ((BackscatterEnv3.BUSY_TIMESLOT + 1) ** 3
                           * (BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1) ** 2)
        self.action_space = ActionSpace(
            (Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1),
             Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1)))
        self.observation_space = StateSpace(
            (Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
             Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
             Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY)))

        # initialize Second Transmitters
        self.ST1 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
        self.ST2 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
        self.ST3 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        harvest = action[0]
        backscatter_time_1 = action[1]
        backscatter_time_2 = action[2]
        transmit_time_1 = action[3]
        transmit_time_2 = action[4]
        backscatter_time_3 = BackscatterEnv3.BUSY_TIMESLOT - harvest - backscatter_time_1 - backscatter_time_2
        transmit_time_3 = BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT - transmit_time_1 - transmit_time_2

        reward = 0
        if ((backscatter_time_3 >= 0) and (transmit_time_3 >= 0)):
            harvest_time_1 = BackscatterEnv3.BUSY_TIMESLOT - backscatter_time_1
            harvest_time_2 = BackscatterEnv3.BUSY_TIMESLOT - backscatter_time_2
            harvest_time_3 = BackscatterEnv3.BUSY_TIMESLOT - backscatter_time_3

            reward += self.ST1.update(harvest_time_1, backscatter_time_1, transmit_time_1)
            reward += self.ST2.update(harvest_time_2, backscatter_time_2, transmit_time_2)
            reward += self.ST3.update(harvest_time_3, backscatter_time_3, transmit_time_3)
            throughput = reward

            datawaiting_before = self.ST1.queue
            self.ST1.generateData()
            self.ST2.generateData()
            self.ST3.generateData()
            datawaiting = self.ST1.queue

            state = [self.ST1.queue, self.ST1.energy,
                     self.ST2.queue, self.ST2.energy,
                     self.ST3.queue, self.ST3.energy]
            self.state = tuple(state)
        else:
            # in case the time assignment is not feasible
            reward = -10
            throughput = 0
            datawaiting_before = self.ST1.queue
            if (self.ST1.queue == SecondTransmitor.QUEUE and self.ST2.queue == SecondTransmitor.QUEUE
                    and self.ST3.queue == SecondTransmitor.QUEUE):
                self.ST1.reset()
                self.ST2.reset()
                self.ST3.reset()
            else:
                self.ST1.generateData()
                self.ST2.generateData()
                self.ST3.generateData()
            datawaiting = self.ST1.queue
            state = [self.ST1.queue, self.ST1.energy,
                     self.ST2.queue, self.ST2.energy,
                     self.ST3.queue, self.ST3.energy]
            self.state = tuple(state)

        print(np.array(self.state), reward, datawaiting, action)
        done = False
        # print(np.array(self.state), reward, done, {})
        return np.array(self.state), [reward, throughput, datawaiting_before, datawaiting], done, {}

    def reset(self):
        self.state = []
        self.ST1.reset()
        self.ST2.reset()
        self.ST3.reset()
        state = [self.ST1.queue, self.ST1.energy,
                 self.ST2.queue, self.ST2.energy,
                 self.ST3.queue, self.ST3.energy]
        self.state = tuple(state)
        print(self.state)
        self.steps_beyond_done = None
        return np.array(self.state)

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).

        # Returns
            Returns the list of seeds used in this env's random number generators
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment how
        to run (such as an address of a remote server, or path to your ImageNet
        data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()


# env = BackscatterEnv3()
# env.reset()
# for index in range(0, 1000):
#     env.step(env.action_space.sample())
class BCnetenv(gym.Env):
    '''
    Actions:
        Type: MultiDiscrete form.
        1) Block_Size(shard) : Discrete 4
           - 2MB[0], 4MB[1], 6MB[2], 8MB[3]
           - params: min: 2, max: 8 (megabytes)
        2) Time Interval : Discrete 4
           - 2[0], 4[1], 6[2], 8[3]
           - params: min: 2, max: 8 (seconds)
        3) number of shard (K) : Discrete 4
           - 1[0], 2[1], 4[2], 8[3]
           - params: min: 1, max: 8

        MultiDiscrete([4, 4, 4]) -> we use a flat discrete expression (64)
            0, 0, 0 -> 0
            0, 0, 1 -> 1
            0, 0, 2 -> 2
            ...
            3, 3, 3 -> 63

    state space:
        Type:
        Num   state                          Min     Max      format
        0     data transmission link         10 MHz  100 MHz  n x n
        1     computing capability           10 GHz  30 GHz   n x 1
        2     consensus history              0       1        n x n
        3     estimated faulty probability   0       1/3      n x 1

        Type: Box(2)
        num   observation            min   max
        0     latency                0     48
        1     required shard limit   1     8
    '''

    def __init__(self):
        # Simulation parameters
        self.nb_nodes = 200
        self.tx_size = 200      # bytes
        self.B_max = 8          # megabytes
        self.Ti_amx = 8         # seconds
        self.K_max = 8          # maximum shard number
        self.sign = 2           # MHz
        self.MAC = 1            # MHz
        self.batchsize = 3
        self.u = 6              # consecutive block confirm
        self.trans_prob = 0.1   # transition probability in the finite Markov chain

        # define action space & observation space
        self.action_space = ActionSpace(64)
        self.observation_space = spaces.Box(low=np.array([0, 1]),
                                            high=np.array([48, 8]),
                                            dtype=np.float32)

        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

        # Load the state: the return values of ShardDist are assigned to R, c, H, e_prob.
        # Each of these components is updated to build the actual state used in main.
        self.R_transmission = None
        self.c_computing = None
        self.H_history = None
        self.e_prob = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        # update the state space as the step function proceeds
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        state = self.state
        # Copy self.state into the local variable state (self.state must be reassigned later to update it).
        # state consists of R, C, H, e_prob.
        # When an action is taken, read the current state, apply the action, and produce the next state.
        R, C, H, e_prob = state

        ## decode the selected action into local variables
        a = action // 16        # quotient of action divided by 16
        b = (action % 16) // 4  # remainder of action mod 16, divided by 4
        c = action % 4          # remainder of action divided by 4
        b_size = 2 * (a + 1)      # block size 2, 4, 6, 8
        t_interval = 2 * (b + 1)  # time interval 2, 4, 6, 8
        n_shard = 2 ** c          # number of shards 1, 2, 4, 8

        # Update R based on the finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                random_number = random.random()
                if (R[i, j] == 10 * (10**6)):
                    if (random_number < self.trans_prob):
                        R[i, j] += 10 * (10**6)
                        R[j, i] = R[i, j]
                elif (R[i, j] == 100 * (10**6)):
                    if (random_number < self.trans_prob):
                        R[i, j] -= 10 * (10**6)
                        R[j, i] = R[i, j]
                else:
                    if (random_number < self.trans_prob):
                        R[i, j] += 10 * (10**6)
                        R[j, i] = R[i, j]
                    elif (self.trans_prob <= random_number < 2 * self.trans_prob):
                        R[i, j] -= 10 * (10**6)
                        R[j, i] = R[i, j]

        # Update C based on the finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            random_number = random.random()
            if (C[i] == 10 * (10**9)):
                if (random_number < self.trans_prob):
                    C[i] += 5 * (10**9)
            elif (C[i] == 30 * (10**9)):
                if (random_number < self.trans_prob):
                    C[i] -= 5 * (10**9)
            else:
                if (random_number < self.trans_prob):
                    C[i] += 5 * (10**9)
                elif (self.trans_prob <= random_number < 2 * self.trans_prob):
                    C[i] -= 5 * (10**9)

        # Compute H and e_prob via ShardDist.
        env2 = ShardDistribute()
        H, e_prob, NodesInShard = env2.ShardDist(n_shard)
        self.state = [R, C, H, e_prob]

        ## e_prob obtained here is n x 1; take a scalar so it can be used in the constraint below.
        e_p = e_prob[0, 0]  # pre-load the variable used in the constraint check

        ### latency computation
        # For the latency computation, extract max/min values from R and C and
        # compute every latency component.
        # Set the values of M, theta, C_numb, alpha, B, timeout.
        M = 3
        theta = 2 * (10**6)
        C_numb = len(NodesInShard[n_shard])
        alpha = 10**6
        B = b_size
        timeout = 1000000000000000000000000  # no timeout

        # 1) validation time within each intra shard
        T_k_in_val = []
        primary = []
        for K in range(n_shard):
            primary.append(NodesInShard[K][random.randint(0, len(NodesInShard[K]) - 1)])
            T_in_val = []
            for i in NodesInShard[K]:
                if (i == primary[K]):
                    T_in_val.append((M * theta + (M * (1 + C_numb) + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
                else:
                    T_in_val.append((M * theta + (C_numb * M + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
            T_k_in_val.append(max(T_in_val))
        T_k_in_val = (1 / M) * max(T_k_in_val)

        # 2) propagation time within each intra shard
        T_k_in_prop = []
        for K in range(n_shard):
            T_in_preprepare = []
            T_in_prepare = []
            T_in_commit = []
            for i in NodesInShard[K]:
                for j in NodesInShard[K]:
                    if (j != i):
                        if (i == primary[K]):
                            T_in_preprepare.append((M * B) / R[i, j])
                        else:
                            T_in_prepare.append((M * B) / R[i, j])
                            T_in_commit.append((M * B) / R[i, j])
            T_k_in_prop.append(min(max(T_in_preprepare), timeout)
                               + min(max(T_in_prepare), timeout)
                               + min(max(T_in_commit), timeout))
        T_k_in_prop = (1 / M) * max(T_k_in_prop)

        # 3) validation time in the DC (final shard)
        primary_DC = NodesInShard[n_shard][random.randint(0, len(NodesInShard[n_shard]) - 1)]
        T_k_f_val = []
        for i in NodesInShard[n_shard]:
            if (i == primary_DC):
                T_k_f_val.append((n_shard * M * theta
                                  + (n_shard * M + 4 * (C_numb - 1) + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
            else:
                T_k_f_val.append((n_shard * M * theta
                                  + (4 * (C_numb - 1) + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
        T_k_f_val = (1 / M) * max(T_k_f_val)

        # 4) propagation time in the DC (final shard)
        T_k_f_request = []
        T_k_f_preprepare = []
        T_k_f_prepare = []
        T_k_f_commit = []
        T_k_f_reply = []
        for i in primary:
            for j in NodesInShard[n_shard]:
                T_k_f_request.append((M * B) / R[i, j])
        for i in NodesInShard[n_shard]:
            for j in NodesInShard[n_shard]:
                if (j != i):
                    if (i == primary_DC):
                        T_k_f_preprepare.append((M * B) / R[i, j])
                    else:
                        T_k_f_prepare.append((M * B) / R[i, j])
                        T_k_f_commit.append((M * B) / R[i, j])
        for i in NodesInShard[n_shard]:
            for j in primary:
                T_k_f_reply.append((M * B) / R[i, j])
        T_k_f_prop = (1 / M) * (min(max(T_k_f_request), timeout)
                                + min(max(T_k_f_preprepare), timeout)
                                + min(max(T_k_f_prepare), timeout)
                                + min(max(T_k_f_commit), timeout)
                                + min(max(T_k_f_request), timeout))

        # The final latency is the block interval plus the four times above.
        Tlatency = t_interval + (T_k_in_val + T_k_in_prop + T_k_f_val + T_k_f_prop)

        ### constraint (latency & shard)
        done = Tlatency > self.u * t_interval \
            or n_shard >= (self.nb_nodes * (1 - (3 * e_p)) - 1) / (3 * self.nb_nodes * e_p + 1)
        done = bool(done)

        # If done is 1, i.e. the episode ended because a constraint was violated, the reward is 0.
        # If done is 0, the reward reflects the TPS.
        if not done:
            reward = (n_shard * (math.floor((b_size / self.tx_size) * 1024 * 1024))) / t_interval
        elif self.steps_beyond_done is None:
            # step beyond done?
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. "
                            "You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        ##### Apply the state change so R, C, H, e_prob are updated.
        ### The values to return are the updated R, C, H, e_prob after the action is applied.
        return self.state, reward, done, {}
        ## In CartPole self.state is wrapped in an np.array; here it has to stay unpacked,
        ## because R, C, H, e_prob all have different dimensions and np.array would raise a shape error.
        # Through the assignment self.state = [R, C, H, e_prob] above, each element becomes part of the state.
        # state[0], state[1], state[2], state[3] return R, C, H, e_prob respectively.

    def reset(self):
        # state space -> reset R, c, H
        R_transmission = np.zeros((self.nb_nodes, self.nb_nodes))
        c_computing = np.zeros((self.nb_nodes, 1))
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                R_transmission[i, j] = random.randrange(10, 101, 10)
                R_transmission[j, i] = R_transmission[i, j]
        R_transmission = (10**6) * R_transmission

        for i in range(0, self.nb_nodes):
            c_computing[i] = random.randrange(10, 31, 5)
        c_computing = (10**9) * c_computing

        n_shard = 2 ** (random.randrange(1, 5) - 1)
        env2 = ShardDistribute()
        H, e_prob, NodesInShard = env2.ShardDist(n_shard)

        self.state = [R_transmission, c_computing, H, e_prob]
        return self.state
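# A small illustrative decoder for the flat 64-way action described in the class
# docstring: action -> (block size in MB, block interval in s, shard count).
# The helper name is an assumption; the arithmetic mirrors step().
def decode_action64_sketch(action):
    a = action // 16        # block-size index 0..3 -> 2, 4, 6, 8 MB
    b = (action % 16) // 4  # interval index   0..3 -> 2, 4, 6, 8 s
    c = action % 4          # shard index      0..3 -> 1, 2, 4, 8 shards
    return 2 * (a + 1), 2 * (b + 1), 2 ** c

# e.g. decode_action64_sketch(0) == (2, 2, 1); decode_action64_sketch(63) == (8, 8, 8)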
import random
import time

import torch
from torch.utils.tensorboard import SummaryWriter

import config
from action_space import ActionSpace
from experience_replay import ExperienceReplay
from qnet import QNetAgent

# if gpu is to be used
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

Tensor = torch.Tensor
LongTensor = torch.LongTensor

random_seed = 42
torch.manual_seed(random_seed)
random.seed(random_seed)

writer = SummaryWriter()
actionSpace = ActionSpace()
memory = ExperienceReplay(config.replay_mem_size)
qnet_agent = QNetAgent()

steps_total = []
frames_total = 0
solved_after = 0
solved = False

start_time = time.time()

# Main loop
step = 0
total_reward = 0
done = False
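# A minimal, hedged helper of the kind the main loop set up above typically
# needs: a linearly decaying exploration rate. The start/end/decay-step
# constants are illustrative assumptions, not values taken from config.
def calculate_epsilon(steps_done, eps_start=0.9, eps_end=0.05, decay_steps=10000):
    # interpolate from eps_start down to eps_end over decay_steps frames
    fraction = min(float(steps_done) / decay_steps, 1.0)
    return eps_start + fraction * (eps_end - eps_start)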