import math
import random

import gym
import numpy as np
from gym import logger, spaces
from gym.spaces import Discrete
from gym.utils import seeding

# ActionSpace, StateSpace, ChannelSpace and the device/blockchain models
# (SecondTransmitor, Mobile, Mempool, Transaction, Block, ShardDistribute)
# are assumed to be provided elsewhere in this project.


class BackscatterEnv3(gym.Env):
    TIME_FRAME = 10
    BUSY_TIMESLOT = 4
    DATA_RATE = 0.3

    def __init__(self):
        # System parameters
        self.nb_ST = 3
        self.state_size = 2 * self.nb_ST
        self.nb_actions = (BackscatterEnv3.BUSY_TIMESLOT + 1) ** 3 \
            * (BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1) ** 2
        self.action_space = ActionSpace((
            Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
            Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
            Discrete(BackscatterEnv3.BUSY_TIMESLOT + 1),
            Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1),
            Discrete(BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT + 1)))
        self.observation_space = StateSpace((
            Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
            Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY),
            Discrete(SecondTransmitor.QUEUE), Discrete(SecondTransmitor.ENERGY)))

        # Initialize the secondary transmitters.
        self.ST1 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
        self.ST2 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)
        self.ST3 = SecondTransmitor(data_rate=BackscatterEnv3.DATA_RATE)

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))
        harvest = action[0]
        backscatter_time_1 = action[1]
        backscatter_time_2 = action[2]
        transmit_time_1 = action[3]
        transmit_time_2 = action[4]
        # The remaining busy/idle timeslots are assigned to ST3.
        backscatter_time_3 = (BackscatterEnv3.BUSY_TIMESLOT - harvest
                              - backscatter_time_1 - backscatter_time_2)
        transmit_time_3 = (BackscatterEnv3.TIME_FRAME - BackscatterEnv3.BUSY_TIMESLOT
                           - transmit_time_1 - transmit_time_2)

        reward = 0
        if backscatter_time_3 >= 0 and transmit_time_3 >= 0:
            harvest_time_1 = BackscatterEnv3.BUSY_TIMESLOT - backscatter_time_1
            harvest_time_2 = BackscatterEnv3.BUSY_TIMESLOT - backscatter_time_2
            harvest_time_3 = BackscatterEnv3.BUSY_TIMESLOT - backscatter_time_3

            reward += self.ST1.update(harvest_time_1, backscatter_time_1, transmit_time_1)
            reward += self.ST2.update(harvest_time_2, backscatter_time_2, transmit_time_2)
            reward += self.ST3.update(harvest_time_3, backscatter_time_3, transmit_time_3)
            throughput = reward

            datawaiting_before = self.ST1.queue
            self.ST1.generateData()
            self.ST2.generateData()
            self.ST3.generateData()
            datawaiting = self.ST1.queue

            state = [self.ST1.queue, self.ST1.energy,
                     self.ST2.queue, self.ST2.energy,
                     self.ST3.queue, self.ST3.energy]
            self.state = tuple(state)
        else:
            # The time assignment is infeasible: penalize and keep going.
            reward = -10
            throughput = 0
            datawaiting_before = self.ST1.queue
            if (self.ST1.queue == SecondTransmitor.QUEUE
                    and self.ST2.queue == SecondTransmitor.QUEUE
                    and self.ST3.queue == SecondTransmitor.QUEUE):
                self.ST1.reset()
                self.ST2.reset()
                self.ST3.reset()
            else:
                self.ST1.generateData()
                self.ST2.generateData()
                self.ST3.generateData()
            datawaiting = self.ST1.queue

            state = [self.ST1.queue, self.ST1.energy,
                     self.ST2.queue, self.ST2.energy,
                     self.ST3.queue, self.ST3.energy]
            self.state = tuple(state)

        print(np.array(self.state), reward, datawaiting, action)
        done = False
        # print(np.array(self.state), reward, done, {})
        return np.array(self.state), \
            [reward, throughput, datawaiting_before, datawaiting], done, {}

    def reset(self):
        self.state = []
        self.ST1.reset()
        self.ST2.reset()
        self.ST3.reset()
        state = [self.ST1.queue, self.ST1.energy,
                 self.ST2.queue, self.ST2.energy,
                 self.ST3.queue, self.ST3.energy]
        self.state = tuple(state)
        print(self.state)
        self.steps_beyond_done = None
        return np.array(self.state)

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment
        how to run (such as an address of a remote server, or path to your
        ImageNet data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()


# env = BackscatterEnv3()
# env.reset()
# for index in range(0, 1000):
#     env.step(env.action_space.sample())
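# BackscatterEnv3 drives three SecondTransmitor instances defined elsewhere in
# this project. The sketch below records only the interface the environment
# relies on -- it is NOT the project's actual device model; the capacity
# constants and the update dynamics are illustrative placeholders.

class _SecondTransmitorSketch:
    QUEUE = 10   # assumed queue capacity (placeholder value)
    ENERGY = 10  # assumed energy capacity (placeholder value)

    def __init__(self, data_rate):
        self.data_rate = data_rate
        self.queue = 0
        self.energy = 0

    def update(self, harvest_time, backscatter_time, transmit_time):
        """Apply one time frame and return the throughput achieved."""
        self.energy = min(self.energy + harvest_time, self.ENERGY)  # harvest phase
        # Placeholder dynamics: send what the backscatter/transmit slots allow.
        sent = min(self.queue, self.data_rate * (backscatter_time + transmit_time))
        self.queue -= sent
        return sent

    def generateData(self):
        # New data arrives, capped at the queue capacity.
        self.queue = min(self.queue + 1, self.QUEUE)

    def reset(self):
        self.queue = 0
        self.energy = 0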
class BlockchainEnv(gym.Env):
    def __init__(self):
        self.action_space = ActionSpace(3)
        self.observation_space = spaces.Tuple(
            (Discrete(100), Discrete(100), Discrete(100)))
        # self.seed()
        self.viewer = None
        self.state = None
        self.market_value = 100
        self.alpha = -0.05
        self.ob = 0.1
        self.os = 0.15
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))
        state = self.state
        state_list = list(state)
        # The agent cannot spend more than it currently holds.
        action = min(action, state_list[0])
        actions = np.array([action,
                            self.action_space.sample(),
                            self.action_space.sample()])
        for index in range(len(state)):
            win_prob = state[index] * 1.0 / sum(state)
            if win_prob > np.random.rand(1):
                state_list[index] = state_list[index] - actions[index] + 1
            else:
                state_list[index] = state_list[index] - actions[index]
        state = tuple(state_list)
        self.state = state

        self.market_value += sum(actions) * self.alpha
        if action > 0:    # selling
            reward = action * self.market_value - self.ob
        elif action < 0:  # buying
            reward = action * self.market_value - self.os
        else:
            reward = 0

        done = sum(state) == 0
        done = bool(done)
        return np.array(self.state), reward, done, {}

    def reset(self):
        self.state = self.observation_space.sample()
        print(self.state)
        self.steps_beyond_done = None
        self.market_value = 100
        return np.array(self.state)

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment
        how to run (such as an address of a remote server, or path to your
        ImageNet data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()
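# A minimal smoke-test sketch for BlockchainEnv, in the same style as the
# commented-out loops that follow the other environments in this file. It
# assumes ActionSpace(3) samples integers in {0, 1, 2}, matching the budget
# clamp at the top of step().
# env = BlockchainEnv()
# env.reset()
# for index in range(0, 100):
#     state, reward, done, info = env.step(env.action_space.sample())
#     if done:
#         env.reset()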
class BCnetenv(gym.Env):
    '''
    Actions:
        Type: MultiDiscrete, flattened to a single Discrete(64).
        1) Block size (shard) : Discrete 4
           - 2 MB [0], 4 MB [1], 6 MB [2], 8 MB [3]
           - params: min: 2, max: 8 (megabytes)
        2) Time interval : Discrete 4
           - 2 [0], 4 [1], 6 [2], 8 [3]
           - params: min: 2, max: 8 (seconds)
        3) Number of shards (K) : Discrete 4
           - 1 [0], 2 [1], 4 [2], 8 [3]
           - params: min: 1, max: 8

        MultiDiscrete([4, 4, 4]) -> flattened Discrete(64):
            0, 0, 0 -> 0
            0, 0, 1 -> 1
            0, 0, 2 -> 2
            ...
            3, 3, 3 -> 63

    State space:
        Num  State                         Min      Max      Format
        0    data transmission link        10 MHz   100 MHz  n x n
        1    computing capability          10 GHz   30 GHz   n x 1
        2    consensus history             0        1        n x n
        3    estimated faulty probability  0        1/3      n x 1

    Observation: Box(2)
        Num  Observation           Min  Max
        0    latency               0    48
        1    required shard limit  1    8
    '''

    def __init__(self):
        # Simulation parameters
        self.nb_nodes = 200
        self.tx_size = 200  # bytes
        self.B_max = 8      # megabytes
        self.Ti_max = 8     # seconds
        self.K_max = 8      # maximum shard number
        self.sign = 2       # MHz
        self.MAC = 1        # MHz
        self.batchsize = 3
        self.u = 6          # consecutive block confirmations
        self.trans_prob = 0.1  # transition probability of the finite Markov chain

        # Define action space & observation space.
        self.action_space = ActionSpace(64)
        self.observation_space = spaces.Box(low=np.array([0, 1]),
                                            high=np.array([48, 8]),
                                            dtype=np.float32)
        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

        # State components: the return values of ShardDist are assigned to
        # R, c, H and e_prob; each piece that the real state in main needs
        # is updated separately.
        self.R_transmission = None
        self.c_computing = None
        self.H_history = None
        self.e_prob = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        # Advance one step and update the state space.
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))
        # Copy self.state into a local variable (self.state must be
        # reassigned below to commit the update). state = [R, C, H, e_prob].
        # Given the current state and the chosen action, this method must
        # produce the next state.
        state = self.state
        R, C, H, e_prob = state

        # Decode the selected action into its three components.
        a = action // 16        # quotient of action / 16
        b = (action % 16) // 4  # remainder mod 16, then divided by 4
        c = action % 4          # remainder of action / 4
        b_size = 2 * (a + 1)      # block size: 2, 4, 6, 8
        t_interval = 2 * (b + 1)  # time interval: 2, 4, 6, 8
        n_shard = 2 ** c          # number of shards: 1, 2, 4, 8

        # Update R with a finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                random_number = random.random()
                if R[i, j] == 10 * (10 ** 6):
                    if random_number < self.trans_prob:
                        R[i, j] += 10 * (10 ** 6)
                        R[j, i] = R[i, j]
                elif R[i, j] == 100 * (10 ** 6):
                    if random_number < self.trans_prob:
                        R[i, j] -= 10 * (10 ** 6)
                        R[j, i] = R[i, j]
                else:
                    if random_number < self.trans_prob:
                        R[i, j] += 10 * (10 ** 6)
                        R[j, i] = R[i, j]
                    elif self.trans_prob <= random_number < 2 * self.trans_prob:
                        R[i, j] -= 10 * (10 ** 6)
                        R[j, i] = R[i, j]

        # Update C with the same finite-state Markov model.
        for i in range(0, self.nb_nodes):
            random_number = random.random()
            if C[i] == 10 * (10 ** 9):
                if random_number < self.trans_prob:
                    C[i] += 5 * (10 ** 9)
            elif C[i] == 30 * (10 ** 9):
                if random_number < self.trans_prob:
                    C[i] -= 5 * (10 ** 9)
            else:
                if random_number < self.trans_prob:
                    C[i] += 5 * (10 ** 9)
                elif self.trans_prob <= random_number < 2 * self.trans_prob:
                    C[i] -= 5 * (10 ** 9)

        # Compute H and e_prob via ShardDist.
        env2 = ShardDistribute()
        H, e_prob, NodesInShard = env2.ShardDist(n_shard)
        self.state = [R, C, H, e_prob]

        # e_prob comes back as an n x 1 array; take a scalar for the
        # constraint check below.
        e_p = e_prob[0, 0]

        ### Latency computation
        # Extract max/min values from R and C to compute every latency
        # component. Set M, theta, C_numb, alpha, B, timeout.
        M = 3
        theta = 2 * (10 ** 6)
        C_numb = len(NodesInShard[n_shard])
        alpha = 10 ** 6
        B = b_size
        timeout = 1000000000000000000000000  # effectively no timeout

        # 1) Validation time within each shard.
        T_k_in_val = []
        primary = []
        for K in range(n_shard):
            primary.append(NodesInShard[K][random.randint(0, len(NodesInShard[K]) - 1)])
            T_in_val = []
            for i in NodesInShard[K]:
                if i == primary[K]:
                    T_in_val.append((M * theta + (M * (1 + C_numb)
                                     + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
                else:
                    T_in_val.append((M * theta + (C_numb * M
                                     + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
            T_k_in_val.append(max(T_in_val))
        T_k_in_val = (1 / M) * max(T_k_in_val)

        # 2) Propagation time within each shard.
        T_k_in_prop = []
        for K in range(n_shard):
            T_in_preprepare = []
            T_in_prepare = []
            T_in_commit = []
            for i in NodesInShard[K]:
                for j in NodesInShard[K]:
                    if j != i:
                        if i == primary[K]:
                            T_in_preprepare.append((M * B) / R[i, j])
                        else:
                            T_in_prepare.append((M * B) / R[i, j])
                        T_in_commit.append((M * B) / R[i, j])
            T_k_in_prop.append(min(max(T_in_preprepare), timeout)
                               + min(max(T_in_prepare), timeout)
                               + min(max(T_in_commit), timeout))
        T_k_in_prop = (1 / M) * max(T_k_in_prop)

        # 3) Validation time in the DC (final shard).
        primary_DC = NodesInShard[n_shard][random.randint(0, len(NodesInShard[n_shard]) - 1)]
        T_k_f_val = []
        for i in NodesInShard[n_shard]:
            if i == primary_DC:
                T_k_f_val.append((n_shard * M * theta
                                  + (n_shard * M + 4 * (C_numb - 1)
                                     + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
            else:
                T_k_f_val.append((n_shard * M * theta
                                  + (4 * (C_numb - 1)
                                     + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
        T_k_f_val = (1 / M) * max(T_k_f_val)

        # 4) Propagation time in the DC (final shard):
        #    request, pre-prepare, prepare, commit and reply phases.
        T_k_f_request = []
        T_k_f_preprepare = []
        T_k_f_prepare = []
        T_k_f_commit = []
        T_k_f_reply = []
        for i in primary:
            for j in NodesInShard[n_shard]:
                T_k_f_request.append((M * B) / R[i, j])
        for i in NodesInShard[n_shard]:
            for j in NodesInShard[n_shard]:
                if j != i:
                    if i == primary_DC:
                        T_k_f_preprepare.append((M * B) / R[i, j])
                    else:
                        T_k_f_prepare.append((M * B) / R[i, j])
                    T_k_f_commit.append((M * B) / R[i, j])
        for i in NodesInShard[n_shard]:
            for j in primary:
                T_k_f_reply.append((M * B) / R[i, j])
        T_k_f_prop = (1 / M) * (min(max(T_k_f_request), timeout)
                                + min(max(T_k_f_preprepare), timeout)
                                + min(max(T_k_f_prepare), timeout)
                                + min(max(T_k_f_commit), timeout)
                                + min(max(T_k_f_reply), timeout))

        # The final latency is the block interval plus the four times above.
        Tlatency = t_interval + (T_k_in_val + T_k_in_prop + T_k_f_val + T_k_f_prop)

        ### Constraints (latency & shard)
        done = Tlatency > self.u * t_interval \
            or n_shard >= (self.nb_nodes * (1 - (3 * e_p)) - 1) / (3 * self.nb_nodes * e_p + 1)
        done = bool(done)

        # If done (a constraint was violated), the reward is 0;
        # otherwise the reward reflects the TPS.
        if not done:
            reward = (n_shard * (math.floor((b_size / self.tx_size) * 1024 * 1024))) / t_interval
        elif self.steps_beyond_done is None:
            # First step beyond done.
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn("You are calling 'step()' even though this environment has already returned done = True. "
                            "You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0.0

        # The state change has been applied, so the return carries the updated
        # R, C, H, e_prob. Unlike CartPole, self.state is not wrapped in
        # np.array here: R, C, H and e_prob have different shapes, so np.array
        # would raise a dimension error. Since self.state = [R, C, H, e_prob],
        # state[0], state[1], state[2], state[3] give R, C, H and e_prob
        # respectively.
        return self.state, reward, done, {}

    def reset(self):
        # Reset the state-space components R, c and H.
        R_transmission = np.zeros((self.nb_nodes, self.nb_nodes))
        c_computing = np.zeros((self.nb_nodes, 1))
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                R_transmission[i, j] = random.randrange(10, 101, 10)
                R_transmission[j, i] = R_transmission[i, j]
        R_transmission = (10 ** 6) * R_transmission
        for i in range(0, self.nb_nodes):
            c_computing[i] = random.randrange(10, 31, 5)
        c_computing = (10 ** 9) * c_computing
        n_shard = 2 ** (random.randrange(1, 5) - 1)
        env2 = ShardDistribute()
        H, e_prob, NodesInShard = env2.ShardDist(n_shard)
        self.state = [R_transmission, c_computing, H, e_prob]
        return self.state
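# Helper illustrating the flat-action encoding documented in the BCnetenv
# docstring: a base-4 decomposition of the 64 discrete actions into
# (block size, time interval, shard count). decode_action_64 is a
# hypothetical name added here for illustration; the arithmetic mirrors
# BCnetenv.step() exactly.

def decode_action_64(action):
    a = action // 16          # block-size index, 0..3
    b = (action % 16) // 4    # time-interval index, 0..3
    c = action % 4            # shard-count index, 0..3
    b_size = 2 * (a + 1)      # 2, 4, 6, 8 MB
    t_interval = 2 * (b + 1)  # 2, 4, 6, 8 s
    n_shard = 2 ** c          # 1, 2, 4, 8 shards
    return b_size, t_interval, n_shard

# decode_action_64(0)  -> (2, 2, 1)
# decode_action_64(63) -> (8, 8, 8)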
class FederatedLearningEnv(gym.Env):
    TIME_LIMIT = 10000
    DATA_LIMIT = 1500

    def __init__(self):
        # System parameters
        self.nb_MB = 3
        self.state_size = 2 * self.nb_MB
        self.nb_actions = (Mobile.MAX_DATA + 1) ** self.nb_MB \
            * (Mobile.MAX_ENERGY + 1) ** self.nb_MB
        self.action_space = ActionSpace((Discrete(Mobile.MAX_DATA + 1),
                                         Discrete(Mobile.MAX_ENERGY + 1),
                                         Discrete(Mobile.MAX_DATA + 1),
                                         Discrete(Mobile.MAX_ENERGY + 1),
                                         Discrete(Mobile.MAX_DATA + 1),
                                         Discrete(Mobile.MAX_ENERGY + 1)))
        self.observation_space = StateSpace((Discrete(Mobile.MAX_CPU),
                                             Discrete(Mobile.MAX_ENERGY),
                                             Discrete(Mobile.MAX_CPU),
                                             Discrete(Mobile.MAX_ENERGY),
                                             Discrete(Mobile.MAX_CPU),
                                             Discrete(Mobile.MAX_ENERGY)))

        # Initialize the mobile devices.
        self.MB1 = Mobile()
        self.MB2 = Mobile()
        self.MB3 = Mobile()

        self.max_data = self.nb_MB * Mobile.MAX_DATA
        self.max_energy = self.nb_MB * Mobile.MAX_ENERGY
        self.max_latency = Mobile.MAX_LATENCY
        self.training_time = 0
        self.training_data = 0

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))
        data_required1 = action[0]
        energy_required1 = action[1]
        data_required2 = action[2]
        energy_required2 = action[3]
        data_required3 = action[4]
        energy_required3 = action[5]

        data1, latency1, energy_consumption1, fault1 = self.MB1.update(data_required1, energy_required1)
        data2, latency2, energy_consumption2, fault2 = self.MB2.update(data_required2, energy_required2)
        data3, latency3, energy_consumption3, fault3 = self.MB3.update(data_required3, energy_required3)

        data = data1 + data2 + data3
        latency = max(latency1, latency2, latency3)
        energy_consumption = energy_consumption1 + energy_consumption2 + energy_consumption3
        fault = fault1 + fault2 + fault3

        state = [self.MB1.CPU_shared, self.MB1.energy,
                 self.MB2.CPU_shared, self.MB2.energy,
                 self.MB3.CPU_shared, self.MB3.energy]
        # print(state)
        self.state = tuple(state)

        self.training_data += data
        self.training_time += latency
        reward = 10 * (5 * data / self.max_data
                       - latency / self.max_latency
                       - energy_consumption / self.max_energy) + fault

        if self.training_data > FederatedLearningEnv.DATA_LIMIT:
            done = True
        else:
            done = False

        # if fault < 0:
        #     print(fault)
        # print(np.array(self.state), action, [reward, data, latency, energy_consumption, fault], done)
        reward /= 10
        return np.array(self.state), \
            [reward, data, latency, energy_consumption, data1, data2, data3], done, {}

    def reset(self):
        self.state = []
        self.MB1.reset()
        self.MB2.reset()
        self.MB3.reset()
        state = [self.MB1.CPU_shared, self.MB1.energy,
                 self.MB2.CPU_shared, self.MB2.energy,
                 self.MB3.CPU_shared, self.MB3.energy]
        self.state = tuple(state)
        self.training_time = 0
        self.training_data = 0
        print(self.state)
        self.steps_beyond_done = None
        return np.array(self.state)

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment
        how to run (such as an address of a remote server, or path to your
        ImageNet data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()


# env = FederatedLearningEnv()
# env.reset()
# for index in range(0, 100):
#     env.step(env.action_space.sample())
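# FederatedLearningEnv assumes a Mobile device model defined elsewhere in this
# project. A minimal sketch of the interface it relies on is given below; the
# class constants and the update dynamics are illustrative placeholders, not
# the project's actual model.

class _MobileSketch:
    MAX_DATA = 10      # assumed per-round data budget (placeholder)
    MAX_ENERGY = 10    # assumed energy budget (placeholder)
    MAX_CPU = 10       # assumed shared-CPU levels (placeholder)
    MAX_LATENCY = 100  # assumed latency bound (placeholder)

    def __init__(self):
        self.CPU_shared = 1
        self.energy = self.MAX_ENERGY

    def update(self, data_required, energy_required):
        """Run one training round; return (data, latency, energy, fault)."""
        data = min(data_required, self.MAX_DATA)       # placeholder dynamics
        latency = data / max(self.CPU_shared, 1)       # more CPU -> lower latency
        energy = min(energy_required, self.energy)
        fault = 0 if energy_required <= self.energy else -1
        self.energy -= energy
        return data, latency, energy, fault

    def reset(self):
        self.CPU_shared = 1
        self.energy = self.MAX_ENERGY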
class BlockchainNetworkingEnv(gym.Env):
    SUCCESS_REWARD = 5
    LATE_PROB = 1
    MAX_ATTACK = 0.1

    def __init__(self):
        # Channel parameters
        self.nb_channels = 4
        self.idleChannel = 1
        self.prob_switching = 0.9
        self.channelObservation = None
        self.prob_late = BlockchainNetworkingEnv.LATE_PROB
        self.cost_channels = [0.1, 0.1, 0.1, 0.1]

        # Blockchain parameters
        self.mempool = Mempool()
        self.userTransaction = Transaction()
        self.lastBlock = Block()
        self.hashRate = None
        self.doubleSpendSuccess = None

        # System parameters
        self.nb_past_observations = 4
        self.state_size = Mempool.NB_FEE_INTERVALS + 2 * self.nb_past_observations
        self.action_space = ActionSpace(self.nb_channels + 1)
        self.observation_space = StateSpace((
            Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
            Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
            Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
            Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
            Discrete(Mempool.MAX_FEE), Discrete(Mempool.MAX_FEE),
            ActionSpace(self.nb_channels + 1), ChannelSpace(),
            ActionSpace(self.nb_channels + 1), ChannelSpace(),
            ActionSpace(self.nb_channels + 1), ChannelSpace(),
            ActionSpace(self.nb_channels + 1), ChannelSpace()))

        # Reward components
        self.totalReward = 0
        self.successReward = 0
        self.channelCost = 0
        self.transactionFee = 0
        self.cost = 0

        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))
        # Reset the rewards.
        self.totalReward = 0
        self.successReward = 0
        self.channelCost = 0
        self.transactionFee = 0
        self.prob_late = None
        self.attacked = False
        state = list(self.state)

        # 1. User's transaction initialization
        self.userTransaction = Transaction()
        if len(self.lastBlock.blockTransaction) != 0:
            self.userTransaction.estimateFee(self.lastBlock)

        # 2. The channel state changes: a single idle channel, round-robin switching.
        if np.random.rand() < self.prob_switching:
            self.idleChannel = (self.idleChannel + 1) % self.nb_channels
        # print(self.idleChannel)

        # 3. Mempool update: new transactions arrive.
        self.mempool.generateNewTransactions()

        # If the user does not submit a transaction:
        if action == 0:
            self.totalReward = 0
            self.channelObservation = 2
            # Miners mine a block.
            self.lastBlock.mineBlock(self.mempool)
        # If the user submits a transaction:
        else:
            self.channelCost = self.cost_channels[action - 1]
            # The chosen channel is idle.
            if (action - 1) == self.idleChannel:
                self.prob_late = 0
                self.channelObservation = 1
            # The channel is busy: the transaction can be late for the mining process.
            else:
                self.prob_late = BlockchainNetworkingEnv.LATE_PROB
                self.channelObservation = 0
            # The transaction arrives late:
            if np.random.rand() < self.prob_late:
                # The mining process occurs before the user's transaction is added.
                # 4. Miners start mining; transactions included in the block
                #    are removed from the mempool.
                self.lastBlock.mineBlock(self.mempool)
                self.mempool.listTransactions.append(self.userTransaction)
                self.transactionFee = self.userTransaction.transactionFee
            else:
                self.mempool.listTransactions.append(self.userTransaction)
                # 4. Miners start mining; transactions included in the block
                #    are removed from the mempool.
                self.lastBlock.mineBlock(self.mempool)
                self.transactionFee = self.userTransaction.transactionFee

        # 5. Attack process
        self.hashRate = np.random.uniform(0, BlockchainNetworkingEnv.MAX_ATTACK)
        self.doubleSpendSuccess = 2 * self.hashRate
        if np.random.rand() < self.doubleSpendSuccess:
            self.attacked = True

        # If the user's transaction is successfully added into the block,
        # grant SUCCESS_REWARD.
        if (self.userTransaction in self.lastBlock.blockTransaction
                and not self.attacked):
            self.successReward = BlockchainNetworkingEnv.SUCCESS_REWARD
        self.totalReward = self.successReward - self.channelCost - self.transactionFee
        self.cost = self.channelCost + self.transactionFee

        # 6. Determine the new state.
        self.mempool.updateMempoolState()
        for index in range(0, Mempool.NB_FEE_INTERVALS):
            state[index] = self.mempool.mempoolState[index]
        state.insert(Mempool.NB_FEE_INTERVALS, action)
        state.insert(Mempool.NB_FEE_INTERVALS + 1, self.channelObservation)
        state.pop()
        state.pop()
        self.state = tuple(state)

        done = False
        # print(np.array(self.state), [self.totalReward, self.cost], done, {})
        return np.array(self.state), \
            [self.totalReward, self.channelCost, self.transactionFee, self.cost], done, {}

    def reset(self):
        self.state = []
        self.mempool.resetMempool()
        self.idleChannel = 1
        for index in range(0, len(self.mempool.mempoolState)):
            self.state.append(self.mempool.mempoolState[index])
        for obs_index in range(0, self.nb_past_observations):
            self.state.append(0)
            self.state.append(2)
        print(self.state)
        self.steps_beyond_done = None
        return np.array(self.state)

    def updateObservation(self):
        return

    def render(self, mode='human', close=False):
        return

    def close(self):
        """Override in your subclass to perform any necessary cleanup.

        Environments will automatically close() themselves when garbage
        collected or when the program exits.
        """
        raise NotImplementedError()

    def configure(self, *args, **kwargs):
        """Provides runtime configuration to the environment.

        This configuration should consist of data that tells your environment
        how to run (such as an address of a remote server, or path to your
        ImageNet data). It should not affect the semantics of the environment.
        """
        raise NotImplementedError()


# env = BlockchainNetworkingEnv()
# env.reset()
# for index in range(0, 50):
#     env.step(np.random.randint(0, env.nb_channels))
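# Step 6 of BlockchainNetworkingEnv.step() keeps a fixed-length window of the
# last nb_past_observations (action, channel observation) pairs: the newest
# pair is inserted just after the mempool features and the two oldest entries
# are popped off the end. A standalone illustration of that insert/pop
# pattern, with made-up numbers, is shown below.

# state = [m0, ..., m9, a(t-1), c(t-1), ..., a(t-4), c(t-4)]
window = [0.1] * 10 + [1, 2, 3, 0, 2, 1, 4, 2]  # 10 mempool features + 4 pairs
window.insert(10, 9)  # newest action
window.insert(11, 1)  # newest channel observation
window.pop()          # drop the oldest channel observation
window.pop()          # drop the oldest action
# window now holds the 10 features followed by (9,1), (1,2), (3,0), (2,1).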
class BCnetenv(gym.Env):
    '''
    A refined variant of BCnetenv (this definition shadows the class above):
    a finer 512-way action grid and a plain-PBFT latency model when a single
    shard is used.

    Actions:
        Type: MultiDiscrete, flattened to a single Discrete(512).
        1) Block size : Discrete 4
           - 2 MB [0], 4 MB [1], 6 MB [2], 8 MB [3]
        2) Number of shards (K) : Discrete 8
           - 1 [0], 2 [1], ..., 8 [7]
        3) Time interval : Discrete 16
           - 0.5 s [0], 1.0 s [1], ..., 8.0 s [15]

        MultiDiscrete([4, 8, 16]) -> flattened Discrete(512):
            0, 0, 0 -> 0
            0, 0, 1 -> 1
            ...
            3, 7, 15 -> 511

    State space:
        Num  State                         Min      Max      Format
        0    data transmission link        10 MHz   100 MHz  n x n
        1    computing capability          10 GHz   30 GHz   n x n
        2    consensus history             0        1        n x n
        3    estimated faulty probability  0        1/3      n x 1

    Observation: Box(2)
        Num  Observation           Min  Max
        0    latency               0    48
        1    required shard limit  1    8
    '''

    def __init__(self):
        # Simulation parameters
        self.nb_nodes = 200
        self.tx_size = 200  # bytes
        self.B_max = 8      # megabytes
        self.Ti_max = 8     # seconds
        self.K_max = 8      # maximum shard number
        self.sign = 2       # MHz
        self.MAC = 1        # MHz
        self.batchsize = 3
        self.u = 6          # consecutive block confirmations
        self.trans_prob = 0.5  # transition probability of the finite Markov chain

        # Define action space & observation space.
        self.action_space = ActionSpace(512)
        self.observation_space = spaces.Box(low=np.array([0, 1]),
                                            high=np.array([48, 8]),
                                            dtype=np.float32)
        self.seed()
        self.viewer = None
        self.state = None
        self.steps_beyond_done = None

        # State components: the return values of ShardDist are assigned to
        # R, c, H and e_prob; each piece that the real state in main needs
        # is updated separately.
        self.R_transmission = None
        self.c_computing = None
        self.H_history = None
        self.e_prob = None
        self.reward = 0

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        # Advance one step and update the state space.
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))
        state = self.state
        R, C, H, e_prob = state

        # Decode the selected action into its three components.
        a = action // 128                  # block-size index, 0..3
        b = (action - 128 * a) // 16       # shard index, 0..7
        c = action - (128 * a) - (16 * b)  # time-interval index, 0..15
        b_size = 2 * (a + 1)        # block size: 2, 4, 6, 8 (4 values)
        t_interval = 0.5 * (c + 1)  # time interval: 0.5, 1.0, ..., 8.0 (16 values)
        n_shard = b + 1             # number of shards: 1..8 (8 values)

        # Update R with a finite-state Markov channel model.
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                random_number = random.random()
                if R[i, j] == 10 * (10 ** 6):
                    if random_number < self.trans_prob:
                        R[i, j] += 10 * (10 ** 6)
                        R[j, i] = R[i, j]
                elif R[i, j] == 100 * (10 ** 6):
                    if random_number < self.trans_prob:
                        R[i, j] -= 10 * (10 ** 6)
                        R[j, i] = R[i, j]
                else:
                    if random_number < self.trans_prob:
                        R[i, j] += 10 * (10 ** 6)
                        R[j, i] = R[i, j]
                    elif self.trans_prob <= random_number < 2 * self.trans_prob:
                        R[i, j] -= 10 * (10 ** 6)
                        R[j, i] = R[i, j]

        # Update C with the same finite-state Markov model.
        for i in range(0, self.nb_nodes):
            for j in range(0, self.nb_nodes):
                random_number = random.random()
                if C[i, j] == 10 * (10 ** 9):
                    if random_number < self.trans_prob:
                        C[i, j] += 5 * (10 ** 9)
                elif C[i, j] == 30 * (10 ** 9):
                    if random_number < self.trans_prob:
                        C[i, j] -= 5 * (10 ** 9)
                else:
                    if random_number < self.trans_prob:
                        C[i, j] += 5 * (10 ** 9)
                    elif self.trans_prob <= random_number < 2 * self.trans_prob:
                        C[i, j] -= 5 * (10 ** 9)

        env2 = ShardDistribute()
        H, e_prob, NodesInShard, Success_ratio, FCP = env2.ShardDist(n_shard)
        self.state = [R, C, H, e_prob]

        e_p = e_prob[0, 0]  # scalar used in the constraint check below

        # For the latency computation, extract max/min values from R and C to
        # compute every latency component. Set M, theta, alpha, B, timeout.
        M = 3
        theta = 2 * (10 ** 6)
        alpha = 2 * 10 ** 6
        beta = 10 ** 6
        B = b_size * 8 * 10 ** 6  # block size in bits
        # Candidate timeouts: 2.2, 3.2. Values range from 0.64 s up to 6.4 s,
        # so the threshold is set around the midpoint.
        timeout = 2.2
        nb_nodes = self.nb_nodes

        ### Latency computation with sharding (n_shard >= 2)
        if n_shard >= 2:
            C_numb = len(NodesInShard[n_shard])

            # 1) Validation time within each shard.
            T_k_in_val = []
            primary = []
            for K in range(n_shard):
                primary.append(NodesInShard[K][random.randint(0, len(NodesInShard[K]) - 1)])
                T_in_val = []
                for i in NodesInShard[K]:
                    if i == primary[K]:
                        T_in_val.append((M * theta + (M * (1 + C_numb)
                                         + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
                    else:
                        T_in_val.append((M * theta + (C_numb * M
                                         + 4 * (len(NodesInShard[K]) - 1)) * alpha) / C[i][0])
                T_k_in_val.append(max(T_in_val))
            T_k_in_val = (1 / M) * max(T_k_in_val)

            # 2) Propagation time within each shard.
            T_k_in_prop = []
            for K in range(n_shard):
                T_in_preprepare = []
                T_in_prepare = []
                T_in_commit = []
                for i in NodesInShard[K]:
                    for j in NodesInShard[K]:
                        if j != i:
                            if i == primary[K]:
                                T_in_preprepare.append((M * B) / R[i, j])
                            else:
                                T_in_prepare.append((M * B) / R[i, j])
                            T_in_commit.append((M * B) / R[i, j])
                T_k_in_prop.append(min(max(T_in_preprepare), timeout)
                                   + min(max(T_in_prepare), timeout)
                                   + min(max(T_in_commit), timeout))
            T_k_in_prop = (1 / M) * max(T_k_in_prop)

            # 3) Validation time in the DC (final shard).
            primary_DC = NodesInShard[n_shard][random.randint(0, len(NodesInShard[n_shard]) - 1)]
            T_k_f_val = []
            for i in NodesInShard[n_shard]:
                if i == primary_DC:
                    T_k_f_val.append((n_shard * M * theta
                                      + (n_shard * M + 4 * (C_numb - 1)
                                         + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
                else:
                    T_k_f_val.append((n_shard * M * theta
                                      + (4 * (C_numb - 1)
                                         + (self.nb_nodes - C_numb) * M) * alpha) / C[i][0])
            T_k_f_val = (1 / M) * max(T_k_f_val)

            # 4) Propagation time in the DC (final shard):
            #    request, pre-prepare, prepare, commit and reply phases.
            T_k_f_request = []
            T_k_f_preprepare = []
            T_k_f_prepare = []
            T_k_f_commit = []
            T_k_f_reply = []
            for i in primary:
                for j in NodesInShard[n_shard]:
                    T_k_f_request.append((M * B) / R[i, j])
            for i in NodesInShard[n_shard]:
                for j in NodesInShard[n_shard]:
                    if j != i:
                        if i == primary_DC:
                            T_k_f_preprepare.append((M * B) / R[i, j])
                        else:
                            T_k_f_prepare.append((M * B) / R[i, j])
                        T_k_f_commit.append((M * B) / R[i, j])
            for i in NodesInShard[n_shard]:
                for j in primary:
                    T_k_f_reply.append((M * B) / R[i, j])
            T_k_f_prop = (1 / M) * (min(max(T_k_f_request), timeout)
                                    + min(max(T_k_f_preprepare), timeout)
                                    + min(max(T_k_f_prepare), timeout)
                                    + min(max(T_k_f_commit), timeout)
                                    + min(max(T_k_f_reply), timeout))

            # The final latency is the block interval plus the four times above.
            Tlatency = t_interval + (T_k_in_val + T_k_in_prop + T_k_f_val + T_k_f_prop)

        # With a single shard, fall back to plain PBFT.
        else:
            T_V = []
            client = random.randint(0, nb_nodes - 1)
            primary = random.randint(0, nb_nodes - 1)
            while primary == client:
                primary = random.randint(0, nb_nodes - 1)
            for i in range(nb_nodes):
                if i == primary:
                    T_V.append((M * alpha + beta * (2 * M + 4 * (nb_nodes - 1))) / C[i][0])
                elif i != client:
                    T_V.append((M * alpha + beta * (M + 4 * (nb_nodes - 1))) / C[i][0])
            T_V = (1 / M) * max(T_V)

            t1 = min((M * B / R[client, primary]), timeout)
            t2 = []
            for i in range(nb_nodes):
                if (i != client) and (i != primary):
                    t2.append(M * B / R[primary, i])
            t2 = min(max(t2), timeout)
            t3 = []
            for i in range(nb_nodes):
                for j in range(nb_nodes):
                    if (j != i) and (i != client) and (j != client):
                        t3.append(M * B / R[i, j])
            t3 = min(max(t3), timeout)
            t4 = []
            for i in range(nb_nodes):
                for j in range(nb_nodes):
                    if j != i:
                        t4.append(M * B / R[i, j])
            t4 = min(max(t4), timeout)
            t5 = []
            for i in range(nb_nodes):
                if i != client:
                    t5.append(M * B / R[i, client])
            t5 = min(max(t5), timeout)
            T_D = (1 / M) * (t1 + t2 + t3 + t4 + t5)

            Tlatency = t_interval + T_V + T_D

        ### Constraints (latency & shard)
        done_t = Tlatency > self.u * t_interval

        constraint = 0
        ### Constraint 1 (Lemma 1)
        if n_shard == 1:
            done_n = False
        else:
            constraint = (self.nb_nodes * (1 - (3 * e_p)) - 1) / (3 * self.nb_nodes * e_p + 1)
            done_n = (n_shard >= constraint)
        ### Constraint 2 (Lemma 2), kept for reference:
        # constraint = ((2 * self.nb_nodes) / (3 * (self.nb_nodes * e_p + 1))) - 1
        # done_n = n_shard >= constraint
        ### No security bound:
        # done_n = False

        done = done_t or done_n
        done = bool(done)

        # Successful shards = success probability * K.
        # If done (a constraint was violated), the reward is 0;
        # otherwise the reward reflects the TPS.
        reward = self.reward
        if not done:
            reward = Success_ratio * M * ((n_shard * (math.floor(
                (b_size / self.tx_size) * 1000 * 1000))) / t_interval)
        elif self.steps_beyond_done is None:
            # First step beyond done.
            self.steps_beyond_done = 0
        else:
            # Already done:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this environment has already returned done = True. "
                    "You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0

        self.reward = float(reward)
        print('reward', reward)
        const = [Tlatency, b_size, t_interval, n_shard, constraint,
                 done_t, done_n, e_p, FCP]
        print(const)

        # The state change has been applied; the return carries the updated
        # R, C, H, e_prob. const is passed through the info dict to keep the
        # standard gym (state, reward, done, info) signature. As in the first
        # version, self.state is not wrapped in np.array because its
        # components have different shapes.
        return self.state, self.reward, done, {'const': const}

    def reset(self):
        # Reset the state-space components R, c, H and e_prob.
        R_transmission = np.zeros((self.nb_nodes, self.nb_nodes))
        c_computing = np.zeros((self.nb_nodes, 1))
        for i in range(0, self.nb_nodes):
            for j in range(i + 1, self.nb_nodes):
                R_transmission[i, j] = random.randrange(10, 101, 10)
                R_transmission[j, i] = R_transmission[i, j]
        R_transmission = (10 ** 6) * R_transmission  # 200 x 200
        for i in range(0, self.nb_nodes):
            c_computing[i] = random.randrange(10, 31, 5)
        c_computing = (10 ** 9) * c_computing  # 200 x 1
        # Expand to 200 x 200 so C matches the shape used in step().
        c_computing = np.kron(c_computing, np.ones((1, self.nb_nodes)))
        n_shard = random.randrange(1, 9)
        env2 = ShardDistribute()
        # H and e_prob are obtained via ShardDist.
        H, e_prob, NodesInShard, Success_ratio, FCP = env2.ShardDist(n_shard)
        self.state = [R_transmission, c_computing, H, e_prob]
        return self.state
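# Helper illustrating the 512-way flat-action encoding used by this refined
# BCnetenv: 4 block sizes x 8 shard counts x 16 time intervals.
# decode_action_512 is a hypothetical name added for illustration; the
# arithmetic mirrors step() exactly.

def decode_action_512(action):
    a = action // 128         # block-size index, 0..3
    b = (action % 128) // 16  # shard index, 0..7
    c = action % 16           # time-interval index, 0..15
    b_size = 2 * (a + 1)        # 2, 4, 6, 8 MB
    n_shard = b + 1             # 1..8 shards
    t_interval = 0.5 * (c + 1)  # 0.5, 1.0, ..., 8.0 s
    return b_size, t_interval, n_shard

# decode_action_512(0)   -> (2, 0.5, 1)
# decode_action_512(511) -> (8, 8.0, 8)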