def reset(self):
    """Reset the environment to year 0 with every component in perfect condition.

    Rebuilds the encoded state matrix: columns 0-5 are the one-hot condition
    encoding (all components start in condition 0, i.e. column 0 set) and
    column 6 carries the encoded time.

    Returns:
        np.ndarray: the freshly initialised ``self.state`` array of ``self.shape``.
    """
    raw_s = np.zeros(self.shape, dtype=np.int32)
    self.time = 0
    raw_s[:, 0] = 1  # one-hot: every component starts in condition 0
    raw_s[:, 6] = time_encoder(self.time)
    self.state = raw_s
    # Condition index per component. Was hard-coded to 7; use the actual
    # component count so environments of any size (e.g. BridgeEnv(263)) reset
    # consistently with self.state.
    self.state_num = np.zeros(self.shape[0], dtype=np.int32)
    return self.state
def randomint(self, stau=False):
    """Randomise the environment: random component conditions and a random year.

    Args:
        stau (bool): if True, also plot the encoded state matrix.

    Returns:
        tuple: ``(self.state, self.time)`` — the encoded state array and the
        sampled year in ``[0, 100)``.
    """
    # Was hard-coded to 7 components / a [7, 7] state; derive sizes from
    # self.shape so larger environments (e.g. 263 components) work too.
    n_components = self.shape[0]
    s = np.random.randint(0, 6, n_components)  # condition index per component, 0..5
    self.time = int(np.random.randint(0, 100))  # scalar draw; was randint(0,100,1)[0]
    raw_s = np.zeros(self.shape, dtype=np.int32)
    raw_s[:, 0:6] = processsa(s, 6)  # one-hot condition encoding
    raw_s[:, 6] = time_encoder(self.time)
    if stau:
        # NOTE(review): original called plot_state(raw_s, 7, 7); passing the
        # actual dimensions here — confirm plot_state(arr, rows, cols) signature.
        plot_state(raw_s, self.shape[0], self.shape[1])
    self.state = raw_s
    self.state_num = s
    return self.state, self.time
plt.savefig('./figures/cost&loss.eps') # plot paper figures "# plot encoded_state fig-**" import numpy as np from bridgedeterioration import BridgeEnv from utils import * import matplotlib.pyplot as plt num_component = 263 env = BridgeEnv(num_component) env.randomint() s = env.state[:num_component] encode_s = processsa(s, 6) encode_s = np.reshape(encode_s, [1, num_component * 6]) year = time_encoder(env.time, 22)[np.newaxis, :] SS = np.hstack([encode_s, year]) S = np.reshape(SS, [40, 40]) plot_state(S, 40, 40) plt.savefig('./figures/state_263.pdf') plt.savefig('./figures/state_263.eps') # comparison among DRL c1c2c3; Table 3 ******************************************************************************* from guide_experience import generate_Q generate_Q() # *********************** DRL ***************************************************** import numpy as np import os, time, copy, pickle from bridgedeterioration import BridgeEnv
def _transition_matrices(self):
    """Return the Markov transition tensor, building and caching it on first use.

    Shape [7, 4, 6, 6] indexed as [component, action, condition, next condition].
    Actions: 0 = do nothing (component-specific deterioration), 1 = minor repair,
    2 = major repair, 3 = replace (back to condition 0 with certainty).

    The tensor is constant data; caching it on ``self`` avoids rebuilding all
    the literal matrices on every step() call.

    NOTE(review): hard-coded for 7 components, while step() iterates
    range(self.components) — confirm self.components <= 7 for this tensor.
    """
    cached = getattr(self, '_trans', None)
    if cached is not None:
        return cached

    trans = np.zeros(shape=[7, 4, 6, 6])
    # Replace action: deterministic return to perfect condition 0.
    trans[:, 3, :, 0] = 1.0
    # Minor repair action (same matrix for every component).
    trans[:, 1] = [[1.00, 0.00, 0.00, 0.00, 0.00, 0.00],
                   [0.85, 0.15, 0.00, 0.00, 0.00, 0.00],
                   [0.65, 0.27, 0.08, 0.00, 0.00, 0.00],
                   [0.45, 0.30, 0.17, 0.08, 0.00, 0.00],
                   [0.30, 0.35, 0.20, 0.08, 0.07, 0.00],
                   [0.25, 0.30, 0.15, 0.17, 0.10, 0.03]]
    # Major repair action (same matrix for every component).
    trans[:, 2] = [[1.00, 0.00, 0.00, 0.00, 0.00, 0.00],
                   [1.00, 0.00, 0.00, 0.00, 0.00, 0.00],
                   [0.90, 0.08, 0.02, 0.00, 0.00, 0.00],
                   [0.70, 0.15, 0.10, 0.05, 0.00, 0.00],
                   [0.60, 0.18, 0.12, 0.07, 0.03, 0.00],
                   [0.50, 0.25, 0.15, 0.06, 0.03, 0.01]]
    # Do-nothing deterioration, one matrix per component.
    trans[0, 0] = [[0.81, 0.19, 0.00, 0.00, 0.00, 0.00],
                   [0.00, 0.90, 0.10, 0.00, 0.00, 0.00],
                   [0.00, 0.00, 0.91, 0.09, 0.00, 0.00],
                   [0.00, 0.00, 0.00, 0.94, 0.06, 0.00],
                   [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                   [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
    trans[1, 0] = [[0.91, 0.09, 0.00, 0.00, 0.00, 0.00],
                   [0.00, 0.90, 0.10, 0.00, 0.00, 0.00],
                   [0.00, 0.00, 0.86, 0.14, 0.00, 0.00],
                   [0.00, 0.00, 0.00, 0.94, 0.06, 0.00],
                   [0.00, 0.00, 0.00, 0.00, 0.93, 0.07],
                   [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
    trans[2, 0] = [[0.86, 0.14, 0.00, 0.00, 0.00, 0.00],
                   [0.00, 0.97, 0.03, 0.00, 0.00, 0.00],
                   [0.00, 0.00, 0.94, 0.06, 0.00, 0.00],
                   [0.00, 0.00, 0.00, 0.89, 0.11, 0.00],
                   [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                   [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
    trans[3, 0] = [[0.86, 0.14, 0.00, 0.00, 0.00, 0.00],
                   [0.00, 0.96, 0.04, 0.00, 0.00, 0.00],
                   [0.00, 0.00, 0.95, 0.05, 0.00, 0.00],
                   [0.00, 0.00, 0.00, 0.92, 0.08, 0.00],
                   [0.00, 0.00, 0.00, 0.00, 0.91, 0.09],
                   [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
    trans[4, 0] = [[0.87, 0.13, 0.00, 0.00, 0.00, 0.00],
                   [0.00, 0.95, 0.05, 0.00, 0.00, 0.00],
                   [0.00, 0.00, 0.94, 0.06, 0.00, 0.00],
                   [0.00, 0.00, 0.00, 0.92, 0.08, 0.00],
                   [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                   [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
    trans[5, 0] = [[0.87, 0.13, 0.00, 0.00, 0.00, 0.00],
                   [0.00, 0.96, 0.04, 0.00, 0.00, 0.00],
                   [0.00, 0.00, 0.94, 0.06, 0.00, 0.00],
                   [0.00, 0.00, 0.00, 0.94, 0.06, 0.00],
                   [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                   [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
    trans[6, 0] = [[0.87, 0.13, 0.00, 0.00, 0.00, 0.00],
                   [0.00, 0.96, 0.04, 0.00, 0.00, 0.00],
                   [0.00, 0.00, 0.94, 0.06, 0.00, 0.00],
                   [0.00, 0.00, 0.00, 0.91, 0.09, 0.00],
                   [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                   [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
    self._trans = trans
    return trans

def step(self, action, render=False):
    """Advance the environment one year under the given maintenance actions.

    Args:
        action: per-component action indices (0 do nothing, 1 minor repair,
            2 major repair, 3 replace), indexable by component.
        render (bool): if True, call ``self.render()`` after the transition.

    Returns:
        tuple: ``(self.state, reward, done)`` — the updated encoded state,
        the cost-based reward from ``self.costs(action)``, and whether the
        100-year horizon has been reached.
    """
    self.time += 1
    done = self.time >= 100  # 100-year planning horizon

    trans = self._transition_matrices()  # constant tensor, built once and cached

    # Costs are evaluated on the pre-transition state, as in the original order.
    reward = self.costs(action)

    # Stochastic deterioration/repair transition for each component.
    for component in range(self.components):
        s = self.state_num[component]
        # Scalar draw (size=None); the original drew a length-1 array, which
        # newer NumPy deprecates when assigned to a scalar slot.
        self.state_num[component] = np.random.choice(
            6, p=trans[component, action[component], s, :])

    # Re-encode: columns 0-5 one-hot condition, column 6 encoded time.
    self.state[:, 0:6] = processsa(self.state_num, 6)
    self.state[:, 6] = time_encoder(self.time)
    if render:
        self.render()
    return self.state, reward, done