def reset(self):
     """Return the environment to its starting configuration.

     A fresh int32 array of shape ``self.shape`` is created, the clock is
     set to 0, column 0 is set to 1 in every row (one-hot setting: all
     components are in condition 1) and column 6 receives
     ``time_encoder(self.time)``.  ``self.state_num`` is replaced by an
     int32 zero vector of length 7.

     Returns:
         The array that was just stored in ``self.state``.
     """
     initial = np.zeros(shape=self.shape, dtype=np.int32)
     self.time = 0

     # Encode the (freshly zeroed) clock once, then fill the array.
     encoded_time = time_encoder(self.time)
     initial[:, 0] = 1  # one-hot: every component starts in condition 1
     initial[:, 6] = encoded_time

     self.state = initial
     self.state_num = np.zeros(7, np.int32)
     return self.state
 def randomint(self, stau=False):
     """Place the environment in a randomly drawn state.

     Seven condition indices are drawn uniformly from 0..5 and the clock is
     drawn uniformly from 0..99 (stored as a NumPy integer scalar).  A 7x7
     int32 array is then built: columns 0..5 receive ``processsa(s, 6)``
     (presumably the one-hot encoding of the condition indices -- confirm
     against ``utils``) and column 6 receives ``time_encoder(self.time)``.

     Args:
         stau: When truthy, the new state is passed to
             ``plot_state(raw_s, 7, 7)`` before it is stored.

     Returns:
         Tuple ``(self.state, self.time)`` after the update.
     """
     # Draw order (conditions first, then time) is kept so seeded runs
     # reproduce the same states.
     s = np.random.randint(0, 6, 7)
     self.time = np.random.randint(0, 100, 1)[0]

     raw_s = np.zeros([7, 7], dtype=np.int32)
     raw_s[:, 0:6] = processsa(s, 6)
     raw_s[:, 6] = time_encoder(self.time)

     if stau:
         plot_state(raw_s, 7, 7)

     self.state = raw_s
     self.state_num = s
     return self.state, self.time
# Save the current figure as EPS (plt is assumed to be matplotlib.pyplot,
# imported earlier in this script -- confirm).
plt.savefig('./figures/cost&loss.eps')

# Plot paper figures.
# This section draws one random environment state for a 263-component bridge,
# flattens its encoding together with the encoded time into a 40x40 grid, and
# saves the resulting plot as PDF and EPS.
# NOTE: the next line is a bare string expression, not a comment; it has no
# runtime effect.
"# plot encoded_state fig-**"
import numpy as np
from bridgedeterioration import BridgeEnv
# processsa, time_encoder and plot_state are presumably provided by this star
# import -- confirm against utils.
from utils import *
import matplotlib.pyplot as plt

num_component = 263
env = BridgeEnv(num_component)
# Randomise the environment; the return value is ignored and the state is read
# back from env.state / env.time below.
env.randomint()
# First num_component entries of env.state along axis 0.
s = env.state[:num_component]
encode_s = processsa(s, 6)
# Flatten to a single row of num_component * 6 = 1578 values.
encode_s = np.reshape(encode_s, [1, num_component * 6])
# The 40x40 reshape below needs 1600 values, so this is expected to contribute
# the remaining 22 (as a row vector).
year = time_encoder(env.time, 22)[np.newaxis, :]
SS = np.hstack([encode_s, year])
S = np.reshape(SS, [40, 40])
plot_state(S, 40, 40)
# Save the same figure in both formats.
plt.savefig('./figures/state_263.pdf')
plt.savefig('./figures/state_263.eps')

# Comparison among DRL c1c2c3 (Table 3).
# generate_Q is imported from guide_experience and called with no arguments;
# presumably it produces the data behind Table 3 -- confirm in guide_experience.
from guide_experience import generate_Q

generate_Q()

# *********************** DRL *****************************************************
import numpy as np
import os, time, copy, pickle
from bridgedeterioration import BridgeEnv
    def step(self, action, render=False):
        """Advance the environment by one time step.

        The clock is incremented, the cost of ``action`` is obtained from
        ``self.costs``, and each component's condition index in
        ``self.state_num`` is resampled from the transition row selected by
        (component, chosen action, current condition).  ``self.state`` is
        then re-encoded in place: columns 0..5 from
        ``processsa(self.state_num, 6)`` and column 6 from
        ``time_encoder(self.time)``.

        Args:
            action: Indexable with one entry per component.  Each entry is
                used as an index on the action axis of the transition
                tensor: 0 = no action, 1 = minor repair, 2 = major repair,
                3 = replace.
            render: When truthy, ``self.render()`` is called after the state
                has been updated.

        Returns:
            Tuple ``(state, reward, done)``: ``state`` is ``self.state``,
            ``reward`` is the value returned by ``self.costs(action)``, and
            ``done`` is ``True`` once the incremented ``self.time`` is at
            least 100.
        """
        self.time += 1
        # bool() keeps ``done`` a plain Python bool even when self.time is a
        # NumPy integer.
        done = bool(self.time >= 100)

        trans = self._transition_matrices()

        # Costs are evaluated before the components deteriorate.
        reward = self.costs(action)

        # The tensor covers 7 components; self.components is assumed not to
        # exceed that -- TODO confirm against the constructor.
        for component in range(self.components):
            current = self.state_num[component]
            probabilities = trans[component, action[component], current, :]
            # choice(..., 1, ...) returns a length-1 array.  Take element 0 so
            # a scalar is stored: assigning a size-1 array to a single array
            # element is deprecated in NumPy (1.25+).
            self.state_num[component] = np.random.choice(
                6, 1, p=probabilities)[0]

        self.state[:, 0:6] = processsa(self.state_num, 6)
        self.state[:, 6] = time_encoder(self.time)

        if render:
            self.render()

        return self.state, reward, done

    @staticmethod
    def _transition_matrices():
        """Build the transition tensor [component, action, state, next state].

        The tensor has shape (7, 4, 6, 6).  Replace (action 3) sends every
        state to state 0 with probability 1.  The minor-repair (action 1)
        and major-repair (action 2) matrices are shared by all components,
        while the no-action (action 0) matrix is specific to each component.
        """
        trans = np.zeros(shape=[7, 4, 6, 6])
        trans[:, 3, :, 0] = 1.0  # replace action
        trans[:, 1] = [[1.00, 0.00, 0.00, 0.00, 0.00, 0.00],
                       [0.85, 0.15, 0.00, 0.00, 0.00, 0.00],
                       [0.65, 0.27, 0.08, 0.00, 0.00, 0.00],
                       [0.45, 0.30, 0.17, 0.08, 0.00, 0.00],
                       [0.30, 0.35, 0.20, 0.08, 0.07, 0.00],
                       [0.25, 0.30, 0.15, 0.17, 0.10, 0.03]]  # minor repair
        trans[:, 2] = [[1.00, 0.00, 0.00, 0.00, 0.00, 0.00],
                       [1.00, 0.00, 0.00, 0.00, 0.00, 0.00],
                       [0.90, 0.08, 0.02, 0.00, 0.00, 0.00],
                       [0.70, 0.15, 0.10, 0.05, 0.00, 0.00],
                       [0.60, 0.18, 0.12, 0.07, 0.03, 0.00],
                       [0.50, 0.25, 0.15, 0.06, 0.03, 0.01]]  # major repair
        # no-action matrices, one per component
        trans[0, 0] = [[0.81, 0.19, 0.00, 0.00, 0.00, 0.00],
                       [0.00, 0.90, 0.10, 0.00, 0.00, 0.00],
                       [0.00, 0.00, 0.91, 0.09, 0.00, 0.00],
                       [0.00, 0.00, 0.00, 0.94, 0.06, 0.00],
                       [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                       [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
        trans[1, 0] = [[0.91, 0.09, 0.00, 0.00, 0.00, 0.00],
                       [0.00, 0.90, 0.10, 0.00, 0.00, 0.00],
                       [0.00, 0.00, 0.86, 0.14, 0.00, 0.00],
                       [0.00, 0.00, 0.00, 0.94, 0.06, 0.00],
                       [0.00, 0.00, 0.00, 0.00, 0.93, 0.07],
                       [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
        trans[2, 0] = [[0.86, 0.14, 0.00, 0.00, 0.00, 0.00],
                       [0.00, 0.97, 0.03, 0.00, 0.00, 0.00],
                       [0.00, 0.00, 0.94, 0.06, 0.00, 0.00],
                       [0.00, 0.00, 0.00, 0.89, 0.11, 0.00],
                       [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                       [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
        trans[3, 0] = [[0.86, 0.14, 0.00, 0.00, 0.00, 0.00],
                       [0.00, 0.96, 0.04, 0.00, 0.00, 0.00],
                       [0.00, 0.00, 0.95, 0.05, 0.00, 0.00],
                       [0.00, 0.00, 0.00, 0.92, 0.08, 0.00],
                       [0.00, 0.00, 0.00, 0.00, 0.91, 0.09],
                       [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
        trans[4, 0] = [[0.87, 0.13, 0.00, 0.00, 0.00, 0.00],
                       [0.00, 0.95, 0.05, 0.00, 0.00, 0.00],
                       [0.00, 0.00, 0.94, 0.06, 0.00, 0.00],
                       [0.00, 0.00, 0.00, 0.92, 0.08, 0.00],
                       [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                       [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
        trans[5, 0] = [[0.87, 0.13, 0.00, 0.00, 0.00, 0.00],
                       [0.00, 0.96, 0.04, 0.00, 0.00, 0.00],
                       [0.00, 0.00, 0.94, 0.06, 0.00, 0.00],
                       [0.00, 0.00, 0.00, 0.94, 0.06, 0.00],
                       [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                       [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
        trans[6, 0] = [[0.87, 0.13, 0.00, 0.00, 0.00, 0.00],
                       [0.00, 0.96, 0.04, 0.00, 0.00, 0.00],
                       [0.00, 0.00, 0.94, 0.06, 0.00, 0.00],
                       [0.00, 0.00, 0.00, 0.91, 0.09, 0.00],
                       [0.00, 0.00, 0.00, 0.00, 0.99, 0.01],
                       [0.00, 0.00, 0.00, 0.00, 0.00, 1.00]]
        return trans