def __init__(self, x, y, z, gamma, turnspc, policy, rg_prob='rg', envpath='./environments', rendermode='off'):
    """Initialise the block-model mining environment.

    Parameters:
        x, y, z: block model extents along I, J and RL axes.
        gamma: discount factor exponent (reward * turn ** gamma).
        turnspc: fraction of the block model size used as the max number
            of turns (actions) per episode.
        policy: 'CnnPolicy' or 'MlpPolicy'; selects observation space shape.
        rg_prob: 'rg' for randomly generated environments, 'loadenv' to
            load premade environments.
        envpath: directory holding saved geology/environment artefacts.
        rendermode: 'on'/'off' matplotlib display of the block model.
    """
    self.rendermode = rendermode  # on/off display block model in matplotlib
    self.rg_prob = rg_prob  # 'rg' = randomly generated, 'loadenv' = load premade environments

    # paths to saved environment artefacts under envpath
    self.savedgeo = '%s/geology' % envpath
    self.savedenv = '%s/environment' % envpath
    self.saveddepdic = '%s/depdict' % envpath
    self.savedeffdic = '%s/effdict' % envpath
    self.policy = policy

    # initiating values
    self.framecounter = 0
    self.actionslist = list()
    self.reward = 0
    self.discountedmined = 0
    self.turncounter = 1
    self.i = -1
    self.j = -1
    self.terminal = False
    self.gamma = gamma  # discount factor exponential (reward*turn^discount factor)
    self.Imin = 0
    self.Imax = x
    self.Jmin = 0
    self.Jmax = y
    self.RLmin = 0
    self.RLmax = z
    self.mined = -1
    self.callnumber = 1
    self.savenumber = 0

    # Count previously saved geology files to know how many environments can
    # be loaded. A missing/invalid directory raises OSError (FileNotFoundError,
    # NotADirectoryError) and simply means "no saved environments yet" — the
    # original bare `except:` also hid unrelated errors, so narrow it.
    try:
        self.maxloadid = len([name for name in os.listdir(self.savedgeo)
                              if os.path.isfile(os.path.join(self.savedgeo, name))])
    except OSError:
        self.maxloadid = 0

    # sizing the block model environment
    self.Ilen = self.Imax - self.Imin
    self.Jlen = self.Jmax - self.Jmin
    self.RLlen = self.RLmax - self.RLmin  # RL (z coordinate) counts up as depth increases
    self.channels = 2  # H2O mean, mined state
    self.flatlen = self.Ilen * self.Jlen * self.RLlen * self.channels

    # initiating block dependency dictionaries
    self.block_dic_init = {}
    self.dep_dic = {}
    self.dep_dic_init = {}
    self.eff_dic_init = {}

    # create block model (populates the dependency dictionaries via build())
    self.model = automodel(self.Ilen, self.Jlen, self.RLlen)
    self.build()

    # max number of turns (actions) per episode, as a fraction of block model size
    self.turns = round(len(self.dep_dic) * turnspc, 0)

    # Actions are taken on a 2D checkerboard-style view of the environment;
    # progress is made downwards in 3D over time. (A +1 terminal action was
    # considered but is disabled.)
    self.action_space = spaces.Discrete((self.Ilen) * (self.Jlen))

    if self.policy == 'CnnPolicy':
        # observations are the entire 3D environment, one channel per attribute
        self.observation_space = spaces.Box(low=-1, high=1,
                                            shape=(self.Ilen, self.Jlen, self.RLlen, self.channels),
                                            dtype=np.float64)
    elif self.policy == 'MlpPolicy':
        # observations are the entire environment, flattened to 1D
        self.observation_space = spaces.Box(low=-1, high=1,
                                            shape=(self.flatlen,),
                                            dtype=np.float64)
def __init__(self, x, y, z, gamma, penaltyscalar, rg_prob, rendermode='off'):
    """Initialise environment state, build the block model and define gym spaces.

    x, y, z give the model extents; gamma is the discount exponent;
    penaltyscalar scales the cutoff (no-action) penalty; rg_prob is the
    probability of randomly generating a new environment; rendermode
    toggles matplotlib display.
    """
    # display / generation options
    self.rendermode = rendermode
    self.cutoffpenaltyscalar = penaltyscalar
    self.rg_prob = rg_prob

    # per-episode bookkeeping
    self.actionslist = []
    self.turnore = 0
    self.discountedmined = 0
    self.turncounter = 1
    self.i = -1
    self.j = -1
    self.terminal = False
    self.gamma = gamma

    # model extents; RL counts up as depth increases
    self.Imin, self.Imax = 0, x
    self.Jmin, self.Jmax = 0, y
    self.RLmin, self.RLmax = 0, z
    self.Ilen = self.Imax - self.Imin
    self.Jlen = self.Jmax - self.Jmin
    self.RLlen = self.RLmax - self.RLmin

    # block dependency dictionaries
    self.block_dic = {}
    self.block_dic_init = {}
    self.dep_dic = {}
    self.dep_dic_init = {}

    # observation geometry: three channels over the full 3D model
    self.channels = 3
    self.flatlen = self.Ilen * self.Jlen * self.RLlen * self.channels

    self.mined = -1
    self.callnumber = 1

    # construct the underlying block model
    self.automodel = automodel()
    self.build()

    # one discrete action per 2D (I, J) column, plus one extra action
    self.action_space = spaces.Discrete((self.Ilen) * (self.Jlen) + 1)
    # flattened observation of the whole model
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.flatlen,),
                                        dtype=np.float64)
def __init__(self, x, y, z, gamma, penaltyscalar, rg_prob, turnspc, savepath, policy, rendermode='off'):
    """Initialise the mining environment and its gym action/observation spaces.

    x, y, z set the block model extents; gamma is the discount exponent
    (reward * turn ** gamma); penaltyscalar scales the penalty for taking
    no action (cutoff); rg_prob is the probability of randomly generating
    a new environment; turnspc sets episode length as a fraction of model
    size; savepath is where environments are persisted; policy selects the
    observation space shape; rendermode toggles matplotlib display.
    """
    # display and environment-generation options
    self.rendermode = rendermode
    self.cutoffpenaltyscalar = penaltyscalar
    self.rg_prob = rg_prob
    self.savepath = savepath
    self.savedenv = '%s/environment' % savepath
    self.policy = policy

    # per-episode bookkeeping
    self.framecounter = 0
    self.actionslist = []
    self.reward = 0
    self.discountedmined = 0
    self.turncounter = 1
    self.i = -1
    self.j = -1
    self.terminal = False
    self.gamma = gamma

    # model extents; RL (z coordinate) counts up as depth increases
    self.Imin, self.Imax = 0, x
    self.Jmin, self.Jmax = 0, y
    self.RLmin, self.RLmax = 0, z
    self.mined = -1
    self.callnumber = 1
    self.Ilen = self.Imax - self.Imin
    self.Jlen = self.Jmax - self.Jmin
    self.RLlen = self.RLmax - self.RLmin

    # observation geometry: three channels over the full 3D model
    self.channels = 3
    self.flatlen = self.Ilen * self.Jlen * self.RLlen * self.channels

    # block dependency dictionaries
    self.block_dic = {}
    self.block_dic_init = {}
    self.dep_dic = {}
    self.dep_dic_init = {}
    self.eff_dic_init = {}

    # build the block model (populates the dependency dictionaries)
    self.automodel = automodel(self.Ilen, self.Jlen, self.RLlen)
    self.build()

    # max number of turns (actions) per episode as a fraction of model size
    self.turns = round(len(self.dep_dic) * turnspc, 0)

    # Actions are taken on a 2D checkerboard-style view of the environment;
    # progress is made downwards in 3D over time. (A +1 terminal action was
    # considered but is disabled.)
    self.action_space = spaces.Discrete((self.Ilen) * (self.Jlen))

    if self.policy == 'CnnPolicy':
        # observations are the entire 3D environment, one channel per attribute
        self.observation_space = spaces.Box(low=-1, high=1,
                                            shape=(self.Ilen, self.Jlen, self.RLlen, self.channels),
                                            dtype=np.float64)
    elif self.policy == 'MlpPolicy':
        # observations are the entire environment, flattened to 1D
        self.observation_space = spaces.Box(low=-1, high=1,
                                            shape=(self.flatlen,),
                                            dtype=np.float64)

    # experimental: penalise the agent for not mining (do nothing),
    # reward it for taking action
    self.init_cutoffpenalty = self.cutoffpenalty()
    # mean of (H2O * Tonnes) over the whole model
    self.averagereward = np.average(np.multiply(self.geo_array[:, :, :, 0],
                                                self.geo_array[:, :, :, 1]))
def __init__(self, x, y, z, gamma, rendermode='off', turnspc=0.5):
    """Initialise the environment: build and normalise the geology model,
    construct block dependencies, and define the gym spaces.

    Parameters:
        x, y, z: block model extents along I, J and RL axes.
        gamma: discount factor exponent.
        rendermode: 'on'/'off' matplotlib display of the block model.
        turnspc: fraction of the block model size used as the maximum
            number of turns per episode. Default 0.5 preserves the
            previously hard-coded value.
    """
    self.rendermode = rendermode

    # per-episode bookkeeping
    self.actionslist = list()
    self.turnore = 0
    self.discountedmined = 0
    self.turncounter = 1
    self.i = -1
    self.j = -1
    self.terminal = False
    self.gamma = gamma

    # model extents; RL counts up as depth increases
    self.Imin = 0
    self.Imax = x
    self.Jmin = 0
    self.Jmax = y
    self.RLmin = 0
    self.RLmax = z
    self.Ilen = self.Imax - self.Imin
    self.Jlen = self.Jmax - self.Jmin
    self.RLlen = self.RLmax - self.RLmin

    # block dependency dictionaries
    self.block_dic = {}
    self.block_dic_init = {}
    self.dep_dic = {}

    # observation geometry: three channels (H2O, Tonnes, mined state)
    self.channels = 3
    self.flatlen = self.Ilen * self.Jlen * self.RLlen * self.channels
    self.mined = -1
    self.callnumber = 1

    # generate the raw geology model
    model_builder = automodel()
    self.geo_array = model_builder.buildmodel(self.Ilen, self.Jlen, self.RLlen)

    # Normalise the H2O and Tonnes channels to [0, 1]; the mined-state
    # channel is passed through unscaled. fit_transform refits the scaler
    # per channel, so each channel is scaled independently.
    scaler = MinMaxScaler()
    h2o_scaled = scaler.fit_transform(self.geo_array[:, :, :, 0].reshape([-1, 1]))
    tonnes_scaled = scaler.fit_transform(self.geo_array[:, :, :, 1].reshape([-1, 1]))
    state_flat = self.geo_array[:, :, :, 2].reshape([-1, 1])

    grid_shape = [self.Ilen, self.Jlen, self.RLlen, 1]
    h2o_grid = h2o_scaled.reshape(grid_shape)
    tonnes_grid = tonnes_scaled.reshape(grid_shape)
    state_grid = state_flat.reshape(grid_shape)
    self.norm = np.append(h2o_grid, tonnes_grid, axis=3)
    self.norm = np.append(self.norm, state_grid, axis=3)

    # working copy of the normalised observation
    self.ob_sample = deepcopy(self.norm)

    # construct block dependencies (with padding) and reset the block dict
    self.construct_dep_dic()
    self.construct_block_dic()
    self.block_dic = deepcopy(self.block_dic_init)
    self.render_update = self.geo_array[:, :, :, 0]

    # max number of turns per episode as a fraction of the block model size
    self.turns = round(len(self.dep_dic) * turnspc, 0)

    # one discrete action per 2D (I, J) column of the model
    self.action_space = spaces.Discrete((self.Ilen) * (self.Jlen))
    # flattened observation of the whole 3D model
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.flatlen,),
                                        dtype=np.float64)