def reset(self):
    """Start a new epoch: re-randomise the environment state and flush the
    accumulated epoch reward to the metric stream and logbook.

    Returns:
        np.ndarray: the freshly sampled 2-element state vector.
    """
    # Small symmetric perturbation around zero, as in the classic gym control envs.
    self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(2,))
    # Floydhub-style metric line: this exact JSON shape is parsed from stdout,
    # so the key names and spacing must stay stable.
    print(f'{{"metric": "epoch_reward", "value": {self.epoch_reward}, "step":{self.total_steps}}}')
    logbook().record_epoch_reward(self.epoch_reward)
    # Reward accumulator is per-epoch; zero it for the next run of steps.
    self.epoch_reward = 0
    return np.array(self.state)
def reset(self):
    """End-of-epoch hook that deliberately does NOT re-randomise the state.

    The backend simulation is a continuation, so state carries over between
    epochs; epoch boundaries exist only so the agent optimises over a bidding
    period (e.g. daily). NOTE(review): on the very first call, self.state is
    assumed to have been initialised elsewhere (reportedly to obs_low) -
    confirm against the caller.

    Returns:
        np.ndarray: the current (carried-over) state vector.
    """
    # Former behaviour, kept for reference:
    # self.state = self.np_random.uniform(low=0, high=0.05, size=(8,))
    # Floydhub-style metric line: this exact JSON shape is parsed from stdout.
    print(f'{{"metric": "epoch_reward", "value": {self.epoch_reward}, "step":{self.total_steps}}}')
    logbook().record_epoch_reward(self.epoch_reward)
    # Reward accumulator is per-epoch; zero it for the next run of steps.
    self.epoch_reward = 0
    return np.array(self.state)
def reset(self):
    """End-of-epoch bookkeeping: emit metrics, record the epoch reward, and
    periodically checkpoint the logbook to disk.

    State is deliberately NOT re-randomised - the backend simulation is a
    continuation, and epochs exist only so the agent optimises over a bidding
    period. NOTE(review): on the first call, self.state is assumed to have been
    initialised elsewhere (reportedly to obs_low) - confirm against the caller.

    Returns:
        np.ndarray: the current (carried-over) state vector.
    """
    # Former behaviour, kept for reference:
    # self.state = self.np_random.uniform(low=0, high=0.05, size=(8,))
    # Floydhub-style metric lines: this exact JSON shape is parsed from stdout.
    print(f'{{"metric": "epoch_reward", "value": {self.epoch_reward}, "step":{self.total_steps}}}')
    # Bid diversity over the last 50 steps - a cheap proxy for policy collapse.
    print(f'{{"metric": "unique_bids", "value": {logbook().get_num_unique_bids(previous_steps=50)}, "step":{self.total_steps}}}')
    logbook().record_epoch_reward(self.epoch_reward)
    # Reward accumulator is per-epoch; zero it for the next run of steps.
    self.epoch_reward = 0
    # Every 20000 steps, write results to file in case of dramatic failure.
    # NOTE: also fires when total_steps == 0, since 0 % 20000 == 0.
    if self.total_steps % 20000 == 0:
        logbook().save_json(label=str(self.label))
    return np.array(self.state)
def render(self, mode='human'):
    """Record the current market snapshot into the logbook suite.

    Logs the clearing price, the demand, and one record per bid currently in
    the bid stack, all stamped with the current step count. Always returns
    None (no on-screen rendering).
    """
    step = self.total_steps
    snapshot = self._state_dict
    logbook().record_price(snapshot['price'], step)
    logbook().record_demand(snapshot['demand'], step)
    # One logbook record per participant bid in the stack.
    for entry in snapshot['all_bids']:
        logbook().record_bid(entry['label'], entry['price'], entry['quantity'], step)
    return None
sys.exit() # Make sure that the participant name is one of the allowed ones. elif sys.argv[1] not in market_config['PARTICIPANTS']: print( 'Error: Participant not in list of possible participants. Must be one of:' ) [print(" -" + p) for p in market_config['PARTICIPANTS']] sys.exit() else: participant_name = sys.argv[1] # ENV_NAME = 'CartPole-v0' # ENV_NAME = 'MultiBidMarket-v0' ENV_NAME = 'MultiBidMarketEfficient-v0' logbook().record_metadata('Environment', ENV_NAME) logbook().record_metadata('datetime', pendulum.now().isoformat()) for param in market_config: logbook().record_metadata('Market: ' + param, market_config[param]) # # Set the tensorflow memory growth to auto - this is important when running two simultaneous models # # Otherwise, the first process hogs all the memory and the second (the one that we watch the output of) # # gets a CUDA_ERROR_OUT_OF_MEMORY message and crashes. Instead of the majority of below, you can also use config.gpu_options.allow_growth = True #Set automatically - takes some time. # config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.95 / float(len(market_config['PARTICIPANTS'])) # Alternatively, allocate as a fraction of the available memory: # sess = tf.Session(config=config) # K.set_session(sess) # Get the environment and extract the number of actions. env = gym.make(ENV_NAME) # Wrap so that we have a discrete action space - maps the internal MultiDiscrete action space to a Discrete action space.
from marketsim.logbook.logbook import logbook

import pendulum

# Smoke-test script: push a labelled run with two hyperparameters to the
# logbook service, then submit it.
label = "TEST" + " " + pendulum.now().format('ddd D/M HH:mm')
logbook().set_label(label)
for hp_name, hp_value in (('alpha', 1), ('beta', 2)):
    logbook().record_hyperparameter(hp_name, hp_value)
logbook().submit()