class TestIntegration(unittest.TestCase):
    """Integration test wiring the demand Estimator to the wholesale manager.

    Both components are subscribed to the dispatcher in ``setUp`` so that a
    prediction published by the estimator can be picked up by the wholesale
    environment manager within the same test.
    """

    def setUp(self):
        """Create a subscribed Estimator (mock model) and wholesale manager."""
        # the mocked model always "predicts" the values 0..23 as a (1, 24) array
        self.mm = Mock()
        self.mm.predict.return_value = np.arange(0, 24, dtype=np.float64).reshape((1, 24))
        self.e = Estimator(self.mm)
        self.e.customer_counts["jim"] = 1
        self.e.customer_populations["jim"] = 1
        self.e.subscribe()

        # mocking some values in the estimator
        for i in range(TIMESLOT_NOW):
            self.e._apply_usage('jim', i, i)
        self.e.scalers['jim'] = MinMaxScaler().fit(
            np.array([1, 1000], dtype=np.float64).reshape(-1, 1))

        # create a new env manager
        self.reward_mock = Mock()
        self.reward_mock.return_value = 0
        self.wem = WholesaleEnvironmentManager(Mock(), self.reward_mock)
        self.wem.get_historical_prices = Mock()
        self.wem.get_historical_prices.return_value = np.zeros(168)
        self.wem.subscribe()

    def tearDown(self):
        """Detach both components from the dispatcher again."""
        self.e.unsubscribe()
        self.wem.unsubscribe()

    def test_estimator_wholesale_integration(self):
        """A completed timeslot yields one prediction batch and 24 orders."""
        # the integration actually requires a proper Model
        self.wem.agent = BaselineTrader()

        # listen for predictions calculated
        predictions_received: List[List[CustomerPredictions]] = []

        def listen_pred_ev(signal, sender, msg):
            predictions_received.append(msg)

        dispatcher.connect(listen_pred_ev, signals.COMP_USAGE_EST)
        # registered as cleanup so the listener is removed even when an
        # assertion below fails
        self.addCleanup(dispatcher.disconnect, listen_pred_ev, signals.COMP_USAGE_EST)

        # listen for wholesale orders
        orders_received = []

        def listen_orders_ev(signal, sender, msg):
            orders_received.append(msg)

        dispatcher.connect(listen_orders_ev, signals.OUT_PB_ORDER)
        self.addCleanup(dispatcher.disconnect, listen_orders_ev, signals.OUT_PB_ORDER)

        # 1. send some market messages that get picked up by wholesale
        dispatcher.send(signal=signals.PB_TIMESLOT_UPDATE,
                        msg=PBTimeslotUpdate(firstEnabled=TIMESLOT_NOW,
                                             lastEnabled=TIMESLOT_NOW + 24))
        # no cleared trades or market transactions necessary for core test
        self.assertEqual(len(orders_received), 0)

        # 2. send pubsub messages that trigger prediction
        dispatcher.send(signal=signals.PB_TIMESLOT_COMPLETE,
                        msg=PBTimeslotComplete(timeslotIndex=TIMESLOT_NOW - 1))
        self.assertEqual(len(predictions_received), 1)
        # predictions are lists of predictions for each customer
        self.assertEqual(len(predictions_received[0][0].predictions), 24)

        # 3. meanwhile the wholesale agent reacted to the predictions and
        #    sent its orders
        self.assertEqual(len(orders_received), 24)
def start(self, max_games=None):
    """Run the offline learning loop, driven 'from the server' side.

    For as long as ``self.game_numbers`` is non-empty a game is loaded, a
    fresh ``WholesaleEnvironmentManager`` is created and subscribed, and the
    game is stepped through via ``step_game``.

    :param max_games: optional upper bound; the loop stops once
        ``self.games_played`` has reached it. A falsy value means no bound.
    :return: ``self.reward_average`` as it stands when the loop ends.
    """
    while self.game_numbers:
        # reward statistics are tracked per game
        self.reward_count = 0
        self.reward_average = 0

        # loads the data of the next game
        self.new_game()
        first = self.get_first_timestep()
        self.current_timestep = first
        self.first_timestep = first

        # every game gets its own environment manager
        manager = WholesaleEnvironmentManager(self.agent, self.reward_function)
        self.env_manager = manager
        manager.subscribe()

        self.step_game()
        self.games_played += 1

        if max_games and self.games_played >= max_games:
            break

    return self.reward_average
def setUp(self):
    """Build a subscribed Estimator with a mocked model and a subscribed
    WholesaleEnvironmentManager with mocked agent, reward and price history."""
    # model mock: every predict() call returns the values 0..23, shape (1, 24)
    forecast = np.arange(24, dtype=np.float64).reshape(1, 24)
    self.mm = Mock()
    self.mm.predict.return_value = forecast

    self.e = Estimator(self.mm)
    self.e.customer_counts["jim"] = 1
    self.e.customer_populations["jim"] = 1
    self.e.subscribe()

    # mocking some values in the estimator
    for timeslot in range(TIMESLOT_NOW):
        self.e._apply_usage('jim', timeslot, timeslot)
    scaler_bounds = np.array([1, 1000], dtype=np.float64).reshape(-1, 1)
    self.e.scalers['jim'] = MinMaxScaler().fit(scaler_bounds)

    # create a new env manager
    self.reward_mock = Mock(return_value=0)
    self.wem = WholesaleEnvironmentManager(Mock(), self.reward_mock)
    self.wem.get_historical_prices = Mock(return_value=np.zeros(168))
    self.wem.subscribe()
def compete(continuous, demand_model, wholesale_model):
    """Take part in a powertac competition.

    Subscribes the message cache, the demand estimator, the wholesale
    environment manager and the tariff publisher, starts the GRPC server and
    then blocks until interrupted from the keyboard.

    :param continuous: not used by this function body.
    :param demand_model: passed to ``ModelWriter`` to load the demand model.
    :param wholesale_model: not used by this function body; the wholesale
        agent is currently always a ``BaselineTrader``.
    """
    from agent_components.wholesale.environments.WholesaleEnvironmentManager import WholesaleEnvironmentManager
    from agent_components.wholesale.learning.baseline import BaselineTrader
    from agent_components.demand.estimator import Estimator
    from agent_components.tariffs.publisher import TariffPublisher
    from util.learning_utils import ModelWriter
    from communication import messages_cache

    # bootstrapping logging and caching of messages
    messages_cache.subscribe()

    # bootstrapping models from stored data
    stored_model = ModelWriter(demand_model, False).load_model()
    demand_estimator = Estimator(stored_model)
    demand_estimator.subscribe()

    # TODO wholesale_trader dynamic loading
    trader = BaselineTrader()
    wholesale_manager = WholesaleEnvironmentManager(trader, None)
    wholesale_manager.subscribe()

    # simple tariff mirroring
    tariff_publisher = TariffPublisher()
    tariff_publisher.subscribe()

    # GRPC comm with powertac
    import communication.powertac_communication_server as grpc_com

    # subscribing to outgoing messages
    grpc_com.submit_service.subscribe()

    # main comm thread
    server = grpc_com.serve()
    try:
        # idle here until the user interrupts
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        server.stop(0)
class LogEnvManagerAdapter(SignalConsumer):
    """This class simulates a powertac trading environment but is based on logs of historical games.

    It assumes that the broker actions have no impact on the clearing price, which is a reasonable estimation for any
    market that has a large enough volume in relation to the broker trading volume. Of course this does not apply once
    the broker is large enough to itself have an influence on the clearing prices.

    In PowerTAC, the broker will actually have a significant impact on the prices. Therefore this is an optimistic first
    stage learner for the broker. It will allow it to learn a certain base amount but will underperform once it acts
    in a live environment.

    The basic skills the broker learns in the wholesale trading are as follows:

    - based on a (changing) demand forecast, try to equalize the portfolio so that the broker doesn't incur any
      balancing costs by the DU
    - try to pay as little as possible for the energy needed at timeslot x. Buying earlier is cheaper but riskier

    These two goals are reasoned by the following assumptions:

    The wholesale trader has no influence on the amount of energy needed by its customers. This is a partial truth
    because some brokers may be able to curtail their customers' usage if market prices are too high and the cost of
    curtailing the customer is valued less than the cost of purchasing and delivering the energy. Because the current
    broker implementation does not make use of this ability, the assumption is correct.

    Another assumption is the idea of the agent's actions not influencing the clearing price. The server logs suggest
    clearing amounts of low two digit megawatt per timeslot. If the broker simply tries to predict small amounts of
    energy, this assumption is appropriate. A broker that only represents a few dozen private households would therefore
    trade small kilowatt amounts per timeslot, barely influencing the market prices. An on-policy RL agent may therefore
    still learn successfully, despite the fact that the environment doesn't *actually* react to its actions.

    To allow the broker to learn with offline files, the following process is taken:

    - Creation of market price statistics with the `org.powertac.logtool.example.MktPriceStats` class
    - Creation of usage data with the `org.powertac.logtool.example.CustomerProductionConsumption` class
    - selecting a small set of customers as a permanent customer portfolio for the broker
    - passing observations to the agent
        - predictions from the demand predictor or alternatively, a true prediction (i.e. the real value) or a noisy
          prediction where the noise may be adapted
        - historical market clearing prices
    - rewards based on reward calculation function
    """

    def __init__(self, agent: PowerTacWholesaleAgent, reward_function):
        """Store agent and reward function and index the available log files.

        :param agent: the wholesale agent handed to every environment manager
        :param reward_function: handed to every environment manager
        """
        # handling params
        self.agent = agent
        self.games_played = 0
        self.env_manager: WholesaleEnvironmentManager = None
        self.step_rewards = 0
        self.reward_function = reward_function
        # orders collected via handle_order since the last evaluation
        self.orders: List[PBOrder] = []
        # current timestep. the next X are open for trading (X set in config)
        self.current_timestep = 0
        self.first_timestep = self.current_timestep

        # ---------------------------------------------------------------
        # base data to generate the mdp from. Needs to be reset on new game
        # ---------------------------------------------------------------
        # self.initial_timeslot = 0
        self.wholesale_data = {}  # timestep -> array of clearing rows
        self.demand_data = None  # careful here, its kWh!

        # ---------------------------------------------------------------
        # stays until training is completed
        # ---------------------------------------------------------------
        # for mocking the market with the log files
        self._wholesale_files = get_wholesale_file_paths()
        self._demand_files = get_usage_file_paths()
        self.game_numbers = self._make_random_game_order()
        self.reward_average = 0
        self.reward_count = 0

    def subscribe(self):
        """Connect the order and reward handlers to the dispatcher."""
        # need to catch all orders and determine if they lead to clearing
        dispatcher.connect(self.handle_order, signal=signals.OUT_PB_ORDER)
        dispatcher.connect(self.handle_reward, signal=signals.COMP_WS_REWARD)

    def unsubscribe(self):
        """Disconnect every handler that ``subscribe`` connected."""
        dispatcher.disconnect(self.handle_order, signal=signals.OUT_PB_ORDER)
        # the reward handler was previously left connected here
        dispatcher.disconnect(self.handle_reward, signal=signals.COMP_WS_REWARD)

    def handle_order(self, sender, signal, msg: PBOrder):
        """Remember an outgoing order for evaluation in the next step."""
        self.orders.append(msg)

    def handle_reward(self, sender, signal, msg: float):
        """Fold a new reward into the running mean ``reward_average``."""
        seen = self.reward_count
        total = seen + 1
        self.reward_average = self.reward_average * (seen / total) + msg * (1 / total)
        self.reward_count = total

    def start(self, max_games=None):
        """Starts the learning, with control coming 'from the server', not from the agent.

        :param max_games: optional bound on the number of games to play
        :return: the reward average of the last game played

        NOTE(review): when ``cfg.WHOLESALE_OFFLINE_TRAIN_GAME`` is set,
        ``new_game`` does not pop from ``game_numbers``, so without
        ``max_games`` this loop has no visible exit condition.
        """
        # take random game
        # start stepping through timeslots, 24 at a time
        while self.game_numbers:
            self.reward_count = 0
            self.reward_average = 0
            # pops one from numbers
            self.new_game()
            self.current_timestep = self.get_first_timestep()
            self.first_timestep = self.current_timestep

            # create env_manager
            self.env_manager = WholesaleEnvironmentManager(
                self.agent, self.reward_function)
            self.env_manager.subscribe()
            self.step_game()
            self.games_played += 1
            if max_games and max_games <= self.games_played:
                break
        return self.reward_average

    def step_game(self):
        """Loop per game. Simulates all events coming from the server and, by
        listening to the PBOrder events, responds to agent actions.

        Runs until ``wholesale_data`` has been emptied.
        """
        while self.wholesale_data:
            # evaluate any orders received in previous step and send PBMarketTransaction
            self.evaluate_orders_received()
            # normally triggers demand forecasting --> predictions
            self.simulate_timeslot_complete()

            # ----- Timeslot cut -----

            # send out Transactions by customers
            self.simulate_tariff_transactions()
            # send out PBTimeslotUpdate --> triggers wholesale backward learning cycle
            self.simulate_timeslot_update()
            # send out Predictions based on DemandData --> triggers wholesale trader (forward)
            self.simulate_predictions()
            # send out PBClearedTrade for the next 24h timesteps
            self.simulate_cleared_trade()
            # simulate balancing_transactions
            self.simulate_balancing_transactions()

            # the stepping is sort of "half dependent" on previous data.
            # in Timestep 363, ClearedTrades and MarketTransactions that refer to 362 are given out.
            # therefore at THE END of the step, the PREVIOUS timestep data is deleted
            self.wholesale_data.pop(self.current_timestep - 1, None)
            self.current_timestep += 1

    def new_game(self):
        """Load data for a new game into the object."""
        # get new game number
        if not self.game_numbers:
            self.game_numbers = self._make_random_game_order()
        if hasattr(cfg, 'WHOLESALE_OFFLINE_TRAIN_GAME'):
            # only using this one game!
            gn = cfg.WHOLESALE_OFFLINE_TRAIN_GAME
        else:
            gn = self.game_numbers.pop()
        # getting data and storing it locally
        self.make_data_for_game(gn)

    def make_data_for_game(self, i):
        """Load wholesale and demand data from the ``i``-th log file pair."""
        self.make_wholesale_data(self._wholesale_files[i])
        self.make_demand_data(self._demand_files[i])

    def make_wholesale_data(self, wholesale_file_path):
        """Parse a wholesale log file into ``self.wholesale_data``.

        Each parsed row contributes its first element as the timestep key and
        everything from the fourth element onwards as the stored array.
        """
        with open(wholesale_file_path) as file:
            wholesale_data = parse_wholesale_file(file)
        for ts in wholesale_data:
            self.wholesale_data[ts[0]] = np.array(ts[3:])

    def make_demand_data(self, demand_file_path):
        """Parse a usage log and keep a scaled-down sample of 30 customers."""
        # let's reuse this
        # resetting first
        demand_data.clear()
        # getting unscaled predictions
        demand_data.parse_usage_game_log(demand_file_path, pp_type='none')
        demand = demand_data.get_demand_data_values()
        # using only random 30 picks from customers (drawn with replacement)
        idx = np.random.randint(0, high=len(demand), size=30)
        demand = demand[idx, :]
        # make the demand smaller (1/10th) to simulate the broker only having 1/10th of the selected customers demand.
        # this is because a large portion of the customer demand is actually generated by population scale models.
        # and the broker only gets a part of that demand
        demand = demand / 10
        self.demand_data = demand

    def _make_random_game_order(self):
        """Return the list of game indices to play, shuffled if configured.

        The indices run from 1 up to (excluding) the shorter of the two file
        lists, so index 0 is never part of the result.
        """
        # whichever is shorter.
        max_game = min(len(self._wholesale_files), len(self._demand_files))
        game_numbers = list(range(1, max_game))
        if cfg.WHOLESALE_OFFLINE_TRAIN_RANDOM_GAME:
            # mix up all the game numbers
            # for reproducibility and comparability, only shuffling when set in config
            random.shuffle(game_numbers)
        return game_numbers

    def get_first_timestep(self):
        """Return the smallest timestep key present in ``wholesale_data``."""
        return np.array(list(self.wholesale_data.keys())).min()

    def simulate_cleared_trade(self):
        """Simulates the sending of PBClearedTrade messages for the next [t-1,t-1+24] timesteps."""
        last_step = self.current_timestep - 1
        cleared_steps = range(last_step, last_step + cfg.WHOLESALE_OPEN_FOR_TRADING_PARALLEL)
        for i, s in enumerate(cleared_steps):
            if s not in self.wholesale_data:
                break
            data = self.wholesale_data[s]
            # going from the back because the first cleared_steps step is cleared at its last clearing
            cleared_data = data[23 - i]
            trade = PBClearedTrade(timeslot=s,
                                   executionMWh=cleared_data[0],
                                   executionPrice=cleared_data[1])
            dispatcher.send(signals.PB_CLEARED_TRADE, msg=trade)

    def simulate_timeslot_update(self):
        """Simulates the TimeslotUpdate message."""
        now = self.current_timestep
        dispatcher.send(
            signals.PB_TIMESLOT_UPDATE,
            msg=PBTimeslotUpdate(firstEnabled=now,
                                 lastEnabled=now + cfg.WHOLESALE_OPEN_FOR_TRADING_PARALLEL))

    def simulate_balancing_transactions(self):
        """Send a balancing transaction for the first environment, if any,
        whose ``_step`` has reached 24."""
        # get env where the final step has been completed
        envs = [
            e for e in self.env_manager.environments.values() if e._step >= 24
        ]
        if not envs:
            return
        env: PowerTacEnv = envs[0]
        tx = self.generate_du_balancing_tx(env)
        dispatcher.send(signals.PB_BALANCING_TRANSACTION, msg=tx)

    def generate_du_balancing_tx(self, env: PowerTacEnv) -> PBBalancingTransaction:
        """Helper function that simulates the DU fee for offline based training."""
        market_trades = [[tr.executionMWh, tr.executionPrice]
                         for tr in env.cleared_trades]
        realized_usage = env.realized_usage
        average_market = calculate_running_averages(np.array([market_trades]))[0]
        balancing_needed = calculate_balancing_needed_obj(
            env.purchases, realized_usage)

        # seen as a "forced transaction" of similar logic as the Market TX
        if balancing_needed > 0:
            # being forced to buy for 5x the market price! try and get your kWh in ahead of time is what it learns
            du_trans = [balancing_needed, -1 * average_market * 5]
        elif balancing_needed < 0:
            # getting only a 0.5 of what the normal market price was
            du_trans = [balancing_needed, 0.5 * average_market]  # TODO to config
        else:
            du_trans = [0, 0]
        # but the BalancingTX is actually kWh and the energy sign is reverse (positive for surplus, not negative)
        return PBBalancingTransaction(postedTimeslot=env._target_timeslot,
                                      kWh=du_trans[0] * 1000 * -1,
                                      charge=du_trans[1])

    def evaluate_orders_received(self):
        """Evaluate the collected orders, emit a PBMarketTransaction for every
        cleared one and reset the order list."""
        cleared_mask = []
        for o in self.orders:
            # ignore orders at the end of a game
            if o.timeslot not in self.wholesale_data:
                continue
            distance = o.timeslot - self.current_timestep
            ts_data = self.wholesale_data[o.timeslot]
            market_clearing = ts_data[distance]

            cleared, prob = is_cleared_with_volume_probability(
                o, market_clearing)
            cleared_mask.append((cleared, distance))
            if not cleared:
                continue

            # assuming we only get a part of what we want
            volume_received = o.mWh * prob
            # NOTE(review): an earlier revision computed a sign-adjusted price
            # here (negated when o.mWh > 0) but never used it; the transaction
            # has always carried the raw clearing price. Confirm which sign
            # convention the consumers expect before changing this.
            dispatcher.send(signal=signals.PB_MARKET_TRANSACTION,
                            msg=PBMarketTransaction(
                                price=market_clearing[1],
                                mWh=volume_received,
                                timeslot=o.timeslot))

        log.info("Cleared timesteps: %s",
                 ' '.join([str(i[1]) for i in cleared_mask if i[0]]))
        self.orders = []

    def simulate_timeslot_complete(self):
        """Announce the previous timestep as complete."""
        dispatcher.send(
            signals.PB_TIMESLOT_COMPLETE,
            PBTimeslotComplete(timeslotIndex=self.current_timestep - 1))

    def simulate_predictions(self):
        """Send per-customer predictions taken from the logged demand data,
        optionally fuzzed when a forecast error is configured."""
        fts = self.first_timestep
        start = self.current_timestep + 1
        end = start + cfg.DEMAND_FORECAST_DISTANCE
        forecast = self.demand_data[:, start - fts:end - fts]
        forecast = forecast / 1000  # dividing by 1000 to turn kWh into mWh
        if cfg.WHOLESALE_FORECAST_ERROR_PER_TS > 0:
            forecast = np.array([
                fuzz_forecast_for_training(customer_data)
                for customer_data in forecast
            ])
        preds = [
            CustomerPredictions("customer{}".format(cust_number),
                                predictions=customer_data,
                                first_ts=start)
            for cust_number, customer_data in enumerate(forecast)
        ]
        dispatcher.send(signals.COMP_USAGE_EST, msg=preds)

    def simulate_tariff_transactions(self):
        """Send one simulated PBTariffTransaction per customer for the
        current timestep; does nothing once the demand data is exhausted."""
        timestep = self.current_timestep - self.first_timestep
        # '>=' because column index len(...) is already out of bounds
        if timestep >= len(self.demand_data[0]):
            return
        usages = self.demand_data[:, timestep]
        for u in usages:
            t = CONSUME if u < 0 else PRODUCE
            dispatcher.send(signals.PB_TARIFF_TRANSACTION,
                            msg=PBTariffTransaction(
                                txType=t,
                                kWh=u,
                                postedTimeslot=self.current_timestep))
def setUp(self):
    """Create a WholesaleEnvironmentManager backed by a mocked agent and a
    mocked reward function that always returns 0."""
    self.agent_mock = Mock()
    self.reward_mock = Mock(return_value=0)
    self.env_mgr = WholesaleEnvironmentManager(
        agent=self.agent_mock,
        reward_function=self.reward_mock,
    )
class TestWholesaleEnvironmentManager(unittest.TestCase):
    """Unit tests for the message handlers of WholesaleEnvironmentManager."""

    def setUp(self):
        """Fresh manager with mocked agent and a reward function returning 0."""
        self.agent_mock = Mock()
        self.reward_mock = Mock(return_value=0)
        self.env_mgr = WholesaleEnvironmentManager(agent=self.agent_mock,
                                                   reward_function=self.reward_mock)

    def test_handle_market_transaction(self):
        """Transactions are forwarded to the environment of their timeslot."""
        transaction = PBMarketTransaction(timeslot=1, mWh=2, price=-12)
        env = Mock()
        self.env_mgr.environments[1] = env
        self.env_mgr.handle_market_transaction(None, None, transaction)
        env.handle_market_transaction.assert_called_with(transaction)

        # should err if the env hasn't been added yet
        # NOTE(review): the three handler arguments are passed as ONE list, so
        # this may pass because of a missing-argument TypeError rather than
        # because of the unknown timeslot -- confirm and unpack the arguments.
        packed_args = [None, None, PBMarketTransaction(timeslot=2)]
        self.assertRaises(Exception, self.env_mgr.handle_market_transaction, packed_args)

    def test_handle_timeslot_update(self):
        """A timeslot update adds the enabled slots and drops the old one."""
        update = PBTimeslotUpdate(firstEnabled=1, lastEnabled=24)
        stale_env = Mock()
        self.env_mgr.environments[0] = stale_env
        stale_env.predictions = [1]
        stale_env.purchases = [PBMarketTransaction(mWh=1, price=2)]
        stale_env.actions = [[1, 2]]
        self.env_mgr.agent = "agent"

        with patch.object(self.env_mgr, 'get_historical_prices') as hp_mock:
            hp_mock.return_value = [1, 2, 3]
            self.env_mgr.handle_timeslot_update(None, None, update)

        envs = self.env_mgr.environments
        # adds the new ones
        self.assertIn(1, envs)
        self.assertIn(24, envs)
        # removes the old ones
        self.assertNotIn(0, envs)
        # historicals added to the new ones
        self.assertEqual(list(envs[1]._historical_prices), [1, 2, 3])
        # agent set on the new ones
        self.assertEqual(envs[1].agent, "agent")
        self.assertEqual(len(envs.keys()), 24)

    def test_historical_prices(self):
        """Historical prices are returned as a 168-slot window."""
        for slot in range(200):
            self.env_mgr.append_historical(
                PBClearedTrade(timeslot=slot, executionPrice=slot, executionMWh=1))
        # one extra to show the averaging works
        self.env_mgr.append_historical(
            PBClearedTrade(timeslot=198, executionPrice=1, executionMWh=3))

        # for the timeslot just after all historical prices
        window = self.env_mgr.get_historical_prices(200)
        self.assertEqual(len(window), 168)
        self.assertEqual(window[0], 200 - 168)
        self.assertEqual(window[-1], 199)
        # slot 198 now holds (price 198, 1 MWh) and (price 1, 3 MWh);
        # (198*1 + 1*3) / 4 = 50.25 -- presumably a volume-weighted average
        self.assertEqual(window[-2], 50.25)

        # in the bootstrap situation, we have 336 timeslots and for some
        # reason start at 360. Therefore, it's 24h "lost"
        window = self.env_mgr.get_historical_prices(200 + 24)
        self.assertEqual(len(window), 168)
        self.assertEqual(window[-1], window[-24])
        self.assertEqual(window[0], 224 - 168)

    def test_handle_predictions(self):
        """Incoming predictions make the agent act on a PowerTacEnv."""
        self.agent_mock.forward.return_value = ([0, 0], None, None)
        with patch.object(self.env_mgr, 'get_historical_prices') as hp_mock:
            hp_mock.return_value = np.zeros(168)
            # create some active timeslots --> active environments
            self.env_mgr.handle_timeslot_update(
                None, None, PBTimeslotUpdate(firstEnabled=169, lastEnabled=169 + 24))

            # some mock preds
            preds: List[CustomerPredictions] = [
                CustomerPredictions("jim{}".format(n), np.arange(24), 169)
                for n in range(3)
            ]
            # call
            self.env_mgr.handle_predictions(None, None, preds)

        # the agent must have been called with an environment as first argument
        forward_call = self.agent_mock.forward.call_args
        self.assertIsInstance(forward_call[0][0], PowerTacEnv)

    def test_handle_cleared_trade(self):
        """Every cleared trade is appended to the historical prices."""
        self.env_mgr.handle_timeslot_update(
            None, None, PBTimeslotUpdate(firstEnabled=1, lastEnabled=1))
        trade = PBClearedTrade(timeslot=1, executionMWh=2, executionPrice=3)
        for _ in range(3):
            self.env_mgr.handle_cleared_trade(None, None, trade)
        self.assertEqual(len(self.env_mgr.historical_average_prices[1]), 3)

    def test_get_sums_from_preds(self):
        """Predictions of all customers are summed up per timeslot."""
        # five customers predicting a constant 0, 1, 2, 3 and 4 --> sum is 10
        preds = [
            CustomerPredictions("john", np.full(24, float(level)), first_ts=1)
            for level in range(5)
        ]
        sums = self.env_mgr.get_sums_from_preds(preds)
        for slot in range(1, 25):
            self.assertEqual(10, sums[slot])

    def test_handle_market_bootstrap_data(self):
        """Bootstrap data seeds one (mWh, price) entry per timeslot."""
        volumes = np.arange(360)
        prices = np.arange(360) * 10
        bootstrap = PBMarketBootstrapData(mwh=volumes, marketPrice=prices)
        self.env_mgr.handle_market_bootstrap_data(None, None, bootstrap)
        history = self.env_mgr.historical_average_prices
        for slot in range(360):
            self.assertEqual(history[slot][0][0], slot)
            self.assertEqual(history[slot][0][1], slot * 10)

    def test_multiple_coroutines(self):
        """A test for myself. Learning how to use coroutines."""
        received = []

        # creating two callables and calling one from other
        def callable2():
            observation = (yield)
            received.append(observation)

        def callable1():
            return callable2()

        # run this a couple times, I wanna see if I can have many generators
        coroutines = [callable1() for _ in range(5)]

        # assume they are all generators
        for coro in coroutines:
            self.assertIsInstance(coro, Generator)

        # now let's pass them all some observations
        for position, coro in enumerate(coroutines):
            self.assertLess(len(received), position + 1)
            next(coro)
            try:
                coro.send(position)
            except StopIteration:
                # a generator throws a StopIteration when it is completed
                pass
            self.assertEqual(len(received), position + 1)