Example #1
class TestIntegration(unittest.TestCase):

    def setUp(self):
        self.mm = Mock()
        self.mm.predict.return_value = np.arange(0,24, dtype=np.float64).reshape((1,24))
        self.e = Estimator(self.mm)
        self.e.customer_counts["jim"] = 1
        self.e.customer_populations["jim"] = 1
        self.e.subscribe()

        #mocking some values in the estimator
        for i in range(TIMESLOT_NOW):
            self.e._apply_usage('jim', i,i)
        self.e.scalers['jim'] = MinMaxScaler().fit(np.array([1,1000],dtype=np.float64).reshape(-1,1))

        #create a new env manager
        self.reward_mock = Mock()
        self.reward_mock.return_value = 0

        self.wem = WholesaleEnvironmentManager(Mock(), self.reward_mock)
        self.wem.get_historical_prices = Mock()
        self.wem.get_historical_prices.return_value = np.zeros(168)
        self.wem.subscribe()

    def tearDown(self):
        self.e.unsubscribe()
        self.wem.unsubscribe()

    def test_estimator_wholesale_integration(self):
        # the integration actually requires a proper Model
        self.wem.agent = BaselineTrader()
        #listen for predictions calculated
        predictions_received: List[List[CustomerPredictions]] = []
        def listen_pred_ev(signal, sender, msg):
            predictions_received.append(msg)
        dispatcher.connect(listen_pred_ev, signals.COMP_USAGE_EST)
        #listen for wholesale orders
        orders_received = []
        def listen_orders_ev(signal, sender, msg):
            orders_received.append(msg)
        dispatcher.connect(listen_orders_ev, signals.OUT_PB_ORDER)

        # 1. send some market messages that get picked up by wholesale
        dispatcher.send(signal=signals.PB_TIMESLOT_UPDATE, msg=PBTimeslotUpdate(firstEnabled=TIMESLOT_NOW, lastEnabled=TIMESLOT_NOW+24))
        # no cleared trades or market transactions necessary for core test
        assert len(orders_received) == 0

        # 2. send pubsub messages that trigger prediction
        dispatcher.send(signal=signals.PB_TIMESLOT_COMPLETE, msg=PBTimeslotComplete(timeslotIndex=TIMESLOT_NOW-1))
        assert len(predictions_received) == 1
        # each message is a list of CustomerPredictions (one per customer), each holding 24 values
        assert len(predictions_received[0][0].predictions) == 24

        # 3. meanwhile the wholesale agent reacted to the predictions and sent its orders,
        #    one order per open delivery timeslot
        assert len(orders_received) == 24

        # clean up listeners
        dispatcher.disconnect(listen_pred_ev, signals.COMP_USAGE_EST)
        dispatcher.disconnect(listen_orders_ev, signals.OUT_PB_ORDER)
Example #4
def compete(continuous, demand_model, wholesale_model):
    """take part in a powertac competition"""

    from agent_components.wholesale.environments.WholesaleEnvironmentManager import WholesaleEnvironmentManager
    from agent_components.wholesale.learning.baseline import BaselineTrader
    from agent_components.demand.estimator import Estimator
    from agent_components.tariffs.publisher import TariffPublisher
    from util.learning_utils import ModelWriter
    from communication import messages_cache

    # bootstrapping logging and caching of messages
    messages_cache.subscribe()

    # bootstrapping models from stored data
    model = ModelWriter(demand_model, False).load_model()
    estimator = Estimator(model)
    estimator.subscribe()

    # TODO wholesale_trader dynamic loading
    ws_agent = BaselineTrader()
    wholesale = WholesaleEnvironmentManager(ws_agent, None)
    wholesale.subscribe()

    # simple tariff mirroring
    publisher = TariffPublisher()
    publisher.subscribe()

    # GRPC comm with powertac
    import communication.powertac_communication_server as grpc_com

    # subscribing to outgoing messages
    grpc_com.submit_service.subscribe()

    # main comm thread
    grpc_server = grpc_com.serve()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        grpc_server.stop(0)
class LogEnvManagerAdapter(SignalConsumer):
    """This class simulates a powertac trading environment but is based on logs of historical games.  It assumes that
    the broker actions have no impact on the clearing price which is a reasonable estimation for any market that has a
    large enough volume in relation to the broker trading volume. Of course this does not apply once the broker is large
    enough to itself have an influence on the clearing prices. In PowerTAC, the broker wil actually have a significant
    impact on the prices. Therefore this is an optimistic first stage learner for the broker. It will allow it to learn
    a certain base amount but will underperform once it acts in a live environment.

    The basic skills the broker learns in wholesale trading are as follows:

    - based on a (changing) demand forecast, try to equalize the portfolio so that the broker doesn't incur any
      balancing costs imposed by the DU
    - try to pay as little as possible for the energy needed at timeslot x. Buying earlier is cheaper but riskier

    These two goals rest on the following assumptions: The wholesale trader has no influence on the amount of energy
    its customers need. This is only partially true, because some brokers may be able to curtail their customers'
    usage when market prices are too high and the cost of curtailing the customer is lower than the cost of
    purchasing and delivering the energy. Because the current broker implementation does not make use of this
    ability, the assumption holds. Another assumption is that the agent's actions do not influence the clearing
    price. The server logs suggest cleared volumes in the low two-digit megawatt range per timeslot. As long as the
    broker only trades small amounts of energy, this assumption is appropriate. A broker that represents only a few
    dozen private households would trade small kilowatt amounts per timeslot, barely influencing market prices. An
    on-policy RL agent may therefore still learn successfully, despite the fact that the environment doesn't
    *actually* react to its actions.

    To let the broker learn from offline log files, the following process is used:

    - creation of market price statistics with the `org.powertac.logtool.example.MktPriceStats` class
    - creation of usage data with the `org.powertac.logtool.example.CustomerProductionConsumption` class
    - selection of a small set of customers as a permanent customer portfolio for the broker
    - passing observations to the agent:
        - predictions from the demand predictor or, alternatively, a true prediction (i.e. the real value) or a noisy
          prediction where the noise may be adapted
        - historical market clearing prices
        - rewards based on the reward calculation function

    A minimal usage sketch follows the class definition below.
    """
    def __init__(self, agent: PowerTacWholesaleAgent, reward_function):

        # handling params
        self.agent = agent
        self.games_played = 0
        self.env_manager: WholesaleEnvironmentManager = None
        self.step_rewards = 0
        self.reward_function = reward_function

        self.orders: List[PBOrder] = []

        # current timestep. the next X are open for trading (X set in config)
        self.current_timestep = 0
        self.first_timestep = self.current_timestep

        # ---------------------------------------------------------------
        # base data to generate the MDP from; needs to be reset on each new game
        # ---------------------------------------------------------------
        # self.initial_timeslot = 0
        self.wholesale_data = {}
        self.demand_data = None  # careful here, it's in kWh!

        # ---------------------------------------------------------------
        # stays until training is completed
        # ---------------------------------------------------------------
        # for mocking the market with the log files
        self._wholesale_files = get_wholesale_file_paths()
        self._demand_files = get_usage_file_paths()
        self.game_numbers = self._make_random_game_order()

        self.reward_average = 0
        self.reward_count = 0

    def subscribe(self):
        # need to catch all orders and determine if they lead to clearing
        dispatcher.connect(self.handle_order, signal=signals.OUT_PB_ORDER)
        dispatcher.connect(self.handle_reward, signal=signals.COMP_WS_REWARD)

    def unsubscribe(self):
        dispatcher.disconnect(self.handle_order, signal=signals.OUT_PB_ORDER)
        dispatcher.disconnect(self.handle_reward, signal=signals.COMP_WS_REWARD)

    def handle_order(self, sender, signal, msg: PBOrder):
        self.orders.append(msg)

    def handle_reward(self, sender, signal, msg: float):
        # incremental running mean: avg_new = avg_old * n/(n+1) + reward/(n+1)
        count = self.reward_count
        new_count = self.reward_count + 1

        self.reward_average = self.reward_average * (count / new_count) + msg * (1 / new_count)
        self.reward_count += 1

    def start(self, max_games=None):
        """Starts the learning, with control coming 'from the server', not from the agent"""
        # take random game
        # start stepping through timeslots, 24 at a time
        while self.game_numbers:
            self.reward_count = 0
            self.reward_average = 0
            # pops one from numbers
            self.new_game()
            self.current_timestep = self.get_first_timestep()
            self.first_timestep = self.current_timestep
            # create env_manager
            self.env_manager = WholesaleEnvironmentManager(
                self.agent, self.reward_function)
            self.env_manager.subscribe()
            self.step_game()
            # while self.current_timestep < self.wholesale_data
            self.games_played += 1
            if max_games and max_games <= self.games_played:
                break
        return self.reward_average

    def step_game(self):
        """loop per game. simulates all events coming from server and by listening to the PBOrder events,
        responds to agent actions"""
        while self.wholesale_data:
            # evaluate any orders received in previous step and send PBMarketTransaction
            self.evaluate_orders_received()
            # normally triggers demand forecasting --> predictions
            self.simulate_timeslot_complete()
            # ----- Timeslot cut -----
            # send out Transactions by customers
            self.simulate_tariff_transactions()
            # send out PBTimeslotUpdate --> triggers wholesale backward learning cycle
            self.simulate_timeslot_update()
            # send out Predictions based on DemandData --> triggers wholesale trader (forward)
            self.simulate_predictions()
            # send out PBClearedTrade for the next 24h timesteps
            self.simulate_cleared_trade()

            # simulate balancing_transactions
            self.simulate_balancing_transactions()

            # the stepping is partially dependent on the previous step's data:
            # in timestep 363, ClearedTrades and MarketTransactions that refer to 362 are sent out.
            # Therefore the PREVIOUS timestep's data is only deleted at THE END of the step.
            if self.current_timestep - 1 in self.wholesale_data:
                del self.wholesale_data[self.current_timestep - 1]
            self.current_timestep += 1

    def new_game(self):
        """load data for new game into object"""
        # get new game number
        if not self.game_numbers:
            self.game_numbers = self._make_random_game_order()
        if hasattr(cfg, 'WHOLESALE_OFFLINE_TRAIN_GAME'):
            # only using this one game!
            gn = cfg.WHOLESALE_OFFLINE_TRAIN_GAME
        else:
            gn = self.game_numbers.pop()
        # getting data and storing it locally
        self.make_data_for_game(gn)

    def make_data_for_game(self, i):
        wholesale_file_path = self._wholesale_files[i]
        self.make_wholesale_data(wholesale_file_path)

        demand_file_path = self._demand_files[i]
        self.make_demand_data(demand_file_path)

    def make_wholesale_data(self, wholesale_file_path):
        with open(wholesale_file_path) as file:
            wholesale_data = parse_wholesale_file(file)
        for ts in wholesale_data:
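            # ts[0] is the timeslot id; ts[3:] holds that slot's [mWh, price] clearing pairs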
            timestep = ts[0]
            data = np.array(ts[3:])
            self.wholesale_data[timestep] = data

    def make_demand_data(self, demand_file_path):
        # reuse the shared demand_data helper, resetting it first
        demand_data.clear()
        # getting unscaled predictions
        demand_data.parse_usage_game_log(demand_file_path, pp_type='none')
        demand = demand_data.get_demand_data_values()
        # use only 30 random picks from the customers
        idx = np.random.randint(0, high=len(demand), size=30)
        demand = demand[idx, :]

        # make the demand smaller (1/10th) to simulate the broker only having 1/10th of the selected customers' demand.
        # This is because a large portion of the customer demand is generated by population-scale models,
        # and the broker only receives part of that demand.
        demand = demand / 10

        self.demand_data = demand

    def _make_random_game_order(self):
        # use whichever file list is shorter
        max_game = min(len(self._wholesale_files), len(self._demand_files))
        game_numbers = list(range(1, max_game))
        if cfg.WHOLESALE_OFFLINE_TRAIN_RANDOM_GAME:
            # mix up all the game numbers
            # for reproducibility and comparability, only shuffle when set in config
            random.shuffle(game_numbers)
        return game_numbers

    def get_first_timestep(self):
        return min(self.wholesale_data.keys())

    def simulate_cleared_trade(self):
        """simulates the sending of PBClearedTrade messages for the next [t-1,t-1+24] timesteps"""
        last_step = self.current_timestep - 1
        cleared_steps = list(
            range(last_step,
                  last_step + cfg.WHOLESALE_OPEN_FOR_TRADING_PARALLEL))
        for i, s in enumerate(cleared_steps):
            if s not in self.wholesale_data:
                break
            data = self.wholesale_data[s]
            # index from the back: the first step in cleared_steps is at its final (24th) clearing opportunity
            cleared_data = data[23 - i]
            trade = PBClearedTrade(timeslot=s,
                                   executionMWh=cleared_data[0],
                                   executionPrice=cleared_data[1])
            dispatcher.send(signals.PB_CLEARED_TRADE, msg=trade)

    def simulate_timeslot_update(self):
        """Simulates the TimeslotUpdate message"""
        now = self.current_timestep
        dispatcher.send(
            signals.PB_TIMESLOT_UPDATE,
            msg=PBTimeslotUpdate(firstEnabled=now,
                                 lastEnabled=now +
                                 cfg.WHOLESALE_OPEN_FOR_TRADING_PARALLEL))

    def simulate_balancing_transactions(self):
        # get env where the final step has been completed
        envs = [
            e for e in self.env_manager.environments.values() if e._step >= 24
        ]
        if not envs:
            return
        env: PowerTacEnv = envs[0]
        tx = self.generate_du_balancing_tx(env)
        dispatcher.send(signals.PB_BALANCING_TRANSACTION, msg=tx)

    def generate_du_balancing_tx(self,
                                 env: PowerTacEnv) -> PBBalancingTransaction:
        """Helper function that simulates the DU fee for offline based training"""
        market_trades = [[tr.executionMWh, tr.executionPrice]
                         for tr in env.cleared_trades]
        realized_usage = env.realized_usage
        average_market = calculate_running_averages(np.array([market_trades
                                                              ]))[0]
        balancing_needed = calculate_balancing_needed_obj(
            env.purchases, realized_usage)
        #log.info("balancing needed for target ts {}  -- {}".format(env._target_timeslot, balancing_needed))

        # treated as a "forced transaction" with similar logic to the market TX
        du_trans = []
        if balancing_needed > 0:
            # forced to buy at 5x the market price; this teaches the agent to secure its kWh ahead of time
            du_trans = [balancing_needed, -1 * average_market * 5]
        if balancing_needed < 0:
            # surplus energy is paid only 0.5x the normal market price
            du_trans = [balancing_needed,
                        0.5 * average_market]  # TODO to config
        if balancing_needed == 0:
            du_trans = [0, 0]
        # note: the BalancingTransaction is in kWh and the energy sign is reversed (positive for surplus, not negative)
        return PBBalancingTransaction(postedTimeslot=env._target_timeslot,
                                      kWh=du_trans[0] * 1000 * -1,
                                      charge=du_trans[1])

    def evaluate_orders_received(self):
        """Evaluate order and check if it should be cleared"""
        cleared_mask = []
        for o in self.orders:

            # ignore orders at the end of a game
            if o.timeslot not in self.wholesale_data:
                continue
            distance = o.timeslot - self.current_timestep
            ts_data = self.wholesale_data[o.timeslot]
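            # each entry in ts_data is an [mWh, price] clearing; index by the distance between now and the delivery slot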
            market_clearing = ts_data[distance]
            cleared, prob = is_cleared_with_volume_probability(
                o, market_clearing)
            cleared_mask.append((cleared, distance))
            if cleared:
                # the price is positive only when mWh is smaller than 0 (i.e. the broker is selling)
                price = market_clearing[
                    1] * -1 if o.mWh > 0 else market_clearing[1]
                volume_received = o.mWh * prob  # assuming we only get a part of what we want
                # sending out the message with the sign-adjusted price
                dispatcher.send(signal=signals.PB_MARKET_TRANSACTION,
                                msg=PBMarketTransaction(
                                    price=price,
                                    mWh=volume_received,
                                    timeslot=o.timeslot))
            else:
                # not cleared
                pass
        log.info("Cleared timesteps: " +
                 ' '.join([str(i[1]) for i in cleared_mask if i[0]]))
        self.orders = []

    def simulate_timeslot_complete(self):
        dispatcher.send(
            signals.PB_TIMESLOT_COMPLETE,
            msg=PBTimeslotComplete(timeslotIndex=self.current_timestep - 1))

    def simulate_predictions(self):
        dd = self.demand_data
        fts = self.first_timestep
        start = self.current_timestep + 1
        end = start + cfg.DEMAND_FORECAST_DISTANCE
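        # demand_data columns are absolute timeslots starting at the game's first timestep,
        # so subtract fts to turn the [start, end) window into array column indices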
        demand_data = dd[:, start - fts:end - fts]
        demand_data = demand_data / 1000  # dividing by 1000 to turn kWh into MWh
        if cfg.WHOLESALE_FORECAST_ERROR_PER_TS > 0:
            demand_data = np.array([
                fuzz_forecast_for_training(customer_data)
                for customer_data in demand_data
            ])
        preds = []
        for cust_number, customer_data in enumerate(demand_data):
            customer_pred_obj = CustomerPredictions(
                "customer{}".format(cust_number),
                predictions=customer_data,
                first_ts=start)
            preds.append(customer_pred_obj)
        dispatcher.send(signals.COMP_USAGE_EST, msg=preds)

    def simulate_tariff_transactions(self):
        """sends out simulated tariff transactions for the current timeslot"""
        timestep = self.current_timestep - self.first_timestep
        if len(self.demand_data[0]) <= timestep:
            return
        usages = self.demand_data[:, timestep]
        for u in usages:
            if u < 0:
                t = CONSUME
            else:
                t = PRODUCE
            dispatcher.send(signals.PB_TARIFF_TRANSACTION,
                            msg=PBTariffTransaction(
                                txType=t,
                                kWh=u,
                                postedTimeslot=self.current_timestep))
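
The docstring above describes how this adapter is meant to be driven; here is a minimal, hypothetical sketch of such an offline-training loop. BaselineTrader stands in for a learning agent (as it does in compete() above), and zero_reward is a placeholder with a deliberately permissive signature, since the real reward function's interface is not shown in these examples.

from agent_components.wholesale.learning.baseline import BaselineTrader


def zero_reward(*args, **kwargs):
    # placeholder reward; a real reward function would score each completed timeslot
    return 0.0


def train_offline(max_games=10):
    # LogEnvManagerAdapter is the class defined above
    adapter = LogEnvManagerAdapter(BaselineTrader(), zero_reward)
    adapter.subscribe()  # listen for outgoing PBOrder and reward signals
    try:
        # steps through the logged games and returns the average reward over them
        return adapter.start(max_games=max_games)
    finally:
        adapter.unsubscribe()
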
class TestWholesaleEnvironmentManager(unittest.TestCase):
    def setUp(self):
        self.agent_mock = Mock()
        self.reward_mock = Mock()
        self.reward_mock.return_value = 0
        self.env_mgr = WholesaleEnvironmentManager(agent=self.agent_mock, reward_function=self.reward_mock)

    def test_handle_market_transaction(self):
        trans = PBMarketTransaction(timeslot=1, mWh=2, price=-12)
        env_mock = Mock()
        self.env_mgr.environments[1] = env_mock
        self.env_mgr.handle_market_transaction(None, None, trans)
        env_mock.handle_market_transaction.assert_called_with(trans)
        # should err if the env hasn't been added yet
        self.assertRaises(Exception, self.env_mgr.handle_market_transaction, None, None, PBMarketTransaction(timeslot=2))

    def test_handle_timeslot_update(self):
        update = PBTimeslotUpdate(firstEnabled=1, lastEnabled=24)
        self.env_mgr.environments[0] = Mock()
        self.env_mgr.environments[0].predictions = [1]
        self.env_mgr.environments[0].purchases = [PBMarketTransaction(mWh=1, price=2)]
        self.env_mgr.environments[0].actions = [[1,2]]
        self.env_mgr.agent = "agent"
        with patch.object(self.env_mgr, 'get_historical_prices') as hp_mock:
            hp_mock.return_value = [1,2,3]
            self.env_mgr.handle_timeslot_update(None, None, update)
        # adds the new ones
        assert 1 in self.env_mgr.environments
        assert 24 in self.env_mgr.environments
        #removes the old ones
        assert 0 not in self.env_mgr.environments
        # historicals added to the new ones
        assert list(self.env_mgr.environments[1]._historical_prices) == [1, 2, 3]
        # agent set on the new ones
        assert self.env_mgr.environments[1].agent == "agent"
        assert len(self.env_mgr.environments.keys()) == 24


    def test_historical_prices(self):
        for i in range(200):
            self.env_mgr.append_historical(PBClearedTrade(timeslot=i, executionPrice=i, executionMWh=1))
        #one extra to show the averaging works
        self.env_mgr.append_historical(PBClearedTrade(timeslot=198, executionPrice=1, executionMWh=3))
        # for the timeslot just after all historical prices
        res = self.env_mgr.get_historical_prices(200)
        assert len(res) == 168
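        # executionPrice was set equal to the timeslot index, so the 168-slot window covers slots 32..199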
        assert res[0] == 200-168
        assert res[-1] == 199
        # the extra trade added above makes timeslot 198 a volume-weighted average: (198*1 + 1*3) / 4 = 50.25
        assert res[-2] == 50.25

        # in the bootstrap situation we have 336 timeslots of data but the game starts at 360, so 24h of history are "lost"
        res = self.env_mgr.get_historical_prices(200 + 24)
        assert(len(res)) == 168
        assert res[-1] == res[-24]
        assert res[0] == 224-168


    def test_handle_predictions(self):
        self.agent_mock.forward.return_value = ([0,0], None, None)
        #create some active timeslots --> active environments
        with patch.object(self.env_mgr, 'get_historical_prices') as hp_mock:
            hp_mock.return_value = np.zeros(168)
            self.env_mgr.handle_timeslot_update(None, None, PBTimeslotUpdate(firstEnabled=169, lastEnabled=169 + 24))
        # some mock preds
        preds: List[CustomerPredictions] = []
        for i in range(3):
            cp = CustomerPredictions("jim{}".format(i), np.arange(24), 169)
            preds.append(cp)
        #call
        self.env_mgr.handle_predictions(None, None, preds)
        # assert that the agent's forward() was called with a PowerTacEnv as its observation
        arg = self.agent_mock.forward.call_args
        assert isinstance(arg[0][0], PowerTacEnv)

    def test_handle_cleared_trade(self):
        self.env_mgr.handle_timeslot_update(None, None, PBTimeslotUpdate(firstEnabled=1, lastEnabled=1))
        msg = PBClearedTrade(timeslot=1, executionMWh=2, executionPrice=3)
        self.env_mgr.handle_cleared_trade(None, None, msg)
        self.env_mgr.handle_cleared_trade(None, None, msg)
        self.env_mgr.handle_cleared_trade(None, None, msg)
        assert len(self.env_mgr.historical_average_prices[1]) == 3


    def test_get_sums_from_preds(self):
        preds = []
        for i in range(5):
            vals = np.zeros(24)
            vals.fill(i)
            pred = CustomerPredictions("john", vals, first_ts=1)
            preds.append(pred)
        sums = self.env_mgr.get_sums_from_preds(preds)
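        # 5 customers with constant per-slot values 0..4, so every timeslot sums to 0+1+2+3+4 = 10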
        expected = {i: 10 for i in range(1, 25)}
        for i in expected:
            assert expected[i] == sums[i]

    def test_handle_market_bootstrap_data(self):
        mWh = np.arange(360)
        price = np.arange(360) * 10
        mbd = PBMarketBootstrapData(mwh=mWh, marketPrice=price)
        self.env_mgr.handle_market_bootstrap_data(None, None, mbd)
        for i in range(360):
            assert self.env_mgr.historical_average_prices[i][0][0] == i
            assert self.env_mgr.historical_average_prices[i][0][1] == i * 10




    def test_multiple_coroutines(self):
        """A test for myself. Learning how to use coroutines."""
        cr = []
        received = []

        #creating two callables and calling one from other
        def callable2():
            observation = (yield)
            received.append(observation)

        def callable1():
            return callable2()

        #run this a couple times, I wanna see if I can have many generators
        for i in range(5):
            coro = callable1()
            cr.append(coro)

        #assume they are all generators
        for i in range(5):
            assert isinstance(cr[i], Generator)

        #now let's pass them all some observations
        for i in range(5):
            assert len(received) < i+1
            next(cr[i])
            try:
                cr[i].send(i)
            except StopIteration as e:
                #a generator throws a StopIteration when it is completed
                pass

            assert len(received) == i+1