# NB: these functions are excerpts from a larger example module: `cap`,
# `decommit`, `auctionType`, `nStates`, `nOffer`, `markups`, `withholds`,
# `profile`, `maxSteps` and the get_*_task_agent/get_portfolios3/get_pd_*
# helpers are defined at module level alongside them.

def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method.
    """
    gen = case.generators

    profile = array([1.0])
    maxSteps = len(profile)
    experimentation = 0.55
    recency = 0.3
    tau = 100.0
    decay = 0.99 #9995

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType)

    experiment = pyreto.continuous.MarketExperiment([], [], market, profile)

    # The first two generators learn; each gets its own task/agent pair.
    for g in gen[0:2]:
        #learner = RothErev(experimentation, recency)
        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent([g], market, nStates, nOffer,
            markups, withholds, maxSteps, learner)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # The third generator is passive, always offering with zero markup.
    task1, agent1 = get_zero_task_agent(gen[2:3], market, nOffer, maxSteps)
    experiment.tasks.append(task1)
    experiment.agents.append(agent1)

    return experiment
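# For reference, a minimal standalone sketch of the update that
# VariantRothErev applies above -- the "modified" Roth-Erev rule of
# Nicolaisen, Petrov and Tesfatsion -- together with the Boltzmann
# (softmax) action selection its explorer performs. This is an
# illustrative re-derivation under those assumptions, not pyreto's
# implementation; the function names and array shapes are invented here.

def _variant_roth_erev_update(q, chosen, reward, experimentation, recency):
    # Propensity update: q[j] <- (1 - recency) * q[j] + E[j], where the
    # experience term E gives the chosen action reward * (1 - experimentation)
    # and spreads q[j] * experimentation / (n - 1) over the other actions.
    n = len(q)
    e = scipy.where(scipy.arange(n) == chosen,
                    reward * (1.0 - experimentation),
                    q * experimentation / (n - 1))
    return (1.0 - recency) * q + e

def _boltzmann_probs(q, tau):
    # Action probabilities: softmax of the propensities at temperature tau.
    # The explorer decays tau each step, shifting from exploration toward
    # exploitation as the propensities stabilise.
    z = scipy.exp((q - q.max()) / tau)
    return z / z.sum()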
def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method.
    """
    locAdj = "ac"
    experimentation = 0.55
    recency = 0.3
    tau = 100.0
    decay = 0.999
    nStates = 3 # stateless RE?

    Pd0 = get_pd_max(case, profile)
    Pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
            markups, withholds, maxSteps, learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
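# Hypothetical driver for the experiments returned above. It assumes
# MarketExperiment keeps PyBrain's doInteractions()/learn()/reset()
# interface (the bandit example at the end of this section uses exactly
# that with the plain Experiment class); the step count and learn/reset
# cadence here are illustrative only.
def _run_experiment(experiment, steps, episodes):
    for _ in range(episodes):
        experiment.doInteractions(steps)
        for agent in experiment.agents:
            agent.learn()
            agent.reset()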
def case6ww1():
    case = pylon.Case.load(join("..", "data", "case6ww.pkl"))

    case.generators[0].p_cost = (0.0, 5.0, 200.0)
    case.generators[1].p_cost = (0.0, 6.5, 200.0)
    case.generators[2].p_cost = (0.0, 2.0, 200.0)

    case.generators[0].c_shutdown = 100.0
    #case.generators[0].p_min = 0.0 # TODO: Unit-decommitment.
    #case.generators[1].p_min = 0.0
    ##case.generators[2].p_min = 0.0

    case.generators[0].p_max = 100.0
    case.generators[1].p_max = 70.0
    case.generators[2].p_max = 70.0

    vre = VariantRothErev(experimentation=0.55, recency=0.3)
    vre.explorer = BoltzmannExplorer() #tau=100, decay=0.95)
    learners = [vre, Q(), Reinforce()]

    profile = [0.9, 0.6]
    m = (20, 75) # markups
    nb = 1 # no. offers
    ns = 3 # no. states
    mx = 60.0 # max markup
    weeks = 2
    days = 2
    outdir = "/tmp/case6ww1"
    dc = True
    trials = 1

    for i in range(trials):
        roundrobin(case, learners, profile, m, nb, ns, mx, weeks, days,
                   outdir, dc, trial=i)
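# The p_cost tuples set above are polynomial coefficients in descending
# order of power (the MATPOWER gencost convention, which pylon is believed
# to follow), so (0.0, 5.0, 200.0) means cost(p) = 0.0*p**2 + 5.0*p + 200.0.
# The helper below is a sketch of that evaluation, not pylon's own
# total_cost method:
def _poly_cost(p_cost, p):
    # Evaluate a descending-order polynomial cost curve at output p (MW).
    return scipy.polyval(list(p_cost), p)

# e.g. generator 0 at full output:
# _poly_cost((0.0, 5.0, 200.0), 100.0) == 0.0*100**2 + 5.0*100 + 200.0 == 700.0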
payouts = scipy.array([
    # ... first four payout rows truncated in the source ...
    [700.0, 800.0, 900.0]
]) # Expected value: 790

distrib = scipy.array([[0.7, 0.2, 0.1],
                       [0.1, 0.6, 0.3],
                       [0.4, 0.2, 0.3],
                       [0.5, 0.45, 0.05],
                       [0.3, 0.5, 0.2]])

env = BanditEnvironment(payouts, distrib)
task = BanditTask(env)

table = PropensityTable(payouts.shape[0])
table.initialize(500.0)

#learner = RothErev(experimentation=0.55, recency=0.3)
learner = VariantRothErev(experimentation=0.65, recency=0.3)
learner.explorer = BoltzmannExplorer(tau=100.0, decay=0.9995)

agent = LearningAgent(table, learner)
experiment = Experiment(task, agent)

epis = int(1e1)
batch = 2
avgRewards = scipy.zeros(epis)
allActions = scipy.zeros(epis * batch)

c = 0
for i in range(epis):
    experiment.doInteractions(batch)
    avgRewards[i] = scipy.mean(agent.history["reward"])
    allActions[c:c + batch] = agent.history["action"].flatten() + 1
    agent.learn()
    agent.reset()  # clear the history before the next batch (assumed; the
                   # excerpt ends at agent.learn())
    c += batch     # advance the allActions write cursor (likewise assumed)
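# A short follow-on for inspecting the run: plot the per-batch average
# reward and the sequence of chosen actions. Plain pylab calls on the
# arrays computed above; the figure layout is an illustrative choice.
import pylab

pylab.subplot(2, 1, 1)
pylab.plot(avgRewards)
pylab.ylabel("Mean reward")
pylab.subplot(2, 1, 2)
pylab.plot(allActions, ".")
pylab.ylabel("Action (1-based)")
pylab.xlabel("Interaction")
pylab.show()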