def get_q_experiment(case, minor=1):
    """ Returns an experiment that uses the Q learning method.

    A discrete task/agent pair, each with its own Q learner, is added for
    every generator portfolio returned by get_portfolios3().  One further
    zero task/agent pair is added for the generators indexed by the second
    value returned by get_portfolios3().

    The 'minor' argument is not referenced in this function.
    """
    loc_adjust = "ac"
    n_states = 6

    learning_rate = 0.2
    discount_factor = 0.99

    # Exploration: the nearer epsilon is to 0, the greedier (and the less
    # explorative) the action selection becomes.
    explore_epsilon = 0.9
    explore_decay = 0.999

    pd_max = get_pd_max(case, profile)
    pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(
        case,
        priceCap=cap,
        decommit=decommit,
        auctionType=auctionType,
        locationalAdjustment=loc_adjust
    )
    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for unit_ids in portfolios:
        units = [case.generators[k] for k in unit_ids]

        q_learner = Q(learning_rate, discount_factor)
        q_learner.explorer.epsilon = explore_epsilon
        q_learner.explorer.decay = explore_decay

        new_task, new_agent = get_discrete_task_agent(
            units, market, n_states, nOffer, markups, withholds, maxSteps,
            q_learner, pd_max, pd_min)

        # Single-argument form: prints the same text whether 'print' is the
        # Python 2 statement or the Python 3 function.
        print("%s %s" % ("ALL ACTIONS:",
                         len(new_task.env._allActions) * n_states))

        experiment.tasks.append(new_task)
        experiment.agents.append(new_agent)

    idle_units = [case.generators[k] for k in sync_cond]

    # Avoid invalid offer withholding.
    first_idle = idle_units[0]
    first_idle.p_min = 0.001
    first_idle.p_max = 0.002

    idle_task, idle_agent = get_zero_task_agent(idle_units, market, 1,
                                                maxSteps)
    experiment.tasks.append(idle_task)
    experiment.agents.append(idle_agent)

    return experiment
def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method.

    A discrete task/agent pair, each with its own VariantRothErev learner
    and BoltzmannExplorer, is added for every generator portfolio returned
    by get_portfolios3().  One further zero task/agent pair is added for the
    generators indexed by the second value returned by get_portfolios3().

    The 'minor' argument is not referenced in this function.
    """
    loc_adjust = "ac"

    re_experimentation = 0.55
    re_recency = 0.3

    boltzmann_tau = 100.0
    boltzmann_decay = 0.999

    # NOTE(review): original author queried this value - "stateless RE?"
    n_states = 3

    pd_max = get_pd_max(case, profile)
    pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(
        case,
        priceCap=cap,
        decommit=decommit,
        auctionType=auctionType,
        locationalAdjustment=loc_adjust
    )
    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for unit_ids in portfolios:
        units = [case.generators[k] for k in unit_ids]

        re_learner = VariantRothErev(re_experimentation, re_recency)
        re_learner.explorer = BoltzmannExplorer(boltzmann_tau,
                                                boltzmann_decay)

        new_task, new_agent = get_discrete_task_agent(
            units, market, n_states, nOffer, markups, withholds, maxSteps,
            re_learner, pd_max, pd_min)

        # Single-argument form: prints the same text whether 'print' is the
        # Python 2 statement or the Python 3 function.
        print("%s %s" % ("ALL ACTIONS:",
                         len(new_task.env._allActions) * n_states))

        experiment.tasks.append(new_task)
        experiment.agents.append(new_agent)

    idle_units = [case.generators[k] for k in sync_cond]

    # Avoid invalid offer withholding.
    first_idle = idle_units[0]
    first_idle.p_min = 0.001
    first_idle.p_max = 0.002

    idle_task, idle_agent = get_zero_task_agent(idle_units, market, 1,
                                                maxSteps)
    experiment.tasks.append(idle_task)
    experiment.agents.append(idle_agent)

    return experiment