def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method. """
    gen = case.generators

    profile = array([1.0])
    maxSteps = len(profile)

    experimentation = 0.55
    recency = 0.3
    tau = 100.0
    decay = 0.99 #9995

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType)

    experiment = pyreto.continuous.MarketExperiment([], [], market, profile)

    for g in gen[0:2]:
        #learner = RothErev(experimentation, recency)
        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent([g], market, nStates, nOffer,
            markups, withholds, maxSteps, learner)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    task1, agent1 = get_zero_task_agent(gen[2:3], market, nOffer, maxSteps)
    experiment.tasks.append(task1)
    experiment.agents.append(agent1)

    return experiment
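
# Illustrative sketch (not the pyreto/pybrain implementation): roughly what the
# VariantRothErev propensity update and the Boltzmann explorer used above do.
# The parameter names mirror `experimentation`, `recency` and `tau`; the update
# follows the commonly cited Modified Roth-Erev rule and may differ in detail
# from the library's own code.
import numpy as np

def _sketch_variant_roth_erev_step(propensities, action, reward,
                                   experimentation=0.55, recency=0.3):
    """ Returns updated propensities after playing `action` for `reward`. """
    q = np.asarray(propensities, dtype=float)
    n = len(q)
    update = np.empty(n)
    for k in range(n):
        if k == action:
            # The chosen action is reinforced by most of the reward.
            update[k] = reward * (1.0 - experimentation)
        else:
            # Remaining actions receive a spill-over proportional to their
            # current propensity.
            update[k] = q[k] * (experimentation / (n - 1))
    # Recency (forgetting) decays old propensities before adding the update.
    return (1.0 - recency) * q + update

def _sketch_boltzmann_choice(propensities, tau=100.0):
    """ Samples an action index with Boltzmann (softmax) probabilities. """
    q = np.asarray(propensities, dtype=float) / tau
    p = np.exp(q - q.max())
    p /= p.sum()
    return np.random.choice(len(p), p=p)
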
def get_q_experiment(case, minor=1):
    """ Returns an experiment that uses Q-learning. """
    gen = case.generators

    profile = array([1.0])
    maxSteps = len(profile)

    if minor == 1:
        alpha = 0.3 # Learning rate.
        gamma = 0.99 # Discount factor.
        # The closer epsilon gets to 0, the more greedy and less explorative.
        epsilon = 0.9
        decay = 0.97
        tau = 150.0 # Boltzmann temperature.
        qlambda = 0.9
    elif minor == 2:
        alpha = 0.1 # Learning rate.
        gamma = 0.99 # Discount factor.
        # The closer epsilon gets to 0, the more greedy and less explorative.
        epsilon = 0.9
        decay = 0.99
        tau = 150.0 # Boltzmann temperature.
        qlambda = 0.9
    else:
        raise ValueError("minor must be 1 or 2")

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType)

    experiment = pyreto.continuous.MarketExperiment([], [], market, profile)

    for g in gen[0:2]:
        learner = Q(alpha, gamma)
        # learner = QLambda(alpha, gamma, qlambda)
        # learner = SARSA(alpha, gamma)
        learner.explorer.epsilon = epsilon
        learner.explorer.decay = decay
        # learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent([g], market, nStates, nOffer,
            markups, withholds, maxSteps, learner)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Passive agent.
    task, agent = get_zero_task_agent(gen[2:3], market, nOffer, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
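
# Illustrative sketch (not pybrain's Q learner): the tabular backup that the
# alpha, gamma, epsilon and decay values above parameterise. `q_table` is a
# hypothetical (nStates x nActions) numpy array; pybrain keeps this state
# inside the learner and its epsilon-greedy explorer instead.
def _sketch_q_update(q_table, state, action, reward, next_state,
                     alpha=0.3, gamma=0.99):
    """ One Q-learning backup: move Q(s,a) towards r + gamma * max_a' Q(s',a'). """
    target = reward + gamma * q_table[next_state].max()
    q_table[state, action] += alpha * (target - q_table[state, action])
    return q_table

def _sketch_epsilon_greedy(q_table, state, epsilon=0.9, decay=0.97):
    """ Picks a random action with probability epsilon (else the greedy one)
    and returns the decayed epsilon to use at the next step. """
    import random
    if random.random() < epsilon:
        action = random.randrange(q_table.shape[1])
    else:
        action = int(q_table[state].argmax())
    return action, epsilon * decay
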
def get_q_experiment(case, minor=1):
    """ Returns an experiment that uses Q-learning. """
    locAdj = "ac"

    nStates = 6

    alpha = 0.2 # Learning rate.
    gamma = 0.99 # Discount factor.
    # The closer epsilon gets to 0, the more greedy and less explorative.
    epsilon = 0.9
    decay = 0.999

    Pd0 = get_pd_max(case, profile)
    Pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = Q(alpha, gamma)
        learner.explorer.epsilon = epsilon
        learner.explorer.decay = decay

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
            markups, withholds, maxSteps, learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Passive agent for the synchronous condenser.
    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method. """
    locAdj = "ac"

    experimentation = 0.55
    recency = 0.3
    tau = 100.0
    decay = 0.999

    nStates = 3 # stateless RE?

    Pd0 = get_pd_max(case, profile)
    Pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
            markups, withholds, maxSteps, learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Passive agent for the synchronous condenser.
    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
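
# A minimal usage sketch, assuming `case` is a pylon.Case loaded elsewhere and
# that the experiment exposes a pybrain-style doEpisodes(n) run loop; both
# assumptions should be checked against the pyreto version in use.
def _sketch_run(case, episodes=100):
    """ Builds the Roth-Erev experiment above and runs it for some episodes. """
    experiment = get_re_experiment(case)
    experiment.doEpisodes(episodes) # Assumed run method (pybrain convention).
    return experiment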