Example #1
def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method.
    """
    gen = case.generators

    profile = array([1.0])
    maxSteps = len(profile)
    experimentation = 0.55
    recency = 0.3
    tau = 100.0
    decay = 0.99  #9995

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType)

    experiment = pyreto.continuous.MarketExperiment([], [], market, profile)

    for g in gen[0:2]:
        #learner = RothErev(experimentation, recency)
        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent([g], market, nStates, nOffer,
            markups, withholds, maxSteps, learner)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    task1, agent1 = get_zero_task_agent(gen[2:3], market, nOffer, maxSteps)
    experiment.tasks.append(task1)
    experiment.agents.append(agent1)

    return experiment
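
For reference, the VariantRothErev(experimentation, recency) learner configured above keeps one propensity value per discrete action and folds each reward into those propensities. Below is a minimal standalone sketch of that style of update (a "modified" Roth-Erev rule as commonly described in agent-based market studies); it is illustrative only, not pyreto's implementation, and the function and variable names are hypothetical.

import numpy as np

def variant_roth_erev_update(q, chosen, reward, experimentation=0.55, recency=0.3):
    """One propensity update: the chosen action absorbs most of the reward,
    while the other actions are nudged by a fraction of their own propensity."""
    n = len(q)
    q_new = (1.0 - recency) * q  # recency forgets part of the old propensity
    for j in range(n):
        if j == chosen:
            q_new[j] += reward * (1.0 - experimentation)
        else:
            q_new[j] += q[j] * experimentation / (n - 1)
    return q_new

# Toy usage with three actions and propensities initialised to 500.0,
# as in the PropensityTable examples further down.
q = np.full(3, 500.0)
q = variant_roth_erev_update(q, chosen=1, reward=147.5)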
Example #2
def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method.
    """
    locAdj = "ac"
    experimentation = 0.55
    recency = 0.3
    tau = 100.0
    decay = 0.999
    nStates = 3 # stateless RE?

    Pd0 = get_pd_max(case, profile)
    Pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
            markups, withholds, maxSteps, learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)


    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
Example #3
def case6ww1():
    case = pylon.Case.load(join("..", "data", "case6ww.pkl"))
    case.generators[0].p_cost = (0.0, 5.0, 200.0)
    case.generators[1].p_cost = (0.0, 6.5, 200.0)
    case.generators[2].p_cost = (0.0, 2.0, 200.0)

    case.generators[0].c_shutdown = 100.0

    #case.generators[0].p_min = 0.0 # TODO: Unit-decommitment.
    #case.generators[1].p_min = 0.0
    ##case.generators[2].p_min = 0.0

    case.generators[0].p_max = 100.0
    case.generators[1].p_max = 70.0
    case.generators[2].p_max = 70.0


    vre = VariantRothErev(experimentation=0.55, recency=0.3)
    vre.explorer = BoltzmannExplorer()  #tau=100, decay=0.95)
    learners = [vre, Q(), Reinforce()]

    profile = [0.9, 0.6]

    m = (20, 75) # markups
    nb = 1 # no. offers
    ns = 3 # no. states

    mx = 60.0 # max markup

    weeks = 2
    days = 2

    outdir = "/tmp/case6ww1"
    dc = True

    trials = 1
    for i in range(trials):
        roundrobin(case, learners, profile, m, nb, ns, mx, weeks, days,
                   outdir, dc, trial=i)
Example #4
    Pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(case,
                                priceCap=cap,
                                decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
                                              markups, withholds, maxSteps,
                                              learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001  # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
Example #5
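# NOTE: this excerpt assumes a `payouts` array (one row of rewards per bandit
# arm, matching the rows of `distrib` below) was defined earlier in the script;
# part of that definition is visible in Example #7.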
distrib = scipy.array([[0.7, 0.2, 0.1],
                       [0.1, 0.6, 0.3],
                       [0.4, 0.2, 0.3],
                       [0.5, 0.45, 0.05],
                       [0.3, 0.5, 0.2]])

env = BanditEnvironment(payouts, distrib)

task = BanditTask(env)

table = PropensityTable(payouts.shape[0])
table.initialize(500.0)

#learner = RothErev(experimentation=0.55, recency=0.3)
learner = VariantRothErev(experimentation=0.65, recency=0.3)
learner.explorer = BoltzmannExplorer(tau=100.0, decay=0.9995)

agent = LearningAgent(table, learner)

experiment = Experiment(task, agent)

epis = int(1e1)
batch = 2
avgRewards = scipy.zeros(epis)
allActions = scipy.zeros(epis * batch)
c = 0
for i in range(epis):
    experiment.doInteractions(batch)
    avgRewards[i] = scipy.mean(agent.history["reward"])
    allActions[c:c + batch] = agent.history["action"].flatten() + 1
    c += batch  # advance the write cursor so the next batch is not overwritten
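
The BoltzmannExplorer(tau=100.0, decay=0.9995) attached to the learner above turns the propensities into a softmax distribution over actions and gradually lowers the temperature tau, so action selection shifts from exploration towards exploitation. A minimal sketch of that selection rule follows; it is illustrative only (not PyBrain's implementation) and the helper name is hypothetical.

import numpy as np

def boltzmann_select(propensities, tau, rng=None):
    """Sample an action index with probability proportional to exp(q / tau)."""
    rng = np.random.default_rng() if rng is None else rng
    z = propensities / tau
    z = z - z.max()            # subtract the max for numerical stability
    probs = np.exp(z)
    probs /= probs.sum()
    return rng.choice(len(propensities), p=probs)

tau, decay = 100.0, 0.9995
q = np.array([500.0, 640.0, 520.0])  # example propensities
for _ in range(5):
    action = boltzmann_select(q, tau)
    tau *= decay               # cool the temperature after each interaction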
Example #6
    case = pylon.Case.load(join("..", "data", "case6ww.pkl"))
    case.generators[0].p_cost = (0.0, 5.0, 200.0)
    case.generators[1].p_cost = (0.0, 6.5, 200.0)
    case.generators[2].p_cost = (0.0, 2.0, 200.0)

    case.generators[0].c_shutdown = 100.0

    #case.generators[0].p_min = 0.0 # TODO: Unit-decommitment.
    #case.generators[1].p_min = 0.0
    ##case.generators[2].p_min = 0.0

    case.generators[0].p_max = 100.0
    case.generators[1].p_max = 70.0
    case.generators[2].p_max = 70.0

    vre = VariantRothErev(experimentation=0.55, recency=0.3)
    vre.explorer = BoltzmannExplorer()  #tau=100, decay=0.95)
    learners = [vre, Q(), Reinforce()]

    profile = [0.9, 0.6]

    m = (20, 75)  # markups
    nb = 1  # no. offers
    ns = 3  # no. states

    mx = 60.0  # max markup

    weeks = 2
    days = 2

    outdir = "/tmp/case6ww1"
Example #7
    [150.0, 50.0, 1000.0],  # Expected value: 147.5
    [700.0, 800.0, 900.0]   # Expected value: 790
])

distrib = scipy.array([[0.7, 0.2, 0.1], [0.1, 0.6, 0.3], [0.4, 0.2, 0.3],
                       [0.5, 0.45, 0.05], [0.3, 0.5, 0.2]])

env = BanditEnvironment(payouts, distrib)

task = BanditTask(env)

table = PropensityTable(payouts.shape[0])
table.initialize(500.0)

#learner = RothErev(experimentation=0.55, recency=0.3)
learner = VariantRothErev(experimentation=0.65, recency=0.3)
learner.explorer = BoltzmannExplorer(tau=100.0, decay=0.9995)

agent = LearningAgent(table, learner)

experiment = Experiment(task, agent)

epis = int(1e1)
batch = 2
avgRewards = scipy.zeros(epis)
allActions = scipy.zeros(epis * batch)
c = 0
for i in range(epis):
    experiment.doInteractions(batch)
    avgRewards[i] = scipy.mean(agent.history["reward"])
    allActions[c:c + batch] = agent.history["action"].flatten() + 1
    c += batch  # advance the write cursor so the next batch is not overwritten
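
The "Expected value" comments beside the payouts rows can be verified directly: each arm's expected payout is the probability-weighted sum of its rewards under the matching row of distrib. A quick check for the two rows visible in this excerpt (paired with the last two rows of distrib; the earlier payouts rows are not shown here):

import numpy as np

payout_rows = np.array([[150.0, 50.0, 1000.0],
                        [700.0, 800.0, 900.0]])
distrib_rows = np.array([[0.5, 0.45, 0.05],
                         [0.3, 0.5, 0.2]])

# Row-wise probability-weighted sums give the per-arm expectations.
expected = (payout_rows * distrib_rows).sum(axis=1)
print(expected)  # -> 147.5 and 790.0, matching the comments above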