Example #1
File: ex5_1.py Project: Waqquas/pylon
def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method.
    """
    gen = case.generators

    profile = array([1.0])
    maxSteps = len(profile)
    experimentation = 0.55
    recency = 0.3
    tau = 100.0
    decay = 0.99#9995

    # cap, decommit and auctionType (and, further down, nStates, nOffer,
    # markups and withholds) are module-level settings defined elsewhere
    # in ex5_1.py.
    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType)

    experiment = pyreto.continuous.MarketExperiment([], [], market, profile)

    for g in gen[0:2]:
        #learner = RothErev(experimentation, recency)
        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent([g], market, nStates, nOffer,
            markups, withholds, maxSteps, learner)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    task1, agent1 = get_zero_task_agent(gen[2:3], market, nOffer, maxSteps)
    experiment.tasks.append(task1)
    experiment.agents.append(agent1)

    return experiment
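
For reference, the variant (modified) Roth-Erev rule used above keeps one propensity per action and turns those propensities into choice probabilities with a Boltzmann (softmax) explorer whose temperature tau decays each step. The sketch below is a minimal, self-contained illustration of that scheme; it is not pyreto's VariantRothErev or BoltzmannExplorer implementation, and the rewards and propensity values are made up.

import math
import random

def variant_roth_erev_update(propensities, chosen, reward,
                             experimentation=0.55, recency=0.3):
    """One modified Roth-Erev update: decay all propensities by the recency
    factor and reinforce the chosen action with the observed reward."""
    n = len(propensities)
    updated = []
    for j, q in enumerate(propensities):
        if j == chosen:
            e_j = reward * (1.0 - experimentation)
        else:
            e_j = q * experimentation / (n - 1)
        updated.append((1.0 - recency) * q + e_j)
    return updated

def boltzmann_choice(propensities, tau):
    """Sample an action index with softmax probabilities exp(q / tau)."""
    weights = [math.exp(q / tau) for q in propensities]
    r = random.random() * sum(weights)
    acc = 0.0
    for j, w in enumerate(weights):
        acc += w
        if r <= acc:
            return j
    return len(weights) - 1

# Toy run: three actions with fixed (made-up) rewards; the temperature
# decays so play becomes greedier over time.
props = [1.0, 1.0, 1.0]
tau, decay = 100.0, 0.99
rewards = [5.0, 1.0, 0.0]
for _ in range(200):
    a = boltzmann_choice(props, tau)
    props = variant_roth_erev_update(props, a, rewards[a])
    tau *= decay
print("final propensities: %s" % props)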
Example #2
File: ex5_1.py Project: Waqquas/pylon
def get_q_experiment(case, minor=1):
    """ Returns an experiment that uses Q-learning.
    """
    gen = case.generators

    profile = array([1.0])
    maxSteps = len(profile)

    if minor == 1:
        alpha = 0.3 # Learning rate.
        gamma = 0.99 # Discount factor
        # The closer epsilon gets to 0, the more greedy and less explorative.
        epsilon = 0.9
        decay = 0.97

        tau = 150.0 # Boltzmann temperature.
        qlambda = 0.9
    elif minor == 2:
        alpha = 0.1 # Learning rate.
        gamma = 0.99 # Discount factor
        # The closer epsilon gets to 0, the more greedy and less explorative.
        epsilon = 0.9
        decay = 0.99

        tau = 150.0 # Boltzmann temperature.
        qlambda = 0.9
    else:
        raise ValueError("minor must be 1 or 2, got %r" % minor)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType)

    experiment = pyreto.continuous.MarketExperiment([], [], market, profile)

    for g in gen[0:2]:
        learner = Q(alpha, gamma)
        #learner = QLambda(alpha, gamma, qlambda)
        #learner = SARSA(alpha, gamma)

        learner.explorer.epsilon = epsilon
        learner.explorer.decay = decay
        #learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent([g], market, nStates, nOffer,
            markups, withholds, maxSteps, learner)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Passive agent.
    task, agent = get_zero_task_agent(gen[2:3], market, nOffer, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
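
The Q learner above relies on an epsilon-greedy explorer whose epsilon is decayed each step, so the agent moves from mostly random offers toward greedy exploitation of its Q-table. The following is a minimal tabular sketch of that idea with a made-up single-state problem; it does not reproduce PyBrain's Q learner or explorer classes.

import random

def q_update(Q, s, a, reward, s_next, alpha=0.3, gamma=0.99):
    """Tabular backup: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))."""
    Q[s][a] += alpha * (reward + gamma * max(Q[s_next]) - Q[s][a])

def epsilon_greedy(Q, s, epsilon):
    """With probability epsilon pick a random action, otherwise the greedy one."""
    if random.random() < epsilon:
        return random.randrange(len(Q[s]))
    return max(range(len(Q[s])), key=lambda a: Q[s][a])

# Toy loop: one state, four actions with made-up rewards, and an epsilon
# schedule matching the snippet above (0.9 decayed by 0.97).
Q = [[0.0, 0.0, 0.0, 0.0]]
epsilon, decay = 0.9, 0.97
rewards = [0.0, 2.0, 1.0, 3.0]
for _ in range(500):
    a = epsilon_greedy(Q, 0, epsilon)
    q_update(Q, 0, a, rewards[a], 0)
    epsilon *= decay
print("learned Q-values: %s" % Q[0])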
Example #3
File: ex6_1.py Project: Waqquas/pylon
def get_q_experiment(case, minor=1):
    """ Returns an experiment that uses Q-learning.
    """
    locAdj = "ac"
    nStates = 6
    alpha = 0.2 # Learning rate.
    gamma = 0.99 # Discount factor
    # The closer epsilon gets to 0, the more greedy and less explorative.
    epsilon = 0.9
    decay = 0.999

    # profile, cap, decommit, auctionType and the offer discretisation
    # (nOffer, markups, withholds, maxSteps) are module-level settings in ex6_1.py.
    Pd0 = get_pd_max(case, profile)
    Pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = Q(alpha, gamma)
        learner.explorer.epsilon = epsilon
        learner.explorer.decay = decay

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
            markups, withholds, maxSteps, learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
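
Here get_discrete_task_agent is also given the demand bounds Pd0 (peak) and Pd_min alongside nStates = 6. A plausible reading is that total system demand is binned into nStates intervals between those bounds to form the discrete market state; the helper below only illustrates that kind of binning under this assumption and is not pyreto's actual state encoding.

def demand_state(Pd, Pd_min, Pd_max, nStates):
    """Map a demand level onto one of nStates equal-width bins (illustrative only)."""
    if Pd_max <= Pd_min:
        return 0
    frac = (Pd - Pd_min) / float(Pd_max - Pd_min)
    frac = min(max(frac, 0.0), 1.0)   # clamp demand outside the bounds
    return min(int(frac * nStates), nStates - 1)

# Hypothetical bounds: 100 MW minimum, 250 MW peak, six states.
print(demand_state(180.0, 100.0, 250.0, 6))   # -> 3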
Example #4
File: ex6_1.py Project: Waqquas/pylon
def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method.
    """
    locAdj = "ac"
    experimentation = 0.55
    recency = 0.3
    tau = 100.0
    decay = 0.999
    nStates = 3 # stateless RE?

    Pd0 = get_pd_max(case, profile)
    Pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
            markups, withholds, maxSteps, learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)


    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
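
Both ex6_1.py experiments print len(task.env._allActions) * nStates, i.e. the number of enumerated actions times the number of discrete states, which is presumably the size of the table the learner has to populate. The arithmetic below shows how that count would scale if each offer independently picked one (markup, withhold) pair; the level counts and the enumeration rule are assumptions, not taken from get_discrete_task_agent.

# Illustration only: hypothetical discretisation, not pyreto's enumeration.
markups = (0.0, 0.1, 0.2, 0.3)   # assumed markup levels
withholds = (0.0, 0.2)           # assumed withhold fractions
nOffer = 1                       # assumed offers per agent
nStates = 6                      # as in get_q_experiment above

nActions = (len(markups) * len(withholds)) ** nOffer
print("actions per state: %d" % nActions)                  # 8
print("state-action entries: %d" % (nActions * nStates))   # 48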
Example #5
File: ex6_1.py Project: oosterden/pylon
                                decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
                                              markups, withholds, maxSteps,
                                              learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001  # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment