Code Example #1
File: ex6_1.py Project: Waqquas/pylon
def get_q_experiment(case, minor=1):
    """ Returns an experiment that uses the Q learning method.
    """
    locAdj = "ac"
    nStates = 6
    alpha = 0.2 # Learning rate.
    gamma = 0.99 # Discount factor
    # The closer epsilon gets to 0, the more greedy and less explorative.
    epsilon = 0.9
    decay = 0.999

    # NOTE: profile, cap, decommit, auctionType, nOffer, markups, withholds
    # and maxSteps are module-level globals defined elsewhere in ex6_1.py.
    Pd0 = get_pd_max(case, profile)
    Pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = Q(alpha, gamma)
        learner.explorer.epsilon = epsilon
        learner.explorer.decay = decay

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
            markups, withholds, maxSteps, learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
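
For reference, the epsilon/decay pair set on learner.explorer above configures an epsilon-greedy explorer: with probability epsilon the agent takes a random action, otherwise the greedy one, and epsilon shrinks geometrically after each step. A minimal standalone sketch of that schedule (plain Python; epsilon_greedy is an illustrative helper, not part of the pyreto or PyBrain API):

import random

def epsilon_greedy(values, epsilon):
    # values: estimated Q-values of each action in the current state.
    if random.random() < epsilon:
        return random.randrange(len(values))                 # explore
    return max(range(len(values)), key=values.__getitem__)   # exploit

epsilon, decay = 0.9, 0.999
for step in range(10000):
    action = epsilon_greedy([0.0, 1.0, 0.5], epsilon)
    epsilon *= decay  # geometric decay toward greedy behaviour
# 0.9 * 0.999**10000 is about 4e-05, i.e. effectively greedy by the end.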
Code Example #2
File: ex6_1.py Project: Waqquas/pylon
def get_re_experiment(case, minor=1):
    """ Returns an experiment that uses the Roth-Erev learning method.
    """
    locAdj = "ac"
    experimentation = 0.55
    recency = 0.3
    tau = 100.0
    decay = 0.999
    nStates = 3 # stateless RE?

    Pd0 = get_pd_max(case, profile)
    Pd_min = get_pd_min(case, profile)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = pyreto.continuous.MarketExperiment([], [], market)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = VariantRothErev(experimentation, recency)
        learner.explorer = BoltzmannExplorer(tau, decay)

        task, agent = get_discrete_task_agent(g, market, nStates, nOffer,
            markups, withholds, maxSteps, learner, Pd0, Pd_min)

        print "ALL ACTIONS:", len(task.env._allActions) * nStates

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_zero_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
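
The VariantRothErev learner keeps one propensity per action: each update decays all propensities by the recency (forgetting) factor, credits the chosen action with (1 - experimentation) of its reward, and spreads the experimentation share over the other actions; the BoltzmannExplorer then draws actions with probability proportional to exp(propensity / tau). A minimal sketch of this scheme, assuming pyreto's implementation follows the standard modified Roth-Erev formulation (function names here are illustrative):

import math, random

def update_propensities(q, chosen, reward, experimentation, recency):
    # Modified Roth-Erev update: decay every propensity by the recency
    # factor; the chosen action earns (1 - experimentation) of the reward,
    # the remaining actions share the experimentation fraction.
    n = len(q)
    return [(1.0 - recency) * q[j] +
            (reward * (1.0 - experimentation) if j == chosen
             else q[j] * experimentation / (n - 1))
            for j in range(n)]

def boltzmann_choice(q, tau):
    # Softmax over propensities: a large tau gives near-uniform exploration,
    # tau -> 0 approaches the greedy choice.
    weights = [math.exp(v / tau) for v in q]
    r, acc = random.uniform(0.0, sum(weights)), 0.0
    for j, w in enumerate(weights):
        acc += w
        if acc >= r:
            return j
    return len(q) - 1

q = [1.0, 1.0, 1.0]  # initial propensities, one per action
a = boltzmann_choice(q, tau=100.0)
q = update_propensities(q, a, reward=5.0, experimentation=0.55, recency=0.3)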