def main():
    choice = input(
        "Enter 1 to select question3 agent, enter 2 to select bonus agent: ")
    if choice == '1':
        Q1 = float(input("Enter value of Q1: "))
        alpha = float(input("Enter value of alpha: "))
        epsilon = float(input("Enter value of epsilon: "))
        agent = BanditAgent(Q1, alpha, epsilon)
    elif choice == '2':
        Q1 = float(input("Enter value of Q1: "))
        c = float(input("Enter value of c: "))
        alpha = float(input("Enter value of alpha: "))
        agent = Ucb_BanditAgent(Q1, c, alpha)
    else:
        raise ValueError("Invalid choice: expected '1' or '2'")
    name = input("Input output file name: ")

    max_steps = 1000  # max number of steps in an episode
    num_runs = 2000  # number of repetitions of the experiment

    # Create and pass agent and environment objects to RLGlue

    environment = BanditEnv()
    rlglue = RLGlue(environment, agent)
    del agent, environment  # don't use these anymore

    # run the experiment; result[i] is the fraction of runs that
    # chose the optimal action at step i
    optimalAction = BanditExp(rlglue, num_runs, max_steps)
    result = optimalAction / num_runs
    print(result)
    with open(name + '.csv', 'w') as out_file:
        for i in range(max_steps):
            out_file.write("%f\n" % result[i])
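
For context, a minimal sketch of the kind of epsilon-greedy agent main() constructs. BanditAgent, Ucb_BanditAgent, BanditEnv, RLGlue, and BanditExp come from modules not shown here, so the class name, constructor signature, and method below are illustrative assumptions, not the project's actual API.

import numpy as np

class BanditAgentSketch:
    """Hypothetical epsilon-greedy agent with constant step size (assumed interface)."""

    def __init__(self, Q1, alpha, epsilon, num_actions=10):
        self.Q = np.full(num_actions, Q1)   # (optionally optimistic) initial value estimates
        self.alpha = alpha                  # constant step size
        self.epsilon = epsilon              # exploration probability

    def agent_step(self, reward, last_action):
        # incremental update of the value estimate for the last action taken
        self.Q[last_action] += self.alpha * (reward - self.Q[last_action])
        # epsilon-greedy action selection
        if np.random.random() < self.epsilon:
            return np.random.randint(len(self.Q))
        return int(np.argmax(self.Q))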


def getPlotPoint(actor_type, param_list):
    # relies on module-level globals: steps, runs, rng, num_actions,
    # stationary, and actor_list (a mapping from actor_type to actor class)
    reward_hist = np.zeros(steps)
    for run in range(runs):
        env = BanditEnv(rng, num_actions, 4, stationary)
        actor = actor_list[actor_type](env, *param_list)
        rewards, _ = actor.run(steps)
        # incremental average of the reward curve across runs
        reward_hist += (rewards - reward_hist) / (run + 1)
    return np.mean(reward_hist[len(reward_hist) // 2:])  # use last half of steps
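
A hedged usage sketch for getPlotPoint: it returns the average reward over the last half of the steps for one actor/parameter setting, which is the quantity a parameter-study plot needs. The globals it reads (runs, steps, num_actions, rng, stationary, actor_list), the parameter values, and the "eps_greedy" key below are assumptions about code not shown here.

import matplotlib.pyplot as plt

param_values = [1 / 128, 1 / 64, 1 / 32, 1 / 16, 1 / 8, 1 / 4, 1 / 2, 1, 2]
points = [getPlotPoint("eps_greedy", [p]) for p in param_values]
plt.plot(param_values, points, label="eps_greedy")
plt.xscale("log", base=2)
plt.xlabel("Parameter value")
plt.ylabel("Average reward over last half of steps")
plt.legend()
plt.show()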
Example #3
    def __init__(self):
        super(Sim, self).__init__()
        self.BurstDaGain = float(1)
        self.SetTags(
            "BurstDaGain",
            'min:"0" step:"0.1" desc:"strength of dopamine bursts: 1 default -- reduce for PD OFF, increase for PD ON"'
        )
        self.DipDaGain = float(1)
        self.SetTags(
            "DipDaGain",
            'min:"0" step:"0.1" desc:"strength of dopamine dips: 1 default -- reduce to siulate D2 agonists"'
        )
        self.Net = pbwm.Network()
        self.SetTags(
            "Net",
            'view:"no-inline" desc:"the network -- click to view / edit parameters for layers, prjns, etc"'
        )
        self.TrnEpcLog = etable.Table()
        self.SetTags("TrnEpcLog",
                     'view:"no-inline" desc:"training epoch-level log data"')
        self.TstEpcLog = etable.Table()
        self.SetTags("TstEpcLog",
                     'view:"no-inline" desc:"testing epoch-level log data"')
        self.TstTrlLog = etable.Table()
        self.SetTags("TstTrlLog",
                     'view:"no-inline" desc:"testing trial-level log data"')
        self.MtxInputWts = etensor.Float32()
        self.SetTags(
            "MtxInputWts",
            'view:"no-inline" desc:"weights from input to hidden layer"')
        self.RunLog = etable.Table()
        self.SetTags("RunLog",
                     'view:"no-inline" desc:"summary log of each run"')
        self.RunStats = etable.Table()
        self.SetTags("RunStats",
                     'view:"no-inline" desc:"aggregate stats on all runs"')
        self.Params = params.Sets()
        self.SetTags("Params",
                     'view:"no-inline" desc:"full collection of param sets"')
        self.ParamSet = str()
        self.SetTags(
            "ParamSet",
            'view:"-" desc:"which set of *additional* parameters to use -- always applies Base and optionaly this next if set -- can use multiple names separated by spaces (don\'t put spaces in ParamSet names!)"'
        )
        self.MaxRuns = int(1)
        self.SetTags("MaxRuns",
                     'desc:"maximum number of model runs to perform"')
        self.MaxEpcs = int(30)
        self.SetTags("MaxEpcs",
                     'desc:"maximum number of epochs to run per model run"')
        self.MaxTrls = int(100)
        self.SetTags("MaxTrls",
                     'desc:"maximum number of training trials per epoch"')
        self.TrainEnv = BanditEnv()
        self.SetTags("TrainEnv",
                     'desc:"Training environment -- bandit environment"')
        self.Time = leabra.Time()
        self.SetTags("Time", 'desc:"leabra timing parameters and state"')
        self.ViewOn = True
        self.SetTags(
            "ViewOn",
            'desc:"whether to update the network view while running"')
        self.TrainUpdt = leabra.TimeScales.AlphaCycle
        self.SetTags(
            "TrainUpdt",
            'desc:"at what time scale to update the display during training?  Anything longer than Epoch updates at Epoch in this model"'
        )
        self.TestUpdt = leabra.TimeScales.AlphaCycle
        self.SetTags(
            "TestUpdt",
            'desc:"at what time scale to update the display during testing?  Anything longer than Epoch updates at Epoch in this model"'
        )
        self.TstRecLays = go.Slice_string(["MatrixGo", "MatrixNoGo"])
        self.SetTags(
            "TstRecLays",
            'desc:"names of layers to record activations etc of during testing"'
        )

        # internal state - view:"-"
        self.Win = 0
        self.SetTags("Win", 'view:"-" desc:"main GUI window"')
        self.NetView = 0
        self.SetTags("NetView", 'view:"-" desc:"the network viewer"')
        self.ToolBar = 0
        self.SetTags("ToolBar", 'view:"-" desc:"the master toolbar"')
        self.WtsGrid = 0
        self.SetTags("WtsGrid", 'view:"-" desc:"the weights grid view"')
        self.TrnEpcPlot = 0
        self.SetTags("TrnEpcPlot", 'view:"-" desc:"the training epoch plot"')
        self.TstEpcPlot = 0
        self.SetTags("TstEpcPlot", 'view:"-" desc:"the testing epoch plot"')
        self.TstTrlPlot = 0
        self.SetTags("TstTrlPlot", 'view:"-" desc:"the test-trial plot"')
        self.RunPlot = 0
        self.SetTags("RunPlot", 'view:"-" desc:"the run plot"')
        self.TrnEpcFile = 0
        self.SetTags("TrnEpcFile", 'view:"-" desc:"log file"')
        self.RunFile = 0
        self.SetTags("RunFile", 'view:"-" desc:"log file"')
        self.ValsTsrs = {}
        self.SetTags("ValsTsrs", 'view:"-" desc:"for holding layer values"')
        self.IsRunning = False
        self.SetTags("IsRunning", 'view:"-" desc:"true if sim is running"')
        self.StopNow = False
        self.SetTags("StopNow", 'view:"-" desc:"flag to stop running"')
        self.NeedsNewRun = False
        self.SetTags(
            "NeedsNewRun",
            'view:"-" desc:"flag to initialize NewRun if last one finished"')
        self.RndSeed = int(1)
        self.SetTags("RndSeed", 'view:"-" desc:"the current random seed"')
        self.vp = 0
        self.SetTags("vp", 'view:"-" desc:"viewport"')
Example #4
import numpy as np
import random
import matplotlib.pyplot as plt
from bandit_env import BanditEnv
from actors import grad

runs = 2000
steps = 1000
num_actions = 10
rng = random.Random(1234)

# compare the gradient-bandit actor with and without a reward baseline
# at two step sizes, measuring how often the optimal action is chosen
for baseline in [False, True]:
    for alpha in [0.1, 0.4]:
        percent_correct_action = np.zeros(steps)

        for run in range(runs):
            env = BanditEnv(rng, num_actions, 4)
            actor = grad.GradientActor(env, alpha, baseline)
            _, correct_actions = actor.run(steps)
            percent_correct_action += correct_actions

        # average across runs and convert to a percentage
        plt.plot(range(steps),
                 percent_correct_action / runs * 100,
                 label="baseline=" + str(baseline) + ", alpha=" + str(alpha))

plt.ylim([0, 100])
plt.ylabel("% Optimal action")
plt.xlabel("Steps")
plt.legend()
plt.show()
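
For reference, a minimal standalone sketch of the gradient-bandit update this experiment exercises. grad.GradientActor's internals are not shown in the snippet, so the function below follows the standard softmax-preference formulation with an incremental average-reward baseline; it is an assumption, not necessarily the project's implementation.

import numpy as np

def gradient_bandit_step(H, avg_reward, reward, action, alpha, use_baseline, t):
    """One preference update for a softmax (gradient) bandit; t is the 1-based step count."""
    pi = np.exp(H - H.max())
    pi /= pi.sum()                          # softmax action probabilities
    baseline = avg_reward if use_baseline else 0.0
    # lower all preferences in proportion to their probability...
    H -= alpha * (reward - baseline) * pi
    # ...then raise the taken action, giving the usual (1 - pi[action]) factor
    H[action] += alpha * (reward - baseline)
    # incremental average reward, used as the baseline on later steps
    avg_reward += (reward - avg_reward) / t
    return H, avg_reward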