Code example #1
File: test.py  Project: amidos2006/Worst-First-Search
def singleEffect(filename, parameter):
    seed = rnd.randint(0, Global.BIG_NUM)
    playAllAgainst(filename, Agents.DepMCTS(), str(parameter), seed)
    playAllAgainst(filename, Agents.DepUnDepMCTS(0.5), str(parameter), seed)
    playAllAgainst(filename, Agents.SimulatedMCTS(0.97), str(parameter), seed)
    playAllAgainst(filename, Agents.FlippingMCTS(), str(parameter), seed)
    playAllAgainst(filename, Agents.NormalMCTS(), str(parameter), seed)
Code example #2
def test_mountain_car(n_states,qhat_file='Data/mc_qhat.pkl',max_eps = 10000):
    """
    Compares aggregated mountain car with n_states with a simple discretised version of the same.
    Supplying a precalculated qhat file will drastically speed computation
    """

    divs = int(np.floor(np.sqrt(n_states)))

    if qhat_file:
        with open(qhat_file,'rb') as f:
            qhat = pickle.load(f)
    else:
        qhat = evaluate_MC_qhat()

    agg = Aggregation.generateAggregation(qhat,target_divisions=n_states)

    mc_d = Problems.MountainCar(representation='disc',divisions = divs)
    mc_a = Problems.MountainCar(representation='aggr',aggregation=agg,divisions=100)
    
    ag_d = Agents.QAgent(mc_d,alpha=1e-3)
    ag_a = Agents.QAgent(mc_a,alpha=1e-3)

    d_eps = ag_d.run_n_episodes(max_eps)
    a_eps = ag_a.run_n_episodes(max_eps)

    n_eps = np.array([2**i for i in range(1,int(np.log2(max_eps))+1)])

    data = pd.DataFrame()
    data['n_eps'] = n_eps
    data['disc'] = d_eps
    data['aggr'] = a_eps

    return data
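
The snippet below is a hypothetical driver for test_mountain_car, not part of the project: it assumes the function above (together with its Problems/Agents/Aggregation dependencies and the default qhat pickle) is importable, and it only relies on the 'n_eps', 'disc' and 'aggr' columns of the returned DataFrame.

import matplotlib.pyplot as plt

# Sketch only: n_states and max_eps are illustrative values.
data = test_mountain_car(n_states=64, max_eps=1024)  # test_mountain_car as defined above

plt.plot(data['n_eps'], data['disc'], label='discretised')
plt.plot(data['n_eps'], data['aggr'], label='aggregated')
plt.xlabel('episodes')
plt.ylabel('per-episode result')
plt.legend()
plt.show()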
Code example #3
def testMaze(n_train, n_nav):
    ValueLearning.DBG_LVL = 1
    move_distance = 0.99

    # Experiment parameters
    nx = 6
    ny = 6
    n_fields = round(1.0 * (nx + 3) * (ny + 3))

    Hippocampus.N_CELLS_PER_FIELD = 1
    n_cells = n_fields * Hippocampus.N_CELLS_PER_FIELD

    # Maze creation
    maze = Environment.RandomGoalOpenField(nx, ny, move_distance)

    # Generate place fields and place cells
    place_fields = Hippocampus.setupPlaceFields(maze, n_fields)
    place_cells = Hippocampus.assignPlaceCells(n_cells, place_fields)

    # Create Actor and Critic
    actor = Agents.RandomAgent(maze.getActions(), n_cells)
    critic = Agents.Critic(n_cells)

    ValueLearning.learnValueFunction(n_train,
                                     maze,
                                     place_cells,
                                     actor,
                                     critic,
                                     max_steps=1000)
Code example #4
def updateGame(debug):
    global base
    while True:
        Pygame.update()

        # Checks job list for available jobs and assigns them to workers.
        if Agents.jobList:
            for agent in agents:
                if agent.getRole() == "worker" and agent.getState() != "upgrading":
                    agent.setJob(Agents.jobList[0])
                    Agents.removeFromJobList()
                    agent.setState(StateManager.upgrading())
                    if not Agents.jobList:
                        break

        # Executes the agents current state
        for agent in agents:
            agent.state.execute(agent)

        FogOfWar.updateFogOfWar(agents)
        Pygame.drawAgents(agents)

        # Checks if victory requirements have been met.
        if agents[0].base.getCoal() >= 200 and agents[0].base.getIron() >= 20:
            print("Victory!")
            return True
        if debug:
            infoPrints()
Code example #5
File: test.py  Project: amidos2006/Worst-First-Search
def playAllAgainst(filename, agent, parameter):
    agentName = type(agent).__name__
    result = playGame([Agents.NormalMCTS(), agent]).split("-")
    writeFile(
        filename, "NormalMCTS" + ", " + agentName + ", " + str(parameter) +
        ", " + result[0] + ", " + result[1])
    result = playGame([agent, Agents.NormalMCTS()]).split("-")
    writeFile(
        filename, agentName + ", " + "NormalMCTS" + ", " + str(parameter) +
        ", " + result[0] + ", " + result[1])
Code example #6
File: NS.py  Project: salehiac/BR-NS
 def make_ag():
     return Agents.SmallFC_FW(
         in_d=in_dims,
         out_d=out_dims,
         num_hidden=num_hidden,
         hidden_dim=hidden_dim,
         output_normalisation=normalise_output_with)
Code example #7
    def __set_agent_values__(self):
        int_rate = self.model_params["int_rate"]
        min_holding = self.model_params['min_holding']
        init_cash = self.model_params['init_cash']
        position = self.model_params['init_holding']
        min_cash = self.model_params['min_cash']
        tolerance = self.model_params['tolerance']
        mistake_threshold = self.model_params['mistake_threshold']
        make_mistakes = self.model_params['make_mistakes']

        for i in range(int(self.model_params['num_agents'])):
            agent = Agents.Agent(id=i,
                                 name='Agent ' + str(i),
                                 int_rate=int_rate,
                                 min_holding=min_holding,
                                 init_cash=init_cash,
                                 position=position,
                                 forecast_params=self.forecast_params,
                                 dividend=self.init_dividend,
                                 price=self.init_asset_price,
                                 conditions=self.conditions,
                                 min_cash=min_cash,
                                 risk_aversion=self.__gen_agent_risk__,
                                 tolerance=tolerance,
                                 mistake_threshold=mistake_threshold,
                                 make_mistakes=make_mistakes)
            agent.__set_holdings__()
            self.population.append(agent)
Code example #8
def initialize_agents(grid, num_agents=NUM_AGENTS):
    agents = []
    for i in range(num_agents):
        agent = Agents.BasicAgent(pygame, grid, i)
        agents.append(agent)
        grid.add_agent(agent)
    return agents
Code example #9
def setupStartPos(r):
    global base
    map = Map.map
    R = copy.deepcopy(r)
    redo = True

    # Finds suitable spawning area
    while redo:
        startPos = (random.randrange(1, 99), random.randrange(1, 99))
        for neighbour in R:
            tile = map[startPos[0] + neighbour[0]][startPos[1] + neighbour[1]]
            if tile in ("B", "V", "T", "I"):
                redo = True
                break
            else:
                redo = False

    map[startPos[0]][startPos[1]] = "S"
    base = BaseManager.base(startPos)

    # Spawn workers around spawning area
    for i in range(StatParser.statDict["workers"]):
        for next in R:
            if next[2] == 7:
                continue
            agents.append(
                Agents.agent((startPos[0] + next[0], startPos[1] + next[1]),
                             base, i + 1))
            next[2] += 1
            break

    return startPos
Code example #10
    def test_Q(self):

        p_raw, p_agg = setupESA()

        ql_raw = Agents.QAgent(p_raw, 1)
        ql_agg = Agents.QAgent(p_agg, 1)

        ql_raw.episode(timeout=1000)
        ql_agg.episode(timeout=1000)

        delta_r = sum(abs(ql_raw.qValues[0] - p_raw.qValues[0])) / 4
        delta_a = sum(abs(ql_agg.qValues[0] - p_agg.qValues[0])) / 2

        print("\nQ learning raw delta = {}, agg delta = {}".format(
            delta_r, delta_a))

        self.assertTrue(delta_r < 1e-1)
        self.assertTrue(delta_a < 1e-1)
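
The deltas checked here are mean absolute deviations between the learned Q-values and the problem's reference Q-values for the first state (4 actions in the raw problem, 2 in the aggregated one). A self-contained numpy sketch of that quantity, with made-up numbers:

import numpy as np

# Illustrative Q-values for a single state; the numbers are invented.
q_learned   = np.array([0.10, 0.52, 0.31, 0.98])
q_reference = np.array([0.12, 0.50, 0.30, 1.00])

# Mean absolute deviation over that state's actions,
# mirroring sum(abs(...)) / n_actions in the test above.
delta = np.sum(np.abs(q_learned - q_reference)) / len(q_learned)
assert delta < 1e-1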
Code example #11
    def test_UT(self):

        p_raw, p_agg = setupESA()

        ut_raw = Agents.UTreeAgent(p_raw)

        ut_raw.episode(timeout=1000)

        ut_raw.utree.print_tree()
Code example #12
    def test_SL(self):

        p_raw, p_agg = setupESA()

        sl_raw = Agents.SarsaLambda(p_raw, 1)
        sl_agg = Agents.SarsaLambda(p_agg, 1)

        sl_raw.episode(timeout=1000)
        sl_agg.episode(timeout=1000)

        delta_r = sum(sl_raw.qValues[0] - p_raw.qValues[0]) / 4
        delta_a = sum(sl_agg.qValues[0] - p_agg.qValues[0]) / 2

        print("\nSarsa(l) raw delta = {}, agg delta = {}".format(
            delta_r, delta_a))

        self.assertTrue(delta_r < 1e-1)
        self.assertTrue(delta_a < 1e-1)
Code example #13
def compare_raw_agg_ql(param_tuples,timeout=1000,gamma=0.5,aggtype='q',log_prob=False,rep=1,alpha=0.005,decayAlpha=False):
    """
    Generates random problems from parameter lists, runs qlearning 
    and records value function deviations for each problem after timeout steps
    log optionally records problems so that they can be retrieved later
    """

    dmat = np.zeros((len(param_tuples)*rep,9))
    
    if log_prob:
        problem_dict = {}
        d = datetime.today().strftime('%d-%m-%Y--%H_%M_%S')
        filename = 'problems' + d + '.pkl'
           
    for i, (n,n_agg,b,acts,e_noise) in enumerate(param_tuples):
        for j in range(rep):           
            p_r, p_a, _ = Problems.genRandomProblems(n,n_agg,acts,b,gamma=gamma,e_noise=e_noise)
                    
            pid = hash(str(p_r.transitions))
        
            if log_prob:
                problem_dict[pid] = {'raw':p_r,'agg':p_a}
                    
            agent_r = Agents.QAgent(p_r,alpha=alpha)
            agent_a = Agents.QAgent(p_a,alpha=alpha)
                    
            agent_r.episode(timeout=timeout,decayAlpha=decayAlpha)
            agent_a.episode(timeout=timeout,decayAlpha=decayAlpha)
        
            delta_r = Evaluation.getDeltas(agent_r,p_r)
            delta_a = Evaluation.getDeltas(agent_a,p_a,agg=p_a.aggregation)
        
            dtilde = Evaluation.nonMarkovianity(p_a.transitions[0], p_a.aggregation)

            dmat[i*rep+j] = (pid,n,n_agg,b,acts,e_noise,dtilde,np.average(delta_r),np.average(delta_a))
            
                    
    if log_prob:
        with open(path+filename,'wb') as f:
            pickle.dump(problem_dict,f)
                    
    data = pd.DataFrame(data=dmat,columns=['pid','n','n_agg','b','acts','e_noise','nonmarkovianity','d_r','d_a'])
    return data
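
A hypothetical call of compare_raw_agg_ql might look like the sketch below; it assumes the enclosing module (with its Problems, Agents, Evaluation and pandas imports and the module-level path variable) is importable, and the comments on each parameter are only guesses at their meaning from the tuple order (n, n_agg, b, acts, e_noise).

# Sketch only: all values are illustrative.
param_tuples = [
    (n, n_agg, b, acts, e_noise)
    for n in (20, 50)          # raw state count
    for n_agg in (5, 10)       # aggregated state count
    for b in (2,)              # presumably a branching factor
    for acts in (2, 4)         # number of actions
    for e_noise in (0.0, 0.1)  # presumably transition/exit noise
]

data = compare_raw_agg_ql(param_tuples, timeout=5000, rep=3, log_prob=False)
print(data.groupby('n')[['d_r', 'd_a']].mean())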
Code example #14
    def test_VI(self):

        p_raw, _ = setupESA()

        vi_raw = Agents.VIAgent(p_raw, 1)
        vi_raw.VISweep()

        delta_r = sum(vi_raw.qValues[0] - p_raw.qValues[0]) / 4

        print("\nValue Iteration raw delta = {}".format(delta_r))

        self.assertTrue(delta_r < 1e-4)
Code example #15
    def __init__(self,
                 rnn_size=256,
                 embed_dim=256,
                 learning_rate=0.001,
                 vocab_size=8638,
                 batch_size=512,
                 dropout=0.75):

        self.rnn_size = rnn_size
        self.embed_dim = embed_dim
        self.learning_rate = learning_rate
        self.vocab_size = vocab_size + 1
        self.batch_size = batch_size
        self.drop_out = dropout

        # Structures define
        self.img_dim = 2048
        self.FE_m = Agents.FeatEmbed(self.rnn_size, self.img_dim, name='FE_m')
        self.FE_o = Agents.FeatEmbed(self.rnn_size, self.img_dim, name='FE_o')
        self.FE_w = Agents.FeatEmbed(self.rnn_size, self.img_dim, name='FE_w')
        self.attention = Agents.Att(self.rnn_size, self.img_dim, name='Att')
        self.initialize_model()
Code example #16
def test_convergence_ql(param_tuple,timeout=100,intervals=100,gamma=0.9,aggtype='q'):
    """
    Generates a random problem from a (single) parameter tuple, runs qlearning
    and records the value function deviation at regular intervals.
    """
    n, n_agg, b, acts, e_noise = param_tuple
    p_r, p_a, _ = Problems.genRandomProblems(n,n_agg,acts,b,gamma=gamma,e_noise=e_noise)

    agent_r = Agents.QAgent(p_r,alpha=1e-1)
    agent_a = Agents.QAgent(p_a,alpha=1e-1)

    dmat = np.zeros((intervals,3))

    for i in range(intervals):
        # each interval runs one episode of `timeout` steps
        agent_r.episode(timeout=timeout)
        agent_a.episode(timeout=timeout)

        delta_a = np.average(Evaluation.getDeltas(agent_a,p_a))
        delta_r = np.average(Evaluation.getDeltas(agent_r,p_r))

        # record the cumulative step count alongside both deviations
        dmat[i] = (i*timeout,delta_a,delta_r)

    data = pd.DataFrame(data=dmat,columns=['n','d_a','d_r'])
    return data
Code example #17
def main():
    global base
    r = ([0, 0, 0], [1, 1, 0], [1, 0, 0], [0, 1, 0], [-1, 1, 0], [1, -1, 0],
         [-1, 0, 0], [0, -1, 0], [-1, -1, 0])
    StatParser.readStats()
    Map.makeMap()
    startPos = setupStartPos(r)
    FogOfWar.createFogOfWar(startPos)
    FogOfWar.updateFogOfWar(agents)
    Resources.findMaterials()
    Pygame.init()

    # Adds jobs to the job list for workers to be assigned to.
    for j in range(StatParser.statDict["workers"]):
        if j < 30:
            Agents.addToJobList("woodCutter")
        elif j >= 30 and j < 40:
            Agents.addToJobList("miner")
        elif j >= 40 and j < 46:
            Agents.addToJobList("explorer")
        else:
            Agents.addToJobList("builder")

    # Adds some buildings to the build list so the builders know what to build.
    for k in range(8):
        if k < 6:
            base.addToBuildList(["coalFurnace", 0])
        else:
            base.addToBuildList(["smeltery", 0])

    # Allows user to speed up the application (can be laggy).
    TimeMultiplier.setTimeMultiplier(int(input("Set a time multiplier: ")))
    # Allows the user to choose if they want debug info to be displayed or not
    debug = input("Debug info y/n?: ")
    if debug == "y":
        debug = True
    else:
        debug = False

    # Runs and times update to see how long it took for the AI to complete the wanted task
    gameStart = time.time()
    if updateGame(debug):
        print("Time to finish: " +
              str((time.time() - gameStart) * TimeMultiplier.timeMultiplier))
        return True
Code example #18
File: test.py  Project: amidos2006/Worst-First-Search
def playAllAgainst(filename, agent, parameter, seed):
    agentName = agent.__class__.__name__
    Agents.fixSeed(seed)
    result = playGame([Agents.NormalMCTS(), agent]).split("-")
    writeFile(
        filename, "NormalMCTS" + ", " + agentName + ", " + str(parameter) +
        ", " + result[0] + ", " + result[1])
    Agents.fixSeed(seed)
    result = playGame([agent, Agents.NormalMCTS()]).split("-")
    writeFile(
        filename, agentName + ", " + "NormalMCTS" + ", " + str(parameter) +
        ", " + result[0] + ", " + result[1])
Code example #19
    def createWidgets(self):
        """
        Create qt widgets

        QTabWidget
         / QWidget \/ QWidget \_________________
        |                                       |
        | ______________________________________|
        """
        self.steps = Steps.StepsQWidget(self)
        self.descrs = Descriptions.DescriptionsQWidget(self)
        self.parameters = Parameters.ParametersQWidget(self)
        self.parametersOutput = Parameters.ParametersQWidget(self, forParamsOutput=True)
        self.probes = Probes.ProbesQWidget(self)
        self.agents = Agents.AgentsQWidget(self)
        self.adapters = Adapters.AdaptersQWidget(self, testParams=self, testDescrs=self.descrs)
        self.libraries = Libraries.LibrariesQWidget(self, testParams=self, testDescrs=self.descrs)
        
        self.parametersTab = QTabWidget()
        self.parametersTab.setTabPosition(QTabWidget.North)
        self.parametersTab.setStyleSheet("QTabWidget { border: 0px; }") # remove 3D border
        self.parametersTab.addTab(self.descrs, QIcon(":/test-config.png"), "Description")
        self.parametersTab.addTab(self.steps, QIcon(":/run-state.png"), "Steps")

        self.paramsTab = QTabWidget()
        self.paramsTab.setStyleSheet("QTabWidget { border: 0px; }") # remove 3D border
        self.paramsTab.setTabPosition(QTabWidget.North)
        self.paramsTab.addTab(self.parameters, QIcon(":/test-input.png"), "Inputs")
        self.paramsTab.addTab(self.parametersOutput, QIcon(":/test-output.png"), "Outputs")
        self.paramsTab.addTab(self.adapters, QIcon(":/adapters.png"), "Adapters")
        self.paramsTab.addTab(self.libraries, QIcon(":/libraries.png"), "Libraries")

        self.miscsTab = QTabWidget()
        self.miscsTab.setStyleSheet("QTabWidget { border: 0px; }") # remove 3D border
        self.miscsTab.setTabPosition(QTabWidget.North)
        self.miscsTab.addTab(self.agents, QIcon(":/agent.png"), "Agents")
        self.miscsTab.addTab(self.probes, QIcon(":/probe.png"), "Probes")

        self.title = QLabel("Test Properties")
        font = QFont()
        font.setBold(True)
        self.title.setFont(font)

        self.labelHelp = QLabel("Prepare the test.")
        font = QFont()
        font.setItalic(True)
        self.labelHelp.setFont(font)

        self.mainTab = QTabWidget()
        self.mainTab.setTabPosition(QTabWidget.North)
        
        self.mainTab.addTab(self.parametersTab, QIcon(":/test-description.png"), "Test Design")
        self.mainTab.addTab(self.paramsTab, QIcon(":/repository.png"), "Test Data")
        self.mainTab.addTab(self.miscsTab, QIcon(":/server-config.png"), "Miscellaneous")


        if Settings.instance().readValue( key = 'TestProperties/inputs-default-tab' ) == "True":
            self.mainTab.setCurrentIndex(1)
            self.paramsTab.setCurrentIndex(TAB_INPUTS)

        layout = QVBoxLayout()
        layout.addWidget( self.title )
        layout.addWidget( self.labelHelp )

        layout.addWidget(self.mainTab)
        layout.setContentsMargins(0,0,0,0)

        self.setLayout(layout)
Code example #20
    def execute(self, agent):
        readyToBuild = False
        # Where to build
        if agent.base.getBuildList() and not agent.getLocked():
            pos = agent.getPos()
            # find unused buildable tile
            for next in Map.r:
                tile = Map.map[pos[0] + next[0]][pos[1] + next[1]]
                visible = FogOfWar.fogOfWar[pos[0] + next[0]][pos[1] + next[1]]
                if tile in ("M", "G") and visible:
                    # What to build
                    for building in agent.base.getBuildList():
                        if building[0] == "coalFurnace":
                            if building[1] == 0 or building[1] == agent.getId():
                                readyToBuild = agent.base.buildCoalFurnace(
                                    agent,
                                    (pos[0] + next[0], pos[1] + next[1]))
                                if readyToBuild:
                                    agent.setPos(
                                        (pos[0] + next[0], pos[1] + next[1]))
                                    building[1] = agent.getId()
                                    agent.setLocked(True)
                                break

                        elif building[0] == "smeltery":
                            if building[1] == 0 or building[1] == agent.getId():
                                readyToBuild = agent.base.buildSmeltery(
                                    agent,
                                    (pos[0] + next[0], pos[1] + next[1]))
                                if readyToBuild:
                                    agent.setPos(
                                        (pos[0] + next[0], pos[1] + next[1]))
                                    building[1] = agent.getId()
                                    agent.setLocked(True)
                                break

                        elif building[0] == "blacksmith":
                            if building[1] == 0 or building[1] == agent.getId():
                                readyToBuild = agent.base.buildBlacksmith(
                                    agent,
                                    (pos[0] + next[0], pos[1] + next[1]))
                                if readyToBuild:
                                    agent.setPos(
                                        (pos[0] + next[0], pos[1] + next[1]))
                                    building[1] = agent.getId()
                                    agent.setLocked(True)
                                break

                        elif building[0] == "trainingCamp":
                            if building[1] == 0 or building[1] == agent.getId():
                                readyToBuild = agent.base.buildTrainingcamp(
                                    agent,
                                    (pos[0] + next[0], pos[1] + next[1]))
                                if readyToBuild:
                                    agent.setPos(
                                        (pos[0] + next[0], pos[1] + next[1]))
                                    building[1] = agent.getId()
                                    agent.setLocked(True)
                                break
                    break
        elif agent.getPos() == agent.getJob()[1]:
            diff = (time.time() -
                    agent.getTimer()) * TimeMultiplier.timeMultiplier

            if agent.getJob()[0] == "coalFurnace*":
                if diff >= StatParser.statDict["cfBuildTime"]:
                    agent.base.addBuilding(
                        BaseManager.coalFurnace(agent.getPos()))
                    agent.setJob("builder")
                    agent.setLocked(False)
                    agent.setState(returnHome())

            elif agent.getJob()[0] == "smeltery*":
                if diff >= StatParser.statDict["smelteryBuildTime"]:
                    agent.base.addBuilding(BaseManager.smeltery(
                        agent.getPos()))
                    agent.setJob("builder")
                    agent.setLocked(False)
                    agent.setState(returnHome())

            elif agent.getJob()[0] == "blacksmith*":
                if diff >= StatParser.statDict["bsBuildTime"]:
                    agent.base.addBuilding(
                        BaseManager.blacksmith(agent.getPos()))
                    agent.setJob("builder")
                    agent.setLocked(False)
                    agent.setState(returnHome())

            elif agent.getJob()[0] == "trainingCamp":
                if diff >= StatParser.statDict["tcBuildTime"]:
                    agent.base.addBuilding(
                        BaseManager.trainingCamp(agent.getPos()))
                    agent.setJob("builder")
                    agent.setLocked(False)
                    agent.setState(returnHome())

            # When to hire
            elif agent.getJob()[0] == "coalFurnace":
                if StatParser.statDict["cfBuildTime"] - (
                        time.time() - agent.getTimer()
                ) <= StatParser.statDict["artisanUpgradeTime"]:
                    Agents.addToJobList("coalWorker")
                    agent.setJob(("coalFurnace*", agent.getJob()[1]))
            elif agent.getJob()[0] == "smeltery":
                if StatParser.statDict["smelteryBuildTime"] - (
                        time.time() - agent.getTimer()
                ) <= StatParser.statDict["artisanUpgradeTime"]:
                    Agents.addToJobList("smelteryWorker")
                    agent.setJob(("smeltery*", agent.getJob()[1]))
            elif agent.getJob()[0] == "blacksmith":
                if StatParser.statDict["bsBuildTime"] - (
                        time.time() - agent.getTimer()
                ) <= StatParser.statDict["artisanUpgradeTime"]:
                    Agents.addToJobList("weaponSmith")
                    agent.setJob(("blacksmith*", agent.getJob()[1]))

        elif agent.base.getBuildList() == []:
            # When finished go idle until build list has content
            agent.setState(returnHome())
Code example #21
File: NS.py  Project: salehiac/BR-NS
 def make_ag():
     return Agents.Agent1d(min(problem.env.phi_vals),
                           max(problem.env.phi_vals))
Code example #22
File: xray_cmd_parser.py  Project: jaredchandler/xray
import pyparsing as pp
import Agents
import argparse

from settings import *

import re
global args
agent = Agents.Agent(args.groups)
#===========================


def printenv(e):
    # print("")
    # print("========ENVIRONMENT=======")
    # for k in e.keys():
    #     print("\t"+k)
    #     print("\t\t"+str(e[k]))
    # print("--------------------------")
    # print("")
    pass


def dprintenv(e):
    print("")
    print("========ENVIRONMENT=======")
    for k in e.keys():
        print("\t" + k.replace("_", " "))
        print("\t\t" + str(e[k]))
    print("--------------------------")
    print("")
Code example #23
cost_function = 'exp'
# data set parameter
DATA_PATH = r'D:\workspaces\datasets\USCensus1990\USCensus1990.data.txt'
predictor = ['iMobillim', 'iWork89']
target = 'iMilitary'
with_data_set = False

# agent hyperparameter
rho = 0.5
v1 = 1

# agents

# agent1 = Agents.HierarchicalOptimisticOptimization([0, 1], v1=v1, rho=rho, setup=setup)
# agent2 = Agents.HierarchicalOptimisticOptimization([0, 1], v1=v1, rho=rho, setup=setup)
agent1 = Agents.Zooming([0, 1])
agent2 = Agents.Zooming([0, 1])
# agent1 = Agents.Random([0, 1])
# agent2 = Agents.Random([0, 1])
discrete_actions = [[0.01, 0.07, 0.13, 0.19, 0.25, 0.31, 0.37, 0.43, 0.49],
                    [0.01, 0.045, 0.08, 0.115, 0.15, 0.185, 0.22, 0.255, 0.29]]
# agent1 = Agents.EpsilonGreedy(discrete_actions[0], eps_greedy=0.999, eps_decay=0.9996)
# agent2 = Agents.EpsilonGreedy(discrete_actions[1], eps_greedy=0.999, eps_decay=0.9996)

# DPG parameter
logistic_growth = 0.2
betas = [5, -3]
sigmas_j_square = [.5, .3]
# 'loo' or 'shapley'
pricing_mechanism = 'loo'
r_max = 15
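
The commented-out EpsilonGreedy lines suggest a discrete baseline next to the continuous Zooming agents. The following is a generic, self-contained sketch of epsilon-greedy selection with multiplicative epsilon decay; the running-mean value update is an assumption for illustration, not this project's Agents.EpsilonGreedy.

import random

actions = [0.01, 0.07, 0.13, 0.19, 0.25, 0.31, 0.37, 0.43, 0.49]
values = {a: 0.0 for a in actions}
counts = {a: 0 for a in actions}
eps, eps_decay = 0.999, 0.9996

def select_action():
    global eps
    eps *= eps_decay                       # decay exploration over time
    if random.random() < eps:
        return random.choice(actions)      # explore
    return max(values, key=values.get)     # exploit the best estimate so far

def update(action, reward):
    counts[action] += 1
    values[action] += (reward - values[action]) / counts[action]  # running mean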
Code example #24
    def __init__(self, ng=c.numOfGoods, nr=c.numofRounds, mem=c.memory, al=c.alpha, maxCost=c.max_fixedCost):

        # This list will be used to keep track of goods that agents receive
        # even though they are not their consumption goods, i.e. goods agents
        # choose to carry for the purpose of indirect trade

        c.numOfBaseAgents = ng * (ng - 1)
        c.numOfGoods = ng
        c.numofRounds = nr
        c.memory = mem
        c.alpha = al
        c.max_fixedCost = maxCost

        self.listofMoney = [0] * (c.numOfGoods)

        # agent self.types
        self.types = []

        # array of all trades
        self.allTrades = []

        # array of goods traded
        self.goodsTraded = []

        # cost list
        self.costList_unchanging = []

        # create a dictionary to store the cost of trading goods, the structure will be of the following kind
        # {(0,1):[0.1,0.02,0.5],(0,2):[0.1,0.6,0.02],(1,0):[0.5,0.3]}
        # (0,1) records the cost of the agent who received 0
        # we initialize an empty dictionary with the intention of filling it up with the above
        self.tradeCosts = dict()

        # create two list of number of goods
        self.goods_listOne = [i for i in range(c.numOfGoods)]
        self.goods_listTwo = [i for i in range(c.numOfGoods)]

        # create a list of combinations of goods, it will look like this: [(0,1),(0,2),(1,0) and so on]
        self.goodsCombinations = []

        for i in self.goods_listOne:
            for j in self.goods_listTwo:
                self.goodsCombinations.append((i, j))

        # attaches an empty list to a dictionary where elements shows combinations of goods, so we get {(0,1):[],(0,2):[],(1,0):[]}
        for x in self.goodsCombinations:
            self.tradeCosts[x] = list()

        # print "fixed costs", c.max_fixedCost

        # list of costs assigned to each agent
        for i in range(0, c.numOfGoods):
            self.costList_unchanging.append(random.uniform(0, c.max_fixedCost))

        self.costList = copy.deepcopy(self.costList_unchanging)

        # We generate a list of agents
        self.agentList = [ag.simpleAgents() for count in xrange(c.numOfBaseAgents)]

        self.listofMoney = [0] * (c.numOfGoods)

        # initialize agent self.types
        for i in range(0, c.numOfGoods):
            for j in range(0, c.numOfGoods):
                if i != j:
                    nlist = [str(i), str(j), str(j)]
                    self.types.append(nlist)

        # And we give each agent a list of consumption good,
        # production good and carry good. Initially the production
        # good and carry good are the same

        for i in range(0, len(self.types)):
            self.agentList[i].goods = self.types[i]

        for i in self.agentList:
            i.cost = self.costList

        # print "costs", self.costList

        # register callback function if necessary
        self.callback_function = None
Code example #25
    def return_if_have_shot(self, start_node, target):
        '''Uses a linear equation to determine whether, starting from a particular tile, there is a clean shot at the target'''
        #First, define the starting and ending positions
        start_cell = Terrain.terrain_obj.terrain_dict[start_node]
        target_coord = Terrain.terrain_obj.return_cell_index(
            target.x, target.y)
        target_cell = Terrain.terrain_obj.terrain_dict[target_coord]

        #Linear equation parameters
        t = 10
        x = start_cell.sprite.x
        y = start_cell.sprite.y
        bottom_fact = math.sqrt(((target_cell.sprite.x - start_cell.sprite.x) * (target_cell.sprite.x - start_cell.sprite.x)) + \
                ((target_cell.sprite.y - start_cell.sprite.y) * (target_cell.sprite.y - start_cell.sprite.y)))
        x_fact = (target_cell.sprite.x - start_cell.sprite.x) / bottom_fact
        y_fact = (target_cell.sprite.y - start_cell.sprite.y) / bottom_fact

        #List of cells that pass between two points
        available_coords = []

        #While loop until reached target cell
        test_limit = 1000
        iter = 0
        while True:
            x += t * x_fact
            y += t * y_fact

            new_coord = Terrain.terrain_obj.return_sprite_index(x, y)
            #print(x,y)
            #print(new_coord)

            if new_coord not in available_coords:
                available_coords.append(new_coord)

            #If searched has reached its end, then find neighbors and return false if mountains, else true
            if target_coord in available_coords:

                test_lazer = Agents.Laser(x=None,
                                          y=None,
                                          rotation=0,
                                          target=None)

                for coord in available_coords:
                    test_lazer.sprite.x = Terrain.terrain_obj.terrain_dict[
                        coord].sprite.x
                    test_lazer.sprite.y = Terrain.terrain_obj.terrain_dict[
                        coord].sprite.y
                    test_lazer.rotation = -math.degrees(
                        Functions.angle(
                            point_1=(test_lazer.sprite.x, test_lazer.sprite.y),
                            point_2=(target.sprite.x, target.sprite.y)))
                    test_lazer.sprite.rotation = test_lazer.rotation
                    #print('laser position',test_lazer.sprite.x,test_lazer.sprite.y)

                    for mountain in [obj for obj in Terrain.terrain_obj.terrain_dict if \
                        Terrain.terrain_obj.terrain_dict[obj].terrain_mov_mod == math.inf]:
                        mountain_cell = Terrain.terrain_obj.terrain_dict[
                            mountain]
                        #print('mountain cell position',mountain_cell.sprite.x,mountain_cell.sprite.y)
                        if test_lazer.return_if_x_y_in_sprite_loc(
                                mountain_cell.sprite.x,
                                mountain_cell.sprite.y):
                            #print('mountain cell position',mountain_cell.sprite.x,mountain_cell.sprite.y)

                            return False
                '''
                for coord in available_coords:
                    if Terrain.terrain_obj.terrain_dict[coord].terrain_mov_mod == math.inf:
                        #print('mountain the way')
                        return False
                '''
                #Terrain.terrain_obj.color_path(available_coords)
                #print('has a shot')
                return True

            iter += 1
            if iter > test_limit:
                print(available_coords)

                #Terrain.terrain_obj.color_path(available_coords)
                print('got to iter end')
                return False
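
The heart of this method is walking along the parametric line between the start cell and the target and collecting every grid cell the ray passes through. A self-contained sketch of that stepping idea, independent of the Terrain/Agents classes (step length and cell size are illustrative):

import math

def cells_on_line(start, end, step=10, cell=32):
    """Return grid cells touched while walking from start to end in fixed pixel steps."""
    (x0, y0), (x1, y1) = start, end
    dist = math.hypot(x1 - x0, y1 - y0)
    if dist == 0:
        return [(int(x0 // cell), int(y0 // cell))]
    dx, dy = (x1 - x0) / dist, (y1 - y0) / dist
    x, y, cells = x0, y0, []
    for _ in range(int(dist // step) + 1):
        x, y = x + step * dx, y + step * dy
        coord = (int(x // cell), int(y // cell))
        if coord not in cells:
            cells.append(coord)
    return cells

# A shot is "clean" if none of these cells is impassable (e.g. a mountain tile).
print(cells_on_line((0, 0), (200, 120)))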
Code example #26
#%%

# import necessary packages
import numpy as np # for numeric calculations
import pandas as pd
import func as f
import Agents as ag
agent_list = [ag.RLWM_noneFree(),ag.RLWM_allFree(),ag.RLWM_noise(),ag.RLWM_modulation(), ag.RLWM_actionSoftmax()]
numMod, numBlocks = len(agent_list), len(agent_list[0].Sub)
ModOrder = iter(range(numMod))
AICs, BICs, Mods = np.zeros((numBlocks, numMod)), np.zeros((numBlocks, numMod)), np.empty(numMod, dtype="object")
AICs_te, BICs_te = np.zeros((numBlocks, numMod)), np.zeros((numBlocks, numMod))

#%%

for i in agent_list:
    Order = next(ModOrder)
    agent = agent_list[Order]
    ModFit = np.load('../ModelFit/ModFit'+agent.name+'.npz',allow_pickle=True) # load the saved model-fit arrays for this agent
    te_ModFit = np.load('../Testing_ModelFit/Testing_ModelFit' + agent.name + '.npz', allow_pickle=True)
    f.ParamBar(agent.name,agent.pname, ModFit['Params'], ModFit['Ks'])
    AICs[:, Order], BICs[:, Order], Mods[Order] = ModFit['AICs'], ModFit['BICs'], ModFit['agtName']
    AICs_te[:, Order], BICs_te[:, Order] = te_ModFit['AICs'], te_ModFit['BICs']

#%%
Mods = ['noneFree', 'allFree', 'noise', 'modulation', 'action confusion']
f.ICBar(AICs, BICs, Mods, degree = 10)
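
The AICs/BICs loaded above presumably come from maximum-likelihood fits of each model. For reference, a self-contained sketch of how these criteria are normally computed from a log-likelihood, the number of free parameters k, and the number of trials n:

import numpy as np

def aic(log_lik, k):
    # Akaike information criterion
    return 2 * k - 2 * log_lik

def bic(log_lik, k, n):
    # Bayesian information criterion
    return k * np.log(n) - 2 * log_lik

# Illustrative numbers: a 4-parameter model fit on 300 trials.
print(aic(-412.7, k=4), bic(-412.7, k=4, n=300))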
Code example #27
def learnValueFunction(n_trials,
                       environment,
                       place_cells,
                       actor=None,
                       critic=None,
                       max_steps=np.Inf):
    """
    Main function responsible for learning the value function for a given environment
    INPUTS:
    -------
    n_trials: (INTEGER) Number of trials allowed on the task
    environment: (Maze) Physical space in which the task has to be learnt
    place_cells: (PlaceCell) Entity that encodes a particular location as a population

    <OPTIONAL INPUTS>
    actor: Pre-trained actor
    critic: Pre-trained critic

    OUTPUTS:
    --------
    actor: (Actor Class) Entity that learns actions for a given state
    critic: (Critic Class) Entity that evaluates the value for a
        particular state. These values are used for taking actions.
    """

    # Visualize place fields for a few cells and then the aggregate activity
    # Set up the actor and critic based on the place fields
    if critic is None:
        critic = Agents.Critic(len(place_cells))
    else:
        assert (critic.getNFields() == len(place_cells))

    if actor is None:
        actor = Agents.Actor(environment.getActions(), len(place_cells))
        # actor = Agents.RandomAgent(environment.getActions(), len(place_cells))
        # actor = Agents.IdealActor(environment, critic, place_cells)
    else:
        assert (actor.getNFields() == len(place_cells))

    n_steps = np.zeros(n_trials, dtype=float)
    for trial in range(n_trials):
        # Path is visualized using a graphics object
        canvas = Graphics.WallMazeCanvas(environment)
        if DBG_LVL > 2:
            n_cells_to_visualize = 4
            for _ in range(n_cells_to_visualize):
                sample_cell = random.randint(0, len(place_cells))
                canvas.visualizePlaceField(place_cells[sample_cell])
            canvas.visualizeAggregatePlaceFields(place_cells)

        # Initialize a new location and adjust for the optimal number of steps
        # needed to get to the goal.
        environment.redrawInitLocation()
        optimal_steps_to_goal = environment.getOptimalDistanceToGoal()
        n_steps[trial] = -optimal_steps_to_goal

        initial_state = environment.getCurrentState()
        canvas.update(initial_state)
        terminate_trial = False
        while not terminate_trial:
            terminate_trial = environment.reachedGoalState()
            if (n_steps[trial] > max_steps * environment.MOVE_DISTANCE):
                break

            n_steps[trial] += environment.MOVE_DISTANCE
            current_state = environment.getCurrentState()
            if DBG_LVL > 1:
                print('On state: (%.2f, %.2f)' %
                      (current_state[0], current_state[1]))

            # Get the place field activity based on the current location
            pf_activity = [pf.getActivity(current_state) for pf in place_cells]

            # Get an action based on the place field activity
            next_action = actor.getAction(pf_activity)
            if DBG_LVL > 1:
                print('Selected Action: %s' % next_action)

            # Apply this action onto the environment
            reward = environment.move(next_action)
            # canvas.update(environment.getCurrentState())

            # Use the obtained reward to update the value
            new_environment_state = environment.getCurrentState()
            canvas.update(new_environment_state)

            new_pf_activity = [
                pf.getActivity(new_environment_state) for pf in place_cells
            ]
            prediction_error = critic.updateValue(pf_activity, new_pf_activity,
                                                  reward)
            actor.updateWeights(pf_activity, prediction_error)

        if (DBG_LVL > 0):
            print('Ended trial %d moving %.1f.' % (trial, n_steps[trial]))
            # At debug level 1, only the first and the last trajectories, and
            # corresponding value functions are shown. At higher debug levels,
            # the entire trajectory is shown for every iteration
            if (DBG_LVL > 1) or (trial == 0) or (trial == n_trials - 1):
                # Plot the trajectory taken for this trial
                canvas.plotTrajectory()

                # This takes extremely long when using a population of neurons
                canvas.plotValueFunction(place_cells,
                                         critic,
                                         limits=False,
                                         continuous=True)

                # Plot a histogram of the weights
                """
                critic_weights = np.reshape(critic.getWeights(), -1)
                Graphics.histogram(critic_weights)
                """

    if (DBG_LVL > 0):
        Graphics.plot(n_steps)
    else:
        print('Step Statistics - Mean (%.2f), STD (%.2f)' %
              (np.mean(n_steps), np.std(n_steps)))

    return (actor, critic, n_steps)
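
The updates inside the trial loop follow a standard actor-critic temporal-difference scheme on a linear basis of place-cell activities. The sketch below is only a generic illustration of that pattern under those assumptions, not the project's Critic/Actor implementation:

import numpy as np

alpha, gamma = 0.1, 0.95           # learning rate and discount factor (illustrative)
n_cells = 8
w = np.zeros(n_cells)              # critic weights over place-cell activities

def value(activity):
    return float(np.dot(w, activity))

# One TD(0) step: activity before the move, activity after it, and the reward received.
pf_activity = np.random.rand(n_cells)
new_pf_activity = np.random.rand(n_cells)
reward = 0.0

prediction_error = reward + gamma * value(new_pf_activity) - value(pf_activity)
w += alpha * prediction_error * pf_activity   # critic update
# An actor would nudge its action preferences using the same prediction error.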
Code example #28
import numpy as np  # for numeric calculations
import time  #calculate runtime
import Agents as ag
import func as f

#%%
agent_list = [
    ag.RLWM_noneFree(),
    ag.RLWM_allFree(),
    ag.RLWM_noise(),
    ag.RLWM_modulation(),
    ag.RLWM_actionSoftmax()
]
start_time = time.time()
AICs, BICs = f.modRec(agent_list, initSample=22, endSample=33)
print("--- %s seconds ---" % (time.time() - start_time))
np.savez('Model_Recovery', AICs=AICs,
         BICs=BICs)  # saved to Model_Recovery.npz
Code example #29
#%%
# import necessary packages
import pandas as pd  # for python data frame
import time  #calculate runtime
import numpy as np  # for numeric calculations
import Agents as ag
import func as f
#%%
agent_list = [
    ag.RLWM_noneFree(),
    ag.RLWM_allFree(),
    ag.RLWM_noise(),
    ag.RLWM_modulation()
]

#%%
for agent in agent_list:
    agent.te_IC()
Code example #30
    def __init__(self,
                 ng=c.numOfGoods,
                 nr=c.numofRounds,
                 mem=c.memory,
                 al=c.alpha,
                 maxCost=c.max_fixedCost):

        #This list will be used to keep track of goods that agents receive
        #even though they are not their consumption goods, i.e. goods agents
        #choose to carry for the purpose of indirect trade

        c.numOfBaseAgents = ng * (ng - 1)
        c.numOfGoods = ng
        c.numofRounds = nr
        c.memory = mem
        c.alpha = al
        c.max_fixedCost = maxCost

        self.listofMoney = [0] * (c.numOfGoods)

        #agent self.types
        self.types = []

        #array of all trades
        self.allTrades = []

        #array of goods traded
        self.goodsTraded = []

        #cost list
        self.costList_unchanging = []

        # create a dictionary to store the cost of trading goods, the structure will be of the following kind
        # {(0,1):[0.1,0.02,0.5],(0,2):[0.1,0.6,0.02],(1,0):[0.5,0.3]}
        # (0,1) records the cost of the agent who received 0
        # we initialize an empty dictionary with the intention of filling it up with the above
        self.tradeCosts = dict()

        # create two list of number of goods
        self.goods_listOne = [i for i in range(c.numOfGoods)]
        self.goods_listTwo = [i for i in range(c.numOfGoods)]

        # create a list of combinations of goods, it will look like this: [(0,1),(0,2),(1,0) and so on]
        self.goodsCombinations = []

        for i in self.goods_listOne:
            for j in self.goods_listTwo:
                self.goodsCombinations.append((i, j))

        # attaches an empty list to a dictionary where elements shows combinations of goods, so we get {(0,1):[],(0,2):[],(1,0):[]}
        for x in self.goodsCombinations:
            self.tradeCosts[x] = list()

        #print "fixed costs", c.max_fixedCost

        #list of costs assigned to each agent
        for i in range(0, c.numOfGoods):
            self.costList_unchanging.append(random.uniform(0, c.max_fixedCost))

        self.costList = copy.deepcopy(self.costList_unchanging)

        # We generate a list of agents
        self.agentList = [
            ag.simpleAgents() for count in xrange(c.numOfBaseAgents)
        ]

        self.listofMoney = [0] * (c.numOfGoods)

        #initialize agent self.types
        for i in range(0, c.numOfGoods):
            for j in range(0, c.numOfGoods):
                if i != j:
                    nlist = [str(i), str(j), str(j)]
                    self.types.append(nlist)

        # And we give each agent a list of consumption good,
        # production good and carry good. Initially the production
        # good and carry good are the same

        for i in range(0, len(self.types)):
            self.agentList[i].goods = self.types[i]

        for i in self.agentList:
            i.cost = self.costList

        #print "costs", self.costList

        #register callback function if necessary
        self.callback_function = None
Code example #31
def testMaze():
    """
    No comments here. Look at single_maze_learning_agent.py for more details!
    """
    ValueLearning.DBG_LVL = 0

    nx = 6
    ny = 6

    # Set the number of cells to be used per "place field" - Same for all the environments
    Hippocampus.N_CELLS_PER_FIELD = 1

    n_fields = round(1.0 * (nx + 3) * (ny + 3))
    n_cells = Hippocampus.N_CELLS_PER_FIELD * n_fields
    move_distance = 0.99

    n_training_trials = 100
    n_single_env_episodes = 2
    n_alternations = 1
    max_train_steps = 1000

    # First Environment: Has its own place cells and place fields
    env_E1 = Environment.RandomGoalOpenField(nx, ny, move_distance)
    canvas_E1 = Graphics.WallMazeCanvas(env_E1)
    place_fields_E1 = Hippocampus.setupPlaceFields(env_E1, n_fields)
    place_cells_E1 = Hippocampus.assignPlaceCells(n_cells, place_fields_E1)

    # Train a critic on the first environment
    print('Training Critic solely on Env A')
    critic_E1 = None
    weights_E1 = np.empty((n_cells, n_single_env_episodes), dtype=float)
    for episode in range(n_single_env_episodes):
        (_, critic_E1,
         _) = ValueLearning.learnValueFunction(n_training_trials,
                                               env_E1,
                                               place_cells_E1,
                                               critic=critic_E1,
                                               max_steps=max_train_steps)
        weights_E1[:, episode] = critic_E1.getWeights()

    # Get a trajectory in the environment and plot the value function
    canvas_E1.plotValueFunction(place_cells_E1, critic_E1, continuous=True)
    input('Press return to run next environment...')

    components_E1 = Graphics.showDecomposition(weights_E1,
                                               title='Environment 01')

    # Create empty actors and critics
    actor = Agents.RandomAgent(env_E1.getActions(), n_cells)
    critic = Agents.Critic(n_cells)

    # Second Environment: This has a different set (but the same number) of
    # place fields and place cells (also has a bunch of walls)
    nx = 6
    ny = 6
    lp_wall = Environment.Wall((0, 3), (3, 3))
    rp_wall = Environment.Wall((4, 3), (6, 3))
    env_E2 = Environment.MazeWithWalls(nx,
                                       ny, [lp_wall, rp_wall],
                                       move_distance=move_distance)
    canvas_E2 = Graphics.WallMazeCanvas(env_E2)
    place_fields_E2 = Hippocampus.setupPlaceFields(env_E2, n_fields)
    place_cells_E2 = Hippocampus.assignPlaceCells(n_cells, place_fields_E2)

    # Train another critic on the second environment
    print()
    print('Training Critic solely on Env B')
    critic_E2 = None
    weights_E2 = np.empty((n_cells, n_single_env_episodes), dtype=float)
    for episode in range(n_single_env_episodes):
        (_, critic_E2,
         _) = ValueLearning.learnValueFunction(n_training_trials,
                                               env_E2,
                                               place_cells_E2,
                                               critic=critic_E2,
                                               max_steps=max_train_steps)
        weights_E2[:, episode] = critic_E2.getWeights()

    components_E2 = Graphics.showDecomposition(weights_E2,
                                               title='Environment 02')
    canvas_E2.plotValueFunction(place_cells_E2, critic_E2, continuous=True)

    # Look at the projection of one environment's weights on the other's principal components
    Graphics.showDecomposition(weights_E1,
                               components=components_E2,
                               title='E2 on E1')
    Graphics.showDecomposition(weights_E2,
                               components=components_E1,
                               title='E1 on E2')
    input('Press any key to start Alternation.')

    # This can be used to just reinforce the fact that the agent is indeed
    # random! The steps taken to goal would not change over time because of the
    # way the agent behaves.
    learning_steps_E1 = np.zeros((n_alternations, 1), dtype=float)
    learning_steps_E2 = np.zeros((n_alternations, 1), dtype=float)

    # keep track of weights for PCA
    weights = np.empty((n_cells, n_alternations * 2), dtype=float)
    for alt in range(n_alternations):
        n_alternation_trials = n_single_env_episodes * n_training_trials
        # n_alternation_trials = n_training_trials
        print('Alternation: %d' % alt)
        # First look at the performance of the agent in the task before it is
        # allowed to learn anything. Then allow learning
        print('Learning Environment A')
        (actor, critic, steps_E1) = ValueLearning.learnValueFunction(
            n_alternation_trials, env_E1, place_cells_E1, actor, critic,
            max_train_steps)
        learning_steps_E1[alt] = np.mean(steps_E1)
        weights[:, 2 * alt] = critic.getWeights()

        # Repeat for the second environment
        print('Learning Environment B')
        (actor, critic, steps_E2) = ValueLearning.learnValueFunction(
            n_alternation_trials, env_E2, place_cells_E2, actor, critic,
            max_train_steps)
        learning_steps_E2[alt] = np.mean(steps_E2)
        weights[:, 2 * alt + 1] = critic.getWeights()

    # Show the alternation weights in the two basis
    Graphics.showDecomposition(weights,
                               components=components_E1,
                               title='Alternation weights in E1')
    Graphics.showDecomposition(weights,
                               components=components_E2,
                               title='Alternation weights in E2')

    # Show the value functions for both the environments
    input('Press return for Value Function of E1')
    canvas_E1.plotValueFunction(place_cells_E1, critic, continuous=True)
    canvas_E1.plotValueFunction(place_cells_E1, critic_E1, continuous=True)
    canvas_E1.plotValueFunction(place_cells_E1, critic_E2, continuous=True)

    # Plot the ideal value function
    ideal_critic = Agents.IdealValueAgent(env_E1, place_cells_E1)
    optimal_value_function = ideal_critic.getValueFunction()

    scaling_factor = 1.0 / (1 - critic_E1.getDiscountFactor())
    # Graphics.showImage(optimal_value_function, xticks=range(1,nx), yticks=range(1,ny), range=(maze.NON_GOAL_STATE_REWARD, scaling_factor * maze.GOAL_STATE_REWARD))
    Graphics.showImage(optimal_value_function, xticks=range(1,nx), yticks=range(1,ny), \
        range=(env_E1.NON_GOAL_STATE_REWARD, scaling_factor * env_E1.GOAL_STATE_REWARD))

    input('Press return for Value Function of E2')
    canvas_E2.plotValueFunction(place_cells_E2, critic, continuous=True)
    canvas_E2.plotValueFunction(place_cells_E2, critic_E2, continuous=True)
    canvas_E2.plotValueFunction(place_cells_E2, critic_E1, continuous=True)

    # Plot the ideal value function
    ideal_critic = Agents.IdealValueAgent(env_E2, place_cells_E2)
    optimal_value_function = ideal_critic.getValueFunction()

    scaling_factor = 1.0 / (1 - critic_E2.getDiscountFactor())
    # Graphics.showImage(optimal_value_function, xticks=range(1,nx), yticks=range(1,ny), range=(maze.NON_GOAL_STATE_REWARD, scaling_factor * maze.GOAL_STATE_REWARD))
    Graphics.showImage(optimal_value_function, xticks=range(1,nx), yticks=range(1,ny), \
        range=(env_E2.NON_GOAL_STATE_REWARD, scaling_factor * env_E2.GOAL_STATE_REWARD))
    input('Press any key to exit!')