# Example 1
    def __init__(self,
                 env,
                 layerSizes=2 * [Int3(4, 4, 16)],
                 layerRadius=4,
                 hiddenSize=Int3(8, 8, 16),
                 imageRadius=9,
                 imageScale=1.0,
                 obsResolution=32,
                 actionResolution=16,
                 rewardScale=1.0,
                 terminalReward=0.0,
                 infSensitivity=1.0,
                 nThreads=4,
                 loadName=None):
        """Build (or load) an OgmaNeo hierarchy wrapping a Gym environment.

        Parameters
        ----------
        env : gym.Env
            Environment whose observation/action spaces define the
            hierarchy's input layers.
        layerSizes : list of Int3
            Hidden size of each hierarchy layer.
        layerRadius : int
            Receptive-field radius used for the ff/p/a connections of
            every layer.
        hiddenSize : Int3
            Hidden size of the image encoder (only used when the
            observation is an image).
        imageRadius : int
            Receptive-field radius of the image encoder.
        imageScale : float
            Scale factor applied to image observation dimensions.
        obsResolution : int
            Column size used to bin scalar/vector (Box) observations.
        actionResolution : int
            Column size used to bin continuous (Box) actions.
        rewardScale, terminalReward, infSensitivity : float
            Reward-shaping parameters stored for use by other methods.
        nThreads : int
            Worker thread count for the OgmaNeo compute system.
        loadName : str or None
            When given, load a previously saved hierarchy from this file
            instead of constructing a fresh one.
        """
        self.env = env

        pyogmaneo.ComputeSystem.setNumThreads(nThreads)
        self.cs = pyogmaneo.ComputeSystem()

        # Image encoder is only created below when the observation is an image.
        self.imEnc = None
        self.imEncIndex = -1

        self.inputSizes = []
        self.inputLows = []
        self.inputHighs = []
        self.inputTypes = []
        self.imageSizes = []
        self.imgsPrev = []
        self.actionIndices = []

        self.rewardScale = rewardScale
        self.terminalReward = terminalReward

        self.infSensitivity = infSensitivity

        # --- Observation space ---
        if isinstance(self.env.observation_space, gym.spaces.Discrete):
            self.inputSizes.append(Int3(1, 1, self.env.observation_space.n))
            self.inputTypes.append(pyogmaneo.inputTypeNone)
            self.inputLows.append([0.0])
            self.inputHighs.append([0.0])
        elif isinstance(self.env.observation_space, gym.spaces.Box):
            numObsDims = len(self.env.observation_space.shape)

            if numObsDims <= 1:
                # Scalar/vector observation: pack the components into the
                # smallest square grid that can hold them all.
                squareSize = int(
                    np.ceil(np.sqrt(len(self.env.observation_space.low))))
                self.inputSizes.append(
                    Int3(squareSize, squareSize, obsResolution))
                self.inputTypes.append(pyogmaneo.inputTypeNone)
                lows = list(self.env.observation_space.low)
                highs = list(self.env.observation_space.high)

                # Detect large numbers/inf
                for i in range(len(lows)):
                    if abs(lows[i]) > 100000 or abs(highs[i]) > 100000:
                        # Indicate inf by making low greater than high
                        lows[i] = 1.0
                        highs[i] = -1.0

                self.inputLows.append(lows)
                self.inputHighs.append(highs)
            elif numObsDims in (2, 3):
                # 2D observation is treated as single-channel, 3D as RGB.
                channels = 1 if numObsDims == 2 else 3
                scaledSize = (int(self.env.observation_space.shape[0] *
                                  imageScale),
                              int(self.env.observation_space.shape[1] *
                                  imageScale), channels)

                self.imageSizes.append(scaledSize)
            else:
                raise Exception("Unsupported Box input: Dimensions too high " +
                                str(self.env.observation_space.shape))
        else:
            raise Exception("Unsupported input type " +
                            str(type(self.env.observation_space)))

        # --- Image encoder (image observations only) ---
        if len(self.imageSizes) > 0:
            vlds = []

            for size in self.imageSizes:
                vld = pyogmaneo.ImVisibleLayerDesc(
                    Int3(size[0], size[1], size[2]), imageRadius)

                vlds.append(vld)

                self.imgsPrev.append(np.zeros(size))

            self.imEnc = pyogmaneo.ImageEncoder(self.cs, hiddenSize, vlds)

            # The encoder hidden state is fed to the hierarchy as an extra
            # (non-predicted) input layer.
            self.imEncIndex = len(self.inputSizes)
            self.inputSizes.append(hiddenSize)
            self.inputTypes.append(pyogmaneo.inputTypeNone)
            self.inputLows.append([0.0])
            self.inputHighs.append([1.0])

        # --- Action space ---
        if isinstance(self.env.action_space, gym.spaces.Discrete):
            self.actionIndices.append(len(self.inputSizes))
            self.inputSizes.append(Int3(1, 1, self.env.action_space.n))
            self.inputTypes.append(pyogmaneo.inputTypeAction)
            self.inputLows.append([0.0])
            self.inputHighs.append([0.0])
        elif isinstance(self.env.action_space, gym.spaces.Box):
            numActDims = len(self.env.action_space.shape)

            if numActDims >= 3:
                raise Exception(
                    "Unsupported Box action: Dimensions too high " +
                    str(self.env.action_space.shape))

            if numActDims == 2:
                actionSize = Int3(self.env.action_space.shape[0],
                                  self.env.action_space.shape[1],
                                  actionResolution)
            else:
                # Pack a flat action vector into the smallest square grid.
                squareSize = int(
                    np.ceil(np.sqrt(len(self.env.action_space.low))))
                actionSize = Int3(squareSize, squareSize, actionResolution)

            self.actionIndices.append(len(self.inputSizes))
            self.inputSizes.append(actionSize)
            self.inputTypes.append(pyogmaneo.inputTypeAction)
            self.inputLows.append(list(self.env.action_space.low))
            self.inputHighs.append(list(self.env.action_space.high))
        else:
            raise Exception("Unsupported action type " +
                            str(type(self.env.action_space)))

        # --- Hierarchy layers ---
        lds = []

        for layerSize in layerSizes:
            ld = pyogmaneo.LayerDesc()

            ld.hiddenSize = layerSize

            ld.ffRadius = layerRadius
            ld.pRadius = layerRadius
            ld.aRadius = layerRadius

            lds.append(ld)

        if loadName is None:
            self.h = pyogmaneo.Hierarchy(self.cs, self.inputSizes,
                                         self.inputTypes, lds)
        else:
            self.h = pyogmaneo.Hierarchy(loadName)

        # Start each action layer in a random column state.
        self.actions = []

        for index in self.actionIndices:
            size = len(self.inputLows[index])

            startAct = [np.random.randint(0, self.inputSizes[index].z)
                        for _ in range(size)]

            self.actions.append(startAct)
# Example 2
# Build five layer descriptors; deeper layers tick more slowly, giving the
# hierarchy exponentially longer memory.
for i in range(5):
    ld = pyogmaneo.LayerDesc()

    # Encoder hidden state dimensions: width x height x columnSize.
    ld.hiddenSize = pyogmaneo.Int3(4, 4, 16)

    # Receptive-field radii.
    ld.ffRadius = 2  # sparse coder onto its visible layers
    ld.pRadius = 2   # predictor onto the hidden layer (and feedback)

    # Exponential-memory clocking: a layer updates once every
    # ticksPerUpdate ticks of the layer below; temporalHorizon (which must
    # be >= ticksPerUpdate) is how far back the layer remembers.
    ld.ticksPerUpdate = 2
    ld.temporalHorizon = 4

    lds.append(ld)

# A single one-column input layer, predicted by the hierarchy.
h = pyogmaneo.Hierarchy(cs, [pyogmaneo.Int3(1, 1, inputColumnSize)],
                        [pyogmaneo.inputTypePrediction], lds)

# Drive the hierarchy with the wave signal for a number of timesteps.
iters = 2000

for t in range(iters):
    # Wavy target signal: the product of two sine waves.
    valueToEncode = (np.sin(t * 0.02 * 2.0 * np.pi)
                     * np.sin(t * 0.035 * 2.0 * np.pi + 0.45))

    # Map [bounds[0], bounds[1]] linearly onto column indices
    # 0..inputColumnSize-1, rounding to the nearest bin.
    unit = (valueToEncode - bounds[0]) / (bounds[1] - bounds[0])
    valueToEncodeBinned = int(unit * (inputColumnSize - 1) + 0.5)

    # Step the hierarchy with the single binned input column.
    h.step(cs, [[valueToEncodeBinned]], True)  # True => learning enabled
# Example 3
 def loadHierarchy(self, fileName="pupper.ohr"):
     """Recreate the compute system and load a saved hierarchy.

     Parameters
     ----------
     fileName : str
         Path of the saved hierarchy file. Defaults to "pupper.ohr" so
         existing callers keep their previous behavior.
     """
     self.cs = pyogmaneo.ComputeSystem()
     self.h = pyogmaneo.Hierarchy(fileName)
# Example 4
    # NOTE(review): this is the body of a layer-construction loop whose `for`
    # header lies above this excerpt; each iteration appends one configured
    # LayerDesc to `lds`.
    ld = pyogmaneo.LayerDesc()

    # Set the hidden (encoder) layer size: width x height x columnSize
    ld.hiddenSize = pyogmaneo.Int3(4, 4, 16)

    ld.ffRadius = 4 # Sparse coder radius onto visible layers
    ld.pRadius = 4 # Predictor radius onto sparse coder hidden layer (and feed back)
    ld.aRadius = 4 # Actor radius onto sparse coder hidden layer (and feed back)

    ld.ticksPerUpdate = 2 # How many ticks before a layer updates (compared to previous layer) - clock speed for exponential memory
    ld.temporalHorizon = 4 # Memory horizon of the layer. Must be greater or equal to ticksPerUpdate
    
    lds.append(ld)

# Create the hierarchy. Two input layers: one column-row of binned
# observations plus a single action column; only the second is of the
# "action" input type.
inputSizes = [pyogmaneo.Int3(1, numObs, obsColumnSize),
              pyogmaneo.Int3(1, 1, numActions)]
inputTypes = [pyogmaneo.inputTypeNone, pyogmaneo.inputTypeAction]

h = pyogmaneo.Hierarchy(cs, inputSizes, inputTypes, lds)

reward = 0.0

# Run training episodes. NOTE(review): this loop continues past the end of
# this excerpt (env.step / reward update presumably follow).
for episode in range(1000):
    obs = env.reset()

    # Timesteps
    for t in range(500):
        # Bin the observations into columns. The observation limits are
        # unknown, so squash with a sigmoid before quantizing.
        # Fix: np.int was removed in NumPy 1.24; it was an alias of the
        # builtin int, so astype(int) is the exact replacement.
        binnedObs = (sigmoid(obs * obsSquashScale) * (obsColumnSize - 1) + 0.5).astype(int).ravel().tolist()

        # Feed the binned observation and the previous action prediction.
        h.step(cs, [ binnedObs, h.getPredictionCs(1) ], True, reward)

        # Retrieve the action, the hierarchy already automatically applied exploration
        action = h.getPredictionCs(1)[0] # First and only column