Example #1
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'],
                                        classifyWithRandom=params[
                                          'classify_with_random'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
    else:
      raise Exception("Dataset not found")

    self.computeCounter = 0

    self.history = []
    self.resets = []
    self.randoms = []
    self.currentSequence = self.dataset.generateSequence()

    self.net = None
    self.sequenceCounter = 0
Example #2
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
    else:
      raise Exception("Dataset not found")

    # if not os.path.exists(resultsDir):
    #   os.makedirs(resultsDir)
    # self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')
    if params['verbosity'] > 0:
      print " initializing HTM model..."
    self.model = ModelFactory.create(MODEL_PARAMS)
    self.model.enableInference({"predictedField": "element"})
    self.shifter = InferenceShifter()
    self.mapping = getEncoderMapping(self.model)

    self.numPredictedActiveCells = []
    self.numPredictedInactiveCells = []
    self.numUnpredictedActiveColumns = []

    self.currentSequence = self.dataset.generateSequence()
    self.perturbed = False
    self.randoms = []
    self.verbosity = 1
    self.sequenceCounter = 0
Example #3
    def __init__(self, numPredictions, resultsDir):
        random.seed(43)
        self.numPredictions = numPredictions

        if not os.path.exists(resultsDir):
            os.makedirs(resultsDir)

        self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')

        self.model = ModelFactory.create(MODEL_PARAMS)
        self.model.enableInference({"predictedField": "element"})
        self.shifter = InferenceShifter()
        self.mapping = getEncoderMapping(self.model)

        self.correct = []
        self.numPredictedActiveCells = []
        self.numPredictedInactiveCells = []
        self.numUnpredictedActiveColumns = []

        self.iteration = 0
        self.perturbed = False
        self.randoms = []
        self.verbosity = 1

        self.dataset = HighOrderDataset(numPredictions=self.numPredictions)
        self.sequences = []
        self.currentSequence = []
        self.replenish_sequence()
Example #4
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'],
                                        classifyWithRandom=params[
                                          'classify_with_random'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
    else:
      raise Exception("Dataset not found")

    self.computeCounter = 0

    self.history = []
    self.resets = []
    self.randoms = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = None
    self.sequenceCounter = 0
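For reference, a hypothetical parameter set that the reset() methods above would accept. The key names are taken from the code; every value below is illustrative only.

params = {
    'seed': 42,
    'encoding': 'distributed',     # 'basic' or 'distributed'
    'encoding_num': 25,
    'encoding_max': 1.0,           # DistributedEncoder only
    'encoding_min': -1.0,          # DistributedEncoder only
    'classify_with_random': True,  # DistributedEncoder only
    'dataset': 'high-order',       # 'simple' | 'reber' | 'high-order'
    'max_length': 10,              # ReberDataset only
    'num_predictions': 1,          # HighOrderDataset only
}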
Example #5
    def reset(self, params, repetition):
        random.seed(params['seed'])

        if params['encoding'] == 'basic':
            self.encoder = BasicEncoder(params['encoding_num'])
        elif params['encoding'] == 'distributed':
            self.encoder = DistributedEncoder(
                params['encoding_num'],
                maxValue=params['encoding_max'],
                minValue=params['encoding_min'],
                classifyWithRandom=params['classify_with_random'])
        else:
            raise Exception("Encoder not found")

        if params['dataset'] == 'simple':
            self.dataset = SimpleDataset()
        elif params['dataset'] == 'reber':
            self.dataset = ReberDataset(maxLength=params['max_length'])
        elif params['dataset'] == 'high-order':
            self.dataset = HighOrderDataset(
                numPredictions=params['num_predictions'], seed=params['seed'])
        else:
            raise Exception("Dataset not found")

        self.computeCounter = 0

        self.history = []
        self.resets = []
        self.randoms = []

        self.currentSequence = []
        self.targetPrediction = []
        self.replenishSequence(params, iteration=0)

        self.net = buildNetwork(params['encoding_num'],
                                params['num_cells'],
                                params['encoding_num'],
                                hiddenclass=LSTMLayer,
                                bias=True,
                                outputbias=params['output_bias'],
                                recurrent=True)

        self.trainer = BackpropTrainer(self.net,
                                       dataset=SequentialDataSet(
                                           params['encoding_num'],
                                           params['encoding_num']),
                                       learningrate=0.01,
                                       momentum=0,
                                       verbose=params['verbosity'] > 0)

        self.sequenceCounter = 0
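A minimal PyBrain sketch, assuming pybrain is installed, of how a recurrent network like self.net above is driven one symbol at a time; the layer sizes here are arbitrary.

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import LSTMLayer

net = buildNetwork(25, 20, 25, hiddenclass=LSTMLayer,
                   bias=True, outputbias=True, recurrent=True)
net.reset()                        # clear recurrent state between sequences
output = net.activate([0.0] * 25)  # one time step; returns 25 output activations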
Example #6
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'],
                                        classifyWithRandom=params[
                                          'classify_with_random'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.computeCounter = 0

    self.history = []
    self.resets = []
    self.randoms = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = buildNetwork(params['encoding_num'], params['num_cells'],
                            params['encoding_num'],
                            hiddenclass=LSTMLayer,
                            bias=True,
                            outputbias=params['output_bias'],
                            recurrent=True)

    self.trainer = BackpropTrainer(self.net,
                          dataset=SequentialDataSet(params['encoding_num'], params['encoding_num']),
                          learningrate=0.01,
                          momentum=0,
                          verbose=params['verbosity'] > 0)

    self.sequenceCounter = 0
Example #7
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        params['encoding_num_non_random'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'])
    elif params['encoding'] == 'sparse-distributed':
      self.encoder = SparseDistributedEncoder(params['encoding_num'],
                                              params['encoding_num_non_random'],
                                              params['encoding_active_bits'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.numLags = params['num_lags']

    self.computeCounter = 0
    self.history = []
    self.resets = []

    self.finishInitializeX = False
    self.randoms = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = buildNetwork(params['encoding_num'] * params['num_lags'],
                            params['num_cells'],
                            params['encoding_num'],
                            bias=True,
                            outputbias=True)
    # self.trainer = BackpropTrainer(self.net, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    self.sequenceCounter = 0
Example #8
  def __init__(self, numPredictions, resultsDir):
    random.seed(43)
    self.numPredictions = numPredictions

    if not os.path.exists(resultsDir):
      os.makedirs(resultsDir)

    self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')

    self.model = ModelFactory.create(MODEL_PARAMS)
    self.model.enableInference({"predictedField": "element"})
    self.shifter = InferenceShifter()
    self.mapping = getEncoderMapping(self.model)

    self.correct = []
    self.numPredictedActiveCells = []
    self.numPredictedInactiveCells = []
    self.numUnpredictedActiveColumns = []

    self.iteration = 0
    self.perturbed = False
    self.randoms = []
    self.verbosity = 1

    self.dataset = HighOrderDataset(numPredictions=self.numPredictions)
    self.sequences = []
    self.currentSequence = []
    self.replenish_sequence()
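A hedged sketch of how the HTM model built in this __init__ is typically stepped (standard nupic OPF calls); "element" matches the predicted field enabled above, and the value 7 is arbitrary.

result = self.model.run({"element": 7})  # feed one record to the model
shifted = self.shifter.shift(result)     # align multi-step inferences with input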
Example #9
    def reset(self, params, repetition):
        random.seed(params['seed'])

        if params['encoding'] == 'basic':
            self.encoder = BasicEncoder(params['encoding_num'])
        elif params['encoding'] == 'distributed':
            self.encoder = DistributedEncoder(
                params['encoding_num'],
                params['encoding_num_non_random'],
                maxValue=params['encoding_max'],
                minValue=params['encoding_min'])
        elif params['encoding'] == 'sparse-distributed':
            self.encoder = SparseDistributedEncoder(
                params['encoding_num'], params['encoding_num_non_random'],
                params['encoding_active_bits'])
        else:
            raise Exception("Encoder not found")

        if params['dataset'] == 'simple':
            self.dataset = SimpleDataset()
        elif params['dataset'] == 'reber':
            self.dataset = ReberDataset(maxLength=params['max_length'])
        elif params['dataset'] == 'high-order':
            self.dataset = HighOrderDataset(
                numPredictions=params['num_predictions'], seed=params['seed'])
        else:
            raise Exception("Dataset not found")

        self.numLags = params['num_lags']

        self.history = []
        self.resets = []

        self.finishInitializeX = False
        self.randoms = []

        self.currentSequence = []
        self.targetPrediction = []
        self.replenishSequence(params, iteration=0)

        self.net = initializeELMnet(params['encoding_num'] *
                                    params['num_lags'],
                                    params['encoding_num'],
                                    numNeurons=params['num_cells'])
        self.sequenceCounter = 0
Example #10
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        params['encoding_num_non_random'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'])
    elif params['encoding'] == 'sparse-distributed':
      self.encoder = SparseDistributedEncoder(params['encoding_num'],
                                              params['encoding_num_non_random'],
                                              params['encoding_active_bits'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.numLags = params['num_lags']

    self.history = []
    self.resets = []

    self.finishInitializeX = False
    self.randoms = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = initializeELMnet(params['encoding_num'] * params['num_lags'],
                                params['encoding_num'],
                                numNeurons=params['num_cells'])
    self.sequenceCounter = 0
Example #11
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
      print "Sequence dataset: "
      print " Symbol Number {}".format(self.dataset.numSymbols)
      for seq in self.dataset.sequences:
        print seq

    elif params['dataset'] == 'high-order-long':
      self.dataset = LongHighOrderDataset(params['sequence_length'],
                                          seed=params['seed'])
      print "Sequence dataset: "
      print " Symbol Number {}".format(self.dataset.numSymbols)
      for seq in self.dataset.sequences:
        print seq
    else:
      raise Exception("Dataset not found")

    self.randomStart = self.dataset.numSymbols + 1
    self.randomEnd = self.randomStart + 5000

    MODEL_PARAMS['modelParams']['sensorParams']['encoders']['element']\
      ['categoryList'] = range(self.randomEnd)

    # if not os.path.exists(resultsDir):
    #   os.makedirs(resultsDir)
    # self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')
    if params['verbosity'] > 0:
      print " initializing HTM model..."
    self.model = ModelFactory.create(MODEL_PARAMS)
    self.model.enableInference({"predictedField": "element"})
    # self.classifier = SDRClassifier(steps=[1], alpha=0.001)

    self.mapping = getEncoderMapping(self.model, self.dataset.numSymbols)

    self.numPredictedActiveCells = []
    self.numPredictedInactiveCells = []
    self.numUnpredictedActiveColumns = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenish_sequence(params, iteration=0)

    self.resets = []
    self.randoms = []
    self.verbosity = 1
    self.sequenceCounter = 0
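In the reset() above, the category encoder must know every value it will ever see, so categoryList is widened to cover the dataset symbols plus the 5000-wide range reserved for random separators. A schematic sketch with a hypothetical alphabet size:

numSymbols = 16                   # hypothetical dataset alphabet size
randomStart = numSymbols + 1
randomEnd = randomStart + 5000
categoryList = range(randomEnd)   # covers both symbols and separators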
Example #12
    def reset(self, params, repetition):
        random.seed(params['seed'])

        if params['dataset'] == 'simple':
            self.dataset = SimpleDataset()
        elif params['dataset'] == 'reber':
            self.dataset = ReberDataset(maxLength=params['max_length'])
        elif params['dataset'] == 'high-order':
            self.dataset = HighOrderDataset(
                numPredictions=params['num_predictions'],
                seed=params['seed'],
                smallAlphabet=params['use_small_alphabet'])
            print "Sequence dataset: "
            print " Symbol Number {}".format(self.dataset.numSymbols)
            for seq in self.dataset.sequences:
                print seq

        elif params['dataset'] == 'high-order-long':
            self.dataset = LongHighOrderDataset(params['sequence_length'],
                                                seed=params['seed'])
            print "Sequence dataset: "
            print " Symbol Number {}".format(self.dataset.numSymbols)
            for seq in self.dataset.sequences:
                print seq
        else:
            raise Exception("Dataset not found")

        self.randomStart = self.dataset.numSymbols + 1
        self.randomEnd = self.randomStart + 5000

        MODEL_PARAMS['modelParams']['sensorParams']['encoders']['element']\
          ['categoryList'] = range(self.randomEnd)

        # if not os.path.exists(resultsDir):
        #   os.makedirs(resultsDir)
        # self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')
        if params['verbosity'] > 0:
            print " initializing HTM model..."
            # print MODEL_PARAMS
        self.model = ModelFactory.create(MODEL_PARAMS)
        self.model.enableInference({"predictedField": "element"})
        # self.classifier = SDRClassifier(steps=[1], alpha=0.001)
        print "finish initializing HTM model "

        if params['kill_cell_percent'] > 0:
            # a hack to use faulty temporal memory instead
            self.model._getTPRegion().getSelf()._tfdr = MonitoredFaultyTPShim(
                numberOfCols=2048,
                cellsPerColumn=32,
                newSynapseCount=32,
                maxSynapsesPerSegment=128,
                maxSegmentsPerCell=128,
                initialPerm=0.21,
                connectedPerm=0.50,
                permanenceInc=0.10,
                permanenceDec=0.10,
                predictedSegmentDecrement=0.01,
                minThreshold=15,
                activationThreshold=15,
                seed=1960,
            )

        self.mapping = getEncoderMapping(self.model, self.dataset.numSymbols)

        self.numPredictedActiveCells = []
        self.numPredictedInactiveCells = []
        self.numUnpredictedActiveColumns = []

        self.currentSequence = []
        self.targetPrediction = []
        self.replenish_sequence(params, iteration=0)

        self.resets = []
        self.randoms = []
        self.verbosity = 1
        self.sequenceCounter = 0
Example #13
class Suite(PyExperimentSuite):
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'],
                                        classifyWithRandom=params[
                                          'classify_with_random'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
    else:
      raise Exception("Dataset not found")

    self.computeCounter = 0

    self.history = []
    self.resets = []
    self.randoms = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = None
    self.sequenceCounter = 0

  def window(self, data, params):
    start = max(0, len(data) - params['learning_window'])
    return data[start:]


  def train(self, params):
    """
    Train LSTM network on buffered dataset history
    After training, run LSTM on history[:-1] to get the state correct
    :param params:
    :return:
    """
    n = params['encoding_num']
    net = buildNetwork(n, params['num_cells'], n,
                       hiddenclass=LSTMLayer,
                       bias=True,
                       outputbias=params['output_bias'],
                       recurrent=True)
    net.reset()

    # prepare training dataset
    ds = SequentialDataSet(n, n)
    trainer = RPropMinusTrainer(net,
                                dataset=ds,
                                verbose=params['verbosity'] > 0)

    history = self.window(self.history, params)
    resets = self.window(self.resets, params)

    for i in xrange(1, len(history)):
      if not resets[i - 1]:
        ds.addSample(self.encoder.encode(history[i - 1]),
                     self.encoder.encode(history[i]))
      if resets[i]:
        ds.newSequence()

    if len(history) > 1:
      trainer.trainEpochs(params['num_epochs'])
      net.reset()

    # run network on buffered dataset after training to get the state right
    for i in xrange(len(history) - 1):
      symbol = history[i]
      output = net.activate(self.encoder.encode(symbol))
      predictions = self.encoder.classify(output, num=params['num_predictions'])

      if resets[i]:
        net.reset()

    return net


  def killCells(self, killCellPercent):
    """
    kill a fraction of LSTM cells from the network
    :param killCellPercent:
    :return:
    """
    if killCellPercent <= 0:
      return

    inputLayer = self.net['in']
    lstmLayer = self.net['hidden0']

    numLSTMCell = lstmLayer.outdim
    numDead = int(round(killCellPercent * numLSTMCell))
    zombiePermutation = numpy.random.permutation(numLSTMCell)
    deadCells = zombiePermutation[0:numDead]

    # remove connections from input layer to dead LSTM cells
    connectionInputToHidden = self.net.connections[inputLayer][0]
    weightInputToHidden = reshape(connectionInputToHidden.params,
                                  (connectionInputToHidden.outdim,
                                   connectionInputToHidden.indim))

    for cell in deadCells:
      for dim in range(4):
        weightInputToHidden[dim * numLSTMCell + cell, :] *= 0

    newParams = reshape(weightInputToHidden,
                        (connectionInputToHidden.paramdim,))
    self.net.connections[inputLayer][0]._setParameters(
      newParams, connectionInputToHidden.owner)

    # remove dead connections within LSTM layer
    connectionHiddenToHidden = self.net.recurrentConns[0]
    weightHiddenToHidden = reshape(connectionHiddenToHidden.params,
                                   (connectionHiddenToHidden.outdim,
                                    connectionHiddenToHidden.indim))

    for cell in deadCells:
      weightHiddenToHidden[:, cell] *= 0

    newParams = reshape(weightHiddenToHidden,
                        (connectionHiddenToHidden.paramdim,))
    self.net.recurrentConns[0]._setParameters(
      newParams, connectionHiddenToHidden.owner)

    # remove connections from dead LSTM cell to output layer
    connectionHiddenToOutput = self.net.connections[lstmLayer][0]
    weightHiddenToOutput = reshape(connectionHiddenToOutput.params,
                                   (connectionHiddenToOutput.outdim,
                                    connectionHiddenToOutput.indim))
    for cell in deadCells:
      weightHiddenToOutput[:, cell] *= 0

    newParams = reshape(weightHiddenToOutput,
                        (connectionHiddenToOutput.paramdim,))
    self.net.connections[lstmLayer][0]._setParameters(
      newParams, connectionHiddenToOutput.owner)


  def replenishSequence(self, params, iteration):
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(iteration, perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(iteration)

    if (iteration > params['inject_noise_after'] and
            iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(self.encoder.randomSymbol())
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target


  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        correct = targets in topPredictions
      else:
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct


  def iterate(self, params, repetition, iteration):
    element = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(element)

    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')

    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    # kill cells
    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    # reset compute counter
    if iteration % params['compute_every'] == 0:
      self.computeCounter = params['compute_for']

    if self.computeCounter == 0 or iteration < params['compute_after']:
      computeLSTM = False
    else:
      computeLSTM = True

    if computeLSTM:
      self.computeCounter -= 1

      train = (not params['compute_test_mode'] or
               iteration % params['compute_every'] == 0)

      if train:
        if params['verbosity'] > 0:
          print "Training LSTM at iteration {}".format(iteration)

        self.net = self.train(params)

      # run LSTM on the latest data record

      output = self.net.activate(self.encoder.encode(element))
      predictions = self.encoder.classify(output, num=params['num_predictions'])

      correct = self.check_prediction(predictions, target)

      if params['verbosity'] > 0:
        print ("iteration: {0} \t"
               "current: {1} \t"
               "predictions: {2} \t"
               "truth: {3} \t"
               "correct: {4} \t").format(
          iteration, element, predictions, target, correct)

      if self.resets[-1]:
        if params['verbosity'] > 0:
          print "Reset LSTM at iteration {}".format(iteration)
        self.net.reset()

      return {"current": element,
              "reset": self.resets[-1],
              "random": self.randoms[-1],
              "train": train,
              "predictions": predictions,
              "truth": target,
              "killCell": killCell,
              "sequenceCounter": self.sequenceCounter}
Example #14
class Suite(PyExperimentSuite):
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        params['encoding_num_non_random'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'])
    elif params['encoding'] == 'sparse-distributed':
      self.encoder = SparseDistributedEncoder(params['encoding_num'],
                                              params['encoding_num_non_random'],
                                              params['encoding_active_bits'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.numLags = params['num_lags']

    self.history = []
    self.resets = []

    self.finishInitializeX = False
    self.randoms = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = initializeELMnet(params['encoding_num'] * params['num_lags'],
                                params['encoding_num'],
                                numNeurons=params['num_cells'])
    self.sequenceCounter = 0


  def window(self, data, windowSize):
    start = max(0, len(data) - windowSize)
    return data[start:]


  def replenishSequence(self, params, iteration):
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(params['seed']+iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(params['seed']+iteration)

    if (iteration > params['inject_noise_after'] and
            iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(self.encoder.randomSymbol(seed=params['seed']+iteration))
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target


  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        correct = targets in topPredictions
      else:
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct


  def killCells(self, killCellPercent):
    """
    kill a fraction of LSTM cells from the network
    """
    if killCellPercent <= 0:
      return

    numHiddenNeurons = self.net.numHiddenNeurons

    numDead = int(round(killCellPercent * numHiddenNeurons))
    zombiePermutation = numpy.random.permutation(numHiddenNeurons)
    deadCells = zombiePermutation[0:numDead]
    liveCells = zombiePermutation[numDead:]

    self.net.inputWeights = self.net.inputWeights[liveCells, :]
    self.net.bias = self.net.bias[:, liveCells]
    self.net.beta = self.net.beta[liveCells, :]
    self.net.M = self.net.M[numpy.ix_(liveCells, liveCells)]  # live-by-live submatrix
    self.net.numHiddenNeurons = numHiddenNeurons - numDead


  def iterate(self, params, repetition, iteration):
    currentElement = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(currentElement)

    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')

    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    # # kill cells
    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    if iteration > params['train_after']:
      n = params['encoding_num']

      if self.finishInitializeX is False:
        # run initialization phase of OS-ELM
        NT = params['train_after']
        features = numpy.zeros(shape=(NT, n*params['num_lags']))
        targets = numpy.zeros(shape=(NT, n))

        history = self.window(self.history, NT)

        for i in range(params['num_lags'], NT):
          targets[i, :] = self.encoder.encode(history[i])

        for lags in xrange(params['num_lags']):
          shiftTargets = numpy.roll(targets, lags, axis=0)
          shiftTargets[:lags, :] = 0
          features[:, lags*n:(lags+1)*n] = shiftTargets

        self.net.initializePhase(features[:, :], targets[:, :])
        self.finishInitializeX = True
      else:
        # run sequential learning phase
        targets = numpy.zeros((1, params['encoding_num']))
        targets[0, :] = self.encoder.encode(self.history[-1])

        features = numpy.zeros((1, params['encoding_num'] * params['num_lags']))
        for lags in xrange(params['num_lags']):
          features[0, lags*n:(lags+1)*n] = self.encoder.encode(
            self.history[-1-(lags+1)])

      if iteration < params['stop_training_after']:
        self.net.train(features, targets)

    # run ELM on the latest data record
    n = params['encoding_num']
    currentFeatures = numpy.zeros((1, params['encoding_num'] * params['num_lags']))
    for lags in xrange(min(params['num_lags'], iteration)):
      currentFeatures[0, lags*n:(lags+1)*n] = self.encoder.encode(self.history[-1-lags])

    output = self.net.predict(currentFeatures)
    # print self.net.beta.shape
    # print output.shape
    # print params['num_predictions']
    predictions = self.encoder.classify(output[0],
                                        num=params['num_predictions'])

    correct = self.check_prediction(predictions, target)

    if params['verbosity'] > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(
        iteration, currentElement, predictions, target, correct)

      return {"current": currentElement,
              "reset": self.resets[-1],
              "random": self.randoms[-1],
              "predictions": predictions,
              "truth": target,
              "killCell": killCell,
              "sequenceCounter": self.sequenceCounter}
Example #15
class Runner(object):
    def __init__(self, numPredictions, resultsDir):
        random.seed(43)
        self.numPredictions = numPredictions

        if not os.path.exists(resultsDir):
            os.makedirs(resultsDir)

        self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')

        self.model = ModelFactory.create(MODEL_PARAMS)
        self.model.enableInference({"predictedField": "element"})
        self.shifter = InferenceShifter()
        self.mapping = getEncoderMapping(self.model)

        self.correct = []
        self.numPredictedActiveCells = []
        self.numPredictedInactiveCells = []
        self.numUnpredictedActiveColumns = []

        self.iteration = 0
        self.perturbed = False
        self.randoms = []
        self.verbosity = 1

        self.dataset = HighOrderDataset(numPredictions=self.numPredictions)
        self.sequences = []
        self.currentSequence = []
        self.replenish_sequence()

    def replenish_sequence(self):
        if self.iteration > PERTURB_AFTER and not self.perturbed:
            print "PERTURBING"
            # self.sequences = generateSequences(self.numPredictions, perturbed=True)
            sequence, target = self.dataset.generateSequence(self.iteration,
                                                             perturbed=True)
            self.perturbed = True
        else:
            sequence, target = self.dataset.generateSequence(self.iteration)
            # self.sequences = generateSequences(self.numPredictions, perturbed=False)

        # sequence = random.choice(self.sequences)

        if self.iteration > TEMPORAL_NOISE_AFTER:
            injectNoiseAt = random.randint(1, 3)
            sequence[injectNoiseAt] = random.randrange(RANDOM_START,
                                                       RANDOM_END)

        # append noise element at end of sequence
        random.seed(self.iteration)
        print "seed {} start {} end {}".format(self.iteration, RANDOM_START,
                                               RANDOM_END)
        sequence.append(random.randrange(RANDOM_START, RANDOM_END))

        print "next sequence: ", sequence
        self.currentSequence += sequence

    def step(self):
        element = self.currentSequence.pop(0)

        randomFlag = (len(self.currentSequence) == 1)
        self.randoms.append(randomFlag)

        result = self.shifter.shift(self.model.run({"element": element}))
        tm = self.model._getTPRegion().getSelf()._tfdr

        tm.mmClearHistory()
        # Use custom classifier (uses predicted cells to make predictions)
        predictiveColumns = set(
            [tm.columnForCell(cell) for cell in tm.predictiveCells])
        topPredictions = classify(self.mapping, predictiveColumns,
                                  self.numPredictions)

        truth = None if (self.randoms[-1] or len(self.randoms) >= 2
                         and self.randoms[-2]) else self.currentSequence[0]

        correct = None if truth is None else (truth in topPredictions)

        data = {
            "iteration": self.iteration,
            "current": element,
            "reset": False,
            "random": randomFlag,
            "train": True,
            "predictions": topPredictions,
            "truth": truth
        }

        self.resultsFile.write(json.dumps(data) + '\n')
        self.resultsFile.flush()

        if self.verbosity > 0:
            print("iteration: {0} \t"
                  "current: {1} \t"
                  "predictions: {2} \t"
                  "truth: {3} \t"
                  "correct: {4} \t").format(self.iteration, element,
                                            topPredictions, truth, correct)

        # replenish sequence
        if len(self.currentSequence) == 0:
            self.replenish_sequence()

        self.iteration += 1
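A toy illustration of the truth masking in step() above: a prediction is scored as None whenever the current or the previous element was a random separator, since those transitions are unpredictable by construction.

randoms = [False, False, True]    # the element just consumed was the separator
truth = None if (randoms[-1] or
                 len(randoms) >= 2 and randoms[-2]) else 'A'
assert truth is None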
Example #16
class Suite(PyExperimentSuite):
    def reset(self, params, repetition):
        random.seed(params['seed'])

        if params['encoding'] == 'basic':
            self.encoder = BasicEncoder(params['encoding_num'])
        elif params['encoding'] == 'distributed':
            self.encoder = DistributedEncoder(
                params['encoding_num'],
                maxValue=params['encoding_max'],
                minValue=params['encoding_min'],
                classifyWithRandom=params['classify_with_random'])
        else:
            raise Exception("Encoder not found")

        if params['dataset'] == 'simple':
            self.dataset = SimpleDataset()
        elif params['dataset'] == 'reber':
            self.dataset = ReberDataset(maxLength=params['max_length'])
        elif params['dataset'] == 'high-order':
            self.dataset = HighOrderDataset(
                numPredictions=params['num_predictions'], seed=params['seed'])
        else:
            raise Exception("Dataset not found")

        self.computeCounter = 0

        self.history = []
        self.resets = []
        self.randoms = []

        self.currentSequence = []
        self.targetPrediction = []
        self.replenishSequence(params, iteration=0)

        self.net = buildNetwork(params['encoding_num'],
                                params['num_cells'],
                                params['encoding_num'],
                                hiddenclass=LSTMLayer,
                                bias=True,
                                outputbias=params['output_bias'],
                                recurrent=True)

        self.trainer = BackpropTrainer(self.net,
                                       dataset=SequentialDataSet(
                                           params['encoding_num'],
                                           params['encoding_num']),
                                       learningrate=0.01,
                                       momentum=0,
                                       verbose=params['verbosity'] > 0)

        self.sequenceCounter = 0

    def window(self, data, params):
        start = max(0, len(data) - params['learning_window'])
        return data[start:]

    def train(self, params):
        """
    Train LSTM network on buffered dataset history
    After training, run LSTM on history[:-1] to get the state correct
    :param params:
    :return:
    """
        if params['reset_every_training']:
            n = params['encoding_num']
            self.net = buildNetwork(n,
                                    params['num_cells'],
                                    n,
                                    hiddenclass=LSTMLayer,
                                    bias=True,
                                    outputbias=params['output_bias'],
                                    recurrent=True)
            self.net.reset()

        # prepare training dataset
        ds = SequentialDataSet(params['encoding_num'], params['encoding_num'])
        history = self.window(self.history, params)
        resets = self.window(self.resets, params)

        for i in xrange(1, len(history)):
            if not resets[i - 1]:
                ds.addSample(self.encoder.encode(history[i - 1]),
                             self.encoder.encode(history[i]))
            if resets[i]:
                ds.newSequence()

        print "Train LSTM network on buffered dataset of length ", len(history)
        if params['num_epochs'] > 1:
            trainer = RPropMinusTrainer(self.net,
                                        dataset=ds,
                                        verbose=params['verbosity'] > 0)

            if len(history) > 1:
                trainer.trainEpochs(params['num_epochs'])

            # run network on buffered dataset after training to get the state right
            self.net.reset()
            for i in xrange(len(history) - 1):
                symbol = history[i]
                output = self.net.activate(self.encoder.encode(symbol))
                self.encoder.classify(output, num=params['num_predictions'])

                if resets[i]:
                    self.net.reset()
        else:
            self.trainer.setData(ds)
            self.trainer.train()

            # run network on buffered dataset after training to get the state right
            self.net.reset()
            for i in xrange(len(history) - 1):
                symbol = history[i]
                output = self.net.activate(self.encoder.encode(symbol))
                self.encoder.classify(output, num=params['num_predictions'])

                if resets[i]:
                    self.net.reset()

    def killCells(self, killCellPercent):
        """
    kill a fraction of LSTM cells from the network
    :param killCellPercent:
    :return:
    """
        if killCellPercent <= 0:
            return

        inputLayer = self.net['in']
        lstmLayer = self.net['hidden0']

        numLSTMCell = lstmLayer.outdim
        numDead = int(round(killCellPercent * numLSTMCell))
        zombiePermutation = numpy.random.permutation(numLSTMCell)
        deadCells = zombiePermutation[0:numDead]

        # remove connections from input layer to dead LSTM cells
        connectionInputToHidden = self.net.connections[inputLayer][0]
        weightInputToHidden = reshape(
            connectionInputToHidden.params,
            (connectionInputToHidden.outdim, connectionInputToHidden.indim))

        for cell in deadCells:
            for dim in range(4):
                weightInputToHidden[dim * numLSTMCell + cell, :] *= 0

        newParams = reshape(weightInputToHidden,
                            (connectionInputToHidden.paramdim, ))
        self.net.connections[inputLayer][0]._setParameters(
            newParams, connectionInputToHidden.owner)

        # remove dead connections within LSTM layer
        connectionHiddenToHidden = self.net.recurrentConns[0]
        weightHiddenToHidden = reshape(
            connectionHiddenToHidden.params,
            (connectionHiddenToHidden.outdim, connectionHiddenToHidden.indim))

        for cell in deadCells:
            weightHiddenToHidden[:, cell] *= 0

        newParams = reshape(weightHiddenToHidden,
                            (connectionHiddenToHidden.paramdim, ))
        self.net.recurrentConns[0]._setParameters(
            newParams, connectionHiddenToHidden.owner)

        # remove connections from dead LSTM cell to output layer
        connectionHiddenToOutput = self.net.connections[lstmLayer][0]
        weightHiddenToOutput = reshape(
            connectionHiddenToOutput.params,
            (connectionHiddenToOutput.outdim, connectionHiddenToOutput.indim))
        for cell in deadCells:
            weightHiddenToOutput[:, cell] *= 0

        newParams = reshape(weightHiddenToOutput,
                            (connectionHiddenToOutput.paramdim, ))
        self.net.connections[lstmLayer][0]._setParameters(
            newParams, connectionHiddenToOutput.owner)

    def replenishSequence(self, params, iteration):
        if iteration > params['perturb_after']:
            sequence, target = self.dataset.generateSequence(params['seed'] +
                                                             iteration,
                                                             perturbed=True)
        else:
            sequence, target = self.dataset.generateSequence(params['seed'] +
                                                             iteration)

        if (iteration > params['inject_noise_after']
                and iteration < params['stop_inject_noise_after']):
            injectNoiseAt = random.randint(1, 3)
            sequence[injectNoiseAt] = self.encoder.randomSymbol()

        if params['separate_sequences_with'] == 'random':
            sequence.append(
                self.encoder.randomSymbol(seed=params['seed'] + iteration))
            target.append(None)

        if params['verbosity'] > 0:
            print "Add sequence to buffer"
            print "sequence: ", sequence
            print "target: ", target

        self.currentSequence += sequence
        self.targetPrediction += target

    def check_prediction(self, topPredictions, targets):
        if targets is None:
            correct = None
        else:
            if isinstance(targets, numbers.Number):
                correct = targets in topPredictions
            else:
                correct = True
                for prediction in topPredictions:
                    correct = correct and (prediction in targets)
        return correct

    def iterate(self, params, repetition, iteration):
        currentElement = self.currentSequence.pop(0)
        target = self.targetPrediction.pop(0)

        # update buffered dataset
        self.history.append(currentElement)

        # whether there will be a reset signal after the current record
        resetFlag = (len(self.currentSequence) == 0
                     and params['separate_sequences_with'] == 'reset')
        self.resets.append(resetFlag)

        # whether there will be a random symbol after the current record
        randomFlag = (len(self.currentSequence) == 1
                      and params['separate_sequences_with'] == 'random')

        self.randoms.append(randomFlag)

        if len(self.currentSequence) == 0:
            self.replenishSequence(params, iteration)
            self.sequenceCounter += 1

        # kill cells
        killCell = False
        if iteration == params['kill_cell_after']:
            killCell = True
            self.killCells(params['kill_cell_percent'])

        # reset compute counter
        if iteration > 0 and iteration % params['compute_every'] == 0:
            self.computeCounter = params['compute_for']

        if self.computeCounter == 0 or iteration < params['compute_after']:
            computeLSTM = False
        else:
            computeLSTM = True

        if computeLSTM:
            self.computeCounter -= 1

            train = (not params['compute_test_mode']
                     or iteration % params['compute_every'] == 0)

            if train:
                if params['verbosity'] > 0:
                    print "Training LSTM at iteration {}".format(iteration)

                self.train(params)

            # run LSTM on the latest data record

            output = self.net.activate(self.encoder.encode(currentElement))
            predictions = self.encoder.classify(
                output, num=params['num_predictions'])

            correct = self.check_prediction(predictions, target)

            if params['verbosity'] > 0:
                print("iteration: {0} \t"
                      "current: {1} \t"
                      "predictions: {2} \t"
                      "truth: {3} \t"
                      "correct: {4} \t").format(iteration, currentElement,
                                                predictions, target, correct)

            if self.resets[-1]:
                if params['verbosity'] > 0:
                    print "Reset LSTM at iteration {}".format(iteration)
                self.net.reset()

            return {
                "iteration": iteration,
                "current": currentElement,
                "reset": self.resets[-1],
                "random": self.randoms[-1],
                "train": train,
                "predictions": predictions,
                "truth": target,
                "killCell": killCell,
                "sequenceCounter": self.sequenceCounter
            }
Example #17
class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
    else:
      raise Exception("Dataset not found")

    self.randomStart = self.dataset.numSymbols + 1
    self.randomEnd = self.randomStart + 5000

    MODEL_PARAMS['modelParams']['sensorParams']['encoders']['element']\
      ['categoryList'] = range(self.randomEnd)

    # if not os.path.exists(resultsDir):
    #   os.makedirs(resultsDir)
    # self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')
    if params['verbosity'] > 0:
      print " initializing HTM model..."
    self.model = ModelFactory.create(MODEL_PARAMS)
    self.model.enableInference({"predictedField": "element"})
    self.shifter = InferenceShifter()
    self.mapping = getEncoderMapping(self.model)

    self.numPredictedActiveCells = []
    self.numPredictedInactiveCells = []
    self.numUnpredictedActiveColumns = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenish_sequence(params, iteration=0)

    self.randoms = []
    self.verbosity = 1
    self.sequenceCounter = 0


  def replenish_sequence(self, params, iteration):
    if iteration > params['perturb_after']:
      print "PERTURBING"
      sequence, target = self.dataset.generateSequence(iteration, perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(iteration)

    if (iteration > params['inject_noise_after'] and
        iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = random.randrange(self.randomStart, self.randomEnd)

      if params['verbosity'] > 0:
        print "injectNoise ", sequence[injectNoiseAt],  " at: ", injectNoiseAt

    # separate sequences with random elements
    random.seed(iteration)
    print "seed {} start {} end {}".format(iteration, self.randomStart, self.randomEnd)
    sequence.append(random.randrange(self.randomStart, self.randomEnd))
    target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target


  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        # single target, multiple predictions
        correct = targets in topPredictions
      else:
        # multiple targets, multiple predictions
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct


  def iterate(self, params, repetition, iteration):
    element = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1)
    self.randoms.append(randomFlag)

    result = self.shifter.shift(self.model.run({"element": element}))
    tm = self.model._getTPRegion().getSelf()._tfdr

    tm.mmClearHistory()
    # Use custom classifier (uses predicted cells to make predictions)
    predictiveColumns = set([tm.columnForCell(cell) for cell in tm.predictiveCells])
    topPredictions = classify(
      self.mapping, predictiveColumns, params['num_predictions'])

    # correct = self.check_prediction(topPredictions, target)
    truth = None if (self.randoms[-1] or
                     len(self.randoms) >= 2 and self.randoms[-2]) else self.currentSequence[0]
    correct = None if truth is None else (truth in topPredictions)

    data = {"iteration": iteration,
            "current": element,
            "reset": False,
            "random": randomFlag,
            "train": True,
            "predictions": topPredictions,
            "truth": target,
            "sequenceCounter": self.sequenceCounter}

    if params['verbosity'] > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(
        iteration, element, topPredictions, target, correct)

    if len(self.currentSequence) == 0:
      self.replenish_sequence(params, iteration)
      self.sequenceCounter += 1

    return data
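Note that check_prediction above scores the multiple-target case as correct only when every top prediction is among the targets, not when any one of them is. A quick worked case:

topPredictions = [3, 5]
targets = [3, 5, 8]
correct = all(p in targets for p in topPredictions)  # same rule as the loop
assert correct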
Example #18
class Runner(object):

  def __init__(self, numPredictions, resultsDir):
    random.seed(43)
    self.numPredictions = numPredictions

    if not os.path.exists(resultsDir):
      os.makedirs(resultsDir)

    self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')

    self.model = ModelFactory.create(MODEL_PARAMS)
    self.model.enableInference({"predictedField": "element"})
    self.shifter = InferenceShifter()
    self.mapping = getEncoderMapping(self.model)

    self.correct = []
    self.numPredictedActiveCells = []
    self.numPredictedInactiveCells = []
    self.numUnpredictedActiveColumns = []

    self.iteration = 0
    self.perturbed = False
    self.randoms = []
    self.verbosity = 1

    self.dataset = HighOrderDataset(numPredictions=self.numPredictions)
    self.sequences = []
    self.currentSequence = []
    self.replenish_sequence()


  def replenish_sequence(self):
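    # switch once to the perturbed sequence distribution after PERTURB_AFTER
    # iterations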
    if self.iteration > PERTURB_AFTER and not self.perturbed:
      print "PERTURBING"
      # self.sequences = generateSequences(self.numPredictions, perturbed=True)
      sequence, target = self.dataset.generateSequence(self.iteration, perturbed=True)
      self.perturbed = True
    else:
      sequence, target = self.dataset.generateSequence(self.iteration)
      # self.sequences = generateSequences(self.numPredictions, perturbed=False)

    # sequence = random.choice(self.sequences)

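    # temporal noise: overwrite one early element with a random symbol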
    if self.iteration > TEMPORAL_NOISE_AFTER:
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = random.randrange(RANDOM_START, RANDOM_END)

    # append noise element at end of sequence
    random.seed(self.iteration)
    print "seed {} start {} end {}".format(self.iteration, RANDOM_START, RANDOM_END)
    sequence.append(random.randrange(RANDOM_START, RANDOM_END))

    print "next sequence: ", sequence
    self.currentSequence += sequence


  def step(self):
    element = self.currentSequence.pop(0)

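    # if exactly one buffered element remains, the next record is the random
    # separator appended by replenish_sequence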
    randomFlag = (len(self.currentSequence) == 1)
    self.randoms.append(randomFlag)

    result = self.shifter.shift(self.model.run({"element": element}))
    tm = self.model._getTPRegion().getSelf()._tfdr

    tm.mmClearHistory()
    # Use custom classifier (uses predicted cells to make predictions)
    predictiveColumns = set([tm.columnForCell(cell) for cell in tm.predictiveCells])
    topPredictions = classify(self.mapping, predictiveColumns, self.numPredictions)

    truth = None if (self.randoms[-1] or
                     (len(self.randoms) >= 2 and self.randoms[-2])
                     ) else self.currentSequence[0]

    correct = None if truth is None else (truth in topPredictions)

    data = {"iteration": self.iteration,
            "current": element,
            "reset": False,
            "random": randomFlag,
            "train": True,
            "predictions": topPredictions,
            "truth": truth}

    self.resultsFile.write(json.dumps(data) + '\n')
    self.resultsFile.flush()

    if self.verbosity > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(
        self.iteration, element, topPredictions, truth, correct)

    # replenish sequence
    if len(self.currentSequence) == 0:
      self.replenish_sequence()

    self.iteration += 1
class Suite(PyExperimentSuite):

  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'])
    else:
      raise Exception("Dataset not found")

    # if not os.path.exists(resultsDir):
    #   os.makedirs(resultsDir)
    # self.resultsFile = open(os.path.join(resultsDir, "0.log"), 'w')
    if params['verbosity'] > 0:
      print " initializing HTM model..."
    self.model = ModelFactory.create(MODEL_PARAMS)
    self.model.enableInference({"predictedField": "element"})
    self.shifter = InferenceShifter()
    self.mapping = getEncoderMapping(self.model)

    self.currentSequence = self.dataset.generateSequence()
    self.numPredictedActiveCells = []
    self.numPredictedInactiveCells = []
    self.numUnpredictedActiveColumns = []

    self.perturbed = False
    self.randoms = []
    self.verbosity = 1
    self.sequenceCounter = 0


  def replenish_sequence(self, params, iteration):
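    # switch once to the perturbed sequence distribution after 'perturb_after'
    # iterations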
    if iteration > params['perturb_after'] and not self.perturbed:
      print "PERTURBING"
      sequence = self.dataset.generateSequence(perturbed=True)
      self.perturbed = True
    else:
      sequence = self.dataset.generateSequence()

    if iteration > params['inject_noise_after']:
      injectNoiseAt = random.randint(1, 3)
      print "injectNoiseAt: ", injectNoiseAt
      sequence[injectNoiseAt] = random.randrange(RANDOM_START, RANDOM_END)
      print sequence[injectNoiseAt]

    sequence.append(random.randrange(RANDOM_START, RANDOM_END))
    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
    self.currentSequence += sequence


  def iterate(self, params, repetition, iteration):
    element = self.currentSequence.pop(0)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1)
    self.randoms.append(randomFlag)

    result = self.shifter.shift(self.model.run({"element": element}))
    tm = self.model._getTPRegion().getSelf()._tfdr

    tm.mmClearHistory()
    # Use custom classifier (uses predicted cells to make predictions)
    predictiveColumns = set([tm.columnForCell(cell) for cell in tm.predictiveCells])
    topPredictions = classify(self.mapping, predictiveColumns, params['num_predictions'])

    truth = None if (self.randoms[-1] or
                     (len(self.randoms) >= 2 and self.randoms[-2])
                     ) else self.currentSequence[0]

    correct = None if truth is None else (truth in topPredictions)

    data = {"iteration": iteration,
            "current": element,
            "reset": False,
            "random": randomFlag,
            "train": True,
            "predictions": topPredictions,
            "truth": truth,
            "sequenceCounter": self.sequenceCounter}

    if params['verbosity'] > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(
        iteration, element, topPredictions, truth, correct)

    if len(self.currentSequence) == 0:
      self.replenish_sequence(params, iteration)
      self.sequenceCounter += 1

    return data
class Suite(PyExperimentSuite):
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'],
                                        classifyWithRandom=params[
                                          'classify_with_random'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.numLags = params['num_lags']

    self.history = []
    self.resets = []


    self.finishInitializeX = False
    self.randoms = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = initializeELMnet(params['encoding_num'] * params['num_lags'],
                                params['encoding_num'],
                                numNeurons=params['num_cells'])
    self.sequenceCounter = 0


  def window(self, data, windowSize):
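    """Return the last windowSize elements of data."""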
    start = max(0, len(data) - windowSize)
    return data[start:]


  def replenishSequence(self, params, iteration):
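    # generateSequence receives seed+iteration, presumably so each replenish
    # is reproducible but distinct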
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(params['seed']+iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(params['seed']+iteration)

    if (iteration > params['inject_noise_after'] and
            iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(self.encoder.randomSymbol(seed=params['seed']+iteration))
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target


  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        correct = targets in topPredictions
      else:
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct


  def killCells(self, killCellPercent):
    """
    kill a fraction of LSTM cells from the network
    """
    if killCellPercent <= 0:
      return

    numHiddenNeurons = self.net.numHiddenNeurons

    numDead = int(round(killCellPercent * numHiddenNeurons))
    zombiePermutation = numpy.random.permutation(numHiddenNeurons)
    deadCells = zombiePermutation[0:numDead]
    liveCells = zombiePermutation[numDead:]

    self.net.inputWeights = self.net.inputWeights[liveCells, :]
    self.net.bias = self.net.bias[:, liveCells]
    self.net.beta = self.net.beta[liveCells, :]
    # numpy.ix_ keeps the full submatrix of surviving units; paired fancy
    # indexing would select only the diagonal
    self.net.M = self.net.M[numpy.ix_(liveCells, liveCells)]
    self.net.numHiddenNeurons = numHiddenNeurons - numDead


  def iterate(self, params, repetition, iteration):
    currentElement = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(currentElement)


    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')

    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    # kill cells
    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    if iteration > params['train_after']:
      n = params['encoding_num']

      if not self.finishInitializeX:
        # run initialization phase of OS-ELM
        NT = params['train_after']
        features = numpy.zeros(shape=(NT, n*params['num_lags']))
        targets = numpy.zeros(shape=(NT, n))

        history = self.window(self.history, NT)

        for i in range(params['num_lags'], NT):
          targets[i, :] = self.encoder.encode(history[i])

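        # lag k's column block is the target matrix shifted down k rows, i.e.
        # the encoding of the element k steps back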
        for lags in xrange(params['num_lags']):
          shiftTargets = numpy.roll(targets, lags, axis=0)
          shiftTargets[:lags, :] = 0
          features[:, lags*n:(lags+1)*n] = shiftTargets

        self.net.initializePhase(features, targets)
        # the enclosing branch already guarantees iteration > train_after
        self.finishInitializeX = True
      else:
        # run sequential learning phase
        targets = numpy.zeros((1, params['encoding_num']))
        targets[0, :] = self.encoder.encode(self.history[-1])

        features = numpy.zeros((1, params['encoding_num'] * params['num_lags']))
        for lags in xrange(params['num_lags']):
          features[0, lags*n:(lags+1)*n] = self.encoder.encode(
            self.history[-1-(lags+1)])

      if iteration < params['stop_training_after']:
        self.net.train(features, targets)

    # run ELM on the latest data record
    n = params['encoding_num']
    currentFeatures = numpy.zeros((1, params['encoding_num'] * params['num_lags']))
    for lags in xrange(min(params['num_lags'], iteration)):
      currentFeatures[0, lags*n:(lags+1)*n] = self.encoder.encode(self.history[-1-lags])

    output = self.net.predict(currentFeatures)
    # print self.net.beta.shape
    # print output.shape
    # print params['num_predictions']
    predictions = self.encoder.classify(output[0],
                                        num=params['num_predictions'])

    correct = self.check_prediction(predictions, target)

    if params['verbosity'] > 0:
      print ("iteration: {0} \t"
             "current: {1} \t"
             "predictions: {2} \t"
             "truth: {3} \t"
             "correct: {4} \t").format(
        iteration, currentElement, predictions, target, correct)

      return {"current": currentElement,
              "reset": self.resets[-1],
              "random": self.randoms[-1],
              "predictions": predictions,
              "truth": target,
              "killCell": killCell,
              "sequenceCounter": self.sequenceCounter}
Exemple #21
0
class Suite(PyExperimentSuite):
  def reset(self, params, repetition):
    random.seed(params['seed'])

    if params['encoding'] == 'basic':
      self.encoder = BasicEncoder(params['encoding_num'])
    elif params['encoding'] == 'distributed':
      self.encoder = DistributedEncoder(params['encoding_num'],
                                        params['encoding_num_non_random'],
                                        maxValue=params['encoding_max'],
                                        minValue=params['encoding_min'])
    elif params['encoding'] == 'sparse-distributed':
      self.encoder = SparseDistributedEncoder(params['encoding_num'],
                                              params['encoding_num_non_random'],
                                              params['encoding_active_bits'])
    else:
      raise Exception("Encoder not found")

    if params['dataset'] == 'simple':
      self.dataset = SimpleDataset()
    elif params['dataset'] == 'reber':
      self.dataset = ReberDataset(maxLength=params['max_length'])
    elif params['dataset'] == 'high-order':
      self.dataset = HighOrderDataset(numPredictions=params['num_predictions'],
                                      seed=params['seed'])
    else:
      raise Exception("Dataset not found")

    self.numLags = params['num_lags']

    self.computeCounter = 0
    self.history = []
    self.resets = []


    self.finishInitializeX = False
    self.randoms = []

    self.currentSequence = []
    self.targetPrediction = []
    self.replenishSequence(params, iteration=0)

    self.net = buildNetwork(params['encoding_num'] * params['num_lags'],
                            params['num_cells'],
                            params['encoding_num'],
                            bias=True,
                            outputbias=True)
    # self.trainer = BackpropTrainer(self.net, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    self.sequenceCounter = 0


  def window(self, data, windowSize):
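    """Return the last windowSize elements of data."""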
    start = max(0, len(data) - windowSize)
    return data[start:]


  def replenishSequence(self, params, iteration):
    if iteration > params['perturb_after']:
      sequence, target = self.dataset.generateSequence(params['seed']+iteration,
                                                       perturbed=True)
    else:
      sequence, target = self.dataset.generateSequence(params['seed']+iteration)

    if (iteration > params['inject_noise_after'] and
            iteration < params['stop_inject_noise_after']):
      injectNoiseAt = random.randint(1, 3)
      sequence[injectNoiseAt] = self.encoder.randomSymbol()

    if params['separate_sequences_with'] == 'random':
      sequence.append(self.encoder.randomSymbol(seed=params['seed']+iteration))
      target.append(None)

    if params['verbosity'] > 0:
      print "Add sequence to buffer"
      print "sequence: ", sequence
      print "target: ", target

    self.currentSequence += sequence
    self.targetPrediction += target


  def check_prediction(self, topPredictions, targets):
    if targets is None:
      correct = None
    else:
      if isinstance(targets, numbers.Number):
        correct = targets in topPredictions
      else:
        correct = True
        for prediction in topPredictions:
          correct = correct and (prediction in targets)
    return correct


  def train(self, params):
    """
    Train TDNN network on buffered dataset history
    :param params:
    :return:
    """
    # self.net = buildNetwork(params['encoding_num'] * params['num_lags'],
    #                         params['num_cells'],
    #                         params['encoding_num'],
    #                         bias=True,
    #                         outputbias=True)

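    # build (lagged encodings -> next-element encoding) training pairs over
    # the learning window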
    ds = SupervisedDataSet(params['encoding_num'] * params['num_lags'],
                           params['encoding_num'])
    history = self.window(self.history, params['learning_window'])

    n = params['encoding_num']
    for i in xrange(params['num_lags'], len(history)):
      targets = numpy.zeros((1, n))
      targets[0, :] = self.encoder.encode(history[i])

      features = numpy.zeros((1, n * params['num_lags']))
      for lags in xrange(params['num_lags']):
        features[0, lags * n:(lags + 1) * n] = self.encoder.encode(
          history[i - (lags + 1)])
      ds.addSample(features, targets)

    trainer = BackpropTrainer(self.net,
                              dataset=ds,
                              verbose=params['verbosity'] > 0)

    if len(history) > 1:
      trainer.trainEpochs(params['num_epochs'])

    # self.net.reset()
    #
    # for i in xrange(params['num_lags'], len(history)):
    #   targets = numpy.zeros((1, n))
    #   targets[0, :] = self.encoder.encode(self.history[i])
    #
    #   features = numpy.zeros((1, n * params['num_lags']))
    #   for lags in xrange(params['num_lags']):
    #     features[0, lags * n:(lags + 1) * n] = self.encoder.encode(
    #       self.history[i - (lags + 1)])
    #
    #   output = self.net.activate(features[0, :])
    #   predictions = self.encoder.classify(output, num=params['num_predictions'])
    #   correct = self.check_prediction(predictions, self.history[i])
    #   print ("iteration: {0} \t"
    #          "current: {1} \t"
    #          "predictions: {2} \t"
    #          "truth: {3} \t"
    #          "correct: {4} \t").format(
    #     i, self.history[i-1], predictions, self.history[i], correct)


  def killCells(self, killCellPercent):
    """
    kill a fraction of cells from the network
    """
    if killCellPercent <= 0:
      return

    numHiddenNeurons = self.net.numHiddenNeurons

    numDead = int(round(killCellPercent * numHiddenNeurons))
    zombiePermutation = numpy.random.permutation(numHiddenNeurons)
    deadCells = zombiePermutation[0:numDead]
    liveCells = zombiePermutation[numDead:]

    self.net.inputWeights = self.net.inputWeights[liveCells, :]
    self.net.bias = self.net.bias[:, liveCells]
    self.net.beta = self.net.beta[liveCells, :]
    # numpy.ix_ keeps the full submatrix of surviving cells; paired fancy
    # indexing would select only the diagonal
    self.net.M = self.net.M[numpy.ix_(liveCells, liveCells)]
    self.net.numHiddenNeurons = numHiddenNeurons - numDead


  def iterate(self, params, repetition, iteration):
    currentElement = self.currentSequence.pop(0)
    target = self.targetPrediction.pop(0)

    # update buffered dataset
    self.history.append(currentElement)

    # whether there will be a reset signal after the current record
    resetFlag = (len(self.currentSequence) == 0 and
                 params['separate_sequences_with'] == 'reset')
    self.resets.append(resetFlag)

    # whether there will be a random symbol after the current record
    randomFlag = (len(self.currentSequence) == 1 and
                  params['separate_sequences_with'] == 'random')

    self.randoms.append(randomFlag)

    if len(self.currentSequence) == 0:
      self.replenishSequence(params, iteration)
      self.sequenceCounter += 1

    killCell = False
    if iteration == params['kill_cell_after']:
      killCell = True
      self.killCells(params['kill_cell_percent'])

    # reset compute counter
    if iteration > 0 and iteration % params['compute_every'] == 0:
      self.computeCounter = params['compute_for']

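    # run (and periodically train) the network only in bursts of 'compute_for'
    # iterations every 'compute_every' iterations, once past 'compute_after'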
    if self.computeCounter == 0 or iteration < params['compute_after']:
      computeNet = False
    else:
      computeNet = True

    if computeNet:
      self.computeCounter -= 1

      train = iteration % params['compute_every'] == 0

      if train:
        if params['verbosity'] > 0:
          print "Training Network at iteration {}".format(iteration)
        self.train(params)

    if iteration > params['num_lags']:
      # run network on the latest data record
      n = params['encoding_num']
      currentFeatures = numpy.zeros((params['encoding_num'] * params['num_lags'], ))
      for lags in xrange(min(params['num_lags'], iteration)):
        currentFeatures[lags*n:(lags+1)*n] = self.encoder.encode(self.history[-1-lags])

      output = self.net.activate(currentFeatures)
      predictions = self.encoder.classify(output, num=params['num_predictions'])

      correct = self.check_prediction(predictions, target)

      if params['verbosity'] > 0:
        print ("iteration: {0} \t"
               "current: {1} \t"
               "predictions: {2} \t"
               "truth: {3} \t"
               "correct: {4} \t").format(
          iteration, currentElement, predictions, target, correct)

        return {"current": currentElement,
                "reset": self.resets[-1],
                "random": self.randoms[-1],
                "predictions": predictions,
                "truth": target,
                "killCell": killCell,
                "sequenceCounter": self.sequenceCounter}