Example 1
def getDescriptionImpl(datasets, config):
  """ Implementation for description.py getDescription() entry point function.
  Builds an experiment description dictionary as required by LPF (Lightweight
  Prediction Framework).  Hardcoded data that is less likely to vary between
  experiments is augmented with data from the config dictionary.
  See getBaseDatasets() and getDatasets().

    datasets:     a dictionary of input datasets that may have been pre-processed
                   via aggregation.  Keys:
                   'trainDataset'         -- path to the training dataset
                   'inferDataset.N.alias' -- path(s) to the inference dataset

    config:       configuration dictionary from description.py

    returns:      an experiment description dictionary as required by LPF
  """

  # ----------------------------------------------------------------------------
  # Encoder for the sensor
  encoder = MultiEncoder(_getDatasetEncoderConfig(config))

  # ------------------------------------------------------------------
  # Region params
  CLAParams = _getCLAParams(encoder=encoder, config=config)


  sensorParams = dict(
    # encoder/datasource are not parameters so don't include here
    verbosity=config['sensorVerbosity']
  )

  # Filesource for the sensor. Set the filename in setup functions.
  dataSource = FileRecordStream('foo')

  description = dict(
    options = dict(
      logOutputsDuringInference = False,
      ),

    network = dict(

      # Think of the sensor as a shell holding the dataSource and encoder;
      # the encoder has pre-encoder and post-encoder filters;
      # the filters are configured elsewhere (TODO: where?)
      sensorDataSource = dataSource,
      sensorEncoder = encoder,

      # LPF converts this dict to a JSON string and uses it as constructor
      # args; it should contain only simple types (ints, strings, floats)
      sensorParams = sensorParams,

      # CLA region class; the 'py.' prefix marks class names implemented in
      # Python; older code implemented regions in C++ and used the class name
      # without a prefix.
      CLAType = 'py.CLARegion',
      # dict converted to JSON string
      CLAParams = CLAParams,

      # classifiers are presently not used (formerly used by vision code); should
      # be okay to leave out Classifier, sensor, CLA
      classifierType = None,
      classifierParams = None),
  )

  # ----------------------------------------------------------------------------
  # Configure Training and Inference phases
  # ----------------------------------------------------------------------------
  #
  # a phase is a list of zero or more steps (each dict in the list describes one step)
  # (see py/nupic/frameworks/prediction/experiment.py docstring)
  #
  # step = dict (name, setup, iter, finish, iterationCount)
  #   setup, iter, finish are callbacks;
  #
  # name: step name string; optional, used for printing messages to console
  # setup: open input file (e.g., via dataSource), print stats, etc.
  # iter: for diagnostics/debugging; called by net.run between iterations.
  # finish: called at the end by net.run; usually prints out stats (e.g., how many
  #   synapses, time taken, etc.)
  # callbacks are almost always reused, so they are not experiment-specific (see
  #   imports at top of file)
  # a callback always has this form c(experiment_obj, iter_number); can get
  #   experiment.network.regions["sensor"].getSelf()
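  #
  # For illustration only (the file name and counts below are hypothetical), a
  # single step might look like:
  #
  #   dict(name='sp.train.pass_0',          # optional; used in console messages
  #        iterationCount=1000,             # number of records to run
  #        setup=[sensorOpen('train.csv')], # callbacks run before the step
  #        iter=printTPTiming(100),         # callback run between iterations
  #        finish=[printTPTiming()])        # callbacks run when the step ends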

  spEnable = config['spEnable']
  spTrain = _isSPTrainingEnabled(config)

  tpEnable = config['tpEnable']
  tpTrain = _isTPTrainingEnabled(config)
  # NOTE: presently, we always train TP (during training phase) if TP is enabled
  assert(tpTrain == tpEnable)

  # At least one of SP/TP must be enabled for a meaningful system
  assert(spEnable or tpEnable)

  # NOTE: SP and Spatial regression need to undergo training over the same
  #       set of rows. Since we're not reading the training dataset here to
  #       find out the number of rows, we presently configure both with the
  #       same auto-rewind setting.
  # TODO: this may cause knn training to repeatedly iterate unnecessarily
  #       over the same records in case spTrainIterationCount is larger than the
  #       number of rows in the training dataset. Look into optimizing this to
  #       avoid wasting time on knn training due to unnecessary iterations, but
  #       make sure that both SP and knn train on the exact same rows.
  spTrainMayNeedAutoRewind = config['spTrainIterationCount'] is not None


  # ----------------------------------------------------------------------------
  # SP training
  if spTrain:
    description['spTrain'] = []
    for i in xrange(config['spTrainNPasses']):
      stepDict = dict(
        name='sp.train.pass_%d' % (i),
        iterationCount=config['spTrainIterationCount'],
        setup=[sensorOpen(datasets[_getTrainingDatasetKey(config)]) if i==0 \
                 else sensorRewind,
               fileSourceAutoRewind(spTrainMayNeedAutoRewind),],
        finish=[fileSourceAutoRewind(False),],
      )

      description['spTrain'].append(stepDict)

  elif spEnable:
    description['spTrain'] = dict(
      # need to train with one iteration just to initialize data structures
      # TODO: seems like a hack; shouldn't CLA framework automatically initialize
      #   the necessary subsystems? (ask Ron)
      iterationCount=1,
    )


  # ----------------------------------------------------------------------------
  # TP training
  if tpTrain:
    description['tpTrain'] = []
    mayNeedAutoRewind = config['tpTrainIterationCount'] is not None
    for i in xrange(config['tpTrainNPasses']):
      stepDict = dict(
        name='tp.train.pass_%d' % (i),
        iterationCount=config['tpTrainIterationCount'],
        setup=[
          sensorOpen(datasets[_getTrainingDatasetKey(config)]) if i==0 \
            else sensorRewind,
          fileSourceAutoRewind(mayNeedAutoRewind),
          ],
        finish=[fileSourceAutoRewind(False),],
        )
      if config['tpTrainPrintStatsPeriodIter'] > 0:
        stepDict['iter'] = printTPTiming(config['tpTrainPrintStatsPeriodIter'])
        stepDict['finish'] += [printTPTiming()] #, printTPCells]

      description['tpTrain'].append(stepDict)


  # ----------------------------------------------------------------------------
  # Inference tests
  # NOTE: Presently, SP and TP learning is disabled during inference
  description['infer'] = []

  predictionFields = None
  spatialRegrTests = None
  if 'spFieldPredictionSchema' in config and config['spFieldPredictionSchema'] is not None:
    if len(config['spFieldPredictionSchema']['predictionFields']) > 0:
      spFieldPredictionSchema = config['spFieldPredictionSchema']
      predictionFields = spFieldPredictionSchema['predictionFields']
      if len(spFieldPredictionSchema['regressionTests']) > 0:
        # presently, our spatial regression modules (knn and linear) don't support
        # multiple fields
        assert(len(predictionFields) == 1)
        spatialRegrTests = spFieldPredictionSchema['regressionTests']
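
  # For illustration only (field and algorithm names here are hypothetical), a
  # schema enabling spatial field prediction with one regression test might be:
  #   config['spFieldPredictionSchema'] = dict(
  #     predictionFields=['consumption'],
  #     regressionTests=[dict(algorithm='knnRegression')])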


  # Set up test steps for all inference datasets
  for i, ds in enumerate(config['inferDatasets']):

    datasetInfo = config['inferDatasets'][i]

    # NOTE: the path/contents may differ from the corresponding dataset
    #       referenced in config['inferDatasets'] due to preprocessing (e.g.,
    #       aggregation)
    inferenceDatasetKey = \
      _datasetKeyFromInferenceDatasetIndex(index=i, config=config)
    inferenceDatasetPath = datasets[inferenceDatasetKey]


    # ----------------------------------------
    # Step: Temporal inference
    #
    if tpEnable:

      # Turn off plot histograms when running under darwin
      plotTemporalHistograms = True
      if sys.platform.startswith('darwin'):
        plotTemporalHistograms = False
        print "Turning off plotTemporalHistograms under darwin"

      temporalTestingStep = dict(
        name = getTemporalInferenceStepName(datasetInfo['alias'], i),
        iterationCount = ds['iterCount'],
        setup = [sensorOpen(inferenceDatasetPath)],
        ppOptions = dict(verbosity=config['postprocVerbosity'],
                         plotTemporalHistograms=plotTemporalHistograms,
                         printLearnedCoincidences=False,
                         logPredictions=True,)
      )
      description['infer'].append(temporalTestingStep)
    else:
      print 'temporalTestingStep skipped.'

    # ----------------------------------------
    # Step: Non-temporal Regression algorithm training (if enabled)
    #
    if spatialRegrTests:
      # NOTE: we don't need auto-rewind when training spatial regression algorithms
      regrTrainStep = dict(
        name = ('%s_nontemporal.training') % \
                 (_normalizeDatasetAliasNameForStepName(datasetInfo['alias']),),
        iterationCount=config['spTrainIterationCount'],
        setup=[sensorOpen(datasets[_getTrainingDatasetKey(config)]),
               fileSourceAutoRewind(spTrainMayNeedAutoRewind),],
        ppOptions = dict(verbosity=config['postprocVerbosity'],
                         printLearnedCoincidences=False,)
      )

      # Add Spatial Regression algorithm training requests
      ppOptions = regrTrainStep['ppOptions']
      for test in spatialRegrTests:
        assert(len(predictionFields) == 1)
        ppOptions[test['algorithm']] = 'train,%s' % (predictionFields[0])

      description['infer'].append(regrTrainStep)


    # ----------------------------------------
    # Step: Non-temporal Inference
    #
    nontemporalTestingStep = dict(
      name = getNonTemporalInferenceStepName(datasetInfo['alias'], i),
      iterationCount = ds['iterCount'],
      setup = [
        sensorOpen(inferenceDatasetPath),
        fileSourceAutoRewind(False),
        # TODO Do we need to turn off collectStats in the 'finish' sub-step?
        setTPAttribute('collectStats', 1),
        ],
      # TODO which ppOptions do we want in this template?
      ppOptions = dict(
        verbosity=config['postprocVerbosity'],
        plotTemporalHistograms=False,
        printLearnedCoincidences=False,
        logPredictions=True,
        ),
      )

    # Add Spatial Field Prediction options to inference step
    if predictionFields:
      # Set sparse encodings of prediction fields to zero
      setup = nontemporalTestingStep['setup']
      setup.append(
        setAttribute('sensor', 'postEncodingFilters',
                     [ModifyFields(fields=predictionFields, operation='setToZero')])
      )
    if spatialRegrTests:
      # Add regression test requests
      ppOptions = nontemporalTestingStep['ppOptions']
      for test in spatialRegrTests:
        assert(len(predictionFields) == 1)
        ppOptions[test['algorithm']] = 'test,%s' % (predictionFields[0])

    description['infer'].append(nontemporalTestingStep)


  # ----------------------------------------------------------------------------
  # Add auto-reset intervals to the sensor region for tpTrain and Infer phases
  # (if config['sensorAutoReset'] is enabled)
  # ----------------------------------------------------------------------------
  if 'sensorAutoReset' in config and config['sensorAutoReset'] is not None:
    dd = defaultdict(lambda: 0,  config['sensorAutoReset'])
    # class timedelta([days[, seconds[, microseconds[, milliseconds[, minutes[,
    #                 hours[, weeks]]]]]]])
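    # For example (hypothetical value): config['sensorAutoReset'] = dict(hours=1)
    # would yield a one-hour timedelta below, and the setup callbacks appended to
    # the tpTrain/infer steps would have the sensor insert an automatic reset at
    # that interval; keys omitted from the dict default to 0 via the defaultdict.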
    if not (0 == dd['days'] == dd['hours'] == dd['minutes'] == dd['seconds'] \
            == dd['milliseconds'] == dd['microseconds'] == dd['weeks']):
      timeDelta = timedelta(days=dd['days'],
                            hours=dd['hours'],
                            minutes=dd['minutes'],
                            seconds=dd['seconds'],
                            milliseconds=dd['milliseconds'],
                            microseconds=dd['microseconds'],
                            weeks=dd['weeks'])

      tpTrainSteps = description.get('tpTrain', [])
      inferSteps = description.get('infer', [])
      for step in itertools.chain(tpTrainSteps, inferSteps):
        if 'setup' not in step:
          step['setup'] = []
        step['setup'].append(setAutoResetInterval(timeDelta))

  return description
Example 2
def getDescription(datasets):

  # ========================================================================
  # Network definition

  # Encoder for the sensor
  encoder = MultiEncoder()  
  if 'filenameCategory' in datasets:
    categories = [x.strip() for x in open(datasets['filenameCategory'])]
  else:
    categories = [chr(x+ord('a')) for x in range(26)]

  if config['overlappingPatterns']:
    encoder.addEncoder("name", SDRCategoryEncoder(n=200, 
      w=config['spNumActivePerInhArea'], categoryList=categories, name="name"))
  else:
    encoder.addEncoder("name", CategoryEncoder(w=config['spNumActivePerInhArea'], 
                        categoryList=categories, name="name"))


  # ------------------------------------------------------------------
  # Node params
  # The inputs are long, horizontal vectors
  inputDimensions = (1, encoder.getWidth())

  # Layout the coincidences vertically stacked on top of each other, each
  # looking at the entire input field. 
  columnDimensions = (config['spCoincCount'], 1)
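
  # For example (hypothetical sizes): with an encoder width of 100 and
  # spCoincCount of 200, inputDimensions is (1, 100) and columnDimensions is
  # (200, 1): 200 columns, each seeing the entire 100-bit input row.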

  # If we have disableSpatial, then set the number of "coincidences" to be the
  #  same as the encoder width
  if config['disableSpatial']:
    columnDimensions = (encoder.getWidth(), 1)
    config['trainSP'] = 0

  sensorParams = dict(
    # encoder/datasource are not parameters so don't include here
    verbosity=config['sensorVerbosity']
  )

  CLAParams = dict(
    # SP params
    disableSpatial = config['disableSpatial'],
    inputDimensions = inputDimensions,
    columnDimensions = columnDimensions,
    potentialRadius = inputDimensions[1]/2,
    potentialPct = 1.00,
    gaussianDist = 0,
    commonDistributions = 0,    # should be False if possibly not training
    localAreaDensity = -1, #0.05, 
    numActiveColumnsPerInhArea = config['spNumActivePerInhArea'], 
    dutyCyclePeriod = 1000,
    stimulusThreshold = 1,
    synPermInactiveDec=0.11,
    synPermActiveInc=0.11,
    synPermActiveSharedDec=0.0,
    synPermOrphanDec = 0.0,
    minPctDutyCycleBeforeInh = 0.001,
    minPctDutyCycleAfterInh = 0.001,
    spVerbosity = config['spVerbosity'],
    spSeed = 1,
    printPeriodicStats = int(config['spPrintPeriodicStats']),


    # TP params
    tpSeed = 1,
    disableTemporal = 0 if config['trainTP'] else 1,
    temporalImp = config['temporalImp'],
    nCellsPerCol = config['tpNCellsPerCol'] if config['trainTP'] else 1,

    collectStats = 1,
    burnIn = 2,
    verbosity = config['tpVerbosity'],

    newSynapseCount = config['spNumActivePerInhArea'],
    minThreshold = config['spNumActivePerInhArea'],
    activationThreshold = config['spNumActivePerInhArea'],

    initialPerm = config['tpInitialPerm'],
    connectedPerm = 0.5,
    permanenceInc = config['tpPermanenceInc'],
    permanenceDec = config['tpPermanenceDec'],  # perhaps tune this
    globalDecay = config['tpGlobalDecay'],

    pamLength = config['tpPAMLength'],
    maxSeqLength = config['tpMaxSeqLength'],
    maxAge = config['tpMaxAge'],


    # General params
    computeTopDown = config['computeTopDown'],
    trainingStep = 'spatial',
    )


  dataSource = FileRecordStream(datasets['filenameTrain'])

  description = dict(
    options = dict(
      logOutputsDuringInference = False,
    ),

    network = dict(
      sensorDataSource = dataSource,
      sensorEncoder = encoder, 
      sensorParams = sensorParams,

      CLAType = 'py.CLARegion',
      CLAParams = CLAParams,

      classifierType = None,
      classifierParams = None),
  )

  if config['trainSP']:
    description['spTrain'] = dict(
      iterationCount=config['iterationCountTrain'], 
      #iter=displaySPCoincidences(50),
      #finish=printSPCoincidences()
      )
  else:
    description['spTrain'] = dict(
      # need to train with one iteration just to initialize data structures
      iterationCount=1)

  if config['trainTP']:
    description['tpTrain'] = []
    for i in xrange(config['trainTPRepeats']):
      stepDict = dict(name='step_%d' % (i), 
                      setup=sensorRewind, 
                      iterationCount=config['iterationCountTrain'],
                      )
      if config['tpTimingEvery'] > 0:
        stepDict['iter'] = printTPTiming(config['tpTimingEvery'])
        stepDict['finish'] = [printTPTiming(), printTPCells]

      description['tpTrain'].append(stepDict)


  # ----------------------------------------------------------------------------
  # Inference tests
  inferSteps = []

  if config['evalTrainingSetNumIterations'] > 0:
    # The training set. Used to train the n-grams. 
    inferSteps.append(
      dict(name = 'confidenceTrain_baseline', 
           iterationCount = min(config['evalTrainingSetNumIterations'], 
                                config['iterationCountTrain']),
           ppOptions = dict(verbosity=config['ppVerbosity'],
                            printLearnedCoincidences=True,
                            nGrams='train',
                            #ipsDetailsFor = "name,None,2",
                            ),
             #finish=printTPCells,
          )
      )

    # Testing the training set on both the TP and n-grams. 
    inferSteps.append(
      dict(name = 'confidenceTrain_nonoise', 
             iterationCount = min(config['evalTrainingSetNumIterations'], 
                                  config['iterationCountTrain']),
             setup = [sensorOpen(datasets['filenameTrain'])],
             ppOptions = dict(verbosity=config['ppVerbosity'],
                              printLearnedCoincidences=False,
                              nGrams='test',
                              burnIns = [1,2,3,4],
                              #ipsDetailsFor = "name,None,2",
                              #ipsAt = [1,2,3,4],
                              ),
            )
        )

  # The test set
  if datasets['filenameTest'] != datasets['filenameTrain']:
      inferSteps.append(
        dict(name = 'confidenceTest_baseline', 
             iterationCount = config['iterationCountTest'],
             setup = [sensorOpen(datasets['filenameTest'])],
             ppOptions = dict(verbosity=config['ppVerbosity'],
                              printLearnedCoincidences=False,
                              nGrams='test',
                              burnIns = [1,2,3,4],
                              #ipsAt = [1,2,3,4],
                              ipsDetailsFor = "name,None,2",
                              ),
            )
        )


  description['infer'] = inferSteps

  return description
Example 3
def getDescription(datasets):

    # ========================================================================
    # Network definition

    # Encoder for the sensor
    encoder = MultiEncoder()
    if 'filenameCategory' in datasets:
        categories = [x.strip() for x in open(datasets['filenameCategory'])]
    else:
        categories = [chr(x + ord('a')) for x in range(26)]

    if config['overlappingPatterns']:
        encoder.addEncoder(
            "name",
            SDRCategoryEncoder(n=200,
                               w=config['spNumActivePerInhArea'],
                               categoryList=categories,
                               name="name"))
    else:
        encoder.addEncoder(
            "name",
            CategoryEncoder(w=config['spNumActivePerInhArea'],
                            categoryList=categories,
                            name="name"))

    # ------------------------------------------------------------------
    # Node params
    # The inputs are long, horizontal vectors
    inputDimensions = (1, encoder.getWidth())

    # Layout the coincidences vertically stacked on top of each other, each
    # looking at the entire input field.
    columnDimensions = (config['spCoincCount'], 1)

    # If we have disableSpatial, then set the number of "coincidences" to be the
    #  same as the encoder width
    if config['disableSpatial']:
        columnDimensions = (encoder.getWidth(), 1)
        config['trainSP'] = 0

    sensorParams = dict(
        # encoder/datasource are not parameters so don't include here
        verbosity=config['sensorVerbosity'])

    CLAParams = dict(
        # SP params
        disableSpatial=config['disableSpatial'],
        inputDimensions=inputDimensions,
        columnDimensions=columnDimensions,
        potentialRadius=inputDimensions[1] / 2,
        potentialPct=1.00,
        gaussianDist=0,
        commonDistributions=0,  # should be False if possibly not training
        localAreaDensity=-1,  #0.05, 
        numActiveColumnsPerInhArea=config['spNumActivePerInhArea'],
        dutyCyclePeriod=1000,
        stimulusThreshold=1,
        synPermInactiveDec=0.11,
        synPermActiveInc=0.11,
        synPermActiveSharedDec=0.0,
        synPermOrphanDec=0.0,
        minPctDutyCycleBeforeInh=0.001,
        minPctDutyCycleAfterInh=0.001,
        spVerbosity=config['spVerbosity'],
        spSeed=1,
        printPeriodicStats=int(config['spPrintPeriodicStats']),

        # TM params
        tpSeed=1,
        disableTemporal=0 if config['trainTP'] else 1,
        temporalImp=config['temporalImp'],
        nCellsPerCol=config['tpNCellsPerCol'] if config['trainTP'] else 1,
        collectStats=1,
        burnIn=2,
        verbosity=config['tpVerbosity'],
        newSynapseCount=config['spNumActivePerInhArea'],
        minThreshold=config['spNumActivePerInhArea'],
        activationThreshold=config['spNumActivePerInhArea'],
        initialPerm=config['tpInitialPerm'],
        connectedPerm=0.5,
        permanenceInc=config['tpPermanenceInc'],
        permanenceDec=config['tpPermanenceDec'],  # perhaps tune this
        globalDecay=config['tpGlobalDecay'],
        pamLength=config['tpPAMLength'],
        maxSeqLength=config['tpMaxSeqLength'],
        maxAge=config['tpMaxAge'],

        # General params
        computeTopDown=config['computeTopDown'],
        trainingStep='spatial',
    )

    dataSource = FileRecordStream(datasets['filenameTrain'])

    description = dict(
        options=dict(logOutputsDuringInference=False, ),
        network=dict(sensorDataSource=dataSource,
                     sensorEncoder=encoder,
                     sensorParams=sensorParams,
                     CLAType='py.CLARegion',
                     CLAParams=CLAParams,
                     classifierType=None,
                     classifierParams=None),
    )

    if config['trainSP']:
        description['spTrain'] = dict(
            iterationCount=config['iterationCountTrain'],
            #iter=displaySPCoincidences(50),
            #finish=printSPCoincidences()
        )
    else:
        description['spTrain'] = dict(
            # need to train with one iteration just to initialize data structures
            iterationCount=1)

    if config['trainTP']:
        description['tpTrain'] = []
        for i in range(config['trainTPRepeats']):
            stepDict = dict(
                name='step_%d' % (i),
                setup=sensorRewind,
                iterationCount=config['iterationCountTrain'],
            )
            if config['tpTimingEvery'] > 0:
                stepDict['iter'] = printTPTiming(config['tpTimingEvery'])
                stepDict['finish'] = [printTPTiming(), printTPCells]

            description['tpTrain'].append(stepDict)

    # ----------------------------------------------------------------------------
    # Inference tests
    inferSteps = []

    if config['evalTrainingSetNumIterations'] > 0:
        # The training set. Used to train the n-grams.
        inferSteps.append(
            dict(
                name='confidenceTrain_baseline',
                iterationCount=min(config['evalTrainingSetNumIterations'],
                                   config['iterationCountTrain']),
                ppOptions=dict(
                    verbosity=config['ppVerbosity'],
                    printLearnedCoincidences=True,
                    nGrams='train',
                    #ipsDetailsFor = "name,None,2",
                ),
                #finish=printTPCells,
            ))

        # Testing the training set on both the TM and n-grams.
        inferSteps.append(
            dict(
                name='confidenceTrain_nonoise',
                iterationCount=min(config['evalTrainingSetNumIterations'],
                                   config['iterationCountTrain']),
                setup=[sensorOpen(datasets['filenameTrain'])],
                ppOptions=dict(
                    verbosity=config['ppVerbosity'],
                    printLearnedCoincidences=False,
                    nGrams='test',
                    burnIns=[1, 2, 3, 4],
                    #ipsDetailsFor = "name,None,2",
                    #ipsAt = [1,2,3,4],
                ),
            ))

    # The test set
    if datasets['filenameTest'] != datasets['filenameTrain']:
            inferSteps.append(
                dict(
                    name='confidenceTest_baseline',
                    iterationCount=config['iterationCountTest'],
                    setup=[sensorOpen(datasets['filenameTest'])],
                    ppOptions=dict(
                        verbosity=config['ppVerbosity'],
                        printLearnedCoincidences=False,
                        nGrams='test',
                        burnIns=[1, 2, 3, 4],
                        #ipsAt = [1,2,3,4],
                        ipsDetailsFor="name,None,2",
                    ),
                ))

    description['infer'] = inferSteps

    return description
Example 4
def getDescriptionImpl(datasets, config):
    """ Implementation for description.py getDescription() entry point function.
  Builds an experiment description dictionary as required by LPF (Lightweight
  Prediction Framework).  Hardcoded data that is less likely to vary between
  experiments is augmented with data from the config dictionary.
  See getBaseDatasets() and getDatasets().

    datasets:     a dictionary of input datasets that may have been pre-processed
                   via aggregation.  Keys:
                   'trainDataset'         -- path to the training dataset
                   'inferDataset.N.alias' -- path(s) to the inference dataset

    config:       configuration dictionary from description.py

    returns:      an experiment description dictionary as required by LPF
  """

    # ----------------------------------------------------------------------------
    # Encoder for the sensor
    encoder = MultiEncoder(_getDatasetEncoderConfig(config))

    # ------------------------------------------------------------------
    # Region params
    CLAParams = _getCLAParams(encoder=encoder, config=config)

    sensorParams = dict(
        # encoder/datasource are not parameters so don't include here
        verbosity=config['sensorVerbosity'])

    # Filesource for the sensor. Set the filename in setup functions.
    dataSource = FileRecordStream('foo')

    description = dict(
        options=dict(logOutputsDuringInference=False, ),
        network=dict(

            # Think of the sensor as a shell holding the dataSource and encoder;
            # the encoder has pre-encoder and post-encoder filters;
            # the filters are configured elsewhere (TODO: where?)
            sensorDataSource=dataSource,
            sensorEncoder=encoder,

            # LPF converts this dict to a JSON string and uses it as constructor
            # args; it should contain only simple types (ints, strings, floats)
            sensorParams=sensorParams,

            # CLA region class; the 'py.' prefix marks class names implemented in
            # Python; older code implemented regions in C++ and used the class name
            # without a prefix.
            CLAType='py.CLARegion',
            # dict converted to JSON string
            CLAParams=CLAParams,

            # classifiers are presently not used (formerly used by vision code); should
            # be okay to leave out Classifier, sensor, CLA
            classifierType=None,
            classifierParams=None),
    )

    # ----------------------------------------------------------------------------
    # Configure Training and Inference phases
    # ----------------------------------------------------------------------------
    #
    # a phase is a list of zero or more steps (each dict in the list describes one step)
    # (see py/nupic/frameworks/prediction/experiment.py docstring)
    #
    # step = dict (name, setup, iter, finish, iterationCount)
    #   setup, iter, finish are callbacks;
    #
    # name: step name string; optional, used for printing messages to console
    # setup: open input file (e.g., via dataSource), print stats, etc.
    # iter: for diagnostics/debugging; called by net.run between iterations.
    # finish: called at the end by net.run; usually prints out stats (e.g., how many
    #   synapses, time taken, etc.)
    # callbacks are almost always reused, so they are not experiment-specific (see
    #   imports at top of file)
    # a callback always has this form c(experiment_obj, iter_number); can get
    #   experiment.network.regions["sensor"].getSelf()

    spEnable = config['spEnable']
    spTrain = _isSPTrainingEnabled(config)

    tpEnable = config['tpEnable']
    tpTrain = _isTPTrainingEnabled(config)
    # NOTE: presently, we always train TP (during training phase) if TP is enabled
    assert (tpTrain == tpEnable)

    # At least one of SP/TP must be enabled for a meaningful system
    assert (spEnable or tpEnable)

    # NOTE: SP and Spatial regression need to undergo training over the same
    #       set of rows. Since we're not reading the training dataset here to
    #       find out the number of rows, we presently configure both with the
    #       same auto-rewind setting.
    # TODO: this may cause knn training to repeatedly iterate unnecessarily
    #       over the same records in case spTrainIterationCount is larger than the
    #       number of rows in the training dataset. Look into optimizing this to
    #       avoid wasting time on knn training due to unnecessary iterations, but
    #       make sure that both SP and knn train on the exact same rows.
    spTrainMayNeedAutoRewind = config['spTrainIterationCount'] is not None

    # ----------------------------------------------------------------------------
    # SP training
    if spTrain:
        description['spTrain'] = []
        for i in xrange(config['spTrainNPasses']):
            stepDict = dict(
              name='sp.train.pass_%d' % (i),
              iterationCount=config['spTrainIterationCount'],
              setup=[sensorOpen(datasets[_getTrainingDatasetKey(config)]) if i==0 \
                       else sensorRewind,
                     fileSourceAutoRewind(spTrainMayNeedAutoRewind),],
              finish=[fileSourceAutoRewind(False),],
            )

            description['spTrain'].append(stepDict)

    elif spEnable:
        description['spTrain'] = dict(
            # need to train with one iteration just to initialize data structures
            # TODO: seems like a hack; shouldn't CLA framework automatically initialize
            #   the necessary subsystems? (ask Ron)
            iterationCount=1, )

    # ----------------------------------------------------------------------------
    # TP training
    if tpTrain:
        description['tpTrain'] = []
        mayNeedAutoRewind = config['tpTrainIterationCount'] is not None
        for i in xrange(config['tpTrainNPasses']):
            stepDict = dict(
              name='tp.train.pass_%d' % (i),
              iterationCount=config['tpTrainIterationCount'],
              setup=[
                sensorOpen(datasets[_getTrainingDatasetKey(config)]) if i==0 \
                  else sensorRewind,
                fileSourceAutoRewind(mayNeedAutoRewind),
                ],
              finish=[fileSourceAutoRewind(False),],
              )
            if config['tpTrainPrintStatsPeriodIter'] > 0:
                stepDict['iter'] = printTPTiming(
                    config['tpTrainPrintStatsPeriodIter'])
                stepDict['finish'] += [printTPTiming()]  #, printTPCells]

            description['tpTrain'].append(stepDict)

    # ----------------------------------------------------------------------------
    # Inference tests
    # NOTE: Presently, SP and TP learning is disabled during inference
    description['infer'] = []

    predictionFields = None
    spatialRegrTests = None
    if 'spFieldPredictionSchema' in config and config['spFieldPredictionSchema'] is not None:
        if len(config['spFieldPredictionSchema']['predictionFields']) > 0:
            spFieldPredictionSchema = config['spFieldPredictionSchema']
            predictionFields = spFieldPredictionSchema['predictionFields']
            if len(spFieldPredictionSchema['regressionTests']) > 0:
                # presently, our spatial regression modules (knn and linear) don't support
                # multiple fields
                assert (len(predictionFields) == 1)
                spatialRegrTests = spFieldPredictionSchema['regressionTests']

    # Set up test steps for all inference datasets
    for i, ds in enumerate(config['inferDatasets']):

        datasetInfo = config['inferDatasets'][i]

        # NOTE: the path/contents may differ from the corresponding dataset
        #       referenced in config['inferDatasets'] due to preprocessing (e.g.,
        #       aggregation)
        inferenceDatasetKey = \
          _datasetKeyFromInferenceDatasetIndex(index=i, config=config)
        inferenceDatasetPath = datasets[inferenceDatasetKey]

        # ----------------------------------------
        # Step: Temporal inference
        #
        if tpEnable:

            # Turn off plot histograms when running under darwin
            plotTemporalHistograms = True
            if sys.platform.startswith('darwin'):
                plotTemporalHistograms = False
                print "Turning off plotTemporalHistograms under darwin"

            temporalTestingStep = dict(
                name=getTemporalInferenceStepName(datasetInfo['alias'], i),
                iterationCount=ds['iterCount'],
                setup=[sensorOpen(inferenceDatasetPath)],
                ppOptions=dict(
                    verbosity=config['postprocVerbosity'],
                    plotTemporalHistograms=plotTemporalHistograms,
                    printLearnedCoincidences=False,
                    logPredictions=True,
                ))
            description['infer'].append(temporalTestingStep)
        else:
            print 'temporalTestingStep skipped.'

        # ----------------------------------------
        # Step: Non-temporal Regression algorithm training (if enabled)
        #
        if spatialRegrTests:
            # NOTE: we don't need auto-rewind when training spatial regression algorithms
            regrTrainStep = dict(
              name = ('%s_nontemporal.training') % \
                       (_normalizeDatasetAliasNameForStepName(datasetInfo['alias']),),
              iterationCount=config['spTrainIterationCount'],
              setup=[sensorOpen(datasets[_getTrainingDatasetKey(config)]),
                     fileSourceAutoRewind(spTrainMayNeedAutoRewind),],
              ppOptions = dict(verbosity=config['postprocVerbosity'],
                               printLearnedCoincidences=False,)
            )

            # Add Spatial Regression algorithm training requests
            ppOptions = regrTrainStep['ppOptions']
            for test in spatialRegrTests:
                assert (len(predictionFields) == 1)
                ppOptions[
                    test['algorithm']] = 'train,%s' % (predictionFields[0])

            description['infer'].append(regrTrainStep)

        # ----------------------------------------
        # Step: Non-temporal Inference
        #
        nontemporalTestingStep = dict(
            name=getNonTemporalInferenceStepName(datasetInfo['alias'], i),
            iterationCount=ds['iterCount'],
            setup=[
                sensorOpen(inferenceDatasetPath),
                fileSourceAutoRewind(False),
                # TODO Do we need to turn off collectStats in the 'finish' sub-step?
                setTPAttribute('collectStats', 1),
            ],
            # TODO which ppOptions do we want in this template?
            ppOptions=dict(
                verbosity=config['postprocVerbosity'],
                plotTemporalHistograms=False,
                printLearnedCoincidences=False,
                logPredictions=True,
            ),
        )

        # Add Spatial Field Prediction options to inference step
        if predictionFields:
            # Set sparse encodings of prediction fields to zero
            setup = nontemporalTestingStep['setup']
            setup.append(
                setAttribute('sensor', 'postEncodingFilters', [
                    ModifyFields(fields=predictionFields,
                                 operation='setToZero')
                ]))
        if spatialRegrTests:
            # Add regression test requests
            ppOptions = nontemporalTestingStep['ppOptions']
            for test in spatialRegrTests:
                assert (len(predictionFields) == 1)
                ppOptions[
                    test['algorithm']] = 'test,%s' % (predictionFields[0])

        description['infer'].append(nontemporalTestingStep)

    # ----------------------------------------------------------------------------
    # Add auto-reset intervals to the sensor region for tpTrain and Infer phases
    # (if config['sensorAutoReset'] is enabled)
    # ----------------------------------------------------------------------------
    if 'sensorAutoReset' in config and config['sensorAutoReset'] is not None:
        dd = defaultdict(lambda: 0, config['sensorAutoReset'])
        # class timedelta([days[, seconds[, microseconds[, milliseconds[, minutes[,
        #                 hours[, weeks]]]]]]])
        if not (0 == dd['days'] == dd['hours'] == dd['minutes'] == dd['seconds'] \
                == dd['milliseconds'] == dd['microseconds'] == dd['weeks']):
            timeDelta = timedelta(days=dd['days'],
                                  hours=dd['hours'],
                                  minutes=dd['minutes'],
                                  seconds=dd['seconds'],
                                  milliseconds=dd['milliseconds'],
                                  microseconds=dd['microseconds'],
                                  weeks=dd['weeks'])

            tpTrainSteps = description.get('tpTrain', [])
            inferSteps = description.get('infer', [])
            for step in itertools.chain(tpTrainSteps, inferSteps):
                if 'setup' not in step:
                    step['setup'] = []
                step['setup'].append(setAutoResetInterval(timeDelta))

    return description