Example #1
0
def generateFirstOrderData(model, numIterations=10000, seqLength=5,
                           resets=True, suffix='train'):
  """
  Sample category names from a first-order (Markov) model and write them,
  one record per iteration, to 'fo_<numIterations>_<seqLength>_<suffix>.csv'.

  Parameters:
  ----------------------------------------------------------------
  model:            (categoryList, initProbability, transitionTable) triple.
                    categoryList is indexed by the sampled category index;
                    initProbability and transitionTable are converted to
                    cumulative form with cumsum() (along axis 1 for the
                    table), so they are presumably a numpy probability
                    vector and a row-stochastic matrix -- confirm with caller.
  numIterations:    number of records to write
  seqLength:        number of records per sequence (must be non-zero, it is
                    used as a modulus)
  resets:           if True, the first record of each sequence is drawn from
                    initProbability and written with reset=1; every other
                    record is drawn from the transition row of the previous
                    category and written with reset=0. If False, every
                    record is drawn from the transition table.
  suffix:           last component of the output file name
  """

  # Single-argument, parenthesised prints behave the same whether print is a
  # statement or a function.
  print("Creating %d iteration file with seqLength %d"
        % (numIterations, seqLength))
  categoryList, initProbability, transitionTable = model
  initProbability = initProbability.cumsum()
  transitionTable = transitionTable.cumsum(axis=1)

  outputFile = 'fo_%d_%d_%s.csv' % (numIterations, seqLength, suffix)
  print("Filename %s" % outputFile)
  fields = [('reset', 'int', 'R'), ('name', 'string', '')]
  o = File(outputFile, fields)

  # Floating-point rounding can leave the last cumulative value slightly
  # below 1.0, in which case searchsorted() returns one past the end; clamp
  # the result to the last valid category.
  lastInitIdx = len(initProbability) - 1
  lastTransIdx = transitionTable.shape[1] - 1

  try:
    seqIdx = 0
    # Starting category. It only matters as the "previous" category of the
    # first record when resets is False; otherwise it is overwritten below.
    rand = numpy.random.rand()
    catIdx = min(numpy.searchsorted(initProbability, rand), lastInitIdx)
    for i in xrange(numIterations):
      rand = numpy.random.rand()
      if seqIdx == 0 and resets:
        # Start of a sequence: draw from the initial distribution.
        catIdx = min(numpy.searchsorted(initProbability, rand), lastInitIdx)
        reset = 1
      else:
        # Draw the next category conditioned on the previous one.
        catIdx = min(numpy.searchsorted(transitionTable[catIdx], rand),
                     lastTransIdx)
        reset = 0

      o.write([reset, categoryList[catIdx]])
      seqIdx = (seqIdx + 1) % seqLength
  finally:
    # Release the output file even if sampling or writing fails.
    o.close()
Example #2
0
def generateFirstOrderData(model,
                           numIterations=10000,
                           seqLength=5,
                           resets=True,
                           suffix='train'):
    """Write records sampled from a first-order (Markov) model to a CSV file.

    The output file is named ``fo_<numIterations>_<seqLength>_<suffix>.csv``
    and receives one ``[reset, name]`` record per iteration.

    Args:
        model: ``(categoryList, initProbability, transitionTable)`` triple.
            ``categoryList`` is indexed by the sampled category index.
            ``initProbability`` and ``transitionTable`` are converted to
            cumulative form with ``cumsum()`` (along axis 1 for the table),
            so they are presumably a numpy probability vector and a
            row-stochastic matrix -- confirm with the caller.
        numIterations: Number of records to write.
        seqLength: Number of records per sequence. Must be non-zero because
            it is used as a modulus.
        resets: If true, the first record of each sequence is drawn from
            ``initProbability`` and written with ``reset=1``; every other
            record is drawn from the transition row of the previous category
            and written with ``reset=0``. If false, every record is drawn
            from the transition table.
        suffix: Last component of the output file name.
    """
    print("Creating %d iteration file with seqLength %d" %
          (numIterations, seqLength))
    categoryList, initProbability, transitionTable = model
    initProbability = initProbability.cumsum()
    transitionTable = transitionTable.cumsum(axis=1)

    outputFile = 'fo_%d_%d_%s.csv' % (numIterations, seqLength, suffix)
    print("Filename", outputFile)
    fields = [('reset', 'int', 'R'), ('name', 'string', '')]
    o = File(outputFile, fields)

    # Floating-point rounding can leave the last cumulative value slightly
    # below 1.0, in which case searchsorted() returns one past the end;
    # clamp the result to the last valid category.
    lastInitIdx = len(initProbability) - 1
    lastTransIdx = transitionTable.shape[1] - 1

    try:
        seqIdx = 0
        # Starting category. It only matters as the "previous" category of
        # the first record when resets is false; otherwise it is overwritten.
        rand = numpy.random.rand()
        catIdx = min(numpy.searchsorted(initProbability, rand), lastInitIdx)
        for _ in range(numIterations):
            rand = numpy.random.rand()
            if seqIdx == 0 and resets:
                # Start of a sequence: draw from the initial distribution.
                catIdx = min(numpy.searchsorted(initProbability, rand),
                             lastInitIdx)
                reset = 1
            else:
                # Draw the next category conditioned on the previous one.
                catIdx = min(
                    numpy.searchsorted(transitionTable[catIdx], rand),
                    lastTransIdx)
                reset = 0

            o.write([reset, categoryList[catIdx]])
            seqIdx = (seqIdx + 1) % seqLength
    finally:
        # Release the output file even if sampling or writing fails.
        o.close()
Example #3
0
def _generateFile(filename, data):
  """
  Write every row of a 2-D data set to a .csv file through File.

  The file gets one 'float' field per column, named 'field1', 'field2', ...

  Parameters:
  ----------------------------------------------------------------
  filename:         name of .csv file to generate
  data:             object with a two-element shape (numRecords, numFields)
                    whose rows support tolist(); presumably a 2-D numpy
                    array -- confirm with caller.
  """

  # Create the file. The single-argument, parenthesised print behaves the
  # same whether print is a statement or a function.
  print("Creating %s..." % (filename))
  numRecords, numFields = data.shape

  fields = [('field%d' % (i + 1), 'float', '') for i in range(numFields)]
  outFile = File(filename, fields)

  try:
    for i in xrange(numRecords):
      outFile.write(data[i].tolist())
  finally:
    # Release the output file even if converting or writing a row fails.
    outFile.close()