Ejemplo n.º 1
0
def generateFirstOrderData(model, numIterations=10000, seqLength=5,
                           resets=True, suffix='train'):
  """
  Sample a first-order category sequence and write it to a .csv file.

  Every record holds a reset flag and a category name. The next category is
  drawn from the transition-table row of the current category, except at the
  start of a sequence when resets are enabled, where it is drawn from the
  initial distribution instead.

  Parameters:
  ----------------------------------------------------------------
  model:            (categoryList, initProbability, transitionTable) tuple;
                    initProbability and transitionTable must provide
                    cumsum() (presumably numpy arrays -- confirm with
                    callers)
  numIterations:    number of records to write
  seqLength:        number of records per sequence
  resets:           if True, the first record of every sequence is drawn
                    from initProbability and written with reset=1; if
                    False, every record has reset=0
  suffix:           last component of the output name
                    'fo_<numIterations>_<seqLength>_<suffix>.csv'
  """
  print("Creating %d iteration file with seqLength %d"
        % (numIterations, seqLength))
  categoryList, initProbability, transitionTable = model
  # Cumulative sums let a uniform draw be mapped to a category index with
  # searchsorted.
  initCumulative = initProbability.cumsum()
  transitionCumulative = transitionTable.cumsum(axis=1)

  outputFile = 'fo_%d_%d_%s.csv' % (numIterations, seqLength, suffix)
  print("Filename %s" % outputFile)
  fields = [('reset', 'int', 'R'), ('name', 'string', '')]
  o = File(outputFile, fields)

  # try/finally guarantees the output file is closed even when sampling or
  # writing raises part-way through.
  try:
    seqIdx = 0
    # Starting category; it only matters when resets is False, because with
    # resets the first iteration redraws from the initial distribution.
    rand = numpy.random.rand()
    catIdx = numpy.searchsorted(initCumulative, rand)
    for _ in range(numIterations):
      rand = numpy.random.rand()
      if seqIdx == 0 and resets:
        catIdx = numpy.searchsorted(initCumulative, rand)
        reset = 1
      else:
        catIdx = numpy.searchsorted(transitionCumulative[catIdx], rand)
        reset = 0

      o.write([reset, categoryList[catIdx]])
      seqIdx = (seqIdx + 1) % seqLength
  finally:
    o.close()
Ejemplo n.º 2
0
def makeDataset():
    """Convert 'numenta_air_Con.csv' into the './hotgym2.csv' record file.

    The first line of the input is skipped as a header; every following line
    is parsed with _parseLine and written as one output record. The first
    field of a record is printed whenever it differs from the first field of
    the previous record.

    Returns:
        (total, missing) -- NOTE(review): neither counter is updated anywhere
        in this function, so the result is always (0, 0); confirm what
        callers expect before changing it.
    """
    inputFile = 'numenta_air_Con.csv'

    fields = [('gym', 'string', 'S'), ('address', 'string', ''),
              ('timestamp', 'datetime', 'T'), ('consumption', 'float', '')]

    gymName = None

    # Never incremented below; kept so the return value stays unchanged.
    missing = 0
    total = 0

    # Create the output file by parsing the customer-supplied csv
    with File('./hotgym2.csv', fields) as o, open(inputFile) as f:
        # Skip header
        f.readline()

        # Iterating the file object yields the remaining lines one at a time
        for line in f:
            # Parse the fields in the current line
            record = _parseLine(line)

            # Write the parsed record to the output file
            o.write(record)

            # Report progress once per run of records sharing a first field
            if record[0] != gymName:
                gymName = record[0]
                print(gymName)

    return total, missing
Ejemplo n.º 3
0
def makeDataset():
    """Write the records of every club to the 'gym.csv' record file.

    The club table is built by processAttendanceFiles and then passed through
    processConsumptionFiles. For each club, its records are written in
    ascending order of their key in club.records; each output record is
    [club, timestamp, attendeeCount, consumption].
    """
    clubs = processAttendanceFiles()
    clubs = processConsumptionFiles(clubs)

    fields = [
        ('gym', 'string', 'S'),
        ('timestamp', 'datetime', 'T'),
        ('attendeeCount', 'int', ''),
        ('consumption', 'float', ''),
    ]
    with File('gym.csv', fields) as f:
        for c in clubs.values():
            # Sort on the key alone so record objects are never compared.
            for _, r in sorted(c.records.items(),
                               key=operator.itemgetter(0)):
                f.write([r.club, r.timestamp, r.attendeeCount, r.consumption])
Ejemplo n.º 4
0
def generateFirstOrderData(model,
                           numIterations=10000,
                           seqLength=5,
                           resets=True,
                           suffix='train'):
    """Sample a first-order category sequence and write it to a .csv file.

    Every record holds a reset flag and a category name. The next category
    is drawn from the transition-table row of the current category, except
    at the start of a sequence when resets are enabled, where it is drawn
    from the initial distribution instead.

    Parameters:
    ----------------------------------------------------------------
    model:            (categoryList, initProbability, transitionTable)
                      tuple; initProbability and transitionTable must
                      provide cumsum() (presumably numpy arrays -- confirm
                      with callers)
    numIterations:    number of records to write
    seqLength:        number of records per sequence
    resets:           if True, the first record of every sequence is drawn
                      from initProbability and written with reset=1; if
                      False, every record has reset=0
    suffix:           last component of the output name
                      'fo_<numIterations>_<seqLength>_<suffix>.csv'
    """
    print("Creating %d iteration file with seqLength %d" %
          (numIterations, seqLength))
    categoryList, initProbability, transitionTable = model
    # Cumulative sums let a uniform draw be mapped to a category index with
    # searchsorted.
    initCumulative = initProbability.cumsum()
    transitionCumulative = transitionTable.cumsum(axis=1)

    outputFile = 'fo_%d_%d_%s.csv' % (numIterations, seqLength, suffix)
    print("Filename", outputFile)
    fields = [('reset', 'int', 'R'), ('name', 'string', '')]
    o = File(outputFile, fields)

    # try/finally guarantees the output file is closed even when sampling or
    # writing raises part-way through.
    try:
        seqIdx = 0
        # Starting category; it only matters when resets is False, because
        # with resets the first iteration redraws from the initial
        # distribution.
        rand = numpy.random.rand()
        catIdx = numpy.searchsorted(initCumulative, rand)
        for _ in range(numIterations):
            rand = numpy.random.rand()
            if seqIdx == 0 and resets:
                catIdx = numpy.searchsorted(initCumulative, rand)
                reset = 1
            else:
                catIdx = numpy.searchsorted(transitionCumulative[catIdx],
                                            rand)
                reset = 0

            o.write([reset, categoryList[catIdx]])
            seqIdx = (seqIdx + 1) % seqLength
    finally:
        o.close()
Ejemplo n.º 5
0
def _generateFile(filename, data):
  """
  Write each row of data as one record of a .csv file.

  The file gets one 'float' field per column, named 'field1', 'field2', ...

  Parameters:
  ----------------------------------------------------------------
  filename:         name of .csv file to generate
  data:             2-D array whose shape is (numRecords, numFields) and
                    whose rows provide tolist() (presumably a numpy array
                    -- confirm with callers)
  """

  # Create the file
  print("Creating %s..." % (filename))
  numRecords, numFields = data.shape

  fields = [('field%d' % (i + 1), 'float', '') for i in range(numFields)]
  outFile = File(filename, fields)

  # try/finally guarantees the file is closed even if a write raises.
  try:
    for i in range(numRecords):
      outFile.write(data[i].tolist())
  finally:
    outFile.close()