def generateFirstOrderData(model, numIterations=10000, seqLength=5,
                           resets=True, suffix='train'):
    """Write a CSV of categories sampled from a first-order transition model.

    The output file is named 'fo_<numIterations>_<seqLength>_<suffix>.csv'
    and holds two fields per record: 'reset' (int) and 'name' (string).

    Parameters:
    ----------------------------------------------------------------
    model:          (categoryList, initProbability, transitionTable).
                    initProbability and transitionTable must provide
                    cumsum(); the table is accumulated along axis 1 and
                    indexed by the previously drawn category index.
    numIterations:  number of records to write
    seqLength:      number of records per sequence
    resets:         if True, the first record of every sequence is drawn
                    from initProbability and written with reset=1; if
                    False, every record is drawn from the transition table
                    and written with reset=0
    suffix:         last component of the output file name
    """
    print("Creating %d iteration file with seqLength %d"
          % (numIterations, seqLength))

    categoryList, initProbability, transitionTable = model
    # Cumulative sums let a uniform draw be mapped to a category index
    # with a binary search.
    # NOTE(review): searchsorted returns len(array) when the draw exceeds
    # the last cumulative value, so rows are presumably expected to sum
    # to 1 - confirm against the callers.
    initProbability = initProbability.cumsum()
    transitionTable = transitionTable.cumsum(axis=1)

    outputFile = 'fo_%d_%d_%s.csv' % (numIterations, seqLength, suffix)
    print("Filename %s" % outputFile)
    fields = [('reset', 'int', 'R'), ('name', 'string', '')]

    # Context manager so the stream is released even if sampling or a
    # write raises part-way through.
    with File(outputFile, fields) as o:
        seqIdx = 0
        # Starting category. It is used as the "previous" category of the
        # first record when resets is False; with resets on it is redrawn
        # immediately, but the draw is kept so the random stream is
        # consumed exactly as before.
        rand = numpy.random.rand()
        catIdx = numpy.searchsorted(initProbability, rand)
        for _ in xrange(numIterations):
            rand = numpy.random.rand()
            if seqIdx == 0 and resets:
                catIdx = numpy.searchsorted(initProbability, rand)
                reset = 1
            else:
                catIdx = numpy.searchsorted(transitionTable[catIdx], rand)
                reset = 0
            o.write([reset, categoryList[catIdx]])
            seqIdx = (seqIdx + 1) % seqLength
def makeDataset():
    """Convert 'numenta_air_Con.csv' into the record file './hotgym2.csv'.

    The header line of the input is skipped. Every following line is parsed
    with _parseLine() and written as one record with the fields gym,
    address, timestamp and consumption. Whenever the first element of a
    parsed record differs from that of the previous line, it is printed.

    Returns (total, missing). Neither counter is updated anywhere in this
    function, so the result is always (0, 0).
    """
    sourcePath = 'numenta_air_Con.csv'
    schema = [
        ('gym', 'string', 'S'),
        ('address', 'string', ''),
        ('timestamp', 'datetime', 'T'),
        ('consumption', 'float', ''),
    ]
    total = 0
    missing = 0
    currentGym = None

    # Build the output file by parsing the customer-supplied csv.
    with File('./hotgym2.csv', schema) as out:
        with open(sourcePath) as src:
            # Discard the header row before the data lines.
            src.readline()
            for rawLine in src:
                rec = _parseLine(rawLine)
                out.write(rec)
                # Report each gym once, when its first record appears.
                if rec[0] != currentGym:
                    currentGym = rec[0]
                    print(currentGym)

    return total, missing
def makeDataset():
    """Build 'gym.csv' from the attendance and consumption data.

    Club data comes from processAttendanceFiles() and is then passed
    through processConsumptionFiles(). For every club, the entries of its
    `records` mapping are visited in ascending key order and each one is
    written as a record: (club, timestamp, attendeeCount, consumption).
    """
    clubs = processConsumptionFiles(processAttendanceFiles())

    schema = [
        ('gym', 'string', 'S'),
        ('timestamp', 'datetime', 'T'),
        ('attendeeCount', 'int', ''),
        ('consumption', 'float', ''),
    ]
    # Order each club's (key, record) pairs by key only.
    byKey = operator.itemgetter(0)

    with File('gym.csv', schema) as out:
        for club in clubs.values():
            for _, rec in sorted(club.records.iteritems(), key=byKey):
                out.write([rec.club, rec.timestamp,
                           rec.attendeeCount, rec.consumption])
def generateFirstOrderData(model, numIterations=10000, seqLength=5,
                           resets=True, suffix='train'):
    """Write a CSV of categories sampled from a first-order transition model.

    The output file is named 'fo_<numIterations>_<seqLength>_<suffix>.csv'
    and holds two fields per record: 'reset' (int) and 'name' (string).

    Parameters:
    ----------------------------------------------------------------
    model:          (categoryList, initProbability, transitionTable).
                    initProbability and transitionTable must provide
                    cumsum(); the table is accumulated along axis 1 and
                    indexed by the previously drawn category index.
    numIterations:  number of records to write
    seqLength:      number of records per sequence
    resets:         if True, the first record of every sequence is drawn
                    from initProbability and written with reset=1; if
                    False, every record is drawn from the transition table
                    and written with reset=0
    suffix:         last component of the output file name
    """
    print("Creating %d iteration file with seqLength %d"
          % (numIterations, seqLength))

    categoryList, initProbability, transitionTable = model
    # Cumulative sums let a uniform draw be mapped to a category index
    # with a binary search.
    # NOTE(review): searchsorted returns len(array) when the draw exceeds
    # the last cumulative value, so rows are presumably expected to sum
    # to 1 - confirm against the callers.
    initProbability = initProbability.cumsum()
    transitionTable = transitionTable.cumsum(axis=1)

    outputFile = 'fo_%d_%d_%s.csv' % (numIterations, seqLength, suffix)
    print("Filename", outputFile)
    fields = [('reset', 'int', 'R'), ('name', 'string', '')]

    # Context manager so the stream is released even if sampling or a
    # write raises part-way through.
    with File(outputFile, fields) as o:
        seqIdx = 0
        # Starting category. It is used as the "previous" category of the
        # first record when resets is False; with resets on it is redrawn
        # immediately, but the draw is kept so the random stream is
        # consumed exactly as before.
        rand = numpy.random.rand()
        catIdx = numpy.searchsorted(initProbability, rand)
        for _ in range(numIterations):
            rand = numpy.random.rand()
            if seqIdx == 0 and resets:
                catIdx = numpy.searchsorted(initProbability, rand)
                reset = 1
            else:
                catIdx = numpy.searchsorted(transitionTable[catIdx], rand)
                reset = 0
            o.write([reset, categoryList[catIdx]])
            seqIdx = (seqIdx + 1) % seqLength
def _generateFile(filename, data):
    """Write a two-dimensional array to a .csv file, one record per row.

    Parameters:
    ----------------------------------------------------------------
    filename:   name of .csv file to generate
    data:       object whose `shape` unpacks to (numRecords, numFields)
                and whose rows, obtained as data[i], provide tolist().
                Fields are declared with type 'float' and named
                'field1' .. 'field<numFields>'.
    """
    # Create the file
    print("Creating %s..." % (filename))
    numRecords, numFields = data.shape
    fields = [('field%d' % (i + 1), 'float', '') for i in range(numFields)]

    # Context manager so the stream is released even if converting or
    # writing a row raises part-way through.
    with File(filename, fields) as outFile:
        for i in xrange(numRecords):
            outFile.write(data[i].tolist())