def generateFirstOrderData(model, numIterations=10000, seqLength=5,
                           resets=True, suffix='train'):
    """
    Sample a first-order (Markov chain) category sequence and write it to a
    file named 'fo_<numIterations>_<seqLength>_<suffix>.csv'.

    Parameters:
    ----------------------------------------------------------------
    model:          3-tuple (categoryList, initProbability, transitionTable).
                    initProbability must support cumsum() and
                    transitionTable must support cumsum(axis=1) and be
                    indexable by category index (presumably numpy arrays
                    whose rows sum to 1 -- confirm against the caller).
    numIterations:  number of records to write
    seqLength:      number of records per sequence; the position within
                    the sequence wraps around modulo this value
    resets:         if True, the first record of every sequence is drawn
                    from initProbability and written with reset=1; if
                    False, every record is a transition from the previous
                    category and reset is always 0
    suffix:         trailing component of the output file name
    """
    print("Creating %d iteration file with seqLength %d"
          % (numIterations, seqLength))

    categoryList, initProbability, transitionTable = model

    # Convert the distributions to cumulative form so a uniform draw can be
    # mapped to a category index with searchsorted.
    initProbability = initProbability.cumsum()
    transitionTable = transitionTable.cumsum(axis=1)

    outputFile = 'fo_%d_%d_%s.csv' % (numIterations, seqLength, suffix)
    # Single-argument print so the output is the same on Python 2 and 3.
    print("Filename %s" % outputFile)

    fields = [('reset', 'int', 'R'), ('name', 'string', '')]
    outFile = File(outputFile, fields)
    try:
        seqIdx = 0
        # Seed the chain from the initial distribution. This value is the
        # starting state when resets is False; the draw is kept in all cases
        # so the random stream is consumed exactly as before.
        rand = numpy.random.rand()
        catIdx = numpy.searchsorted(initProbability, rand)

        for _ in range(numIterations):
            rand = numpy.random.rand()
            if seqIdx == 0 and resets:
                # Start of a new sequence: draw from the initial distribution.
                catIdx = numpy.searchsorted(initProbability, rand)
                reset = 1
            else:
                # Transition from the previous category.
                catIdx = numpy.searchsorted(transitionTable[catIdx], rand)
                reset = 0

            outFile.write([reset, categoryList[catIdx]])
            seqIdx = (seqIdx + 1) % seqLength
    finally:
        # Always release the file, even if sampling or writing fails.
        outFile.close()
def generateFirstOrderData(model, numIterations=10000, seqLength=5,
                           resets=True, suffix='train'):
    """
    Sample a first-order (Markov chain) category sequence and write it to a
    file named 'fo_<numIterations>_<seqLength>_<suffix>.csv'.

    Parameters:
    ----------------------------------------------------------------
    model:          3-tuple (categoryList, initProbability, transitionTable).
                    initProbability must support cumsum() and
                    transitionTable must support cumsum(axis=1) and be
                    indexable by category index (presumably numpy arrays
                    whose rows sum to 1 -- confirm against the caller).
    numIterations:  number of records to write
    seqLength:      number of records per sequence; the position within
                    the sequence wraps around modulo this value
    resets:         if True, the first record of every sequence is drawn
                    from initProbability and written with reset=1; if
                    False, every record is a transition from the previous
                    category and reset is always 0
    suffix:         trailing component of the output file name
    """
    print("Creating %d iteration file with seqLength %d"
          % (numIterations, seqLength))

    categoryList, initProbability, transitionTable = model

    # Convert the distributions to cumulative form so a uniform draw can be
    # mapped to a category index with searchsorted.
    initProbability = initProbability.cumsum()
    transitionTable = transitionTable.cumsum(axis=1)

    outputFile = 'fo_%d_%d_%s.csv' % (numIterations, seqLength, suffix)
    # Single-argument print so the output is the same on Python 2 and 3.
    print("Filename %s" % outputFile)

    fields = [('reset', 'int', 'R'), ('name', 'string', '')]
    outFile = File(outputFile, fields)
    try:
        seqIdx = 0
        # Seed the chain from the initial distribution. This value is the
        # starting state when resets is False; the draw is kept in all cases
        # so the random stream is consumed exactly as before.
        rand = numpy.random.rand()
        catIdx = numpy.searchsorted(initProbability, rand)

        for _ in range(numIterations):
            rand = numpy.random.rand()
            if seqIdx == 0 and resets:
                # Start of a new sequence: draw from the initial distribution.
                catIdx = numpy.searchsorted(initProbability, rand)
                reset = 1
            else:
                # Transition from the previous category.
                catIdx = numpy.searchsorted(transitionTable[catIdx], rand)
                reset = 0

            outFile.write([reset, categoryList[catIdx]])
            seqIdx = (seqIdx + 1) % seqLength
    finally:
        # Always release the file, even if sampling or writing fails.
        outFile.close()
def _generateFile(filename, data):
    """
    Write each row of a 2-D data set to a file as one record of float fields
    named 'field1', 'field2', ...

    Parameters:
    ----------------------------------------------------------------
    filename:   name of .csv file to generate
    data:       object whose ``shape`` unpacks to (numRecords, numFields)
                and whose rows provide ``tolist()`` (presumably a 2-D
                numpy array -- confirm against the caller)
    """
    # Create the file
    print("Creating %s..." % (filename))
    numRecords, numFields = data.shape

    fields = [('field%d' % (i + 1), 'float', '') for i in range(numFields)]
    outFile = File(filename, fields)
    try:
        for i in range(numRecords):
            outFile.write(data[i].tolist())
    finally:
        # Always release the file, even if a write fails part-way through.
        outFile.close()