Exemple #1
0
    def save(self, ncFilename):
        seqLengths = np.array([seq.shape[0] for seq in self.sequences], dtype='int32')
        seqDims = seqLengths[:, None]
        inputs = np.vstack(self.sequences).astype('float32')

        print '---------------------------------------'
        if config.RNN_NORM:
            default_mean_std = path.join(path.dirname(ncFilename), 'mean_std.pickle')
            inputs = self.normalize(inputs, default_mean_std)

        #create a new .nc file
        f = netcdf_helpers.NetCDFFile(ncFilename, 'w')

        #create the dimensions
        netcdf_helpers.createNcDim(f,'numSeqs',len(seqLengths))
        netcdf_helpers.createNcDim(f,'numTimesteps',len(inputs))
        netcdf_helpers.createNcDim(f,'inputPattSize',len(inputs[0]))
        netcdf_helpers.createNcDim(f,'numDims',1)
        netcdf_helpers.createNcDim(f,'numLabels',len(self.labels))

        #create the variables
        netcdf_helpers.createNcStrings(f,'seqTags',self.seqTags,('numSeqs','maxSeqTagLength'),'sequence tags')
        netcdf_helpers.createNcStrings(f,'labels',self.labels,('numLabels','maxLabelLength'),'labels')
        netcdf_helpers.createNcStrings(f,'targetStrings',self.targetStrings,('numSeqs','maxTargStringLength'),'target strings')
        netcdf_helpers.createNcVar(f,'seqLengths',seqLengths,'i',('numSeqs',),'sequence lengths')
        netcdf_helpers.createNcVar(f,'seqDims',seqDims,'i',('numSeqs','numDims'),'sequence dimensions')
        netcdf_helpers.createNcVar(f,'inputs',inputs,'f',('numTimesteps','inputPattSize'),'input patterns')

        #write the data to disk
        print 'closing file', ncFilename
        f.close()
	def save_to_ncFile(ncFilename, labels, inputs, targetStrings, seqLengths, seqDims, seqTags=None):
		"""
		Builds the nc file from the given variables.
		"""
		#create a new .nc file
		file = netcdf_helpers.NetCDFFile(ncFilename, 'w')

		#create the dimensions
		netcdf_helpers.createNcDim(file, 'numSeqs', len(seqLengths))
		netcdf_helpers.createNcDim(file, 'numTimesteps', len(inputs))
		netcdf_helpers.createNcDim(file, 'inputPattSize', len(inputs[0]))
		netcdf_helpers.createNcDim(file, 'numDims', 1)
		netcdf_helpers.createNcDim(file, 'numLabels', len(labels))

		#create the variables
		if seqTags is not None:
			netcdf_helpers.createNcStrings(file, 'seqTags', seqTags, ('numSeqs', 'maxSeqTagLength'), 'sequence tags')
		netcdf_helpers.createNcStrings(file, 'labels', labels, ('numLabels', 'maxLabelLength'), 'labels')
		if targetStrings is not None:
			netcdf_helpers.createNcStrings(file, 'targetStrings', targetStrings, ('numSeqs', 'maxTargStringLength'),
			                               'target strings')
		netcdf_helpers.createNcVar(file, 'seqLengths', seqLengths, 'i', ('numSeqs',), 'sequence lengths')
		netcdf_helpers.createNcVar(file, 'seqDims', seqDims, 'i', ('numSeqs', 'numDims'), 'sequence dimensions')
		netcdf_helpers.createNcVar(file, 'inputs', inputs, 'f', ('numTimesteps', 'inputPattSize'), 'input patterns')

		#write the data to disk
		print "closing file", ncFilename
		file.close()
Exemple #3
0
#    print point
# for s in wordTargetStrings:
#    print s
ncFile = netcdf_helpers.NetCDFFile(ncFileName, 'w')

netcdf_helpers.createNcDim(ncFile, 'numSeqs', len(seqLengths))
netcdf_helpers.createNcDim(ncFile, 'numTimesteps', len(inputs))
netcdf_helpers.createNcDim(ncFile, 'inputPattSize', len(inputs[0]))
netcdf_helpers.createNcDim(ncFile, 'numDims', 1)
netcdf_helpers.createNcDim(ncFile, 'numLabels', len(labels))

netcdf_helpers.createNcStrings(ncFile, 'seqTags', seqTags,
                               ('numSeqs', 'maxSeqTagLength'), 'sequence tags')
netcdf_helpers.createNcStrings(ncFile, 'labels', labels,
                               ('numLabels', 'maxLabelLength'), 'labels')
netcdf_helpers.createNcStrings(ncFile, 'targetStrings', targetStrings,
                               ('numSeqs', 'maxTargetStringLength'),
                               'target strings')
netcdf_helpers.createNcStrings(ncFile, 'wordTargetStrings', wordTargetStrings,
                               ('numSeqs', 'maxWordTargetStringLength'),
                               'word target strings')
netcdf_helpers.createNcVar(ncFile, 'seqLengths', seqLengths, 'i',
                           ('numSeqs', ), 'seq lengths')
netcdf_helpers.createNcVar(ncFile, 'seqDims', seqDims, 'i',
                           ('numSeqs', 'numDims'), 'sequence dimensions')
netcdf_helpers.createNcVar(ncFile, 'inputs', inputs, 'f',
                           ('numTimesteps', 'inputPattSize'), 'input patterns')

print "closing file", ncFileName
ncFile.close()
#Later
#inputs = ((array(inputs)-inputMeans)/inputStds).tolist()

#print inputs
# print len(labels), labels
# print labels

#create a new .nc file
file = netcdf_helpers.NetCDFFile(ncFilename, 'w')

#create the dimensions
netcdf_helpers.createNcDim(file,'numSeqs',len(seqLengths))
netcdf_helpers.createNcDim(file,'numTimesteps',len(inputs))
netcdf_helpers.createNcDim(file,'inputPattSize',len(inputs[0]))
netcdf_helpers.createNcDim(file,'numDims',1)
netcdf_helpers.createNcDim(file,'numLabels',len(labels))

#create the variables
netcdf_helpers.createNcStrings(file,'seqTags',seqTags,('numSeqs','maxSeqTagLength'),'sequence tags')
netcdf_helpers.createNcStrings(file,'labels',labels,('numLabels','maxLabelLength'),'labels')
netcdf_helpers.createNcStrings(file,'targetStrings',targetStrings,('numSeqs','maxTargStringLength'),'target strings')
netcdf_helpers.createNcStrings(file,'wordTargetStrings',wordTargetStrings,('numSeqs','maxWordTargStringLength'),'word target strings')
netcdf_helpers.createNcVar(file,'seqLengths',seqLengths,'i',('numSeqs',),'sequence lengths')
netcdf_helpers.createNcVar(file,'seqDims',seqDims,'i',('numSeqs','numDims'),'sequence dimensions')
print inputs
netcdf_helpers.createNcVar(file,'inputs',inputs,'f',('numTimesteps','inputPattSize'),'input patterns')

#write the data to disk
print "closing file", ncFilename
file.close()
Exemple #5
0
	inputMeans=mean(outputArray[:options.maxArraySize],0)
if options.booleanColomn > 0:
        print "dont normalize boolean colomn", options.booleanColomn
        inputStds[options.booleanColomn] = 1
        inputMeans[options.booleanColomn] = 0
print inputStds
print inputMeans

for p in range(len(inputStds)):
	if (inputStds[p]>0):
		if options.bigFile:
			offset = 0
			step = options.maxArraySize
			while offset < len(outputArray):
				max = min (offset+step, len(outputArray))
				outputArray[offset:max,p] = (outputArray[offset:max,p] - inputMeans[p])/inputStds[p]
				offset += step
		else:
			outputArray[:,p]=(outputArray[:,p]-inputMeans[p])/inputStds[p]

outfile = netcdf_helpers.NetCDFFile(outputFilename, 'w')

for d in inputVar.dimensions:
	netcdf_helpers.createNcDim(outfile,d,infile.dimensions[d])

if options.stdMeanFilename == "":
	netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Means',inputMeans,'f',(inputVar.dimensions[1],),'input means')
	netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Stds',inputStds,'f',(inputVar.dimensions[1],),'input std deviations')
netcdf_helpers.createNcVar(outfile,options.outputArrayName,outputArray,'f',inputVar.dimensions,options.inputArrayName+' adjusted for mean 0 and std dev 1')
outfile.close()
Exemple #6
0
        inputs[firstIx] = array([0, 0, 0])
        firstIx += seqLengths[i]


#create a new .nc file
print ("open file %s", ncFilename)
file = netcdf_helpers.NetCDFFile(ncFilename, 'w')

#create the dimensions
netcdf_helpers.createNcDim(file,'numSeqs',len(seqLengths))
netcdf_helpers.createNcDim(file,'numTimesteps',len(inputs))
netcdf_helpers.createNcDim(file,'predNumTimesteps',len(predictions))
netcdf_helpers.createNcDim(file,'inputPattSize',len(inputs[0]))
netcdf_helpers.createNcDim(file,'numDims',1)


#create the variables
netcdf_helpers.createNcStrings(file,'seqTags',seqTags,('numSeqs','maxSeqTagLength'),'sequence tags')
netcdf_helpers.createNcStrings(file,'targetStrings',targetStrings,('numSeqs','maxTargStringLength'),'target strings')
netcdf_helpers.createNcStrings(file,'wordTargetStrings',wordTargetStrings,('numSeqs','maxWordTargStringLength'),'word target strings')
netcdf_helpers.createNcVar(file,'seqLengths',seqLengths,'i',('numSeqs',),'sequence lengths')
netcdf_helpers.createNcVar(file,'seqDims',seqDims,'i',('numSeqs','numDims'),'sequence dimensions')
netcdf_helpers.createNcVar(file,'inputs',inputs,'f',('numTimesteps','inputPattSize'),'input patterns')
netcdf_helpers.createNcVar(file,'predSeqLengths', predSeqLengths,'i',('numSeqs',),'pred sequence lengths')
netcdf_helpers.createNcVar(file,'targetSeqDims', targetSeqDims,'i',('numSeqs','numDims'),'pred sequence dimensions')
netcdf_helpers.createNcVar(file,'targetPatterns', predictions,'f',('predNumTimesteps','inputPattSize'),'prediction patterns')

#write the data to disk
print "closing file", ncFilename
file.close()
#        if x == p[-1]:
#            print >> outData, x
#        else:
#            print >> outData, x,

ncFile = netcdf_helpers.NetCDFFile(ncFileName, 'w')

netcdf_helpers.createNcDim(ncFile, 'numSeqs', len(seqLengths))
netcdf_helpers.createNcDim(ncFile, 'numTimesteps', len(inputs))
netcdf_helpers.createNcDim(ncFile, 'inputPattSize', len(inputs[0]))
netcdf_helpers.createNcDim(ncFile, 'targetPattSize', len(targetPatterns[0]))
netcdf_helpers.createNcDim(ncFile, 'targetClassSize', len(targetClasses[0]))
netcdf_helpers.createNcDim(ncFile, 'numDims', 1)
netcdf_helpers.createNcDim(ncFile, 'numLabels', len(labels))
netcdf_helpers.createNcDim(ncFile, 'numChars', len(chars))
netcdf_helpers.createNcDim(ncFile, 'classOutputSize', len(labels))

netcdf_helpers.createNcStrings(ncFile, 'seqTags', seqTags, ('numSeqs', 'maxSeqTagLength'), 'sequence tags') 
netcdf_helpers.createNcStrings(ncFile, 'labels', labels, ('numLabels', 'maxLabelLength'), 'labels')
netcdf_helpers.createNcStrings(ncFile, 'chars', chars, ('numChars', 'maxCharLength'), 'chars')
netcdf_helpers.createNcStrings(ncFile, 'targetStrings', targetStrings, ('numSeqs', 'maxTargetStringLength'), 'target strings')
netcdf_helpers.createNcStrings(ncFile, 'wordTargetStrings', wordTargetStrings, ('numSeqs', 'maxWordTargetStringLength'), 'word target strings')
netcdf_helpers.createNcVar(ncFile, 'seqLengths', seqLengths, 'i', ('numSeqs', ), 'seq lengths')
netcdf_helpers.createNcVar(ncFile, 'seqDims', seqDims, 'i', ('numSeqs', 'numDims'), 'sequence dimensions')
netcdf_helpers.createNcVar(ncFile, 'inputs', inputs, 'f', ('numTimesteps', 'inputPattSize'), 'input patterns')
netcdf_helpers.createNcVar(ncFile, 'targetPatterns', targetPatterns, 'f', ('numTimesteps', 'targetPattSize'), 'real_t target patterns')
netcdf_helpers.createNcVar(ncFile, 'targetClasses', targetClasses, 'i', ('numTimesteps', 'targetClassSize'), 'class target patterns')

print "closing file", ncFileName
ncFile.close()
Exemple #8
0
    for i in image.getdata():
        inputs[offset][0] = (float(i) - inputMean) / inputStd
        offset += 1

# create a new .nc file
file = netcdf_helpers.netCDF4.Dataset(outputFilename, "w")

# create the dimensions
netcdf_helpers.createNcDim(file, "numSeqs", len(seqLengths))
netcdf_helpers.createNcDim(file, "numTimesteps", len(inputs))
netcdf_helpers.createNcDim(file, "inputPattSize", len(inputs[0]))
netcdf_helpers.createNcDim(file, "numDims", 2)
netcdf_helpers.createNcDim(file, "numLabels", len(labels))

# create the variables
netcdf_helpers.createNcStrings(file, "seqTags", seqTags, ("numSeqs", "maxSeqTagLength"), "sequence tags")
netcdf_helpers.createNcStrings(file, "labels", labels, ("numLabels", "maxLabelLength"), "labels")
netcdf_helpers.createNcStrings(
    file, "wordTargetStrings", wordTargetStrings, ("numSeqs", "maxWordTargStringLength"), "target strings"
)
netcdf_helpers.createNcStrings(
    file, "targetStrings", targetStrings, ("numSeqs", "maxTargStringLength"), "target strings"
)
netcdf_helpers.createNcVar(file, "seqLengths", seqLengths, "i", ("numSeqs",), "sequence lengths")
netcdf_helpers.createNcVar(file, "seqDims", seqDims, "i", ("numSeqs", "numDims"), "sequence dimensions")
netcdf_helpers.createNcVar(file, "inputs", inputs, "f", ("numTimesteps", "inputPattSize"), "input patterns")

# write the data to disk
print "writing data to", outputFilename
file.close()
Exemple #9
0
	print "calculating std deviations"
	inputStds=Std(outputArray[:options.maxArraySize],0)
	print "calculating means"
	inputMeans=mean(outputArray[:options.maxArraySize],0)

print inputStds
print inputMeans

for p in range(len(inputStds)):
	if (inputStds[p]>0):
		if options.bigFile:
			offset = 0
			step = options.maxArraySize
			while offset < len(outputArray):
				max = min (offset+step, len(outputArray))
				outputArray[offset:max,p] = (outputArray[offset:max,p] - inputMeans[p])/inputStds[p]
				offset += step
		else:
			outputArray[:,p]=(outputArray[:,p]-inputMeans[p])/inputStds[p]

outfile = netcdf_helpers.NetCDFFile(outputFilename, 'w')

for d in inputVar.dimensions:
	netcdf_helpers.createNcDim(outfile,d,infile.dimensions[d])

if options.stdMeanFilename == "":
	netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Means',inputMeans,'f',(inputVar.dimensions[1],),'input means')
	netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Stds',inputStds,'f',(inputVar.dimensions[1],),'input std deviations')
netcdf_helpers.createNcVar(outfile,options.outputArrayName,outputArray,'f',inputVar.dimensions,options.inputArrayName+' adjusted for mean 0 and std dev 1')
outfile.close()
Exemple #10
0
#        offset += 1


#create a new .nc file
file = netcdf_helpers.NetCDFFile(outputFilename, "w")

#create the dimensions
netcdf_helpers.createNcDim(file, "numSeqs", len(seqLengths))
netcdf_helpers.createNcDim(file,"numTimesteps", len(inputs))
netcdf_helpers.createNcDim(file, "inputPattSize", len(inputs[0]))
netcdf_helpers.createNcDim(file,"numDims", 2)
netcdf_helpers.createNcDim(file,"numLabels", len(labels))


#create the variables
netcdf_helpers.createNcStrings(file, "seqTags", seqTags, ("numSeqs", "maxSeqTagLength"), "sequence tags")
netcdf_helpers.createNcStrings(file, "labels", labels, ("numLabels", "maxLabelLength"), "labels")
netcdf_helpers.createNcStrings(file, "targetStrings", targetStrings, ("numSeqs", "maxTargStringLength"), "target strings")
netcdf_helpers.createNcVar(file, "seqLengths", seqLengths, "i", ("numSeqs",), "sequence lengths")
netcdf_helpers.createNcVar(file, "seqDims", seqDims, "i", ("numSeqs","numDims"), "sequence dimensions")
netcdf_helpers.createNcVar(file, "inputs", inputs, "f", ("numTimesteps","inputPattSize"), "input patterns")


#write the data to disk
print "writing data to", outputFilename
json_data.close()
file.close()