예제 #1
0
    def save(self, ncFilename):
        seqLengths = np.array([seq.shape[0] for seq in self.sequences], dtype='int32')
        seqDims = seqLengths[:, None]
        inputs = np.vstack(self.sequences).astype('float32')

        print '---------------------------------------'
        if config.RNN_NORM:
            default_mean_std = path.join(path.dirname(ncFilename), 'mean_std.pickle')
            inputs = self.normalize(inputs, default_mean_std)

        #create a new .nc file
        f = netcdf_helpers.NetCDFFile(ncFilename, 'w')

        #create the dimensions
        netcdf_helpers.createNcDim(f,'numSeqs',len(seqLengths))
        netcdf_helpers.createNcDim(f,'numTimesteps',len(inputs))
        netcdf_helpers.createNcDim(f,'inputPattSize',len(inputs[0]))
        netcdf_helpers.createNcDim(f,'numDims',1)
        netcdf_helpers.createNcDim(f,'numLabels',len(self.labels))

        #create the variables
        netcdf_helpers.createNcStrings(f,'seqTags',self.seqTags,('numSeqs','maxSeqTagLength'),'sequence tags')
        netcdf_helpers.createNcStrings(f,'labels',self.labels,('numLabels','maxLabelLength'),'labels')
        netcdf_helpers.createNcStrings(f,'targetStrings',self.targetStrings,('numSeqs','maxTargStringLength'),'target strings')
        netcdf_helpers.createNcVar(f,'seqLengths',seqLengths,'i',('numSeqs',),'sequence lengths')
        netcdf_helpers.createNcVar(f,'seqDims',seqDims,'i',('numSeqs','numDims'),'sequence dimensions')
        netcdf_helpers.createNcVar(f,'inputs',inputs,'f',('numTimesteps','inputPattSize'),'input patterns')

        #write the data to disk
        print 'closing file', ncFilename
        f.close()
예제 #2
0
	def save_to_ncFile(ncFilename, labels, inputs, targetStrings, seqLengths, seqDims, seqTags=None):
		"""
		Builds the nc file from the given variables.
		"""
		#create a new .nc file
		file = netcdf_helpers.NetCDFFile(ncFilename, 'w')

		#create the dimensions
		netcdf_helpers.createNcDim(file, 'numSeqs', len(seqLengths))
		netcdf_helpers.createNcDim(file, 'numTimesteps', len(inputs))
		netcdf_helpers.createNcDim(file, 'inputPattSize', len(inputs[0]))
		netcdf_helpers.createNcDim(file, 'numDims', 1)
		netcdf_helpers.createNcDim(file, 'numLabels', len(labels))

		#create the variables
		if seqTags is not None:
			netcdf_helpers.createNcStrings(file, 'seqTags', seqTags, ('numSeqs', 'maxSeqTagLength'), 'sequence tags')
		netcdf_helpers.createNcStrings(file, 'labels', labels, ('numLabels', 'maxLabelLength'), 'labels')
		if targetStrings is not None:
			netcdf_helpers.createNcStrings(file, 'targetStrings', targetStrings, ('numSeqs', 'maxTargStringLength'),
			                               'target strings')
		netcdf_helpers.createNcVar(file, 'seqLengths', seqLengths, 'i', ('numSeqs',), 'sequence lengths')
		netcdf_helpers.createNcVar(file, 'seqDims', seqDims, 'i', ('numSeqs', 'numDims'), 'sequence dimensions')
		netcdf_helpers.createNcVar(file, 'inputs', inputs, 'f', ('numTimesteps', 'inputPattSize'), 'input patterns')

		#write the data to disk
		print "closing file", ncFilename
		file.close()
예제 #3
0
# print inputs

# labels is charSet converted to a list.
# NOTE(review): if charSet is an unordered set, the resulting label order is
# not guaranteed to be stable between runs -- confirm against its definition.
labels = list(charSet)
print len(labels), labels
# Disabled debugging dumps of the collected data:
# i = 0
# for s in targetStrings:
#    i += 1
#    print i
#    print s
# for point in inputs:
#    print point
# for s in wordTargetStrings:
#    print s

# Open the output netCDF file for writing.
ncFile = netcdf_helpers.NetCDFFile(ncFileName, 'w')

# Create the dimensions; sizes are taken from the data collected above.
# len(inputs[0]) requires inputs to be non-empty.
netcdf_helpers.createNcDim(ncFile, 'numSeqs', len(seqLengths))
netcdf_helpers.createNcDim(ncFile, 'numTimesteps', len(inputs))
netcdf_helpers.createNcDim(ncFile, 'inputPattSize', len(inputs[0]))
netcdf_helpers.createNcDim(ncFile, 'numDims', 1)
netcdf_helpers.createNcDim(ncFile, 'numLabels', len(labels))

# Create the string variables (tags, labels and both kinds of target strings).
netcdf_helpers.createNcStrings(ncFile, 'seqTags', seqTags,
                               ('numSeqs', 'maxSeqTagLength'), 'sequence tags')
netcdf_helpers.createNcStrings(ncFile, 'labels', labels,
                               ('numLabels', 'maxLabelLength'), 'labels')
netcdf_helpers.createNcStrings(ncFile, 'targetStrings', targetStrings,
                               ('numSeqs', 'maxTargetStringLength'),
                               'target strings')
netcdf_helpers.createNcStrings(ncFile, 'wordTargetStrings', wordTargetStrings,
                               ('numSeqs', 'maxWordTargetStringLength'),
                               'word target strings')
			## and this is the point where it should stop inside the folder

			## here the loop for the respective folder should stop

# Later: input normalisation is currently disabled (kept for reference).
#inputs = ((array(inputs)-inputMeans)/inputStds).tolist()

#print inputs
# print len(labels), labels
# print labels

#create a new .nc file
# NOTE(review): 'file' shadows the Python 2 builtin of the same name.
file = netcdf_helpers.NetCDFFile(ncFilename, 'w')

#create the dimensions
# len(inputs[0]) requires inputs to be non-empty.
netcdf_helpers.createNcDim(file,'numSeqs',len(seqLengths))
netcdf_helpers.createNcDim(file,'numTimesteps',len(inputs))
netcdf_helpers.createNcDim(file,'inputPattSize',len(inputs[0]))
netcdf_helpers.createNcDim(file,'numDims',1)
netcdf_helpers.createNcDim(file,'numLabels',len(labels))

#create the variables
netcdf_helpers.createNcStrings(file,'seqTags',seqTags,('numSeqs','maxSeqTagLength'),'sequence tags')
netcdf_helpers.createNcStrings(file,'labels',labels,('numLabels','maxLabelLength'),'labels')
netcdf_helpers.createNcStrings(file,'targetStrings',targetStrings,('numSeqs','maxTargStringLength'),'target strings')
netcdf_helpers.createNcStrings(file,'wordTargetStrings',wordTargetStrings,('numSeqs','maxWordTargStringLength'),'word target strings')
netcdf_helpers.createNcVar(file,'seqLengths',seqLengths,'i',('numSeqs',),'sequence lengths')
netcdf_helpers.createNcVar(file,'seqDims',seqDims,'i',('numSeqs','numDims'),'sequence dimensions')
# Debug output: dumps the complete inputs collection to stdout before writing it.
print inputs
netcdf_helpers.createNcVar(file,'inputs',inputs,'f',('numTimesteps','inputPattSize'),'input patterns')
예제 #5
0
	inputMeans=mean(outputArray[:options.maxArraySize],0)
if options.booleanColomn > 0:
        print "dont normalize boolean colomn", options.booleanColomn
        inputStds[options.booleanColomn] = 1
        inputMeans[options.booleanColomn] = 0
print inputStds
print inputMeans

for p in range(len(inputStds)):
	if (inputStds[p]>0):
		if options.bigFile:
			offset = 0
			step = options.maxArraySize
			while offset < len(outputArray):
				max = min (offset+step, len(outputArray))
				outputArray[offset:max,p] = (outputArray[offset:max,p] - inputMeans[p])/inputStds[p]
				offset += step
		else:
			outputArray[:,p]=(outputArray[:,p]-inputMeans[p])/inputStds[p]

outfile = netcdf_helpers.NetCDFFile(outputFilename, 'w')

for d in inputVar.dimensions:
	netcdf_helpers.createNcDim(outfile,d,infile.dimensions[d])

if options.stdMeanFilename == "":
	netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Means',inputMeans,'f',(inputVar.dimensions[1],),'input means')
	netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Stds',inputStds,'f',(inputVar.dimensions[1],),'input std deviations')
netcdf_helpers.createNcVar(outfile,options.outputArrayName,outputArray,'f',inputVar.dimensions,options.inputArrayName+' adjusted for mean 0 and std dev 1')
outfile.close()
예제 #6
0
                if k > 0:
                        inputs[firstIx + k] = array(inputs[firstIx + k]) - array(inputs[firstIx + k - 1])
                        inputs[firstIx + k][-1] = abs(inputs[firstIx + k][-1])
                        predictions[firstIx + k - 1 ] = inputs[firstIx + k]
                if k == 0:
                        predictions[firstIx] = inputs[firstIx+1]
        inputs[firstIx] = array([0, 0, 0])
        firstIx += seqLengths[i]


#create a new .nc file
# Apply the format string before printing: passing the format string and the
# filename as a parenthesised pair printed the raw tuple instead of the
# formatted message.
print("open file %s" % ncFilename)
# NOTE(review): 'file' shadows the Python 2 builtin of the same name.
file = netcdf_helpers.NetCDFFile(ncFilename, 'w')

#create the dimensions
# len(inputs[0]) requires inputs to be non-empty.
netcdf_helpers.createNcDim(file,'numSeqs',len(seqLengths))
netcdf_helpers.createNcDim(file,'numTimesteps',len(inputs))
netcdf_helpers.createNcDim(file,'predNumTimesteps',len(predictions))
netcdf_helpers.createNcDim(file,'inputPattSize',len(inputs[0]))
netcdf_helpers.createNcDim(file,'numDims',1)


#create the variables
netcdf_helpers.createNcStrings(file,'seqTags',seqTags,('numSeqs','maxSeqTagLength'),'sequence tags')
netcdf_helpers.createNcStrings(file,'targetStrings',targetStrings,('numSeqs','maxTargStringLength'),'target strings')
netcdf_helpers.createNcStrings(file,'wordTargetStrings',wordTargetStrings,('numSeqs','maxWordTargStringLength'),'word target strings')
netcdf_helpers.createNcVar(file,'seqLengths',seqLengths,'i',('numSeqs',),'sequence lengths')
netcdf_helpers.createNcVar(file,'seqDims',seqDims,'i',('numSeqs','numDims'),'sequence dimensions')
netcdf_helpers.createNcVar(file,'inputs',inputs,'f',('numTimesteps','inputPattSize'),'input patterns')
netcdf_helpers.createNcVar(file,'predSeqLengths', predSeqLengths,'i',('numSeqs',),'pred sequence lengths')
# NOTE(review): the variable is named 'targetSeqDims' but described as
# 'pred sequence dimensions' -- confirm which name is intended.
netcdf_helpers.createNcVar(file,'targetSeqDims', targetSeqDims,'i',('numSeqs','numDims'),'pred sequence dimensions')
# Disabled debugging output (kept for reference):
#print inputStds
#print inputs[:3]
#print targetPatterns[:3]
#print targetClasses[:3]
#print len(targetPatterns)
#outData = open("points_test.txt", "w")
#for p in inputs:
#    for x in p:
#        if x == p[-1]:
#            print >> outData, x
#        else:
#            print >> outData, x,

# Open the output netCDF file for writing.
ncFile = netcdf_helpers.NetCDFFile(ncFileName, 'w')

# Create the dimensions. The [0] lookups require inputs, targetPatterns and
# targetClasses to be non-empty.
netcdf_helpers.createNcDim(ncFile, 'numSeqs', len(seqLengths))
netcdf_helpers.createNcDim(ncFile, 'numTimesteps', len(inputs))
netcdf_helpers.createNcDim(ncFile, 'inputPattSize', len(inputs[0]))
netcdf_helpers.createNcDim(ncFile, 'targetPattSize', len(targetPatterns[0]))
netcdf_helpers.createNcDim(ncFile, 'targetClassSize', len(targetClasses[0]))
netcdf_helpers.createNcDim(ncFile, 'numDims', 1)
netcdf_helpers.createNcDim(ncFile, 'numLabels', len(labels))
netcdf_helpers.createNcDim(ncFile, 'numChars', len(chars))
# 'classOutputSize' is given the same size as 'numLabels'.
netcdf_helpers.createNcDim(ncFile, 'classOutputSize', len(labels))

# Create the string variables and the per-sequence lengths.
netcdf_helpers.createNcStrings(ncFile, 'seqTags', seqTags, ('numSeqs', 'maxSeqTagLength'), 'sequence tags') 
netcdf_helpers.createNcStrings(ncFile, 'labels', labels, ('numLabels', 'maxLabelLength'), 'labels')
netcdf_helpers.createNcStrings(ncFile, 'chars', chars, ('numChars', 'maxCharLength'), 'chars')
netcdf_helpers.createNcStrings(ncFile, 'targetStrings', targetStrings, ('numSeqs', 'maxTargetStringLength'), 'target strings')
netcdf_helpers.createNcStrings(ncFile, 'wordTargetStrings', wordTargetStrings, ('numSeqs', 'maxWordTargetStringLength'), 'word target strings')
netcdf_helpers.createNcVar(ncFile, 'seqLengths', seqLengths, 'i', ('numSeqs', ), 'seq lengths')
예제 #8
0
# Disabled debugging output (kept for reference):
#print inputStds
#print inputs[:3]
#print targetPatterns[:3]
#print targetClasses[:3]
#print len(targetPatterns)
#outData = open("points_test.txt", "w")
#for p in inputs:
#    for x in p:
#        if x == p[-1]:
#            print >> outData, x
#        else:
#            print >> outData, x,

# Open the output netCDF file for writing.
ncFile = netcdf_helpers.NetCDFFile(ncFileName, 'w')

# Create the dimensions. The [0] lookups require inputs, targetPatterns and
# targetClasses to be non-empty.
netcdf_helpers.createNcDim(ncFile, 'numSeqs', len(seqLengths))
netcdf_helpers.createNcDim(ncFile, 'numTimesteps', len(inputs))
netcdf_helpers.createNcDim(ncFile, 'inputPattSize', len(inputs[0]))
netcdf_helpers.createNcDim(ncFile, 'targetPattSize', len(targetPatterns[0]))
netcdf_helpers.createNcDim(ncFile, 'targetClassSize', len(targetClasses[0]))
netcdf_helpers.createNcDim(ncFile, 'numDims', 1)
netcdf_helpers.createNcDim(ncFile, 'numLabels', len(labels))
netcdf_helpers.createNcDim(ncFile, 'numChars', len(chars))
# 'classOutputSize' is given the same size as 'numLabels'.
netcdf_helpers.createNcDim(ncFile, 'classOutputSize', len(labels))

# Create the string variables for tags, labels and characters.
netcdf_helpers.createNcStrings(ncFile, 'seqTags', seqTags,
                               ('numSeqs', 'maxSeqTagLength'), 'sequence tags')
netcdf_helpers.createNcStrings(ncFile, 'labels', labels,
                               ('numLabels', 'maxLabelLength'), 'labels')
netcdf_helpers.createNcStrings(ncFile, 'chars', chars,
                               ('numChars', 'maxCharLength'), 'chars')
예제 #9
0
# One single-valued input row is allocated per unit of sequence length;
# the array is filled below, one row per value returned by getdata().
totalLen = sum(seqLengths)
print "totalLen", totalLen
inputs = zeros((totalLen, 1), "f")
offset = 0
# The sequence tags double as image file names.
for filename in seqTags:
    print "reading image file", filename
    # The image is flipped top/bottom and rotated by 270 degrees before its
    # pixel data is read.
    image = Image.open(filename).transpose(Image.FLIP_TOP_BOTTOM).transpose(Image.ROTATE_270)
    for i in image.getdata():
        # Standardise each value with the precomputed inputMean / inputStd.
        # NOTE(review): float(i) assumes getdata() yields scalars, i.e. a
        # single-band image -- confirm.
        inputs[offset][0] = (float(i) - inputMean) / inputStd
        offset += 1

# create a new .nc file
# NOTE(review): 'file' shadows the Python 2 builtin of the same name.
file = netcdf_helpers.netCDF4.Dataset(outputFilename, "w")

# create the dimensions
netcdf_helpers.createNcDim(file, "numSeqs", len(seqLengths))
netcdf_helpers.createNcDim(file, "numTimesteps", len(inputs))
netcdf_helpers.createNcDim(file, "inputPattSize", len(inputs[0]))
netcdf_helpers.createNcDim(file, "numDims", 2)
netcdf_helpers.createNcDim(file, "numLabels", len(labels))

# create the variables
netcdf_helpers.createNcStrings(file, "seqTags", seqTags, ("numSeqs", "maxSeqTagLength"), "sequence tags")
netcdf_helpers.createNcStrings(file, "labels", labels, ("numLabels", "maxLabelLength"), "labels")
# NOTE(review): the description passed here is the same "target strings" text
# used for targetStrings below -- confirm that this is intended.
netcdf_helpers.createNcStrings(
    file, "wordTargetStrings", wordTargetStrings, ("numSeqs", "maxWordTargStringLength"), "target strings"
)
netcdf_helpers.createNcStrings(
    file, "targetStrings", targetStrings, ("numSeqs", "maxTargStringLength"), "target strings"
)
netcdf_helpers.createNcVar(file, "seqLengths", seqLengths, "i", ("numSeqs",), "sequence lengths")
예제 #10
0
	print "calculating std deviations"
	inputStds=Std(outputArray[:options.maxArraySize],0)
	print "calculating means"
	inputMeans=mean(outputArray[:options.maxArraySize],0)

print inputStds
print inputMeans

for p in range(len(inputStds)):
	if (inputStds[p]>0):
		if options.bigFile:
			offset = 0
			step = options.maxArraySize
			while offset < len(outputArray):
				max = min (offset+step, len(outputArray))
				outputArray[offset:max,p] = (outputArray[offset:max,p] - inputMeans[p])/inputStds[p]
				offset += step
		else:
			outputArray[:,p]=(outputArray[:,p]-inputMeans[p])/inputStds[p]

outfile = netcdf_helpers.NetCDFFile(outputFilename, 'w')

for d in inputVar.dimensions:
	netcdf_helpers.createNcDim(outfile,d,infile.dimensions[d])

if options.stdMeanFilename == "":
	netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Means',inputMeans,'f',(inputVar.dimensions[1],),'input means')
	netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Stds',inputStds,'f',(inputVar.dimensions[1],),'input std deviations')
netcdf_helpers.createNcVar(outfile,options.outputArrayName,outputArray,'f',inputVar.dimensions,options.inputArrayName+' adjusted for mean 0 and std dev 1')
outfile.close()
예제 #11
0
        for i in frame:
            inputs[offset][0]= (float(i)-inputMean)/inputStd
            offset += 1


## fill the inputs array frame by frame (disabled alternative)
#    for frame in transform:
#        inputs[offset]= frame
#        offset += 1


#create a new .nc file
# NOTE(review): 'file' shadows the Python 2 builtin of the same name.
file = netcdf_helpers.NetCDFFile(outputFilename, "w")

#create the dimensions
# len(inputs[0]) requires inputs to be non-empty.
netcdf_helpers.createNcDim(file, "numSeqs", len(seqLengths))
netcdf_helpers.createNcDim(file,"numTimesteps", len(inputs))
netcdf_helpers.createNcDim(file, "inputPattSize", len(inputs[0]))
# seqDims is stored with two entries per sequence (numDims = 2).
netcdf_helpers.createNcDim(file,"numDims", 2)
netcdf_helpers.createNcDim(file,"numLabels", len(labels))


#create the variables
netcdf_helpers.createNcStrings(file, "seqTags", seqTags, ("numSeqs", "maxSeqTagLength"), "sequence tags")
netcdf_helpers.createNcStrings(file, "labels", labels, ("numLabels", "maxLabelLength"), "labels")
netcdf_helpers.createNcStrings(file, "targetStrings", targetStrings, ("numSeqs", "maxTargStringLength"), "target strings")
netcdf_helpers.createNcVar(file, "seqLengths", seqLengths, "i", ("numSeqs",), "sequence lengths")
netcdf_helpers.createNcVar(file, "seqDims", seqDims, "i", ("numSeqs","numDims"), "sequence dimensions")
netcdf_helpers.createNcVar(file, "inputs", inputs, "f", ("numTimesteps","inputPattSize"), "input patterns")