def save(self, ncFilename): seqLengths = np.array([seq.shape[0] for seq in self.sequences], dtype='int32') seqDims = seqLengths[:, None] inputs = np.vstack(self.sequences).astype('float32') print '---------------------------------------' if config.RNN_NORM: default_mean_std = path.join(path.dirname(ncFilename), 'mean_std.pickle') inputs = self.normalize(inputs, default_mean_std) #create a new .nc file f = netcdf_helpers.NetCDFFile(ncFilename, 'w') #create the dimensions netcdf_helpers.createNcDim(f,'numSeqs',len(seqLengths)) netcdf_helpers.createNcDim(f,'numTimesteps',len(inputs)) netcdf_helpers.createNcDim(f,'inputPattSize',len(inputs[0])) netcdf_helpers.createNcDim(f,'numDims',1) netcdf_helpers.createNcDim(f,'numLabels',len(self.labels)) #create the variables netcdf_helpers.createNcStrings(f,'seqTags',self.seqTags,('numSeqs','maxSeqTagLength'),'sequence tags') netcdf_helpers.createNcStrings(f,'labels',self.labels,('numLabels','maxLabelLength'),'labels') netcdf_helpers.createNcStrings(f,'targetStrings',self.targetStrings,('numSeqs','maxTargStringLength'),'target strings') netcdf_helpers.createNcVar(f,'seqLengths',seqLengths,'i',('numSeqs',),'sequence lengths') netcdf_helpers.createNcVar(f,'seqDims',seqDims,'i',('numSeqs','numDims'),'sequence dimensions') netcdf_helpers.createNcVar(f,'inputs',inputs,'f',('numTimesteps','inputPattSize'),'input patterns') #write the data to disk print 'closing file', ncFilename f.close()
def save_to_ncFile(ncFilename, labels, inputs, targetStrings, seqLengths, seqDims, seqTags=None): """ Builds the nc file from the given variables. """ #create a new .nc file file = netcdf_helpers.NetCDFFile(ncFilename, 'w') #create the dimensions netcdf_helpers.createNcDim(file, 'numSeqs', len(seqLengths)) netcdf_helpers.createNcDim(file, 'numTimesteps', len(inputs)) netcdf_helpers.createNcDim(file, 'inputPattSize', len(inputs[0])) netcdf_helpers.createNcDim(file, 'numDims', 1) netcdf_helpers.createNcDim(file, 'numLabels', len(labels)) #create the variables if seqTags is not None: netcdf_helpers.createNcStrings(file, 'seqTags', seqTags, ('numSeqs', 'maxSeqTagLength'), 'sequence tags') netcdf_helpers.createNcStrings(file, 'labels', labels, ('numLabels', 'maxLabelLength'), 'labels') if targetStrings is not None: netcdf_helpers.createNcStrings(file, 'targetStrings', targetStrings, ('numSeqs', 'maxTargStringLength'), 'target strings') netcdf_helpers.createNcVar(file, 'seqLengths', seqLengths, 'i', ('numSeqs',), 'sequence lengths') netcdf_helpers.createNcVar(file, 'seqDims', seqDims, 'i', ('numSeqs', 'numDims'), 'sequence dimensions') netcdf_helpers.createNcVar(file, 'inputs', inputs, 'f', ('numTimesteps', 'inputPattSize'), 'input patterns') #write the data to disk print "closing file", ncFilename file.close()
# print inputs labels = list(charSet) print len(labels), labels # i = 0 # for s in targetStrings: # i += 1 # print i # print s # for point in inputs: # print point # for s in wordTargetStrings: # print s ncFile = netcdf_helpers.NetCDFFile(ncFileName, 'w') netcdf_helpers.createNcDim(ncFile, 'numSeqs', len(seqLengths)) netcdf_helpers.createNcDim(ncFile, 'numTimesteps', len(inputs)) netcdf_helpers.createNcDim(ncFile, 'inputPattSize', len(inputs[0])) netcdf_helpers.createNcDim(ncFile, 'numDims', 1) netcdf_helpers.createNcDim(ncFile, 'numLabels', len(labels)) netcdf_helpers.createNcStrings(ncFile, 'seqTags', seqTags, ('numSeqs', 'maxSeqTagLength'), 'sequence tags') netcdf_helpers.createNcStrings(ncFile, 'labels', labels, ('numLabels', 'maxLabelLength'), 'labels') netcdf_helpers.createNcStrings(ncFile, 'targetStrings', targetStrings, ('numSeqs', 'maxTargetStringLength'), 'target strings') netcdf_helpers.createNcStrings(ncFile, 'wordTargetStrings', wordTargetStrings, ('numSeqs', 'maxWordTargetStringLength'), 'word target strings')
##and this is the point it shud stop inside the folder ##here the loop for the respective folder shud stop #Later #inputs = ((array(inputs)-inputMeans)/inputStds).tolist() #print inputs # print len(labels), labels # print labels #create a new .nc file file = netcdf_helpers.NetCDFFile(ncFilename, 'w') #create the dimensions netcdf_helpers.createNcDim(file,'numSeqs',len(seqLengths)) netcdf_helpers.createNcDim(file,'numTimesteps',len(inputs)) netcdf_helpers.createNcDim(file,'inputPattSize',len(inputs[0])) netcdf_helpers.createNcDim(file,'numDims',1) netcdf_helpers.createNcDim(file,'numLabels',len(labels)) #create the variables netcdf_helpers.createNcStrings(file,'seqTags',seqTags,('numSeqs','maxSeqTagLength'),'sequence tags') netcdf_helpers.createNcStrings(file,'labels',labels,('numLabels','maxLabelLength'),'labels') netcdf_helpers.createNcStrings(file,'targetStrings',targetStrings,('numSeqs','maxTargStringLength'),'target strings') netcdf_helpers.createNcStrings(file,'wordTargetStrings',wordTargetStrings,('numSeqs','maxWordTargStringLength'),'word target strings') netcdf_helpers.createNcVar(file,'seqLengths',seqLengths,'i',('numSeqs',),'sequence lengths') netcdf_helpers.createNcVar(file,'seqDims',seqDims,'i',('numSeqs','numDims'),'sequence dimensions') print inputs netcdf_helpers.createNcVar(file,'inputs',inputs,'f',('numTimesteps','inputPattSize'),'input patterns')
inputMeans=mean(outputArray[:options.maxArraySize],0) if options.booleanColomn > 0: print "dont normalize boolean colomn", options.booleanColomn inputStds[options.booleanColomn] = 1 inputMeans[options.booleanColomn] = 0 print inputStds print inputMeans for p in range(len(inputStds)): if (inputStds[p]>0): if options.bigFile: offset = 0 step = options.maxArraySize while offset < len(outputArray): max = min (offset+step, len(outputArray)) outputArray[offset:max,p] = (outputArray[offset:max,p] - inputMeans[p])/inputStds[p] offset += step else: outputArray[:,p]=(outputArray[:,p]-inputMeans[p])/inputStds[p] outfile = netcdf_helpers.NetCDFFile(outputFilename, 'w') for d in inputVar.dimensions: netcdf_helpers.createNcDim(outfile,d,infile.dimensions[d]) if options.stdMeanFilename == "": netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Means',inputMeans,'f',(inputVar.dimensions[1],),'input means') netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Stds',inputStds,'f',(inputVar.dimensions[1],),'input std deviations') netcdf_helpers.createNcVar(outfile,options.outputArrayName,outputArray,'f',inputVar.dimensions,options.inputArrayName+' adjusted for mean 0 and std dev 1') outfile.close()
# NOTE(review): this excerpt opens inside loops whose headers are not visible
# here (`i` and `k` are bound by them).  The nesting of the statements up to
# and including `firstIx += ...` is reconstructed from the logic and must be
# confirmed against the enclosing loops.
if k > 0:
    # replace the point by its difference to the preceding point
    inputs[firstIx + k] = array(inputs[firstIx + k]) - array(inputs[firstIx + k - 1])
    # the last component is stored as an absolute value
    inputs[firstIx + k][-1] = abs(inputs[firstIx + k][-1])
    # the prediction slot of the preceding step receives the current input
    predictions[firstIx + k - 1 ] = inputs[firstIx + k]
if k == 0:
    predictions[firstIx] = inputs[firstIx+1]
    # the first point of a sequence has no predecessor and is zeroed
    inputs[firstIx] = array([0, 0, 0])
firstIx += seqLengths[i]

#create a new .nc file
# Apply the format with `%`: the original passed the format string and the
# filename as a tuple, so "%s" was never substituted.
print ("open file %s" % ncFilename)
# `file` is kept as the handle name: code after this excerpt presumably still
# uses it (e.g. to close the file) -- confirm.
file = netcdf_helpers.NetCDFFile(ncFilename, 'w')

#create the dimensions
netcdf_helpers.createNcDim(file, 'numSeqs', len(seqLengths))
netcdf_helpers.createNcDim(file, 'numTimesteps', len(inputs))
netcdf_helpers.createNcDim(file, 'predNumTimesteps', len(predictions))
netcdf_helpers.createNcDim(file, 'inputPattSize', len(inputs[0]))
netcdf_helpers.createNcDim(file, 'numDims', 1)

#create the variables
netcdf_helpers.createNcStrings(file, 'seqTags', seqTags,
                               ('numSeqs', 'maxSeqTagLength'), 'sequence tags')
netcdf_helpers.createNcStrings(file, 'targetStrings', targetStrings,
                               ('numSeqs', 'maxTargStringLength'), 'target strings')
netcdf_helpers.createNcStrings(file, 'wordTargetStrings', wordTargetStrings,
                               ('numSeqs', 'maxWordTargStringLength'), 'word target strings')
netcdf_helpers.createNcVar(file, 'seqLengths', seqLengths, 'i',
                           ('numSeqs',), 'sequence lengths')
netcdf_helpers.createNcVar(file, 'seqDims', seqDims, 'i',
                           ('numSeqs', 'numDims'), 'sequence dimensions')
netcdf_helpers.createNcVar(file, 'inputs', inputs, 'f',
                           ('numTimesteps', 'inputPattSize'), 'input patterns')
netcdf_helpers.createNcVar(file, 'predSeqLengths', predSeqLengths, 'i',
                           ('numSeqs',), 'pred sequence lengths')
netcdf_helpers.createNcVar(file, 'targetSeqDims', targetSeqDims, 'i',
                           ('numSeqs', 'numDims'), 'pred sequence dimensions')
# Disabled debugging output:
# print inputStds
# print inputs[:3]
# print targetPatterns[:3]
# print targetClasses[:3]
# print len(targetPatterns)
# outData = open("points_test.txt", "w")
# for p in inputs:
#     for x in p:
#         if x == p[-1]:
#             print >> outData, x
#         else:
#             print >> outData, x,

# Open the output file for writing.
ncFile = netcdf_helpers.NetCDFFile(ncFileName, 'w')

# Dimensions; 'numLabels' and 'classOutputSize' are both set to len(labels).
addDim = netcdf_helpers.createNcDim
addDim(ncFile, 'numSeqs', len(seqLengths))
addDim(ncFile, 'numTimesteps', len(inputs))
addDim(ncFile, 'inputPattSize', len(inputs[0]))
addDim(ncFile, 'targetPattSize', len(targetPatterns[0]))
addDim(ncFile, 'targetClassSize', len(targetClasses[0]))
addDim(ncFile, 'numDims', 1)
addDim(ncFile, 'numLabels', len(labels))
addDim(ncFile, 'numChars', len(chars))
addDim(ncFile, 'classOutputSize', len(labels))

# String-valued variables.
addStrings = netcdf_helpers.createNcStrings
addStrings(ncFile, 'seqTags', seqTags,
           ('numSeqs', 'maxSeqTagLength'), 'sequence tags')
addStrings(ncFile, 'labels', labels,
           ('numLabels', 'maxLabelLength'), 'labels')
addStrings(ncFile, 'chars', chars,
           ('numChars', 'maxCharLength'), 'chars')
addStrings(ncFile, 'targetStrings', targetStrings,
           ('numSeqs', 'maxTargetStringLength'), 'target strings')
addStrings(ncFile, 'wordTargetStrings', wordTargetStrings,
           ('numSeqs', 'maxWordTargetStringLength'), 'word target strings')

# Numeric variables.
netcdf_helpers.createNcVar(ncFile, 'seqLengths', seqLengths, 'i',
                           ('numSeqs', ), 'seq lengths')
# Disabled debugging output:
# print inputStds
# print inputs[:3]
# print targetPatterns[:3]
# print targetClasses[:3]
# print len(targetPatterns)
# outData = open("points_test.txt", "w")
# for p in inputs:
#     for x in p:
#         if x == p[-1]:
#             print >> outData, x
#         else:
#             print >> outData, x,

# Open the output file for writing.
ncFile = netcdf_helpers.NetCDFFile(ncFileName, 'w')

# Dimensions; 'numLabels' and 'classOutputSize' are both set to len(labels).
addDim = netcdf_helpers.createNcDim
addDim(ncFile, 'numSeqs', len(seqLengths))
addDim(ncFile, 'numTimesteps', len(inputs))
addDim(ncFile, 'inputPattSize', len(inputs[0]))
addDim(ncFile, 'targetPattSize', len(targetPatterns[0]))
addDim(ncFile, 'targetClassSize', len(targetClasses[0]))
addDim(ncFile, 'numDims', 1)
addDim(ncFile, 'numLabels', len(labels))
addDim(ncFile, 'numChars', len(chars))
addDim(ncFile, 'classOutputSize', len(labels))

# String-valued variables.
addStrings = netcdf_helpers.createNcStrings
addStrings(ncFile, 'seqTags', seqTags,
           ('numSeqs', 'maxSeqTagLength'), 'sequence tags')
addStrings(ncFile, 'labels', labels,
           ('numLabels', 'maxLabelLength'), 'labels')
addStrings(ncFile, 'chars', chars,
           ('numChars', 'maxCharLength'), 'chars')
totalLen = sum(seqLengths) print "totalLen", totalLen inputs = zeros((totalLen, 1), "f") offset = 0 for filename in seqTags: print "reading image file", filename image = Image.open(filename).transpose(Image.FLIP_TOP_BOTTOM).transpose(Image.ROTATE_270) for i in image.getdata(): inputs[offset][0] = (float(i) - inputMean) / inputStd offset += 1 # create a new .nc file file = netcdf_helpers.netCDF4.Dataset(outputFilename, "w") # create the dimensions netcdf_helpers.createNcDim(file, "numSeqs", len(seqLengths)) netcdf_helpers.createNcDim(file, "numTimesteps", len(inputs)) netcdf_helpers.createNcDim(file, "inputPattSize", len(inputs[0])) netcdf_helpers.createNcDim(file, "numDims", 2) netcdf_helpers.createNcDim(file, "numLabels", len(labels)) # create the variables netcdf_helpers.createNcStrings(file, "seqTags", seqTags, ("numSeqs", "maxSeqTagLength"), "sequence tags") netcdf_helpers.createNcStrings(file, "labels", labels, ("numLabels", "maxLabelLength"), "labels") netcdf_helpers.createNcStrings( file, "wordTargetStrings", wordTargetStrings, ("numSeqs", "maxWordTargStringLength"), "target strings" ) netcdf_helpers.createNcStrings( file, "targetStrings", targetStrings, ("numSeqs", "maxTargStringLength"), "target strings" ) netcdf_helpers.createNcVar(file, "seqLengths", seqLengths, "i", ("numSeqs",), "sequence lengths")
print "calculating std deviations" inputStds=Std(outputArray[:options.maxArraySize],0) print "calculating means" inputMeans=mean(outputArray[:options.maxArraySize],0) print inputStds print inputMeans for p in range(len(inputStds)): if (inputStds[p]>0): if options.bigFile: offset = 0 step = options.maxArraySize while offset < len(outputArray): max = min (offset+step, len(outputArray)) outputArray[offset:max,p] = (outputArray[offset:max,p] - inputMeans[p])/inputStds[p] offset += step else: outputArray[:,p]=(outputArray[:,p]-inputMeans[p])/inputStds[p] outfile = netcdf_helpers.NetCDFFile(outputFilename, 'w') for d in inputVar.dimensions: netcdf_helpers.createNcDim(outfile,d,infile.dimensions[d]) if options.stdMeanFilename == "": netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Means',inputMeans,'f',(inputVar.dimensions[1],),'input means') netcdf_helpers.createNcVar(outfile,options.outputArrayName+'Stds',inputStds,'f',(inputVar.dimensions[1],),'input std deviations') netcdf_helpers.createNcVar(outfile,options.outputArrayName,outputArray,'f',inputVar.dimensions,options.inputArrayName+' adjusted for mean 0 and std dev 1') outfile.close()
# NOTE(review): `frame` is bound outside this excerpt, presumably by an
# enclosing loop whose header is not visible here -- confirm the nesting.
for i in frame:
    # shift by inputMean and scale by inputStd, one value per row
    inputs[offset][0] = (float(i) - inputMean) / inputStd
    offset += 1

# Disabled alternative: fill the inputs array frame by frame.
# for frame in transform:
#     inputs[offset]= frame
#     offset += 1

# Create a new .nc file.  The name `file` is kept as-is: code following this
# excerpt presumably still refers to it (e.g. to close it) -- confirm.
file = netcdf_helpers.NetCDFFile(outputFilename, "w")

# Dimensions ('numDims' is 2 here).
addDim = netcdf_helpers.createNcDim
addDim(file, "numSeqs", len(seqLengths))
addDim(file, "numTimesteps", len(inputs))
addDim(file, "inputPattSize", len(inputs[0]))
addDim(file, "numDims", 2)
addDim(file, "numLabels", len(labels))

# String-valued variables.
addStrings = netcdf_helpers.createNcStrings
addStrings(file, "seqTags", seqTags, ("numSeqs", "maxSeqTagLength"), "sequence tags")
addStrings(file, "labels", labels, ("numLabels", "maxLabelLength"), "labels")
addStrings(file, "targetStrings", targetStrings,
           ("numSeqs", "maxTargStringLength"), "target strings")

# Numeric variables.
addVar = netcdf_helpers.createNcVar
addVar(file, "seqLengths", seqLengths, "i", ("numSeqs",), "sequence lengths")
addVar(file, "seqDims", seqDims, "i", ("numSeqs","numDims"), "sequence dimensions")
addVar(file, "inputs", inputs, "f", ("numTimesteps","inputPattSize"), "input patterns")