def write_to_file(filename, inputs, pred_seq_lengths, predictions, seq_dims,
                  seq_lengths, seq_tags, target_seq_dims, target_strings,
                  word_target_strings):
    """Write sequence inputs and prediction targets to a new netCDF file.

    The file is opened in ``'w'`` mode through ``netcdf_helpers`` and is
    always closed again, even when creating a dimension or variable fails.

    Dimensions created:
        numSeqs           len(seq_lengths)
        numTimesteps      len(inputs)
        predNumTimesteps  len(predictions)
        inputPattSize     len(inputs[0])
        numDims           1

    Args:
        filename: Name of the netCDF file to create.
        inputs: Input patterns, stored as ``inputs`` with type ``'f'`` over
            (numTimesteps, inputPattSize).  ``inputs[0]`` is read to size
            ``inputPattSize``, so an empty ``inputs`` makes this call fail.
        pred_seq_lengths: Stored as ``predSeqLengths`` (``'i'``, numSeqs).
        predictions: Stored as ``targetPatterns`` with type ``'f'`` over
            (predNumTimesteps, inputPattSize).
        seq_dims: Stored as ``seqDims`` (``'i'``, numSeqs x numDims).
        seq_lengths: Stored as ``seqLengths`` (``'i'``, numSeqs).
        seq_tags: Strings stored as ``seqTags``.
        target_seq_dims: Stored as ``targetSeqDims`` (``'i'``,
            numSeqs x numDims).
        target_strings: Strings stored as ``targetStrings``.
        word_target_strings: Strings stored as ``wordTargetStrings``.
    """
    out_file = netcdf_helpers.netcdf_file(filename, 'w')
    try:
        # create the dimensions
        netcdf_helpers.create_nc_dim(out_file, 'numSeqs', len(seq_lengths))
        netcdf_helpers.create_nc_dim(out_file, 'numTimesteps', len(inputs))
        netcdf_helpers.create_nc_dim(out_file, 'predNumTimesteps',
                                     len(predictions))
        netcdf_helpers.create_nc_dim(out_file, 'inputPattSize',
                                     len(inputs[0]))
        netcdf_helpers.create_nc_dim(out_file, 'numDims', 1)

        # create the variables
        netcdf_helpers.create_nc_strings(
            out_file, 'seqTags', seq_tags,
            ('numSeqs', 'maxSeqTagLength'), 'sequence tags')
        netcdf_helpers.create_nc_strings(
            out_file, 'targetStrings', target_strings,
            ('numSeqs', 'maxTargStringLength'), 'target strings')
        netcdf_helpers.create_nc_strings(
            out_file, 'wordTargetStrings', word_target_strings,
            ('numSeqs', 'maxWordTargStringLength'), 'word target strings')
        netcdf_helpers.create_nc_var(
            out_file, 'seqLengths', seq_lengths, 'i',
            ('numSeqs',), 'sequence lengths')
        netcdf_helpers.create_nc_var(
            out_file, 'seqDims', seq_dims, 'i',
            ('numSeqs', 'numDims'), 'sequence dimensions')
        netcdf_helpers.create_nc_var(
            out_file, 'inputs', inputs, 'f',
            ('numTimesteps', 'inputPattSize'), 'input patterns')
        netcdf_helpers.create_nc_var(
            out_file, 'predSeqLengths', pred_seq_lengths, 'i',
            ('numSeqs',), 'pred sequence lengths')
        netcdf_helpers.create_nc_var(
            out_file, 'targetSeqDims', target_seq_dims, 'i',
            ('numSeqs', 'numDims'), 'pred sequence dimensions')
        netcdf_helpers.create_nc_var(
            out_file, 'targetPatterns', predictions, 'f',
            ('predNumTimesteps', 'inputPattSize'), 'prediction patterns')
    finally:
        # Release the handle even if one of the helper calls above raised.
        out_file.close()
def merge_tmp_files(tmp_files, nc_filename):
    """Merge temporary netCDF files into one file of per-step differences.

    Every file in ``tmp_files`` is read, closed and then deleted.  The
    concatenated inputs are rewritten in place, sequence by sequence:

    * for every timestep after the first, the input becomes the difference
      to the previous timestep, with its last component made non-negative
      via ``abs``;
    * that difference is also stored as the target pattern of the previous
      timestep;
    * the first input of each sequence is replaced by a zero vector.

    The result is handed to ``write_to_file``.

    Args:
        tmp_files: Iterable of temporary netCDF file names to merge; each
            one is removed after it has been read successfully.
        nc_filename: Name of the merged netCDF file to write.
    """
    print("Merging...")
    seq_dims = []
    seq_lengths = []
    target_strings = []
    word_target_strings = []
    seq_tags = []
    inputs = []
    target_patterns = []
    pred_seq_lengths = []
    target_seq_dims = []

    for tmp_filename in tmp_files:
        tmp_file = netcdf_helpers.netcdf_file(tmp_filename, 'r')
        try:
            variables = tmp_file.variables
            seq_dims.extend(variables['seqDims'][:])
            seq_lengths.extend(variables['seqLengths'][:])
            target_strings.extend(variables['targetStrings'][:])
            word_target_strings.extend(variables['wordTargetStrings'][:])
            seq_tags.extend(variables['seqTags'][:])
            inputs.extend(variables['inputs'][:])
            target_patterns.extend(variables['targetPatterns'][:])
            pred_seq_lengths.extend(variables['predSeqLengths'][:])
            target_seq_dims.extend(variables['targetSeqDims'][:])
        finally:
            # Do not leak the handle when a variable is missing or unreadable.
            tmp_file.close()
        # Only reached when the file was read completely.
        os.remove(tmp_filename)

    first_ix = 0
    for seq_length in seq_lengths:
        # Walk backwards so inputs[first_ix + k - 1] still holds the original
        # value when the difference for step k is computed.
        for k in reversed(range(seq_length)):
            if k > 0:
                delta = array(inputs[first_ix + k]) - array(inputs[first_ix + k - 1])
                delta[-1] = abs(delta[-1])
                inputs[first_ix + k] = delta
                target_patterns[first_ix + k - 1] = delta
            else:
                # NOTE(review): for sequences of length >= 2 this repeats the
                # assignment already made at k == 1.  For a sequence of
                # length 1 it reads the first row of the *next* sequence (or
                # raises IndexError on the last one) -- confirm the intended
                # target for that case.
                target_patterns[first_ix] = inputs[first_ix + 1]
                # Zero vector sized like the input pattern (was hard-coded
                # to three components).
                inputs[first_ix] = array([0] * len(inputs[first_ix]))
        first_ix += seq_length

    write_to_file(nc_filename, inputs, pred_seq_lengths, target_patterns,
                  seq_dims, seq_lengths, seq_tags, target_seq_dims,
                  target_strings, word_target_strings)
Image.ROTATE_270).convert('L') if options.pad and (im.size[0] < padDims[0] or im.size[1] < padDims[1]): w = max(padDims[0], im.size[0]) h = max(padDims[1], im.size[1]) bigIm = Image.new(im.mode, (w, h), 255) bigIm.paste(im, ((w - im.size[0]) / 2, (h - im.size[1]) / 2)) im = bigIm for n in im.getdata(): inputs[offset][0] = (float(n) - inputMean) / inputStd offset += 1 print len(labels), "labels:" print labels #create a new .nc file file = netcdf_helpers.netcdf_file(ncFilename, 'w') #create the dimensions netcdf_helpers.createNcDim(file, 'numSeqs', len(seqLengths)) netcdf_helpers.createNcDim(file, 'numTimesteps', len(inputs)) netcdf_helpers.createNcDim(file, 'inputPattSize', len(inputs[0])) netcdf_helpers.createNcDim(file, 'numDims', 2) netcdf_helpers.createNcDim(file, 'numLabels', len(labels)) #create the variables netcdf_helpers.createNcStrings(file, 'seqTags', seqTags, ('numSeqs', 'maxSeqTagLength'), 'sequence tags') netcdf_helpers.createNcStrings(file, 'labels', labels, ('numLabels', 'maxLabelLength'), 'labels') netcdf_helpers.createNcStrings(file, 'targetStrings', targetStrings, ('numSeqs', 'maxTargStringLength'),
def std(array, axis): if np.shape(array)[axis] > 1: return np.std(array, axis) return array # parse command line options (options, args) = parser.parse_args() print options if len(args) != 2: parser.error("incorrect number of arguments") inputFilename = args[0] outputFilename = args[1] print 'inputFilename', inputFilename infile = netcdf_helpers.netcdf_file(inputFilename, 'r') print "loading in input array" inputVar = infile.variables[options.inputArrayName] outputArray = np.zeros(inputVar.shape, 'f') if options.bigFile: offset = 0 step = options.maxArraySize while offset < inputVar.shape[0]: max = min(offset + step, inputVar.shape[0]) outputArray[offset:max] = inputVar[offset:max] offset += step else: outputArray = np.copy(inputVar[:]) outputArray.flags.writeable = True
print len(labels),'labels' print labels totalLen = sum(seqLengths) print 'totalLen', totalLen inputs = zeros((totalLen,1), 'f') offset = 0 for filename in seqTags: print "reading image file", filename image = Image.open(filename).transpose(Image.FLIP_TOP_BOTTOM).transpose(Image.ROTATE_270) for i in image.getdata(): inputs[offset][0] = (float(i) - inputMean)/inputStd offset += 1 #create a new .nc file file = netcdf_helpers.netcdf_file(outputFilename, 'w') #create the dimensions netcdf_helpers.createNcDim(file,'numSeqs',len(seqLengths)) netcdf_helpers.createNcDim(file,'numTimesteps',len(inputs)) netcdf_helpers.createNcDim(file,'inputPattSize',len(inputs[0])) netcdf_helpers.createNcDim(file,'numDims',2) netcdf_helpers.createNcDim(file,'numLabels',len(labels)) #create the variables netcdf_helpers.createNcStrings(file,'seqTags',seqTags,('numSeqs','maxSeqTagLength'),'sequence tags') netcdf_helpers.createNcStrings(file,'labels',labels,('numLabels','maxLabelLength'),'labels') netcdf_helpers.createNcStrings(file,'wordTargetStrings',wordTargetStrings,('numSeqs','maxWordTargStringLength'),'target strings') netcdf_helpers.createNcStrings(file,'targetStrings',targetStrings,('numSeqs','maxTargStringLength'),'target strings') netcdf_helpers.createNcVar(file,'seqLengths',seqLengths,'i',('numSeqs',),'sequence lengths') netcdf_helpers.createNcVar(file,'seqDims',seqDims,'i',('numSeqs','numDims'),'sequence dimensions')