"""split_big_special is special, haha, and hacky and requires inputs of a
specific shape -- just hack the script if you want to use it. note that
`--size` is a required argument, but it is a dummy arg.
"""
import os

from parser import get_args_split as parser
import msg
import hdf5
from combine_big import load
from split import generate_uneven_filelist
from split import save_filelist

if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: SPLIT")
    args = parser()
    data = load(args.input)

    # TODO - come up with a clever way to generalize this...
    # Hard-coded train/valid/test row boundaries and output suffixes.
    boundaries = [(0, 15000), (15000, 17500), (17500, 20000)]
    suffixes = ['_train.hdf5', '_valid.hdf5', '_test.hdf5']

    # Output base name: explicit --prefix if given, else the input path
    # with its extension stripped.
    base = args.prefix or os.path.splitext(args.input)[0]
    filelist = generate_uneven_filelist(base, zip(suffixes, boundaries))

    print("\nSaving output files:\n")
def get_data(filename, match, keys):
    """Load an HDF5 file and reduce it to the requested datasets.

    filename -- path to the input hdf5 file
    match -- key that must exist in the file (checked via check.key_exists)
    keys -- optional comma-separated list of dataset keys to keep

    NOTE(review): the opening of this function is not visible in this
    chunk; the signature is reconstructed from the call sites below --
    confirm against the original file.
    """
    data = hdf5.load(filename)
    # py3-compatible print() (original used a Python 2 print statement,
    # inconsistent with the other scripts in this project)
    print("\nThe following datasets were found in %s:\n" % filename)
    msg.list_dataset(data)
    check.key_exists(match, data, filename)
    if keys:
        msg.info("Using only: " + keys)
        # FIX: use the `match` parameter instead of the module-level
        # `args.match` the original reached for -- both call sites pass
        # args.match here, so behavior is unchanged, but the function no
        # longer depends on a global defined only in __main__.
        update_data(data, [k.strip() for k in keys.split(',')], match)
    return data


if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: COMBINE")
    args = parser()

    data1 = get_data(args.input1, args.match, args.keys1)
    data2 = get_data(args.input2, args.match, args.keys2)

    check.different_keys(data1, data2, args.match)

    data = merge_data(data1, data2, args.match,
                      args.print_warnings, args.show_progress)

    print("\nThe following datasets will be saved in %s:\n" % args.output)
    msg.list_dataset(data)

    hdf5.save(args.output, data)
def get_args_conv_rescale():
    """Parse command-line arguments for the uint8 -> float rescaler.

    Returns the argparse namespace with: input, output, imgnames (required)
    and low, high (optional float bounds, defaults 0.05 / 0.95).

    NOTE(review): this chunk starts mid-function -- the parser construction
    and the opening of the --input/--output definitions are reconstructed
    minimally here; confirm against the original file.
    """
    import argparse  # local import: the original import block is not visible

    parser = argparse.ArgumentParser()
    required = parser.add_argument_group("required arguments")
    required.add_argument(
        "--input", action="store", dest="input",
        metavar="[path/to/filename]", required=True,
        help="path to input hdf5 file"
    )
    required.add_argument(
        "--output", action="store", dest="output",
        metavar="[path/to/filename]", required=True,
        help="path to output hdf5 file"
    )
    required.add_argument(
        "--imgnames", action="store", dest="imgnames",
        metavar="[imgs tensor]", required=True,
        help="name of images tensor to be rescaled"
    )
    optional = parser.add_argument_group("optional arguments")
    # BUG FIX: without type=float, values supplied on the command line
    # arrive as strings and `args.high - args.low` below raises TypeError;
    # the float defaults masked this whenever the flags were omitted.
    optional.add_argument(
        "--low", action="store", dest="low", type=float,
        metavar="[lower bound]", required=False,
        help="lower float bound", default=0.05
    )
    optional.add_argument(
        "--high", action="store", dest="high", type=float,
        metavar="[upper bound]", required=False,
        help="upper float bound", default=0.95
    )
    return parser.parse_args()


if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: UINT8FLOAT_RESCALER")
    args = get_args_conv_rescale()
    print(args)
    data = hdf5.load(args.input)
    # Map uint8 values [0, 255] linearly onto [low, high].
    data[args.imgnames] = data[args.imgnames] * \
        (args.high - args.low) / 255.0 + args.low
    hdf5.save(args.output, data)
"""Split an HDF5 file into smaller files of a given size."""
import os
import sys
from collections import OrderedDict

from parser import get_args_split as parser
import msg
import hdf5
import h5py
import check
from combine_big import load
from split import generate_filelist
from split import save_filelist

if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: SPLIT")
    args = parser()
    data = load(args.input)

    # Output base name: explicit --prefix if given, else the input path
    # with its extension stripped.
    filelist = generate_filelist(
        args.prefix or os.path.splitext(args.input)[0],
        check.get_size(data), int(args.size))

    # py3 compatibility fixes: print() instead of the Python 2 print
    # statement, and .items() instead of the py2-only .iteritems()
    # (both forms also run under Python 2).
    print("\nSaving output files:\n")

    for f, r in filelist.items():
        msg.list_fileinfo(f, r)
        hdf5.save_subset_big(f, data, r[0], r[1])
def copy(source, output, keys):
    """Copy the requested datasets from *source* to *output*.

    source -- open input h5py file
    output -- output file
    keys -- keys to be copied (missing keys are warned about and skipped)

    NOTE(review): the def line and the start of this docstring are not
    visible in this chunk; the signature is reconstructed from the call
    site below -- confirm against the original file.
    """
    for k in keys:
        if k not in source:
            # idiom fix: the original had a redundant `continue` followed
            # by `else:`; a plain if/else is equivalent.
            msg.warning("%s requested, but not found." % k)
        else:
            msg.info("Copying %s" % k)
            source.copy(k, output)


if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: EXTRACT")
    args = parser()

    f = h5py.File(args.input, 'r')
    o = h5py.File(args.output, 'w')

    print("The following datasets were found in %s:\n" % args.input)
    msg.list_dataset(f)

    copy(f, o, [k.strip() for k in args.keys.split(',')])

    if len(o):
        print("\nThe following dataset were saved in %s:\n" % args.output)
        msg.list_dataset(o)
    else:
        msg.warning("No datasets were copied.")
def merge_data_filenames(filelist, outputfile):
    """Load every HDF5 file in *filelist* and save the merged result.

    filelist -- list of input hdf5 paths
    outputfile -- path of the merged output hdf5 file

    NOTE(review): the def line is not visible in this chunk; the signature
    is reconstructed from the call site below -- confirm against the
    original file.
    """
    for f in filelist:
        # py3-compatible print() (original used a Python 2 print statement)
        print("\t - %s" % f)

    data = OrderedDict()
    attrs = OrderedDict()
    for f in filelist:
        data[f], attrs[f] = hdf5.load(f)

    hdf5.save(outputfile, *merge_data(data, attrs))
    msg.info("Done")


if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: MERGE")

    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--input', type=str, nargs='+', help='input hdf5 list')
    parser.add_argument('--output', type=str, help='output hdf5 file')
    args = parser.parse_args()

    filelist = args.input
    if not filelist:
        msg.error("No files matching --input were found.")
        sys.exit(1)

    merge_data_filenames(filelist, args.output)
def add_data(source, output, range):
    """Copy every dataset of *source* into the matching slice of *output*.

    source -- path of an input hdf5 file
    output -- open output h5py file with pre-created datasets
    range -- (begin, end) row interval written in the output datasets

    NOTE(review): the def line (and the function's real name) is not
    visible in this chunk; the header is reconstructed from the body --
    confirm against the original file. `range` shadows the builtin but is
    kept to preserve the existing call interface.
    """
    data = h5py.File(source, 'r')
    # BUG FIX: the original mapping used the key "f" twice
    # ({"f": source, ..., "f": range[1]}), so the source filename was
    # never printed and the message showed the wrong values.
    print("\nAdding entries from %(src)s in [%(i)d:%(j)d]"
          % {"src": source, "i": range[0], "j": range[1]})
    check.check_keys(data, output)
    check.check_shapes(data, output)
    for key in data:
        output[key][range[0]:range[1]] = data[key]
    data.close()


if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: MERGE")
    args = parser()

    filelist = get_filelist([f.strip() for f in args.input_files.split(',')])
    if not filelist:
        msg.error("No files matching --input were found.")
        sys.exit(1)

    # py3-compatible print() (original used Python 2 print statements)
    print("The following input files were found:\n")
    for f in filelist:
        print("\t - %s" % f)

    output = h5py.File(args.output, 'w')
# NOTE(review): this chunk begins mid-function -- the `def get_args_tnsr():`
# line and the construction of `parser` are outside the visible range; the
# indented fragment below is the tail of that function.
    required = parser.add_argument_group("required arguments")
    required.add_argument("--input", action="store", dest="input",
                          metavar="[path/to/filename]", required=True,
                          help="path to input hdf5 file")
    required.add_argument("--output", action="store", dest="output",
                          metavar="[path/to/filename]", required=True,
                          help="path to output hdf5 file")
    required.add_argument("--tensor", action="store", dest="tensor",
                          metavar="[imgs tensor]", required=True,
                          help="name of tensor to be changed")
    return parser.parse_args()


if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: TFTNSR2PTTNSR")
    args = get_args_tnsr()
    data = hdf5.load(args.input)
    # Move the last axis to position 1 -- presumably converting a
    # TensorFlow channels-last tensor to PyTorch channels-first layout,
    # consistent with the TFTNSR2PTTNSR banner; confirm expected input
    # shape against callers.
    data[args.tensor] = np.moveaxis(data[args.tensor], -1, 1)
    hdf5.save(args.output, data)
def copy(source, output, keys):
    """Copy the requested datasets from *source* to *output*.

    source -- open input h5py file
    output -- output file
    keys -- keys to be copied (missing keys are warned about and skipped)

    NOTE(review): the def line and the start of this docstring are not
    visible in this chunk; the signature is reconstructed from the call
    site below -- confirm against the original file.
    """
    for k in keys:
        if k not in source:
            # idiom fix: the original had a redundant `continue` followed
            # by `else:`; a plain if/else is equivalent.
            msg.warning("%s requested, but not found." % k)
        else:
            msg.info("Copying %s" % k)
            source.copy(k, output)


if __name__ == '__main__':
    msg.box("HDF5 MANIPULATOR: EXTRACT")
    args = parser()

    f = h5py.File(args.input, 'r')
    o = h5py.File(args.output, 'w')

    # py3-compatible print() calls (the original used Python 2 print
    # statements, inconsistent with the duplicate of this script that
    # already uses print()); single-argument print(...) also runs under
    # Python 2.
    print("The following datasets were found in %s:\n" % args.input)
    msg.list_dataset(f)

    copy(f, o, [k.strip() for k in args.keys.split(',')])

    if len(o):
        print("\nThe following dataset were saved in %s:\n" % args.output)
        msg.list_dataset(o)
    else:
        msg.warning("No datasets were copied.")