Example #1
def merge_data(data_list, attrs_list):
    """Merge dictionaries with data.

    Keyword arguments:
    data_list -- the dictionary of per-file data dictionaries
    attrs_list -- the dictionary of per-file attributes dictionaries
    """

    data = None
    attrs = None

    for f in data_list:
        size = check.get_size(data_list[f])
        if not data:
            print "\nThe following datasets were found in %s:\n" % f
            msg.list_dataset(data_list[f])
            data = data_list[f]
            attrs = attrs_list[f]
        else:
            print "\nAdding %(n)d entries from %(f)s" % {"n": size, "f": f}
            check.check_keys(data, data_list[f])
            check.check_shapes(data, data_list[f])
            for key in data_list[f]:
                data[key] = np.append(data[key], data_list[f][key], axis=0)
            attrs['n_events'] += attrs_list[f]['n_events']

    return data, attrs
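
A minimal usage sketch for merge_data (not from the source: the file names, dataset shapes, and "energy" key below are made up, and the project's msg and check helper modules must be importable for merge_data to run):

import numpy as np

# Hypothetical per-file inputs: keys are file names, values are
# {dataset_name: numpy array} mappings plus matching attribute dictionaries.
data_list = {
    "run1.h5": {"energy": np.zeros((4, 3))},
    "run2.h5": {"energy": np.ones((6, 3))},
}
attrs_list = {
    "run1.h5": {"n_events": 4},
    "run2.h5": {"n_events": 6},
}

data, attrs = merge_data(data_list, attrs_list)
print(data["energy"].shape)   # (10, 3)
print(attrs["n_events"])      # 10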
Example #2
def load(filename, mode='r'):
    """Load hdf5 file and print included datasets.

    Keyword arguments:
    filename -- file to load
    mode -- h5py file mode (default 'r')
    """

    f = h5py.File(filename, mode)

    print "\nThe following datasets were found in %s:\n" % filename
    msg.list_dataset(f)

    return f
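
A quick usage sketch for load (the "events.h5" file and "energy" dataset are hypothetical; h5py and the project's msg module are assumed to be importable):

f = load("events.h5", mode="r")   # opens the file and lists its datasets
energy = f["energy"][:]           # read one dataset fully into a numpy array
f.close()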
Example #4
def get_data(filename, match, keys):
    """Load file, check if contains match,
    update datasets based on command line options. Return data dictionary.

    Keyword arguments:
    filename -- input hdf5 file
    match -- common key used to order data
    keys -- user-chosen datasets to save
    """

    data = hdf5.load(filename)

    print("\nThe following datasets were found in %s:\n" % filename)
    msg.list_dataset(data)

    check.key_exists(match, data, filename)

    if keys:
        msg.info("Using only: " + keys)
        update_data(data, [k.strip() for k in keys.split(',')], match)

    return data
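
For illustration, get_data could be driven like this (hypothetical file, match key, and dataset names; the project's hdf5, msg, and check modules plus the update_data helper called above must be available):

# keys is a comma-separated string; it is split, stripped, and passed to
# update_data together with the match key.
data = get_data("input.h5", match="event_id", keys="energy, position")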
Example #6
    msg.list_dataset(data)

    check.key_exists(match, data, filename)

    if keys:
        msg.info("Using only: " + keys)
        update_data(data, [k.strip() for k in keys.split(',')], match)

    return data

if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: COMBINE")

    args = parser()

    data1 = get_data(args.input1, args.match, args.keys1)
    data2 = get_data(args.input2, args.match, args.keys2)

    check.different_keys(data1, data2, args.match)

    data = merge_data(data1, data2, args.match,
                      args.print_warnings, args.show_progress)

    print "\nThe following datasets will be saved in %s:\n" % args.output
    msg.list_dataset(data)

    hdf5.save(args.output, data)

    msg.info("Done")
Example #7
#!/usr/bin/env python
"""
Print info on datasets in hdf5 file.
"""
import sys
sys.path.append('..')
import hdf5
import msg

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("usage: ./print file")
        sys.exit(1)

    print("\nThe following datasets were found in %s:\n" % sys.argv[1])
    msg.list_dataset(hdf5.load(sys.argv[1]))
Example #8
            msg.warning("%s requested, but not found." % k)
            continue
        else:
            msg.info("Copying %s" % k)
            source.copy(k, output)


if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: EXTRACT")

    args = parser()
    f = h5py.File(args.input, 'r')
    o = h5py.File(args.output, 'w')

    print("The following datasets were found in %s:\n" % args.input)
    msg.list_dataset(f)

    copy(f, o, [k.strip() for k in args.keys.split(',')])

    if len(o):
        print("\nThe following dataset were saved in %s:\n" % args.output)
        msg.list_dataset(o)
    else:
        msg.warning("No datasets were copied.")

    f.close()
    o.close()

    msg.info("Done")
Example #9
        usage()

if __name__ == '__main__':
    if len(sys.argv) < 4:
        usage()

    train_frac, val_frac = get_fractions()

    if train_frac + val_frac > 1.0:
        msg.error("Total fraction must be <= 1.0")
        sys.exit(1)

    f = h5py.File(sys.argv[1], 'r+')

    print "\nThe following datasets were found in %s:\n" % sys.argv[1]
    msg.list_dataset(f)

    N = check.get_size(f)
    nof_train = int(train_frac * N)
    nof_val = int(val_frac * N)
    nof_test = N - nof_train - nof_val

    print "\nThe following split will be used:\n"
    print "\t - training: %d entries" % nof_train
    print "\t - validation: %d entries" % nof_val
    print "\t - testing: %d entries" % nof_test

    train_dict = {name: (0, nof_train)
                  for name in f.keys()}
    valid_dict = {name: (nof_train, nof_train + nof_val)
                  for name in f.keys()}
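
The (start, stop) range dictionaries built above could then be applied to slice every dataset, for example (an illustration only, not part of the source; slice_datasets is a hypothetical helper):

def slice_datasets(f, ranges):
    """Cut each dataset in f to its (start, stop) range and return plain arrays."""
    return {name: f[name][start:stop] for name, (start, stop) in ranges.items()}

train = slice_datasets(f, train_dict)
valid = slice_datasets(f, valid_dict)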
Example #11
                "key": key,
                "id": i
            })
            sys.exit(1)


if __name__ == '__main__':
    if len(sys.argv) < 3:
        print("usage: ./diff file1 file2 [fullcheck]")
        sys.exit(1)

    in1 = h5py.File(sys.argv[1], 'r')
    in2 = h5py.File(sys.argv[2], 'r')

    print("\nThe following datasets were found in %s:\n" % sys.argv[1])
    msg.list_dataset(in1)
    print("\nThe following datasets were found in %s:\n" % sys.argv[2])
    msg.list_dataset(in2)

    check.check_keys(in1, in2)

    check.same_sizes(in1, in2)

    check.check_shapes(in1, in2)

    for key in in1:
        try:
            if not np.array_equal(in1[key], in2[key]):
                msg.error("%s datasets are different." % key)
                sys.exit(1)
            else:
Example #12
if __name__ == '__main__':

    msg.box("HDF5 MANIPULATOR: COMBINE")

    args = parser()

    in1, in2 = load(args.input1), load(args.input2)

    out = h5py.File(args.output, 'w')

    match(in1, in2, out, args.match)

    check.same_sizes(in1, in2)

    keys1 = get_keys(in1, args.keys1, args.match)
    keys2 = get_keys(in2, args.keys2, args.match)

    check.check_duplicates(keys1, keys2)

    copy(in1, out, keys1)
    copy(in2, out, keys2)

    print "\nThe following datasets were saved in %s:\n" % args.output
    msg.list_dataset(out)

    in1.close()
    in2.close()
    out.close()

    msg.info("Done")
Example #13
import sys
import numpy as np

sys.path.append('..')
import hdf5
import msg
import check

if __name__ == '__main__':
    if len(sys.argv) != 3:
        print("usage: ./diff file1 file2")
        sys.exit(1)

    data1 = hdf5.load(sys.argv[1])
    data2 = hdf5.load(sys.argv[2])

    print("\nThe following datasets were found in %s:\n" % sys.argv[1])
    msg.list_dataset(data1)
    print("\nThe following datasets were found in %s:\n" % sys.argv[2])
    msg.list_dataset(data2)

    check.check_keys(data1, data2)

    if check.get_size(data1) != check.get_size(data2):
        msg.error("Different number of entries.")
        sys.exit(1)

    check.check_shapes(data1, data2)

    for key in data1:
        if not np.equal(data1[key], data2[key]).all():
            msg.error("Different entries for dataset: %s" % key)
            sys.exit(1)
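
A side note on the comparison: np.equal(...).all() raises if the two arrays cannot be broadcast against each other, whereas np.array_equal simply returns False for mismatched shapes; because check.check_shapes has already run above, both behave the same here. A shape-safe variant of the loop, for illustration:

for key in data1:
    if not np.array_equal(data1[key], data2[key]):  # False (not an exception) on shape mismatch
        msg.error("Different entries for dataset: %s" % key)
        sys.exit(1)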