Example #1
0
def diff_h5(input1_path, input2_path, numdiff=10):
    """Print the differences between the entity tables of two HDF5 files.

    Both files are opened read-only. Every table name found under
    ``/entities`` in either file is printed; a table that is missing from one
    of the files is reported and skipped. For tables present in both files,
    each one is indexed with ``index_table_light`` and, for every period
    common to both, the matching row ranges are read and handed to
    ``diff_array``.

    Args:
        input1_path: path of the first file, passed to ``tables.open_file``.
        input2_path: path of the second file, passed to ``tables.open_file``.
        numdiff: forwarded unchanged to ``diff_array`` (presumably the
            maximum number of differences it displays -- confirm against
            its definition).

    Returns:
        None. All results are written to standard output.
    """
    # The context managers guarantee that both files are closed even when
    # indexing, reading or diffing raises part-way through.
    with tables.open_file(input1_path, mode="r") as input1_file, \
            tables.open_file(input2_path, mode="r") as input2_file:
        input1_entities = input1_file.root.entities
        input2_entities = input2_file.root.entities

        # noinspection PyProtectedMember
        ent_names1 = {table._v_name for table in input1_entities}
        # noinspection PyProtectedMember
        ent_names2 = {table._v_name for table in input2_entities}
        for ent_name in sorted(ent_names1 | ent_names2):
            print()
            print(ent_name)
            if ent_name not in ent_names1:
                print("missing in file 1")
                continue
            if ent_name not in ent_names2:
                print("missing in file 2")
                continue

            table1 = getattr(input1_entities, ent_name)
            input1_rows = index_table_light(table1)

            table2 = getattr(input2_entities, ent_name)
            input2_rows = index_table_light(table2)

            # Build the period sets once; they serve both the equality check
            # and the intersection below.
            periods1 = set(input1_rows.keys())
            periods2 = set(input2_rows.keys())
            if periods1 != periods2:
                print("periods are different in both files for '%s'"
                      % ent_name)

            # Only periods present in both files can be compared.
            for period in sorted(periods1 & periods2):
                print("* period:", period)
                start, stop = input1_rows.get(period, (0, 0))
                array1 = table1.read(start, stop)

                start, stop = input2_rows.get(period, (0, 0))
                array2 = table2.read(start, stop)

                diff_array(array1, array2, numdiff)
Example #2
0
def diff_h5(input1_path, input2_path, numdiff=10):
    """Compare the ``/entities`` tables of two HDF5 files and print a report.

    For each table name present in either file the name is printed. Tables
    that exist in only one file are reported as missing and skipped. For
    the others, both tables are indexed with ``index_table_light``; a
    message is printed when the two sets of periods differ, and the rows of
    every period found in both files are read and passed to ``diff_array``.

    Args:
        input1_path: path of the first file (opened read-only).
        input2_path: path of the second file (opened read-only).
        numdiff: passed through to ``diff_array`` (NOTE(review): looks like
            a cap on the number of differences shown -- confirm).

    Returns:
        None. The report goes to standard output.
    """
    # Open both files through context managers so neither handle is leaked
    # if an exception is raised while comparing.
    with tables.open_file(input1_path, mode="r") as input1_file, \
            tables.open_file(input2_path, mode="r") as input2_file:
        input1_entities = input1_file.root.entities
        input2_entities = input2_file.root.entities

        # noinspection PyProtectedMember
        ent_names1 = {table._v_name for table in input1_entities}
        # noinspection PyProtectedMember
        ent_names2 = {table._v_name for table in input2_entities}
        for ent_name in sorted(ent_names1 | ent_names2):
            print()
            print(ent_name)
            if ent_name not in ent_names1:
                print("missing in file 1")
                continue
            if ent_name not in ent_names2:
                print("missing in file 2")
                continue

            table1 = getattr(input1_entities, ent_name)
            input1_rows = index_table_light(table1)

            table2 = getattr(input2_entities, ent_name)
            input2_rows = index_table_light(table2)

            periods1 = set(input1_rows.keys())
            periods2 = set(input2_rows.keys())
            if periods1 != periods2:
                print("periods are different in both files for '%s'"
                      % ent_name)

            # Periods missing from either side cannot be diffed row by row.
            for period in sorted(periods1 & periods2):
                print("* period:", period)
                start, stop = input1_rows.get(period, (0, 0))
                array1 = table1.read(start, stop)

                start, stop = input2_rows.get(period, (0, 0))
                array2 = table2.read(start, stop)

                diff_array(array1, array2, numdiff)
Example #3
0
def merge_h5(input1_path, input2_path, output_path):
    input1_file = tables.openFile(input1_path, mode="r")
    input2_file = tables.openFile(input2_path, mode="r")

    output_file = tables.openFile(output_path, mode="w")
    output_globals = output_file.createGroup("/", "globals", "Globals")

    print "copying globals from", input1_path,
    copyTable(input1_file.root.globals.periodic, output_file, output_globals)
    print "done."

    input1_entities = input1_file.root.entities
    input2_entities = input2_file.root.entities

    fields1 = get_h5_fields(input1_file)
    fields2 = get_h5_fields(input2_file)

    ent_names1 = set(fields1.keys())
    ent_names2 = set(fields2.keys())

    output_entities = output_file.createGroup("/", "entities", "Entities")
    for ent_name in sorted(ent_names1 | ent_names2):
        print
        print ent_name
        ent_fields1 = fields1.get(ent_name, [])
        ent_fields2 = fields2.get(ent_name, [])
        output_fields = merge_items(ent_fields1, ent_fields2)
        output_table = output_file.createTable(output_entities, ent_name,
                                               np.dtype(output_fields))

        if ent_name in ent_names1:
            table1 = getattr(input1_entities, ent_name)
            print " * indexing table from %s ..." % input1_path,
            input1_rows = index_table_light(table1)
            print "done."
        else:
            table1 = None
            input1_rows = {}

        if ent_name in ent_names2:
            table2 = getattr(input2_entities, ent_name)
            print " * indexing table from %s ..." % input2_path,
            input2_rows = index_table_light(table2)
            print "done."
        else:
            table2 = None
            input2_rows = {}

        print " * merging: ",
        input1_periods = input1_rows.keys()
        input2_periods = input2_rows.keys()
        output_periods = sorted(set(input1_periods) | set(input2_periods))

        def merge_period(period_idx, period):
            if ent_name in ent_names1:
                start, stop = input1_rows.get(period, (0, 0))
                input1_array = table1.read(start, stop)
            else:
                input1_array = None

            if ent_name in ent_names2:
                start, stop = input2_rows.get(period, (0, 0))
                input2_array = table2.read(start, stop)
            else:
                input2_array = None

            if ent_name in ent_names1 and ent_name in ent_names2:
                output_array, _ = mergeArrays(input1_array, input2_array)
            elif ent_name in ent_names1:
                output_array = input1_array
            elif ent_name in ent_names2:
                output_array = input2_array
            else:
                raise Exception("this shouldn't have happened")
            output_table.append(output_array)
            output_table.flush()

        loop_wh_progress(merge_period, output_periods)
        print " done."

    input1_file.close()
    input2_file.close()
    output_file.close()
Example #4
0
def diff_h5(input1_path, input2_path, numdiff=10):
    input1_file = tables.openFile(input1_path, mode="r")
    input2_file = tables.openFile(input2_path, mode="r")

#    print "copying globals from", input1_path,
#    input1_file.root.globals._f_copy(output_file.root, recursive=True)
#    print "done."

    input1_entities = input1_file.root.entities
    input2_entities = input2_file.root.entities

    fields1 = get_h5_fields(input1_file)
    fields2 = get_h5_fields(input2_file)

    ent_names1 = set(fields1.keys())
    ent_names2 = set(fields2.keys())
    for ent_name in sorted(ent_names1 | ent_names2):
        print
        print ent_name
        if ent_name not in ent_names1:
            print "missing in file 1"
            continue
        elif ent_name not in ent_names2:
            print "missing in file 2"
            continue

        ent_fields1 = fields1.get(ent_name, [])
        ent_fields2 = fields2.get(ent_name, [])
        fnames1 = set(fname for fname, _ in ent_fields1)
        fnames2 = set(fname for fname, _ in ent_fields2)

        table1 = getattr(input1_entities, ent_name)
        input1_rows = index_table_light(table1)

        table2 = getattr(input2_entities, ent_name)
        input2_rows = index_table_light(table2)

        input1_periods = input1_rows.keys()
        input2_periods = input2_rows.keys()
        if input1_periods != input2_periods:
            print "periods are different in both files for '%s'" % ent_name

        for period in sorted(set(input1_periods) & set(input2_periods)):
            print "* period:", period
            start, stop = input1_rows.get(period, (0, 0))
            array1 = table1.read(start, stop)

            start, stop = input2_rows.get(period, (0, 0))
            array2 = table2.read(start, stop)

            if len(array1) != len(array2):
                print "length is different: %d vs %d" % (len(array1),
                                                         len(array2))
                ids1 = array1['id']
                ids2 = array2['id']
                all_ids = np.union1d(ids1, ids2)
                notin1 = np.setdiff1d(ids1, all_ids)
                notin2 = np.setdiff1d(ids2, all_ids)
                if notin1:
                    print "the following ids are not present in file 1:", \
                          notin1
                elif notin2:
                    print "the following ids are not present in file 2:", \
                          notin2
                else:
                    # some ids must be duplicated
                    if len(ids1) > len(all_ids):
                        print "file 1 contain duplicate ids:",
                        uniques, dupes = unique_dupes(ids1)
                        print dupes
                        array1 = array1[uniques]
                    if len(ids2) > len(all_ids):
                        print "file 2 contain duplicate ids:",
                        uniques, dupes = unique_dupes(ids2)
                        print dupes
                        array2 = array2[uniques]

            for fname in sorted(fnames1 | fnames2):
                print "  - %s:" % fname,
                if fname not in fnames1:
                    print "missing in file 1"
                    continue
                elif fname not in fnames2:
                    print "missing in file 2"
                    continue
                col1, col2 = array1[fname], array2[fname]
                if np.array_equal(col1, col2):
                    print "ok"
                else:
                    print "different",
                    if len(col1) != len(col2):
                        print "(length)"
                    else:
                        diff = (col1 != col2).nonzero()[0]
                        print "(%d differences)" % len(diff)
                        ids = array1['id']
                        if len(diff) > numdiff:
                            diff = diff[:numdiff]
                        print PrettyTable([['id',
                                            fname + ' (file1)',
                                            fname + ' (file2)']] +
                                          [[ids[idx], col1[idx], col2[idx]]
                                           for idx in diff])

    input1_file.close()
    input2_file.close()
Example #5
0
def merge_group(parent1, parent2, name, output_file, index_col):
    """Merge the tables of one named group from two parents into output_file.

    The group called ``name`` is looked up on ``parent1`` and ``parent2``.
    When neither has it, a message is printed and nothing is created.
    Otherwise a group of the same name is created at the root of
    ``output_file`` and, for each table listed by ``get_group_fields`` for
    either input group, an output table is created and filled one period at
    a time: rows are merged when the table exists in both groups (with
    ``merge_arrays`` if the first array has an 'id' field, with
    ``merge_array_records`` otherwise) and copied unchanged when it exists
    in only one.

    Args:
        parent1: object on which the first group is looked up by attribute.
        parent2: object on which the second group is looked up by attribute.
        name: name of the group to merge; also used as the output group name.
        output_file: file object providing ``create_group``/``create_table``.
        index_col: forwarded to ``index_table_light`` for both tables.

    Returns:
        None. Progress is reported on standard output.
    """
    print()
    print(name)
    print(len(name) * '=')

    first_group = getattr(parent1, name, None)
    second_group = getattr(parent2, name, None)
    if first_group is None and second_group is None:
        print("node not found in either input files, skipped")
        return

    merged_group = output_file.create_group("/", name)
    first_fields = get_group_fields(first_group)
    second_fields = get_group_fields(second_group)
    first_names = set(first_fields.keys())
    second_names = set(second_fields.keys())

    for ent_name in sorted(first_names | second_names):
        print()
        print(ent_name)
        merged_fields = merge_items(first_fields.get(ent_name, []),
                                    second_fields.get(ent_name, []))
        merged_dtype = np.dtype(merged_fields)
        merged_table = output_file.create_table(merged_group, ent_name,
                                                merged_dtype)

        # Membership is fixed for the whole iteration, so compute it once.
        in_first = ent_name in first_names
        in_second = ent_name in second_names

        first_table, first_rows = None, {}
        if in_first:
            first_table = getattr(first_group, ent_name)
            # noinspection PyProtectedMember
            first_filename = first_group._v_file.filename
            print(" * indexing table from %s ..." % first_filename, end=' ')
            first_rows = index_table_light(first_table, index_col)
            print("done.")

        second_table, second_rows = None, {}
        if in_second:
            second_table = getattr(second_group, ent_name)
            # noinspection PyProtectedMember
            second_filename = second_group._v_file.filename
            print(" * indexing table from %s ..." % second_filename, end=' ')
            second_rows = index_table_light(second_table, index_col)
            print("done.")

        print(" * merging: ", end=' ')
        first_periods = set(first_rows.keys())
        second_periods = set(second_rows.keys())
        all_periods = sorted(first_periods | second_periods)

        # noinspection PyUnusedLocal
        def merge_period(period_idx, period):
            # A period absent from one input gives the (0, 0) range, i.e. an
            # empty read from that table.
            first_array = None
            if in_first:
                lo, hi = first_rows.get(period, (0, 0))
                first_array = first_table.read(lo, hi)

            second_array = None
            if in_second:
                lo, hi = second_rows.get(period, (0, 0))
                second_array = second_table.read(lo, hi)

            if in_first and in_second:
                if 'id' in first_array.dtype.names:
                    assert 'id' in second_array.dtype.names
                    merged, _ = merge_arrays(first_array, second_array)
                else:
                    merged = merge_array_records(first_array, second_array)
            elif in_first:
                merged = first_array
            elif in_second:
                merged = second_array
            else:
                raise Exception("this shouldn't have happened")
            merged_table.append(merged)
            merged_table.flush()

        loop_wh_progress(merge_period, all_periods)
        print(" done.")
Example #6
0
def merge_h5(input1_path, input2_path, output_path):
    """Merge the entity tables of two HDF5 files into a newly created file.

    The ``globals`` node of the first input is copied recursively to the
    output. Then, for every entity listed by ``get_h5_fields`` for either
    input, an output table is created under ``/entities`` with a dtype
    built from ``merge_items`` applied to both field lists, and the rows of
    each period are appended to it: merged with ``merge_arrays`` when the
    entity exists in both inputs, copied unchanged otherwise.

    Args:
        input1_path: path of the first input file (opened read-only).
        input2_path: path of the second input file (opened read-only).
        output_path: path of the file to create (opened with mode "w").

    Returns:
        None. Progress messages are printed to standard output.
    """
    # Context managers make sure all three files are closed (and the output
    # is not left open) when any step of the merge raises.
    with tables.open_file(input1_path, mode="r") as input1_file, \
            tables.open_file(input2_path, mode="r") as input2_file, \
            tables.open_file(output_path, mode="w") as output_file:
        print("copying globals from", input1_path, end=' ')
        # noinspection PyProtectedMember
        input1_file.root.globals._f_copy(output_file.root, recursive=True)
        print("done.")

        input1_entities = input1_file.root.entities
        input2_entities = input2_file.root.entities

        fields1 = get_h5_fields(input1_file)
        fields2 = get_h5_fields(input2_file)

        ent_names1 = set(fields1.keys())
        ent_names2 = set(fields2.keys())

        output_entities = output_file.create_group("/", "entities",
                                                   "Entities")
        for ent_name in sorted(ent_names1 | ent_names2):
            print()
            print(ent_name)
            ent_fields1 = fields1.get(ent_name, [])
            ent_fields2 = fields2.get(ent_name, [])
            output_fields = merge_items(ent_fields1, ent_fields2)
            output_table = output_file.create_table(output_entities,
                                                    ent_name,
                                                    np.dtype(output_fields))

            if ent_name in ent_names1:
                table1 = getattr(input1_entities, ent_name)
                print(" * indexing table from %s ..." % input1_path,
                      end=' ')
                input1_rows = index_table_light(table1)
                print("done.")
            else:
                table1 = None
                input1_rows = {}

            if ent_name in ent_names2:
                table2 = getattr(input2_entities, ent_name)
                print(" * indexing table from %s ..." % input2_path,
                      end=' ')
                input2_rows = index_table_light(table2)
                print("done.")
            else:
                table2 = None
                input2_rows = {}

            print(" * merging: ", end=' ')
            input1_periods = input1_rows.keys()
            input2_periods = input2_rows.keys()
            output_periods = sorted(set(input1_periods) |
                                    set(input2_periods))

            # The closure reads the per-entity variables of the current
            # iteration; it is handed to loop_wh_progress right below
            # (presumably called once per period before the next entity is
            # processed -- confirm against loop_wh_progress).
            # noinspection PyUnusedLocal
            def merge_period(period_idx, period):
                if ent_name in ent_names1:
                    # A period absent from this input yields an empty read.
                    start, stop = input1_rows.get(period, (0, 0))
                    input1_array = table1.read(start, stop)
                else:
                    input1_array = None

                if ent_name in ent_names2:
                    start, stop = input2_rows.get(period, (0, 0))
                    input2_array = table2.read(start, stop)
                else:
                    input2_array = None

                if ent_name in ent_names1 and ent_name in ent_names2:
                    output_array, _ = merge_arrays(input1_array,
                                                   input2_array)
                elif ent_name in ent_names1:
                    output_array = input1_array
                elif ent_name in ent_names2:
                    output_array = input2_array
                else:
                    raise Exception("this shouldn't have happened")
                output_table.append(output_array)
                output_table.flush()

            loop_wh_progress(merge_period, output_periods)
            print(" done.")