Esempio n. 1
0
def filter_h5(input_path, output_path, condition):
    print "filtering for '%s'" % condition
    input_file = tables.openFile(input_path, mode="r")
    output_file = tables.openFile(output_path, mode="w")

    # copy globals
    input_file.root.globals._f_copy(output_file.root, recursive=True)

    output_entities = output_file.createGroup("/", "entities", "Entities")
    for table in input_file.iterNodes(input_file.root.entities):
        print table._v_name, "..."
        copyTable(table, output_entities, condition=condition)

    input_file.close()
    output_file.close()
Esempio n. 2
0
def filter_h5(input_path, output_path, condition):
    print "filtering for '%s'" % condition
    input_file = tables.openFile(input_path, mode="r")
    output_file = tables.openFile(output_path, mode="w")

    output_globals = output_file.createGroup("/", "globals", "Globals")
    copyTable(input_file.root.globals.periodic, output_file, output_globals)

    output_entities = output_file.createGroup("/", "entities", "Entities")
    for table in input_file.iterNodes(input_file.root.entities):
        print table._v_name, "..."
        copyTable(table, output_file, output_entities, condition=condition)

    input_file.close()
    output_file.close()
Esempio n. 3
0
def dropfields(input_path, output_path, todrop):
    input_file = tables.openFile(input_path, mode="r")
    input_root = input_file.root

    output_file = tables.openFile(output_path, mode="w")
    output_globals = output_file.createGroup("/", "globals", "Globals")

    print " * copying globals ...",
    copyTable(input_root.globals.periodic, output_file, output_globals)
    print "done."

    output_entities = output_file.createGroup("/", "entities", "Entities")
    for table in input_file.iterNodes(input_root.entities):
        table_fields = get_fields(table)
        table_fields = [(fname, ftype) for fname, ftype in table_fields
                        if fname not in todrop]
        print " * copying table %s (%.2f Mb) ..." % (table._v_name,
                                                     table_size(table)),
        copyTable(table, output_file, output_entities,
                  table_fields)
        print "done."

    input_file.close()
    output_file.close()
Esempio n. 4
0
def merge_h5(input1_path, input2_path, output_path):
    input1_file = tables.openFile(input1_path, mode="r")
    input2_file = tables.openFile(input2_path, mode="r")

    output_file = tables.openFile(output_path, mode="w")
    output_globals = output_file.createGroup("/", "globals", "Globals")

    print "copying globals from", input1_path,
    copyTable(input1_file.root.globals.periodic, output_file, output_globals)
    print "done."

    input1_entities = input1_file.root.entities
    input2_entities = input2_file.root.entities

    fields1 = get_h5_fields(input1_file)
    fields2 = get_h5_fields(input2_file)

    ent_names1 = set(fields1.keys())
    ent_names2 = set(fields2.keys())

    output_entities = output_file.createGroup("/", "entities", "Entities")
    for ent_name in sorted(ent_names1 | ent_names2):
        print
        print ent_name
        ent_fields1 = fields1.get(ent_name, [])
        ent_fields2 = fields2.get(ent_name, [])
        output_fields = merge_items(ent_fields1, ent_fields2)
        output_table = output_file.createTable(output_entities, ent_name,
                                               np.dtype(output_fields))

        if ent_name in ent_names1:
            table1 = getattr(input1_entities, ent_name)
            print " * indexing table from %s ..." % input1_path,
            input1_rows = index_table_light(table1)
            print "done."
        else:
            table1 = None
            input1_rows = {}

        if ent_name in ent_names2:
            table2 = getattr(input2_entities, ent_name)
            print " * indexing table from %s ..." % input2_path,
            input2_rows = index_table_light(table2)
            print "done."
        else:
            table2 = None
            input2_rows = {}

        print " * merging: ",
        input1_periods = input1_rows.keys()
        input2_periods = input2_rows.keys()
        output_periods = sorted(set(input1_periods) | set(input2_periods))

        def merge_period(period_idx, period):
            if ent_name in ent_names1:
                start, stop = input1_rows.get(period, (0, 0))
                input1_array = table1.read(start, stop)
            else:
                input1_array = None

            if ent_name in ent_names2:
                start, stop = input2_rows.get(period, (0, 0))
                input2_array = table2.read(start, stop)
            else:
                input2_array = None

            if ent_name in ent_names1 and ent_name in ent_names2:
                output_array, _ = mergeArrays(input1_array, input2_array)
            elif ent_name in ent_names1:
                output_array = input1_array
            elif ent_name in ent_names2:
                output_array = input2_array
            else:
                raise Exception("this shouldn't have happened")
            output_table.append(output_array)
            output_table.flush()

        loop_wh_progress(merge_period, output_periods)
        print " done."

    input1_file.close()
    input2_file.close()
    output_file.close()