Example #1
def handle_imports(content, directory):
    import_files = content.get('import', [])
    if isinstance(import_files, basestring):
        import_files = [import_files]
    for fname in import_files[::-1]:
        import_path = os.path.abspath(os.path.join(directory, fname))
        print("importing: '%s'" % import_path)
        import_directory = os.path.dirname(import_path)
        with open(import_path) as f:
            import_content = handle_imports(yaml.load(f), import_directory)
            expand_periodic_fields(import_content)
            for wild_key in ('globals/*/fields', 'entities/*/fields'):
                multi_keys = expand_wild(wild_key, import_content)
                for multi_key in multi_keys:
                    import_fields = multi_get(import_content, multi_key)
                    local_fields = multi_get(content, multi_key, [])
                    # fields are in "yaml ordered dict" format and we want
                    # a simple list of items
                    import_fields = [d.items()[0] for d in import_fields]
                    local_fields = [d.items()[0] for d in local_fields]
                    # merge the lists
                    merged_fields = merge_items(import_fields, local_fields)
                    # convert them back to "yaml ordered dict"
                    merged_fields = [{k: v} for k, v in merged_fields]
                    multi_set(content, multi_key, merged_fields)
            content = merge_dicts(import_content, content)
    return content
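
handle_imports navigates the YAML content through project helpers that are not shown here. A minimal sketch of what multi_get and multi_set might look like, assuming '/'-separated key paths into nested dicts (hypothetical implementations, not the project's actual code):

def multi_get(d, path, default=None):
    # walk a nested dict following a '/'-separated key path
    node = d
    for part in path.split('/'):
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]
    return node

def multi_set(d, path, value):
    # create intermediate dicts as needed, then set the leaf value
    parts = path.split('/')
    node = d
    for part in parts[:-1]:
        node = node.setdefault(part, {})
    node[parts[-1]] = value

For example, multi_get({'entities': {'person': {'fields': []}}}, 'entities/person/fields') would return the empty fields list.
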
Example #2
def handle_imports(content, directory):
    import_files = content.get('import', [])
    if isinstance(import_files, basestring):
        import_files = [import_files]
    for fname in import_files[::-1]:
        import_path = os.path.join(directory, fname)
        print("importing: '%s'" % import_path)
        import_directory = os.path.dirname(import_path)
        with open(import_path) as f:
            import_content = handle_imports(yaml.load(f), import_directory)
            expand_periodic_fields(import_content)
            for wild_key in ('globals/*/fields', 'entities/*/fields'):
                multi_keys = expand_wild(wild_key, import_content)
                for multi_key in multi_keys:
                    import_fields = multi_get(import_content, multi_key)
                    local_fields = multi_get(content, multi_key, [])
                    # fields are in "yaml ordered dict" format and we want
                    # a simple list of items
                    import_fields = [d.items()[0] for d in import_fields]
                    local_fields = [d.items()[0] for d in local_fields]
                    # merge the lists
                    merged_fields = merge_items(import_fields, local_fields)
                    # convert them back to "yaml ordered dict"
                    merged_fields = [{k: v} for k, v in merged_fields]
                    multi_set(content, multi_key, merged_fields)
            content = merge_dicts(import_content, content)
    return content
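
Both variants of handle_imports combine imported and local definitions with merge_items and merge_dicts. A possible sketch of these helpers, assuming entries from later arguments override earlier ones while each key keeps the position of its first appearance (hypothetical, for illustration only):

def merge_items(*item_lists):
    # merge lists of (key, value) pairs; later lists win on duplicate keys,
    # but every key keeps the position where it first appeared
    result = []
    position = {}
    for items in item_lists:
        for key, value in items:
            if key in position:
                result[position[key]] = (key, value)
            else:
                position[key] = len(result)
                result.append((key, value))
    return result

def merge_dicts(d1, d2):
    # shallow merge: keys from d2 override keys from d1
    merged = dict(d1)
    merged.update(d2)
    return merged

With these semantics, merge_dicts(import_content, content) would let the importing file override anything defined in the imported file, which is consistent with the local content being passed last above.
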
Example #3
def load_def(localdir, ent_name, section_def, required_fields):
    if 'type' in section_def and 'fields' in section_def:
        raise Exception("invalid structure for '%s': "
                        "type and fields sections are mutually exclusive"
                        % ent_name)

    if 'type' in section_def:
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        str_type = section_def['type']
        if isinstance(str_type, basestring):
            celltype = field_str_to_type(str_type, "array '%s'" % ent_name)
        else:
            assert isinstance(str_type, type)
            celltype = str_type
        return 'ndarray', load_ndarray(csv_filepath, celltype)

    fields_def = section_def.get('fields')
    if fields_def is not None:
        for fdef in fields_def:
            if isinstance(fdef, basestring):
                raise SyntaxError("invalid field declaration: '%s', you are "
                                  "probably missing a ':'" % fdef)
        if all(isinstance(fdef, dict) for fdef in fields_def):
            fields = fields_yaml_to_type(fields_def)
        else:
            assert all(isinstance(fdef, tuple) for fdef in fields_def)
            fields = fields_def
    else:
        fields = None
    newnames = merge_dicts(invert_dict(section_def.get('oldnames', {})),
                           section_def.get('newnames', {}))
    transpose = section_def.get('transposed', False)

    interpolate_def = section_def.get('interpolate')
    files_def = section_def.get('files')
    if files_def is None:
        #XXX: it might be cleaner to use the same code path as for the
        # multi-file case (however, that would lose the "import any file
        # size" feature that I'm fond of).

        # we can simply return the stream as-is
        #FIXME: stream is not sorted
        # csv file is assumed to be in the correct order (i.e. by period, then id)
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        csv_file = CSV(csv_filepath, newnames,
                       delimiter=',', transpose=transpose)
        if fields is not None:
            fields = required_fields + fields
        stream = csv_file.read(fields)
        if fields is None:
            fields = csv_file.fields
        if interpolate_def is not None:
            raise Exception('interpolate is currently only supported with '
                            'multiple files')
        return 'table', (fields, csv_file.numlines, stream, csv_file)
    else:
        # we have to load all files, merge them and return a stream out of that
        print(" * computing number of rows...")

        # 1) only load required fields
        default_args = dict(newnames=newnames, transpose=transpose)
        if isinstance(files_def, dict):
            files_items = files_def.items()
        elif isinstance(files_def, list) and files_def:
            if isinstance(files_def[0], dict):
                # handle YAML ordered dict structure
                files_items = [d.items()[0] for d in files_def]
            elif isinstance(files_def[0], basestring):
                files_items = [(path, {}) for path in files_def]
            else:
                raise Exception("invalid structure for 'files'")
        else:
            raise Exception("invalid structure for 'files'")

        #XXX: shouldn't we use the "path" defined for the whole entity if any?
        # section_def.get('path')
        files = []
        for path, kwargs in files_items:
            kwargs['newnames'] = \
                merge_dicts(invert_dict(kwargs.pop('oldnames', {})),
                            kwargs.get('newnames', {}))
            f = CSV(complete_path(localdir, path),
                    **merge_dicts(default_args, kwargs))
            files.append(f)
        id_periods = union1d(f.as_array(required_fields) for f in files)

        print(" * reading files...")
        # 2) load all fields
        if fields is None:
            target_fields = merge_items(*[f.fields for f in files])
            fields_per_file = [None for f in files]
        else:
            target_fields = required_fields + fields
            fields_per_file = [[(name, type_) for name, type_ in target_fields
                                if name in f.field_names]
                               for f in files]
            total_fields = set.union(*[set(f.field_names) for f in files])
            missing = set(name for name, _ in target_fields) - total_fields
            if missing:
                raise Exception("the following fields were not found in any "
                                "file: %s" % ", ".join(missing))

        total_lines = len(id_periods)

        # allocate main array
        target = np.empty(total_lines, dtype=np.dtype(target_fields))
        # fill with default values
        target[:] = tuple(missing_values[ftype] for _, ftype in target_fields)
        target['period'] = id_periods['period']
        target['id'] = id_periods['id']

        arrays = [f.as_array(fields_to_load)
                  for f, fields_to_load in zip(files, fields_per_file)]

        # close all files
        for f in files:
            f.close()

        #FIXME: interpolation currently only interpolates missing data points,
        # not data points with their value equal the missing value
        # corresponding to the field type. This can only be fixed once
        # booleans are loaded as int8.
        if interpolate_def is not None:
            if any(v != 'previous_value'
                   for v in interpolate_def.itervalues()):
                raise Exception("currently, only 'previous_value' "
                                "interpolation is supported")
            to_interpolate = [k for k, v in interpolate_def.iteritems()
                              if v == 'previous_value']
        else:
            to_interpolate = []

        interpolate(target, arrays, id_periods, to_interpolate)
        return 'table', (target_fields, total_lines, iter(target), None)
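
load_def resolves every CSV path through complete_path. A minimal sketch of that helper, assuming it keeps absolute paths untouched and anchors relative ones at localdir (hypothetical implementation):

import os

def complete_path(localdir, path):
    # absolute paths are used as-is; relative paths are resolved against
    # the directory the definition file lives in
    if os.path.isabs(path):
        return path
    return os.path.join(localdir, path)
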
Example #4
def merge_h5(input1_path, input2_path, output_path):
    input1_file = tables.openFile(input1_path, mode="r")
    input2_file = tables.openFile(input2_path, mode="r")

    output_file = tables.openFile(output_path, mode="w")
    output_globals = output_file.createGroup("/", "globals", "Globals")

    print "copying globals from", input1_path,
    copyTable(input1_file.root.globals.periodic, output_file, output_globals)
    print "done."

    input1_entities = input1_file.root.entities
    input2_entities = input2_file.root.entities

    fields1 = get_h5_fields(input1_file)
    fields2 = get_h5_fields(input2_file)

    ent_names1 = set(fields1.keys())
    ent_names2 = set(fields2.keys())

    output_entities = output_file.createGroup("/", "entities", "Entities")
    for ent_name in sorted(ent_names1 | ent_names2):
        print
        print ent_name
        ent_fields1 = fields1.get(ent_name, [])
        ent_fields2 = fields2.get(ent_name, [])
        output_fields = merge_items(ent_fields1, ent_fields2)
        output_table = output_file.createTable(output_entities, ent_name,
                                               np.dtype(output_fields))

        if ent_name in ent_names1:
            table1 = getattr(input1_entities, ent_name)
            print " * indexing table from %s ..." % input1_path,
            input1_rows = index_table_light(table1)
            print "done."
        else:
            table1 = None
            input1_rows = {}

        if ent_name in ent_names2:
            table2 = getattr(input2_entities, ent_name)
            print " * indexing table from %s ..." % input2_path,
            input2_rows = index_table_light(table2)
            print "done."
        else:
            table2 = None
            input2_rows = {}

        print " * merging: ",
        input1_periods = input1_rows.keys()
        input2_periods = input2_rows.keys()
        output_periods = sorted(set(input1_periods) | set(input2_periods))

        def merge_period(period_idx, period):
            if ent_name in ent_names1:
                start, stop = input1_rows.get(period, (0, 0))
                input1_array = table1.read(start, stop)
            else:
                input1_array = None

            if ent_name in ent_names2:
                start, stop = input2_rows.get(period, (0, 0))
                input2_array = table2.read(start, stop)
            else:
                input2_array = None

            if ent_name in ent_names1 and ent_name in ent_names2:
                output_array, _ = mergeArrays(input1_array, input2_array)
            elif ent_name in ent_names1:
                output_array = input1_array
            elif ent_name in ent_names2:
                output_array = input2_array
            else:
                raise Exception("this shouldn't have happened")
            output_table.append(output_array)
            output_table.flush()

        loop_wh_progress(merge_period, output_periods)
        print " done."

    input1_file.close()
    input2_file.close()
    output_file.close()
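
merge_h5 indexes each entity table by period before merging. A hypothetical sketch of index_table_light that is consistent with how it is used here, assuming the table is sorted by the index column and mapping each period to its (start, stop) row slice:

def index_table_light(table, index_col='period'):
    # assumes the table is sorted by index_col
    rows = {}
    periods = table.col(index_col)
    start = 0
    for i in range(1, len(periods) + 1):
        if i == len(periods) or periods[i] != periods[start]:
            rows[periods[start]] = (start, i)
            start = i
    return rows
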
Example #5
def merge_group(parent1, parent2, name, output_file, index_col):
    print()
    print(name)
    print('=' * len(name))

    group1 = getattr(parent1, name, None)
    group2 = getattr(parent2, name, None)
    if group1 is None and group2 is None:
        print("node not found in either input files, skipped")
        return

    output_group = output_file.create_group("/", name)
    fields1 = get_group_fields(group1)
    fields2 = get_group_fields(group2)
    ent_names1 = set(fields1.keys())
    ent_names2 = set(fields2.keys())
    for ent_name in sorted(ent_names1 | ent_names2):
        print()
        print(ent_name)
        ent_fields1 = fields1.get(ent_name, [])
        ent_fields2 = fields2.get(ent_name, [])
        output_fields = merge_items(ent_fields1, ent_fields2)
        output_table = output_file.create_table(output_group, ent_name,
                                                np.dtype(output_fields))

        if ent_name in ent_names1:
            table1 = getattr(group1, ent_name)
            # noinspection PyProtectedMember
            print(" * indexing table from %s ..." % group1._v_file.filename,
                  end=' ')
            input1_rows = index_table_light(table1, index_col)
            print("done.")
        else:
            table1 = None
            input1_rows = {}

        if ent_name in ent_names2:
            table2 = getattr(group2, ent_name)
            # noinspection PyProtectedMember
            print(" * indexing table from %s ..." % group2._v_file.filename,
                  end=' ')
            input2_rows = index_table_light(table2, index_col)
            print("done.")
        else:
            table2 = None
            input2_rows = {}

        print(" * merging: ", end=' ')
        input1_periods = input1_rows.keys()
        input2_periods = input2_rows.keys()
        output_periods = sorted(set(input1_periods) | set(input2_periods))

        # noinspection PyUnusedLocal
        def merge_period(period_idx, period):
            if ent_name in ent_names1:
                start, stop = input1_rows.get(period, (0, 0))
                input1_array = table1.read(start, stop)
            else:
                input1_array = None

            if ent_name in ent_names2:
                start, stop = input2_rows.get(period, (0, 0))
                input2_array = table2.read(start, stop)
            else:
                input2_array = None

            if ent_name in ent_names1 and ent_name in ent_names2:
                if 'id' in input1_array.dtype.names:
                    assert 'id' in input2_array.dtype.names
                    output_array, _ = merge_arrays(input1_array, input2_array)
                else:
                    output_array = merge_array_records(input1_array,
                                                       input2_array)

            elif ent_name in ent_names1:
                output_array = input1_array
            elif ent_name in ent_names2:
                output_array = input2_array
            else:
                raise Exception("this shouldn't have happened")
            output_table.append(output_array)
            output_table.flush()

        loop_wh_progress(merge_period, output_periods)
        print(" done.")
Example #6
def diff_array(array1, array2, showdiffs=10, raiseondiff=False):
    if len(array1) != len(array2):
        print("length is different: %d vs %d" % (len(array1), len(array2)))
        ids1 = array1['id']
        ids2 = array2['id']
        all_ids = np.union1d(ids1, ids2)
        notin1 = np.setdiff1d(all_ids, ids1)
        notin2 = np.setdiff1d(all_ids, ids2)
        if len(notin1):
            print("the following ids are not present in file 1:", notin1)
        elif len(notin2):
            print("the following ids are not present in file 2:", notin2)
        else:
            # some ids must be duplicated
            if len(ids1) > len(all_ids):
                print("file 1 contain duplicate ids:", end=' ')
                uniques, dupes = unique_dupes(ids1)
                print(dupes)
                array1 = array1[uniques]
            if len(ids2) > len(all_ids):
                print("file 2 contain duplicate ids:", end=' ')
                uniques, dupes = unique_dupes(ids2)
                print(dupes)
                array2 = array2[uniques]

    fields1 = get_fields(array1)
    fields2 = get_fields(array2)
    fnames1 = set(array1.dtype.names)
    fnames2 = set(array2.dtype.names)
    # use merge_items instead of fnames1 | fnames2 to preserve ordering
    for fname, _ in merge_items(fields1, fields2):
        print("  - %s:" % fname, end=' ')
        if fname not in fnames1:
            print("missing in file 1")
            continue
        elif fname not in fnames2:
            print("missing in file 2")
            continue
        col1, col2 = array1[fname], array2[fname]
        if np.issubdtype(col1.dtype, np.inexact):
            if len(col1) == len(col2):
                both_nan = np.isnan(col1) & np.isnan(col2)
                eq = np.all(both_nan | (col1 == col2))
            else:
                eq = False
        else:
            eq = np.array_equal(col1, col2)

        if eq:
            print("ok")
        else:
            print("different", end=' ')
            if len(col1) != len(col2):
                print("(length)")
            else:
                diff = (col1 != col2).nonzero()[0]
                print("(%d differences)" % len(diff))
                ids = array1['id']
                if len(diff) > showdiffs:
                    diff = diff[:showdiffs]
                print(
                    PrettyTable(
                        [['id', fname + ' (file1)', fname + ' (file2)']] +
                        [[ids[idx], col1[idx], col2[idx]] for idx in diff]))
            if raiseondiff:
                raise Exception('different')
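
diff_array deduplicates ids through unique_dupes. A possible sketch of that helper, assuming it returns the indices of the first occurrence of each id together with the id values that occur more than once (hypothetical implementation):

import numpy as np

def unique_dupes(ids):
    # first_idx: index of the first occurrence of each distinct id
    # dupes: the id values that appear more than once
    uniques, first_idx, counts = np.unique(ids, return_index=True,
                                           return_counts=True)
    dupes = uniques[counts > 1]
    return np.sort(first_idx), dupes
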
Example #7
def merge_h5(input1_path, input2_path, output_path):
    input1_file = tables.open_file(input1_path, mode="r")
    input2_file = tables.open_file(input2_path, mode="r")

    output_file = tables.open_file(output_path, mode="w")

    print("copying globals from", input1_path, end=' ')
    #noinspection PyProtectedMember
    input1_file.root.globals._f_copy(output_file.root, recursive=True)
    print("done.")

    input1_entities = input1_file.root.entities
    input2_entities = input2_file.root.entities

    fields1 = get_h5_fields(input1_file)
    fields2 = get_h5_fields(input2_file)

    ent_names1 = set(fields1.keys())
    ent_names2 = set(fields2.keys())

    output_entities = output_file.create_group("/", "entities", "Entities")
    for ent_name in sorted(ent_names1 | ent_names2):
        print()
        print(ent_name)
        ent_fields1 = fields1.get(ent_name, [])
        ent_fields2 = fields2.get(ent_name, [])
        output_fields = merge_items(ent_fields1, ent_fields2)
        output_table = output_file.create_table(output_entities, ent_name,
                                               np.dtype(output_fields))

        if ent_name in ent_names1:
            table1 = getattr(input1_entities, ent_name)
            print(" * indexing table from %s ..." % input1_path, end=' ')
            input1_rows = index_table_light(table1)
            print("done.")
        else:
            table1 = None
            input1_rows = {}

        if ent_name in ent_names2:
            table2 = getattr(input2_entities, ent_name)
            print(" * indexing table from %s ..." % input2_path, end=' ')
            input2_rows = index_table_light(table2)
            print("done.")
        else:
            table2 = None
            input2_rows = {}

        print(" * merging: ", end=' ')
        input1_periods = input1_rows.keys()
        input2_periods = input2_rows.keys()
        output_periods = sorted(set(input1_periods) | set(input2_periods))

        #noinspection PyUnusedLocal
        def merge_period(period_idx, period):
            if ent_name in ent_names1:
                start, stop = input1_rows.get(period, (0, 0))
                input1_array = table1.read(start, stop)
            else:
                input1_array = None

            if ent_name in ent_names2:
                start, stop = input2_rows.get(period, (0, 0))
                input2_array = table2.read(start, stop)
            else:
                input2_array = None

            if ent_name in ent_names1 and ent_name in ent_names2:
                output_array, _ = merge_arrays(input1_array, input2_array)
            elif ent_name in ent_names1:
                output_array = input1_array
            elif ent_name in ent_names2:
                output_array = input2_array
            else:
                raise Exception("this shouldn't have happened")
            output_table.append(output_array)
            output_table.flush()

        loop_wh_progress(merge_period, output_periods)
        print(" done.")

    input1_file.close()
    input2_file.close()
    output_file.close()
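
All the merge examples drive the per-period work through loop_wh_progress. A minimal sketch of what that helper might do, assuming it simply calls func(index, item) for every item while reporting progress (the real helper may render a fancier progress indicator):

def loop_wh_progress(func, sequence):
    # call func(index, item) for each item, printing a simple counter
    total = len(sequence)
    for i, item in enumerate(sequence, start=1):
        print("%d/%d" % (i, total), end=' ')
        func(i, item)
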
Example #8
def load_def(localdir, ent_name, section_def, required_fields):
    if 'type' in section_def and 'fields' in section_def:
        raise Exception("invalid structure for '%s': "
                        "type and fields sections are mutually exclusive" %
                        ent_name)

    if 'type' in section_def:
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        str_type = section_def['type']
        if isinstance(str_type, basestring):
            celltype = field_str_to_type(str_type, "array '%s'" % ent_name)
        else:
            assert isinstance(str_type, type)
            celltype = str_type
        return 'ndarray', load_ndarray(csv_filepath, celltype)

    fields_def = section_def.get('fields')
    if fields_def is not None:
        for fdef in fields_def:
            if isinstance(fdef, basestring):
                raise SyntaxError("invalid field declaration: '%s', you are "
                                  "probably missing a ':'" % fdef)
        if all(isinstance(fdef, dict) for fdef in fields_def):
            fields = fields_yaml_to_type(fields_def)
        else:
            assert all(isinstance(fdef, tuple) for fdef in fields_def)
            fields = fields_def
        fnames = {name for name, _ in fields}
        for reqname, reqtype in required_fields[::-1]:
            if reqname not in fnames:
                fields.insert(0, (reqname, reqtype))
    else:
        fields = None
    newnames = merge_dicts(invert_dict(section_def.get('oldnames', {})),
                           section_def.get('newnames', {}))
    transpose = section_def.get('transposed', False)

    interpolate_def = section_def.get('interpolate')
    files_def = section_def.get('files')
    if files_def is None:
        # XXX: it might be cleaner to use the same code path as for the
        # multi-file case (however, that would lose the "import any file
        # size" feature that I'm fond of).

        # we can simply return the stream as-is
        # FIXME: stream is not sorted
        # csv file is assumed to be in the correct order (i.e. by period, then id)
        csv_filename = section_def.get('path', ent_name + ".csv")
        csv_filepath = complete_path(localdir, csv_filename)
        csv_file = CSV(csv_filepath,
                       newnames,
                       delimiter=',',
                       transpose=transpose)
        stream = csv_file.read(fields)
        if fields is None:
            fields = csv_file.fields
        if interpolate_def is not None:
            raise Exception('interpolate is currently only supported with '
                            'multiple files')
        return 'table', (fields, csv_file.numlines, stream, csv_file)
    else:
        # we have to load all files, merge them and return a stream out of that
        print(" * computing number of rows...")

        # 1) only load required fields
        default_args = dict(newnames=newnames, transpose=transpose)
        if isinstance(files_def, dict):
            files_items = files_def.items()
        elif isinstance(files_def, list) and files_def:
            if isinstance(files_def[0], dict):
                # handle YAML ordered dict structure
                files_items = [d.items()[0] for d in files_def]
            elif isinstance(files_def[0], basestring):
                files_items = [(path, {}) for path in files_def]
            else:
                raise Exception("invalid structure for 'files'")
        else:
            raise Exception("invalid structure for 'files'")

        # XXX: shouldn't we use the "path" defined for the whole entity if any?
        # section_def.get('path')
        files = []
        for path, kwargs in files_items:
            kwargs['newnames'] = \
                merge_dicts(invert_dict(kwargs.pop('oldnames', {})),
                            kwargs.get('newnames', {}))
            f = CSV(complete_path(localdir, path),
                    **merge_dicts(default_args, kwargs))
            files.append(f)
        id_periods = union1d(f.as_array(required_fields) for f in files)

        print(" * reading files...")
        # 2) load all fields
        if fields is None:
            target_fields = merge_items(*[f.fields for f in files])
            fields_per_file = [None for _ in files]
        else:
            target_fields = fields
            fields_per_file = [[(name, type_) for name, type_ in target_fields
                                if name in f.field_names] for f in files]
            total_fields = set.union(*[set(f.field_names) for f in files])
            missing = set(name for name, _ in target_fields) - total_fields
            if missing:
                raise Exception("the following fields were not found in any "
                                "file: %s" % ", ".join(missing))

        total_lines = len(id_periods)

        # allocate main array
        target = get_default_array(total_lines, np.dtype(target_fields))
        target['period'] = id_periods['period']
        target['id'] = id_periods['id']

        arrays = [
            f.as_array(fields_to_load)
            for f, fields_to_load in zip(files, fields_per_file)
        ]

        # close all files
        for f in files:
            f.close()

        # FIXME: interpolation currently only interpolates missing data points,
        # not data points with their value equal the missing value
        # corresponding to the field type. This can only be fixed once
        # booleans are loaded as int8.
        if interpolate_def is not None:
            if any(v != 'previous_value'
                   for v in interpolate_def.itervalues()):
                raise Exception("currently, only 'previous_value' "
                                "interpolation is supported")
            to_interpolate = [
                k for k, v in interpolate_def.iteritems()
                if v == 'previous_value'
            ]
        else:
            to_interpolate = []

        interpolate(target, arrays, id_periods, to_interpolate)
        return 'table', (target_fields, total_lines, iter(target), None)
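
This version of load_def allocates the target array through get_default_array instead of filling missing_values by hand as in Example #3. A possible sketch of that helper, assuming per-kind fill values (hypothetical; the project's actual missing values may differ):

import numpy as np

def get_default_array(length, dtype):
    # allocate a structured array and fill each column with a default
    # depending on its kind: nan for floats, -1 for ints, False for bools
    fill_for_kind = {'f': np.nan, 'i': -1, 'b': False, 'S': b'', 'U': ''}
    arr = np.empty(length, dtype=dtype)
    for name in dtype.names:
        arr[name] = fill_for_kind.get(dtype[name].kind, 0)
    return arr
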
Example #9
def diff_array(array1, array2, numdiff=10, raiseondiff=False):
    if len(array1) != len(array2):
        print("length is different: %d vs %d" % (len(array1),
                                                 len(array2)))
        ids1 = array1['id']
        ids2 = array2['id']
        all_ids = np.union1d(ids1, ids2)
        notin1 = np.setdiff1d(all_ids, ids1)
        notin2 = np.setdiff1d(all_ids, ids2)
        if len(notin1):
            print("the following ids are not present in file 1:",
                  notin1)
        elif len(notin2):
            print("the following ids are not present in file 2:",
                  notin2)
        else:
            # some ids must be duplicated
            if len(ids1) > len(all_ids):
                print("file 1 contain duplicate ids:", end=' ')
                uniques, dupes = unique_dupes(ids1)
                print(dupes)
                array1 = array1[uniques]
            if len(ids2) > len(all_ids):
                print("file 2 contain duplicate ids:", end=' ')
                uniques, dupes = unique_dupes(ids2)
                print(dupes)
                array2 = array2[uniques]

    fields1 = get_fields(array1)
    fields2 = get_fields(array2)
    fnames1 = set(array1.dtype.names)
    fnames2 = set(array2.dtype.names)
    # use merge_items instead of fnames1 | fnames2 to preserve ordering
    for fname, _ in merge_items(fields1, fields2):
        print("  - %s:" % fname, end=' ')
        if fname not in fnames1:
            print("missing in file 1")
            continue
        elif fname not in fnames2:
            print("missing in file 2")
            continue
        col1, col2 = array1[fname], array2[fname]
        if issubclass(col1.dtype.type, np.inexact):
            if len(col1) == len(col2):
                both_nan = np.isnan(col1) & np.isnan(col2)
                eq = np.all(both_nan | (col1 == col2))
            else:
                eq = False
        else:
            eq = np.array_equal(col1, col2)

        if eq:
            print("ok")
        else:
            print("different", end=' ')
            if len(col1) != len(col2):
                print("(length)")
            else:
                diff = (col1 != col2).nonzero()[0]
                print("(%d differences)" % len(diff))
                ids = array1['id']
                if len(diff) > numdiff:
                    diff = diff[:numdiff]
                print(PrettyTable([['id',
                                    fname + ' (file1)',
                                    fname + ' (file2)']] +
                                  [[ids[idx], col1[idx], col2[idx]]
                                   for idx in diff]))
            if raiseondiff:
                raise Exception('different')
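
A hedged usage sketch for diff_array: comparing the same entity table from two HDF5 output files. File names and node paths are assumptions made up for the example.

import tables

with tables.open_file('simulation_a.h5') as f1, \
        tables.open_file('simulation_b.h5') as f2:
    array1 = f1.root.entities.person.read()
    array2 = f2.root.entities.person.read()
    diff_array(array1, array2, numdiff=5)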