Code Example #1
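Note: the snippets below are unit-test methods and assume numpy imported as np, the operations module under test imported as ops, and a surrounding unittest.TestCase subclass.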
    def test_merge_indexed_journalled_entries_count(self):
        old = np.asarray([0, 0, 0, 1, 1, 2, 3, 3, 5, 5, 5], dtype=np.int32)
        new = np.asarray([0, 2, 3, 4, 5, 6], dtype=np.int32)
        old_i, new_i = ops.ordered_generate_journalling_indices(old, new)

        old_inds = np.asarray([0, 2, 4, 6, 9, 12, 14, 17, 20, 23, 26, 29])
        old_vals = np.frombuffer(b''.join([
            b'aa', b'ab', b'ac', b'baa', b'bab', b'ca', b'daa', b'dab', b'faa',
            b'fab', b'fac'
        ]),
                                 dtype='S1')
        expected_inds = np.asarray([0, 2, 4, 6, 9, 12, 14, 17, 20, 23, 26, 29],
                                   dtype=np.int32)
        expected_vals = np.asarray([
            b'a', b'a', b'a', b'b', b'a', b'c', b'b', b'a', b'a', b'b', b'a',
            b'b', b'c', b'a', b'd', b'a', b'a', b'd', b'a', b'b', b'f', b'a',
            b'a', b'f', b'a', b'b', b'f', b'a', b'c'
        ],
                                   dtype='S1')
        self.assertTrue(np.array_equal(old_inds, expected_inds))
        self.assertTrue(np.array_equal(old_vals, expected_vals))

        new_inds = np.asarray([0, 2, 4, 7, 9, 12, 14])
        new_vals = np.frombuffer(b''.join(
            [b'ad', b'cb', b'dac', b'ea', b'fad', b'ga']),
                                 dtype='S1')
        to_keep = np.zeros(len(new_i), dtype=bool)
        ops.compare_indexed_rows_for_journalling(old_i, new_i, old_inds,
                                                 old_vals, new_inds, new_vals,
                                                 to_keep)

        count = ops.merge_indexed_journalled_entries_count(
            old_i, new_i, to_keep, old_inds, new_inds)
        self.assertTrue(np.array_equal(count, 43))
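The expected count of 43 is consistent with the test data: the old field holds 29 value bytes in total (the final entry of old_inds), all of which are carried into the merge, and each of the six new entries (b'ad', b'cb', b'dac', b'ea', b'fad', b'ga', 14 bytes in all) either differs from the latest old entry for its id or has no old counterpart, so to_keep retains all of them: 29 + 14 = 43.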
Code Example #2
    def test_compare_rows_for_journalling(self):
        old = np.asarray([0, 0, 0, 1, 1, 2, 3, 3, 5, 5, 5], dtype=np.int32)
        new = np.asarray([0, 2, 3, 4, 5, 6], dtype=np.int32)
        old_i, new_i = ops.ordered_generate_journalling_indices(old, new)

        old_expected = np.asarray([2, 4, 5, 7, -1, 10, -1], dtype=np.int32)
        self.assertTrue(np.array_equal(old_i, old_expected))
        new_expected = np.asarray([0, -1, 1, 2, 3, 4, 5], dtype=np.int32)
        self.assertTrue(np.array_equal(new_i, new_expected))

        old_data = np.asarray([0, 1, 2, 10, 11, 20, 30, 31, 50, 51, 52])
        new_data = np.asarray([2, 20, 31, 40, 52, 60])
        to_keep = np.zeros(len(new_i), dtype=bool)
        ops.compare_rows_for_journalling(old_i, new_i, old_data, new_data,
                                         to_keep)
        expected = np.asarray([False, False, False, False, True, False, True])
        self.assertTrue(np.array_equal(to_keep, expected))

        old_data = np.asarray([0, 1, 2, 10, 11, 20, 30, 31, 50, 51, 52])
        new_data = np.asarray([3, 21, 32, 41, 53, 61])
        to_keep = np.zeros(len(new_i), dtype=bool)
        ops.compare_rows_for_journalling(old_i, new_i, old_data, new_data,
                                         to_keep)
        expected = np.asarray([True, False, True, True, True, True, True])
        self.assertTrue(np.array_equal(to_keep, expected))
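Judging by the expected arrays above, a key is flagged for journalling when it has a row in the new data and either no row in the old data or a latest old value that differs. The following minimal sketch (compare_rows_sketch is a hypothetical name, not ExeTera's implementation) reproduces both expected results:

def compare_rows_sketch(old_i, new_i, old_data, new_data, to_keep):
    # Hypothetical reference sketch inferred from the test above,
    # not ExeTera's actual implementation.
    # old_i[i] / new_i[i]: row index of key i in old_data / new_data, -1 if absent.
    for i in range(len(to_keep)):
        if new_i[i] == -1:
            to_keep[i] = False  # no new row for this key, nothing to journal
        elif old_i[i] == -1:
            to_keep[i] = True   # key only present in the new data
        else:
            # keep the new row only if it differs from the latest old row
            to_keep[i] = old_data[old_i[i]] != new_data[new_i[i]]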
Code Example #3
    def test_merge_journalled_entries(self):
        old = np.asarray([0, 0, 0, 1, 1, 2, 3, 3, 5, 5, 5], dtype=np.int32)
        new = np.asarray([0, 2, 3, 4, 5, 6], dtype=np.int32)
        old_i, new_i = ops.ordered_generate_journalling_indices(old, new)

        old_data = np.asarray([0, 1, 2, 10, 11, 20, 30, 31, 50, 51, 52])
        new_data = np.asarray([2, 20, 31, 40, 52, 60])
        to_keep = np.zeros(len(new_i), dtype=bool)
        ops.compare_rows_for_journalling(old_i, new_i, old_data, new_data,
                                         to_keep)

        dest = np.zeros(len(old) + to_keep.sum(), dtype=old.dtype)
        ops.merge_journalled_entries(old_i, new_i, to_keep, old_data, new_data,
                                     dest)
        expected = np.asarray(
            [0, 1, 2, 10, 11, 20, 30, 31, 40, 50, 51, 52, 60], dtype=np.int32)
        self.assertTrue(np.array_equal(dest, expected))

        old_data = np.asarray([0, 1, 2, 10, 11, 20, 30, 31, 50, 51, 52])
        new_data = np.asarray([3, 21, 32, 40, 53, 60])
        to_keep = np.zeros(len(new_i), dtype=bool)
        ops.compare_rows_for_journalling(old_i, new_i, old_data, new_data,
                                         to_keep)

        dest = np.zeros(len(old) + to_keep.sum(), dtype=old.dtype)
        ops.merge_journalled_entries(old_i, new_i, to_keep, old_data, new_data,
                                     dest)
        expected = np.asarray(
            [0, 1, 2, 3, 10, 11, 20, 21, 30, 31, 32, 40, 50, 51, 52, 53, 60],
            dtype=np.int32)
        self.assertTrue(np.array_equal(dest, expected))
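Judging by the expected output, the merged array keeps every old row (the full journal history) and, for each key flagged in to_keep, appends the corresponding new row after that key's old rows. A minimal sketch with a hypothetical name, assuming the old rows are sorted by key with the latest entry last:

def merge_journalled_sketch(old_i, new_i, to_keep, old_data, new_data, dest):
    # Hypothetical reference sketch inferred from the test above,
    # not ExeTera's actual implementation.
    cur_old = 0  # cursor over the sorted old rows
    cur_dst = 0  # cursor over the destination
    for i in range(len(to_keep)):
        if old_i[i] != -1:
            # copy this key's old rows, up to and including its latest row
            while cur_old <= old_i[i]:
                dest[cur_dst] = old_data[cur_old]
                cur_old += 1
                cur_dst += 1
        if to_keep[i]:
            # append the journalled new row for this key
            dest[cur_dst] = new_data[new_i[i]]
            cur_dst += 1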
Code Example #4
    def test_ordered_generate_journalling_indices(self):
        old = np.asarray([0, 0, 0, 1, 1, 2, 3, 3, 5, 5, 5], dtype=np.int32)
        new = np.asarray([0, 2, 3, 4, 5, 6], dtype=np.int32)
        old_i, new_i = ops.ordered_generate_journalling_indices(old, new)
        old_expected = np.asarray([2, 4, 5, 7, -1, 10, -1], dtype=np.int32)
        self.assertTrue(np.array_equal(old_i, old_expected))
        new_expected = np.asarray([0, -1, 1, 2, 3, 4, 5], dtype=np.int32)
        self.assertTrue(np.array_equal(new_i, new_expected))
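Based on the expected arrays, ordered_generate_journalling_indices appears to walk the ordered union of old and new ids and record, per id, the index of its latest row in the old data and the index of its row in the new data, with -1 marking an absent id. A minimal numpy sketch with a hypothetical name:

import numpy as np

def generate_journalling_indices_sketch(old, new):
    # Hypothetical reference sketch inferred from the test above,
    # not ExeTera's actual implementation.
    # old: sorted ids with duplicates (journal history, latest entry last per id)
    # new: sorted ids, one row per id
    keys = np.union1d(old, new)
    old_i = np.full(len(keys), -1, dtype=np.int32)
    new_i = np.full(len(keys), -1, dtype=np.int32)
    for k, key in enumerate(keys):
        old_rows = np.nonzero(old == key)[0]
        if len(old_rows) > 0:
            old_i[k] = old_rows[-1]  # latest (last) old row for this id
        new_rows = np.nonzero(new == key)[0]
        if len(new_rows) > 0:
            new_i[k] = new_rows[0]   # the single new row for this id
    return old_i, new_i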
Code Example #5
File: journal.py Project: clyyuanzi-london/ExeTera
def journal_table(session, schema, old_src, new_src, src_pk, result):
    old_keys = set(old_src.keys())
    new_keys = set(new_src.keys())

    common_keys = old_keys.intersection(new_keys)
    common_keys.remove('j_valid_from')
    common_keys.remove('j_valid_to')
    old_only_keys = old_keys.difference(new_keys)
    new_only_keys = new_keys.difference(old_keys)

    with utils.Timer("sorting old ids"):
        old_ids = session.get(old_src[src_pk])
        old_ids_ = old_ids.data[:]
        old_ids_valid_from = session.get(old_src['j_valid_from']).data[:]
        old_sorted_index = session.dataset_sort_index((old_ids_, old_ids_valid_from))
    old_count = len(old_ids_)

    with utils.Timer("sorting new_ids"):
        new_ids_ = session.get(new_src[src_pk]).data[:]
        new_sorted_index = session.dataset_sort_index((new_ids_,))
    new_count = len(new_ids_)

    # print("old_ids:", old_ids_[old_sorted_index[:20]])
    # print("new_ids:", new_ids_[new_sorted_index[:20]])

    # get the row maps for rows that we need to compare
    with utils.Timer("generating row_maps for merging"):
        old_ids_ = old_ids_[old_sorted_index]
        new_ids_ = new_ids_[new_sorted_index]
        old_map, new_map = ops.ordered_generate_journalling_indices(old_ids_, new_ids_)

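    # to_keep marks, per key in the row maps, whether a new journal row needs
    # to be written; it is filled in by the field comparisons below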
    to_keep = np.zeros(len(old_map), dtype=bool)

    schema_fields = schema.fields.keys()
    common_keys = [k for k in schema_fields if k in common_keys]
    print("old_map:", old_map)
    print("new_map:", new_map)

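    # first pass: compare each common field between the latest old rows and the
    # new rows to decide which keys need a new journal entry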
    for k in common_keys:
        if k in (src_pk, 'j_valid_from', 'j_valid_to'):
            continue
        old_f = session.get(old_src[k])
        new_f = session.get(new_src[k])
        print(k)
        if isinstance(old_f, flds.IndexedStringField):
            old_f_i_, old_f_v_ = session.apply_index(old_sorted_index, old_f)
            new_f_i_, new_f_v_ = session.apply_index(new_sorted_index, new_f)
            ops.compare_indexed_rows_for_journalling(old_map, new_map,
                                                     old_f_i_, old_f_v_, new_f_i_, new_f_v_,
                                                     to_keep)
        else:
            old_f_ = session.apply_index(old_sorted_index, old_f)
            new_f_ = session.apply_index(new_sorted_index, new_f)
            ops.compare_rows_for_journalling(old_map, new_map, old_f_, new_f_, to_keep)

        print("to_keep:", to_keep.astype(np.uint8))
        print(to_keep.sum(), len(to_keep))

    merged_length = len(old_ids.data) + to_keep.sum()

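    # summary counts: ids only in old, only in new, updated, and not updated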
    only_in_old = 0
    only_in_new = 0
    not_updated = 0
    updated = 0
    for i in range(len(old_map)):
        if old_map[i] == -1:
            only_in_new += 1
        if new_map[i] == -1:
            only_in_old += 1
        if (old_map[i] != -1) and (to_keep[i] == True):
            updated += 1
        if (new_map[i] != -1) and (to_keep[i] == False):
            not_updated += 1

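    # second pass: merge each common field; all old rows are carried forward
    # and, where to_keep is set, the new row for that key is appended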
    for k in common_keys:
        if k in (src_pk, 'j_valid_from', 'j_valid_to'):
            continue
        old_f = session.get(old_src[k])
        new_f = session.get(new_src[k])
        print(k)
        if isinstance(old_f, flds.IndexedStringField):
            old_f_i_, old_f_v_ = session.apply_index(old_sorted_index, old_f)
            new_f_i_, new_f_v_ = session.apply_index(new_sorted_index, new_f)
            dest_i_ = np.zeros(merged_length + 1, old_f_i_.dtype)
            val_count = ops.merge_indexed_journalled_entries_count(old_map, new_map, to_keep,
                                                                   old_f_i_, new_f_i_)
            dest_v_ = np.zeros(val_count, old_f_v_.dtype)
            ops.merge_indexed_journalled_entries(old_map, new_map, to_keep,
                                                 old_f_i_, old_f_v_, new_f_i_, new_f_v_,
                                                 dest_i_, dest_v_)
            dest_f = new_f.create_like(result, k)
            dest_f.indices.write(dest_i_)
            dest_f.values.write(dest_v_)

        else:
            old_f_v_ = session.apply_index(old_sorted_index, old_f)
            new_f_v_ = session.apply_index(new_sorted_index, new_f)
            dest_ = np.zeros(merged_length, old_f_v_.dtype)
            ops.merge_journalled_entries(old_map, new_map, to_keep, old_f_v_, new_f_v_, dest_)
            dest_f = new_f.create_like(result, k)
            dest_f.data.write(dest_)

    print("old_count:", old_count)
    print("new_count:", new_count)
    print("only in old:", only_in_old)
    print("only in new:", only_in_new)
    print("updated:", updated)
    print("not updated:", not_updated)
    print("post journal count:", merged_length)