def test_dump_to_directory_one_entity(use_testdb, dumpdir):
    """Dump only the ``many1`` entity and check each status that is yielded.

    Every status dict produced by ``dump_to_directory`` must match exactly one
    entry of the expected pool, and the pool has to be empty once the terminal
    status (the one whose ``path`` is ``None``) arrives.
    """
    dumper = PonyDump(testdb)
    _fill_testdb()

    # Per-entity statuses are compared with their 'path' key removed.
    pending = [
        dict(entity='many1', chunk_size=250, count=2, current=current, pk=pk)
        for current, pk in ((1, (1,)), (2, (2,)), (2, None))
    ]
    # Terminal status: always the last one, compared with 'path' included.
    pending.append(dict(entity=None, path=None, current=0, count=0, pk=None))

    allowed_names = ['many1_dump.jsonl']

    for status in dumper.dump_to_directory(dumpdir, entities=['many1']):
        path = status.pop('path')
        is_terminal = path is None

        if is_terminal:
            # Put the key back so the terminal status equals its expected entry.
            status['path'] = None
        else:
            assert os.path.basename(path) in allowed_names
            assert os.path.isfile(path)

        assert status in pending
        pending.remove(status)

        if is_terminal:
            assert not pending

    assert not pending
Beispiel #2
0
def test_dump_to_directory_one_entity(use_testdb, dumpdir):
    """Check the statuses yielded while dumping just the ``many1`` entity.

    Each yielded status is matched against (and removed from) a list of
    expected statuses; nothing may be left over after the final status,
    which is the only one expected to carry ``path=None``.
    """
    dumper = PonyDump(testdb)
    _fill_testdb()

    remaining = [
        {'entity': 'many1', 'current': 1, 'count': 2, 'pk': (1,), 'chunk_size': 250},
        {'entity': 'many1', 'current': 2, 'count': 2, 'pk': (2,), 'chunk_size': 250},
        {'entity': 'many1', 'current': 2, 'count': 2, 'pk': None, 'chunk_size': 250},
        # Always last
        {'entity': None, 'path': None, 'current': 0, 'count': 0, 'pk': None},
    ]

    filenames = ['many1_dump.jsonl']

    for status in dumper.dump_to_directory(dumpdir, entities=['many1']):
        path = status.pop('path')

        if path is None:
            # Final status: restore the key so it can be compared as a whole,
            # then make sure nothing else is still expected.
            status['path'] = path
            assert status in remaining
            remaining.remove(status)
            assert not remaining
            continue

        _, filename = os.path.split(path)
        assert filename in filenames
        assert os.path.isfile(path)

        assert status in remaining
        remaining.remove(status)

    assert not remaining
def test_dump_to_directory_exclude_attrs(use_testdb, dumpdir):
    """Check the ``many1`` dump when ``many2`` is listed under ``exclude``.

    The expected ``many1_dump.jsonl`` lines contain only ``_entity`` and
    ``id``, with no ``many2`` key.
    """
    many1_params = {'exclude': 'many2'}
    dumper = PonyDump(testdb, dict_params={'many1': many1_params})
    _fill_testdb()

    # Exhaust the iterator; the yielded statuses are not inspected here.
    list(dumper.dump_to_directory(dumpdir))

    expected_lines = [
        '{"_entity": "many1", "id": 1}\n',
        '{"_entity": "many1", "id": 2}\n',
    ]
    _check_dump_content(dumpdir, 'many1_dump.jsonl', expected_lines)
Beispiel #4
0
def test_dump_to_directory_exclude_attrs(use_testdb, dumpdir):
    """Dump everything with ``many2`` excluded for the ``many1`` entity.

    Afterwards ``many1_dump.jsonl`` is expected to hold two lines that carry
    only the ``_entity`` and ``id`` keys.
    """
    dict_params = {'many1': {'exclude': 'many2'}}
    dumper = PonyDump(testdb, dict_params=dict_params)
    _fill_testdb()

    # Run the dump to completion; the individual statuses are irrelevant here.
    for _status in dumper.dump_to_directory(dumpdir):
        continue

    many1_lines = [
        '{"_entity": "many1", "id": 1}\n',
        '{"_entity": "many1", "id": 2}\n',
    ]
    _check_dump_content(dumpdir, 'many1_dump.jsonl', many1_lines)
Beispiel #5
0
def test_dump_to_directory_full(use_testdb, dumpdir, gzip_compression):
    """Dump every entity and verify statuses, created files and file contents.

    Three things are checked:

    * every status yielded by ``dump_to_directory`` matches exactly one
      expected status, and none are left once the terminal status arrives;
    * ``dumpdir`` contains exactly the expected dump files (with a ``.gz``
      suffix when ``gzip_compression`` is truthy);
    * each dump file has the expected lines.
    """
    dumper = PonyDump(testdb)
    _fill_testdb()

    def expected(entity, current, count, pk):
        # One per-entity status as it looks after its 'path' key is popped.
        return {
            'entity': entity,
            'current': current,
            'count': count,
            'pk': pk,
            'chunk_size': 250,
        }

    # The dump order is not strictly defined, so statuses are matched as a pool.
    pending = [
        expected('many1', 1, 2, (1,)),
        expected('many1', 2, 2, (2,)),
        expected('many1', 2, 2, None),
        expected('many2', 1, 2, (1,)),
        expected('many2', 2, 2, (2,)),
        expected('many2', 2, 2, None),
        expected('pdtest1', 1, 3, (1, 2, 3)),
        expected('pdtest1', 1, 3, (1, 2, 3)),
        expected('pdtest1', 3, 3, (4, 5, 5)),
        expected('pdtest1', 3, 3, None),
        expected('pdtest3', 1, 3, (1,)),
        expected('pdtest3', 3, 3, (3,)),
        expected('pdtest3', 3, 3, None),
        expected('pdtest2', 1, 1, (1,)),
        expected('pdtest2', 1, 1, (1,)),
        expected('pdtest2', 1, 1, None),
        expected('pdtest4', 0, 0, None),
        # Always last
        {'entity': None, 'path': None, 'current': 0, 'count': 0, 'pk': None},
    ]

    suffix = '.gz' if gzip_compression else ''
    filenames = {
        name + suffix
        for name in (
            'pdtest1_dump.jsonl',
            'pdtest2_dump.jsonl',
            'pdtest3_dump.jsonl',
            'pdtest4_dump.jsonl',
            'many1_dump.jsonl',
            'many2_dump.jsonl',
        )
    }

    # Check the reporting of dump progress
    progress = dumper.dump_to_directory(
        dumpdir, gzip_compression=gzip_compression
    )
    for status in progress:
        path = status.pop('path')
        is_terminal = path is None

        if is_terminal:
            # Restore the key so the terminal status can be compared as a whole.
            status['path'] = None
        else:
            assert os.path.basename(path) in filenames
            assert os.path.isfile(path)

        assert status in pending
        pending.remove(status)

        if is_terminal:
            assert not pending

    assert not pending

    assert set(os.listdir(dumpdir)) == filenames

    # Check the contents of the dumps:
    # - JSON keys are sorted
    # - commas and colons are followed by spaces
    # - files are UTF-8 without a BOM
    # - lines are terminated with \n
    # - objects inside a dump are sorted by primary key
    # NOTE(review): the names below carry no '.gz' even when gzip_compression
    # is set; presumably _check_dump_content copes with that - confirm.
    expected_content = [
        ('pdtest1_dump.jsonl', [
            '{"_entity": "pdtest1", "k1": 1, "k2": 2, "k3": 3, "test2": 1, "test3": [1, 2], "test4": null}\n',
            '{"_entity": "pdtest1", "k1": 3, "k2": 5, "k3": 6, "test2": null, "test3": [], "test4": null}\n',
            '{"_entity": "pdtest1", "k1": 4, "k2": 5, "k3": 5, "test2": null, "test3": [], "test4": null}\n',
        ]),
        ('pdtest2_dump.jsonl', [
            '{"_entity": "pdtest2", "id": 1, "test1": [1, 2, 3]}\n',
        ]),
        ('pdtest3_dump.jsonl', [
            '{"_entity": "pdtest3", "foo_bool": true, "foo_datetime": "2017-06-01T01:02:03.999987Z", "foo_float": 0.30000000000000004, "foo_int": -4, "foo_longstr": "longstr", "foo_string": "foo\\u0000😊bar", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 1, "test1": [1, 2, 3]}\n',
            '{"_entity": "pdtest3", "foo_bool": false, "foo_datetime": "2017-06-01T01:02:03.999987Z", "foo_float": 0.30000000000000004, "foo_int": 4, "foo_longstr": "longstr", "foo_string": "foo\\u0000😊bar", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 2, "test1": [1, 2, 3]}\n',
            '{"_entity": "pdtest3", "foo_bool": null, "foo_datetime": "1970-01-01T00:00:00.000000Z", "foo_float": 0.0, "foo_int": 2147483647, "foo_longstr": "string", "foo_string": "string", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 3, "test1": null}\n',
        ]),
        ('pdtest4_dump.jsonl', ['']),
        ('many1_dump.jsonl', [
            '{"_entity": "many1", "id": 1, "many2": [1, 2]}\n',
            '{"_entity": "many1", "id": 2, "many2": [1, 2]}\n',
        ]),
        ('many2_dump.jsonl', [
            '{"_entity": "many2", "id": 1, "many1": [1, 2]}\n',
            '{"_entity": "many2", "id": 2, "many1": [1, 2]}\n',
        ]),
    ]
    for dump_name, lines in expected_content:
        _check_dump_content(dumpdir, dump_name, lines)
def test_dump_to_directory_full(use_testdb, dumpdir, gzip_compression):
    """Run a full dump and check its statuses, its files and their contents.

    The statuses yielded by ``dump_to_directory`` are ticked off against an
    expected list, the set of files in ``dumpdir`` is compared with the
    expected names (suffixed with ``.gz`` when ``gzip_compression`` is
    truthy), and finally the lines of every dump file are checked.
    """
    dumper = PonyDump(testdb)
    _fill_testdb()

    # (entity, current, count, pk) for every expected per-entity status.
    # The dump order is not strictly defined.
    rows = (
        ('many1', 1, 2, (1,)),
        ('many1', 2, 2, (2,)),
        ('many1', 2, 2, None),

        ('many2', 1, 2, (1,)),
        ('many2', 2, 2, (2,)),
        ('many2', 2, 2, None),

        ('pdtest1', 1, 3, (1, 2, 3)),
        ('pdtest1', 1, 3, (1, 2, 3)),
        ('pdtest1', 3, 3, (4, 5, 5)),
        ('pdtest1', 3, 3, None),

        ('pdtest3', 1, 3, (1,)),
        ('pdtest3', 3, 3, (3,)),
        ('pdtest3', 3, 3, None),

        ('pdtest2', 1, 1, (1,)),
        ('pdtest2', 1, 1, (1,)),
        ('pdtest2', 1, 1, None),

        ('pdtest4', 0, 0, None),
    )
    remaining = [
        {'entity': entity, 'current': current, 'count': count, 'pk': pk, 'chunk_size': 250}
        for entity, current, count, pk in rows
    ]
    # Always last
    remaining.append({'entity': None, 'path': None, 'current': 0, 'count': 0, 'pk': None})

    base_names = [
        'pdtest1_dump.jsonl',
        'pdtest2_dump.jsonl',
        'pdtest3_dump.jsonl',
        'pdtest4_dump.jsonl',
        'many1_dump.jsonl',
        'many2_dump.jsonl',
    ]
    if gzip_compression:
        filenames = {name + '.gz' for name in base_names}
    else:
        filenames = set(base_names)

    # Check the reporting of dump progress
    for status in dumper.dump_to_directory(dumpdir, gzip_compression=gzip_compression):
        path = status.pop('path')

        if path is None:
            # Final status: restore the key so it can be compared as a whole,
            # then make sure nothing else is still expected.
            status['path'] = path
            assert status in remaining
            remaining.remove(status)
            assert not remaining
            continue

        _, filename = os.path.split(path)
        assert filename in filenames
        assert os.path.isfile(path)

        assert status in remaining
        remaining.remove(status)

    assert not remaining

    created = set(os.listdir(dumpdir))
    assert created == filenames

    # Check the contents of the dumps:
    # - JSON keys are sorted
    # - commas and colons are followed by spaces
    # - files are UTF-8 without a BOM
    # - lines are terminated with \n
    # - objects inside a dump are sorted by primary key
    pdtest1_lines = [
        '{"_entity": "pdtest1", "k1": 1, "k2": 2, "k3": 3, "test2": 1, "test3": [1, 2], "test4": null}\n',
        '{"_entity": "pdtest1", "k1": 3, "k2": 5, "k3": 6, "test2": null, "test3": [], "test4": null}\n',
        '{"_entity": "pdtest1", "k1": 4, "k2": 5, "k3": 5, "test2": null, "test3": [], "test4": null}\n',
    ]
    _check_dump_content(dumpdir, 'pdtest1_dump.jsonl', pdtest1_lines)

    pdtest2_lines = [
        '{"_entity": "pdtest2", "id": 1, "test1": [1, 2, 3]}\n',
    ]
    _check_dump_content(dumpdir, 'pdtest2_dump.jsonl', pdtest2_lines)

    pdtest3_lines = [
        '{"_entity": "pdtest3", "foo_bool": true, "foo_datetime": "2017-06-01T01:02:03.999987Z", "foo_float": 0.30000000000000004, "foo_int": -4, "foo_longstr": "longstr", "foo_string": "foo\\u0000😊bar", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 1, "test1": [1, 2, 3]}\n',
        '{"_entity": "pdtest3", "foo_bool": false, "foo_datetime": "2017-06-01T01:02:03.999987Z", "foo_float": 0.30000000000000004, "foo_int": 4, "foo_longstr": "longstr", "foo_string": "foo\\u0000😊bar", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 2, "test1": [1, 2, 3]}\n',
        '{"_entity": "pdtest3", "foo_bool": null, "foo_datetime": "1970-01-01T00:00:00.000000Z", "foo_float": 0.0, "foo_int": 2147483647, "foo_longstr": "string", "foo_string": "string", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 3, "test1": null}\n',
    ]
    _check_dump_content(dumpdir, 'pdtest3_dump.jsonl', pdtest3_lines)

    _check_dump_content(dumpdir, 'pdtest4_dump.jsonl', [''])

    many1_lines = [
        '{"_entity": "many1", "id": 1, "many2": [1, 2]}\n',
        '{"_entity": "many1", "id": 2, "many2": [1, 2]}\n',
    ]
    _check_dump_content(dumpdir, 'many1_dump.jsonl', many1_lines)

    many2_lines = [
        '{"_entity": "many2", "id": 1, "many1": [1, 2]}\n',
        '{"_entity": "many2", "id": 2, "many1": [1, 2]}\n',
    ]
    _check_dump_content(dumpdir, 'many2_dump.jsonl', many2_lines)