def test_dump_to_directory_one_entity(use_testdb, dumpdir):
    """Dump only the ``many1`` entity and verify every reported status.

    Each status yielded by ``dump_to_directory`` must match one of the
    expected entries exactly once, and the status with ``path`` set to
    ``None`` must be the last one reported.
    """
    dumper = PonyDump(testdb)
    _fill_testdb()

    # Statuses are compared without their 'path' key, except for the
    # terminating one, which keeps 'path': None.
    pending = [
        {'current': 1, 'count': 2, 'pk': (1,), 'entity': 'many1', 'chunk_size': 250},
        {'current': 2, 'count': 2, 'pk': (2,), 'entity': 'many1', 'chunk_size': 250},
        {'current': 2, 'count': 2, 'pk': None, 'entity': 'many1', 'chunk_size': 250},
        # Always the last one
        {'entity': None, 'path': None, 'current': 0, 'count': 0, 'pk': None},
    ]
    allowed_names = ['many1_dump.jsonl']

    for status in dumper.dump_to_directory(dumpdir, entities=['many1']):
        path = status.pop('path')
        is_final = path is None

        if is_final:
            # Put the key back so the final status matches its expected entry
            status['path'] = path
        else:
            _, basename = os.path.split(path)
            assert basename in allowed_names
            assert os.path.isfile(path)

        assert status in pending
        pending.remove(status)

        if is_final:
            # Nothing may be left once the terminating status arrives
            assert not pending

    assert not pending
def test_dump_to_directory_one_entity(use_testdb, dumpdir):
    """Dump only the ``many1`` entity and verify every reported status.

    Each status yielded by ``dump_to_directory`` must match one of the
    expected entries exactly once, and the status with ``path`` set to
    ``None`` must be the last one reported.
    """
    dumper = PonyDump(testdb)
    _fill_testdb()

    # Statuses are compared without their 'path' key, except for the
    # terminating one, which keeps 'path': None.
    pending = [
        {'current': 1, 'count': 2, 'pk': (1,), 'entity': 'many1', 'chunk_size': 250},
        {'current': 2, 'count': 2, 'pk': (2,), 'entity': 'many1', 'chunk_size': 250},
        {'current': 2, 'count': 2, 'pk': None, 'entity': 'many1', 'chunk_size': 250},
        # Always the last one
        {'entity': None, 'path': None, 'current': 0, 'count': 0, 'pk': None},
    ]
    allowed_names = ['many1_dump.jsonl']

    for status in dumper.dump_to_directory(dumpdir, entities=['many1']):
        path = status.pop('path')
        is_final = path is None

        if is_final:
            # Put the key back so the final status matches its expected entry
            status['path'] = path
        else:
            _, basename = os.path.split(path)
            assert basename in allowed_names
            assert os.path.isfile(path)

        assert status in pending
        pending.remove(status)

        if is_final:
            # Nothing may be left once the terminating status arrives
            assert not pending

    assert not pending
def test_dump_to_directory_exclude_attrs(use_testdb, dumpdir):
    """Dump with ``many2`` excluded from ``many1`` and check the file content.

    The expected ``many1`` dump lines carry only ``_entity`` and ``id``.
    """
    per_entity_params = {
        'many1': {'exclude': 'many2'},
    }
    dumper = PonyDump(testdb, dict_params=per_entity_params)
    _fill_testdb()

    # Statuses are irrelevant here; the generator only has to be exhausted.
    for _status in dumper.dump_to_directory(dumpdir):
        continue

    expected_lines = [
        '{"_entity": "many1", "id": 1}\n',
        '{"_entity": "many1", "id": 2}\n',
    ]
    _check_dump_content(dumpdir, 'many1_dump.jsonl', expected_lines)
def test_dump_to_directory_exclude_attrs(use_testdb, dumpdir):
    """Dump with ``many2`` excluded from ``many1`` and check the file content.

    The expected ``many1`` dump lines carry only ``_entity`` and ``id``.
    """
    per_entity_params = {
        'many1': {'exclude': 'many2'},
    }
    dumper = PonyDump(testdb, dict_params=per_entity_params)
    _fill_testdb()

    # Statuses are irrelevant here; the generator only has to be exhausted.
    for _status in dumper.dump_to_directory(dumpdir):
        continue

    expected_lines = [
        '{"_entity": "many1", "id": 1}\n',
        '{"_entity": "many1", "id": 2}\n',
    ]
    _check_dump_content(dumpdir, 'many1_dump.jsonl', expected_lines)
def test_dump_to_directory_full(use_testdb, dumpdir, gzip_compression):
    """Dump every entity and verify progress statuses, file names and content.

    The statuses yielded by ``dump_to_directory`` are matched one-to-one
    against the expected entries, the directory listing is compared with the
    expected file names (with a ``.gz`` suffix when ``gzip_compression`` is
    truthy), and each dump file is checked line by line.
    """
    dumper = PonyDump(testdb)
    _fill_testdb()

    def progress(entity, current, count, pk):
        # Expected per-entity status; the 'path' key is stripped before comparing.
        return {
            'current': current,
            'count': count,
            'pk': pk,
            'entity': entity,
            'chunk_size': 250,
        }

    # Dump order is not strictly defined
    pending = [
        progress('many1', 1, 2, (1,)),
        progress('many1', 2, 2, (2,)),
        progress('many1', 2, 2, None),
        progress('many2', 1, 2, (1,)),
        progress('many2', 2, 2, (2,)),
        progress('many2', 2, 2, None),
        progress('pdtest1', 1, 3, (1, 2, 3)),
        progress('pdtest1', 1, 3, (1, 2, 3)),
        progress('pdtest1', 3, 3, (4, 5, 5)),
        progress('pdtest1', 3, 3, None),
        progress('pdtest3', 1, 3, (1,)),
        progress('pdtest3', 3, 3, (3,)),
        progress('pdtest3', 3, 3, None),
        progress('pdtest2', 1, 1, (1,)),
        progress('pdtest2', 1, 1, (1,)),
        progress('pdtest2', 1, 1, None),
        progress('pdtest4', 0, 0, None),
        # Always the last one
        {'entity': None, 'path': None, 'current': 0, 'count': 0, 'pk': None},
    ]

    suffix = '.gz' if gzip_compression else ''
    filenames = {
        name + suffix
        for name in (
            'pdtest1_dump.jsonl',
            'pdtest2_dump.jsonl',
            'pdtest3_dump.jsonl',
            'pdtest4_dump.jsonl',
            'many1_dump.jsonl',
            'many2_dump.jsonl',
        )
    }

    # Check the progress reporting of the dump process
    for status in dumper.dump_to_directory(dumpdir, gzip_compression=gzip_compression):
        path = status.pop('path')
        is_final = path is None

        if is_final:
            # Put the key back so the final status matches its expected entry
            status['path'] = path
        else:
            _, basename = os.path.split(path)
            assert basename in filenames
            assert os.path.isfile(path)

        assert status in pending
        pending.remove(status)

        if is_final:
            # Nothing may be left once the terminating status arrives
            assert not pending

    assert not pending

    assert set(os.listdir(dumpdir)) == filenames

    # Check the dump contents:
    # - JSON keys are sorted
    # - commas and colons are followed by spaces
    # - files are UTF-8 without BOM
    # - lines are terminated with \n
    # - objects inside a dump are sorted by primary key

    pdtest1_lines = [
        '{"_entity": "pdtest1", "k1": 1, "k2": 2, "k3": 3, "test2": 1, "test3": [1, 2], "test4": null}\n',
        '{"_entity": "pdtest1", "k1": 3, "k2": 5, "k3": 6, "test2": null, "test3": [], "test4": null}\n',
        '{"_entity": "pdtest1", "k1": 4, "k2": 5, "k3": 5, "test2": null, "test3": [], "test4": null}\n',
    ]
    _check_dump_content(dumpdir, 'pdtest1_dump.jsonl', pdtest1_lines)

    pdtest2_lines = [
        '{"_entity": "pdtest2", "id": 1, "test1": [1, 2, 3]}\n',
    ]
    _check_dump_content(dumpdir, 'pdtest2_dump.jsonl', pdtest2_lines)

    pdtest3_lines = [
        '{"_entity": "pdtest3", "foo_bool": true, "foo_datetime": "2017-06-01T01:02:03.999987Z", "foo_float": 0.30000000000000004, "foo_int": -4, "foo_longstr": "longstr", "foo_string": "foo\\u0000😊bar", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 1, "test1": [1, 2, 3]}\n',
        '{"_entity": "pdtest3", "foo_bool": false, "foo_datetime": "2017-06-01T01:02:03.999987Z", "foo_float": 0.30000000000000004, "foo_int": 4, "foo_longstr": "longstr", "foo_string": "foo\\u0000😊bar", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 2, "test1": [1, 2, 3]}\n',
        '{"_entity": "pdtest3", "foo_bool": null, "foo_datetime": "1970-01-01T00:00:00.000000Z", "foo_float": 0.0, "foo_int": 2147483647, "foo_longstr": "string", "foo_string": "string", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 3, "test1": null}\n',
    ]
    _check_dump_content(dumpdir, 'pdtest3_dump.jsonl', pdtest3_lines)

    _check_dump_content(dumpdir, 'pdtest4_dump.jsonl', [''])

    many1_lines = [
        '{"_entity": "many1", "id": 1, "many2": [1, 2]}\n',
        '{"_entity": "many1", "id": 2, "many2": [1, 2]}\n',
    ]
    _check_dump_content(dumpdir, 'many1_dump.jsonl', many1_lines)

    many2_lines = [
        '{"_entity": "many2", "id": 1, "many1": [1, 2]}\n',
        '{"_entity": "many2", "id": 2, "many1": [1, 2]}\n',
    ]
    _check_dump_content(dumpdir, 'many2_dump.jsonl', many2_lines)
def test_dump_to_directory_full(use_testdb, dumpdir, gzip_compression):
    """Dump every entity and verify progress statuses, file names and content.

    The statuses yielded by ``dump_to_directory`` are matched one-to-one
    against the expected entries, the directory listing is compared with the
    expected file names (with a ``.gz`` suffix when ``gzip_compression`` is
    truthy), and each dump file is checked line by line.
    """
    dumper = PonyDump(testdb)
    _fill_testdb()

    def progress(entity, current, count, pk):
        # Expected per-entity status; the 'path' key is stripped before comparing.
        return {
            'current': current,
            'count': count,
            'pk': pk,
            'entity': entity,
            'chunk_size': 250,
        }

    # Dump order is not strictly defined
    pending = [
        progress('many1', 1, 2, (1,)),
        progress('many1', 2, 2, (2,)),
        progress('many1', 2, 2, None),
        progress('many2', 1, 2, (1,)),
        progress('many2', 2, 2, (2,)),
        progress('many2', 2, 2, None),
        progress('pdtest1', 1, 3, (1, 2, 3)),
        progress('pdtest1', 1, 3, (1, 2, 3)),
        progress('pdtest1', 3, 3, (4, 5, 5)),
        progress('pdtest1', 3, 3, None),
        progress('pdtest3', 1, 3, (1,)),
        progress('pdtest3', 3, 3, (3,)),
        progress('pdtest3', 3, 3, None),
        progress('pdtest2', 1, 1, (1,)),
        progress('pdtest2', 1, 1, (1,)),
        progress('pdtest2', 1, 1, None),
        progress('pdtest4', 0, 0, None),
        # Always the last one
        {'entity': None, 'path': None, 'current': 0, 'count': 0, 'pk': None},
    ]

    suffix = '.gz' if gzip_compression else ''
    filenames = {
        name + suffix
        for name in (
            'pdtest1_dump.jsonl',
            'pdtest2_dump.jsonl',
            'pdtest3_dump.jsonl',
            'pdtest4_dump.jsonl',
            'many1_dump.jsonl',
            'many2_dump.jsonl',
        )
    }

    # Check the progress reporting of the dump process
    for status in dumper.dump_to_directory(dumpdir, gzip_compression=gzip_compression):
        path = status.pop('path')
        is_final = path is None

        if is_final:
            # Put the key back so the final status matches its expected entry
            status['path'] = path
        else:
            _, basename = os.path.split(path)
            assert basename in filenames
            assert os.path.isfile(path)

        assert status in pending
        pending.remove(status)

        if is_final:
            # Nothing may be left once the terminating status arrives
            assert not pending

    assert not pending

    assert set(os.listdir(dumpdir)) == filenames

    # Check the dump contents:
    # - JSON keys are sorted
    # - commas and colons are followed by spaces
    # - files are UTF-8 without BOM
    # - lines are terminated with \n
    # - objects inside a dump are sorted by primary key

    pdtest1_lines = [
        '{"_entity": "pdtest1", "k1": 1, "k2": 2, "k3": 3, "test2": 1, "test3": [1, 2], "test4": null}\n',
        '{"_entity": "pdtest1", "k1": 3, "k2": 5, "k3": 6, "test2": null, "test3": [], "test4": null}\n',
        '{"_entity": "pdtest1", "k1": 4, "k2": 5, "k3": 5, "test2": null, "test3": [], "test4": null}\n',
    ]
    _check_dump_content(dumpdir, 'pdtest1_dump.jsonl', pdtest1_lines)

    pdtest2_lines = [
        '{"_entity": "pdtest2", "id": 1, "test1": [1, 2, 3]}\n',
    ]
    _check_dump_content(dumpdir, 'pdtest2_dump.jsonl', pdtest2_lines)

    pdtest3_lines = [
        '{"_entity": "pdtest3", "foo_bool": true, "foo_datetime": "2017-06-01T01:02:03.999987Z", "foo_float": 0.30000000000000004, "foo_int": -4, "foo_longstr": "longstr", "foo_string": "foo\\u0000😊bar", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 1, "test1": [1, 2, 3]}\n',
        '{"_entity": "pdtest3", "foo_bool": false, "foo_datetime": "2017-06-01T01:02:03.999987Z", "foo_float": 0.30000000000000004, "foo_int": 4, "foo_longstr": "longstr", "foo_string": "foo\\u0000😊bar", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 2, "test1": [1, 2, 3]}\n',
        '{"_entity": "pdtest3", "foo_bool": null, "foo_datetime": "1970-01-01T00:00:00.000000Z", "foo_float": 0.0, "foo_int": 2147483647, "foo_longstr": "string", "foo_string": "string", "foo_uuid": "8e8cdc11-0785-43a8-8203-66c148c3f57c", "id": 3, "test1": null}\n',
    ]
    _check_dump_content(dumpdir, 'pdtest3_dump.jsonl', pdtest3_lines)

    _check_dump_content(dumpdir, 'pdtest4_dump.jsonl', [''])

    many1_lines = [
        '{"_entity": "many1", "id": 1, "many2": [1, 2]}\n',
        '{"_entity": "many1", "id": 2, "many2": [1, 2]}\n',
    ]
    _check_dump_content(dumpdir, 'many1_dump.jsonl', many1_lines)

    many2_lines = [
        '{"_entity": "many2", "id": 1, "many1": [1, 2]}\n',
        '{"_entity": "many2", "id": 2, "many1": [1, 2]}\n',
    ]
    _check_dump_content(dumpdir, 'many2_dump.jsonl', many2_lines)