def test_load_breakpoint_continue():
    """
    movie dataset: resume loading from a breakpoint
    """
    # first load: stop when errors occur, leaving a breakpoint
    cmd_error = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
                "--clear-all-data true " \
                "--check-vertex true "
    res_error = InsertData(cmd_error, schema='schema_movie.groovy', struct='struct_movie.json',
                           dir='movie').load_graph()
    res_error.communicate()

    # second load: resume from the breakpoint in incremental mode
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--incremental-mode true "
    res = InsertData(cmd, schema='schema_movie.groovy', struct='struct_movie.json', dir='movie').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 49015
def test_load_error_file_reload():
    """
    movie dataset: re-import from the failure files
    """
    # first load: generate the failure files
    cmd_error = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
                "--clear-all-data true " \
                "--check-basic_operation true "
    res_error = InsertData(cmd_error, schema='schema_movie.groovy', struct='struct_movie.json',
                           dir='movie').load_graph()
    res_error.communicate()

    # second load: import the failure files
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--check-basic_operation true " \
          "--failure-mode true"
    res = InsertData(cmd, schema='schema_movie.groovy', struct='struct_movie.json', dir='movie').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] > 500
    assert res_assert[1] > 500
def test_ttl_use_loader(self):
    """
    ttl + loader import
    """
    print(_cfg.server_backend)
    # create schema with TTL: vertex '艺人' -> ttl=5s, edge '属于' -> ttl=5s
    gremlin = "graph.schema().propertyKey('名称').asText().ifNotExist().create();" \
              "graph.schema().propertyKey('类型').asText().valueSet().ifNotExist().create();" \
              "graph.schema().propertyKey('发行时间').asDate().ifNotExist().create();" \
              "graph.schema().propertyKey('演员').asText().ifNotExist().create();" \
              "graph.schema().vertexLabel('电影').useCustomizeStringId()" \
              ".properties('名称','类型','发行时间').ifNotExist().create();" \
              "graph.schema().vertexLabel('艺人').useCustomizeStringId().properties('演员')" \
              ".ttl(5000L).ifNotExist().create();" \
              "graph.schema().vertexLabel('类型').useCustomizeStringId().properties('类型').ifNotExist().create();" \
              "graph.schema().vertexLabel('年份').useCustomizeStringId().properties('发行时间').ifNotExist().create();" \
              "graph.schema().edgeLabel('导演').link('艺人','电影').ifNotExist().create();" \
              "graph.schema().edgeLabel('演出').link('艺人','电影').ifNotExist().create();" \
              "graph.schema().edgeLabel('属于').link('电影','类型').properties('发行时间')" \
              ".ttl(5000L).ifNotExist().create();" \
              "graph.schema().edgeLabel('发行于').link('电影','年份').properties('发行时间').ifNotExist().create();"
    code, res_gremlin = Gremlin().gremlin_post(gremlin, auth=auth)
    print(code, res_gremlin)
    assert code == 200
    assert res_gremlin['result']['data'][0]['name'] == '发行于'

    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s "
    res = InsertData(cmd, struct='struct_movie.json', dir='movie').load_graph()
    res.communicate()
    # stdout, stderr = res.communicate()
    # print(' ---> ' + str(stdout) + ' === ' + str(stderr))

    # query after sleeping longer than the configured TTL
    time.sleep(30)
    # assertions
    code, res = Gremlin().gremlin_post("g.V().count();", auth=auth)
    assert code == 200
    assert res['result']['data'][0] != 16034  # count() on TTL data is buggy with the rocksdb backend

    code, res = Gremlin().gremlin_post("g.E().count();", auth=auth)
    assert code == 200
    assert res['result']['data'][0] == 83809  # count() on TTL data is buggy with the rocksdb backend

    code, res = Gremlin().gremlin_post("g.V('吴宇森');", auth=auth)
    assert code == 200
    assert res['result']['data'] == []

    code, res = Gremlin().gremlin_post("g.E('S铁汉柔情 > 3 >> S动作');", auth=auth)
    assert code == 200
    assert res['result']['data'] == []
def test_load_set_max_lines():
    """
    network dataset: cap the maximum number of lines read
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--max-read-lines 10000000 "
    res = InsertData(cmd, schema='schema_checkVertex.groovy', struct='struct_checkVertex.json',
                     dir='network').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 1000
    assert res_assert[1] == 15156
def test_load_set_single_concurrent():
    """
    network dataset: set the concurrency for single-mode inserts
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--single-insert-threads 5 "
    res = InsertData(cmd, schema='schema_checkVertex.groovy', struct='struct_checkVertex.json',
                     dir='network').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 1000
    assert res_assert[1] == 15156
def test_load_network_retry_times():
    """
    network dataset: set the retry count for specific exceptions during loading
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--retry-times 10 "
    res = InsertData(cmd, schema='schema_checkVertex.groovy', struct='struct_checkVertex.json',
                     dir='network').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 1000
    assert res_assert[1] == 15156
def test_load_hlm_batch_insert_threads():
    """
    hlm dataset: set the concurrency for batch inserts
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--batch-insert-threads 50 "
    res = InsertData(cmd, schema='schema_hlm.groovy', struct='struct_hlm.json', dir='hlm').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 41
    assert res_assert[1] == 51
def test_load_hlm_max_read_lines():
    """
    hlm dataset: cap the maximum number of lines read
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--max-read-lines 10000000 "
    res = InsertData(cmd, schema='schema_hlm.groovy', struct='struct_hlm.json', dir='hlm').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 41
    assert res_assert[1] == 51
def test_load_single_insert_threads():
    """
    movie dataset: set the concurrency for single-mode inserts
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--single-insert-threads 5 "
    res = InsertData(cmd, schema='schema_movie.groovy', struct='struct_movie.json', dir='movie').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 49015
    assert res_assert[1] == 117356
def test_load_dry_run():
    """
    movie dataset: dry-run mode (parse only, do not import)
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--dry-run true "
    res = InsertData(cmd, schema='schema_movie.groovy', struct='struct_movie.json', dir='movie').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 0
    assert res_assert[1] == 0
def test_load_batch_size():
    """
    movie dataset: set the number of records per batch
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--batch-size 500 "
    res = InsertData(cmd, schema='schema_movie.groovy', struct='struct_movie.json', dir='movie').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 49015
    assert res_assert[1] == 117356
def test_load_movie_max_parse_errors():
    """
    movie dataset: set the maximum number of parse errors allowed
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--max-parse-errors 5000 "
    res = InsertData(cmd, schema='schema_movie.groovy', struct='struct_movie.json', dir='movie').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 49015
    assert res_assert[1] == 117356
def test_load_help_message():
    """
    movie dataset: show the help message
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--help "
    res = InsertData(cmd, schema='schema_movie.groovy', struct='struct_movie.json', dir='movie').load_graph()
    # res.communicate()
    stdout, stderr = res.communicate()
    print(str(stdout) + '\n' + str(stderr))

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert str(stdout, 'utf-8').startswith("Usage: <main class> [options]")
def test_load_hlm_retry_times():
    """
    hlm dataset: set the retry count for specific exceptions during loading
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--retry-times 10 "
    res = InsertData(cmd, schema='schema_hlm.groovy', struct='struct_hlm.json', dir='hlm').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 41
    assert res_assert[1] == 51
def test_load_network_check_vertex():
    """
    network dataset: clear the graph before loading & check vertices
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--check-basic_operation true "
    res = InsertData(cmd, schema='schema_checkVertex.groovy', struct='struct_checkVertex.json',
                     dir='network').load_graph()
    res.communicate()
    # stdout, stderr = res.communicate()
    # print(str(stdout) + '\n' + str(stderr))

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 1000
    assert res_assert[1] == 15156
def test_load_retry_interval():
    """
    movie dataset: set the retry interval (seconds)
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--retry-times 10 " \
          "--retry-interval 10 "
    res = InsertData(cmd, schema='schema_movie.groovy', struct='struct_movie.json', dir='movie').load_graph()
    res.communicate()

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 49015
    assert res_assert[1] == 117356
def test_load_clear_all_data():
    """
    movie dataset: clear the graph before loading
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true "
    res = InsertData(part_cmd=cmd, schema='schema_movie.groovy', struct='struct_movie.json',
                     dir='movie').load_graph()
    res.communicate()
    # stdout, stderr = res.communicate()
    # print(str(stdout) + '\n' + str(stderr))

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 49015
    assert res_assert[1] == 117356
def test_load_check_basic_operation():
    """
    hlm dataset: clear the graph before loading & check vertices
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--check-basic_operation true "
    res = InsertData(cmd, schema='schema_hlm.groovy', struct='struct_hlm.json', dir='hlm').load_graph()
    res.communicate()
    # stdout, stderr = res.communicate()
    # print(str(stdout) + '\n' + str(stderr))

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 41
    assert res_assert[1] == 51
def setup_class(self):
    """
    test class setup
    """
    if _cfg.server_backend == 'cassandra':
        clear_graph()
    else:
        # use a gremlin statement to truncate the backend
        Gremlin().gremlin_post('graph.truncateBackend();')
    InsertData(gremlin='gremlin_alg_03.txt').gremlin_graph()
def setup_class(self):
    """
    test class setup
    """
    print('++++++++++++++++ start ')
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true "
    InsertData(part_cmd=cmd, schema='schema_checkVertex.groovy', struct='struct_checkVertex.json',
               dir='network').load_graph()
def test_load_close_print():
    """
    movie dataset: disable dynamic progress printing
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--print-progress false "
    res = InsertData(cmd, schema='schema_movie.groovy', struct='struct_movie.json', dir='movie').load_graph()
    # res.communicate()
    stdout, stderr = res.communicate()
    print(str(stdout) + '\n' + str(stderr))

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert str(stdout, 'utf-8').split('\n')[1] == 'count metrics'
    assert res_assert[0] == 49015
    assert res_assert[1] == 117356
def test_load_movie_check_data_customizeId_error():
    """
    movie dataset: check vertices before loading
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--check-vertex true "
    res = InsertData(cmd, schema='schema_movie_01.groovy', struct='struct_movie_01.json', dir='movie').load_graph()
    res.communicate()
    # stdout, stderr = res.communicate()
    # print(' ---> ' + str(stdout) + ' === ' + str(stderr))

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 49036
    # bug with check-vertex enabled: IDs of different vertex labels in the movie dataset
    # overwrite each other, so some edges fail to load
    assert res_assert[1] != 117356
def test_load_check_vertex():
    """
    movie dataset: check vertices before loading
    """
    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s -s %s " \
          "--clear-all-data true " \
          "--check-vertex true " \
          "--batch-insert-threads 1 " \
          "--single-insert-threads 1 " \
          "--max-parse-errors 1 " \
          "--max-insert-errors 1"
    res = InsertData(cmd, schema='schema_checkVertex.groovy', struct='struct_checkVertex.json',
                     dir='check_vertex_data').load_graph()
    res.communicate()
    # stdout, stderr = res.communicate()
    # print(' ---> ' + str(stdout) + ' === ' + str(stderr))

    res_assert = InsertData().loader_assert()
    print(res_assert)
    assert res.returncode == 0
    assert res_assert[0] == 22
    assert res_assert[1] == 9
def test_ttl_use_migrate(self):
    """
    vertex ttl + data migration
    """
    # premise: create schema with TTL: vertex '艺人' -> ttl=5s, edge '属于' -> ttl=5s
    gremlin = "graph.schema().propertyKey('名称').asText().ifNotExist().create();" \
              "graph.schema().propertyKey('类型').asText().valueSet().ifNotExist().create();" \
              "graph.schema().propertyKey('发行时间').asDate().ifNotExist().create();" \
              "graph.schema().propertyKey('演员').asText().ifNotExist().create();" \
              "graph.schema().vertexLabel('电影').useCustomizeStringId()" \
              ".properties('名称','类型','发行时间').ifNotExist().create();" \
              "graph.schema().vertexLabel('艺人').useCustomizeStringId().properties('演员')" \
              ".ttl(5000L).ifNotExist().create();" \
              "graph.schema().vertexLabel('类型').useCustomizeStringId().properties('类型').ifNotExist().create();" \
              "graph.schema().vertexLabel('年份').useCustomizeStringId().properties('发行时间').ifNotExist().create();" \
              "graph.schema().edgeLabel('导演').link('艺人','电影').ifNotExist().create();" \
              "graph.schema().edgeLabel('演出').link('艺人','电影').ifNotExist().create();" \
              "graph.schema().edgeLabel('属于').link('电影','类型').properties('发行时间')" \
              ".ttl(5000L).ifNotExist().create();" \
              "graph.schema().edgeLabel('发行于').link('电影','年份').properties('发行时间').ifNotExist().create();"
    code, res_gremlin = Gremlin().gremlin_post(gremlin, auth=auth)
    print(code, res_gremlin)
    assert code == 200
    assert res_gremlin['result']['data'][0]['name'] == '发行于'

    cmd = "%s/bin/hugegraph-loader.sh -h %s -p %d -g %s -f %s "
    res = InsertData(cmd, struct='struct_movie.json', dir='movie').load_graph()
    res.communicate()

    # wait for the TTL to take effect before the following operations
    time.sleep(30)
    code, res = Gremlin().gremlin_post("g.V('吴宇森');", auth=auth)
    assert code == 200
    assert res['result']['data'] == []

    code, res = Gremlin().gremlin_post("g.E('S铁汉柔情 > 3 >> S动作');", auth=auth)
    assert code == 200
    assert res['result']['data'] == []

    # data migration
    target_clear_graph()
    cmd = "./bin/hugegraph --url %s --graph %s %s %s migrate " \
          "--target-url %s " \
          "--target-graph %s " \
          "%s " \
          "%s " \
          "--graph-mode RESTORING "
    res = run_shell(cmd)
    stdout, stderr = res.communicate()
    print(' ---> ' + str(stdout) + ' === ' + str(stderr))

    code, res = Gremlin().gremlin_post("g.V('吴宇森');", auth=target_auth, host=_cfg.tools_target_host,
                                       port=_cfg.tools_target_port, protocol=taret_protocol)
    print(code, res)
    assert code == 200
    assert res['result']['data'] == []

    code, res = Gremlin().gremlin_post("g.E('S铁汉柔情 > 3 >> S动作');", auth=target_auth,
                                       host=_cfg.tools_target_host, port=_cfg.tools_target_port,
                                       protocol=taret_protocol)
    print(code, res)
    assert code == 200
    assert res['result']['data'] == []