def load(
    in_file: str = typer.Argument(None),
    root: Optional[Path] = typer.Option(None),
    file_format: str = typer.Option("jsonl", "--ff"),
    dry_run: bool = typer.Option(False, "--dry-run"),
    skip_reindex: bool = typer.Option(False, "--skip-reindex"),
    is_binary: bool = typer.Option(False, "--is_binary"),
    flags: Optional[List[str]] = typer.Option(None, "--flag"),
    is_transaction: bool = typer.Option(False, "--tx"),
):
    """
    Load data into local KB
    """
    # NOTE(review): the docstring above is intentionally left short and
    # unchanged -- presumably it is surfaced as the command's --help text;
    # confirm before expanding it.
    #
    # in_file:        path/URL handed to smart_open, or "-" to go through
    #                 typer.open_file instead.
    # root:           forwarded to KB(root=...) and to reindex(root=...).
    # file_format:    forwarded to cli.get_reader to select the reader.
    # dry_run:        echo the first 10 objects instead of saving anything.
    # skip_reindex:   do not call reindex() after a real (non dry-run) load.
    # is_binary:      open the input with mode "rb" rather than "r".
    # flags:          forwarded unchanged to cli.get_reader.
    # is_transaction: wrap the save loop in kb.transact() instead of
    #                 services.noop_context().
    t0 = time.time()
    kb = KB(root=root)
    typer.echo(f"Loading using {file_format} from {in_file}")

    mode = "rb" if is_binary else "r"
    if in_file == "-":
        file_obj = typer.open_file(in_file, mode=mode)
    else:
        file_obj = smart_open.open(in_file, mode=mode)

    count = 0
    transact = kb.transact if is_transaction else services.noop_context

    # Own the input handle for the whole read so it is released even when the
    # reader or kb.save raises; previously it was never closed at all.
    with file_obj:
        reader = cli.get_reader(
            file_format=file_format, file_obj=file_obj, kb=kb, flags=flags
        )
        with typer.progressbar(reader) as progress:
            with transact():
                for obj in progress:
                    count += 1
                    if not dry_run:
                        kb.save(obj)
                    elif count <= 10:
                        typer.echo(obj)
                    else:
                        # Dry run only previews the first 10 objects. count
                        # was already incremented for this one, so the
                        # summary below reports 11 in that case (behaviour
                        # kept as-is).
                        break

    t1 = time.time()
    typer.echo(f"Loaded {count} in {t1 - t0:.2f}s [{in_file}, {file_format}]")

    if not dry_run and not skip_reindex:
        reindex(root=root)
def test_kb_save_bool_clear(kb: KB, apple):
    """Check save, len, clear and truthiness of the KB around one saved item."""
    # The KB is truthy before anything has been stored in it.
    assert bool(kb)

    # save() hands back something equal to the object that was saved.
    saved = kb.save(apple)
    assert apple == saved

    kb.reindex()
    size_after_save = len(kb)
    assert size_after_save == 1

    kb.clear()
    size_after_clear = len(kb)
    assert size_after_clear == 0

    # Still truthy even though len(kb) is now 0.
    assert bool(kb)
def test_save_for_entity_and_edge(kb: KB, apple, google):
    """Walk a KB through saving nodes and edges, querying, and cleanup."""

    def graph_info():
        # Current "graph" section of kb.info().
        return kb.info()["graph"]

    def apple_edge_count(**filters):
        # Number of edges get_edges returns for apple under the given filters.
        return len(kb.get_edges(node_key=apple, **filters))

    # --- save two nodes --------------------------------------------------
    saved_apple = kb.save(apple)
    assert apple == saved_apple
    saved_google = kb.save(google)
    assert google == saved_google
    kb.reindex()
    assert len(kb) == 2

    fetched = kb.get_node(apple.key)
    assert apple == fetched

    # --- connect apple to itself -----------------------------------------
    kb.connect(start=apple, verb="IS_A", end=apple)
    kb.reindex()
    assert graph_info() == {"nodes": 2, "edges": 1}

    # The single self-edge is returned twice without a direction filter and
    # once when restricted to incoming edges.
    assert apple_edge_count() == 2
    assert apple_edge_count(direction=Direction.incoming) == 1
    assert apple_edge_count(verb="IS_A") == 2
    assert apple_edge_count(verb="IS_A", limit=1) == 1
    assert apple_edge_count(verb="IS_NOT") == 0

    first_neighbor = kb.get_neighbors(apple).neighbors[0]
    assert apple.key == first_neighbor.key

    unmatched = kb.get_neighbors(
        apple, verb="IS_NOT", direction=Direction.outgoing
    )
    assert [] == unmatched.neighbors

    # --- two more edges, one ending at a plain string ---------------------
    dangling_edge = Edge(start=apple, verb="POINTS_NO_WHERE", end="INVALID|THING")
    kb.save(dangling_edge)
    google_edge = Edge(start=apple, verb="POINTS_NO_WHERE", end=google)
    kb.save(google_edge)
    kb.reindex()
    # Node count is unchanged; only the edge count grows.
    assert graph_info() == {"nodes": 2, "edges": 3}

    # --- search ----------------------------------------------------------
    traversal = T().all_nodes(passthru=True)
    response = kb.search(q="a", traversal=traversal)
    assert len(response.nodes) == 3

    # --- remove a node; its edges linger until clean_edges() ---------------
    kb.remove_node(apple.key)
    kb.reindex()
    assert graph_info() == {"nodes": 1, "edges": 3}

    kb.clean_edges()
    assert graph_info() == {"nodes": 1, "edges": 0}

    # --- the earlier response round-trips through dict() -------------------
    payload = response.dict()
    rebuilt = SearchResponse(**payload)
    assert rebuilt.nodes == response.nodes
def test_kb_save_invalid(kb: KB):
    """Saving a plain string must raise RuntimeError."""
    unsupported_item = "invalid!"
    with pytest.raises(RuntimeError):
        kb.save(unsupported_item)