Example #1
0
def load(
    in_file: str = typer.Argument(None),
    root: Optional[Path] = typer.Option(None),
    file_format: str = typer.Option("jsonl", "--ff"),
    dry_run: bool = typer.Option(False, "--dry-run"),
    skip_reindex: bool = typer.Option(False, "--skip-reindex"),
    is_binary: bool = typer.Option(False, "--is_binary"),
    flags: Optional[List[str]] = typer.Option(None, "--flag"),
    is_transaction: bool = typer.Option(False, "--tx"),
):
    """ Load data into local KB """
    # NOTE(review): presumably registered as a Typer command (the defaults
    # are typer.Argument/Option); if so the docstring above is the CLI help
    # text, so the details live in these comments instead.
    #
    # in_file:        source to read; "-" is opened via typer.open_file,
    #                 anything else via smart_open.open.
    # root:           passed to KB(root=...) and to reindex(root=...).
    # file_format:    (--ff) forwarded to cli.get_reader to pick the reader.
    # dry_run:        (--dry-run) do not save; echo up to the first 10
    #                 objects and stop. Also suppresses the reindex.
    # skip_reindex:   (--skip-reindex) skip the final reindex call.
    # is_binary:      (--is_binary) open the input in "rb" instead of "r".
    # flags:          (--flag) forwarded to cli.get_reader.
    # is_transaction: (--tx) wrap the whole load in kb.transact(); otherwise
    #                 services.noop_context() is used.
    t0 = time.time()

    kb = KB(root=root)
    typer.echo(f"Loading using {file_format} from {in_file}")

    mode = "rb" if is_binary else "r"
    if in_file == "-":
        file_obj = typer.open_file(in_file, mode=mode)
    else:
        file_obj = smart_open.open(in_file, mode=mode)

    count = 0
    transact = kb.transact if is_transaction else services.noop_context

    # Use the stream as a context manager so it is released on success, on
    # the dry-run early exit, and when the reader or kb.save raises.
    with file_obj:
        reader = cli.get_reader(
            file_format=file_format, file_obj=file_obj, kb=kb, flags=flags
        )

        with typer.progressbar(reader) as progress:
            with transact():
                for obj in progress:
                    # NOTE: count is bumped before the dry-run limit check,
                    # so a dry run over more than 10 objects reports 11.
                    count += 1

                    if not dry_run:
                        kb.save(obj)
                    elif count <= 10:
                        typer.echo(obj)
                    else:
                        break

    t1 = time.time()
    typer.echo(f"Loaded {count} in {t1 - t0:.2f}s [{in_file}, {file_format}]")
    if not dry_run and not skip_reindex:
        reindex(root=root)
Example #2
0
def test_kb_save_bool_clear(kb: KB, apple):
    """Check save/len/clear on a KB and that it stays truthy when len is 0."""
    assert bool(kb) is True

    # save() is expected to hand back something equal to what was stored.
    stored = kb.save(apple)
    assert apple == stored
    kb.reindex()

    size_after_save = len(kb)
    assert size_after_save == 1

    kb.clear()
    size_after_clear = len(kb)
    assert size_after_clear == 0

    # Truthiness must not follow len(): the cleared KB is still truthy.
    assert bool(kb) is True
Example #3
0
def test_save_for_entity_and_edge(kb: KB, apple, google):
    """Walk a KB through node saves, edge saves, search, removal and cleanup.

    After each mutation the node/edge counts reported by ``kb.info()`` are
    checked, along with edge and neighbor lookups for ``apple``.
    """

    def graph_counts():
        # The "graph" section of kb.info() carries the node/edge totals.
        return kb.info()["graph"]

    # --- save two entities -------------------------------------------------
    saved_apple = kb.save(apple)
    assert apple == saved_apple
    saved_google = kb.save(google)
    assert google == saved_google
    kb.reindex()

    assert len(kb) == 2
    fetched = kb.get_node(apple.key)
    assert apple == fetched

    # --- connect apple to itself with an IS_A edge -------------------------
    kb.connect(start=apple, verb="IS_A", end=apple)
    kb.reindex()

    assert graph_counts() == {"nodes": 2, "edges": 1}

    # The single self-edge is reported twice when no direction is given.
    any_direction = kb.get_edges(node_key=apple)
    assert len(any_direction) == 2
    incoming_only = kb.get_edges(node_key=apple, direction=Direction.incoming)
    assert len(incoming_only) == 1
    is_a_edges = kb.get_edges(node_key=apple, verb="IS_A")
    assert len(is_a_edges) == 2
    is_a_limited = kb.get_edges(node_key=apple, verb="IS_A", limit=1)
    assert len(is_a_limited) == 1
    is_not_edges = kb.get_edges(node_key=apple, verb="IS_NOT")
    assert len(is_not_edges) == 0

    first_neighbor = kb.get_neighbors(apple).neighbors[0]
    assert apple.key == first_neighbor.key
    unmatched = kb.get_neighbors(
        apple, verb="IS_NOT", direction=Direction.outgoing
    )
    assert [] == unmatched.neighbors

    # --- add two more edges, one of them ending at a plain string key ------
    for target in ("INVALID|THING", google):
        kb.save(Edge(start=apple, verb="POINTS_NO_WHERE", end=target))
    kb.reindex()

    assert graph_counts() == {"nodes": 2, "edges": 3}

    traversal = T().all_nodes(passthru=True)
    response = kb.search(q="a", traversal=traversal)
    assert len(response.nodes) == 3

    # --- remove apple: its edges stay until clean_edges() is called --------
    kb.remove_node(apple.key)
    kb.reindex()

    assert graph_counts() == {"nodes": 1, "edges": 3}

    kb.clean_edges()

    assert graph_counts() == {"nodes": 1, "edges": 0}

    # --- the earlier search response survives a dict round trip ------------
    rebuilt = SearchResponse(**response.dict())
    assert rebuilt.nodes == response.nodes
Example #4
0
def test_kb_save_invalid(kb: KB):
    """Saving a plain string must be rejected with a RuntimeError."""
    unsupported_item = "invalid!"
    expect_failure = pytest.raises(RuntimeError)
    with expect_failure:
        kb.save(unsupported_item)