Example #1
0
def reindex(root: Optional[Path] = typer.Option(None)):
    """ Load data into local KB """
    t0 = time.time()
    kb = KB(root=root)
    typer.echo(f"Reindexing {kb.config.root}...")

    kb.reindex()
    t1 = time.time()
    typer.echo(f"Reindexed {kb.config.root} in {t1 - t0:.2f}s")
Example #2
0
def test_creates_files(root, kb: KB, apple):
    assert (root / "config.json").is_file()
    assert (root / "nodes").is_dir()
    assert (root / "edges").is_dir()
    assert not (root / "nodes.dawg").is_file()
    assert not (root / "edges.dawg").is_file()

    kb.reindex()
    assert (root / "nodes.dawg").is_file()
    assert (root / "edges.dawg").is_file()
Example #3
0
def test_kb_save_bool_clear(kb: KB, apple):
    assert bool(kb)

    assert apple == kb.save(apple)
    kb.reindex()

    assert 1 == len(kb)
    kb.clear()

    assert 0 == len(kb)
    assert bool(kb)
Example #4
0
def test_search_no_results(kb: KB, apple):
    response = kb.search(q="invalid")
    assert [] == response.nodes

    response = kb.search(keys=["Apple, Inc.|COMPANY"], labels=["INVALID"])
    assert [] == response.nodes

    response = kb.search(labels=["INVALID"])
    assert [] == response.nodes

    response = kb.search(limit=0)
    assert [] == response.nodes
Example #5
0
def clear(
    root: Optional[Path] = typer.Option(None),
    force: bool = typer.Option(False, "--force", "-f"),
):
    """ Clear local KB """

    root = Config.get_root(root)

    if root.exists():
        if not force:
            typer.confirm(f"Clearing {root}. Are you sure?", abort=True)

    kb = KB(root=root)
    kb.clear()
    services.finish("Clear", True)
Example #6
0
def load(
    in_file: str = typer.Argument(None),
    root: Optional[Path] = typer.Option(None),
    file_format: str = typer.Option("jsonl", "--ff"),
    dry_run: bool = typer.Option(False, "--dry-run"),
    skip_reindex: bool = typer.Option(False, "--skip-reindex"),
    is_binary: bool = typer.Option(False, "--is_binary"),
    flags: Optional[List[str]] = typer.Option(None, "--flag"),
    is_transaction: bool = typer.Option(False, "--tx"),
):
    """ Load data into local KB """
    t0 = time.time()

    kb = KB(root=root)
    typer.echo(f"Loading using {file_format} from {in_file}")

    mode = "rb" if is_binary else "r"
    if in_file == "-":
        file_obj = typer.open_file(in_file, mode=mode)
    else:
        file_obj = smart_open.open(in_file, mode=mode)

    reader = cli.get_reader(
        file_format=file_format, file_obj=file_obj, kb=kb, flags=flags
    )

    count = 0
    transact = kb.transact if is_transaction else services.noop_context
    with typer.progressbar(reader) as progress:
        with transact():
            for obj in progress:
                count += 1

                if not dry_run:
                    kb.save(obj)
                elif count <= 10:
                    typer.echo(obj)
                else:
                    break

    t1 = time.time()
    typer.echo(f"Loaded {count} in {t1 - t0:.2f}s [{in_file}, {file_format}]")
    if not dry_run and not skip_reindex:
        reindex(root=root)
Example #7
0
def test_search_with_just_text(kb: KB, apple, google):
    kb.save_node(apple)
    kb.save_node(google)
    kb.reindex()

    response = kb.search("ap")
    assert 1 == len(response)
    assert [apple] == response.nodes
    assert apple == response[0]
    assert [apple] == list(response)
Example #8
0
def test_save_load_sync(root, kb: KB, apple):
    def check():
        assert (kb.parse("AAPL")).spans[0].entity == apple
        assert (kb.parse("Apple, Inc.")).spans[0].entity == apple
        assert (kb.parse("Apple,Inc.")).spans[0].entity == apple

    with kb.transact():
        kb.save_node(apple)

    kb.reindex()
    check()

    kb = KB(root=root)
    check()
    kb.reload()
    check()
Example #9
0
def init_kb(root, exist_ok=False, config=None) -> bool:
    success = False

    try:
        root = Config.get_root(root)

        os.makedirs(str(root), exist_ok=exist_ok)
        Config.create(root=root, config=config)

        KB(root=root)
        success = True

    except FileExistsError as e:
        logger.error(e)

    return success
Example #10
0
def dump(
    out_file: str = typer.Argument("-"),
    root: Optional[Path] = typer.Option(None),
    file_format: str = typer.Option("jsonl", "--ff"),
):
    """ Dump data from KB in JSONL format """
    if out_file == "-":
        file_obj = typer.open_file(out_file, mode="w")
    else:
        file_obj = smart_open.open(out_file, mode="w")

    kb = KB(root=root)
    writer = cli.get_writer(file_format=file_format)

    for node in kb:
        writer(file_obj, node)

        it = kb.graph.iterate_edges(directions=Direction.outgoing, nodes=node)
        for _, edge in it:
            writer(file_obj, edge)
Example #11
0
 def __init__(self):
     self._kb: KB = KB()
Example #12
0
def kb(root):
    return KB(root=root)
Example #13
0
def get_user_store(root) -> UserStore:
    kb = KB(root=root)
    if not isinstance(kb.user_store, UserStore):
        raise RuntimeError("CLI tools only work with entitykb.UserStore")
    return kb.user_store
Example #14
0
def test_kb_save_invalid(kb: KB):
    with pytest.raises(RuntimeError):
        kb.save("invalid!")
Example #15
0
def info(root: Optional[Path] = typer.Option(None)):
    """ Display information for local KB """
    kb = KB(root=root)
    flat = sorted(services.flatten_dict(kb.info()).items())
    output = tabulate(flat, tablefmt="pretty", colalign=("left", "right"))
    typer.echo(output)
Example #16
0
def test_get_schema(kb: KB):
    schema = kb.get_schema()
    assert schema.keys() == {"nodes", "verbs", "labels"}
    assert {"NODE", "ENTITY"}.issubset(schema["nodes"].keys())
Example #17
0
def test_search_with_results(kb: KB, apple, google):
    kb.save_node(apple)
    kb.save_node(google)
    kb.reindex()

    # default (all nodes, no filter, etc.)
    response = kb.search()
    assert [apple, google] == response.nodes

    # offset = 1, skips 1 node
    response = kb.search(offset=1)
    assert [google] == response.nodes

    # limit = 1
    response = kb.search(limit=1)
    assert [apple] == response.nodes

    # prefix
    response = kb.search(q="a")
    assert [apple] == response.nodes

    # keys
    response = kb.search(keys=["Apple, Inc.|COMPANY"])
    assert [apple] == response.nodes

    # keys
    response = kb.search(keys=[apple.key, apple.key, "junk"])
    assert [apple] == response.nodes

    # labels
    response = kb.search(labels=["COMPANY"])
    assert 2 == len(response.nodes)

    # keys + labels
    response = kb.search(keys=["Apple, Inc.|COMPANY"], labels=["COMPANY"])
    assert [apple] == response.nodes

    # dict
    assert response.dict() == {
        "nodes": [{
            "data": None,
            "headquarters": {
                "city": "Cupertino",
                "data": None,
                "key": "1",
                "label": "LOCATION",
            },
            "key": "Apple, Inc.|COMPANY",
            "label": "COMPANY",
            "name": "Apple, Inc.",
            "synonyms": ("Apple", "AAPL"),
        }],
        "trails": [{
            "end": "Apple, Inc.|COMPANY",
            "hops": [],
            "start": "Apple, Inc.|COMPANY",
        }],
    }
Example #18
0
def test_save_for_entity_and_edge(kb: KB, apple, google):
    assert apple == kb.save(apple)
    assert google == kb.save(google)
    kb.reindex()

    assert 2 == len(kb)
    assert apple == kb.get_node(apple.key)

    kb.connect(start=apple, verb="IS_A", end=apple)
    kb.reindex()

    assert kb.info()["graph"] == {
        "nodes": 2,
        "edges": 1,
    }

    assert 2 == len(kb.get_edges(node_key=apple))
    assert 1 == len(kb.get_edges(node_key=apple, direction=Direction.incoming))
    assert 2 == len(kb.get_edges(node_key=apple, verb="IS_A"))
    assert 1 == len(kb.get_edges(node_key=apple, verb="IS_A", limit=1))
    assert 0 == len(kb.get_edges(node_key=apple, verb="IS_NOT"))

    assert apple.key == kb.get_neighbors(apple).neighbors[0].key
    assert ([] == kb.get_neighbors(apple,
                                   verb="IS_NOT",
                                   direction=Direction.outgoing).neighbors)

    kb.save(Edge(start=apple, verb="POINTS_NO_WHERE", end="INVALID|THING"))
    kb.save(Edge(start=apple, verb="POINTS_NO_WHERE", end=google))
    kb.reindex()

    assert kb.info()["graph"] == {
        "nodes": 2,
        "edges": 3,
    }

    t = T().all_nodes(passthru=True)
    response = kb.search(q="a", traversal=t)
    assert 3 == len(response.nodes)

    kb.remove_node(apple.key)
    kb.reindex()

    assert kb.info()["graph"] == {
        "nodes": 1,
        "edges": 3,
    }

    kb.clean_edges()

    assert kb.info()["graph"] == {
        "nodes": 1,
        "edges": 0,
    }

    data = response.dict()
    compare = SearchResponse(**data)
    assert compare.nodes == response.nodes
Example #19
0
def test_save_entity(kb: KB, apple, apple_records):
    kb.save_node(apple)
    kb.save_node(apple_records)
    assert {apple, apple_records} == set(kb)

    kb.reindex()

    # parse functions
    assert (kb.parse("AAPL")).spans[0].entity == apple
    assert (kb.parse("Apple, Inc.")).spans[0].entity == apple
    assert (kb.parse("Apple Computers")).spans[0].text == "Apple"
    assert (kb.parse("Apple Records")).spans[0].entity == apple_records
    assert 2 == len((kb.parse("Apple")).spans)

    # find functions
    assert 2 == len(kb.find("apple"))
    assert kb.find_one("apple") is None  # 2 results cause no return
    assert kb.find_one("AAPL").name == "Apple, Inc."

    # should reset the terms
    apple2 = apple.copy(update=dict(synonyms=("Apple", "Apple Computers")))
    kb.save_node(apple2)
    kb.reindex()

    assert not (kb.parse("AAPL")).spans
    assert (kb.parse("Apple, Inc.")).spans[0].entity == apple2
    assert (kb.parse("Apple Computers")).spans[0].entity == apple2
    assert (kb.parse("Apple Computers")).spans[0].text == "Apple Computers"
    assert 2 == len((kb.parse("Apple")).spans)

    kb.remove_node(apple2)
    kb.reindex()

    assert 1 == len((kb.parse("Apple")).spans)
    assert 1 == len((kb.parse("Apple Computers")).spans)
    assert (kb.parse("Apple Computers")).spans[0].text == "Apple"
Example #20
0
def test_parse(kb: KB):
    doc = kb.parse("This is a doc")
    assert isinstance(doc, Doc)
    assert 4 == len(doc.tokens)
Example #21
0
def test_user_functions(kb: KB):
    # noinspection PyUnresolvedReferences
    pw = kb.user_store.add_user(username="******", status=UserStatus.read_only)

    token = kb.authenticate("one", pw)
    assert kb.get_user(token).username == "one"