Beispiel #1
0
def main():
    """Score a fixed set of sample names and dump the results.

    Loads the records of ``{CASE_DIR}/output/idf.jsonl``, calls
    ``calc_score`` once per sample name, writes one
    ``{"name": ..., "scores": ...}`` record per name to
    ``{CASE_DIR}/output/scores.jsonl`` and echoes each result to stdout.
    """
    import numpy as np

    # Limit numpy's printed float precision to six digits.
    np.set_printoptions(precision=6)

    # Earlier pipeline stages, currently disabled:
    # calc_idf()
    # idf = load_idf()
    # gen_vectors()

    # Compute the vectors/scores for the sample names below.
    idf_records = list(z.read_jsonline(f"{CASE_DIR}/output/idf.jsonl"))
    sample_names = (
        "紫金县瓦溪镇康辉药店",
        "瓦溪镇康辉药店",
        "紫金瓦溪镇康辉药店",
        "紫金县瓦溪镇康辉药房",
        "武汉市江岸区李永康西医内科诊所",
        "武汉市江岸区李永康诊所",
        "江岸区李永康诊所",
    )
    with jsonlines.open(f"{CASE_DIR}/output/scores.jsonl", "w") as writer:
        for sample in sample_names:
            result = calc_score(sample, idf_records)
            record = {"name": sample, "scores": result}
            writer.write(record)
            print(sample, result)
    print("exit")
Beispiel #2
0
def calc_vec(vector):
    """Rank every stored vector by its cosine distance to ``vector``.

    Reads the records of ``{CASE_DIR}/output/vectors.jsonl`` (wrapped in a
    progress bar), skips records whose ``"name"`` is falsy, and computes
    ``cosine_distance(vector, record["vector"])`` rounded to six decimals.

    Returns:
        A list of ``(name, score)`` tuples sorted by ascending score.
    """
    stored = z.read_jsonline(f"{CASE_DIR}/output/vectors.jsonl")
    ranked = [
        (entry["name"], round(cosine_distance(vector, entry["vector"]), 6))
        for entry in z.pb(stored, title="计算分数")
        if entry["name"]
    ]
    # The raw distance is the score, so the closest matches come first.
    ranked.sort(key=lambda pair: pair[1])
    return ranked
Beispiel #3
0
    def test_read_jsonline(self):
        """read_jsonline returns a generator and yields the same rows for
        both fixtures."""
        import os
        from collections.abc import Generator

        from zzpy import read_jsonline

        expected_rows = [
            {"name": "Zero", "age": 31},
            {"name": "Flyoung", "age": 17},
        ]

        # 1.jsonl ends without a trailing newline, 2.jsonl ends with one;
        # both must produce identical records.
        for fixture in ("1.jsonl", "2.jsonl"):
            file_path = os.path.join("test", "static", fixture)

            # The call itself must return a generator.
            self.assertIsInstance(read_jsonline(file_path), Generator)

            # Draining a fresh generator yields the expected rows in order.
            rows = list(read_jsonline(file_path))
            self.assertListEqual(rows, expected_rows)
Beispiel #4
0
def gen_vector(name, slices=None, idf=None):
    """Build a weight vector for ``name`` aligned with the IDF records.

    Args:
        name: The name being vectorized. Only used when ``slices`` is empty,
            in which case it is passed to ``split_name``.
        slices: Tokens belonging to ``name``. When ``None`` or empty they are
            derived from ``split_name(name)``, keeping the first element of
            each returned item (the split result is also printed).
        idf: Iterable of records with ``"word"`` and ``"weight"`` keys. When
            ``None`` or empty it is loaded from
            ``{CASE_DIR}/output/idf.jsonl``.

    Returns:
        A list with one entry per IDF record, in IDF order: the record's
        weight if its word is among ``slices``, otherwise ``0``.
    """
    # ``None`` defaults replace the former mutable ``[]`` defaults; any falsy
    # value (None or an empty list) still triggers the fallback below.
    if not slices:
        segments = split_name(name)
        print(f"{name}: {segments}")
        slices = [segment[0] for segment in segments]
    if not idf:
        idf = list(z.read_jsonline(f"{CASE_DIR}/output/idf.jsonl"))

    # Both keys are read for every record so a malformed record fails loudly
    # even when its word does not occur in ``slices``.
    word_weights = ((item["word"], item["weight"]) for item in idf)
    return [weight if word in slices else 0 for word, weight in word_weights]
Beispiel #5
0
def gen_vectors():
    """
    Generate vectors.

    Loads the IDF records once, then for every record of
    ``{CASE_DIR}/output/std.jsonl`` writes a ``{"name": ..., "vector": ...}``
    record to ``{CASE_DIR}/output/vectors.jsonl``.
    """
    idf_records = list(z.read_jsonline(f"{CASE_DIR}/output/idf.jsonl"))

    with jsonlines.open(f"{CASE_DIR}/output/vectors.jsonl", "w") as writer:
        source = z.read_jsonline_with_progressbar(
            f"{CASE_DIR}/output/std.jsonl", title="生成向量")
        for record in source:
            entry_name = record.get("name", "")
            # Each "result" item is unpacked as a pair; only its first
            # element is used as a token.
            tokens = [token for token, _ in record.get("result", [])]
            writer.write({
                "name": entry_name,
                "vector": gen_vector(
                    name=entry_name, slices=tokens, idf=idf_records),
            })
Beispiel #6
0
def load_matchers(path):
    """Return a list with ``load_matcher(config)`` for every record read
    from the JSON-lines file at ``path``, in file order."""
    import zzpy as z

    matchers = []
    for config in z.read_jsonline(path):
        matchers.append(load_matcher(config))
    return matchers