Example 1
def render_html(path_img, path_html="out29.html") -> None:
    message("save :", path_html, type="status")
    contents = ("<!DOCTYPE html><html>"
                "<head><title>knock29</title></head>"
                '<body><img src="%s" width="128"/></body>'
                "</html>" % path_img)
    with open(path_html, "w") as f:
        f.write(contents)

    message("open :", path_html, type="status")
    webbrowser.open(path_html)
Example 2
 def tag(self, path_input: str, path_output: str, **kwargs) -> None:
     self.params.update(**kwargs)
     res = []
     with open(path_input) as f_in:
         for line in tqdm.tqdm(f_in):
             words = [self.trans(word) for word in line.split()]
             best_edge = self.__forward(words)
             tags = self.__backward(words, best_edge)
             res.append(" ".join(tags) + "\n")
     with open(path_output, "w") as f_out:
         f_out.writelines(res)
     message(f"saved : {path_output}", type="success")
Example 3
def sample(test_path, epochs=1, α=0.01, β=0.01, num_topics=2):
    """ #09 p24 """
    xcorpus, ycorpus, xcounts, ycounts, wordtype = initialize(test_path, num_topics)
    for epoch in range(1, epochs + 1):
        message("epoch =", epoch, type="status")
        ll = 0
        for i in tqdm(range(len(xcorpus)), leave=False):
            for j in range(len(xcorpus[i])):
                x = xcorpus[i][j]
                y = ycorpus[i][j]
                add_counts(xcounts, ycounts, x, y, i, -1)
                probs = []
                for k in range(num_topics):
                    p_xk = (xcounts[f"{x}|{k}"] + α) / (xcounts[k] + α * wordtype)
                    p_ky = (ycounts[f"{k}|{i}"] + β) / (ycounts[i] + β * num_topics)
                    probs.append(p_xk * p_ky)
                new_y = sampleone(probs)
                ll += math.log(probs[new_y])
                add_counts(xcounts, ycounts, x, new_y, i, 1)
                ycorpus[i][j] = new_y
        message("ll =", ll, type="success")
    return xcorpus, ycorpus
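
sampleone and add_counts are helpers defined elsewhere in the module; only their call sites appear above. As a point of reference, a minimal sketch of what a sampler with that call signature typically does (drawing an index in proportion to unnormalized probabilities) could look like the following; the body is an assumption, not the author's code.

import random

def sampleone(probs):
    # Hypothetical helper: draw index i with probability proportional to probs[i].
    z = sum(probs)
    remaining = random.random() * z
    for i, p in enumerate(probs):
        remaining -= p
        if remaining <= 0:
            return i
    return len(probs) - 1  # guard against floating-point round-off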
Example 4
def test(args: argparse.Namespace) -> None:
    model = BigramLM(WittenBell=args.WittenBell).load(args.model)

    res = model.test(args.test)
    if args.name:
        message(
            f"[{args.name} | {get_ext(args.WittenBell)}"
            f" default(λ_1={0.95:.2f}, λ_2={0.95:.2f})]",
            file=sys.stdout,
        )
    for k, v in res.items():
        message(f"{k:15s} = {v:f}", file=sys.stdout)

    # λ_1, λ_2 = grid_search(
    #     model,
    #     args.test,
    #     rng=(0.01, 1, 0.01),
    #     save=f"result_{get_ext(args.WittenBell)}.png",
    # )
    λ_1, λ_2 = grid_search(
        model,
        args.test,
        rng=(0.05, 1, 0.05),
        save=f"fig1_{get_ext(args.WittenBell)}.png",
    )
    # λ_1, λ_2 = grid_search(
    #     model,
    #     args.test,
    #     rng1=(λ_1 - 0.1, λ_1 + 0.1, 0.01),
    #     rng2=(λ_2 - 0.1, λ_2 + 0.1, 0.01),
    #     save=f"fig2_{get_ext(args.WittenBell)}.png",
    # )

    res = model.test(args.test, λ_1=λ_1, λ_2=λ_2)
    if args.name:
        message(
            f"[{args.name} | {get_ext(args.WittenBell)}"
            f" optimized(λ_1={λ_1:.2f}, λ_2={λ_2:.2f})]",
            file=sys.stdout,
        )
    for k, v in res.items():
        message(f"{k:15s} = {v:f}", file=sys.stdout)
    """result
Example 5
class Tokenizer(UnigramLM):
    def tokenize(
        self,
        path_input: str,
        path_output: str,
        *,
        λ_1: float = 0.95,
        vocab_size: int = 1_000_000,
    ) -> None:
        def forward(line: str, V: int = vocab_size) -> List[Tuple[int, int]]:
            size = len(line)
            best_edge = [None] * (size + 1)
            best_score = [float("inf")] * (size + 1)
            best_score[0] = 0
            for word_end in range(1, size + 1):
                for word_begin in range(word_end):
                    word = line[word_begin:word_end]
                    if word in self.model or len(word) == 1:
                        prob = λ_1 * self.model.get(word, 0) + (1 - λ_1) / V
                        my_score = best_score[word_begin] + -math.log2(prob)
                        if my_score < best_score[word_end]:
                            best_score[word_end] = my_score
                            best_edge[word_end] = (word_begin, word_end)
            return best_edge

        def backward(line: str, best_edge: List[Tuple[int, int]]) -> List[str]:
            words = []
            next_edge = best_edge[-1]
            while next_edge:
                words.append(line[next_edge[0]:next_edge[1]])
                next_edge = best_edge[next_edge[0]]
            words.reverse()
            return words

        res = []
        with open(path_input) as f_in:
            for line in map(lambda x: x.strip(), f_in):
                best_edge = forward(line)
                words = backward(line, best_edge)
                res.append(" ".join(words) + "\n")
        with open(path_output, "w") as f_out:
            f_out.writelines(res)
        message(f"saved {path_output}", type="success")
Example 6
def test(args: argparse.Namespace) -> None:
    model = UnigramLM().load(args.model)

    res = model.test(args.test)
    if args.name:
        message(f"[{args.name} | default(λ_1={0.95:.2f})]", file=sys.stdout)
    for k, v in res.items():
        message(f"{k:15s} = {v:f}", file=sys.stdout)

    entropy, λ_1 = min(
        (model.test(args.test, λ_unk=1 - λ_1)["entropy_H"], λ_1)
        for λ_1 in np.arange(0, 1, 0.01)
    )

    res = model.test(args.test, λ_unk=1 - λ_1)
    if args.name:
        message(
            f"[{args.name} | optimized(λ_1={λ_1:.2f})]", file=sys.stdout,
        )
    for k, v in res.items():
        message(f"{k:15s} = {v:f}", file=sys.stdout)

    """result
Example 7
def grid_search(
    model: Model,
    path_test: str,
    *,
    rng: Optional[Tuple[float, float, float]] = None,
    rng1: Optional[Tuple[float, float, float]] = None,
    rng2: Optional[Tuple[float, float, float]] = None,
    save: Optional[str] = None,
) -> Tuple[float, float]:
    def get_param(idx: np.ndarray) -> np.ndarray:
        return (np.array([rng1[0], rng2[0]]) +
                np.array([rng1[2], rng2[2]]) * idx)

    if rng:
        rng1 = rng2 = rng
    assert rng1 is not None
    assert rng2 is not None

    with Renderer("grid search") as out:
        cnt1 = len(np.arange(*rng1))
        cnt2 = len(np.arange(*rng2))
        E = np.zeros((cnt2, cnt1))
        for j, λ_2 in enumerate(np.arange(*rng2)):
            message(f"{j + 1:2d} / {cnt2}", CR=True, type="status")
            for i, λ_1 in enumerate(np.arange(*rng1)):
                E[j, i] = model.test(path_test, λ_1=λ_1, λ_2=λ_2)["entropy_H"]
        message("", CR=True)

        ma_y, ma_x = np.where(E == E.max())
        mi_y, mi_x = np.where(E == E.min())
        out.result("max", (E.max(), get_param(np.hstack([ma_x, ma_y]))))
        out.result("min", (E.min(), get_param(np.hstack([mi_x, mi_y]))))

    if save:
        fig = plt.figure()
        ax = fig.add_subplot(111)

        mappable = ax.pcolor(E, cmap="jet", edgecolors="k", alpha=0.8)
        fig.colorbar(mappable)

        ax.scatter(ma_x + 0.5, ma_y + 0.5, c="r", label="max")
        ax.scatter(mi_x + 0.5, mi_y + 0.5, c="b", label="min")

        ax.set_xticks(np.arange(cnt1) + 0.5, minor=False)
        ax.set_yticks(np.arange(cnt2) + 0.5, minor=False)
        ax.set_xticklabels(
            map(lambda x: f"{x:.2f}"[1:], np.arange(*rng1)),
            minor=False,
            rotation=45,
        )
        ax.set_yticklabels(
            map(lambda x: f"{x:.2f}"[1:], np.arange(*rng2)),
            minor=False,
        )
        ax.set_title(f"エントロピー {get_ext(model.WittenBell)}")
        ax.set_xlabel("$λ_1$")
        ax.set_ylabel("$λ_2$")
        ax.set_aspect("equal")
        ax.legend(loc="lower right")
        plt.savefig(save)

    return get_param(np.hstack([mi_x, mi_y]))
Example 8
def fetch_url_of_img_with_urllib(filename: str) -> dict:
    message("fetch:", f"url of `{filename}`", type="status")
    url_ = url + "?%s" % urllib.parse.urlencode(make_payload(filename))
    with urllib.request.urlopen(url_) as f:
        return json.loads(f.read().decode("utf-8"))
Example 9
 def train(self, path_corpus: str) -> "POSTagger":
     self.probs, self.possible_tags = self.__build_model(path_corpus)
     message(f"train model from {path_corpus}", type="success")
     return self
Example 10
 def load(self, path_model: str) -> "POSTagger":
     self.probs, self.possible_tags = self.__load_model(path_model)
     message(f"load  model from {path_model}", type="success")
     return self
Example 11
 def __exit__(self, *args) -> None:
     message("saved :", self.name, "\n", CR=True, type="success")
Example 12
    for k, v in cnter.items():
        idx = v.index(max(v))
        groups[idx].append((v[idx], k))
        tmp.append((v[idx], k))
    print_cnt = min(300, len(tmp))
    th = sorted(tmp, reverse=True)[print_cnt - 1][0]
    for i, group in enumerate(groups):
        print("=" * 5, i, "=" * 5)
        group.sort(reverse=True)
        res = [word for freq, word in group if word not in stop_words and freq >= th]
        pprint(res, width=80, compact=True)


if __name__ == "__main__":
    if sys.argv[1:] == ["test"]:
        message("test", type="status")
        learn_lda(test_path="../../test/07-train.txt", epochs=50)
    else:
        message("main", type="status")
        stop_words = nltk.corpus.stopwords.words("english")
        symbols = [
            "'",
            '"',
            ":",
            ";",
            ".",
            ",",
            "-",
            "!",
            "?",
            ")",
Example 13
sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from kiyuna.utils.message import message  # noqa: E402 isort:skip


class ChunkNormalized(Chunk):
    def __init__(self, chunk):
        self.morphs, self.dst, self.srcs = (*chunk, )
        self.norm = self.get_norm()

    def get_norm(self):
        clause = "".join(m.surface for m in self.morphs if m.pos != "記号")
        return clause

    def has_pos(self, pos):
        return any(m.pos == pos for m in self.morphs)


if __name__ == "__main__":
    res = []
    for chunks in cabocha_into_chunks():
        chunks = {k: ChunkNormalized(v) for k, v in chunks.items()}
        for c in chunks.values():
            if c.dst == -1:
                continue
            if c.dst not in chunks:
                continue
            if c.has_pos("名詞") and chunks[c.dst].has_pos("動詞"):
                res.append(f"{c.norm}\t{chunks[c.dst].norm}\n")
    sys.stdout.writelines(res)
    message(f"write {len(res)} lines", type="success")
Example 14
 def __exit__(self, *args) -> None:
     self.end = time.time()
     self.secs = self.end - self.start
     self.msecs = self.secs * 1000
     if self.verbose:
         message(f"elapsed time = {self.msecs:f} [msec]", type="success")
Example 15
 def train(self, path_corpus: str) -> "Bigram":
     self.model = self.__build_model(path_corpus)
     message(f"train model from {path_corpus}", type="success")
     return self
Example 16
            dis.dis(build_word_frequency_cnter, file=sys.stderr)
            out.header("doctest")
            doctest.testmod(verbose=True)
            out.header("check serialize")
            cnter = list_word_freq("../../test/00-input.txt")
            dump(cnter, "cnter")
            cnter = load("cnter")
        exit(0)

    with Renderer("単語の異なり数") as out:
        out.result("map", len(list_word_freq(path)))
        out.result("set", len(get_vocab(path)))

    num = 10
    with Renderer(f"数単語の頻度(上位 {num} 単語のみ)") as out:
        out.result(
            "大文字と小文字の区別をする",
            build_word_frequency_cnter(path, str).most_common(num),
        )
        trans = lambda w: w.lower()  # noqa: E731
        out.result(
            "大文字と小文字の区別をしない",
            build_word_frequency_cnter(path, trans).most_common(num),
        )

    if "test" in path:
        for k, v in list_word_freq(path):
            print(k, v, sep="\t")

    message("DONE.", type="status")
Example 17
    else:
        return f"({sym} {words[int(i)]})"


if __name__ == "__main__":
    if sys.argv[1] == "test":
        grammar_file = "../../test/08-grammar.txt"
        input_file = "../../test/08-input.txt"
    else:
        grammar_file = "../../data/wiki-en-test.grammar"
        input_file = "../../data/wiki-en-short.tok"

    s, t = 0, 1
    with Renderer(sys.argv[1]) as out:
        for i, s_expr in enumerate(cky(grammar_file, input_file, s=s, t=t)):
            message("=" * 3, "line:", s + i, "=" * 3)
            tree = Tree.fromstring(s_expr)
            out.result("S-expression", s_expr)
            out.result("nltk.tree.Tree", tree)
            out.header("nltk.tree.Tree.pretty_print")
            tree.pretty_print()
            # tree.draw()
"""result
[+] main
=== line: 0 ===
[*]  1. S-expression
(S (PP (IN Among) (NP (DT these) (NP' (, ,) (NP' (JJ supervised) (NP' (NN learning) (NNS approaches)))))) (S' (VP (VBP have) (VP (VBN been) (VP' (NP (DT the) (NP' (ADJP (RBS most) (JJ successful)) (NNS algorithms))) (PP (TO to) (NP_NN date))))) (. .)))
[*]  2. nltk.tree.Tree
(S
  (PP
    (IN Among)
Example 18
def exec_match(wiki: str, pattern: str) -> Iterator[Tuple[str, Match]]:
    reg = re.compile(pattern)
    for line in wiki.split("\n"):
        match = reg.match(line)
        if match:
            yield line, match


if __name__ == "__main__":
    wiki = load("UK")

    pat = r"(?P<Level>=+)\s*(?P<Heading>.+)\s*(?P=Level)"
    for _, match in exec_match(wiki, pat):
        level, heading = match.group(1, 2)
        print(
            "  " * (len(level) - 2),
            "+",
            f" lv{len(level) - 1} ",
            heading,
            sep="",
        )

    with Renderer("re.match() vs. re.search()") as out:
        pat_hat = r"^" + pat
        it = zip(exec_match(wiki, pat), exec_search(wiki, pat_hat))
        for (line, match1), (_, match2) in it:
            assert match1.groups() == match2.groups(), line
        else:
            message("same")
Example 19
"""
57. Checking feature weights
For the logistic regression model trained in knock 52,
check the 10 features with the highest weights and the 10 features with the lowest weights.

[MEMO]
Corresponds to knock75 in the 2015 edition
"""
import os
import sys

from sklearn.metrics import precision_recall_fscore_support

sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from kiyuna.utils.message import message, Renderer  # noqa: E402 isort:skip
from kiyuna.utils.pickle import dump, load  # noqa: E402 isort:skip

if __name__ == "__main__":
    classifier = load("chap06-classifier")
    names = load("chap06-vectorizer-names")
    # NOTE: for a multiclass model, coef_ has shape (n_classes, n_features), so
    # flatten() followed by zip(names) below ranks only the first class's weights.
    weights = classifier.coef_.flatten()
    ranking = sorted(zip(weights, names), reverse=True)
    with Renderer("knock57") as out:
        out.header("best 10")
        for weight, name in ranking[:10]:
            message(f"{name:15}{weight:f}")
        out.header("worst 10")
        for weight, name in ranking[:-11:-1]:
            message(f"{name:15}{weight:f}")
Example 20
 def load(self, path_model: str) -> "Bigram":
     self.model = self.__load_model(path_model)
     message(f"load model from {path_model}", type="success")
     return self
Example 21
def fetch_url_of_img_with_requests(filename: str) -> dict:
    message("fetch:", f"url of `{filename}`", type="status")
    with requests.Session() as s:
        return s.get(url=url, params=make_payload(filename)).json()
Example 22
from kiyuna.utils.pickle import load  # noqa: E402 isort:skip
from kiyuna.utils.message import Renderer, message  # noqa: E402 isort:skip
from kiyuna.utils.message import green  # noqa: E402 isort:skip


def remove_em(od: OrderedDict) -> OrderedDict:
    """remove emphasis expressions
        ''italics''
        '''bold'''
        '''''both'''''
    """
    res = OrderedDict()
    reg = re.compile(r"'{2,}")
    for key in od:
        res[key] = reg.sub("", od[key])
    return res


if __name__ == "__main__":
    infobox = load("infobox")
    res = remove_em(infobox)

    with Renderer("knock26") as out:
        for (key, src), (_, dst) in zip(infobox.items(), res.items()):
            if src == dst:
                out.cnt += 1
            else:
                out.result(key, (src, green(dst)))
        if infobox == res:
            message("変化なし", type="warning")
Example 23
def save_file_from_url(url: str, filename: str) -> None:
    message("save :", filename, type="status")
    with urllib.request.urlopen(url) as f_in, open(filename, "wb") as f_out:
        f_out.write(f_in.read())
Example 24
 def __enter__(self) -> "SaveHelper":
     message("saving:", self.name, CR=True, type="status")
     return self
Example 25
def load(file_name: str) -> object:
    with open(get_path(file_name), "rb") as f_in:
        obj = dill.load(f_in)
    message("loaded:", trunc(repr(obj)), type="success")
    return obj
Example 26
def dump(obj: object, file_name: str) -> None:
    with open(get_path(file_name), "wb") as f_out:
        dill.dump(obj, f_out)
    message("saved :", trunc(repr(obj)), type="success")
Example 27
[MEMO]
Corresponds to knock94-95 in the 2015 edition
"""
import os
import sys
from zipfile import ZipFile

from scipy.stats import spearmanr

sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from kiyuna.utils.message import message, Renderer  # noqa: E402 isort:skip
from kiyuna.utils.pickle import load  # noqa: E402 isort:skip

if __name__ == "__main__":
    wv = load("chap07-embeddings")
    preds, labels = [], []
    with ZipFile("wordsim353.zip") as myzip:
        message(myzip.infolist())
        with myzip.open("combined.csv") as myfile:
            myfile = map(lambda x: x.decode(), myfile)
            message("[header]", next(myfile))  # Word 1,Word 2,Human (mean)
            for line in myfile:
                word1, word2, human = line.split(",")
                preds.append(wv.similarity(word1, word2))
                labels.append(float(human))  # parse as float so spearmanr ranks numerically
    with Renderer("knock66") as out:
        out.result("Spearman corr", spearmanr(preds, labels)[0])
"""result
0.6849564489532376
"""
Example 28
    #     "fit_intercept": [False, True],
    #     "class_weight": [None, "balanced"],
    #     "solver": ["newton-cg", "sag", "saga", "lbfgs"],
    #     "multi_class": ["multinomial"],
    #     "warm_start": [False, True],
    # }
    # for params in ParameterGrid(param_grid):
    #     clfs.append((LogisticRegression(**params), False))

    @timeout_decorator.timeout(3)
    def clf_fit(clf):
        clf.fit(*get_data("train", need_dense))

    models = defaultdict(list)
    for i, (clf, need_dense, *args) in enumerate(clfs):
        message(type(clf).__name__, type="status")
        message(clf.get_params())
        if args:
            message("skip", args, type="warning")
            continue
        if (clf.get_params().get("penalty", None) == "l1"
                and clf.get_params().get("solver", None) == "saga"):
            message("skip", "Too slow", type="warning")
            continue
        try:
            clf_fit(clf)
            score = clf.score(*get_data("valid", need_dense))
            models[score].append(clf)
            message(score, type="success")
        except Exception as e:
            message("skip", e, type="warning")
Example 29
Aggregate the per-category performance using the micro-average and the macro-average.

[MEMO]
Corresponds to knock77 in the 2015 edition
"""
import os
import sys

from sklearn.metrics import precision_recall_fscore_support

from knock53 import load_dataset

sys.path.append(os.path.join(os.path.dirname(__file__), "../../"))
from kiyuna.utils.message import message, Renderer  # noqa: E402 isort:skip
from kiyuna.utils.pickle import dump, load  # noqa: E402 isort:skip

if __name__ == "__main__":
    score_names = ["Precision", "Recall", "F1_score"]
    classifier = load("chap06-classifier")
    with Renderer("knock56") as out:
        for average in "micro", "macro":
            out.header(average)
            features, labels = load_dataset("./test.feature.txt")
            predicts = classifier.predict(features)
            for name, result in zip(
                    score_names,
                    precision_recall_fscore_support(labels,
                                                    predicts,
                                                    average=average)):
                message(f"{name:10}\t{result}")