Esempio n. 1
0
def handle_hskchars():
    """Return the HSK 2012-2020 character list page.

    Initialises shared resources, reads the ``expand`` request parameter and
    delegates to ``hsk_vocabulary_page`` with the ``hsk_chars`` vocabulary and
    a cross-link to ``/hskwords``.
    """
    init_resources()
    no_extra_link = ""  # this page has no additional header link
    expand_mode = get_parameter("expand")
    return hsk_vocabulary_page(
        "/hskchars",
        "HSK Characters for 2012-2020",
        no_extra_link,
        "characters",
        "/hskwords",
        "words",
        hsk_chars,
        frequency_order_char_link,
        expand_mode,
    )
Esempio n. 2
0
def handle_cidian():
    """Return the dictionary (cidian) page for the ``q`` request parameter.

    The start time is captured before resources are initialised and passed
    to ``cidian_page`` together with a fresh, empty results list.
    """
    began = time.time()
    init_resources()
    fragments = []
    search_text = get_parameter("q")
    expand_mode = get_parameter("expand")
    return cidian_page(expand_mode, search_text, fragments, began)
Esempio n. 3
0
def handle_radicals():
    """Return the radicals page, optionally restricted by the ``hsk`` parameter.

    The ``hsk`` request parameter defaults to ``0`` and is converted with
    ``int``; a non-numeric value therefore raises ``ValueError``.
    """
    began = time.time()
    init_resources()
    expand_mode = get_parameter("expand")
    # 1, 2, .. 6 but can be two levels e.g. 12, 14 etc.
    raw_level = get_parameter("hsk", 0)
    level = int(raw_level)
    return radicals_page(expand_mode, level, began)
Esempio n. 4
0
def handle_hskchars2010():
    """Return the (outdated) HSK 2010 character list page.

    Delegates to ``hsk_vocabulary_page`` with the ``hsk_chars_2010``
    vocabulary, a link back to the 2012-2020 character list and a cross-link
    to ``/hskwords2010``.
    """
    init_resources()
    expand_mode = get_parameter("expand")
    current_list_link = '<a href="/hskchars">HSK Characters 2012-2020</a>'
    return hsk_vocabulary_page(
        '/hskchars2010',
        "HSK Characters for 2010 (outdated)",
        current_list_link,
        "characters",
        "/hskwords2010",
        "words",
        hsk_chars_2010,
        frequency_order_char_link,
        expand_mode,
    )
Esempio n. 5
0
def handle_homophones():
    """Return the homophones page configured from the request parameters.

    Reads ``chars`` (converted with ``int``, default ``"2"``) and the
    ``expand``, ``tones`` and ``hsk`` switches, each of which is true only
    when the parameter equals ``"yes"`` (default ``"no"``).  Parameters are
    read before resources are initialised.
    """
    began = time.time()

    def is_yes(name):
        # A switch is on only when the parameter is exactly "yes".
        return get_parameter(name, "no") == "yes"

    char_count = int(get_parameter("chars", "2"))
    expand_flag = is_yes("expand")
    tones_flag = is_yes("tones")
    hsk_flag = is_yes("hsk")
    init_resources()
    return homophones_page(expand_flag, hsk_flag, tones_flag, char_count,
                           began)
Esempio n. 6
0
def _parse_set_operand(hanzi, list_format, notes, expand):
    """Parse one input list for the set-operations page.

    ``list_format`` selects the parser: ``"block"`` uses ``parse_block``,
    ``"commasep"`` uses ``parse_comma_sep`` and anything else falls back to
    ``parse_list``.  Parser warnings are collected in ``notes``.

    Returns the ``(words, chars)`` pair produced by the parser.
    """
    if list_format == "block":
        parsed = parse_block(hanzi, notes, expand, word_char_definition_link)
    elif list_format == "commasep":
        parsed = parse_comma_sep(hanzi, notes, True, expand,
                                 word_char_definition_link)
    else:
        parsed = parse_list(hanzi, notes, True, expand,
                            word_char_definition_link)
    # The parsers also return HSK word/character counts, which the
    # set-operations page does not use.
    words, chars, _hsk_word_count, _hsk_char_count = parsed
    return words, chars


def _append_set_warnings(results, list_label, notes):
    """Append the red warnings section for list ``list_label`` ("A" or "B").

    Nothing is appended when ``notes`` is empty.
    """
    if not notes:
        return
    # NOTE(review): both lists emit a help <div> with the same id
    # ("warningshelp"), so the (?) toggle of list B targets the same id as
    # list A.  Left unchanged because page scripts/CSS may depend on the id.
    results.append(
        """<h2><span style="color:red;">Warnings (List {})</span> <a class="arrowlink" href="javascript:toggle_visibility('warningshelp');"><small><small>(?)</small></small></a></h2><ul>
 <div id="warningshelp" class="inlinehelp" style="max-width:600px;">
    <p>This section lists words and character that are being treated as Chinese but aren't in the CC-CEDICT that is being used.</p>
    <p>In addition, when potential word matches are ignored during parsing of a block of text, warnings below will show you
    the words that are in the dictionary but which were not chosen by the script.</p>
 </div><span style="color:red;">""".format(list_label))
    for note in notes:
        results.append("<li>{}</li>".format(note))
    results.append("</ul></span>")


def _append_set_result_boxes(results, set_a, set_b, subscript, id_suffix,
                             kind, order, joinchar):
    """Append the seven result boxes (A, B, intersection, union, both
    differences, symmetric difference) for one pair of sets.

    ``order`` is the frequency-ordering function applied to every set before
    it is handed to ``setresultbox``.
    """
    label_a = "A<sub>{}</sub>".format(subscript)
    label_b = "B<sub>{}</sub>".format(subscript)
    boxes = (
        (label_a, "A" + id_suffix, set_a),
        (label_b, "B" + id_suffix, set_b),
        (label_a + " \u2229 " + label_b, "AintersectB" + id_suffix,
         set_a & set_b),
        (label_a + " \u222A " + label_b, "AunionB" + id_suffix,
         set_a | set_b),
        (label_a + " \u2216 " + label_b, "AdifferenceB" + id_suffix,
         set_a - set_b),
        (label_b + " \u2216 " + label_a, "BdifferenceA" + id_suffix,
         set_b - set_a),
        # "Asymmmetric" (sic): the existing element id is kept verbatim.
        (label_a + " \u2206 " + label_b, "AsymmmetricB" + id_suffix,
         set_a ^ set_b),
    )
    for title, element_id, members in boxes:
        results.append(
            setresultbox(title, element_id, order(members), joinchar, kind))


def perform_set_operations(hanzi_a, hanzi_b, results, expand):
    """Append the set-operation report for two word lists to ``results``.

    Both inputs are parsed according to the ``formatA`` / ``formatB`` request
    parameters, any parser warnings are emitted per list, and then result
    boxes for A, B, intersection, union, both differences and the symmetric
    difference are appended - first for words, then for characters.

    Args:
        hanzi_a: raw text of list A.
        hanzi_b: raw text of list B.
        results: list of HTML fragments; mutated in place.
        expand: expand flag forwarded to the parsers.

    Returns:
        None. All output is appended to ``results``.
    """
    init_resources()

    notes_a = []
    words_a, chars_a = _parse_set_operand(hanzi_a, get_parameter("formatA"),
                                          notes_a, expand)
    _append_set_warnings(results, "A", notes_a)

    notes_b = []
    words_b, chars_b = _parse_set_operand(hanzi_b, get_parameter("formatB"),
                                          notes_b, expand)
    _append_set_warnings(results, "B", notes_b)

    # Separator used inside each result box; a single space is the default.
    joinchar = {
        "oneperline": "\n",
        "commasep": ",",
        "tabsep": "\t",
    }.get(get_parameter("outputformat"), " ")

    results.append(
        """<h4>Set Operations on Words <a class="arrowlink" href="javascript:toggle_visibility('wordoperationshelp');"><small><small>(?)</small></small></a></h4>
 <div id="wordoperationshelp" class="inlinehelp" style="max-width:600px;">
 <p><b>A<sub>w</sub></b> and <b>B<sub>w</sub></b> are the sets of all unique words derived from Word Lists A and B above.</p>
 <p><b>A<sub>w</sub> \u2229 B<sub>w</sub></b> <i>Intersection, words that appear in both sets.</i><br />
    <b>A<sub>w</sub> \u222A B<sub>w</sub></b> <i>Union, both sets of words combined together as a single set.</i><br />
    <b>A<sub>w</sub> \u2216 B<sub>w</sub></b> <i>Difference, words that are <b>A<sub>w</sub></b> but not <b>B<sub>w</sub></b>.</i><br />
    <b>B<sub>w</sub> \u2216 A<sub>w</sub></b> <i>Difference, words that are <b>B<sub>w</sub></b> but not <b>A<sub>w</sub></b>.</i><br />
    <b>A<sub>w</sub> \u2206 B<sub>w</sub></b> <i>Symmetric Difference, words that are in only one of the two sets.</i></p>
    <p>All sets are sorted with the most frequently used words first.</p>
 </div>""")
    _append_set_result_boxes(results, words_a, words_b, "w", "words", "word",
                             frequency_order_word, joinchar)

    results.append(
        """<h4>Set Operations on Characters <a class="arrowlink" href="javascript:toggle_visibility('charoperationshelp');"><small><small>(?)</small></small></a></h4>
 <div id="charoperationshelp" class="inlinehelp" style="max-width:600px;">
 <p><b>A<sub>c</sub></b> and <b>B<sub>c</sub></b> are the sets of all unique characters derived from Word Lists A and B above.</p>
 <p><b>A<sub>c</sub> \u2229 B<sub>c</sub></b> <i>Intersection, characters that appear in both sets.</i><br />
    <b>A<sub>c</sub> \u222A B<sub>c</sub></b> <i>Union, both sets of characters combined together as a single set.</i><br />
    <b>A<sub>c</sub> \u2216 B<sub>c</sub></b> <i>Difference, characters that are <b>A<sub>c</sub></b> but not <b>B<sub>c</sub></b>.</i><br />
    <b>B<sub>c</sub> \u2216 A<sub>c</sub></b> <i>Difference, characters that are <b>B<sub>c</sub></b> but not <b>A<sub>c</sub></b>.</i><br />
    <b>A<sub>c</sub> \u2206 B<sub>c</sub></b> <i>Symmetric Difference, characters that are in only one of the two sets.</i></p>
    <p>All sets are sorted with the most frequently used characters first.</p>
 </div>""")
    # Every character box, including plain A and B, is ordered by character
    # frequency so it matches the help text above and the derived sets.
    _append_set_result_boxes(results, chars_a, chars_b, "c", "chars", "char",
                             frequency_order_char, joinchar)
Esempio n. 7
0
def vocab_diff_page(oldlink, newlink, thislink, thisitem, otherlink, otheritem,
                    oldvocab, newvocab, linkfunction):
    """Build the page comparing HSK 2010 levels with HSK 2012-2020 levels.

    The page contains a 7x7 table (levels 1-6 plus "Non-HSK" on each axis)
    of the items that moved between levels, followed by one section per
    level listing the items that stayed where they were.

    Args:
        oldlink: URL of the HSK 2010 list page (level anchors are appended).
        newlink: URL of the HSK 2012-2020 list page.
        thislink: accepted for interface compatibility; not used here.
        thisitem: lower-case noun for the items on this page; its first
            letter is upper-cased for headings.
        otherlink: URL of the sibling comparison page.
        otheritem: noun for the sibling page, capitalised the same way.
        oldvocab: mapping indexed by level 1-6 and by 16, holding sets.
            Key 16 presumably is the union of all six levels - TODO confirm.
        newvocab: same shape as ``oldvocab`` for the 2012-2020 lists.
        linkfunction: callable ``(items, expand)`` returning an iterable of
            HTML strings, one per item.

    Returns:
        A ``Response`` built from the list of HTML fragments.
    """
    began = time.time()
    init_resources()
    expand = get_parameter("expand")

    item_title = thisitem[0].upper() + thisitem[1:]
    page = [
        """<html lang="zh-Hans">\n<head>""",
        "<title>HSK\u4E1C\u897F - Where the HSK 2010 {} are in 2012-2020</title>"
        .format(item_title),
        allstyle,
        "</head>\n<body>",
        """<a href="https://hskhsk.com/word-lists">HSK\u4E1C\u897F</a>""",
        """<a href="/">Scripts</a>""",
        """<a href="{}">Where the HSK 2010 {} are in 2012-2020</a>""".format(
            otherlink, otheritem[0].upper() + otheritem[1:]),
        "<h3>HSK 2010 {} that changed level in 2012-2020</h3>".format(
            item_title),
        """<p>This table shows the {0} in the New HSK 2010 that changed level when the word lists were revised
in 2012 (also valid to date as of 2020), {0} that didn't change level are shown below.
For definitions hover over the characters, or try clicking on almost anything.</p>
<table border="1" style="border-collapse:collapse;" cellpadding="2em" cellspacing="0">
<tr><th rowspan=2 colspan=2 style="background-color: #FFFFFF;"></th><th colspan=7><a href="{1}" class="hsk0">HSK 2012-2020</a></th></tr>
<tr>""".format(thisitem, newlink),
    ]

    # Column headers: one per new HSK level, then the Non-HSK column.
    page.extend(
        """<th><div style="white-space: nowrap;">&nbsp;&nbsp;&nbsp;&nbsp;<a href="{}#hsk{}" class="hsk{}">HSK {}</a>&nbsp;&nbsp;&nbsp;&nbsp;</div></th>"""
        .format(newlink, column, column, column) for column in range(1, 7))
    page.append(
        """<th><div class="hsk0" style="white-space: nowrap;">&nbsp;&nbsp;&nbsp;&nbsp;Non-HSK&nbsp;&nbsp;&nbsp;&nbsp;</div></th></tr>"""
    )

    # The same separator is used for every cell and every section.
    separator = "<br />" if expand == "yes" else chinese_comma_sep

    # Index 7 on either axis stands for "Non-HSK".
    for old_level in range(1, 8):
        page.append("<tr>")
        if old_level == 1:
            page.append(
                """<th rowspan=7><a href="{}" class="hsk0">HSK 2010</a></th>"""
                .format(oldlink))
        if old_level == 7:
            page.append(
                """<th><div style="white-space: nowrap;">Non-HSK</div></th>""")
        else:
            page.append(
                """<th><div style="white-space: nowrap;"><a href="{}#hsk{}" class="hsk{}">HSK {}</a></div></th>"""
                .format(oldlink, old_level, old_level, old_level))

        for new_level in range(1, 8):
            if new_level == old_level:
                # Diagonal cell: links to the "didn't change" section below,
                # except for the Non-HSK/Non-HSK corner which is empty.
                if 1 <= old_level <= 6:
                    page.append(
                        """<td class="hsk{0}light" onClick="document.location.href='#hsk{0}';" onmouseover="this.style.cursor='pointer';"> </td>"""
                        .format(old_level))
                else:
                    page.append("""<td class="hsk0light"></td>""")
                continue

            if old_level == 7:
                moved = newvocab[new_level] - oldvocab[16]
            elif new_level == 7:
                moved = oldvocab[old_level] - newvocab[16]
            else:
                # Subtracting newvocab[old_level] accounts for the case
                # where a word exists at multiple levels.
                in_both = oldvocab[old_level] & newvocab[new_level]
                moved = in_both - newvocab[old_level]
            page.extend([
                "<td>",
                separator.join(linkfunction(moved, expand)),
                "</td>",
            ])
        page.append("</tr>")
    page.append("</table>")

    page.append(
        "<h3>HSK 2010 {} that didn't change level in 2012-2020</h3>".format(
            item_title))
    for level in range(1, 7):
        page.append(
            """<h4><a class="hsk{0}" name="hsk{0}">HSK {0} {1} that didn't change level</a></h4>"""
            .format(level, item_title))
        unchanged = newvocab[level] & oldvocab[level]
        page.append(separator.join(linkfunction(unchanged, expand)))

    elapsed = time.time() - began
    page.append(
        """<p><small><i>Page generated in {:1.6f} seconds.</i></small></p>\n"""
        .format(elapsed))
    page.append(page_footer)
    return Response(page)
Esempio n. 8
0
def performactions(hanzi, results, expand):
    """Run the analysis/suggestion/annotation actions selected in the request.

    The text is parsed (according to the ``format`` request parameter) only
    when at least one analyse/suggest action is requested; the annotate
    actions work on the raw ``hanzi`` text and run regardless.

    Args:
        hanzi: raw input text.
        results: list of HTML fragments; mutated in place.
        expand: expand flag forwarded to the parsers and annotators.

    Returns:
        None. All output is appended to ``results``.
    """
    init_resources()

    # Actions that need the parsed word/character sets; read each flag once.
    parsed_actions = ("analysevocab", "analysehskwords", "analysehskchars",
                      "suggesthskwords", "suggesthskchars", "suggestwords",
                      "suggestwordsreuse", "suggestchars")
    requested = {name: get_parameter(name) for name in parsed_actions}

    # only parse if one of these actions is being performed
    if any(requested.values()):
        notes = []
        input_format = get_parameter("format")
        if input_format == "block":
            words, chars, hsk_word_count, hsk_char_count = parse_block(
                hanzi, notes, expand)
        elif input_format == "commasep":
            words, chars, hsk_word_count, hsk_char_count = parse_comma_sep(
                hanzi, notes, False, expand)
        else:
            words, chars, hsk_word_count, hsk_char_count = parse_list(
                hanzi, notes, False, expand)

        if notes:
            # The heading is closed with </h2> to match its opening tag.
            results.append(
                """<h2><span style="color:red;">Warnings</span> <a class="arrowlink" href="javascript:toggle_visibility('warningshelp');"><small><small>(?)</small></small></a></h2><ul>
 <div id="warningshelp" class="inlinehelp" style="max-width:500px;">
    <p>This section lists words and character that are being treated as Chinese but aren't in the CC-CEDICT that is being used.</p>
    <p>In addition, when potential word matches are ignored during parsing of a block of text, warnings below will show you
    the words that are in the dictionary but which were not chosen by the script.</p>
 </div><span style="color:red;">""")
            for note in notes:
                results.append("<li>{}</li>".format(note))
            results.append("</ul></span>")

        # Same tag fix as above: <h2> ... </h2>.
        results.append(
            """<h2>Results <a class="arrowlink" href="javascript:toggle_visibility('resultshelp');"><small><small>(?)</small></small></a></h2>
 <div id="resultshelp" class="inlinehelp" style="max-width:500px;">
    All word/character lists are in descending order of frequency, with the most frequently used words/characters at the top of each list.
 </div>""")

        if requested["analysevocab"]:
            analysewords(results, words, chars, hsk_word_count, hsk_char_count)
        if requested["analysehskwords"]:
            analysehskwords(results, words, hsk_word_count)
        if requested["analysehskchars"]:
            analysehskchars(results, chars, hsk_char_count)
        if requested["suggesthskwords"]:
            suggesthskwords(results, words)
        if requested["suggesthskchars"]:
            suggesthskchars(results, chars)
        if requested["suggestwords"]:
            suggestfreqwords(results, words)
        if requested["suggestwordsreuse"]:
            suggestfreqwordsre(results, words, chars)
        if requested["suggestchars"]:
            suggestfreqchars(results, chars)
    else:
        results.append("<h2>Results</h2>")

    # these actions just use the raw hanzi
    if get_parameter("annotatewords"):
        annotatewords(results, hanzi, expand)
    if get_parameter("annotatechars"):
        annotatechars(results, hanzi, expand)
0
def handle_mandcomp():
    """Return the Mandarin Companion page.

    The start time is captured before resources are initialised and passed
    to ``mandarin_companion_page`` along with the ``expand`` parameter.
    """
    began = time.time()
    init_resources()
    expand_mode = get_parameter("expand")
    page = mandarin_companion_page(began, expand_mode)
    return page
Esempio n. 10
0
def handle_chars1000():
    """Return the page produced by ``list_page_chars1000``.

    The start time is captured before resources are initialised and passed
    on together with the ``expand`` request parameter.
    """
    began = time.time()
    init_resources()
    expand_mode = get_parameter("expand")
    page = list_page_chars1000(expand_mode, began)
    return page
Esempio n. 11
0
def handle_flashcards():
    """Initialise shared resources and return ``flashcards_download()``."""
    init_resources()
    download = flashcards_download()
    return download
Esempio n. 12
0
def handle_search():
    """Return the search page.

    The start time is captured before resources are initialised and handed
    to ``search_page``.
    """
    began = time.time()
    init_resources()
    page = search_page(began)
    return page