Esempio n. 1
0
def handle_cidian():
    """Handle a dictionary (cidian) request.

    Records the time before ``init_resources()`` runs, reads the ``q`` and
    ``expand`` request parameters, and returns whatever ``cidian_page``
    produces for them. A fresh empty list is supplied as the results
    accumulator.
    """
    started = time.time()
    init_resources()
    search_text = get_parameter("q")
    expand_flag = get_parameter("expand")
    return cidian_page(expand_flag, search_text, [], started)
Esempio n. 2
0
def handle_radicals():
    """Handle a radicals request.

    Reads the ``expand`` and ``hsk`` request parameters (``hsk`` defaults to
    0 and is converted with ``int``) and returns the result of
    ``radicals_page``.
    """
    started = time.time()
    init_resources()
    expand_flag = get_parameter("expand")
    # 1, 2, .. 6 but can be two levels e.g. 12, 14 etc.
    raw_level = get_parameter("hsk", 0)
    return radicals_page(expand_flag, int(raw_level), started)
Esempio n. 3
0
def blockboxtemplate(cols=""):
    """Return the HTML template for a titled textarea box.

    The returned string still contains three ``{}`` placeholders (title,
    textarea name, textarea content) for the caller to fill in. The textarea
    width is chosen from the request parameters:

    * 30 columns when ``outputcommasep`` or ``hanziA`` is set,
    * 25 columns when both ``addfreqindex`` and ``addfreqvalue`` are set,
    * 15 columns otherwise.

    NOTE: the ``cols`` argument is always overwritten, so any value passed in
    by the caller has no effect.
    """
    if get_parameter("outputcommasep") or get_parameter("hanziA"):
        width = 30
    elif get_parameter("addfreqindex") and get_parameter("addfreqvalue"):
        width = 25
    else:
        width = 15
    cols = 'cols="{}"'.format(width)
    opening = '<div class="box"><div class="title">{}</div><div><textarea name="{}" '
    closing = ' rows="12">{}</textarea></div></div>'
    return opening + cols + closing
Esempio n. 4
0
def handle_homophones():
    """Handle a homophones request.

    Reads ``chars`` (converted with ``int``, default "2") and the yes/no
    switches ``expand``, ``tones`` and ``hsk`` (each defaulting to "no"),
    then calls ``init_resources()`` and returns the result of
    ``homophones_page``.
    """

    def is_yes(name):
        # A switch counts as on only when its value is exactly "yes".
        return get_parameter(name, "no") == "yes"

    started = time.time()
    char_count = int(get_parameter("chars", "2"))
    expand_on = is_yes("expand")
    tones_on = is_yes("tones")
    hsk_on = is_yes("hsk")
    init_resources()
    return homophones_page(expand_on, hsk_on, tones_on, char_count, started)
Esempio n. 5
0
def handle_hskchars():
    """Handle the /hskchars request.

    Delegates to ``hsk_vocabulary_page`` with the ``hsk_chars`` data, an
    empty extra link, and the current ``expand`` request parameter.
    """
    init_resources()
    expand_flag = get_parameter("expand")
    no_extra_link = ""
    return hsk_vocabulary_page(
        "/hskchars",
        "HSK Characters for 2012-2020",
        no_extra_link,
        "characters",
        "/hskwords",
        "words",
        hsk_chars,
        frequency_order_char_link,
        expand_flag,
    )
Esempio n. 6
0
def handle_hskchars2010():
    """Handle the /hskchars2010 request.

    Delegates to ``hsk_vocabulary_page`` with the ``hsk_chars_2010`` data and
    an extra link pointing at the 2012-2020 character page.
    """
    init_resources()
    expand_flag = get_parameter("expand")
    current_list_link = '<a href="/hskchars">HSK Characters 2012-2020</a>'
    return hsk_vocabulary_page(
        "/hskchars2010",
        "HSK Characters for 2010 (outdated)",
        current_list_link,
        "characters",
        "/hskwords2010",
        "words",
        hsk_chars_2010,
        frequency_order_char_link,
        expand_flag,
    )
Esempio n. 7
0
def frequency_order_char(hanzi):
    """Return the characters of ``hanzi`` as output lines, ordered by frequency index.

    Each character is paired with ``query_char_frequency_index`` and the
    pairs are sorted ascending. The shape of the returned list depends on
    the request parameters:

    * ``outputcommasep``: a one-element list holding all characters joined
      with commas.
    * ``addfreqindex`` and ``addfreqvalue``: "char<TAB>index<TAB>frequency".
    * ``addfreqindex`` only: "char<TAB>index", where an index at or above
      ``max_frequency_index`` is written as 0.
    * ``addfreqvalue`` only: "char<TAB>frequency".
    * otherwise: the bare characters.
    """
    ranked = sorted((query_char_frequency_index(ch), ch) for ch in hanzi)

    if get_parameter("outputcommasep"):
        return [",".join(ch for _, ch in ranked)]

    with_index = get_parameter("addfreqindex")
    with_value = get_parameter("addfreqvalue")

    if with_index and with_value:
        return [
            "{}\t{}\t{}".format(ch, rank, query_char_freq(ch))
            for rank, ch in ranked
        ]
    if with_index:
        return [
            "{}\t{}".format(ch, 0 if rank >= max_frequency_index else rank)
            for rank, ch in ranked
        ]
    if with_value:
        return ["{}\t{}".format(ch, query_char_freq(ch)) for _, ch in ranked]
    return [ch for _, ch in ranked]
Esempio n. 8
0
def handle_pinyinfix():
    """Handle a pinyin-fixing request.

    Input longer than 100000 characters is rejected by rendering
    ``error.html``. Otherwise the ``pinyin`` parameter is run through
    ``fix_pinyin`` and ``pinyinfix.html`` is rendered with the original
    text, the fixed text, and the fix count.
    """
    started = time.time()
    source_text = get_parameter("pinyin", "")

    if len(source_text) <= 100000:
        corrected, corrections = fix_pinyin(source_text)
        page_context = create_context(
            started,
            pinyin=source_text,
            fixed=corrected,
            fixed_count=corrections,
        )
        return render_template("pinyinfix.html", **page_context)

    too_big = (
        "Sorry, that text is too big; It will consume too much server CPU time to process. "
        "You can try reaching out to [email protected] or set this script up on your own dedicated server."
    )
    error_context = create_context(started, error=too_big)
    return render_template("error.html", **error_context)
Esempio n. 9
0
def vocab_analysis_page(expand, start_time):
    """Build the vocabulary/text analysis form page and, if text was submitted, its results.

    The state of every checkbox and radio button is restored from the request
    parameters so the form re-renders the way the user submitted it. Three
    checkboxes (``analysevocab``, ``analysehskwords``, ``addfreqindex``)
    default to checked unless the hidden ``ignoredefaults`` field is present,
    i.e. unless the form has already been submitted once.

    Args:
        expand: passed through unchanged to ``performactions``.
        start_time: timestamp subtracted from ``time.time()`` for the
            "Page generated in" footer.

    Returns:
        A plain error string when the "block" input format is selected and
        the text exceeds 10000 characters; otherwise ``Response(results)``
        where ``results`` is the list of HTML fragments making up the page.
    """
    import html  # function-scope import: only needed to escape the echoed input

    defaultistrue = "" if get_parameter("ignoredefaults", "") else "true"
    wordfreqchecked = "checked" if get_parameter("analysevocab",
                                                 defaultistrue) else ""
    hskanalwordschecked = "checked" if get_parameter("analysehskwords",
                                                     defaultistrue) else ""
    hskanalcharschecked = "checked" if get_parameter("analysehskchars") else ""
    hskwordschecked = "checked" if get_parameter("suggesthskwords") else ""
    hskcharschecked = "checked" if get_parameter("suggesthskchars") else ""
    freqwordschecked = "checked" if get_parameter("suggestwords") else ""
    freqwordsrechecked = "checked" if get_parameter(
        "suggestwordsreuse") else ""
    freqcharschecked = "checked" if get_parameter("suggestchars") else ""
    annotatewordschecked = "checked" if get_parameter("annotatewords") else ""
    annotatecharschecked = "checked" if get_parameter("annotatechars") else ""
    outputcommasepchecked = "checked" if get_parameter(
        "outputcommasep") else ""
    addfreqindexchecked = "checked" if get_parameter("addfreqindex",
                                                     defaultistrue) else ""
    addfreqvaluechecked = "checked" if get_parameter("addfreqvalue") else ""

    # Input format radios: one-per-line is the fallback when neither of the
    # other two formats was selected.
    blockchecked = "checked" if get_parameter("format") == "block" else ""
    commasepchecked = "checked" if get_parameter(
        "format") == "commasep" else ""
    oneperlinechecked = ""
    if blockchecked == "" and commasepchecked == "":
        oneperlinechecked = "checked"

    defaulthanzi = ""
    hanzi = get_parameter("hanzi", defaulthanzi)
    if blockchecked and len(hanzi) > 10000:
        # Trailing spaces keep the concatenated sentences from running together.
        return "Sorry, that text is too big; It will consume too much server CPU time to process. " \
               "If you want to set up this script on a dedicated server you can find the source " \
               "at https://github.com/glxxyz/hskhsk.com/tree/master/cidian"

    results = []
    results.append("""<html lang="zh-Hans">\n<head>""")
    results.append(
        "<title>HSK\u4E1C\u897F - Analyse Your \u6C49\u5B57 Vocabulary/Text</title>"
    )
    results.append(allstyle)
    results.append("""<script>function outputcommaclick() {
        var c = document.getElementsByName("outputcommasep")[0].checked;
        document.getElementsByName("addfreqindex")[0].disabled = c;
        document.getElementsByName("addfreqvalue")[0].disabled = c;
    }
    </script>""")
    results.append("</head>")
    results.append("""<body onload="outputcommaclick();">""")
    results.append(
        """<a href="https://hskhsk.com/analyse.html">HSK\u4E1C\u897F</a>""")
    results.append("""<a href="/">Scripts</a>""")
    results.append("""<a href="/sets">Set Operations</a>""")
    # The submitted text is echoed back inside the <textarea>; it is HTML-escaped
    # so that input such as "</textarea><script>..." cannot break out of the
    # form. The unescaped text is still what gets analysed below.
    results.append(
        """<h2 class="compact">Analyse Your \u6C49\u5B57 <a class="arrowlink" href="javascript:toggle_visibility('mainhelp');"><small><small>(?)</small></small></a></h2>
<div id="mainhelp" class="inlinehelp">
    <p>The purpose of this tool is to analyse a Chinese vocabulary list or block of Chinese text, to give information about the words and characters it contains.</p>
    <p>See the (?) info buttons below below for more information about how to use the various features of this tool.</p>
</div>

<form method="POST" action="/hanzi">
<input type='hidden' value='true' name='ignoredefaults'>
<table>
    <tr><td valign="top">
        <h4 class="compact">Vocabulary Actions <a class="arrowlink" href="javascript:toggle_visibility('vocabactionshelp');"><small>(?)</small></a></h4>

        <div id="vocabactionshelp" class="inlinehelp">
<p>Select the actions to be performed on the inputted text or vocabulary list.</p>
<p>The 'analyse' functions will give you statistics such as word counts, and for the HSK options how many words/characters you know at each HSK level.</p>
<p>The 'suggest' options show how the highest frequency words or characters that you don't know.</p>
        </div>

        <div class="indent"><input type="checkbox" name="analysevocab" value="true" {}>Analyse words/characters</input></div>
        <div class="indent"><input type="checkbox" name="analysehskwords" value="true" {}>Analyse HSK words</input></div>
        <div class="indent"><input type="checkbox" name="analysehskchars" value="true" {}>Analyse HSK characters</input></div>
        <div class="indent"><input type="checkbox" name="suggesthskwords" value="true" {}>Suggest HSK words</input></div>
        <div class="indent"><input type="checkbox" name="suggesthskchars" value="true" {}>Suggest HSK characters</input></div>
        <div class="indent"><input type="checkbox" name="suggestwords" value="true" {}>Suggest words</input></div>
        <div class="indent"><input type="checkbox" name="suggestwordsreuse" value="true" {}>Suggest words using input characters</input></div>
        <div class="indent"><input type="checkbox" name="suggestchars" value="true" {}>Suggest characters</input></div>
    </td>
    <td valign="top">
        <h4 class="compact">Annotated Version <a class="arrowlink" href="javascript:toggle_visibility('annotationactionshelp');"><small>(?)</small></a></h4>

        <div id="annotationactionshelp" class="inlinehelp">
<p>These options will output a version of your source text with popup information giving HSK levels, frequency stroke and radical information, etc.</p>
<p>The words/characters are also clickable, with the links taking you to a full dictionary entry. In addition, words/characters are coloured by HSK level.</p>
        </div>

        <div class="indent"><input type="checkbox" name="annotatewords" value="true" {}>Annotate words</input></div>
        <div class="indent"><input type="checkbox" name="annotatechars" value="true" {}>Annotate characters</input></div>

        <h4 class="compact">Input Options <a class="arrowlink" href="javascript:toggle_visibility('inputoptionshelp');"><small>(?)</small></a></h4>

        <div id="inputoptionshelp" class="inlinehelp">
<p>Choose one word/character per line when the input is a vocabulary list from Skritter or a flashcard text file. Anything after first whitespace on each line ignored.</p>
<p>Comma/whitespace separated will use the characters ,;| or any whitespace to separate the words in your input.</p>
<p>If pasting text from a web page or document use the 'Big block of text' option. This option is less precise, as word breaks have to be determined by this tool.</p>
        </div>

        <div class="indent"><input type="radio" name="format" value="oneperline" {}>One word/character per line</input></div>
        <div class="indent"><input type="radio" name="format" value="commasep" {}>Comma/whitespace separated</input></div>
        <div class="indent"><input type="radio" name="format" value="block" {}>Big block of text</input></div>

        <h4 class="compact">Output List Options <a class="arrowlink" href="javascript:toggle_visibility('outputoptionshelp');"><small>(?)</small></a></h4>

        <div id="outputoptionshelp" class="inlinehelp">
<p>The Comma Separated option will output comma separated words/characters, with no frequency information (the other two options will be ignored).</p>
<p>The other two options add frequency information to the listboxes of hanzi characters that are output.</p>
<p>With the frequency index, 1 is the highest frequency word/character, and higher values are less frequent.</p>
<p>The raw word/character frequency is the actual frequency reported by SUBTLEX-CH, with higher values being more
frequent, which helps to understand the relative frequency of each character.</p>
        </div>

        <div class="indent"><input type="checkbox" name="outputcommasep" value="true" {} onclick="outputcommaclick()">Comma Separated</input></div>
        <div class="indent"><input type="checkbox" name="addfreqindex" value="true" {}>Add frequency index</input></div>
        <div class="indent"><input type="checkbox" name="addfreqvalue" value="true" {}>Add raw frequency</div>
    </td></tr>
</table>
<h4 class="compact">Input your simpflified Chinese here <a class="arrowlink" href="javascript:toggle_visibility('textinputhelp');"><small>(?)</small></a></h4>

<div id="textinputhelp" class="inlinehelp" style="max-width:500px;">
    <p>This edit box is for the vocabulary list or block of text that you wish to analyse. Choose the format of your list using by selecting the appropriate value from the 'Input Options' section above.</p>
    <p>To help to resolve ambiguous words when analysing a block of text, place a | character (vertical bar) between words.</p>
</div>

<textarea name="hanzi" cols="80" rows="15">{}</textarea><br />
<input type="submit" value="    Go!    " /></form>
    """.format(wordfreqchecked, hskanalwordschecked, hskanalcharschecked,
               hskwordschecked, hskcharschecked, freqwordschecked,
               freqwordsrechecked, freqcharschecked, annotatewordschecked,
               annotatecharschecked, oneperlinechecked, commasepchecked,
               blockchecked, outputcommasepchecked, addfreqindexchecked,
               addfreqvaluechecked, html.escape(hanzi)))
    # Only run the analysis when some text was actually submitted.
    if hanzi != defaulthanzi:
        performactions(hanzi, results, expand)
    results.append(
        """<p><small><i>Page generated in {:1.6f} seconds</i></small></p>""".
        format(time.time() - start_time))
    results.append(page_footer)
    return Response(results)
Esempio n. 10
0
def performactions(hanzi, results, expand):
    """Run the analysis actions selected in the request and append their HTML to ``results``.

    The input text is parsed (as a block, comma-separated list, or
    one-per-line list, depending on the ``format`` parameter) only when at
    least one analyse/suggest action is requested. Any parser notes are
    emitted as a "Warnings" list ahead of the results. The two annotate
    actions work on the raw text and run regardless of whether it was parsed.

    Args:
        hanzi: the submitted text.
        results: list of HTML fragments; mutated in place.
        expand: passed through to the parsers and annotate functions.
    """
    init_resources()

    # Actions that need the parsed words/characters; the annotate actions do not.
    parse_actions = ("analysevocab", "analysehskwords", "analysehskchars",
                     "suggesthskwords", "suggesthskchars", "suggestwords",
                     "suggestwordsreuse", "suggestchars")

    # only parse if one of these actions is being performed
    if any(get_parameter(action) for action in parse_actions):
        notes = []
        input_format = get_parameter("format")
        if input_format == "block":
            words, chars, hsk_word_count, hsk_char_count = parse_block(
                hanzi, notes, expand)
        elif input_format == "commasep":
            words, chars, hsk_word_count, hsk_char_count = parse_comma_sep(
                hanzi, notes, False, expand)
        else:
            words, chars, hsk_word_count, hsk_char_count = parse_list(
                hanzi, notes, False, expand)
        if notes:
            # The heading is closed with </h2> to match its opening tag.
            results.append(
                """<h2><span style="color:red;">Warnings</span> <a class="arrowlink" href="javascript:toggle_visibility('warningshelp');"><small><small>(?)</small></small></a></h2><ul>
 <div id="warningshelp" class="inlinehelp" style="max-width:500px;">
    <p>This section lists words and character that are being treated as Chinese but aren't in the CC-CEDICT that is being used.</p>
    <p>In addition, when potential word matches are ignored during parsing of a block of text, warnings below will show you
    the words that are in the dictionary but which were not chosen by the script.</p>
 </div><span style="color:red;">""")
            for note in notes:
                results.append("<li>{}</li>".format(note))
            results.append("</ul></span>")

        results.append(
            """<h2>Results <a class="arrowlink" href="javascript:toggle_visibility('resultshelp');"><small><small>(?)</small></small></a></h2>
 <div id="resultshelp" class="inlinehelp" style="max-width:500px;">
    All word/character lists are in descending order of frequency, with the most frequently used words/characters at the top of each list.
 </div>""")

        if get_parameter("analysevocab"):
            analysewords(results, words, chars, hsk_word_count, hsk_char_count)
        if get_parameter("analysehskwords"):
            analysehskwords(results, words, hsk_word_count)
        if get_parameter("analysehskchars"):
            analysehskchars(results, chars, hsk_char_count)
        if get_parameter("suggesthskwords"):
            suggesthskwords(results, words)
        if get_parameter("suggesthskchars"):
            suggesthskchars(results, chars)
        if get_parameter("suggestwords"):
            suggestfreqwords(results, words)
        if get_parameter("suggestwordsreuse"):
            suggestfreqwordsre(results, words, chars)
        if get_parameter("suggestchars"):
            suggestfreqchars(results, chars)
    else:
        results.append("<h2>Results</h2>")

    # these actions just use the raw hanzi
    if get_parameter("annotatewords"):
        annotatewords(results, hanzi, expand)
    if get_parameter("annotatechars"):
        annotatechars(results, hanzi, expand)
Esempio n. 11
0
def handle_mandcomp():
    """Handle a Mandarin Companion request.

    Records the start time, calls ``init_resources()``, and returns the
    result of ``mandarin_companion_page`` for the ``expand`` parameter.
    """
    started = time.time()
    init_resources()
    expand_flag = get_parameter("expand")
    return mandarin_companion_page(started, expand_flag)
Esempio n. 12
0
def handle_hanzi():
    """Handle a /hanzi request by returning ``vocab_analysis_page``'s result.

    The start time is captured before the ``expand`` parameter is read and
    both are handed to ``vocab_analysis_page``.
    """
    started = time.time()
    expand_flag = get_parameter("expand")
    return vocab_analysis_page(expand_flag, started)
Esempio n. 13
0
def handle_chars1000():
    """Handle a chars1000 request.

    Records the start time, calls ``init_resources()``, and returns the
    result of ``list_page_chars1000`` for the ``expand`` parameter.
    """
    started = time.time()
    init_resources()
    expand_flag = get_parameter("expand")
    return list_page_chars1000(expand_flag, started)
Esempio n. 14
0
def search_page(start_time):
    """Build the "Advanced Hanzi Search" page: the search form plus any results.

    The form is re-rendered with the values from the current request. The
    search itself is delegated to ``processadvancedsearch``; when it returns a
    non-empty result set, flashcard download links and the result list are
    appended below the form.

    Args:
        start_time: timestamp subtracted from ``time.time()`` for the
            "Page generated in" footer.

    Returns:
        The complete page as a single string, fragments joined by newlines.

    Raises:
        ValueError: from ``int()`` if ``min``, ``max`` or ``mandcomp`` is
            supplied but is not a valid integer.
    """
    from html import escape  # function-scope import: used only for the echoed form values

    def attr(value):
        # User-supplied text is echoed into value="..." attributes, so it must
        # be escaped (quotes included) or a stray quote breaks out of the tag.
        # "{}".format keeps the rendering of non-string values unchanged.
        return escape("{}".format(value), quote=True)

    # The HSK flags double as the "checked" attribute text for the form and as
    # the level filters handed to processadvancedsearch.
    hsk1 = "checked" if get_parameter("hsk1") else ""
    hsk2 = "checked" if get_parameter("hsk2") else ""
    hsk3 = "checked" if get_parameter("hsk3") else ""
    hsk4 = "checked" if get_parameter("hsk4") else ""
    hsk5 = "checked" if get_parameter("hsk5") else ""
    hsk6 = "checked" if get_parameter("hsk6") else ""
    mandcomp = int(
        get_parameter("mandcomp")) if get_parameter("mandcomp") else 0

    expand = get_parameter("expand")
    pinyin = get_parameter("pinyin")
    hanzi = get_parameter("hanzi")
    component = get_parameter("component")
    compound = get_parameter("compound")
    minlength = int(get_parameter("min", "1"))
    maxlength = int(get_parameter("max", "10"))

    # Definition radio group: anything other than "compact"/"full" means off.
    defoff = ""
    deffull = ""
    defcompact = ""
    definition = get_parameter("def", "off")
    if definition == "compact":
        defcompact = "checked"
    elif definition == "full":
        deffull = "checked"
    else:
        defoff = "checked"

    # Search format radio group: anything other than "wildcard" means regex.
    formatwildcard = ""
    formatregex = ""
    searchformat = get_parameter("format", "wildcard")
    if searchformat == "wildcard":
        formatwildcard = "checked"
    else:
        formatregex = "checked"

    # Sort radio group: anything other than "freq"/"pinyin" means hanzi order.
    sortfreq = ""
    sortpinyin = ""
    sorthanzi = ""
    sort = get_parameter("sort", "freq")
    if sort == "freq":
        sortfreq = "checked"
    elif sort == "pinyin":
        sortpinyin = "checked"
    else:
        sorthanzi = "checked"

    results = []
    results.append("""<html lang="zh-Hans">\n<head>""")
    results.append(
        "<title>HSK\u4E1C\u897F\u8BCD\u5178 - Advanced Hanzi Search</title>")
    results.append(allstyle)
    results.append("</head>")
    results.append("<body>")
    results.append(
        """<a href="https://hskhsk.com/analyse.html">HSK\u4E1C\u897F</a>""")
    results.append("""<a href="/">Scripts</a>""")
    results.append("""<a href="/cidian">Dictionary</a>""")
    results.append("""<a href="/radicals">Radicals</a>""")
    results.append("""
<h2 class="compact">Advanced Hanzi Search</h2>
<form method="GET" action="/search">
<div class="paddedbox">
    <h4 class="compact">Search Fields <a class="arrowlink" href="javascript:toggle_visibility('searchhelp');"><small>(?)</small></a></h4>

    <div id="searchhelp" class="inlinehelp">
        If a value is entered into any of these fields, or the character composition fields,
        then each of the results returned must match that value.
        The results shown are the logical AND (set intersection) of the results found by each input field.
    </div>

    <div>
        Search format:
    </div>

    <div>
        <input type="radio" name="format" value="wildcard" {}>Wildcard</input>
            <a class="arrowlink" href="javascript:toggle_visibility('wildcardhelp');">(?)</a>
    </div>

    <div id="wildcardhelp" class="inlinehelp">
        Use * to match zero or any number of characters.<br />
        \u5c0f* matches all words beginning with \u5c0f.<br />
        *\u5c0f* matches all words with a \u5c0f.<br />
        Use + to match any one or more characters.<br />
        Use ? to match any single character.<br />
        Use [12] to match the characters '1' or '2'.<br />
    </div>

    <div>
        <input type="radio" name="format" value="regex" {}>Regex</input>
            <a class="arrowlink" href="javascript:toggle_visibility('regexhelp');">(?)</a>
    </div>

    <div id="regexhelp" class="inlinehelp">
        Try <a href="http://docs.python.org/release/2.5.2/lib/re-syntax.html">this link</a> for more
        information about regular expressions.
    </div>

    <div>
        Pinyin <input type="text" name="pinyin" value="{}"></input>
            <a class="arrowlink" href="javascript:toggle_visibility('pinyinhelp');">(?)</a><br />
    </div>

    <div id="pinyinhelp" class="inlinehelp">
        For pinyin search enter tone numbers, (pin1yin1) not tone marks (p\u012Bny\u012Bn).
        There are no spaces between syllables, and the search is case insensitive.
    </div>

    <div>
        Hanzi <input type="text" name="hanzi" value="{}"></input>
    </div>
</div>
<div class="paddedbox">
    <h4 class="compact">Character Composition</h4>

    <div>
Component of <input size="6" type="text" name="component" value="{}"></input>
    <a class="arrowlink" href="javascript:toggle_visibility('componenthelp');">(?)</a><br />
    </div>

    <div id="componenthelp" class="inlinehelp">
One character in the result must be a component of one of the characters in this box.
If you are only interested in single characters, set both the maximum and minmimum hanzi length to 1.
    </div>

    <div>
Compound of <input size="6" type="text" name="compound" value="{}"></input>
    <a class="arrowlink" href="javascript:toggle_visibility('compoundhelp');">(?)</a><br />
    </div>

    <div id="compoundhelp" class="inlinehelp">
One character in the result must be composed of one of the characters in this box.
If you are only interested in single characters, set both the maximum and minmimum hanzi length to 1.
    </div>

</div>
<div class="paddedbox">
    <h4 class="compact">Hanzi Chars <a class="arrowlink" href="javascript:toggle_visibility('hanzicharshelp');"><small>(?)</small></a></h4>
    <div id="hanzicharshelp" class="inlinehelp">
The maximum and minimun length of the hanzi results returned. Set both the max and min to 1 if you only want to see single character words.
    </div>
    <div class="indent">Min <input type="text" name="min" size="4" value="{}"></input></div>
    <div class="indent">Max <input type="text" name="max" size="4" value="{}"></input></div>
</div>
<div class="paddedbox">
    <h4 class="compact">Definition <a class="arrowlink" href="javascript:toggle_visibility('defhelp');"><small>(?)</small></a></h4>
    <div id="defhelp" class="inlinehelp">
Whether or not to display a full or truncated definition alongside the results. The alternative is to just show a list of hanzi words.
    </div>
    <div class="indent"><input type="radio" name="def" value="off" {}>Off</input></div>
    <div class="indent"><input type="radio" name="def" value="compact" {}>Compact</input></div>
    <div class="indent"><input type="radio" name="def" value="full" {}>Full</input></div>
</div>
<div class="paddedbox">
    <h4 class="compact">HSK Level <a class="arrowlink" href="javascript:toggle_visibility('hskhelp');"><small>(?)</small></a></h4>
    <div id="hskhelp" class="inlinehelp">
The results are filtered so that they must be in one of the HSK levels that are checked. If no boxes are checked, HSK filtering is ignored.
    </div>
    <div class="indent"><input type="checkbox" name="hsk1" value="t" {}>HSK 1</input></div>
    <div class="indent"><input type="checkbox" name="hsk2" value="t" {}>HSK 2</input></div>
    <div class="indent"><input type="checkbox" name="hsk3" value="t" {}>HSK 3</input></div>
    <div class="indent"><input type="checkbox" name="hsk4" value="t" {}>HSK 4</input></div>
    <div class="indent"><input type="checkbox" name="hsk5" value="t" {}>HSK 5</input></div>
    <div class="indent"><input type="checkbox" name="hsk6" value="t" {}>HSK 6</input></div>
</div>
<div class="paddedbox">
    <h4 class="compact">Sort Order <a class="arrowlink" href="javascript:toggle_visibility('sorthelp');"><small>(?)</small></a></h4>
    <div id="sorthelp" class="inlinehelp">
Results sorted by frequency show the most frequent words first.
Pinyin sorting should obey the <a href="http://pinyin.info/news/2012/pinyin-sort-order/">most
authoritative rules</a> that I could find about pinyin ordering.
Hanzi sorting uses the <a href="http://www.unicode.org/notes/tn26/">unicode code point</a>
to sort the results.
    </div>
    <div class="indent"><input type="radio" name="sort" value="freq" {}>Frequency</input></div>
    <div class="indent"><input type="radio" name="sort" value="pinyin" {}>Pinyin</input></div>
    <div class="indent"><input type="radio" name="sort" value="hanzi" {}>Hanzi</input></div>
</div>
<br />
<input type="submit" value="    Search!    " /></form>
    """.format(formatwildcard, formatregex, attr(pinyin), attr(hanzi),
               attr(component), attr(compound), minlength, maxlength, defoff,
               defcompact, deffull, hsk1, hsk2, hsk3, hsk4, hsk5, hsk6,
               sortfreq, sortpinyin, sorthanzi))

    # The search receives the raw (unescaped) parameter values.
    resultset = processadvancedsearch(searchformat, sort, pinyin, hanzi,
                                      component, compound, minlength,
                                      maxlength, hsk1, hsk2, hsk3, hsk4, hsk5,
                                      hsk6, mandcomp, expand)

    if len(resultset):
        results.append("<h4>Results</h4>")
        # The query string is re-used inside single-quoted href attributes, so
        # it is escaped too; browsers decode &amp; back to & when following
        # the link.
        params = escape(request.query_string.decode('utf-8'), quote=True)
        results.append(
            "<p><small>Download flashcards: <a href='/flash?card=pleco&{}'>Pleco</a>"
            .format(params))
        results.append(
            "<a href='/flash?card=sticky&{}'>StickyStudy</a></small></p>".
            format(params))
        word_link_hskcolour_search(results, resultset, definition, expand)

    results.append(
        """<p><small><i>Page generated in {:1.6f} seconds</i></small></p>""".
        format(time.time() - start_time))
    results.append(page_footer)
    return "\n".join(results)
Esempio n. 15
0
def vocab_diff_page(oldlink, newlink, thislink, thisitem, otherlink, otheritem,
                    oldvocab, newvocab, linkfunction):
    """Build the page comparing HSK 2010 vocabulary levels with HSK 2012-2020.

    The page has two parts: a 7x7 table (old level down the side, new level
    across the top, with index 7 standing for "Non-HSK") listing the items
    that moved between levels, followed by per-level lists of the items that
    stayed at the same level.

    Args:
        oldlink, newlink: URLs used for the HSK 2010 / HSK 2012-2020 links.
        thislink: not used in this function.
        thisitem, otheritem: item names (e.g. the noun shown in headings);
            each is shown with its first letter upper-cased.
        otherlink: URL of the companion page linked in the header.
        oldvocab, newvocab: mappings indexed by level 1-6 and by 16, whose
            values support set ``&`` and ``-``. Presumably 16 holds levels
            1-6 combined -- confirm against the caller.
        linkfunction: called as ``linkfunction(items, expand)``; its result
            is joined into the cell/list text.

    Returns:
        ``Response`` wrapping the list of HTML fragments.
    """
    started = time.time()
    init_resources()
    expand = get_parameter("expand")

    page = []
    emit = page.append
    this_title = thisitem[0].upper() + thisitem[1:]

    def render_links(items):
        # One item per line when expanded, otherwise a comma-separated run.
        joiner = "<br />" if expand == "yes" else chinese_comma_sep
        return joiner.join(linkfunction(items, expand))

    # ---- page header -------------------------------------------------------
    emit("""<html lang="zh-Hans">\n<head>""")
    emit(
        "<title>HSK\u4E1C\u897F - Where the HSK 2010 {} are in 2012-2020</title>"
        .format(this_title))
    emit(allstyle)
    emit("</head>\n<body>")
    emit("""<a href="https://hskhsk.com/word-lists">HSK\u4E1C\u897F</a>""")
    emit("""<a href="/">Scripts</a>""")
    other_title = otheritem[0].upper() + otheritem[1:]
    emit("""<a href="{}">Where the HSK 2010 {} are in 2012-2020</a>""".format(
        otherlink, other_title))
    emit("<h3>HSK 2010 {} that changed level in 2012-2020</h3>".format(
        this_title))

    # ---- table of items that changed level ---------------------------------
    emit(
        """<p>This table shows the {0} in the New HSK 2010 that changed level when the word lists were revised
in 2012 (also valid to date as of 2020), {0} that didn't change level are shown below.
For definitions hover over the characters, or try clicking on almost anything.</p>
<table border="1" style="border-collapse:collapse;" cellpadding="2em" cellspacing="0">
<tr><th rowspan=2 colspan=2 style="background-color: #FFFFFF;"></th><th colspan=7><a href="{1}" class="hsk0">HSK 2012-2020</a></th></tr>
<tr>""".format(thisitem, newlink))
    for column in range(1, 7):
        emit(
            """<th><div style="white-space: nowrap;">&nbsp;&nbsp;&nbsp;&nbsp;<a href="{}#hsk{}" class="hsk{}">HSK {}</a>&nbsp;&nbsp;&nbsp;&nbsp;</div></th>"""
            .format(newlink, column, column, column))
    emit(
        """<th><div class="hsk0" style="white-space: nowrap;">&nbsp;&nbsp;&nbsp;&nbsp;Non-HSK&nbsp;&nbsp;&nbsp;&nbsp;</div></th></tr>"""
    )

    for old in range(1, 8):
        emit("<tr>")
        if old == 1:
            # The "HSK 2010" side label spans all seven rows.
            emit(
                """<th rowspan=7><a href="{}" class="hsk0">HSK 2010</a></th>"""
                .format(oldlink))
        if old == 7:
            emit("""<th><div style="white-space: nowrap;">Non-HSK</div></th>""")
        else:
            emit(
                """<th><div style="white-space: nowrap;"><a href="{}#hsk{}" class="hsk{}">HSK {}</a></div></th>"""
                .format(oldlink, old, old, old))

        for new in range(1, 8):
            if old == new:
                # Diagonal cells hold no items; levels 1-6 link to the
                # unchanged list further down the page.
                if 1 <= old <= 6:
                    emit(
                        """<td class="hsk{0}light" onClick="document.location.href='#hsk{0}';" onmouseover="this.style.cursor='pointer';"> </td>"""
                        .format(old))
                else:
                    emit("""<td class="hsk0light"></td>""")
                continue

            if old == 7:
                moved = newvocab[new] - oldvocab[16]
            elif new == 7:
                moved = oldvocab[old] - newvocab[16]
            else:
                # add the set subtract to account for case where word exists at multiple levels
                moved = (oldvocab[old] & newvocab[new]) - newvocab[old]
            emit("<td>")
            emit(render_links(moved))
            emit("</td>")
        emit("</tr>")
    emit("</table>")

    # ---- per-level lists of items that kept their level --------------------
    emit("<h3>HSK 2010 {} that didn't change level in 2012-2020</h3>".format(
        this_title))
    for level in range(1, 7):
        emit(
            """<h4><a class="hsk{0}" name="hsk{0}">HSK {0} {1} that didn't change level</a></h4>"""
            .format(level, thisitem[0].upper() + thisitem[1:]))
        emit(render_links(newvocab[level] & oldvocab[level]))

    emit(
        """<p><small><i>Page generated in {:1.6f} seconds.</i></small></p>\n"""
        .format(time.time() - started))
    emit(page_footer)
    return Response(page)
Esempio n. 16
0
def handle_sets():
    """Handle a set-operations request.

    Works out which output-format and input-format radio buttons should be
    rendered as checked, enforces the size limit on "block" input, and
    returns the result of ``sets_page``.

    Returns:
        A plain error string when either list uses the "block" format and is
        longer than 10000 characters; otherwise the result of ``sets_page``.
    """

    def mark(selected):
        # Text placed inside the <input> tag for a radio button.
        return "checked" if selected else ""

    start_time = time.time()
    expand = get_parameter("expand")

    # Output format: one-per-line is the fallback whenever none of the other
    # formats was chosen. All three alternatives are tested -- leaving
    # tab-separated out of this check made "tabsep" render two checked radios.
    output_format = get_parameter("outputformat")
    output_comma_sep_checked = mark(output_format == "commasep")
    output_tab_sep_checked = mark(output_format == "tabsep")
    output_space_sep_checked = mark(output_format == "spacesep")
    output_one_per_line_checked = mark(
        not (output_comma_sep_checked or output_tab_sep_checked
             or output_space_sep_checked))

    # Input formats for lists A and B: one-per-line is again the fallback.
    format_a = get_parameter("formatA")
    block_checked_a = mark(format_a == "block")
    comma_sep_checked_a = mark(format_a == "commasep")
    one_per_line_checked_a = mark(
        not (block_checked_a or comma_sep_checked_a))

    format_b = get_parameter("formatB")
    block_checked_b = mark(format_b == "block")
    comma_sep_checked_b = mark(format_b == "commasep")
    one_per_line_checked_b = mark(
        not (block_checked_b or comma_sep_checked_b))

    hanzi_a = get_parameter("hanziA", "")
    hanzi_b = get_parameter("hanziB", "")
    if (block_checked_a
            and len(hanzi_a) > 10000) or (block_checked_b
                                          and len(hanzi_b) > 10000):
        # Trailing spaces keep the concatenated sentences from running together.
        return "Sorry, that text is too big; It will consume too much server CPU time to process. " \
               "If you want to set up this script on a dedicated server you can find the source " \
               "at https://github.com/glxxyz/hskhsk.com/tree/main/cidian"

    return sets_page(block_checked_a, block_checked_b, comma_sep_checked_a,
                     comma_sep_checked_b, expand, hanzi_a, hanzi_b,
                     one_per_line_checked_a, one_per_line_checked_b,
                     output_comma_sep_checked, output_one_per_line_checked,
                     output_space_sep_checked, output_tab_sep_checked,
                     start_time)
Esempio n. 17
0
def _parse_set_input(text, input_format, notes, expand):
    """Parse one input list with the parser selected by *input_format*.

    ``"block"`` uses ``parse_block``, ``"commasep"`` uses ``parse_comma_sep``
    and any other value falls back to ``parse_list``.  Parser warnings are
    collected in *notes* (a list the parser is handed to fill in).

    Returns a ``(words, chars)`` tuple.  The parsers return two further
    HSK count values which are not needed for the set operations.
    """
    if input_format == "block":
        words, chars, _hsk_word_count, _hsk_char_count = parse_block(
            text, notes, expand, word_char_definition_link)
    elif input_format == "commasep":
        words, chars, _hsk_word_count, _hsk_char_count = parse_comma_sep(
            text, notes, True, expand, word_char_definition_link)
    else:
        words, chars, _hsk_word_count, _hsk_char_count = parse_list(
            text, notes, True, expand, word_char_definition_link)
    return words, chars


def _append_set_warnings(results, list_label, notes):
    """Append the red "Warnings (List X)" HTML section when *notes* is non-empty."""
    if not notes:
        return
    # NOTE(review): the List A and List B sections both use the element id
    # 'warningshelp'; if both are rendered the ids are duplicated on the
    # page. Left unchanged here because other markup/scripts may rely on it.
    results.append(
        """<h2><span style="color:red;">Warnings (List {})</span> <a class="arrowlink" href="javascript:toggle_visibility('warningshelp');"><small><small>(?)</small></small></a></h2><ul>
 <div id="warningshelp" class="inlinehelp" style="max-width:600px;">
    <p>This section lists words and character that are being treated as Chinese but aren't in the CC-CEDICT that is being used.</p>
    <p>In addition, when potential word matches are ignored during parsing of a block of text, warnings below will show you
    the words that are in the dictionary but which were not chosen by the script.</p>
 </div><span style="color:red;">""".format(list_label))
    results.extend("<li>{}</li>".format(note) for note in notes)
    results.append("</ul></span>")


def _append_set_boxes(results, set_a, set_b, subscript, name_suffix, order,
                      joinchar, kind):
    """Append the seven result boxes (A, B and the five set operations).

    *subscript* is the HTML subscript shown in the titles (``"w"``/``"c"``),
    *name_suffix* completes the textarea names (``"words"``/``"chars"``),
    *order* is the frequency-ordering function applied to every set, and
    *kind* is forwarded unchanged to ``setresultbox``.
    """
    label_a = "A<sub>{}</sub>".format(subscript)
    label_b = "B<sub>{}</sub>".format(subscript)
    boxes = (
        (label_a, "A" + name_suffix, set_a),
        (label_b, "B" + name_suffix, set_b),
        (label_a + " \u2229 " + label_b, "AintersectB" + name_suffix,
         set_a & set_b),
        (label_a + " \u222A " + label_b, "AunionB" + name_suffix,
         set_a | set_b),
        (label_a + " \u2216 " + label_b, "AdifferenceB" + name_suffix,
         set_a - set_b),
        (label_b + " \u2216 " + label_a, "BdifferenceA" + name_suffix,
         set_b - set_a),
        # The triple "m" is kept on purpose: it is the existing field name.
        (label_a + " \u2206 " + label_b, "AsymmmetricB" + name_suffix,
         set_a ^ set_b),
    )
    for title, name, members in boxes:
        results.append(
            setresultbox(title, name, order(members), joinchar, kind))


def perform_set_operations(hanzi_a, hanzi_b, results, expand):
    """Parse word lists A and B and append set-operation HTML to *results*.

    Args:
        hanzi_a: text of list A; parsed according to the ``formatA`` request
            parameter (``"block"``, ``"commasep"``, anything else = list).
        hanzi_b: text of list B; parsed according to ``formatB``.
        results: list of HTML fragments, extended in place.
        expand: passed through to the parsers.

    For each list, any parser warnings are emitted first.  Then, for words
    and for characters, seven boxes are appended: A, B, intersection, union,
    both differences and the symmetric difference.  Items inside each box
    are joined according to the ``outputformat`` request parameter.

    Returns:
        None; all output goes into *results*.
    """
    init_resources()

    notes_a = []
    words_a, chars_a = _parse_set_input(
        hanzi_a, get_parameter("formatA"), notes_a, expand)
    _append_set_warnings(results, "A", notes_a)

    notes_b = []
    words_b, chars_b = _parse_set_input(
        hanzi_b, get_parameter("formatB"), notes_b, expand)
    _append_set_warnings(results, "B", notes_b)

    # Any unrecognised (or missing) output format falls back to a space.
    joinchar = {
        "oneperline": "\n",
        "commasep": ",",
        "tabsep": "\t",
    }.get(get_parameter("outputformat"), " ")

    results.append(
        """<h4>Set Operations on Words <a class="arrowlink" href="javascript:toggle_visibility('wordoperationshelp');"><small><small>(?)</small></small></a></h4>
 <div id="wordoperationshelp" class="inlinehelp" style="max-width:600px;">
 <p><b>A<sub>w</sub></b> and <b>B<sub>w</sub></b> are the sets of all unique words derived from Word Lists A and B above.</p>
 <p><b>A<sub>w</sub> \u2229 B<sub>w</sub></b> <i>Intersection, words that appear in both sets.</i><br />
    <b>A<sub>w</sub> \u222A B<sub>w</sub></b> <i>Union, both sets of words combined together as a single set.</i><br />
    <b>A<sub>w</sub> \u2216 B<sub>w</sub></b> <i>Difference, words that are <b>A<sub>w</sub></b> but not <b>B<sub>w</sub></b>.</i><br />
    <b>B<sub>w</sub> \u2216 A<sub>w</sub></b> <i>Difference, words that are <b>B<sub>w</sub></b> but not <b>A<sub>w</sub></b>.</i><br />
    <b>A<sub>w</sub> \u2206 B<sub>w</sub></b> <i>Symmetric Difference, words that are in only one of the two sets.</i></p>
    <p>All sets are sorted with the most frequently used words first.</p>
 </div>""")
    _append_set_boxes(results, words_a, words_b, "w", "words",
                      frequency_order_word, joinchar, "word")

    results.append(
        """<h4>Set Operations on Characters <a class="arrowlink" href="javascript:toggle_visibility('charoperationshelp');"><small><small>(?)</small></small></a></h4>
 <div id="charoperationshelp" class="inlinehelp" style="max-width:600px;">
 <p><b>A<sub>c</sub></b> and <b>B<sub>c</sub></b> are the sets of all unique characters derived from Word Lists A and B above.</p>
 <p><b>A<sub>c</sub> \u2229 B<sub>c</sub></b> <i>Intersection, characters that appear in both sets.</i><br />
    <b>A<sub>c</sub> \u222A B<sub>c</sub></b> <i>Union, both sets of characters combined together as a single set.</i><br />
    <b>A<sub>c</sub> \u2216 B<sub>c</sub></b> <i>Difference, characters that are <b>A<sub>c</sub></b> but not <b>B<sub>c</sub></b>.</i><br />
    <b>B<sub>c</sub> \u2216 A<sub>c</sub></b> <i>Difference, characters that are <b>B<sub>c</sub></b> but not <b>A<sub>c</sub></b>.</i><br />
    <b>A<sub>c</sub> \u2206 B<sub>c</sub></b> <i>Symmetric Difference, characters that are in only one of the two sets.</i></p>
    <p>All sets are sorted with the most frequently used characters first.</p>
 </div>""")
    # Every character box, including plain A and B, is ordered by character
    # frequency; the A/B boxes previously used the word ordering, which was
    # inconsistent with the five derived character sets and the help text.
    _append_set_boxes(results, chars_a, chars_b, "c", "chars",
                      frequency_order_char, joinchar, "char")
Esempio n. 18
0
def flashcards_download():
    """Run an advanced search from the request parameters and return the
    matches as a downloadable flashcard text file.

    Request parameters read: ``format``, ``hsk1``..``hsk6``, ``mandcomp``,
    ``expand``, ``pinyin``, ``hanzi``, ``component``, ``compound``, ``min``,
    ``max``, ``card`` and ``sort``.

    Returns:
        A response whose body is the flashcard lines joined with CRLF and
        whose ``Content-Disposition`` header marks it as an attachment, or
        the plain string ``"No Results Found"`` when the search is empty.

    Raises:
        ValueError: if ``mandcomp``, ``min`` or ``max`` is present but is
            not an integer literal (raised by ``int``).
    """
    import re  # local import so the block does not depend on file-level imports

    searchformat = get_parameter("format", "wildcard")
    hsk1 = "checked" if get_parameter("hsk1") else ""
    hsk2 = "checked" if get_parameter("hsk2") else ""
    hsk3 = "checked" if get_parameter("hsk3") else ""
    hsk4 = "checked" if get_parameter("hsk4") else ""
    hsk5 = "checked" if get_parameter("hsk5") else ""
    hsk6 = "checked" if get_parameter("hsk6") else ""
    # An absent/empty "mandcomp" is passed on as "" rather than a number.
    raw_mandcomp = get_parameter("mandcomp")
    mandcomp = int(raw_mandcomp) if raw_mandcomp else ""
    expand = get_parameter("expand")
    pinyin = get_parameter("pinyin")
    hanzi = get_parameter("hanzi")
    component = get_parameter("component")
    compound = get_parameter("compound")
    minlength = int(get_parameter("min", "1"))
    maxlength = int(get_parameter("max", "4"))
    fileformat = get_parameter("card", "pleco")
    sort = get_parameter("sort", "freq")

    resultset = processadvancedsearch(searchformat, sort, pinyin, hanzi,
                                      component, compound, minlength,
                                      maxlength, hsk1, hsk2, hsk3, hsk4, hsk5,
                                      hsk6, mandcomp, expand)
    if not resultset:
        return "No Results Found"

    results = flashcard_list(resultset, fileformat)
    response = make_response("\r\n".join(results))
    # "card" comes straight from the request, so only a conservative
    # character set is allowed into the header value: this keeps quotes,
    # separators and line breaks out of Content-Disposition.
    safe_format = re.sub(r"[^A-Za-z0-9_-]", "", fileformat)
    response.headers['Content-Disposition'] = (
        'attachment; filename=hskhsk-' + safe_format + '-flashcards.txt')
    return response