예제 #1
0
def build_model(
        out: ModelOutput = ModelOutput("malt/swemalt-1.7.2.mco"),
        _maltjar: Binary = Binary("[malt.jar]")):
    """Fetch the Swedish model file for MALT Parser.

    ``_maltjar`` is never read in the body. Per the original note, the model
    won't be downloaded unless maltjar has been installed (presumably the
    framework enforces this through the parameter -- TODO confirm).
    """
    model_url = "http://maltparser.org/mco/swedish_parser/swemalt-1.7.2.mco"
    out.download(model_url)
예제 #2
0
def build_model(
        sense_model: ModelOutput = ModelOutput(
            "wsd/ALL_512_128_w10_A2_140403_ctx1.bin"),
        context_model: ModelOutput = ModelOutput(
            "wsd/lem_cbow0_s512_w10_NEW2_ctx.bin")):
    """Fetch the two models for SALDO-based word sense disambiguation.

    The sense model is requested first, then the context model.
    """
    # (target, source URL) pairs, kept in the order they are fetched.
    downloads = (
        (sense_model,
         "https://github.com/spraakbanken/sparv-wsd/raw/master/models/scouse/ALL_512_128_w10_A2_140403_ctx1.bin"),
        (context_model,
         "https://github.com/spraakbanken/sparv-wsd/raw/master/models/scouse/lem_cbow0_s512_w10_NEW2_ctx.bin"),
    )
    for target, url in downloads:
        target.download(url)
예제 #3
0
def morphtable_inputs(
        suc: ModelOutput = ModelOutput("hunpos/suc3_morphtable.words"),
        morphtable_base: ModelOutput = ModelOutput("hunpos/suc.morphtable"),
        morphtable_patterns: ModelOutput = ModelOutput("hunpos/suc.patterns")):
    """Fetch the three input files needed to build the SALDO morphtable.

    Files are requested in order: the SUC word list, the base morphtable and
    the patterns file.
    """
    # (target, source URL) pairs, kept in the order they are fetched.
    downloads = (
        (suc,
         "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/suc3_morphtable.words"),
        (morphtable_base,
         "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/suc.morphtable"),
        (morphtable_patterns,
         "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/suc.patterns"),
    )
    for target, url in downloads:
        target.download(url)
예제 #4
0
def download_saldo(
        out: ModelOutput = ModelOutput("saldo/saldom.xml")):
    """Fetch the SALDO morphology XML file (saldom.xml) into ``out``."""
    saldom_url = "https://svn.spraakdata.gu.se/sb-arkiv/pub/lexikon/saldom/saldom.xml"
    out.download(saldom_url)
예제 #5
0
def download_dalin_wordlist(
        out: ModelOutput = ModelOutput("hunpos/hist/dalinm.hunpos")):
    """Fetch the Dalin wordlist (dalinm.hunpos) into ``out``."""
    wordlist_url = "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/hist/dalinm.hunpos"
    out.download(wordlist_url)
예제 #6
0
def download_swedberg_wordlist(
        out: ModelOutput = ModelOutput("hunpos/hist/swedberg-gender.hunpos")):
    """Fetch the Swedberg wordlist (swedberg-gender.hunpos) into ``out``."""
    wordlist_url = "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/hist/swedberg-gender.hunpos"
    out.download(wordlist_url)
예제 #7
0
def download_bettertokenizer(out: ModelOutput = ModelOutput(
        "segment/bettertokenizer.sv")):
    """Fetch the model file used with BetterWordTokenizer into ``out``."""
    model_url = "https://github.com/spraakbanken/sparv-models/raw/master/segment/bettertokenizer.sv"
    out.download(model_url)
예제 #8
0
def download_punkt_model(out: ModelOutput = ModelOutput(
        "segment/punkt-nltk-svenska.pickle")):
    """Fetch the model file used with PunktSentenceTokenizer into ``out``."""
    model_url = "https://github.com/spraakbanken/sparv-models/raw/master/segment/punkt-nltk-svenska.pickle"
    out.download(model_url)
예제 #9
0
def hunpos_model(
        model: ModelOutput = ModelOutput(
            "hunpos/suc3_suc-tags_default-setting_utf8.model")):
    """Fetch the Hunpos model file into ``model``."""
    model_url = "https://github.com/spraakbanken/sparv-models/raw/master/hunpos/suc3_suc-tags_default-setting_utf8.model"
    model.download(model_url)
예제 #10
0
def download_nst_comp(out: ModelOutput = ModelOutput(
        "saldo/nst_comp_pos.pickle")):
    """Fetch the compound POS model from the sparv-models repo into ``out``."""
    model_url = "https://github.com/spraakbanken/sparv-models/raw/master/saldo/nst_comp_pos.pickle"
    out.download(model_url)
예제 #11
0
def metashare_template(
        model: ModelOutput = ModelOutput(
            "sbx_metadata/sbx-metashare-template.xml")):
    """Fetch the SBX META-SHARE XML template into ``model``."""
    template_url = "https://raw.githubusercontent.com/spraakbanken/sparv-sbx-metadata/main/data/sbx-metashare-template.xml"
    model.download(template_url)
예제 #12
0
def download_korp_stats(
        out: ModelOutput = ModelOutput("saldo/stats.pickle")):
    """Fetch the stats.pickle model file into ``out``."""
    stats_url = "https://github.com/spraakbanken/sparv-models/raw/master/saldo/stats.pickle"
    out.download(stats_url)
예제 #13
0
def swefn_freq_model(
        out: ModelOutput = ModelOutput(
            "lexical_classes/swefn.freq.gp2008+suc3+romi.pickle")):
    """Fetch the SweFN frequency model file into ``out``."""
    model_url = "https://github.com/spraakbanken/sparv-models/raw/master/lexical_classes/swefn.freq.gp2008+suc3+romi.pickle"
    out.download(model_url)