Ejemplos de STagger en Python

Lenguaje de programación: Python

Namespace/Package Name: sdp

Método / Función: STagger

Ejemplos en hotexamples.com: 9

Python STagger - 9 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de sdp.STagger extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

Mostrar archivo

Archivo: txt2tag.py Proyecto: techknowledgist/techknowledgist

def get_tagger(language):
    """Return an sdp.STagger configured for the given language code.

    Used by batch.py.

    Args:
        language: Language code; "en", "de" and "cn" are recognised.

    Returns:
        An sdp.STagger built from the model file name mapped to the language.

    Raises:
        SystemExit: If no tagger model is mapped to the language.
    """
    # Imported locally so this function is self-contained.
    import sys

    if language == "en":
        return sdp.STagger("english-caseless-left3words-distsim.tagger")
    if language == "de":
        return sdp.STagger("german-fast.tagger")
    if language == "cn":
        return sdp.STagger("chinese.tagger")
    # sys.exit instead of the bare exit(): the latter is injected by the
    # `site` module for interactive sessions and is not always available
    # (e.g. when the interpreter is started with -S).
    sys.exit("There is no tagger for language=%s" % language)

Ejemplo n.º 2

Mostrar archivo

Archivo: txt2tag.py Proyecto: techknowledgist/techknowledgist

def test_tag_en(input=None, output=None):
    """Run the English tagger on one file.

    Args:
        input: Path of the file to tag; a hard-coded sample patent path is
            used when None.
        output: Path the tagged result is written to; a hard-coded sample
            path is used when None.
    """
    default_source = "/home/j/anick/fuse/data/patents/en_test/txt/US20110052365A1.xml"
    default_target = "/home/j/anick/fuse/data/patents/en_test/tag/US20110052365A1.xml"
    source = default_source if input is None else input
    target = default_target if output is None else output
    english_tagger = sdp.STagger("english-caseless-left3words-distsim.tagger")
    tag(source, target, english_tagger)

Ejemplo n.º 3

Mostrar archivo

def process_patent_dir(patent_sent_dir, output_dir):
    """Run process_patent_sent_file on every entry of a directory.

    Args:
        patent_sent_dir: Directory whose entries are processed.
        output_dir: Directory prefix for the per-file output paths; each
            output path is output_dir + "/" + the entry name.
    """
    # One chunker schema and one tagger are built up front and shared by
    # all files.
    chunk_schema = sdp.chunker_tech()
    tagger = sdp.STagger("english-caseless-left3words-distsim.tagger")

    for entry in os.listdir(patent_sent_dir):
        destination = "/".join((output_dir, entry))
        process_patent_sent_file(
            patent_sent_dir, entry, tagger, chunk_schema, destination
        )

Ejemplo n.º 4

Mostrar archivo

Archivo: txt2tag.py Proyecto: techknowledgist/techknowledgist

def patent_txt2tag_dir(lang_path, language):
    """Tag every subdirectory of <lang_path>/<language>/txt.

    Each entry of the txt directory (presumably one directory per year,
    going by the variable names) is passed to txt2tag_dir together with the
    matching path under <lang_path>/<language>/tag.

    Args:
        lang_path: Root directory containing the per-language directories.
        language: Language code; "en", "de" and "cn" are supported.

    Raises:
        ValueError: If there is no tagger model for the language. Raised
            before any directory is read, instead of failing later on an
            unbound `tagger` variable.
    """
    # choose tagger model for language
    if language == "en":
        model = "english-caseless-left3words-distsim.tagger"
    elif language == "de":
        # note: german-fast is much faster than german-dewac although 4%
        # poorer in dealing with unknown words.
        model = "german-fast.tagger"
    elif language == "cn":
        model = "chinese.tagger"
    else:
        raise ValueError("There is no tagger for language=%s" % language)
    tagger = sdp.STagger(model)

    txt_path = lang_path + "/" + language + "/txt"
    tag_path = lang_path + "/" + language + "/tag"
    for year in os.listdir(txt_path):
        txt_year_dir = txt_path + "/" + year
        tag_year_dir = tag_path + "/" + year
        # Single-argument print(...) prints the same text under Python 2
        # and is also valid Python 3.
        print("[patent_txt2tag_dir]calling txt2tag for dir: %s" % txt_year_dir)
        txt2tag_dir(txt_year_dir, tag_year_dir, tagger)
    print("[patent_txt2tag_dir]finished writing tagged data to %s" % tag_path)

Ejemplo n.º 5

Mostrar archivo

def test_pm():
    """Run process_patent_sent_file on a hard-coded PubMed lines file."""
    data_dir = "/home/j/anick/fuse/data/pubmed"
    # Alternative input kept from the original: "pubmed_lines_test_1.txt"
    lines_file = "pubmed_lines.txt"
    chunks_path = "/home/j/anick/fuse/data/pubmed/chunks.txt"

    # Chunker schema first, then the tagger, as in the other entry points.
    chunk_schema = sdp.chunker_tech()
    english_tagger = sdp.STagger("english-caseless-left3words-distsim.tagger")

    process_patent_sent_file(
        data_dir, lines_file, english_tagger, chunk_schema, chunks_path
    )

Ejemplo n.º 6

Mostrar archivo

Archivo: txt2tag.py Proyecto: techknowledgist/techknowledgist

def test_tag_de():
    """Tag three hard-coded German sample files and report each output.

    For every sample stem, <stem>.xml in the work directory is tagged into
    <stem>.tag in the same directory.
    """
    german_tagger = sdp.STagger("german-fast.tagger")
    work_dir = "/home/j/anick/fuse/data/tmp"
    sample_stems = (
        "DE3102424A1_all_caps",
        "DE3102424A1_all_lower",
        "DE3102424A1_first_cap",
    )
    for stem in sample_stems:
        base_path = work_dir + "/" + stem
        tagged_path = base_path + ".tag"
        tag(base_path + ".xml", tagged_path, german_tagger)
        print("Created %s" % tagged_path)

Ejemplo n.º 7

Mostrar archivo

Archivo: txt2tag.py Proyecto: techknowledgist/techknowledgist

def pipeline_txt2tag_dir(root, language):
    """Tag every file in <root>/txt into <root>/tag.

    Args:
        root: Directory containing the "txt" source directory and the "tag"
            target directory.
        language: Language code; "en", "de" and "cn" are supported.

    Raises:
        ValueError: If there is no tagger model for the language. Raised
            before any directory is read, instead of failing later on an
            unbound `tagger` variable.
    """
    source_path = os.path.join(root, "txt")
    target_path = os.path.join(root, "tag")

    # choose tagger model for language
    if language == "en":
        model = "english-caseless-left3words-distsim.tagger"
    elif language == "de":
        # note: german-fast is much faster than german-dewac although 4%
        # poorer in dealing with unknown words.
        model = "german-fast.tagger"
    elif language == "cn":
        model = "chinese.tagger"
    else:
        raise ValueError("There is no tagger for language=%s" % language)
    tagger = sdp.STagger(model)

    for file in os.listdir(source_path):
        source_file = source_path + "/" + file
        target_file = target_path + "/" + file
        # Single-argument print(...) prints the same text under Python 2
        # and is also valid Python 3.
        print("[txt2tag_dir]from %s to %s" % (source_file, target_file))
        tag(source_file, target_file, tagger)
    print("[txt2tag_dir]done")

Ejemplo n.º 8

Mostrar archivo

def test_tag_cn():
    """Tag one hard-coded Chinese sample file with the Chinese tagger."""
    segmented_path = "/home/j/anick/fuse/data/patents/tmp/cn/CN1394959A-tf.seg"
    tagged_path = "/home/j/anick/fuse/data/patents/tmp/cn/CN1394959A-tf.tag"
    chinese_tagger = sdp.STagger("chinese.tagger")
    tag(segmented_path, tagged_path, chinese_tagger)

Ejemplo n.º 9

Mostrar archivo

def test_tag_en():
    """Tag one hard-coded English sample patent file with the English tagger."""
    input = "/home/j/anick/fuse/data/patents/en_test/txt/US20110052365A1.xml"
    output = "/home/j/anick/fuse/data/patents/en_test/tag/US20110052365A1.xml"
    # The English test previously loaded "chinese.tagger" (copy-paste from
    # the Chinese test); use the English model used for "en" elsewhere.
    tagger = sdp.STagger("english-caseless-left3words-distsim.tagger")
    tag(input, output, tagger)