Ejemplo n.º 1
0
from taglexicon import TagLexicon
from wclexicon import WCLexicon
from tools import read_dict

import sys

# Configuration object built from the name 'wsj' and `args`; the same object
# is handed to Tagset and WCLexicon.from_file further down.
config = Configuration("wsj", args)

# Read the tag set and the tag lexicon from the corpus file, passing the
# field indices 0 and 1 to read_dict.
wsj_tags, wsj_norm_tags = read_dict(
    "data/wsj-train.tab",
    0,
    1,
)

# Wrap the tags that were just read in a Tagset object.
WSJ = Tagset(wsj_tags, config)

# Load a file with word classes ('brown' is the first argument, presumably
# the name given to this lexicon).
WC = WCLexicon.from_file(
    "brown",
    "data/en-brown320.txt",
    config,
)

# Field indices of the token text and of its tag.
text_field, tag_field = 0, 1

# Tags relative to the current position during a search:
# offset 0 is the current position, -1 and -2 are the two before it.
this_tag, last_tag, last_last_tag = (
    WSJ.tag(tag_field, offset) for offset in (0, -1, -2)
)

# Words relative to the current position during a search:
# negative offsets look back, positive offsets look ahead.
this_word, last_word, next_word, next_next_word = (
    TextField(text_field, offset) for offset in (0, -1, 1, 2)
)
Ejemplo n.º 2
0
from taglexicon import TagLexicon
from wclexicon import WCLexicon
from tools import read_dict

import sys

# Build the configuration from the name 'wsj' and `args`; it is reused by
# the Tagset and the word-class lexicon created below.
config = Configuration("wsj", args)

# read_dict is called on the corpus file with field indices 0 and 1 and
# returns a pair: the tag set and the tag lexicon.
wsj_tags, wsj_norm_tags = read_dict("data/wsj-train.tab", 0, 1)

# The Tagset is created from the tags read above.
WSJ = Tagset(wsj_tags, config)

# Word classes are loaded from a separate file.
WC = WCLexicon.from_file("brown", "data/en-brown320.txt", config)

# Field indices: token text first, tag second.
text_field, tag_field = 0, 1

# Define tags (relative to the current position during a search).
# The right-hand side is evaluated left to right: offsets 0, -1, -2.
this_tag, last_tag, last_last_tag = (
    WSJ.tag(tag_field, 0),
    WSJ.tag(tag_field, -1),
    WSJ.tag(tag_field, -2),
)

# Define words (relative to the current position during a search):
# the current word, the previous one, and the next two.
this_word, last_word, next_word, next_next_word = (
    TextField(text_field, 0),
    TextField(text_field, -1),
    TextField(text_field, 1),
    TextField(text_field, 2),
)
Ejemplo n.º 3
0
    config.build()
    sys.exit(0)

# Read the NE tag set and tag lexicon from the training corpus file.
# NOTE(review): fields 1 and 3 are passed to read_dict, which correspond to
# lemma_field and tag_field below (not text_field) -- confirm this is intended.
suc_ne_tags, suc_norm_ne_tags = read_dict(
    "suc-data/suc-blogs-ne-train.tab",
    1,
    3,
)

# Build a Tagset object from the tags read above.
SUC_NE = Tagset(suc_ne_tags, config)

# Field indices used by the feature definitions.
text_field, lemma_field, suc_full_field, tag_field = 0, 1, 2, 3

# Word-class lexicons loaded from file, one under the name 'names' and one
# under the name 'brown'.
Names = WCLexicon.from_file("names", "suc-data/names.txt", config)
WC = WCLexicon.from_file("brown", "suc-data/swe-brown100.txt", config)

# Tags relative to the current position during a search:
# offset 0 is the current position, -1 and -2 are the two before it.
this_tag, last_tag, last_last_tag = (
    SUC_NE.tag(tag_field, offset) for offset in (0, -1, -2)
)

# POS tags (+ morphology), taken from the full SUC tag field.
# NOTE(review): next_pos is defined with offset 2 rather than 1 -- confirm
# that skipping the immediately following position is intended.
this_pos, last_pos, next_pos = (
    TextField(suc_full_field, offset) for offset in (0, -1, 2)
)

# Lemma at the current position during a search.
this_lemma = TextField(lemma_field, 0)
Ejemplo n.º 4
0
    for line in f:
        token, _, tag, _ = line.rstrip('\n').split('\t')
        suc_norm_tags[token.lower()].add(tag)
        suc_tags.add(tag)

# Add the entries of dalin.txt to the tag set and the tag lexicon.
# The file is opened for reading as UTF-8 text (read mode is open()'s default).
with open("suc-data/dalin.txt", encoding="utf-8") as f:
    for line in f:
        # Exactly four tab-separated fields are expected per line; only the
        # first (token) and third (tag) are used. Any other field count makes
        # the unpacking raise ValueError.
        token, _, tag, _ = line.rstrip("\n").split("\t")
        # The lexicon is keyed on the lower-cased token.
        suc_norm_tags[token.lower()].add(tag)
        suc_tags.add(tag)

# Build a Tagset object from the tags collected so far.
SUC = Tagset(suc_tags, config)

# Load a file with word classes ('brown' is the first argument, presumably
# the name given to this lexicon).
WC = WCLexicon.from_file(
    "brown",
    "suc-data/swe-brown100.txt",
    config,
)

# Field indices of the token text and of its tag.
text_field, tag_field = 0, 1

# Tags relative to the current position during a search:
# offset 0 is the current position, -1 and -2 are the two before it.
this_tag, last_tag, last_last_tag = (
    SUC.tag(tag_field, offset) for offset in (0, -1, -2)
)

# Words relative to the current position during a search:
# negative offsets look back, positive offsets look ahead.
this_word, last_word, next_word, next_next_word = (
    TextField(text_field, offset) for offset in (0, -1, 1, 2)
)
Ejemplo n.º 5
0
    for line in f:
        token, tag = line.rstrip('\n').split('\t')
        suc_norm_tags[token.lower()].add(tag)
        suc_tags.add(tag)

# Add the entries of saldo.txt to the tag set and the tag lexicon.
with open("suc-data/saldo.txt", mode="r", encoding="utf-8") as f:
    for line in f:
        # Each line must consist of exactly four tab-separated fields; the
        # second and fourth are discarded. A different field count raises
        # ValueError from the unpacking.
        token, _, tag, _ = line.rstrip("\n").split("\t")
        # Lexicon entries are stored under the lower-cased token.
        suc_norm_tags[token.lower()].add(tag)
        suc_tags.add(tag)

# The Tagset is created from the tags gathered above.
SUC = Tagset(suc_tags, config)

# Word classes are loaded from a separate file.
WC = WCLexicon.from_file("brown", "suc-data/swe-brown100.txt", config)

# Field indices: token text first, tag second.
text_field, tag_field = 0, 1

# Define tags (relative to the current position during a search).
# The right-hand side is evaluated left to right: offsets 0, -1, -2.
this_tag, last_tag, last_last_tag = (
    SUC.tag(tag_field, 0),
    SUC.tag(tag_field, -1),
    SUC.tag(tag_field, -2),
)

# Define words (relative to the current position during a search):
# the current word, the previous one, and the next two.
this_word, last_word, next_word, next_next_word = (
    TextField(text_field, 0),
    TextField(text_field, -1),
    TextField(text_field, 1),
    TextField(text_field, 2),
)
Ejemplo n.º 6
0
# When generation is to be skipped, only run config.build() and exit with
# status 0; nothing below this point is executed in that case.
if config.skip_generate:
    config.build()
    sys.exit(0)

# Read the NE tag set and tag lexicon from the training corpus file.
# NOTE(review): fields 1 and 3 are passed to read_dict, which correspond to
# lemma_field and tag_field below (not text_field) -- confirm this is intended.
suc_ne_tags, suc_norm_ne_tags = read_dict(
    'suc-data/suc-blogs-ne-train.tab',
    1,
    3,
)

# Build a Tagset object from the tags read above.
SUC_NE = Tagset(suc_ne_tags, config)

# Field indices used by the feature definitions.
text_field, lemma_field, suc_full_field, tag_field = 0, 1, 2, 3

# Word-class lexicons loaded from file, one under the name 'names' and one
# under the name 'brown'.
Names = WCLexicon.from_file('names', 'suc-data/names.txt', config)
WC = WCLexicon.from_file('brown', 'suc-data/swe-brown100.txt', config)

# Define tags (relative to the current position during a search).
# The right-hand side is evaluated left to right: offsets 0, -1, -2.
this_tag, last_tag, last_last_tag = (
    SUC_NE.tag(tag_field, 0),
    SUC_NE.tag(tag_field, -1),
    SUC_NE.tag(tag_field, -2),
)

# POS tags (+ morphology), taken from the full SUC tag field.
# NOTE(review): next_pos is defined with offset 2 rather than 1 -- confirm
# that skipping the immediately following position is intended.
this_pos, last_pos, next_pos = (
    TextField(suc_full_field, 0),
    TextField(suc_full_field, -1),
    TextField(suc_full_field, 2),
)

# Lemma at the current position during a search.
this_lemma = TextField(lemma_field, 0)