Exemple #1
0
def realData():
    """Smoke-check F104Builder on data read from '../train.wtag'.

    The builder is created from the parser's pair-tag combinations. A History
    is then built from the words of ``splitted[0]`` (presumably the first
    sentence of the file -- confirm against MyParser), and the function
    asserts that the feature vector for tag "bla" is empty while the one for
    tag "RP" has exactly one entry.
    """
    parser = MyParser('../train.wtag')
    pairTags = parser.getAllPairTagsCombinations()
    pairFeature = F104Builder(pairTags)

    # Each element of splitted[0] is unpacked as a (word, tag) pair; only the
    # word is kept.
    sentenceWords = [word for (word, _tag) in parser.splitted[0]]
    history = History("RB", "VBG", sentenceWords, 3)

    missVector = pairFeature.getFeatureVector(history, "bla")
    assert missVector.size == 0

    hitVector = pairFeature.getFeatureVector(history, "RP")
    assert hitVector.size == 1
 def __init__(self, parser: MyParser, offset) -> None:
     """Compose the F100, F103 and F104 builders into one feature builder.

     F100 is created with 0 as its second argument, F103 with the size of
     F100, and F104 with the combined size of F100 and F103. The sum of all
     three sizes is handed to the parent constructor together with
     ``offset``.

     Args:
         parser: Source of the word/tag data each sub-builder is built from.
         offset: Forwarded unchanged to the parent constructor.
     """
     self.parser = parser

     wordsWithTag = parser.getWordsWithTag()
     self.f100 = F100Builder(wordsWithTag, 0)

     # Parser data is fetched before the preceding sizes are read, matching
     # the argument evaluation order of a single call expression.
     threeTags = parser.getAllThreeTagsCombinations()
     self.f103 = F103Builder(threeTags, self.f100.size)

     pairTags = parser.getAllPairTagsCombinations()
     self.f104 = F104Builder(pairTags, self.f100.size + self.f103.size)

     totalSize = self.f100.size + self.f103.size + self.f104.size
     super().__init__(totalSize, offset)
    def __init__(self, train_parser: MyParser, isTraining) -> None:
        """Create every feature builder and chain them into one layout.

        Builders are created in a fixed order. Each one receives, as its last
        constructor argument, the summed ``size`` of all builders created
        before it (presumably its start offset in the combined vector --
        TODO confirm against the builder base class). Each builder's size is
        printed once it exists, and the grand total is passed to the parent
        constructor together with 0.

        Args:
            train_parser: Parser that supplies the data for every builder.
            isTraining: Stored on the instance as-is; not otherwise used here.
        """
        self.parser = train_parser
        self.isTraining = isTraining

        def _advance(label, builder, start):
            # Fold the builder's size into the running total, then report it.
            end = start + builder.size
            print(label, builder.size)
            return end

        total = 0

        self.f100 = F100Builder(train_parser.getWordsWithTag(), total)
        total = _advance("F100 size", self.f100, total)

        self.f103 = F103Builder(
            train_parser.getAllThreeTagsCombinations(), total)
        total = _advance("F103 size", self.f103, total)

        self.f104 = F104Builder(
            train_parser.getAllPairTagsCombinations(), total)
        total = _advance("F104 size", self.f104, total)

        self.f106 = F106Builder(train_parser.getUniqueTags(), total)
        total = _advance("F106 size", self.f106, total)

        # The remaining word-shape builders take the parser itself.
        self.fSuf = SuffixFeatureBuilder(train_parser, total)
        total = _advance("Suffix size", self.fSuf, total)

        self.fPref = PrefixFeatureBuilder(train_parser, total)
        total = _advance("Prefix size", self.fPref, total)

        self.fDigNum = DigitNumberFeatureBuilder(train_parser, total)
        total = _advance("DigitNum size", self.fDigNum, total)

        self.fLetNum = DigitWordFeatureBuilder(train_parser, total)
        total = _advance("DigitLetter size", self.fLetNum, total)

        self.fCaps = CapsFeatureBuilder(train_parser, total)
        total = _advance("Caps size", self.fCaps, total)

        self.fPrevNext = PrevNextWordFeatureBuilder(
            train_parser.getAllPrevWordTagCombinations(),
            train_parser.getAllNextWordTagCombinations(),
            total)
        total = _advance("PrevNext size", self.fPrevNext, total)

        super().__init__(total, 0)
Exemple #4
0
from MyParser import MyParser

# Ad-hoc inspection script: build a parser over the training file, pull the
# collections it exposes, and print the unique tags.
p = MyParser("../train.wtag")
# The next three results are only referenced by the disabled prints below;
# the calls are kept in case the parser methods have side effects
# (NOTE(review): confirm, then drop whichever are truly unused).
words = p.getWordsWithTag()
tag3 = p.getAllThreeTagsCombinations()
tag2 = p.getAllPairTagsCombinations()
tag = p.getUniqueTags()
# Debug output for the tag combinations, currently disabled:
# print(tag3)
# print(tag2)
print(tag)