def realData():
    """Smoke-check the F100 builder against the real training corpus.

    Parses ``../train.wtag``, builds an F100 feature builder from its
    word/tag data and asserts, for a history built on the first sentence,
    that the feature vector is empty for the tag ``"bla"`` and holds exactly
    one entry for the tag ``"RB"``.
    """
    corpus = MyParser('../train.wtag')
    builder = F100Builder(corpus.getWordsWithTag())

    # Keep only the word of each (word, tag) pair in the first sentence.
    opening_sentence = [token for (token, _tag) in corpus.splitted[0]]
    # NOTE(review): argument meaning is defined by History (not visible
    # here) -- presumably two previous tags, the sentence and a word index.
    context = History("t5", "t2", opening_sentence, 3)

    bla_vector = builder.getFeatureVector(context, "bla")
    assert bla_vector.size == 0

    rb_vector = builder.getFeatureVector(context, "RB")
    assert rb_vector.size == 1
def __init__(self, parser: MyParser, offset) -> None:
    """Compose the F100, F103 and F104 builders into one feature block.

    The sub-builders are laid out back to back starting at position 0:
    each one is given the summed sizes of those built before it. The total
    size and the caller-supplied ``offset`` are passed to the base class.

    Args:
        parser: Parser that supplies the word/tag data for each sub-builder.
        offset: Forwarded unchanged to the base-class initializer.
    """
    self.parser = parser

    self.f100 = F100Builder(parser.getWordsWithTag(), 0)

    # F103 starts right after F100.
    f103_start = self.f100.size
    self.f103 = F103Builder(parser.getAllThreeTagsCombinations(), f103_start)

    # F104 starts right after F103.
    f104_start = f103_start + self.f103.size
    self.f104 = F104Builder(parser.getAllPairTagsCombinations(), f104_start)

    total_size = f104_start + self.f104.size
    super().__init__(total_size, offset)
def __init__(self, train_parser: MyParser, isTraining) -> None:
    """Build every sub-feature builder and lay them out back to back.

    Each sub-builder is constructed with the summed sizes of the builders
    created before it as its second argument, and its own size is printed
    once it has been built. The grand total is handed to the base class
    together with an offset of 0.

    Args:
        train_parser: Parser over the training data; stored as
            ``self.parser`` and used to construct every sub-builder.
        isTraining: Stored as ``self.isTraining``; not otherwise used in
            this initializer.
    """

    def extend(label, builder, start):
        # Advance the running position past ``builder`` and report its size.
        end = start + builder.size
        print(label, builder.size)
        return end

    self.parser = train_parser
    self.isTraining = isTraining
    position = 0

    self.f100 = F100Builder(train_parser.getWordsWithTag(), position)
    position = extend("F100 size", self.f100, position)

    self.f103 = F103Builder(
        train_parser.getAllThreeTagsCombinations(), position)
    position = extend("F103 size", self.f103, position)

    self.f104 = F104Builder(
        train_parser.getAllPairTagsCombinations(), position)
    position = extend("F104 size", self.f104, position)

    self.f106 = F106Builder(train_parser.getUniqueTags(), position)
    position = extend("F106 size", self.f106, position)

    self.fSuf = SuffixFeatureBuilder(train_parser, position)
    position = extend("Suffix size", self.fSuf, position)

    self.fPref = PrefixFeatureBuilder(train_parser, position)
    position = extend("Prefix size", self.fPref, position)

    self.fDigNum = DigitNumberFeatureBuilder(train_parser, position)
    position = extend("DigitNum size", self.fDigNum, position)

    self.fLetNum = DigitWordFeatureBuilder(train_parser, position)
    position = extend("DigitLetter size", self.fLetNum, position)

    self.fCaps = CapsFeatureBuilder(train_parser, position)
    position = extend("Caps size", self.fCaps, position)

    self.fPrevNext = PrevNextWordFeatureBuilder(
        train_parser.getAllPrevWordTagCombinations(),
        train_parser.getAllNextWordTagCombinations(),
        position,
    )
    position = extend("PrevNext size", self.fPrevNext, position)

    super().__init__(position, 0)
from MyParser import MyParser

# Ad-hoc inspection script: parse the training file and pull out the
# collections the feature builders consume.
p = MyParser("../train.wtag")

words = p.getWordsWithTag()
tag3 = p.getAllThreeTagsCombinations()
tag2 = p.getAllPairTagsCombinations()
tag = p.getUniqueTags()

# Only the unique tags are dumped; print tag3 / tag2 here instead to
# inspect the tag-triple or tag-pair combinations.
print(tag)