def trainRegexp(self, backoff):
    """Run the regexp tagger over every fold and record its scores.

    After ``self.split_into_folds()``, a regexp tagger is chosen from
    ``self.option_tone`` ("tonal" / "nontonal") and ``self.option_tag``
    ("Affixes" / "POS") and built with ``backoff``. For ``self.folds``
    rounds the fold at index ``self.folds - 1`` is untagged, re-tagged by
    that tagger, and moved to the front of ``self.foldlist`` so a different
    fold is evaluated in the next round. Nothing is trained on the
    remaining folds.

    Results are stored on the instance:

    * ``self.regex`` -- the tagger that was used,
    * ``self.regex_tagged`` -- extended with the tagger's output,
    * ``self.regex_avg_acc`` -- ``accuracy`` over all tokens (also printed),
    * ``self.regex_tagprecision`` / ``self.regex_tagrecall`` -- the pair
      returned by ``self.tagprecision_recall``.

    Finally ``self.org_tagged`` is cleared and ``self.foldlist`` is rebuilt
    from ``self.create_fold(1 .. self.folds)``.

    Args:
        backoff: Passed unchanged to the tagger constructor.

    Raises:
        ValueError: If the ``option_tone`` / ``option_tag`` combination has
            no matching regexp tagger.
    """
    self.split_into_folds()

    # Select the tagger class up front so an unsupported configuration fails
    # with a clear message rather than an unbound local further down.
    if self.option_tone == "tonal" and self.option_tag == "Affixes":
        tagger_cls = RegexpTonalSA
    elif self.option_tone == "tonal" and self.option_tag == "POS":
        tagger_cls = RegexpTonal
    elif self.option_tone == "nontonal" and self.option_tag == "Affixes":
        tagger_cls = RegexpSA
    elif self.option_tone == "nontonal" and self.option_tag == "POS":
        tagger_cls = Regexp
    else:
        raise ValueError(
            "No regexp tagger for option_tone=%r, option_tag=%r"
            % (self.option_tone, self.option_tag)
        )

    # The tagger depends only on the options and `backoff`, none of which
    # change inside the loop below, so it is built once.
    regex = tagger_cls(backoff)

    # NOTE(review): self.regex_tagged is only ever extended here, while
    # self.org_tagged is reset at the end -- confirm the caller clears
    # regex_tagged before invoking this method a second time.
    for _ in range(self.folds):
        held_out = self.foldlist[self.folds - 1]
        self.regex_tagged += regex.tag_sents([untag(sent) for sent in held_out])
        self.org_tagged += held_out
        # Rotate: the fold just evaluated goes to the front.
        self.foldlist = [held_out] + self.foldlist[: self.folds - 1]

    self.regex = regex

    # Flatten sentence lists into token lists in linear time.
    gold_tokens = [token for sent in self.org_tagged for token in sent]
    predicted_tokens = [token for sent in self.regex_tagged for token in sent]
    self.regex_avg_acc = accuracy(gold_tokens, predicted_tokens)
    print("Accuracy of concatenated regexp-tagged sentences: ", self.regex_avg_acc)

    self.regex_tagprecision, self.regex_tagrecall = self.tagprecision_recall(
        regex, self.regex_tagged, self.org_tagged
    )

    # Reset the gold buffer and restore the folds.
    self.org_tagged = []
    self.foldlist = []
    for i in range(1, self.folds + 1):
        self.foldlist.append(self.create_fold(i))