def VV(self):
    """
    VVI: number of verb lemmas / number of verb tokens
    VVII: number of verb lemmas / number of lexical tokens
    """
    verb_lemmas = set(self.verb_lemmas)
    verb_tokens = self.verb_lemmas
    lex_tokens = self.open_class_lemmas
    VVI = division(verb_lemmas, verb_tokens)
    SVVI = squared_division(verb_lemmas, verb_tokens)
    CVVI = corrected_division(verb_lemmas, verb_tokens)
    VVII = division(verb_lemmas, lex_tokens)
    return VVI, SVVI, CVVI, VVII
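# The ratio helpers (division, squared_division, corrected_division) used by
# the metrics in this section are defined elsewhere. A minimal sketch of what
# they are assumed to compute, following the standard plain, squared, and
# corrected type-token formulas; the module's actual implementations may differ:

from math import sqrt

def division(types, tokens):
    # plain ratio: |types| / |tokens|, guarding against empty token lists
    return len(types) / len(tokens) if tokens else 0

def squared_division(types, tokens):
    # squared ratio: |types|^2 / |tokens|
    return len(types) ** 2 / len(tokens) if tokens else 0

def corrected_division(types, tokens):
    # corrected ratio: |types| / sqrt(2 * |tokens|)
    return len(types) / sqrt(2 * len(tokens)) if tokens else 0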
def derivational_suffixation(self):
    """
    number of level-n suffixes / number of suffixes
    """
    suffixes = self.get_suffixes()
    level3_suffixes = [i for i in suffixes if i in SUFFIXES["level3"]]
    level4_suffixes = [i for i in suffixes if i in SUFFIXES["level4"]]
    level5_suffixes = [i for i in suffixes if i in SUFFIXES["level5"]]
    level6_suffixes = [i for i in suffixes if i in SUFFIXES["level6"]]
    der_suff3 = division(level3_suffixes, suffixes)
    der_suff4 = division(level4_suffixes, suffixes)
    der_suff5 = division(level5_suffixes, suffixes)
    der_suff6 = division(level6_suffixes, suffixes)
    return der_suff3, der_suff4, der_suff5, der_suff6
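# The method above indexes SUFFIXES by "level3".."level6", which appears to
# correspond to the derivational levels of Bauer & Nation's (1993) word-family
# scale. A minimal sketch of the assumed shape of that lookup; the suffixes
# listed here are illustrative placeholders, not the module's actual lists:

SUFFIXES_EXAMPLE = {
    "level3": {"able", "er", "ish", "less", "ly", "ness"},
    "level4": {"al", "ation", "ful", "ism", "ist", "ity"},
    "level5": {"age", "ance", "ence", "ant"},
    "level6": {"ee", "ic", "ify", "ive"},
}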
def AdvV(self):
    """
    number of adverb lemmas / number of lexical tokens
    """
    adv_lemmas = set(self.adv_lemmas)
    lex_tokens = self.open_class_lemmas
    return division(adv_lemmas, lex_tokens)
def AdjV(self):
    """
    number of adjective lemmas / number of lexical tokens
    """
    adj_lemmas = set(self.adj_lemmas)
    lex_tokens = self.open_class_lemmas
    return division(adj_lemmas, lex_tokens)
def NV(self):
    """
    number of noun lemmas / number of lexical tokens
    """
    noun_lemmas = set(self.noun_lemmas)
    lex_tokens = self.open_class_lemmas
    return division(noun_lemmas, lex_tokens)
def LFP(self):
    """
    Lexical Frequency Profile: the proportion of tokens in
    first - the 1000 most frequent words
    second - the second 1000 most frequent words
    third - the University Word List (Xue & Nation 1989)
    none - the proportion not covered by any of these lists
    """
    first = [i for i in self.lemmas if i in FIVE_T_FREQ_COCA[0:1000]]
    second = [i for i in self.lemmas if i in FIVE_T_FREQ_COCA[1000:2000]]
    third = [i for i in self.lemmas if i in UWL]
    first_procent = division(first, self.lemmas)
    second_procent = division(second, self.lemmas)
    third_procent = division(third, self.lemmas)
    none = 1 - (first_procent + second_procent + third_procent)
    return first_procent, second_procent, third_procent, none
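# A worked toy example of the LFP arithmetic above. The word lists here are
# hypothetical stand-ins for FIVE_T_FREQ_COCA and UWL; only the proportion
# logic is illustrated, not the real frequency data:

toy_band1 = ["the", "be", "of"]        # stand-in for the first 1000 words
toy_band2 = ["however", "provide"]     # stand-in for the second 1000 words
toy_uwl = ["hypothesis", "data"]       # stand-in for the University Word List
toy_lemmas = ["the", "data", "be", "of", "resonate"]

first = [i for i in toy_lemmas if i in toy_band1]    # 3 hits
second = [i for i in toy_lemmas if i in toy_band2]   # 0 hits
third = [i for i in toy_lemmas if i in toy_uwl]      # 1 hit ("data")
first_p = len(first) / len(toy_lemmas)               # 0.6
second_p = len(second) / len(toy_lemmas)             # 0.0
third_p = len(third) / len(toy_lemmas)               # 0.2
none = 1 - (first_p + second_p + third_p)            # 0.2 -> off-list share ("resonate")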
def VS(self):
    """
    number of sophisticated verb lemmas / number of verb tokens
    """
    soph_verbs = [i for i in self.verb_lemmas if i not in FREQ_VERBS_COCA_FROM_FIVE_T]
    VSI = division(soph_verbs, self.verb_lemmas)
    VSII = corrected_division(soph_verbs, self.verb_lemmas)
    VSIII = squared_division(soph_verbs, self.verb_lemmas)
    return VSI, VSII, VSIII
def TTR(self):
    """
    number of lemmas / number of tokens
    """
    lemmas = set(self.lemmas)
    tokens = self.tokens
    TTR = division(lemmas, tokens)
    CTTR = corrected_division(lemmas, tokens)
    RTTR = root_division(lemmas, tokens)
    LogTTR = log_division(lemmas, tokens)
    Uber = uber(lemmas, tokens)
    return TTR, CTTR, RTTR, LogTTR, Uber
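# The TTR variants above rely on three further helpers. A minimal sketch of
# the formulas they are assumed to implement (Guiraud's root TTR, Herdan's
# log TTR, and the Uber index); the module's actual implementations may differ:

from math import log, sqrt

def root_division(types, tokens):
    # root TTR (Guiraud): |types| / sqrt(|tokens|)
    return len(types) / sqrt(len(tokens)) if tokens else 0

def log_division(types, tokens):
    # log TTR (Herdan's C): log |types| / log |tokens|
    t, n = len(types), len(tokens)
    return log(t) / log(n) if t > 0 and n > 1 else 0

def uber(types, tokens):
    # Uber index: (log |tokens|)^2 / (log |tokens| - log |types|)
    t, n = len(types), len(tokens)
    if t == 0 or n == 0 or t == n:
        return 0
    return log(n) ** 2 / (log(n) - log(t))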
def LS(self):
    """
    number of sophisticated lexical tokens / number of lexical tokens
    """
    soph_lex_lemmas = [i for i in self.open_class_lemmas if i not in FIVE_T_FREQ_COCA]
    return division(soph_lex_lemmas, self.open_class_lemmas)
def density(self):
    """
    number of lexical tokens / number of tokens
    """
    return division(self.open_class_lemmas, self.lemmas)
def freq_aux(self):
    """
    frequency of modal (auxiliary) verbs
    """
    return division(self.aux_forms, self.verb_tokens)
def freq_finite_forms(self):
    """
    frequency of tensed (finite) verb forms
    """
    return division(self.finite_tokens, self.verb_tokens)