def WriteAdjective(outfile, degree, lemma, adjective, standard=None):
    """Emit one adjective grammar rule for *lemma* to *outfile*.

    Args:
      outfile: writable file-like object receiving the rule line.
      degree: DEG feature value written into the rule.
      lemma: WordNet lemma supplying the synset name and frequency count.
      adjective: surface form expanded into grammar terminals.
      standard: truthy when the form is standard usage; encoded as '+'/'-'.
    """
    std_flag = '+' if standard else '-'
    rule = 'Adj[DEG=%s,SNS="%s",FRQ=%d,%sstd] -> %s\n' % (
        degree,
        lemma.synset.name,
        base.GetCompoundCount(lemma),
        std_flag,
        base.LemmaToTerminals(adjective))
    outfile.write(rule)
def WriteRules(outfile):
    """Write one singular proper-noun rule per male and female first name.

    The FRQ feature is the negated ambiguity score, so names shared by
    both genders rank lower.
    """
    gendered_names = [('m', n.lower())
                      for n in nltk.corpus.names.read('male.txt')]
    gendered_names += [('f', n.lower())
                       for n in nltk.corpus.names.read('female.txt')]
    for sex, raw_name in gendered_names:
        score = Ambiguity(raw_name, sex)
        terminals = base.LemmaToTerminals(raw_name)
        outfile.write('PrpN[NUM=sg,SEX=%s,FRQ=%d] -> %s\n'
                      % (sex, -score, terminals))
def WriteProperNoun(outfile, lemma, gender):
    """Emit a singular proper-noun rule for *lemma*, with SEX when gendered.

    Args:
      outfile: writable file-like object receiving the rule line.
      lemma: WordNet lemma supplying name, synset, and frequency count.
      gender: SEX feature value, or falsy to omit the feature entirely.
    """
    if ShouldBeIgnored(lemma):
        return
    terminals = base.LemmaToTerminals(lemma.name)
    # Proper nouns are penalized by one relative to the raw compound count.
    frequency = base.GetCompoundCount(lemma) - 1
    if gender:
        outfile.write('PrpN[NUM=sg,SNS="%s",SEX=%s,FRQ=%d] -> %s\n'
                      % (lemma.synset.name, gender, frequency, terminals))
    else:
        outfile.write('PrpN[NUM=sg,SNS="%s",FRQ=%d] -> %s\n'
                      % (lemma.synset.name, frequency, terminals))
def WriteNoun(outfile, lemma, gender, number, name_override=None):
    """Emit a common-noun grammar rule for *lemma* to *outfile*.

    Args:
      outfile: writable file-like object receiving the rule line.
      lemma: WordNet lemma supplying the synset name and frequency count.
      gender: SEX feature value, or falsy to omit the feature entirely.
      number: NUM feature value (e.g. singular/plural marker).
      name_override: optional surface form used instead of lemma.name,
        both for the ignore check and for terminal expansion.
    """
    if ShouldBeIgnored(name_override or lemma):
        return
    noun = base.LemmaToTerminals(name_override or lemma.name)
    count = base.GetCompoundCount(lemma)
    # FRQ uses %d to match the sibling writers (count is an integer;
    # output is unchanged for ints).
    if gender:
        outfile.write('Noun[NUM=%s,SNS="%s",SEX=%s,FRQ=%d] -> %s\n'
                      % (number, lemma.synset.name, gender, count, noun))
    else:
        outfile.write('Noun[NUM=%s,SNS="%s",FRQ=%d] -> %s\n'
                      % (number, lemma.synset.name, count, noun))
def WriteRules(outfile):
    """Write conjunction grammar rules from the pickled conjunction list.

    Each entry is a tuple of (name, *flag_ints, semantics, is_series_int).
    Paired conjunctions containing '...' (e.g. correlatives) emit two
    PAIR_TEMPLATE rules sharing a group name; simple conjunctions emit a
    single SINGLE_TEMPLATE rule.
    """
    # Open in binary mode and close deterministically instead of leaking
    # the file handle from a bare pickle.load(open(...)).
    with open(CONJUNCTIONS_PATH, 'rb') as infile:
        conjunctions = pickle.load(infile)
    for conjunction in conjunctions:
        name = conjunction[0].replace(' ', '_')
        semantics = conjunction[-2]
        # Integer flags are rendered as '-' (0) or '+' (1).
        is_series = '-+'[conjunction[-1]]
        flags = tuple('-+'[i] for i in conjunction[1:-2])
        if '...' in name:
            group = name
            # NOTE(review): strip() removes whitespace only, not the '_'
            # introduced by the space replacement above — confirm the
            # pickled names use ' ... ' as the separator.
            first, second = [i.strip() for i in name.split('...')]
            first_terminals = base.LemmaToTerminals(first)
            second_terminals = base.LemmaToTerminals(second)
            common_args = (group, semantics) + flags + (is_series, )
            outfile.write(PAIR_TEMPLATE % (common_args + ('+', first_terminals)))
            outfile.write(PAIR_TEMPLATE % (common_args + ('-', second_terminals)))
        else:
            terminals = base.LemmaToTerminals(name)
            args = (semantics, ) + flags + (is_series, terminals)
            outfile.write(SINGLE_TEMPLATE % args)
def GetVerbRules(patterns):
    """Build verb grammar rules for the given pattern specifications.

    For every VerbNet class named in a pattern (including all of its
    subclasses), each member verb that has a known conjugation and an
    unambiguous WordNet lemma contributes one VERB_TEMPLATE rule per
    verb form.

    Args:
      patterns: iterable of (index, pattern, classes) triples, where
        classes is a collection of VerbNet class ids.

    Returns:
      A list of formatted VERB_TEMPLATE rule strings.
    """
    # Open in binary mode and close deterministically instead of leaking
    # the file handle from a bare pickle.load(open(...)).
    with open(VERBS_LIST, 'rb') as infile:
        conjugation = pickle.load(infile)
    rules = []
    # Map every class id prefix (e.g. 'name-NN', 'name-NN-1', ...) to the
    # set of concrete subclasses underneath it, so a base-class lookup
    # yields all children.
    with_children = collections.defaultdict(set)
    for cls in nltk.corpus.verbnet.classids():
        if cls.count('-') > 1:
            name, number, suffix = cls.split('-', 2)
            base_class = name + '-' + number
            # Renamed loop variable: the original reused 'number' here,
            # shadowing the class-number component split above.
            for part in suffix.split('-'):
                with_children[base_class].add(cls)
                base_class += '-' + part
            with_children[base_class].add(cls)
        else:
            with_children[cls].add(cls)
    for index, pattern, classes in patterns:
        for cls in classes:
            for frame_cls in with_children[cls]:
                frame_cls = nltk.corpus.verbnet.vnclass(frame_cls)
                for member in frame_cls.findall('MEMBERS/MEMBER'):
                    verb = member.attrib['name']
                    # Hoisted: skip unconjugatable verbs before the (possibly
                    # raising) WordNet lookups; the original only checked
                    # this inside the per-lemma loop.
                    if verb not in conjugation:
                        continue
                    lemmas = member.attrib['wn'].replace('?', '').split()
                    if lemmas:
                        # Explicit WordNet sense keys from VerbNet.
                        lemmas = [
                            nltk.corpus.wordnet.lemma_from_key(i + '::')
                            for i in lemmas
                        ]
                    else:
                        lemmas = nltk.corpus.wordnet.lemmas(verb, 'v')
                        # Without an explicit key, ambiguous verbs are
                        # dropped rather than guessed at.
                        if len(lemmas) > 1:
                            lemmas = []
                    for lemma in lemmas:
                        synset = lemma.synset.name
                        count = base.GetCompoundCount(lemma)
                        for form, conjugated_verb in zip(
                                VERB_FORMS, conjugation[verb]):
                            conjugated_verb = base.LemmaToTerminals(
                                conjugated_verb)
                            args = (form, index, cls, synset, count,
                                    conjugated_verb)
                            rules.append(VERB_TEMPLATE % args)
    return rules
def HandleLex(node, *_):
    """Return a terminal expansion for each word in a LEX node's value.

    The '[+be]' auxiliary marker is removed before splitting; an empty
    (or marker-only) value yields an empty list. Extra positional
    arguments are accepted and ignored.
    """
    text = node.attrib['value'].replace('[+be]', '').strip()
    if not text:
        return []
    return [base.LemmaToTerminals(word) for word in text.split()]