def _add_category(self, file_name, tag):
    """
    Loads the terms from the specified file into the taxonomy
    and tags each of them with the specified tag.
    """
    with open(file_name) as f:
        for term in f:
            taxonomy.append(term.rstrip('\n'), type=tag)
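# A minimal usage sketch for _add_category; the file name and contents are
# assumptions for illustration. The file lists one term per line:
#
#   rose
#   lily
#   daisy
#
# self._add_category("flowers.txt", tag="flower")
#
# After loading, search("FLOWER", parsetree(text)) will match any of the terms.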
import nltk
import pandas as pd
from pattern.search import taxonomy


def load_taxonomy(filename, use_stem=True):
    """
    Loads a taxonomy from a CSV file with 'Category', 'Parent' and
    comma-separated 'Syns' columns, optionally stemming each synonym.
    """
    stemmer = nltk.stem.PorterStemmer()
    taxdf = pd.read_csv(filename)
    for i, row in taxdf.iterrows():
        for synonym in row['Syns'].split(','):
            if use_stem:
                taxonomy.append(stemmer.stem(synonym), type=row['Category'])
            else:
                taxonomy.append(synonym, type=row['Category'])
        # Link the category itself to its parent, once per row.
        taxonomy.append(row['Category'], type=row['Parent'])
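# A minimal usage sketch; the file name and rows are assumptions.
# A taxonomy.csv matching the columns the code expects could look like:
#
#   Category,Parent,Syns
#   flower,plant,"rose,lily,daisy"
#   tree,plant,"oak,pine"
#
# load_taxonomy("taxonomy.csv", use_stem=False)
# print(taxonomy.children("flower"))  # the loaded synonyms
# print(taxonomy.parents("flower"))   # ['plant']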
import os, sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.search import search, taxonomy, Classifier
from pattern.en import parsetree

# The search module includes a Taxonomy class
# that can be used to define semantic word types.
# For example, consider that you want to extract flower names from a text.
# This would make search patterns somewhat unwieldy:
# search("rose|lily|daisy|daffodil|begonia", txt).
# A better approach is to use the taxonomy:
for flower in ("rose", "lily", "daisy", "daffodil", "begonia"):
    taxonomy.append(flower, type="flower")

print(taxonomy.children("flower"))
print(taxonomy.parents("rose"))
print(taxonomy.classify("rose"))  # Yields the most recently added parent.
print()

# Taxonomy terms can be included in a pattern by using uppercase:
t = parsetree("A field of white daffodils.", lemmata=True)
m = search("FLOWER", t)
print(t)
print(m)
print()

# Another example:
taxonomy.append("chicken", type="food")
taxonomy.append("chicken", type="bird")
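# Continuing the "chicken" example above: classify() yields the most
# recently added parent, so "bird" wins here (a minimal sketch; the expected
# values follow from the comment on classify() above):
print(taxonomy.classify("chicken"))  # 'bird'
print(taxonomy.parents("chicken"))   # includes both 'bird' and 'food'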
# Import itertools for permutations
import itertools

# Import Pattern Search and English modules
from pattern.search import search, taxonomy
from pattern.en import parsetree

# Import (and print out) test strings for pattern matching
# import test_strings

# Define Taxonomy CAUSALV1 for verbs: e.g., cause*
causal_verb_list1 = ['causes', 'caused']
for c in causal_verb_list1:
    taxonomy.append(c, type='CAUSALV1')
# Consider adding other verb tenses based on cause here as appropriate

# Define Taxonomy for CAUSALV2 for simple causal verbs tagged as nouns
# in their simple present form, e.g., cause
causal_verb_list2 = ['cause']
for c in causal_verb_list2:
    taxonomy.append(c, type='CAUSALV2')
# Consider adding other verb tenses based on cause here as appropriate.
# This includes verb tenses where subject/cause noun phrase is plural.

###############################################################################
# Cause-effect patterns: statements where cause precedes the effect
###############################################################################

# Manually-defined noun phrase definitions
# Consider adding noun phrase chunk from NLTK book, which has optional
# determiner, etc. (a short sketch follows below)
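# A minimal sketch of a manually-defined noun phrase constraint and a
# cause-effect pattern built from it. The tag sequence (optional determiner,
# optional adjective, one or more nouns) and the sample sentence are
# assumptions, loosely following the NLTK-style NP chunk mentioned above:
np_chunk = "DT? JJ? NN+"
causal_pattern1 = "{%s} CAUSALV1 {%s}" % (np_chunk, np_chunk)

t = parsetree("The drought caused a severe famine.", lemmata=True)
for m in search(causal_pattern1, t):
    print(m.group(1), "->", m.group(2))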
import os, sys; sys.path.append(os.path.join("..", "..", ".."))

from pattern.search import Pattern, Constraint, Classifier, taxonomy
from pattern.en import Sentence, parse

# The search module includes a Taxonomy class
# that can be used to define semantic word types.
# For example, consider that you want to extract flower names from a text.
# This would make patterns somewhat unwieldy, e.g.:
# Pattern.fromstring("rose|lily|daisy|daffodil|begonia").
# A better approach is to use the taxonomy:
for flower in ("rose", "lily", "daisy", "daffodil", "begonia"):
    taxonomy.append(flower, type="flower")

print(taxonomy.children("flower"))
print(taxonomy.parents("rose"))
print(taxonomy.classify("rose"))  # Yields the most recently added parent.
print()

# Taxonomy terms can be included in a pattern:
p = Pattern([Constraint(taxa=["flower"])])  # or: p = Pattern.fromstring("FLOWER")
s = Sentence(parse("A field of white daffodils.", lemmata=True))
m = p.search(s)
print(s)
print(m)
print()

from pattern.search import search
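# Classifier is imported above but not used yet. A minimal sketch, based on
# the pattern.search Classifier API: a classifier computes parents on the
# fly, so terms need not be enumerated one by one (the suffix rule and the
# word 'sunflower' are assumptions for illustration):
def flower_parents(term):
    return ['flower'] if term.endswith('flower') else []

taxonomy.classifiers.append(Classifier(flower_parents))
print(taxonomy.classify('sunflower'))  # 'flower'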
def get_arrets(self):
    """
    Searches the tweet (self.s) and returns the names of the
    transit stops it mentions, if any.
    """
    # Initialization
    stop_found = []
    irrelevent = ['GARE', 'SAINT', 'SAINTE']
    accepted_tags = ['NN', 'NNP', 'NNS']
    stop = self.Lex_learn['arrets']
    tax = []

    # Learn the lexicon of stops; each entry is "name|synonym1,synonym2,...".
    for l in stop:
        l_split = l.split('|')
        tax.append(l_split[0])
        if len(l_split[0]) != len(l):
            # The entry also lists synonyms after the '|'.
            tax.extend(l_split[1].split(','))
    for a in tax:
        a = suppr_ret_char(a)
        taxonomy.append(a, type='ARRET')

    # Search for stop keywords in the tweet (self.s).
    t = parsetree(self.s)
    matches = search('ARRET', t)
    for m in matches:
        for w in m.words:
            if w.tag in accepted_tags and len(w.string) > 2 \
                    and w.string.upper() not in irrelevent:
                stop_found.append(w.string)
            elif self._is_in_lev(w.string.upper(), self.Lex_arret):
                stop_found.append(w.string)

    # Search for compound stop names (not yet functional).
    to_remove = []
    compound_found = []
    for i in range(0, len(stop_found)):
        for j in range(i, len(stop_found)):
            if self._is_neighbour(stop_found[i], stop_found[j]):
                w_compound = stop_found[i] + " " + stop_found[j]
                compound_found.append(w_compound)
                to_remove.append(stop_found[i])
                to_remove.append(stop_found[j])
    stop_found.extend(compound_found)
    for w in set(to_remove):
        stop_found.remove(w)

    # Translate the stops found into actual stop names.
    for i in range(0, len(stop_found)):
        stop_found[i] = self._to_tbc_arret(stop_found[i])[0]

    # Remove inconsistent stops.
    if 'AAAA' in stop_found:
        stop_found.remove('AAAA')
    return list(set(stop_found))
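# A self-contained sketch of the core lookup in get_arrets: load stop names
# into the taxonomy and search a sentence for the ARRET type. The stop names
# and the sample tweet are assumptions for illustration:
from pattern.en import parsetree
from pattern.search import search, taxonomy

for arret in ("Gambetta", "Quinconces", "Stalingrad"):
    taxonomy.append(arret, type='ARRET')

t = parsetree("Tram bloque a Quinconces ce matin.")
print(search('ARRET', t))  # matches 'Quinconces'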
import string
from collections import defaultdict

import nltk
import pandas as pd
from nltk.corpus import framenet as fn
from textblob import TextBlob

from pattern.en import suggest, parse, parsetree, sentiment
from pattern.en import conjugate, lemma, lexeme
from pattern.search import search, taxonomy

for f in ('rose', 'lily', 'daisy', 'daffodil', 'begonia'):
    taxonomy.append(f, type='flower')
for f in ('flower', 'tree'):
    taxonomy.append(f, type='plant')

t = parsetree('A field of daffodils is white.', lemmata=True)
print(search('PLANT', t))  # 'daffodils' matches via flower -> plant

print(taxonomy.parents('daffodil', recursive=True))  # 'flower' and, recursively, 'plant'
print(taxonomy.children('plant', recursive=False))   # 'flower' and 'tree'

#def taxonomy_normalize(sentence):
#    bp_match = search('BEAUTY_PARTS', parsetree(sentence, lemmata=True))
#    facial_match = search('MAKEUP', parsetree(sentence, lemmata=True))
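# A minimal sketch of the normalization idea in the commented-out function
# above, using the PLANT taxonomy defined in this file instead of the
# undefined BEAUTY_PARTS / MAKEUP types (the helper name is hypothetical):
def taxonomy_matches(sentence, tag='PLANT'):
    # Return the strings of all words matched for the given taxonomy type.
    matches = search(tag, parsetree(sentence, lemmata=True))
    return [w.string for m in matches for w in m.words]

print(taxonomy_matches('A field of daffodils is white.'))  # ['daffodils']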
# Import Pattern Search and English modules
from pattern.search import search, taxonomy
from pattern.en import parsetree

# Defining simple causal patterns

# Taxonomy for cause-effect pattern
causal_verb_list1 = [
    'causes', 'caused', 'would cause', 'will cause', 'is causing',
    'has been causing', 'was causing', 'had been causing',
    'will be causing', 'will have been causing', 'would be causing',
    'would have been causing', 'are causing', 'have been causing',
    'were causing'
]
for c1 in causal_verb_list1:
    taxonomy.append(c1, type='CAUSALV1')

cause_effect_pattern = "{NP} CAUSALV1 {NP}"

# Taxonomy for effect-cause pattern
causal_verb_list2 = [
    "is caused", "was caused", "are caused", "were caused",
    "has been caused", "have been caused", "had been caused",
    "will have been caused", "would have been caused",
    "is being caused", "was being caused", "were being caused",
    "would be caused", "will be caused"
]
for c2 in causal_verb_list2:
    taxonomy.append(c2, type='CAUSALV2')

effect_cause_pattern = "{NP} CAUSALV2 by {NP}"
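# A short usage sketch for the two patterns above; the sample sentences are
# assumptions for illustration. Note that the second pattern relies on the
# taxonomy matching multi-word terms such as 'was caused':
t1 = parsetree("Heavy rain causes flooding.", lemmata=True)
for m in search(cause_effect_pattern, t1):
    print("cause:", m.group(1), "effect:", m.group(2))

t2 = parsetree("The flooding was caused by heavy rain.", lemmata=True)
for m in search(effect_cause_pattern, t2):
    print("effect:", m.group(1), "cause:", m.group(2))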
from nltk.corpus import framenet as fn
from pattern.en import lemma, parsetree
from pattern.search import search, taxonomy

print(lemma('humidity'))

frames = fn.frames_by_lemma(r'skin')
for f in frames:
    print('%s - %s\n' % (f.name, f.definition))

# Exploratory FrameNet lookups (results unused).
fn.lexical_units(r'')
fn.frames_by_lemma(r'(?i)a little')

for f in ('reflect', 'bank'):
    taxonomy.append(f, type='angle')
for f in ('bank', 'financial-institution'):
    taxonomy.append(f, type='finance')

t = parsetree('A field of daffodils is white.', lemmata=True)
print(search('PLANT', t))

print(taxonomy.parents('daffodil', recursive=True))
print(taxonomy.children('plant', recursive=False))
print(taxonomy.classify('bank'))  # 'finance': the most recently added parent.
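# Both senses of 'bank' live in the taxonomy above, so a type search matches
# it under either parent; classify() only reports the most recently added
# one. A minimal sketch (the sample sentence is an assumption):
t = parsetree('The bank reflected the light.', lemmata=True)
print(search('ANGLE', t))    # matches 'bank' and 'reflected'
print(search('FINANCE', t))  # matches 'bank' as well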