Example #1
0
def categorize_phrases(sentphrases, body_part_name, use_stem=True):
    """Classify the words of each phrase against the taxonomy.

    For every row of ``sentphrases`` each word of the row's phrase is looked
    up with ``taxonomy.classify``.  One output record is produced per word
    that received a category.

    Args:
        sentphrases: Object exposing ``iterrows()`` (e.g. a pandas DataFrame)
            whose rows provide a ``'sentence'`` string and a ``'phrase'``
            sequence of words.  Each word must expose ``.string`` and
            ``.tag`` attributes.
        body_part_name: Label copied verbatim into the ``body_part`` column
            of every record; it does not influence the classification.
        use_stem: When truthy, each word is reduced with NLTK's Porter
            stemmer before it is looked up in the taxonomy.

    Returns:
        A pandas DataFrame with the columns ``body_part``, ``sentence``,
        ``phrase``, ``category``, ``parent`` and ``score``.  ``parent`` is
        the first entry of ``taxonomy.parents(category)``, or ``''`` when
        there is none or it equals the category itself.  ``score`` is
        ``n * (n / len(phrase))`` where ``n`` is the number of classified
        words in the phrase.  Rows without any classified word (including
        rows with an empty phrase) contribute no records.
    """
    # NOTE(review): `taxonomy` and `nltk` are resolved from the enclosing
    # module; presumably `taxonomy` is pattern.search.taxonomy -- confirm.
    # Only build the stemmer when it is actually going to be used.
    stemmer = nltk.stem.PorterStemmer() if use_stem else None

    columns = ['body_part', 'sentence', 'phrase', 'category', 'parent', 'score']
    records = []

    for _, row in sentphrases.iterrows():
        sentence = row['sentence']
        phrase = row['phrase']

        # (category, parent) pairs, kept together so they cannot drift apart.
        matches = []
        for word in phrase:
            term = stemmer.stem(word.string) if use_stem else word.string
            category = taxonomy.classify(term)
            if not category:
                continue

            parent = taxonomy.parents(category)
            if parent and parent[0] != category:
                matches.append((category, parent[0]))
            else:
                matches.append((category, ''))

        # Nothing classified means nothing to record.  Bailing out here also
        # avoids dividing by zero below when the phrase itself is empty.
        if not matches:
            continue

        # Commas are stripped from the text fields (presumably so the frame
        # can be written as comma-separated values -- confirm with callers).
        phrase_str = ' '.join(w.string + '/' + w.tag for w in phrase)
        phrase_str = phrase_str.replace(',', '')
        sentence_str = sentence.replace(',', '')

        # Number of hits weighted by the fraction of the phrase they cover.
        hits = len(matches)
        score = hits * (hits / float(len(phrase)))

        for category, parent in matches:
            records.append(
                (body_part_name, sentence_str, phrase_str, category, parent, score)
            )

    return pd.DataFrame(records, columns=columns)
Example #2
0
from pattern.search import search, taxonomy, Classifier
from pattern.en import parsetree

# The search module includes a Taxonomy class
# that can be used to define semantic word types.
# For example, consider that you want to extract flower names from a text.
# Spelling out every name would make search patterns somewhat unwieldy:
# search("rose|lily|daisy|daffodil|begonia", txt).

# A better approach is to register each name in the taxonomy under one type:
for flower in ("rose", "lily", "daisy", "daffodil", "begonia"):
    taxonomy.append(flower, type="flower")

# print(x) with a single argument behaves the same on Python 2 and Python 3,
# whereas the `print x` statement is a SyntaxError on Python 3.
# print("") stands in for the bare `print` statement (a blank separator line).
print(taxonomy.children("flower"))
print(taxonomy.parents("rose"))
print(taxonomy.classify("rose"))  # Yields the most recently added parent.
print("")

# Taxonomy terms can be included in a pattern by using uppercase:
t = parsetree("A field of white daffodils.", lemmata=True)
m = search("FLOWER", t)
print(t)
print(m)
print("")

# Another example: a term may be registered under several types,
# and a type can itself be given a parent type.
taxonomy.append("chicken", type="food")
taxonomy.append("chicken", type="bird")
taxonomy.append("penguin", type="bird")
taxonomy.append("bird", type="animal")
print(taxonomy.parents("chicken"))
Example #3
0
from pattern.search import Pattern, Constraint, Classifier, taxonomy
from pattern.en     import Sentence, parse

# The search module includes a Taxonomy class
# that can be used to define semantic word types.
# For example, consider that you want to extract flower names from a text.
# Spelling out every name would make patterns somewhat unwieldy, e.g.:
# Pattern.fromstring("rose|lily|daisy|daffodil|begonia").

# A better approach is to register each name in the taxonomy under one type:
for flower in ("rose", "lily", "daisy", "daffodil", "begonia"):
    taxonomy.append(flower, type="flower")

# print(x) with a single argument behaves the same on Python 2 and Python 3,
# whereas the `print x` statement is a SyntaxError on Python 3.
# print("") stands in for the bare `print` statement (a blank separator line).
print(taxonomy.children("flower"))
print(taxonomy.parents("rose"))
print(taxonomy.classify("rose"))  # Yields the most recently added parent.
print("")

# Taxonomy terms can be included in a pattern, either through an explicit
# Constraint or through the uppercase shorthand; the second assignment
# simply replaces the first.
p = Pattern([Constraint(taxa=["flower"])])  # or
p = Pattern.fromstring("FLOWER")

s = Sentence(parse("A field of white daffodils.", lemmata=True))
m = p.search(s)
print(s)
print(m)
print("")

from pattern.search import search
# Register "chicken" under two semantic types, in this order
# ("food" first, then "bird").
for semantic_type in ("food", "bird"):
    taxonomy.append("chicken", type=semantic_type)
Example #4
0

# Register terms under the 'angle' type...
for f in ('reflect', 'bank'):
    taxonomy.append(f, type='angle')

# ...and under the 'finance' type.  'bank' is deliberately added to both,
# with 'finance' being the later registration.
for f in ('bank', 'financial-institution'):
    taxonomy.append(f, type='finance')


t = parsetree('A field of daffodils is white.', lemmata=True)
# print(x) with a single argument behaves the same on Python 2 and Python 3,
# whereas the `print x` statement is a SyntaxError on Python 3.
print(search('PLANT', t))

# The results of the calls below are not stored or printed: they only show
# up when the lines are typed into an interactive session.
taxonomy.parents('daffodil', recursive=True)
taxonomy.children('plant', recursive=False)

taxonomy.classify('bank')





from pattern.en import wordnet


# Pick one sense of each word: the fifth synset returned for 'tone' and the
# first synset returned for 'color' (evaluated in that order).
a, b = wordnet.synsets('tone')[4], wordnet.synsets('color')[0]

# The similarity value is not stored or printed; it is only visible when
# this line is run in an interactive session.
wordnet.similarity(a, b)