Example #1
    def augmentKeywordsFromSentence(self, stopWordList, tokens):
        from stdwn import impl

        # initialise keyword list
        keywords = []

        # get WordNet information for each relevant word
        for token in tokens:
            # get synsets
            synsets = impl.lookupSynsetsByForm(token)

            # go through synsets
            for synset in synsets:
                # append synonyms
                for synonym in synset.synonyms:
                    keywords.append(self.underscore.sub(" ", synonym.form))

                # go through hyponyms
                for hyponym in synset.hyponyms():
                    # append synonyms
                    for synonym in hyponym.synonyms:
                        keywords.append(self.underscore.sub(" ", synonym.form))

                # go through hypernyms
                for hypernym in synset.relations("@"):
                    # append synonyms
                    for synonym in hypernym.synonyms:
                        keywords.append(self.underscore.sub(" ", synonym.form))

        # build set with unique values
        keywords = set(keywords)

        # return keywords
        return keywords
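
The method above only shows the traversal itself; the sketch below wraps it in a hypothetical host class so it can be run on its own. The class name KeywordAugmenter, the underscore pattern, and the sample call are assumptions rather than part of the source; only the stdwn calls and the synonym/hyponym/hypernym traversal come from the example.

import re
from stdwn import impl

class KeywordAugmenter(object):
    def __init__(self):
        # WordNet joins multiword lemmas with underscores ('digital_camera');
        # this pattern is assumed to be what self.underscore refers to above
        self.underscore = re.compile('_')

    def augmentKeywordsFromSentence(self, stopWordList, tokens):
        # condensed form of the traversal above: synonyms of the synset
        # itself, of its hyponyms, and of its hypernyms ('@' relation)
        keywords = []
        for token in tokens:
            for synset in impl.lookupSynsetsByForm(token):
                for related in [synset] + list(synset.hyponyms()) + list(synset.relations('@')):
                    for synonym in related.synonyms:
                        keywords.append(self.underscore.sub(' ', synonym.form))
        return set(keywords)

# example call; tokens are assumed to be pre-filtered against the stop word list
augmenter = KeywordAugmenter()
print augmenter.augmentKeywordsFromSentence([], ['camera'])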
Example #2
    def augmentKeywordsFromSentence(self, stopWordList, tokens):
        from stdwn import impl

        # initialise keyword list
        keywords = []

        # get WordNet information for each relevant word
        for token in tokens:
            # get synsets
            synsets = impl.lookupSynsetsByForm(token)

            # go through synsets
            for synset in synsets:
                # append synonyms
                for synonym in synset.synonyms:
                    keywords.append(self.underscore.sub(' ', synonym.form))

                # go through hyponyms
                for hyponym in synset.hyponyms():
                    # append synonyms
                    for synonym in hyponym.synonyms:
                        keywords.append(self.underscore.sub(' ', synonym.form))

                # go through hypernyms
                for hypernym in synset.relations('@'):
                    # append synonyms
                    for synonym in hypernym.synonyms:
                        keywords.append(self.underscore.sub(' ', synonym.form))

        # build set with unique values
        keywords = set(keywords)

        # return keywords
        return keywords
Example #3
from stdwn import impl

# train tagging model (tag, brown, textCategories and tokenizer are assumed to
# be provided by the surrounding project)
model = tag.Bigram()
model.train(brown.tagged([
    textCategories['pressReportage'], textCategories['pressEditorial'],
    textCategories['pressReviews'], textCategories['skillsAndHobbies'],
    textCategories['popularLore']
]))

# tag text
text = 'I want to buy a camera'
tokens = list(tokenizer.processWhitespacesWithoutStopWords(text, 1))
taggedTokens = list(model.tag(tokens))
print tokens
print taggedTokens

# get WordNet information for each noun
for taggedToken in taggedTokens:
    if taggedToken[1] == 'nn' or taggedToken[1] is None:
        # get synsets
        synsets = impl.lookupSynsetsByForm(taggedToken[0])

        # print synonyms, hyponyms and hypernyms for each synset
        for synset in synsets:
            # go through synonyms
            print '--------------------------------------------------------------'
            for synonym in synset.synonyms:
                print synonym.form

            # go through hyponyms
            print '- Hyponyms ------------------------------------------------------------'
            for hyponym in synset.hyponyms():
                for hypoSynonym in hyponym.synonyms:
                    print synonym.form, hypoSynonym.form

            # go through hypernyms
            print '- Hypernyms -----------------------------------------------------------'
            for hypernym in synset.relations('@'):
                for hyperSynonym in hypernym.synonyms:
                    print synonym.form, hyperSynonym.form
Example #4
from stdwn import impl

# get synsets
synsets = impl.lookupSynsetsByForm('camera')

# print
for synset in synsets:
    for item in synset:
        print item
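
Example #4 simply iterates the raw synset objects. The sketch below uses the attribute access that appears in the other examples (synonym forms, hyponyms, and the '@' hypernym relation) to print something more readable; the labels and list-comprehension formatting are illustrative only.

from stdwn import impl

for synset in impl.lookupSynsetsByForm('camera'):
    # synonym lemma forms of the synset itself
    print [synonym.form for synonym in synset.synonyms]

    # more specific terms
    for hyponym in synset.hyponyms():
        print '  hyponym: ', [synonym.form for synonym in hyponym.synonyms]

    # more general terms ('@' relation)
    for hypernym in synset.relations('@'):
        print '  hypernym:', [synonym.form for synonym in hypernym.synonyms]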
Example #5
from stdwn import impl

# train tagging model
model = tag.Bigram()
model.train(
    brown.tagged([
        textCategories['pressReportage'], textCategories['pressEditorial'],
        textCategories['pressReviews'], textCategories['skillsAndHobbies'],
        textCategories['popularLore']
    ]))

# tag text
text = 'I want to buy a camera'
tokens = list(tokenizer.processWhitespacesWithoutStopWords(text, 1))
taggedTokens = list(model.tag(tokens))
print tokens
print taggedTokens

# get WordNet information for each noun
for taggedToken in taggedTokens:
    if taggedToken[1] == 'nn' or taggedToken[1] is None:
        # get synsets
        synsets = impl.lookupSynsetsByForm(taggedToken[0])

        # print synonyms, hyponyms and hypernyms for each synset
        for synset in synsets:
            # go through synonyms
            print '--------------------------------------------------------------'
            for synonym in synset.synonyms:
                print synonym.form

            # go through hyponyms
            print '- Hyponyms ------------------------------------------------------------'
            for hyponym in synset.hyponyms():
                for hypoSynonym in hyponym.synonyms:
                    print synonym.form, hypoSynonym.form

            # go through hypernyms
            print '- Hypernyms -----------------------------------------------------------'
            for hypernym in synset.relations('@'):
                for hyperSynonym in hypernym.synonyms:
                    print synonym.form, hyperSynonym.form
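
Examples #3 and #5 print the related forms as they are found; a natural follow-up, mirroring the set built in Examples #1 and #2, is to collect them into a unique keyword set. The sketch below reuses taggedTokens from the example above; the underscore-to-space substitution via str.replace is an assumption standing in for the self.underscore pattern used earlier.

from stdwn import impl

# collect unique keyword forms for every tagged noun, as in Examples #1 and #2
keywords = set()
for taggedToken in taggedTokens:
    if taggedToken[1] == 'nn' or taggedToken[1] is None:
        for synset in impl.lookupSynsetsByForm(taggedToken[0]):
            for related in [synset] + list(synset.hyponyms()) + list(synset.relations('@')):
                for synonym in related.synonyms:
                    keywords.add(synonym.form.replace('_', ' '))
print keywords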