Example #1
0
__author__ = 'daksh'
import time
from dishingOut.Database.database import MongoOperator as mongo
from dishingOut.NPChunking.NPChunker import NPChunker
import csv
import json

# NP chunker used to split reviews and extract chunks below.
chunker = NPChunker()
chunker.train()


# Get all test adjectives: unfiltered, roughly 100k (1 lakh) entries,
# containing many non-adjectives.

# Earlier data source — read the reviews straight from MongoDB:
# database = mongo('DishingOut')
# database.setUpConnection()
# database.setUpCollection('reviews')
# restaurants = database.getAll()

# Current source: a local JSON dump of the restaurant documents.
with open('../data/restaurants.json') as data_file:
    restaurants = json.load(data_file)
# NOTE(review): skips the first 2000 restaurants — presumably already
# processed in an earlier run; confirm before re-running end to end.
restaurants = restaurants[2000:]
start = time.time()

# Chunk every review sentence (currently disabled):
# for restaurant in restaurants:
#     for review in restaurant['userReviews']:
#         text = review['reviewText']
#         sentences = chunker.split(text)
#         for sent in sentences:
#             tree,terms = chunker.extractChunk(sent)
#             print(tree)

vocabulary = []
Example #2
0
import nltk
import csv
import json
from dishingOut.NPChunking.NPChunker import NPChunker


# Make NLTK search the local data directory for its corpora/models.
nltk.data.path.append("/home/daksh/Documents/Softwares/nltk_data")

# Imported after the data path is registered so corpus loading succeeds.
from nltk.corpus import sentiwordnet as wn
from textblob import Word


chunker = NPChunker()
chunker.train()


def _read_first_column(path):
    """Return the first field of every row in the CSV file at *path*."""
    with open(path, 'r') as csvfile:
        # Previously named 'spamwriter', a misleading name for a reader.
        reader = csv.reader(csvfile)
        return [row[0] for row in reader]


# Seed adjective lists used to bootstrap sentiment polarity.
pos_words = _read_first_column('../data/posAdj.csv')
neg_words = _read_first_column('../data/negAdj.csv')

# Seed lookup: positive adjectives map to True.
posSeed_list = {key: True for key in pos_words}
Example #3
0
__author__ = 'daksh'

from dishingOut.NPChunking.NPChunker import NPChunker

# Manual smoke-test script: run one review sentence through the NP
# chunker and inspect the resulting tree and extracted terms.
chunker = NPChunker()
chunker.train()
# Earlier test sentences, kept for quick re-runs:
# tree,terms = chunker.extractChunk("Dosa was good")
# text = 'An institution in south indian offerings with Benne Masala Dosa at its best along with rava upma,kesari bath and mangalore bhajji with same level of excellence.Masala Dosa- You cant eat just one,will crave for more for sure.It tastes heavenly with the chutney CTR offers,its own kind.'
# text = 'Biryani was aromatic and superb'
# text = "Brilliant Idli Vada but Kara Bhath is not good!"
# text = "Amritsari fish were my favorites of the day"
# text = "Rasgulla and jamun were good"

# text = "Idli Vada and Kara Bhath are good!"
# text= "We had a crispy dosa and the coffee was not hot but the paneer was pathetic"
# text = "crispy dosa and coffee not hot"
# text = "Double masala chicken biryani served here is the best biryani I have tried till date"
# text = "Other starters that I have ordered like lemon chicken chicken kebab paneer Manchurian are okayish but the taste of biryani covers it all for me"
# text = "paneer was tasty"
# text = "I did not expect palak paneer to be so good but to my wonder it was excellent."
text = "Paneer tikka - Soft and tasty."

# extractChunk returns the parse tree plus extracted terms
# (presumably dish/adjective phrases — confirm against NPChunker).
tree, terms = chunker.extractChunk(text)
tree.draw()  # opens an interactive NLTK tree window (blocks until closed)
# tree.draw()
print(terms)
# sentences = chunker.getSentences(text)
# print(type(sentences[0]))
#for sent in sentences:
#   print(chunker.getAdjetives(sent))
Example #4
0
class AntonymReplacer(object):
    """Resolves negations in review sentences.

    'not <adjective>' phrases are rewritten to the adjective's antonym
    (looked up via SentiWordNet) so downstream sentiment scoring sees a
    plain polarity word instead of a negation.
    """

    def __init__(self):
        # NP chunker supplies tokenization (getWords) for NegationCheck.
        self.chunker = NPChunker()
        self.chunker.train()

    def replace(self, adj, pos=None):
        """Return the first antonym of *adj* found via SentiWordNet, or
        None when no antonym exists.

        *pos* is accepted for interface compatibility but is currently
        unused (the lookup is done without a POS filter).
        """
        # Walk the senti-synsets in order and return the first antonym
        # encountered. (A previously collected 'synonyms' list was never
        # used and has been removed.)
        for syn in wn.senti_synsets(adj):
            for lemma in syn.synset.lemmas():
                antonyms = lemma.antonyms()
                if antonyms:
                    return antonyms[0].name()
        return None

    def replace_negations(self, sent):
        """Replace each 'not X' pair in the token list *sent* with X's antonym.

        Returns (replaced, words): *replaced* is True when at least one
        substitution happened; *words* is the rewritten token list.
        'not X' pairs with no known antonym are left untouched.
        """
        words = []
        replaced = False
        i, length = 0, len(sent)
        while i < length:
            word = sent[i]
            if word == 'not' and i + 1 < length:
                antonym = self.replace(sent[i + 1])
                if antonym:
                    words.append(antonym)
                    i += 2  # consume both 'not' and the negated word
                    replaced = True
                    continue
            words.append(word)
            i += 1
        return replaced, words

    def checkNegationWords(self, words):
        """Return True when *words* still contains a bare negation marker."""
        return 'not' in words or 'nothing' in words

    def NegationCheck(self, sentence):
        """Tokenize *sentence* and resolve or flag its negations.

        Returns (negated, words). When a 'not X' pair was replaced by an
        antonym the sentence is no longer negated, so (False, new_words)
        is returned; otherwise *negated* reports whether a bare negation
        word remains in the original tokens.
        """
        words = self.chunker.getWords(sentence)
        replaced, new_words = self.replace_negations(words)
        if replaced:
            return False, new_words
        return self.checkNegationWords(words), words


# a = AntonymReplacer()
# print(a.NegationCheck('The dosa was good'))
Example #5
0
 def __init__(self):
     """Create and train the NP chunker used for sentence tokenization."""
     self.chunker = NPChunker()
     self.chunker.train()
Example #6
0
class AntonymReplacer(object):
    """Detects and resolves negations in review sentences by swapping
    'not <adjective>' for the adjective's SentiWordNet antonym."""

    def __init__(self):
        # NP chunker provides the getWords tokenizer used below.
        self.chunker = NPChunker()
        self.chunker.train()

    def replace(self, adj, pos=None):
        """Return an antonym for *adj* from SentiWordNet, or None.

        *pos* is kept for interface compatibility but is not used.
        """
        # First antonym found wins. The old implementation also built an
        # unused 'synonyms' list and kept a dead commented-out variant;
        # both removed.
        for senti_syn in wn.senti_synsets(adj):
            for lemma in senti_syn.synset.lemmas():
                ants = lemma.antonyms()
                if ants:
                    return ants[0].name()
        return None

    def replace_negations(self, sent):
        """Rewrite 'not X' pairs in token list *sent* as X's antonym.

        Returns (replaced, words) where *replaced* says whether any
        substitution occurred. Pairs without a known antonym are kept.
        """
        out = []
        substituted = False
        i, n = 0, len(sent)
        while i < n:
            token = sent[i]
            if token == 'not' and i + 1 < n:
                ant = self.replace(sent[i + 1])
                if ant:
                    out.append(ant)
                    i += 2  # skip the 'not' and the word it negated
                    substituted = True
                    continue
            out.append(token)
            i += 1
        return substituted, out

    def checkNegationWords(self, words):
        """True when a bare negation marker is still present."""
        return 'not' in words or 'nothing' in words

    def NegationCheck(self, sentence):
        """Tokenize *sentence*; resolve negations or flag leftovers.

        Returns (negated, words): (False, rewritten) when antonym
        substitution removed the negation, otherwise the presence of a
        bare negation word together with the original tokens.
        """
        words = self.chunker.getWords(sentence)
        substituted, rewritten = self.replace_negations(words)
        if substituted:
            return False, rewritten
        return self.checkNegationWords(words), words



# a = AntonymReplacer()
# print(a.NegationCheck('The dosa was good'))
Example #7
0
 def __init__(self):
     """Create and train the NP chunker used for sentence tokenization."""
     self.chunker = NPChunker()
     self.chunker.train()
Example #8
0
__author__ = "Daksh"

import nltk
import csv
import json
from dishingOut.NPChunking.NPChunker import NPChunker

# Make NLTK search the local data directory for its corpora/models.
nltk.data.path.append("/home/daksh/Documents/Softwares/nltk_data")

# Imported after the data path is registered so corpus loading succeeds.
from nltk.corpus import sentiwordnet as wn
from textblob import Word

chunker = NPChunker()
chunker.train()


def _read_first_column(path):
    """Return the first field of every row in the CSV file at *path*."""
    with open(path, 'r') as csvfile:
        # Previously named 'spamwriter', a misleading name for a reader.
        reader = csv.reader(csvfile)
        return [row[0] for row in reader]


# Seed adjective lists used to bootstrap sentiment polarity.
pos_words = _read_first_column('../data/posAdj.csv')
neg_words = _read_first_column('../data/negAdj.csv')

# Seed lookups: positive adjectives map to True, negative to False.
posSeed_list = {key: True for key in pos_words}
negSeed_list = {key: False for key in neg_words}
Example #9
0
import json
from dishingOut.NPChunking.NPChunker import NPChunker
from dishingOut.SentimentAnalysis.antonymReplacer import AntonymReplacer
import pickle
import time

# Restaurant/dish data produced by the extraction pipeline.
with open('../data/final_data.json', 'r') as f:
    data = json.load(f)

chunker = NPChunker()
chunker.train()
replacer = AntonymReplacer()

# Load the pre-trained Naive Bayes sentiment classifier. Use a context
# manager instead of manual open/close so the handle is released even
# if unpickling fails. NOTE(review): unpickling is only safe because
# this file is produced locally — never load untrusted pickles.
with open('../SentimentAnalysis/nb_classifier.pickle', 'rb') as f:
    classifier = pickle.load(f)

print("Done pre-processing")

def word_feats(words):
    """Map every word to True — the bag-of-words feature-dict shape
    expected by an NLTK Naive Bayes classifier."""
    return {word: True for word in words}


start = time.time()
restaurants = data
for restaurant in restaurants:
    dishes = restaurant['dishes']
    restaurant['dishRatings'] = dict()
    print(dishes.keys())
    allChunks = dict()
    for dish in dishes.keys():
        # print(dish)
Example #10
0
__author__ = 'daksh'

from dishingOut.NPChunking.NPChunker import NPChunker

# Manual smoke-test script: run one review sentence through the NP
# chunker and inspect the resulting tree and extracted terms.
chunker = NPChunker()
chunker.train()
# Earlier test sentences, kept for quick re-runs:
# tree,terms = chunker.extractChunk("Dosa was good")
# text = 'An institution in south indian offerings with Benne Masala Dosa at its best along with rava upma,kesari bath and mangalore bhajji with same level of excellence.Masala Dosa- You cant eat just one,will crave for more for sure.It tastes heavenly with the chutney CTR offers,its own kind.'
# text = 'Biryani was aromatic and superb'
# text = "Brilliant Idli Vada but Kara Bhath is not good!"
# text = "Amritsari fish were my favorites of the day"
# text = "Rasgulla and jamun were good"

# text = "Idli Vada and Kara Bhath are good!"
# text= "We had a crispy dosa and the coffee was not hot but the paneer was pathetic"
# text = "crispy dosa and coffee not hot"
# text = "Double masala chicken biryani served here is the best biryani I have tried till date"
# text = "Other starters that I have ordered like lemon chicken chicken kebab paneer Manchurian are okayish but the taste of biryani covers it all for me"
# text = "paneer was tasty"
# text = "I did not expect palak paneer to be so good but to my wonder it was excellent."
text = "Paneer tikka - Soft and tasty."

# extractChunk returns the parse tree plus extracted terms
# (presumably dish/adjective phrases — confirm against NPChunker).
tree,terms = chunker.extractChunk(text)
tree.draw()  # opens an interactive NLTK tree window (blocks until closed)
# tree.draw()
print(terms)
# sentences = chunker.getSentences(text)
# print(type(sentences[0]))
#for sent in sentences:
 #   print(chunker.getAdjetives(sent))