from senticnet.senticnet import SenticNet


def senticnet(text):
    """
    Returns a list obtained from SenticNet with the following four features normalized:
    [pleasantness_value, attention_value, sensitivity_value, aptitude_value]
    :param text: input text pre-processed by spaCy
    :return: a list with the SenticNet features averaged over all the words in text
    """
    list_features = [0] * 4
    sn = SenticNet()
    count_words = 0
    for token in text:
        try:
            concept_info = sn.concept(token)
            list_features[0] += float(concept_info['sentics']['pleasantness'])
            list_features[1] += float(concept_info['sentics']['attention'])
            list_features[2] += float(concept_info['sentics']['sensitivity'])
            list_features[3] += float(concept_info['sentics']['aptitude'])
            count_words += 1
        except KeyError:
            # The word is not a SenticNet concept; skip it.
            pass
    if count_words != 0:
        list_features = [feature / count_words for feature in list_features]
    return list_features
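# A minimal usage sketch (assumption): `senticnet()` is called with an iterable of
# lower-cased word strings, e.g. token texts or lemmas produced by spaCy.
# The sample words below are illustrative only.
words = ["love", "this", "camera"]
features = senticnet(words)
print(features)  # [avg_pleasantness, avg_attention, avg_sensitivity, avg_aptitude]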
# Imports needed by this snippet (package names assumed from the APIs used below).
import csv
import pickle

from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize
from openpyxl import load_workbook
from senticnet.senticnet import SenticNet
from stanfordcorenlp import StanfordCoreNLP
from tqdm import tqdm


class Get_IAC():
    def __init__(self):
        self.col = ['Name', 'Brand', 'Price', 'Title', 'Score', 'Time', 'Text']
        self.sn = SenticNet('en')
        self.wordnet_lemmatizer = WordNetLemmatizer()

    def review_to_sentences(self, review):
        # review = review.replace(',', '.')
        review = review.replace('.', '. ')
        raw_sentences = sent_tokenize(review)
        return raw_sentences

    def InputData(self, input_path):
        self.dict_list = []
        if '.csv' in input_path:
            with open(input_path, 'r', encoding='utf8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    d = {i: row[i] for i in self.col}
                    self.dict_list.append(d)
        elif '.xlsx' in input_path:
            wb = load_workbook(input_path)
            sheet = wb.active
            count = 0
            for row in sheet.rows:
                if count == 0:
                    # Skip the header row.
                    count += 1
                    continue
                d = {}
                name = 0
                for cell in row:
                    d[self.col[name]] = cell.value
                    name += 1
                self.dict_list.append(d)
        self.dict_list = [
            x for x in self.dict_list
            if x['Text'] != '' and x['Text'] is not None
        ]
        self.sentences = []
        for i in range(len(self.dict_list)):
            for j in self.review_to_sentences(self.dict_list[i]['Text']):
                self.sentences.append(j)
        self.sentences = [x for x in self.sentences if len(x) >= 5]

    def GetIAC(self):
        self.nlp = StanfordCoreNLP(r'stanford-corenlp-full-2018-10-05')
        self.IAC = []
        for i in tqdm(self.sentences):
            dependency = self.nlp.dependency_parse(i)
            token = self.nlp.word_tokenize(i)
            # Merge compound words into a single hyphenated token before re-parsing.
            if [x for x in dependency if 'compound' in x] != []:
                for j in [x for x in dependency if 'compound' in x]:
                    token[j[2] - 1] = token[j[2] - 1] + '-' + token[j[1] - 1]
                    token[j[1] - 1] = ''
                i = ' '.join(token)
            parse = self.nlp.parse(i)
            dependency = self.nlp.dependency_parse(i)
            pos = self.nlp.pos_tag(i)
            token = []
            for j in pos:
                wordnet_pos = self.get_wordnet_pos(j[1])
                token.append(
                    self.wordnet_lemmatizer.lemmatize(j[0].lower(), pos=wordnet_pos))
            # Subject noun relation
            if [x for x in dependency if 'nsubj' in x] != []:
                for j in self.Subject_Noun_Rule(parse, dependency, token, pos):
                    self.IAC.append(j)
            else:
                # Non subject noun relation
                for j in self.Non_Subject_Noun_Rule(parse, dependency, token, pos):
                    self.IAC.append(j)
        self.nlp.close()
        self.IAC = list(set(self.IAC))

    def get_wordnet_pos(self, treebank_tag):
        if treebank_tag.startswith('J'):
            return wn.ADJ
        elif treebank_tag.startswith('V'):
            return wn.VERB
        elif treebank_tag.startswith('N'):
            return wn.NOUN
        elif treebank_tag.startswith('R'):
            return wn.ADV
        else:
            return wn.NOUN

    # Additional Rule: coordinating conjunctions
    def Conj(self, index, dependency, token):
        IAC = []
        index = list(set(index))
        if [x for x in dependency if 'conj' in x] != []:
            conj = [x for x in dependency if 'conj' in x]
            for j in conj:
                if j[1] in index or j[2] in index:
                    if j[1] not in index:
                        IAC.append(token[j[1] - 1])
                        index.append(j[1])
                    if j[2] not in index:
                        IAC.append(token[j[2] - 1])
                        index.append(j[2])
        return IAC

    def Subject_Noun_Rule(self, parse, dependency, token, pos):
        be = ['is', 'was', 'am', 'are', 'were']
        adv_mod = [x for x in dependency if 'advmod' in x]
        adj_mod = [x for x in dependency if 'amod' in x]
        active_token = token[[x for x in dependency if 'nsubj' in x][0][2] - 1]  # subject word
        result = []
        index = []
        if adv_mod != [] or adj_mod != []:
            A, B = self.Rule1(adv_mod, adj_mod, active_token, token)
            result += A
            index += B
        # Does not have an auxiliary verb.
        if not any(k in token for k in be) and [x for x in pos if 'MD' in x] == []:
            A, B = self.Rule2(token, pos, dependency, active_token, adv_mod, adj_mod)
            result += A
            index += B
        if [x for x in dependency if 'dobj' in x] != []:
            A, B = self.Rule3(dependency, token, pos)
            result += A
            index += B
        if [x for x in dependency if 'xcomp' in x] != []:
            A, B = self.Rule4(dependency, token, pos)
            result += A
            index += B
        if [x for x in dependency if 'cop' in x] != []:
            A, B = self.Rule5(dependency, pos, active_token, token)
            result += A
            index += B
        result += self.Conj(index, dependency, token)
        return list(set(result))

    # 3.3.3 Rule 1
    def Rule1(self, adv_mod, adj_mod, active_token, token):
        IAC = []
        index = []
        if adv_mod != []:
            for j in adv_mod:
                try:
                    concept = self.sn.concept(token[j[2] - 1])
                    IAC.append(token[j[2] - 1])
                    index.append(j[2])
                except KeyError:
                    pass  # print(token[j[2]-1] + ' Not in SenticNet')
        if adj_mod != []:
            for j in adj_mod:
                try:
                    concept = self.sn.concept(token[j[2] - 1])
                    IAC.append(token[j[2] - 1])
                    index.append(j[2])
                except KeyError:
                    pass  # print(token[j[2]-1] + ' Not in SenticNet')
        return IAC, index

    # 3.3.3 Rule 2-1
    def Rule2(self, token, pos, dependency, active_token, adv_mod, adj_mod):
        IAC = []
        index = []
        advcl = [x for x in dependency if 'advcl' in x]  # adverbial clause modifier
        if advcl != []:
            for j in advcl:
                IAC.append(token[j[1] - 1])
                index.append(j[1])
            IAC.append(active_token)
            index.append([x for x in dependency if 'nsubj' in x][0][2])
        if adv_mod != []:
            for j in adv_mod:
                IAC.append(token[j[1] - 1])
                index.append(j[1])
            IAC.append(active_token)
            index.append([x for x in dependency if 'nsubj' in x][0][2])
        if adj_mod != []:
            for j in adj_mod:
                IAC.append(token[j[1] - 1])
                index.append(j[1])
            IAC.append(active_token)
            index.append([x for x in dependency if 'nsubj' in x][0][2])
        return IAC, index

    # 3.3.3 Rule 2-2 & 2-3
    def Rule3(self, dependency, token, pos):
        IAC = []
        index = []
        dobj = [x for x in dependency if 'dobj' in x]  # direct object relation
        for j in dobj:
            if pos[j[2] - 1][1] == 'NN':
                try:
                    # Rule 2-3
                    concept = self.sn.concept(token[j[2] - 1])
                    IAC.append(token[j[2] - 1])
                    index.append(j[2])
                    conj = []
                    conj.append(j[2])
                    if [x for x in dependency if 'conj' in x and j[2] in x] != []:
                        for i in [x for x in dependency if 'conj' in x and j[2] in x]:
                            conj.append(i[1])
                            conj.append(i[2])
                    conj = list(set(conj))
                    for i in conj:
                        t1 = i
                        connect = [x for x in dependency if t1 in x]
                        for k in connect:
                            if k[1] != t1:
                                if pos[k[1] - 1][1] == 'NN':
                                    IAC.append(token[k[1] - 1])
                                    index.append(k[1])
                            if k[2] != t1:
                                if pos[k[2] - 1][1] == 'NN':
                                    IAC.append(token[k[2] - 1])
                                    index.append(k[2])
                except KeyError:
                    # Rule 2-2
                    IAC.append(token[j[2] - 1])
                    index.append(j[2])
                    # print(token[j[2]-1] + ' Not in SenticNet')
        return IAC, index

    # 3.3.3 Rule 2-4
    def Rule4(self, dependency, token, pos):
        IAC = []
        index = []
        xcomp = [x for x in dependency if 'xcomp' in x]  # open clausal complement
        for j in xcomp:
            try:
                concept = self.sn.concept(token[j[1] - 1] + '-' + token[j[2] - 1])
                IAC.append(token[j[1] - 1] + '-' + token[j[2] - 1])
            except KeyError:
                pass  # print(token[j[1]-1] + '-' + token[j[2]-1] + ' Not in SenticNet')
            t1 = j[2]
            connect = [x for x in dependency if t1 in x]
            for k in connect:
                if pos[k[2] - 1][1] == 'NN':
                    IAC.append(token[k[2] - 1])
                    index.append(k[2])
        return IAC, index

    # 3.3.3 Rule 3 & 3.3.3 Rule 4 & 3.3.3 Rule 5
    def Rule5(self, dependency, pos, active_token, token):
        IAC = []
        index = []
        cop = [x for x in dependency if 'cop' in x]  # copula
        # Rule 4
        if pos[[x for x in dependency if 'nsubj' in x][0][2] - 1][1] == 'NN':
            IAC.append(active_token)
            index.append([x for x in dependency if 'nsubj' in x][0][2])
        # Rule 3 & Rule 5
        for j in cop:
            # Rule 3
            conj = []
            # if token[j[1]-1] in all_term:
            IAC.append(token[j[1] - 1])
            index.append(j[1])
            conj.append(j[1])
            if [x for x in dependency if 'conj' in x and j[1] in x] != []:
                for i in [x for x in dependency if 'conj' in x and j[1] in x]:
                    conj.append(i[1])
                    conj.append(i[2])
            # Rule 5
            conj = list(set(conj))
            for i in conj:
                t1 = i
                connect = [x for x in dependency if t1 in x]
                for k in connect:
                    if k[1] != t1:
                        if pos[k[1] - 1][1] == 'VB' or pos[k[1] - 1][1] == 'VV':
                            IAC.append(token[k[1] - 1])
                            index.append(k[1])
                            if token[t1 - 1] not in IAC:
                                IAC.append(token[t1 - 1])
                                index.append(t1)
                    if k[2] != t1:
                        if pos[k[2] - 1][1] == 'VB' or pos[k[2] - 1][1] == 'VV':
                            IAC.append(token[k[2] - 1])
                            index.append(k[2])
                            if token[t1 - 1] not in IAC:
                                IAC.append(token[t1 - 1])
                                index.append(t1)
        return IAC, index

    def Non_Subject_Noun_Rule(self, parse, dependency, token, pos):
        result = []
        index = []
        if [x for x in dependency if 'xcomp' in x] != []:
            A, B = self.Rule6(dependency, token)
            result += A
            index += B
        if [x for x in dependency if 'case' in x] != []:
            A, B = self.Rule7(dependency, pos, token)
            result += A
            index += B
        if [x for x in dependency if 'dobj' in x] != []:
            A, B = self.Rule8(dependency, token)
            result += A
            index += B
        result += self.Conj(index, dependency, token)
        return list(set(result))

    # 3.3.4 Rule 1
    def Rule6(self, dependency, token):
        IAC = []
        index = []
        xcomp = [x for x in dependency if 'xcomp' in x]  # open clausal complement
        for j in xcomp:
            # if token[j[1]-1] in all_term:
            IAC.append(token[j[1] - 1])
            index.append(j[1])
        return IAC, index

    # 3.3.4 Rule 2
    def Rule7(self, dependency, pos, token):
        IAC = []
        index = []
        case = [x for x in dependency if 'case' in x]  # prepositional relation
        for j in case:
            if pos[j[1] - 1][1] == 'NN':
                connect = [
                    x for x in dependency if j[1] in x and 'mod' in x[0]
                ]
                for i in connect:
                    IAC.append(token[i[1] - 1])
                    IAC.append(token[i[2] - 1])
                    index.append(i[1])
                    index.append(i[2])
        return list(set(IAC)), list(set(index))

    # 3.3.4 Rule 3
    def Rule8(self, dependency, token):
        IAC = []
        index = []
        dobj = [x for x in dependency if 'dobj' in x]  # direct object relation
        for j in dobj:
            IAC.append(token[j[2] - 1])
            index.append(j[2])
        return IAC, index

    def Save(self, output_path):
        with open(output_path, 'wb') as f:
            pickle.dump(self.IAC, f)
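# A brief driver sketch (assumption): the file names below are placeholders, and the
# Stanford CoreNLP distribution `stanford-corenlp-full-2018-10-05` referenced in
# GetIAC() is available in the working directory.
if __name__ == '__main__':
    extractor = Get_IAC()
    extractor.InputData('reviews.xlsx')  # load reviews and split them into sentences
    extractor.GetIAC()                   # run the dependency-based extraction rules
    extractor.Save('iac.pkl')            # pickle the extracted aspect candidates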
from functools import reduce
from itertools import groupby
from operator import add, itemgetter

from senticnet.senticnet import SenticNet


def merge_list_of_records_by(key, combine):
    # The head of this helper was truncated in the original snippet; everything above
    # the final list comprehension is an assumed reconstruction. It merges the records
    # that share the same `key`, combining every other field with the binary `combine`.
    keyprop = itemgetter(key)

    def merge_two(rec1, rec2):
        return {k: rec1[k] if k == key else combine(rec1[k], rec2[k]) for k in rec1}

    return lambda lst: [
        reduce(merge_two, records)
        for _, records in groupby(sorted(lst, key=keyprop), keyprop)
    ]


a = [{'time': '25 APR', 'total': 10, 'high': 10},
     {'time': '26 APR', 'total': 5, 'high': 5}]
b = [{'time': '24 APR', 'total': 10, 'high': 10},
     {'time': '26 APR', 'total': 15, 'high': 5}]

merger = merge_list_of_records_by('time', add)
hasil_merge = merger(a + b)
print(hasil_merge)

print("synonyms with thesaurus==================================================================")
# from PyDictionary import PyDictionary
#
# dictionary = PyDictionary()
# print(dictionary.synonym("good"))

from thesaurus import Word

w = Word('suicidal')
syn = w.synonyms()
print(syn)

sn = SenticNet()
try:
    concept_info_sinonim = sn.concept("suicidal")
    print(concept_info_sinonim)
except Exception as e:
    print(e)
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
import nltk

# Opens the interactive NLTK downloader; nltk.download('wordnet') would fetch only
# the WordNet data this script needs.
nltk.download()

import os

from senticnet.senticnet import SenticNet

sn = SenticNet()
# Looking up a term that is not a SenticNet concept (here, an empty string)
# raises KeyError, so guard the call.
try:
    sn.concept('')
except KeyError:
    pass


def fun1(d):
    # Returns True if `d` is a SenticNet concept, False otherwise.
    try:
        from senticnet.senticnet import SenticNet
        sn = SenticNet()
        sn.semantics(d)
        return True
    except KeyError:
        return False


fun1('day')

from nltk.corpus import wordnet

# wordnet.synsets() looks a word up by its surface form; wordnet.synset() would
# expect a full 'lemma.pos.nn' identifier such as 'dog.n.01'.
sk = wordnet.synsets('ssd')
# Imports assumed for this snippet, based on the APIs used below: the pycorenlp
# client for a CoreNLP HTTP server, NLTK stopwords, and the senticnet package.
from pycorenlp import StanfordCoreNLP
from nltk.corpus import stopwords
from senticnet.senticnet import SenticNet


def get_clues(text):
    text = text
    print("*--------(%s)-------------*" % (text))
    print(type(text))
    nlp = StanfordCoreNLP('http://localhost:9001')
    stop_words = set(stopwords.words('english'))
    '''
    Method to remove numbers appended at last
    '''
    dep_parse = nlp.annotate(text,
                             properties={
                                 'annotators': 'depparse',
                                 'outputFormat': 'json',
                                 'timeout': 10000,
                             })
    pos = nlp.annotate(text,
                       properties={
                           'annotators': 'lemma',
                           'outputFormat': 'json',
                           'timeout': 10000,
                       })
    sn = SenticNet()
    word_to_dep = [{} for i in range(len(dep_parse['sentences']))]
    word_to_par = [{} for i in range(len(dep_parse['sentences']))]
    word_to_pos = [{} for i in range(len(dep_parse['sentences']))]
    word_to_lemma = [{} for i in range(len(dep_parse['sentences']))]
    word_to_child = [{} for i in range(len(dep_parse['sentences']))]
    sents = [[] for i in range(len(dep_parse['sentences']))]
    index_to_word = {}
    '''
    Constructing dicts for maintaining the dependencies among words.
    Appending each word by occurrence number to maintain distinct word count.
    '''
    # print(dep_parse['sentences'])
    print("********")
    for i, sent in enumerate(dep_parse['sentences']):
        for dep in sent['basicDependencies']:
            word_to_dep[i][dep['dependentGloss'] + str(dep['dependent'])] = dep['dep']
            word_to_par[i][dep['dependentGloss'] + str(dep['dependent'])] = (
                dep['governorGloss'] + str(dep['governor']))
            index_to_word[dep['dependentGloss'] + str(dep['dependent'])] = dep['dependentGloss']
            if (dep['governorGloss'] + str(dep['governor']) not in word_to_child[i]):
                word_to_child[i][dep['governorGloss'] + str(dep['governor'])] = []
            if (dep['dependentGloss'] + str(dep['dependent']) not in word_to_child[i]):
                word_to_child[i][dep['dependentGloss'] + str(dep['dependent'])] = []
            word_to_child[i][dep['governorGloss'] + str(dep['governor'])].append(
                dep['dependentGloss'] + str(dep['dependent']))
            sents[i].append(dep['dependentGloss'] + str(dep['dependent']))
        word_to_dep[i]['ROOT0'] = 'root'
        word_to_par[i]['ROOT0'] = 'root'
    for i, sent in enumerate(pos['sentences']):
        for pos_tagger in sent['tokens']:
            word_to_pos[i][pos_tagger['word']] = pos_tagger['pos']
            word_to_lemma[i][pos_tagger['word']] = pos_tagger['lemma']
        word_to_pos[i]['ROOT'] = 'root'
        word_to_lemma[i]['ROOT'] = 'root'
    '''
    Displaying the deps
    '''
    # Implementing rules to extract aspects
    for i, sent in enumerate(sents):
        if (__name__ == '__main__'):
            print(word_to_dep[i], word_to_par[i], word_to_pos[i])
            print("Children==>")
            print(word_to_child[i])
    aspects = []
    for i, sent in enumerate(sents):
        for word in sent:
            '''
            Rule 0
            '''
            if ('subj' in word_to_dep[i][word]):
                for child in word_to_child[i][word_to_par[i][word]]:
                    if ('amod' in word_to_dep[i][child]
                            or 'advmod' in word_to_dep[i][child]):
                        aspects.append(word_to_par[i][word])
                        if (__name__ == '__main__'):
                            print("Rule 0 triggered.")
            '''
            Rule 1 (without sub): Very big to hold.
            '''
            if (word_to_dep[i][word] == 'xcomp'
                    and ('JJ' in word_to_pos[i][index_to_word[word_to_par[i][word]]]
                         or 'RB' in word_to_pos[i][index_to_word[word_to_par[i][word]]])):
                if (__name__ == '__main__'):
                    print("Rule 1 triggered")
                aspects.append(word_to_par[i][word])
            '''
            Rule 2 (without subj): Not to mention the price of the phone
            '''
            if (word_to_dep[i][word] == 'dobj'
                    and 'VB' in word_to_pos[i][index_to_word[(word_to_par[i][word])]]
                    and ('NN' in word_to_pos[i][index_to_word[(word)]]
                         or 'JJ' in word_to_pos[i][index_to_word[(word)]])):
                aspects.append(word)
                if (__name__ == '__main__'):
                    print("Rule 2 triggered")
                    print(word)
            '''
            Rule 3 (without subj): Love the sleekness of the player
            '''
            if ('NN' in word_to_pos[i][index_to_word[(word)]]
                    and word_to_dep[i][word] == 'nmod'):
                aspects.append(word_to_par[i][word])
                if (__name__ == '__main__'):
                    print("Rule 3 triggered")
                    print(word_to_par[i][word])
            '''
            Rule 4 (with sub): The battery lasts little -- two aspects
            '''
            if (word_to_dep[i][word] == 'advmod'
                    or word_to_dep[i][word] == 'amod'
                    or word_to_dep[i][word] == 'advcl') and (
                        'VB' in word_to_pos[i][index_to_word[(word_to_par[i][word])]]):
                aspects.append(word_to_par[i][word])
                for word2 in sent:
                    if (word2 != word and word_to_dep[i][word2] == 'nsubj'
                            and word_to_par[i][word2] == word_to_par[i][word]
                            and ('NN' in word_to_pos[i][index_to_word[word2]]
                                 or 'JJ' in word_to_pos[i][index_to_word[word2]])):
                        aspects.append(word2)
                        if (__name__ == '__main__'):
                            print("Rule 4 triggered")
                            print(word2)
            '''
            Rule 5 (with sub): I like the lens of this camera
            '''
            if ('NN' in word_to_pos[i][index_to_word[(word)]]
                    and word_to_dep[i][word] == 'dobj'):
                if (__name__ == '__main__'):
                    print("Rule 5 triggered")
                    print(word)
                try:
                    concept_info = sn.concept((word))
                    print("present in senticnet")
                except KeyError:
                    print("Yay")
                    aspects.append(word)
            '''
            Rule 6 : I like the beauty of the screen.
            Check if senticnet condition should be added
            '''
            if ('NN' in word_to_pos[i][index_to_word[(word)]]
                    and word_to_dep[i][word] == 'dobj'):
                try:
                    concept_info = sn.concept((word))
                    aspects.append(word)
                    print("yay!")
                except KeyError:
                    print("oops, not there in SenticNet")
                for word2 in sent:
                    if (word2 != word and word_to_par[i][word2] == word
                            and 'NN' in word_to_pos[i][index_to_word[(word2)]]):
                        aspects.append(word2)
                        if (__name__ == '__main__'):
                            print("Rule 6 triggered.")
                            print(word2)
            '''
            Rule 7 : I would like to comment on the camera of this phone.
            '''
            if (word_to_dep[i][word] == 'xcomp'):
                try:
                    concept_info = sn.concept((word))
                    aspects.append(word)
                    print("yay!")
                except KeyError:
                    print("oops, not there in SenticNet")
                for child in word_to_child[i][word]:
                    if ('NN' in word_to_pos[i][index_to_word[child]]):
                        aspects.append(child)
                        if (__name__ == '__main__'):
                            print("Rule 7 triggered.")
                            print(word)
                            print(child)
            '''
            Rule 8 : The car is expensive.
            '''
            if (word_to_dep[i][word] == 'nsubj'):
                for word2 in sent:
                    if (word2 != word and word_to_dep[i][word2] == 'cop'
                            and word_to_par[i][word2] == word_to_par[i][word]):
                        aspects.append(word_to_par[i][word])
                        if (__name__ == '__main__'):
                            print("Rule 8 triggered")
                            print(word_to_par[i][word])
            '''
            Rule 9 : The camera is nice.
            '''
            if (word_to_dep[i][word] == 'nsubj'
                    and 'NN' in word_to_pos[i][index_to_word[(word)]]):
                for word2 in sent:
                    if (word2 != word and word_to_dep[i][word2] == 'cop'
                            and word_to_par[i][word2] == word_to_par[i][word]):
                        aspects.append(word)
                        if (__name__ == '__main__'):
                            print("Rule 9 triggered")
                            print(word)
            '''
            Rule 10 : The phone is very lightweight to carry.
            '''
            if (word_to_dep[i][word] == 'cop'):
                for word2 in sent:
                    if (word2 != word
                            and 'VB' in word_to_pos[i][index_to_word[(word2)]]
                            and word_to_par[i][word] == word_to_par[i][word2]):
                        aspects.append(word2)
                        if (__name__ == '__main__'):
                            print("Rule 10 triggered.")
                            print(word2)
            '''
            Extracting mods of dobjs
            '''
            if (word_to_dep[i][word] == 'dobj'):
                for child in word_to_child[i][word]:
                    if ('mod' in word_to_dep[i][child]
                            and 'JJ' in word_to_pos[i][index_to_word[(child)]]):
                        aspects.append(child)
        '''
        Rule 11 : Checking for conjunctions
        '''
        for asp in aspects:
            for word in sent:
                if (word_to_dep[i][word] == 'conj' and word_to_par[i][word] == asp):
                    aspects.append(word)
                    if (__name__ == '__main__'):
                        print("Rule conj triggered.")
                        print(word)
    finalIAC = set(aspects)
    finalIAC = [index_to_word[f] for f in finalIAC]
    finalIAC = [w for w in finalIAC if w not in stop_words]
    finalSenti = []
    for iac in finalIAC:
        try:
            concept_info = sn.concept((iac))
            finalSenti.append(iac)
        except KeyError:
            print("No word available for " + iac)
    return finalIAC, finalSenti
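# A small driver sketch (assumption): a CoreNLP server is already running on
# http://localhost:9001 and get_clues() lives in this module; the sample
# sentence is illustrative only.
if __name__ == '__main__':
    iac, senti = get_clues("I like the lens of this camera.")
    print("Candidate aspect clues:", iac)
    print("Clues also found in SenticNet:", senti)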
# Each line of the corpus must correspond to one document of the corpus.
# boc_model = boc.BOCModel(doc_path="input corpus path")
boc_model = boc.BOCModel('text.txt')
# boc_model.context = text
# The output can be saved with the save_path parameter.
boc_matrix, word2concept_list, idx2word_converter = boc_model.fit()

# SenticNet lexicon lookup
from senticnet.senticnet import SenticNet

sn = SenticNet()
concept_info = sn.concept(text)
polarity_value = sn.polarity_value(text)
polarity_intense = sn.polarity_intense(text)
moodtags = sn.moodtags(text)
semantics = sn.semantics(text)
sentics = sn.sentics(text)

print('==================================')
print('text: ', text)
print('concept_info: ', concept_info)
print('polarity_value: ', polarity_value)
print('polarity_intense: ', polarity_intense)
print('moodtags: ', moodtags)
print('semantics: ', semantics)
print('sentics: ', sentics)
print('==================================')
# Imports assumed for this snippet, based on the APIs used below: the pycorenlp
# client for a CoreNLP HTTP server, NLTK stopwords, and the senticnet package.
from pycorenlp import StanfordCoreNLP
from nltk.corpus import stopwords
from senticnet.senticnet import SenticNet


def get_clues(text):
    text = text
    print("*--------(%s)-------------*" % (text))
    print(type(text))
    nlp = StanfordCoreNLP('http://localhost:9001')
    stop_words = set(stopwords.words('english'))
    '''
    Method to remove numbers appended at last
    '''
    dep_parse = nlp.annotate(text,
                             properties={
                                 'annotators': 'depparse',
                                 'outputFormat': 'json',
                                 'timeout': 10000,
                             })
    pos = nlp.annotate(text,
                       properties={
                           'annotators': 'lemma',
                           'outputFormat': 'json',
                           'timeout': 10000,
                       })
    sn = SenticNet()
    word_to_dep = [{} for i in range(len(dep_parse['sentences']))]
    word_to_par = [{} for i in range(len(dep_parse['sentences']))]
    word_to_pos = [{} for i in range(len(dep_parse['sentences']))]
    word_to_lemma = [{} for i in range(len(dep_parse['sentences']))]
    word_to_child = [{} for i in range(len(dep_parse['sentences']))]
    sents = [[] for i in range(len(dep_parse['sentences']))]
    index_to_word = {}
    aspect_result = [[] for i in range(len(dep_parse['sentences']))]
    '''
    Constructing dicts for maintaining the dependencies among words.
    Appending each word by occurrence number to maintain distinct word count.
    '''
    print("********")
    for i, sent in enumerate(dep_parse['sentences']):
        for dep in sent['basicDependencies']:
            word_to_dep[i][dep['dependentGloss'] + str(dep['dependent'])] = dep['dep']
            word_to_par[i][dep['dependentGloss'] + str(dep['dependent'])] = (
                dep['governorGloss'] + str(dep['governor']))
            index_to_word[dep['dependentGloss'] + str(dep['dependent'])] = dep['dependentGloss']
            if (dep['governorGloss'] + str(dep['governor']) not in word_to_child[i]):
                word_to_child[i][dep['governorGloss'] + str(dep['governor'])] = []
            if (dep['dependentGloss'] + str(dep['dependent']) not in word_to_child[i]):
                word_to_child[i][dep['dependentGloss'] + str(dep['dependent'])] = []
            word_to_child[i][dep['governorGloss'] + str(dep['governor'])].append(
                dep['dependentGloss'] + str(dep['dependent']))
            sents[i].append(dep['dependentGloss'] + str(dep['dependent']))
        word_to_dep[i]['ROOT0'] = 'root'
        word_to_par[i]['ROOT0'] = 'root'
    for i, sent in enumerate(pos['sentences']):
        for pos_tagger in sent['tokens']:
            word_to_pos[i][pos_tagger['word']] = pos_tagger['pos']
            word_to_lemma[i][pos_tagger['word']] = pos_tagger['lemma']
        word_to_pos[i]['ROOT'] = 'root'
        word_to_lemma[i]['ROOT'] = 'root'
    '''
    Displaying the deps
    '''
    # Implementing rules to extract aspects
    for i, sent in enumerate(sents):
        if (__name__ == '__main__'):
            print(word_to_dep[i], word_to_par[i], word_to_pos[i], word_to_lemma[i])
            print("Children==>")
            print(word_to_child[i])
    for i, sent in enumerate(sents):
        token_t = word_to_child[i]['ROOT0'][0]
        is_sub = False
        token_h = None
        for child in word_to_child[i][token_t]:
            if 'subj' in word_to_dep[i][child]:
                is_sub = True
                token_h = child
        # If a subject noun relationship is present
        if is_sub:
            """
            Rule 0: if any adv or adj modifies the token t.
            """
            for child in word_to_child[i][token_t]:
                if ('amod' in word_to_dep[i][child]
                        or 'advmod' in word_to_dep[i][child]):
                    try:
                        concept_info = sn.concept(index_to_word[child])
                        aspect_result[i].append(token_t)
                        if __name__ == '__main__':
                            print("Rule 0 triggered.")
                            print("present in senticnet")
                    except KeyError:
                        print("OOps")
            """
            Rule 1: The battery lasts little.
            """
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'advmod'
                        or word_to_dep[i][child] == 'amod'
                        or word_to_dep[i][child] == 'advcl') and (
                            'VB' in word_to_pos[i][index_to_word[token_t]]):
                    aspect_result[i].append(token_t)
                    aspect_result[i].append(token_h)
                    if __name__ == '__main__':
                        print("Rule 1 triggered.")
                        print(token_t)
                        print(token_h)
            """
            Rule 2: I like the beauty of the screen (and I like the lens of this camera).
            """
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'dobj'
                        and 'NN' in word_to_pos[i][index_to_word[child]]):
                    aspect_result[i].append(child)
                    if __name__ == '__main__':
                        print(child)
                    try:
                        concept_info = sn.concept(index_to_word[child])
                        if __name__ == '__main__':
                            print("Rule 2 triggered")
                        for grandchild in word_to_child[i][child]:
                            if ('NN' in word_to_pos[i][index_to_word[grandchild]]):
                                aspect_result[i].append(grandchild)
                                print(grandchild)
                    except KeyError:
                        print("OOps")
            """
            Rule 3: I would like to comment on the camera of this phone.
            """
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'xcomp'):
                    try:
                        sn.concept(index_to_word[child])
                        aspect_result[i].append(child)
                        if __name__ == '__main__':
                            print(child)
                    except KeyError:
                        print("OOps")
                    for grandchild in word_to_child[i][child]:
                        if ('NN' in word_to_pos[i][index_to_word[grandchild]]):
                            aspect_result[i].append(grandchild)
                            if __name__ == '__main__':
                                print(grandchild)
                                print("Rule 3 triggered.")
            """
            Rule 4: The car is expensive.
            """
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'cop'):
                    try:
                        sn.concept(word_to_lemma[i][index_to_word[token_t]])
                        aspect_result[i].append(token_t)
                        if __name__ == '__main__':
                            print("Rule 4 triggered")
                            print(token_t)
                    except KeyError:
                        pass
            """
            Rule 5: The camera is nice.
            """
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'cop'
                        and 'NN' in word_to_pos[i][index_to_word[token_h]]):
                    aspect_result[i].append(token_h)
                    if __name__ == '__main__':
                        print("Rule 5 triggered.")
                        print(token_h)
            """
            Rule 6:
            """
            for child in word_to_child[i][token_t]:
                if (word_to_dep[i][child] == 'cop'):
                    for child2 in word_to_child[i][token_t]:
                        if (child != child2
                                and 'VB' in word_to_pos[i][index_to_word[child2]]):
                            try:
                                sn.concept(index_to_word[token_t])
                                sn.concept(index_to_word[child2])
                                aspect_result[i].append(token_t)
                                aspect_result[i].append(child2)
                                if __name__ == '__main__':
                                    print("rule 6 trigg")
                                    print(token_t)
                                    print(child2)
                            except KeyError:
                                pass
        else:
            """
            Rule 7: Very big to hold.
            """
            for word in sent:
                if ('RB' in word_to_pos[i][index_to_word[word]]
                        or 'JJ' in word_to_pos[i][index_to_word[word]]):
                    for child in word_to_child[i][word]:
                        if (word_to_dep[i][child] == 'xcomp'
                                or word_to_dep[i][child] == 'ccomp'):
                            aspect_result[i].append(word)
                            if __name__ == '__main__':
                                print("Rule 7 triggered")
                                print(word)
            """
            Rule 8: Love the sleekness of the player.
            """
            for word in sent:
                for child in word_to_child[i][word]:
                    if ('NN' in word_to_pos[i][index_to_word[child]]
                            and word_to_dep[i][child] == 'nmod'):
                        for grandchild in word_to_child[i][child]:
                            if ('IN' in word_to_pos[i][index_to_word[grandchild]]):
                                aspect_result[i].append(word)
                                aspect_result[i].append(child)
                                if __name__ == '__main__':
                                    print(word)
                                    print(child)
                                    print("Rule 8 triggered.")
            """
            Rule 9: Not to mention the price of the phone.
            """
            for word in sent:
                for child in word_to_child[i][word]:
                    if (word_to_dep[i][child] == 'dobj'):
                        aspect_result[i].append(child)
                        if __name__ == '__main__':
                            print(child)
                            print("Rule 9 triggered")
        '''
        Rule 11 : Checking for conjunctions
        '''
        for asp in aspect_result[i]:
            for word in sent:
                if (word_to_dep[i][word] == 'conj' and word_to_par[i][word] == asp):
                    aspect_result[i].append(word)
                    if (__name__ == '__main__'):
                        print("Rule conj triggered.")
                        print(word)
    finalIAC = [set(aspect_result[i]) for i in range(len(sents))]
    finalIAC = [[index_to_word[w] for w in finalIAC[i]] for i in range(len(sents))]
    print(finalIAC)
    singleFinalIAC = []
    for i in range(len(sents)):
        for w in finalIAC[i]:
            if w not in stop_words:
                singleFinalIAC.append(w)
    print(singleFinalIAC)
    finalSenti = []
    for iac in singleFinalIAC:
        try:
            concept_info = sn.concept((iac))
            finalSenti.append(iac)
        except KeyError:
            print("No word available for " + iac)
    return singleFinalIAC, finalSenti
sisa = 0
# print("Number of existing wordlist entries: {}".format(len(existWordlistDepression)))
# here it is
for wordExist in existWordlistDepression:
    word = wordExist['word']
    tokens = word_tokenize(word)
    for token, tag in pos_tag(tokens):
        lemma = wn_lemmater.lemmatize(token, tag_map[tag[0]])
        print("original lemma: {}".format(lemma))
        if lemma not in lemmaOfExistWordlist:
            lemmaOfExistWordlist.append(lemma)
            # 685 unique lemmas were found in the DB
            try:
                syns = wordnet.synsets(lemma)
                concept_info = sn.concept(lemma)
                concept_info_origin = concept_info
                conceptExist.append(concept_info)
                # Wrap the word in an object.
                senticwordObj = WordList_sentic()
                senticwordObj = WordList_sentic(None, lemma, concept_info)  # the original word object
                if (float(concept_info['polarity_intense']) < 0):
                    # Proven negative.
                    # print(concept_info['polarity_intense'])
                    # conceptExistNegative.append(concept_info)
                    objsConceptExistNegative.append(senticwordObj)
                elif (float(concept_info['polarity_intense']) > 0):
                    # Not marked negative in SenticNet.
                    # SYNONYMS: check WordNet synonyms if the word is not marked negative there.
                    print("positive lemma entering the negative branch: {}".format(lemma))
from senticnet.senticnet import SenticNet

teste = []
sn = SenticNet('pt')
concept_info = sn.concept('amor')
polarity_value = sn.polarity_value('amor')
polarity_intense = sn.polarity_intense('amor')
moodtags = sn.moodtags('amor')
semantics = sn.semantics('amor')
sentics = sn.sentics('amor')
teste.append(concept_info)
print(teste)