def tag(self, sentence_list):
    # Collect multiword expressions from every sentence. With chunks,
    # relations and anchors disabled, each parsed token has the form
    # WORD/POS/LEMMA, so field 2 is the lemma.
    multiword_expressions = []
    for sentence in sentence_list:
        sentence_parsed = MBSP.parse(sentence, chunks=False, relations=False, anchors=False)
        sentence_lemmatized = " ".join(
            [x.split("/")[2] for x in sentence_parsed.split(" ")])
        multiword_expressions += self.__pattern_1(sentence_lemmatized)
        multiword_expressions += self.__pattern_2(sentence_parsed)
    return multiword_expressions
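# A minimal standalone sketch of the lemmatization step used in tag() above,
# assuming the WORD/POS/LEMMA token format that MBSP.parse() produces when
# chunks, relations and anchors are disabled. The tagged string is illustrative.
example_parsed = "The/DT/the cats/NNS/cat sat/VBD/sit ././."
example_lemmas = " ".join([x.split("/")[2] for x in example_parsed.split(" ")])
print example_lemmas  # -> "the cat sit ."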
def queryGenerator(raw_input_string, change_sentiment):
    #
    # Step 0: Determine whether the user is asking for a meaning or for a debate response.
    #
    word, isMeaning = MeaningExtractor.getIfMeaning(str(raw_input_string))
    if isMeaning:
        meaning = MeaningExtractor.getMeaning(word)
        return meaning, True
    #
    # Step 1: Obtain the input from the user.
    #
    s = str(raw_input_string)
    #
    # Step 2: Convert the sentence into TextBlob and MBSP Sentence objects respectively.
    #
    input_string = TextBlob(s)
    clipsSentence = MBSP.Sentence(MBSP.parse(s),
        token=[MBSP.WORD, MBSP.POS, MBSP.CHUNK, MBSP.PNP, MBSP.REL, MBSP.ANCHOR, MBSP.LEMMA])
    #
    # Step 3: Define the variables required for the analysis and interpretation of the input.
    #
    query = ""
    w = []
    subjPhrases, verbPhrases, predPhrases = [], [], []
    pNouns, verbs = [], []
    pnps, anchors = [], []
    #
    # Step 4: Obtain all the proper nouns from the sentence.
    #
    for sentence in input_string.sentences:
        tagged = sentence.tags
        for word_tag in tagged:
            if word_tag[1] == 'NNP' or word_tag[1] == 'NNPS':
                pNouns.append(word_tag[0])
    #
    # Step 5: Obtain the different parts of the sentence, i.e. subject, predicate, object.
    #
    for chunk in clipsSentence.chunks:
        if chunk.role == 'SBJ' and chunk not in subjPhrases:
            subjPhrases.append(chunk.string)
        elif (chunk.type == 'VP' or chunk.type == 'ADVP') and chunk not in verbPhrases:
            verbPhrases.append(chunk.string)
        elif (chunk.role == 'PRD' or chunk.role == 'OBJ') and chunk not in predPhrases:
            predPhrases.append(chunk.string)
    #
    # Step 6: Detect the prepositional noun phrases and the anchors corresponding to them (ref: CLIPS docs).
    #
    pnps = clipsSentence.pnp
    for item in pnps:
        if item.anchor not in anchors:
            anchors.append(item.anchor)
    #
    # Step 7.0: Train the classifier for sentiment data.
    #
    ###with open('sentiment_training_formatted.csv', 'r') as fp:
    ###    classifier = NaiveBayesClassifier(fp, format='csv')
    #
    # Step 7.1: Classify the user input and record the sentiment.
    #
    '''def getSentiment(sentence):
        sentiObj = TextBlob(sentence, analyzer=NaiveBayesAnalyzer()).sentiment
        pos_ratio = sentiObj.p_pos
        neg_ratio = sentiObj.p_neg
        if pos_ratio >= neg_ratio:
            return 'pos'
        else:
            return 'neg'
    input_sentiment = getSentiment(input_string)'''
    #
    # Step 8: Generate the final query.
    #
    # Add proper nouns that are not already part of a subject phrase.
    for pNoun in pNouns:
        if not any(pNoun in sbj for sbj in subjPhrases):
            query = query + pNoun + " "
            #print 'pNoun:' + pNoun
    for sbj in subjPhrases:
        query = query + sbj + " "
        #print 'sbj:' + sbj
    for prd in predPhrases:
        query = query + prd + " "
        #print 'prd:' + prd
    for vr in verbPhrases:
        query = query + vr + " "
        #print 'vr:' + vr
    for anc in anchors:
        anc = anc.string
        query = query + anc + " "
        #print 'anc:' + anc
    for pnp in pnps:
        pnp = pnp.string
        query = query + pnp + " "
        #print 'pnp:' + pnp
    #
    # Step 9: [Blank]
    #
    #
    # Step 10: Remove repeated words from the query (case-insensitively).
    #
    query_blob = TextBlob(query)
    wrds = query_blob.words
    final_words = []
    for wrd in wrds:
        #print wrd
        if str(wrd).lower() not in [str(fw).lower() for fw in final_words]:
            final_words.append(wrd)
    final_query = ""
    for wrd in final_words:
        final_query = final_query + wrd + " "
    #
    # Step 11: Sort the query words in the order they appear in the input.
    #
    index_dict = {}
    indexes = []
    for word in TextBlob(final_query).words:
        try:
            index_dict[input_string.index(str(word))] = str(word)
            indexes.append(input_string.index(str(word)))
        except ValueError:
            print "Word not in main string:", word
    indexes.sort()
    final_query = ""
    for index in indexes:
        final_query = final_query + index_dict[index] + " "
    #
    # Step 12.0: Filter the query for articles and the copula: a, an, the, is.
    #
    reps = {' a ': ' ', ' an ': ' ', ' the ': ' ', ' is ': ' '}
    final_query = replace_all(final_query, reps)
    if change_sentiment:
        #
        # Step 12.1: Build a dictionary of the replaceable words.
        #
        # Note: make priority lists for the different sets of words.
        def create_replace_dict(lines):
            replace_dict = {}
            for line in lines:
                kv = line.split(',')
                replace_dict[kv[0]] = kv[1][:-1]  # [:-1] strips the trailing newline.
            return replace_dict
        #
        # Step 12.2: Replace words in the given phrase.
        #
        def replace_words(phrase):
            l = open('replace_list.csv', 'r').readlines()
            new_phrase = replace_all(phrase, create_replace_dict(l))
            # Fall back to the second replacement list if nothing changed.
            if new_phrase == phrase:
                l = open('replace_list2.csv', 'r').readlines()
                new_phrase = replace_all(phrase, create_replace_dict(l))
            return new_phrase
        final_query = replace_words(final_query)
        #
        # Step 13: Get the sentiment of the final query.
        #
        '''final_query_sentiment = getSentiment(final_query)'''
        #
        # Step 14: Print out the query.
        #
        print "<------------------------------------------->"
        print "in:", s
        print "out:", final_query
        print "<------------------------------------------->"
        return final_query, False
    else:
        print "<------------------------------------------->"
        print "in:", s
        print "out:", s
        print "<------------------------------------------->"
        return s, False
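# queryGenerator() relies on a replace_all() helper that is not defined in this
# snippet. A minimal sketch of what it presumably does (an assumption, not the
# original implementation): substitute each key of the replacement dictionary
# found in the text with its value.
def replace_all(text, replacements):
    for old, new in replacements.items():
        text = text.replace(old, new)
    return text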
#### MEMORY-BASED SHALLOW PARSER ######################################################################
# Copyright (c) 2003-2010 University of Antwerp, Belgium and Tilburg University, The Netherlands
# License: GNU General Public License, see LICENSE.txt
######################################################################################################

# Add the upper directory (where the MBSP module is) to the search path.
import os, sys; sys.path.insert(0, os.path.join("..", ".."))

import MBSP

if not MBSP.config.autostart:
    MBSP.start()

s = MBSP.parse("I ate many slices of pizza with a fork.")
s = MBSP.split(s)

# A useful operation is to extract the heads in a sentence,
# for example to create a "normalized" sentence, or to construct a Timbl lookup instance.
# A head is the principal word in a chunk.
# We could retrieve the heads by iterating over Sentence.chunks,
# but this would skip the loose words in between chunks (e.g. "and" or ","),
# which can also be useful, particularly in the case of constructing a lookup instance.
# Sentence.constituents() returns an in-order list of mixed Chunk and Word objects
# that can be used for this purpose:
heads = []
for p in s[0].constituents(pnp=False):
    if isinstance(p, MBSP.Chunk):
        heads.append((p.head.index, p.head.lemma))  # The chunk's principal word.
    if isinstance(p, MBSP.Word):
        heads.append((p.index, p.lemma))  # A loose word in between chunks.
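# The comments above mention building a "normalized" sentence from the heads.
# A minimal sketch of that step, assuming heads holds the (index, lemma) pairs
# collected above; sorting by word index restores the original word order.
normalized = " ".join([lemma for index, lemma in sorted(heads)])
print normalized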
# parsetree() is assumed to come from the pattern library (pattern.en), which
# offers the same traversable sentence/chunk/word tree API as MBSP.
from pattern.en import parsetree
import MBSP

# The hard-coded sample text is immediately overridden by user input.
text = 'Automation is good for the economy. A world dominated by robots is a thing I am looking forward to!'
text = raw_input('Enter text: ')

parse_tree = parsetree(text)
for sentence in parse_tree:
    for chunk in sentence.chunks:
        for word in chunk.words:
            print str(word),
        print '\n', str(chunk)
        print

self_parse = []
construct = []
sentence = []

# Split MBSP's flat output into per-token field lists; a sentence ends at the
# period token (POS tag '.').
parsed_text = MBSP.parse(text)
parsed_sentences = parsed_text.split('O/.')
parsed_words = parsed_text.split(' ')
for word in parsed_words:
    element = [e.encode('ascii') for e in word.split('/')]
    construct.append(element)
    if element[1] == '.':
        sentence.append(construct)
        construct = []
    print element
print
# print str(parsed_text)
print [word[0] for word in sentence[0]]
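# The manual string splitting above assumes MBSP's slash-separated token format.
# As an alternative sketch, MBSP.split() exposes the same parse as traversable
# Sentence objects, avoiding the by-hand bookkeeping:
for sent in MBSP.split(parsed_text):
    print [w.string for w in sent.words]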
#### MEMORY-BASED SHALLOW PARSER ######################################################################
# Copyright (c) 2003-2010 University of Antwerp, Belgium and Tilburg University, The Netherlands
# License: GNU General Public License, see LICENSE.txt
######################################################################################################

# Add the upper directory (where the MBSP module is) to the search path.
import os, sys; sys.path.insert(0, os.path.join("..", ".."))

import MBSP

if not MBSP.config.autostart:
    MBSP.start()

s = MBSP.parse("I eat pizza with a fork.")
s = MBSP.split(s)  # Yields a list of traversable Sentence objects.

for sentence in s:
    for chunk in sentence.chunks:
        print repr(chunk)
        print
        print "      Words:", chunk.words        # A list of Word objects.
        print "  Relations:", chunk.related      # A list of Chunk objects.
        print " Parent PNP:", repr(chunk.pnp)    # A PNPChunk object, or None.
        print "Related PNP:", chunk.attachments  # A list of PNPChunk objects.
        print

# Remove the servers from memory when you're done:
# MBSP.stop()
def chomp_wurds():
    wurds = request.json['wurds']
    print "Got wurds: " + wurds
    breakdown = MBSP.parse(wurds)
    return jsonify({'orig': wurds, 'breakdown': breakdown}), 201
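# chomp_wurds() reads Flask's request.json and returns jsonify() output, so it
# presumably belongs to a Flask app. A minimal sketch of the missing wiring;
# the '/wurds' route and the app object are assumptions, not part of the
# original snippet.
from flask import Flask, request, jsonify
import MBSP

app = Flask(__name__)
app.add_url_rule('/wurds', view_func=chomp_wurds, methods=['POST'])

if __name__ == '__main__':
    app.run()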
def _parse_MBSP(self, txt):
    parsed = MBSP.parse(txt)
    return unicode(parsed)
# License: GNU General Public License, see LICENSE.txt
######################################################################################################

# Add the upper directory (where the MBSP module is) to the search path.
import os, sys; sys.path.insert(0, os.path.join("..", ".."))

import MBSP

if not MBSP.config.autostart:
    MBSP.start()

q = 'I eat pizza with a fork.'
s = MBSP.parse(q,
     tokenize = True,  # Split tokens, e.g. 'fork.' => 'fork' + '.'
         tags = True,  # Assign part-of-speech tags => 'fork' = noun = NN.
       chunks = True,  # Assign chunk tags => 'a' + 'fork' = noun phrase = NP.
    relations = True,  # Find chunk relations: 'I' = sentence subject = NP-SBJ-1.
      anchors = True,  # Find prepositional noun phrase anchors.
      lemmata = True)  # Find word lemmata.

# Print the output of the parser in a readable table format.
# The tags assigned to each part-of-speech are listed at:
# http://www.clips.ua.ac.be/pages/mbsp-tags
MBSP.pprint(s)

# Print the output of the parser as XML:
print
print MBSP.xml(s)

# Remove the servers from memory when you're done:
# MBSP.stop()
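# Each token in the tagged string carries one slash-separated field per enabled
# option, in the order WORD/POS/CHUNK/PNP/RELATION/ANCHOR/LEMMA (the same order
# as the token list passed to MBSP.Sentence in queryGenerator() above). A rough
# sketch of unpacking the fields:
for token in s.split(" "):
    word, pos, chunk, pnp, rel, anchor, lemma = token.split("/")
    print word, "->", lemma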
def parse(*args, **kwargs):
    s = MBSP.parse(*args, **kwargs)
    s = parse_semantic_tag(s)
    return s