def getEntities(parser, tweet, xEntities):
    """Collect entity-like tokens (NNP / NN / PRP) from three taggers.

    Runs the supplied spaCy pipeline, TextBlob's pattern parser and
    pattern.en's ``tag`` over *tweet*, and records every token tagged as a
    proper noun, common noun or personal pronoun into *xEntities*
    (token -> tag string), keeping the first tag seen for each token.

    :param parser: callable spaCy pipeline; ``parser(text)`` yields tokens
        exposing ``.tag_``.
    :param tweet: text to analyse.
    :param xEntities: dict accumulator, mutated in place.
    :return: the updated *xEntities* dict on success, or the caught
        exception object on failure (kept for backward compatibility —
        existing callers check the return value's type).
    """
    ENTITY_TAGS = ("NNP", "NN", "PRP")

    def _record(token, pos):
        # Normalise the token text and register it unless already known.
        key = str(token).strip()
        if key not in xEntities:
            xEntities[key] = str(pos)

    try:
        spacyParsedObject = parser(tweet)
        sentence = TextBlob(tweet)
        # parse().split() -> sentences of [word, tag, chunk, pnp] lists.
        textblobTaggedObject = sentence.parse().split()
        # pattern.en tagger -> (word, tag) pairs.
        patterntaggedObject = tag(tweet, tokenize=True)
        for word, wordtag in patterntaggedObject:
            if wordtag in ENTITY_TAGS:
                _record(word, wordtag)
        for taggedObject in textblobTaggedObject:
            for token in taggedObject:
                if token[1] in ENTITY_TAGS:
                    _record(token[0], token[1])
        for word in spacyParsedObject:
            if word.tag_ in ENTITY_TAGS:
                _record(word, word.tag_)
        return xEntities
    except Exception as e:
        # NOTE(review): returning the exception instead of raising is the
        # pre-existing contract; preserved so callers do not break.
        return e
def test_get_np_for_default(self):
    """Print noun phrases and the shallow parse for every sample text."""
    for sample in self.text_list:
        blob = TextBlob(sample)
        print(blob.noun_phrases)
        print(blob.parse())
def tag_documents_text(client):
    """POS-tag each cornell document with the perceptron tagger and store
    the resulting parse string under 'parsed_perceptron'."""
    collection = client['cornell']['documents']
    for doc in collection.find():
        parsed = TextBlob(doc['text'], pos_tagger=PerceptronTagger()).parse()
        collection.update({'name': doc['name']},
                          {'$set': {'parsed_perceptron': parsed}})
def getEntities(parser, tweet, xEntities):
    """Harvest NNP/NN/PRP tokens from the spaCy, TextBlob and pattern
    taggers into *xEntities* (token -> tag). Returns the dict, or the
    exception object if any stage fails."""
    wanted = ("NNP", "NN", "PRP")
    try:
        spacy_doc = parser(tweet)
        blob = TextBlob(tweet)
        blob_sentences = blob.parse().split()
        pattern_tokens = tag(tweet, tokenize=True)
        # pattern.en output: (word, tag) pairs.
        for token, pos in pattern_tokens:
            if pos in wanted:
                key = str(token).strip()
                if key not in xEntities:
                    xEntities[key] = str(pos)
        # TextBlob parse output: sentences of [word, tag, chunk, pnp] lists.
        for sent in blob_sentences:
            for tok in sent:
                pos = tok[1]
                if pos in wanted:
                    key = str(tok[0]).strip()
                    if key not in xEntities:
                        xEntities[key] = str(pos)
        # spaCy tokens carry their own tag attribute.
        for tok in spacy_doc:
            if tok.tag_ in wanted:
                key = str(tok).strip()
                if key not in xEntities:
                    xEntities[key] = str(tok.tag_)
        return xEntities
    except Exception as e:
        return e
def extract_trigrams(client):
    """Compute the valid trigrams of each cornell document (built from the
    parse string of every sentence) and persist them under 'trigrams'."""
    collection = client['cornell']['documents']
    for doc in collection.find():
        blob = TextBlob(doc['text'])
        valid_trigrams = []
        for s in blob.sentences:
            raw_sentence = TextBlob(s.dict['raw'])
            # Re-wrap the parse string so ngrams() runs over tagged tokens.
            parsed_sentence = TextBlob(raw_sentence.parse())
            valid_trigrams += get_valid_trigrams(parsed_sentence.ngrams(n=3))
        collection.update({'name': doc['name']},
                          {'$set': {'trigrams': valid_trigrams}})
def get_structure():
    """Build one structural feature row per sentence in the module-level
    ``sentences`` list: [noun-phrase count, count of 'O' in the parse
    string, *match_tag features over the POS-tag sequence]."""
    train = []
    for sent in sentences:
        blob = TextBlob(sent)
        # number of noun phrases
        np_count = len(blob.noun_phrases)
        # keep only the tag of every (word, tag) pair
        pos_tags = [t for (_, t) in blob.tags]
        # occurrences of 'O' (outside-chunk marker) in the parse output
        o_count = blob.parse().count('O')
        row = [np_count, o_count]
        row.extend(match_tag(pos_tags))
        train.append(row)
    return train
def nlp_run(label, command):
    """Run the TextBlob analysis selected by *command* on *label*.

    Supported commands: "pol" (polarity), "spel" (spelling correction),
    "tag" (POS tags), "parse" (shallow parse), "noun" (noun phrases),
    "sub" (subjectivity).

    :return: a result string ("spel" returns the corrected TextBlob);
        None for an unrecognised command (unchanged behaviour).
    """
    text = TextBlob(label)
    sentvalue = text.sentiment.polarity
    subvalue = text.sentiment.subjectivity
    tags = text.tags
    parse = text.parse()
    nouns = text.noun_phrases
    correct = text.correct()
    if command == "pol":
        if sentvalue < 0:
            return "The polarity is " + str(sentvalue) + " ," "which means the text is negative"
        elif sentvalue > 0:
            return "The polarity is " + str(sentvalue) + " ," "which means the text is positive"
        elif sentvalue == 0:
            return "Polarity cannot be detected :("
    elif command == "spel":
        return correct
    elif command == "tag":
        return str(tags)
    elif command == "parse":
        return str(parse)
    elif command == "noun":
        return str(nouns)
    elif command == "sub":
        # BUG FIX: the subjectivity messages previously embedded
        # str(sentvalue) (the polarity); they now report str(subvalue).
        if subvalue > 0.5:
            return "The subjectivity is " + str(subvalue) + " ," "which means the text is subjective"
        elif subvalue < 0.5:
            return "The subjectivity is " + str(subvalue) + " ," "which means the text is objective"
        elif subvalue == 0.5:
            return "subjectivity cannot be detected :("
def nlp_parse():
    """Flask/Chalice endpoint: shallow-parse the 'text' field of a JSON
    request body and return the JSON-encoded list of parsed sentences;
    on any failure, log and return a JSON error payload."""
    try:
        payload = request.get_json()
        if payload is None:
            return jsonify(error='this service require A JSON request')
        # Guard clause replaces the original nested else-branch.
        if 'text' not in payload:
            raise Exception('Missing mandatory paramater "text"')
        parsed = TextBlob(payload['text']).parse().split()
        return json.JSONEncoder().encode(parsed)
    except Exception as ex:
        app.log.error(type(ex))
        app.log.error(ex.args)
        app.log.error(ex)
        return jsonify(error=str(ex))
# maybe need more than two headlines # print sys.argv[1] # print sys.argv[2] # headlines 1 and 2 - analyze, mix and send back to node # blob = TextBlob(sys.argv[1]) # # print blob.tags # blob2 = TextBlob(sys.argv[2]) # print blob2.tags for i, val in enumerate(news): headline = news[i]['title'] headlines.append(headline) headblob = TextBlob(headline, np_extractor=extractor) headblobs.append(headblob.noun_phrases) parsed = headblob.parse() headParsed.append(parsed) # for item in headParsed: # print item # get the first noun phrase from each headline and swap them # grab a random noun phrase from each headline h1i = int(random.random()*20) h1 = headlines[h1i] r1 = int(random.random()*len(headblobs[h1i])) np1 = headblobs[h1i][r1] # capitalize the noun phrase # np1 = ' '.join(word[0].upper() + word[1:] for word in np1.split())
def analyze(self, text):
    """Return the shallow parse of *text* (word/POS/chunk/PNP tag string)."""
    return TextBlob(text).parse()
# **************** "iNNovationMerge DailyCodeHub" **************** # Visit https://www.innovationmerge.com/ # Theme : Natural Language Processing using TextBlob in Python # NLP - Parsing the Text from textblob import TextBlob text = 'INNovationMerge is an online learning platform. \ developed for the users who wants to learn and practice \ technologies with the respective environments.' blob = TextBlob(text) print(blob.parse()) # Output: # INNovationMerge/NN/B-NP/O is/VBZ/B-VP/O an/DT/O/O online/JJ/B-ADJP/O learning/VBG/B-VP/O platform/NN/B-NP/O ././O/O developed/VBN/B-VP/O for/IN/B-PP/B-PNP the/DT/B-NP/I-PNP users/NNS/I-NP/I-PNP who/WP/O/O wants/VBZ/B-VP/O to/TO/B-PP/O learn/VB/B-VP/O and/CC/O/O practice/NN/B-NP/O technologies/NNS/I-NP/O with/IN/B-PP/B-PNP the/DT/B-NP/I-PNP respective/JJ/I-NP/I-PNP environments/NNS/I-NP/I-PNP ././O/O
# NOTE(review): this chunk begins mid-function — the `try` matching the
# first `except` (and the enclosing def) are above the visible region.
# The cascade tries three answer strategies, then falls back to the first
# noun phrase, then to a literal "Yes" guess. Python 2 syntax (raw_input,
# print statements) throughout.
except:
    first_attempt = ""
if first_attempt != "":
    return first_attempt
try:
    second_attempt = parse_second(q, bigblob, uncommon, mode)
except:
    second_attempt = ""
if second_attempt != "":
    return second_attempt
third_attempt = b.backup_answer(q, n.nps, raw)
if third_attempt != "":
    return third_attempt
if len(n.nps) > 0:
    return n.nps[0]
else:
    return "Yes" #guess

if __name__ == "__main__":
    # Interactive driver: parse one typed question and dump its analyses.
    q = raw_input("Ask a question\n")
    q = TextBlob(q, np_extractor=extractor)
    print q.noun_phrases
    noun_phrases, idxs = n.get_nps_from_blob(q)
    print noun_phrases
    print q.words
    # NOTE(review): assumes at least one noun phrase — IndexError otherwise.
    first = noun_phrases[0]
    print n.get_np_tags(first, q)
    print q.tags
    print q.parse()
    #print p.extract_generic_relations(q)
# Spelling correction: whole blob vs. a single Word.
sent = TextBlob("I haawve goood speling")
correct_sent = sent.correct()
w = Word("haave")
spellcheck = w.spellcheck()  # list of (candidate, confidence) pairs

#Get Word and Noun Phrase Frequencies
words = TextBlob('We are no longer together. We are enemies now.')
word_counts = words.word_counts
#You can specify whether or not the search should be case-sensitive (default is False).

#Translation and Language Detection
# NOTE(review): translate()/detect_language() call an external Google API
# and were removed in newer textblob releases — confirm the pinned version.
en_blob = TextBlob("You are my best friend")
pl_blob = en_blob.translate(to='pl')
blob = TextBlob("Mam na imię Piotr")
detected_lang = blob.detect_language()

#Parsing
text = TextBlob('I know You')
text_parse = text.parse()  # word/POS/chunk/PNP tag string

#string
# TextBlob supports str-like operations directly.
text = TextBlob("Hello World")
upper_text = text.upper()
find_world = text.find("World")

#ngrams
blob = TextBlob("Now is better than never.")
ngram = blob.ngrams(n=3)
def check_sarc(tweet):
    """Heuristic sarcasm check on *tweet*.

    Shallow-parses the tweet with the pattern parser, groups tokens into
    chunk phrases (NP, VP, ADJP, ADVP, ...), builds noun/adjective-centred
    (SF) and verb-centred (sf) phrase lists (including common two- and
    three-chunk combinations), scores each phrase's polarity, and returns
    1 when a positive/negative sentiment contrast between the two groups
    is found (a classic sarcasm cue), else 0.
    """
    blob = TextBlob(tweet, parser=PatternParser())
    tokens = blob.parse().split(' ')
    dic = defaultdict(list)  # all phrases keyed by chunk category
    temp = ''
    phrases = []  # (chunk-tag, phrase) pairs in sentence order
    for t in tokens:
        word, chunk = t.split('/')[0], t.split('/')[2]
        if chunk == 'O':
            if temp:
                phrases.append((ctag, temp))
            dic[chunk].append(temp)
            temp = word + ' '
            ctag = chunk
        elif 'B-' in chunk:
            if temp:
                phrases.append((ctag, temp))
            temp = word + ' '
            dic[chunk.split('-')[1]].append(temp)
            ctag = chunk.split('-')[1]
        elif 'I-' in chunk:
            # Continuation token: extend the current phrase.
            dic[chunk.split('-')[1]][-1] += word + ' '
            temp += word + ' '
            ctag = chunk.split('-')[1]
    if temp:
        phrases.append((ctag, temp))
    SF = []  # noun/adjective-centred phrases
    sf = []  # verb-centred phrases
    for tag_, phrase in phrases:
        # FIX: was 'ADjP' (typo) which can never match the 'ADJP' chunk tag.
        if tag_ in ['NP', 'ADJP']:
            SF.append(phrase)
        elif tag_ == 'VP':
            sf.append(phrase)
    for i in range(len(phrases) - 1):
        if phrases[i][0] == 'NP' and phrases[i + 1][0] == 'VP':
            SF.append(phrases[i][1] + ' ' + phrases[i + 1][1])
        elif phrases[i][0] == 'ADVP' and phrases[i + 1][0] == 'VP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1])
        elif phrases[i][0] == 'VP' and phrases[i + 1][0] == 'ADVP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1])
        elif phrases[i][0] == 'ADJP' and phrases[i + 1][0] == 'VP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1])
        elif phrases[i][0] == 'VP' and phrases[i + 1][0] == 'NP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1])
    for i in range(len(phrases) - 2):
        if phrases[i][0] == 'VP' and phrases[i + 1][0] == 'ADVP' and phrases[i + 2][0] == 'ADJP':
            # FIX: the third component was phrases[i + 1][1] (middle phrase
            # duplicated); it now appends the third chunk as intended.
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1] + ' ' + phrases[i + 2][1])
        elif phrases[i][0] == 'VP' and phrases[i + 1][0] == 'ADJP' and phrases[i + 2][0] == 'NP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1] + ' ' + phrases[i + 2][1])
        elif phrases[i][0] == 'ADVP' and phrases[i + 1][0] == 'ADJP' and phrases[i + 2][0] == 'NP':
            sf.append(phrases[i][1] + ' ' + phrases[i + 1][1] + ' ' + phrases[i + 2][1])
    print(SF)
    print(sf)
    PSF = []  # positive noun/adjective phrases
    NSF = []  # negative noun/adjective phrases
    psf = []  # positive verb phrases
    nsf = []  # negative verb phrases
    for phrase in SF:
        polarity = TextBlob(phrase).polarity
        if polarity > 0:
            PSF.append(phrase)
        elif polarity < 0:
            NSF.append(phrase)
    for phrase in sf:
        polarity = TextBlob(phrase).polarity
        if polarity > 0:
            psf.append(phrase)
        elif polarity < 0:
            # FIX: negative verb phrases were appended to psf, so nsf stayed
            # empty and half of the contrast test below could never fire.
            nsf.append(phrase)
    print(PSF)
    print(NSF)
    print(psf)
    print(nsf)
    # Sarcasm signal: opposite polarities between the two phrase families.
    if (PSF and nsf) or (psf and NSF):
        return 1
    else:
        return 0
)
# NOTE(review): this fragment starts mid-statement (the lone `)` closes a
# call begun above) and uses `monty`, `wiki` and `zen` TextBlobs defined
# earlier in the original tutorial script.
print(monty.word_counts['ekki'])  # through the word_counts dictionary
print(monty.words.count('ekki'))  # using the count() method
print(monty.words.count('ekki', case_sensitive=True))  # specify case sensitivity
print(wiki.noun_phrases.count('python'))
# translation and language detection
# en_blob = TextBlob(u'Simple is better than complex.')
# print(en_blob.translate(to='es'))
# chinese_blob = TextBlob(u"美丽优于丑陋")
# print(chinese_blob.translate(from_lang="zh-CN", to='en'))
# b = TextBlob(u"بسيط هو أفضل من مجمع")
# print(b.detect_language())
# parsing
b = TextBlob("And now for something completely different.")
print(b.parse())
# textblobs are like python strings!
print(zen[0:19])
print(zen.upper())
print(zen.find("Simple"))
apple_blob = TextBlob('apples')
banana_blob = TextBlob('bananas')
print(apple_blob < banana_blob)
print(apple_blob == 'apples')
# NOTE(review): the next two expressions build blobs but discard them.
apple_blob + ' and ' + banana_blob
TextBlob("apples and bananas")
print("{0} and {1}".format(apple_blob, banana_blob))
# n-grams
blob = TextBlob("Now is better than never.")
print(blob.ngrams(n=3))
# getting start and end indices of sentences
from textblob.wordnet import VERB

# Python 2 demo script: dump every TextBlob analysis for a fixed query.
raw_query = "Physics is a better subject to study than Mathematics. I like Physics more than I like Mathematics. Physicists are more intelligent than Mathematicians."
# Get input ready for use
query = TextBlob(raw_query)
print 'Query: ', query
tags = query.tags
print 'Tags: ', tags
nouns = query.noun_phrases
print 'Nouns: ', nouns
sentiment = query.sentiment
print 'Sentiment: ', sentiment
words = query.words
print 'Words: ', words
sentences = query.sentences
print 'Sentences: ', sentences
parse = query.parse()
print 'Parse: ', parse
# NOTE(review): detect_language() needs network access and was removed in
# newer textblob releases — confirm the pinned version.
language = query.detect_language()
print 'Language: ', language
# TODO : add spelling checks to correct the input sentences for better searches
corrected = query.correct()
print 'Corrected: ', corrected
# Search for results: WordNet lookups for a sample word.
w = Word('Octopus')
print '\nSynsets: ', w.synsets
print '\nDefinitions: ', w.definitions
print Word("hack").get_synsets(pos=VERB)
def get_value_instruction(sent):
    """Decode an underscore-joined key phrase from a natural-language
    instruction, using TextBlob POS tags.

    Two pipelines: if an adjective (JJ) is present, the phrase is built
    from the adjective and the noun/verb/adverb run that follows it;
    otherwise nouns/adverbs/participles (and words around prepositions)
    are concatenated. Returns the phrase with '_' separators (or the
    second word verbatim for two-word inputs).
    """
    # Text blob part of speech identification algorithm
    blob = TextBlob(sent)
    blob.parse()
    # isolating tags of words in instruction
    tags = blob.tags
    decoded = ""
    # if an adjective is present then truth is set to True to activate the
    # correct pipeline
    truth = False
    for x in range(len(tags)):
        if "JJ" in tags[x]:
            truth = True
            break
    # when an adjective exists this pipeline is run
    if truth:
        try:
            for x in range(len(tags)):
                if "JJ" in tags[x]:
                    q = x + 1
                    decoded += sent.split()[x] + "_"
                    # while the word after the adjective is any of these parts
                    # of speech they're added to the instruction final
                    while ("VBN" in tags[q] or "VBG" in tags[q] or "NN" in tags[q]
                           or "NNS" in tags[q] or "RB" in tags[q]
                           or ("NNS" in tags[q] and "IN" in tags[q + 1])):
                        decoded += sent.split()[q] + "_"
                        # if an interjection is present then you want to skip
                        # over it
                        if ("IN" in tags[q + 1]):
                            decoded += sent.split()[q + 1] + "_"
                            q += 2
                            continue
                        q += 1
                        if q >= len(tags):
                            break
        # NOTE(review): bare except-pass relies on IndexError to terminate
        # the scan at the end of the sentence — deliberate but fragile.
        except BaseException:
            pass
    # if there's no adjective present you want to run this pipeline
    else:
        try:
            # you iterate through the tags and identify certain parts of speech
            for x in range(len(tags)):
                if x < len(tags) - 1:
                    if "IN" in tags[x + 1]:
                        decoded += sent.split()[x] + "_"
                        decoded += sent.split()[x + 1] + "_"
                        # NOTE(review): reassigning the for-loop variable does
                        # NOT skip iterations in Python — the preposition pair
                        # is processed again on the next pass; likely a bug,
                        # left unchanged here.
                        x = x + 2
                        continue
                # if any of these parts of speech are in the instruction then
                # you want to extract them. The parts of speech can be found:
                # https://repository.upenn.edu/cgi/viewcontent.cgi?article=1603&context=cis_reports
                if "NN" in tags[x] or "NNS" in tags[x] or "RB" in tags[x] \
                        or "VBG" in tags[x] or "VBN" in tags[x]:
                    decoded += sent.split()[x] + "_"
                else:
                    continue
        except BaseException:
            print(x)
            print(tags[x])
            print("Please try re-typing your sentence")
    # drop the trailing underscore
    decoded = decoded[:-1]
    # If it's two words then you just choose the second word: we're assuming two words = predict apples, even if this is false in the example apples red
    # similarity identificatin will still pick up on the right column
    if len(sent.split()) == 2:
        decoded = sent.split()[1]
    return decoded
def process_single_question(q):
    """
    Check EverNote 180127 for detail
    :param q: input question
    :return: token_list, a list of raw words
             chunk_pos_list, [(st, ed)] indicating the position [st, ed)
    """
    blob = TextBlob(q)
    # Shallow parse -> "word/POS/chunk/..." items, one per token.
    shallow_parse = blob.parse().replace('\n', ' ').split(' ')
    LogInfo.logs(shallow_parse)
    chunk_tup_list = []
    for item in shallow_parse:
        spt = item.split('/')
        token = spt[0]
        chunk_tag = spt[2]
        chunk_tup_list.append([token, chunk_tag])
    while True:     # deal with .
        # NOTE(review): tups_len is unused in this loop (only the quote loop
        # below needs it); left unchanged.
        tups_len = len(chunk_tup_list)
        dot_idx = -1
        for idx, tup in enumerate(chunk_tup_list):
            if tup[0] == u".":
                dot_idx = idx       # capture the index of '
                break
        if dot_idx == -1:
            break
        assert dot_idx > 0
        # Merge a detached "." back onto the preceding token.
        chunk_tup_list[dot_idx-1][0] += chunk_tup_list[dot_idx][0]
        del chunk_tup_list[dot_idx]
    while True:     # deal with '
        tups_len = len(chunk_tup_list)
        quote_idx = -1
        for idx, tup in enumerate(chunk_tup_list):
            if tup[0] == u"'":
                quote_idx = idx     # capture the index of '
                break
        if quote_idx == -1:
            break
        assert quote_idx > 0
        if quote_idx < tups_len - 1 and chunk_tup_list[quote_idx+1][0] == u"s":
            # possessive: glue "'" + "s" into "'s"
            chunk_tup_list[quote_idx][0] += chunk_tup_list[quote_idx+1][0]
            del chunk_tup_list[quote_idx+1]
        else:
            # NOTE(review): this branch merges the quote AND the following
            # token into the preceding one; it indexes quote_idx+1, so a
            # sentence-final "'" would raise IndexError — confirm intended.
            chunk_tup_list[quote_idx-1][0] += chunk_tup_list[quote_idx][0]
            chunk_tup_list[quote_idx-1][0] += chunk_tup_list[quote_idx+1][0]
            del chunk_tup_list[quote_idx+1]
            del chunk_tup_list[quote_idx]
    token_list = [tup[0] for tup in chunk_tup_list]
    chunk_pos_list = []
    st = -1
    # Scan for maximal NP chunks ([st, ed) spans), skipping wh-words.
    for idx in range(len(chunk_tup_list)):
        tag = chunk_tup_list[idx][1]
        if tag in ('B-NP', 'I-NP'):
            if st != -1:
                continue
            else:
                st = idx
        else:
            if st != -1 and token_list[st].lower() not in wh_set:
                chunk_pos_list.append((st, idx))
            st = -1
        # if tag 'I-NP':
        #     continue
        # else:
        #     if st != -1 and token_list[st].lower() not in wh_set:
        #         chunk_pos_list.append((st, idx))
        #         st = -1
        # if tag == 'B-NP':
        #     st = idx
    # Close an NP chunk that runs to the end of the question.
    if st != -1 and token_list[st].lower() not in wh_set:
        chunk_pos_list.append((st, len(chunk_tup_list)))
    return token_list, chunk_pos_list
################################### # 对于文本特征构建组成新的训练文件 # ################################# from textblob import TextBlob with open('f1.txt', 'a', encoding='utf-8') as f1in: with open('f2.txt', 'a', encoding='utf-8') as f2in: with open('f3.txt', 'a', encoding='utf-8') as f3in: with open('ctrain.txt', encoding='utf-8') as f: count = 0 for line in f.readlines(): count += 1 text = line.split('\n')[0].split('\t')[1] tag = TextBlob(text) # print(tag.tags) print(tag.parse().split(' ')) f = tag.parse().split(' ') for i in f: i = i.split('/') f1in.write(i[1] + ' ') f2in.write(i[2] + ' ') f3in.write(i[3] + ' ') f1in.write('\n') f2in.write('\n') f3in.write('\n') # print(line) # f count==10:break from textblob import TextBlob with open('ftmp_test.txt', 'w', encoding='utf-8') as fin: with open('tmp_test', encoding='utf-8') as f:
# Demo 1: POS tags for three punctuation variants of the same sentence.
for sent in [
    "A woman without her man is nothing. ",
    "A woman, without her man, is nothing.",
    "A woman: without her, man is nothing."
]:
    wiki = TextBlob(sent)
    print("Wiki tags: %s : " % sent, wiki.tags)

# Demo 2: full TextBlob analysis of driver-licensing guide sentences.
for sent in [
    "For more information see the Commercial Driver Guide available at www.dol.wa.gov or at any driver licensing office.",
    "You can get an instruction permit or a driver license at any driver licensing office.",
    "Some offices do not offer testing so before you come in be sure the one you plan to visit offers the testing you need.",
    "In an effort to reduce wait times legislation was passed to allow driver training schools licensed by the Department of Licensing and school districts that offer a traffic safety education program under the supervision of the Office of the Superintendent of Public Instruction to administer driver licensing examinations.",
    "A list of approved schools as well as driver licensing offices can be found on our website.",
    "Please contact an approved school for their specific testing requirements.",
    "To be issued an instruction permit you must: ** be at least 15-1/2 years old (or 15 years old if enrolled in an approved driver-training course); ** pass the knowledge test (unless enrolled in an approved driver-training course); ** complete the vision and medical screenings and; ** pay an application/examination fee.",
    "If you pay an application/examination fee and are ( at least 15-1/2 years old or ( 15 years old and enrolled in an approved driver-training course) ) and ( pass the knowledge test or are enrolled in an approved driver-training course) and complete the ( vision and medical screenings) then you will be issued an instruction permit.",
    "If you are under 18 you must also bring your parent or guardian with you to the licensing office when you apply."
    # They must show proof of identity and proof of relationship to you and must also sign a Parental Authorization Affidavit.
    # When last names are different we require more documents 1-3 proving relationship.
    # The permit is valid for one year and you can renew it.
    # If you are enrolled in an approved driver-training course you can get an instruction permit at age 15.
    # You will need a waiver from your school allowing you to apply for the permit up to 10 days before the class starts.
]:
    wiki = TextBlob(sent)
    print("\nWiki tags: %s : " % sent, wiki.tags)
    print("Noun phrase extraction", wiki.noun_phrases)
    print("Parsed", wiki.parse())
    print("n-grams", wiki.ngrams(n=3))
def main(argv):
    """Voice-driven NLP assistant loop.

    Repeatedly: calibrate the microphone, record a request, transcribe it
    with both Google and IBM Watson, let the user pick the better
    transcript by voice, then run POS tagging, parsing and sentiment
    analysis (TextBlob) over the chosen transcript and speak the results.
    Loops until the user says they are done.

    NOTE(review): this chunk's original indentation was lost; the nesting
    below is a best-effort reconstruction — verify against the original
    script before relying on control flow at the block boundaries.
    """
    #with sr.Microphone() as source:
    finalTranscript = ""
    translationSuccess = 0      # NOTE(review): assigned but never used here
    optionChoosen = "0"
    exitOption = "0"
    welcomeTextSpeak = 0        # one-shot flag: speak welcome only once
    threeOptionAware = 0        # one-shot flag: explain the options once
    # NOTE(review): `in ("0")` is a substring test on the string "0",
    # not tuple membership — works here but fragile.
    while exitOption in ("0"):
        m = sr.Microphone()
        optionChoosen = "0"
        while optionChoosen not in ("1", "2", "3"):
            if welcomeTextSpeak == 0:
                welcomeTextSpeak = welcomeTextSpeak + 1
                welcomeText = "Hello user. Welcome to Shriram's Voice based Natural Language Processing Tool"
                print(welcomeText)
                Text2SpeechRequest("for_welcome", welcomeText)
                silenceText = "Please wait while I analyze your environment and self adjust myself to better undersand what you say"
                print(silenceText)
                Text2SpeechRequest("for_silence", silenceText)
                r = sr.Recognizer()
                # Calibrate the energy threshold against ambient noise.
                with m as source:
                    r.adjust_for_ambient_noise(source)
                thresholdText = "Optimal Energy Threshold set to: {}".format(r.energy_threshold)
                print(thresholdText)
                if (r.energy_threshold > 500):
                    environmentNoisyText = "Your environment is noisy so please help me by speaking a bit loud"
                    print(environmentNoisyText)
                    Text2SpeechRequest("for_notifying_noisy_environment", environmentNoisyText)
            with sr.Microphone(device_index=None, sample_rate=48000) as source:
                #----------------------------------------------------------------ASSISTING FOR OTHER OPTIONS--------------------------------------------------#
                sayCommandText = "Can you please say your request now"
                print(sayCommandText)
                Text2SpeechRequest("for_requesting_command", sayCommandText)
                audio = r.listen(source)
                waitForUnderstandingText = "Please wait while I try to understand what you just said"
                print(waitForUnderstandingText)
                Text2SpeechRequest("for_waiting_to_undersanding_input", waitForUnderstandingText)
                if threeOptionAware == 0:
                    threeOptionAware = threeOptionAware + 1
                    learningRequestText = "I am learning to understand how you speak, so I am going to provide you with two different options that represents what I understood."
                    print(learningRequestText)
                    Text2SpeechRequest("for_notifying_about_2_options", learningRequestText)
                # Transcribe the same audio with both recognisers.
                transcriptGoogle = GetGoogleTranscripts(r, sr, audio)
                if not transcriptGoogle == 'None':
                    print("Option # 1: Did you spoke the following words? : " + transcriptGoogle)
                else:
                    print("Sorry I cannot recognize what you said. Please try again")
                    finalTranscript = ""
                transcriptIbm = GetIBMWatsonTranscripts(r, sr, audio)
                if not transcriptIbm == 'None':
                    print("Option # 2: Did you spoke the following words? : " + transcriptIbm)
                else:
                    print("Sorry I cannot recognize what you said. Please try again")
                    finalTranscript = ""
                choiceText = 'Please say the option number which closely relates to what you just said. If you are not satisfied please say restart.'
                print(choiceText)
                Text2SpeechRequest("for_getting_transcript_choice", choiceText)
                # Let the user choose a transcript by voice.
                audioListenTranscriptChoice = r.listen(source)
                transcriptGoogleListenTranscriptChoice = GetGoogleTranscripts(r, sr, audioListenTranscriptChoice)
                if re.search("1", transcriptGoogleListenTranscriptChoice.lower()):
                    optionChoosen = "1"
                    finalTranscript = transcriptGoogle
                elif re.search("2", transcriptGoogleListenTranscriptChoice.lower()):
                    optionChoosen = "2"
                    finalTranscript = transcriptIbm
                else:
                    print("Restarting to get your request again and understand your request thoroughly")
                    optionChoosen = "0"
                if optionChoosen in ("1", "2"):
                    if len(finalTranscript) > 0:
                        nltkOpsText = "Lets perform some natural language analysis on the text you had just talked "
                        print(nltkOpsText)
                        Text2SpeechRequest("for_notifying_nltk_operations", nltkOpsText)
                        print("Parts Of Speech Tags in your sentence are :")
                        print(str(nltk.pos_tag(nltk.word_tokenize(finalTranscript))))
                        blob = TextBlob(finalTranscript)
                        print("Grammatical structure of your sentence is :")
                        print(blob.parse())
                        # Sum per-sentence polarity for an overall score.
                        overAllSentiment = 0
                        for sentence in blob.sentences:
                            overAllSentiment = overAllSentiment + sentence.sentiment.polarity
                        print("Sentiment score of your sentence is :")
                        print(overAllSentiment)
                        if overAllSentiment > 0.0:
                            positiveText = "You have spoken a positive sentence. Seems you are happy !"
                            print(positiveText)
                            Text2SpeechRequest("for_notifying_positive_sentiment", positiveText)
                        elif overAllSentiment < 0.0:
                            negativeText = "You have spoken a negative sentence. Seems you are not happy."
                            print(negativeText)
                            Text2SpeechRequest("for_notifying_negative_sentiment", negativeText)
                        else:
                            neutralText = "You have spoken a neutral sentence."
                            print(neutralText)
                            Text2SpeechRequest("for_notifying_neutral_text", neutralText)
                        exitOptionText = "Are you satisfied with the results that I had presented to you? Do you want me to help you with anything else?"
                        print(exitOptionText)
                        Text2SpeechRequest("for_exit_choice_getting", exitOptionText)
                        audioListenExit = r.listen(source)
                        transcriptGoogleListenExit = GetGoogleTranscripts(r, sr, audioListenExit)
                        if re.search("yes|yeah|fine|yup|up|ok|satisfied", transcriptGoogleListenExit.lower()):
                            exitOption = "0"
                        elif re.search("no|thats ok|i am good|done|thanks", transcriptGoogleListenExit.lower()):
                            exitOption = "1"
                        else:
                            optionChoosen = "0"
    # NOTE(review): attribute access without a call — this does NOT close
    # the microphone stream; probably meant to be invoked.
    sr.Microphone.MicrophoneStream.close
    goodByeText = "Thank you for using Shriram's NLP Application!. Hope I was of help to you. Have a great day"
    print(goodByeText)
    Text2SpeechRequest("for_goodbye_note", goodByeText)
def check_sarc(tweet):
    """Return 1 if *tweet* shows a positive/negative phrase-polarity
    contrast (sarcasm heuristic), else 0.

    Chunks the pattern-parser output into phrases, groups noun/adjective
    phrases (SF) and verb phrases (sf) including common two- and
    three-chunk combinations, then compares the sentiment polarity of the
    two groups.
    """
    blob = TextBlob(tweet, parser=PatternParser())
    tokens = blob.parse().split(' ')
    dic = defaultdict(list) # stores all phrases by category
    temp = ''
    phrases = [] # list of all phrases
    for t in tokens:
        if t.split('/')[2] == 'O':
            if temp:
                phrases.append((ctag,temp))
            dic[t.split('/')[2]].append(temp)
            temp = t.split('/')[0]+' '
            ctag = t.split('/')[2]
        elif 'B-' in t.split('/')[2]:
            if temp:
                phrases.append((ctag,temp))
            temp = t.split('/')[0]+' '
            dic[t.split('/')[2].split('-')[1]].append(temp)
            ctag = t.split('/')[2].split('-')[1]
        elif 'I-' in t.split('/')[2]:
            # continuation token: extend the open phrase
            dic[t.split('/')[2].split('-')[1]][-1] += t.split('/')[0]+' '
            temp += t.split('/')[0]+' '
            ctag = t.split('/')[2].split('-')[1]
        else:
            pass
    if temp:
        phrases.append((ctag,temp))
    SF = []  # noun/adjective-centred phrases
    sf = []  # verb-centred phrases
    for i in phrases:
        # FIX: was 'ADjP' (typo) which can never match the 'ADJP' chunk tag.
        if i[0] in ['NP','ADJP']:
            SF.append(i[1])
        elif i[0]=='VP':
            sf.append(i[1])
    for i in range(len(phrases)-1):
        if phrases[i][0]=='NP' and phrases[i+1][0]=='VP':
            SF.append(phrases[i][1]+' '+phrases[i+1][1])
        elif phrases[i][0]=='ADVP' and phrases[i+1][0]=='VP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1])
        elif phrases[i][0]=='VP' and phrases[i+1][0]=='ADVP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1])
        elif phrases[i][0]=='ADJP' and phrases[i+1][0]=='VP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1])
        elif phrases[i][0]=='VP' and phrases[i+1][0]=='NP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1])
    for i in range(len(phrases)-2):
        if phrases[i][0]=='VP' and phrases[i+1][0]=='ADVP' and phrases[i+2][0]=='ADJP':
            # FIX: third component was phrases[i+1][1] (middle phrase doubled).
            sf.append(phrases[i][1]+' '+phrases[i+1][1]+' '+phrases[i+2][1])
        elif phrases[i][0]=='VP' and phrases[i+1][0]=='ADJP' and phrases[i+2][0]=='NP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1]+' '+phrases[i+2][1])
        elif phrases[i][0]=='ADVP' and phrases[i+1][0]=='ADJP' and phrases[i+2][0]=='NP':
            sf.append(phrases[i][1]+' '+phrases[i+1][1]+' '+phrases[i+2][1])
    print(SF)
    print(sf)
    PSF = []
    NSF = []
    psf = []
    nsf = []
    for i in SF:
        blob = TextBlob(i)
        if blob.polarity > 0:
            PSF.append(i)
        elif blob.polarity < 0:
            NSF.append(i)
    for i in sf:
        blob = TextBlob(i)
        if blob.polarity > 0:
            psf.append(i)
        elif blob.polarity < 0:
            # FIX: negative verb phrases were appended to psf, leaving nsf
            # always empty and disabling half of the contrast check below.
            nsf.append(i)
    print(PSF)
    print(NSF)
    print(psf)
    print(nsf)
    if (PSF and nsf) or (psf and NSF):
        return 1
    else:
        return 0
from nltk import Tree
from nltk.grammar import CFG
from nltk.parse.generate import generate, demo_grammar
# NOTE(review): CFG is imported twice (nltk.grammar above and nltk here).
from nltk import CFG
import string , re
import wordpolarity

s = "I do bad things for good people."
# NOTE(review): this is the Python-2 str.translate signature (deletechars);
# under Python 3 this call raises TypeError.
mystring = s.translate(None , string.punctuation)
#print s.parse()
b = TextBlob(mystring)
print b.sentiment.polarity
# Split the parse string into word/POS/chunk/PNP fields per token.
g = str(b.parse())
x = g.split()
word_list = []
mystr = mystring.split()
space_list = x
main_list = []
#print (space_list)
for word in space_list:
    new_list = word.split("/")
    main_list.append(new_list)
# NOTE(review): chunk appears truncated — `a` is used below this region.
a={}
__author__ = 'cloudera'
from senticnet.senticnet import Senticnet
from textblob import TextBlob

# Python 2 demo: compare Senticnet concept lookups with TextBlob analysis.
sentence = "One of the very first Apple 1 computers, worth about 500,000, goes on sale later this month at Christie's auction house, the latest vintage tech sale."
sn = Senticnet()
concept_info = sn.concept('love')
print 'sn.concept(love) = ', concept_info
polarity = sn.polarity('love')
print 'polarity(love) = ', polarity
semantics = sn.semantics('love')
print 'semantics = ', semantics
sentics = sn.sentics('love')
print 'sentics = ', sentics
sentenceBlob = TextBlob(sentence)
print sentenceBlob.parse()
print sentenceBlob.sentiment
# NOTE(review): Senticnet concept() expects a single concept string; passing
# a whole sentence likely fails or returns nothing useful — confirm.
sentenceConcept = sn.concept(sentence)
print sentenceConcept
# Spelling correction demo.
b = TextBlob("I havv goood speling!")
print(b.correct())
#w = Word('havv')
#print(w.spellcheck())

# Word frequency lookups.
monty = TextBlob("We are no longer the Knights who say Ni. "
                 "We are now the Knights who say Ekki ekki ekki PTANG.")
print(monty.word_counts['ekki'])
print(monty.words.count('ekki'))
print(monty.words.count('ekki', case_sensitive=True)) # case_sensitive --> match upper/lower case exactly

# Translation — NOTE(review): needs network access; removed in newer
# textblob releases, confirm the pinned version.
en_blob = TextBlob(u'The sooner, the better')
print(en_blob.translate(to='fr'))
b = TextBlob(u"今年の夏の間に日本に行きました")
print(b.translate(from_lang='ja',to='fr'))

# Shallow parsing.
c = TextBlob("And now for something completely different.")
print(c.parse())

# n-grams (result discarded — demo only).
blob = TextBlob("Now is better than never.")
blob.ngrams(n=3)

# NOTE(review): `zen` is a TextBlob defined elsewhere in the original file.
for s in zen.sentences:
    print(s)
    print("---- Starts at index {}, Ends at index {}".format(s.start, s.end))
# Tokenization — NOTE(review): `blob` and `Blobber` come from earlier in the
# original tutorial file.
blob.tokens #This is an alternative way
tokenizer = BlanklineTokenizer()
blob = TextBlob("A token\n\nof appreciation")
blob.tokenize(tokenizer)

# Noun phrase chunkers
from textblob.np_extractors import ConllExtractor
extractor = ConllExtractor()
blob = TextBlob("Python is a high-level programming language.", np_extractor=extractor)
blob.noun_phrases

# POS taggers
from textblob.taggers import NLTKTagger
nltk_tagger = NLTKTagger()
blob = TextBlob("Tag! You're It!", pos_tagger=nltk_tagger)
blob.pos_tags

# Parser
from textblob.parsers import PatternParser
blob = TextBlob("Parsing is fun.", parser=PatternParser())
blob.parse()

# TextBlobs that share the same models, built through a Blobber factory.
# FIX: was "rom textblob.taggers import NLTKTagger" — a syntax error
# (missing the 'f' of "from").
from textblob.taggers import NLTKTagger
tb = Blobber(pos_tagger=NLTKTagger())
blob1 = tb("This is a blob.")
blob2 = tb("This is another blob.")
blob1.pos_tagger is blob2.pos_tagger
#Words can be lemmatized by the lemmatize method, but notice that the TextBlog lemmatize method is # inherited from NLTK Word Lemmatizer, and the default POS Tag is "n", if you want lemmatize other #pos tag words, you need specify it: nlpblob.words[138].pluralize().lemmatize() nlpblob.words[21].pluralize().lemmatize() #9)Spelling Correction #TextBlob Spelling correction is based on Peter Norvig"s "How to Write a Spelling Corrector", which is # implemented in the pattern library: b = TextBlob("I havv good speling!") b.correct() #Word objects also have a spellcheck() method that returns a list of (word, confidence) tuples with spelling suggestions: #9) Parsing: TextBlob parse method is based on pattern parser: nlpblob.parse() #10) Translation and Language Detection: By Google"s API: #Detect nlpblob.detect_language() nlpblob.translate(to="hi") nlpblob.translate(to="kn") # es fr nlpblob.translate(to="fr") # es fr nlpblob.translate(to="zh") # Few more example. How to get keyword for any particular language non_eng_blob = TextBlob("हिन्दी समाचार की आधिकारिक वेबसाइट. पढ़ें देश और दुनिया की ताजा ख़बरें") non_eng_blob.detect_language() non_eng_blob = TextBlob("ಮುಖ್ಯ ವಾರ್ತೆಗಳು ಜನಪ್ರಿಯ")