def _translate_message(bot, broadcast_list, context):
    """Append a translated copy of an outgoing message for rooms whose
    configured language differs from the origin room's language.

    broadcast_list: iterable of (conversation_id, response_segments)
        pairs; each response list is mutated in place when a translation
        is appended.
    context: optional dict; translation only happens when it carries an
        "autotranslate" entry with "conv_id" and "event_text" keys.
    """
    if context and "autotranslate" in context:
        _autotranslate = context["autotranslate"]
        origin_language = _get_room_language(bot, _autotranslate["conv_id"])
        for send in broadcast_list:
            target_conversation_id = send[0]
            response = send[1]
            target_language = _get_room_language(bot, target_conversation_id)
            if origin_language != target_language:
                logger.debug("translating {} to {}".format(origin_language, target_language))
                # Fall back to the untranslated text if the API call fails.
                translated = _autotranslate["event_text"]
                try:
                    en_blob = TextBlob(_autotranslate["event_text"])
                    translated = "{0}".format(en_blob.translate(to=target_language))
                    #translated = gs.translate(_autotranslate["event_text"], target_language
                except Exception:
                    logger.debug("Translation Api returned string unchanged")
                else:
                    pass
                finally:
                    if _autotranslate["event_text"] != translated:
                        # mutate the original response by reference
                        response.extend([
                            hangups.ChatMessageSegment('\n', hangups.SegmentType.LINE_BREAK),
                            hangups.ChatMessageSegment('(' + translated + ')')])
def update_book(book):
    """(Re)index *book* in the full-text search index.

    For English descriptions, keep only the nouns (NN/NNP tags) and, for
    nouns longer than two characters, also append their Korean
    translation (network call to Google Translate via TextBlob) so the
    document is searchable in both languages.  Non-English descriptions
    are indexed verbatim.
    """
    blob = TextBlob(book.description)
    if blob.detect_language() == 'en':
        description = ''
        # NN = common noun, NNP = proper noun
        nouns = filter(lambda x: x[1] == 'NN' or x[1] == 'NNP', blob.tags)
        for noun, tag in nouns:
            description += noun + " "
            if len(noun) > 2:
                description += TextBlob(noun).translate(to='ko').string + " "
    else:
        description = book.description
    book_document = search.Document(
        doc_id=book.ISBN,
        fields=[
            search.TextField(name='title', value=remove_punc(book.title)),
            search.TextField(name='author', value=remove_punc(book.author)),
            search.TextField(name='description', value=remove_punc(description))
        ]
    )
    index = get_book_index()
    index.put(book_document)
def tokenize(text, spell=False, stem=False, lemma=False, lower=False, stop=False):
    """Tokenize *text* (a UTF-8 byte string; Python 2) into a list of
    UTF-8-encoded alphabetic tokens, with optional spelling correction,
    lowercasing, lemmatization, stemming and stopword removal.

    NOTE(review): relies on module-level `stemmer` and `stopwords`.
    """
    # lowercase, remove non-alphas and punctuation
    b = TextBlob(unicode(text, 'utf8'))
    if spell:
        b = b.correct()
    words = b.words
    if lower:
        words = words.lower()
    if lemma:
        words = words.lemmatize()
    if stem:
        words = [stemmer.stem(w) for w in words]
    if stop:
        tokens = [w.encode('utf-8') for w in words if w.isalpha() and w not in stopwords]
    else:
        tokens = [w.encode('utf-8') for w in words if w.isalpha()]
    # letters_only = re.sub("[^a-zA-Z]", " ", text)
    # (earlier n-gram expansion experiment removed; see version history)
    return tokens
def tag_documents_text(client):
    """POS-parse every document in the cornell/documents collection with
    the perceptron tagger and store the result as 'parsed_perceptron'."""
    collection = client['cornell']['documents']
    for record in collection.find():
        tagged = TextBlob(record['text'], pos_tagger=PerceptronTagger()).parse()
        collection.update({'name': record['name']},
                          {'$set': {'parsed_perceptron': tagged}})
def extract(ngrams, dataset, doc_id): # extract keywords print 'Extracting keywords' for i, ngram in enumerate(ngrams): doc = doc_id[i] if field not in dataset[doc]: dataset[doc][field] = set() if doc > 0 and doc % 1000 == 0: print '\t', doc for kw in filter(lambda k: '_' in k, ngram): keyword = kw.replace('_', ' ') kw_tb = TextBlob(keyword) # filter out punctuation, etc (make sure that there are two non-punc words) if len(kw_tb.words) < 2: continue # add keywords which are all proper nouns distinct_tags = set(t[1] for t in kw_tb.tags) if distinct_tags - {'NNP', 'NNPS'} == {}: dataset[doc][field].add(kw_tb.lower()) continue # add noun phrases for np in kw_tb.lower().noun_phrases: dataset[doc][field].add(np) return kw_set_to_list(dataset)
def hi(bot, trigger):
    """Greet *trigger.nick* in a randomly chosen language (network call
    to Google Translate via TextBlob)."""
    lang_codes = ['af', 'ga', 'sq', 'it', 'ar', 'ja', 'az', 'kn', 'eu', 'ko', 'bn', 'la', 'en']
    # BUGFIX: missing space before "on the road of life" glued the
    # greeting onto the nick.
    trans = TextBlob('Greetings dear ' + trigger.nick + ' on the road of life ')
    # Pick any of the available codes (was a hard-coded 0..12).
    trans = trans.translate(to=lang_codes[randint(0, len(lang_codes) - 1)])
    saying = str(trans)
    bot.say(saying)
def on_command(self, msg, stdin, stdout, reply):
    """Translate a message (from the parsed args or from stdin) between
    two languages and print the result to *stdout*; silently does
    nothing when parsing or translation fails."""
    # pre-process args
    # this might mess up if "from" or "to" is left out and
    # the message contains "from" or "to"
    self._push_character(msg["args"], "from", "-", 1)
    self._push_character(msg["args"], "to", "-", 1)
    try:
        args = self.parser.parse_args(msg["args"][1:])
    except (argparse.ArgumentError, SystemExit):
        return
    # get message from the appropriate place
    if args.message:
        message = " ".join(args.message)
    else:
        message = stdin.read().strip()
    # translate
    from_lang = args.from_language
    to_lang = args.to_language
    message = TextBlob(message)
    try:
        translated = message.translate(from_lang=from_lang, to=to_lang)
    except:
        # NOTE(review): bare except swallows everything, including
        # KeyboardInterrupt — consider narrowing to Exception.
        pass
    else:
        print(translated, file=stdout)
def _german(self, text):
    """Translate *text* to English via TextBlob; return the input
    unchanged when translation fails (e.g. network error or the text is
    already English)."""
    blob = TextBlob(text)
    try:
        return str(blob.translate(to="en"))
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        return text
def process_status(status, lang): text = "" # translate if lang == 'en': text = status['text'] else: blob = TextBlob(status['text']) try: text = str(blob.translate()) except textblob.exceptions.NotTranslated: text = status['text'] # sentiment analysis sentiment = TextBlob(text).sentiment return { "created_at": 1000 * int(time.mktime((status['created_at']).timetuple())) , "id_str": status['id_str'] , "text": text , "sentiment": {"polarity": sentiment[0], "subjectivity": sentiment[1]} , "retweet_count": status['retweet_count'] , "in_reply_to_status_id_str": status['in_reply_to_status_id_str'] , "geo": status['geo'] , "retweeted": status['retweeted'] , "in_reply_to_user_id_str": status['in_reply_to_user_id_str'] }
def matchRhyme(word1, word2):
    """Score how well two Hindi words rhyme after transliteration.

    A sentinel suffix is appended before translation so Google Translate
    preserves the word endings; the 5 transliterated sentinel characters
    are stripped afterwards.  Returns 5 (last char + same vowel before
    it), 4 (last char matches, previous chars differ) or 0 (no match).
    """
    suffix = "टेक्स्ट"  # sentinel; transliterates to 5 chars which are stripped
    blob1 = TextBlob(" " + word1 + suffix)
    blob2 = TextBlob(" " + word2 + suffix)
    # BUGFIX: translate() results expose .string, not .substring.
    t1 = blob1.translate(from_lang="hi", to='en').string[:-5]
    t2 = blob2.translate(from_lang="hi", to='en').string[:-5]
    rhymeMeter = 0  # BUGFIX: was unbound (NameError) on non-matching paths
    vowels = ('a', 'e', 'i', 'o', 'u')
    # Match the last character first.
    if t1 and t2 and t1[-1] == t2[-1]:
        # Same vowel ("matra") in second-to-last position: strongest rhyme.
        if len(t1) > 1 and len(t2) > 1 and t1[-2] == t2[-2] and t1[-2] in vowels:
            rhymeMeter = 5
        # BUGFIX: original compared t1 against itself (always False).
        elif len(t1) > 1 and len(t2) > 1 and t1[-2] != t2[-2]:
            rhymeMeter = 4
    return rhymeMeter
def gen_translate(msg, fromlang=None, outputlang='en'):
    """Translate *msg* from *fromlang* to *outputlang*; when the service
    reports the text as untranslatable, return the input unchanged."""
    try:
        return str(TextBlob(msg).translate(from_lang=fromlang, to=outputlang))
    except NotTranslated:
        return msg
def scrape(self, links=[], ads=True, translator=False):
    """Fetch ad pages (or, when ads=False, crawl category pages first to
    discover ad links) and extract structured posting data.

    Returns a list of dicts (title/link/images/text/sentiment/...).

    NOTE(review): mutable default `links=[]` is shared across calls.
    NOTE(review): `values` is a single dict reused for every response, so
    every element appended to `data` is the SAME object — confirm whether
    a fresh dict per ad was intended.
    """
    responses = []
    values = {}
    data = []
    if ads:
        for link in links:
            r = requests.get(link)
            responses.append(r)
    else:
        # Crawl each category page for individual ad links first.
        for link in links:
            r = requests.get(link)
            text = unidecode(r.text)
            html = lxml.html.fromstring(text)
            links = html.xpath("//div[@class='cat']/a/@href")
            for link in links:
                if len(self.base_urls) > 1 or len(self.base_urls[0]) > 3:
                    # polite random crawl delay
                    time.sleep(random.randint(5,27))
                try:
                    responses.append(requests.get(link))
                    print link
                except requests.exceptions.ConnectionError:
                    print "hitting connection error"
                    continue
    for r in responses:
        text = r.text
        html = lxml.html.fromstring(text)
        values["title"] = html.xpath("//div[@id='postingTitle']/a/h1")[0].text_content()
        values["link"] = unidecode(r.url)
        values["new_keywords"] = []
        try:
            values["images"] = html.xpath("//img/@src")
        except IndexError:
            values["images"] = "weird index error"
        pre_decode_text = html.xpath("//div[@class='postingBody']")[0].text_content().replace("\n","").replace("\r","")
        values["text_body"] = pre_decode_text
        try:
            values["posted_at"] = html.xpath("//div[class='adInfo']")[0].text_content().replace("\n"," ").replace("\r","")
        except IndexError:
            values["posted_at"] = "not given"
        values["scraped_at"] = str(datetime.datetime.now())
        body_blob = TextBlob(values["text_body"])
        title_blob = TextBlob(values["title"])
        values["language"] = body_blob.detect_language() #requires the internet - makes use of google translate api
        values["polarity"] = body_blob.polarity
        values["subjectivity"] = body_blob.sentiment[1]
        if values["language"] != "en" and not translator:
            values["translated_body"] = body_blob.translate(from_lang="es")
            values["translated_title"] = title_blob.translate(from_lang="es")
        else:
            values["translated_body"] = "none"
            values["translated_title"] = "none"
        text_body = values["text_body"]
        title = values["title"]
        values["phone_numbers"] = self.phone_number_parse(values)
        data.append(values)
    return data
def getKeywords(text, useless):
    """Return a TextBlob containing the words of *text* with every word
    in *useless* removed.

    BUGFIX: the original compared words with `is` (identity, not
    equality — unreliable for strings) and called TextBlob.remove(),
    which does not exist; TextBlob objects are immutable, so a filtered
    blob is built instead.
    """
    stop = set(useless)
    kept = [w for w in TextBlob(text).words if w not in stop]
    return TextBlob(" ".join(kept))
def answer(question):
    """Detect the question's language; for English, tokenize it, strip
    bad words, roll the persisted vocabulary file, and free-associate a
    reply; for any other language just reply.

    Mutates globals IsAnswer/detected/u and the module-level `words`
    list; rewrites newwords.txt on every call.
    """
    global IsAnswer, detected, u
    IsAnswer = True
    DetectLang = TextBlob(question)
    detected = DetectLang.detect_language()  # network call (Google Translate)
    if detected == 'en':
        print("language detected: en")
        u = 'en'
        print(len(words), "len(words)")
        low = question.lower()
        questions = re.sub('[^\w]', ' ', low).split()  #list
        BadWords(questions)
        print(questions)
        def writeout(words, question, IsAnswer):
            # Keep the vocabulary bounded (~3000 words) by evicting random
            # entries, then persist it plus the new tokens to newwords.txt.
            r = []
            if len(words) > 3000:
                a1 = len(questions)
                for x in range(0, a1):
                    words.remove(random.choice(words))
                print(len(words), "len(words)")
            else:
                pass
            os.remove('newwords.txt')
            file = open('newwords.txt', 'w')
            words.extend(questions)
            r.extend(words)
            s = ' '.join(r)
            file.write(s)
        writeout(words, question, IsAnswer)
        randomthought()
    else:
        u = detected
        print("language detected:", u)
        randomthought()
def check_speech_patterns(text):
    """Return True as soon as any 2- to 5-gram of *text* POS-tags to one
    of the known speech patterns; False when none match."""
    PATTERNS={
        ("PRP","DT"),
        ("CC","VBD"),
        ("VB","RB"),
        ("VB","PRP$"),
        ("NN","POS"),
        ("NN","MD","VB"),
        ("VB","PRP$","NN"),
        ("MD","VB","VBN"),
        ("NN","IN","PRP$"),
        ("IN","PRP$","JJ"),
        ("VB","PRP","DT","NN"),
        ("VBD","RB","JJ","NNS"),
        ("NNP","NNP","NNP","NNP"),
        ("PRP$","NN","CC","PRP"),
        ("NNP", "NNP", "NNP", "NNP", "NNP"),
        ("NN", "IN", "DT", "NNS", "IN"),
        ("PRP$", "NN", "IN", "DT", "NN"),
        ("IN", "DT", "NN", "WDT", "VBZ"),
        ("NN", "IN", "PRP$", "JJ", "NN"),
        ("DT", "NN", "IN", "NN", "NN")
    }
    blob = TextBlob(text)
    for size in range(2, 6):
        for gram in blob.ngrams(n=size):
            tagged = TextBlob(" ".join(gram)).tags
            # split (word, tag) pairs into parallel tuples
            gram_words, gram_tags = zip(*tagged)
            if gram_tags in PATTERNS:
                return True
    return False
def sentiment_pattern(text, gram_n=6):
    """Split *text* into n-grams and classify each with a pickled
    predictor, mapping its 0-4 class labels onto -1/0/+1 sentiment.

    Returns (and prints; Python 2) the per-gram sentiment list.
    NOTE(review): loads predictor.pickle from the working directory on
    every call — unpickling untrusted files is unsafe; confirm source.
    """
    blob= TextBlob(text)
    ngrams=blob.ngrams(n=gram_n)
    sentiment_list=[]
    datalist = []
    for gram in ngrams:
        str_gram=" ".join(gram)
        print str_gram
        # Datapoint(label, ?, text, ?) — NOTE(review): confirm field order
        data = (0, 0, str_gram, None)
        datalist.append(Datapoint(*data))
    # (earlier TextBlob-polarity scoring removed; predictor is used instead)
    predictor = pickle.load(open("predictor.pickle", "rb" ) )
    prediction = predictor.predict(datalist)
    for sentiment in prediction:
        sentiment = int(sentiment)
        # classes <2 negative, ==2 neutral, >2 positive
        if sentiment < 2:
            sentiment_list.append(-1)
        if sentiment == 2:
            sentiment_list.append(0)
        if sentiment > 2:
            sentiment_list.append(1)
    print sentiment_list
    return sentiment_list
def findLanguage(reducedList3):
    """Detect the language of up to 5000 random samples (first fields
    longer than 5 chars) and print the five most common languages.

    Python 2; each detection is a Google Translate network call.
    """
    languageMap = {}
    currentNumber = 0
    shuffle(reducedList3)
    for i in reducedList3:
        if currentNumber < 5000:
            if len(i[0]) > 5:
                try:
                    b = TextBlob(unicode(i[0]))
                    currentLanguage = b.detect_language()
                    if currentLanguage in languageMap:
                        languageMap[currentLanguage] += 1
                    else:
                        languageMap[currentLanguage] = 1
                except:
                    # NOTE(review): bare except hides real errors (network,
                    # encoding) — consider narrowing.
                    pass
                currentNumber += 1
                print currentNumber
    # Expand counts back into a flat list just to feed Counter.
    listOfWords = []
    for i in languageMap:
        for x in range(0, languageMap[i]):
            listOfWords.append(i)
    listOfWordsCounter = collections.Counter(listOfWords)
    print 'Best Languages:', listOfWordsCounter.most_common(5)
    print languageMap
def getEntities(parser, tweet, xEntities):
    """Collect NNP/NN/PRP-tagged tokens from *tweet* into *xEntities*
    (word -> tag) using three taggers: pattern's tag(), TextBlob's
    parse(), and a spaCy parser.

    Returns the updated dict — or the exception object on failure;
    NOTE(review): callers likely expect the dict; consider re-raising.
    """
    try:
        spacyParsedObject = parser(tweet)
        sentence = TextBlob(tweet)
        textblobTaggedObject = sentence.parse().split()
        patterntaggedObject = tag(tweet, tokenize=True)
        # 1) pattern tagger: (word, tag) pairs
        for word in patterntaggedObject:
            word, wordtag=word
            if wordtag == "NNP" or wordtag == "NN" or wordtag == "PRP":
                v = str(word)
                v = v.strip()
                if(v not in xEntities):
                    xEntities[v]=str(wordtag)
        # 2) TextBlob parse output: nested token lists
        for taggedObject in textblobTaggedObject:
            for word in taggedObject:
                word, wordtag=word[0], word[1]
                if wordtag == "NNP" or wordtag == "NN" or wordtag == "PRP":
                    v = str(word)
                    v = v.strip()
                    if(v not in xEntities):
                        xEntities[v]=str(wordtag)
        # 3) spaCy tokens expose .tag_
        for word in spacyParsedObject:
            if word.tag_ == "NNP" or word.tag_ == "NN" or word.tag_ == "PRP":
                v = str(word)
                v = v.strip()
                if(v not in xEntities):
                    xEntities[v]=str(word.tag_)
        return xEntities
    except Exception as e:
        return e
def nounize(aline):
    """Replace each noun (NN) in *aline* with a random word from the
    module-level `postnouns` list and return the space-joined result.

    NOTE(review): the collapsed source makes the loop indentation
    ambiguous — the append may belong inside the `if`, which would drop
    all non-noun words from the output; confirm against the original.
    """
    words = ''
    # Python 2: aline is a byte string
    aline = TextBlob(aline.decode('ascii', errors='replace'))
    for word, tag in aline.tags:
        if tag == 'NN':
            word = random.choice(postnouns).strip()
        words = words + ' ' + word
    return words
def translate_this(jenni, msg):
    """Detect the language of the captured text and, when it is not
    already English, reply with an English translation."""
    blob = TextBlob(msg.groups()[0])
    source = blob.detect_language()
    if source == 'en':
        return
    translated = blob.translate(from_lang=source, to='en')
    jenni.reply("{}".format(translated))
def sentiment(): doob = "Great Movie!" blob = TextBlob(doob) for sentence in blob.sentences: print(sentence.sentiment) print blob.translate(to="cn")
def to_english(message, original_language=None):
    """Translate *message* to English, using *original_language* as the
    source when given, otherwise letting TextBlob auto-detect.

    BUGFIX: the body referenced an undefined name `text`, raising
    NameError on every call; it now uses the `message` parameter.
    """
    blob = TextBlob(message)
    if original_language is not None:
        return blob.translate(from_lang=original_language, to="en")
    else:
        return blob.translate(to="en")
def translate(phrase, from_lang, to_lang='en'):
    """Translate *phrase* from *from_lang* to *to_lang* and return the
    plain string; return an apology message on any failure."""
    blob = TextBlob(phrase)
    try:
        translation = blob.translate(from_lang=from_lang, to=to_lang)
        return translation.string
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        return "Sorry, no translation!"
def gen_translate(msg, fromlang, outputlang):
    """Translate *msg* between the given languages; return the input
    untouched when the API reports it as not translated.

    FIXME: language detection is broken.
    """
    try:
        return str(TextBlob(msg).translate(from_lang=fromlang, to=outputlang))
    except NotTranslated:
        return msg
def get_tupels(text):
    """Return the bigrams of lowercased *text* as a list of 2-tuples.

    # assumption: don't is two words (do n't), as in "do not"
    # this can be easily changed by modifying the tokenizer
    # http://stackoverflow.com/questions/30550411
    """
    ngrams = TextBlob(text.lower()).ngrams(n=2)
    # One tuple() per n-gram suffices (the original applied it twice);
    # returning a concrete list instead of a lazy map is re-iterable.
    return [tuple(gram) for gram in ngrams]
def get_text(self):
    """ NOTE: THIS SHOULD NOT REBUILD DICT EVERY TIME -- REFACTOR """
    # Build an n-gram model from this object's raw content and store a
    # generated sentence on self.text.
    blob = TextBlob(self.content.decode('utf-8'))
    words_ = blob.split()
    d = parser.build_ngram_dict(words_)
    s = parser.build_sentence(d)
    # TODO: add check for max text length
    self.text = s
def translate_pt(bot, update):
    """Reply to a Telegram message with its Brazilian-Portuguese
    translation (Python 2; network call via TextBlob)."""
    chat_id = update.message.chat_id
    source_text = text_replace(update.message.text)
    translated = TextBlob(source_text).translate(to='pt-BR')
    return bot.sendMessage(chat_id, text=u'Tradução: %s' % unicode(translated))
def translate(string, lang):
    # NOTE(review): this early return disables the function — every call
    # yields "" and all code below is unreachable. Presumably a temporary
    # stub; confirm intent before removing it.
    return ""
    tb = TextBlob(string)
    if lang != "en":
        try:
            tb = tb.translate(to="en")
        except:
            # bare except: best-effort translation, keep original on failure
            pass
    return str(tb)
def GetBigrams(text):
    """Return the bigrams of *text* as underscore-joined strings.

    Each entry keeps the trailing underscore the original format used,
    e.g. "hello_world_".
    """
    pairs = TextBlob(text).ngrams(n=2)
    return ["".join(token + "_" for token in pair) for pair in pairs]
def correctSpelling(text):
    '''
    Correcting the spelling of the words
    :param text: the input text
    :return: a TextBlob with best-guess spelling corrections applied
    '''
    return TextBlob(text).correct()
fil = 'data/tweethack1.json' sleep_time = 60*5 i = 0 while i < 100000: hack_dict = {} breach_list = [] ddos_list = [] hijack_list = [] tstmp = str(datetime.datetime.now()).replace('-','').replace(' ','').split(':')[0] + str(datetime.datetime.now()).split(':')[1] try: # Twitter sentiment anlysis for word in breach_words: breach_tweets = api.search(word) for tweet in breach_tweets: analysis = TextBlob(tweet.text) sentiment = analysis.sentiment.polarity if sentiment < 0: breach_list.append(str(analysis)) for word in ddos_words: ddos_tweets = api.search(word) for tweet in ddos_tweets: analysis = TextBlob(tweet.text) sentiment = analysis.sentiment.polarity if sentiment < 0: ddos_list.append(str(analysis)) for word in hijack_words: hijack_tweets = api.search(word) for tweet in hijack_tweets:
import tweepy
from textblob import TextBlob

# Quick TextBlob demo on a fixed sentence.
wiki = TextBlob("Vivek is always angry beacuse he can't manage his time")
# print(wiki.tags)   # Parts of speech
# print(wiki.words)  # Tokenize
print(wiki.sentiment)

# SECURITY NOTE(review): hard-coded Twitter API credentials committed to
# source — these should be revoked and loaded from the environment or a
# secrets store instead.
consumer_key = 'o5CbrDAJkpCLBhHTsu3YkSsvN'
consumer_secret = '2irncRv189vQTBMF3qAO5vwO4LpEHT29rH8r3nagzzvNt9IEEQ'
access_token = '2996486912-b7NCHNfnISl5fsXVO0OLH4Dl7NyfnXCtxwTgsUh'
access_token_secret = ' 9KJksG6vLknQs80MimZvHVoiAuYkeGaXrtUxL8Sulxkeg'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# Print each 'Trump' search result with its TextBlob sentiment.
public_tweets = api.search('Trump')
for tweet in public_tweets:
    print(tweet.text)
    analysis = TextBlob(tweet.text)
    print(analysis.sentiment)
    print("")
def _spell_check(question):
    """Return *question* with TextBlob's best-guess spelling corrections."""
    blob = TextBlob(question)
    return blob.correct()
def main():
    """Score SegPhrase candidate phrases and write a TSV report.

    combined = sqrt(quality * (wiki_score + 1) * (noun_phrase_count + 1))
    Input lines: phrase \t wiki_score \t ... \t quality_score.
    """
    input_filepath = "linked_results.wiki.txt"
    output_filepath = "linked_results.wiki.pos.tsv"
    start = time.time()
    np_phrase_cnt = 0
    phrase_only = True  # NOTE(review): unused in the active code path
    with open(input_filepath, "r") as fin, open(output_filepath, "w") as fout:
        cnt = 0
        fout.write("\t".join([
            "Phrase", "Combined Score", "Phrase Quality Score",
            "Wiki Linking Score", "NP Count Score", "\n"
        ]))
        for line in fin:
            cnt += 1
            if cnt % 1000 == 0:
                # progress indicator
                print(cnt)
            line = line.strip()
            segs = line.split("\t")
            phrase = segs[0]
            phrase_quality_score = float(segs[-1])
            try:
                wiki_score = int(segs[1])
                np_cnt_score = len(TextBlob(phrase).noun_phrases)
            except (ValueError, UnicodeDecodeError) as e:
                # skip malformed or non-decodable lines
                continue
            combined_score = math.sqrt(phrase_quality_score * (wiki_score + 1) * (np_cnt_score + 1))
            fout.write("\t".join([
                "_".join(phrase.split()), str(combined_score),
                str(phrase_quality_score), str(wiki_score),
                str(np_cnt_score), "\n"
            ]))
    # (earlier output formats removed; see version history)
    end = time.time()
    print("Number of additional noun phrases: %s" % np_phrase_cnt)
    print("Finish using POS Tagger for NP extraction using %s seconds" % (end - start))
# Per-tweet pipeline: load the tweet JSON into a DataFrame, trim/clean
# and stem each text, translate it (tr is a pre-configured translator
# object from elsewhere in the file), then wrap the result in a TextBlob
# for sentiment scoring (the classification itself is commented out).
print(tweet)
df = pd.read_json(tweet)
for index, row in df.iterrows():
    test = row['text']
    n = len(test)
    # NOTE(review): slicing [2:n-1] looks like it strips a b'...' bytes
    # repr wrapper — confirm against the input format.
    ges = test[2:n - 1]
    print(ges)
    gas = ges.strip()
    blob = clean_tweet(gas)
    hasil = stemmer.stem(blob)
    print(hasil)
    blob1 = str(hasil)
    tr.set_text(blob1)
    bersih = tr.translate()
    kedas = TextBlob(bersih)
    print(bersih)
    # (positive/negative/neutral classification and mongo insert are
    # commented out in the original)
def __call__(self, text):
    """Return the lemmatized, lowercased tokens of *text* that also
    appear in self.words."""
    tokens = TextBlob(text).words.lemmatize().lower()
    return set(tokens).intersection(self.words)
# Parse the Status objects dates = [] polarities = [] for s in statuses: # Uncomment below to print the contents of the tweets status_text = s.text status_time = s.created_at # print '\n' + status_time # print s.text fav_count = s.favorite_count retweet_count = s.retweet_count # print "Favorite Count: " + str(fav_count) # print "Retweet Count" + retweet_count # Run sentiment analysis using TextBlob tb = TextBlob(status_text) status_polarity = tb.sentiment.polarity polarities.append(status_polarity) # Parse and format the date/time of the tweet split_time = status_time.split(" ") dt = datetime.datetime(int(split_time[5]), monthmap[split_time[1]], int(split_time[2]), 0, 0) dates.append(dt) # Create numpy arrays for dates and polarities of the tweets date_array = np.array([dt for dt in dates]) polarities_array = np.array(polarities) # Aggregate tweets that are on the same date and take average polarity
welcome = sys.argv[1] else: welcome = "How are you, Coco?" runTime = 60 ;# seconds startTime = time.time() while 1: if initialGreetings == 0: cocoBot(welcome) else: cocoBot(response) response = raw_input() chechLanguage(response) responseMsg = TextBlob(response) if initialGreetings != 1: cocoAssignsAvatar() initialGreetings = 1 print("After cocoAssignsAvatar()") itsTimeForBye = 0 for word in responseMsg.words: if word.lower() in USER_INIT_BYE: itsTimeForBye = 1 elapsed = time.time() - startTime if elapsed >= runTime : cocoWantsABreak("cocoInitBye") response = raw_input(str(user_avatar) + " >> ")
# Build a bilingual dictionary: translate the n most common words of the
# source corpus and write "source target" pairs split into a train file
# (first 5000) and a test file (the rest).
#
# argv: [1]=corpus file, [2]=train out, [3]=test out, [4]=source lang,
#       [5]=target lang, [6]=tab-separated POS tag-mapping file
n = 6000
train_n = 5000
test_n = 1000
allwords = re.findall('\w+', open(sys.argv[1]).read())
word_list = Counter(allwords).most_common(n)
# Load the POS tag mapping (currently only used by the commented line).
m = open(sys.argv[6], "r")
tags = {}
for line in m:
    pair = line.split('\t')
    tags[pair[0]] = pair[1].rstrip()
m.close()
f1 = open(sys.argv[2], "w")
f2 = open(sys.argv[3], "w")
source = sys.argv[4]
target = sys.argv[5]
count = 0
for word in word_list:
    # word is a (token, frequency) pair from Counter.most_common
    word_map = TextBlob(word[0]).translate(from_lang=source, to=target)
    #tag = tags[TextBlob(word[0]).tags[0][1]]
    word_pair = (word[0].rstrip() + " " + word_map.string + "\n")
    count = count + 1
    if count <= train_n:
        f1.write(word_pair.encode('utf8'))
    else:
        f2.write(word_pair.encode('utf8'))
f1.close()
f2.close()
num = 11 elif (wordFinder("@oursoutheastern", line)): num = 12 elif (wordFinder("@Grambling1901", line)): num = 13 elif (wordFinder("@SouthernU_BR", line)): num = 14 elif (wordFinder("@nsula", line)): num = 15 elif (wordFinder("@LA_College", line)): num = 16 elif (wordFinder("@NichollsState", line)): num = 17 tweets_per_college[num] += 1 college_sentiment_sum[num] += TextBlob(line).sentiment.polarity update(num, line, countsCollege, sentimentsums) for t in range(0, 8): sentimentsums[t] = sentimentsums[t] / countsCollege[t] for t in range(0, 18): college_sentiment_sum[t] = college_sentiment_sum[t] / tweets_per_college[t] #prints sentiment averages for different factors: popn, rank, region, followers on twitter '''for s,c,l,h in zip (sentimentsums, countsCollege, mini, maxi): print ('%.3f %d' + str(l).rjust(5) + str(h).rjust(5)) % (s, c)''' print 'College'.rjust(25) + 'No. of tweets'.rjust( 15) + 'Average sentiment score'.rjust(30) for n, t, s in zip(college, tweets_per_college, college_sentiment_sum):
def getsent(st):
    """Sentiment polarity of *st* via TextBlob; 0 for non-string input."""
    if not isinstance(st, str):
        return 0
    return TextBlob(st).sentiment.polarity
for row in reader: review= dict() review['id'] = int(row[0]) review['patient'] = row[1] review['review'] = row[2] review['clean'] = review['review'] # Remove all non-ascii characters review['clean'] = strip_non_ascii(review['clean']) # Create textblob object review['TextBlob'] = TextBlob(review['clean']) reviews.append(review) # DEVELOP MODELS for review in reviews: review['polarity'] = float(review['TextBlob'].sentiment.polarity) review['subjectivity'] = float(review['TextBlob'].sentiment.subjectivity) if review['polarity'] >= 0.1: review['sentiment'] = 'positive' elif review['polarity'] <= -0.1:
# Plot setup: matplotlib style + tick locator for the sentiment charts.
print(plt.style.available)
plt.style.use("seaborn-talk" )  # _classic_test, fivethirtyeight, classic, bmh, seaborn-talk
loc = plticker.MultipleLocator(base=.3)

polarity = []
subjectivity = []
lines = []
polarityEqualsZero = 0  # count of perfectly neutral lines

# Score each distinct, non-empty lyric line with TextBlob.
with open("./GambinoSong.txt") as f:
    for line in f.read().split("\n"):
        if line != "" and line not in lines:
            sentiment = TextBlob(line)
            if sentiment.sentiment.polarity != 0:
                polarity.append(sentiment.sentiment.polarity)
            else:
                # neutral lines are counted separately but still plotted
                polarityEqualsZero += 1
                polarity.append(sentiment.sentiment.polarity)
            subjectivity.append(sentiment.subjectivity)
            lines.append(line)

def plot(p, data, label, fontsize=12):
    """Draw *data* on axes *p* with an x label of LINES and *label* on y."""
    p.plot(data)
    p.locator_params(nbins=3)
    p.set_xlabel("LINES", fontsize=fontsize)
    p.set_ylabel(label, fontsize=fontsize)
# Load the first 1000 rows of the Twitter training data.
twt = pd.read_csv('twitter training data.csv', encoding = 'latin-1')
twt.head()
twt = twt.iloc[:1000]
#nltk.download()

# Sentiment analysis using Text Blob
# Creating empty dataframe to store results
FinalResults = pd.DataFrame()

# Run Engine — column index 5 holds the tweet text; score each tweet's
# polarity with TextBlob and accumulate one-row frames.
for i in range(0, twt.shape[0]):
    blob = TextBlob(twt.iloc[i,5])
    temp = pd.DataFrame({'Tweets': twt.iloc[i,5], 'Polarity': blob.sentiment.polarity}, index = [0])
    FinalResults = FinalResults.append(temp)

# Bucket polarity into Positive/Negative/Neutral labels.
FinalResults['Sentiment'] = FinalResults['Polarity'].apply(lambda x: 'Positive' if x>0 else 'Negative' if x<0 else 'Neutral')
FinalResults['Sentiment'].describe()
#Results: Most of the tweets are Neutral

# Sentiment Analysis using Vader
FinalResults_Vader = pd.DataFrame()
# (continues a counter from earlier context)
j += 1
k = 0
# Write per-word sentiment statistics for the top SIZE words to output.csv:
# one row per occurrence plus a summary row of averages per word.
with open('output.csv', 'wb') as c:
    writer = csv.writer(c)
    writer.writerow(['Word', 'Count', 'Sentence', 'Splice', 'Polarity', 'Sentence Pol', 'Subjectivity', 'Avg Polarity', 'Avg Whole Pol', 'Avg Subjectivity','Location'])
    while k < SIZE:
        polarSum = 0
        subjectSum = 0
        wholeSum = 0
        # spot[0] indexes the sentence this word occurred in
        for spot in topWords[k].getSentenceArray():
            splice = getSplice(spot)
            whole = sentences[spot[0]]
            wholePol = TextBlob(whole.decode('utf-8')).polarity
            polarity = TextBlob(splice.decode('utf-8')).polarity
            subjectivity = TextBlob(splice.decode('utf-8')).subjectivity
            polarSum += polarity
            subjectSum += subjectivity
            wholeSum += wholePol
            writer.writerow([str(topWords[k].getWord()), str(topWords[k].getCount()), sentences[spot[0]], str(splice), str(polarity), str(wholePol), str(subjectivity)])
        topWords[k].setAvgPol(polarSum/topWords[k].getCount())
        topWords[k].setAvgSub(subjectSum/topWords[k].getCount())
        # summary row: averages for this word
        writer.writerow([" ", " ", " ", " ", " ", " ", " ", str(polarSum/topWords[k].getCount()), str(wholeSum/topWords[k].getCount()), str(subjectSum/topWords[k].getCount()), str(topWords[k].getSentenceArray())])
        k += 1
c.close()  # NOTE(review): redundant — the with-block already closed the file
# Parse the fully-scrolled page source with BeautifulSoup and pull each
# Reddit post's title, TextBlob sentiment scores, URL and timestamp.
print("Scrolling complete. Data collected.\nAnalysing data...")
source_data = browser.page_source
# Throw your source into BeautifulSoup and start parsing!
soup = bs(source_data, "html5lib")
posts = soup.find_all('div', class_="scrollerItem")
now = datetime.datetime.now()
for div in posts:
    print("----------------------")
    div_descendants = div.descendants
    post = Post()
    for descElement in div_descendants:
        if descElement.name == 'h2':
            # h2 holds the post title; score it with TextBlob
            print(descElement.text)
            print(TextBlob(descElement.text).sentiment)
            post.title = descElement.text
            post.polarity = round(
                TextBlob(descElement.text).sentiment.polarity, 2)
            post.subjectivity = round(
                TextBlob(descElement.text).sentiment.subjectivity, 2)
        if descElement.name == 'a' and descElement.get(
                'data-click-id') == 'body':
            print(descElement.get("href"))
            post.url = descElement.get("href")
        if descElement.name == 'a' and descElement.get(
                'data-click-id') == 'timestamp':
            # relative timestamp text, e.g. "5 hours ago"; processing
            # continues beyond this chunk
            wordList = descElement.text.split(" ")
# Inspect the fetched tweets, then build polarity/subjectivity lists and
# start a word-frequency dictionary over the combined tweet text.
print("Tweet text:", tweet_data[0]['text'])
# iterate every index of tweet_data and print each tweet's text
for t in range(len(tweet_data)):
    print("Tweet text: ", tweet_data[t]['text'])

# Textblob sample:
#tb = TextBlob("You are a brilliant computer scientist.")
#print(tb.polarity)

polarity = []
subjectivity = []
tweets = ""  # accumulate all tweet text into one string for the word cloud
for e in range(len(tweet_data)):
    tweet_blob = TextBlob(tweet_data[e]['text'])
    polarity.append(tweet_blob.polarity)
    subjectivity.append(tweet_blob.subjectivity)
    tweets = tweets + tweet_data[e]['text']

textbird_tb = TextBlob(tweets)
undesired_words = ["hi", "bye", "interesting", "goodnight", "spider", "fear"]
filtered_dictionary = {}
# NOTE(review): `filtered_words`, `words` and `count` are undefined at
# this point — this line raises NameError; it probably belongs inside
# the loop below once those names exist. Confirm and fix.
filtered_words[words] = count
for word in textbird_tb.words:
    if(len(word) < 2):
        continue
    elif( not word.isalpha()):
        continue
for line in con: if (len(line) <= 5): continue cnt += 1 obj = json.loads(line.replace('\n', '')) text = obj.get('text') text = text.replace('\t', '').replace('\n', '') if text in tweet_list: continue out = analyser.polarity_scores(text) compound = out['compound'] text_blob = TextBlob(text) polarity = text_blob.sentiment.polarity if not out['pos'] > 0.1: if out['pos'] - out['neg'] < 0: neg_count += 1 outfile.write( obj.get('id_str') + '\t' + obj.get('created_at') + '\t' + text + '\t' + str(compound) + '\t' + str(polarity) + '\t' + str(0) + '\t' + str(1) + '\t' + str(profanity.contains_profanity(text)) + '\n') tweet_list.append(text) if not out['neg'] > 0.1: if out['pos'] - out['neg'] > 0: pos_count += 1 outfile.write(
def getPolarity(text): return TextBlob(text).sentiment.polarity
def getSubjectivity(text): return TextBlob(text).sentiment.subjectivity
def n_containing(word, text_list):
    """Number of documents in *text_list* that contain *word*."""
    return sum(1 for text in text_list if word in text)

def idf(word, text_list):
    """Inverse document frequency with +1 smoothing in the denominator."""
    return math.log(len(text_list) / (1 + n_containing(word, text_list)))

def tf_idf(word, text, text_list):
    """TF-IDF score; relies on a module-level term_frequency()."""
    return term_frequency(word, text) * idf(word, text_list)

# Load ./doc-res/doc_1.txt, doc_2.txt, ... until a file is missing.
file_num = 1
text_list = []
while True:
    try:
        with open('./doc-res/doc_' + str(file_num) + ".txt", 'r') as doc:
            file_num += 1
            text_list.append(TextBlob(doc.read()))
    except FileNotFoundError:
        break

# Print the four highest-scoring words of each document.
for i, text in enumerate(text_list):
    print("Top words in document {}".format(i + 1))
    ratings = {word: tf_idf(word, text, text_list) for word in text.words}
    sorted_words = sorted(ratings.items(), key=lambda x: x[1], reverse=True)
    for word, rating in sorted_words[:4]:
        print(f"Word: {word}, TF-IDF: {round(rating, 5)}")
def update(num, line, countsCollege, sentimentsums):
    """Accumulate per-category sentiment statistics for one tweet.

    For each of four binary groupings of college index *num*, bump the
    bucket's tweet count, add the line's polarity to its running sum, and
    track the min/max polarity in the module-level `mini`/`maxi` lists
    (defined elsewhere in the file).

    PERF/BUGFIX: the original recomputed TextBlob(line).sentiment.polarity
    up to six times per branch; it is a deterministic local computation,
    so it is now done once with identical results.
    """
    polarity = TextBlob(line).sentiment.polarity

    def _accumulate(idx):
        # one bucket update: count, sum, min, max
        countsCollege[idx] += 1
        sentimentsums[idx] += polarity
        if polarity < mini[idx]:
            mini[idx] = polarity
        if polarity > maxi[idx]:
            maxi[idx] = polarity

    _accumulate(0 if num in (0, 1, 2, 4, 6, 7, 12, 15) else 1)
    _accumulate(2 if num in (0, 1, 2, 3, 4, 5, 6, 12, 14, 15) else 3)
    _accumulate(4 if num in (2, 5, 6, 13, 15) else 5)
    _accumulate(6 if num in (1, 2, 4, 6, 14) else 7)
def on_status(self, status):
    """Handle one incoming tweet: extract fields, score sentiment, persist.

    Skips retweets.  Returns True to keep the stream alive while within
    the configured time window, False to stop listening.
    """
    if status.retweeted:
        # Avoid retweeted info: only original tweets are processed.
        return True

    # Extract attributes from each tweet.
    id_str = status.id_str
    created_at = status.created_at
    text = self.deEmojify(status.text)  # pre-processing the text
    # Pre-trained TextBlob model providing sentiment info for the tweet.
    sentiment = TextBlob(text).sentiment
    polarity = sentiment.polarity
    subjectivity = sentiment.subjectivity
    user_created_at = status.user.created_at
    user_location = self.deEmojify(status.user.location)
    user_description = self.deEmojify(status.user.description)
    user_followers_count = status.user.followers_count
    longitude = None  # initialize
    latitude = None  # initialize
    if status.coordinates:  # only present when the tweet is geotagged
        longitude = status.coordinates['coordinates'][0]
        latitude = status.coordinates['coordinates'][1]
    retweet_count = status.retweet_count
    favorite_count = status.favorite_count
    print("status.text: ", status.text)
    print("Long: {}, Lati: {}".format(longitude, latitude))

    # Store all data in PostgreSQL.
    try:
        self.engine.connect()
        self.mydb = self.engine.raw_connection()
        self.mycursor = self.mydb.cursor()
        sql = "INSERT INTO {} (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)".format(settings.TABLE_NAME)
        val = (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location,
               user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count)
        self.mycursor.execute(sql, val)
        self.mydb.commit()
        # Trim oldest rows so the table only tracks recent daily info.
        # BUG FIX: the COUNT(*) subquery previously hard-coded table
        # "twitter2" while every other reference used settings.TABLE_NAME,
        # so the size guard silently broke for any other table name.
        delete_query = '''
        DELETE FROM {0}
        WHERE id_str IN (
            SELECT id_str
            FROM {0}
            ORDER BY created_at asc
            LIMIT 200) AND (SELECT COUNT(*) FROM {0}) > 9600;
        '''.format(settings.TABLE_NAME)
        self.mycursor.execute(delete_query)
        self.mydb.commit()
    except Exception as error:
        print("Error inserting/deleting info into/from the twitter table: ", error)
    finally:
        # BUG FIX: close the cursor even when the insert/delete raised;
        # the original only closed it on the success path.
        try:
            self.mycursor.close()
        except Exception:
            pass

    # Time-window check: keep streaming until the configured limit elapses.
    if (time.time() - self.start_time) < self.limit_time:
        print("Working")
        return True  # continue listening to the Twitter stream
    else:
        print("Time Complete")
        return False  # stop listening to the Twitter stream
# -- Sentiment Analysis -- # sub_df = pd.read_csv( "/Users/FCRA/Desktop/ALL/BSI/bsi-reddit-gme/pyfiles/sub_df.csv") sent_df = sub_df[["created", "author", "title"]] # already preproc titles sub_df2 = pd.read_csv( "/Users/FCRA/Desktop/ALL/BSI/bsi-reddit-gme/sentiment_files/preproc_titles.csv" ).reset_index(drop=True) sent_df["ptitle"] = sub_df2["title"] # --- General Sentiment of Titles with TextBlob sent_df["polarity_textBlob"] = sent_df["ptitle"].apply( lambda x: TextBlob(x).sentiment.polarity) sent_df["created"] = pd.to_datetime(sent_df["created"]).dt.floor('d') daily_sent_df_textBlob = sent_df[["created", "polarity_textBlob" ]].groupby(["created"], as_index=False).mean() daily_sent_df_textBlob["z_polarity_textBlob"] = daily_sent_df_textBlob[ "polarity_textBlob"] / daily_sent_df_textBlob["polarity_textBlob"].std( axis=0) #sent_df[["ptitle", "polarity_textBlob"]].to_csv("titles_textblob.csv") # --- Sentiment using Vader and styled lexicon vader = SentimentIntensityAnalyzer() vader.lexicon.update(new_words)
def NaiveBaiyes_Sentimental(sentence): blob = TextBlob(sentence, analyzer=NaiveBayesAnalyzer()) NaiveBayes_SentimentScore=blob.sentiment.classification return NaiveBayes_SentimentScore
from textblob import TextBlob d = TextBlob('welcome to world of book') print(d.sentences) print(d.words) print(d.noun_phrases)
def score(self, text: str) -> float: # pip install textblob from textblob import TextBlob return TextBlob(text).sentiment.polarity
<th width="230px">Username</th> <th>Product Id </th> <th>Review</th> </tr> """ fo = open(b) reader = csv.reader(fo) fi = open("pro.csv") read = csv.reader(fi) cs = csv.writer(open("WEIGHT.csv", "wb")) from textblob import TextBlob for r in reader: review = TextBlob(r[7]) w = 0 if review.sentiment.subjectivity < 0.5: w = w + 0.1 else: w = w + 0 helpful = float(r[3]) outof = int(r[4]) if outof == 0: w = w + 0.1 else: if outof < 9: value = helpful * outof ratio = value / outof if ratio < 0.5:
text = BeautifulSoup(text, features="html.parser") # removes html tags text = text.get_text() # removes target=blank Markdown tags text = text.replace("{:target=\"_blank\"}", '') # removes Markdown links text = regex.sub(linkremover, '', text) # removes anything that isn't an alphabetical character and casts the remaining string to lowercase text = regex.sub(nonalphabeticalremover, ' ', text).lower() blob = TextBlob(text) sentiments.update({filename: blob.sentiment.polarity}) wordsperpost.update({filename: len(text.split())}) wordcount += len(text.split()) # nltk stemming/token magic from http://ryancompton.net/2014/06/06/statistical-features-of-infinite-jest/ tokens = nltk.word_tokenize(text) stemmer = nltk.stem.PorterStemmer() stemmed_tokens = map(lambda x: stemmer.stem(x), tokens) for token in stemmed_tokens: if token in stems: newVal = stems.get(token) + 1 stems.update({token: newVal})
def checkText(app):
    """Scan the decompiled layout XML of *app* for threatening UI text.

    Trains a Naive Bayes classifier on hand-labelled ransomware phrases,
    extracts every text="..." attribute value from the app's layout files
    under F:\\Apktool\\<app>\\res\\layout\\, classifies each extracted
    string, and returns 1 if any string is labelled 'threat', else 0.
    """
    train = [
        ('Find a retail location near you.', 'moneypak'),
        ('Look for a MoneyPak in the prepaid section.', 'moneypak'),
        # BUG FIX: label was ' moneypak' (leading space), which silently
        # created a spurious fourth class in the classifier.
        ('Take it to the cashier and load it with cash.', 'moneypak'),
        ('To pay fine you should enter the digits MoneyPak resulting pass in the payment form and press Pay MoneyPak.', 'moneypak'),
        ('Amount of fine is 500 dollars.', 'moneypak'),
        ('After submitting the MoneyPak voucher your fine will be redeemed in hours.', 'moneypak'),
        ('You can settle the fine with MoneyPak vouchers.', 'moneypak'),
        ('As soon as the money arrives to the Treasury account your Mobile Device will be unlocked and all information will be decrypted in course of hours.', 'moneypak'),
        ('Fine should be paid not later than in 12 hours since current time.', 'moneypak'),
        ('To unlock your device and to avoid other legal consequences you are obligated to pay a release fee of 300 dollars', 'moneypak'),
        ('Payable through GreenDot MoneyPak.', 'moneypak'),
        ('load it with 300 dollars and enter the code.', 'moneypak'),
        ('The penalty as a base measure of punishment on you which you are obliged to pay in a current of is imposed.', 'moneypak'),
        ('Click to win lottery worth ten millions.', 'moneypak'),
        ('This device lock is aimed to stop your illegal activity.', 'threat'),
        ('Your Device has been blocked up for safety reasons listed below.', 'threat'),
        ('All the actions performed on this device are fixed.', 'threat'),
        ('You have been blocked to use your device. you need to pay a fee of rupees 500 to unblock.', 'threat'),
        ('In less than 3 days your device data will be sent to our servers and you will be blocked to use your data. To abort this you should pay a ransom of 500 rupees.', 'threat'),
        ('Please mind that both your personal identities and location are well identified and criminal case can be opened against you in course of hours as of commission of crimes per above Articles.', 'threat'),
        ('The penalty set must be paid in course of hours as of the breach.', 'threat'),
        ('On expiration of the term hours that follow will be used for automatic collection of data on yourself and your misconduct and criminal case will be opened against you.', 'threat'),
        ('This device is locked due to the violation of the federal laws of the United States of America', 'threat'),
        ('The messages with terroristic motives attempts in respect of political persons and also in respect of top public officials were sent from your device', 'threat'),
        ('According to these data you will be permitted to pay fine in State Treasury in the consequence of initiative directed on protection of cyber space in U.S.A and in doing so to seize your clerical correspondence and taking your criminal case to court for decision formulation.', 'threat'),
        ('In case of penalty non-redemption your case will be taken to court for the further decision formulation and determination of your criminal case.', 'threat'),
        ('Seize clerical correspondence taking criminal case court decision formulation', 'threat'),
        ('Penalty non redemption case taken court decision formulation determination criminal', 'threat'),
        ('For this reason your device has been locked.', 'threat'),
        ("Information on your location and snaphots containing your face have been uploaded on the fbi cyber crime department's datacenter.", 'threat'),
        ('According to these positions your actions bear criminal character and you are a criminal subject.', 'threat'),
        ("If you don't adhere to the instructions provided you can be jailed under cyber crime law.", 'threat'),
        ("Send your phone details if you want to unlock your phone.", 'threat'),
        ('install', 'non-threat'),
        ('@string', 'non-threat'),
        ('The government policies have been changed', 'non-threat'),
        ('Under supervision of FBI.U.S.A. Ministry of Interior Interpol Copyright Alliance International Cyber Security Protection Alliance.', 'non-threat'),
        ('You are accused of committing the crime envisaged by Article 1 of United States of America criminal law.', 'non-threat'),
        ('Article 1 of United States of America criminal non-threat provides for the punishment of deprivation of liberty for terms from 5 to years.', 'non-threat'),
        ('Article Section Cause', 'non-threat'),
        ('The policies of government has been changed', 'non-threat'),
        ('you have been restricted by government agencies to continue', 'non-threat'),
        ('Article 1 Section 8 Cause 8 of the Criminal Code provides for a fine of two to five hundred minimal wages or a deprivation of liberty for two to eight years.', 'non-threat'),
        ('Thus violating article 2 of the Criminal Code of United States of America Article 2 of the Criminal Code provides for a deprivation of liberty for four to twelve years.', 'non-threat'),
        # NOTE(review): 'law' appears only once as a label — possibly meant
        # to be 'non-threat'; left unchanged pending confirmation.
        ('Spam distribution on e-mail addresses of private persons containing the advertisement of illegal drugs addictive substances or psychopharmaceuticals on the territory of the USA', 'law'),
        ('Disconnection or disposal of the device or your attempts to unlock the device independently will be apprehended as unapproved actions interfering the execution of the law of the united states of america (read section 1509 - obstruction of court orders and section 1510 - obstruction of criminal investigations.', 'non-threat'),
        ('In this case and in case of penalty non-payment in a current of three calendar days from the date of this notification. The total amount of penalty will be tripled and the respective fines will be charged to the outstanding penalty.', 'non-threat'),
        ('In case of dissent with the indicted prosecution you have the right to challenge it in court.', 'non-threat'),
        ('Distribution of false information about politicians.', 'non-threat'),
        ('You have been subjected to violation of Copyright and Related Rights Law (Video Music Software) and illegally using or distributing copyrighted contents', 'copyright'),
        ('Distribution of episodes containing abuse or p**n material with participation of animals', 'p**n'),
        ('Distribution and keeping of p**n material with participation of children', 'p**n'),
        ('You are accused of viewing and storage of forbidden child pornography and zoophilia.', 'p**n'),
        ('You have violated World Declaration on non-proliferation of child pornography.', 'p**n'),
        ('You have been viewing or distributing prohibited Pornographic content (Child Pornography photos and etc were found on your device).', 'p**n'),
        ('Video materials of p**n content including sexual acts with children (persons up to 9 years old).', 'p**n'),
        ('Video materials containing the scene of child (persons up to 9 years) and animals abuse.', 'p**n'),
        ('Video materials are made with the help of your device contain the scenes of cruel child and animals abuse.', 'p**n'),
        ('As a result of full scanning of your device some suspicious files have been found.', 'p**n'),
        ('Your attendance of the forbidden pornographic sites has been fixed.', 'p**n'),
        ('Depiction of animal cruelty.', 'p**n'),
        ('Whoever knowingly creates sells or possesses a depiction of animal cruelty with the intention of placing that depiction in interstate or foreign commerce for commercial gain shall be fined under this title or imprisoned not more than 5 years or both.', 'p**n'),
        ('Certain activities relating to material constituting or containing child pornography.', 'p**n'),
    ]
    c1 = NaiveBayesClassifier(train)
    path = "F:\\Apktool\\%s\\res\\layout\\" % app
    os.chdir(path)  # subsequent open() calls rely on cwd being the layout dir
    all_files = os.listdir(path)
    # Extract the text="..." attribute values from each layout file.
    extracted_texts = []
    for fname in all_files:
        # BUG FIX: the original opened every file but only closed the last
        # handle once after the loop (and raised NameError on an empty dir).
        with open(fname, "r") as handle:
            contents = handle.read()
        found = re.findall(r'text=\"(.*?)\"', contents, re.DOTALL)
        joined = "".join(found).replace('\n', ' ')
        if joined != '':
            extracted_texts.append(joined)
    # Classify each extracted string with the trained Naive Bayes model.
    predicted_labels = []
    for entry in extracted_texts:
        print("Text: " + entry)
        blob = TextBlob(entry, classifier=c1)
        predicted_labels.append(blob.classify())
    threat_count = sum(1 for label in predicted_labels if label == "threat")
    if threat_count >= 1:
        print("THREATENING TEXT PRESENT")
        c = 1
    else:
        print("Threatening Text Not Present")
        c = 0
    return c