def _translate_message(bot, broadcast_list, context):
    if context and "autotranslate" in context:
        _autotranslate = context["autotranslate"]
        origin_language = _get_room_language(bot, _autotranslate["conv_id"])
        for send in broadcast_list:
            target_conversation_id = send[0]
            response = send[1]
            target_language = _get_room_language(bot, target_conversation_id)
            if origin_language != target_language:
                logger.debug("translating {} to {}".format(origin_language, target_language))
                translated = _autotranslate["event_text"]
                try:
                    en_blob = TextBlob(_autotranslate["event_text"])
                    translated = "{0}".format(en_blob.translate(to=target_language))
                    #translated = gs.translate(_autotranslate["event_text"], target_language)
                except Exception:
                    logger.debug("Translation Api returned string unchanged")
                finally:
                    if _autotranslate["event_text"] != translated:
                        # mutate the original response by reference
                        response.extend([
                            hangups.ChatMessageSegment('\n', hangups.SegmentType.LINE_BREAK),
                            hangups.ChatMessageSegment('(' + translated + ')')])
def update_book(book):
    blob = TextBlob(book.description)

    if blob.detect_language() == 'en':
        description = ''
        nouns = filter(lambda x: x[1] == 'NN' or x[1] == 'NNP', blob.tags)

        for noun, tag in nouns:
            description += noun + " "

            if len(noun) > 2:
                description += TextBlob(noun).translate(to='ko').string + " "

    else:
        description = book.description

    book_document = search.Document(
        doc_id=book.ISBN,
        fields=[
            search.TextField(name='title', value=remove_punc(book.title)),
            search.TextField(name='author', value=remove_punc(book.author)),
            search.TextField(name='description', value=remove_punc(description))
        ]
    )

    index = get_book_index()
    index.put(book_document)
Example #3
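# The tokenize() helper below relies on a `stemmer` and a `stopwords` collection defined
# outside this snippet; a minimal sketch of that setup (an assumption, using NLTK):
from nltk.corpus import stopwords as nltk_stopwords
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()
stopwords = set(nltk_stopwords.words('english'))
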
def tokenize(text, spell=False, stem=False, lemma=False, lower=False, stop=False):
    # lowercase, remove non-alphas and punctuation
    b = TextBlob(unicode(text, 'utf8'))

    if spell:
        b = b.correct()
    words = b.words
    if lower:
        words = words.lower()
    if lemma:
        words = words.lemmatize()
    if stem:
        words = [stemmer.stem(w) for w in words]
    if stop:
        tokens = [w.encode('utf-8') for w in words if w.isalpha() and w not in stopwords]
    else:
        tokens = [w.encode('utf-8') for w in words if w.isalpha()]
    # letters_only = re.sub("[^a-zA-Z]", " ", text)

    # # ngrams
    # temp_list = []
    # for i in range(1,ngram+1):
    #     temp = [list(i) for i in TextBlob(' '.join(tokens)).ngrams(i)]
    #     try:
    #         if len(temp[0]) == 1:
    #             temp_list.extend([i[0] for i in temp])
    #         else:
    #             for i in temp:
    #                 temp_list.append(tuple(i))
    #     except:
    #         pass
    # return temp_list
    return tokens
Example #4
def tag_documents_text(client):

	documents = client['cornell']['documents']
	for doc in documents.find():
		blob = TextBlob(doc['text'], pos_tagger=PerceptronTagger())
		parsed_blob = blob.parse()
		documents.update({'name':doc['name']},{'$set':{'parsed_perceptron':parsed_blob}})
Example #5
def extract(ngrams, dataset, doc_id):
    # extract keywords
    print 'Extracting keywords'
    for i, ngram in enumerate(ngrams):
        doc = doc_id[i]

        if field not in dataset[doc]:
            dataset[doc][field] = set()

            if doc > 0 and doc % 1000 == 0:
                print '\t', doc

        for kw in filter(lambda k: '_' in k, ngram):
            keyword = kw.replace('_', ' ')

            kw_tb = TextBlob(keyword)

            # filter out punctuation, etc (make sure that there are two non-punc words)
            if len(kw_tb.words) < 2:
                continue

            # add keywords which are all proper nouns
            distinct_tags = set(t[1] for t in kw_tb.tags)
            if not distinct_tags - {'NNP', 'NNPS'}:
                dataset[doc][field].add(kw_tb.lower())
                continue

            # add noun phrases
            for np in kw_tb.lower().noun_phrases:
                dataset[doc][field].add(np)

    return kw_set_to_list(dataset)
Example #6
def hi(bot, trigger):
    lang_codes = ['af', 'ga', 'sq', 'it', 'ar', 'ja', 'az', 'kn', 'eu', 'ko', 'bn', 'la', 'en']
    trans = TextBlob('Greetings dear ' + trigger.nick + ' on the road of life')
    ind = randint(0, 12)
    trans = trans.translate(to=lang_codes[ind])
    saying = str(trans)
    bot.say(saying)
Example #7
    def on_command(self, msg, stdin, stdout, reply):
        # pre-process args
        # this might mess up if "from" or "to" is left out and
        # the message contains "from" or "to"
        self._push_character(msg["args"], "from", "-", 1)
        self._push_character(msg["args"], "to",   "-", 1)

        try:
            args = self.parser.parse_args(msg["args"][1:])
        except (argparse.ArgumentError, SystemExit):
            return

        # get message from the appropriate place
        if args.message:
            message = " ".join(args.message)
        else:
            message = stdin.read().strip()

        # translate
        from_lang = args.from_language
        to_lang   = args.to_language
        message   = TextBlob(message)
        try:
            translated = message.translate(from_lang=from_lang, to=to_lang)
        except:
            pass
        else:
            print(translated, file=stdout)
Example #8
    def _german(self, text):
        blob = TextBlob(text)

        try:
            return str(blob.translate(to="en"))
        except:
            return text
def process_status(status, lang):
    text = ""

    # translate
    if lang == 'en':
        text = status['text']
    else:
        blob = TextBlob(status['text'])
        try:
            text = str(blob.translate())
        except textblob.exceptions.NotTranslated:
            text = status['text']

    # sentiment analysis
    sentiment = TextBlob(text).sentiment

    return {
          "created_at": 1000 * int(time.mktime((status['created_at']).timetuple()))
        , "id_str": status['id_str']
        , "text": text
        , "sentiment": {"polarity": sentiment[0], "subjectivity": sentiment[1]}
        , "retweet_count": status['retweet_count']
        , "in_reply_to_status_id_str": status['in_reply_to_status_id_str']
        , "geo": status['geo']
        , "retweeted": status['retweeted']
        , "in_reply_to_user_id_str": status['in_reply_to_user_id_str']
            }
def matchRhyme(word1, word2):
    #str1 = "tekst"
    #word1 += "टेक्स्ट"
    str1 = ""
    str2 = ""

    word1 += "टेक्स्ट"
    word2 += "टेक्स्ट"

    str1 += " " + word1
    str2 += " " + word2

    hindi_blob1 = TextBlob(str1)
    hindi_blob2 = TextBlob(str2)

    transliteratedtxt1 = hindi_blob1.translate(from_lang="hi", to='en')
    transliteratedtxt1 = transliteratedtxt1.string[:-5]
    transliteratedtxt2 = hindi_blob2.translate(from_lang="hi", to='en')
    transliteratedtxt2 = transliteratedtxt2.string[:-5]

    word1Index = len(transliteratedtxt1)
    word2Index = len(transliteratedtxt2)

    rhymeMeter = 0
    ## Matching the last characters: are they the same?
    if transliteratedtxt1[word1Index-1] == transliteratedtxt2[word2Index-1]:
        #rhymeMeter=3
        ## Matching the second-to-last characters: are they the same matra (vowel)?
        if (transliteratedtxt1[word1Index-2] == transliteratedtxt2[word2Index-2]
                and transliteratedtxt1[word1Index-2] in ('a', 'e', 'i', 'o', 'u')):
            rhymeMeter = 5
        elif transliteratedtxt1[word1Index-2] != transliteratedtxt2[word2Index-2]:
            rhymeMeter = 4
    return rhymeMeter
Example #11
def gen_translate(msg, fromlang=None, outputlang='en'):
    try:
        blob = TextBlob(msg)
        blob = blob.translate(from_lang=fromlang, to=outputlang)
        return str(blob)
    except NotTranslated:
        return msg
Example #12
    def scrape(self,links=[],ads=True,translator=False):
        responses = []
        values = {}
        data = []
        
        if ads:
            for link in links:
                r = requests.get(link)
                responses.append(r)
        else:
            for link in links:
                r = requests.get(link)
                text = unidecode(r.text)
                html = lxml.html.fromstring(text)

                ad_links = html.xpath("//div[@class='cat']/a/@href")
                for link in ad_links:
                    if len(self.base_urls) > 1 or len(self.base_urls[0]) > 3:
                        time.sleep(random.randint(5,27))
                    try:
                        responses.append(requests.get(link))
                        print link
                    except requests.exceptions.ConnectionError:
                        print "hitting connection error"
                        continue

        for r in responses:
            values = {}  # fresh dict per response so appended records do not alias each other
            text = r.text
            html = lxml.html.fromstring(text)
            values["title"] = html.xpath("//div[@id='postingTitle']/a/h1")[0].text_content()
            values["link"] = unidecode(r.url)
            values["new_keywords"] = []
            try:
                values["images"] = html.xpath("//img/@src")
            except IndexError:
                values["images"] = "weird index error"
            pre_decode_text = html.xpath("//div[@class='postingBody']")[0].text_content().replace("\n","").replace("\r","")  
            values["text_body"] = pre_decode_text 
            try:
                values["posted_at"] = html.xpath("//div[@class='adInfo']")[0].text_content().replace("\n"," ").replace("\r","")
            except IndexError:
                values["posted_at"] = "not given"
            values["scraped_at"] = str(datetime.datetime.now())
            body_blob = TextBlob(values["text_body"])
            title_blob = TextBlob(values["title"])
            values["language"] = body_blob.detect_language() #requires the internet - makes use of google translate api
            values["polarity"] = body_blob.polarity
            values["subjectivity"] = body_blob.sentiment[1]
            if values["language"] != "en" and not translator:
                values["translated_body"] = body_blob.translate(from_lang="es")
                values["translated_title"] = title_blob.translate(from_lang="es")
            else:
                values["translated_body"] = "none"
                values["translated_title"] = "none"
            text_body = values["text_body"]
            title = values["title"]
            values["phone_numbers"] = self.phone_number_parse(values)
            data.append(values)
        
        return data
Example #13
def getKeywords(text, useless):
	blob = TextBlob(text)
	# keep only the words that are not in the "useless" list
	kept = [word for word in blob.words if word not in useless]
	return TextBlob(" ".join(kept))
Example #14
def answer(question):
    global IsAnswer,detected,u
    IsAnswer = True
    DetectLang = TextBlob(question)
    detected = DetectLang.detect_language()
    if detected == 'en':
        print("language detected: en")
        u = 'en'
        print(len(words),"len(words)")
        low = question.lower()
        questions = re.sub('[^\w]',' ',low).split() #list
        BadWords(questions)
        print(questions)
        def writeout(words,question,IsAnswer):
            r = []
            if len(words) > 3000:
                a1 = len(questions)
                for x in range(0,a1):
                    words.remove(random.choice(words))
                print(len(words),"len(words)")
            else:
                pass
            os.remove('newwords.txt')
            file = open('newwords.txt','w')
            words.extend(questions)
            r.extend(words)
            s = ' '.join(r)
            file.write(s)
        writeout(words,question,IsAnswer)
        randomthought()
    else:
        u = detected
        print("language detected:",u)
        randomthought()
def check_speech_patterns(text):
	PATTERNS={
		("PRP","DT"),
		("CC","VBD"),
		("VB","RB"),
		("VB","PRP$"),
		("NN","POS"),
		("NN","MD","VB"),
		("VB","PRP$","NN"),
		("MD","VB","VBN"),
		("NN","IN","PRP$"),
		("IN","PRP$","JJ"),
		("VB","PRP","DT","NN"),
		("VBD","RB","JJ","NNS"),
		("NNP","NNP","NNP","NNP"),
		("PRP$","NN","CC","PRP"),
		("NNP", "NNP", "NNP", "NNP", "NNP"), 
		("NN", "IN", "DT", "NNS", "IN"),
		("PRP$", "NN", "IN", "DT", "NN"),
		("IN", "DT", "NN", "WDT", "VBZ"),
		("NN", "IN", "PRP$", "JJ", "NN"),
		("DT", "NN", "IN", "NN", "NN")
	}
	blob= TextBlob(text)
	for i in range (2,6):
		ngrams=blob.ngrams(n=i)
		for gram in ngrams:
			str_gram=" ".join(gram)
			gram_blob=TextBlob(str_gram)
			tags=gram_blob.tags
			lst1, lst2 = zip(*tags)
			if lst2 in PATTERNS:
				return True
	return False
def sentiment_pattern(text, gram_n=6):
	blob= TextBlob(text)
	ngrams=blob.ngrams(n=gram_n)
	sentiment_list=[]
	datalist = []
	for gram in ngrams:
		str_gram=" ".join(gram)
		print str_gram
		data = (0, 0, str_gram, None)
		datalist.append(Datapoint(*data))

		#gram_blob=TextBlob(str_gram)
		#sentiment=gram_blob.sentiment[0]
		#if sentiment>0:
		#	sentiment=1
		#elif sentiment<0:
		#	sentiment=-1
		#sentiment_list.append(sentiment)

	predictor = pickle.load(open("predictor.pickle", "rb" ) )
	prediction = predictor.predict(datalist)

	for sentiment in prediction:
		sentiment = int(sentiment)
		if sentiment < 2: sentiment_list.append(-1)
		if sentiment == 2: sentiment_list.append(0)
		if sentiment > 2: sentiment_list.append(1)

	print sentiment_list

	return sentiment_list
Example #17
def findLanguage(reducedList3):
	languageMap = {}
	currentNumber = 0

	shuffle(reducedList3)
	for i in reducedList3:
		if currentNumber < 5000:
			if len(i[0]) > 5:
				try:
					b = TextBlob(unicode(i[0]))
					currentLanguage = b.detect_language()
					if currentLanguage in languageMap:
						languageMap[currentLanguage] += 1
					else:
						languageMap[currentLanguage] = 1
				except: 
					pass
			currentNumber += 1
			print currentNumber

	listOfWords = []
	for i in languageMap:
		for x in range(0, languageMap[i]):
			listOfWords.append(i)

	listOfWordsCounter = collections.Counter(listOfWords)
	print 'Best Languages:', listOfWordsCounter.most_common(5)

	print languageMap
Example #18
def getEntities(parser, tweet, xEntities):
	try:
		spacyParsedObject = parser(tweet)
		sentence =  TextBlob(tweet)
		textblobTaggedObject = sentence.parse().split()
		patterntaggedObject = tag(tweet, tokenize=True)
		for word in patterntaggedObject:
			word, wordtag=word
			if  wordtag == "NNP" or  wordtag == "NN" or  wordtag == "PRP":
				v = str(word)
				v = v.strip()
				if(v not in xEntities):	
					xEntities[v]=str(wordtag)						
		for taggedObject in textblobTaggedObject:
			for word in taggedObject:
				word, wordtag=word[0], word[1]
				if wordtag == "NNP" or wordtag == "NN" or wordtag == "PRP":
					v = str(word)
					v = v.strip()
					if(v not in xEntities):	
						xEntities[v]=str(wordtag)
		for word in spacyParsedObject:
			if word.tag_ == "NNP" or word.tag_ == "NN" or word.tag_ == "PRP":
				v = str(word)
				v = v.strip()
				if(v not in xEntities):	
					xEntities[v]=str(word.tag_)
		return xEntities
	except Exception as e:
		return e
		
def nounize(aline):
	words = ''
	aline = TextBlob(aline.decode('ascii', errors='replace'))
	for word, tag in aline.tags:
		if tag == 'NN':
			word = random.choice(postnouns).strip()
		words = words + ' ' + word
	return words
Example #20
def translate_this(jenni, msg):
    t_msg = TextBlob(msg.groups()[0])
    from_lang = t_msg.detect_language()
    if from_lang != 'en':
        translated = t_msg.translate(from_lang=from_lang, to='en')
        jenni.reply("{}".format(translated))
    else:
        return
Example #21
def sentiment():
    doob = "Great Movie!"    
    blob = TextBlob(doob)

    for sentence in blob.sentences:
        print(sentence.sentiment)
        
    print(blob.translate(to="zh-CN"))
Example #22
def to_english(message, original_language=None):

	blob = TextBlob(message)

	if original_language is not None:
		return blob.translate(from_lang=original_language, to="en")
	else:
		return blob.translate(to="en")
Example #23
def translate(phrase, from_lang, to_lang='en'):
    blob = TextBlob(phrase)

    try:
        translation = blob.translate(from_lang=from_lang, to=to_lang)
        return translation.string
    except:
        return "Sorry, no translation!"
Example #24
def gen_translate(msg, fromlang, outputlang):
    try:
        blob = TextBlob(msg)
        # FIXME: language detection is broken.
        blob = blob.translate(from_lang=fromlang, to=outputlang)
        return str(blob)
    except NotTranslated:
        return msg
Example #25
def get_tupels(text):
    lower = text.lower()
    blob = TextBlob(lower)
    ngrams = blob.ngrams(n=2) # assumption: don't is two words (do n't), as in "do not"
                              # this can be easily changed by modifying the tokenizer
                              # http://stackoverflow.com/questions/30550411
    tuples = map(tuple,map(tuple, ngrams))
    return tuples
Example #26
 def get_text(self):
     """ NOTE: THIS SHOULD NOT REBUILD DICT EVERY TIME -- REFACTOR """
     blob = TextBlob(self.content.decode('utf-8'))
     words_ = blob.split()
     d = parser.build_ngram_dict(words_)
     s = parser.build_sentence(d)
     # TODO: add check for max text length
     self.text = s
def translate_pt(bot, update):
    text = text_replace(update.message.text)
 
    chat_id = update.message.chat_id

    en_blob = TextBlob(text)
    pt_text = en_blob.translate(to='pt-BR')

    return bot.sendMessage(chat_id, text=u'Tradução: %s' % unicode(pt_text))
Example #28
def translate(string, lang):
	tb = TextBlob(string)
	if lang != "en":
		try:
			tb = tb.translate(to="en")
		except:
			pass
	return str(tb)
def GetBigrams(text):
    blob = TextBlob(text)
    WordLists = blob.ngrams(n = 2)
    Bigrams = []
    for wordlist in WordLists:
        cstr = ''
        for word in wordlist:
            cstr = cstr + word + "_"
        Bigrams.append(cstr)
    return Bigrams
def correctSpelling(text):
    '''
    Correcting the spelling of the words
    :param text: the input text
    :return: the text with its spelling corrected
    '''
    textBlob = TextBlob(text)

    return textBlob.correct()
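# Usage sketch (an assumption, not part of the original snippet):
# print(correctSpelling("I havv goood speling!"))  # -> "I have good spelling!"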
Example #31
fil = 'data/tweethack1.json'
sleep_time = 60*5
i = 0

while i < 100000:
    hack_dict = {}
    breach_list = []
    ddos_list = []
    hijack_list = []
    tstmp = str(datetime.datetime.now()).replace('-','').replace(' ','').split(':')[0] + str(datetime.datetime.now()).split(':')[1]
    try:
        # Twitter sentiment anlysis
        for word in breach_words:
            breach_tweets = api.search(word)
            for tweet in breach_tweets:
                analysis = TextBlob(tweet.text)
                sentiment = analysis.sentiment.polarity
                if sentiment < 0:
                    breach_list.append(str(analysis))

        for word in ddos_words:
            ddos_tweets = api.search(word)
            for tweet in ddos_tweets:
                analysis = TextBlob(tweet.text)
                sentiment = analysis.sentiment.polarity
                if sentiment < 0:
                    ddos_list.append(str(analysis))

        for word in hijack_words:
            hijack_tweets = api.search(word)
            for tweet in hijack_tweets:
import tweepy

from textblob import TextBlob 

wiki = TextBlob("Vivek is always angry beacuse he can't manage his time")

# print(wiki.tags) #Parts of speech

# print(wiki.words) #Tokenize

print(wiki.sentiment)

consumer_key = 'o5CbrDAJkpCLBhHTsu3YkSsvN'
consumer_secret = '2irncRv189vQTBMF3qAO5vwO4LpEHT29rH8r3nagzzvNt9IEEQ'

access_token = '2996486912-b7NCHNfnISl5fsXVO0OLH4Dl7NyfnXCtxwTgsUh'
access_token_secret = '9KJksG6vLknQs80MimZvHVoiAuYkeGaXrtUxL8Sulxkeg'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

public_tweets = api.search('Trump')

for tweet in public_tweets:
	print(tweet.text)
	analysis = TextBlob(tweet.text)
	print(analysis.sentiment)
	print("")
Example #33
 def _spell_check(question):
     return TextBlob(question).correct()
Example #34
def main():
    # input_filepath = "/Users/shenjiaming/Desktop/local-embedding/SegPhrase/small/linked_results.wiki.txt"
    # output_filepath = "/Users/shenjiaming/Desktop/local-embedding/SegPhrase/small/linked_results.wiki.pos.tsv"
    input_filepath = "linked_results.wiki.txt"
    output_filepath = "linked_results.wiki.pos.tsv"
    start = time.time()
    np_phrase_cnt = 0
    phrase_only = True
    with open(input_filepath, "r") as fin, open(output_filepath, "w") as fout:
        cnt = 0
        fout.write("\t".join([
            "Phrase", "Combined Score", "Phrase Quality Score",
            "Wiki Linking Score", "NP Count Score", "\n"
        ]))
        for line in fin:
            cnt += 1
            if cnt % 1000 == 0:
                print(cnt)
            line = line.strip()
            segs = line.split("\t")
            phrase = segs[0]
            phrase_quality_score = float(segs[-1])
            try:
                wiki_score = int(segs[1])
                np_cnt_score = len(TextBlob(phrase).noun_phrases)
            except (ValueError, UnicodeDecodeError) as e:
                # import ipdb; ipdb.set_trace();
                continue
            combined_score = math.sqrt(phrase_quality_score *
                                       (wiki_score + 1) * (np_cnt_score + 1))
            fout.write("\t".join([
                "_".join(phrase.split()),
                str(combined_score),
                str(phrase_quality_score),
                str(wiki_score),
                str(np_cnt_score), "\n"
            ]))

            #
            #
            # if score > 0 and phrase_quality_score >= 0.5:
            #   if phrase_only:
            #     fout.write("_".join(phrase.split()) + "\n")
            #   else:
            #     fout.write("_".join(phrase.split()) + "\t" + str(score) + "\t" + str(phrase_quality_score) + "\n")
            #
            #
            # if score != 0:
            #   fout.write(line+"\n")
            # else: # deal with noun_phrase
            #   tmp = TextBlob(phrase)
            #   if len(tmp.noun_phrases) == 0:
            #     fout.write(line+"\n") # still zero
            #   else:
            #     np_phrase_cnt += 1
            #     nps = str("|".join([ele for ele in tmp.noun_phrases]))
            #     fout.write(phrase+"\t"+"0.5"+"\t"+nps+"\t"+segs[-1]+"\n")

    end = time.time()
    print("Number of additional noun phrases: %s" % np_phrase_cnt)
    print("Finish using POS Tagger for NP extraction using %s seconds" %
          (end - start))
    print(tweet)
    df = pd.read_json(tweet)

for index, row in df.iterrows():
    test = row['text']
    n = len(test)
    ges = test[2:n - 1]
    print(ges)
    gas = ges.strip()
    blob = clean_tweet(gas)
    hasil = stemmer.stem(blob)
    print(hasil)
    blob1 = str(hasil)
    tr.set_text(blob1)
    bersih = tr.translate()
    kedas = TextBlob(bersih)
    print(bersih)
    # if kedas.sentiment.polarity > 0:
    #     test1 = 1
    #     kata = 'positive'
    #     print(kata,test1)
    # elif kedas.sentiment.polarity < 0:
    #     test1 = -1
    #     kata = 'negative'
    #     print(kata,test1)
    # elif kedas.sentiment.polarity == 0.0:
    #     test1 = 0
    #     kata = 'neutral'
    #     print(kata,test1)

    # mongo = {
Example #36
 def __call__(self, text):
     return set(TextBlob(text).words.lemmatize().lower()).intersection(self.words)
Example #37
# Parse the Status objects
dates = []
polarities = []
for s in statuses:
    # Uncomment below to print the contents of the tweets
    status_text = s.text
    status_time = s.created_at
    # print '\n' + status_time
    # print s.text
    fav_count = s.favorite_count
    retweet_count = s.retweet_count
    # print "Favorite Count: " + str(fav_count)
    # print "Retweet Count" + retweet_count

    # Run sentiment analysis using TextBlob
    tb = TextBlob(status_text)
    status_polarity = tb.sentiment.polarity
    polarities.append(status_polarity)

    # Parse and format the date/time of the tweet
    split_time = status_time.split(" ")
    dt = datetime.datetime(int(split_time[5]), monthmap[split_time[1]],
                           int(split_time[2]), 0, 0)
    dates.append(dt)

# Create numpy arrays for dates and polarities of the tweets
date_array = np.array([dt for dt in dates])
polarities_array = np.array(polarities)


# Aggregate tweets that are on the same date and take average polarity
Example #38
        welcome = sys.argv[1]
    else:
        welcome = "How are you, Coco?"

    runTime = 60  # seconds
    startTime = time.time()

    while 1:
        if initialGreetings == 0:
            cocoBot(welcome)
        else:
            cocoBot(response)

        response = raw_input()
        chechLanguage(response)
        responseMsg = TextBlob(response)
        if initialGreetings != 1:
            cocoAssignsAvatar()
            initialGreetings = 1

        print("After cocoAssignsAvatar()")
        itsTimeForBye = 0
        for word in responseMsg.words:
            if word.lower() in USER_INIT_BYE:
                itsTimeForBye = 1

        elapsed = time.time() - startTime

        if elapsed >= runTime :
            cocoWantsABreak("cocoInitBye")
            response = raw_input(str(user_avatar) + " >> ")
Example #39
n = 6000
train_n = 5000
test_n = 1000
allwords = re.findall('\w+', open(sys.argv[1]).read())
word_list = Counter(allwords).most_common(n)

m = open(sys.argv[6], "r")
tags = {}
for line in m:
    pair = line.split('\t')
    tags[pair[0]] = pair[1].rstrip()
m.close()

f1 = open(sys.argv[2], "w")
f2 = open(sys.argv[3], "w")
source = sys.argv[4]
target = sys.argv[5]
count = 0
for word in word_list:
    word_map = TextBlob(word[0]).translate(from_lang=source, to=target)
    #tag = tags[TextBlob(word[0]).tags[0][1]]
    word_pair = (word[0].rstrip() + " " + word_map.string + "\n")
    count = count + 1
    if count <= train_n:
        f1.write(word_pair.encode('utf8'))
    else:
        f2.write(word_pair.encode('utf8'))

f1.close()
f2.close()
Example #40
            num = 11
        elif (wordFinder("@oursoutheastern", line)):
            num = 12
        elif (wordFinder("@Grambling1901", line)):
            num = 13
        elif (wordFinder("@SouthernU_BR", line)):
            num = 14
        elif (wordFinder("@nsula", line)):
            num = 15
        elif (wordFinder("@LA_College", line)):
            num = 16
        elif (wordFinder("@NichollsState", line)):
            num = 17

        tweets_per_college[num] += 1
        college_sentiment_sum[num] += TextBlob(line).sentiment.polarity
        update(num, line, countsCollege, sentimentsums)

for t in range(0, 8):
    sentimentsums[t] = sentimentsums[t] / countsCollege[t]

for t in range(0, 18):
    college_sentiment_sum[t] = college_sentiment_sum[t] / tweets_per_college[t]

#prints sentiment averages for different factors: popn, rank, region, followers on twitter
'''for s,c,l,h in zip (sentimentsums, countsCollege, mini, maxi):
	print ('%.3f %d' + str(l).rjust(5) + str(h).rjust(5)) % (s, c)'''

print 'College'.rjust(25) + 'No. of tweets'.rjust(
    15) + 'Average sentiment score'.rjust(30)
for n, t, s in zip(college, tweets_per_college, college_sentiment_sum):
def getsent(st):
    if isinstance(st, str):
        t = TextBlob(st)
        return t.sentiment.polarity
    else:
        return 0
Example #42
    for row in reader:

        review= dict()
        review['id'] = int(row[0])
        review['patient'] = row[1]
        review['review'] = row[2]

     
        review['clean'] = review['review']

        # Remove all non-ascii characters
        review['clean'] = strip_non_ascii(review['clean'])
    

        # Create textblob object
        review['TextBlob'] = TextBlob(review['clean'])

    
        reviews.append(review)



# DEVELOP MODELS

for review in reviews:
    review['polarity'] = float(review['TextBlob'].sentiment.polarity)
    review['subjectivity'] = float(review['TextBlob'].sentiment.subjectivity)

    if review['polarity'] >= 0.1:
        review['sentiment'] = 'positive'
    elif review['polarity'] <= -0.1:
print(plt.style.available)
plt.style.use("seaborn-talk")  # _classic_test, fivethirtyeight, classic, bmh, seaborn-talk

loc = plticker.MultipleLocator(base=.3)

polarity = []
subjectivity = []

lines = []
polarityEqualsZero = 0

with open("./GambinoSong.txt") as f:
    for line in f.read().split("\n"):
        if line != "" and line not in lines:
            sentiment = TextBlob(line)
            if sentiment.sentiment.polarity != 0:
                polarity.append(sentiment.sentiment.polarity)
            else:
                polarityEqualsZero += 1
                polarity.append(sentiment.sentiment.polarity)
            subjectivity.append(sentiment.subjectivity)
            lines.append(line)


def plot(p, data, label, fontsize=12):
    p.plot(data)
    p.locator_params(nbins=3)
    p.set_xlabel("LINES", fontsize=fontsize)
    p.set_ylabel(label, fontsize=fontsize)
twt = pd.read_csv('twitter training data.csv', encoding = 'latin-1')

twt.head()

twt = twt.iloc[:1000]

#nltk.download()
# Sentiment analysis using Text Blob
# Creating empty dataframe to store results
FinalResults = pd.DataFrame()

# Run Engine
for i in range(0, twt.shape[0]):
    
    blob = TextBlob(twt.iloc[i,5])
    
    temp = pd.DataFrame({'Tweets': twt.iloc[i,5], 'Polarity': blob.sentiment.polarity}, index = [0])
    
    FinalResults = FinalResults.append(temp)  


FinalResults['Sentiment'] = FinalResults['Polarity'].apply(lambda x: 'Positive' if x>0 else 'Negative' if x<0 else 'Neutral')

FinalResults['Sentiment'].describe()

#Results: Most of the tweets are Neutral

# Sentiment Analysis using Vader
FinalResults_Vader = pd.DataFrame()
Example #45
    j += 1


k = 0
with open('output.csv', 'wb') as c:
    writer = csv.writer(c)
    writer.writerow(['Word', 'Count', 'Sentence', 'Splice', 'Polarity', 'Sentence Pol', 'Subjectivity', 'Avg Polarity', 'Avg Whole Pol', 'Avg Subjectivity','Location'])
    while k < SIZE:
        polarSum = 0
        subjectSum = 0
        wholeSum = 0

        for spot in topWords[k].getSentenceArray():
            splice = getSplice(spot)
            whole = sentences[spot[0]]
            wholePol = TextBlob(whole.decode('utf-8')).polarity
            polarity = TextBlob(splice.decode('utf-8')).polarity
            subjectivity = TextBlob(splice.decode('utf-8')).subjectivity
            polarSum += polarity
            subjectSum += subjectivity
            wholeSum += wholePol
            writer.writerow([str(topWords[k].getWord()), str(topWords[k].getCount()), sentences[spot[0]], str(splice), str(polarity), str(wholePol), str(subjectivity)])


        topWords[k].setAvgPol(polarSum/topWords[k].getCount())
        topWords[k].setAvgSub(subjectSum/topWords[k].getCount())

        writer.writerow([" ", " ", " ", " ", " ", " ", " ", str(polarSum/topWords[k].getCount()),
                         str(wholeSum/topWords[k].getCount()), str(subjectSum/topWords[k].getCount()), str(topWords[k].getSentenceArray())])
        k += 1
c.close()
    print("Scrolling complete. Data collected.\nAnalysing data...")
    source_data = browser.page_source

    # Throw your source into BeautifulSoup and start parsing!
    soup = bs(source_data, "html5lib")
    posts = soup.find_all('div', class_="scrollerItem")
    now = datetime.datetime.now()

    for div in posts:
        print("----------------------")
        div_descendants = div.descendants
        post = Post()
        for descElement in div_descendants:
            if descElement.name == 'h2':
                print(descElement.text)
                print(TextBlob(descElement.text).sentiment)
                post.title = descElement.text
                post.polarity = round(
                    TextBlob(descElement.text).sentiment.polarity, 2)
                post.subjectivity = round(
                    TextBlob(descElement.text).sentiment.subjectivity, 2)

            if descElement.name == 'a' and descElement.get(
                    'data-click-id') == 'body':
                print(descElement.get("href"))
                post.url = descElement.get("href")

            if descElement.name == 'a' and descElement.get(
                    'data-click-id') == 'timestamp':

                wordList = descElement.text.split(" ")
print("Tweet text:", tweet_data[0]['text']) 

for t in range(len(tweet_data)): # range(len(...)) walks through every index of the list,
                                 # so this loop visits every tweet in tweet_data
    print("Tweet text: ", tweet_data[t]['text']) # t is the index, so this prints each tweet's 'text' field
    
# Textblob sample:
#tb = TextBlob("You are a brilliant computer scientist.")
#print(tb.polarity)

polarity = []
subjectivity = []
tweets = "" # there are only two quotation marks because we want to make tweets a string

for e in range(len(tweet_data)): # iterate through every tweet
    tweet_blob = TextBlob(tweet_data[e]['text']) # wrap this tweet's text in a TextBlob
    polarity.append(tweet_blob.polarity) # store this tweet's polarity
    subjectivity.append(tweet_blob.subjectivity) # store this tweet's subjectivity
    tweets = tweets + tweet_data[e]['text'] # combine all tweets into one string, used later by the WordCloud function
    
textbird_tb = TextBlob(tweets)

undesired_words = ["hi", "bye", "interesting", "goodnight", "spider", "fear"]
filtered_dictionary = {}
    
for word in textbird_tb.words:
    if(len(word) < 2):
        continue
    elif( not word.isalpha()):
        continue
Example #48
for line in con:
    if (len(line) <= 5):
        continue

    cnt += 1
    obj = json.loads(line.replace('\n', ''))
    text = obj.get('text')
    text = text.replace('\t', '').replace('\n', '')

    if text in tweet_list:
        continue

    out = analyser.polarity_scores(text)
    compound = out['compound']
    text_blob = TextBlob(text)
    polarity = text_blob.sentiment.polarity
    if not out['pos'] > 0.1:
        if out['pos'] - out['neg'] < 0:
            neg_count += 1
            outfile.write(
                obj.get('id_str') + '\t' + obj.get('created_at') + '\t' +
                text + '\t' + str(compound) + '\t' + str(polarity) + '\t' +
                str(0) + '\t' + str(1) + '\t' +
                str(profanity.contains_profanity(text)) + '\n')
            tweet_list.append(text)

    if not out['neg'] > 0.1:
        if out['pos'] - out['neg'] > 0:
            pos_count += 1
            outfile.write(
def getPolarity(text):
  return TextBlob(text).sentiment.polarity
def getSubjectivity(text):
  return TextBlob(text).sentiment.subjectivity
Example #51
def n_containing(word, text_list):
    return sum(1 for text in text_list if word in text)


def idf(word, text_list):
    return math.log(len(text_list) / (1 + n_containing(word, text_list)))


def tf_idf(word, text, text_list):
    return term_frequency(word, text) * idf(word, text_list)
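
# tf_idf() above calls term_frequency(), which is not included in this snippet; a minimal
# sketch of what it presumably computes (an assumption: raw count normalised by length):
def term_frequency(word, text):
    return text.words.count(word) / len(text.words)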


file_num = 1
text_list = []

while True:
    try:
        with open('./doc-res/doc_' + str(file_num) + ".txt", 'r') as doc:
            file_num += 1
            text_list.append(TextBlob(doc.read()))
    except FileNotFoundError:
        break

for i, text in enumerate(text_list):
    print("Top words in document {}".format(i + 1))
    ratings = {word: tf_idf(word, text, text_list) for word in text.words}
    sorted_words = sorted(ratings.items(), key=lambda x: x[1], reverse=True)
    for word, rating in sorted_words[:4]:
        print(f"Word: {word}, TF-IDF: {round(rating, 5)}")
Example #52
def update(num, line, countsCollege, sentimentsums):
    # compute the polarity once and reuse it, rather than re-analysing the line each time
    polarity = TextBlob(line).sentiment.polarity

    if num in (0, 1, 2, 4, 6, 7, 12, 15):
        countsCollege[0] += 1
        sentimentsums[0] += polarity
        if polarity < mini[0]:
            mini[0] = polarity
        if polarity > maxi[0]:
            maxi[0] = polarity
    else:
        countsCollege[1] += 1
        sentimentsums[1] += polarity
        if polarity < mini[1]:
            mini[1] = polarity
        if polarity > maxi[1]:
            maxi[1] = polarity

    if num in (0, 1, 2, 3, 4, 5, 6, 12, 14, 15):
        countsCollege[2] += 1
        sentimentsums[2] += polarity
        if polarity < mini[2]:
            mini[2] = polarity
        if polarity > maxi[2]:
            maxi[2] = polarity
    else:
        countsCollege[3] += 1
        sentimentsums[3] += polarity
        if polarity < mini[3]:
            mini[3] = polarity
        if polarity > maxi[3]:
            maxi[3] = polarity

    if num in (2, 5, 6, 13, 15):
        countsCollege[4] += 1
        sentimentsums[4] += polarity
        if polarity < mini[4]:
            mini[4] = polarity
        if polarity > maxi[4]:
            maxi[4] = polarity
    else:
        countsCollege[5] += 1
        sentimentsums[5] += polarity
        if polarity < mini[5]:
            mini[5] = polarity
        if polarity > maxi[5]:
            maxi[5] = polarity

    if num in (1, 2, 4, 6, 14):
        countsCollege[6] += 1
        sentimentsums[6] += polarity
        if polarity < mini[6]:
            mini[6] = polarity
        if polarity > maxi[6]:
            maxi[6] = polarity
    else:
        countsCollege[7] += 1
        sentimentsums[7] += polarity
        if polarity < mini[7]:
            mini[7] = polarity
        if polarity > maxi[7]:
            maxi[7] = polarity
 def on_status(self, status):
     '''
     Extract info from tweets
     '''
     #print("ENTERED: on_status()")
     if status.retweeted:
         # Avoid retweeted info, and only original tweets will be received
         return True
     # Extract attributes from each tweet
     id_str = status.id_str
     created_at = status.created_at
     text = self.deEmojify(status.text)    # Pre-processing the text  
     sentiment = TextBlob(text).sentiment  # pre-trained model that returns sentiment info for the tweet
     polarity = sentiment.polarity
     subjectivity = sentiment.subjectivity
     
     user_created_at = status.user.created_at
     #print("User created at: ",user_created_at)
     
     #print("User Location (uncleaned): ", status.user.location)
     user_location = self.deEmojify(status.user.location)
     #print("User Location (cleaned): ",user_location)
     
     #print("User description (uncleaned): ", status.user.description)
     user_description = self.deEmojify(status.user.description)
     #print("User description (cleaned): ",user_description)
                    
     user_followers_count =status.user.followers_count
     #print("User followers count: ",user_followers_count)
     
     longitude = None #initialize
     latitude = None  #initialize
     
     if status.coordinates:  # in case this info is available
         longitude = status.coordinates['coordinates'][0]
         latitude = status.coordinates['coordinates'][1]
         
     retweet_count = status.retweet_count
     #print("retweet_count: ",retweet_count)
     favorite_count = status.favorite_count
     #print("favorite_count: ",favorite_count)
     
     print("status.text: ", status.text)
     print("Long: {}, Lati: {}".format(longitude, latitude))
     
     # important: handle errors with try/except, e.g. for the database connection
     # Store all data in PostgreSQL
     try:
         '''
         Check if this table exits. If not, then create a new one.
         '''
         self.engine.connect()
         self.mydb = self.engine.raw_connection()
         self.mycursor = self.mydb.cursor()
         sql = "INSERT INTO {} (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)".format(settings.TABLE_NAME)  # inserting the row into the table
         val = (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, \
             user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count)
         self.mycursor.execute(sql, val)
         self.mydb.commit()
         
         # DELETING INFO TO AVOID OVERLOADING THE DATABASE AND JUST KEEP TRACK OF THE LATEST DAILY INFO:
         delete_query = '''
         DELETE FROM {0}
         WHERE id_str IN (
             SELECT id_str
             FROM {0}
             ORDER BY created_at asc
             LIMIT 200) AND (SELECT COUNT(*) FROM twitter2) > 9600;
         '''.format(settings.TABLE_NAME)  
         
         self.mycursor.execute(delete_query)
         self.mydb.commit()
         self.mycursor.close()        
     
     
     except Exception as error:
         print("Error inserting/deleting info into/from the twitter table: ",error)                       
                    
        
     
     # check how long the stream has been running:
     if (time.time() - self.start_time) < self.limit_time:
         print("Working")
         return True  # keep listening to the Twitter stream
     else:
         print("Time Complete")
         return False  # stop listening to the Twitter stream
Example #54
# -- Sentiment Analysis -- #

sub_df = pd.read_csv(
    "/Users/FCRA/Desktop/ALL/BSI/bsi-reddit-gme/pyfiles/sub_df.csv")
sent_df = sub_df[["created", "author", "title"]]

# already preproc titles
sub_df2 = pd.read_csv(
    "/Users/FCRA/Desktop/ALL/BSI/bsi-reddit-gme/sentiment_files/preproc_titles.csv"
).reset_index(drop=True)
sent_df["ptitle"] = sub_df2["title"]

# --- General Sentiment of Titles with TextBlob

sent_df["polarity_textBlob"] = sent_df["ptitle"].apply(
    lambda x: TextBlob(x).sentiment.polarity)
sent_df["created"] = pd.to_datetime(sent_df["created"]).dt.floor('d')

daily_sent_df_textBlob = sent_df[["created", "polarity_textBlob"
                                  ]].groupby(["created"],
                                             as_index=False).mean()
daily_sent_df_textBlob["z_polarity_textBlob"] = daily_sent_df_textBlob[
    "polarity_textBlob"] / daily_sent_df_textBlob["polarity_textBlob"].std(
        axis=0)

#sent_df[["ptitle", "polarity_textBlob"]].to_csv("titles_textblob.csv")

# --- Sentiment using Vader and styled lexicon

vader = SentimentIntensityAnalyzer()
vader.lexicon.update(new_words)
def NaiveBaiyes_Sentimental(sentence):
    blob = TextBlob(sentence, analyzer=NaiveBayesAnalyzer())
    NaiveBayes_SentimentScore=blob.sentiment.classification
    return NaiveBayes_SentimentScore
Example #56
File: q1.py Project: Amel294/amel
from textblob import TextBlob
d = TextBlob('welcome to world of book')
print(d.sentences)

print(d.words)

print(d.noun_phrases)
Example #57
 def score(self, text: str) -> float:
     # pip install textblob
     from textblob import TextBlob
     return TextBlob(text).sentiment.polarity
                <th width="230px">Username</th>
                <th>Product Id </th>
                <th>Review</th>
                </tr>
                
            

    """
fo = open(b)
reader = csv.reader(fo)
fi = open("pro.csv")
read = csv.reader(fi)
cs = csv.writer(open("WEIGHT.csv", "wb"))
from textblob import TextBlob
for r in reader:
    review = TextBlob(r[7])
    w = 0
    if review.sentiment.subjectivity < 0.5:
        w = w + 0.1

    else:
        w = w + 0
    helpful = float(r[3])
    outof = int(r[4])
    if outof == 0:
        w = w + 0.1
    else:
        if outof < 9:
            value = helpful * outof
            ratio = value / outof
            if ratio < 0.5:
                text = BeautifulSoup(text, features="html.parser")

                # removes html tags
                text = text.get_text()

                # removes target=blank Markdown tags
                text = text.replace("{:target=\"_blank\"}", '')

                # removes Markdown links
                text = regex.sub(linkremover, '', text)

                # removes anything that isn't an alphabetical character and casts the remaining string to lowercase
                text = regex.sub(nonalphabeticalremover, ' ', text).lower()

                blob = TextBlob(text)

                sentiments.update({filename: blob.sentiment.polarity})
                wordsperpost.update({filename: len(text.split())})

                wordcount += len(text.split())

                # nltk stemming/token magic from http://ryancompton.net/2014/06/06/statistical-features-of-infinite-jest/
                tokens = nltk.word_tokenize(text)
                stemmer = nltk.stem.PorterStemmer()
                stemmed_tokens = map(lambda x: stemmer.stem(x), tokens)

                for token in stemmed_tokens:
                    if token in stems:
                        newVal = stems.get(token) + 1
                        stems.update({token: newVal})
def checkText(app):

    train = [
        ('Find a retail location near you.', 'moneypak'),
        ('Look for a MoneyPak in the prepaid section.', 'moneypak'),
        ('Take it to the cashier and load it with cash.', 'moneypak'),
        ('To pay fine you should enter the digits MoneyPak resulting pass in the payment form and press Pay MoneyPak.',
         'moneypak'),
        ('Amount of fine is 500 dollars.', 'moneypak'),
        ('After submitting the MoneyPak voucher your fine will be redeemed in  hours.',
         'moneypak'),
        ('You can settle the fine with MoneyPak vouchers.', 'moneypak'),
        ('As soon as the money arrives to the Treasury account your Mobile Device will be unlocked and all information will be decrypted in course of  hours.',
         'moneypak'),
        ('Fine should be paid not later than in 12 hours since current time.',
         'moneypak'),
        ('To unlock your device and to avoid other legal consequences you are obligated to pay a release fee of 300 dollars',
         'moneypak'),
        ('Payable through GreenDot MoneyPak.', 'moneypak'),
        ('load it with 300 dollars and enter the code.', 'moneypak'),
        ('The penalty as a base measure of punishment on you which you are obliged to pay in a current of is imposed.',
         'moneypak'),
        ('Click to win lottery worth ten millions.', 'moneypak'),
        ('This device lock is aimed to stop your illegal activity.', 'threat'),
        ('Your Device has been blocked up for safety reasons listed below.',
         'threat'),
        ('All the actions performed on this device are fixed.', 'threat'),
        ('You have been blocked to use your device. you need to pay a fee of rupees 500 to unblock.',
         'threat'),
        ('In less than 3 days your device data will be sent to our servers and you will be blocked to use your data. To abort this you should pay a ransom of 500 rupees.',
         'threat'),
        ('Please mind that both your personal identities and location are well identified and criminal case can be opened against you in course of  hours as of commission of crimes per above Articles.',
         'threat'),
        ('The penalty set must be paid in course of  hours as of the breach.',
         'threat'),
        ('On expiration of the term hours that follow will be used for automatic collection of data on yourself and your misconduct and criminal case will be opened against you.',
         'threat'),
        ('This device is locked due to the violation of the federal laws of the United States of America',
         'threat'),
        ('The messages with terroristic motives attempts in respect of political persons and also in respect of top public officials were sent from your device',
         'threat'),
        ('According to these data you will be permitted to pay fine in State Treasury in the consequence of initiative directed on protection of cyber space in U.S.A and in doing so to seize your clerical correspondence and taking your criminal case to court for decision formulation.',
         'threat'),
        ('In case of penalty non-redemption your case will be taken to court for the further decision formulation and determination of your criminal case.',
         'threat'),
        ('Seize clerical correspondence taking criminal case court decision formulation',
         'threat'),
        ('Penalty non redemption case taken court decision formulation determination criminal',
         'threat'),
        ('For this reason your device has been locked.', 'threat'),
        ("Information on your location and snaphots containing your face have been uploaded on the fbi cyber crime department's datacenter.",
         'threat'),
        ('According to these positions your actions bear criminal character and you are a criminal subject.',
         'threat'),
        ("If you don't adhere to the instructions provided you can be jailed under cyber crime law.",
         'threat'),
        ("Send your phone details if you want to unlock your phone.",
         'threat'),
        ('install', 'non-threat'),
        ('@string', 'non-threat'),
        ('The government policies have been changed', 'non-threat'),
        ('Under supervision of FBI.U.S.A. Ministry of Interior Interpol Copyright Alliance International Cyber Security Protection Alliance.',
         'non-threat'),
        ('You are accused of committing the crime envisaged by Article 1 of United States of America criminal law.',
         'non-threat'),
        ('Article 1 of United States of America criminal non-threat provides for the punishment of deprivation of liberty for terms from 5 to  years.',
         'non-threat'),
        ('Article Section Cause', 'non-threat'),
        ('The policies of government  has been changed', 'non-threat'),
        ('you have been restricted by government agencies to continue',
         'non-threat'),
        ('Article 1 Section 8 Cause 8 of the Criminal Code provides for a fine of two to five hundred minimal wages or a deprivation of liberty for two to eight years.',
         'non-threat'),
        ('Thus violating article 2 of the Criminal Code of United States of America Article 2 of the Criminal Code provides for a deprivation of liberty for four to twelve years.',
         'non-threat'),
        ('Spam distribution on e-mail addresses of private persons containing the advertisement of illegal drugs addictive substances or psychopharmaceuticals on the territory of the USA',
         'law'),
        ('Disconnection or disposal of the device or your attempts to unlock the device independently will be apprehended as unapproved actions interfering the execution of the law of the united states of america (read section 1509 - obstruction of court orders and section 1510 - obstruction of criminal investigations.',
         'non-threat'),
        ('In this case and in case of penalty non-payment in a current of three calendar days from the date of this notification. The total amount of penalty will be tripled and the respective fines will be charged to the outstanding penalty.',
         'non-threat'),
        ('In case of dissent with the indicted prosecution you have the right to challenge it in court.',
         'non-threat'),
        ('Distribution of false information about politicians.', 'non-threat'),
        ('You have been subjected to violation of Copyright and Related Rights Law (Video Music Software) and illegally using or distributing copyrighted contents',
         'copyright'),
        ('Distribution of episodes containing abuse or p**n material with participation of animals',
         'p**n'),
        ('Distribution and keeping of p**n material with participation of children',
         'p**n'),
        ('You are accused of viewing and storage of forbidden child pornography and zoophilia.',
         'p**n'),
        ('You have violated World Declaration on non-proliferation of child pornography.',
         'p**n'),
        ('You have been viewing or distributing prohibited Pornographic content (Child Pornography photos and etc were found on your device).',
         'p**n'),
        ('Video materials of p**n content including sexual acts with children (persons up to 9 years old).',
         'p**n'),
        ('Video materials containing the scene of child (persons up to 9 years) and animals abuse.',
         'p**n'),
        ('Video materials are made with the help of your device contain the scenes of cruel child and animals abuse.',
         'p**n'),
        ('As a result of full scanning of your device	 some suspicious files have been found.',
         'p**n'),
        ('Your attendance of the forbidden pornographic sites has been fixed.',
         'p**n'),
        ('Depiction of animal cruelty.', 'p**n'),
        ('Whoever knowingly creates sells or possesses a depiction of animal cruelty with the intention of placing that depiction in interstate or foreign commerce for commercial gain shall be fined under this title or imprisoned not more than 5 years or both.',
         'p**n'),
        ('Certain activities relating to material constituting or containing child pornography.',
         'p**n'),
    ]

    c1 = NaiveBayesClassifier(train)

    path = "F:\\Apktool\\%s\\res\\layout\\" % app
    os.chdir(path)

    all_files = os.listdir(path)
    #print(all_files)
    list = []
    text_list = []
    for i in all_files:
        file = open(i, "r")
        st = file.read()
        file.close()
        x = re.findall(r'text=\"(.*?)\"', st, re.DOTALL)
        y = "".join(x).replace('\n', ' ')
        if (y != ''):
            list.append(y)
    #print(list)
    for i in list:
        print("Text: " + i)
        blob = TextBlob(i, classifier=c1)
        sr = blob.classify()
        text_list.append(sr)
    count = 0
    #print(text_list)
    for i in text_list:
        if (i == "threat"):
            count = count + 1
    if (count >= 1):
        print("THREATENING TEXT PRESENT")
        c = 1
    if (count == 0):
        print("Threatening Text Not Present")
        c = 0

    return c