Example 1
    def initialize_document(self, doc, docs_list_mode=False):
        if not docs_list_mode:
            self.doc = doc.lower()

            self.blob = TextBlob(text=self.doc, tokenizer=self.tokenizer)
            self.tokens = copy.deepcopy(self.blob.tokens)

            self.bigrams = self.bigramify(self.blob.tokens)
            self.tokens.extend(self.bigrams)

            self.trigrams = self.trigramify(self.blob.tokens)
            self.tokens.extend(self.trigrams)
        else:
            doc = doc.lower()

            blob = TextBlob(text=doc, tokenizer=self.tokenizer)
            tokens = copy.deepcopy(blob.tokens)

            # Build both n-gram lists from the original unigrams (as in the
            # branch above) before extending the working token list.
            bigrams = self.bigramify(tokens=blob.tokens)
            trigrams = self.trigramify(tokens=blob.tokens)
            tokens.extend(bigrams)
            tokens.extend(trigrams)

            return tokens
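Example 1 relies on bigramify and trigramify helpers that the snippet does not show. A minimal sketch of what they could look like, assuming each joins adjacent tokens into single space-separated n-gram strings (hypothetical implementations, not the original class's code):

def bigramify(tokens):
    # Pair each token with its successor: ["a b", "b c", ...]
    return ["%s %s" % (tokens[i], tokens[i + 1])
            for i in range(len(tokens) - 1)]

def trigramify(tokens):
    # Group each token with its two successors: ["a b c", "b c d", ...]
    return ["%s %s %s" % (tokens[i], tokens[i + 1], tokens[i + 2])
            for i in range(len(tokens) - 2)]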
Example 2
def get_half_contrast(feat, text):
    '''
    0 = no difference in polarity
    1 = high difference in polarity
    '''
    first_half_polarity = 0.0
    second_half_polarity = 0.0
    tokens = nltk.word_tokenize(text, language='english', preserve_line=False)
    if len(tokens) == 1:
        feat['half_contrast'] = 0.0
    else:
        first_half = tokens[0:int(len(tokens) / 2)]
        second_half = tokens[int(len(tokens) / 2):]

        try:
            # Re-join the tokens, gluing punctuation back onto the
            # preceding word, and score the first half.
            blob = TextBlob("".join([
                " " + i if i not in string.punctuation else i
                for i in first_half
            ]).strip())
            first_half_polarity = blob.sentiment.polarity
        except Exception:
            first_half_polarity = 0.0

        try:
            blob = TextBlob("".join([
                " " + i if i not in string.punctuation else i
                for i in second_half
            ]).strip())
            second_half_polarity = blob.sentiment.polarity
        except Exception:
            second_half_polarity = 0.0

        # Polarities lie in [-1, 1], so their absolute difference lies in
        # [0, 2]; dividing by 2 normalizes the score to [0, 1].
        feat['half_contrast'] = np.abs(first_half_polarity -
                                       second_half_polarity) / 2
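A quick usage sketch for get_half_contrast, assuming nltk, numpy (as np), string, and TextBlob are imported as the snippet requires:

feat = {}
get_half_contrast(feat, "I loved the opening scenes. The ending was dreadful.")
print(feat['half_contrast'])  # in [0, 1]; higher means a stronger polarity shift between the halves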
Example 3
def get_third_contrast(feat, text):
    '''
    0 = no difference in polarity
    1 = high difference in polarity
    '''
    first_third_polarity = 0.0
    second_third_polarity = 0.0
    third_third_polarity = 0.0
    if type(text) is str:
        tokens = nltk.word_tokenize(text,
                                    language='english',
                                    preserve_line=False)
        if len(tokens) < 2:
            feat['third_contrast_12'] = 0.0
            feat['third_contrast_13'] = 0.0
            feat['third_contrast_23'] = 0.0
        elif len(tokens) == 2:
            try:
                blob = TextBlob(tokens[0])
                first_third_polarity = blob.sentiment.polarity
            except Exception:
                first_third_polarity = 0.0
            try:
                blob = TextBlob(tokens[1])
                second_third_polarity = blob.sentiment.polarity
            except Exception:
                second_third_polarity = 0.0
            feat['third_contrast_13'] = 0.0
            feat['third_contrast_23'] = 0.0
            feat['third_contrast_12'] = np.abs(first_third_polarity -
                                               second_third_polarity) / 2
        else:
            first_third = tokens[0:int(len(tokens) / 3)]
            second_third = tokens[int(len(tokens) / 3):2 * int(len(tokens) / 3)]
            third_third = tokens[2 * int(len(tokens) / 3):]
            # TextBlob expects a string, not a token list, so re-join each
            # third before scoring it.
            try:
                blob = TextBlob(" ".join(first_third))
                first_third_polarity = blob.sentiment.polarity
            except Exception:
                first_third_polarity = 0.0
            try:
                blob = TextBlob(" ".join(second_third))
                second_third_polarity = blob.sentiment.polarity
            except Exception:
                second_third_polarity = 0.0
            try:
                blob = TextBlob(" ".join(third_third))
                third_third_polarity = blob.sentiment.polarity
            except Exception:
                third_third_polarity = 0.0
            feat['third_contrast_12'] = np.abs(first_third_polarity -
                                               second_third_polarity) / 2
            feat['third_contrast_13'] = np.abs(first_third_polarity -
                                               third_third_polarity) / 2
            feat['third_contrast_23'] = np.abs(second_third_polarity -
                                               third_third_polarity) / 2
    else:
        feat['third_contrast_12'] = 0
        feat['third_contrast_13'] = 0
        feat['third_contrast_23'] = 0
Example 4
def flat_doc(document, model, extremes=None):
    flat_doc = ""
    for field in document:
        if not isinstance(document[field], list): continue  # Skip the 'id' and '_version_' fields: auto-generated by Solr
        for value in document[field]:
            ## Language detection and translation ##
            if field in ('author.authors.authorName', 'author.authorBio',
                         'description', 'quotes.quoteText'):
                value_blob = TextBlob(value)
                try:
                    if value_blob.detect_language() != 'en':
                        try:
                            value = value_blob.translate(to='en')
                        except Exception as e:
                            value = value  # e = NotTranslated('Translation API returned the input string unchanged.',)
                except Exception as e:
                    value = value  # e = TranslatorError('Must provide a string with at least 3 characters.')
            ########################################
            flat_doc += str(value) + ' '  # Flatten the document into a single string
    flat_doc = preprocess_string(flat_doc, CUSTOM_FILTERS)  # Preprocess the string
    flat_doc = [w for w in flat_doc if w not in stop_words]  # Remove stop words
    if extremes:
        flat_doc = [w for w in flat_doc if w not in extremes]
    flat_doc = [w for w in flat_doc if w in model.vocab]  # Keep only words in the model vocabulary
    if flat_doc == []:
        flat_doc = ['book']  # If the book ends up empty, add a token to avoid problems later on
    return flat_doc
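flat_doc assumes gensim's preprocess_string together with a CUSTOM_FILTERS list, a stop_words collection, and a word-vector model (a pre-4.0 gensim model, since it exposes .vocab) defined elsewhere. A minimal setup sketch; the filter choice here is an assumption, not the original author's configuration:

from gensim.parsing.preprocessing import (preprocess_string, strip_tags,
                                          strip_punctuation, strip_numeric)
from nltk.corpus import stopwords  # requires nltk.download('stopwords')

CUSTOM_FILTERS = [lambda s: s.lower(), strip_tags, strip_punctuation, strip_numeric]
stop_words = set(stopwords.words('english'))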
Example 5
def file_translator(file, language):
    with open(file, 'r') as fh:
        for line in fh:
            sentence = TextBlob(line)
            if language == "english":
                print(sentence.translate(to="en"))
            else:
                print(sentence.translate(to="es"))
    return
Example 6
def word_translator(words):
    b = TextBlob(words)
    if b.detect_language() == "en":
        print("The word " + words + " is in English and means",
              b.translate(to="es"))
    elif b.detect_language() == "es":
        print("La palabra " + words +
              " esta en espanol y en ingles significa", b.translate(to="en"))
    return
Example 7
    def get_translate(self, text):
        # Translate non-English input to English and English input to Farsi,
        # then send the result back through the Telegram bot.
        text = text.replace("text:", "")
        blob = TextBlob(text)
        lan = blob.detect_language()
        if lan != 'en':
            sentp = blob.translate(to="en")
        else:
            sentp = blob.translate(to="fa")
        sent = self.sender.sendMessage(str(sentp))
        self._editor = telepot.helper.Editor(self.bot, sent)
        self._edit_msg_ident = telepot.message_identifier(sent)
Example 8
def get_average_contrast(feat, text):
    '''
    True = average positive/negative contrast of at least 0.5
    False = little or no contrast
    '''
    negCount = 0
    posCount = 0
    negTotal = 0.0
    posTotal = 0.0
    polarityTemp = 0.0
    polarityDif = 0.0
    if type(text) is str:
        try:
            blob = TextBlob(text.strip())
            # Accumulate positive and negative sentence polarities separately.
            for sentence in blob.sentences:
                polarityTemp = sentence.sentiment.polarity
                if polarityTemp < 0:
                    negTotal += polarityTemp
                    negCount += 1
                elif polarityTemp > 0:
                    posTotal += polarityTemp
                    posCount += 1
            # Contrast = half the gap between the mean positive and mean
            # negative polarity, which normalizes the score to [0, 1].
            if negCount > 0 and posCount > 0:
                polarityDif = ((posTotal / posCount) -
                               (negTotal / negCount)) / 2
            feat['average_contrast'] = polarityDif >= 0.5
        except Exception:
            feat['average_contrast'] = False
    else:
        feat['average_contrast'] = False
Example 9
def del_a_comment(insta_username):
    media_id = get_post_id(insta_username)
    url = BASE_URL + "/media/{}/comments?access_token={}".format(
        media_id, ACCESS_TOKEN)
    print(url)
    comment_info = requests.get(url).json()

    if comment_info['meta']['code'] == 200:
        if len(comment_info['data']):
            # Here's a naive implementation of how to delete the negative comments:
            for x in range(0, len(comment_info["data"])):
                comment_id = comment_info['data'][x]['id']
                comment_text = comment_info['data'][x]['text']
                blob = TextBlob(comment_text, analyzer=NaiveBayesAnalyzer())
                if blob.sentiment.p_neg > blob.sentiment.p_pos:
                    print('Negative comment : {}'.format(comment_text))

                    delete_url = BASE_URL + "/media/{}/comments/{}/?access_token={}".format(
                        media_id, comment_id, ACCESS_TOKEN)
                    print('DELETE request url : {}'.format(delete_url))
                    delete_info = requests.delete(delete_url).json()

                    if delete_info['meta']['code'] == 200:
                        print('Comment successfully deleted!\n')
                    else:
                        print('Unable to delete comment!')
                else:
                    print('Positive comment : {}\n'.format(comment_text))
        else:
            print('There are no existing comments on the post!')
    else:
        print('Status code other than 200 received!')
Example 10
def func_5(filename):
    from scipy.stats import pearsonr  # returns both r and the p-value printed below
    length = []  # lengths of the texts
    polarities = []  # their sentiment polarities
    with open(filename, 'r') as f:
        for line in f.readlines():
            # split the line on tabs; the last field is the text
            text = line.split('\t')[-1]
            length.append(len(text))
            polarities.append(TextBlob(text).sentiment.polarity)
    plt.scatter(length, polarities, c='g')  # green scatter plot of length vs. polarity
    plt.title('sentiment&length')
    plt.xlabel('length')
    plt.ylabel('sentiment')
    length = np.array(length)  # convert the lists to arrays
    polarities = np.array(polarities)
    # pearsonr gives Pearson's r together with its two-sided p-value;
    # np.corrcoef would only give a 2x2 correlation matrix with no p-value
    r, p = pearsonr(length, polarities)
    print('question 5:')
    print('Correlation between length/polarity: r = {}, p = {}'.format(r, p))
    plt.savefig('sentiment&length.png')
    plt.show()
Example 11
def get_extreme_contrast(feat, text):
    '''
    True = extreme-contrast score of at least 0.5
    False = little or no contrast
    '''
    minPolarity = 0.0
    maxPolarity = 0.0
    polarityTemp = 0.0
    if type(text) is str:
        try:
            blob = TextBlob(text.strip())
            # Track the most positive and most negative sentence polarities.
            for sentence in blob.sentences:
                polarityTemp = sentence.sentiment.polarity
                if polarityTemp > maxPolarity:
                    maxPolarity = polarityTemp
                elif polarityTemp < minPolarity:
                    minPolarity = polarityTemp
            # The spread lies in [0, 2]; halving normalizes it to [0, 1].
            score = (maxPolarity - minPolarity) / 2
            feat['extreme_contrast'] = score >= 0.5
        except Exception:
            feat['extreme_contrast'] = False
    else:
        feat['extreme_contrast'] = False
Example 12
def get_sentiment_for_text(text):
    '''
    Returns the sentiment score for the given text.
    '''
    text_analyzer = TextBlob(text, analyzer=NaiveBayesAnalyzer())
    sentiment = text_analyzer.sentiment
    return (sentiment.classification, sentiment.p_pos, sentiment.p_neg)
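NaiveBayesAnalyzer is trained on NLTK's movie-reviews corpus, so that corpus (plus the punkt tokenizer data) has to be downloaded once before the first call. A usage sketch:

import nltk
nltk.download('movie_reviews')  # one-time setup for NaiveBayesAnalyzer
nltk.download('punkt')

classification, p_pos, p_neg = get_sentiment_for_text("What a great movie!")
print(classification, p_pos, p_neg)  # e.g. 'pos' with p_pos well above p_neg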
Example 13
    def find_concepts(self, paragraph: str) -> dict:
        blob = TextBlob(paragraph)
        sentences = list(blob.sentences)
        to_return = dict()
        for sentence in sentences:
            medscan_markup = self.markup_sentence(str(sentence))
            range2dict = dict()  # {id_range: {id: obj_name}}
            markup_pos = medscan_markup.find('ID{')
            while markup_pos >= 0:  # str.find returns -1 when no markup is left
                markup_start = markup_pos + 3
                id_end = medscan_markup.find('=', markup_start)
                msids = list(medscan_markup[markup_start:id_end].split(','))
                id_range = (int(msids[0]) // 1000000) * 1000000
                first_msid = 0 if len(msids) == 1 else 1
                markup_end = medscan_markup.find('}', markup_start + 5)
                if markup_end < 0: break  # hack for broken markup
                for i in range(first_msid, len(msids)):
                    msid = msids[i]
                    try:
                        obj_name = self.objnames[msid]
                    except KeyError:
                        obj_name = medscan_markup[id_end + 1:markup_end]
                        print(
                            '"%s" with MedScan ID %s doesn\'t have object name'
                            % (obj_name, msid))
                    try:
                        range2dict[id_range][msid] = obj_name
                    except KeyError:
                        range2dict[id_range] = {msid: obj_name}

                markup_pos = medscan_markup.find('ID{', markup_end + 1)

            to_return[medscan_markup] = range2dict

        return to_return  # {medscan_markup: {id_range: {id: obj_name}}}
Example 14
def handle_message_event(event):
    print(event)
    text = event.message.text
    source = event.source
    sender_id = ''  # avoid shadowing the built-in id()
    if isinstance(source, SourceUser):
        sender_id = source.user_id
    elif isinstance(source, SourceGroup):
        sender_id = source.group_id
    set_send_id(sender_id)
    blob = TextBlob(text)
    if '狀態' in text:
        text = text.replace('狀態', '')
        if text == '':
            line_bot_api.reply_message(
                event.reply_token, TextSendMessage(text=get_all_messages()))
        else:
            name = text
            line_bot_api.reply_message(event.reply_token,
                                       TextSendMessage(text=get_message(name)))
    elif '報告' in text:
        matches = re.search(r'(.*)報告(\d*)', text)
        if matches.group(1) == '' and matches.group(2) == '':
            line_bot_api.reply_message(event.reply_token,
                                       TextSendMessage(text=get_all_reports()))
        elif matches.group(2) == '':
            line_bot_api.reply_message(
                event.reply_token,
                TextSendMessage(text=get_report(matches.group(1))))
        else:
            line_bot_api.reply_message(
                event.reply_token,
                TextSendMessage(text=get_report_url(matches.group(1),
                                                    int(matches.group(2)))))
    elif '敬禮' in text:
        line_bot_api.reply_message(event.reply_token,
                                   TextSendMessage(text='敬禮'))
    elif '安安' in text:
        line_bot_api.reply_message(event.reply_token,
                                   TextSendMessage(text='安'))
    elif '0.0' in text:
        line_bot_api.reply_message(event.reply_token,
                                   TextSendMessage(text='0.0'))
    elif blob.detect_language() == 'ru':
        line_bot_api.reply_message(
            event.reply_token,
            TextSendMessage(text=str(blob.translate(to='zh-TW'))))
Example 15
    def __get_blob(self, text):
        """
            Translate text with current user locale
            @param text as str
        """
        try:
            locales = GLib.get_language_names()
            user_code = locales[0].split(".")[0]
            try:
                from textblob.blob import TextBlob
            except ImportError:
                return _("You need to install python3-textblob module")
            blob = TextBlob(text)
            return str(blob.translate(to=user_code))
        except Exception as e:
            Logger.error("LyricsView::__get_blob(): %s", e)
            return _("Can't translate this lyrics")
Example 16
    def test_tag_textblob(self):
        trained_tagger = PerceptronTagger()
        blob = TextBlob(self.text, pos_tagger=trained_tagger)
        # Punctuation is excluded
        assert_equal([w for w, t in blob.tags], [
            'Simple', 'is', 'better', 'than', 'complex', 'Complex', 'is',
            'better', 'than', 'complicated'
        ])
Example 17
def get_subjectivity_score(feat, text):
    '''
    0 = very objective
    1 = very subjective
    '''
    try:
        blob = TextBlob(text.strip())
        feat['subjectivity'] = blob.sentiment.subjectivity
    except Exception:
        feat['subjectivity'] = 0.0
Example 18
def get_polarity_score(feat, text):
    '''
    -1 = very negative
    1 = very positive
    '''
    try:
        blob = TextBlob(text.strip())
        feat['polarity'] = blob.sentiment.polarity
    except Exception:
        feat['polarity'] = 0.0
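The two extractors above write their scores into a shared feature dict; a brief usage sketch, assuming TextBlob is imported as in the other examples:

feat = {}
text = "This is an absolutely wonderful little library."
get_polarity_score(feat, text)
get_subjectivity_score(feat, text)
print(feat)  # {'polarity': ..., 'subjectivity': ...}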
Example 19
def import_text(text, title=None):
    """
    Import a text.

    """
    blob = TextBlob(text)
    document = Document()
    if title is None:
        # Derive a title from the first sentence, truncating at a word
        # boundary when it is longer than 50 characters.
        first_sentence = blob.sentences[0]
        if len(first_sentence) > 50:
            chunk = first_sentence[0:47]
            last_space = chunk.rfind(" ")
            title = "{0}...".format(first_sentence[0:last_space])
        else:
            title = str(first_sentence)
    document.title = title
    document.text = text
    document.language = blob.detect_language()
    sentiment = blob.sentiment
    document.polarity = sentiment[0]
    document.subjectivity = sentiment[1]
    document.is_tagged = False
    document.save()
    return document
Example 20
def get_subjectivity_score(feat, text):
    '''
    0 = very objective
    100 = very subjective (subjectivity scaled to an integer percentage)
    '''
    if type(text) is str:
        try:
            blob = TextBlob(text.strip())
            score = int(blob.sentiment.subjectivity * 100)
            feat['subjectivity'] = score
        except Exception:
            feat['subjectivity'] = 0
    else:
        feat['subjectivity'] = 0
Example 21
    def freq(self, word, docs=None):
        if docs is None:
            return self.tokens.count(word)
        else:
            if not isinstance(docs, str):
                # Join a list of documents into a single string.
                docs = " ".join(str(item) for item in docs)

            blob = TextBlob(text=docs, tokenizer=self.tokenizer)
            # Build both n-gram lists from the original unigrams before
            # extending the token list with them.
            unigrams = list(blob.tokens)
            blob.tokens.extend(self.bigramify(unigrams))
            blob.tokens.extend(self.trigramify(unigrams))
            return blob.tokens.count(word)
Example 22
def get_polarity_score(feat, text):
    '''
    0 = neutral
    100 = very positive or very negative (absolute polarity as a percentage)
    '''
    if type(text) is str:
        try:
            blob = TextBlob(text.strip())
            score = int(np.abs(blob.sentiment.polarity) * 100)
            feat['polarity'] = score
        except Exception:
            feat['polarity'] = 0
    else:
        feat['polarity'] = 0
Example 23
def main():
    # Get our data as an array: [title, author, date, content] from read_in()
    lines = read_in()
    title = lines[0]
    author = lines[1]
    date = lines[2]
    chinese_blob = TextBlob(lines[3])
    en_content = chinese_blob.translate(from_lang="zh-CN", to='en')
    info("Translated texts: " + str(en_content))  # print the translated result to the web console

    # Combine the translated result with ada-content-en.csv to produce a new csv.
    # Make a call to localhost:5000/update with data [(id), title, author, date, content];
    # the "id" field is generated automatically by reviewing the csv file.
    # Note that in the dev environment, 8080 is the node app port, while 5000 is the Python Flask app port.
    r = requests.get("http://localhost:5000/update",
                     headers={'X-API-TOKEN': 'FOOBAR1'},
                     data={'title': title, 'author': author, 'date': date,
                           'content': str(en_content)})
    info("INFO: " + r.text)

    # Train on the updated backup.csv.
    r_train = requests.get("http://localhost:5000/train",
                           headers={'X-API-TOKEN': 'FOOBAR1'},
                           data={'data-url': 'backup.csv'})
    info("INFO: " + r_train.text)

    # Predict on the updated backup.csv.
    r_predict = requests.post("http://localhost:5000/predict",
                              headers={'X-API-TOKEN': 'FOOBAR1'},
                              data={'item': '-1', 'num': 2, 'data-url': 'backup.csv'})
    info("INFO: " + r_predict.text)
Example 26
def removeStopWords(definition):
    # strip special characters, keeping only alphanumeric runs
    definition = (" ".join(re.findall(r"[A-Za-z0-9]*",
                                      definition))).replace("  ", " ")
    blob = TextBlob(definition)

    # Remove stop words
    filtered_sentence = []  # words kept from the definition
    word_cluster = []  # the part-of-speech tag for every kept word
    # check every word; keep it and its tag unless it is a stop word
    for word, pos in blob.tags:
        if word not in stop_words:
            filtered_sentence.append(word)
            word_cluster.append(pos)
    return (filtered_sentence, word_cluster)
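removeStopWords assumes that re, TextBlob, and a stop_words collection are defined at module level. A minimal setup sketch, assuming NLTK's English stop-word list is an acceptable stand-in (blob.tags also needs the punkt and averaged_perceptron_tagger NLTK data):

import re
from textblob import TextBlob
from nltk.corpus import stopwords  # requires nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
words, tags = removeStopWords("A short definition of the term polymorphism")
print(words)  # the remaining non-stop words
print(tags)   # their part-of-speech tags, e.g. 'NN', 'JJ'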
Example 27
def get_extreme_contrast(feat, text):
    '''
    0 = no contrast
    1 = high contrast
    '''
    minPolarity = 0.0
    maxPolarity = 0.0
    polarityTemp = 0.0
    try:
        blob = TextBlob(text.strip())
        for sentence in blob.sentences:
            polarityTemp = sentence.sentiment.polarity
            if polarityTemp > maxPolarity:
                maxPolarity = polarityTemp
            elif polarityTemp < minPolarity:
                minPolarity = polarityTemp
        feat['extreme_contrast'] = (maxPolarity - minPolarity) / 2
    except Exception:
        feat['extreme_contrast'] = 0.0
Example 28
    def __get_features_from_tweet_text(self, tweet_text):
        """This function returns the following features from the tweet text:
        - Adjectives and their corresponding frequencies found in the tweet. Each adjective is a separate feature.
        - Subjectivity and polarity as determined by TextBlob.
        :returns: (key, value) map of all features found.
        """
        text_blob = TextBlob(tweet_text,
                             np_extractor=self.np_extractor,
                             pos_tagger=self.pos_tagger)
        # Count how often each adjective occurs in the tweet.
        adjective_map = dict(
            Counter(ele[0] for ele in text_blob.pos_tags
                    if ele[1] == self.ADJECTIVE))
        polarity = text_blob.sentiment[0]
        subjectivity = text_blob.sentiment[1]
        # Merge the adjective counts with the two sentiment features.
        return {**adjective_map,
                self.POLARITY_FEATURE_KEY: polarity,
                self.SUBJECTIVITY_FEATURE_KEY: subjectivity}
Example 29
def get_polarity_score(feat, text):
    '''
    Sets boolean 'positivity'/'negativity' features from the polarity
    score (-1 = very negative, 1 = very positive).
    '''
    if type(text) is str:
        try:
            blob = TextBlob(text.strip())
            score = blob.sentiment.polarity
            # compare with ==; "is" tests object identity and is unreliable
            # for a computed float
            if score == 0:
                feat['positivity'] = False
                feat['negativity'] = False
            elif score < 0:
                feat['positivity'] = False
                feat['negativity'] = True
            else:
                feat['positivity'] = True
                feat['negativity'] = False
        except Exception:
            feat['positivity'] = False
            feat['negativity'] = False
    else:
        feat['positivity'] = False
        feat['negativity'] = False
Example 30
def get_subjectivity_score(feat, text):
    '''
    Sets boolean 'subjectivity'/'objectivity' features from the
    subjectivity score (0 = very objective, 1 = very subjective).
    '''
    if type(text) is str:
        try:
            blob = TextBlob(text.strip())
            score = blob.sentiment.subjectivity
            # compare with ==; "is" tests object identity and is unreliable
            # for a computed float
            if score == 0.5:
                feat['subjectivity'] = False
                feat['objectivity'] = False
            elif score < 0.5:
                feat['subjectivity'] = False
                feat['objectivity'] = True
            else:
                feat['subjectivity'] = True
                feat['objectivity'] = False
        except Exception:
            feat['subjectivity'] = False
            feat['objectivity'] = False
    else:
        feat['subjectivity'] = False
        feat['objectivity'] = False
Example 31
    def translate(p):
        try:
            return str(TextBlob(p).translate(from_lang='pl'))
        except (NotTranslated, TranslatorError):
            return 'Translation error'
Example 32
    for row in reader:
        combo = url2content(row['url'])
        writer.writerow({'id': row['id'], 'title': combo['title'],
                         'author': combo['author'], 'date': combo['date'],
                         'url': row['url'], 'content': combo['combined_string']})
        print('Processing scraper NO.' + str(row['id']))


### Connect with ada-content.csv to translate the content to an English version.
with open('ada-content-en.csv', 'w') as target:
    fieldnames = ['id', 'title', 'author', 'date', 'url', 'content']
    writer = csv.DictWriter(target, fieldnames=fieldnames)
    writer.writeheader()

    with open('ada-content.csv') as source:
        reader = csv.DictReader(source.read().splitlines())
        for row in reader:
            chinese_blob = TextBlob(row['content'])
            en_content = chinese_blob.translate(from_lang="zh-CN", to='en')
            writer.writerow({'id': row['id'], 'title': row['title'],
                             'author': row['author'], 'date': row['date'],
                             'url': row['url'], 'content': str(en_content)})
            print('Processing translator NO. ' + str(row['id']))
Example 33
from textblob.blob import TextBlob
blob = TextBlob('Уровень')  # Russian for "level"
print(str(blob.translate(to='zh-TW')))  # translate into Traditional Chinese
Example 34
def sentimentTB(sentence):
    from textblob.blob import TextBlob
    sentBlob = TextBlob(sentence)
    sentiment = sentBlob.sentiment.polarity  # polarity in [-1, 1]
    return sentiment
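A quick usage check for sentimentTB:

print(sentimentTB("The food was excellent"))   # positive, close to +1
print(sentimentTB("The food was terrible"))    # negative, close to -1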