def matchRhyme(word1,word2):
    """Score how well two Hindi words rhyme, judged on their English rendering.

    Each word gets the marker word "टेक्स्ट" appended, is translated from
    Hindi to English through TextBlob, and the last five characters of the
    result are cut off again before the word endings are compared.

    Returns:
        0 -- a rendering is empty or the last characters differ
        3 -- only the last characters match
        4 -- the last characters match and the second-to-last ones differ
        5 -- the last characters match and the second-to-last characters
             are the same vowel (a, e, i, o or u)

    NOTE(review): the original left the result unassigned in the 0 and 3
    cases (raising UnboundLocalError) and compared the first text with
    itself for the 4 case, so that branch never ran. The 0 and 3 values are
    an assumption (3 comes from a commented-out assignment) -- confirm.
    """
    rendered = []
    for word in (word1, word2):
        blob = TextBlob(" " + word + "टेक्स्ट")
        translated = blob.translate(from_lang="hi", to='en')
        # .raw is the plain text of the blob; drop the 5 trailing characters
        rendered.append(translated.raw[:-5])
    transliteratedtxt1, transliteratedtxt2 = rendered

    rhymeMeter = 0
    if not transliteratedtxt1 or not transliteratedtxt2:
        return rhymeMeter

    # Matching the last character
    if transliteratedtxt1[-1] != transliteratedtxt2[-1]:
        return rhymeMeter
    rhymeMeter = 3

    # A one-character rendering has no second-to-last character to compare
    if len(transliteratedtxt1) < 2 or len(transliteratedtxt2) < 2:
        return rhymeMeter

    before_last1 = transliteratedtxt1[-2]
    before_last2 = transliteratedtxt2[-2]
    # Matching if the second-to-last character is the same matra (vowel)
    if before_last1 == before_last2 and before_last1 in ('a', 'e', 'o', 'i', 'u'):
        rhymeMeter = 5
    elif before_last1 != before_last2:
        rhymeMeter = 4
    return rhymeMeter
    def scrape(self,links=None,ads=True,translator=False):
        """Download ad pages and return one dictionary of extracted fields per page.

        Args:
            links: URLs to fetch. With ``ads`` true they are fetched as ad
                pages directly; otherwise each one is fetched as a listing
                page and every href found under ``div.cat`` on it is fetched
                as an ad. Defaults to no links.
            ads: Whether ``links`` already point at ad pages.
            translator: When false, pages whose body is not detected as
                English get a translated body and title; when true those
                two fields are set to "none".

        Returns:
            A list with one dict per downloaded page, holding the keys
            title, link, new_keywords, images, text_body, posted_at,
            scraped_at, language, polarity, subjectivity, translated_body,
            translated_title and phone_numbers.
        """
        links = [] if links is None else links
        responses = []
        data = []

        if ads:
            for link in links:
                responses.append(requests.get(link))
        else:
            for listing_link in links:
                r = requests.get(listing_link)
                text = unidecode(r.text)
                html = lxml.html.fromstring(text)

                for ad_link in html.xpath("//div[@class='cat']/a/@href"):
                    if len(self.base_urls) > 1 or len(self.base_urls[0]) > 3:
                        time.sleep(random.randint(5,27))
                    try:
                        responses.append(requests.get(ad_link))
                        print(ad_link)
                    except requests.exceptions.ConnectionError:
                        print("hitting connection error")
                        continue

        for r in responses:
            # a fresh dict per page: reusing one dict would make every entry
            # of ``data`` point at the values of the last page
            values = {}
            html = lxml.html.fromstring(r.text)
            values["title"] = html.xpath("//div[@id='postingTitle']/a/h1")[0].text_content()
            values["link"] = unidecode(r.url)
            values["new_keywords"] = []
            values["images"] = html.xpath("//img/@src")
            values["text_body"] = html.xpath("//div[@class='postingBody']")[0].text_content().replace("\n","").replace("\r","")
            try:
                # "@class" selects the attribute; a bare "class" looks for a child element
                values["posted_at"] = html.xpath("//div[@class='adInfo']")[0].text_content().replace("\n"," ").replace("\r","")
            except IndexError:
                values["posted_at"] = "not given"
            values["scraped_at"] = str(datetime.datetime.now())
            body_blob = TextBlob(values["text_body"])
            title_blob = TextBlob(values["title"])
            values["language"] = body_blob.detect_language() #requires the internet - makes use of google translate api
            values["polarity"] = body_blob.polarity
            values["subjectivity"] = body_blob.sentiment[1]
            if values["language"] != "en" and not translator:
                # translate from the language that was detected rather than assuming Spanish
                values["translated_body"] = body_blob.translate(from_lang=values["language"])
                values["translated_title"] = title_blob.translate(from_lang=values["language"])
            else:
                values["translated_body"] = "none"
                values["translated_title"] = "none"
            values["phone_numbers"] = self.phone_number_parse(values)
            data.append(values)

        return data
def sentiment():
    """Print the sentiment of each sentence of a sample text, then its Chinese translation."""
    doob = "Great Movie!"
    blob = TextBlob(doob)

    for sentence in blob.sentences:
        print(sentence.sentiment)

    # "zh-CN" is the code for Simplified Chinese; "cn" is not a language code
    print(blob.translate(to="zh-CN"))
def to_english(message, original_language=None):
    """Translate ``message`` to English.

    Args:
        message: Text to translate.
        original_language: Language code of ``message``; when omitted the
            source language is left for TextBlob to work out.

    Returns:
        The translated TextBlob.
    """
    # the blob is built from the ``message`` argument (the original read an
    # undefined name ``text``)
    blob = TextBlob(message)

    if original_language is not None:
        return blob.translate(from_lang=original_language, to="en")
    return blob.translate(to="en")
def getSentimentScore (text):
    """Return the sentiment polarity of ``text``.

    The text is translated to English first; when that fails the polarity
    of the untranslated text is returned instead.
    """
    blob = TextBlob(text)
    try:
        # translate() hands back a new blob, so the result has to be kept
        # for the polarity below to be computed on the English text
        blob = blob.translate(to="en")
        print(blob)
    except Exception:
        print("No translation needed for English text")
    return blob.sentiment.polarity
Exemple #6
0
    def keywordResultsCount(self, inputs):
        """Run the search described by ``inputs`` and record how many rows it returns.

        When the mapped parameters contain a 'German' and/or 'French' key,
        the 'Solr__Query' value is translated to that language first
        (German before French) and stored in ``self.result_search_term``.

        Args:
            inputs: Search inputs; ``inputs['general']`` is stored on
                ``self.inputs`` and the whole mapping goes to
                ``self.mapParameters``.

        Returns:
            ``self``, with ``results_url`` and ``results_count`` set.
        """
        self.inputs = inputs['general']
        params = self.mapParameters(inputs)

        print('ushmm')
        print(params)

        try:
            for flag, language_code in (('German', 'de'), ('French', 'fr')):
                if flag in params:
                    blob = TextBlob(str(params['Solr__Query']))
                    params['Solr__Query'] = str(blob.translate(to=language_code))
                    print(params)
                    self.result_search_term = params['Solr__Query']
                    print(self.result_search_term)
        except Exception as error:
            # best effort: the search goes ahead with the query as it stands
            print(error)

        url = "http://www.errproject.org/jeudepaume/card_advanced_search.php"
        r = requests.get(url, params=params)
        soup = BeautifulSoup(r.text, "lxml")

        results = soup.find_all("tr", class_="results")
        self.results_url = r.url
        self.results_count = len(results)

        return self
Exemple #7
0
def split_sentence_based_on_verbs(reviewText):
    """Split a review into pieces around its root verbs.

    The review is first brought to English: text not detected as English is
    translated as a whole, and English text that contains words detected as
    Romanian has those words translated one by one. The parsed result is
    then cut after each root verb (except the last) at the first following
    token accepted by ``_is_clause_boundary``.

    Args:
        reviewText: The raw review text.

    Returns:
        A list of text pieces; ``[reviewText]`` when no root verb is found.
    """
    review_spacy = nlp(reviewText)
    review_textblob = TextBlob(reviewText)
    if review_textblob.detect_language() != 'en':
        review_textblob = review_textblob.translate(to='en')
        review_spacy = nlp(review_textblob.string)
    else:
        contains_romanian_words = any(
            len(word) >= 3 and TextBlob(word).detect_language() == 'ro'
            for word in review_textblob.words
        )

        if contains_romanian_words:
            new_reviewText = ''
            for token in review_spacy:
                token_blob = TextBlob(token.orth_)
                # only longer, non-title-case tokens are candidates for translation
                if (not token.is_title and len(token_blob.string) >= 3
                        and token_blob.detect_language() == 'ro'):
                    token_blob = token_blob.translate(to='en')
                new_reviewText = new_reviewText + ' ' + token_blob.string
            review_textblob = TextBlob(new_reviewText)
            review_spacy = nlp(review_textblob.string)

    verbs_positions = [
        k for k, token in enumerate(review_spacy)
        if token.pos == VERB and token.dep_ == 'ROOT'
    ]
    if not verbs_positions:
        return [reviewText]

    new_sentences = []
    start = 0
    # every root verb but the last closes a piece at the next boundary token
    for verb_index in verbs_positions[:-1]:
        q = verb_index + 1
        while q < len(review_spacy):
            if _is_clause_boundary(review_spacy, q):
                new_sentences.append(review_spacy[start:q].text)
                start = q
                break
            q += 1
    # the last root verb takes everything that is left
    new_sentences.append(review_spacy[start:len(review_spacy)].text)
    return new_sentences


def _is_clause_boundary(doc, q):
    """Tell whether the stop-word token at position ``q`` may start a new piece."""
    token = doc[q]
    if not token.is_stop:
        return False
    if token.pos == CONJ:
        # a conjunction only splits when it joins two different parts of speech
        return q < len(doc) - 1 and doc[q - 1].pos != doc[q + 1].pos
    if token.pos == DET:
        return token.lower_ in ['the', 'this', 'those', 'which', 'other', 'another']
    if token.pos == PUNCT:
        # compare the token's text: the token object itself is not a string
        return token.text in [',', ';']
    return False
 def parse_text_meta_data(self,html,values,translator=False):
     """Add language, polarity, subjectivity and translation fields to ``values``.

     Args:
         html: Not used by this method.
         values: Dict with at least "text_body" and "title"; updated in place.
         translator: When false, a body not detected as English is
             translated (together with the title); when true the two
             translation fields are set to "none". The original read this
             name without defining it, so it is now an optional parameter.

     Returns:
         The same ``values`` dict.
     """
     if self.debug: print("Processing textual information - language, polarity, subjectivity..")
     body_blob = TextBlob(values["text_body"])
     title_blob = TextBlob(values["title"])
     values["language"] = body_blob.detect_language() #requires the internet - makes use of google translate api
     values["polarity"] = body_blob.polarity
     values["subjectivity"] = body_blob.sentiment[1]
     if values["language"] != "en" and not translator:
         # translate from the language that was detected rather than assuming Spanish
         values["translated_body"] = body_blob.translate(from_lang=values["language"])
         values["translated_title"] = title_blob.translate(from_lang=values["language"])
     else:
         values["translated_body"] = "none"
         values["translated_title"] = "none"
     return values
Exemple #9
0
def translate_msg(message):
    """Reply to a chat message with its translation.

    Messages longer than three characters are translated to English when
    detected as Russian and to Russian when detected as English; other
    languages get no reply. Any failure is printed and answered with an
    apology message.
    """
    try:
        if len(message.text) > 3:
            b = TextBlob(message.text)
            # detect once and branch on the result; the two cases exclude each other
            detected = b.detect_language()
            if detected == "ru":
                # .raw is the translated text itself, so no Python 2-only
                # unicode() conversion is needed
                bot.send_message(message.chat.id, b.translate(to="en").raw)
            elif detected == "en":
                bot.send_message(message.chat.id, b.translate(to="ru").raw)
    except Exception as e:
        # print the exception itself: ``e.message`` does not exist on Python 3
        print(e)
        bot.send_message(message.chat.id, "Sorry Boss,can't translate :("
                                          " Try another message, please " +
                                          telegram.Emoji.KISSING_FACE)
Exemple #10
0
def main():
    """Read a text from standard input and print readability and sentiment figures.

    Prints the counts returned by ``pointscounter``, ``wordscounter`` and
    ``lettercounter``, the averages derived from them, the readability
    index computed from those averages with its verdict, and the tone and
    objectivity of the English translation of the text.
    """
    string = input()
    points = pointscounter(string)
    letters = lettercounter(string)
    words = wordscounter(string)
    if not points or not words:
        # both averages divide by these counts
        print('Текст не содержит предложений или слов.')
        return
    ASL = words / points
    ASW = letters / words
    FRE = 206.835 - (1.3 * ASL) - (60.1 * ASW)

    print('Предложений: ', points)
    print('Слов: ', words)
    print('Слогов: ', letters)
    print('Средняя длина предложения в словах: ', ASL)
    print('Средняя длина слова в слогах: ', ASW)
    print('Индекс удобочитаемости Флеша: ', FRE)
    if FRE > 80:
        print('Текст очень легко читается (для младших школьников).')
    elif FRE > 50:
        print('\tПростой текст (для школьников).')
    elif FRE > 20:
        print('\tТекст немного трудно читать (для студентов).')
    else:
        print('Текст трудно читается (для выпускников ВУЗов).')

    en_blob = TextBlob(string).translate(to='en')
    polarity = en_blob.sentiment.polarity
    if polarity < -0.33:
        print("Тональность текста: негативный")
    elif polarity > 0.33:
        print('Тональность текста: позитивный')
    else:
        print('Тональность текста: нейтральный')
    # the label reads "objectivity", so report the complement of the
    # subjectivity score, as a percentage with one decimal
    objectivity = round((1 - en_blob.sentiment.subjectivity) * 100, 1)
    the = str(objectivity) + '%'
    print('Объективность текста: ', the)
Exemple #11
0
def text_to_eng(text):
    """Translate ``text`` to English and spell-correct the result.

    Two translations are attempted in turn, from Urdu ('ur') and then from
    Telugu ('te'); a step that fails leaves the text as it was.

    Returns:
        The corrected TextBlob.
    """
    blob = TextBlob(text)
    for source_language in ('ur', 'te'):
        try:
            blob = blob.translate(from_lang=source_language, to='en')
        except Exception:
            # best effort: carry on with the text as it stands
            continue
    return blob.correct()
def replaceByTranslation(text):
    """Return the items of ``text`` translated to English and then to Greek, lower-cased.

    An item for which TextBlob reports NotTranslated keeps whatever form it
    had reached at that point.

    Args:
        text: Iterable of strings.

    Returns:
        A list with one lower-cased string per input item.
    """
    newText = []
    for word in text:
        wordBlob = TextBlob(word)
        try:
            # two hops, English first and then Greek ("el"), for variation
            wordBlob = wordBlob.translate(to="en")
            wordBlob = wordBlob.translate(to="el")
        except NotTranslated:
            pass

        newText.append(str(wordBlob).lower())

    return newText
def _translate_message(bot, broadcast_list, context):
    """Append a translation of the relayed text to responses bound for rooms in another language.

    Does nothing unless ``context`` carries an "autotranslate" entry. For
    each ``(conversation id, response, ...)`` item of ``broadcast_list``
    whose room language differs from the origin room's language, the event
    text is translated and, if the translation differs from the original,
    added to the response (mutated in place) as a line break followed by
    the translation in parentheses.
    """
    if not context or "autotranslate" not in context:
        return

    autotranslate = context["autotranslate"]
    origin_language = _get_room_language(bot, autotranslate["conv_id"])

    for entry in broadcast_list:
        conversation_id = entry[0]
        segments = entry[1]
        target_language = _get_room_language(bot, conversation_id)
        if origin_language == target_language:
            continue

        logger.debug("translating {} to {}".format(origin_language, target_language))
        translated = autotranslate["event_text"]
        try:
            source_blob = TextBlob(autotranslate["event_text"])
            translated = "{0}".format(source_blob.translate(to=target_language))
        except Exception:
            logger.debug("Translation Api returned string unchanged")

        if autotranslate["event_text"] != translated:
            # the response list is shared with the caller, so extend it in place
            segments.extend([
                hangups.ChatMessageSegment('\n', hangups.SegmentType.LINE_BREAK),
                hangups.ChatMessageSegment('(' + translated + ')')])
Exemple #14
0
    def _german(self, text):
        """Return ``text`` translated to English, or ``text`` itself when translation fails."""
        blob = TextBlob(text)

        try:
            return str(blob.translate(to="en"))
        except Exception:
            # best effort; unlike a bare except this lets KeyboardInterrupt
            # and SystemExit through
            return text
Exemple #15
0
def Calificar(id):
    """Record a spoken opinion about product ``id`` and redirect to the product page.

    On a GET request the microphone is recorded, the speech is recognised
    as Spanish, translated to English for the subjectivity and polarity
    scores, and stored in ``opiniones``. Success or failure is flashed to
    the user.

    Returns:
        A redirect to ``getProducto`` for this product.
    """
    cur = mysql.connection.cursor()
    # the parameters must be a sequence: "(id)" is just ``id``, "(id,)" is a tuple
    cur.execute('SELECT * FROM productos WHERE id = %s', (id,))
    cur.fetchall()  # NOTE(review): the selected rows are not used below

    if request.method == 'GET':
        with sr.Microphone() as source:
            print("Di tu opinión...")
            audio = r.listen(source)
            try:
                text = r.recognize_google(audio, language="es-ES")
                textotraducido = TextBlob(text).translate(to="en")
                print("Tu opinión: {}".format(text))

                cur.execute('INSERT INTO opiniones (texto, subjetividad, polaridad, producto_id) VALUES (%s, %s, %s, %s)',
                            (text, textotraducido.subjectivity, textotraducido.polarity, id))
                mysql.connection.commit()
                flash("Producto calificado con éxito", category="exito")
            except Exception:
                print("Tenemos un problema")
                flash("No se capturo la opinión", category="error")

    # every path answers with the same redirect, including non-GET requests,
    # which previously fell through and returned nothing
    return redirect(url_for('getProducto', id=id))
Exemple #16
0
def translate_to_french_for_dunstan(sentence=None):
    """
    Given a sentence, translate each word in the sentence
    Example: sentence = 'I love you', returns {"I": "je", "love": "amour", "you": "vous"}
    use textblob package (https://textblob.readthedocs.io/en/dev/) and NLTK package
    for this task
    :param sentence: Sentence to translate; None or an empty string gives an empty dictionary
    :return: dictionary with each English word as key and its French translation as value
    """
    from textblob.exceptions import NotTranslated

    if not sentence:
        return {}

    en_fr = {}
    # tokenize first: split the sentence into words with NLTK
    for w in nltk.word_tokenize(sentence):
        try:
            # raw gives the translated word as a plain string
            en_fr[w] = TextBlob(w).translate(from_lang="en", to='fr').raw
        except NotTranslated:
            # nothing to translate for this token: keep it as it is
            # instead of abandoning the rest of the sentence
            en_fr[w] = w

    return en_fr
 def analyze(self, tweets):
     """Return the share of positive, neutral and negative tweets as whole percentages.

     Each tweet is translated to English when TextBlob can translate it and
     scored with the language service client; scores below -0.1 count as
     negative, above 0.1 as positive, anything else as neutral.

     Args:
         tweets: Iterable of tweet texts.

     Returns:
         Dict with the keys 'positive', 'neutral' and 'negative'; all three
         are 0 when there are no tweets.
     """
     tweets = list(tweets)
     if not tweets:
         # nothing to score, and nothing to divide by
         return {'positive': 0, 'neutral': 0, 'negative': 0}

     # one client serves every tweet
     client = language.LanguageServiceClient()
     positive_count = negative_count = neutral_count = 0
     for content in tweets:
         try:
             content = TextBlob(content).translate(to='en').raw
         except NotTranslated:
             pass
         document = types.Document(content=content,
                                   type=enums.Document.Type.PLAIN_TEXT)
         annotations = client.analyze_sentiment(document=document)
         score = annotations.document_sentiment.score
         if score < -0.1:
             negative_count += 1
         elif score > 0.1:
             positive_count += 1
         else:
             neutral_count += 1

     total = len(tweets)
     # integer arithmetic: exact, and not subject to float truncation
     return {'positive': positive_count * 100 // total,
             'neutral': neutral_count * 100 // total,
             'negative': negative_count * 100 // total}
Exemple #18
0
def translit(text, language):
    """Translate ``text`` from ``language`` to English.

    Returns:
        The translated TextBlob, or an empty string when translation fails.
    """
    try:
        return TextBlob(text).translate(from_lang=language, to='en')
    except Exception:
        # best effort; KeyboardInterrupt and SystemExit are no longer swallowed
        return ''
Exemple #19
0
def getSentiment(line):
    """Score the sentiment of ``line`` and record its words in the module-level tallies.

    The score is the sum of a word-list part (+0.1 per word found in
    ``positiveWords``, -0.15 per word found in ``negativeWords``, after
    lower-casing, stripping everything but a-z and whitespace, and dropping
    ``stopWords``) and the TextBlob polarity of the line. The polarity is
    taken from the English translation when one is produced, otherwise from
    the line as given.

    Side effects:
        Extends ``listWords`` and updates the counts in ``bagOfWords``.
    """
    global bagOfWords
    global listWords

    # the blob is built from the untouched line, before the clean-up below
    analysis = TextBlob(line)
    sentiment = 0

    cleaned = re.sub(r'[^a-z\s]', '', line.lower())
    words = [word for word in cleaned.split() if word not in stopWords]

    listWords.extend(words)

    for word in words:
        if word in positiveWords:
            sentiment += 0.1
        elif word in negativeWords:
            sentiment -= 0.15

        bagOfWords[word] = bagOfWords.get(word, 0) + 1

    try:
        analysis = analysis.translate(to='en')
    except Exception:
        # no translation produced: score the line itself.
        # NOTE(review): the original returned only the word-list part here.
        pass
    return sentiment + analysis.sentiment.polarity
Exemple #20
0
 def text_blob(doc):
     """
     TextBlob Sentiment (by segment)

     The document is brought to English with the first translator that
     works, tried in this order: Translator, TextBlob's own translate,
     iciba, goslate, and finally tT (whose errors are not caught). Each
     sentence scores polarity * subjectivity; scores within 1e-6 of zero
     are left out of the average.

     :param doc: zh doc
     :return: mean sentence score (-1~1), or NaN when no sentence has a usable score
     """
     def via_translator():
         return TextBlob(Translator().translate(doc).text)

     def via_textblob():
         return TextBlob(doc).translate()

     def via_iciba():
         return TextBlob(iciba(doc, dst='en'))

     def via_goslate():
         return TextBlob(goslate.Goslate().translate(doc, 'en'))

     def via_tt():
         return TextBlob(tT(to_lang="en").translate(doc))

     for attempt in (via_translator, via_textblob, via_iciba, via_goslate):
         try:
             blob = attempt()
             break
         except Exception:
             continue
     else:
         # last resort: an error raised here reaches the caller
         blob = via_tt()

     score = []
     for sentence in blob.sentences:
         emotion = sentence.sentiment.polarity * sentence.sentiment.subjectivity
         if 0.000001 > emotion > -0.000001:
             continue
         score.append(emotion)
     if not score:
         # same value np.mean gives for an empty list, without its warning
         return float('nan')
     return np.mean(score)
Exemple #21
0
def analisis(text):
    """Translate ``text`` to English, analyse it and return the findings as a JSON response.

    The reported polarity is the sum of the sentence polarities of the
    translated text, times 100 and rounded; ``posneg``, ``neg`` and
    ``is_hoax`` are each applied to that number.
    """
    transl = TextBlob(text).translate(to='en')
    blob = transblob(str(transl))

    polarity = sum(sentence.sentiment.polarity for sentence in blob.sentences)
    percent = round(polarity * 100)

    print(percent)

    return jsonify({
        "polarity": percent,
        "positive": posneg(percent),
        "negative": neg(percent),
        "isHoax": is_hoax(percent),
        "tags": blob.tags,
        "noun_phrases": blob.noun_phrases,
        "word_counts": blob.word_counts,
        "words": blob.words,
        "tokenize": blob.tokenize(),
        "sentiment_assessments": blob.sentiment_assessments,
        # send the translated text as a plain string: a blob object is not
        # a JSON value
        "translation": str(transl)
    })
Exemple #22
0
def analize_sentiment(textopt):
    """Return the sentiment polarity of ``textopt``, translating it to English first if needed."""
    blob = TextBlob(textopt)
    if blob.detect_language() == 'en':
        return blob.sentiment.polarity
    # not English: score a blob built from the English translation
    english = TextBlob(str(blob.translate(to='en')))
    return english.sentiment.polarity
Exemple #23
0
 async def on_message(self, msg: discord.Message):
     """Post a translation of a message when auto-translate is on for its server and channel."""
     server = msg.server
     # only servers that have an id and an auto-translate setting take part
     if server is None or not server.id:
         return
     if server.id not in self.settings or "AUTO_TRANSLATE" not in self.settings[server.id]:
         return
     server_settings = self.settings[server.id]

     # only the configured channel, and never the bot's own messages
     if msg.channel is None or msg.channel.id != server_settings["CHANNEL"]:
         return
     if msg.author == server.me:
         return
     if not server_settings["AUTO_TRANSLATE"]:
         return

     try:
         translation = TextBlob(msg.content).translate(to=server_settings["LANGUAGE"])
         sender = msg.author
         await self.bot.send_message(
             msg.channel, "**{}: **{}".format(sender.display_name, translation))
     except textblob.exceptions.NotTranslated:
         return
Exemple #24
0
    def audio(self, texto):
        """Speak ``texto`` in Spanish, translating it to Spanish first when needed.

        The text is written to texto_a_traducir.txt, read back, converted
        to speech, saved as texto.mp3 and handed to the shell for playback.
        """
        self.texto = texto

        # Check the language; anything that is not Spanish gets translated
        blob = TextBlob(self.texto)
        detected = str(blob.detect_language())
        if detected != 'es':
            self.texto = str(blob.translate(to='es'))

        # Write the text to a txt file and read it back
        text_path = 'texto_a_traducir.txt'
        with open(text_path, 'w') as handle:
            handle.write(self.texto)
        with open(text_path) as handle:
            contents = handle.read()

        # Convert the text to audio and store it as mp3
        speech = gTTS(text=contents, lang='es', slow=False)
        speech.save('texto.mp3')

        # Play the audio
        os.system('texto.mp3 &')
Exemple #25
0
 def _translator(self, _text=None, _lang="en"):
     """Spell-correct ``_text`` and send it through a translation round trip.

     The corrected text is passed to the first translator that works, tried
     in this order: ``self.__translator`` and ``self.__google_translator``
     (each to ``_lang`` and back to English), goslate, ``trans`` and
     TextBlob (each to English only). The outcome is spell-corrected again.

     Args:
         _text: Text to process; None is returned unchanged.
         _lang: Pivot language of the round trip.

     Returns:
         The processed text; the spell-corrected input when every
         translator fails.

     NOTE(review): the original finished by overwriting its result with the
     corrected input, so no translation ever reached the caller, and its
     first strategy translated the input twice instead of translating the
     pivot text back -- confirm the round trip is the intended behavior.
     """
     if _text is None:
         return None
     print("Translating...")
     txtBlob = TextBlob(_text)
     corrected = str(txtBlob.correct())
     sleep(self.sleep_time)  #short Time

     def via_translator():
         pivot = self.__translator.translate(corrected, dest=_lang)
         sleep(self.sleep_time)  #short Time
         return self.__translator.translate(pivot.text, dest="en").text

     def via_google_translator():
         pivot = self.__google_translator.translate(corrected, lang_tgt=_lang)
         sleep(self.sleep_time)  #short Time
         return self.__google_translator.translate(pivot, lang_tgt="en")

     def via_goslate():
         return goslate.Goslate().translate(corrected, 'en')

     def via_trans():
         return trans(to_lang="en").translate(corrected)

     def via_textblob():
         return str(txtBlob.translate(to='en'))

     translated = None
     for attempt in (via_translator, via_google_translator, via_goslate,
                     via_trans, via_textblob):
         try:
             translated = attempt()
             break
         except Exception:
             # this translator is unavailable: try the next one
             continue

     if translated is None:
         _text = corrected
     else:
         _text = str(TextBlob(translated).correct())
     print("Translating Completed.")
     return _text
Exemple #26
0
def translate(input_csv, text_csv, trans_csv):
    """Translate the reviews.

    For every row index of ``input_csv`` that ``index_na`` reports for the
    text table, the row's 'text' is translated to English from its
    detected language and passed through ``proc_text``; the text is kept
    untranslated when detection or translation fails. All other rows take
    the 'clean' value of ``text_csv``. The result is written to
    ``trans_csv`` with the columns 'trans_text' and 'stars'.
    """
    text = pd.read_csv(text_csv)
    table = pd.read_csv(input_csv)
    trans = pd.DataFrame(columns=['trans_text'])

    na = index_na(text)
    new_text = []
    for i in table.index.tolist():
        if i not in na:
            new_text.append(text.loc[i, 'clean'])
            continue

        t = table.loc[i, 'text']
        try:
            translated = TextBlob(t).translate(from_lang=detect(t), to="en")
        except Exception:
            # detection or translation failed (for instance nothing to
            # translate): keep the text as it is rather than abort the run
            new_text.append(t)
            continue
        new_text.append(proc_text(str(translated)))

    trans['trans_text'] = pd.Series(new_text)
    trans['stars'] = text[['stars']]

    trans.to_csv(trans_csv,
                 sep=',',
                 encoding='utf-8',
                 header=True,
                 doublequote=True,
                 index=False)
Exemple #27
0
def hi(bot, trigger):
    """Greet ``trigger.nick`` in a language picked at random from a fixed list."""
    lang_codes = ['af', 'ga', 'sq', 'it', 'ar', 'ja', 'az', 'kn', 'eu', 'ko', 'bn', 'la', 'en']
    # space after the nick so it does not run into the next word
    trans = TextBlob('Greetings dear '+trigger.nick+' on the road of life ')
    # index range follows the list length instead of a hard-coded 12
    ind = randint(0, len(lang_codes) - 1)
    try:
        trans = trans.translate(to=lang_codes[ind])
    except Exception:
        # no translation available (the list includes 'en', and the
        # greeting is English already): say it untranslated
        pass
    bot.say(str(trans))
Exemple #28
0
def get_polarity(text):
    """Return the sentiment of a Spanish text, measured on its English translation.

    Returns None for an empty string and for text not detected as Spanish.
    After translating, the function sleeps for two seconds before returning.
    """
    blob = TextBlob(text)
    if text == '':
        return None
    if blob.detect_language() != 'es':
        return None
    sentiment = blob.translate(from_lang='es', to='en').sentiment
    time.sleep(2)
    return sentiment
Exemple #29
0
    def _german(self, text):
        """Return ``text`` translated to English, or ``text`` itself when translation fails."""
        blob = TextBlob(text)

        try:
            return str(blob.translate(to="en"))
        except Exception:
            # best effort; unlike a bare except this lets KeyboardInterrupt
            # and SystemExit through
            return text
Exemple #30
0
def gen_translate(msg, fromlang=None, outputlang='en'):
    """Translate ``msg`` to ``outputlang``.

    Args:
        msg: Text to translate.
        fromlang: Source language code; when None the choice is left to
            TextBlob.
        outputlang: Target language code.

    Returns:
        The translated string, or ``msg`` when TextBlob reports NotTranslated.
    """
    options = {'to': outputlang}
    if fromlang is not None:
        # pass a source language only when the caller gave one, so the
        # library's own default applies otherwise instead of an explicit None
        options['from_lang'] = fromlang
    try:
        return str(TextBlob(msg).translate(**options))
    except NotTranslated:
        return msg
def process_status(status, lang):
    """Build a summary record for a status, with its text in English and its sentiment.

    For ``lang`` other than 'en' the text is translated with TextBlob's
    default target; the original text is kept when TextBlob reports
    NotTranslated. 'created_at' becomes milliseconds derived from
    ``time.mktime``.
    """
    if lang == 'en':
        text = status['text']
    else:
        try:
            text = str(TextBlob(status['text']).translate())
        except textblob.exceptions.NotTranslated:
            text = status['text']

    # sentiment analysis: the result unpacks to (polarity, subjectivity)
    polarity, subjectivity = TextBlob(text).sentiment

    created_ms = 1000 * int(time.mktime((status['created_at']).timetuple()))
    record = {"created_at": created_ms}
    record["id_str"] = status['id_str']
    record["text"] = text
    record["sentiment"] = {"polarity": polarity, "subjectivity": subjectivity}
    # the remaining fields are copied over unchanged
    for key in ("retweet_count", "in_reply_to_status_id_str", "geo",
                "retweeted", "in_reply_to_user_id_str"):
        record[key] = status[key]
    return record
Exemple #32
0
    def on_command(self, msg, stdin, stdout, reply):
        """Translate a message and print the translation to ``stdout``.

        The message comes from the command arguments when given, otherwise
        from ``stdin``. Nothing is printed when the arguments cannot be
        parsed or the translation fails.
        """
        # pre-process args
        # this might mess up if "from" or "to" is left out and
        # the message contains "from" or "to"
        self._push_character(msg["args"], "from", "-", 1)
        self._push_character(msg["args"], "to",   "-", 1)

        try:
            args = self.parser.parse_args(msg["args"][1:])
        except (argparse.ArgumentError, SystemExit):
            return

        # get message from the appropriate place
        if args.message:
            message = " ".join(args.message)
        else:
            message = stdin.read().strip()

        # translate
        blob = TextBlob(message)
        try:
            translated = blob.translate(from_lang=args.from_language,
                                        to=args.to_language)
        except Exception:
            # best effort: no output for a message that cannot be translated;
            # KeyboardInterrupt and SystemExit are no longer swallowed here
            return
        print(translated, file=stdout)
Exemple #33
0
def translateString(textStr):
    """Return ``textStr`` as a TextBlob translated to English.

    The source language is the one TextBlob detects. When detection or
    translation fails (for instance for text that is English already) the
    untranslated blob is returned.
    """
    blob = TextBlob(textStr)
    try:
        return blob.translate(from_lang=blob.detect_language(), to='en')
    except Exception:
        # best effort; KeyboardInterrupt and SystemExit are no longer swallowed
        return blob
Exemple #34
0
def review_features_romanian(reviewText, type):
    """Build the feature rows for the content words of a review.

    The review is first brought to English: text not detected as English is
    translated as a whole, and English text that contains words detected as
    Romanian has those words translated one by one. A feature row is then
    produced for every non-punctuation token that is a noun, adjective,
    adverb, or a verb with positive TextBlob polarity.

    Args:
        reviewText: The raw review text.
        type: A ``labelType.Label`` member (aspect, attribute, polarity or
            emotion) selecting the feature function; any other value yields
            no rows.

    Returns:
        A list with one feature row per selected token.
    """
    review_spacy = nlp(reviewText)
    review_textblob = TextBlob(reviewText)
    if review_textblob.detect_language() != 'en':
        review_textblob = review_textblob.translate(to='en')
        review_spacy = nlp(review_textblob.string)
    else:
        contains_romanian_words = any(
            len(word) >= 3 and TextBlob(word).detect_language() == 'ro'
            for word in review_textblob.words
        )

        if contains_romanian_words:
            new_reviewText = ''
            for token in review_spacy:
                token_blob = TextBlob(token.orth_)
                # only longer, non-title-case tokens are candidates for translation
                if (not token.is_title and len(token_blob.string) >= 3
                        and token_blob.detect_language() == 'ro'):
                    token_blob = token_blob.translate(to='en')
                new_reviewText = new_reviewText + ' ' + token_blob.string
            review_textblob = TextBlob(new_reviewText)
            review_spacy = nlp(review_textblob.string)

    # take the entities from the document that is actually analysed below,
    # not from the parse made before translation
    review_spacy_ents = review_spacy.ents

    word_features_array = []
    for i, word in enumerate(review_spacy):
        is_content_word = (
            word.pos == NOUN
            or (word.pos == VERB and TextBlob(word.orth_).sentiment.polarity > 0)
            or word.pos == ADJ
            or word.pos == ADV
        )
        if not is_content_word or word.is_punct:
            continue
        if type == labelType.Label.aspect:
            word_features_array.append(word_aspect_features(review_spacy, review_textblob, review_spacy_ents, i))
        elif type == labelType.Label.attribute:
            word_features_array.append(word_attribute_features(review_spacy, review_textblob, review_spacy_ents, i))
        elif type == labelType.Label.polarity:
            word_features_array.append(word_polarity_features(review_spacy, review_textblob, review_spacy_ents, i))
        elif type == labelType.Label.emotion:
            word_features_array.append(word_emotion_features(review_spacy, review_textblob, review_spacy_ents, i))
    return word_features_array
Exemple #35
0
class Review:
    """Review class does all the review processing"""
    def __init__(self, review_text, lang='en'):
        """Wrap ``review_text`` in a blob and record its language and sentiment.

        ``lang`` is accepted but not used.
        """
        self.text = review_text
        self.tb = TB(self.text)
        # read from the blob just created (the original used an undefined name ``tb``)
        self.language = self.tb.detect_language()
        self.subjectivity = self.tb.subjectivity
        self.polarity = self.tb.polarity

    def get_tokens(self):
        """returns the list of all tokens including punctuations"""
        return list(self.tb.tokens)

    def get_tags(self):
        """return the list of pair of word and pos tag"""
        return list(self.tb.tags)

    def translate(self, from_lang, to_lang=u'en'):
        """translate the current text to specified language"""
        self.tb = self.tb.translate(from_lang, to_lang)

    def get_sentences(self):
        """returns a list of list of pair of words,pos-tag"""
        return [list(TB(str(y)).tags) for y in self.tb.sentences]

    def get_features(self):
        """return a dictionary of noun along with a list of adjectives associated to it

        Every adjective (tag starting with 'JJ') goes to the nearest noun
        (tag starting with 'NN') on either side, the left one winning a
        tie. Adjectives with no noun on either side are collected under
        "this_product".
        """
        tags = self.get_tags()
        noun_features = {}
        count = len(tags)
        for i, (word, tag) in enumerate(tags):
            if not tag.startswith('JJ'):
                continue
            # find the closest noun on each side
            left = i - 1
            right = i + 1
            while left >= 0 and not tags[left][1].startswith('NN'):
                left -= 1
            while right < count and not tags[right][1].startswith('NN'):
                right += 1
            has_left = left >= 0
            has_right = right < count
            # a side that ran off the end is not a candidate, however near
            # the end of the list was
            if has_left and (not has_right or i - left <= right - i):
                closest_noun = tags[left][0]
            elif has_right:
                closest_noun = tags[right][0]
            else:
                closest_noun = "this_product"

            # adding the noun features
            noun_features.setdefault(closest_noun, []).append(word)
        return noun_features
Exemple #36
0
def spam_filter(msg=None):
    """Classify a message as spam or ham and print assorted text analytics.

    The message is language-detected, then either translated to English or
    spell-corrected, and the resulting text is fed to the module-level
    vectorizers/classifiers (``vect``/``model`` for spam, ``vectsa``/``clf``
    for sentiment), to ``indicoio`` and to ``profanity``.  All results are
    printed; nothing is returned.

    Args:
        msg: The message text.  When omitted, the user is prompted for it
            at call time.
    """
    if msg is None:
        # Prompt per call: an input() call in the signature would run once,
        # when the function is defined, and be reused for every later call.
        msg = input("Enter message = ")
    msg = TextBlob(msg)
    current_lang = msg.detect_language()
    print("Language of this message is = ", current_lang)
    # translate()/correct() return a new blob, so the result must be rebound
    # for the normalised text to be used below.
    if current_lang != 'en':
        msg = msg.translate(to='en')
    else:
        msg = msg.correct()
    text = str(msg)

    # NOTE: the spam model is re-fitted on X/y on every call.
    X_dtm = vect.fit_transform(X)
    test_dtm = vect.transform([text])
    model.fit(X_dtm, y)
    result = model.predict(test_dtm)
    prob = model.predict_proba(test_dtm)
    if result == [1]:
        print("SPAM ALERT!")
    else:
        print("HAM")
        predsa = clf.predict(vectsa.transform([text]))

        if predsa == [1]:
            print("Positive Feeling")

        elif predsa == [0]:
            print("Negative Feeling")
        else:
            print("Can't analyze ur Felling...Try API ? ....")
        senti = indicoio.sentiment_hq(text)
        print("Online Help , Positivity of Incoming Message = ", senti)

    p = indicoio.personality(text)
    trait_row = [
        p['agreeableness'], p['conscientiousness'], p['extraversion'],
        p['openness'], msg.sentiment.polarity, msg.sentiment.subjectivity
    ]
    traits = pd.DataFrame([trait_row],
                          columns=[
                              'agreeableness', 'conscientiousness',
                              'extraversion', 'openness', 'polarity',
                              'subjectivity'
                          ])
    print(profanity.contains_profanity(text), " Profanity")
    print(profanity.censor(text))
    print("Summarizing this message =", msg.noun_phrases)
    percent = pd.DataFrame(prob, columns=["% HAM", "%SPAM"])
    print(traits)
    print(percent)
Exemple #37
0
 def is_support(self):
     """Mark the stance as SUP when the English translation reads positive.

     ``self.full_text`` is translated to English and, if its sentiment
     polarity is above zero, ``self.set_stance(SUP)`` is called.  Any
     ordinary failure along the way is deliberately ignored (best effort).
     """
     analysis = TextBlob(self.full_text)
     try:
         eng = analysis.translate(to='en')
         if eng.sentiment.polarity > 0:
             self.set_stance(SUP)
     except Exception:
         # Best effort only.  Catching Exception instead of using a bare
         # except lets KeyboardInterrupt/SystemExit propagate.
         # NOTE(review): text that is already English may raise here and so
         # never get a stance -- confirm against the TextBlob version in use.
         pass
 def _translate_text(self, text):
     """Return ``text`` translated into ``self.to_lang`` as a plain string.

     If TextBlob raises ``NotTranslated`` the input is returned unchanged
     and a debug message is logged.
     """
     source_blob = TextBlob(text)
     try:
         result = source_blob.translate(to=self.to_lang)
     except NotTranslated:
         logging.debug("There is a word the same as input " + text)
         result = text
     return str(result)
Exemple #39
0
def translate(file_name):
    """Translate the English lines of ``static/en-us/<file_name>`` to Chinese.

    Lines rejected by ``check_contain_english`` are dropped, the remainder
    is joined into one newline-free string, translated with ``to='zh'`` and
    returned with a line break inserted after every "。".
    """
    with open('static/en-us/' + file_name) as source:
        english_rows = [row for row in source.readlines()
                        if check_contain_english(row)]  # english filter
        flattened = "".join(english_rows).replace("\n", "")
        translated = TextBlob(flattened).translate(to='zh')
        return str(translated).replace("。", "。\n")
def gen_translate(msg, fromlang, outputlang):
    """Translate ``msg`` from ``fromlang`` to ``outputlang``.

    Returns the translation as a string, or ``msg`` itself when TextBlob
    raises ``NotTranslated``.
    """
    try:
        # FIXME: language detection is broken.
        translated = TextBlob(msg).translate(from_lang=fromlang, to=outputlang)
        return str(translated)
    except NotTranslated:
        return msg
def translate(text):
    """Translate Russian ``text`` to English and return it as a string.

    When TextBlob raises ``NotTranslated`` the original text is returned.
    """
    original = TextBlob(text)
    try:
        english = original.translate(from_lang="ru", to="en")
    except NotTranslated:
        english = original
    return str(english)
Exemple #42
0
def translate_this(jenni, msg):
    """Reply with the English translation of the first match group of ``msg``.

    Nothing is sent when the detected language is already English.
    """
    blob = TextBlob(msg.groups()[0])
    detected = blob.detect_language()
    if detected == 'en':
        return
    english = blob.translate(from_lang=detected, to='en')
    jenni.reply("{}".format(english))
def from_eng_to_es():
    """Prompt for an English phrase and print it followed by its Spanish translation."""
    prompt = "enter a phrase to translate from english into spanish:\n"
    phrase = Bl(input(prompt))
    print('In English:\n {}'.format(phrase))
    print("in Spanish:")

    # The translation is requested only after both headers have been printed.
    spanish = phrase.translate(to='es')
    print(spanish)
        def wrapper(*args: Tuple[str]) -> Callable:
            """Wrap the arguments in a TextBlob and pass it to ``func``.

            When the enclosing ``translate`` flag is truthy and the detected
            language is not English, the blob is translated to English first.
            """
            blob = TextBlob(*args)
            if translate and blob.detect_language() != 'en':
                blob = blob.translate(to='en')
            return func(blob)
 def is_support(self):
     """Set the stance to 'support' when the English translation reads positive.

     The text in ``line`` (a name resolved outside this method) is
     translated to English; a sentiment polarity above zero triggers
     ``self.set_stance('support')``.  Any ordinary failure is reported
     with a printed message instead of being raised.
     """
     analysis = TextBlob(line)
     try:
         eng = analysis.translate(to='en')
         if eng.sentiment.polarity > 0:
             self.set_stance('support')
     except Exception:
         # Catching Exception instead of using a bare except lets
         # KeyboardInterrupt/SystemExit propagate.
         print("El elemento no está presente")
Exemple #46
0
def translate(phrase, from_lang, to_lang='en'):
    """Translate ``phrase`` from ``from_lang`` to ``to_lang``.

    Returns:
        The translated text (``.string`` of the translated blob), or the
        literal "Sorry, no translation!" when translation fails.
    """
    blob = TextBlob(phrase)

    try:
        translation = blob.translate(from_lang=from_lang, to=to_lang)
        return translation.string
    except Exception:
        # Catching Exception instead of using a bare except lets
        # KeyboardInterrupt/SystemExit propagate.
        return "Sorry, no translation!"
Exemple #47
0
 def translate(self, TranslateTo):
     """Translate ``self.DataSourcetext`` into the language ``TranslateTo``.

     On success the result is stored in ``self.DataSourceTranslatedText``
     and ``self.isTranslated`` is set to True; on any ``Exception``
     ``self.TranslationError`` is set to True instead.
     """
     self.TranslationError = False
     source_blob = TextBlob(self.DataSourcetext)
     try:
         self.DataSourceTranslatedText = source_blob.translate(to=TranslateTo)
         self.isTranslated = True
     except Exception:
         self.TranslationError = True
Exemple #48
0
def getTranslation(text, code):
    """Return ``text`` as a TextBlob translated into language ``code``.

    When translation fails the untranslated blob is returned instead, so
    the caller always receives a TextBlob.
    """
    toTranslate = TextBlob(text)
    try:
        translatedText = toTranslate.translate(to=code)
    except Exception:
        # Catching Exception instead of using a bare except lets
        # KeyboardInterrupt/SystemExit propagate.
        translatedText = toTranslate

    return translatedText
Exemple #49
0
    def keywordResultsCount(self, inputs):
        """Query the Getty search page and record the number of results.

        ``inputs`` is split on single spaces.  When it has more than one
        word and the first is ``German`` or ``French``, the second word is
        translated to that language and used as the search term; in every
        other case (including a failed translation) the first word is used
        untranslated.

        Sets ``self.inputs``, ``self.results_url``,
        ``self.result_search_term`` and ``self.results_count`` (the count
        string scraped from the page when it is all digits, otherwise the
        integer 0).

        Returns:
            self, to allow chaining.
        """
        self.inputs = inputs
        base_url = "http://www.getty.edu/Search/SearchServlet?qt="
        target_languages = {'German': 'de', 'French': 'fr'}

        query = inputs.split(' ')
        x = len(query)
        # Single-argument print() behaves the same on Python 2 and 3.
        print(x)

        # Start from the untranslated fallback so that url and the result
        # attributes are always defined, whatever happens below.
        search_term = str(query[0])
        if x > 1 and query[0] in target_languages:
            try:
                blob = TextBlob(query[1])
                search_term = str(blob.translate(to=target_languages[query[0]]))
            except Exception:
                # Translation failed: keep searching for the first word.
                search_term = str(query[0])

        url = base_url + search_term
        self.results_url = url
        self.result_search_term = search_term

        html = requests.get(url).text
        soup = BeautifulSoup(html, "lxml")

        # The count is read from the second whitespace-separated token of
        # the first cell of the third table on the page.
        table = soup.find_all("table")[2]
        num = table.find("td").contents[0].strip().split()
        count = num[1]

        if count.isdigit():
            self.results_count = count
        else:
            self.results_count = 0
        return self
Exemple #50
0
    def onButtonPressed(self, button):
        """Translate the input text view into the output text view.

        Clears the output buffer, reads the input text and the selected
        target/source languages, then shows either the value cached in
        ``r`` or a fresh TextBlob translation.  A fresh translation is
        written back to the cache when the lookup returned None.  Nothing
        happens beyond clearing the output when no target language is
        selected.
        """
        def selected_language(combo):
            # Second column of the active row, or None without a selection.
            active = combo.get_active_iter()
            if active is None:
                return None
            _key, language = combo.get_model()[active][:2]
            return language

        output_buffer = tview_translate.get_buffer()
        output_buffer.delete(output_buffer.get_start_iter(),
                             output_buffer.get_end_iter())

        input_buffer = tview_text.get_buffer()
        text = u"{0}".format(input_buffer.get_text(
            input_buffer.get_start_iter(), input_buffer.get_end_iter(), False))

        language_to = selected_language(comboboxtext_to)
        language_from = selected_language(comboboxtext_from)

        if language_to is None:
            return

        cache_key = text + ":" + language_to
        cached = r.hget(cache_key, language_from)
        if cached:
            tview_translate.get_buffer().set_text(cached)
            return

        blob = TextBlob(text)
        if language_from == 'detect':
            language_from = blob.detect_language()

        if language_from is None:
            translated = blob.translate(to=language_to)
        else:
            translated = blob.translate(from_lang=language_from,
                                        to=language_to)
        translation = u"{0}".format(translated)

        tview_translate.get_buffer().set_text(translation)
        if cached is None:
            r.hmset(cache_key, {language_from: translation})
def translate_en(bot, update):
    """Send the English translation of the incoming message back to its chat.

    The message text is first passed through ``text_replace``; the value
    returned by ``bot.sendMessage`` is returned to the caller.
    """
    cleaned = text_replace(update.message.text)
    chat_id = update.message.chat_id

    english = TextBlob(cleaned).translate(to='en')
    reply = u'Tradução: %s' % str(english)
    return bot.sendMessage(chat_id, text=reply)
Exemple #52
0
def translate(string, lang):
    """Return an empty string; translation is currently switched off.

    NOTE: the function used to continue past an unconditional early return
    with code that translated ``string`` to English via TextBlob whenever
    ``lang`` was not "en".  That code could never run, so only the
    effective behaviour is kept here.
    """
    return ""
    def post(self, request, *args, **kwargs):
        """Translate the posted ``selected_text`` with ``to='kn'`` and return it as JSON."""
        selected_text = request.POST.get('selected_text')
        translation = TextBlob(selected_text).translate(to='kn')
        payload = {
            'status': "success",
            'translated_word_or_sentence': str(translation),
        }
        return JsonResponse(payload)
def translate_pt(bot, update):
    """Send the Brazilian-Portuguese translation of the incoming message to its chat.

    The message text is first passed through ``text_replace``; the value
    returned by ``bot.sendMessage`` is returned to the caller.
    """
    text = text_replace(update.message.text)

    chat_id = update.message.chat_id

    en_blob = TextBlob(text)
    pt_text = en_blob.translate(to='pt-BR')

    # ``unicode`` only exists on Python 2.  Letting the unicode format
    # string do the conversion yields text on both Python 2 and 3.
    return bot.sendMessage(chat_id, text=u'Tradução: %s' % (pt_text,))
Exemple #55
0
def get_translations(keywords, languages):
    """Translate ``keywords`` into each of ``languages``.

    Returns a list of translated strings in the order of ``languages``;
    languages for which TextBlob raises ``NotTranslated`` are skipped.
    """
    if len(languages) == 0:
        return []

    blob = TextBlob(keywords)
    translations = []
    for lang in languages:
        try:
            translated = str(blob.translate(to=lang))
        except NotTranslated:
            continue
        translations.append(translated)
    return translations
Exemple #56
0
def translate(places):
    """Map each place name to the first word of its translation.

    Each name is translated into ``TRANSLATE_TO``; the first word of the
    result is stored UTF-8 encoded.  Names for which TextBlob raises
    ``NotTranslated`` map to themselves.
    """
    translations = {}
    for place in places:
        blob = TextBlob(place)
        try:
            first_word = blob.translate(to=TRANSLATE_TO).words[0]
        except textblob.exceptions.NotTranslated:
            translations[place] = place
        else:
            # NOTE(review): ``unicode`` is a Python 2 builtin.
            translations[place] = unicode(first_word).encode('utf-8')
    return translations
def translated(list_topics, start_idx, end_idx, dir_topic, dir_out, paramter):
    """Translate topic files to Vietnamese and write a summary for each.

    For every topic ``list_topics[i]`` with ``start_idx <= i < end_idx``
    the file ``dir_topic + topic`` is read, each non-empty line is
    translated with ``to='vi'`` and paired with the 1-based token positions
    of its noun phrases, and the summary produced by
    ``PhraseBaseSummarization`` is written to ``dir_out + topic``.  Lines
    that fail to translate are kept untranslated with no phrases.

    Args:
        list_topics: topic file names.
        start_idx, end_idx: half-open index range into ``list_topics``.
        dir_topic: input directory prefix (concatenated as-is).
        dir_out: output directory prefix (concatenated as-is).
        paramter: summarizer parameter list; its last element is
            overwritten in place with the topic's budget.
    """
    # The signature keeps its historical spelling for callers; the body
    # used to reference an undefined name ``parameter``.
    parameter = paramter

    # One integer budget per line; blank lines (such as the one produced by
    # a trailing newline) are ignored, so the last entry is never lost.
    with open("data/budget/budget", "r") as budget_file:
        budget = [int(entry)
                  for entry in budget_file.read().split("\n")
                  if entry.strip()]

    number_sentence = 0

    for i in range(start_idx, end_idx):
        topic = list_topics[i]
        print("process " + topic)
        parameter[-1] = budget[i]
        with open(dir_topic + topic, mode="rb") as topic_file:
            sentences = topic_file.read().decode("utf8", "replace").split('\r\n')

        phrase_sentences = []
        for line in sentences:
            # Skip empty lines entirely; previously they re-appended the
            # sentence left over from the preceding line.
            if len(line) == 0:
                continue
            number_sentence += 1
            phrase = []
            try:
                blob = TextBlob(line)
                translated_blob = blob.translate(to='vi')
                out_sen = " ".join(translated_blob.tokens)
                start = 0
                out_sen_tmp = out_sen.lower().split()
                end = len(out_sen_tmp)
                for nphrase in translated_blob.noun_phrases:
                    nphrase = nphrase.split()
                    k = find_index(out_sen_tmp, nphrase, start, end)
                    # Continue the next search after this phrase.
                    start = k + len(nphrase)
                    phrase.append([j + 1 for j in range(k, k + len(nphrase))])
            except Exception:
                # Best effort: fall back to the untranslated line.
                out_sen = line
                phrase = []
            if out_sen != "":
                phrase_sentences.append(sentence.sentence(out_sen, phrase))

        summarizer = PhraseBaseSummarization.phrase_based_summarization(phrase_sentences)
        summary = summarizer.summarizer(parameter)

        with open(dir_out + topic, "w") as out_file:
            out_file.write(summary)

        print("finish " + topic)

    # Single-argument print() gives the same output on Python 2 and 3.
    print("no.sentence:  " + str(number_sentence))
Exemple #58
0
    def keywordResultsCount(self, inputs):
        """Query the National Archives catalog API and record the result count.

        ``inputs`` is split on single spaces.  When it has more than one
        word and the first is ``German`` or ``French``, the second word is
        translated to that language and used as the search term; in every
        other case (including a failed translation) the first word is used
        untranslated.

        Sets ``self.inputs``, ``self.results_url``,
        ``self.result_search_term`` and ``self.results_count`` (the
        ``total`` reported by the API, or 0 when it is None).

        Returns:
            self, to allow chaining.
        """
        self.inputs = inputs
        api_url = "https://catalog.archives.gov/api/v1/?q="
        search_url = "https://catalog.archives.gov/search?q="
        target_languages = {'German': 'de', 'French': 'fr'}

        query = inputs.split(' ')
        x = len(query)
        # Single-argument print() behaves the same on Python 2 and 3.
        print(x)

        # Start from the untranslated fallback so that url and the result
        # attributes are always defined, whatever happens below.
        search_term = str(query[0])
        if x > 1 and query[0] in target_languages:
            try:
                blob = TextBlob(query[1])
                search_term = str(blob.translate(to=target_languages[query[0]]))
            except Exception:
                # Translation failed: keep searching for the first word.
                search_term = str(query[0])

        url = api_url + search_term
        self.results_url = search_url + search_term
        self.result_search_term = search_term

        res = requests.get(url)
        parsed = res.json()
        num = parsed["opaResponse"]["results"]["total"]

        if num is not None:
            self.results_count = num
        else:
            self.results_count = 0
        return self
Exemple #59
0
def textblob_sentiment():
	"""Print the sentiment polarity of each sentence in a fixed block of headlines.

	Takes no arguments and returns None.  The headlines are wrapped in a
	TextBlob; its tags, noun phrases and Spanish translation are evaluated
	but their results are discarded, so the only visible output is one
	polarity value per sentence.
	"""
	text = '''
	Apple Store looks for a boost with new leader Angela Ahrendts.
	What 1 Expert Is Watching At Tesla.
	How the iPad mini became Apples middle child.
	My dream investor. 
	Timexs $36 Watch Answer to Apple Watch.
	Facebook, Skyworks Among IBD 50's Top 5 Tech Earnings.
	Apple told its sapphire supplier Put on your big boy pants.
	World losing battle against global warming.
	Apple Accused of Bait and Switch.
	GoPro mum on new cameras; SNL suggests a colonoscopy cam.
	Apple Loop: Eleven iPhone Tips, Apple Watch SDK, Xperia Defeats iPhone.
	Taiwan Semiconductor In Buy Range From Breakout.
	ATM Czar Euronet Adds U.S. To A Global Growth Engine.
	Final Glance: Computer companies.
	Final Glance: Computer companies.
	What Apple has in common with F-35 maker Lockheed Martin.
	GT Advanced says fell victim to 'bait-and-switch' by Apple.
	GT Advanced says fell victim to 'bait-and-switch' by Apple.
	The top 15 smartphones you can buy right now.
	Documents show acrimony over failed Apple deal.
	Apple Inc.'s Latest Supplier Negotiating Tactics With GT Advanced Revealed.
	Is Apple, Inc. Stock Still Cheap at All-Time Highs?
	Should Apple, Inc. Investors Fear a 2016 iPhone Sales Meltdown?
	Profit
	'''

	blob = TextBlob(text)
	# Bare attribute access: the (word, POS tag) pairs are evaluated but the
	# result is not stored or printed.
	blob.tags

	# Likewise, the noun phrases are evaluated and then discarded.
	blob.noun_phrases

	# One polarity value is printed per sentence detected in the text.
	for sentence in blob.sentences:
	    print(sentence.sentiment.polarity)

	# The Spanish translation is requested but its result is discarded.
	blob.translate(to="es")