Beispiel #1
0
def file_translator(file, language):
    """Print a translation of every non-blank line of a text file.

    Args:
        file: Path (or anything else ``open()`` accepts) of the file to read.
        language: ``"english"`` translates each line to English (``"en"``);
            any other value translates to Spanish (``"es"``).

    Returns:
        None. Each translated line is written to standard output.
    """
    # The target code is identical for every line, so choose it once.
    target = "en" if language == "english" else "es"
    with open(file, 'r') as fh:
        for line in fh:
            # Blank lines carry nothing to translate; skip them rather than
            # hand an empty string to the translator.
            if not line.strip():
                continue
            # Single-argument print() behaves the same on Python 2 and 3.
            print(TextBlob(line).translate(to=target))
Beispiel #2
0
def word_translator(words):
    """Print a translation of ``words`` between English and Spanish.

    Text detected as English (``"en"``) is translated to Spanish and
    reported in English; text detected as Spanish (``"es"``) is translated
    to English and reported in Spanish. Nothing is printed for any other
    detected language.

    Args:
        words: The text to translate.

    Returns:
        None. The result is written to standard output.
    """
    b = TextBlob(words)
    # Detect once and reuse the answer instead of asking again in the
    # second branch.
    detected = b.detect_language()
    # One pre-formatted argument per print() keeps the output identical on
    # Python 2 and Python 3.
    if detected == "en":
        print("The word %s is in english and means %s"
              % (words, b.translate(to="es")))
    elif detected == "es":
        print("La palabra %s esta en espanol y en ingles significa %s"
              % (words, b.translate(to="en")))
Beispiel #3
0
def file_translator(file, language):
    """Print a translation of every non-blank line of a text file.

    Args:
        file: Path (or anything else ``open()`` accepts) of the file to read.
        language: ``"english"`` translates each line to English (``"en"``);
            any other value translates to Spanish (``"es"``).

    Returns:
        None. Each translated line is written to standard output.
    """
    # The target code is identical for every line, so choose it once.
    target = "en" if language == "english" else "es"
    with open(file, 'r') as fh:
        for line in fh:
            # Blank lines carry nothing to translate; skip them rather than
            # hand an empty string to the translator.
            if not line.strip():
                continue
            # Single-argument print() behaves the same on Python 2 and 3.
            print(TextBlob(line).translate(to=target))
Beispiel #4
0
def word_translator(words):
    """Print a translation of ``words`` between English and Spanish.

    Text detected as English (``"en"``) is translated to Spanish and
    reported in English; text detected as Spanish (``"es"``) is translated
    to English and reported in Spanish. Nothing is printed for any other
    detected language.

    Args:
        words: The text to translate.

    Returns:
        None. The result is written to standard output.
    """
    b = TextBlob(words)
    # Detect once and reuse the answer instead of asking again in the
    # second branch.
    detected = b.detect_language()
    # One pre-formatted argument per print() keeps the output identical on
    # Python 2 and Python 3.
    if detected == "en":
        print("The word %s is in english and means %s"
              % (words, b.translate(to="es")))
    elif detected == "es":
        print("La palabra %s esta en espanol y en ingles significa %s"
              % (words, b.translate(to="en")))
Beispiel #5
0
 def get_translate(self, text):
     """Send a translation of ``text`` through ``self.sender``.

     Every occurrence of the literal ``"text:"`` is removed first. Text
     whose detected language is not ``'en'`` is translated to ``"en"``;
     text detected as ``'en'`` is translated to ``"fa"``. An ``Editor``
     built from ``self.bot`` and the sent message is stored in
     ``self._editor``, and the sent message's identifier in
     ``self._edit_msg_ident``.
     """
     cleaned = TextBlob(text.replace("text:", ""))
     target = "fa" if cleaned.detect_language() == 'en' else "en"
     translated = cleaned.translate(to=target)
     sent = self.sender.sendMessage(str(translated))
     self._editor = telepot.helper.Editor(self.bot, sent)
     self._edit_msg_ident = telepot.message_identifier(sent)
Beispiel #6
0
def flat_doc(document, model, extremes=None):
	"""Flatten a document into a list of filtered tokens.

	Every list-valued field of ``document`` contributes its values, joined
	with spaces into one string. Values of the author-name, author-bio,
	description and quote-text fields are translated to English first when
	their detected language is not ``'en'``; any failure while detecting or
	translating leaves the value as it was. The string is then run through
	``preprocess_string`` with ``CUSTOM_FILTERS``, and tokens are dropped
	if they are in ``stop_words``, in ``extremes`` (when given), or missing
	from ``model.vocab``.

	Returns the remaining tokens, or ``['book']`` when none remain.
	"""
	translatable = (
		'author.authors.authorName',
		'author.authorBio',
		'description',
		'quotes.quoteText',
	)
	pieces = []
	for field in document:
		values = document[field]
		# Non-list fields are ignored ('id' and '_version_' are
		# auto-generated by Solr).
		if not isinstance(values, list):
			continue
		for value in values:
			# Language detection and translation.
			if field in translatable:
				blob = TextBlob(value)
				try:
					if blob.detect_language() != 'en':
						value = blob.translate(to='en')
				except Exception:
					# Best effort: keep the untranslated value, e.g. for
					# TranslatorError('Must provide a string with at least
					# 3 characters.') or NotTranslated('Translation API
					# returned the input string unchanged.').
					pass
			# The document is flattened into a single string.
			pieces.append(str(value) + ' ')
	# Preprocess the flattened string.
	tokens = preprocess_string(''.join(pieces), CUSTOM_FILTERS)
	# Remove stop words.
	tokens = [tok for tok in tokens if tok not in stop_words]
	if extremes:
		tokens = [tok for tok in tokens if tok not in extremes]
	# Keep only words that are in the vocabulary.
	tokens = [tok for tok in tokens if tok in model.vocab]
	# If the book ends up empty, give it one token to avoid problems later.
	return tokens or ['book']
Beispiel #7
0
def handle_message_event(event):
    """Reply to a text message event according to what the text contains.

    The sender's user id or group id (empty string for any other source
    type) is passed to ``set_send_id``. The first matching rule decides
    the reply:

    * text contains '狀態': with nothing else in the text, reply with
      ``get_all_messages()``; otherwise reply with ``get_message(name)``
      where ``name`` is the text with '狀態' removed.
    * text contains '報告': split around it into a prefix and trailing
      digits. Both empty -> ``get_all_reports()``; digits empty ->
      ``get_report(prefix)``; otherwise
      ``get_report_url(prefix, int(digits))``.
    * text contains '敬禮', '安安' or '0.0': reply '敬禮', '安' or '0.0'.
    * detected language is 'ru': reply with the zh-TW translation.

    When no rule matches, no reply is sent.
    """
    print(event)
    text = event.message.text
    source = event.source
    # Named ``target_id`` so the builtin ``id`` is not shadowed.
    target_id = ''
    if isinstance(source, SourceUser):
        target_id = source.user_id
    elif isinstance(source, SourceGroup):
        target_id = source.group_id
    set_send_id(target_id)
    blob = TextBlob(text)

    if '狀態' in text:
        name = text.replace('狀態', '')
        reply = get_all_messages() if name == '' else get_message(name)
    elif '報告' in text:
        # Raw string: ``\d`` in a plain literal is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        matches = re.search(r'(.*)報告(\d*)', text)
        prefix, number = matches.group(1), matches.group(2)
        if prefix == '' and number == '':
            reply = get_all_reports()
        elif number == '':
            reply = get_report(prefix)
        else:
            reply = get_report_url(prefix, int(number))
    elif '敬禮' in text:
        reply = '敬禮'
    elif '安安' in text:
        reply = '安'
    elif '0.0' in text:
        reply = '0.0'
    # NOTE(review): detect_language() presumably rejects very short input
    # with an exception -- confirm whether that should be handled here.
    elif blob.detect_language() == 'ru':
        reply = str(blob.translate(to='zh-TW'))
    else:
        # Nothing matched: stay silent.
        return

    line_bot_api.reply_message(event.reply_token,
                               TextSendMessage(text=reply))
 def __get_blob(self, text):
     """
         Translate text with current user locale
         @param text as str
         @return translated text, or a user-facing error message, as str
     """
     try:
         locales = GLib.get_language_names()
         # Keep only what precedes the first "." of the preferred locale
         user_code = locales[0].split(".")[0]
         try:
             from textblob.blob import TextBlob
         except ImportError:
             # Only a failed import means the module is unavailable; a bare
             # except would also swallow SystemExit/KeyboardInterrupt
             return _("You need to install python3-textblob module")
         blob = TextBlob(text)
         return str(blob.translate(to=user_code))
     except Exception as e:
         Logger.error("LyricsView::__get_blob(): %s", e)
         return _("Can't translate this lyrics")
Beispiel #9
0
def main():
    """Translate one article to English and push it through the local API.

    ``read_in()`` supplies ``[title, author, date, content]``. The content
    is translated from zh-CN to English, then three requests are sent to
    the service on localhost:5000: ``/update`` with the translated article,
    ``/train`` and ``/predict`` against ``backup.csv``. The translation and
    every response body are reported through ``info``.
    """
    # In the dev environment 8080 is the node app port, while 5000 is the
    # python flask app port.
    api_root = "http://localhost:5000"
    auth_headers = {'X-API-TOKEN': 'FOOBAR1'}

    # Our data arrives as an array: [title, author, date, content].
    fields = read_in()
    title, author, date = fields[0], fields[1], fields[2]
    en_content = TextBlob(fields[3]).translate(from_lang="zh-CN", to='en')
    # Show the translated result on the web console.
    info("Translated texts: " + str(en_content))

    # Combine the translated result with ada-content-en.csv to produce a
    # new csv. The "id" field is not sent; it is generated automatically by
    # reviewing the csv file.
    update_payload = {
        'title': title,
        'author': author,
        'date': date,
        'content': en_content,
    }
    r = requests.get(api_root + "/update", headers=auth_headers,
                     data=update_payload)
    info("INFO: " + r.text)

    # Train on the updated backup.csv.
    r_train = requests.get(api_root + "/train", headers=auth_headers,
                           data={'data-url': 'backup.csv'})
    info("INFO: " + r_train.text)

    # Predict on the updated backup.csv.
    predict_payload = {'item': '-1', 'num': 2, 'data-url': 'backup.csv'}
    r_predict = requests.post(api_root + "/predict", headers=auth_headers,
                              data=predict_payload)
    info("INFO: " + r_predict.text)
Beispiel #10
0
from textblob.blob import TextBlob
# Minimal demo: translate one Cyrillic sample word to zh-TW and print it.
blob = TextBlob('Уровень')
translated = blob.translate(to='zh-TW')
print(str(translated))
	    	combo = url2content(row['url'])
	    	writer.writerow({'id': row['id'], 'title': combo['title'], 'author': combo['author'], 'date': combo['date'], 'url': row['url'], 'content': combo['combined_string']})
	    	print 'Processing scraper NO.' + str(row['id'])


### Read ada-content.csv and write an English translation of each row's
### content to ada-content-en.csv; all other columns are copied as they are.
with open('ada-content-en.csv', 'w') as target:
    fieldnames = ['id', 'title', 'author', 'date', 'url', 'content']
    writer = csv.DictWriter(target, fieldnames=fieldnames)
    writer.writeheader()

    with open('ada-content.csv') as source:
        reader = csv.DictReader(source.read().splitlines())
        for row in reader:
            content = row['content']
            # Python 2 yields byte strings here and needs the decode;
            # Python 3 already yields text, which has no .decode().
            if isinstance(content, bytes):
                content = content.decode('utf-8')
            chinese_blob = TextBlob(content)
            en_content = chinese_blob.translate(from_lang="zh-CN", to='en')
            writer.writerow({
                'id': row['id'],
                'title': row['title'],
                'author': row['author'],
                'date': row['date'],
                'url': row['url'],
                'content': en_content,
            })
            # Single-argument print() behaves the same on Python 2 and 3.
            print('Processing translator NO. ' + str(row['id']))












        for row in reader:
            combo = url2content(row['url'])
            writer.writerow({
                'id': row['id'],
                'title': combo['title'],
                'author': combo['author'],
                'date': combo['date'],
                'content': combo['combined_string']
            })
            print 'Processing scraper NO.' + str(row['id'])

### Read ada-content.csv and write an English translation of each row's
### content to ada-content-en.csv; all other columns are copied as they are.
with open('ada-content-en.csv', 'w') as target:
    fieldnames = ['id', 'title', 'author', 'date', 'content']
    writer = csv.DictWriter(target, fieldnames=fieldnames)
    writer.writeheader()

    with open('ada-content.csv') as source:
        reader = csv.DictReader(source.read().splitlines())
        for row in reader:
            content = row['content']
            # Python 2 yields byte strings here and needs the decode;
            # Python 3 already yields text, which has no .decode().
            if isinstance(content, bytes):
                content = content.decode('utf-8')
            chinese_blob = TextBlob(content)
            en_content = chinese_blob.translate(from_lang="zh-CN", to='en')
            writer.writerow({
                'id': row['id'],
                'title': row['title'],
                'author': row['author'],
                'date': row['date'],
                'content': en_content,
            })
            # Single-argument print() behaves the same on Python 2 and 3.
            print('Processing translator NO. ' + str(row['id']))