def speaker_output_source():
    """Read the input text box, auto-detect its language, translate to the
    selected target language, then speak the result via gTTS playback."""
    # Pull the full contents of the input widget.
    text_in = user_input_textbox.get("1.0", "end")
    engine = google_translator()
    # detect() returns [code, name]; show the readable name in the UI.
    detected = engine.detect(text_in)
    source_lang.set(detected[1])
    # Map the combobox selection index to its language code.
    target_code = gtts[dest_lang.current()]
    result = engine.translate(text_in, lang_src=detected[0], lang_tgt=target_code)
    # Synthesize speech, save it, and play it via the OS default handler.
    speech = gTTS(text=result, lang=target_code, slow=False)
    speech.save(r"C:\AppData\Translation.mp3")
    os.system(r"C:\AppData\Translation.mp3")
async def tr_engine(event, text, parse_mode='md'):
    """Edit *event* with *text*, translating it to the configured language first.

    Falls back to the untranslated text if translation fails for any reason.

    :param event: message-like object exposing an async ``edit`` method.
    :param text: string to display (and possibly translate).
    :param parse_mode: markup mode forwarded to ``event.edit``.
    """
    # Use the configured language only when it is a known language code.
    kk = Config.LANG if LANGUAGES.get(Config.LANG) else 'en'
    if kk == 'en':
        await event.edit(text, parse_mode=parse_mode)
    else:
        try:
            translator = google_translator()
            hmm = translator.translate(text, lang_tgt=kk)
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt; best-effort fallback is kept.
            hmm = text
        await event.edit(hmm, parse_mode=parse_mode)
    return
def get_weather(self):
    """Fetch current weather for Campinas and return [temp_celsius, status].

    The detailed status string is translated to Portuguese before returning.
    """
    engine = google_translator()
    home = 'Campinas, São Paulo'
    # Query OpenWeatherMap for the current observation at the fixed location.
    client = pyowm.OWM(OPENWEATHER)
    weather = client.weather_at_place(home).get_weather()
    celsius = int(weather.get_temperature('celsius')['temp'])
    description = engine.translate(weather.get_detailed_status(), lang_tgt="pt")
    return [celsius, description]
def __init__(self, dir_path=None, develop=True):
    """
    Initializes a GTITAN instance with the desired directory.

    :param dir_path: path where to create a directory. If left to None, uses
        default package directory;
    :param develop: development-mode flag stored on the instance
        (its effect is defined elsewhere in the class).
    """
    self.develop = develop
    if dir_path is None:
        # This queries 1 path up: use the package's own directory as the root.
        self.dir_path = os.path.dirname(os.path.abspath(__file__))
        if not os.path.exists(os.path.join(self.dir_path, "checkpath")):
            os.makedirs(os.path.join(self.dir_path, "checkpath"))
    else:
        # TODO: make all folders if new folder indicated: copy default files
        # there such as the config.
        self.dir_path = dir_path
        if not os.path.exists(dir_path):
            default_path = os.path.dirname(os.path.abspath(__file__))
            # creating directory structure
            os.makedirs(os.path.join(self.dir_path, "config"))
            os.makedirs(os.path.join(self.dir_path, "data", "Translations"))
            os.makedirs(
                os.path.join(self.dir_path, "data", "Trend_indices"))
            # copying defaults (the packaged config becomes the new config)
            shutil.copyfile(
                os.path.join(default_path, "config", "config_py.json"),
                os.path.join(self.dir_path, "config", "config_py.json"))
        else:
            print("Directory already exists, loading data from it.")
    print(f"Using directory2 '{self.dir_path}'", __file__)
    # Load the (possibly just-copied) JSON configuration.
    with open(os.path.join(self.dir_path, "config", "config_py.json"),
              'r') as fp:
        self.CONFIG = json.load(fp)
    # set configuring settings: requests wants timeout as a tuple, JSON
    # stores it as a list.
    self.CONFIG['CONN']['timeout'] = tuple(self.CONFIG['CONN']['timeout'])
    self.t_sleep = 0.2  # pause between requests
    # also get it from  -- NOTE(review): comment truncated in original;
    # presumably this should come from the config file too — confirm.
    self.google_timeout = 14 * 60 * 60  # 14 hours, in seconds
    self.t_block = 0
    # Call instances of used packages
    self.translator = google_translator()
    #self.pytrend = TrendReq(hl='en-US', **self.CONFIG['CONN'])
    self.pytrend = TrendReq()
    self.gtab = gtab.GTAB()
    # sets default anchorbank
    default_project = "frontex"
    self.set_active_project(default_project)
def request(text):
    """Mangle *text* by round-tripping it through four random Latin-script
    languages and back to English, then normalize curly quotes to ASCII.

    :param text: English source string.
    :return: the back-translated, quote-normalized string.
    """
    t = google_translator(timeout=5)
    # TRANSLATES TO RANDOM LANGUAGES FOUR TIMES
    num_translations = 4
    for _ in range(num_translations):
        lang = random.choice(list(LATIN_LANG.keys()))
        text = t.translate(text, lang)
    # BUG FIX: the original translated back to English twice in a row
    # (`text = t.translate(text, "en")` immediately followed by the same
    # call); one pass is sufficient and saves a network round-trip.  The
    # dead `lang = "fr"` initializer was also removed.
    translateFINAL = t.translate(text, "en")
    # Replace curly double and single quotes with their ASCII equivalents.
    translateFINAL = re.sub(u"(\u201c|\u201d)", "\"", translateFINAL)
    translateFINAL = re.sub(u"(\u2018|\u2019)", "'", translateFINAL)
    return translateFINAL
def do_translate_zh_en(text):
    """
    translates a string from Chinese to English
    :param text: str
    :return: str
    """
    engine = google_translator(timeout=5)
    # Numeric IDs are passed through untouched.
    if isinstance(text, int):
        return text
    if isinstance(text, str):
        text = text.strip()  # drop surrounding whitespace
    # Empty and literal 'null' values map to the empty string.
    if not text or text == 'null':
        return ''
    return engine.translate(text, lang_tgt='en', lang_src='zh').strip()
class Translator:
    """Caching wrapper around google_translator.

    Translations are memoized per target language so the same utterance is
    never sent to the API twice.
    """
    translator = google_translator()
    tx_cache = {}  # avoid translating twice: {lang_tgt: {text: translation}}

    @classmethod
    def translate(cls, text, lang_tgt, lang_src="en"):
        """Translate *text* from *lang_src* to *lang_tgt*, using the cache.

        Returns the stripped, untranslated text when the target language
        already matches the source.
        """
        # if langs are the same do nothing
        if not lang_tgt.startswith(lang_src):
            if lang_tgt not in cls.tx_cache:
                cls.tx_cache[lang_tgt] = {}
            # if translated before, dont translate again
            if text in cls.tx_cache[lang_tgt]:
                # get previous translated value
                translated_text = cls.tx_cache[lang_tgt][text]
            else:
                # translate this utterance
                translated_text = cls.translator.translate(
                    text, lang_tgt=lang_tgt, lang_src=lang_src)
                if isinstance(translated_text, list):
                    # usually male/female forms of the word; keep the first.
                    # BUG FIX: the original returned here WITHOUT caching,
                    # so list-form results were re-translated on every call.
                    translated_text = translated_text[0]
                # save the translation if we need it again
                cls.tx_cache[lang_tgt][text] = translated_text
        else:
            translated_text = text.strip()
        return translated_text

    @classmethod
    def translate_dict(cls, data, lang_tgt, lang_src="en"):
        """Recursively translate every string value in *data* (in place)."""
        for k, v in data.items():
            if isinstance(v, dict):
                data[k] = cls.translate_dict(v, lang_tgt, lang_src)
            elif isinstance(v, str):
                data[k] = cls.translate(v, lang_tgt, lang_src)
            elif isinstance(v, list):
                data[k] = cls.translate_list(v, lang_tgt, lang_src)
        return data

    @classmethod
    def translate_list(cls, data, lang_tgt, lang_src="en"):
        """Recursively translate every string element in *data* (in place)."""
        for idx, v in enumerate(data):
            if isinstance(v, dict):
                data[idx] = cls.translate_dict(v, lang_tgt, lang_src)
            elif isinstance(v, str):
                data[idx] = cls.translate(v, lang_tgt, lang_src)
            elif isinstance(v, list):
                data[idx] = cls.translate_list(v, lang_tgt, lang_src)
        return data
def translate_data(language, source):
    """Translate *source* into *language*, retrying up to 6 times on errors.

    :param language: target language code for google_translator.
    :param source: text to translate.
    :return: translated text, or None if every attempt fails.
    """
    # Retry at most 6 times on error (如果遇到错误就最多重试6次).
    success_num = 0
    while success_num < 6:
        try:
            translator = google_translator(
                url_suffix="com", timeout=25,
                proxies={'http': '159.75.5.165:10808',
                         'https': '159.75.5.165:10808'})
            return translator.translate(source, lang_tgt=language)
        except Exception as e:
            # BUG FIX: the original printed the undefined name `data`,
            # raising NameError inside the handler; print `source` instead.
            # (Dead `break` after return and trailing `continue` removed.)
            print(e, "正在重试:", source)
            success_num += 1
    return None
def TranslateNow(file_translate):
    """Translate every line of output.txt from Turkish to English, appending
    the results to *file_translate*.

    :param file_translate: path of the UTF-8 file translations are appended to.
    """
    translator = google_translator()
    # BUG FIX: the input file was opened outside any context manager, so it
    # leaked on any exception; both files are now managed by `with`.
    with open("output.txt", "r") as source_file, \
            open(file_translate, "a", encoding='utf-8') as tr_to_en:
        print("Çeviri işlemi başladı.")
        for line in source_file:
            translate_result = translator.translate(
                line, lang_src="tr", lang_tgt="en")
            tr_to_en.write(translate_result)
def translate_file_util(text, target_lang):
    """Translate *text* from English into *target_lang*, retrying forever
    (with a 10 s pause) until the translation service responds."""
    while True:
        try:
            engine = google_translator()
            return engine.translate(text, lang_src='en',
                                    lang_tgt=target_lang)
        except Exception as err:
            # Rate-limit / network hiccup: report, wait, and try again.
            print(str(err))
            time.sleep(10)
async def google_tl(rss_str_tl: str):
    """Translate an RSS snippet into Chinese via Google, returning a labelled
    result string (or an error message on failure)."""
    try:
        # Turn emoji into :name: placeholders, then blank the placeholders.
        cleaned = emoji.demojize(rss_str_tl)
        cleaned = re.sub(r':[A-Za-z_]*:', ' ', cleaned)
        engine = google_translator()
        labelled = '\n谷歌翻译:\n' + str(
            engine.translate(re.escape(cleaned), lang_tgt='zh'))
        # Strip the backslashes that re.escape introduced above.
        return re.sub(r'\\', '', labelled)
    except Exception as e:
        print("谷歌翻译失败")
        return '\n翻译失败!' + str(e) + '\n'
def detect_and_translate(text):
    """Detect the language of *text*; when it is not English, translate it to
    English and print RAKE keyphrases extracted from the translation."""
    engine = google_translator()
    rake = Rake()
    # detect() returns [code, name]; report the readable name.
    detected = engine.detect(text)
    print("\nSource Language was : ", detected[1])
    if detected[0] == 'en':
        # Already English: nothing to translate or extract.
        return
    print(text)
    translated = engine.translate(text, lang_tgt='en')
    print("\nEnglish Translation \n")
    rake.extract_keywords_from_sentences(translated.split('\n'))
    print(translated)
    ranked = rake.get_ranked_phrases_with_scores()
    print("\nPhrases with Scores")
    print(ranked)
def __init__(self, db, languages, weight_field, spacy_model,
             remove_words="./data/words_to_remove.txt",
             replace_words="./data/words_to_replace.txt",
             desc_field="description", max_words=5):
    """Class defined to process wuwana description tags.

    It attacks db and uses 3 NLP Libraries:
        - Spacy as tokenizer.
        - Wordcloud as tag modeller.
        - Gensim as tag modeller.

    Parameters
    -----------
    languages: list with languages in format: ["es","fr","zh-cn"]
    remove_words: path to file of words to be removed.
    replace_words: path to file of words to be replaced.
    spacy_model: pretrained Spacy model name to load.
    max_words: max words to be extracted from description texts.
    desc_field: field where text is stored in company table.
    weight_field: Field in company table where weights will be stored.
    """
    # Words to be removed from tags (semicolon-separated file).
    # BUG FIX: the handle was previously kept open forever on
    # self.file_words; a context manager now closes it immediately.
    with open(remove_words, "r", encoding="utf-8") as f_in:
        self.remove_words = f_in.read().split(";")
    # bag of words that should be replaced, such as abbreviations
    with open(replace_words, "r", encoding="utf-8") as f_in:
        self.replace_words = json.load(f_in)
    self.translator = google_translator()
    self.db = db
    self.cursor_tag = self.db.cursor()
    self.max_words = max_words
    self.desc_field = desc_field
    self.languages = languages
    self.weight_field = weight_field
    # Pretrained Spacy model used as tokenizer.
    # BUG FIX: the `spacy_model` parameter was accepted but ignored — the
    # model name was hard-coded to "en_core_web_lg".
    try:
        self.nlp = spacy.load(spacy_model)
    except Exception:
        sys.exit("ERROR: You must download %s spacy model. "
                 "Use 'python -m spacy download %s' "
                 % (spacy_model, spacy_model))
def main():
    """A simple NLP app: Streamlit UI that translates a Portuguese review to
    English and classifies its sentiment with a pickled logistic regression."""
    translator = google_translator()
    # Pre-trained sentiment model and its TF-IDF vectorizer.
    with open('sentiment analysis app/pickle files/log_reg.pkl', 'rb') as f:
        model = pickle.load(f)
    with open('sentiment analysis app/pickle files/tfidf_vectorizer.pkl',
              'rb') as f:
        tfidf_vectorizer = pickle.load(f)
    st.title('Olist User Review')
    menu = ['Home', 'About']
    choice = st.sidebar.selectbox('Menu', menu)
    try:
        if choice == 'Home':
            st.subheader('Sentiment Analysis')
            review = st.text_area('Review Text',
                                  'Enter your Review in Portuguese')
            trans_text = translator.translate(text=review, lang_tgt='en')
            st.write('Would you like to translate')
            if st.button('Translate'):
                st.text_area('Translated Review Text', trans_text)
            if st.button('Get Sentiment'):
                # The model was trained on Portuguese text only.
                if translator.detect(review)[0] != 'pt':
                    st.warning(
                        'Review Text has to be in Portuguese language **:see_no_evil:**'
                    )
                else:
                    prediction = int(
                        model.predict(tfidf_vectorizer.transform([review])))
                    if prediction == 1:
                        st.success('**Review text is Positive :joy: :yum:**')
                        st.balloons()
                    elif prediction == 0:
                        st.error('**Review text is Negative :cry: :worried:**')
        if choice == 'About':
            st.subheader('Learn More About Sentiment Analysis')
            st.write(
                '## Model was built using Logistic Regression :sunglasses:')
            st.write('Model was train in **Portuguese language**')
            st.write('''Reviews should be in that language (Portuguese) \n Option to translate to English is Available''')
            # st.write('If you want other language Translation')
    except Exception:
        # BUG FIX: was a bare `except:`; keep the friendly connectivity hint
        # but stop swallowing SystemExit/KeyboardInterrupt.
        st.write('**:eyes:** Check your Internet Connectivity!! **:eyes:**')
def get_converters():
    """Build the translation/transliteration helpers used by the pipeline:
    a Google translator plus Hindi/Urdu/English transliterators."""
    from google_trans_new import google_translator
    from indictrans import Transliterator

    def make_translit(src, tgt):
        # Every transliterator shares the same non-rule-based configuration.
        return Transliterator(source=src, target=tgt, rb=False)

    return {
        'g_translator': google_translator(url_suffix="com.pk"),
        'hi2ur': make_translit('hin', 'urd'),
        'ur2hi': make_translit('urd', 'hin'),
        'hi2en': make_translit('hin', 'eng'),
        'ur2en': make_translit('urd', 'eng'),
    }
def translate(self):
    """Translate the source text box into the language picked in the target
    combobox, auto-detecting the source language when the checkbox is set."""
    try:
        self.pushButton.setCursor(QtCore.Qt.BusyCursor)
        source_text = self.textEdit.toPlainText()
        # Map displayed language names onto google_trans_new codes.
        lang_names = list(google_trans_new.LANGUAGES.values())
        lang_codes = list(google_trans_new.LANGUAGES.keys())
        target_code = lang_codes[
            lang_names.index(self.comboBox_2.currentText().lower())]
        engine = google_translator()
        if self.is_checked:
            # Auto-detect mode: let Google identify the source language.
            detected = engine.detect(source_text)
            result = engine.translate(source_text, lang_tgt=target_code)
            self.label_4.setText(
                f"Auto detected: {detected[1].capitalize()}")
            self.label_4.adjustSize()
        else:
            # Explicit mode: both languages come from the comboboxes.
            source_code = lang_codes[
                lang_names.index(self.comboBox.currentText().lower())]
            result = engine.translate(source_text, lang_src=source_code,
                                      lang_tgt=target_code)
            self.label_4.setText("")
        self.textEdit_2.setText(result)
        self.pushButton.setCursor(QtCore.Qt.PointingHandCursor)
    except Exception as e:
        self.error_message(e)
def google_tr_eng_ru(sheet_wb, new_tb, ltr, num, color="FF0000"):
    """Translate one worksheet cell from English to Russian in place.

    The original value is logged to column A of *new_tb* and the Russian
    translation to column B (row = module-level ``counter``); the source cell
    is then overwritten with the translation, keeping its font size/name but
    applying *color*.

    :param sheet_wb: worksheet whose cell '<ltr><num>' is translated.
    :param new_tb: log worksheet receiving original/translated pairs.
    :param ltr: column letter of the target cell.
    :param num: row number of the target cell.
    :param color: font color applied to the translated cell.
    """
    # TODO: Google translate from English to Russian
    translator = google_translator()
    # Rows in the log sheet are numbered by this module-level counter.
    global counter
    counter += 1
    trl = translator.translate('{}'.format(sheet_wb['{}{}'.format(ltr, num)].value), lang_tgt='ru')
    # Remember the cell's font so it can be restored after overwriting.
    s = sheet_wb['{}{}'.format(ltr, num)].font.size
    n = sheet_wb['{}{}'.format(ltr, num)].font.name
    new_tb['A{}'.format(counter)] = sheet_wb['{}{}'.format(ltr, num)].value
    # Chained assignment writes the translation to both sheets at once.
    sheet_wb['{}{}'.format(ltr, num)] = new_tb['B{}'.format(counter)] = trl
    sheet_wb['{}{}'.format(ltr, num)].font = Font(size=s, name=n, color=color)
def google_trans(self, l_to = ini["to"]): """ Func: Using google translate to translate the content. Args: content: the content you wanna translate -- it can be a string, and it can also be a list. l_to: the language you wanna translate to(default to be zh-CH) """ # trans = Translator(service_urls=["translate.google.cn"]) trans = google_translator() result = trans.translate(self.content, l_to) print("\n") print(" google translate ".center(40, "*")) print("\n\t", proc_str(result), "\n\n") self.finished[0] = 1
def trans(text, lang='zh-CN', detect=1):
    """Translate *text* according to *lang*:

    - 'en'      -> English translation only.
    - 'zh'      -> Simplified-Chinese translation only.
    - otherwise -> Chinese translation, plus the English translation when the
                   input is already Chinese, or the original text otherwise.

    :param detect: kept for interface compatibility; not used in this body.
    """
    text = text_clean(text)
    # BUG FIX: removed an unused `tr = google_translator()` instantiation —
    # all translation work is delegated to the get_trans()/get_lang() helpers.
    if lang == 'en':
        result = get_trans(text, lang_tgt='en')
    elif lang == 'zh':
        result = get_trans(text, lang_tgt='zh-CN')
    else:
        if get_lang(text)[0] == 'zh-CN':
            result = get_trans(text, lang_tgt='zh-CN') + '\n' \
                + get_trans(text, lang_tgt='en')
        else:
            result = get_trans(text, lang_tgt='zh-CN') + '\n' \
                + text
    return result
def Detect_image_text_and_translate():
    """OCR the text in ./img.png, translate it to Traditional Chinese, show it
    in a popup, and log both versions to ./Detect_Text.txt."""
    img = cv2.imread(r'.\img.png')
    text = pytesseract.image_to_string(img)
    print('翻譯前:\n' + str(text) + '\n')
    translator = google_translator()  # ,lang_src='en'
    translate_text = translator.translate(text, lang_tgt='zh-TW')  # translate to Traditional Chinese
    print('翻譯後:\n' + str(translate_text) + '\n')
    # Show the translation in a blocking GUI alert.
    pyautogui.alert(translate_text, '翻譯內容')
    with open(r".\Detect_Text.txt", "w", encoding="utf-8") as f:
        f.write('翻譯前:\n' + str(text) + '\n\n')  # write the pre-translation text
        f.write('翻譯後:\n' + str(translate_text) + '\n\n')  # write the translated text
    print('已寫入文字檔!')
def translation(self):
    """Translate the screen-1 text widget from Indonesian to English in place.

    Best effort: permission or translation failures leave the text unchanged.
    """
    try:
        self.get_permission()
        try:
            translator = google_translator()
            translate_text = translator.translate(
                self.nav_layout.screen_manager.screen1.isi.text,
                lang_src='id', lang_tgt='en')
            self.nav_layout.screen_manager.screen1.isi.text = translate_text
        except Exception:
            # BUG FIX: was a bare `except:`; keep the deliberate best-effort
            # behaviour but stop catching SystemExit/KeyboardInterrupt.
            pass
    except Exception:
        # Same fix for the outer permission-check guard.
        pass
def data_augmentation(paragraphs):
    """Augment English *paragraphs* by back-translation (en -> fr -> en).

    Each paragraph is translated line by line to keep the document structure;
    lines near the API character limit are split recursively.

    :param paragraphs: list of English paragraph strings.
    :return: list of back-translated paragraphs, same order as the input.
    """
    print(paragraphs[0])
    #translator = Translator(service_urls=['https://translation.googleapis.com'])
    translator = google_translator()
    back_translations = []

    def translation(paragraph, src, dest):
        """Recursive, line-by-line translation that stays under the API's
        5000-character limit while keeping the document structure."""
        lines = paragraph.split('\n')
        trans_result = ''
        for line in lines:
            if len(line) >= 4000:  # the limit for translation with google's api is 5000 characters
                # BUG FIX: the original started the second half at
                # len//2 + 1 for odd-length lines, silently dropping the
                # middle character; these two slices always cover the line.
                par_1 = line[:len(line) // 2]
                par_2 = line[len(line) // 2:]
                trans = translation(par_1, src, dest) + translation(par_2, src, dest)
            else:
                trans = translator.translate(line, lang_src=src, lang_tgt=dest)
            trans_result += trans + '\n'
        return trans_result

    def back_to_back_translation(paragraph):
        """Round-trip one paragraph en -> fr -> en."""
        en_to_other_translation = translation(paragraph, 'en', 'fr')
        back_translation = translation(en_to_other_translation, 'fr', 'en')
        return back_translation

    for paragraph in paragraphs:
        back_translations += [back_to_back_translation(paragraph)]
    return back_translations
def run(self, dispatcher: CollectingDispatcher, tracker: Tracker,
        domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
    """Rasa action: answer (in Hindi) which languages are spoken in the
    country mentioned in the user's last message.

    The country name is translated to English, normalized, and looked up in
    the WALS-derived country/language CSV.
    """
    countries_languages_path = os.path.join("data", "cldf-datasets-wals-014143f", "created", "lang_country_info.csv")
    countries_languages_data = pd.read_csv(countries_languages_path)
    entities = list(tracker.get_latest_entity_values("country"))
    if len(entities) > 0:
        # Use the most recent "country" entity.
        query_lang = entities.pop()
        query_lang = query_lang.lower().title()
        print(query_lang)
        try:
            #gs = goslate.Goslate()
            translator = google_translator()
            # Translate the (possibly Hindi) country name to English, then
            # normalize: lowercase and keep ASCII letters only.
            final = translator.translate(query_lang, lang_tgt='en')
            final = final.lower()
            final = re.sub("[^a-zA-Z]+", "", final)
            print(final)
            # One record per language spoken in the matched country.
            out_row = countries_languages_data[
                countries_languages_data["country_name"].str.lower() ==
                final].to_dict("records")
            if len(out_row) > 0:
                languages = []
                for i in range(len(out_row)):
                    languages.append(out_row[i]["name"])
                print(languages)
                out_text = "%s की भाषा/एँ: \n%s" % (query_lang, ", ".join(languages))
                dispatcher.utter_message(text=out_text)
            else:
                # No record for this country.
                dispatcher.utter_message(
                    text="क्षमा करें! हमारे पास %s देश के रिकॉर्ड नहीं हैं।"
                    % query_lang)
        except:
            # NOTE(review): bare except keeps the bot responsive on any
            # failure (network, parsing) — consider `except Exception`.
            dispatcher.utter_message(
                text=
                "बेटा, System Error के लिए माफी माँगता हूँ। कुछ समय बाद प्रयास करें।"
            )
    return []
def back_translate(sentence):
    """Round-trip *sentence* through one randomly chosen language and back to
    English, producing a paraphrased variant of the input."""
    # Pick a random pivot language from everything Google supports.
    pivot = random.choice(list(google_trans_new.LANGUAGES.keys()))
    engine = google_translator()
    outbound = engine.translate(text=sentence, lang_src='en',
                                lang_tgt=pivot)
    round_trip = engine.translate(text=outbound, lang_src=pivot,
                                  lang_tgt='en')
    return round_trip
def run(self, dispatcher: CollectingDispatcher, tracker: Tracker,
        domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:
    """Rasa action: describe (in Hindi) the family, genus and ISO code of the
    language mentioned in the user's last message, using the WALS dataset."""
    print('LANG SEARCH')
    data_path = os.path.join("data", "cldf-datasets-wals-014143f", "cldf", "languages.csv")
    wals_data = pd.read_csv(data_path)
    entities = list(tracker.get_latest_entity_values("language"))
    if len(entities) > 0:
        # Use the most recent "language" entity.
        query_lang = entities.pop()
        query_lang = query_lang.lower().capitalize()
        print(query_lang)
        try:
            translator = google_translator()
            #gs = goslate.Goslate()
            # Translate the (possibly Hindi) language name to English, then
            # normalize: lowercase and keep ASCII letters only.
            final = translator.translate(query_lang, lang_tgt='en')
            final = final.lower()
            final = re.sub("[^a-zA-Z]+", "", final)
            out_row = wals_data[wals_data["Name"].str.lower() ==
                                final].to_dict("records")
            if len(out_row) > 0:
                out_row = out_row[0]
                out_text = "%s भाषा %s परिवार से संबंधित है।\nइसका जीनस %s है।\nइसका ISO कोड %s है।" % (
                    query_lang, out_row["Family"], out_row["Genus"],
                    out_row["ISO_codes"])
                dispatcher.utter_message(text=out_text)
            else:
                # No WALS record for this language.
                dispatcher.utter_message(
                    text=
                    "क्षमा करें! हमारे पास %s भाषा के रिकॉर्ड नहीं हैं।"
                    % query_lang)
        except:
            # NOTE(review): bare except keeps the bot responsive on any
            # failure — consider `except Exception`.
            dispatcher.utter_message(
                text=
                "बेटा, System Error के लिए माफी माँगता हूँ। कुछ समय बाद प्रयास करें।"
            )
    return []
def df_sentiment(df):
    """Add VADER sentiment columns (negative/neutral/positive/compound) to a
    tweet dataframe, translating each tweet to English first.

    Sleeps between API calls to avoid the 429: Too Many Requests error.
    """
    # Import necessary libraries
    from nltk.sentiment.vader import SentimentIntensityAnalyzer
    from google_trans_new import google_translator
    import time
    import numpy as np
    from json.decoder import JSONDecodeError

    engine = google_translator()
    analyzer = SentimentIntensityAnalyzer()
    df = df.reset_index(drop=True)

    # One polarity-score dict per tweet, in row order.
    scores = []
    for idx in range(len(df)):
        english = engine.translate(df['text'][idx])
        scores.append(analyzer.polarity_scores(english))
        # Longer pause every 25th request, a short one otherwise.
        time.sleep(2 if idx % 25 == 0 else 0.7)

    df['sentiment_dct'] = scores
    # Expand the dicts into one column per score, then drop the dict column.
    for column, key in (('negative', 'neg'), ('neutral', 'neu'),
                        ('positive', 'pos'), ('compound', 'compound')):
        df[column] = [df['sentiment_dct'][i][key] for i in df.index.tolist()]
    df = df.drop(columns='sentiment_dct')
    return df
async def handle_translation(content: str) -> str:
    """Translate *content* to Chinese, preferring the Baidu API when
    credentials are configured and falling back to Google translate.

    :param content: raw text to translate (may contain emoji).
    :return: a labelled translation string, or an error message on failure.
    """
    translator = google_translator()
    appid = config.baiduid
    secretKey = config.baidukey
    # Strip emoji: demojize into :name: placeholders, then blank them out.
    text = emoji.demojize(content)
    text = re.sub(r':[A-Za-z_]*:', ' ', text)
    try:
        if appid and secretKey:
            url = f'https://api.fanyi.baidu.com/api/trans/vip/translate'
            # Baidu signature scheme: md5(appid + query + salt + secret).
            salt = str(random.randint(32768, 65536))
            sign = hashlib.md5(
                (appid + content + salt + secretKey).encode()).hexdigest()
            params = {
                "q": content,
                "from": "auto",
                "to": "zh",
                "appid": appid,
                "salt": salt,
                "sign": sign
            }
            async with httpx.AsyncClient(proxies={}) as client:
                r = await client.get(url, params=params, timeout=10)
                try:
                    # Concatenate every translated segment Baidu returned.
                    i = 0
                    str_tl = ''
                    while i < len(r.json()["trans_result"]):
                        str_tl += r.json()["trans_result"][i]["dst"] + "\n"
                        i += 1
                    text = "\n百度翻译:\n" + str_tl
                except Exception as e:
                    # Baidu response unusable: log and fall back to Google.
                    if r.json()["error_code"] == "52003":
                        logger.warning("无效的appid,尝试使用谷歌翻译,错误信息:" + str(r.json()["error_msg"]))
                        text = '\n谷歌翻译:\n' + \
                            str(translator.translate(re.escape(text), lang_tgt='zh'))
                    else:
                        logger.warning("使用百度翻译错误:" + str(r.json()["error_msg"]) + ",开始尝试使用谷歌翻译")
                        text = '\n谷歌翻译:\n' + \
                            str(translator.translate(re.escape(text), lang_tgt='zh'))
        else:
            # No Baidu credentials configured: use Google translate directly.
            text = '\n谷歌翻译:\n' + \
                str(translator.translate(re.escape(text), lang_tgt='zh'))
        # Remove the backslashes introduced by re.escape above.
        text = re.sub(r'\\', '', text)
    except Exception as e:
        text = '\n翻译失败!' + str(e) + '\n'
    return text
def get_data():
    """Scrape the main table from 'https://www.corona.ps/' into a DataFrame.

    Translates the header row and the governorate column from Arabic to
    English, and also writes the result to
    ``data/COVID_ps_<dd-Mon-YYYY>.csv`` (timestamped with today's date).

    :return: the translated pandas DataFrame.
    """
    # Website with data
    # URL_or = 'https://www.corona.ps/details'
    URL_or = 'https://www.corona.ps/'
    # Fetch and parse the page.
    page = requests.get(URL_or)
    soup = BeautifulSoup(page.content, 'html.parser')
    # Find tables in webpage
    tables = soup.find_all("table")
    # init the Google API translator
    # translator = Translator()
    translator = google_translator()
    # tables[4] has the necessary data
    table = tables[4]
    tab_data = [[cell.text for cell in row.find_all(["th", "td"])]
                for row in table.find_all("tr")]
    # generate dataframe
    df = pd.DataFrame(tab_data)
    # translate the first row with titles
    # NOTE(review): chained assignment (df[i][0] = ...) may hit pandas'
    # SettingWithCopy behaviour — confirm it mutates df as intended.
    for i in range(0, df.shape[1]):
        translation = translator.translate(df[i][0], lang_tgt="en")
        df[i][0] = translation
    # now translate the 0th column (with governorates)
    for i in range(1, df.shape[0]):
        translation = translator.translate(df[0][i], lang_tgt="en")
        df[0][i] = translation
    # Promote the translated first row to be the header.
    df = df.rename(columns=df.iloc[0]).drop(df.index[0])
    # Returns a datetime object containing the local date and time
    dateTimeObj = datetime.now()
    timestampStr = dateTimeObj.date().strftime("%d-%b-%Y")
    name_df = 'COVID_ps_' + timestampStr + '.csv'
    print('Dataframe generated\nSaved in ' + name_df)
    df.to_csv('data/' + name_df)
    return df
def __init__(self):
    """Set up the translator and caches, the RAKE keyword extractor, the
    result-paging state, and the skill-local Padatious intent parser."""
    super().__init__()
    self.translator = google_translator()
    self.tx_cache = {}  # avoid translating twice
    # presumably caches previous DuckDuckGo lookups — confirm against usage
    self.duck_cache = {}
    self.rake = Rake()  # only english for now
    # for usage in tell me more / follow up questions
    self.idx = 0
    self.results = []
    self.image = None
    # subparser, intents just for this skill
    # not part of main intent service
    intent_cache = expanduser(
        self.config_core['padatious']['intent_cache'])
    self.intents = IntentContainer(intent_cache)
def update_keywords(keywords_df: pd.DataFrame, dir_lyrics: str):
    """Extract keyphrases for every lyrics file in *dir_lyrics* that is not
    yet present in *keywords_df*, translating the lyrics to English first.

    :param keywords_df: existing keywords table with columns
        track_id/keywords/weights.
    :param dir_lyrics: directory containing one lyrics file per track,
        named ``<track_id>.<ext>``.
    :return: a new DataFrame with the old rows plus the newly extracted ones.
    """
    embedding_distributor = load_local_embedding_distributor()
    pos_tagger = load_local_corenlp_pos_tagger()
    translator = google_translator()
    updated_keywords = keywords_df.to_dict('list')
    track_ids = set(updated_keywords['track_id'])
    filenames = os.listdir(dir_lyrics)
    for i, filename in enumerate(filenames):
        track_id, _ = os.path.splitext(filename)
        print(f'Processing track {i + 1}/{len(filenames)}. Id: {track_id}...')
        if int(track_id) in track_ids:
            print(f'Already have keywords for track {track_id}')
            continue
        # BUG FIX: the path was previously built from the module-level
        # DIR_LYRICS constant even though the files were listed from the
        # dir_lyrics argument — mismatched values would open wrong paths.
        lyrics_path = os.path.join(dir_lyrics, filename)
        with open(lyrics_path) as f:
            try:
                # Flatten lyrics to one line; collapse doubled spaces.
                lyrics = f.read().replace('\n', ' ').replace('  ', ' ')
                if not lyrics:
                    continue
                lyrics_en = translator.translate(lyrics)
                if not lyrics_en:
                    continue
                keywords, weights, _ = extract_keyphrases(
                    embedding_distributor, pos_tagger, lyrics_en,
                    N_KEYWORDS, 'en')
                updated_keywords['track_id'].append(track_id)
                updated_keywords['keywords'].append(";".join(keywords))
                updated_keywords['weights'].append(";".join(map(str, weights)))
            except Exception as e:
                # Best effort per track: report and move on.
                print(track_id, e)
    return pd.DataFrame(
        data=updated_keywords,
        columns=keywords_df.columns
    )