def get_language_results(tweet_text_array):
    """Return the top-ranked language name for every text in a batch.

    The batch is sent to ``indicoio.language`` with ``top_n=1`` and the first
    key of each per-text result is collected.

    Args:
        tweet_text_array: Batch of texts, passed unchanged to
            ``indicoio.language``.

    Returns:
        list: One language name per entry of the API response, in response
        order.

    Raises:
        IndexError: If a per-text result contains no entries.
    """
    raw_results = indicoio.language(tweet_text_array, top_n=1)
    # ``result.items()[0]`` only works on Python 2, where ``items()`` returns
    # a list; dict views on Python 3 are not subscriptable.  ``list(result)``
    # yields the keys on both versions and still raises IndexError when a
    # result is empty.
    # NOTE(review): assumes each result is a mapping holding a single
    # language -> score entry because of ``top_n=1`` -- confirm.
    return [list(result)[0] for result in raw_results]
def analysis(data):
    """Run several indico text analyses on ``data`` and print the results.

    Every API call is made before anything is printed.  The sentiment value
    is printed on one line; tags, languages, political leanings and keywords
    are each printed under a heading as ``<label> <score * 100>`` lines.

    Args:
        data: Input passed unchanged to each ``ind.*`` call.

    Returns:
        None. Output goes to standard output.
    """
    sentiment = ind.sentiment_hq(data)
    tags = sort(ind.text_tags(data))
    languages = sort(ind.language(data))
    politics = sort(ind.political(data))
    keywords = sort(ind.keywords(data))
    # NOTE(review): named entities are fetched but never printed; the call is
    # kept so the set of API requests is unchanged -- print or drop it.
    names = sort(ind.named_entities(data))

    # Single-argument ``print(...)`` with %-formatting produces the same text
    # on Python 2 and Python 3, unlike the Python-2-only print statement.
    print("Sentiment %s" % (sentiment,))

    # Headings keep their original leading newlines (keywords has two).
    sections = (
        ("\n\n\nTags", tags),
        ("\n\n\nLanguages", languages),
        ("\n\n\nPolitical", politics),
        ("\n\nkeywords", keywords),
    )
    for heading, entries in sections:
        print(heading)
        for entry in entries:
            # NOTE(review): ``sort`` is defined elsewhere; each entry is
            # assumed to be indexable as (label, score) -- confirm.
            print("%s %s" % (entry[0], float(entry[1]) * 100))
def convertData(x):
    """Encode one record into a fixed-order list of categorical features.

    Args:
        x: Mapping with a ``'name'`` entry and a ``'words'`` string.

    Returns:
        list: ``[name, language, totalL, meanLW, sent, numwords]`` where

        * ``language`` is 1 for English, 0 for Spanish;
        * ``totalL`` buckets the summed word length: 0 short (< 20),
          1 medium (20-40), 2 long (> 40);
        * ``meanLW`` is the mean word length rounded to two decimals
          (0.0 when the text has no words);
        * ``sent`` is 1 positive (> 0.6), 2 negative (< 0.4), 0 neutral;
        * ``numwords`` buckets the word count: 0 (< 5), 1 (5-10), 2 (> 10).
    """
    data = x
    name = data['name']
    text = data['words']

    lang = indicoio.language(text)
    # A tie used to leave ``language`` unassigned and crash later with
    # UnboundLocalError; ties are now resolved in favour of English.
    is_english = lang['English'] >= lang['Spanish']

    sent = round(indicoio.sentiment(text), 2)

    words = text.split()
    numwords = len(words)
    totalL = sum(len(word) for word in words)
    # float() forces true division on Python 2 (the mean was truncated to a
    # whole number before); empty text no longer raises ZeroDivisionError.
    meanLW = round(float(totalL) / numwords, 2) if numwords else 0.0

    # Encode total length.  An exclusive chain also covers exactly 20, which
    # previously slipped through unencoded.
    if totalL < 20:
        totalL = 0
    elif totalL > 40:
        totalL = 2
    else:
        totalL = 1

    # Encode word count.
    if numwords < 5:
        numwords = 0
    elif numwords > 10:
        numwords = 2
    else:
        numwords = 1

    # Encode sentiment: 1 positive, 2 negative, 0 neutral.
    if sent > 0.6:
        sent = 1
    elif sent < 0.4:
        sent = 2
    else:
        sent = 0

    if is_english:
        language = 1
    else:
        language = 0
        # NOTE(review): the original indentation was lost; the sentiment
        # reset is assumed to apply to Spanish text only -- confirm.
        sent = 0

    return [name, language, totalL, meanLW, sent, numwords]
def isNotEnglish(text):
    """Report whether ``text`` scores below 0.5 for English.

    The full score mapping, the verdict and a ``"!!"`` marker are printed as
    before.

    Args:
        text: Text passed unchanged to ``indicoio.language``.

    Returns:
        bool: True when the English score is below 0.5 (or absent).
    """
    language = indicoio.language(text)
    print(language)
    # The previous code compared a one-element list of language *names* with
    # 0.5: always False on Python 2, TypeError on Python 3.
    # NOTE(review): comparing the English score is the assumed intent, based
    # on the function name and the 0.5 threshold -- confirm.
    testy = language.get('English', 0.0) < 0.5
    print(testy)
    print("!!")
    return testy
def test_language(self):
    """Check the language names and English score for an English sentence.

    The keys returned by ``language`` must match the expected set exactly,
    and the English score must exceed 0.25.
    """
    # 'French' was listed twice in the original; a set literal keeps one.
    language_set = {
        'English', 'Spanish', 'Tagalog', 'Esperanto', 'French', 'Chinese',
        'Bulgarian', 'Latin', 'Slovak', 'Hebrew', 'Russian', 'German',
        'Japanese', 'Korean', 'Portuguese', 'Italian', 'Polish', 'Turkish',
        'Dutch', 'Arabic', 'Persian (Farsi)', 'Czech', 'Swedish',
        'Indonesian', 'Vietnamese', 'Romanian', 'Greek', 'Danish',
        'Hungarian', 'Thai', 'Finnish', 'Norwegian', 'Lithuanian',
    }
    language_dict = language('clearly an english sentence')
    self.assertEqual(language_set, set(language_dict.keys()))
    # A unittest assertion is not stripped under ``python -O`` and reports
    # both operands on failure, unlike a bare ``assert``.
    self.assertGreater(language_dict['English'], 0.25)
def test_batch_language(self):
    """Check that a one-item batch returns a list with an English score > 0.25.

    The request is made with the API key stored on the test instance.
    """
    batch = ['clearly an english sentence']
    results = language(batch, api_key=self.api_key)

    # A batch request must come back as a list.
    returned_list = isinstance(results, list)
    self.assertTrue(returned_list)

    english_score = results[0]['English']
    self.assertTrue(english_score > 0.25)
def test_language(self):
    """Check the language names and English score for an English sentence.

    The keys returned by ``language`` must equal ``LANGUAGES``, and the
    English score must exceed 0.25.
    """
    language_dict = language('clearly an english sentence')
    self.assertEqual(LANGUAGES, set(language_dict.keys()))
    # A unittest assertion is not stripped under ``python -O`` and reports
    # both operands on failure, unlike a bare ``assert``.
    self.assertGreater(language_dict['English'], 0.25)
def test_batch_language(self):
    """Check that a one-item batch returns a list with an English score > 0.25."""
    batch = ["clearly an english sentence"]
    results = language(batch)

    # A batch request must come back as a list.
    returned_list = isinstance(results, list)
    self.assertTrue(returned_list)

    english_score = results[0]["English"]
    self.assertTrue(english_score > 0.25)
def get_language():
    """Return a JSON document with the first-ranked language of the posted data.

    On a POST request the ``data_to_analyze`` form entry is sent to
    ``indicoio.language``; the first prediction is ranked with ``sort`` and
    its first element is returned under the ``'language'`` key.

    Returns:
        str: JSON text of the form ``{"language": ...}`` for POST requests,
        otherwise ``None``.
    """
    if request.method != 'POST':
        # NOTE(review): non-POST requests produce no response body, exactly
        # as before -- confirm the route only accepts POST.
        return None

    form_fields = dict(request.form)
    data = form_fields['data_to_analyze']
    predictions = indicoio.language(data)
    ranked = sort(predictions[0])
    return json.dumps({'language': ranked[0]})