def get_language_results(tweet_text_array):
    """Return the detected language name for each analysed tweet text.

    The batch is sent to ``indicoio.language`` with ``top_n=1``; for every
    per-text result mapping the language key with the highest score is kept.

    Args:
        tweet_text_array: Batch of tweet texts, passed unchanged to
            ``indicoio.language``.

    Returns:
        A list of language names, one per entry of the API response, in
        response order.
    """
    raw_results = indicoio.language(tweet_text_array, top_n=1)
    # ``result.items()[0]`` only works on Python 2 (dict views cannot be
    # indexed on Python 3) and would pick an arbitrary entry if more than one
    # language came back.  Selecting the best-scoring key is identical for a
    # single-entry mapping and well defined otherwise.
    return [
        max(result, key=lambda name: result[name])
        for result in raw_results
    ]
Esempio n. 2
0
def analysis(data):
    """Run the indico text analyses on ``data`` and print the results.

    All API requests are issued first, in a fixed order, and only then is
    anything printed.  The sentiment value is printed as-is; for tags,
    languages, political leaning and keywords each entry is printed as
    ``<label> <score * 100>``.

    Args:
        data: Text passed unchanged to every ``ind.*`` API call.

    Returns:
        None. Results are written to standard output.
    """
    sentiment = ind.sentiment_hq(data)
    tags = sort(ind.text_tags(data))
    languages = sort(ind.language(data))
    politics = sort(ind.political(data))
    keywords = sort(ind.keywords(data))
    # NOTE(review): named entities are requested but never reported. The call
    # is kept so the sequence of API requests is unchanged; only the unused
    # local binding was dropped.
    sort(ind.named_entities(data))

    # Every print below passes one pre-formatted string, so the output is the
    # same under the Python 2 print statement and the Python 3 print function.
    print("Sentiment %s" % (sentiment,))

    # Headings are reproduced verbatim, including their differing numbers of
    # leading newlines and the lowercase "keywords".
    sections = (
        ("\n\n\nTags", tags),
        ("\n\n\nLanguages", languages),
        ("\n\n\nPolitical", politics),
        ("\n\nkeywords", keywords),
    )
    for heading, entries in sections:
        print(heading)
        for entry in entries:
            # entry[0] is the label, entry[1] its score; scaled by 100 as in
            # the original output.
            print("%s %s" % (entry[0], float(entry[1]) * 100))
Esempio n. 3
0
def convertData(x):
    """Encode one record into a list of categorical/numeric features.

    Args:
        x: Mapping with a ``'name'`` entry and a ``'words'`` entry (the text
            to analyse).

    Returns:
        ``[name, language, total_length, mean_word_length, sentiment,
        word_count]`` where

        * ``language`` is 1 for English, 0 for Spanish (whichever of the two
          scores higher; a tie counts as English),
        * ``total_length`` encodes the summed word lengths:
          0 = short (< 20), 1 = medium (20..40), 2 = long (> 40),
        * ``mean_word_length`` is the average word length rounded to two
          decimals (0.0 when the text has no words),
        * ``sentiment`` is 1 = positive (> 0.6), 2 = negative (< 0.4),
          0 = neutral; it is always 0 for Spanish text,
        * ``word_count`` encodes the number of words:
          0 = fewer than 5, 1 = 5..10, 2 = more than 10.
    """
    name = x['name']
    text = x['words']

    lang = indicoio.language(text)
    # A single comparison always yields a decision; the former pair of strict
    # comparisons left the language undefined when both scores were equal.
    is_english = lang['English'] >= lang['Spanish']

    raw_sentiment = round(indicoio.sentiment(text), 2)

    words = text.split()
    word_count = len(words)
    total_length = sum(len(word) for word in words)

    # Force true division (Python 2 would truncate) and avoid dividing by
    # zero for empty/whitespace-only text.
    if word_count:
        mean_length = round(float(total_length) / word_count, 2)
    else:
        mean_length = 0.0

    # Encode total length. Exactly 20 used to slip through un-encoded; it is
    # now treated as medium.
    if total_length < 20:
        length_code = 0
    elif total_length > 40:
        length_code = 2
    else:
        length_code = 1

    # Encode number of words.
    if word_count < 5:
        count_code = 0
    elif word_count > 10:
        count_code = 2
    else:
        count_code = 1

    # Encode sentiment: 1 positive, 2 negative, 0 neutral.
    if raw_sentiment > 0.6:
        sentiment_code = 1
    elif raw_sentiment < 0.4:
        sentiment_code = 2
    else:
        sentiment_code = 0

    if is_english:
        language_code = 1
    else:
        language_code = 0
        # Sentiment is reported as neutral for Spanish text, as before.
        sentiment_code = 0

    return [name, language_code, length_code, mean_length, sentiment_code,
            count_code]
Esempio n. 4
0
def isNotEnglish(text):
    """Return True when ``text`` is not confidently detected as English.

    The text is classified with ``indicoio.language``; it is considered
    non-English when the score for ``'English'`` is below 0.5 (or absent).

    Args:
        text: Text passed unchanged to ``indicoio.language``.

    Returns:
        bool: True if the English score is below 0.5, False otherwise.
    """
    language = indicoio.language(text)
    print(language)
    # The previous expression compared a one-element list of language names
    # with 0.5, which is always False on Python 2 and a TypeError on
    # Python 3.  NOTE(review): the intent is inferred from the function name
    # and the 0.5 threshold -- confirm that "English score < 0.5" is the
    # wanted rule rather than "top-ranked language is not English".
    testy = language.get('English', 0.0) < 0.5
    # Diagnostic output kept from the original implementation.
    print(testy)
    print("!!")
    return testy
Esempio n. 5
0
 def test_language(self):
     # The language API must report exactly this set of languages and give
     # English a score above 0.25 for an obviously English sentence.
     expected_languages = {
         'English', 'Spanish', 'Tagalog', 'Esperanto', 'French', 'Chinese',
         'Bulgarian', 'Latin', 'Slovak', 'Hebrew', 'Russian', 'German',
         'Japanese', 'Korean', 'Portuguese', 'Italian', 'Polish', 'Turkish',
         'Dutch', 'Arabic', 'Persian (Farsi)', 'Czech', 'Swedish',
         'Indonesian', 'Vietnamese', 'Romanian', 'Greek', 'Danish',
         'Hungarian', 'Thai', 'Finnish', 'Norwegian', 'Lithuanian',
     }
     language_dict = language('clearly an english sentence')
     self.assertEqual(expected_languages, set(language_dict))
     # assertGreater is not stripped under ``python -O`` and reports both
     # operands on failure, unlike a bare ``assert``.
     self.assertGreater(language_dict['English'], 0.25)
Esempio n. 6
0
 def test_language(self):
     # Verifies both the set of language keys returned by the API and that
     # an unmistakably English sentence scores above 0.25 for English.
     expected_languages = {
         'English',
         'Spanish',
         'Tagalog',
         'Esperanto',
         'French',  # was listed twice; a set keeps only one copy anyway
         'Chinese',
         'Bulgarian',
         'Latin',
         'Slovak',
         'Hebrew',
         'Russian',
         'German',
         'Japanese',
         'Korean',
         'Portuguese',
         'Italian',
         'Polish',
         'Turkish',
         'Dutch',
         'Arabic',
         'Persian (Farsi)',
         'Czech',
         'Swedish',
         'Indonesian',
         'Vietnamese',
         'Romanian',
         'Greek',
         'Danish',
         'Hungarian',
         'Thai',
         'Finnish',
         'Norwegian',
         'Lithuanian',
     }
     language_dict = language('clearly an english sentence')
     self.assertEqual(expected_languages, set(language_dict))
     # Use the unittest assertion so the check survives ``python -O`` and a
     # failure shows the actual score.
     self.assertGreater(language_dict['English'], 0.25)
Esempio n. 7
0
 def test_batch_language(self):
     # A list input must produce a list response whose first entry scores
     # English above 0.25.
     test_data = ['clearly an english sentence']
     response = language(test_data, api_key=self.api_key)
     # The dedicated assertions report the offending type/value on failure,
     # where assertTrue would only say "False is not true".
     self.assertIsInstance(response, list)
     self.assertGreater(response[0]['English'], 0.25)
Esempio n. 8
0
 def test_language(self):
     # The returned keys must match LANGUAGES exactly, and English must score
     # above 0.25 for an obviously English sentence.
     language_dict = language('clearly an english sentence')
     self.assertEqual(LANGUAGES, set(language_dict))
     # A bare ``assert`` is removed under ``python -O`` and gives no detail on
     # failure; assertGreater does neither.
     self.assertGreater(language_dict['English'], 0.25)
Esempio n. 9
0
 def test_batch_language(self):
     # Batch mode: a list of texts yields a list of results; the first result
     # must score English above 0.25.
     test_data = ["clearly an english sentence"]
     response = language(test_data)
     # Specific assertions give informative failure messages (actual type and
     # actual score) instead of "False is not true".
     self.assertIsInstance(response, list)
     self.assertGreater(response[0]["English"], 0.25)
Esempio n. 10
0
def get_language():
    """Return the language analysis of the posted form data as a JSON string.

    Only POST requests are processed: the ``data_to_analyze`` form field is
    sent to ``indicoio.language``, the first element of the response is passed
    through the ``sort`` helper, and the first entry of that result is
    returned under the ``'language'`` key.  For any other request method the
    function returns ``None``.
    """
    if request.method != 'POST':
        return None

    form_fields = dict(request.form)
    data = form_fields['data_to_analyze']
    first_result = indicoio.language(data)[0]
    ranked = sort(first_result)
    return json.dumps({'language': ranked[0]})
 def test_language(self):
     # Checks that the API reports exactly the languages in LANGUAGES and
     # that English scores above 0.25 for a plainly English sentence.
     language_dict = language('clearly an english sentence')
     self.assertEqual(LANGUAGES, set(language_dict))
     # Replaces a bare ``assert`` (skipped under ``python -O``, no failure
     # detail) with the equivalent unittest assertion.
     self.assertGreater(language_dict['English'], 0.25)