Beispiel #1
0
# Hard-coded sample tweet texts. The suffix of each `real*` name looks like a
# language label (PT, CA, EU, GL, ES, UND, OTHER) -- presumably the expected
# language of the text; TODO confirm against the label set used by the
# classifier. None of these names is read in the visible part of this script.
realPT = "Faltam 11 dias para fazer anos, hmm"
realCA = "Comença al Centre Fraternal la Jornada de Cooperaciò i defensa dels DDHH al Sàhara Occidental pic.twitter.com/lmOEAfww7K"
realEU = "@ErrealaAle @rnrjukebox hori ere pentsatu det, km mordoxka zegoen bukaera arte bakarrik juteko,... bestela oso erraz jun da,..."
realGL = "Pouco frío tiña eu logo no carnaval en Abadín"
realES = "#Lugo #a6 (amarillo) obras en #PedrafitaDoCebreiro carril izquierdo cerrado km431,3~430 decreciente #dgt #trafico http://tuitrafico.com/estado-del-trafico/galicia/lugo/pedrafita-do-cebreiro/199833/ …"
realUND = "Hhhhhhhhhhhhjjhhhhhhhhhhhh"
realOTHER = "Buongiorno ai nostri ascoltatori in Italia :)) pic.twitter.com/zfGpYc3oxo"

# Short ad-hoc examples `a`, `b`, `c`. NOTE(review): the trailing "en+es"
# marker presumably means the text mixes English and Spanish -- confirm.
a = 'Primer sorteo del stream @Dimegioclub http://www.twitch.tv/miicrocs' #en+es

# NOTE(review): appears to mix two languages inside one sentence -- confirm
# which labels are expected for it.
b = 'Hau ez dakit zer den estamos hablando en un idioma edo beste batean'

# NOTE(review): a quoted sentence followed by a mention and hashtags; it also
# appears to mix languages -- confirm the expected label.
c = '"En Cada Lucha Aquel Que Va A Muerte Es El Que Gana" Goazen @PasaiaRegional!! #aupaekipo #aupapasaia pic.twitter.com/BQ1ikdE2Qt'


# Run the project's text preprocessing on one tweet text.
# NOTE(review): `tweetEU` is not assigned in the visible part of this script
# (only `realEU` is), and `text` is not read again below -- confirm where
# `tweetEU` comes from and whether this line is still needed.
text = preprocess.preprocessText(tweetEU)

# The commented-out call below is an alternative way of obtaining the
# coefficients; the active code reads them through `read` instead.
# linearCoefficients = linear.getlinearcoefficientsForLanguageArray(arrayLanguages, maxNgram, corpusNgrams)
linearCoefficientsALL = read.readLinearCoefficients(LI_Coefficients)

# Select the entry at position maxNgram-1. Presumably the container is
# ordered by n-gram size starting at 1 -- TODO confirm against `read`.
linearCoefficients = linearCoefficientsALL[maxNgram-1]
import time

# Predict a language for every tweet in `tweetListPreProcessedtest`, print the
# result through `utils.printResultTXT`, and report the elapsed time for each
# tweet as well as for the whole run.
t1 = time.time()  # start of the whole run
for tweet in tweetListPreProcessedtest:
    t0 = time.time()  # start of this tweet

    # `probability` is returned together with the label but is not used in
    # the visible part of this script.
    predictedLanguage, probability = linear.getPredictedLanguageForTweet(
        linearCoefficients, tweet.text, maxNgram, corpusNgrams)
    utils.printResultTXT(predictedLanguage, tweet, 5)

    # A single parenthesised argument prints the same text with the
    # Python 2 print statement and with the Python 3 print function.
    print("time for tweet= " + str(time.time() - t0))
print("time total= " + str(time.time() - t1))
# Hard-coded sample tweet texts. The suffix of each `real*` name looks like a
# language label (EN, PT, CA, EU, GL, ES, UND, OTHER) -- presumably the
# expected language of the text; TODO confirm against the label set used by
# the classifier. Apart from `realEN`, the same names are assigned the same
# values earlier in this file. None of them is read in the visible code.
realEN = "'Where is the moment when we need it the most' @ Salvador de Bahía, Brasil http://instagram.com/p/lX9he2CrnF/ "
realPT = "Faltam 11 dias para fazer anos, hmm"
realCA = "Comença al Centre Fraternal la Jornada de Cooperaciò i defensa dels DDHH al Sàhara Occidental pic.twitter.com/lmOEAfww7K"
realEU = "@ErrealaAle @rnrjukebox hori ere pentsatu det, km mordoxka zegoen bukaera arte bakarrik juteko,... bestela oso erraz jun da,..."
realGL = "Pouco frío tiña eu logo no carnaval en Abadín"
realES = "#Lugo #a6 (amarillo) obras en #PedrafitaDoCebreiro carril izquierdo cerrado km431,3~430 decreciente #dgt #trafico http://tuitrafico.com/estado-del-trafico/galicia/lugo/pedrafita-do-cebreiro/199833/ …"
realUND = "Hhhhhhhhhhhhjjhhhhhhhhhhhh"
realOTHER = "Buongiorno ai nostri ascoltatori in Italia :)) pic.twitter.com/zfGpYc3oxo"

# Short ad-hoc examples `a`, `b`, `c`. NOTE(review): the trailing "en+es"
# marker presumably means the text mixes English and Spanish -- confirm.
a = 'Primer sorteo del stream @Dimegioclub http://www.twitch.tv/miicrocs'  #en+es

# NOTE(review): appears to mix two languages inside one sentence -- confirm
# which labels are expected for it.
b = 'Hau ez dakit zer den estamos hablando en un idioma edo beste batean'

# NOTE(review): a quoted sentence followed by a mention and hashtags; it also
# appears to mix languages -- confirm the expected label.
c = '"En Cada Lucha Aquel Que Va A Muerte Es El Que Gana" Goazen @PasaiaRegional!! #aupaekipo #aupapasaia pic.twitter.com/BQ1ikdE2Qt'

# Run the project's text preprocessing on one tweet text.
# NOTE(review): `tweetEU` is not assigned in the visible part of this script
# (only `realEU` is), and `text` is not read again below -- confirm where
# `tweetEU` comes from and whether this line is still needed.
text = preprocess.preprocessText(tweetEU)

# The commented-out call below is an alternative way of obtaining the
# coefficients; the active code reads them through `read` instead.
# linearCoefficients = linear.getlinearcoefficientsForLanguageArray(arrayLanguages, maxNgram, corpusNgrams)
linearCoefficientsALL = read.readLinearCoefficients(LI_Coefficients)

# Select the entry at position maxNgram - 1. Presumably the container is
# ordered by n-gram size starting at 1 -- TODO confirm against `read`.
linearCoefficients = linearCoefficientsALL[maxNgram - 1]
import time

# Predict a language for every tweet in `tweetListPreProcessedtest`, print the
# result through `utils.printResultTXT`, and report the elapsed time per tweet.
t1 = time.time()  # reference point taken before the loop starts
for tweet in tweetListPreProcessedtest:
    t0 = time.time()  # start of this tweet

    # `probability` is returned together with the label but is not used in
    # the visible part of this script.
    predictedLanguage, probability = linear.getPredictedLanguageForTweet(
        linearCoefficients, tweet.text, maxNgram, corpusNgrams)
    utils.printResultTXT(predictedLanguage, tweet, 5)

    # A single parenthesised argument prints the same text with the
    # Python 2 print statement and with the Python 3 print function.
    print("time for tweet= " + str(time.time() - t0))