Exemples Python de UtilsTweetSafa.obtainNgrams

Langage de programmation: Python

Class/Type: UtilsTweetSafa

Méthode/Fonction: obtainNgrams

Exemples sur hotexamples.com : 5

Python UtilsTweetSafa.obtainNgrams - 5 exemples trouvés. Ce sont les exemples réels les mieux notés de UtilsTweetSafa.obtainNgrams extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher / Masquer

obtainNgrams(4)

printResultTXT(2)

chooseLanguagesLin(1)

getBigramFreqForSingleLang(1)

returnNgramFreqSet(1)

printResults(1)

orderVector(1)

learnNgramConfidencefromData(1)

getAllLanguagesSet(1)

cleanDataset(1)

formatDataset(1)

evaluateNgramRakingSet(1)

crossValidationRanking(1)

crossValidationLidstone(1)

createDataSet(1)

cleanTweets(1)

returnNgramFreqSetRanking(1)

Méthodes fréquemment utilisées

obtainNgrams (4)

printResultTXT (2)

chooseLanguagesLin (1)

getBigramFreqForSingleLang (1)

returnNgramFreqSet (1)

printResults (1)

orderVector (1)

learnNgramConfidencefromData (1)

getAllLanguagesSet (1)

cleanDataset (1)

Méthodes fréquemment utilisées

formatDataset (1)

evaluateNgramRakingSet (1)

crossValidationRanking (1)

crossValidationLidstone (1)

createDataSet (1)

cleanTweets (1)

returnNgramFreqSetRanking (1)

Exemple #1

0

Afficher le fichier

Fichier : CrossValidation.py Projet : CarlosAndres12/SEPLN-TweetLID14

def crossValidation(tweetList, k, maxNgram):
    """Run k train/evaluate rounds over ``tweetList`` and report each one.

    For every fold index in ``0 .. k-1`` the data is split with
    ``divideDataset``, n-gram data is built from the training part with
    ``utils.obtainNgrams``, confidences are learned from it with
    ``utils.learnNgramConfidencefromData``, the test part is evaluated with
    ``utils.evaluateNgramRakingSet`` and the predictions are handed to
    ``utils.printResults``.

    Args:
        tweetList: tweets to split; passed unchanged to ``divideDataset``.
        k: number of folds (also passed to ``divideDataset``).
        maxNgram: forwarded unchanged to ``utils.obtainNgrams``.

    Returns:
        None. Results are only reported through ``utils.printResults``.
    """
    # Fixed settings forwarded to utils.evaluateNgramRakingSet.
    # NOTE(review): their meaning is not visible from here -- confirm in utils.
    m = 80
    n = 50

    for fold in xrange(k):
        training, heldOut = divideDataset(tweetList, k, fold)

        # NOTE(review): other call sites of utils.obtainNgrams unpack three
        # return values; here the whole return value is kept under one name.
        # Confirm this matches the utils version this function runs against.
        ngramDist = utils.obtainNgrams(training, maxNgram)
        confidences = utils.learnNgramConfidencefromData(ngramDist, training)

        # The second returned value is not used below; an alternative reporter,
        # utils.printJeroni(expected, predicted, fold), was left disabled.
        predicted, expected = utils.evaluateNgramRakingSet(
            heldOut, ngramDist, confidences, m, n)

        utils.printResults(heldOut, predicted, fold)

Exemple #2

0

Afficher le fichier

Fichier : CrossValidation.py Projet : CarlosAndres12/SEPLN-TweetLID14

def crossValidationLinearInterpolation(tweetList, k, maxNgram):
    """Run k train/test rounds using the linear-interpolation predictor.

    For every fold index in ``0 .. k-1`` the data is split with
    ``divideDataset``, n-gram data is obtained from the training part, linear
    coefficients are computed and printed, and every test tweet is classified
    with ``linear.getPredictedLanguageForTweet``. Each prediction is passed to
    ``utils.printResultTXT``; at the end of the fold a line with the number of
    predictions equal to ``tweet.language`` over the number of test tweets is
    printed.

    Args:
        tweetList: tweets to split; passed unchanged to ``divideDataset``.
        k: number of folds (also passed to ``divideDataset``).
        maxNgram: forwarded to ``utils.obtainNgrams`` and to the ``linear``
            helpers.

    Returns:
        None. All results are printed or emitted via ``utils.printResultTXT``.
    """
    for fold in xrange(k):
        training, heldOut = divideDataset(tweetList, k, fold)

        # The third returned value is not used in this function.
        ngramDist, languages, languagesAll = utils.obtainNgrams(training, maxNgram)
        coefficients = linear.getlinearcoefficientsForLanguageArray(
            languages, maxNgram, ngramDist)
        print(coefficients)

        hits = 0   # predictions equal to tweet.language
        seen = 0   # test tweets processed in this fold
        for tweet in heldOut:
            # The returned probability is not used here.
            guess, probability = linear.getPredictedLanguageForTweet(
                coefficients, tweet.text, maxNgram, ngramDist)
            utils.printResultTXT(guess, tweet)
            if guess == tweet.language:
                hits += 1
            seen += 1

        print('correct tweets fold ' + str(fold) + ' = ' + str(hits) + '/' + str(seen))

Exemple #3

0

Afficher le fichier

Fichier : mainLID.py Projet : CarlosAndres12/SEPLN-TweetLID14

tweetListPreProcessed_train = preprocess.main(tweetList_train) tweetListPreProcessed_test = preprocess.main(tweetList_test) # shuffle(tweetListPreProcessed) # 3-. Algorithms # 3.1-. Algorithms: Bayesian Networks # 3.2.1-. Linear interpolation # Generate linear coefficients: input (n-grams and language) # Smooth data # cv.crossValidationLinearInterpolation(tweetListPreProcessed_train, 3, maxNgram) linearCoefficientsAll = list() trainDist, arrayLanguages, languagesAll = utils.obtainNgrams(tweetListPreProcessed_train, maxNgram) for gram in xrange(1, maxNgram+1): linearCoefficientsAll.append(linear.getlinearcoefficientsForLanguageArray(arrayLanguages, gram, trainDist)) print linearCoefficientsAll # linearCoefficientsALL = read.readLinearCoefficients(LI_Coefficients) count = 4 # Desde que gram empezar for i in xrange(count, maxNgram): count = count + 1 t0 = time.time() for tweet in tweetListPreProcessed_test:

Exemple #4

0

Afficher le fichier

Fichier : Demo.py Projet : buhrmann/SEPLN-TweetLID14

tweetListtest = read.read_tweets_dataset(test)

# 2-. Pre-process state
#     Raw data   -> tweetList
#     Clean data -> tweetListPreProcessed
tweetListPreProcessed = preprocess.main(tweetList)
tweetListPreProcessedtest = preprocess.main(tweetListtest)
# Only the training list is shuffled.
shuffle(tweetListPreProcessed)
# utils.printTweets(tweetListPreProcessed)

# 3-. Algorithms
#
# 3.1-. OBTAIN N-GRAMS
corpusNgrams, arrayLanguages, arrayLanguagesFull = utils.obtainNgrams(
    tweetListPreProcessed, maxNgram+1)
arrayLanguagesFull = utils.orderVector(arrayLanguagesFull)
# Example: print(corpusNgrams.get(str(3)).get('pt'))

# 3.2-. Algorithms: Bayesian Networks
# 3.2.1-. Linear interpolation
#     Generate linear coefficients: input (n-grams and language)
#     Smooth data

# Sample sentences, one per language code in the variable name.
tweetEN = "Tomorrow is going to be a good day to go to the beach."
tweetPT = "Amanhã será um dia muito bom, como ir para a praia."
tweetCA = "Demà farà un dia molt bo, com per anar a la platja."
tweetEU = "Bihar egun oso ona egingo du, hondartzara joateko modukoa."

Exemple #5

0

Afficher le fichier

Fichier : CalculateLICoefficients.py Projet : CarlosAndres12/SEPLN-TweetLID14

# _____________________________________________________________________________
# 1-. Read the dataset named by the first command-line argument and build the
#     tweet list; the second argument is the largest n-gram value to process.
dataset = sys.argv[1]
maxNgram = int(sys.argv[2])
# Dataset file name split on '.'; only the first piece is used below.
filename = os.path.basename(dataset).split('.')

tweetList = read.read_tweets_dataset(dataset)

# 2-. Pre-process state
#     Raw data   -> tweetList
#     Clean data -> tweetListPreProcessed
tweetListPreProcessed = preprocess.main(tweetList)

# 3-. OBTAIN N-GRAMS and Linear Coefficients
# The output path does not depend on the loop variable, so it is built once.
coefficientsPath = ('../Dataset/LICoefficients_' + str(maxNgram)
                    + 'gram_for-' + str(filename[0]) + '.txt')

for i in xrange(5, maxNgram+1):
    corpusNgrams, arrayLanguages, arrayLanguagesFull = utils.obtainNgrams(
        tweetListPreProcessed, i+1)
    linearCoefficients = linear.getlinearcoefficientsForLanguageArray(
        arrayLanguages, i, corpusNgrams)
    # print linearCoefficients

    # Append this round's rows. The context manager closes the handle even if
    # a write raises, and avoids rebinding the Python 2 builtin name `file`.
    with open(coefficientsPath, 'a+') as coefficientsFile:
        for li in linearCoefficients:
            # One tab-separated row: i, then li[0] .. li[i].
            fields = [str(i)] + [str(li[co]) for co in xrange(0, i+1)]
            coefficientsFile.write("\t".join(fields) + "\n")