Python LoadData.LoadData_vocabulary Exemples

Langage de programmation: Python

Class/Type: LoadData

Méthode/Fonction: LoadData_vocabulary

Exemples sur hotexamples.com : 1

Python LoadData.LoadData_vocabulary - 1 exemple trouvé. Voici l'exemple réel le mieux noté de LoadData.LoadData_vocabulary, issu du pack SGAE et extrait de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

LoadData(30)

GetData(9)

LoadDta(5)

load_ic(2)

loadTrainDataFromCSV(2)

LoadDataFromFile(2)

loadTestDataFromCSV(2)

addAllOneColumn(2)

AccidentsData(1)

batch_creation(1)

SelectDataAccordingToType(1)

VehiclesData(1)

_downloadStocks(1)

_getRawDataFromText(1)

_getSymbols(1)

getTraindata(1)

downloadSymbols(1)

getTestData(1)

ProcessData(1)

historicalData(1)

loadRFBenchmarkFromCSV(1)

load_anns(1)

loadcleandata(1)

RAFDataset(1)

MergeImages(1)

Normalize(1)

LoadData_labels(1)

DataDictionary(1)

DataSet(1)

DualTrainDatasetRAF(1)

GetData_WithoutS(1)

LoadDataTrain(1)

LoadData_bagOfWords(1)

LoadData_cut_dire(1)

LoadData_vocabulary(1)

MergedData(1)

LoadJSON(1)

LoadStopWords(1)

LoadXMLFromDrug(1)

Load_Case1(1)

Load_Case2(1)

Load_Case5(1)

Load_CaseGB(1)

Covid(1)

store(1)

Méthodes fréquemment utilisées

LoadData (30)

GetData (9)

LoadDta (5)

load_ic (2)

loadTrainDataFromCSV (2)

LoadDataFromFile (2)

loadTestDataFromCSV (2)

addAllOneColumn (2)

AccidentsData (1)

batch_creation (1)

Méthodes fréquemment utilisées

SelectDataAccordingToType (1)

VehiclesData (1)

_downloadStocks (1)

_getRawDataFromText (1)

_getSymbols (1)

getTraindata (1)

downloadSymbols (1)

getTestData (1)

ProcessData (1)

historicalData (1)

loadRFBenchmarkFromCSV (1)

load_anns (1)

loadcleandata (1)

RAFDataset (1)

MergeImages (1)

Normalize (1)

LoadData_labels (1)

DataDictionary (1)

DataSet (1)

DualTrainDatasetRAF (1)

Méthodes fréquemment utilisées

loadRFBenchmarkFromCSV (1)

load_anns (1)

loadcleandata (1)

RAFDataset (1)

MergeImages (1)

Normalize (1)

LoadData_labels (1)

DataDictionary (1)

DataSet (1)

DualTrainDatasetRAF (1)

GetData_WithoutS (1)

LoadDataTrain (1)

LoadData_bagOfWords (1)

LoadData_cut_dire (1)

LoadData_vocabulary (1)

MergedData (1)

LoadJSON (1)

LoadStopWords (1)

LoadXMLFromDrug (1)

Load_Case1 (1)

Load_Case2 (1)

Load_Case5 (1)

Load_CaseGB (1)

Covid (1)

store (1)

Méthodes fréquemment utilisées

GetData_WithoutS (1)

LoadDataTrain (1)

LoadData_bagOfWords (1)

LoadData_cut_dire (1)

LoadData_vocabulary (1)

MergedData (1)

LoadJSON (1)

LoadStopWords (1)

LoadXMLFromDrug (1)

Load_Case1 (1)

Load_Case2 (1)

Load_Case5 (1)

Load_CaseGB (1)

Covid (1)

store (1)

Exemple #1

0

Afficher le fichier

def RunMain():
    """Run the Naive Bayes experiments on the clintontrump data set.

    The steps, in order:

    1. Load the vocabulary, the train/dev/test bag-of-words files and the
       train/dev labels from ``./clintontrump-data/``.
    2. Part 1: learn and test the Bernoulli and Multinomial models on the
       dev set, write the dev predictions under ``./Result/`` and keep a
       deep copy of ``nbModel.Pwy_c`` after each run.
    3. Part 3 (bonus): repeat the dev runs through ``bo.LearnAndTest`` and
       write test-set predictions produced by ``bo.LearnAndPredict``.
    4. Feature selection: for several thresholds, relabel the features via
       ``im.find_top_words`` / ``im.find_std_zero_words`` /
       ``im.find_low_tfidf_words`` and re-run both models on the dev set.
    5. Part 2: run ``PriorAndFitting_diffLaplace`` and save the
       accuracy-versus-alpha figure.

    Takes no arguments and returns nothing; timings and results are printed
    or written by the helpers it calls.
    """
    # time.clock() was removed in Python 3.8; use perf_counter when it exists
    # and fall back to time.clock on interpreters that predate it.
    clock = getattr(time, 'perf_counter', None) or time.clock

    def evaluate_both_on_dev():
        """Re-run LearnAndTest on the dev set for both models, printing timings."""
        for model_name in ("Bernoulli", "Multinomial"):
            start = clock()
            # The returned accuracy / history / confusion matrix are not
            # used after the feature-selection runs, so they are discarded.
            LearnAndTest(nbModel, devX, devY, model_name)
            print('%s Model learn & test, using time %.4f s, \n'
                  % (model_name, clock() - start))

    print('************Welcome to the World of Bayes!***********\n')
    # Kept from the original: an initial clock call before the first reading.
    clock()
    t0 = clock()

    # Load data, and save as the format under NaiveBayes.
    DIR_RESULT = "./Result/"
    DIR = "./clintontrump-data/"
    FILENAME_BASIC = "clintontrump."
    [vocList, wordNum] = ld.LoadData_vocabulary(DIR + FILENAME_BASIC + "vocabulary")
    [trainX, trainDocNum] = ld.LoadData_bagOfWords(DIR + FILENAME_BASIC + "bagofwords.train")
    [devX, devDocNum] = ld.LoadData_bagOfWords(DIR + FILENAME_BASIC + "bagofwords.dev")
    [testX, testDocNum] = ld.LoadData_bagOfWords(DIR + FILENAME_BASIC + "bagofwords.test")
    str0 = "realDonaldTrump"
    str1 = "HillaryClinton"
    trainY = ld.LoadData_labels(DIR + FILENAME_BASIC + "labels.train", str0)
    devY = ld.LoadData_labels(DIR + FILENAME_BASIC + "labels.dev", str0)
    t1 = clock()
    print('Loading data File. using time %.4f s, \n' % (t1 - t0))

    # Define NaiveBayes instance, and calc prior P(y).
    nbModel = nb.NAIVE_BAYES_MODEL(wordNum, trainDocNum, trainX, trainY)
    nbModel.estimatePy_MLE()

    # ******* part 1: basic implementation
    # Bernoulli model
    [berAccuracy, berTestHist, berConfuseMat] = LearnAndTest(nbModel, devX, devY, "Bernoulli")
    od.WritenFile_dev(DIR_RESULT + "Predict.Bernoulli_basic.dev", berTestHist, str0, str1)
    # Snapshot of nbModel.Pwy_c taken right after the Bernoulli run; the
    # feature-selection steps below are driven by this copy.
    Pwy_b = copy.deepcopy(nbModel.Pwy_c)
    t2 = clock()
    print('Bernoulli Model learn & test, using time %.4f s, \n' % (t2 - t1))

    # Multinomial will go through the similar process.
    [mulAccuracy, mulTestHist, mulConfuseMat] = LearnAndTest(nbModel, devX, devY, "Multinomial")
    od.WritenFile_dev(DIR_RESULT + "Predict.Multinomial_basic.dev", mulTestHist, str0, str1)
    Pwy_m = copy.deepcopy(nbModel.Pwy_c)
    t3 = clock()
    print('multinomial Model learn & test, using time %.4f s, \n' % (t3 - t2))

    # ******* part 3: bonus. Since training data will be changed in part 2,
    # part 3 runs first.
    # Bernoulli model
    t1 = clock()
    [berAccuracy, berTestHist, berConfuseMat] = bo.LearnAndTest(nbModel, devX, devY, "Bernoulli")
    od.WritenFile_dev(DIR_RESULT + "Predict.Bernoulli_basic_with_tag.dev", berTestHist, str0, str1)
    t2 = clock()
    print('Bernoulli Model learn & test, using time %.4f s, \n' % (t2 - t1))

    # Multinomial will go through the similar process.
    t1 = clock()
    [mulAccuracy, mulTestHist, mulConfuseMat] = bo.LearnAndTest(nbModel, devX, devY, "Multinomial")
    od.WritenFile_dev(DIR_RESULT + "Predict.Multinomial_basic_with_tag.dev", mulTestHist, str0, str1)
    t2 = clock()
    print('multinomial Model learn & test, using time %.4f s, \n' % (t2 - t1))

    # Predict the final (test-set) labels.
    # Bernoulli model
    t1 = clock()
    berTestHist = bo.LearnAndPredict(nbModel, testX, "Bernoulli")
    od.WritenFile_dev(DIR_RESULT + "Predict.Bernoulli_basic_with_tag.test", berTestHist, str0, str1)
    t2 = clock()
    print('Bernoulli Model learn & test, using time %.4f s, \n' % (t2 - t1))

    # Multinomial will go through the similar process.
    t1 = clock()
    mulTestHist = bo.LearnAndPredict(nbModel, testX, "Multinomial")
    od.WritenFile_dev(DIR_RESULT + "Predict.Multinomial_basic_with_tag.test", mulTestHist, str0, str1)
    t2 = clock()
    print('multinomial Model learn & test, using time %.4f s, \n' % (t2 - t1))

    # ##### Feature selection experiments
    topWord_list = [10, 100, 1000, 5000]
    std_threshold = [0.1, 0.3, 0.5, 0.7, 0.9]
    tfidf_threshold = [1, 2, 3, 4, 5]

    # NOTE(review): the three loops below are laid out sequentially; the
    # source this was recovered from had lost its indentation -- confirm
    # they were not nested.
    for word_num in topWord_list:
        print('\n * Remove top words based on Bernoulli %s' % word_num)
        [removedIdx, labelVec, redFeaNum] = im.find_top_words(nbModel.classNum, wordNum, word_num, Pwy_b)
        nbModel.setFeatureLabel(labelVec, redFeaNum)
        evaluate_both_on_dev()

        print('\n * Remove top words based on Mulinomial %s' % word_num)
        [removedIdx, labelVec, redFeaNum] = im.find_top_words(nbModel.classNum, wordNum, word_num, Pwy_m)
        nbModel.setFeatureLabel(labelVec, redFeaNum)
        evaluate_both_on_dev()

    for std_thre in std_threshold:
        # Find stop words based on STD and doc#
        print('\n * Remove STD words for Bernoulli %s' % std_thre)
        [removedIdx, labelVec, redFeaNum] = im.find_std_zero_words(wordNum, std_thre, Pwy_b)
        nbModel.setFeatureLabel(labelVec, redFeaNum)
        evaluate_both_on_dev()

        # Find stop words based on STD and word#
        print('\n * Remove STD words for Multinomial %s' % std_thre)
        [removedIdx, labelVec, redFeaNum] = im.find_std_zero_words(wordNum, std_thre, Pwy_m)
        nbModel.setFeatureLabel(labelVec, redFeaNum)
        evaluate_both_on_dev()

    for tfidf_thre in tfidf_threshold:
        # Find stop words based on TF/IDF
        print('\n * Remove low TF/IDF words %s' % tfidf_thre)
        [removedIdx, labelVec, redFeaNum] = im.find_low_tfidf_words(wordNum, tfidf_thre, Pwy_b, Pwy_m)
        nbModel.setFeatureLabel(labelVec, redFeaNum)
        evaluate_both_on_dev()

    # ******** part 2: Priors and overfittings
    # Different Laplace smoothing alpha
    [testAlpha, testAccuracy] = PriorAndFitting_diffLaplace(nbModel, devX, devY, DIR_RESULT, str0, str1)
    # NOTE(review): the run above uses the dev set but the result is divided
    # by testDocNum -- confirm whether devDocNum was intended.
    testAccuracy = np.array(testAccuracy) / float(testDocNum)
    print(testAlpha)
    print(testAccuracy)
    od.Save2Figure_semilogs(DIR_RESULT + 'laplaceAlpha', 1, testAlpha, [testAccuracy],
                            ['log(laplace_alpha)', 'accuracy'], [1e-5, 10000, 0, 1], 1)