def bayes_train_test(X, y, n_splits=3, random_state=None):
    """Cross-validate the ``bayes`` classifier and report fold-averaged metrics.

    The data is partitioned into ``n_splits`` shuffled folds. For every fold,
    ``bayes.predict(X_test, X_train, y_train)`` is called and its output is
    scored against the held-out labels with accuracy, macro F1 and micro F1.
    The per-fold scores are averaged, printed and returned.

    Args:
        X: Samples. Must support indexing with the index arrays produced by
            ``KFold.split`` (``X[train_index]``).
        y: Labels, indexed the same way as ``X``.
        n_splits: Number of folds. Defaults to 3, the previously hard-coded
            value.
        random_state: Forwarded to ``KFold`` so the shuffle can be made
            reproducible. ``None`` keeps the previous unseeded behaviour.

    Returns:
        Tuple ``(accuracy, macro_f1, micro_f1)``, each the mean over folds.
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    accuracy = []
    f1_macro = []
    f1_micro = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # bayes.predict receives the training data on every call; no
        # separate fit step is invoked here.
        y_pred = bayes.predict(X_test, X_train, y_train)

        accuracy.append(accuracy_score(y_test, y_pred))
        f1_macro.append(f1_score(y_test, y_pred, average='macro'))
        f1_micro.append(f1_score(y_test, y_pred, average='micro'))

    # np.mean accepts plain lists, so no explicit array conversion is needed.
    acc = np.mean(accuracy)
    f1_ma = np.mean(f1_macro)
    f1_mi = np.mean(f1_micro)

    print("Test accuracy ::", acc)
    print("Test macro F1 Score ::", f1_ma)
    print("Test micro F1 Score ::", f1_mi)
    return acc, f1_ma, f1_mi
def classify_bayes():
    """Train and evaluate the Bayes classifier on growing training fractions.

    For the face dataset and then the digit dataset, a sample of the training
    split is taken at each fraction 0.1, 0.2, ..., 1.0 via ``take_sample``.
    ``bayes.train`` is timed on that sample, the resulting model is applied to
    every item of the test split, and one line with the fraction, training
    time and accuracy is printed.

    Returns:
        None. Results are only printed.
    """
    # Divide by 10.0 rather than 10 so the fractions are not truncated to 0
    # when the file runs under Python 2 integer division.
    fracs = [x / 10.0 for x in range(1, 11)]

    # Each dataset is a (train, test, validation) tuple; the validation split
    # is loaded but not used below.
    digitd = (read_digitdata('training'),
              read_digitdata('test'),
              read_digitdata('validation'))
    faced = (read_facedata('train'),
             read_facedata('test'),
             read_facedata('validation'))
    datasets = [faced, digitd]

    # Bayesian
    for dataset in datasets:
        train_split, test_split = dataset[0], dataset[1]
        total = len(test_split)  # invariant across fractions
        for f in fracs:
            fs = take_sample(train_split, f)

            t1 = time.time()
            model = bayes.train(fs)
            t2 = time.time()

            cor = 0
            for item in test_split:
                # item[0] is passed to the model, item[1] is the expected
                # class; the second value returned by predict is not needed.
                cl, _ = bayes.predict(item[0], model)
                if cl == item[1]:
                    cor += 1

            acc = float(cor) / total
            print("Bayes class accuracy frac {} train_time {} accuracy {}".
                  format(f, t2 - t1, acc))
#---salt ratio: Salt[g] divided by 8.0 for male rows and 7.0 for female rows;
#   rows with any other gender value keep the 0.0 default-----------------------
test["Saltratio"] = 0.0
is_male = test["gender"] == "male"
is_female = test["gender"] == "female"
# A single .loc[row_mask, column] assignment writes into `test` itself.
# The removed .ix accessor, chained after test["Saltratio"], could end up
# assigning to a temporary copy instead.
test.loc[is_male, "Saltratio"] = test.loc[is_male, "Salt[g]"] / 8.0
test.loc[is_female, "Saltratio"] = test.loc[is_female, "Salt[g]"] / 7.0

test = test.drop(["gender", "age", "height", "weight", "EER[kcal]",
                  "P target(15%)[g]", "F target(25%)[g]", "C target(60%)[g]",
                  "E[kcal]", "P[g]", "F[g]", "C[g]", "Salt[g]"], axis=1)

#---transform dataframe to list-----------------------------------------
test_array = np.array(test)
test_list = test_array.tolist()

#---predict the class and get a list of classes------------------------
# The first element of each row selects which per-meal model is used.
# NOTE(review): presumably this is the "Type" column -- confirm against the
# layout of the test sheet.
models_by_meal = {
    "breakfast": (train_breakfast_dict, train_breakfast_summaries),
    "lunch": (train_lunch_dict, train_lunch_summaries),
    "dinner": (train_dinner_dict, train_dinner_summaries),
}

predictions = []
for row in test_list:
    meal = row[0]
    if meal not in models_by_meal:
        # Fail loudly instead of silently reusing the previous row's
        # prediction (or hitting a NameError on the very first row).
        raise ValueError("unknown meal type in test data: {!r}".format(meal))
    train_dict, train_summaries = models_by_meal[meal]
    # Strip the meal label so only the feature values reach the classifier.
    del row[0]
    predictions.append(bayes.predict(train_dict, train_summaries, row))

#---write the classes into the table ------------------------------------
score = pd.DataFrame(predictions, columns=["scores"])
# Re-read the untouched test sheet so the output keeps every original column.
test = pd.read_excel('filename of test data')
test = test.reset_index(drop=True)
testscore = pd.concat([test, score], axis=1)
testscore.to_excel('filename of the test data with results')
import bayes as by
import numpy as ny

# Sample documents as token lists. Only test_doc2 is classified below;
# test_doc1 is kept as an alternative input that can be swapped in.
test_doc1 = ['love', 'my', 'dalmation']
test_doc2 = ['love', 'dog', 'my', 'dog']

# Load the labelled corpus and build the vocabulary from it.
dataset, labels = by.parseFile('data')
vocab_list = by.creatVocabList(dataset)

# Vectorize the document under its own name so the raw token lists above are
# not overwritten and it stays obvious which document is being classified.
test_vec = ny.array(by.setWords2Bag2(vocab_list, test_doc2))

# Train on the corpus, then classify the vectorized document.
pVec, pDoc, index = by.trainNB(dataset, labels, vocab_list)
result = by.predict(pVec, pDoc, index, test_vec)
print(result)
"gender", "age", "height", "weight", "EER[kcal]", "Type", "P target(15%)[g]", "F target(25%)[g]", "C target(60%)[g]", "E[kcal]", "P[g]", "F[g]", "C[g]", "Salt[g]" ], axis=1) accuracy = 0 for iter_num in range(1000): #---Validation Split----------------------------------------------------------- train, val = train_test_split(trainval, test_size=0.1) val_x = val.drop(["Score(1:worst 2:bad 3:good 4:best)"], axis=1) val_x_array = np.array(val_x) val_x_list = val_x_array.tolist() #---devide train data according to class------------------------------- train = np.array(train) train = train.tolist() train_dict = bayes.fea_and_class(train) #---calculate the mean and std of each feature according to category------- train_summaries = bayes.summarizeByClass(train_dict) #---predict the class and get a list of classes---------------------- predictions = [] for i in range(len(val_x_list)): prediction = bayes.predict(train_dict, train_summaries, val_x_list[i]) predictions.append(prediction) #calculate the accuracy using the validation and predictions--------------- val_array = np.array(val) val_list = val_array.tolist() accuracy += bayes.accuracy(val_list, predictions) print accuracy / 1000.0