data, label, test_size=0.2, random_state=0) return X_train, X_test, y_train, y_test def mode(data): result = np.zeros((600, 200)) for i in range(0, len(data)): result[i][data[i]] = 1 return result if __name__ == "__main__": piclabel_3k, picname_3k, labels_3k = read_data.read_train_3k( read_data.train_3k) attributes_train = read_data.read_attributes(read_data.attr_train) alexnet_train = read_data.read_npy(read_data.alexnet_train) alexmin = np.amin(alexnet_train) alexnet_train_ = alexnet_train - alexmin #X_train, X_test, y_train, y_test = crossvalid(data, label) #siftbow_train = read_data.read_npy(read_data.siftbow_train) acc_attri_gNB, ytest = crossvalid_gaussianNB(attributes_train, piclabel_3k) #print acc_attri_gNB acc_attri_mNB, ytest = crossvalid_multinomialNB(attributes_train, piclabel_3k) #print acc_attri_mNB acc_attri_bNB, ytest = crossvalid_bernoulliNB(attributes_train, piclabel_3k) #print acc_attri_bNB acc_alexnet_gNB, ytest = crossvalid_gaussianNB(alexnet_train, piclabel_3k) #print acc_alexnet_gNB acc_alexnet_mNB, ytest = crossvalid_multinomialNB(alexnet_train_,
X_train, X_test, y_train, y_test = cross_validation.train_test_split(data, label, test_size = 0.2, random_state = 0) bnb = naive_bayes.BernoulliNB(binarize = 2.5) accuracy = bnb.fit(X_train, y_train).score(X_test, y_test) return accuracy def mode(data): result = np.zeros((1000, 200)) for i in range(0, len(data)): result[i][data[i]] = 1 return result if __name__ == "__main__": piclabel_3k, picname_3k, labels_3k = read_data.read_train_3k(read_data.train_3k) attributes_train = read_data.read_attributes(read_data.attr_train) attributes_test = read_data.read_attributes(read_data.attr_test) alexnet_train = read_data.read_npy(read_data.alexnet_train) alexnet_test = read_data.read_npy(read_data.alexnet_test) alexmin = np.amin(alexnet_train) alexnet_train_ = alexnet_train - alexmin; alexnet_test_ = alexnet_test - alexmin prediction1 = gaussianNB(attributes_train, piclabel_3k, attributes_test) prediction2 = multinomialNB(attributes_train, piclabel_3k, attributes_test) prediction3 = bernoulliNB(attributes_train, piclabel_3k, attributes_test) prediction4 = gaussianNB(alexnet_train, piclabel_3k, alexnet_test) prediction5 = multinomialNB(alexnet_train_, piclabel_3k, alexnet_test_) prediction6 = bernoulliNB(alexnet_train, piclabel_3k, alexnet_test) prediction = mode(prediction1) + mode(prediction2) + mode(prediction3) + mode(prediction4) + mode(prediction5) + mode(prediction6) prediction = np.argmax(prediction, axis = 1) test_imgname = read_data.read_test_3k(read_data.test_3k) csvfile = file('combine_naive_noprob.csv', 'wb') writer = csv.writer(csvfile)
import numpy as np
import read_data as rd
from sklearn.cross_validation import KFold

# Labels, names and feature matrices, all loaded through read_data helpers.
train_label,picname_3k,labelname_3k = rd.read_train_3k(rd.train_3k)
test_name = rd.read_test_3k(rd.test_3k)
attr_name = rd.read_attributes_list(rd.attr_list)
train_data = rd.read_attributes(rd.attr_train)
# test_data = rd.read_attributes(rd.attr_test)
train_data_alex = rd.read_npy(rd.alexnet_train)
test_data_alex = rd.read_npy(rd.alexnet_test)
train_data_siftbow = rd.read_npy(rd.siftbow_train)
# test_data_siftbow = rd.read_npy(rd.siftbow_test)


def cross_validate(train_data, n_folds=5):
    """Return the mean ``svm`` score over ``n_folds`` K-fold splits.

    Args:
        train_data: feature matrix, indexed row-wise by the fold indices.
        n_folds: number of folds handed to ``KFold``.

    Returns:
        The sum of the per-fold scores divided by ``n_folds``.

    Labels are read from the module-level ``train_label``.  Each fold's
    score is printed as soon as it is computed.
    """
    kf = KFold(len(train_data), n_folds=n_folds)
    total = 0.0
    for train_index, test_index in kf:
        fold_score = svm(train_data[train_index], train_label[train_index],
                         train_data[test_index], train_label[test_index])
        # Parenthesised form: prints the same under Python 2 and also parses
        # under Python 3.
        print(fold_score)
        total += fold_score
    return total / n_folds


def svm(X_train,y_train,X_test,y_test):
    # clf = SVC(C=1.0,kernel='sigmoid')
    # NOTE(review): no import of LinearSVC is visible in this view -- confirm
    # one exists elsewhere in the file.
    clf = LinearSVC(dual=False)
    score = clf.fit(X_train,y_train).score(X_test,y_test)
    # NOTE(review): this function is cut off at the end of this view; the
    # statements above are kept verbatim.
import numpy as np
import read_data as rd
from sklearn.cross_validation import KFold

# Labels, names and feature matrices, all loaded through read_data helpers.
train_label, picname_3k, labelname_3k = rd.read_train_3k(rd.train_3k)
test_name = rd.read_test_3k(rd.test_3k)
attr_name = rd.read_attributes_list(rd.attr_list)
train_data = rd.read_attributes(rd.attr_train)
# test_data = rd.read_attributes(rd.attr_test)
train_data_alex = rd.read_npy(rd.alexnet_train)
test_data_alex = rd.read_npy(rd.alexnet_test)
train_data_siftbow = rd.read_npy(rd.siftbow_train)
# test_data_siftbow = rd.read_npy(rd.siftbow_test)


def cross_validate(train_data, n_folds=5):
    """Average the ``svm`` score across a K-fold split of ``train_data``.

    Args:
        train_data: feature matrix, indexed row-wise by the fold indices.
        n_folds: number of folds handed to ``KFold``.

    Returns:
        The sum of the per-fold scores divided by ``n_folds``.

    The matching labels are taken from the module-level ``train_label``,
    and every fold's score is printed while the loop runs.
    """
    folds = KFold(len(train_data), n_folds=n_folds)
    score_sum = 0.0
    for train_index, test_index in folds:
        X_train, y_train = train_data[train_index], train_label[train_index]
        X_test, y_test = train_data[test_index], train_label[test_index]
        fold_score = svm(X_train, y_train, X_test, y_test)
        # print(...) with a single argument behaves like the old print
        # statement on Python 2 and is valid syntax on Python 3 as well.
        print(fold_score)
        score_sum += fold_score
    return score_sum / n_folds


def svm(X_train, y_train, X_test, y_test):
    # clf = SVC(C=1.0,kernel='sigmoid')
    # NOTE(review): no import of LinearSVC is visible in this view -- confirm
    # one exists elsewhere in the file.
    clf = LinearSVC(dual=False)
    score = clf.fit(X_train, y_train).score(X_test, y_test)
    # NOTE(review): this function is cut off at the end of this view; the
    # statements above are kept verbatim.
ab=AdaBoostClassifier(n_estimators=100,learning_rate=1.0) accuracy=ab.fit(X_train,y_train).score(X_test,y_test) return accuracy def crossvalidate_knn(data,label): X_train,X_test, y_train, y_test=cross_validation.train_test_split(data,label,test_size=0.2,random_state=0) knn=KNeighborsClassifier(n_neighbors=7) accuracy=knn.fit(X_train,y_train).score(X_test,y_test) return accuracy if __name__=="__main__": piclabel_3k,picname_3k,labels_3k=read_data.read_train_3k(read_data.train_3k) attributes_train=read_data.read_attributes(read_data.attr_train) alexnet_train=read_data.read_npy(read_data.alexnet_train) siftbow_train=read_data.read_npy(read_data.siftbow_train) #Random Forest cross_validation att_rf=crossvalidate_rf(attributes_train,piclabel_3k) alex_rf=crossvalidate_rf(alexnet_train,piclabel_3k) sift_rf=crossvalidate_rf(siftbow_train,piclabel_3k) #AdaBoost cross_validation alex_ab=crossvalidate_ab(alexnet_train,piclabel_3k) #Knn cross_validation att_knn=crossvalidate_knn(attributes_train,piclabel_3k) alex_knn=crossvalidate_knn(alexnet_train,piclabel_3k) sift_knn=crossvalidate_knn(siftbow_train,piclabel_3k)
def crossvalidate_knn(data, label):
    """Score a 7-nearest-neighbour classifier on a held-out 20% split.

    The data is split once with ``train_test_split`` (``test_size=0.2``,
    ``random_state=0``); the classifier is fitted on the training part and
    the value of its ``score`` on the test part is returned.
    """
    split = cross_validation.train_test_split(
        data, label, test_size=0.2, random_state=0)
    train_features, test_features, train_labels, test_labels = split
    classifier = KNeighborsClassifier(n_neighbors=7)
    fitted = classifier.fit(train_features, train_labels)
    return fitted.score(test_features, test_labels)


if __name__ == "__main__":
    # Load the labels and the three feature sets through read_data.
    piclabel_3k, picname_3k, labels_3k = read_data.read_train_3k(
        read_data.train_3k)
    attributes_train = read_data.read_attributes(read_data.attr_train)
    alexnet_train = read_data.read_npy(read_data.alexnet_train)
    siftbow_train = read_data.read_npy(read_data.siftbow_train)

    #Random Forest cross_validation
    att_rf = crossvalidate_rf(attributes_train, piclabel_3k)
    alex_rf = crossvalidate_rf(alexnet_train, piclabel_3k)
    sift_rf = crossvalidate_rf(siftbow_train, piclabel_3k)

    #AdaBoost cross_validation
    alex_ab = crossvalidate_ab(alexnet_train, piclabel_3k)

    #Knn cross_validation
    att_knn = crossvalidate_knn(attributes_train, piclabel_3k)
    alex_knn = crossvalidate_knn(alexnet_train, piclabel_3k)
    sift_knn = crossvalidate_knn(siftbow_train, piclabel_3k)