Ejemplo n.º 1
0
def loaddata(val_size=6000):
    """Load the train and test sets and hold out a validation split.

    The first ``val_size`` entries returned by ``preprocess.loadTrainSet()``
    become the validation set; everything after them is the training set.

    Args:
        val_size: Number of leading training entries to hold out for
            validation. Defaults to 6000, the previously hard-coded value.

    Returns:
        A tuple ``(train_data, train_label, val_data, val_label, test_data)``.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("loading data...")
    # Load data in train.csv, divided into train data and validation data.
    data, label = preprocess.loadTrainSet()
    val_data = data[0:val_size]
    val_label = label[0:val_size]
    train_data = data[val_size:]
    train_label = label[val_size:]
    # Load data in test.csv.
    test_data = preprocess.loadTestSet()
    return train_data, train_label, val_data, val_label, test_data
Ejemplo n.º 2
0
def loaddata(val_size=6000):
    """Load the data sets and separate a validation split from the train set.

    ``preprocess.loadTrainSet()`` supplies the labelled data; its first
    ``val_size`` entries are held out for validation and the rest are kept
    for training. ``preprocess.loadTestSet()`` supplies the test data.

    Args:
        val_size: How many leading entries of the labelled data go to the
            validation set. Defaults to 6000 (the original fixed split).

    Returns:
        ``(train_data, train_label, val_data, val_label, test_data)``.
    """
    # print(...) with one string argument is valid on both Python 2 and 3,
    # unlike the bare ``print "..."`` statement.
    print("loading data...")
    # Load data in train.csv, divided into train data and validation data.
    data, label = preprocess.loadTrainSet()
    val_data = data[0:val_size]
    val_label = label[0:val_size]
    train_data = data[val_size:]
    train_label = label[val_size:]
    # Load data in test.csv.
    test_data = preprocess.loadTestSet()
    return train_data, train_label, val_data, val_label, test_data
def loaddata():
    """Return the train/validation split of the training set plus the test set.

    The first 6000 entries from ``preprocess.loadTrainSet()`` form the
    validation set; the remaining entries form the training set.

    Returns:
        ``(train_data, train_label, val_data, val_label, test_data)``.
    """
    print("loading data...")
    # Load train.csv; it is divided below into training and validation data.
    features, targets = preprocess.loadTrainSet()
    # Validation data: the first 6000 entries and their class labels.
    val_data, val_label = features[0:6000], targets[0:6000]
    # Training data: everything after the first 6000 entries, with labels.
    train_data, train_label = features[6000:], targets[6000:]
    # Load test.csv.
    test_data = preprocess.loadTestSet()
    return train_data, train_label, val_data, val_label, test_data
def loaddata(val_size=6000):
    """Load the labelled and test data and carve out a validation set.

    Args:
        val_size: Number of entries taken from the front of the labelled
            data for validation. Defaults to 6000, matching the split that
            used to be hard-coded here.

    Returns:
        ``(train_data, train_label, val_data, val_label, test_data)`` where
        the validation items are the first ``val_size`` entries returned by
        ``preprocess.loadTrainSet()`` and the train items are the rest.
    """
    # The labelled data set is loaded from the train.csv file.
    # (Parenthesised print keeps this line valid on Python 2 and 3.)
    print("loading data...")
    data, label = preprocess.loadTrainSet()
    # The data set is divided into train data and validation data.
    val_data = data[0:val_size]
    val_label = label[0:val_size]
    train_data = data[val_size:]
    train_label = label[val_size:]
    # The test data is loaded from the test.csv file.
    test_data = preprocess.loadTestSet()
    return train_data, train_label, val_data, val_label, test_data
def loaddata(val_size=6000):
    """Load train/test data and split the train data for validation.

    The body was previously not indented under the ``def``, which made the
    function unparsable; it is now a proper function body.

    Args:
        val_size: Number of leading entries of the labelled data reserved
            for validation. Defaults to 6000, the original fixed value.

    Returns:
        ``(train_data, train_label, val_data, val_label, test_data)``.
    """
    # The labelled data set is loaded from the train.csv file.
    # print(...) with a single string works on both Python 2 and Python 3.
    print("loading data...")
    data, label = preprocess.loadTrainSet()

    # Divide the loaded data set into train data and validation data.
    val_data = data[0:val_size]
    val_label = label[0:val_size]
    train_data = data[val_size:]
    train_label = label[val_size:]

    # The test data is loaded from the test.csv file.
    test_data = preprocess.loadTestSet()
    return train_data, train_label, val_data, val_label, test_data



def knn(train_data, train_label, val_data, val_label, test_data,
        name="knn_submission.csv", n_neighbors=20):
    """Fit a KNN classifier, report its validation score, and save predictions.

    The classifier is fitted on the training data, evaluated on the
    validation data via ``preprocess.evaluation``, and then used to predict
    the test data. The test predictions are handed to
    ``preprocess.saveResult``.

    Args:
        train_data: Training samples passed to ``fit``.
        train_label: Labels for ``train_data``.
        val_data: Validation samples passed to ``predict_proba``.
        val_label: Labels for ``val_data``, passed to the evaluation helper.
        test_data: Test samples passed to ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
        n_neighbors: Neighbour count given to ``KNeighborsClassifier``.
            Defaults to 20, the previously hard-coded value.
    """
    # Formatted single-argument print calls produce the same output on
    # Python 2 and Python 3.
    print("Start training KNN Classifier...")
    knnClf = KNeighborsClassifier(n_neighbors=n_neighbors)
    knnClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    # NOTE(review): the result is presumably a log-loss value, going by the
    # variable name and message -- confirm against preprocess.evaluation.
    val_pred_label = knnClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: {}".format(logloss))

    # Classify the test set and save the result.
    print("Start classify test set...")
    test_label = knnClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)



if __name__ == "__main__":
    # Script entry point: load the data, run the KNN pipeline, and report
    # the elapsed wall-clock time in seconds.
    t1 = time.time()
    train_data, train_label, val_data, val_label, test_data = loaddata()
    knn(train_data, train_label, val_data, val_label, test_data)
    t2 = time.time()
    # One pre-formatted string keeps the output ("Done! It cost <t> s")
    # identical on Python 2 and 3; a multi-argument print(...) call would
    # print a tuple on Python 2.
    print("Done! It cost {} s".format(t2 - t1))