# Example no. 1
# 0
def main(args):
    """Run the train/test pipeline of module ``a`` and print the zero-one loss.

    ``args`` is indexed like ``sys.argv``; index 0 is not read.

    * ``args[1]`` -- training file, passed to ``a.read_data``.
    * ``args[2]`` -- test file, passed to ``a.read_data``.
    * ``args[3]`` -- class label selector, converted with ``int`` and handed
      to ``a.create_binary_feature``.
    * ``args[4]`` -- converted with ``int``; when it equals ``1`` the result
      of ``a.frequency`` is printed through ``a.printTopwords``.

    Returns ``None``; the loss is written to standard output.  An
    ``IndexError`` is raised when ``args`` has fewer than five entries, and
    errors from the ``int`` conversions propagate unchanged.
    """
    train_path = args[1]
    test_path = args[2]
    class_label = int(args[3])
    print_words = int(args[4])

    train = a.read_data(train_path)
    test = a.read_data(test_path)

    # Frequency information is computed from the training data only and then
    # reused for both data sets (the original note calls this the "top 2000
    # frequency" -- the cut-off lives inside ``a.frequency``, not here).
    fre = a.frequency(train)

    if print_words == 1:
        a.printTopwords(fre)

    # Both data sets are converted with the same frequency information.
    train = a.create_binary_feature(train, fre, class_label)
    test = a.create_binary_feature(test, fre, class_label)

    # Fit on the training data, then predict on the test data.
    prob_table, p_yes, p_no = a.train_nbc(train)
    predictions = a.test_nbc(prob_table, test, p_yes, p_no)

    # The last element of every converted test row is used as its true label.
    # Kept in its own name instead of overwriting ``class_label``.
    true_labels = [row[-1] for row in test]

    # NOTE(review): ``zero_onr_loss`` looks like a typo of ``zero_one_loss``;
    # left untouched because module ``a`` is not visible here -- confirm.
    loss = a.zero_onr_loss(predictions, true_labels)

    # Parenthesised single-argument form: same output on Python 2 and 3.
    print("ZERO-ONE-LOSS {0}".format(loss))
# Example no. 2
# 0
def crossValidation(data):
    """Print per-fold zero-one losses for several training-set sizes.

    ``data`` is split once with ``kfold(data, 10)``.  For every training-set
    size in ``[100, 250, 500, 1000, 2000]`` and every fold index, that fold
    is the test set, the other nine folds are pooled, and ``random.sample``
    draws the requested number of rows from the pool.  The sample is fitted
    with ``nbc.train_nbc``, evaluated with ``nbc.test_nbc`` and scored with
    ``nbc.zero_one_loss`` against the last element of each test row.

    For each size the list of ten losses, their ``numpy.mean`` and
    ``standard_error(loss, 10)`` are printed.  Returns ``None``.

    Raises ``ValueError`` (from ``random.sample``) when a requested size
    exceeds the number of pooled training rows.
    """
    num_folds = 10
    folds = kfold(data, num_folds)

    for tss in [100, 250, 500, 1000, 2000]:
        # The doubled space reproduces the output of the former
        # ``print "tss = ", tss`` statement on both Python 2 and 3.
        print("tss =  {0}".format(tss))
        loss = []

        for held_out in range(num_folds):
            test_set = folds[held_out]

            # Pool every fold except the held-out one.
            train_pool = []
            for other in range(num_folds):
                if other != held_out:
                    train_pool += folds[other]

            train_set = random.sample(train_pool, tss)
            prob_table, pYes, pNo = nbc.train_nbc(train_set)
            predictions = nbc.test_nbc(prob_table, test_set, pYes, pNo)

            # The last element of every test row is used as its true label.
            true_labels = [row[-1] for row in test_set]
            loss.append(nbc.zero_one_loss(predictions, true_labels))

        print(loss)
        print("mean:  " + str(numpy.mean(loss)))
        print("std error:  " + str(standard_error(loss, num_folds)))