Ejemplo n.º 1
0
            prob = prob + log(0.00004, 2)

            # perplex = float(-1)*(float(1)/float(v))*prob
            # perplex = 10**perplex
            # print perplex
    return prob


# Driver script: for every folder number in myset, call get_bigram for the
# "pos" and "neg" classes with the remaining folder numbers, then list the
# files of the current folder and log which folder is under test.
# NOTE(review): this looks like leave-one-folder-out evaluation where folder
# x is the test fold -- confirm against get_bigram's contract.
myset = Set([1, 2, 3, 4, 5])
# Independent copy so that nothing below ever mutates myset itself.
temp = Set(myset)
fp = open("results_unigram", "w+")
Pos_Dict = dict()
Neg_Dict = dict()
for x in myset:
    # Rebuild the set of remaining folders on every pass.  The previous code
    # aliased temp to myset and removed x from it, which changed the size of
    # the set being iterated (RuntimeError) and never put x back for the
    # following folds.
    temp = Set(myset)
    temp.remove(x)
    Pos_Dict = get_bigram("pos", "dataset", temp)
    # Calculate pos_perplexity
    print(len(Pos_Dict))
    Neg_Dict = get_bigram("neg", "dataset", temp)
    print(len(Neg_Dict))
    # Calculate neg_perplexity
    fpath = "dataset/" + str(x)
    test_file_p = os.listdir(fpath + "/pos")
    test_file_n = os.listdir(fpath + "/neg")
    # A single string is written, so write() is the matching call
    # (writelines() would iterate it character by character).
    fp.write("Test folder:" + str(x) + "\n")
    # test positive folder under test folder
    y_true = list()
    y_pred = list()

    Npos = 0
    Nneg = 0
	
	fh.close()
	'''


# Driver script: merge the results of get_bigram for the "pos" and "neg"
# classes into one dictionary and keep a sorted list of its keys.
myset = Set([1, 2, 3, 4, 5])
temp = myset
# fp = open('results_unigram','w+')
master_dict = dict()
Pos_Dict = dict()
Neg_Dict = dict()
path = 'dataset'
for x in myset:
    # NOTE: the hold-out step (temp.remove(x)) is disabled here, so every
    # pass hands the same, complete set of folder numbers to get_bigram.
    Pos_Dict = get_bigram('pos', path, temp)
    Neg_Dict = get_bigram('neg', path, temp)
    print("1")
    # For keys present in both results the "neg" value overwrites the "pos"
    # one, because update() is applied in this order.
    master_dict.update(Pos_Dict)
    master_dict.update(Neg_Dict)
    print("2")
    # sorted() over a dict yields its keys in ascending order.  The list is
    # built once; the former second, identical sort and the dead
    # `master = ()` placeholders were removed.
    master = sorted(master_dict)
    print("3")
    print("4")
	
Ejemplo n.º 3
0
	perplex = float(-1)*(float(1)/float(v))*prob
	perplex = 10000*(10**perplex)
	#print perplex
	return perplex

# Driver script: for every folder number in myset, call get_unigram and
# get_bigram for the "pos" and "neg" classes with the remaining folder
# numbers, list the files of the current folder, log which folder is under
# test and pass the "pos" dictionaries through convert().
# NOTE(review): this looks like leave-one-folder-out evaluation where folder
# x is the test fold -- confirm against the helpers' contracts.
myset = Set([1,2,3,4,5])
# Independent copy so that nothing below ever mutates myset itself.
temp = Set(myset)
fp = open('results_bigram','w+')
# Single definition of the dataset location used by every call below.
dataset_path = '/home/avj/Documents/NLP/NLP_BinaryClassifier/dataset'
Pos_Dict_uni= dict()
Pos_Dict_bi= dict()
Neg_Dict_uni= dict()
Neg_Dict_bi= dict()
for x in myset:
	# Rebuild the set of remaining folders on every pass.  The previous code
	# aliased temp to myset and removed x from it, which changed the size of
	# the set being iterated (RuntimeError) and never put x back for the
	# following folds.
	temp = Set(myset)
	temp.remove(x)
	Pos_Dict_uni = get_unigram('pos', dataset_path, temp)
	Pos_Dict_bi = get_bigram('pos', dataset_path, temp)
	# Calculate pos_perplexity
	print(len(Pos_Dict_bi))
	print(len(Pos_Dict_uni))
	Neg_Dict_uni = get_unigram('neg', dataset_path, temp)
	Neg_Dict_bi = get_bigram('neg', dataset_path, temp)
	print(len(Neg_Dict_bi))
	print(len(Neg_Dict_uni))
	# Calculate neg_perplexity
	fpath = dataset_path + '/' + str(x)
	test_file_p = os.listdir(fpath +'/pos')
	test_file_n = os.listdir(fpath+'/neg')
	# A single string is written, so write() is the matching call
	# (writelines() would iterate it character by character).
	fp.write("Test folder:"+str(x)+"\n")
	#test positive folder under test folder

	Pos_Dict_bi = convert(Pos_Dict_uni, Pos_Dict_bi)