def compute_features():
    """Create the training-set CSV and run every feature extractor.

    Writes ``./package/training_set_3.csv`` containing a ``serial no``
    header row followed by the serial numbers 1 through 9, then calls each
    feature extractor, in order, with ``("./package/test_final.txt", 3)``.

    The extractors are defined elsewhere; what they read or write is not
    visible from this function.
    """
    chat_path = "./package/test_final.txt"
    # Presumably matches the "3" in training_set_3.csv -- TODO confirm
    # against the extractor implementations.
    set_number = 3

    # The context manager closes the file even if a write raises.
    # NOTE(review): on Python 3 the csv docs recommend newline="" here;
    # the mode is left untouched because the target version is unclear.
    with open("./package/training_set_3.csv", "w") as training_file:
        writer = csv.writer(training_file)
        writer.writerow(["serial no"])
        for serial in range(1, 10):
            writer.writerow([serial])

    # Average words per line.
    # (Original note: writes a text file named number_word_<n> -- unverified.)
    average_word(chat_path, set_number)

    # Word length per line.
    # (Original note: writes a text file named word_length_<n> -- unverified.)
    word_length(chat_path, set_number)

    # Uppercase/lowercase ratio.
    ratio(chat_path, set_number)

    # The "time to reply" feature mentioned by the original author has no
    # extraction call here.

    # Smiley count (original note: computed per 10 lines -- unverified).
    smiley_count(chat_path, set_number)

    # Stopword usage (original note: computed per 10 lines -- unverified).
    stopword_usage(chat_path, set_number)

    # Punctuation usage.
    punctuation(chat_path, set_number)

    # Message length.
    msg_length(chat_path, set_number)

    # Acronym count.
    acro_line(chat_path, set_number)
def compute_features():
    """Create the training-set CSV and run every feature extractor.

    Writes ``./package/training_set_3.csv`` containing a ``serial no``
    header row followed by the serial numbers 1 through 9, then calls each
    feature extractor, in order, with ``("./package/test_final.txt", 3)``.

    The extractors are defined elsewhere; what they read or write is not
    visible from this function.
    """
    chat_path = "./package/test_final.txt"
    # Presumably matches the "3" in training_set_3.csv -- TODO confirm
    # against the extractor implementations.
    set_number = 3

    # The context manager closes the file even if a write raises.
    # NOTE(review): on Python 3 the csv docs recommend newline="" here;
    # the mode is left untouched because the target version is unclear.
    with open("./package/training_set_3.csv", "w") as training_file:
        writer = csv.writer(training_file)
        writer.writerow(["serial no"])
        for serial in range(1, 10):
            writer.writerow([serial])

    # Average words per line.
    # (Original note: writes a text file named number_word_<n> -- unverified.)
    average_word(chat_path, set_number)

    # Word length per line.
    # (Original note: writes a text file named word_length_<n> -- unverified.)
    word_length(chat_path, set_number)

    # Uppercase/lowercase ratio.
    ratio(chat_path, set_number)

    # The "time to reply" feature mentioned by the original author has no
    # extraction call here.

    # Smiley count (original note: computed per 10 lines -- unverified).
    smiley_count(chat_path, set_number)

    # Stopword usage (original note: computed per 10 lines -- unverified).
    stopword_usage(chat_path, set_number)

    # Punctuation usage.
    punctuation(chat_path, set_number)

    # Message length.
    msg_length(chat_path, set_number)

    # Acronym count.
    acro_line(chat_path, set_number)
# Run each feature extractor on both users' chat logs.
# `first` and `second` come from the enclosing scope (not shown here).
# The logs are read from ./chats_process/<first>_<second>/<first>.txt and
# ./chats_process/<first>_<second>/<second>.txt; the paths are built once
# instead of being re-concatenated for every call, and both ids go through
# str() so a non-string id no longer raises TypeError in the file name.
first_id = str(first)
second_id = str(second)
pair_dir = './chats_process/' + first_id + '_' + second_id + '/'
first_chat = pair_dir + first_id + '.txt'
second_chat = pair_dir + second_id + '.txt'

# Average words per line.
# (Original note: writes number_word_<n> inside the pair's folder --
# unverified from here.)
average_word(first_chat, first_id, second_id, 1)
average_word(second_chat, first_id, second_id, 2)

# Word length per line.
# (Original note: writes a text file named word_length_<n> -- unverified.)
word_length(first_chat, first_id, second_id, 1)
word_length(second_chat, first_id, second_id, 2)

# Uppercase/lowercase ratio.
ratio(first_chat, first_id, second_id, 1)
ratio(second_chat, first_id, second_id, 2)

# Smiley count (original note: computed per 10 lines -- unverified).
smiley_count(first_chat, first_id, second_id, 1)
smiley_count(second_chat, first_id, second_id, 2)

# Stopword usage (original note: computed per 10 lines -- unverified).
stopword_usage(first_chat, first_id, second_id, 1)
stopword_usage(second_chat, first_id, second_id, 2)
# Single-argument print(...) emits the same text on Python 2 and Python 3,
# whereas the bare `print x` statement is a syntax error on Python 3.
print(f)
extract(f)
# Original note: two files, 1_chat and 2_chat, exist after extract() --
# presumably ./package/1_chat.txt and ./package/2_chat.txt read below;
# verify against extract().

# Average words per line.
# (Original note: writes a text file named number_word_<n> -- unverified.)
average_word("./package/1_chat.txt", 1)
average_word("./package/2_chat.txt", 2)
print("average_word_feature extracted")

# Word length per line.
# (Original note: writes a text file named word_length_<n> -- unverified.)
word_length("./package/1_chat.txt", 1)
word_length("./package/2_chat.txt", 2)
print("word_length_feature extracted")

# Uppercase/lowercase ratio.
ratio("./package/1_chat.txt", 1)
ratio("./package/2_chat.txt", 2)
print("ratio_feature extracted")

# The "time to reply" feature mentioned by the original author has no
# extraction call here.

# Smiley count (original note: computed per 10 lines -- unverified).
smiley_count("./package/1_chat.txt", 1)
smiley_count("./package/2_chat.txt", 2)
print("smiley count extracted")

# Stopword usage (original note: computed per 10 lines -- unverified).
stopword_usage("./package/1_chat.txt", 1)
stopword_usage("./package/2_chat.txt", 2)