def create_classifier_dump():
    """Train the naive Bayes classifier and pickle it to disk.

    Calls ``create_feature1(.01, .01, 500, 500)``, passes the two values it
    returns (``pos``, ``neg``) to ``naive_bayes_classifier`` and writes the
    resulting classifier with ``pickle`` to ``pth.TEMP + '/classifier'``.

    Returns:
        None. The only result is the pickle file on disk.
    """
    # NOTE(review): the original also called pu.convs_list_exp() and
    # pu.convs_list_nonexp() and bound the results to locals that were never
    # read. They are dropped here on the assumption that both are
    # side-effect-free getters -- confirm against the pu module.
    pos, neg = create_feature1(.01, .01, 500, 500)
    classifier = naive_bayes_classifier(pos, neg)

    # pickle expects a binary file object; the ``with`` block guarantees the
    # handle is flushed and closed even if pickle.dump raises.
    with open(pth.TEMP + '/classifier', 'wb') as class_file:
        pickle.dump(classifier, class_file)
def mk_nonexp_wordfreq():
    """Build a word-frequency table for the non-exp conversations and pickle it.

    For every conversation returned by ``pu.convs_list_nonexp()``, each
    message from ``pu.get_msgs_in(conv)`` is split on whitespace and its
    words are lower-cased. A word is counted at most once per conversation,
    so each count in the resulting ``nltk.FreqDist`` is the number of
    conversations that contain the word. ``None`` messages are skipped.

    The distribution is written with ``pickle`` to ``pth.NonexpDict``.

    Returns:
        None. The only result is the pickle file on disk.
    """
    # NOTE(review): the original opened pth.NonexpConvs for reading here but
    # never read from or closed it (the handle was simply rebound later).
    # That leaked open() is removed; as a consequence a missing
    # pth.NonexpConvs file no longer raises from this function.
    nonexp_freq = nltk.FreqDist()

    for conv in pu.convs_list_nonexp():
        # Words already counted for this conversation; a set keeps the
        # membership test O(1) instead of scanning a growing list.
        seen = set()
        for msg in pu.get_msgs_in(conv):
            if msg is None:
                continue
            for word in msg.split():
                lowered = word.lower()
                if lowered not in seen:
                    seen.add(lowered)
                    # Item increment instead of FreqDist.inc(), which was
                    # removed in NLTK 3; both spellings are equivalent on
                    # NLTK 2.
                    nonexp_freq[lowered] += 1

    # pickle expects a binary file object; the ``with`` block guarantees the
    # data is flushed and the handle closed.
    with open(pth.NonexpDict, 'wb') as out_file:
        pickle.dump(nonexp_freq, out_file)