コード例 #1
0
def processConversation(conversation):
    """Rebuild the global ``bag_of_words`` from one conversation string.

    The text is split on ".", the pieces are passed through
    ``PreProcess.tokenize_sentences`` and then
    ``PreProcess.RemovePunctAndStopWords``, and the lower-cased results are
    counted with ``FreqDist``.

    Args:
        conversation: Text to process; must support ``split(".")``.

    Returns:
        None. The result is published through the module-level
        ``bag_of_words``.
    """
    global bag_of_words
    # Reset up front: if the pipeline below raises, the global holds an
    # empty dict rather than the previous call's counts.
    bag_of_words = {}
    raw_sentences = conversation.split(".")
    kept_tokens = PreProcess.RemovePunctAndStopWords(
        PreProcess.tokenize_sentences(raw_sentences)
    )
    bag_of_words = FreqDist(token.lower() for token in kept_tokens)
コード例 #2
0
def processConversation(conversation, category):
    """Count the filtered tokens of a conversation and register the counts.

    The text is split on ".", run through ``PreProcess.tokenize_sentences``
    and ``PreProcess.RemovePunctAndStopWords``, and every resulting token is
    tallied (case-sensitively) into the global ``bag_of_words``. The finished
    tally is then handed to ``addTermFrequency``.

    Args:
        conversation: Text to process; must support ``split(".")``.
        category: Accepted but not used by this function body.

    Returns:
        None. Results are published through the module-level
        ``bag_of_words`` and through the ``addTermFrequency`` call.
    """
    # documentClass is declared global here but is neither read nor written.
    global bag_of_words, documentClass
    bag_of_words = {}
    raw_sentences = conversation.split(".")
    kept_tokens = PreProcess.RemovePunctAndStopWords(
        PreProcess.tokenize_sentences(raw_sentences)
    )
    for token in kept_tokens:
        # Missing tokens start from 0, so the first sighting stores 1.
        bag_of_words[token] = bag_of_words.get(token, 0) + 1
    # NOTE: raw counts are passed on as-is; an earlier
    # calculateFrequencies(len(filtered)) step was left commented out.
    addTermFrequency(bag_of_words)
コード例 #3
0
def processConversation(conversation, category):
    """Tally a conversation's tokens and file the result under ``category``.

    The text is split on ".", run through ``PreProcess.tokenize_sentences``
    and ``PreProcess.RemovePunctAndStopWords``, and every resulting token is
    tallied (case-sensitively) into the global ``bag_of_words``. That global
    is then replaced by the return value of ``calculateFrequencies``, called
    with the number of filtered tokens. Finally the result is stored in the
    global ``documentClass`` under ``category``, combined with any existing
    entry via ``merge_two_dicts``.

    Args:
        conversation: Text to process; must support ``split(".")``.
        category: Key under which the result is stored in ``documentClass``.

    Returns:
        None. Results are published through the module-level
        ``bag_of_words`` and ``documentClass``.
    """
    global bag_of_words, documentClass
    bag_of_words = {}
    raw_sentences = conversation.split(".")
    kept_tokens = PreProcess.RemovePunctAndStopWords(
        PreProcess.tokenize_sentences(raw_sentences)
    )

    for token in kept_tokens:
        # Missing tokens start from 0, so the first sighting stores 1.
        bag_of_words[token] = bag_of_words.get(token, 0) + 1

    # Must run after the tally above: calculateFrequencies only receives the
    # token total, so it presumably reads the raw counts from the global
    # bag_of_words -- TODO confirm against its definition.
    bag_of_words = calculateFrequencies(len(kept_tokens))

    if category not in documentClass:
        # First conversation seen for this category.
        documentClass[category] = bag_of_words
    else:
        documentClass[category] = merge_two_dicts(
            documentClass[category], bag_of_words
        )