Example #1
import json
import operator
from collections import defaultdict

# emulate_util and make_sentence are project-local helpers assumed to be in scope.


def generate_response(input, debug=0):

    with open("utils/emulate/relationships.json") as data_file:
        relationship_data = json.load(data_file)

    key_words = emulate_util.key_words(input)
    total_sentence_weight = sum(key_words.values()) * 1.0

    print(input)

    # Convert each weight to a percentage, then invert it (the result is a percentage).
    for key_word in key_words:
        key_words[key_word] = (1.0 -
                               (key_words[key_word] / total_sentence_weight))
        print(key_word + ": " + str(key_words[key_word]))

    #these are the "result words" which come from our key words in the sentence
    #The algorithm = key_word(% score) * (result_word(score) / occurance of results word)

    result_set = defaultdict(int)

    for key_word in key_words:
        if key_word in relationship_data:
            for result_word in relationship_data[key_word]:
                occurrence = emulate_util.word_occurance(result_word) * 2
                word_score = 0
                if occurrence > 0:
                    word_score = key_words[key_word] * (
                        relationship_data[key_word][result_word] / occurrence)
                if word_score > 0.4:
                    result_set[result_word] += word_score

    # for result_word in sorted(result_set, key=result_set.get):
    #     print(result_word + " : " + str(result_set[result_word]))

    # Trim anything that doesn't score at least the MIN_CUTOFF value:
    # final_words = {k: v for k, v in result_set.items() if v > MIN_CUTOFF}

    # Keep only the five highest-scoring result words.
    final_words = dict(
        sorted(result_set.items(),
               key=operator.itemgetter(1),
               reverse=True)[:5])

    print(final_words)

    return make_sentence(list(final_words.keys()))
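All of the examples lean on a relationships.json file and an emulate_util module that are not shown here. A minimal sketch of the shapes this first example appears to assume (every name and number below is hypothetical, chosen only to make the sketch self-contained):

# Hypothetical shape of utils/emulate/relationships.json, inferred from how the
# function indexes it: relationship_data[key_word][result_word] -> numeric score.
relationships = {
    "coffee": {"morning": 6, "cup": 4, "espresso": 9},
    "rain": {"umbrella": 7, "cloud": 5},
}

# Hypothetical stand-ins for the emulate_util helpers the function calls.
def key_words(sentence):
    """Return each word of the sentence with a naive weight (here: its length)."""
    return {word: len(word) for word in sentence.lower().split()}

def word_occurance(word):
    """Return how often the word appears in some corpus (stubbed with fixed counts)."""
    corpus_counts = {"morning": 3, "cup": 5, "espresso": 1, "umbrella": 2, "cloud": 4}
    return corpus_counts.get(word, 0)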
Example #2
import json
import operator
from collections import defaultdict

# emulate_util and make_sentence are project-local helpers assumed to be in scope.


def generate_response(input, debug=0):

    with open("utils/emulate/relationships.json") as data_file:
        relationship_data = json.load(data_file)

    key_words = emulate_util.key_words(input)
    total_sentence_weight = sum(key_words.values()) * 1.0

    print(input)

    # Convert each weight to a percentage, then invert it (the result is a percentage).
    for key_word in key_words:
        key_words[key_word] = 1.0 - (key_words[key_word] / total_sentence_weight)
        print(key_word + ": " + str(key_words[key_word]))

    # these are the "result words" which come from our key words in the sentence
    # The algorithm = key_word(% score) * (result_word(score) / occurance of results word)

    result_set = defaultdict(int)

    for key_word in key_words:
        if key_word in relationship_data:
            for result_word in relationship_data[key_word]:
                occurrence = emulate_util.word_occurance(result_word) * 2
                word_score = 0
                if occurrence > 0:
                    word_score = key_words[key_word] * (relationship_data[key_word][result_word] / occurrence)
                if word_score > 0.4:
                    result_set[result_word] += word_score

    # for result_word in sorted(result_set, key=result_set.get):
    #     print(result_word + " : " + str(result_set[result_word]))

    # Trim anything that doesn't score at least the MIN_CUTOFF value:
    # final_words = {k: v for k, v in result_set.items() if v > MIN_CUTOFF}

    # Keep only the five highest-scoring result words.
    final_words = dict(sorted(result_set.items(), key=operator.itemgetter(1), reverse=True)[:5])

    print(final_words)

    return make_sentence(list(final_words.keys()))
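Example #2 is the same logic as Example #1 with less line wrapping, so the weighting step is easier to read here: each key word's share of the total sentence weight is inverted, which pushes words that carry less of that weight closer to 1.0. A standalone run of just that step, with made-up weights:

# Made-up key-word weights; the total sentence weight is 8.0.
key_words = {"coffee": 2.0, "espresso": 6.0}
total_sentence_weight = sum(key_words.values()) * 1.0

for key_word in key_words:
    key_words[key_word] = 1.0 - (key_words[key_word] / total_sentence_weight)

print(key_words)  # {'coffee': 0.75, 'espresso': 0.25}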
Example #3
import json
import operator
from collections import defaultdict

import nltk

# emulate_util and make_sentence are project-local helpers assumed to be in scope.


def generate_response(input, debug=0):

    output = ""

    with open("utils/emulate/relationships.json") as data_file:
        relationship_data = json.load(data_file)

    key_words_temp = emulate_util.key_words(input)
    key_words = {}
    total_sentence_weight = sum(key_words_temp.values()) * 1.0

    if len(key_words_temp) < 1:
        return "I didn't understand your dumbass message"

    if debug: output += "Input Keywords: "

    # Tag each key word with its part of speech ("word#POS").
    key_word_pos = nltk.pos_tag(nltk.word_tokenize(input))
    for word in key_words_temp:
        for word_with_pos in key_word_pos:
            if word == word_with_pos[0].lower():
                key_words[word + "#" + word_with_pos[1]] = key_words_temp[word]

    # If there are multiple words, convert each weight to a percentage, then invert it.
    if len(key_words) > 1:
        for key_word in key_words:
            key_words[key_word] = 1.0 - (key_words[key_word] / total_sentence_weight)
            if debug:
                output += key_word + ": " + str(key_words[key_word]) + "\n"

    # These are the "result words", which come from the key words in the sentence.
    # Score = key_word(% score) * (result_word score / occurrence of the result word)
    result_set = defaultdict(int)
    for key_word in key_words:
        if key_word in relationship_data:
            for result_word in relationship_data[key_word]:
                occurrence = emulate_util.word_occurance(result_word.split("#", 1)[0]) * 2
                word_score = 0
                if occurrence > 0:
                    word_score = key_words[key_word] * (relationship_data[key_word][result_word] / occurrence)
                    result_set[result_word] += word_score

    # for result_word in sorted(result_set, key=result_set.get):
    #     print(result_word + " : " + str(result_set[result_word]))

    # Trim anything that doesn't score at least the MIN_CUTOFF value:
    # final_words = {k: v for k, v in result_set.items() if v > MIN_CUTOFF}

    # Keep the five highest-scoring result words, then strip the POS suffix.
    sorted_words = dict(sorted(result_set.items(), key=operator.itemgetter(1), reverse=True)[:5])
    final_words = []

    for word in sorted_words:
        final_words.append(word.split("#", 1)[0])

    if debug: output += "Response Keywords: " + str(final_words) + "\n\n"

    if len(final_words) < 1:
        output += "I don't understand..."
    else:
        output += make_sentence(final_words)

    return output
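Unlike the first two examples, this variant keys the relationship data on "word#POS" strings built with NLTK's tokenizer and part-of-speech tagger. A small sketch of how those keys come about (it assumes the NLTK punkt and averaged_perceptron_tagger models are available; the sentence and the tags shown are only illustrative):

import nltk

# One-time model downloads; safe to re-run.
nltk.download("punkt", quiet=True)
nltk.download("averaged_perceptron_tagger", quiet=True)

sentence = "I love strong coffee"
# pos_tag returns (token, tag) pairs, e.g. [('I', 'PRP'), ('love', 'VBP'), ...]
tagged = nltk.pos_tag(nltk.word_tokenize(sentence))

# Build the "word#POS" keys the same way the example above does.
keys = [token.lower() + "#" + tag for token, tag in tagged]
print(keys)  # e.g. ['i#PRP', 'love#VBP', 'strong#JJ', 'coffee#NN']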
Example #4
import json
import operator
from collections import defaultdict

import nltk

# emulate_util and make_sentence are project-local helpers assumed to be in scope.


def generate_response(input, debug=0):

    output = ""

    with open("utils/emulate/relationships.json") as data_file:
        relationship_data = json.load(data_file)

    key_words_temp = emulate_util.key_words(input)
    key_words = {}
    total_sentence_weight = sum(key_words_temp.values()) * 1.0

    if len(key_words_temp) < 1:
        return "I didn't understand your dumbass message"

    if debug: output += "Input Keywords: "

    # Tag each key word with its part of speech ("word#POS").
    key_word_pos = nltk.pos_tag(nltk.word_tokenize(input))
    for word in key_words_temp:
        for word_with_pos in key_word_pos:
            if word == word_with_pos[0].lower():
                key_words[word + "#" + word_with_pos[1]] = key_words_temp[word]

    # If there are multiple words, convert each weight to a percentage, then invert it.
    if len(key_words) > 1:
        for key_word in key_words:
            key_words[key_word] = (
                1.0 - (key_words[key_word] / total_sentence_weight))
            if debug:
                output += key_word + ": " + str(key_words[key_word] * 1) + "\n"

    #these are the "result words" which come from our key words in the sentence
    #The algorithm = key_word(% score) * (result_word(score) / occurance of results word)
    result_set = defaultdict(int)
    #pdb.set_trace()
    for key_word in key_words:
        if key_word in relationship_data:
            for result_word in relationship_data[key_word]:
                occurrence = emulate_util.word_occurance(
                    result_word.split("#", 1)[0]) * 2
                word_score = 0
                if occurrence > 0:
                    word_score = key_words[key_word] * (
                        relationship_data[key_word][result_word] / occurrence)
                    result_set[result_word] += word_score

    # for result_word in sorted(result_set, key=result_set.get):
    #     print(result_word + " : " + str(result_set[result_word]))

    # Trim anything that doesn't score at least the MIN_CUTOFF value:
    # final_words = {k: v for k, v in result_set.items() if v > MIN_CUTOFF}

    # Keep the five highest-scoring result words, then strip the POS suffix.
    sorted_words = dict(
        sorted(result_set.items(),
               key=operator.itemgetter(1),
               reverse=True)[:5])
    final_words = []

    for word in sorted_words:
        final_words.append(word.split("#", 1)[0])

    if debug: output += "Response Keywords: " + str(final_words) + "\n\n"

    if len(final_words) < 1:
        output += "I don't understand..."
    else:
        output += make_sentence(final_words)

    return output
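Across all four variants the core scoring line is the same: the key word's inverted percentage is multiplied by the relationship score divided by twice the result word's corpus occurrence. A worked run of that arithmetic with made-up numbers:

key_word_score = 0.75    # inverted percentage weight of the key word
relationship_score = 9   # relationship_data[key_word][result_word]
occurrence = 1 * 2       # emulate_util.word_occurance(result_word) * 2

word_score = key_word_score * (relationship_score / occurrence)
print(word_score)  # 0.75 * (9 / 2) = 3.375, well above the 0.4 cutoff of Examples #1 and #2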