Exemple #1
0
def do_clean(line, known_speakers):
    """Split a line into its leading known speaker and the remaining text.

    The line is wrapped in a TextBlob and stripped of newline characters.
    The first entry of ``known_speakers`` that the text starts with wins.

    Args:
        line: Raw text line.
        known_speakers: Iterable of speaker prefixes, tested in order.

    Returns:
        A ``(speaker, clean_line)`` tuple, where ``clean_line`` is whatever
        ``remove_prefix`` returns for the text and the matched speaker, or
        ``('-', '-')`` when no known speaker starts the line.
    """
    blob = TextBlob(line).replace('\n', '')
    # First speaker (in iteration order) that prefixes the text, if any.
    matched = next(
        (candidate for candidate in known_speakers
         if blob.startswith(candidate)),
        None,
    )
    if matched is None:
        return '-', '-'
    return matched, remove_prefix(blob, matched)
Exemple #2
0
def find_unknown_speakers(line, known_speakers):
    """Print a line that does not start with any known speaker.

    Lets a new, not-yet-listed speaker be spotted in the printed output.
    Before printing, every ``[...]`` span is collapsed to ``[]`` and every
    ``{...}`` span to ``{}``; a line that reduces to exactly ``[]`` or
    ``{}`` is not printed. Lines that are empty once newlines and spaces
    are removed are skipped as well.

    Args:
        line: Raw text line; it is wrapped in a TextBlob.
        known_speakers: Iterable of speaker prefixes to test against the
            start of the line.

    Returns:
        None. The only effect is the optional ``print``.
    """
    import re

    text = TextBlob(line).replace('\n', '')
    nws_text = text.replace(' ', '')
    speaker_is_known = any(
        text.startswith(test_speaker) for test_speaker in known_speakers)
    if not speaker_is_known and nws_text != '':
        # Raw strings keep the backslashes as regex escapes instead of
        # (invalid) Python string escapes.
        print_text = re.sub(r"\[.*?\]", "[]", str(text))
        print_text = re.sub(r"\{.*?\}", "{}", print_text)
        if print_text not in ('[]', '{}'):
            print(print_text)
Exemple #3
0
def selectResponse(dclass, sentence):
    """Pick a canned reply for a classified user sentence.

    Args:
        dclass: Class label of the sentence. ``'greeting'``, ``'goodbye'``
            and ``'identity'`` are recognised; anything else yields the
            fallback reply.
        sentence: The user's text. Only inspected for the ``'greeting'``
            class, where it is wrapped in a TextBlob.

    Returns:
        A reply string. For greetings that start with ``"how are you"``
        (case-sensitive) the reply is fixed; otherwise one entry of the
        matching list is chosen via ``rd.choice``.

    Note:
        ``rd`` is not defined in this block; presumably it is the
        ``random`` module imported under an alias -- confirm at file level.
    """
    # "Hello" previously carried a stray leading apostrophe ("'Hello")
    # that was shown to the user verbatim.
    GREETING_RESPONSES = ["Hello", "Hey", "Hi", "Good day to you", "Hi there"]
    GOODBYE_RESPONSES = [
        "See you later", "Bye", "Talk to you later", "Bye for now", "Take care"
    ]
    IDENTITY_RESPONSES = [
        "I am LifeBot. How can I help you ?",
        " I am LifeBot, I can give healthy recommendations."
    ]

    if dclass == 'greeting':
        # Named ``blob`` rather than ``input`` so the builtin is not shadowed.
        blob = TextBlob(sentence)
        if blob.startswith("how are you"):
            return "I am fine Thanks."
        return rd.choice(GREETING_RESPONSES)
    if dclass == 'goodbye':
        return rd.choice(GOODBYE_RESPONSES)
    if dclass == 'identity':
        return rd.choice(IDENTITY_RESPONSES)
    return 'Sorry, I dont understand that'
Exemple #4
0
def _retag_filter(words, keep_tag):
    """Re-tag *words* as one space-joined text; keep words whose new tag passes *keep_tag*."""
    retagged = TextBlob(' '.join(words)).tags
    return [word for word, tag in retagged if keep_tag(tag)]


def parseFile(filename):
    """Read a text file and sort its words into part-of-speech pools.

    The file's lines are joined with spaces, tagged through
    ``TextBlob(...).tags`` and every word of at least ``min_word_length``
    characters is placed in a pool according to its tag:

    * tag starts with ``'J'``  -> adjectives
    * tag starts with ``'V'``  -> verbs (lemmatized with
      ``Word(word).lemmatize('v')`` when ``lemmatize_verbs`` is set)
    * tag is ``'NN'``/``'NNS'`` -> nouns; ``'NNP'`` as well when
      ``include_proper_nouns`` is set
    * tag starts with ``'RB'`` -> adverbs

    When ``second_pass`` is set, the first-pass counts are printed, adverbs
    are narrowed to words ending in ``"ly"``, and the noun, verb and
    adjective pools are each re-tagged on their own and filtered again.

    ``min_word_length``, ``lemmatize_verbs``, ``include_proper_nouns`` and
    ``second_pass`` are read from the enclosing module scope; they are not
    defined in this block and are assumed to be an int and three booleans.

    Args:
        filename: File name, opened as ``"." + os.sep + filename`` in text
            mode with the platform default encoding.

    Returns:
        A tuple ``([adverbs, adjectives, nouns, verbs], wordcount,
        usedcount)`` where ``wordcount`` is the number of tagged tokens and
        ``usedcount`` is the total size of the four pools.

    Side effects:
        Sets the module-level ``poolstring`` to a summary of the final pool
        sizes, and prints the first-pass summary when ``second_pass`` is
        set.
    """
    global poolstring

    # The path is built by plain concatenation on purpose: it keeps the
    # original lookup behaviour for every filename value.
    with open("." + os.sep + filename, 'r') as f:
        lines = f.readlines()

    postuples = TextBlob(' '.join(lines)).tags

    adverbs = []
    adjectives = []
    nouns = []
    verbs = []

    for word, tag in postuples:
        if len(word) < min_word_length:
            continue
        if tag.startswith('J'):
            adjectives.append(word)
        if tag.startswith('V'):
            # Convert to the base form of the verb if requested.
            if lemmatize_verbs:
                word = Word(word).lemmatize('v')
            verbs.append(word)
        if tag in ('NN', 'NNS'):
            nouns.append(word)
        if tag == 'NNP' and include_proper_nouns:
            nouns.append(word)
        if tag.startswith('RB'):
            adverbs.append(word)

    # Second round: tag each pool in isolation and drop words whose tag no
    # longer matches the pool they were put in.
    if second_pass:
        print("\n[First Pass] Adv: {} Adj: {} Nouns: {} Verbs: {}".format(
            len(adverbs), len(adjectives), len(nouns), len(verbs)))

        adverbs = [word for word in adverbs if word.endswith("ly")]
        # Proper nouns survive this pass regardless of include_proper_nouns,
        # as before.
        nouns = _retag_filter(nouns, lambda tag: tag in ('NN', 'NNS', 'NNP'))
        verbs = _retag_filter(verbs, lambda tag: tag.startswith('V'))
        adjectives = _retag_filter(adjectives,
                                   lambda tag: tag.startswith('J'))

    poolstring = "[Final Pool] Adv: {} Adj: {} Nouns: {} Verbs: {}".format(
        len(adverbs), len(adjectives), len(nouns), len(verbs))

    wordcount = len(postuples)
    usedcount = len(adverbs) + len(adjectives) + len(nouns) + len(verbs)

    return [adverbs, adjectives, nouns, verbs], wordcount, usedcount