Example #1
def correct_spelling(word_tokens, word_to_keep):

    for i in range(len(word_tokens)):
        w = Word(word_tokens[i])
        if w not in word_to_keep:
            word_tokens[i] = str(w.correct())
    return word_tokens
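A minimal usage sketch for correct_spelling above, assuming TextBlob is installed; the token lists are purely illustrative:

from textblob import Word

tokens = ["speling", "mistakes", "NASA"]
protected = ["NASA"]  # tokens that should never be auto-corrected
print(correct_spelling(tokens, protected))
# likely ['spelling', 'mistakes', 'NASA'], depending on TextBlob's spelling model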
Example #2
def onlyspellcorrection(query_tokens):
    "Corrects the spelling errors in the query"
    corrected_query = []
    for t in query_tokens:
        w = Word(t)
        a = w.correct()
        corrected_query.append(a)
    if cmp(corrected_query, query_tokens) == 0:
        return query_tokens
    else:
        s = ""
        for i in corrected_query:
            s += i
            s += " "
        print "Did you mean: " + s + "?"
Example #3
def spell_check(query):
    #split query
    splitted_query = query.split()
    #empty list for spell checked query
    corrected_query = []
    #searching freq_dict in db
    dict_collection = mongo.db["context_dict_collection"]
    freq_dict = dict_collection.find_one({"name": "freq_dict"})["freq_dict"]
    #stop words
    stop_words = get_stop_words("en")
    stop_words.append("can")
    #for each word in splitted query
    for word in splitted_query:
        #convert to textblob Word
        blob_word = Word(word)
        #all the possible corrections to word
        possible_corrections = blob_word.spellcheck()
        #initial counter
        freq_counter = 1
        #for the case when spelling is incorrect but no word in the documents can correct it
        at_least_one = False
        #in case the spelling is correct
        corrected_word = blob_word
        #for each possible correction in the word
        for p in possible_corrections:
            #p[0] is the candidate correction and p[1] its confidence score
            if p[0] in freq_dict.keys():
                #signifies at least one correction is present in dictionary so frequency based correction
                at_least_one = True
                #frequency of p[0]
                frequency = freq_dict[p[0]]
            else:
                frequency = 0
            #keeping highest frequency and corresponding word in record
            if frequency >= freq_counter and p[0] not in stop_words:
                freq_counter = frequency
                corrected_word = p[0]
        #no correction was present in dictionary
        if at_least_one is False:
            #return correction with highest score
            corrected_word = blob_word.correct()
        corrected_query.append(corrected_word)
    return " ".join(corrected_query)
Example #4
def spellcorrection(query_tokens):
    "Corrects the spelling errors in the query"
    corrected_query = []
    for t in query_tokens:
        w = Word(t)
        a = w.correct()
        corrected_query.append(a)
    if cmp(corrected_query, query_tokens) == 0:
        return query_tokens
    else:
        s = ""
        for i in corrected_query:
            s += i
            s += " "
        print "Did you mean: " + s + "?"
        choice = int(
            raw_input(
                "Press 1 to continue with the original query, otherwise Press 0\n"
            ))
        if choice == 0:
            return corrected_query
        else:
            return query_tokens
Example #5
File: nlp_1.py Project: DLTroyer/NLP
print(spanish)

from textblob import Word

index = Word('index')

print(index.pluralize())

animals = TextBlob('dog cat fish sheep bird').words

print(animals.pluralize())

cacti = Word('cacti')

print(cacti.singularize())

word = Word('theyr')

#returns the possible corrections and the confidence that each option is the intended word
print(word.spellcheck())

# correct() returns a new Word; printing word itself would still show 'theyr'
print(word.correct())

sentence = TextBlob('Ths sentense has missplled wrds.')

sentence = sentence.correct()

print(sentence)
Example #6
print(blob.sentiment)  #evaluates how positive vs. negative the statement is

sentences = blob.sentences

for sentence in sentences:
    print(sentence.sentiment)

print(blob.detect_language())

spanish = blob.translate(to="es")  #translate statement to spanish!

print(spanish)

from textblob import Word

index = Word("index")

print(index.pluralize())

animals = TextBlob('dog cat fish sheep bird').words

print(animals.pluralize())  #pluralizes words in blob

word = Word("theyr")

print(word.spellcheck())

print(word.correct())

#sentence = TextBlob("This sentence has misspelled wrds.")
Example #7
print(index.pluralize())

cacti = Word("cacti")

print(cacti.singularize())

animals = TextBlob("dog cat fish bird").words

print(animals.pluralize())

word = Word("theyr")

print(word.spellcheck())

corrected_word = word.correct()

print(corrected_word)

sentence = TextBlob("Ths sentnce has misspeled wrds.")
corrected_sentence = sentence.correct()

print(corrected_sentence)

#############

word1 = Word("studies")
word2 = Word("varieties")

print(word1.lemmatize())
print(word2.lemmatize())
Example #8
def correctSpel(str1):
    w = Word(str1)
    chk = w.spellcheck()
    correct1 = w.correct()
    print correct1
Example #9
print(w.lemmatize("n"))
w = Word("am")
print(w.lemmatize("v"))
w = Word("are")
print(w.lemmatize("v"))
w = Word("were")
print(w.lemmatize("v"))
w = Word("is")
print(w.lemmatize("v"))

# =============================================================================
# spell check
# =============================================================================
w = Word("havv")
print(w.spellcheck())
w = w.correct()
print(w)

# plural word list
t = TextBlob(
    "Data science is an inter-disciplinary fild that uses scientfic methods, processes, algoriths and systems to extract knwledge and insigts from many structural and unstructured data. Data science is related to data mining and big data."
)
print(t.spellcheck())  # does not work ... TextBlob has no such method (only Word does)
print(t.correct())

# get individual words
words = t.words
for w in words:
    print(w.spellcheck())
    print(w.correct())
    print("")
Example #10
            string)
        pattern = re.compile(r"(\w*\s*)*")
        match = pattern.match(string)
        print match.group()
        clean_file.write(match.group().strip())
        clean_file.write("\n")
    clean_file.close()
#tokenization, spell correction, and normalization
origin_Dict = {}
with open("cleaned/ring_clean.txt") as clean:
    i = 0
    for line in clean:
        list = []
        for word in line.split():
            w = Word(word)
            list.append(w.correct().singularize())
        if len(list) < 3:
            continue
        origin_Dict[i] = list
        i = i + 1
    print i
#POS tagging: filter to keep only nouns and verbs
filter_Dict = {}
tag_Dict = {}
for index, words in origin_Dict.items():
    tag_Dict[index] = nltk.pos_tag(words)
for index, tags in tag_Dict.items():
    tem = []
    for tag in tags:
        #print tag
        if tag[1] != 'NN' and tag[1] != 'VB':
sentence = TextBlob('Use 4 spaces per indentation level.')
print(sentence.words[2].singularize()) #similarly you can use pluralize
print()

#word lemmatization
w = Word("octopi")
print("octopi -> ",w.lemmatize())
w = Word("went")
print("went -> ",w.lemmatize("v"))
print()

#definition
print("Octopus : ",Word("octopus").definitions)
print()

#translation and language detection
en_blob = TextBlob(u'Simple is better than complex.')
print('Simple is better than complex.')
print("SPANISH : ",en_blob.translate(to='es'))
en_blob = TextBlob(u'Comment allez vous?')
print('Comment allez vous?')
print("language : ",en_blob.detect_language())
print()

#spell-check
w = Word("banama")
print("banama")
print("correction : ",w.correct())
print("suggestions : ",w.spellcheck())
print()
def classify_search_terms(paths, out_path, dict_path, log_progress=print):
    sc = SpellChecker()
    # Has to match the order of type of files
    # in paths.
    ptypes = ['organic', 'paid', 'none']
    extra_rows = [
        'Portfolio Classification', 'Clicks', 'Impressions', 'Average position'
    ]

    out_wb = Workbook(write_only=True)
    out_ws = out_wb.create_sheet()

    cls_dict = load_dict(dict_path)

    open_paths = []
    # First pass: open files,
    # generate simple portfolio classification
    pc = {}
    for ip, p in enumerate(paths):
        wb = load_workbook(p, read_only=True)
        ws = wb.active
        open_paths.append(ws)
        for i, row in enumerate(ws.rows, 1):
            # Skip first line and headers.
            if i < 3:
                continue
            st = row[0].value
            st = ' '.join(str(st).lower().split(' '))
            if not is_english(st):
                print('Not english:', repr(st))
                continue
            if st not in pc:
                pc[st] = []
            # Add this classification only
            # if it's not already there.
            if ptypes[ip] not in pc[st]:
                pc[st].append(ptypes[ip])

    none_matches = [0, 0, 0]
    partial_matches = [0, 0, 0]
    full_matches = [0, 0, 0]
    # The number of search terms in each portfolio.
    pt_totals = [0, 0, 0]
    from_partial_to_full = 0
    from_unclass_to_partial = 0
    from_unclass_to_full = 0
    # Click stats: paid and organic
    clicks = [0, 0]
    for ip, ws in enumerate(open_paths):
        i = 1
        log_progress('Starting processing rows...')
        for row in ws.rows:
            # Dealing with the first line in input file
            # and the header, but only when processing the first file.
            if ip == 0 and i < 3:
                r = row
                # Add header only at the beginning
                if i == 2:
                    r = [row[0].value, 'Semantic Classification'] + extra_rows
                else:
                    # Rewrite the first line from the input
                    r = [_r.value for _r in r]
                out_ws.append(r)
                i += 1
                continue
            elif i < 3:
                # Skip first lines and headers
                # for the rest of the files.
                i += 1
                continue
            # Starting to process the rest of the data
            # Getting the search term.
            st = row[0].value
            print('Search term:', st)
            if st is None:
                continue
            st = str(st).lower().split(' ')
            sst = ' '.join(st)
            if sst not in pc:
                continue
            csst = pc[sst]
            if csst == 'added':
                print('Search term has been already added')
                continue
            # Trying to classify.
            sclsi, tom, um_kws = classify_kws(st, cls_dict)
            # Try spell correct and classify again
            sst = ' '.join(st)
            corrected_st = sst
            current_tom = tom
            # Trying to improve those two
            if tom in ('partial', 'unclassified'):
                print('Trying to improve ', tom)
                # Going through keywords that didn't
                # have any match
                for ukw in um_kws:
                    w = Word(ukw)
                    wc = w.correct()
                    # If a spell-corrected word
                    # is new, let's use it
                    if wc != ukw:
                        corrected_st = corrected_st.replace(ukw, wc)
                        continue
                    wl = w.lemmatize()
                    if wl != ukw:
                        corrected_st = corrected_st.replace(ukw, wl)
                        continue
            # If we've changed the search term let's
            # classify it again
            if corrected_st != sst:
                #print('Corrected st from %s->%s' % (sst, corrected_st))
                corrected_st = corrected_st.split()
                sclsi, tom, um_kws = classify_kws(corrected_st, cls_dict)
                if current_tom == 'unclassified' and tom == 'partial':
                    print('Corrected from %s to %s!' % (current_tom, tom))
                    st = corrected_st
                    from_unclass_to_partial += 1
                elif current_tom in ('unclassified',
                                     'partial') and tom == 'full':
                    print('Corrected from %s to full!' % current_tom)
                    st = corrected_st
                    if current_tom == 'partial':
                        from_partial_to_full += 1
                    if current_tom == 'unclassified':
                        from_unclass_to_full += 1
            if tom == 'full':
                full_matches[ip] += 1
            elif tom == 'partial':
                partial_matches[ip] += 1
            elif tom == 'unclassified':
                none_matches[ip] += 1
            pt_totals[ip] += 1
            pclsi = ' | '.join(pc[sst])
            # Mark this search term as added
            # to prevent duplicates.
            pc[sst] = 'added'
            # Generating the output
            newrow = [row[0].value, sclsi, pclsi]
            if ptypes[ip] == 'organic' and pclsi.find('organic') != -1:
                # Fill in extra rows only from
                # organic file.
                newrow += [_r.value for _r in row[1:]]
                no_clicks = int(row[1].value)
                # Count clicks
                if pclsi.find('paid') != -1:
                    clicks[0] += no_clicks
                if pclsi.find('organic') != -1:
                    clicks[1] += no_clicks
            out_ws.append(newrow)
            if i % 1000 == 0:
                n = datetime.now()
                m = '%s:generated %d rows.' % (n, i)
                log_progress(m)
                print(m)
            i += 1
        #print(st, clsi)
    max_rows = i
    print('Total correction from partial to full: %d' % from_partial_to_full)
    print('Total correction from unclass to partial: %d' %
          from_unclass_to_partial)
    print('Total correction from unclass to full: %d' % from_unclass_to_full)
    print('Number of clicks', clicks)
    snames = ['full', 'partial', 'unclassified']
    stats = [full_matches, partial_matches, none_matches]
    out_ws.append(['Total:'])
    # Total stats for each portfolio.
    out_ws.append(['Match type/Portfolio'] + ['+'.join(ptypes)])

    for i, s in enumerate(stats):
        spp = '%.2f%%' % ((sum(s) * 100) / sum(pt_totals))
        out_ws.append([snames[i]] + [spp])
    out_ws.append([])
    # Per portfolio stats
    out_ws.append(['Per Portfolio:'])
    out_ws.append(['Match type/Portfolio'] + ptypes)
    for i, s in enumerate(stats):
        pp = []
        for i, m in enumerate(s):
            if pt_totals[i] == 0:
                pp.append('0%')
            else:
                pp.append('%.2f%%' % ((m * 100) / pt_totals[i]))
        out_ws.append([snames[i]] + pp)

    out_ws.append(['Classification improvements:'])
    out_ws.append(['Unclassified->Partial', from_unclass_to_partial])
    out_ws.append(['Partial->Full', from_partial_to_full])
    out_ws.append(['Unclassified->Full', from_unclass_to_full])

    # Generate venn diagram
    import tempfile
    # vd=venn2((clicks[0], abs(clicks[0]-clicks[1]),clicks[1]), set_labels = ('Paid', 'Organic'))
    # vdf = tempfile.NamedTemporaryFile()
    # vdf = vdf.name+'.png'
    # plt.savefig(vdf)
    # img = drawing.image.Image(vdf)
    # max_rows=out_ws._max_row+1
    # imgcell='A%d' % max_rows
    # out_ws.add_image(img, imgcell)

    log_progress('Saving output file...')
    out_wb.save(out_path)
def classify_search_terms2():
    # sc=SpellChecker()
    # Has to match the order of type of files
    # in paths.
    ptypes = ['organic', 'paid', 'none']
    extra_rows = [
        'Portfolio Classification', 'Clicks', 'Impressions', 'Average position'
    ]

    out_wb = Workbook(write_only=True)
    out_ws = out_wb.create_sheet()

    cls_dict = load_dict2()

    open_paths = []
    # First pass: open files,
    # generate simple portfolio classification
    pc = {}
    for term in SearchTerm.objects.all():
        st = ' '.join(str(term.query).lower().split(' '))
        if not is_english(st):
            print('Not english:', repr(st))
            continue
        if st not in pc:
            pc[st] = []
            # Add this classification only
            # if it's not already there.
        if term.search_portfolio.name not in pc[st]:
            pc[st].append(term.search_portfolio.name)

    organic_portfolio = SearchPortfolio.objects.get(name='organic')
    paid_portfolio = SearchPortfolio.objects.get(name='paid')

    none_matches = {}  # {'organic': 0, 'paid': 0, 'none': 3}
    partial_matches = {}
    full_matches = {}
    pt_totals = {}  # {'organic': 0, 'paid': 0, 'none': 3}
    for portfolio in SearchPortfolio.objects.all():
        none_matches[portfolio.name] = 0
        partial_matches[portfolio.name] = 0
        full_matches[portfolio.name] = 0
        pt_totals[portfolio.name] = portfolio.searchterm_set.count()

    from_partial_to_full = 0
    from_unclass_to_partial = 0
    from_unclass_to_full = 0
    # Click stats: paid and organic
    clicks = [0, 0]

    i = 1
    classifications_list = []
    SearchClassification.objects.all().delete()
    for term in SearchTerm.objects.all():
        # Starting to process the rest of the data
        # Getting the search term.
        st = term.query
        print('%s - Search term: %s' % (term.id, st))
        if st is None:
            continue
        st = str(st).lower().split(' ')
        sst = ' '.join(st)
        if sst not in pc:
            continue
        csst = pc[sst]
        if csst == 'added':
            print('Search term has been already added')
            continue
        # Trying to classify.
        sclsi, tom, um_kws = classify_kws(st, cls_dict)
        # Try spell correct and classify again
        sst = ' '.join(st)
        corrected_st = sst
        current_tom = tom
        # Trying to improve those two
        if tom in ('partial', 'unclassified'):
            print('Trying to improve ', tom)
            # Going through keywords that didn't
            # have any match
            for ukw in um_kws:
                w = Word(ukw)
                wc = w.correct()
                # If a spell-corrected word
                # is new, let's use it
                if wc != ukw:
                    corrected_st = corrected_st.replace(ukw, wc)
                    continue
                wl = w.lemmatize()
                if wl != ukw:
                    corrected_st = corrected_st.replace(ukw, wl)
                    continue
        # If we've changed the search term let's
        # classify it again
        if corrected_st != sst:
            #print('Corrected st from %s->%s' % (sst, corrected_st))
            corrected_st = corrected_st.split()
            sclsi, tom, um_kws = classify_kws(corrected_st, cls_dict)
            if current_tom == 'unclassified' and tom == 'partial':
                print('Corrected from %s to %s!' % (current_tom, tom))
                st = corrected_st
                from_unclass_to_partial += 1
            elif current_tom in ('unclassified', 'partial') and tom == 'full':
                print('Corrected from %s to full!' % current_tom)
                st = corrected_st
                if current_tom == 'partial':
                    from_partial_to_full += 1
                if current_tom == 'unclassified':
                    from_unclass_to_full += 1
        if tom == 'full':
            full_matches[term.search_portfolio.name] += 1
        elif tom == 'partial':
            partial_matches[term.search_portfolio.name] += 1
        elif tom == 'unclassified':
            none_matches[term.search_portfolio.name] += 1

        pclsi = ' | '.join(pc[sst])
        # Mark this search term as added
        # to prevent duplicates.
        pc[sst] = 'added'
        # Generating the output
        newrow = [term.query, sclsi, pclsi]
        if term.search_portfolio.name == 'organic' and pclsi.find(
                'organic') != -1:
            # Fill in extra rows only from
            # organic file.
            newrow += [
                term.clicks, term.impressions, term.avg_position, term.cost,
                term.conversions, term.total_conv
            ]
            no_clicks = int(term.clicks)
            # Count clicks
            if pclsi.find('paid') != -1:
                clicks[0] += no_clicks
            if pclsi.find('organic') != -1:
                clicks[1] += no_clicks
        out_ws.append(newrow)
        if i % 1000 == 0:
            n = datetime.now()
            m = '%s:generated %d rows.' % (n, i)
            print(m)
        i += 1

        search_classification = SearchClassification(
            query=term.query, search_dictionary_ids=sclsi, portfolio_ids=pclsi)
        if pclsi.find('organic') != -1:
            search_term = SearchTerm.objects.filter(
                query=term.query,
                search_portfolio_id=organic_portfolio.id).first()
            search_classification.organic_clicks = search_term.clicks
            search_classification.organic_impressions = search_term.impressions
            search_classification.organic_avg_position = search_term.avg_position
        if pclsi.find('paid') != -1:
            search_term = SearchTerm.objects.filter(
                query=term.query,
                search_portfolio_id=paid_portfolio.id).first()
            search_classification.paid_clicks = search_term.clicks
            search_classification.paid_impressions = search_term.impressions
            search_classification.paid_cost = search_term.cost
            search_classification.paid_conversions = search_term.conversions
            search_classification.paid_total_conv = search_term.total_conv
        print("query - %s" % search_classification.query)
        print("semantic - %s" % search_classification.search_dictionary_ids)
        search_classification.save()
        # classifications_list.append(search_classification)

    # SearchClassification.objects.bulk_create(classifications_list)
    print("SearchClassification records are created.")

    print('*****************')
    print(none_matches)
    print(partial_matches)
    print(full_matches)
    print(pt_totals)

    max_rows = i
    print("max_rows: %s" % max_rows)
    max_rows = SearchTerm.objects.count()
    print("max_rows: %s" % max_rows)
    print('Total correction from partial to full: %d' % from_partial_to_full)
    print('Total correction from unclass to partial: %d' %
          from_unclass_to_partial)
    print('Total correction from unclass to full: %d' % from_unclass_to_full)
    print('Number of clicks', clicks)

    snames = ['full', 'partial', 'unclassified']
    stats = [full_matches, partial_matches, none_matches]
    out_ws.append(['Total:'])
    # Total stats for each portfolio.
    out_ws.append(['Match type/Portfolio'] + ['+'.join(ptypes)])

    total_match = {}
    match = []
    for i, s in enumerate(stats):
        spp = '%.2f%%' % ((sum(s.values()) * 100) / sum(pt_totals.values()))
        out_ws.append([snames[i]] + [spp])
        total_match[snames[i]] = (sum(s.values()) * 100) / sum(
            pt_totals.values())
    out_ws.append([])

    # Per portfolio stats
    out_ws.append(['Per Portfolio:'])
    out_ws.append(['Match type/Portfolio'] + ptypes)

    for i, s in enumerate(stats):
        pp = []
        match_t = []
        for item in s.items():
            portfolio = SearchPortfolio.objects.get(name=item[0])
            if pt_totals[item[0]] == 0:
                pp.append('0%')
                match_t.append((portfolio.id, 0))
            else:
                pp.append('%.2f%%' % ((item[1] * 100) / pt_totals[item[0]]))
                match_t.append((portfolio.id,
                                round((item[1] * 100) / pt_totals[item[0]],
                                      2)))
        out_ws.append([snames[i]] + pp)
        match.append(match_t)

    out_ws.append(['Classification improvements:'])
    out_ws.append(['Unclassified->Partial', from_unclass_to_partial])
    out_ws.append(['Partial->Full', from_partial_to_full])
    out_ws.append(['Unclassified->Full', from_unclass_to_full])

    total_match['unclassified_partial'] = from_unclass_to_partial
    total_match['partial_full'] = from_partial_to_full
    total_match['unclassified_full'] = from_unclass_to_full

    SearchMatchTotal.objects.all().delete()
    total = SearchMatchTotal(**total_match)
    total.save()

    match_list = []
    for i in range(0, len(match[0])):
        item = [item[i] for item in match]
        print(item)
        match_list.append(
            SearchMatch(full=item[0][1],
                        partial=item[1][1],
                        unclassified=item[2][1],
                        search_portfolio_id=item[0][0]))
    SearchMatch.objects.all().delete()
    SearchMatch.objects.bulk_create(match_list)

    # Generate venn diagram

    # out_path = '/home/engineer/Projects/Python/Shilo/classified.xlsx'
    # out_wb.save(out_path)
Example #14
        #print line
        line = line.translate(None, '",.()!;/?0123456789')  # strip punctuation and digits
        #tem = TextBlob(line)
        #s = tem.correct()
        list = []
        for word in line.split():
            list.append(str(word).strip())
        wordDict[i] = list
        i = i + 1
#SpellCorrect: uses the TextBlob library; this takes a fairly long time.
correct_Dict = {}
for index, words in wordDict.items():
    list = []
    for word in words:
        w = Word(word)
        list.append(w.correct())
    correct_Dict[index] = list
#Singularization using TextBlob; also fairly slow.
singular_Dict = {}
for index, words in correct_Dict.items():
    list = []
    for word in words:
        list.append(str(word.singularize()))
    singular_Dict[index] = list

#stemming with nltk; some stems are no longer recognizable, so this block is commented out.
'''
stemed_Dict = {}  # dictionary of stemmed words
from nltk.stem import PorterStemmer
pst = PorterStemmer()
for index,words in singular_Dict.items():
Example #15
# Section 12.2.9 snippets
from textblob import Word

word = Word('theyr')

word.spellcheck()

word.correct()  # chooses word with the highest confidence value

from textblob import TextBlob

sentence = TextBlob('Ths sentense has missplled wrds.')

sentence.correct()

##########################################################################
# (C) Copyright 2019 by Deitel & Associates, Inc. and                    #
# Pearson Education, Inc. All Rights Reserved.                           #
#                                                                        #
# DISCLAIMER: The authors and publisher of this book have used their     #
# best efforts in preparing the book. These efforts include the          #
# development, research, and testing of the theories and programs        #
# to determine their effectiveness. The authors and publisher make       #
# no warranty of any kind, expressed or implied, with regard to these    #
# programs or to the documentation contained in these books. The authors #
# and publisher shall not be liable in any event for incidental or       #
# consequential damages in connection with, or arising out of, the       #
# furnishing, performance, or use of these programs.                     #
##########################################################################
Example #16
File: nlp_1.py Project: MPTauber/NLP
# for sentence in sent_list:  # shows scores for each sentence
#     print(sentence.sentiment)  # took off round, sentiment, and polarity because using a different analyzer that doesn't require it
#     ### shows first sentence is positive and second is negative

# ##########################################################
# print(blob.detect_language()) ### finds out which language this is in

# spanish = blob.translate(to='es')
# print(spanish)

# german = blob.translate(to='de')   # translates the "blob" variable to german
# print(german)

###########################################################
from textblob import Word

my_word = Word('theyr')
print(my_word.spellcheck()
      )  ## [('they', 0.5713042216741622), ('their', 0.42869577832583783)]
### means confidence is higher that the word is supposed to be 'they'

new_word = my_word.correct(
)  ## corrects it to 'they' automatically because confidence is higher for this
print(new_word)

my_sentence = TextBlob('Ths sentense has missplld wrds.')

new_sentence = my_sentence.correct()
print(new_sentence)
Example #17
from textblob import TextBlob, Word
from textblob.en import Spelling

MOCKDATA = "Hi, I cant spel at al, snd hlp."

test = TextBlob(MOCKDATA)

# spelling = Spelling(path=path)
# spelling.train()

for data in test.words:
    temp = Word(data)
    tempTuples = temp.spellcheck()
    print(temp)
    print(temp.spellcheck())
    print(temp.correct())
    for tuples in tempTuples:
        print(tuples[0])
        print(tuples[1])

        # print(tuples.confidence)
Example #18
def spellcheck(word):
    word_object = Word(word)
    return word_object.correct()
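A quick usage sketch for the wrapper above; the input is illustrative and the result depends on TextBlob's spelling model:

from textblob import Word

print(spellcheck("teh"))  # most likely prints 'the'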
for n in para.sentences[1].noun_phrases:
    print(n)
for t in para.sentences[1].tags:
    print(t)
    
print(para.sentences[2].words[1].singularize())



#Word operation
from textblob import Word
Word('horsases').singularize()
w = Word('better')
w.singularize()
w.pluralize()
w.correct()
w.lemmatize('a')
w = Word('horss')
w.spellcheck()



#ngrams
blob = TextBlob('I live in Modinagar')
print(blob.ngrams(2))
print(blob.ngrams(3))



#sentiments
blob = TextBlob('you are wise man')