def preprocess_text(text):
    """Take a text, generate readability and syntactic features, and return them as a dict.

    Args:
        text (str): the text to be preprocessed.

    Returns:
        dict: a dictionary of feature names with associated values.
    """
    text = _simplify_punctuation(text)
    features = {
        "flesch_reading_ease": textstat.flesch_reading_ease(text),
        "smog_index": textstat.smog_index(text),
        "flesch_kincaid_grade": textstat.flesch_kincaid_grade(text),
        "coleman_liau_index": textstat.coleman_liau_index(text),
        "automated_readability_index": textstat.automated_readability_index(text),
        "dale_chall_readability_score": textstat.dale_chall_readability_score(text),
        "difficult_words": textstat.difficult_words(text),
        "linsear_write_formula": textstat.linsear_write_formula(text),
        "gunning_fog": textstat.gunning_fog(text),
        "text_standard": textstat.text_standard(text, float_output=True),
        "mean_parse_tree_depth": get_mean_parse_tree_depth(text),
        "mean_ents_per_sentence": get_mean_ents_per_sentence(text),
    }
    features.update(get_mean_pos_tags(text))
    return features
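The helpers referenced above (`_simplify_punctuation`, `get_mean_parse_tree_depth`, `get_mean_ents_per_sentence`, `get_mean_pos_tags`) are defined elsewhere in the project. As an illustration only, a minimal sketch of what `get_mean_parse_tree_depth` might look like, assuming a spaCy pipeline is available:

import spacy

# Assumption: a small English pipeline is installed
# (python -m spacy download en_core_web_sm).
_nlp = spacy.load("en_core_web_sm")

def get_mean_parse_tree_depth(text):
    """Hypothetical helper: mean dependency-parse depth per sentence."""
    doc = _nlp(text)
    depths = []
    for sent in doc.sents:
        max_depth = 0
        for token in sent:
            depth = 0
            node = token
            while node.head is not node:  # walk up to the sentence root
                node = node.head
                depth += 1
            max_depth = max(max_depth, depth)
        depths.append(max_depth)
    return sum(depths) / len(depths) if depths else 0.0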
def get_readability_scores(self, doc):
    segment = doc.text
    readability_dict = {
        "automated_readability_index": textstat.automated_readability_index(segment),
        "coleman_liau_index": textstat.coleman_liau_index(segment),
        "dale_chall_readability_score": textstat.dale_chall_readability_score(segment),
        "difficult_words": textstat.difficult_words(segment),
        "flesch_kincaid_grade": textstat.flesch_kincaid_grade(segment),
        "flesch_reading_ease": textstat.flesch_reading_ease(segment),
        "gunning_fog": textstat.gunning_fog(segment),
        "linsear_write_formula": textstat.linsear_write_formula(segment),
        "smog_index": textstat.smog_index(segment),
        "text_standard": self._convert_text_standard_to_integer(
            textstat.text_standard(segment)
        ),
    }
    return readability_dict
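`_convert_text_standard_to_integer` is defined elsewhere on the same class. A rough sketch, assuming `textstat.text_standard` returns a string such as "8th and 9th grade" when `float_output` is left at its default, could simply pull out the first grade number:

import re

def _convert_text_standard_to_integer(self, text_standard):
    """Hypothetical method on the same class: extract the first grade number
    from a string like '8th and 9th grade'; return 0 if no digits are found."""
    match = re.search(r"\d+", text_standard)
    return int(match.group()) if match else 0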
def parse_comment(subreddit_name, body):
    # raw metrics
    sentences = ts.sentence_count(body)
    words = ts.lexicon_count(body)
    syllables = ts.syllable_count(body)
    # words with three or more syllables
    trisyllabic = ts.polysyllabcount(body)

    # derived readability metrics
    fk_grade = ts.flesch_kincaid_grade(body)
    smog = ts.smog_index(body)

    return (subreddit_name, sentences, words, syllables, trisyllabic,
            fk_grade, smog)
def get_score(text):
    # Normalize each textstat metric against precomputed corpus statistics
    # (the MEAN_*/STD_* module-level constants).
    scores = []
    scores.append((tst.avg_sentence_length(text) - MEAN_SL) / STD_SL)
    scores.append((tst.avg_letter_per_word(text) - MEAN_AL) / STD_AL)
    scores.append(tst.avg_sentence_per_word(text))
    scores.append((tst.sentence_count(text) - MEAN_SC) / STD_SC)
    scores.append((tst.flesch_kincaid_grade(text) - MEAN_GRADE) / MEAN_GRADE)
    scores.append((tst.flesch_reading_ease(text) - 50) / 50)
    scores.append((tst.smog_index(text) - MEAN_GRADE) / MEAN_GRADE)
    scores.append((tst.coleman_liau_index(text) - MEAN_GRADE) / MEAN_GRADE)
    scores.append((tst.automated_readability_index(text) - MEAN_GRADE) / MEAN_GRADE)
    scores.append((tst.dale_chall_readability_score(text) - MEAN_GRADE) / MEAN_GRADE)
    scores.append((tst.linsear_write_formula(text) - MEAN_GRADE) / MEAN_GRADE)
    scores.append((tst.gunning_fog(text) - MEAN_GRADE) / MEAN_GRADE)
    return scores
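The `tst` alias and the MEAN_*/STD_* constants are assumed to be defined at module level. A minimal usage sketch, with purely illustrative placeholder values (the real ones would be estimated from whatever reference corpus the project uses):

import textstat as tst

# Hypothetical corpus statistics, illustrative values only.
MEAN_SL, STD_SL = 15.0, 5.0    # average sentence length (words)
MEAN_AL, STD_AL = 4.5, 0.6     # average letters per word
MEAN_SC, STD_SC = 20.0, 12.0   # sentence count
MEAN_GRADE = 9.0               # typical grade-level score

feature_vector = get_score("Readable sentences make for readable documents.")
print(len(feature_vector))  # 12 normalized features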
def post(self, args):
    text = args['text']
    readability = {}
    readability["flesch_reading_ease"] = textstat.flesch_reading_ease(text)
    readability["flesch_kincaid_grade"] = textstat.flesch_kincaid_grade(text)
    readability["smog_index"] = textstat.smog_index(text)
    readability["coleman_liau_index"] = textstat.coleman_liau_index(text)
    readability["automated_readability_index"] = textstat.automated_readability_index(text)
    readability["dale_chall_readability_score"] = textstat.dale_chall_readability_score(text)
    readability["linsear_write_formula"] = textstat.linsear_write_formula(text)
    readability["gunning_fog"] = textstat.gunning_fog(text)
    readability["text_standard"] = textstat.text_standard(text)
    readability["difficult_words"] = textstat.difficult_words(text)
    return jsonify(readability)
sentences = sent_tokenize(text)
words = word_tokenize(text)
words_per_sentence = [word_tokenize(sent) for sent in sentences]
non_space_chars = re.sub(space_special_chars, '', text)

words_len = pd.Series([len(word) for word in words])
sentences_len = pd.Series([len(sent) for sent in sentences])
len_words_per_sentence = pd.Series([len(wps) for wps in words_per_sentence])

word_stats = words_len.describe()
word_stats.index = ['Word ' + i for i in word_stats.index]
sent_stats = sentences_len.describe()
sent_stats.index = ['Sentence ' + i for i in sent_stats.index]
wps_stats = len_words_per_sentence.describe()
wps_stats.index = ['Words per sentence ' + i for i in wps_stats.index]

info_dict['Name'] = name
info_dict['Total characters'] = len(non_space_chars)
# info_dict['Total sentences'] = len(sentences)
info_dict.update(word_stats.to_dict())
info_dict.update(sent_stats.to_dict())
info_dict.update(wps_stats.to_dict())
info_dict['Flesch-Kincaid'] = textstat.flesch_kincaid_grade(text)
info_dict['Gunning fog'] = textstat.gunning_fog(text)
info_dict['SMOG'] = textstat.smog_index(text)
info_dicts.append(info_dict)

df = pd.DataFrame(info_dicts)
df.to_csv(r'C:\Users\Krista\DocumentsRE _Call_re_potential_matter\code_results_contents_removed.csv')
def get_delta(self):
    return abs(
        textstat.smog_index(self.input_data)
        - textstat.smog_index(self.output_data)
    )
def get_score(self):
    # SMOG needs sentence boundaries; treat line breaks as sentence ends.
    self.input_data = self.input_data.replace("\n", ". ")
    return textstat.smog_index(self.input_data)
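These two methods appear to belong to a small scorer object that compares an input text with a transformed output. A minimal hypothetical wrapper, shown here only to make the calling context concrete:

import textstat

class SmogDelta:
    """Hypothetical wrapper: compare the SMOG grades of an input text and a
    rewritten output (e.g., before and after simplification)."""

    def __init__(self, input_data, output_data):
        self.input_data = input_data
        self.output_data = output_data

    def get_delta(self):
        return abs(textstat.smog_index(self.input_data)
                   - textstat.smog_index(self.output_data))

scorer = SmogDelta("The original passage, written in long sentences.",
                   "The short rewrite. It uses small words.")
print(scorer.get_delta())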
from textstat import textstat

if __name__ == '__main__':
    # Sample sentence used to exercise each textstat metric
    test_data = 'The quick brown fox jumps over the lazy dog'

    print(textstat.flesch_reading_ease(test_data))
    print(textstat.smog_index(test_data))
    print(textstat.flesch_kincaid_grade(test_data))
    print(textstat.coleman_liau_index(test_data))
    print(textstat.automated_readability_index(test_data))
    print(textstat.dale_chall_readability_score(test_data))
    print(textstat.difficult_words(test_data))
    print(textstat.linsear_write_formula(test_data))
    print(textstat.gunning_fog(test_data))
    print(textstat.text_standard(test_data))
def index():
    data = request.json
    print(f'Debug: {data}')

    unique_id = data['unique_id']
    process_language = data['process_language']
    message = data['message']

    matches_list = None
    if process_language:
        # LanguageTool takes a while to process
        language_tool = LanguageTool('en-US')
        matches: list[Match] = language_tool.check(message)
        matches_list = []
        for match in matches:
            matches_list.append(match_to_dict(match))
        print(f'Analysis finished: {matches_list}')

    sentences: list = splitter.split(text=message)

    return {
        'unique_id': unique_id,
        'text_statistics': {
            'lexicon_count': textstat.lexicon_count(message),
            'lexicon_count_ps': list_map(sentences, textstat.lexicon_count),
            'syllable_count': textstat.syllable_count(message),
            'syllable_count_ps': list_map(sentences, textstat.syllable_count),
            'sentences': sentences,
            'sentence_count': len(sentences),
            'readability': {
                'flesch_reading_ease': {
                    'score': textstat.flesch_reading_ease(message),
                    'sps': list_map(sentences, textstat.flesch_reading_ease)
                },
                'smog_index': {
                    'score': textstat.smog_index(message)
                },
                'flesch_kincaid_grade': {
                    'score': textstat.flesch_kincaid_grade(message),
                    'sps': list_map(sentences, textstat.flesch_kincaid_grade)
                },
                'coleman_liau_index': {
                    'score': textstat.coleman_liau_index(message),
                    'sps': list_map(sentences, textstat.coleman_liau_index)
                },
                'automated_readability_index': {
                    'score': textstat.automated_readability_index(message),
                    'sps': list_map(sentences, textstat.automated_readability_index)
                },
                'dale_chall_readability_score': {
                    'score': textstat.dale_chall_readability_score(message),
                    'sps': list_map(sentences, textstat.dale_chall_readability_score)
                },
                'difficult_words': {
                    'score': textstat.difficult_words(message),
                    'sps': list_map(sentences, textstat.difficult_words),
                    'words': textstat.difficult_words_list(message)
                },
                'linsear_write_formula': {
                    'score': round(textstat.linsear_write_formula(message), 2),
                    'sps': list_map(sentences, textstat.linsear_write_formula)
                },
                'gunning_fog': {
                    'score': textstat.gunning_fog(message),
                    'sps': list_map(sentences, textstat.gunning_fog)
                },
                'text_standard': {
                    'score': textstat.text_standard(message)
                }
            }
        },
        'language_tool': matches_list
    }
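`list_map` and `match_to_dict` are helpers assumed to exist elsewhere in the service; `match_to_dict` presumably serializes a LanguageTool `Match` into a JSON-safe dict, and `list_map` presumably just applies a metric to every sentence, roughly:

def list_map(items, fn):
    """Hypothetical helper: apply a metric function to each item (here, each
    sentence) and return the per-item results as a list."""
    return [fn(item) for item in items]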