Python Parser.average_sentence_length 예제들

프로그래밍 언어: Python

클래스/타입: Parser

메소드/함수: average_sentence_length

hotexamples.com에서의 예제들: 2

Python Parser.average_sentence_length - 2개의 예제가 발견되었습니다. 다음은 오픈소스 프로젝트에서 추출한 Parser.average_sentence_length 메서드의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

parseStatement(30)

parseStatements(12)

_parse_pred(10)

get_avg_convergence_for_x_and_set_size(4)

get_average_recall_ratios(4)

parseSVMInput(4)

parse_echo_params(3)

get_data_with_turnover_rates(3)

get_convergence_and_distinct_patterns_from_log_v1(3)

get_avg_perfect_recall_for_x_and_set_size(3)

getElements(3)

getElementText(3)

descriptionsMeans(3)

parseAnnotation(3)

get_dictionary_list_of_convergence_and_perfect_recall_for_turnover_rates(3)

parseFeatureVector(3)

get_dictionary_list_of_convergence_and_perfect_recall_for_dg_weightings(3)

get_avg_perfect_recall_and_avg_spurious_recall_from_data_for_configs(2)

BookmarksParser(2)

CalcMultipleCompoundsDescriptor(2)

CalcSingleCompoundDescriptor(2)

get_parser_info(2)

parseMapas(2)

getElementAttr(2)

average_sentence_length(2)

average_syllable_per_word(2)

Arc_Eager_Parser(2)

parseVirtualCommands(2)

parse_800_params(2)

DoublePass(2)

get_file_name(2)

identify_sentence(1)

parse_teams(1)

loadPoints(1)

lcs(1)

import_parser(1)

get_raw_data(1)

hasMoreCmds(1)

get_most_active_user(1)

get_label_encoded_data(1)

parse_quit(1)

get_most_watch_movie(1)

parse_rating(1)

parse_privmsg(1)

parse_end_names(1)

load_parsed_data_from_file(1)

parseFilesInDir(1)

parse_jpk(1)

parse_data_from_neocortical_consolidation_log_lines(1)

parse_acta(1)

예제 #1

파일 보기

파일: Calculator.py 프로젝트: Somsubhra/ReadabilityScore

class Calculator:
    """Compute readability indices from the statistics of one text file.

    Every statistic (word, sentence, syllable counts, ...) is obtained from
    a ``Parser`` built on the file name given to the constructor.  The
    methods divide by the word or sentence counts the parser reports, so a
    zero count surfaces as ``ZeroDivisionError``.
    """

    # Constructor for the Calculator class
    def __init__(self, filename):
        self.parser = Parser(filename)

    # Divide two parser statistics as floats
    @staticmethod
    def _ratio(numerator, denominator):
        # Explicit float() on both operands keeps true division on Python 2
        # even when the parser hands back integers.
        return float(numerator) / float(denominator)

    # Average number of words per sentence
    def _words_per_sentence(self):
        return self._ratio(self.parser.number_of_words(),
                           self.parser.number_of_sentences())

    # Calculate Automated Readability Index
    def automated_readability_index(self):
        """Return 4.71 * characters/words + 0.5 * words/sentences - 21.43."""
        characters_per_word = self._ratio(self.parser.number_of_characters(),
                                          self.parser.number_of_words())
        return 4.71 * characters_per_word + 0.5 * self._words_per_sentence() - 21.43

    # Calculate Gunning fog index
    def gunning_fog_index(self):
        """Return 0.4 * (words/sentences + 100 * polysyllables/words)."""
        polysyllable_share = self._ratio(self.parser.number_of_polysyllables(),
                                         self.parser.number_of_words())
        return 0.4 * (self._words_per_sentence() + 100 * polysyllable_share)

    # Calculate smog index
    def smog_index(self):
        """Return 1.0430 * sqrt(polysyllables * 30 / sentences) + 3.1291."""
        scaled_polysyllables = (float(self.parser.number_of_polysyllables()) * 30.0
                                / float(self.parser.number_of_sentences()))
        return 1.0430 * sqrt(scaled_polysyllables) + 3.1291

    # Calculate flesch reading ease
    def flesch_reading_ease(self):
        """Return 206.835 - 1.015 * words/sentences - 84.6 * syllables/words."""
        syllables_per_word = self._ratio(self.parser.number_of_syllables(),
                                         self.parser.number_of_words())
        # The sentence-length weight of the Flesch formula is 1.015; the
        # previous value 1.105 was a digit transposition.
        return 206.835 - 1.015 * self._words_per_sentence() - 84.6 * syllables_per_word

    # Calculate Flesch Kincaid grade level
    def flesch_kincaid_grade_level(self):
        """Return 0.39 * average sentence length + 11.8 * syllables/word - 15.59."""
        return (0.39 * float(self.parser.average_sentence_length())) \
               + (11.8 * self.parser.average_syllable_per_word()) \
               - 15.59

    # Calculate Coleman Liau index
    def coleman_liau_index(self):
        """Return 0.0588 * letters per 100 words - 0.296 * sentences per 100 words - 15.8."""
        return 0.0588 * (float(self.parser.average_letter_per_100_words())) \
               - 0.296 * (self.parser.average_sentences_per_100_words()) - 15.8

예제 #2

파일 보기

파일: Generator.py 프로젝트: Somsubhra/ReadabilityScore

class Generator:
    """Derive a linear difficulty formula from per-document training features.

    The constructor receives one sequence per feature (``asl``, ``awl``,
    ``asw``, ``psw30``, ``juk30``) plus the manually assigned ``difficulty``
    of the same documents.  ``generate()`` writes a correlation report,
    keeps the features that correlate with difficulty and fits a
    least-squares model; ``custom_index()`` applies that model to a file.
    """

    # Feature keys, in the fixed order used for the report, for feature
    # selection and for the fitted coefficients
    _FEATURE_KEYS = ('asl', 'awl', 'asw', 'psw30', 'juk30')

    # Parser method that supplies each feature when scoring a new file
    _PARSER_METHODS = {
        'asl': 'average_sentence_length',
        'awl': 'average_word_length',
        'asw': 'average_syllable_per_word',
        'psw30': 'number_of_polysyllables_per_30_words',
        'juk30': 'number_of_jukthakshar_per_30_words',
    }

    # A feature is kept only when |Pearson r| against difficulty exceeds this
    _CORRELATION_THRESHOLD = 0.4

    # Constructor for generator class
    def __init__(self, asl, awl, asw, psw30, juk30, difficulty, output_train_directory):
        self.asl = asl
        self.awl = awl
        self.asw = asw
        self.psw30 = psw30
        self.juk30 = juk30
        self.difficulty = difficulty
        self.output_train_directory = output_train_directory

    # Format one semicolon-separated report row with every cell double-quoted
    @staticmethod
    def _csv_row(cells):
        return '"' + '";"'.join(cells) + '"\n'

    # Map every feature key to the training sequence stored on the instance
    def _feature_columns(self):
        return dict((key, getattr(self, key)) for key in self._FEATURE_KEYS)

    # Append the correlation tables to stats_training.csv and return the
    # feature-vs-difficulty Pearson coefficients keyed by feature
    def _write_correlation_report(self, columns):
        keys = self._FEATURE_KEYS
        labels = [key.upper() for key in keys]

        difficulty_corr = {}
        for key in keys:
            difficulty_corr[key] = stats.pearsonr(self.difficulty, columns[key])[0]

        # Each unordered pair is computed once and mirrored, so the matrix
        # written below is symmetric with an empty diagonal.
        pair_corr = {}
        for position, first in enumerate(keys):
            for second in keys[position + 1:]:
                coefficient = stats.pearsonr(columns[first], columns[second])[0]
                pair_corr[first, second] = coefficient
                pair_corr[second, first] = coefficient

        blank_row = self._csv_row([''] * 7)
        header_row = self._csv_row([''] + labels)
        rows = [
            blank_row,
            header_row,
            self._csv_row(['Correlation'] + [str(difficulty_corr[key]) for key in keys]),
            blank_row,
            header_row,
        ]
        for row_key, label in zip(keys, labels):
            cells = [label]
            for column_key in keys:
                if column_key == row_key:
                    cells.append('')
                else:
                    cells.append(str(pair_corr[row_key, column_key]))
            rows.append(self._csv_row(cells))

        report_path = path.join(self.output_train_directory, 'stats_training.csv')
        # 'with' guarantees the handle is closed even if a write fails.
        with open(report_path, 'a') as output_file:
            output_file.writelines(rows)

        return difficulty_corr

    # Generate the equation using linear regression
    def generate(self):
        """Select features, fit the regression and print the formula.

        Side effects: appends the correlation tables to
        ``stats_training.csv`` inside ``output_train_directory`` and sets

        * ``self.features`` - keys whose absolute Pearson correlation with
          difficulty is above the threshold, in the fixed feature order;
        * ``self.coeff`` - one weight per entry of ``self.features``
          followed by the intercept as the last element.
        """
        print("Calculating correlations...")

        columns = self._feature_columns()
        difficulty_corr = self._write_correlation_report(columns)

        self.features = [key for key in self._FEATURE_KEYS
                         if abs(difficulty_corr[key]) > self._CORRELATION_THRESHOLD]

        print("Features selected %s" % (self.features,))

        print("Performing linear regression using manual difficulty and selected features...")

        sample_count = len(self.difficulty)
        feature_count = len(self.features)

        # Float dtype keeps fractional feature values (an integer cast would
        # truncate averages); the reshape keeps a (features, samples) layout
        # even when no feature was selected.
        x = np.array([columns[key] for key in self.features], dtype=float)
        x = x.reshape(feature_count, sample_count)
        y = np.array(self.difficulty, dtype=float)

        # The column of ones goes LAST so that coeff[i] is the weight of
        # features[i] and coeff[feature_count] is the intercept, which is
        # how the formula below and custom_index() read the coefficients.
        X = np.vstack([x, np.ones(sample_count)]).T

        # rcond=-1 pins the historical lstsq cutoff on every numpy version.
        self.coeff = np.linalg.lstsq(X, y, rcond=-1)[0]

        terms = ["(" + str(weight) + ") * (" + str(name) + ")"
                 for weight, name in zip(self.coeff, self.features)]
        terms.append("(" + str(self.coeff[feature_count]) + ")")
        formula = " + ".join(terms)

        print("Generated the following formula: ")
        print("---------------------------------------------------------------------------------")
        print(formula)
        print("---------------------------------------------------------------------------------")

    # Generate the custom index
    def custom_index(self, filename):
        """Return the fitted formula evaluated on the file ``filename``.

        Requires ``generate()`` to have run first, because it reads
        ``self.features`` and ``self.coeff``.  A ``Parser`` for the file is
        created and stored on ``self.parser``.
        """
        self.parser = Parser(filename)

        feature_count = len(self.features)

        index = 0.0
        for position, feature in enumerate(self.features):
            method_name = self._PARSER_METHODS.get(feature)
            if method_name is None:
                # Unknown keys contribute nothing, as before.
                continue
            value = getattr(self.parser, method_name)()
            index += float(value) * float(self.coeff[position])

        # The intercept is stored after the feature weights.
        index += float(self.coeff[feature_count])

        return index