Example #1
0
    def build_alt_words_table(n):
        """Build a word -> alternate-words table from the n-gram regexp CSV.

        Reads out/<n>-gram-regexp.csv (lines of "key;w1 w2 ..."), records
        every word on a line as an alternate of every other word on that
        line (including itself, matching the original behavior), and writes
        the table to out/lwlm-alt-words-<n>-grams.csv as "word;alt1 alt2 ...".
        """
        Logger.log_message('Building alternate words table based on ' + str(n) + '-grams')

        # word -> set of alternates.  Using a set replaces the original
        # quadratic "if alt_word not in <list>" membership test and makes
        # the later deduplication step unnecessary.
        alt_words = {}

        in_file = path.join('out', str(n) + '-gram-regexp.csv')
        Logger.log_message('Reading ' + in_file)

        # Context manager guarantees the handle is closed even on error.
        with open(in_file) as input_file:
            for line in input_file:
                words = line.split(';')[1].split()
                for word in words:
                    alt_words.setdefault(word, set()).update(words)

        Logger.log_success('Finished reading ' + in_file)

        out_file = path.join('out', 'lwlm-alt-words-' + str(n) + '-grams.csv')
        Logger.log_message('Writing alternate words table to ' + out_file)

        with open(out_file, 'w') as output_file:
            for word, alts in alt_words.items():
                output_file.write(word + ';' + ' '.join(alts) + '\n')

        Logger.log_success('Alternate words table has been written to ' + out_file)
Example #2
0
    def run(self):
        """Compute ITFIDF scores for every word present in both dictionaries.

        Loads the term-frequency and document-frequency dictionaries from
        their CSV files (word;count per line), then stores
        (max_tf * max_df) / (tf * df) per word in self.itfidf and dumps
        the results to self.out_file.
        """
        Logger.log_message("Running ITFIDF")

        Logger.log_message("Reading " + self.tf_dict)
        with open(self.tf_dict) as tf_dict_file:
            for line in tf_dict_file:
                cols = line.split(";")
                self.tf_dictionary[cols[0]] = int(cols[1])

        Logger.log_message("Reading " + self.df_dict)
        # BUG FIX: the original never closed this file handle; "with" does.
        with open(self.df_dict) as df_dict_file:
            for line in df_dict_file:
                cols = line.split(";")
                self.df_dictionary[cols[0]] = int(cols[1])

        max_tf = max(self.tf_dictionary.values())
        max_df = max(self.df_dictionary.values())

        # Only the tf-membership test is needed: we are iterating
        # df_dictionary itself, so the original inner re-check of
        # "word in self.df_dictionary" was always true.
        for word in self.df_dictionary:
            if word in self.tf_dictionary:
                self.itfidf[word] = (max_tf * max_df) / (self.tf_dictionary[word] * self.df_dictionary[word])

        Logger.log_message("Writing results to " + self.out_file)
        self.dump_results()
        Logger.log_success("Finished writing results to " + self.out_file)
Example #3
0
    def run(self):
        """Count syllables for every file under the input directory.

        Builds the per-word syllable dictionary from self.dict_file
        (word;...;count per line), then walks self.in_dir and runs
        count_syllables on each file before dumping the results.
        """
        Logger.log_message("Reading " + self.dict_file)

        # Build up the syllable count dictionary; "with" guarantees close.
        with open(self.dict_file, 'r') as dictionary:
            for line in dictionary:
                cols = line.split(';')
                # Column 2 carries the syllable count for the word in column 0.
                self.syllable_val[cols[0]] = int(cols[2])

        # Check for the input directory.  OSError is what stat() raises for
        # a missing path — narrower than the original bare except, which
        # would also have swallowed KeyboardInterrupt/SystemExit.
        try:
            stat(self.in_dir)
        except OSError:
            Logger.log_error('Input text not found')
            return

        Logger.log_message('Running syllable counter')

        for (dir_path, _, file_names) in walk(self.in_dir):
            for file_name in file_names:
                in_file = path.join(dir_path, file_name)
                self.count_syllables(in_file)

        Logger.log_success('Finished syllable counting')

        Logger.log_message('Writing results to ' + self.out_file)
        self.dump_results()
        Logger.log_success('Finished writing results to ' + self.out_file)
Example #4
0
    def run(self):
        """Stem every file under the input tree into the output directory.

        Validates self.in_dir, creates self.out_dir if missing, then walks
        the input tree and stems each file into a flattened output name
        that encodes its original directory path.
        """
        # Check for the input directory (OSError: path does not exist) —
        # narrower than the original bare except.
        try:
            stat(self.in_dir)
        except OSError:
            Logger.log_error('Input text not found')
            return

        # Create the output directory if it does not exist yet.
        try:
            stat(self.out_dir)
        except OSError:
            makedirs(self.out_dir)

        Logger.log_message('Started stemming')

        # Walk through the input directory tree.
        for (dir_path, _, file_names) in walk(self.in_dir):
            for file_name in file_names:
                in_file = path.join(dir_path, file_name)
                # Flatten the source directory into the output file name so
                # identically-named files from different folders cannot collide.
                out_file = path.join(self.out_dir, file_name + '_' + dir_path.replace('/', '_') + '.txt')

                Stemmer.stem_file(in_file, out_file)

        Logger.log_success('Finished stemming')
Example #5
0
    def run(self):
        """Count document frequencies across every file in the input tree.

        Seeds the frequency/seen dictionaries from self.dict_file
        (word;... per line), then walks self.in_dir tallying which
        documents contain each term, and finally dumps the results.
        """
        Logger.log_message('Reading ' + self.dict_file)

        # BUG FIX: the original never closed this file handle; "with" does.
        with open(self.dict_file) as dictionary:
            # Construct the dictionary of known words.
            for line in dictionary:
                items = line.split(";")
                self.document_frequencies[items[0]] = 0
                self.document_words[items[0]] = False

        # Check for the input directory (OSError: missing path) — narrower
        # than the original bare except.
        try:
            stat(self.in_dir)
        except OSError:
            Logger.log_error('Input text not found')
            return

        Logger.log_message('Running document frequency counter')

        for (dir_path, _, file_names) in walk(self.in_dir):
            for file_name in file_names:
                in_file = path.join(dir_path, file_name)
                self.terms_in_document(in_file)

        Logger.log_message('Finished document frequency counting')

        Logger.log_message('Writing results to ' + self.out_file)
        self.dump_results()
        Logger.log_success('Finished writing results to ' + self.out_file)
Example #6
0
    def run(self):
        """Aggregate per-file statistics into per-grade totals.

        Reads self.in_file (path;chars;words;syllables;sentences per line),
        derives a grade index from the file name, accumulates the four
        counters per grade, and writes the merged table via dump_results.
        """
        Logger.log_message("Running merger on " + self.in_file)

        with open(self.in_file) as input_file:
            for line in input_file:
                # Skip blank lines and comment lines; startswith() is safe
                # where the original line[0] raised IndexError on "".
                if not line.strip() or line.startswith("#"):
                    continue

                cols = line.split(";")
                file_name = str(cols[0].split("/")[-1])

                # NOTE(review): this looks up only the FIRST CHARACTER of
                # the file name in self.file_names.  Presumably the grade
                # is encoded in the leading character — confirm this is
                # intended and not a bug for .index(file_name).
                grade = self.file_names.index(file_name[0]) + 1

                # Lazily initialize all four counters the first time a
                # grade appears.
                if grade not in self.number_of_words:
                    self.number_of_chars[grade] = 0
                    self.number_of_words[grade] = 0
                    self.number_of_syllables[grade] = 0
                    self.number_of_sentences[grade] = 0

                self.number_of_chars[grade] += int(cols[1])
                self.number_of_words[grade] += int(cols[2])
                self.number_of_syllables[grade] += int(cols[3])
                self.number_of_sentences[grade] += int(cols[4])

        Logger.log_message("Writing merged results to " + self.out_file)
        self.dump_results()
        Logger.log_success("Results written to " + self.out_file)
    def run(self):
        """Compute the Flesch-Kincaid grade for every grade level.

        Reads the merged stats file (grade;chars;words;syllables;sentences
        per line), applies the Flesch-Kincaid grade-level formula per
        grade, dumps the results, and reports the Pearson correlation
        between the actual grade and the computed FK grade.
        """
        Logger.log_message("Running Readability Calculator on " +
                           self.stats_file)

        with open(self.stats_file) as input_file:
            for line in input_file:
                # Skip blank lines and comments; startswith() is safe where
                # the original line[0] raised IndexError on "".
                if not line.strip() or line.startswith("#"):
                    continue

                cols = line.split(";")

                grade = int(cols[0])
                words = float(cols[2])
                syllables = float(cols[3])
                sentences = float(cols[4])

                # Standard Flesch-Kincaid grade-level formula.
                flesch_kincaid_grade = 0.39 * (words / sentences) + \
                    11.8 * (syllables / words) - 15.59

                self.readability_grade[grade] = flesch_kincaid_grade

        Logger.log_message("Writing results to " + self.out_file)
        self.dump_results()
        Logger.log_success("Results written to " + self.out_file)

        # BUG FIX: on Python 3, dict .keys()/.values() return views that
        # numpy/scipy do not treat as array-like; materialize them as lists
        # before handing them to pearsonr.
        grades = list(self.readability_grade.keys())
        fk_grades = list(self.readability_grade.values())

        correlation = pearsonr(grades, fk_grades)
        Logger.log_result("Correlation between grade level and Flesch Kincaid grade: "
                          + str(correlation))
Example #8
0
def main():
    """Entry point: run the web server when asked, otherwise rebuild tables."""
    args = sys.argv[1:]

    # "server" as the first CLI argument short-circuits into server mode.
    if args and args[0] == 'server':
        run_server()
        return

    Logger.log_message("Running application Simplify")
    cleanup()
    LWLM.build_tables('corpus')
    Logger.log_success("Application exited successfully")
Example #9
0
    def build_tables(in_dir):
        """Build the LWLM n-gram tables and their alternate-word tables.

        For each configured n (3 and 5): run the NGram counter over in_dir,
        then derive the alternate-words table from its output.  The loop
        replaces the original copy-pasted 3-gram/5-gram blocks (DRY) while
        producing the same calls and log lines in the same order.
        """
        for n in (3, 5):
            Logger.log_message('Building ' + str(n) + '-Gram LWLM tables')
            ng = NGram(n, in_dir)
            ng.run()
            Logger.log_success('Finished building ' + str(n) + '-Gram LWLM tables')

        for n in (3, 5):
            LWLM.build_alt_words_table(n)
Example #10
0
    def run(self):
        """Count n-gram frequencies over every file under the input directory."""
        Logger.log_message('Running ' + str(self.n) + '-Gram Frequency counter')

        # OSError is what stat() raises for a missing path — narrower than
        # the original bare except, which also hid KeyboardInterrupt.
        try:
            stat(self.in_dir)
        except OSError:
            Logger.log_error('Input text not found')
            return

        for (dir_path, _, file_names) in walk(self.in_dir):
            for file_name in file_names:
                in_file = path.join(dir_path, file_name)
                self.parse_file(in_file)

        self.dump_results()

        Logger.log_success(str(self.n) + '-Gram Frequency counter exited successfully')
Example #11
0
def main():
    """Drive the pipeline: parse the corpus, merge stats, score readability."""
    Logger.log_message("Starting NCERT Readability application")

    stats_path = path.join(OUTPUT_DIR, STATS_FILE)
    merged_path = path.join(OUTPUT_DIR, MERGED_STATS_FILE)
    results_path = path.join(OUTPUT_DIR, RESULTS_FILE)

    # Stage 1: parse the raw corpus into per-file statistics.
    Parser(CORPUS_DIR, OUTPUT_DIR, STATS_FILE).run()

    # Stage 2: merge per-file statistics into per-grade totals.
    Merger(stats_path, merged_path).run()

    # Stage 3: compute readability grades from the merged totals.
    ReadabilityCalculator(merged_path, results_path).run()

    Logger.log_success("Application exited successfully")
Example #12
0
    def run(self):
        """Count term frequencies for every file under the input directory."""
        # Check for the input directory (OSError: missing path) — narrower
        # than the original bare except.
        try:
            stat(self.in_dir)
        except OSError:
            Logger.log_error('Input text not found')
            return

        Logger.log_message('Running term frequency counter')

        for (dir_path, _, file_names) in walk(self.in_dir):
            for file_name in file_names:
                in_file = path.join(dir_path, file_name)
                self.count_term_frequency(in_file)

        Logger.log_success('Finished term frequency counting')

        Logger.log_message('Writing results to ' + self.out_file)
        self.dump_results()
        Logger.log_success('Finished writing results to ' + self.out_file)
Example #13
0
    def dump_results(self):
        """Write the n-gram frequency table and the regexp (neighbors) table.

        Produces out/<n>-gram.csv (ngram;count) and out/<n>-gram-regexp.csv
        (key;deduplicated neighbor words).
        """
        out_file = path.join('out', str(self.n) + '-gram.csv')
        Logger.log_message('Writing ' + str(self.n) + '-Gram table to ' + out_file)

        # 'w' (not 'w+'): the file is only written, never read back, and
        # "with" guarantees the handle is closed even on error.
        with open(out_file, 'w') as output_file:
            for s in self.table:
                output_file.write(s + ';' + str(self.table[s]) + '\n')

        Logger.log_success('Finished writing ' + str(self.n) + '-Gram table to ' + out_file)

        out_file = path.join('out', str(self.n) + '-gram-regexp.csv')
        Logger.log_message('Writing ' + str(self.n) + '-Gram Regular Expressions to ' + out_file)

        with open(out_file, 'w') as output_file:
            for nb in self.neighbors:
                # set() dedups repeated neighbor words before joining.
                output_file.write(nb + ';' + ' '.join(set(self.neighbors[nb])) + '\n')

        Logger.log_success(str(self.n) + '-Gram Regular Expressions have been written to ' + out_file)
Example #14
0
    def run(self):
        """Parse every file under the input tree and write the stats file."""
        Logger.log_message("Parser started running")

        # Check if the input directory exists (OSError: missing path) —
        # narrower than the original bare except.
        try:
            stat(self.in_dir)
        except OSError:
            Logger.log_error("Input text not found")
            return

        # Create the output directory if it doesn't exist yet.
        try:
            stat(self.out_dir)
        except OSError:
            mkdir(self.out_dir)

        for (dir_path, _, file_names) in walk(self.in_dir):
            for file_name in file_names:
                in_file = path.join(dir_path, file_name)
                self.parse(in_file)

        Logger.log_message("Writing results to " + self.out_file)
        self.dump_results()
        Logger.log_success("Results have been written to " + self.out_file)
Example #15
0
    def __init__(self, host, port, debug):
        """Create the Flask application and register its routes.

        host/port/debug are stored for whoever actually starts the server;
        this constructor only wires up the app and its handlers.
        """
        self.host = host
        self.port = port
        self.debug = debug

        self.app = Flask(__name__)

        # Index route
        @self.app.route('/')
        def index():
            return render_template('index.html')

        # Tagging API: tags the 'text' query parameter with the tagger
        # selected by the 'type' query parameter.  Missing parameters raise
        # KeyError (Flask turns that into a 400).
        @self.app.route('/api/tag')
        def tag_api():
            text = request.args["text"]
            _type = request.args["type"]

            tagger = Tagger(_type)
            result = tagger.tag(text)

            return jsonify(success=True, result=result)

        Logger.log_success('Server started successfully')
Example #16
0
def cleanup():
    """Remove and recreate the 'out' working directory.

    Uses shutil/os instead of shelling out to `rm -rf` / `mkdir`, which
    was Unix-only and silently ignored failures.
    """
    import shutil
    from os import makedirs

    Logger.log_message('Cleaning up')
    shutil.rmtree('out', ignore_errors=True)
    makedirs('out')
    Logger.log_success('Finished cleaning up')
Example #17
0
    def __init__(self, host, port, debug):
        """Create the Flask application and register all page and API routes.

        host/port/debug are stored for whoever actually starts the server;
        this constructor only wires up the app, the simplifier/enricher
        helpers, and the route handlers.
        """
        self.host = host
        self.port = port
        self.debug = debug

        self.app = Flask(__name__)

        # Long-lived helpers shared by the API routes below.
        self.syntactic_simplifier = SyntacticSimplifier()
        self.enricher = Enricher()

        # --- HTML pages ---

        @self.app.route('/')
        def index():
            return render_template('index.html')

        @self.app.route('/enrich')
        def enrich():
            return render_template('enrich.html')

        @self.app.route('/lexus')
        def lexus():
            return render_template('lexus.html')

        @self.app.route('/syntax')
        def syntax():
            return render_template('syntax.html')

        @self.app.route('/readability')
        def readability():
            return render_template('readability.html')

        # --- JSON APIs (parameters come from the query string; a missing
        #     key raises KeyError, which Flask reports as a 400) ---

        # Combined lexical + syntactic simplification of 'text'.
        @self.app.route('/api/simplify')
        def simplify_api():
            text = request.args['text']
            n = request.args['n']

            lex_result = LexicalSimplifier.simplify(text, n)
            syn_result = self.syntactic_simplifier.simplify(text)

            result = {
                "lexical": lex_result,
                "syntactic": syn_result
            }

            return jsonify(success=True, result=result)

        # Lexical simplification only.
        @self.app.route('/api/lexus/simplify')
        def lexus_simplify_api():
            text = request.args['text']
            n = request.args['n']

            result = LexicalSimplifier.simplify(text, n)
            return jsonify(success=True, result=result)

        # Syntactic simplification only.
        @self.app.route('/api/syntax/simplify')
        def syntax_simplify_api():
            text = request.args['text']

            result = self.syntactic_simplifier.simplify(text, False, True)
            return jsonify(success=True, result=result)

        # Text enrichment.
        @self.app.route('/api/enrich')
        def enrich_api():
            text = request.args['text']
            result = self.enricher.enrich(text)
            return jsonify(success=True, result=result)

        # Readability metrics for 'text'.
        @self.app.route('/api/readability')
        def readability_api():
            text = request.args['text']
            result = {
                "flesch_kincaid_grade_level": FleschKincaid.calculate_grade_level(text)
            }
            return jsonify(success=True, result=result)

        Logger.log_success("Started application server successfully")