def main():
    """Build a ``FileOps`` from the docopt-parsed command line and run it.

    Each command-line value listed below is forwarded to ``FileOps`` under
    the matching keyword; the result of ``get_targets`` for ``--path`` is
    then handed to ``get_previews``.
    """
    cli = docopt(__doc__, version="0.1")

    # FileOps keyword -> docopt key that supplies its value.
    option_for = {
        "casemode": "--casemode",
        "countpos": "--count",
        "dirsonly": "--dirsonly",
        "exclude": "--exclude",
        "filesonly": "--filesonly",
        "hidden": "--all",
        "ignorecase": "--ignorecase",
        "interactive": "--interactive",
        "keepext": "--keep-extension",
        "mediamode": "--media",
        "noclobber": "--no-clobber",
        "recursive": "--recursive",
        "regex": "--regex",
        "remdups": "--remduplicates",
        "remext": "--remextensions",
        "remnonwords": "--no-wordchars",
        "remsymbols": "--remsymbols",
        "simulate": "--simulate",
        "spacemode": "--spacemode",
        "quiet": "--quiet",
        "verbosity": "-v",
        "matchpattern": "<matchpattern>",
        "replacepattern": "<replacepattern>",
    }
    settings = {keyword: cli[option] for keyword, option in option_for.items()}
    renamer = FileOps(**settings)

    targets = renamer.get_targets(cli["--path"])
    renamer.get_previews(targets)
def get_corpus_info(categories, **kwargs):
    """Collect token and type statistics for each given Brown category.

    Args:
        categories: Iterable of category names passed to ``brown.words``.
        **kwargs: Optional ``stopwords`` setting. When it is omitted (or
            ``None``) every token is counted; ``'english'`` drops tokens
            found in ``FileOps.get_stopwords('stopwords.txt')``.

    Returns:
        A list with one ``(category, token_count, type_count, diversity)``
        tuple per category. Underscores in the category name are replaced
        by spaces, and ``diversity`` is ``type_count / token_count``
        formatted to three decimals (``"0.000"`` when there are no tokens).
        Each tuple is also printed as it is produced.

    Raises:
        ValueError: If ``stopwords`` is set to an unsupported value.
    """
    stopwords_mode = kwargs.get('stopwords')
    if stopwords_mode is None:
        stopwords = None
    elif stopwords_mode == 'english':
        # Fetch once: doing this inside the comprehension condition would
        # repeat the call for every single token.
        stopwords = FileOps.get_stopwords('stopwords.txt')
    else:
        # Fail with a clear message instead of hitting an unbound ``tokens``.
        raise ValueError("unsupported stopwords setting: %r" % (stopwords_mode,))

    corpus_info = []
    for category in categories:
        words = brown.words(categories=category)
        if stopwords is None:
            tokens = list(words)
        else:
            tokens = [w for w in words if w not in stopwords]

        token_count = len(tokens)
        type_count = len(set(tokens))
        # Guard the ratio so a category left without tokens does not raise.
        ratio = type_count / token_count if token_count else 0.0
        diversity = "%.3f" % ratio

        category_info = (category.replace('_', ' '), token_count,
                         type_count, diversity)
        print(category_info)
        corpus_info.append(category_info)
    return corpus_info
for category in categories: if not 'stopwords' in kwargs: tokens = [w for w in brown.words(categories=category)] else: if kwargs['stopwords'] == 'english': tokens = [w for w in brown.words(categories=category) if w not in FileOps.get_stopwords('stopwords.txt')] token_count = len(tokens) type_count = len(set(tokens)) diversity = "%.3f" % (type_count/token_count) tmp = category.split('_') category = ' '.join(tmp) category_info = (category, token_count, type_count, diversity) print(category_info) corpus_info.append(category_info) return corpus_info if __name__ == "__main__": <<<<<<< HEAD ======= plurals_info = get_plurals_info(brown.categories()) FileOps.write_plurals_table(plurals_info) >>>>>>> a4d8d09f16fdcb0d863e6b8683049a38be7be961 corpus_info = get_corpus_info(brown.categories()) stopwords_info = get_corpus_info(brown.categories(), stopwords='english') FileOps.write_table(corpus_info, stopwords_info) print_corpus_info(brown.categories(), FileOps.get_stopwords('stopwords.txt'))
""" Skeleton code for a flask website """ import flask from flask import render_template from flask import request from flask import url_for from fileops import FileOps fo = FileOps() import json import logging ### # Globals ### app = flask.Flask(__name__) import CONFIG ### # Pages ### @app.route("/") @app.route("/index") def index():