Esempio n. 1
0
def main():
    """Parse the command line and preview the resulting file operations.

    Every docopt option is handed to the ``FileOps`` constructor under its
    keyword name; the targets returned for ``--path`` are then passed to
    ``get_previews``.
    """
    cli = docopt(__doc__, version="0.1")

    # (FileOps keyword, docopt key) pairs, in the order they are passed on.
    option_table = (
        ("casemode", "--casemode"),
        ("countpos", "--count"),
        ("dirsonly", "--dirsonly"),
        ("exclude", "--exclude"),
        ("filesonly", "--filesonly"),
        ("hidden", "--all"),
        ("ignorecase", "--ignorecase"),
        ("interactive", "--interactive"),
        ("keepext", "--keep-extension"),
        ("mediamode", "--media"),
        ("noclobber", "--no-clobber"),
        ("recursive", "--recursive"),
        ("regex", "--regex"),
        ("remdups", "--remduplicates"),
        ("remext", "--remextensions"),
        ("remnonwords", "--no-wordchars"),
        ("remsymbols", "--remsymbols"),
        ("simulate", "--simulate"),
        ("spacemode", "--spacemode"),
        ("quiet", "--quiet"),
        ("verbosity", "-v"),
        ("matchpattern", "<matchpattern>"),
        ("replacepattern", "<replacepattern>"),
    )
    settings = {keyword: cli[key] for keyword, key in option_table}
    renamer = FileOps(**settings)

    targets = renamer.get_targets(cli["--path"])
    renamer.get_previews(targets)
Esempio n. 2
0
def main():
    """Build a ``FileOps`` from the docopt arguments and preview its targets.

    The parsed options are collected into one settings mapping, passed to
    ``FileOps`` as keyword arguments, and the targets found for ``--path``
    are fed to ``get_previews``.
    """
    parsed = docopt(__doc__, version="0.1")

    # FileOps keyword -> value of the corresponding command-line option.
    settings = {
        "casemode": parsed["--casemode"],
        "countpos": parsed["--count"],
        "dirsonly": parsed["--dirsonly"],
        "exclude": parsed["--exclude"],
        "filesonly": parsed["--filesonly"],
        "hidden": parsed["--all"],
        "ignorecase": parsed["--ignorecase"],
        "interactive": parsed["--interactive"],
        "keepext": parsed["--keep-extension"],
        "mediamode": parsed["--media"],
        "noclobber": parsed["--no-clobber"],
        "recursive": parsed["--recursive"],
        "regex": parsed["--regex"],
        "remdups": parsed["--remduplicates"],
        "remext": parsed["--remextensions"],
        "remnonwords": parsed["--no-wordchars"],
        "remsymbols": parsed["--remsymbols"],
        "simulate": parsed["--simulate"],
        "spacemode": parsed["--spacemode"],
        "quiet": parsed["--quiet"],
        "verbosity": parsed["-v"],
        "matchpattern": parsed["<matchpattern>"],
        "replacepattern": parsed["<replacepattern>"],
    }
    ops = FileOps(**settings)

    found = ops.get_targets(parsed["--path"])
    ops.get_previews(found)
Esempio n. 3
0
def get_corpus_info(categories, **kwargs):
    """Collect token/type statistics for each given Brown corpus category.

    Args:
        categories: Iterable of category names, each passed to
            ``brown.words(categories=...)``.
        **kwargs: Optional ``stopwords`` setting. When omitted (or ``None``)
            every token is counted; ``'english'`` drops the tokens found in
            ``FileOps.get_stopwords('stopwords.txt')``. Other keywords are
            ignored.

    Returns:
        A list with one ``(category, token_count, type_count, diversity)``
        tuple per category. Underscores in the category name are replaced
        by spaces, and ``diversity`` is ``type_count / token_count``
        formatted with three decimals (``'0.000'`` when there are no
        tokens). Each tuple is also printed as it is produced.

    Raises:
        ValueError: If ``stopwords`` is given but is not ``'english'``.
    """
    stopwords_setting = kwargs.get('stopwords')
    filter_stopwords = stopwords_setting is not None
    stopwords = None
    if filter_stopwords:
        if stopwords_setting != 'english':
            # Previously this left ``tokens`` unbound (or stale from the
            # prior category); fail loudly instead.
            raise ValueError(
                "unsupported stopwords setting: %r (only 'english' is "
                "supported)" % (stopwords_setting,)
            )
        # Loaded once instead of once per token.
        # NOTE(review): kept as the container get_stopwords returns; if that
        # is a list of strings, wrapping it in a set would speed up the
        # membership tests below -- confirm before changing.
        stopwords = FileOps.get_stopwords('stopwords.txt')

    corpus_info = []

    for category in categories:
        words = brown.words(categories=category)
        if filter_stopwords:
            tokens = [w for w in words if w not in stopwords]
        else:
            tokens = list(words)

        token_count = len(tokens)
        type_count = len(set(tokens))
        # Guard against an empty token list to avoid ZeroDivisionError.
        ratio = type_count / token_count if token_count else 0.0
        diversity = "%.3f" % ratio

        label = category.replace('_', ' ')
        category_info = (label, token_count, type_count, diversity)
        print(category_info)
        corpus_info.append(category_info)

    return corpus_info
Esempio n. 4
0
    for category in categories:
        if not 'stopwords' in kwargs:
            tokens = [w for w in brown.words(categories=category)]
        else:
            if kwargs['stopwords'] == 'english':
                tokens = [w for w in brown.words(categories=category) if w not in FileOps.get_stopwords('stopwords.txt')]
        token_count = len(tokens)
        type_count = len(set(tokens))
        diversity = "%.3f" % (type_count/token_count)
        tmp = category.split('_')
        category = ' '.join(tmp)
        category_info = (category, token_count, type_count, diversity)
        print(category_info)
        corpus_info.append(category_info)
        
    return corpus_info
    
if __name__ == "__main__":
    # NOTE(review): unresolved merge-conflict markers (HEAD vs. a4d8d09) were
    # left in this block and made the file unparseable. The HEAD side was
    # empty, so the incoming plurals lines are kept -- confirm that
    # get_plurals_info and FileOps.write_plurals_table exist on this branch.
    plurals_info = get_plurals_info(brown.categories())
    FileOps.write_plurals_table(plurals_info)

    # Statistics for every category, without and with stopword filtering,
    # are handed to FileOps.write_table together.
    corpus_info = get_corpus_info(brown.categories())
    stopwords_info = get_corpus_info(brown.categories(), stopwords='english')
    FileOps.write_table(corpus_info, stopwords_info)
    print_corpus_info(brown.categories(), FileOps.get_stopwords('stopwords.txt'))
Esempio n. 5
0
"""
Skeleton code for a flask website

"""

import flask
from flask import render_template
from flask import request
from flask import url_for

# A single module-level FileOps instance, created when this module is imported.
from fileops import FileOps
fo = FileOps()

import json
import logging


###
# Globals
###
# The Flask application object; the @app.route decorators below register
# their view functions on it.
app = flask.Flask(__name__)
# NOTE(review): CONFIG is presumably a project-local settings module; it is
# imported only after the app object exists -- confirm whether that order
# matters before moving it to the top of the file.
import CONFIG


###
# Pages
###

@app.route("/")
@app.route("/index")
def index():