Example #1
0
def main(args=None):
    """
    Build the Associated Press demo corpus and its configuration file.

    Calls ``download_and_extract()``, then runs the ``init``, ``prep`` and
    ``train`` commands in that order with fixed command-line arguments, and
    finally rewrites ``ap.ini`` with the demo-specific settings.

    :param args: Not read; the name is rebound to the namespace parsed for
        each stage.
    """
    download_and_extract()

    # The working directory is looked up here but not used afterwards.
    working_dir = os.getcwd()

    # Each stage is a (command module, argument list) pair, run in order.
    stages = (
        (init,
         ['ap', '--name', '"Associated Press 88-90 sample"', '--rebuild', '-q']),
        (prep,
         'ap.ini --lang en --high 2000 --low 5 -q'.split()),
        (train,
         "ap.ini -k 20 40 60 --context-type article --iter 20".split()),
    )
    for command, argv in stages:
        stage_parser = ArgumentParser()
        command.populate_parser(stage_parser)
        args = stage_parser.parse_args(argv)
        command.main(args)

    from configparser import RawConfigParser as ConfigParser
    config = ConfigParser()
    config.read('ap.ini')

    # (section, option, value) triples written into the generated config.
    demo_settings = (
        ("main", "label_module", "topicexplorer.extensions.ap"),
        ("main", "corpus_desc", "ap.md"),
        ("www", "icons", "ap,link"),
        ("www", "fulltext", "True"),
    )
    for section, option, value in demo_settings:
        config.set(section, option, value)

    # Copy the corpus description next to the config file before saving it.
    description_source = os.path.join(os.path.dirname(__file__),
                                      '../demo/ap.md')
    shutil.copyfile(description_source, 'ap.md')
    with open("ap.ini", "w") as configfh:
        config.write(configfh)
Example #2
0
def main(args=None, launch=True):
    """
    Build the Associated Press demo corpus and optionally serve it.

    Calls ``download_and_extract()``, runs the ``init``, ``prep`` and
    ``train`` commands with fixed arguments, rewrites ``ap.ini`` with the
    demo-specific settings and, when requested, runs the server command.

    :param args: Not read; the name is rebound to the namespace parsed for
        each command.
    :param launch: When true, ``server.main`` is run on ``ap.ini`` after
        the configuration file has been written.
    """
    def run_command(command, argv):
        # Build a fresh parser for the command, parse argv and run it.
        command_parser = ArgumentParser()
        command.populate_parser(command_parser)
        namespace = command_parser.parse_args(argv)
        command.main(namespace)
        return namespace

    download_and_extract()

    # The working directory is looked up here but not used afterwards.
    cwd = os.getcwd()

    args = run_command(
        init,
        ['ap', '--name', '"Associated Press 88-90 sample"', '--rebuild', '-q'])
    args = run_command(
        prep, 'ap.ini --lang en --high 2000 --low 5 -q'.split())
    args = run_command(
        train, "ap.ini -k 20 40 60 --context-type article --iter 20".split())

    import topicexplorer.config
    config = topicexplorer.config.read('ap.ini')
    for section, option, value in (
            ("main", "label_module", "topicexplorer.extensions.ap"),
            ("main", "corpus_desc", "ap.md"),
            ("www", "icons", "ap,fingerprint,link"),
            ("www", "fulltext", "True")):
        config.set(section, option, value)

    # Copy the corpus description next to the config file before saving it.
    description_source = get_static_resource_path('demo/ap.md')
    shutil.copyfile(description_source, 'ap.md')
    with open("ap.ini", "w") as configfh:
        config.write(configfh)

    if launch:
        args = run_command(server, ['ap.ini'])
Example #3
0
def main(args=None, launch=True):
    """
    Run the Associated Press demo pipeline.

    Steps, in order: ``download_and_extract()``; the ``init``, ``prep`` and
    ``train`` commands with fixed arguments; an update of ``ap.ini`` with
    the demo-specific settings; and, unless disabled, the server command.

    :param args: Not read; the name is rebound to the namespace parsed for
        each command.
    :param launch: When false, the function returns after ``ap.ini`` has
        been written and the server command is not run.
    """
    download_and_extract()

    # The working directory is looked up here but not used afterwards.
    start_dir = os.getcwd()

    pipeline = [
        (init, ['ap', '--name', '"Associated Press 88-90 sample"',
                '--rebuild', '-q']),
        (prep, 'ap.ini --lang en --high 2000 --low 5 -q'.split()),
        (train, "ap.ini -k 20 40 60 --context-type article --iter 20".split()),
    ]
    for module, cli_args in pipeline:
        module_parser = ArgumentParser()
        module.populate_parser(module_parser)
        args = module_parser.parse_args(cli_args)
        module.main(args)

    import topicexplorer.config
    config = topicexplorer.config.read('ap.ini')
    set_option = config.set
    set_option("main", "label_module", "topicexplorer.extensions.ap")
    set_option("main", "corpus_desc", "ap.md")
    set_option("www", "icons", "ap,fingerprint,link")
    set_option("www", "fulltext", "True")

    # Copy the corpus description next to the config file before saving it.
    shutil.copyfile(get_static_resource_path('demo/ap.md'), 'ap.md')
    with open("ap.ini", "w") as configfh:
        config.write(configfh)

    if not launch:
        return

    launch_parser = ArgumentParser()
    server.populate_parser(launch_parser)
    args = launch_parser.parse_args(['ap.ini'])
    server.main(args)
Example #4
0
def main():
    """
    The primary CLI function for the Topic Explorer.

    Builds the master parser (with the mutually exclusive ``-t/--time`` and
    ``-p/--profile`` benchmark flags) plus one subparser per command, parses
    ``sys.argv`` and dispatches to the selected command. The command's entry
    point is wrapped by ``benchmark`` so that it can be timed or profiled
    with ``profilehooks``. After a profiled run, snakeviz is started on the
    stats file when it is installed.
    """
    import os
    import sys

    # Create the master argparse object.
    parser = ThrowingArgumentParser()

    # Adding the benchmarks flags.
    benchmark_group = parser.add_mutually_exclusive_group()
    benchmark_group.add_argument('-t',
                                 '--time',
                                 help="Print execution time",
                                 action='store_true')
    benchmark_group.add_argument('-p',
                                 '--profile',
                                 help="""Profile the command.
    Optional filename saves results for use with snakeviz, pstats, or
    cprofilev. Automatically launches snakeviz, if installed.""",
                                 nargs='?',
                                 metavar='STATS_FILE')

    # Using add_subparsers(metavar) until argparse.SUPPRESS support is fixed.
    # See issue http://bugs.python.org/issue22848
    parsers = parser.add_subparsers(
        help="select a command",
        parser_class=ArgumentParser,
        metavar='{version,demo,update,init,prep,train,launch,notebook,metadata}'
    )
    version_parser = parsers.add_parser('version',
                                        help="Print the version and exit")
    version_parser.set_defaults(func='version')

    # Init Parser
    parser_init = parsers.add_parser('init',
                                     help="Initialize the topic explorer")
    init.populate_parser(parser_init)
    parser_init.set_defaults(func="init")

    # Prep Parser
    parser_prep = parsers.add_parser(
        'prep',
        help="Prep the corpus",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    prep.populate_parser(parser_prep)
    parser_prep.set_defaults(func="prep")

    # Train Parser
    parser_train = parsers.add_parser('train', help="Train the LDA models")
    train.populate_parser(parser_train)
    parser_train.set_defaults(func="train")

    # Launch Parser
    parser_launch = parsers.add_parser('launch',
                                       help="Serve the trained LDA models")
    server.populate_parser(parser_launch)
    parser_launch.set_defaults(func="launch")

    # Serve Parser
    parser_serve = parsers.add_parser(
        'serve',
        help="Serve a single LDA model, helper for `topicexplorer launch`, " +
        "rarely called directly")
    server.populate_parser(parser_serve)
    parser_serve.set_defaults(func="serve")

    # Notebook Parser
    parser_nb = parsers.add_parser('notebook',
                                   help="Create a set of IPython Notebooks")
    notebook.populate_parser(parser_nb)
    parser_nb.set_defaults(func="notebook")

    # Demo Parser
    parser_demo = parsers.add_parser('demo',
                                     help="Download and run the AP demo")
    parser_demo.set_defaults(func="demo")

    # Update Parser
    parser_update = parsers.add_parser('update',
                                       help="Update the Topic Explorer")
    parser_update.set_defaults(func="update")

    # Metadata Parser
    parser_metadata = parsers.add_parser(
        'metadata', help="Add spaces before unicode chars")
    metadata.populate_parser(parser_metadata)
    parser_metadata.set_defaults(func="metadata")

    # fancy arg validation for manually injecting tempfile to profile arg
    try:
        try:
            args = parser.parse_args()
        except ArgumentParserError:
            new_args = sys.argv[1:]
            # Without '-p' on the command line the failure has another
            # cause; hand it to the outer handler unchanged.
            if '-p' not in new_args:
                raise

            # If the error was thrown by the '-p' argument not having a
            # valid file, fix by manually injecting a nargs break
            profile_idx = new_args.index('-p')
            if (len(new_args) > (profile_idx + 1)
                    and new_args[profile_idx + 1] in parsers.choices):
                new_args.insert(profile_idx + 1, '-')
                args = parser.parse_args(new_args)
            else:
                raise
    except ArgumentParserError as e:
        # Check to see if error occurs with a subparser and cause the exception
        # to arise from the subparser instead
        cli_args = sys.argv[1:]
        # parsers.choices maps each command name to its parser object, so the
        # lookup does not depend on how the local variables are named.
        for name, subparser in parsers.choices.items():
            if name in cli_args:
                # this might cause an error in the subparser, in which case
                # we actually want to show that error first
                subparser.parse_args(cli_args[cli_args.index(name) + 1:])

        # Use the default error mechanism for the master parser.
        # If the code gets here, it means the error was not in a subparser.
        # Python 3 exceptions have no `.message`, so fall back to str(e).
        ArgumentParser.error(parser, getattr(e, 'message', str(e)))

    if not hasattr(args, 'func'):
        # Python 3 argparse accepts a command line without a subcommand, in
        # which case no `func` default is set; report it as a usage error.
        ArgumentParser.error(parser, "too few arguments")

    if args.profile:
        if args.profile == '-':
            import tempfile
            temphandle, args.profile = tempfile.mkstemp(suffix='.prof',
                                                        prefix='vsm.')
            # mkstemp returns an open descriptor; only the path is used here.
            os.close(temphandle)
            print("Saving benchmark data to", args.profile)

        from profilehooks import profile

        def benchmark(fn):
            # Wrap fn so that calling it is profiled into args.profile.
            return profile(fn,
                           immediate=True,
                           filename=args.profile,
                           stdout=None)

    elif args.time:
        from profilehooks import timecall

        def benchmark(fn):
            # Wrap fn so that calling it is timed.
            return timecall(fn, immediate=False)
    else:

        def benchmark(fn):
            # No benchmarking requested: use fn as is.
            return fn

    if args.func == 'version':
        from topicexplorer.version import __pretty_version__
        print(__pretty_version__, end='')

    elif args.func == 'init':
        # The value returned by init.main is shown below as the config file.
        args.config_file = benchmark(init.main)(args)

        print("\nTIP: Only initalizing corpus object and config file.")
        print("     Next prepare the corpus using:")
        print("         topicexplorer prep", args.config_file)
        print("     Or skip directly to training LDA models using:")
        print("         topicexplorer train", args.config_file)

    elif args.func == 'prep':
        benchmark(prep.main)(args)

        print("\nTIP: Train the LDA models with:")
        print("         topicexplorer train", args.config_file)

    elif args.func == 'train':
        benchmark(train.main)(args)

        if not args.dry_run:
            print("\nTIP: launch the topic explorer with:")
            print("         topicexplorer launch", args.config_file)
            print("     or the notebook server with:")
            print("         topicexplorer notebook", args.config_file)

    elif args.func == 'launch' or args.func == 'serve':
        # Note that we are only benchmarking the creation process - obviously
        # benches of the serve process will take longer
        app = benchmark(server.create_app)(args)
        server.main(args, app)

    elif args.func == 'notebook':
        benchmark(notebook.main)(args)

    elif args.func == 'demo':
        benchmark(demo.main)(args)

    elif args.func == 'update':
        benchmark(update.main)(args)

    elif args.func == 'metadata':
        benchmark(metadata.main)(args)

    if args.profile:
        try:
            import snakeviz.cli
            print("\n\n")
            snakeviz.cli.main([args.profile])
        except ImportError:
            print(
                """\nSnakeviz is not installed. Install with `pip install snakeviz`,
            then run `snakeviz {}`.""".format(args.profile))
Example #5
0
def main():
    """
    Command-line entry point: parse the subcommand and dispatch to it.

    One subparser is registered per command (``init``, ``prep``, ``train``,
    ``launch``, ``serve``, ``notebook``, ``demo``). Each subparser stores its
    command name in ``args.func``, which selects the module whose ``main``
    is run. After ``init``, ``prep`` and ``train`` a tip naming the next
    command is printed.
    """
    parser = ArgumentParser()
    parsers = parser.add_subparsers(help="select a command")

    # Init Parser
    parser_init = parsers.add_parser('init',
                                     help="Initialize the topic explorer")
    init.populate_parser(parser_init)
    parser_init.set_defaults(func="init")

    # Prep Parser
    parser_prep = parsers.add_parser(
        'prep',
        help="Prep the corpus",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    prep.populate_parser(parser_prep)
    parser_prep.set_defaults(func="prep")

    # Train Parser
    parser_train = parsers.add_parser('train', help="Train the LDA models")
    train.populate_parser(parser_train)
    parser_train.set_defaults(func="train")

    # Launch Parser
    parser_launch = parsers.add_parser('launch',
                                       help="Serve the trained LDA models")
    launch.populate_parser(parser_launch)
    parser_launch.set_defaults(func="launch")

    # Serve Parser
    parser_serve = parsers.add_parser(
        'serve',
        help="Serve a single LDA model, helper for `vsm launch`, " +
        "rarely called directly")
    server.populate_parser(parser_serve)
    parser_serve.set_defaults(func="serve")

    # Notebook Parser
    parser_nb = parsers.add_parser('notebook',
                                   help="Create a set of IPython Notebooks")
    notebook.populate_parser(parser_nb)
    parser_nb.set_defaults(func="notebook")

    # Demo Parser
    parser_demo = parsers.add_parser('demo',
                                     help="Download and run the AP demo")
    parser_demo.set_defaults(func="demo")

    args = parser.parse_args()

    if not hasattr(args, 'func'):
        # Python 3 argparse accepts a command line without a subcommand, in
        # which case no `func` default is set; report it as a usage error.
        parser.error("too few arguments")

    # Every print below takes a single pre-formatted string so the output is
    # the same whether print is a statement (Python 2) or a function.
    if args.func == 'init':
        # The value returned by init.main is shown below as the config file.
        args.config_file = init.main(args)

        print("\nTIP: Only initalizing corpus object and config file.")
        print("     Next prepare the corpus using:")
        print("         vsm prep %s" % (args.config_file,))
        print("     Or skip directly to training LDA models using:")
        print("         vsm train %s" % (args.config_file,))

    elif args.func == 'prep':
        prep.main(args)

        print("\nTIP: Train the LDA models with:")
        print("         vsm train %s" % (args.config_file,))

    elif args.func == 'train':
        train.main(args)

        print("\nTIP: launch the topic explorer with:")
        print("         vsm launch %s" % (args.config_file,))
        print("     or the notebook server with:")
        print("         vsm notebook %s" % (args.config_file,))

    elif args.func == 'launch':
        launch.main(args)

    elif args.func == 'serve':
        server.main(args)

    elif args.func == 'notebook':
        notebook.main(args)

    elif args.func == 'demo':
        demo.main()
Example #6
0
def main():
    """
    Command-line entry point: parse the subcommand and dispatch to it.

    One subparser is registered per command (``init``, ``prep``, ``train``,
    ``launch``, ``serve``, ``notebook``, ``demo``). Each subparser stores its
    command name in ``args.func``, which selects the module whose ``main``
    is run. After ``init``, ``prep`` and ``train`` a tip naming the next
    command is printed.
    """
    parser = ArgumentParser()
    parsers = parser.add_subparsers(help="select a command")

    # Init Parser
    parser_init = parsers.add_parser('init',
                                     help="Initialize the topic explorer")
    init.populate_parser(parser_init)
    parser_init.set_defaults(func="init")

    # Prep Parser
    parser_prep = parsers.add_parser(
        'prep', help="Prep the corpus",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    prep.populate_parser(parser_prep)
    parser_prep.set_defaults(func="prep")

    # Train Parser
    parser_train = parsers.add_parser('train', help="Train the LDA models")
    train.populate_parser(parser_train)
    parser_train.set_defaults(func="train")

    # Launch Parser
    parser_launch = parsers.add_parser('launch',
                                       help="Serve the trained LDA models")
    launch.populate_parser(parser_launch)
    parser_launch.set_defaults(func="launch")

    # Serve Parser
    parser_serve = parsers.add_parser(
        'serve',
        help="Serve a single LDA model, helper for `vsm launch`, " +
             "rarely called directly")
    server.populate_parser(parser_serve)
    parser_serve.set_defaults(func="serve")

    # Notebook Parser
    parser_nb = parsers.add_parser(
        'notebook', help="Create a set of IPython Notebooks")
    notebook.populate_parser(parser_nb)
    parser_nb.set_defaults(func="notebook")

    # Demo Parser
    parser_demo = parsers.add_parser(
        'demo', help="Download and run the AP demo")
    parser_demo.set_defaults(func="demo")

    args = parser.parse_args()

    if not hasattr(args, 'func'):
        # Python 3 argparse accepts a command line without a subcommand, in
        # which case no `func` default is set; report it as a usage error.
        parser.error("too few arguments")

    # Every print below takes a single pre-formatted string so the output is
    # the same whether print is a statement (Python 2) or a function.
    if args.func == 'init':
        # The value returned by init.main is shown below as the config file.
        args.config_file = init.main(args)

        print("\nTIP: Only initalizing corpus object and config file.")
        print("     Next prepare the corpus using:")
        print("         vsm prep %s" % (args.config_file,))
        print("     Or skip directly to training LDA models using:")
        print("         vsm train %s" % (args.config_file,))

    elif args.func == 'prep':
        prep.main(args)

        print("\nTIP: Train the LDA models with:")
        print("         vsm train %s" % (args.config_file,))

    elif args.func == 'train':
        train.main(args)

        print("\nTIP: launch the topic explorer with:")
        print("         vsm launch %s" % (args.config_file,))
        print("     or the notebook server with:")
        print("         vsm notebook %s" % (args.config_file,))

    elif args.func == 'launch':
        launch.main(args)

    elif args.func == 'serve':
        server.main(args)

    elif args.func == 'notebook':
        notebook.main(args)

    elif args.func == 'demo':
        demo.main()
def main():
    """
    The primary CLI function for the Topic Explorer.

    Builds the master parser (with the mutually exclusive ``-t/--time`` and
    ``-p/--profile`` benchmark flags) plus one subparser per command, parses
    ``sys.argv`` and dispatches to the selected command. The command's entry
    point is wrapped by ``benchmark`` so that it can be timed or profiled
    with ``profilehooks``. After a profiled run, snakeviz is started on the
    stats file when it is installed.
    """
    import os
    import sys

    # Create the master argparse object.
    parser = ThrowingArgumentParser()

    # Adding the benchmarks flags.
    benchmark_group = parser.add_mutually_exclusive_group()
    benchmark_group.add_argument('-t', '--time', help="Print execution time",
                                 action='store_true')
    benchmark_group.add_argument('-p', '--profile', help="""Profile the command.
    Optional filename saves results for use with snakeviz, pstats, or
    cprofilev. Automatically launches snakeviz, if installed.""",
                                 nargs='?', metavar='STATS_FILE')

    # Using add_subparsers(metavar) until argparse.SUPPRESS support is fixed.
    # See issue http://bugs.python.org/issue22848
    # NOTE(review): 'serve', 'export' and 'import' are registered below but
    # not listed in this metavar - confirm that hiding 'export' and 'import'
    # from the usage line is intentional.
    parsers = parser.add_subparsers(help="select a command",
                                    parser_class=ArgumentParser,
                                    metavar='{version,demo,update,init,prep,train,launch,notebook,metadata}')
    version_parser = parsers.add_parser('version', help="Print the version and exit")
    version_parser.set_defaults(func='version')

    # Init Parser
    parser_init = parsers.add_parser('init', help="Initialize the topic explorer")
    init.populate_parser(parser_init)
    parser_init.set_defaults(func="init")

    # Prep Parser
    parser_prep = parsers.add_parser('prep', help="Prep the corpus",
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    prep.populate_parser(parser_prep)
    parser_prep.set_defaults(func="prep")

    # Train Parser
    parser_train = parsers.add_parser('train', help="Train the LDA models")
    train.populate_parser(parser_train)
    parser_train.set_defaults(func="train")

    # Launch Parser
    parser_launch = parsers.add_parser('launch', help="Serve the trained LDA models")
    server.populate_parser(parser_launch)
    parser_launch.set_defaults(func="launch")

    # Serve Parser
    parser_serve = parsers.add_parser(
        'serve',
        help="Serve a single LDA model, helper for `topicexplorer launch`, " +
             "rarely called directly")
    server.populate_parser(parser_serve)
    parser_serve.set_defaults(func="serve")

    # Notebook Parser
    parser_nb = parsers.add_parser('notebook',
                                   help="Create a set of IPython Notebooks")
    notebook.populate_parser(parser_nb)
    parser_nb.set_defaults(func="notebook")

    # Demo Parser
    parser_demo = parsers.add_parser('demo',
                                     help="Download and run the AP demo")
    parser_demo.set_defaults(func="demo")

    # Update Parser
    parser_update = parsers.add_parser('update',
                                       help="Update the Topic Explorer")
    parser_update.set_defaults(func="update")

    # Metadata Parser
    parser_metadata = parsers.add_parser('metadata',
                                         help="Add spaces before unicode chars")
    metadata.populate_parser(parser_metadata)
    parser_metadata.set_defaults(func="metadata")

    # Export Parser
    parser_export = parsers.add_parser('export', help="Export a tez archive")
    export.populate_parser(parser_export)
    parser_export.set_defaults(func="export")

    # Import Parser
    parser_import = parsers.add_parser('import', help="Import the tez archive")
    tezimport.populate_parser(parser_import)
    parser_import.set_defaults(func="import")

    # fancy arg validation for manually injecting tempfile to profile arg
    try:
        try:
            args = parser.parse_args()
        except ArgumentParserError:
            new_args = sys.argv[1:]
            # Without '-p' on the command line the failure has another
            # cause; hand it to the outer handler unchanged.
            if '-p' not in new_args:
                raise

            # If the error was thrown by the '-p' argument not having a
            # valid file, fix by manually injecting a nargs break
            profile_idx = new_args.index('-p')
            if (len(new_args) > (profile_idx + 1) and
                    new_args[profile_idx + 1] in parsers.choices):
                new_args.insert(profile_idx + 1, '-')
                args = parser.parse_args(new_args)
            else:
                raise
    except ArgumentParserError as e:
        # Check to see if error occurs with a subparser and cause the exception
        # to arise from the subparser instead
        cli_args = sys.argv[1:]
        # parsers.choices maps each command name to its parser object, so the
        # lookup does not depend on how the local variables are named.
        for name, subparser in parsers.choices.items():
            if name in cli_args:
                # this might cause an error in the subparser, in which case
                # we actually want to show that error first
                subparser.parse_args(cli_args[cli_args.index(name) + 1:])

        # Use the default error mechanism for the master parser.
        # If the code gets here, it means the error was not in a subparser.
        # Python 3 exceptions have no `.message`, so fall back to str(e).
        ArgumentParser.error(parser, getattr(e, 'message', str(e)))

    if not hasattr(args, 'func'):
        # Python 3 argparse accepts a command line without a subcommand, in
        # which case no `func` default is set; report it as a usage error.
        ArgumentParser.error(parser, "too few arguments")

    if args.profile:
        if args.profile == '-':
            import tempfile
            temphandle, args.profile = tempfile.mkstemp(suffix='.prof', prefix='vsm.')
            # mkstemp returns an open descriptor; only the path is used here.
            os.close(temphandle)
            print("Saving benchmark data to", args.profile)

        from profilehooks import profile

        def benchmark(fn):
            # Wrap fn so that calling it is profiled into args.profile.
            return profile(fn, immediate=True, filename=args.profile, stdout=None)

    elif args.time:
        from profilehooks import timecall

        def benchmark(fn):
            # Wrap fn so that calling it is timed.
            return timecall(fn, immediate=False)
    else:
        def benchmark(fn):
            # No benchmarking requested: use fn as is.
            return fn

    if args.func == 'version':
        from topicexplorer.version import __pretty_version__
        print(__pretty_version__, end='')

    elif args.func == 'init':
        # The value returned by init.main is shown below as the config file.
        args.config_file = benchmark(init.main)(args)

        print("\nTIP: Only initalizing corpus object and config file.")
        print("     Next prepare the corpus using:")
        print("         topicexplorer prep", args.config_file)
        print("     Or skip directly to training LDA models using:")
        print("         topicexplorer train", args.config_file)

    elif args.func == 'prep':
        benchmark(prep.main)(args)

        print("\nTIP: Train the LDA models with:")
        print("         topicexplorer train", args.config_file)

    elif args.func == 'train':
        benchmark(train.main)(args)

        if not args.dry_run:
            print("\nTIP: launch the topic explorer with:")
            print("         topicexplorer launch", args.config_file)
            print("     or the notebook server with:")
            print("         topicexplorer notebook", args.config_file)

    elif args.func == 'launch' or args.func == 'serve':
        # Note that we are only benchmarking the creation process - obviously
        # benches of the serve process will take longer
        app = benchmark(server.create_app)(args)
        server.main(args, app)

    elif args.func == 'notebook':
        benchmark(notebook.main)(args)

    elif args.func == 'demo':
        benchmark(demo.main)(args)

    elif args.func == 'update':
        benchmark(update.main)(args)

    elif args.func == 'metadata':
        benchmark(metadata.main)(args)

    elif args.func == 'export':
        benchmark(export.main)(args)

    elif args.func == 'import':
        benchmark(tezimport.main)(args)

    if args.profile:
        try:
            import snakeviz.cli
            print("\n\n")
            snakeviz.cli.main([args.profile])
        except ImportError:
            print("""\nSnakeviz is not installed. Install with `pip install snakeviz`,
            then run `snakeviz {}`.""".format(args.profile))