Exemple #1
0
def main(
    options_filename: Optional[str] = None,
    corpus_config: Optional[str] = None,
    input_filename: Optional[str] = None,
    output_folder: Optional[str] = None,
    output_tag: Optional[str] = None,
    filename_pattern: Optional[str] = None,
    phrase: Optional[Sequence[str]] = None,
    phrase_file: Optional[str] = None,
    create_subfolder: bool = True,
    pos_includes: str = '',
    pos_paddings: str = '',
    pos_excludes: str = '',
    append_pos: bool = False,
    to_lower: bool = True,
    lemmatize: bool = True,
    remove_stopwords: Optional[str] = None,
    min_word_length: int = 2,
    max_word_length: Optional[int] = None,
    keep_symbols: bool = False,
    keep_numerals: bool = False,
    only_any_alphanumeric: bool = False,
    only_alphabetic: bool = False,
    tf_threshold: int = 1,
    tf_threshold_mask: bool = False,
    max_tokens: Optional[int] = None,
    deserialize_processes: int = 4,
    enable_checkpoint: bool = True,
    force_checkpoint: bool = False,
):
    """Consolidate the call arguments and run `process` with the result.

    Every parameter of this function is collected into a dict and handed to
    `consolidate_cli_arguments` with `options_filename` as the filename key;
    whatever mapping that helper returns is expanded into keyword arguments
    for `process`. The meaning of the individual options is defined by those
    two callables, not here.
    (Presumably the helper merges in values read from the options file --
    confirm against its implementation.)
    """
    # Must be the first statement: at this point locals() holds only the
    # parameters. The copy gives the helper a private dict instead of the
    # frame's own namespace mapping.
    cli_arguments: dict = dict(locals())

    arguments: dict = consolidate_cli_arguments(arguments=cli_arguments, filename_key='options_filename')

    process(**arguments)
Exemple #2
0
def test_update_arguments_from_options_file_with_cli_override() -> None:
    """Check the `delta` entry returned by `consolidate_cli_arguments`.

    The arguments carry `delta=3` while the YAML snippet passed under the
    `options` key carries `delta: 48`. The test is skipped when `click`
    cannot be imported.
    """
    pytest.importorskip("click")
    yaml_data: str = "  - delta: 48"
    args: dict = dict(alfa=1, beta=2, delta=3, pi=3.14, options=yaml_data)
    args = script_utils.consolidate_cli_arguments(arguments=args, filename_key='options')
    # NOTE(review): 999 is neither the value passed in (3) nor the value in
    # the YAML snippet (48). The test name suggests the CLI value should win;
    # confirm the intended expectation against the helper before relying on
    # this assertion.
    assert args['delta'] == 999
Exemple #3
0
def click_main(
    options_filename: Optional[str] = None,
    config_filename: Optional[str] = None,
    corpus_source: Optional[str] = None,
    train_corpus_folder: Optional[str] = None,
    trained_model_folder: Optional[str] = None,
    target_mode: Literal['train', 'predict', 'both'] = 'both',
    target_folder: Optional[str] = None,
    target_name: Optional[str] = None,
    to_lower: bool = True,
    lemmatize: bool = True,
    pos_includes: str = '',
    pos_excludes: str = '',
    max_tokens: Optional[int] = None,
    tf_threshold: Optional[int] = None,
    remove_stopwords: Optional[str] = None,
    min_word_length: int = 2,
    max_word_length: Optional[int] = None,
    keep_symbols: bool = False,
    keep_numerals: bool = False,
    alpha: str = 'asymmetric',
    chunk_size: int = 2000,
    engine: str = "gensim_lda-multicore",
    max_iter: Optional[int] = None,
    minimum_probability: Optional[float] = None,
    n_topics: int = 50,
    passes: Optional[int] = None,
    per_word_topics: bool = False,
    random_seed: Optional[int] = None,
    update_every: int = 1,
    workers: Optional[int] = None,
    store_corpus: bool = True,
    store_compressed: bool = True,
    fix_hyphenation: bool = True,
    fix_accents: bool = True,
    only_any_alphanumeric: bool = False,
    only_alphabetic: bool = False,
    enable_checkpoint: bool = True,
    force_checkpoint: bool = False,
    passthrough_column: Optional[str] = None,
):
    """Consolidate the call arguments and delegate to `main`.

    All parameters are gathered into a dict and passed to
    `consolidate_cli_arguments` with `options_filename` as the filename key.
    The mapping it returns is expanded into keyword arguments for `main`.
    No option is interpreted in this function itself.
    """
    # Must be the first statement: at this point locals() holds only the
    # parameters. The copy gives the helper a private dict instead of the
    # frame's own namespace mapping.
    cli_arguments: dict = dict(locals())

    arguments: dict = consolidate_cli_arguments(arguments=cli_arguments, filename_key='options_filename')

    main(**arguments)
Exemple #4
0
def click_main(
    options_filename: str = None,
    config_filename: str = None,
    corpus_source: str = None,
    trained_model_folder: str = None,
    target_folder: str = None,
    target_name: str = None,
    lemmatize: bool = True,
    pos_includes: str = '',
    pos_excludes: str = '',
    to_lower: bool = True,
    remove_stopwords: str = None,
    min_word_length: int = 2,
    max_word_length: int = None,
    keep_symbols: bool = False,
    keep_numerals: bool = False,
    only_any_alphanumeric: bool = False,
    only_alphabetic: bool = False,
    minimum_probability: float = 0.001,
    n_tokens: int = 200,
    enable_checkpoint: bool = True,
    force_checkpoint: bool = False,
):
    """Validate the input, consolidate the arguments and delegate to `main`.

    Steps, in order:
      1. Exit with status 1 (after echoing an error) when the config file is
         missing, `target_name` is None, or `trained_model_folder` is None.
      2. Pass all parameters to `consolidate_cli_arguments`, keyed on
         `options_filename`.
      3. Split `trained_model_folder` into its parent folder and last path
         component and add them to the arguments as `model_folder` and
         `model_name`.
      4. Exit with status 1 unless `model_options.json` exists in
         `model_folder`.
      5. Call `main` with the resulting keyword arguments.

    Raises:
        SystemExit: with code 1 on any of the validation failures above.

    NOTE(review): the checks and the path split read the raw parameters, not
    the consolidated `arguments`. If `consolidate_cli_arguments` can supply
    these values from the options file, they are not seen here -- confirm
    that this is intended.
    """
    # No local variable may be bound before the locals() call below, otherwise
    # it would leak into the arguments forwarded to `main`.

    # `isfile(None)` raises TypeError, so reject the unset default explicitly
    # and report it through the same error message.
    if not config_filename or not isfile(config_filename):
        click.echo(f"error: config file {config_filename} not found")
        sys.exit(1)

    if target_name is None:
        click.echo("error: TARGET_NAME not specified")
        sys.exit(1)

    # `split(None)` further down would raise TypeError; fail with a readable
    # message instead, alongside the other input checks.
    if trained_model_folder is None:
        click.echo("error: TRAINED_MODEL_FOLDER not specified")
        sys.exit(1)

    # The copy gives the helper a private dict instead of the frame's own
    # namespace mapping.
    arguments: dict = consolidate_cli_arguments(arguments=dict(locals()), filename_key='options_filename')

    model_folder, model_name = split(trained_model_folder)

    arguments['model_folder'] = model_folder
    arguments['model_name'] = model_name

    if not isfile(join(model_folder, "model_options.json")):
        click.echo("error: no model in specified folder")
        sys.exit(1)

    main(**arguments)