def main(
    options_filename: Optional[str] = None,
    corpus_config: Optional[str] = None,
    input_filename: Optional[str] = None,
    output_folder: Optional[str] = None,
    output_tag: Optional[str] = None,
    filename_pattern: Optional[str] = None,
    phrase: Optional[Sequence[str]] = None,
    phrase_file: Optional[str] = None,
    create_subfolder: bool = True,
    pos_includes: str = '',
    pos_paddings: str = '',
    pos_excludes: str = '',
    append_pos: bool = False,
    to_lower: bool = True,
    lemmatize: bool = True,
    remove_stopwords: Optional[str] = None,
    min_word_length: int = 2,
    max_word_length: Optional[int] = None,
    keep_symbols: bool = False,
    keep_numerals: bool = False,
    only_any_alphanumeric: bool = False,
    only_alphabetic: bool = False,
    tf_threshold: int = 1,
    tf_threshold_mask: bool = False,
    max_tokens: Optional[int] = None,
    deserialize_processes: int = 4,
    enable_checkpoint: bool = True,
    force_checkpoint: bool = False,
):
    """Consolidate the call arguments and forward them to ``process``.

    Every parameter of this function is captured with ``locals()`` and handed
    to ``consolidate_cli_arguments`` with ``options_filename`` named as the
    filename key. The dict it returns is expanded into keyword arguments for
    ``process``.

    NOTE(review): presumably ``consolidate_cli_arguments`` merges values read
    from the options file with the values given here -- confirm against its
    implementation, which is not part of this block.
    """
    # locals() must be evaluated before any other local name is bound, so the
    # captured mapping holds exactly the parameters above and nothing else.
    arguments: dict = consolidate_cli_arguments(arguments=locals(), filename_key='options_filename')
    process(**arguments)
def test_update_arguments_from_options_file_with_cli_override() -> None:
    """Check the value of ``delta`` after consolidating arguments with inline YAML options.

    The test is skipped when ``click`` cannot be imported. The ``options``
    entry carries YAML text instead of a path and is named as the filename key.
    """
    pytest.importorskip("click")
    yaml_data: str = " - delta: 48"
    args: dict = dict(alfa=1, beta=2, delta=3, pi=3.14, options=yaml_data)
    args = script_utils.consolidate_cli_arguments(arguments=args, filename_key='options')
    # NOTE(review): nothing visible in this test supplies 999 -- the call
    # arguments carry delta=3 and the YAML carries delta=48. Presumably the
    # override is expected to come from the command line / click context;
    # confirm that the required setup is not missing here.
    assert args['delta'] == 999
def click_main(
    options_filename: Optional[str] = None,
    config_filename: Optional[str] = None,
    corpus_source: Optional[str] = None,
    train_corpus_folder: Optional[str] = None,
    trained_model_folder: Optional[str] = None,
    target_mode: Literal['train', 'predict', 'both'] = 'both',
    target_folder: Optional[str] = None,
    target_name: Optional[str] = None,
    to_lower: bool = True,
    lemmatize: bool = True,
    pos_includes: str = '',
    pos_excludes: str = '',
    max_tokens: Optional[int] = None,
    tf_threshold: Optional[int] = None,
    remove_stopwords: Optional[str] = None,
    min_word_length: int = 2,
    max_word_length: Optional[int] = None,
    keep_symbols: bool = False,
    keep_numerals: bool = False,
    alpha: str = 'asymmetric',
    chunk_size: int = 2000,
    engine: str = "gensim_lda-multicore",
    max_iter: Optional[int] = None,
    minimum_probability: Optional[float] = None,
    n_topics: int = 50,
    passes: Optional[int] = None,
    per_word_topics: bool = False,
    random_seed: Optional[int] = None,
    update_every: int = 1,
    workers: Optional[int] = None,
    store_corpus: bool = True,
    store_compressed: bool = True,
    fix_hyphenation: bool = True,
    fix_accents: bool = True,
    only_any_alphanumeric: bool = False,
    only_alphabetic: bool = False,
    enable_checkpoint: bool = True,
    force_checkpoint: bool = False,
    passthrough_column: Optional[str] = None,
):
    """Consolidate the call arguments and forward them to ``main``.

    Every parameter of this function is captured with ``locals()`` and handed
    to ``consolidate_cli_arguments`` with ``options_filename`` named as the
    filename key. The dict it returns is expanded into keyword arguments for
    ``main``.

    NOTE(review): presumably ``consolidate_cli_arguments`` merges values read
    from the options file with the values given here -- confirm against its
    implementation, which is not part of this block.
    """
    # locals() must be evaluated before any other local name is bound, so the
    # captured mapping holds exactly the parameters above and nothing else.
    arguments: dict = consolidate_cli_arguments(arguments=locals(), filename_key='options_filename')
    main(**arguments)
def click_main(
    options_filename: str = None,
    config_filename: str = None,
    corpus_source: str = None,
    trained_model_folder: str = None,
    target_folder: str = None,
    target_name: str = None,
    lemmatize: bool = True,
    pos_includes: str = '',
    pos_excludes: str = '',
    to_lower: bool = True,
    remove_stopwords: str = None,
    min_word_length: int = 2,
    max_word_length: int = None,
    keep_symbols: bool = False,
    keep_numerals: bool = False,
    only_any_alphanumeric: bool = False,
    only_alphabetic: bool = False,
    minimum_probability: float = 0.001,
    n_tokens: int = 200,
    enable_checkpoint: bool = True,
    force_checkpoint: bool = False,
):
    """Validate the inputs, consolidate the arguments and forward them to ``main``.

    Prints an error with ``click.echo`` and exits with status 1 when:

    * ``config_filename`` is missing or is not an existing file,
    * ``target_name`` is ``None``,
    * ``trained_model_folder`` is ``None``,
    * ``trained_model_folder`` does not contain ``model_options.json``.

    Otherwise every parameter is captured with ``locals()`` and handed to
    ``consolidate_cli_arguments`` with ``options_filename`` as the filename
    key. ``trained_model_folder`` is split into its parent directory and last
    component, which are added to the result as ``model_folder`` and
    ``model_name`` before the dict is expanded into keyword arguments for
    ``main``.
    """
    # No local names may be bound before the locals() call further down, so
    # the validation here only reads parameters.

    # isfile(None) raises TypeError; treat an unspecified config file the same
    # way as one that does not exist.
    if config_filename is None or not isfile(config_filename):
        click.echo(f"error: config file {config_filename} not found")
        sys.exit(1)

    if target_name is None:
        click.echo("error: TARGET_NAME not specified")
        sys.exit(1)

    # split(None) further down would raise TypeError; fail with a clear message.
    if trained_model_folder is None:
        click.echo("error: TRAINED_MODEL_FOLDER not specified")
        sys.exit(1)

    arguments: dict = consolidate_cli_arguments(arguments=locals(), filename_key='options_filename')

    model_folder, model_name = split(trained_model_folder)
    arguments['model_folder'] = model_folder
    arguments['model_name'] = model_name

    # The error message refers to the specified folder, so look for the model
    # options inside trained_model_folder itself; previously its parent
    # directory (model_folder) was probed instead.
    if not isfile(join(trained_model_folder, "model_options.json")):
        click.echo("error: no model in specified folder")
        sys.exit(1)

    main(**arguments)