def load_app_features_context(config: AppConfig, download_manager: DownloadManager):
    """Build an ``AppFeaturesContext`` from the ``lookup`` section of the config.

    The ``country``, ``first_name`` and ``last_name`` entries of the optional
    ``lookup`` mapping are each handed to ``load_lookup_from_config`` together
    with ``download_manager``. An entry that is absent is passed on as ``None``.
    """
    lookup_section = config.get('lookup', {})

    def _load(lookup_name: str):
        # All three lookups are loaded the same way; only the config key differs.
        return load_lookup_from_config(
            lookup_section.get(lookup_name),
            download_manager=download_manager
        )

    return AppFeaturesContext(
        country_lookup=_load('country'),
        first_name_lookup=_load('first_name'),
        last_name_lookup=_load('last_name')
    )
def get_ocr_model_for_app_config(
    app_config: AppConfig,
    enabled: bool = True
) -> Optional[OpticalCharacterRecognitionModel]:
    """Return the OCR model configured under ``ocr_models.default``, if any.

    Returns ``None`` when ``enabled`` is falsy or when the config has no
    (truthy) ``default`` entry in its ``ocr_models`` section; otherwise returns
    the result of ``get_lazy_ocr_model_for_config`` for that entry.
    """
    default_model_config = app_config.get('ocr_models', {}).get('default')
    if not enabled or not default_model_config:
        return None
    return get_lazy_ocr_model_for_config(default_model_config)
def get_cv_model_for_app_config(
    app_config: AppConfig,
    enabled: bool = True
) -> Optional[ComputerVisionModel]:
    """Return the computer vision model configured under ``cv_models.default``, if any.

    Returns ``None`` when ``enabled`` is falsy or when the config has no
    (truthy) ``default`` entry in its ``cv_models`` section; otherwise returns
    the result of ``get_lazy_cv_model_for_config`` for that entry.
    """
    default_model_config = app_config.get('cv_models', {}).get('default')
    if not enabled or not default_model_config:
        return None
    return get_lazy_cv_model_for_config(default_model_config)
def test_should_override_bool_value_with_env_var(self, tmp_path: Path, env_vars_mock: dict):
    # The environment variable carries the string 'false' while the YAML file
    # holds a real boolean True.
    env_vars_mock['SCIENCEBEAM_PARSER__KEY1'] = 'false'
    yaml_file = tmp_path / 'config.yml'
    yaml_text = yaml.dump({'key1': True})
    yaml_file.write_text(yaml_text)
    loaded_config = AppConfig.load_yaml(str(yaml_file))
    overridden_config = loaded_config.apply_environment_variables()
    # Identity check: the override must yield the bool False, not the string.
    assert overridden_config.props['key1'] is False
def test_should_override_default_from_app_config(self, field_name: str, value: bool):
    # Place the single field under processors.fulltext of the app config.
    fulltext_props = {field_name: value}
    app_config = AppConfig(props={'processors': {'fulltext': fulltext_props}})
    processor_config = FullTextProcessorConfig.from_app_config(app_config=app_config)
    assert getattr(processor_config, field_name) is value
def test_should_preload_if_enabled(
    self,
    app_config: AppConfig,
    fulltext_models: MagicMock
):
    # Copy the fixture's props and switch preloading on for this test only.
    props_with_preload = dict(app_config.props)
    props_with_preload['preload_on_startup'] = True
    preloading_config = AppConfig(props_with_preload)
    ScienceBeamParser.from_config(preloading_config)
    fulltext_models.preload.assert_called()
def test_should_override_nested_value_with_env_var(self, tmp_path: Path, env_vars_mock: dict):
    env_vars_mock['SCIENCEBEAM_PARSER__PARENT1__KEY1'] = 'updated value1'
    yaml_file = tmp_path / 'config.yml'
    file_props = {'parent1': {'key1': 'original value1'}}
    yaml_file.write_text(yaml.dump(file_props))
    loaded_config = AppConfig.load_yaml(str(yaml_file))
    overridden_config = loaded_config.apply_environment_variables()
    assert overridden_config.props['parent1']['key1'] == 'updated value1'
    # The config that was loaded from the file must be left untouched.
    assert loaded_config.props['parent1']['key1'] == 'original value1'
def __init__(self, config: AppConfig):
    """Create all components of the parser from the given app config.

    The steps run in a fixed order: download manager, pdfalto wrapper
    (made executable), app context, full-text processor config and models
    (optionally preloaded), app features context, TEI-to-JATS transformer
    and finally the doc-to-pdf settings and converter wrapper.
    """
    self.config = config

    download_manager = DownloadManager(download_dir=get_download_dir(config))
    self.download_manager = download_manager

    # 'pdfalto.path' is mandatory: a missing key raises here.
    pdfalto_path = download_manager.download_if_url(config['pdfalto']['path'])
    self.pdfalto_wrapper = PdfAltoWrapper(pdfalto_path)
    self.pdfalto_wrapper.ensure_executable()

    wapiti_install_url = config.get('wapiti', {}).get('install_source')
    lazy_wapiti_binary_wrapper = LazyWapitiBinaryWrapper(
        install_url=wapiti_install_url,
        download_manager=download_manager
    )
    self.app_context = AppContext(
        app_config=config,
        download_manager=download_manager,
        lazy_wapiti_binary_wrapper=lazy_wapiti_binary_wrapper
    )

    self.fulltext_processor_config = FullTextProcessorConfig.from_app_config(
        app_config=config
    )
    self.fulltext_models = load_models(
        config,
        app_context=self.app_context,
        fulltext_processor_config=self.fulltext_processor_config
    )
    if config.get('preload_on_startup'):
        self.fulltext_models.preload()

    self.app_features_context = load_app_features_context(
        config,
        download_manager=download_manager
    )

    tei_to_jats_config = config.get('xslt', {}).get('tei_to_jats', {})
    xslt_parameters = tei_to_jats_config.get('parameters', {})
    self.tei_to_jats_xslt_transformer = XsltTransformerWrapper.from_template_file(
        TEI_TO_JATS_XSLT_FILE,
        xslt_template_parameters=xslt_parameters
    )

    # All doc-to-pdf settings are optional; conversion is enabled by default.
    doc_to_pdf_config = config.get('doc_to_pdf', {})
    self.doc_to_pdf_enabled = doc_to_pdf_config.get('enabled', True)
    self.doc_to_pdf_convert_parameters = doc_to_pdf_config.get('convert', {})
    listener_kwargs = doc_to_pdf_config.get('listener', {})
    self.doc_converter_wrapper = DocConverterWrapper(**listener_kwargs)
def run(args: argparse.Namespace):
    """Generate training data for every file matching ``args.source_path``.

    Loads the config from ``DEFAULT_CONFIG_FILE``, creates the parser, makes
    sure ``args.output_path`` exists and then processes each file returned by
    ``glob(args.source_path)``.
    """
    LOGGER.info('args: %r', args)
    target_directory = args.output_path
    app_config = AppConfig.load_yaml(DEFAULT_CONFIG_FILE)
    parser = ScienceBeamParser.from_config(app_config)
    LOGGER.info('output_path: %r', target_directory)
    os.makedirs(target_directory, exist_ok=True)
    matching_filenames = glob(args.source_path)
    for matching_filename in matching_filenames:
        generate_training_data_for_source_filename(
            matching_filename,
            output_path=target_directory,
            sciencebeam_parser=parser,
            use_model=args.use_model,
            use_directory_structure=args.use_directory_structure
        )
def main(argv=None):
    """Parse the arguments, set up logging from the config and run the app.

    The config is loaded from ``DEFAULT_CONFIG_FILE`` with environment
    variable overrides applied. If it has a ``logging`` section, the
    directories of all handler ``filename`` values are created before the
    section is passed to ``dictConfig``.
    """
    args = parse_args(argv)
    loaded_config = AppConfig.load_yaml(DEFAULT_CONFIG_FILE)
    config = loaded_config.apply_environment_variables()

    logging_config = config.get('logging')
    if logging_config:
        handler_configs = logging_config.get('handlers', {})
        for handler_config in handler_configs.values():
            log_filename = handler_config.get('filename')
            # Handlers without a filename, or with a bare filename, need no directory.
            log_directory = os.path.dirname(log_filename) if log_filename else ''
            if log_directory:
                os.makedirs(log_directory, exist_ok=True)
        try:
            dictConfig(logging_config)
        except ValueError:
            # Log the rejected logging config before re-raising.
            LOGGER.info('logging_config: %r', logging_config)
            raise

    LOGGER.info('app config: %s', config)
    app = create_app_for_config(config)
    app.run(port=args.port, host=args.host, threaded=True)
def _app_config() -> AppConfig:
    """Return the app config loaded from ``DEFAULT_CONFIG_FILE``."""
    default_app_config = AppConfig.load_yaml(DEFAULT_CONFIG_FILE)
    return default_app_config
def from_app_config(app_config: AppConfig) -> 'FullTextProcessorConfig':
    """Create a config from the defaults, overridden by the app config.

    Every entry of the optional ``processors.fulltext`` mapping replaces the
    field of the same name on a default ``FullTextProcessorConfig``.
    """
    default_config = FullTextProcessorConfig()
    processors_config = app_config.get('processors', {})
    fulltext_overrides = processors_config.get('fulltext', {})
    return default_config._replace(**fulltext_overrides)
def test_should_load_yaml(self, tmp_path: Path):
    # Write a minimal YAML file and check that its value is exposed via props.
    yaml_file = tmp_path / 'config.yml'
    yaml_text = yaml.dump({'key1': 'value1'})
    yaml_file.write_text(yaml_text)
    loaded_config = AppConfig.load_yaml(str(yaml_file))
    assert loaded_config.props['key1'] == 'value1'