Example #1
0
def load_app_features_context(config: AppConfig,
                              download_manager: DownloadManager):
    """Build the ``AppFeaturesContext`` from the ``lookup`` config section.

    The ``country``, ``first_name`` and ``last_name`` entries of the
    ``lookup`` section (each may be missing, in which case ``None`` is
    passed on) are handed to ``load_lookup_from_config`` together with the
    shared ``download_manager``.
    """
    lookup_section = config.get('lookup', {})

    def _load(lookup_name: str):
        # All lookups are loaded the same way; only the config key differs.
        return load_lookup_from_config(
            lookup_section.get(lookup_name),
            download_manager=download_manager)

    return AppFeaturesContext(
        country_lookup=_load('country'),
        first_name_lookup=_load('first_name'),
        last_name_lookup=_load('last_name'))
Example #2
0
def get_ocr_model_for_app_config(
        app_config: AppConfig,
        enabled: bool = True) -> Optional[OpticalCharacterRecognitionModel]:
    """Return the OCR model for the ``ocr_models.default`` config, if any.

    ``None`` is returned when ``enabled`` is false or when the app config
    has no truthy ``ocr_models.default`` entry; otherwise the result of
    ``get_lazy_ocr_model_for_config`` for that entry is returned.
    """
    default_model_config = app_config.get('ocr_models', {}).get('default')
    if not enabled or not default_model_config:
        return None
    return get_lazy_ocr_model_for_config(default_model_config)
Example #3
0
def get_cv_model_for_app_config(
        app_config: AppConfig,
        enabled: bool = True) -> Optional[ComputerVisionModel]:
    """Return the CV model for the ``cv_models.default`` config, if any.

    ``None`` is returned when ``enabled`` is false or when the app config
    has no truthy ``cv_models.default`` entry; otherwise the result of
    ``get_lazy_cv_model_for_config`` for that entry is returned.
    """
    default_model_config = app_config.get('cv_models', {}).get('default')
    if not enabled or not default_model_config:
        return None
    return get_lazy_cv_model_for_config(default_model_config)
Example #4
0
 def test_should_override_bool_value_with_env_var(self, tmp_path: Path,
                                                  env_vars_mock: dict):
     """An env var value of ``'false'`` must end up as boolean ``False``."""
     # The YAML file stores ``True``; the environment variable overrides it.
     env_vars_mock['SCIENCEBEAM_PARSER__KEY1'] = 'false'
     yaml_file = tmp_path / 'config.yml'
     yaml_file.write_text(yaml.dump({'key1': True}))
     loaded_config = AppConfig.load_yaml(str(yaml_file))
     overridden_config = loaded_config.apply_environment_variables()
     assert overridden_config.props['key1'] is False
Example #5
0
 def test_should_override_default_from_app_config(self, field_name: str, value: bool):
     """A value under ``processors.fulltext`` must show up on the config."""
     fulltext_props = {field_name: value}
     app_config = AppConfig(props={'processors': {'fulltext': fulltext_props}})
     processor_config = FullTextProcessorConfig.from_app_config(
         app_config=app_config)
     # Identity check: the exact boolean object passed in is expected back.
     assert getattr(processor_config, field_name) is value
Example #6
0
 def test_should_preload_if_enabled(
     self,
     app_config: AppConfig,
     fulltext_models: MagicMock
 ):
     """With ``preload_on_startup`` set, ``preload`` must be called."""
     # Copy the fixture's props and switch preloading on.
     props_with_preload = {
         **app_config.props,
         'preload_on_startup': True
     }
     preload_config = AppConfig(props_with_preload)
     ScienceBeamParser.from_config(preload_config)
     fulltext_models.preload.assert_called()
Example #7
0
 def test_should_override_nested_value_with_env_var(self, tmp_path: Path,
                                                    env_vars_mock: dict):
     """A ``PARENT1__KEY1`` env var overrides ``parent1.key1`` in a new config.

     The config that was loaded from YAML must keep its original value.
     """
     env_vars_mock['SCIENCEBEAM_PARSER__PARENT1__KEY1'] = 'updated value1'
     yaml_props = {'parent1': {'key1': 'original value1'}}
     yaml_file = tmp_path / 'config.yml'
     yaml_file.write_text(yaml.dump(yaml_props))
     original_config = AppConfig.load_yaml(str(yaml_file))
     overridden_config = original_config.apply_environment_variables()
     assert overridden_config.props['parent1']['key1'] == 'updated value1'
     # Applying the environment variables must not mutate the source config.
     assert original_config.props['parent1']['key1'] == 'original value1'
Example #8
0
 def __init__(self, config: AppConfig):
     """Wire up all components from the given application config.

     Creates, in order: the download manager, the pdfalto wrapper (made
     executable), the app context with a lazy wapiti binary wrapper, the
     fulltext processor config and models (preloaded when the
     ``preload_on_startup`` setting is truthy), the app features context,
     the TEI to JATS XSLT transformer and the doc-to-pdf settings/wrapper.
     """
     self.config = config

     download_dir = get_download_dir(config)
     self.download_manager = DownloadManager(download_dir=download_dir)

     # ``pdfalto.path`` is mandatory (plain indexing, no default).
     pdfalto_path = self.download_manager.download_if_url(
         config['pdfalto']['path'])
     self.pdfalto_wrapper = PdfAltoWrapper(pdfalto_path)
     self.pdfalto_wrapper.ensure_executable()

     wapiti_install_url = config.get('wapiti', {}).get('install_source')
     lazy_wapiti_binary_wrapper = LazyWapitiBinaryWrapper(
         install_url=wapiti_install_url,
         download_manager=self.download_manager)
     self.app_context = AppContext(
         app_config=config,
         download_manager=self.download_manager,
         lazy_wapiti_binary_wrapper=lazy_wapiti_binary_wrapper)

     self.fulltext_processor_config = FullTextProcessorConfig.from_app_config(
         app_config=config)
     self.fulltext_models = load_models(
         config,
         app_context=self.app_context,
         fulltext_processor_config=self.fulltext_processor_config)
     if config.get('preload_on_startup'):
         self.fulltext_models.preload()

     self.app_features_context = load_app_features_context(
         config, download_manager=self.download_manager)

     tei_to_jats_config = config.get('xslt', {}).get('tei_to_jats', {})
     xslt_parameters = tei_to_jats_config.get('parameters', {})
     self.tei_to_jats_xslt_transformer = XsltTransformerWrapper.from_template_file(
         TEI_TO_JATS_XSLT_FILE,
         xslt_template_parameters=xslt_parameters)

     # All doc-to-pdf settings live in one optional config section.
     doc_to_pdf_config = config.get('doc_to_pdf', {})
     self.doc_to_pdf_enabled = doc_to_pdf_config.get('enabled', True)
     self.doc_to_pdf_convert_parameters = doc_to_pdf_config.get('convert', {})
     listener_kwargs = doc_to_pdf_config.get('listener', {})
     self.doc_converter_wrapper = DocConverterWrapper(**listener_kwargs)
def run(args: argparse.Namespace):
    """Generate training data for every file matching ``args.source_path``.

    Loads the config from ``DEFAULT_CONFIG_FILE``, creates a
    ``ScienceBeamParser`` from it, makes sure ``args.output_path`` exists and
    then calls ``generate_training_data_for_source_filename`` once per file
    matched by the ``args.source_path`` glob pattern.
    """
    LOGGER.info('args: %r', args)
    output_path = args.output_path
    sciencebeam_parser = ScienceBeamParser.from_config(
        AppConfig.load_yaml(DEFAULT_CONFIG_FILE))
    LOGGER.info('output_path: %r', output_path)
    os.makedirs(output_path, exist_ok=True)
    matched_filenames = glob(args.source_path)
    for source_filename in matched_filenames:
        generate_training_data_for_source_filename(
            source_filename,
            output_path=output_path,
            sciencebeam_parser=sciencebeam_parser,
            use_model=args.use_model,
            use_directory_structure=args.use_directory_structure)
Example #10
0
def main(argv=None):
    """Parse arguments, configure logging and run the app.

    The config is loaded from ``DEFAULT_CONFIG_FILE`` with environment
    variables applied. If it has a ``logging`` section, the directory of
    every handler ``filename`` is created before the section is passed to
    ``dictConfig``; a ``ValueError`` from ``dictConfig`` is logged together
    with the offending config and re-raised.
    """
    args = parse_args(argv)
    loaded_config = AppConfig.load_yaml(DEFAULT_CONFIG_FILE)
    config = loaded_config.apply_environment_variables()
    logging_config = config.get('logging')
    if logging_config:
        handlers = logging_config.get('handlers', {})
        for handler_config in handlers.values():
            filename = handler_config.get('filename')
            # Handlers without a file, or with a bare file name, need no directory.
            log_dir = os.path.dirname(filename) if filename else None
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
        try:
            dictConfig(logging_config)
        except ValueError:
            LOGGER.info('logging_config: %r', logging_config)
            raise
    LOGGER.info('app config: %s', config)
    app = create_app_for_config(config)
    app.run(port=args.port, host=args.host, threaded=True)
Example #11
0
def _app_config() -> AppConfig:
    """Load and return the app config from ``DEFAULT_CONFIG_FILE``."""
    default_config = AppConfig.load_yaml(DEFAULT_CONFIG_FILE)
    return default_config
Example #12
0
 def from_app_config(app_config: AppConfig) -> 'FullTextProcessorConfig':
     """Create a config with defaults overridden by ``processors.fulltext``.

     Starts from ``FullTextProcessorConfig()`` and applies every key of the
     ``processors.fulltext`` app config section (if present) via
     ``_replace``.
     """
     default_config = FullTextProcessorConfig()
     fulltext_overrides = app_config.get('processors', {}).get('fulltext', {})
     return default_config._replace(**fulltext_overrides)
Example #13
0
 def test_should_load_yaml(self, tmp_path: Path):
     """Values written to a YAML file must be available via ``props``."""
     yaml_file = tmp_path / 'config.yml'
     yaml_file.write_text(yaml.dump({'key1': 'value1'}))
     loaded_config = AppConfig.load_yaml(str(yaml_file))
     assert loaded_config.props['key1'] == 'value1'