Exemple #1
0
    def pretrained_from_config(config,
                               top=1,
                               use_gpu=True,
                               batch_size=32,
                               cache_size=10000):
        """Build a disambiguator from the settings stored in a config file.

        The config is read with `_read_json` and must provide the keys
        `model_path`, `feature`, `db_path`, `backoff`, `scorer`,
        `tie_breaker` and `ranking_cache`.

        Args:
            config (:obj:`str`): Config file that defines the model
                details. This argument is required.
            top (:obj:`int`, optional): The maximum number of top analyses
                to return. Defaults to 1.
            use_gpu (:obj:`bool`, optional): The flag to use a GPU or not.
                Defaults to True.
            batch_size (:obj:`int`, optional): The batch size.
                Defaults to 32.
            cache_size (:obj:`int`, optional): Forwarded to the analyzer.
                If greater than zero, then the analyzer will cache the
                analyses for the cache_size most frequent words, otherwise
                no analyses will be cached. Defaults to 10000.

        Returns:
            :obj:`BERTUnfactoredDisambiguator`: Instance with loaded
            pre-trained model.
        """

        settings = _read_json(config)

        bert_path = settings['model_path']
        feature_set = FEATURE_SET_MAP[settings['feature']]

        # The morphology database is opened with the 'a' flag and wrapped
        # in an analyzer configured from the file's backoff setting.
        morph_analyzer = Analyzer(MorphologyDB(settings['db_path'], 'a'),
                                  backoff=settings['backoff'],
                                  cache_size=cache_size)

        # Remaining disambiguator options: caller-supplied arguments mixed
        # with values taken from the config file.
        options = {
            'top': top,
            'features': feature_set,
            'scorer': settings['scorer'],
            'tie_breaker': settings['tie_breaker'],
            'use_gpu': use_gpu,
            'batch_size': batch_size,
            'ranking_cache': settings['ranking_cache'],
        }

        return BERTUnfactoredDisambiguator(bert_path,
                                           morph_analyzer,
                                           **options)
Exemple #2
0
def main():  # pragma: no cover
    """Command-line entry point: analyze, generate or reinflect.

    Parses the command line with docopt, validates the backoff mode, opens
    the input/output streams and the morphology database, then dispatches
    to `_analyze`, `_generate` or `_reinflect` depending on the chosen
    command.

    This function does not return normally; every path ends in
    `sys.exit`. The exit status is 0 on success (including a successful
    `--list`) and 1 after any reported error or a keyboard interrupt.
    Error messages are written to stderr.
    """
    try:
        version = 'CAMeL Tools v{}'.format(__version__)
        arguments = docopt(__doc__, version=version)

        if arguments.get('--list', False):
            _list_dbs()
            # Listing is an informational command that completed
            # successfully, so report success rather than failure.
            sys.exit(0)

        analyze = arguments.get('analyze', False)
        generate = arguments.get('generate', False)
        reinflect = arguments.get('reinflect', False)

        cache = arguments.get('--cache', False)
        backoff = arguments.get('--backoff', 'NONE')

        # Make sure we have a valid backoff mode. The key may be present
        # with a None value, in which case we fall back to 'NONE'.
        if backoff is None:
            backoff = 'NONE'
        if ((analyze and backoff not in _ANALYSIS_BACKOFFS) or
                (generate and backoff not in _GENARATION_BACKOFFS)):
            sys.stderr.write('Error: invalid backoff mode.\n')
            sys.exit(1)

        # Open files (or just use stdin and stdout)
        fin, fout = _open_files(arguments['FILE'], arguments['--output'])

        # Determine required DB flags
        if analyze:
            dbflags = 'a'
        elif generate and backoff == 'NONE':
            dbflags = 'g'
        else:
            dbflags = 'r'

        # Load DB: built-in databases are looked up by name, anything else
        # is passed to MorphologyDB directly.
        try:
            dbname = arguments.get('--db', _DEFAULT_DB)
            if dbname in _BUILTIN_DBS:
                db = MorphologyDB.builtin_db(dbname, dbflags)
            else:
                db = MorphologyDB(dbname, dbflags)
        except DatabaseError:
            sys.stderr.write('Error: Couldn\'t parse database.\n')
            sys.exit(1)
        except IOError:
            sys.stderr.write('Error: Database file could not be read.\n')
            sys.exit(1)

        # Continue execution in requested mode. Each mode keeps its own
        # handlers; anything not handled here falls through to the generic
        # handler at the bottom.
        if analyze:
            try:
                _analyze(db, fin, fout, backoff, cache)
            except AnalyzerError as error:
                sys.stderr.write('Error: {}\n'.format(error.msg))
                sys.exit(1)
            except IOError:
                sys.stderr.write('Error: An IO error occurred.\n')
                sys.exit(1)

        elif generate:
            try:
                _generate(db, fin, fout, backoff)
            except IOError:
                sys.stderr.write('Error: An IO error occurred.\n')
                sys.exit(1)

        elif reinflect:
            try:
                _reinflect(db, fin, fout)
            except IOError:
                sys.stderr.write('Error: An IO error occurred.\n')
                sys.exit(1)

        sys.exit(0)

    except KeyboardInterrupt:
        sys.stderr.write('Exiting...\n')
        sys.exit(1)
    # SystemExit derives from BaseException, so the sys.exit calls above
    # are not intercepted by this last-resort handler.
    except Exception:
        sys.stderr.write('Error: An unknown error occurred.\n')
        sys.exit(1)