def pretrained(model_name=None, analyzer=None, top=1, cache_size=100000):
    """Build an MLE disambiguator from a pre-trained CAMeL Tools model.

    Args:
        model_name (:obj:`str`, optional): Name of the pre-trained model to
            load. When None, the default model ('calima-msa-r13') is used.
            Currently the available model names are the same as those
            listed in :ref:`camel_morphology_dbs`. Defaults to None.
        analyzer (:obj:`Analyzer`, optional): Analyzer to use instead of the
            model's default one. When None, the default analyzer registered
            for the model is instantiated. Defaults to None.
        top (:obj:`int`, optional): Upper bound on the number of top
            analyses returned. Defaults to 1.
        cache_size (:obj:`int`, optional): How many unique word
            disambiguations are cached. The cache evicts entries using a
            least-frequently-used policy. Defaults to 100000.

    Returns:
        :obj:`MLEDisambiguator`: The loaded MLE disambiguator.
    """
    info = DataCatalogue.get_dataset_info('DisambigMLE', model_name)
    model_file = info.path / 'model.json'

    # Only instantiate the model's default analyzer when the caller did not
    # provide one of their own.
    chosen_analyzer = analyzer
    if chosen_analyzer is None:
        default_analyzer_factory = _MLE_ANALYZER_MAP[info.name]
        chosen_analyzer = default_analyzer_factory()

    return MLEDisambiguator(chosen_analyzer, str(model_file), top, cache_size)
def pretrained(model_name=None):
    """Create a named entity recognizer from a camel_tools pre-trained model.

    Args:
        model_name (:obj:`str`, optional): Name of the pre-trained model to
            load. One model is available: 'arabert'. When None, the default
            model ('arabert') is loaded. Defaults to None.

    Returns:
        :obj:`NERecognizer`: Instance with loaded pre-trained model.
    """
    info = DataCatalogue.get_dataset_info('NamedEntityRecognition',
                                          model_name)
    # The recognizer is given the dataset location as a plain string.
    return NERecognizer(str(info.path))
def pretrained(model_name=None):
    """Create a sentiment analyzer from a camel_tools pre-trained model.

    Args:
        model_name (:obj:`str`, optional): Name of the pre-trained model to
            load. Two models are available: 'arabert' and 'mbert'. When
            None, the default model ('arabert') is loaded.
            Defaults to None.

    Returns:
        :obj:`SentimentAnalyzer`: Instance with loaded pre-trained model.
    """
    info = DataCatalogue.get_dataset_info('SentimentAnalysis', model_name)
    # The analyzer is given the dataset location as a plain string.
    return SentimentAnalyzer(str(info.path))
def builtin_db(db_name='calima-msa-r13', flags='a'):
    """Instantiate a :obj:`MorphologyDB` from one of the bundled databases.

    Args:
        db_name (:obj:`str`, optional): Name of the builtin database. Use
            :meth:`list_builtin_dbs` to list the builtin databases, or see
            :ref:`camel_morphology_dbs`. Defaults to 'calima-msa-r13'.
        flags (:obj:`str`, optional): Flag string forwarded to the
            :obj:`MorphologyDB` constructor. Defaults to 'a'.

    Returns:
        :obj:`MorphologyDB`: Instance of builtin database with given flags.
    """
    info = DataCatalogue.get_dataset_info('MorphologyDB', db_name)
    # The database file sits directly under the dataset's directory.
    db_file = Path(info.path) / 'morphology.db'
    return MorphologyDB(str(db_file), flags)
'JED': 'Gulf', 'JER': 'Levant', 'KHA': 'Nile Basin', 'MOS': 'Gulf', 'MSA': 'Modern Standard Arabic', 'MUS': 'Gulf', 'RAB': 'Maghreb', 'RIY': 'Gulf', 'SAL': 'Levant', 'SAN': 'Gulf of Aden', 'SFX': 'Maghreb', 'TRI': 'Maghreb', 'TUN': 'Maghreb' } _DATA_DIR = DataCatalogue.get_dataset_info('DialectID').path _CHAR_LM_DIR = Path(_DATA_DIR, 'lm', 'char') _WORD_LM_DIR = Path(_DATA_DIR, 'lm', 'word') _TRAIN_DATA_PATH = Path(_DATA_DIR, 'corpus_26_train.tsv') _TRAIN_DATA_EXTRA_PATH = Path(_DATA_DIR, 'corpus_6_train.tsv') _DEV_DATA_PATH = Path(_DATA_DIR, 'corpus_26_dev.tsv') _TEST_DATA_PATH = Path(_DATA_DIR, 'corpus_26_test.tsv') class DIDPred(collections.namedtuple('DIDPred', ['top', 'scores'])): """A named tuple containing dialect ID prediction results. Attributes: top (:obj:`str`): The dialect label with the highest score. See :ref:`dialectid_labels` for a list of output labels. scores (:obj:`dict`): A dictionary mapping each dialect label to it's