def list_datasets(name=None):
    """Get valid datasets and registered parameters.

    Parameters
    ----------
    name : str or None, default None
        Return names and registered parameters of registered datasets. If
        name is specified, only registered parameters of the respective
        dataset are returned. The lookup is case-insensitive (the name is
        lower-cased before it is looked up in the registry).

    Returns
    -------
    dict:
        A dict of all the valid keyword parameters names for the specified
        dataset. If name is set to None, returns a dict mapping each valid
        name to its respective keyword parameter dict.

    Raises
    ------
    KeyError
        If `name` is given but is not a registered dataset name.

    """
    # NOTE(review): the previous docstring said the valid names can be plugged
    # into `gluonnlp.model.word_evaluation_model.create(name)`, which looks
    # copy-pasted from the evaluation-function registry -- confirm which
    # factory actually consumes dataset names before documenting it here.
    reg = registry.get_registry(Dataset)

    if name is not None:
        key = name.lower()
        if key not in reg:
            # Same exception type as the bare lookup would raise, but with a
            # hint on how to discover the valid names (as list_sources does).
            raise KeyError(
                'Cannot find `name` {}. Use '
                '`list_datasets(name=None).keys()` to get all the valid '
                'dataset names.'.format(name))
        return _REGSITRY_NAME_KWARGS[reg[key]]

    # Reuse the registry fetched above instead of looking it up a second time.
    return {
        dataset_name: _REGSITRY_NAME_KWARGS[class_]
        for dataset_name, class_ in reg.items()
    }
def list_datasets(name=None):
    """Get valid datasets and registered parameters.

    Parameters
    ----------
    name : str or None, default None
        Return names and registered parameters of registered datasets. If
        name is specified, only registered parameters of the respective
        dataset are returned. The lookup is case-insensitive (the name is
        lower-cased before it is looked up in the registry).

    Returns
    -------
    dict:
        A dict of all the valid keyword parameters names for the specified
        dataset. If name is set to None, returns a dict mapping each valid
        name to its respective keyword parameter dict.

    Raises
    ------
    KeyError
        If `name` is given but is not a registered dataset name.

    """
    # NOTE(review): the previous docstring said the valid names can be plugged
    # into `gluonnlp.model.word_evaluation_model.create(name)`, which looks
    # copy-pasted from the evaluation-function registry -- confirm which
    # factory actually consumes dataset names before documenting it here.
    reg = registry.get_registry(Dataset)

    if name is not None:
        key = name.lower()
        if key not in reg:
            # Same exception type as the bare lookup would raise, but with a
            # hint on how to discover the valid names (as list_sources does).
            raise KeyError(
                'Cannot find `name` {}. Use '
                '`list_datasets(name=None).keys()` to get all the valid '
                'dataset names.'.format(name))
        return _REGSITRY_NAME_KWARGS[reg[key]]

    # Reuse the registry fetched above instead of looking it up a second time.
    return {
        dataset_name: _REGSITRY_NAME_KWARGS[class_]
        for dataset_name, class_ in reg.items()
    }
def list_evaluation_functions(kind=None):
    """Get valid word embedding functions names.

    Parameters
    ----------
    kind : ['similarity', 'analogy', None]
        Return only valid names for similarity, analogy or both kinds of
        functions. A tuple of kinds is also accepted and is handled like
        None, restricted to the kinds listed in the tuple.

    Returns
    -------
    dict or list:
        A list of all the valid evaluation function names for the specified
        kind. If kind is set to None (or a tuple of kinds), returns a dict
        mapping each kind to its respective list of valid names. The valid
        names can be plugged in
        `gluonnlp.model.word_evaluation_model.create(name)`.

    Raises
    ------
    KeyError
        If `kind` (or an entry of a tuple `kind`) is not a known kind.

    """
    if kind is None:
        kind = tuple(_REGSITRY_KIND_CLASS_MAP.keys())

    if isinstance(kind, tuple):
        # One recursive call per kind; each call takes the single-kind path
        # below and contributes one list to the mapping.
        return {name: list_evaluation_functions(kind=name) for name in kind}

    if kind not in _REGSITRY_KIND_CLASS_MAP:
        # Adjacent string literals are concatenated verbatim, so each piece
        # must carry its own trailing space.
        raise KeyError(
            'Cannot find `kind` {}. Use '
            '`list_evaluation_functions(kind=None).keys()` to get all the '
            'valid kinds of evaluation functions.'.format(kind))

    reg = registry.get_registry(_REGSITRY_KIND_CLASS_MAP[kind])
    return list(reg.keys())
def list_evaluation_functions(kind=None):
    """Get valid word embedding functions names.

    Parameters
    ----------
    kind : ['similarity', 'analogy', None]
        Return only valid names for similarity, analogy or both kinds of
        functions. A tuple of kinds is also accepted and is handled like
        None, restricted to the kinds listed in the tuple.

    Returns
    -------
    dict or list:
        A list of all the valid evaluation function names for the specified
        kind. If kind is set to None (or a tuple of kinds), returns a dict
        mapping each kind to its respective list of valid names. The valid
        names can be plugged in
        `gluonnlp.model.word_evaluation_model.create(name)`.

    Raises
    ------
    KeyError
        If `kind` (or an entry of a tuple `kind`) is not a known kind.

    """
    if kind is None:
        kind = tuple(_REGSITRY_KIND_CLASS_MAP.keys())

    if isinstance(kind, tuple):
        # One recursive call per kind; each call takes the single-kind path
        # below and contributes one list to the mapping.
        return {name: list_evaluation_functions(kind=name) for name in kind}

    if kind not in _REGSITRY_KIND_CLASS_MAP:
        # Adjacent string literals are concatenated verbatim, so each piece
        # must carry its own trailing space.
        raise KeyError(
            'Cannot find `kind` {}. Use '
            '`list_evaluation_functions(kind=None).keys()` to get all the '
            'valid kinds of evaluation functions.'.format(kind))

    reg = registry.get_registry(_REGSITRY_KIND_CLASS_MAP[kind])
    return list(reg.keys())
def list_sources(embedding_name=None):
    """Get valid token embedding names and their pre-trained file names.

    To load token embedding vectors from an externally hosted pre-trained
    token embedding file, such as those of GloVe and FastText, one should use
    `gluonnlp.embedding.create(embedding_name, source)`. This method returns
    all the valid names of `source` for the specified `embedding_name`. If
    `embedding_name` is set to None, this method returns all the valid names
    of `embedding_name` with their associated `source`.

    Parameters
    ----------
    embedding_name : str or None, default None
        The pre-trained token embedding name. The lookup is case-insensitive
        (the name is lower-cased before it is looked up in the registry).

    Returns
    -------
    dict or list:
        A list of all the valid pre-trained token embedding file names
        (`source`) for the specified token embedding name (`embedding_name`).
        If the text embedding name is set to None, returns a dict mapping
        each valid token embedding name to a list of valid pre-trained files
        (`source`). They can be plugged into
        `gluonnlp.embedding.create(embedding_name, source)`.

    Raises
    ------
    KeyError
        If `embedding_name` is given but is not a registered embedding name.

    """
    text_embedding_reg = registry.get_registry(TokenEmbedding)

    if embedding_name is not None:
        embedding_name = embedding_name.lower()
        if embedding_name not in text_embedding_reg:
            # Adjacent string literals are concatenated verbatim, so each
            # piece must carry its own trailing space.
            raise KeyError(
                'Cannot find `embedding_name` {}. Use '
                '`list_sources(embedding_name=None).keys()` to get all the '
                'valid embedding names.'.format(embedding_name))
        return list(text_embedding_reg[embedding_name].source_file_hash.keys())

    # Reuse the registry fetched above; distinct loop names avoid shadowing
    # the `embedding_name` parameter.
    return {
        reg_name: list(embedding_cls.source_file_hash.keys())
        for reg_name, embedding_cls in text_embedding_reg.items()
    }
def list_sources(embedding_name=None):
    """Get valid token embedding names and their pre-trained file names.

    To load token embedding vectors from an externally hosted pre-trained
    token embedding file, such as those of GloVe and FastText, one should use
    `gluonnlp.embedding.create(embedding_name, source)`. This method returns
    all the valid names of `source` for the specified `embedding_name`. If
    `embedding_name` is set to None, this method returns all the valid names
    of `embedding_name` with their associated `source`.

    Parameters
    ----------
    embedding_name : str or None, default None
        The pre-trained token embedding name. The lookup is case-insensitive
        (the name is lower-cased before it is looked up in the registry).

    Returns
    -------
    dict or list:
        A list of all the valid pre-trained token embedding file names
        (`source`) for the specified token embedding name (`embedding_name`).
        If the text embedding name is set to None, returns a dict mapping
        each valid token embedding name to a list of valid pre-trained files
        (`source`). They can be plugged into
        `gluonnlp.embedding.create(embedding_name, source)`.

    Raises
    ------
    KeyError
        If `embedding_name` is given but is not a registered embedding name.

    """
    text_embedding_reg = registry.get_registry(TokenEmbedding)

    if embedding_name is not None:
        embedding_name = embedding_name.lower()
        if embedding_name not in text_embedding_reg:
            # Adjacent string literals are concatenated verbatim, so each
            # piece must carry its own trailing space.
            raise KeyError(
                'Cannot find `embedding_name` {}. Use '
                '`list_sources(embedding_name=None).keys()` to get all the '
                'valid embedding names.'.format(embedding_name))
        return list(text_embedding_reg[embedding_name].source_file_hash.keys())

    # Reuse the registry fetched above; distinct loop names avoid shadowing
    # the `embedding_name` parameter.
    return {
        reg_name: list(embedding_cls.source_file_hash.keys())
        for reg_name, embedding_cls in text_embedding_reg.items()
    }
def list_subword_functions():
    """Get valid subword function names.

    Returns
    -------
    list:
        The keys of the registry obtained for `SubwordFunction`.

    """
    return list(registry.get_registry(SubwordFunction).keys())