예제 #1
0
def list_datasets(name=None):
    """Get valid datasets and registered parameters.

    Parameters
    ----------
    name : str or None, default None
        Name of a registered dataset. The name is lower-cased before it is
        looked up, so matching is case-insensitive. If None, every registered
        dataset is listed.

    Returns
    -------
    dict:
        A dict of all the valid keyword parameters names for the specified
        dataset. If name is set to None, returns a dict mapping each valid name
        to its respective keyword parameter dict. The valid names can be
        plugged in `gluonnlp.model.word_evaluation_model.create(name)`.

    Raises
    ------
    KeyError
        If `name` is given but is not a registered dataset name.

    """
    # NOTE(review): the `create` reference in the docstring is carried over
    # unchanged from the original; confirm it points at the dataset factory.
    reg = registry.get_registry(Dataset)

    if name is None:
        # Reuse the registry fetched above instead of looking it up again.
        return {
            dataset_name: _REGSITRY_NAME_KWARGS[class_]
            for dataset_name, class_ in reg.items()
        }

    name = name.lower()
    if name not in reg:
        # Same exception type as the bare lookup raised before, but with a
        # message that tells the caller how to discover the valid names.
        raise KeyError(
            'Cannot find `name` {}. Use '
            '`list_datasets(name=None).keys()` to get all the valid '
            'dataset names.'.format(name))
    return _REGSITRY_NAME_KWARGS[reg[name]]
예제 #2
0
def list_datasets(name=None):
    """Get valid datasets and registered parameters.

    Parameters
    ----------
    name : str or None, default None
        Name of a registered dataset. The name is lower-cased before it is
        looked up, so matching is case-insensitive. If None, every registered
        dataset is listed.

    Returns
    -------
    dict:
        A dict of all the valid keyword parameters names for the specified
        dataset. If name is set to None, returns a dict mapping each valid name
        to its respective keyword parameter dict. The valid names can be
        plugged in `gluonnlp.model.word_evaluation_model.create(name)`.

    Raises
    ------
    KeyError
        If `name` is given but is not a registered dataset name.

    """
    # NOTE(review): the `create` reference in the docstring is carried over
    # unchanged from the original; confirm it points at the dataset factory.
    reg = registry.get_registry(Dataset)

    if name is None:
        # Reuse the registry fetched above instead of looking it up again.
        return {
            dataset_name: _REGSITRY_NAME_KWARGS[class_]
            for dataset_name, class_ in reg.items()
        }

    name = name.lower()
    if name not in reg:
        # Same exception type as the bare lookup raised before, but with a
        # message that tells the caller how to discover the valid names.
        raise KeyError(
            'Cannot find `name` {}. Use '
            '`list_datasets(name=None).keys()` to get all the valid '
            'dataset names.'.format(name))
    return _REGSITRY_NAME_KWARGS[reg[name]]
예제 #3
0
def list_evaluation_functions(kind=None):
    """Get valid word embedding evaluation function names.

    Parameters
    ----------
    kind : ['similarity', 'analogy', None] or tuple
        Return only valid names for similarity, analogy or both kinds of
        functions. A tuple of kinds is also accepted and is handled like
        None, restricted to the kinds it contains.

    Returns
    -------
    dict or list:
        A list of all the valid evaluation function names for the specified
        kind. If kind is set to None (or a tuple of kinds), returns a dict
        mapping each kind to the list of valid function names of that kind.
        The valid names can be plugged in
        `gluonnlp.model.word_evaluation_model.create(name)`.

    Raises
    ------
    KeyError
        If `kind` (or any element of a tuple `kind`) is not a known kind.

    """

    if kind is None:
        kind = tuple(_REGSITRY_KIND_CLASS_MAP.keys())

    if isinstance(kind, tuple):
        # One recursive call per kind; each call takes the single-kind path
        # below and therefore validates its own argument.
        return {name: list_evaluation_functions(kind=name) for name in kind}

    if kind not in _REGSITRY_KIND_CLASS_MAP:
        # Trailing space after "the": adjacent string literals are joined
        # with no separator, which previously produced "all thevalid kinds".
        raise KeyError(
            'Cannot find `kind` {}. Use '
            '`list_evaluation_functions(kind=None).keys()` to get all the '
            'valid kinds of evaluation functions.'.format(kind))

    reg = registry.get_registry(_REGSITRY_KIND_CLASS_MAP[kind])
    return list(reg.keys())
예제 #4
0
def list_evaluation_functions(kind=None):
    """Get valid word embedding evaluation function names.

    Parameters
    ----------
    kind : ['similarity', 'analogy', None] or tuple
        Return only valid names for similarity, analogy or both kinds of
        functions. A tuple of kinds is also accepted and is handled like
        None, restricted to the kinds it contains.

    Returns
    -------
    dict or list:
        A list of all the valid evaluation function names for the specified
        kind. If kind is set to None (or a tuple of kinds), returns a dict
        mapping each kind to the list of valid function names of that kind.
        The valid names can be plugged in
        `gluonnlp.model.word_evaluation_model.create(name)`.

    Raises
    ------
    KeyError
        If `kind` (or any element of a tuple `kind`) is not a known kind.

    """

    if kind is None:
        kind = tuple(_REGSITRY_KIND_CLASS_MAP.keys())

    if isinstance(kind, tuple):
        # One recursive call per kind; each call takes the single-kind path
        # below and therefore validates its own argument.
        return {name: list_evaluation_functions(kind=name) for name in kind}

    if kind not in _REGSITRY_KIND_CLASS_MAP:
        # Trailing space after "the": adjacent string literals are joined
        # with no separator, which previously produced "all thevalid kinds".
        raise KeyError(
            'Cannot find `kind` {}. Use '
            '`list_evaluation_functions(kind=None).keys()` to get all the '
            'valid kinds of evaluation functions.'.format(kind))

    reg = registry.get_registry(_REGSITRY_KIND_CLASS_MAP[kind])
    return list(reg.keys())
예제 #5
0
def list_sources(embedding_name=None):
    """Get valid token embedding names and their pre-trained file names.


    To load token embedding vectors from an externally hosted pre-trained token embedding file,
    such as those of GloVe and FastText, one should use
    `gluonnlp.embedding.create(embedding_name, source)`. This method returns all the
    valid names of `source` for the specified `embedding_name`. If `embedding_name` is set to
    None, this method returns all the valid names of `embedding_name` with their associated
    `source`.


    Parameters
    ----------
    embedding_name : str or None, default None
        The pre-trained token embedding name. It is lower-cased before the lookup, so matching
        is case-insensitive.


    Returns
    -------
    dict or list:
        A list of all the valid pre-trained token embedding file names (`source`) for the
        specified token embedding name (`embedding_name`). If the token embedding name is set to
        None, returns a dict mapping each valid token embedding name to a list of valid pre-trained
        files (`source`). They can be plugged into
        `gluonnlp.embedding.create(embedding_name, source)`.


    Raises
    ------
    KeyError
        If `embedding_name` is given but is not a registered token embedding name.
    """

    text_embedding_reg = registry.get_registry(TokenEmbedding)

    if embedding_name is None:
        # Reuse the registry fetched above, and use a loop variable that does
        # not reuse the name of the `embedding_name` parameter.
        return {
            name: list(embedding_cls.source_file_hash.keys())
            for name, embedding_cls in text_embedding_reg.items()
        }

    embedding_name = embedding_name.lower()
    if embedding_name not in text_embedding_reg:
        # Trailing space after "valid": adjacent string literals are joined
        # with no separator, which previously produced "validembedding names".
        raise KeyError(
            'Cannot find `embedding_name` {}. Use '
            '`list_sources(embedding_name=None).keys()` to get all the valid '
            'embedding names.'.format(embedding_name))
    return list(text_embedding_reg[embedding_name].source_file_hash.keys())
예제 #6
0
def list_sources(embedding_name=None):
    """Get valid token embedding names and their pre-trained file names.


    To load token embedding vectors from an externally hosted pre-trained token embedding file,
    such as those of GloVe and FastText, one should use
    `gluonnlp.embedding.create(embedding_name, source)`. This method returns all the
    valid names of `source` for the specified `embedding_name`. If `embedding_name` is set to
    None, this method returns all the valid names of `embedding_name` with their associated
    `source`.


    Parameters
    ----------
    embedding_name : str or None, default None
        The pre-trained token embedding name. It is lower-cased before the lookup, so matching
        is case-insensitive.


    Returns
    -------
    dict or list:
        A list of all the valid pre-trained token embedding file names (`source`) for the
        specified token embedding name (`embedding_name`). If the token embedding name is set to
        None, returns a dict mapping each valid token embedding name to a list of valid
        pre-trained files (`source`). They can be plugged into
        `gluonnlp.embedding.create(embedding_name, source)`.


    Raises
    ------
    KeyError
        If `embedding_name` is given but is not a registered token embedding name.
    """

    text_embedding_reg = registry.get_registry(TokenEmbedding)

    if embedding_name is None:
        # Reuse the registry fetched above, and use a loop variable that does
        # not reuse the name of the `embedding_name` parameter.
        return {name: list(embedding_cls.source_file_hash.keys())
                for name, embedding_cls in text_embedding_reg.items()}

    embedding_name = embedding_name.lower()
    if embedding_name not in text_embedding_reg:
        # Trailing space after "valid": adjacent string literals are joined
        # with no separator, which previously produced "validembedding names".
        raise KeyError('Cannot find `embedding_name` {}. Use '
                       '`list_sources(embedding_name=None).keys()` to get all the valid '
                       'embedding names.'.format(embedding_name))
    return list(text_embedding_reg[embedding_name].source_file_hash.keys())
예제 #7
0
def list_subword_functions():
    """Get valid subword function names.

    Returns
    -------
    list:
        The keys of the registry obtained for `SubwordFunction`.
    """
    return list(registry.get_registry(SubwordFunction).keys())