Example #1
def glove_linear_cnn(
        glove_path: Union[Path, TextIO],
        glove_dim: int,
        num_classes: int,
        vocab: Vocabulary,
        filters: List[int],
        out_channels: int,
        freeze: bool = True,
        saved_glove_file: Optional[Path] = None,
        dropout: float = 0,
        ) -> LinearCnn:
    """Return LinearCnn with embedding layer initialised with GloVe."""
    glove = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )

    embedding = nn.Embedding.from_pretrained(glove, freeze=freeze)

    return LinearCnn(
        embedding=embedding,
        num_classes=num_classes,
        filters=filters,
        out_channels=out_channels,
        dropout=dropout,
    )
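A short usage sketch for the factory above. The GloVe path, vocabulary contents, class count, and filter sizes below are illustrative assumptions, not values taken from the repository.

# Hypothetical call to glove_linear_cnn; all arguments are assumed values.
vocab = Vocabulary(["hello", "world"])  # assumed constructor, mirroring the synthetic test below
model = glove_linear_cnn(
    glove_path=Path("./data/glove/glove.6B.50d.txt"),  # path pattern borrowed from test_real_glove
    glove_dim=50,
    num_classes=7,
    vocab=vocab,
    filters=[3, 4, 5],
    out_channels=100,
    dropout=0.5,
)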
Example #2
def test_synthetic_glove() -> None:
    """Test if GloVe loader works with synthetic data."""
    vocab = data.Vocabulary(["test", "new"])
    input_file = StringIO(glove_str)
    tensor = glove.load_glove(input_file, vocab, 3)

    assert tensor.shape == (vocab.vocab_size(), 3)
Example #3
def glove_bc_lstm(
        glove_path: Union[Path, TextIO],
        glove_dim: int,
        num_classes: int,
        vocab: Vocabulary,
        filters: List[int],
        out_channels: int,
        freeze: bool = True,
        saved_glove_file: Optional[Path] = None,
        rnn_hidden_size: int = 100,
        rnn_num_layers: int = 1,
        bidirectional: bool = False,
        rnn_dropout: float = 0,
        ) -> BcLstm:
    """
    Return bcLSTM with embedding layer initialised with GloVe.

    Both RNNs used (utterance and dialogue) are LSTMs.
    """
    glove = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )

    embedding = nn.Embedding.from_pretrained(glove, freeze=freeze)
    utterance_lstm = nn.LSTM(
        input_size=out_channels * len(filters),
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=bidirectional,
        dropout=rnn_dropout,
        batch_first=True,
    )

    # A bidirectional utterance LSTM doubles the feature size fed to the dialogue LSTM.
    dialogue_input = (2 if bidirectional else 1) * utterance_lstm.hidden_size
    dialogue_lstm = nn.LSTM(
        input_size=dialogue_input,
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=False,  # a bidirectional dialogue-level RNN does not make sense here [1]
        dropout=rnn_dropout,
        batch_first=True,
    )

    return BcLstm(
        embedding=embedding,
        num_classes=num_classes,
        dialogue_rnn=dialogue_lstm,
        utterance_rnn=utterance_lstm,
        filters=filters,
        out_channels=out_channels,
    )
Example #4
def test_real_glove() -> None:
    """Test if GloVe loader works with real data."""
    glove_dim = 50

    data_path = Path('./data/dev/metadata.csv')
    dataset = MeldLinearTextDataset(data_path)

    input_file = Path(f'./data/glove/glove.6B.{glove_dim}d.txt')
    tensor = glove.load_glove(input_file, dataset.vocab, glove_dim)

    assert tensor.shape == (dataset.vocab.vocab_size(), glove_dim)
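For context, here is a minimal sketch of what a loader like load_glove typically does: it builds a (vocab_size, glove_dim) weight matrix from a GloVe text file, keeping only rows for words in the vocabulary. The caching behaviour behind saved_glove_file, the out-of-vocabulary handling, and the vocabulary's token-to-index method name are assumptions and will differ from the repository's actual implementation.

import torch

def load_glove_sketch(input_file, vocab, glove_dim):
    """Sketch: build a (vocab_size, glove_dim) tensor from a GloVe text file."""
    # Words without a pretrained vector keep a zero row; this choice is an assumption.
    weights = torch.zeros(vocab.vocab_size(), glove_dim)
    with open(input_file, encoding="utf-8") as handle:
        for line in handle:
            word, *values = line.rstrip().split(" ")
            index = vocab.index_of(word)  # hypothetical lookup; the real method name may differ
            if index is not None:
                weights[index] = torch.tensor([float(v) for v in values])
    return weights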
Example #5
def glove_contextual_lstm(
        glove_path: Union[Path, TextIO],
        glove_dim: int,
        num_classes: int,
        vocab: Vocabulary,
        freeze: bool = True,
        saved_glove_file: Optional[Path] = None,
        rnn_hidden_size: int = 100,
        rnn_num_layers: int = 1,
        bidirectional: bool = False,
        rnn_dropout: float = 0,
        ) -> ContextualRnn:
    """
    Return ContextualRnn with embedding layer initialised with GloVe.

    Both RNNs used (utterance and dialogue) are LSTMs.
    """
    glove = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )

    embedding = nn.Embedding.from_pretrained(glove, freeze=freeze)
    utterance_lstm = nn.LSTM(
        input_size=embedding.embedding_dim,
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=bidirectional,
        dropout=rnn_dropout,
        batch_first=True,
    )
    dialogue_lstm = nn.LSTM(
        input_size=utterance_lstm.hidden_size,
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=False,  # a bidirectional dialogue-level RNN does not make sense here [1]
        dropout=rnn_dropout,
        batch_first=True,
    )

    return ContextualRnn(
        embedding=embedding,
        num_classes=num_classes,
        dialogue_rnn=dialogue_lstm,
        utterance_rnn=utterance_lstm,
    )
Example #6
def glove_linear_cnn_lstm(
    glove_path: Union[Path, TextIO],
    glove_dim: int,
    num_classes: int,
    vocab: Vocabulary,
    filters: List[int],
    out_channels: int,
    rnn_hidden_size: int = 100,
    rnn_num_layers: int = 1,
    bidirectional: bool = False,
    rnn_dropout: float = 0,
    freeze: bool = True,
    saved_glove_file: Optional[Path] = None,
) -> LinearCnnRnn:
    """Return LinearCnnRnn with embedding layer initialised with GloVe."""
    glove = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )

    embedding = nn.Embedding.from_pretrained(glove, freeze=freeze)

    lstm = nn.LSTM(
        input_size=out_channels * len(filters),
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=bidirectional,
        dropout=rnn_dropout,
        batch_first=True,
    )

    return LinearCnnRnn(
        embedding=embedding,
        num_classes=num_classes,
        rnn=lstm,
        filters=filters,
        out_channels=out_channels,
    )
Example #7
def glove_simple(
        glove_path: Union[Path, TextIO],
        glove_dim: int,
        num_classes: int,
        vocab: Vocabulary,
        freeze: bool = True,
        saved_glove_file: Optional[Path] = None,
        ) -> Simple:
    """Return Simple with embedding layer initialised with GloVe."""
    glove = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )

    embedding = nn.Embedding.from_pretrained(glove, freeze=freeze)

    return Simple(
        embedding=embedding,
        num_classes=num_classes,
    )
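A brief usage sketch: passing freeze=False makes nn.Embedding.from_pretrained return a trainable embedding (its weight keeps requires_grad=True), so the GloVe vectors are fine-tuned during training. The path, class count, and vocabulary below are assumptions.

# Hypothetical call; num_classes=7 (e.g. MELD emotion labels) and the path are assumed.
model = glove_simple(
    glove_path=Path("./data/glove/glove.6B.50d.txt"),
    glove_dim=50,
    num_classes=7,
    vocab=vocab,          # a Vocabulary built elsewhere, e.g. from a dataset
    freeze=False,         # let the pretrained embeddings be fine-tuned
)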
Example #8
def glove_linear_lstm(
    glove_path: Union[Path, TextIO],
    glove_dim: int,
    num_classes: int,
    vocab: Vocabulary,
    freeze: bool = True,
    saved_glove_file: Optional[Path] = None,
    rnn_hidden_size: int = 100,
    rnn_num_layers: int = 1,
    bidirectional: bool = False,
    rnn_dropout: float = 0,
) -> LinearRnn:
    """
    Return LinearRnn with embedding layer initialised with GloVe.

    RNN used is an LSTM.
    """
    glove = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )

    embedding = nn.Embedding.from_pretrained(glove, freeze=freeze)
    lstm = nn.LSTM(
        input_size=embedding.embedding_dim,
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=bidirectional,
        dropout=rnn_dropout,
        batch_first=True,
    )

    return LinearRnn(
        embedding=embedding,
        num_classes=num_classes,
        rnn=lstm,
    )
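Finally, a self-contained usage sketch that feeds glove_linear_lstm an in-memory GloVe file, mirroring test_synthetic_glove. The vector values, vocabulary, and hyperparameters are made up for illustration.

from io import StringIO

# Assumed standard GloVe text format: one word followed by its vector components per line.
glove_str = "test 0.1 0.2 0.3\nnew 0.4 0.5 0.6\n"
vocab = Vocabulary(["test", "new"])   # assumed constructor, as in the synthetic test
model = glove_linear_lstm(
    glove_path=StringIO(glove_str),
    glove_dim=3,
    num_classes=7,        # assumed class count
    vocab=vocab,
    rnn_hidden_size=16,
    bidirectional=True,
)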