Example #1
def main():
    # Load vocab.
    vocab = Vocab.from_json(args.vocab_path)
    label_map = {0: 'negative', 1: 'positive'}

    # Constructs the network.
    network = args.network.lower()
    vocab_size = len(vocab)
    num_classes = len(label_map)
    pad_token_id = vocab.to_indices('[PAD]')
    if network == 'bow':
        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'bigru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'bilstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='bidirect',
                          padding_idx=pad_token_id)
    elif network == 'bilstm_attn':
        lstm_hidden_size = 196
        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
        model = BiLSTMAttentionModel(attention_layer=attention,
                                     vocab_size=vocab_size,
                                     lstm_hidden_size=lstm_hidden_size,
                                     num_classes=num_classes,
                                     padding_idx=pad_token_id)
    elif network == 'birnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'cnn':
        model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'gru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='forward',
                         padding_idx=pad_token_id,
                         pooling_type='max')
    elif network == 'lstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='forward',
                          padding_idx=pad_token_id,
                          pooling_type='max')
    elif network == 'rnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='forward',
                         padding_idx=pad_token_id,
                         pooling_type='max')
    else:
        raise ValueError(
            "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
            % network)

    # Load model parameters.
    state_dict = paddle.load(args.params_path)
    model.set_dict(state_dict)
    model.eval()

    inputs = [paddle.static.InputSpec(shape=[None, None], dtype="int64")]
    # Convert to static graph with specific input description
    if args.network in [
            "lstm", "bilstm", "gru", "bigru", "rnn", "birnn", "bilstm_attn"
    ]:
        inputs.append(paddle.static.InputSpec(shape=[None],
                                              dtype="int64"))  # seq_len

    model = paddle.jit.to_static(model, input_spec=inputs)
    # Save the static graph model.
    paddle.jit.save(model, args.output_path)
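
Once exported, the saved static-graph model can be reloaded for inference with paddle.jit.load. The snippet below is a minimal sketch rather than part of the original example: the output path and token-id values are placeholders, and it assumes one of the RNN-family networks (which also take a seq_len input) was exported.

import paddle

# Placeholder path; in practice this is the value passed as args.output_path above.
static_model = paddle.jit.load("output/static_graph_params")
static_model.eval()

# Toy batch of token ids; real ids come from the same vocab used at export time.
token_ids = paddle.to_tensor([[12, 45, 7, 0, 0]], dtype="int64")
seq_len = paddle.to_tensor([3], dtype="int64")  # actual (unpadded) length of each sequence

logits = static_model(token_ids, seq_len)
probs = paddle.nn.functional.softmax(logits, axis=-1)
print(probs.numpy())  # per-class probabilities, e.g. negative/positive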
Example #2
    elif network == 'bigru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'bilstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='bidirect',
                          padding_idx=pad_token_id)
    elif network == 'bilstm_attn':
        lstm_hidden_size = 196
        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
        model = BiLSTMAttentionModel(attention_layer=attention,
                                     vocab_size=vocab_size,
                                     lstm_hidden_size=lstm_hidden_size,
                                     num_classes=num_classes,
                                     padding_idx=pad_token_id)
    elif network == 'birnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'cnn':
        model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'gru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='forward',
                         padding_idx=pad_token_id,
                         pooling_type='max')
Example #3
    vocab = Vocab.load_vocabulary(
        args.vocab_path, unk_token='[UNK]', pad_token='[PAD]')

    tokenizer = CharTokenizer(vocab, args.language, '../../../punctuations')

    # Constructs the network.
    vocab_size = len(vocab)
    num_classes = len(train_ds.label_list)
    pad_token_id = 0
    pad_value = vocab.token_to_idx.get('[PAD]', 0)

    lstm_hidden_size = 196
    attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
    model = BiLSTMAttentionModel(
        attention_layer=attention,
        vocab_size=vocab_size,
        lstm_hidden_size=lstm_hidden_size,
        num_classes=num_classes,
        padding_idx=pad_token_id)

    model = paddle.Model(model)

    # Reads data and generates mini-batches.
    trans_fn = partial(
        convert_example,
        tokenizer=tokenizer,
        is_test=False,
        language=args.language)

    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=pad_value),  # input_ids
        Stack(dtype="int64"),  # seq len