def get_data():
    """Load the dev vectors and the saved datasets, then index the dev split.

    NOTE(review): the visible body has no return statement and none of the
    locals built below are used afterwards, so this function looks truncated
    in this excerpt -- confirm against the full source before relying on it.
    """
    # Called purely for its side effect; any return value is discarded.
    load_dev_vectors()
    # allow_pickle=True unpickles arbitrary Python objects, so only point this
    # at trusted files. `[None][0]` adds a leading axis and then takes its
    # single element, which unwraps the stored object -- presumably a dict
    # saved with np.save, since it is indexed by 'dev' below (TODO confirm).
    datasets = np.load('../mehdi/datasets.npy', allow_pickle=True)[None][0]
    #dev_vectors = np.load('../mehdi/dev_elmo_vectors.npy', allow_pickle=True)

    # Label -> index mapping and its inverse (index -> label).
    label2i = get_label2i()
    i2label = {v: k for k, v in label2i.items()}

    # generate
    # The 'dev' entry unpacks into three items; the third one is ignored here.
    keys, values, _ = datasets['dev']
    # Each key is a 2-item pair whose first element is taken as the article
    # id; collect the distinct ids in sorted order.
    article_ids = sorted({aid for aid, _ in keys})
# Example #2
# 0
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, LSTM, Dropout, TimeDistributed, Activation, Dense, Masking, Input, GlobalMaxPooling1D, Bidirectional
from tensorflow.keras.initializers import Constant

import gc
import os
# Enumerate CUDA devices in PCI bus order and expose only device 3 to this
# process.
# NOTE(review): these are set after the tensorflow import above; presumably
# still effective because the GPU runtime is initialised lazily -- confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]='3'

# Directory holding the ELMo data (presumably 1024-dimensional vectors, going
# by the name -- TODO confirm). Not referenced in the visible part of the file.
ELMO_DIR = "../mehdi/elmo1024/"

# Seed both `npr` (presumably numpy.random) and the stdlib `random` module
# with the same fixed value so runs are repeatable.
seed_val = 3
npr.seed(seed_val)
random.seed(seed_val)

# Label -> index mapping, read as a module global by format_labels and
# pad_batch below.
label2i = get_label2i()
#bpemb_model = load_bpemb_model()

def format_labels(labels):
    """Encode every group of label names as one vector over all classes.

    For each entry of ``labels`` the names are looked up in the module-level
    ``label2i`` mapping, turned into one-hot vectors with ``to_categorical``
    and summed element-wise, so a label that occurs several times in the same
    group contributes several times. An empty group is encoded as the one-hot
    vector for class index 0.

    Returns a list with one vector per entry of ``labels``, in the same order.
    Raises ``KeyError`` if a name is missing from ``label2i``.
    """
    n_classes = len(label2i)
    encoded = []
    for group in labels:
        if len(group) == 0:
            # Nothing to sum: fall back to the vector for class index 0.
            encoded.append(to_categorical(0, num_classes=n_classes))
            continue
        one_hots = [
            to_categorical(label2i[name], num_classes=n_classes)
            for name in group
        ]
        # Collapse the per-label one-hot rows into a single vector.
        encoded.append(np.array(one_hots).sum(0))
    return encoded

def pad_batch(batch, max_len):
    for i in range(len(batch)):
        if max_len - len(batch[i]) > 0:
            batch[i] = np.concatenate([
                np.array(batch[i]), 
                np.array([to_categorical(0, num_classes=len(label2i)) for _ in range(max_len - len(batch[i]))])
                ], 0)