def get_data():
    """Load the stored datasets and collect the article ids of the dev split.

    Steps visible in this section:
      1. call ``load_dev_vectors()``,
      2. load ``../mehdi/datasets.npy`` and unwrap the pickled object,
      3. build the label -> index map and its inverse,
      4. unpack the ``'dev'`` entry and gather its sorted, de-duplicated
         article ids.

    NOTE(review): no ``return``/``yield`` is visible in this section, so the
    function may continue beyond what is shown here -- confirm before
    relying on its result.
    """
    load_dev_vectors()

    # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
    # files from a trusted location.
    stored = np.load('../mehdi/datasets.npy', allow_pickle=True)
    # Indexing with None adds a leading axis and [0] strips it again, which
    # hands back the wrapped object -- presumably a dict saved through
    # np.save, given the ['dev'] lookup below.
    datasets = stored[None][0]
    #dev_vectors = np.load('../mehdi/dev_elmo_vectors.npy', allow_pickle=True)

    label2i = get_label2i()
    # Inverse lookup: index -> label.
    i2label = {index: label for label, index in label2i.items()}

    # generate
    keys, values, _ = datasets['dev']
    # Every key must unpack into exactly two items; the first one is used as
    # the article id.
    article_ids = sorted({article_id for article_id, _ in keys})
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Embedding, LSTM, Dropout, TimeDistributed, Activation, Dense, Masking,
    Input, GlobalMaxPooling1D, Bidirectional,
)
from tensorflow.keras.initializers import Constant

import gc
import os

# Enumerate GPUs in PCI bus order and expose only device index 3 to CUDA.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]='3'

ELMO_DIR = "../mehdi/elmo1024/"

# Fixed seed for both random number generators used by this script
# (npr is presumably numpy.random -- its import is outside this section).
seed_val = 3
npr.seed(seed_val)
random.seed(seed_val)

label2i = get_label2i()
#bpemb_model = load_bpemb_model()


def format_labels(labels):
    """Encode each group of label names as one vector over all classes.

    For a non-empty group, every name is looked up in ``label2i``, turned
    into a ``to_categorical`` vector, and the vectors are summed along the
    first axis, so a name that occurs twice contributes twice. An empty
    group is encoded as ``to_categorical(0, ...)``.

    Args:
        labels: iterable of sized iterables holding keys of ``label2i``.

    Returns:
        A list with one encoded vector per element of ``labels``.

    Raises:
        KeyError: if a name is missing from ``label2i``.
    """
    encoded = []
    for names in labels:
        if len(names) != 0:
            per_name = [
                to_categorical(label2i[name], num_classes=len(label2i))
                for name in names
            ]
            encoded.append(np.array(per_name).sum(0))
        else:
            # No labels at all: fall back to the class-0 vector.
            encoded.append(to_categorical(0, num_classes=len(label2i)))
    return encoded


def pad_batch(batch, max_len):
    """Pad every entry of ``batch`` shorter than ``max_len``, in place.

    Short entries are converted to an array and extended along axis 0 with
    ``to_categorical(0, ...)`` rows until they hold ``max_len`` rows. Entries
    whose length is already ``max_len`` or more are left untouched.

    Args:
        batch: mutable sequence of per-item sequences; modified in place.
        max_len: target number of rows for each entry.

    NOTE(review): no ``return`` statement is visible in this section;
    confirm whether callers rely on the in-place update only.
    """
    for position, entry in enumerate(batch):
        missing = max_len - len(entry)
        if missing > 0:
            filler = np.array([
                to_categorical(0, num_classes=len(label2i))
                for _ in range(missing)
            ])
            batch[position] = np.concatenate([np.array(entry), filler], 0)