def __init__(self):
    '''
    Reads the data from training and validation files generated with
    @{data_generation_call.generate_data}.

    For each of the 'valid' and 'train' splits, three arrays are loaded
    with np.load from `arguments.data_path + <split> + <suffix>`, where
    the suffix is '.mask', '.inputs' or '.targets'. They are stored on
    `self.data` as `<split>_mask`, `<split>_inputs` and `<split>_targets`.
    Each mask is duplicated once along its second axis.

    Also sets `self.<split>_data_count` (number of rows in the inputs)
    and `self.<split>_batch_count` (number of whole batches of
    `arguments.train_batch_size` that fit into the split).

    Raises:
        AssertionError: if a split holds fewer samples than one train
            batch.
    '''
    batch_size = arguments.train_batch_size

    # loading valid data
    self.data = Data()
    valid_prefix = arguments.data_path + 'valid'
    self.data.valid_mask = np.load(valid_prefix + '.mask')
    # Equivalent of Torch's `repeatTensor(1, 2)`: keep the first axis and
    # repeat the second axis twice.
    # NOTE(review): assumes the stored mask is 2-D — confirm.
    self.data.valid_mask = np.tile(self.data.valid_mask, (1, 2))
    self.data.valid_targets = np.load(valid_prefix + '.targets')
    self.data.valid_inputs = np.load(valid_prefix + '.inputs')
    self.valid_data_count = self.data.valid_inputs.shape[0]
    # `assert cond, msg` (no parentheses): the parenthesised form asserts
    # a non-empty tuple and therefore can never fail.
    assert self.valid_data_count >= batch_size, \
        'Validation data count has to be greater than a train batch size!'
    # Floor division keeps the batch count an int (usable as a loop bound).
    self.valid_batch_count = self.valid_data_count // batch_size

    # loading train data
    train_prefix = arguments.data_path + 'train'
    self.data.train_mask = np.load(train_prefix + '.mask')
    self.data.train_mask = np.tile(self.data.train_mask, (1, 2))
    self.data.train_inputs = np.load(train_prefix + '.inputs')
    self.data.train_targets = np.load(train_prefix + '.targets')
    self.train_data_count = self.data.train_inputs.shape[0]
    assert self.train_data_count >= batch_size, \
        'Training data count has to be greater than a train batch size!'
    self.train_batch_count = self.train_data_count // batch_size
from collections import defaultdict

import numpy as np
import torch

from models import ConEx, ConExWithNorm
from helper_classes import Data

# Dataset directory and pretrained checkpoint used by this script (WN18RR).
kg_path = 'KGs/WN18RR'
data_dir = "%s/" % kg_path
model_path = 'PretrainedModels/WN18RR/conex_WN18RR.pt'

# Dataset wrapper, built once at import time.
# NOTE(review): `reverse=False` presumably disables reciprocal triples —
# confirm against helper_classes.Data.
d = Data(data_dir=data_dir, reverse=False)


class Reproduce:
    """Helpers for turning triples into index form and lookup tables.

    `get_data_idxs` reads `self.entity_idxs` and `self.relation_idxs`,
    which are not assigned in `__init__`; they must be set on the
    instance before that method is called.
    """

    def __init__(self):
        # Run on CPU, in batches of 128.
        self.cuda = False
        self.batch_size = 128

    def get_data_idxs(self, data):
        """Map each (head, relation, tail) triple in `data` to an index triple.

        Heads and tails are looked up in `self.entity_idxs`, relations in
        `self.relation_idxs`. Returns a list of 3-tuples in input order.
        """
        return [
            (
                self.entity_idxs[triple[0]],
                self.relation_idxs[triple[1]],
                self.entity_idxs[triple[2]],
            )
            for triple in data
        ]

    def get_er_vocab(self, data):
        """Group third elements of the triples by their (first, second) pair.

        Returns a `defaultdict(list)` keyed by `(triple[0], triple[1])`
        whose values list every `triple[2]` seen for that key, in input
        order (duplicates kept).
        """
        vocab = defaultdict(list)
        for triple in data:
            key = (triple[0], triple[1])
            vocab[key].append(triple[2])
        return vocab
from collections import defaultdict

import numpy as np
import torch

from models import ConEx, ConExWithNorm
from helper_classes import Data

# Dataset directory and pretrained checkpoint used by this script (UMLS).
kg_path = 'KGs/UMLS'
data_dir = "%s/" % kg_path
model_path = 'PretrainedModels/UMLS/conex_umls.pt'

# Dataset wrapper, built once at import time.
# NOTE(review): `reverse=True` presumably adds reciprocal triples —
# confirm against helper_classes.Data.
d = Data(data_dir=data_dir, reverse=True)


class Reproduce:
    """Utilities that convert triples to indices and build lookup tables.

    `self.entity_idxs` and `self.relation_idxs` are read by
    `get_data_idxs` but are not created in `__init__`; assign them on the
    instance first.
    """

    def __init__(self):
        # CPU execution with a batch size of 128.
        self.cuda = False
        self.batch_size = 128

    def get_data_idxs(self, data):
        """Translate every triple of `data` into a tuple of indices.

        Positions 0 and 2 of each triple are looked up in
        `self.entity_idxs`, position 1 in `self.relation_idxs`. The result
        is a list of 3-tuples in the same order as `data`.
        """
        indexed = []
        for row in data:
            subject_idx = self.entity_idxs[row[0]]
            relation_idx = self.relation_idxs[row[1]]
            object_idx = self.entity_idxs[row[2]]
            indexed.append((subject_idx, relation_idx, object_idx))
        return indexed

    def get_er_vocab(self, data):
        """Collect, per (first, second) pair, all third elements of `data`.

        Returns a `defaultdict(list)` mapping `(row[0], row[1])` to the
        list of `row[2]` values encountered, in input order.
        """
        pair_to_targets = defaultdict(list)
        for row in data:
            pair_to_targets[row[0], row[1]].append(row[2])
        return pair_to_targets