parser.add_argument('--decay1', default=0.9, type=float,
                    help="decay rate for the first moment estimate in Adam")
parser.add_argument('--decay2', default=0.999, type=float,
                    help="decay rate for the second moment estimate in Adam")

args = parser.parse_args()

dataset = Dataset(args.dataset)
examples = torch.from_numpy(dataset.get_train().astype('int64'))

print(dataset.get_shape())
model = {
    'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
}[args.model]()

regularizer = {
    'F2': F2(args.reg),
    'N3': N3(args.reg),
}[args.regularizer]

device = 'cuda'
model.to(device)

optim_method = {
    'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
    # decay1/decay2 map onto Adam's beta coefficients (see the argparse help above)
    'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate,
                               betas=(args.decay1, args.decay2)),
}[args.optimizer]()
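# A minimal sketch of how these pieces are typically combined downstream,
# assuming the KBCOptimizer wrapper (kbc.optimizers) and batch_size /
# max_epochs arguments that this parser presumably also defines:
optimizer = KBCOptimizer(model, regularizer, optim_method, args.batch_size)
for e in range(args.max_epochs):
    cur_loss = optimizer.epoch(examples)  # one full pass over the training triples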
# Only keywords
dataset_kw = dataset.copy()
dataset_kw['text'] = dataset[['keywords']].apply(lambda x: ''.join(x), axis=1).to_list()

input_x = dataset[['claim', 'text']]
input_x_all = dataset_text_all[['claim', 'text']]
input_x_kw = dataset_kw[['claim', 'text']]
input_y = dataset[class_list].copy().values

data_path = args[2]
model_path = args[3]

# CKGE graph embeddings
ckge_dataset = Dataset(os.path.join(data_path, "CKGE"), use_cpu=True)
ckge_model = CP(ckge_dataset.get_shape(), 50)
ckge_model.load_state_dict(
    torch.load(os.path.join(model_path, "CKGE.pickle"),
               map_location=torch.device('cpu')))
ckge_graph_vectorizer = GraphEmbeddingTransformer(ckge_dataset, ckge_model)

# DistilRoBERTa (DR)
flair_vectorizer_DR = FlairTransformer([
    TransformerWordEmbeddings(model="distilroberta-base", use_scalar_mix=True)
], batch_size=1)

# GPT-2 (assumed to use the same scalar-mix setup as the DistilRoBERTa
# transformer above)
flair_vectorizer_GPT2 = FlairTransformer([
    TransformerWordEmbeddings(model="gpt2-large", use_scalar_mix=True)
], batch_size=1)
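# How these vectorizers are applied is not shown in this excerpt; a minimal
# sketch, assuming the project-local GraphEmbeddingTransformer and
# FlairTransformer follow the scikit-learn fit/transform convention their
# names suggest:
text_features = flair_vectorizer_DR.fit_transform(input_x['text'])
graph_features = ckge_graph_vectorizer.fit_transform(input_x['claim'])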
)

# Parse command-line arguments
args = parser.parse_args()

# Get dataset
dataset = Dataset(args.dataset)
if args.model in ['CP', 'ComplEx']:
    unsorted_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    examples = unsorted_examples
else:
    # Context models additionally need the training triples in sorted order
    sorted_data, slice_dic = dataset.get_sorted_train()
    examples = torch.from_numpy(dataset.get_train().astype('int64'))

model = {
    'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
    'ContExt': lambda: ContExt(dataset.get_shape(), args.rank, sorted_data,
                               slice_dic, max_NB=args.max_NB,
                               init_size=args.init, data_name=args.dataset,
                               ascending=args.ascending,
                               dropout_1=args.dropout_1,
                               dropout_g=args.dropout_g,
                               evaluation_mode=args.evaluation_mode),
}[args.model]()

regularizer = {
    'N0': 'N0',  # kept as a plain string rather than a regularizer instance
    'N2': N2(args.reg),
    'N3': N3(args.reg),
    'N4': N4(args.reg, g_weight=args.g_weight),
}[args.regularizer]

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
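# The commented-out line above hints at device selection; a minimal sketch
# that prefers CUDA but falls back to CPU:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)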
import sys

import torch

from kbc.datasets import Dataset
from kbc import avg_both
from kbc.models import CP

args = sys.argv[1:]

# Rebuild a rank-50 CP model and restore its weights on CPU
dataset = Dataset(args[0], use_cpu=True)
model = CP(dataset.get_shape(), 50)
model.load_state_dict(torch.load(args[1], map_location=torch.device('cpu')))

# Evaluate on the test split and average the metrics over both prediction
# directions (subject and object)
print(avg_both(*dataset.eval(model, "test", 50000, batch_size=100)))
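# Example invocation (the script name and checkpoint path are hypothetical;
# the first argument names the dataset, the second a saved CP state_dict):
#
#   python eval_cp.py FB15K models/FB15K-cp-rank50.pt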
import json
import os

import torch

# Project-local imports assumed from the package layout used elsewhere in
# this repo
from kbc.models import CP, ComplEx, DistMult
from kbc.optimizers import KBCOptimizer


def kbc_model_load(model_path):
    """
    Load a trained KBC model from a checkpoint file.

    The dataset name and timestamp are recovered from the checkpoint
    filename and used to locate the matching metadata JSON file next to it.

    @params:
        model_path - full or relative path to the model checkpoint
    @returns:
        KBC_optimizer - a KBCOptimizer wrapping the restored model
        epoch - the last epoch trained (int)
        loss - the last loss stored in the checkpoint
    """
    identifiers = model_path.split('/')[-1].split('-')
    dataset_name = identifiers[0].strip()
    timestamp = identifiers[-1][:-3].strip()  # drop the '.pt' extension

    if 'YAGO' in dataset_name:
        dataset_name = 'YAGO3-10'
    if 'FB15k' in identifiers and '237' in identifiers:
        dataset_name = 'FB15k-237'

    model_dir = os.path.dirname(model_path)
    with open(os.path.join(model_dir,
                           f'{dataset_name}-metadata-{timestamp}.json'),
              'r') as json_file:
        metadata = json.load(json_file)

    # Fall back to CPU when no GPU is available
    map_location = None
    if not torch.cuda.is_available():
        map_location = torch.device('cpu')
    checkpoint = torch.load(model_path, map_location=map_location)

    factorizer_name = checkpoint['factorizer_name']
    models = ['CP', 'ComplEx', 'DistMult']
    if 'cp' in factorizer_name.lower():
        model = CP(metadata['data_shape'], metadata['rank'], metadata['init'])
    elif 'complex' in factorizer_name.lower():
        model = ComplEx(metadata['data_shape'], metadata['rank'],
                        metadata['init'])
    elif 'distmult' in factorizer_name.lower():
        model = DistMult(metadata['data_shape'], metadata['rank'],
                         metadata['init'])
    else:
        raise ValueError(f'Model {factorizer_name} not in {models}')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    regularizer = checkpoint['regularizer']
    optim_method = checkpoint['optim_method']
    batch_size = checkpoint['batch_size']

    KBC_optimizer = KBCOptimizer(model, regularizer, optim_method, batch_size)
    KBC_optimizer.model.load_state_dict(checkpoint['model_state_dict'])
    KBC_optimizer.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    epoch = checkpoint['epoch']
    loss = checkpoint['loss']

    # .eval() switches the restored model to evaluation mode; printing it
    # also shows the module summary
    print(KBC_optimizer.model.eval())
    return KBC_optimizer, epoch, loss
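# Example usage; the path is hypothetical but follows the naming scheme the
# function parses ('<dataset>-...-<timestamp>.pt' next to a matching
# '<dataset>-metadata-<timestamp>.json'):
kbc_optimizer, epoch, loss = kbc_model_load(
    'models/FB15k-237-rank100-1596000000.pt')
print(f'restored at epoch {epoch}, last loss {loss}')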
dataset = Dataset(args.dataset)
if args.model in ['CP', 'ComplEx', 'ConvE']:
    # For non-context models, train on the unsorted triples
    unsorted_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    examples = unsorted_examples
else:
    # Get sorted examples for context models
    sorted_data, slice_dic = dataset.get_sorted_train()
    examples = torch.from_numpy(dataset.get_train().astype('int64'))

rank, init = int(config['rank']), float(config['init'])

print(dataset.get_shape())
model = {
    'CP': lambda: CP(dataset.get_shape(), rank, init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), rank, init),
    'ConvE': lambda: ConvE(dataset.get_shape(), rank, config['dropouts'],
                           config['use_bias'], config['hw'],
                           config['kernel_size'], config['output_channel']),
    'Context_CP': lambda: Context_CP(dataset.get_shape(), rank, sorted_data,
                                     slice_dic, max_NB=config['max_NB'],
                                     init_size=config['init'],
                                     data_name=config['dataset']),
    # Assumed to take the same arguments as Context_CP above
    'Context_ComplEx': lambda: Context_ComplEx(dataset.get_shape(), rank,
                                               sorted_data, slice_dic,
                                               max_NB=config['max_NB'],
                                               init_size=config['init'],
                                               data_name=config['dataset']),
}[args.model]()
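# config behaves like a hyperparameter dictionary (e.g. loaded from JSON or
# supplied by a tuning framework). A hypothetical example covering the keys
# read above:
config = {
    'dataset': 'FB15K-237', 'rank': 200, 'init': 1e-3,
    'dropouts': (0.3, 0.3, 0.3), 'use_bias': True, 'hw': (10, 20),
    'kernel_size': (3, 3), 'output_channel': 32, 'max_NB': 50,
}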