def dataset_to_query(model, dataset_name, dataset_mode):
    queries = None
    try:
        dataset = Dataset(dataset_name)
        # dataset_mode is one of ['train', 'test', 'valid']
        query_ids = dataset.dataset_to_queries(dataset_mode)

        query_side = []
        for i in range(len(query_ids[:, 0])):
            query_side.append(
                (query_ids[:, 0][i].item(), query_ids[:, 1][i].item()))

        check = []
        for i, j in query_side:
            check.append(dataset.to_skip['rhs'][i, j])

        queries = model.get_queries_separated(query_ids)

        if not ('train' in dataset_mode.lower()):
            results = dataset.eval(model, dataset_mode, -1)
            print("\n\n{} : {}".format(dataset_mode, results))
    except RuntimeError as e:
        print("Cannot convert the dataset to a query list with error: {}".format(
            str(e)))
        return None, None
    return queries, check
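# Minimal usage sketch for dataset_to_query. The model construction below is an
# assumption for illustration only: a rank-50 ComplEx model and a placeholder
# weights path, loaded the same way the other scripts in this section load
# trained models. Dataset and ComplEx are assumed to come from kbc.datasets and
# kbc.models, and torch is assumed to be imported.
if __name__ == "__main__":
    example_dataset = Dataset('FB237')
    example_model = ComplEx(example_dataset.get_shape(), 50, 1e-3)
    example_model.load_state_dict(
        torch.load('path/to/model.pt', map_location=torch.device('cpu')))
    queries, check = dataset_to_query(example_model, 'FB237', 'valid')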
parser.add_argument('--init', default=1e-3, type=float,
                    help="Initial scale")
parser.add_argument('--learning_rate', default=1e-1, type=float,
                    help="Learning rate")
parser.add_argument('--decay1', default=0.9, type=float,
                    help="decay rate for the first moment estimate in Adam")
parser.add_argument('--decay2', default=0.999, type=float,
                    help="decay rate for the second moment estimate in Adam")

args = parser.parse_args()

dataset = Dataset(args.dataset)
examples = torch.from_numpy(dataset.get_train().astype('int64'))

print(dataset.get_shape())
model = {
    'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
}[args.model]()

regularizer = {
    'F2': F2(args.reg),
    'N3': N3(args.reg),
}[args.regularizer]

device = 'cuda'
model.to(device)
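# Hedged sketch of the step that typically follows: decay1/decay2 map onto Adam's
# beta coefficients, so a plain torch optimizer could be constructed as below.
# The actual training script may instead wrap this in its own optimizer class;
# this is illustration, not the repository's exact code.
optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.learning_rate,
                             betas=(args.decay1, args.decay2))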
                                        axis=1).to_list()

# Only keywords
dataset_kw = dataset.copy()
dataset_kw['text'] = dataset[['keywords']].apply(lambda x: ''.join(x),
                                                 axis=1).to_list()

input_x = dataset[['claim', 'text']]
input_x_all = dataset_text_all[['claim', 'text']]
input_x_kw = dataset_kw[['claim', 'text']]
input_y = dataset[class_list].copy().values

data_path = args[2]
model_path = args[3]

# CKGE Graph embeddings
ckge_dataset = Dataset(os.path.join(data_path, "CKGE"), use_cpu=True)
ckge_model = CP(ckge_dataset.get_shape(), 50)
ckge_model.load_state_dict(
    torch.load(os.path.join(model_path, "CKGE.pickle"),
               map_location=torch.device('cpu')))
ckge_graph_vectorizer = GraphEmbeddingTransformer(ckge_dataset, ckge_model)

# Distil RoBERTa (DR)
flair_vectorizer_DR = FlairTransformer([
    TransformerWordEmbeddings(model="distilroberta-base", use_scalar_mix=True)
], batch_size=1)

# GPT2
flair_vectorizer_GPT2 = FlairTransformer([
from pathlib import Path
import pkg_resources
import pickle

from kbc.datasets import Dataset

import matplotlib.pyplot as plt

#%%%
# FB237 dataset degree distribution
mydata = Dataset('FB237')
train, slice_dic = mydata.get_sorted_train()

plt.close('all')
plt.figure()
plt.hist(slice_dic[:, 3], bins=300, range=(0, 300))
plt.xlabel('Number of adjacent neighbors (Degrees) \n FB237 Dataset')
plt.ylabel('Frequency')
plt.show()

#%%%
# WN18RR dataset degree distribution
mydata = Dataset('WN18RR')
train, slice_dic = mydata.get_sorted_train()

plt.figure()
plt.hist(slice_dic[:, 3], bins=30, range=(0, 30))
plt.xlabel('Number of adjacent neighbors (Degrees) \n WN18RR Dataset')
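#%%%
# Hedged sketch: assuming slice_dic is a NumPy integer array whose column 3 holds
# the per-entity neighbour counts (as the histogram calls above suggest), summary
# degree statistics can be printed alongside the plots.
degrees = slice_dic[:, 3]
print('mean degree: {:.2f}, max degree: {}'.format(degrees.mean(),
                                                   degrees.max()))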
parser.add_argument(
    '--n_freeze',
    default=0,
    type=int,
    help='Number of training epochs for which the original embeddings are frozen'
)
parser.add_argument(
    '--evaluation_mode',
    default=0,
    type=int,
    choices=[0, 1],
    help='Whether to return an attention mask or not'
)

# Setup parser
args = parser.parse_args()

# Get Dataset
dataset = Dataset(args.dataset)

if args.model in ['CP', 'ComplEx']:
    unsorted_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    examples = unsorted_examples
else:
    sorted_data, slice_dic = dataset.get_sorted_train()
    examples = torch.from_numpy(dataset.get_train().astype('int64'))

model = {
    'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
    'ContExt': lambda: ContExt(dataset.get_shape(), args.rank,
                               sorted_data, slice_dic,
                               max_NB=args.max_NB,
                               init_size=args.init,
                               data_name=args.dataset,
                               ascending=args.ascending,
                               dropout_1=args.dropout_1,
                               dropout_g=args.dropout_g,
                               evaluation_mode=args.evaluation_mode),
}[args.model]()
import sys

import torch

from kbc.datasets import Dataset
from kbc import avg_both
from kbc.models import CP

args = sys.argv[1:]

dataset = Dataset(args[0], use_cpu=True)
model = CP(dataset.get_shape(), 50)
model.load_state_dict(torch.load(args[1], map_location=torch.device('cpu')))

print(avg_both(*dataset.eval(model, "test", 50000, batch_size=100)))
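# Hypothetical invocation (script name and paths are placeholders, not taken from
# the repository): the first positional argument is the dataset name, the second
# the path to a saved CP state dict.
#
#     python evaluate_cp.py FB237 models/cp_fb237.pt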
                    type=float,
                    help="decay rate for second moment estimate in Adam")
parser.add_argument('--model_save_schedule', default=50, type=int,
                    help="Saving the model every N iterations")
parser.add_argument('--eval_only', action='store_true', default=False)
parser.add_argument('--checkpoint', type=str)

args = parser.parse_args()
args.dataset = os.path.basename(args.path)

dataset = Dataset(os.path.join(args.path, 'kbc_data'))
args.data_shape = dataset.get_shape()

if not args.eval_only:
    model = {
        'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
        'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
        'DistMult': lambda: DistMult(dataset.get_shape(), args.rank, args.init)
    }[args.model]()

    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]
if __name__ == "__main__": big_datasets = ['Bio', 'FB15K', 'WN', 'WN18RR', 'FB237', 'YAGO3-10'] datasets = big_datasets parser = argparse.ArgumentParser(description="Chain Dataset Sampling") parser.add_argument('--dataset', choices=datasets, help="Dataset in {}".format(datasets)) parser.add_argument( '--threshold', default=1e5, type=int, help="Threshold for maximum amount sampled per chain type") parser.add_argument('--save_path', default=os.getcwd(), help="Path to save the chained dataset") args = parser.parse_args() chained_dataset_sampler = ChaineDataset(Dataset(args.dataset), args.threshold) chained_dataset_sampler.sample_chains() save_chain_data(args.save_path, args.dataset, chained_dataset_sampler)
input_x = DataFrame()
input_x['claim'] = claims
input_x['text'] = texts
input_x['keywords'] = keywords
input_x['author'] = authors
input_x['text'] = input_x[['text', 'keywords', 'author']].apply(
    lambda x: ''.join(x), axis=1).to_list()
# input_x['text'] = input_x[['keywords']].apply(lambda x: ''.join(x),
#                                               axis=1).to_list()

input_y = ratings

# Graph embeddings
dataset = Dataset(os.path.join(args[1]), use_cpu=True)
model = CP(dataset.get_shape(), 50)
model.load_state_dict(torch.load(args[2], map_location=torch.device('cpu')))
graph_vectorizer = ClamsKGGraphEmbeddingTransformer(
    dataset, model, args[0],
    NeighbourhoodVectorConcatStrategy.CONCAT_TRIPLES)
# graph_vectorizer = GraphEmbeddingTransformer(dataset, model)

# Baseline RoBERTa/BERT
flair_vectorizer_baseline_roberta = FlairTransformer([
    TransformerWordEmbeddings(model="distilroberta-base", use_scalar_mix=True)
])
# Parser arguments for ConvE
# Dropout rates
parser.add_argument('--dropouts', default=(0.3, 0.3, 0.3), type=tuple,
                    help="Dropout rates for each layer in ConvE")
# Whether to use bias in the ConvE layers
parser.add_argument('--use_bias', default=True, type=bool,
                    help="Whether or not to use bias for the ConvE layers")

args = parser.parse_args()

dataset = Dataset(args.dataset)
examples = torch.from_numpy(
    dataset.get_train().astype('int64')).cpu()  # changed for CPU

print(dataset.get_shape())
model = {
    'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
    'ConvE': lambda: ConvE(dataset.get_shape(), args.rank, args.dropouts,
                           args.use_bias)
}[args.model]()

regularizer = {
    'N2': N2(args.reg),
import argparse
import os
import re
from typing import Dict

import numpy
import torch
from torch import optim
from torch.nn import DataParallel

from kbc import avg_both
from kbc.datasets import Dataset
from kbc.models import CP, ComplEx
from kbc.optimizers import KBCOptimizer
from kbc.regularizers import F2, N3

datasets = Dataset.get_dataset_shortlist()

parser = argparse.ArgumentParser(description="Relational learning contraption")
parser.add_argument("--save-model", nargs=1, default=[''], dest="save_model",
                    help="Save the final model to the specified directory")
parser.add_argument(
    "--save-checkpoints",
    nargs=1,
    default=[''],
    dest="save_checkpoints",
    help="Save a checkpoint of the model at each epoch to the specified directory")
                           str(int(args.train_no)))

# Check that the folder exists
# Folder path format: 'results/ComplEx/FB15K/train1'
if not os.path.exists(folder_path):
    raise Exception('You do not have a folder named: {}'.format(folder_path))

# Get the configuration
config = pickle.load(open(folder_path + '/config.p', 'rb'))

if config['save_pre_train'] == 1:
    pre_train_folder = '../pre_train/{}/{}'.format('Context_' + args.model,
                                                   args.dataset)

# Get Dataset
dataset = Dataset(config['dataset'])
dataset = Dataset(args.dataset)

if args.model in ['CP', 'ComplEx', 'ConvE']:
    # For non-context models
    unsorted_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    examples = unsorted_examples
else:
    # Get sorted examples for the context model
    sorted_data, slice_dic = dataset.get_sorted_train()
    examples = torch.from_numpy(dataset.get_train().astype('int64'))

rank, init = [int(config['rank']), float(config['init'])]

print(dataset.get_shape())
model = {
    'CP':