Ejemplo n.º 1
0
# Adam beta coefficients exposed as CLI flags (defaults match the
# torch.optim.Adam defaults of 0.9 / 0.999).
parser.add_argument('--decay1',
                    default=0.9,
                    type=float,
                    help="decay rate for the first moment estimate in Adam")
parser.add_argument('--decay2',
                    default=0.999,
                    type=float,
                    help="decay rate for second moment estimate in Adam")
args = parser.parse_args()

# Load the dataset named on the command line and pull the training triples
# into an int64 tensor (rows presumably (head, relation, tail) ids --
# TODO confirm against kbc.datasets.Dataset).
dataset = Dataset(args.dataset)
examples = torch.from_numpy(dataset.get_train().astype('int64'))

print(dataset.get_shape())
# Lambdas keep construction lazy: only the model selected by args.model
# is actually instantiated.
model = {
    'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
}[args.model]()

# Regularizers are cheap to build, so both are constructed eagerly and
# one is selected by name.
regularizer = {
    'F2': F2(args.reg),
    'N3': N3(args.reg),
}[args.regularizer]

device = 'cuda'  # NOTE(review): hard-coded; fails on CPU-only machines
model.to(device)

optim_method = {
    'Adagrad':
    lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
    'Adam':
    # Only keywords
    # Only keywords
    # Build a dataset variant whose 'text' column is the joined 'keywords'
    # column (presumably one keyword string per row -- TODO confirm).
    dataset_kw = dataset.copy()
    dataset_kw['text'] = dataset[['keywords']].apply(lambda x: ''.join(x), axis=1).to_list()

    # Three (claim, text) input frames: original text, full text, keywords-only.
    input_x = dataset[['claim', 'text']]
    input_x_all = dataset_text_all[['claim', 'text']]
    input_x_kw = dataset_kw[['claim', 'text']]

    # Target matrix: one column per label in class_list.
    input_y = dataset[class_list].copy().values

    # Positional CLI arguments: data directory and saved-model directory.
    data_path = args[2]
    model_path = args[3]

    # CKGE Graph embeddings
    # Load a pre-trained rank-50 CP factorization on the CPU and wrap it in
    # a transformer that maps entities to their learned embedding vectors.
    ckge_dataset = Dataset(os.path.join(data_path, "CKGE"), use_cpu=True)
    ckge_model = CP(ckge_dataset.get_shape(), 50)
    ckge_model.load_state_dict(
        torch.load(os.path.join(model_path, "CKGE.pickle"),
                   map_location=torch.device('cpu')))

    ckge_graph_vectorizer = GraphEmbeddingTransformer(ckge_dataset, ckge_model)

    # Distil RoBERTa (DR)
    # Word-level transformer embeddings via flair; batch_size=1 keeps the
    # memory footprint small at the cost of throughput.
    flair_vectorizer_DR = FlairTransformer([
        TransformerWordEmbeddings(model="distilroberta-base",
                                  use_scalar_mix=True)
    ], batch_size=1)

    # GPT2
    flair_vectorizer_GPT2 = FlairTransformer([
        TransformerWordEmbeddings(model="gpt2-large",
Ejemplo n.º 3
0
Archivo: learn.py Proyecto: jhb115/kbc
)

# Setup parser
args = parser.parse_args()

# Get Dataset
dataset = Dataset(args.dataset)
if args.model in ['CP', 'ComplEx']:
    # Plain factorization models train on the raw (unsorted) triples.
    unsorted_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    examples = unsorted_examples
else:
    # Context models additionally need the neighbourhood index
    # (sorted_data, slice_dic) produced by get_sorted_train().
    sorted_data, slice_dic = dataset.get_sorted_train()
    examples = torch.from_numpy(dataset.get_train().astype('int64'))

# Lambdas keep construction lazy; note sorted_data/slice_dic only exist
# when args.model is not CP/ComplEx, which is exactly when the ContExt
# entry can be selected.
model = {
    'CP': lambda: CP(dataset.get_shape(), args.rank, args.init),
    'ComplEx': lambda: ComplEx(dataset.get_shape(), args.rank, args.init),
    'ContExt': lambda: ContExt(dataset.get_shape(), args.rank, sorted_data, slice_dic,
                               max_NB=args.max_NB, init_size=args.init, data_name=args.dataset,
                               ascending=args.ascending, dropout_1=args.dropout_1,
                               dropout_g=args.dropout_g, evaluation_mode=args.evaluation_mode),
}[args.model]()

# 'N0' maps to a plain string sentinel rather than a regularizer object
# (presumably meaning "no regularization", handled specially downstream
# -- TODO confirm).
regularizer = {
    'N0': 'N0',
    'N2': N2(args.reg),
    'N3': N3(args.reg),
    'N4': N4(args.reg, g_weight=args.g_weight)
}[args.regularizer]

#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Ejemplo n.º 4
0
import sys

import torch
from kbc.datasets import Dataset

from kbc import avg_both
from kbc.models import CP

# Evaluation entry point: restore a rank-50 CP model from a checkpoint and
# print its averaged (lhs/rhs) test-set metrics.
#   argv[1] -- dataset name/path understood by kbc.datasets.Dataset
#   argv[2] -- path to the saved state_dict (loaded onto the CPU)
args = sys.argv[1:]

dataset_path, checkpoint_path = args[0], args[1]

dataset = Dataset(dataset_path, use_cpu=True)

model = CP(dataset.get_shape(), 50)
state = torch.load(checkpoint_path, map_location=torch.device('cpu'))
model.load_state_dict(state)

# dataset.eval returns per-direction metrics; avg_both averages the two.
metrics = dataset.eval(model, "test", 50000, batch_size=100)
print(avg_both(*metrics))
Ejemplo n.º 5
0
def kbc_model_load(model_path):
    """Load a saved KBC model and its training state from *model_path*.

    The checkpoint filename is expected to look like
    ``<dataset>-...-<timestamp>.<ext>`` with a 3-character extension
    (e.g. ``.pt``); the matching ``<dataset>-metadata-<timestamp>.json``
    file must live in the same directory and supplies the model
    hyper-parameters (data_shape, rank, init).

    @params:
        model_path - full or relative path to the checkpoint file
    @returns:
        KBC_optimizer : KBCOptimizer wrapping the restored model/optimizer
        epoch : the epoch trained until (int)
        loss  : the last loss stored in the checkpoint
    @raises:
        ValueError if the checkpoint's factorizer_name is not recognized
    """
    identifiers = model_path.split('/')[-1].split('-')

    # First '-'-token is the dataset name; last token is '<timestamp>.<ext>'
    # and [:-3] strips the 3-character extension.
    dataset_name, timestamp = identifiers[0].strip(
    ), identifiers[-1][:-3].strip()
    if "YAGO" in dataset_name:
        dataset_name = "YAGO3-10"
    # BUG FIX: the original condition was `'FB15k' and '237' in identifiers`,
    # which only tested '237' membership ('FB15k' is a truthy constant).
    if 'FB15k' in identifiers and '237' in identifiers:
        dataset_name = 'FB15k-237'

    model_dir = os.path.dirname(model_path)

    with open(
            os.path.join(model_dir,
                         f'{dataset_name}-metadata-{timestamp}.json'),
            'r') as json_file:
        metadata = json.load(json_file)

    # Fall back to CPU tensors when no GPU is available.
    map_location = None
    if not torch.cuda.is_available():
        map_location = torch.device('cpu')

    checkpoint = torch.load(model_path, map_location=map_location)

    factorizer_name = checkpoint['factorizer_name']
    models = ['CP', 'ComplEx', 'DistMult']
    name = factorizer_name.lower()
    # Substring dispatch is safe here: 'cp' is not a substring of
    # 'complex' or 'distmult'.
    if 'cp' in name:
        model = CP(metadata['data_shape'], metadata['rank'], metadata['init'])
    elif 'complex' in name:
        model = ComplEx(metadata['data_shape'], metadata['rank'],
                        metadata['init'])
    elif 'distmult' in name:
        model = DistMult(metadata['data_shape'], metadata['rank'],
                         metadata['init'])
    else:
        raise ValueError(f'Model {factorizer_name} not in {models}')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    KBC_optimizer = KBCOptimizer(model, checkpoint['regularizer'],
                                 checkpoint['optim_method'],
                                 checkpoint['batch_size'])
    KBC_optimizer.model.load_state_dict(checkpoint['model_state_dict'])
    KBC_optimizer.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']

    # .eval() disables training-mode behaviour (dropout etc.); printing it
    # also logs the module structure, as the original code did.
    print(KBC_optimizer.model.eval())

    return KBC_optimizer, epoch, loss
Ejemplo n.º 6
0
# Load the dataset and pick the training-example layout required by the
# selected model family.
dataset = Dataset(args.dataset)
if args.model in ['CP', 'ComplEx', 'ConvE']:  # For non-context model
    unsorted_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    examples = unsorted_examples
else:  # Get sorted examples for context model
    # Context models additionally need the neighbourhood index
    # (sorted_data, slice_dic).
    sorted_data, slice_dic = dataset.get_sorted_train()
    examples = torch.from_numpy(dataset.get_train().astype('int64'))

# Hyper-parameters come from a config mapping here rather than argparse.
rank, init = [int(config['rank']), float(config['init'])]

print(dataset.get_shape())

model = {
    'CP':
    lambda: CP(dataset.get_shape(), rank, init),
    'ComplEx':
    lambda: ComplEx(dataset.get_shape(), rank, init),
    'ConvE':
    lambda: ConvE(dataset.get_shape(), rank, config['dropouts'], config[
        'use_bias'], config['hw'], config['kernel_size'], config[
            'output_channel']),
    'Context_CP':
    lambda: Context_CP(dataset.get_shape(),
                       rank,
                       sorted_data,
                       slice_dic,
                       max_NB=config['max_NB'],
                       init_size=config['init'],
                       data_name=config['dataset']),
    'Context_ComplEx':