device = 'cuda'
model.to(device)

optim_method = {
    'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
    'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate,
                               betas=(args.decay1, args.decay2)),
    'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
}[args.optimizer]()

optimizer = KBCOptimizer(model, regularizer, optim_method, args.batch_size)


def avg_both(mrrs: Dict[str, float], hits: Dict[str, torch.FloatTensor]):
    """
    Aggregate metrics over the two prediction directions (missing lhs and rhs).
    :param mrrs: MRR per direction, keyed by 'lhs' and 'rhs'
    :param hits: hits@[1,3,10] tensor per direction, keyed by 'lhs' and 'rhs'
    :return: dict with the averaged 'MRR' and 'hits@[1,3,10]'
    """
    m = (mrrs['lhs'] + mrrs['rhs']) / 2.
    h = (hits['lhs'] + hits['rhs']) / 2.
    return {'MRR': m, 'hits@[1,3,10]': h}


cur_loss = 0
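# Illustrative sketch, not part of the original script: one way the optimizer and
# avg_both defined above are typically driven. `dataset.get_train()`,
# `dataset.eval(model, split, n_queries)` and the `args.valid` interval flag are
# assumptions here, not confirmed by this file.
examples = torch.from_numpy(dataset.get_train().astype('int64'))
for e in range(args.max_epochs):
    cur_loss = optimizer.epoch(examples)
    if (e + 1) % args.valid == 0:
        # evaluate on the validation split and average lhs/rhs metrics
        valid_results = avg_both(*dataset.eval(model, 'valid', 500))
        print("\t VALID:", valid_results)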
elif args.model in ['ContExt']:
    # freeze the pretrained entity/relation embeddings
    for i in range(2):
        model.embeddings[i].weight.requires_grad = False

    optim_method = {
        'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
        'RMSprop': lambda: optim.RMSprop(model.parameters(), lr=args.learning_rate),
        'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()

    # print('Model state:')
    # for param_tensor in model.state_dict():
    #     print(f'\t{param_tensor}\t{model.state_dict()[param_tensor].size()}')

    optimizer = KBCOptimizer(model, regularizer, optim_method, args.batch_size,
                             n_freeze=args.n_freeze)


def avg_both(mrrs: Dict[str, float], hits: Dict[str, torch.FloatTensor]):
    """
    Aggregate metrics over the two prediction directions (missing lhs and rhs).
    :param mrrs: MRR per direction, keyed by 'lhs' and 'rhs'
    :param hits: hits@[1,3,10] tensor per direction, keyed by 'lhs' and 'rhs'
    :return: dict with the averaged 'MRR' and 'hits@[1,3,10]'
    """
    m = (mrrs['lhs'] + mrrs['rhs']) / 2.
    h = (hits['lhs'] + hits['rhs']) / 2.
    return {'MRR': m, 'hits@[1,3,10]': h}


cur_loss = 0
def kbc_model_load(model_path):
    """
    Loads a trained KBC model given the path to its checkpoint. The common
    identifiers in the filename are used to locate the matching metadata file
    and to restore the model from there.
    @params:
        model_path - full or relative path to the model checkpoint
    @returns:
        KBC_optimizer : the restored KBCOptimizer (wrapping model, regularizer and optimizer)
        epoch : the epoch the model was trained until (int)
        loss : the last loss stored in the checkpoint
    """
    identifiers = model_path.split('/')[-1]
    identifiers = identifiers.split('-')
    dataset_name, timestamp = identifiers[0].strip(), identifiers[-1][:-3].strip()
    if "YAGO" in dataset_name:
        dataset_name = "YAGO3-10"
    if 'FB15k' in dataset_name and '237' in identifiers:
        dataset_name = 'FB15k-237'

    model_dir = os.path.dirname(model_path)
    with open(os.path.join(model_dir, f'{dataset_name}-metadata-{timestamp}.json'), 'r') as json_file:
        metadata = json.load(json_file)

    map_location = None
    if not torch.cuda.is_available():
        map_location = torch.device('cpu')
    checkpoint = torch.load(model_path, map_location=map_location)

    factorizer_name = checkpoint['factorizer_name']
    models = ['CP', 'ComplEx', 'DistMult']
    if 'cp' in factorizer_name.lower():
        model = CP(metadata['data_shape'], metadata['rank'], metadata['init'])
    elif 'complex' in factorizer_name.lower():
        model = ComplEx(metadata['data_shape'], metadata['rank'], metadata['init'])
    elif 'distmult' in factorizer_name.lower():
        model = DistMult(metadata['data_shape'], metadata['rank'], metadata['init'])
    else:
        raise ValueError(f'Model {factorizer_name} not in {models}')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    regularizer = checkpoint['regularizer']
    optim_method = checkpoint['optim_method']
    batch_size = checkpoint['batch_size']

    KBC_optimizer = KBCOptimizer(model, regularizer, optim_method, batch_size)
    KBC_optimizer.model.load_state_dict(checkpoint['model_state_dict'])
    KBC_optimizer.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    epoch = checkpoint['epoch']
    loss = checkpoint['loss']

    print(KBC_optimizer.model.eval())
    return KBC_optimizer, epoch, loss
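# Illustrative usage sketch, not part of the original file: restore a checkpoint and
# re-run evaluation. The filename below is hypothetical (its <dataset>-...-<timestamp>.pt
# layout is only inferred from the parsing above), and `dataset` / `avg_both` are
# assumed to be available in the calling script.
kbc, epoch, loss = kbc_model_load('models/FB15k-237-model-1600000000.pt')
for split in ['valid', 'test']:
    # -1 ranks against all candidate entities for the split
    print(split, avg_both(*dataset.eval(kbc.model, split, -1)))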
    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    device = 'cuda'
    model.to(device)
    pprint(vars(args))

    optim_method = {
        'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
        'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate,
                                   betas=(args.decay1, args.decay2)),
        'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()

    KBC_optimizer = KBCOptimizer(model, regularizer, optim_method, args.batch_size)
    curve, results = train_kbc(KBC_optimizer, dataset, args)
else:
    kbc, epoch, loss = kbc_model_load(args.checkpoint)
    for split in ['valid', 'test']:
        results = dataset.eval(kbc.model, split, -1)
        print(f"{split}: ", avg_both(*results))
}[args.regularizer]

optim_method = {
    'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
    'Adam': lambda: optim.SparseAdam(model.parameters(), lr=args.learning_rate,
                                     betas=(args.decay1, args.decay2)),
    'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
}[args.optimizer]()

optimizer = KBCOptimizer(model, regularizer, optim_method, args.batch_size,
                         use_cpu=use_cpu)

cur_loss = 0
curve = {'train': [], 'valid': [], 'test': []}

if checkpoint_epoch > -1:
    training_range = range(checkpoint_epoch, args.max_epochs)
    print("Resuming training at epoch " + str(checkpoint_epoch))
else:
    training_range = range(args.max_epochs)

for e in training_range:
    cur_loss = optimizer.epoch(examples, epoch_number=e, max_epochs=args.max_epochs)
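    # Illustrative sketch, not part of the original loop: persist a checkpoint in the
    # layout that kbc_model_load() expects to read back. The save interval and the
    # output filename pattern are hypothetical; the dictionary keys mirror the ones
    # consumed by kbc_model_load.
    if (e + 1) % 50 == 0:  # hypothetical save interval
        torch.save({
            'factorizer_name': args.model,
            'regularizer': regularizer,
            'optim_method': optim_method,
            'batch_size': args.batch_size,
            'model_state_dict': optimizer.model.state_dict(),
            'optimizer_state_dict': optimizer.optimizer.state_dict(),
            'epoch': e,
            'loss': cur_loss,
        }, f'{args.dataset}-model-{e}.pt')  # hypothetical filename pattern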
if config['model'] == "ConvE":
    model.init()

optim_method = {
    'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=config['learning_rate']),
    'Adam': lambda: optim.Adam(model.parameters(), lr=config['learning_rate'],
                               betas=(config['decay1'], config['decay2'])),
    'SGD': lambda: optim.SGD(model.parameters(), lr=config['learning_rate'])
}[config['optimizer']]()

optimizer = KBCOptimizer(model, regularizer, optim_method, config['batch_size'])
model.load_state_dict(torch.load(folder_path + '/model_state.pt'))


def avg_both(mrrs: Dict[str, float], hits: Dict[str, torch.FloatTensor]):
    """
    Aggregate metrics over the two prediction directions (missing lhs and rhs).
    :param mrrs: MRR per direction, keyed by 'lhs' and 'rhs'
    :param hits: hits@[1,3,10] tensor per direction, keyed by 'lhs' and 'rhs'
    :return: dict with the averaged 'MRR' and 'hits@[1,3,10]'
    """
    m = (mrrs['lhs'] + mrrs['rhs']) / 2.
    h = (hits['lhs'] + hits['rhs']) / 2.
    return {'MRR': m, 'hits@[1,3,10]': h}