def main():
    """Load TransE embeddings from two sources and print a few lookups.

    One ``Embedding`` is restored from a checkpoint prefix, the other is
    filled from a ``.npy`` matrix. Both are then queried with the same two
    keys so their outputs can be compared side by side.
    """
    kb = Dataset("../benchmarks/fb15k.nt")

    # Source 1: restore from a training checkpoint.
    from_checkpoint = Embedding(kb, TransE)
    from_checkpoint.restore(prefix='../checkpoints/TransE/TransE')

    # Source 2: read the embedding matrix from a numpy file.
    from_matrix = Embedding(kb, TransE)
    from_matrix.load_embeddings_from_npy('../embeddings/TransE/TransE.npy')

    sources = (from_checkpoint, from_matrix)

    # Look up an entity identifier in both sources.
    for source in sources:
        print(source['/m/02f75t'])
    print()
    # NOTE(review): 'foobar' is presumably a key that is absent from the
    # dataset, shown to demonstrate the lookup behaviour for it -- confirm.
    for source in sources:
        print(source['foobar'])
def compute(model, n_batches, epochs, neg_ent, neg_rel, bern, workers, optimizer, dims, margin, eval, out, json,
            valid_file, test_file, file_path):
    """Train an embedding on a knowledge base, export it, and optionally evaluate it.

    The dataset is loaded from ``file_path`` (directly from an ``.npz``
    archive, otherwise as a training file), an ``Embedding`` is built for the
    model returned by ``get_model(model)``, trained, and written to
    ``{out}/{model}/{dataset.name}``. Checkpoints go to
    ``./checkpoints/{model}``.

    Args:
        model: Model name; resolved via ``get_model`` and used in output paths.
        n_batches: Number of batches, forwarded to ``Embedding`` as ``folds``.
        epochs: Number of training epochs for this run.
        neg_ent: Forwarded to ``Embedding`` as ``neg_ent``.
        neg_rel: Forwarded to ``Embedding`` as ``neg_rel``.
        bern: Forwarded to ``Embedding`` as ``bern``.
        workers: Forwarded to ``Embedding`` as ``workers``.
        optimizer: Optimizer name; also used (lower-cased) in file names.
        dims: Forwarded to ``Embedding`` as ``dimension``.
        margin: Forwarded to ``Embedding`` as ``margin``.
        eval: If truthy, compute rank predictions and metrics after training
            and write them as CSV files. (Name shadows the builtin; kept for
            interface compatibility.)
        out: Base output directory.
        json: If truthy, additionally save the embedding as JSON. (Name
            shadows the stdlib module inside this function; kept for
            interface compatibility.)
        valid_file: Optional validation file passed to ``Dataset``.
        test_file: Optional test file passed to ``Dataset``.
        file_path: Path to the training file or to an ``.npz`` dataset archive.
    """
    file_path = Path(file_path)
    if file_path.suffix == '.npz':
        dataset = Dataset.from_npz(file_path)
    elif valid_file is not None and test_file is not None:
        dataset = Dataset(train_file=str(file_path), valid_file=valid_file, test_file=test_file,
                          generate_valid_test=True)
    elif valid_file is None and test_file is None and eval:
        dataset = Dataset(train_file=str(file_path), generate_valid_test=True)
    else:
        dataset = Dataset(train_file=str(file_path))

    click.echo("Start training using the following parameters: ")
    click.echo("-----------------------------------------------")
    click.echo(f"Knowledge Base: {file_path}")
    click.echo(f"Batch number: {n_batches} => {int(dataset.size / n_batches)} total batch size")
    click.echo(f"Epochs: {epochs}")
    click.echo(f"Neg_Ent: {neg_ent}")
    click.echo(f"Neg_Rel: {neg_rel}")
    click.echo(f"bern: {bern}")
    click.echo(f"Workers: {workers}")
    click.echo(f"Optimizer: {optimizer}")
    click.echo(f"Dimensionality: {dims}")
    click.echo(f"Margin: {margin}")
    click.echo(f"Output directory: {out}")
    click.echo("-----------------------------------------------")

    embedding = Embedding(
        dataset,
        get_model(model),
        folds=n_batches,
        epochs=epochs,
        neg_ent=neg_ent,
        neg_rel=neg_rel,
        bern=bern,
        workers=workers,
        optimizer=optimizer,
        dimension=dims,  # TransE-specific
        margin=margin,  # TransE-specific
        out_path=out
    )

    checkpoint_path = Path(f'./checkpoints/{model}')
    out_path = Path(f'{out}/{model}/{dataset.name}')

    # exist_ok=True guards against the directory appearing between the
    # existence check and the mkdir call (e.g. parallel runs).
    if not out_path.exists():
        click.echo(f'Creating output path: {out_path}')
        out_path.mkdir(parents=True, exist_ok=True)

    if not checkpoint_path.exists():
        click.echo(f'Creating checkpoint directory: {checkpoint_path}')
        checkpoint_path.mkdir(parents=True, exist_ok=True)

    # if dataset is not written out, do so
    # if not (out_path / f'{dataset.name}_dataset.npz').exists():
    #     dataset.to_npz(out_path / f'{dataset.name}_dataset.npz')

    # Train the model. It is saved in the process.
    embedding.train(prefix=str(checkpoint_path / dataset.name))

    # Common stem for the exported embedding files.
    export_stem = f'{out_path}/{dataset.name}_{model.lower()}_{optimizer.lower()}_{dims}_embs'

    # Save the embedding to a JSON file
    if json:
        embedding.save_to_json(f'{export_stem}.json')

    # Save the embedding as numpy (.npz) file
    archive_name = f'{export_stem}.npz'
    embedding.save_to_npz(archive_name)

    if eval:
        rank_predictions = embedding.get_predictions()
        # rank_predictions.to_csv(f'{out_path}/{dataset.name}_rank_predictions.csv')
        results = calc_metrics(rank_predictions=rank_predictions)

        metrics_file = out_path / f'{dataset.name}_metrics.csv'
        if metrics_file.exists():
            # Metrics from earlier runs exist: accumulate the epoch count and
            # append this run's metrics as new row(s).
            df = pd.read_csv(str(metrics_file), index_col=0)
            total_epochs = int(df.iloc[-1]['epochs'] + epochs)
            results['epochs'] = total_epochs
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # replacement. A Series is turned into a single-row frame first,
            # which is how append treated it.
            new_rows = results.to_frame().T if isinstance(results, pd.Series) else results
            df = pd.concat([df, new_rows], ignore_index=True)
            df.to_csv(str(metrics_file))
            rank_predictions.to_csv(f'{out_path}/{dataset.name}_rank_predictions_{total_epochs}.csv')
            print(df)
        else:
            results['epochs'] = epochs
            results.to_csv(str(metrics_file))
            rank_predictions.to_csv(f'{out_path}/{dataset.name}_rank_predictions_{epochs}.csv')
            print(results)
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import DistMult

# Example: train a DistMult embedding and export it as JSON.

# Knowledge base to embed.
dataset = Dataset("./benchmarks/fb15k.nt")

# Training configuration handed to Embedding as keyword arguments.
hyperparameters = {
    "folds": 100,
    "epochs": 20,
    "neg_ent": 1,
    "neg_rel": 0,
    "bern": False,
    "workers": 4,
    # DistMult-specific settings
    "dimension": 50,
    "weight": 0.0001,
    "learning_rate": 0.1,
    "optimizer": "Adagrad",
}
embedding = Embedding(dataset, DistMult, **hyperparameters)

# Training also saves the model under the given prefix.
embedding.train(prefix="./DistMult")

# Export the trained embedding to a JSON file.
embedding.save_to_json("DistMult.json")
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import TransR

# Example: train a TransR embedding and export it as JSON.

# Knowledge base to embed.
dataset = Dataset("../benchmarks/fb15k.nt")

# Training configuration handed to Embedding as keyword arguments.
hyperparameters = {
    "folds": 20,
    "epochs": 20,
    "neg_ent": 1,
    "neg_rel": 0,
    "bern": False,
    "workers": 4,
    # TransR-specific settings
    "ent_dim": 50,
    "rel_dim": 10,
    "margin": 1.0,
}
embedding = Embedding(dataset, TransR, **hyperparameters)

# Training also saves the model under the given prefix.
# TODO: Currently not working
embedding.train(prefix="./TransR")

# Export the trained embedding to a JSON file.
embedding.save_to_json("TransR.json")
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import TransH

# Example: train a TransH embedding and export it as JSON.

# Knowledge base to embed.
dataset = Dataset("./benchmarks/fb15k.nt")

# Training configuration handed to Embedding as keyword arguments.
hyperparameters = {
    "folds": 20,
    "epochs": 20,
    "neg_ent": 1,
    "neg_rel": 0,
    "bern": False,
    "workers": 4,
    # TransH-specific settings
    "dimension": 50,
    "margin": 1.0,
}
embedding = Embedding(dataset, TransH, **hyperparameters)

# Training also saves the model under the given prefix.
embedding.train(prefix="./TransH")

# Export the trained embedding to a JSON file.
embedding.save_to_json("TransH.json")
def test_get_ent_embeddings(self):
    """Entity embeddings of a freshly trained TransE model are an (11, 50) ndarray."""
    model = Embedding(self.dataset, TransE)
    model.train("./tests/tmp/TransE", continue_training=False)

    entity_matrix = model.get_ent_embeddings()

    self.assertIsInstance(entity_matrix, np.ndarray)
    expected_shape = (11, 50)
    self.assertEqual(entity_matrix.shape, expected_shape)
def test_train_transe(self):
    """After five epochs of TransE training the reported loss lies in [0, 4]."""
    model = Embedding(self.dataset, TransE, epochs=5)
    model.train("./tests/tmp/TransE", continue_training=False)

    lower_bound, upper_bound = 0, 4
    # The loss is queried separately for each bound, as in the original test.
    self.assertLessEqual(lower_bound, model.get_loss())
    self.assertGreaterEqual(upper_bound, model.get_loss())
def test_init_transe(self):
    """Constructing a TransE embedding with default settings must not raise."""
    kb = self.dataset
    Embedding(kb, TransE)
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import HolE

# Example: train a HolE embedding and export it as JSON.

# Knowledge base to embed.
dataset = Dataset("./benchmarks/fb15k.nt")

# Training configuration handed to Embedding as keyword arguments.
hyperparameters = {
    "folds": 20,
    "epochs": 20,
    "neg_ent": 1,
    "neg_rel": 0,
    "bern": False,
    "workers": 4,
    # HolE-specific settings
    "dimension": 50,
    "margin": 1.0,
}
embedding = Embedding(dataset, HolE, **hyperparameters)

# Training also saves the model under the given prefix.
embedding.train(prefix="./HolE")

# Export the trained embedding to a JSON file.
embedding.save_to_json("HolE.json")
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import ComplEx

# Example: train a ComplEx embedding and export it as JSON.

# Knowledge base to embed.
dataset = Dataset("./benchmarks/fb15k.nt")

# Training configuration handed to Embedding as keyword arguments.
hyperparameters = {
    "folds": 100,
    "epochs": 20,
    "neg_ent": 1,
    "neg_rel": 0,
    "bern": False,
    "workers": 4,
    # ComplEx-specific settings
    "dimension": 100,
    "weight": 0.0001,
    "learning_rate": 0.1,
    "optimizer": "Adagrad",
}
embedding = Embedding(dataset, ComplEx, **hyperparameters)

# Training also saves the model under the given prefix.
embedding.train(prefix="./ComplEx")

# Export the trained embedding to a JSON file.
embedding.save_to_json("ComplEx.json")
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import RESCAL

# Example: train a RESCAL embedding and export it as JSON.

# Knowledge base to embed.
dataset = Dataset("./benchmarks/fb15k.nt")

# Training configuration handed to Embedding as keyword arguments.
hyperparameters = {
    "folds": 20,
    "epochs": 20,
    "neg_ent": 1,
    "neg_rel": 0,
    "bern": False,
    "workers": 4,
    # RESCAL-specific settings
    "dimension": 50,
    "margin": 1.0,
}
embedding = Embedding(dataset, RESCAL, **hyperparameters)

# Training also saves the model under the given prefix.
embedding.train(prefix="./RESCAL")

# Export the trained embedding to a JSON file.
embedding.save_to_json("RESCAL.json")