"epoch_times": epoch_times, "total_epoch_times": total_epoch_times }, open(results_path, "wb")) total_epoch_time = time.time() - start_epoch total_epoch_times.append(total_epoch_time) logger.info('epoch: {} took {:.2f}s'.format(epoch, total_epoch_time)) logger.info('Epoch mean loss: {}'.format(np.mean(m_loss))) logger.info('val auc: {}, new node val auc: {}'.format( val_auc, nn_val_auc)) logger.info('val ap: {}, new node val ap: {}'.format( val_ap, nn_val_ap)) # Early stopping if early_stopper.early_stop_check(val_ap): logger.info('No improvement over {} epochs, stop training'.format( early_stopper.max_round)) logger.info( f'Loading the best model at epoch {early_stopper.best_epoch}') best_model_path = get_checkpoint_path(early_stopper.best_epoch) dgnn.load_state_dict(torch.load(best_model_path)) logger.info( f'Loaded the best model at epoch {early_stopper.best_epoch} for inference' ) dgnn.eval() break else: torch.save(dgnn.state_dict(), get_checkpoint_path(epoch)) # Training has finished, we have loaded the best model, and we want to backup its current
  with open(results_path, "wb") as f:
    pickle.dump({
        "val_losses": val_losses,
        "train_losses": train_losses,
        "epoch_times": [0.0],
        "new_nodes_val_aps": [],
    }, f)

  logger.info(f'Epoch {epoch}: train loss: {loss / num_batch}, val loss: {val_loss},'
              f' time: {time.time() - start_epoch}')

  if args.use_validation:
    if early_stopper.early_stop_check(val_loss):
      logger.info('No improvement over {} epochs, stop training'.format(early_stopper.max_round))
      break
    else:
      torch.save(decoder.state_dict(), get_checkpoint_path(epoch))

if args.use_validation:
  logger.info(f'Loading the best model at epoch {early_stopper.best_epoch}')
  best_model_path = get_checkpoint_path(early_stopper.best_epoch)
  decoder.load_state_dict(torch.load(best_model_path))
  logger.info(f'Loaded the best model at epoch {early_stopper.best_epoch} for inference')
  decoder.eval()
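
# `get_checkpoint_path` is used by the snippets above to map an epoch index to a
# checkpoint file; the file for `early_stopper.best_epoch` is reloaded once training
# stops. Its definition is not shown here; a hypothetical version (the directory
# layout and file name are assumptions) could be:
import os

CHECKPOINT_DIR = './saved_checkpoints'

def get_checkpoint_path(epoch):
  # One .pth file per epoch, so the best epoch's weights can be restored later.
  os.makedirs(CHECKPOINT_DIR, exist_ok=True)
  return os.path.join(CHECKPOINT_DIR, f'checkpoint-epoch-{epoch}.pth')
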
  val_rmses.append(val_rmse)
  with open(results_path, "wb") as f:
    pickle.dump({
        "val_rmse": val_rmse,
        "train_losses": train_losses,
        "epoch_times": [0.0],
        "new_nodes_val_aps": [],
    }, f)

  logger.info(f'Epoch {epoch}: train loss: {loss / num_batch}, val rmse: {val_rmse},'
              f' time: {time.time() - start_epoch}')

  if args.use_validation:
    # RMSE is negated so that a lower RMSE registers as an improvement for the early stopper.
    if early_stopper.early_stop_check(-val_rmse):
      logger.info('No improvement over {} epochs, stop training'.format(early_stopper.max_round))
      break
    else:
      torch.save(net.state_dict(), get_checkpoint_path(epoch))

if args.use_validation:
  logger.info(f'Loading the best model at epoch {early_stopper.best_epoch}')
  best_model_path = get_checkpoint_path(early_stopper.best_epoch)
  net.load_state_dict(torch.load(best_model_path))
  logger.info(f'Loaded the best model at epoch {early_stopper.best_epoch} for inference')
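
# Portability note on the reload above: torch.load places tensors back on the device
# they were saved from by default, which fails if the checkpoint was written on a GPU
# and inference later runs on a CPU-only machine. A common variant passes an explicit
# map_location (`device` stands for whatever torch.device the surrounding script uses):
#
#   net.load_state_dict(torch.load(best_model_path, map_location=device))
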
}, open(results_path, "wb")) total_epoch_time = time.time() - start_epoch total_epoch_times.append(total_epoch_time) logger.info('epoch: {} took {:.2f}s'.format(epoch, total_epoch_time)) logger.info('Epoch mean loss: {}'.format(np.mean(m_loss))) logger.info( 'val mrr: {}, new node val mrr: {}'.format(val_mrr, nn_val_mrr)) logger.info( 'val recall 20: {}, new node val recall 20: {}'.format(val_recall_20, nn_val_recall_20)) logger.info( 'val recall 50: {}, new node val recall 50: {}'.format(val_recall_50, nn_val_recall_50)) # Early stopping if early_stopper.early_stop_check(val_mrr): logger.info('No improvement over {} epochs, stop training'.format(early_stopper.max_round)) logger.info(f'Loading the best model at epoch {early_stopper.best_epoch}') best_model_path = get_checkpoint_path(early_stopper.best_epoch) dgnn.load_state_dict(torch.load(best_model_path)) logger.info(f'Loaded the best model at epoch {early_stopper.best_epoch} for inference') dgnn.eval() break else: torch.save(dgnn.state_dict(), get_checkpoint_path(epoch)) # Training has finished, we have loaded the best model, and we want to backup its current # memory (which has seen validation edges) so that it can also be used when testing on unseen # nodes val_memory_backup_s = dgnn.memory_s.backup_memory() val_memory_backup_g = dgnn.memory_g.backup_memory()