import torch
import numpy as np
import datetime
from spodernet.utils.global_config import Config
from spodernet.utils.cuda_utils import CUDATimer
from spodernet.utils.logger import Logger
from torch.autograd import Variable
from sklearn import metrics

#timer = CUDATimer()
log = Logger('evaluation{0}.py.txt'.format(datetime.datetime.now()))


def ranking_and_hits(model, dev_rank_batcher, vocab, name):
    log.info('')
    log.info('-' * 50)
    log.info(name)
    log.info('-' * 50)
    log.info('')
    hits_left = []
    hits_right = []
    hits = []
    ranks = []
    ranks_left = []
    ranks_right = []
    for i in range(10):
        hits_left.append([])
        hits_right.append([])
        hits.append([])

from itertools import chain

from spodernet.utils.global_config import Config, Backends
from spodernet.utils.logger import Logger

log = Logger('frontend.py.txt')


class Model(object):

    def __init__(self, input_module=None):
        self.modules = []
        self.input_module = input_module
        self.module = self

    def add(self, module):
        self.modules.append(module)

    def forward(self, str2var, *inputs):
        outputs = inputs
        if inputs is None:
            outputs = []
        for module in self.modules:
            outputs = module.forward(str2var, *outputs)
        return outputs


class Trainer(object):

    def __init__(self, model):
        self.model = model
        self.trainer_backend = None

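# --- Usage sketch (illustrative, not part of the library) ---
# Shows how Model.forward above chains modules: each added module receives the
# batch dict (str2var) plus the previous module's outputs. EchoModule is a
# hypothetical stand-in for a real spodernet module; it only needs to expose
# forward(str2var, *inputs), which is what the loop in Model.forward expects.

class EchoModule(object):
    def forward(self, str2var, *inputs):
        # pass the 'input' variable of the batch along as the only output
        return [str2var['input']]

sketch_model = Model()
sketch_model.add(EchoModule())
outputs = sketch_model.forward({'input': [1, 2, 3]})  # -> [[1, 2, 3]]
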
import torch
import numpy as np
import datetime
from spodernet.utils.logger import Logger
from torch.autograd import Variable
from sklearn import metrics

log = Logger('evaluation.py.txt')


def ranking_and_hits(model, dev_rank_batcher, vocab, name):
    log.info('')
    log.info('-' * 50)
    log.info(name)
    log.info('-' * 50)
    log.info('')
    hits_left = []
    hits_right = []
    hits = []
    ranks = []
    ranks_left = []
    ranks_right = []
    for i in range(10):
        hits_left.append([])
        hits_right.append([])
        hits.append([])

    for i, str2var in enumerate(dev_rank_batcher):
        e1 = str2var['e1']
        e2 = str2var['e2']

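# --- Metric sketch (illustrative; the full loop body of ranking_and_hits is
# truncated above, so this uses the standard link-prediction definitions
# rather than the exact code) ---
# Given 1-based ranks of the correct entity for each test triple, hits@k is
# the fraction of ranks <= k and MRR is the mean reciprocal rank.

import numpy as np

def hits_and_mrr(ranks, k=10):
    ranks = np.asarray(ranks, dtype=np.float64)
    hits_at_k = np.mean(ranks <= k)
    mrr = np.mean(1.0 / ranks)
    return hits_at_k, mrr

# hits_and_mrr([1, 3, 12, 2]) -> (0.75, 0.479...)
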
import os
from collections import namedtuple

from spodernet.utils.logger import Logger

log = Logger('global_config.py.txt')


class Backends:
    TORCH = 'pytorch'
    TENSORFLOW = 'tensorflow'
    TEST = 'test'
    CNTK = 'cntk'


class Config:
    dropout = 0.0
    batch_size = 128
    learning_rate = 0.001
    backend = Backends.TORCH
    L2 = 0.000
    cuda = False
    embedding_dim = 128
    hidden_size = 256
    input_dropout = 0.0
    feature_map_dropout = 0.0
    use_conv_transpose = False
    use_bias = True
    optimizer = 'adam'
    learning_rate_decay = 1.0
    label_smoothing_epsilon = 0.1
    epochs = 1000
    dataset = None

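# --- Usage sketch (illustrative, not part of the library) ---
# Config stores hyperparameters as plain class attributes, so they can be
# overridden globally before building models or batchers; every module that
# imports Config from spodernet.utils.global_config sees the same values.

Config.backend = Backends.TORCH
Config.batch_size = 256
Config.embedding_dim = 200
Config.cuda = True
print(Config.batch_size, Config.embedding_dim)
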
import numpy as np
import scipy.stats
import datetime

from spodernet.interfaces import IAtIterEndObservable, IAtEpochEndObservable, IAtEpochStartObservable
from spodernet.utils.util import Timer
from spodernet.utils.global_config import Config, Backends
from spodernet.utils.logger import Logger

log = Logger('hooks.py.txt')


class AbstractHook(IAtIterEndObservable, IAtEpochEndObservable):

    def __init__(self, name, metric_name, print_every_x_batches):
        self.epoch_errors = []
        self.current_scores = []
        self.name = name
        self.iter_count = 0
        self.print_every = print_every_x_batches
        self.metric_name = metric_name
        self.epoch = 1

        # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
        self.n = 0
        self.epoch_n = 0
        self.mean = 0
        self.M2 = 0

        self.load_backend_specific_functions()

    def load_backend_specific_functions(self):
        if Config.backend == Backends.TORCH:

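# --- Algorithm sketch (illustrative) ---
# The n / mean / M2 fields above follow the naming used by Welford's online
# algorithm from the linked Wikipedia article; this is the standard update
# step for tracking a running mean and variance without storing every value.

def welford_update(n, mean, M2, x):
    n += 1
    delta = x - mean
    mean += delta / n
    M2 += delta * (x - mean)
    return n, mean, M2

def welford_variance(n, M2):
    # sample variance; undefined for fewer than two observations
    return M2 / (n - 1) if n > 1 else 0.0
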
from os.path import join
from scipy.sparse import csr_matrix, spmatrix
import h5py
import os
import time
import numpy as np
import torch

from spodernet.utils.logger import Logger

log = Logger('util.py.txt')

rdm = np.random.RandomState(2345235)


def save_dense_hdf(path, data):
    '''Writes a numpy array to a hdf5 file under the given path.'''
    log.debug_once('Saving hdf5 file to: {0}', path)
    h5file = h5py.File(path, "w")
    h5file.create_dataset("default", data=data)
    h5file.close()


def load_dense_hdf(path, keyword='default'):
    '''Reads and returns a numpy array from a hdf5 file.'''
    log.debug_once('Reading hdf5 file from: {0}', path)
    h5file = h5py.File(path, 'r')
    dset = h5file.get(keyword)
    data = dset[:]
    h5file.close()
    return data

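# --- Usage sketch (illustrative, not part of the library) ---
# Round-trips a small array through the helpers above; the data is stored
# under the "default" dataset key, which is also the default keyword when
# loading. The /tmp path is just an example location.

arr = np.arange(12, dtype=np.float32).reshape(3, 4)
save_dense_hdf('/tmp/example.hdf5', arr)
restored = load_dense_hdf('/tmp/example.hdf5')
assert np.array_equal(arr, restored)
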
def test_global_logger():
    log1 = Logger('test1.txt')
    log2 = Logger('test2.txt')
    log1.info('uden')
    log2.info('kek')
    log2.info('rolfen')
    log1.info('keken')
    GlobalLogger.flush()

    expected = ['uden', 'kek', 'rolfen', 'keken']
    with open(GlobalLogger.global_logger_path) as f:
        data = f.readlines()
    print(len(data))
    for i, line in enumerate(data[-4:]):
        message = line.split(':')[3].strip()
        assert message == expected[i]
    assert i == len(expected) - 1

from spodernet.preprocessing.processors import ConvertTokenToIdx, ApplyFunction, ToLower, DictKey2ListMapper, StreamToBatch
from spodernet.preprocessing.processors import TargetIdx2MultiTarget
from spodernet.utils.global_config import Config, Backends
from spodernet.utils.logger import Logger, LogLevel
from spodernet.preprocessing.batching import StreamBatcher
from spodernet.preprocessing.pipeline import Pipeline
from spodernet.hooks import LossHook, ETAHook
from spodernet.utils.util import Timer

import argparse
import numpy as np
import torch.backends.cudnn as cudnn

np.set_printoptions(precision=3)

cudnn.benchmark = True

log = Logger("main.py.txt")

''' Preprocess knowledge graph using spodernet. '''
def preprocess(dataset_name, delete_data=False):
    full_path = 'data/{0}/e1rel_to_e2_full.json'.format(dataset_name)
    train_path = 'data/{0}/e1rel_to_e2_train.json'.format(dataset_name)
    dev_ranking_path = 'data/{0}/e1rel_to_e2_ranking_dev.json'.format(dataset_name)
    test_ranking_path = 'data/{0}/e1rel_to_e2_ranking_test.json'.format(dataset_name)

    keys2keys = {}
    keys2keys['e1'] = 'e1'  # entities
    keys2keys['rel'] = 'rel'  # relations
    keys2keys['rel_eval'] = 'rel'  # relations
    keys2keys['e2'] = 'e1'  # entities
    keys2keys['e2_multi1'] = 'e1'  # entity
    keys2keys['e2_multi2'] = 'e1'  # entity

from os.path import join

import os
import shutil
import json
import zipfile
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer

from spodernet.preprocessing.vocab import Vocab
from spodernet.preprocessing.processors import SaveLengthsToState
from spodernet.utils.util import Timer
from spodernet.utils.logger import Logger

log = Logger('pipeline.py.txt')

t = Timer()


class StreamMethods:
    files = 'FILES'
    data = 'DATA'


class DatasetStreamer(object):

    def __init__(self, input_keys=None, output_keys=None, stream_method=StreamMethods.files):
        self.stream_processors = []
        self.input_keys = input_keys or ['input', 'support', 'target']
        self.output_keys = output_keys or self.input_keys
        self.paths = []
        self.stream_method = stream_method
        self.data = []

from collections import namedtuple
import time
import datetime
import numpy as np
import queue
import pickle

from spodernet.utils.util import get_data_path, load_data, Timer
from spodernet.utils.global_config import Config, Backends
from spodernet.hooks import ETAHook
from spodernet.interfaces import IAtIterEndObservable, IAtEpochEndObservable, IAtEpochStartObservable, IAtBatchPreparedObservable
from spodernet.preprocessing.processors import DictConverter
from spodernet.utils.logger import Logger

log = Logger('batching.py.txt')

benchmark = False


class BatcherState(object):

    def __init__(self):
        self.clear()

    def clear(self):
        self.loss = None
        self.argmax = None
        self.pred = None
        self.batch_size = None
        self.current_idx = None
        self.current_epoch = None

from spodernet.utils.util import get_data_path, save_data, make_dirs_if_not_exists, load_data, Timer
from spodernet.interfaces import IAtBatchPreparedObservable
from spodernet.utils.global_config import Config
from spodernet.utils.logger import Logger
from past.builtins import basestring, long

import numpy as np
import os
import copy
import spacy
import nltk
import json
import pickle

log = Logger('processors.py.txt')

nlp = spacy.load('en')
timer = Timer()


class KeyToKeyMapper(IAtBatchPreparedObservable):

    def __init__(self, key2key):
        self.key2key = key2key

    def at_batch_prepared(self, batch_parts):
        str2var = batch_parts
        new_str2var = {}
        for key1, key2 in self.key2key.items():
            new_str2var[key2] = str2var[key1]
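
# --- Usage sketch (illustrative; at_batch_prepared is truncated above and
# presumably returns the remapped dict) ---
# KeyToKeyMapper renames the variables of a prepared batch, e.g. so that a
# model expecting 'input'/'target' can consume a batch keyed by 'e1'/'e2'.

mapper = KeyToKeyMapper({'e1': 'input', 'e2': 'target'})
batch = {'e1': np.array([4, 7]), 'e2': np.array([1, 9])}
remapped = mapper.at_batch_prepared(batch)  # expected: {'input': ..., 'target': ...}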