Example #1
import torch
import numpy as np
import datetime

from spodernet.utils.global_config import Config
from spodernet.utils.cuda_utils import CUDATimer
from spodernet.utils.logger import Logger
from torch.autograd import Variable
from sklearn import metrics

#timer = CUDATimer()
log = Logger('evaluation{0}.py.txt'.format(datetime.datetime.now()))


def ranking_and_hits(model, dev_rank_batcher, vocab, name):
    log.info('')
    log.info('-' * 50)
    log.info(name)
    log.info('-' * 50)
    log.info('')
    hits_left = []
    hits_right = []
    hits = []
    ranks = []
    ranks_left = []
    ranks_right = []
    for i in range(10):
        hits_left.append([])
        hits_right.append([])
        hits.append([])
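
The ten sub-lists collect binary hit indicators: hits[i] stores a 1.0/0.0 flag for "the true entity was ranked within the top i+1" (Hits@1 through Hits@10). A minimal sketch of how such lists are typically reduced to the final metrics; the function name is illustrative, not part of the snippet above:

def summarize(ranks, hits):
    # Mean rank and mean reciprocal rank over all evaluated triples.
    log.info('Mean rank: {0}'.format(np.mean(ranks)))
    log.info('MRR: {0}'.format(np.mean(1.0 / np.array(ranks))))
    for k in [1, 3, 10]:
        log.info('Hits@{0}: {1}'.format(k, np.mean(hits[k - 1])))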
Example #2
from itertools import chain

from spodernet.utils.global_config import Config, Backends

from spodernet.utils.logger import Logger
log = Logger('frontend.py.txt')


class Model(object):
    def __init__(self, input_module=None):
        self.modules = []
        self.input_module = input_module
        self.module = self

    def add(self, module):
        self.modules.append(module)

    def forward(self, str2var, *inputs):
        outputs = inputs
        # *inputs arrives as a tuple, so test emptiness; the original `== None` check could never fire.
        if not inputs:
            outputs = []
        for module in self.modules:
            outputs = module.forward(str2var, *outputs)
        return outputs


class Trainer(object):
    def __init__(self, model):
        self.model = model

        self.trainer_backend = None
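
A minimal usage sketch for the Model container above; DoubleModule is a hypothetical stand-in that merely obeys the forward(str2var, *inputs) contract, not a real spodernet module:

class DoubleModule(object):
    '''Hypothetical module obeying the forward(str2var, *inputs) contract.'''
    def forward(self, str2var, *inputs):
        return [x * 2 for x in inputs]

model = Model()
model.add(DoubleModule())
model.add(DoubleModule())
print(model.forward({}, 1, 2))  # each stage doubles its inputs -> [4, 8]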
Example #3
import torch
import numpy as np
import datetime

from spodernet.utils.logger import Logger
from torch.autograd import Variable
from sklearn import metrics

log = Logger('evaluation.py.txt')


def ranking_and_hits(model, dev_rank_batcher, vocab, name):
    log.info('')
    log.info('-' * 50)
    log.info(name)
    log.info('-' * 50)
    log.info('')
    hits_left = []
    hits_right = []
    hits = []
    ranks = []
    ranks_left = []
    ranks_right = []
    for i in range(10):
        hits_left.append([])
        hits_right.append([])
        hits.append([])

    for i, str2var in enumerate(dev_rank_batcher):
        e1 = str2var['e1']
        e2 = str2var['e2']
Example #4
import os
from collections import namedtuple
from spodernet.utils.logger import Logger
log = Logger('global_config.py.txt')


class Backends:
    TORCH = 'pytorch'
    TENSORFLOW = 'tensorflow'
    TEST = 'test'
    CNTK = 'cntk'


class Config:
    dropout = 0.0
    batch_size = 128
    learning_rate = 0.001
    backend = Backends.TORCH
    L2 = 0.000
    cuda = False
    embedding_dim = 128
    hidden_size = 256
    input_dropout = 0.0
    feature_map_dropout = 0.0
    use_conv_transpose = False
    use_bias = True
    optimizer = 'adam'
    learning_rate_decay = 1.0
    label_smoothing_epsilon = 0.1
    epochs = 1000
    dataset = None
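
Because Config exposes plain class attributes, scripts override them in place before training starts, and every module that later reads Config sees the new values; a sketch with arbitrary values:

Config.batch_size = 256   # overrides the default of 128
Config.dropout = 0.2
Config.cuda = True
print(Config.batch_size)  # 256, visible process-wide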
Example #5
import numpy as np
import scipy.stats
import datetime

from spodernet.interfaces import IAtIterEndObservable, IAtEpochEndObservable, IAtEpochStartObservable
from spodernet.utils.util import Timer
from spodernet.utils.global_config import Config, Backends

from spodernet.utils.logger import Logger
log = Logger('hooks.py.txt')


class AbstractHook(IAtIterEndObservable, IAtEpochEndObservable):
    def __init__(self, name, metric_name, print_every_x_batches):
        self.epoch_errors = []
        self.current_scores = []
        self.name = name
        self.iter_count = 0
        self.print_every = print_every_x_batches
        self.metric_name = metric_name
        self.epoch = 1

        # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
        self.n = 0
        self.epoch_n = 0
        self.mean = 0
        self.M2 = 0
        self.load_backend_specific_functions()

    def load_backend_specific_functions(self):
        if Config.backend == Backends.TORCH:
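
The n, mean, and M2 fields implement the streaming mean/variance update (Welford's algorithm) from the Wikipedia link in the snippet. A self-contained sketch of that update, since the hook's own code is truncated here:

def welford_update(n, mean, M2, x):
    # One online update; the sample variance is M2 / (n - 1) once n > 1.
    n += 1
    delta = x - mean
    mean += delta / n
    M2 += delta * (x - mean)
    return n, mean, M2

n, mean, M2 = 0, 0.0, 0.0
for x in [2.0, 4.0, 6.0]:
    n, mean, M2 = welford_update(n, mean, M2, x)
print(mean, M2 / (n - 1))  # 4.0 4.0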
Example #6
from os.path import join
from scipy.sparse import csr_matrix, spmatrix

import h5py
import os
import time
import numpy as np
import torch

from spodernet.utils.logger import Logger
log = Logger('util.py.txt')

rdm = np.random.RandomState(2345235)


def save_dense_hdf(path, data):
    '''Writes a numpy array to an hdf5 file under the given path.'''
    log.debug_once('Saving hdf5 file to: {0}', path)
    h5file = h5py.File(path, "w")
    h5file.create_dataset("default", data=data)
    h5file.close()


def load_dense_hdf(path, keyword='default'):
    '''Reads and returns a numpy array from an hdf5 file.'''
    log.debug_once('Reading hdf5 file from: {0}', path)
    h5file = h5py.File(path, 'r')
    dset = h5file.get(keyword)
    data = dset[:]
    h5file.close()
    return data
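
A round-trip sketch for the two helpers above; the path is arbitrary:

arr = np.arange(12).reshape(3, 4)
save_dense_hdf('/tmp/example.hdf5', arr)
restored = load_dense_hdf('/tmp/example.hdf5')
assert np.array_equal(arr, restored)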
Example #7
from spodernet.utils.logger import Logger, GlobalLogger


def test_global_logger():
    log1 = Logger('test1.txt')
    log2 = Logger('test2.txt')
    log1.info('uden')
    log2.info('kek')
    log2.info('rolfen')
    log1.info('keken')

    GlobalLogger.flush()

    expected = ['uden', 'kek', 'rolfen', 'keken']
    with open(GlobalLogger.global_logger_path) as f:
        data = f.readlines()

    print(len(data))
    for i, line in enumerate(data[-4:]):
        message = line.split(':')[3].strip()
        assert message == expected[i]

    assert i == len(expected) - 1
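
The line.split(':')[3] indexing assumes the message is the fourth colon-separated field of each log line; the exact format is an assumption, but a line shaped like this would satisfy the assertion:

line = 'timestamp:level:origin: uden'
assert line.split(':')[3].strip() == 'uden'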
Example #8
File: main.py Project: Kerlex/ConvE
from spodernet.preprocessing.processors import ConvertTokenToIdx, ApplyFunction, ToLower, DictKey2ListMapper, StreamToBatch
from spodernet.preprocessing.processors import TargetIdx2MultiTarget
from spodernet.utils.global_config import Config, Backends
from spodernet.utils.logger import Logger, LogLevel
from spodernet.preprocessing.batching import StreamBatcher
from spodernet.preprocessing.pipeline import Pipeline
from spodernet.hooks import LossHook, ETAHook
from spodernet.utils.util import Timer
import torch.backends.cudnn as cudnn
import numpy as np
import argparse


np.set_printoptions(precision=3)

cudnn.benchmark = True
log = Logger("main.py.txt")

def preprocess(dataset_name, delete_data=False):
    '''Preprocess the knowledge graph using spodernet.'''
    full_path = 'data/{0}/e1rel_to_e2_full.json'.format(dataset_name)
    train_path = 'data/{0}/e1rel_to_e2_train.json'.format(dataset_name)
    dev_ranking_path = 'data/{0}/e1rel_to_e2_ranking_dev.json'.format(dataset_name)
    test_ranking_path = 'data/{0}/e1rel_to_e2_ranking_test.json'.format(dataset_name)

    keys2keys = {}
    keys2keys['e1'] = 'e1' # entities
    keys2keys['rel'] = 'rel' # relations
    keys2keys['rel_eval'] = 'rel' # relations
    keys2keys['e2'] = 'e1' # entities
    keys2keys['e2_multi1'] = 'e1' # entity
    keys2keys['e2_multi2'] = 'e1' # entity
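
The mapping makes several JSON fields share one vocabulary: every key mapped to 'e1' is indexed with the entity vocabulary and every key mapped to 'rel' with the relation vocabulary, so e1, e2, and the multi-entity targets all resolve to the same entity ids. A sketch of the effect with hypothetical vocabularies:

vocabs = {'e1': {'paris': 7, 'france': 9}, 'rel': {'capital_of': 2}}
shared_vocab = vocabs[keys2keys['e2_multi1']]  # 'e2_multi1' -> entity vocab
print(shared_vocab['france'])  # 9, the same id e1/e2 would get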
Example #9
from os.path import join

import os
import shutil
import json
import zipfile
import numpy as np

from spodernet.preprocessing.vocab import Vocab
from spodernet.utils.util import Timer
from spodernet.preprocessing.processors import SaveLengthsToState
from sklearn.feature_extraction.text import TfidfVectorizer

from spodernet.utils.logger import Logger
log = Logger('pipeline.py.txt')

t = Timer()


class StreamMethods:
    files = 'FILES'
    data = 'DATA'

class DatasetStreamer(object):
    def __init__(self, input_keys=None, output_keys=None, stream_method=StreamMethods.files):
        self.stream_processors = []
        self.input_keys = input_keys or ['input', 'support', 'target']
        self.output_keys = output_keys or self.input_keys
        self.paths = []
        self.stream_method = stream_method
        self.data = []
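
The input_keys or [...] / output_keys or self.input_keys pattern supplies mutable defaults without the shared-default-argument pitfall; a quick demonstration:

s1 = DatasetStreamer()
s2 = DatasetStreamer(input_keys=['e1', 'rel', 'e2'])
print(s1.input_keys)   # ['input', 'support', 'target']
print(s2.output_keys)  # ['e1', 'rel', 'e2'] (falls back to input_keys)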
Example #10
from collections import namedtuple

import time
import datetime
import numpy as np
import queue
import pickle

from spodernet.utils.util import get_data_path, load_data, Timer
from spodernet.utils.global_config import Config, Backends
from spodernet.hooks import ETAHook
from spodernet.interfaces import IAtIterEndObservable, IAtEpochEndObservable, IAtEpochStartObservable, IAtBatchPreparedObservable
from spodernet.preprocessing.processors import DictConverter

from spodernet.utils.logger import Logger
log = Logger('batching.py.txt')

benchmark = False


class BatcherState(object):
    def __init__(self):
        self.clear()

    def clear(self):
        self.loss = None
        self.argmax = None
        self.pred = None
        self.batch_size = None
        self.current_idx = None
        self.current_epoch = None
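
BatcherState is reset through clear() rather than reallocated, so the batcher can reuse one state object across batches; a sketch:

state = BatcherState()
state.loss = 0.5
state.clear()      # every field back to None for the next batch
print(state.loss)  # None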
Example #11
from spodernet.utils.util import get_data_path, save_data, make_dirs_if_not_exists, load_data, Timer
from spodernet.interfaces import IAtBatchPreparedObservable
from spodernet.utils.global_config import Config
from past.builtins import basestring, long

import numpy as np
import os
import copy
import spacy
import nltk
import json
import pickle

from spodernet.utils.logger import Logger
log = Logger('processors.py.txt')

nlp = spacy.load('en')
timer = Timer()


class KeyToKeyMapper(IAtBatchPreparedObservable):
    def __init__(self, key2key):
        self.key2key = key2key

    def at_batch_prepared(self, batch_parts):
        str2var = batch_parts
        new_str2var = {}
        for key1, key2 in self.key2key.items():
            new_str2var[key2] = str2var[key1]
        return new_str2var
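
A minimal usage sketch for the mapper above; the key names are arbitrary:

mapper = KeyToKeyMapper({'input': 'e1', 'target': 'e2'})
batch = {'input': [1, 2], 'target': [3, 4]}
print(mapper.at_batch_prepared(batch))  # {'e1': [1, 2], 'e2': [3, 4]}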