Esempio n. 1
0
def load_set(fname, emb, cache_dir=None):
    """Load an answer-selection dataset, embed it, and optionally cache it.

    Parameters:
        fname: path to the dataset file, passed to loader.load_anssel().
        emb: embedding object, passed to loader.load_embedded().
        cache_dir: optional directory; when given, a pickled (e0, e1, y)
            tuple is read from / written to a file named by the md5 of
            the absolute dataset path.

    Returns:
        (e0, e1, y) tuple of embedded sentence sides and labels.
    """
    save_cache = False
    if cache_dir:
        from hashlib import md5
        # Key the cache on the absolute path so invocations from different
        # working directories share the same cache entry.
        fname_abs = os.path.abspath(fname)
        cache_filename = os.path.join(
            cache_dir, "%s.p" % md5(fname_abs.encode("utf-8")).hexdigest())

        try:
            # NOTE(review): unpickling a cache file is trusted-input only;
            # do not point cache_dir at data you do not control.
            with open(cache_filename, "rb") as f:
                return pickle.load(f)
        except (IOError, TypeError, KeyError):
            # Cache miss or unreadable cache: rebuild below and save it.
            save_cache = True

    # load_anssel returns extra fields here; only s0, s1, y are used.
    s0, s1, y, _, _, _ = loader.load_anssel(fname)
    e0, e1, s0, s1, y = loader.load_embedded(emb,
                                             s0,
                                             s1,
                                             y,
                                             balance=True,
                                             ndim=1)

    if save_cache:
        with open(cache_filename, "wb") as f:
            pickle.dump((e0, e1, y), f)
    return (e0, e1, y)
Esempio n. 2
0
def load_set(glove, fname, balance=False, subsample0=3):
    """Load the answer-selection dataset at fname and embed it with glove.

    Returns ([e0, e1], labels), ready to feed a two-input model.
    """
    sents0, sents1, labels, toklabels = loader.load_anssel(
        fname, subsample0=subsample0)
    print('(%s) Loaded dataset: %d' % (fname, len(sents0)))
    emb0, emb1, sents0, sents1, labels = loader.load_embedded(
        glove, sents0, sents1, labels, balance=balance)
    return ([emb0, emb1], labels)
Esempio n. 3
0
def load_set(fname, emb, cache_dir=None):
    """Load an answer-selection dataset, embed it, and optionally cache it.

    Parameters:
        fname: path to the dataset file, passed to loader.load_anssel().
        emb: embedding object, passed to loader.load_embedded().
        cache_dir: optional directory; when given, a pickled (e0, e1, y)
            tuple is read from / written to a file named by the md5 of
            the absolute dataset path.

    Returns:
        (e0, e1, y) tuple of embedded sentence sides and labels.
    """
    save_cache = False
    if cache_dir:
        from hashlib import md5
        # Cache key is the absolute path, so relative invocations share it.
        fname_abs = os.path.abspath(fname)
        cache_filename = os.path.join(
            cache_dir, "%s.p" % md5(fname_abs.encode("utf-8")).hexdigest())

        try:
            # NOTE(review): unpickling a cache file is trusted-input only;
            # do not point cache_dir at data you do not control.
            with open(cache_filename, "rb") as f:
                return pickle.load(f)
        except (IOError, TypeError, KeyError):
            # Cache miss or unreadable cache: rebuild below and save it.
            save_cache = True

    # Fourth return value (token-level data) is unused here.
    s0, s1, y, _ = loader.load_anssel(fname)
    e0, e1, s0, s1, y = loader.load_embedded(emb, s0, s1, y, balance=True, ndim=1)

    if save_cache:
        with open(cache_filename, "wb") as f:
            pickle.dump((e0, e1, y), f)
    return (e0, e1, y)
Esempio n. 4
0
def load_set(glove, globmask, loadfun=loader.load_sts):
    """Load and concatenate every dataset file matching globmask, then embed.

    Returns ([e0, e1], labels).
    """
    datasets = [loadfun(path) for path in glob.glob(globmask)]
    s0, s1, labels = loader.concat_datasets(datasets)
    print('(%s) Loaded dataset: %d' % (globmask, len(s0)))
    e0, e1, s0, s1, labels = loader.load_embedded(glove, s0, s1, labels)
    return ([e0, e1], labels)
Esempio n. 5
0
def load_set(glove, globmask, loadfun=loader.load_sts):
    """Glob for dataset files, concatenate them, and embed with glove.

    Returns ([e0, e1], labels).
    """
    matched = glob.glob(globmask)
    s0, s1, labels = loader.concat_datasets([loadfun(fn) for fn in matched])
    print('(%s) Loaded dataset: %d' % (globmask, len(s0)))
    embedded = loader.load_embedded(glove, s0, s1, labels)
    e0, e1, s0, s1, labels = embedded
    return ([e0, e1], labels)
Esempio n. 6
0
def load_set(glove, fname, balance=False, subsample0=3):
    """Load the answer-selection dataset at fname and embed it with glove.

    Returns ([e0, e1], labels).
    """
    sents0, sents1, labels = loader.load_anssel(fname, subsample0=subsample0)
    print('(%s) Loaded dataset: %d' % (fname, len(sents0)))
    e0, e1, sents0, sents1, labels = loader.load_embedded(
        glove, sents0, sents1, labels, balance=balance)
    return ([e0, e1], labels)