def load_set(fname, emb, cache_dir=None):
    """Load an answer-selection dataset in embedded form, optionally cached.

    When ``cache_dir`` is truthy, the result is stored in (and later read
    from) ``<cache_dir>/<md5 of the absolute path of fname>.p``.  The cache
    key depends only on the file path, not on ``emb`` or the file contents.

    Args:
        fname: Path of the dataset file handed to ``loader.load_anssel``.
        emb: Embedding object forwarded to ``loader.load_embedded``.
        cache_dir: Directory holding pickled results; falsy disables caching.
            The cache is read with ``pickle``, so it must be a trusted
            location.

    Returns:
        The tuple ``(e0, e1, y)`` produced by ``loader.load_embedded`` (or
        whatever object was previously pickled for this path).
    """
    cache_filename = None
    if cache_dir:
        from hashlib import md5

        fname_abs = os.path.abspath(fname)
        cache_filename = "%s/%s.p" % (
            cache_dir, md5(fname_abs.encode("utf-8")).hexdigest())
        try:
            with open(cache_filename, "rb") as f:
                return pickle.load(f)
        except (IOError, TypeError, KeyError, EOFError,
                pickle.UnpicklingError):
            # Missing, truncated or unreadable cache entry: treat it as a
            # cache miss so the entry is rebuilt and overwritten below
            # instead of failing on every subsequent call.
            pass

    # Only the first three of the six values returned by load_anssel are used.
    s0, s1, y, _, _, _ = loader.load_anssel(fname)
    e0, e1, s0, s1, y = loader.load_embedded(
        emb, s0, s1, y, balance=True, ndim=1)

    if cache_filename is not None:
        with open(cache_filename, "wb") as f:
            pickle.dump((e0, e1, y), f)
    return (e0, e1, y)
def load_set(glove, fname, balance=False, subsample0=3):
    """Read an answer-selection file and return its embedded sentence pairs.

    Args:
        glove: Embedding object passed as the first argument to
            ``loader.load_embedded`` (presumably GloVe vectors, going by
            the name).
        fname: Dataset path given to ``loader.load_anssel``.
        balance: Forwarded unchanged to ``loader.load_embedded``.
        subsample0: Forwarded unchanged to ``loader.load_anssel``.

    Returns:
        ``([e0, e1], labels)`` built from the first two and the last of the
        five values returned by ``loader.load_embedded``.
    """
    # The fourth value (token labels) is returned by the loader but unused.
    sents0, sents1, labels, _toklabels = loader.load_anssel(
        fname, subsample0=subsample0)
    print('(%s) Loaded dataset: %d' % (fname, len(sents0)))

    embedded = loader.load_embedded(
        glove, sents0, sents1, labels, balance=balance)
    e0, e1, _, _, labels = embedded
    return ([e0, e1], labels)
def load_set(fname, emb, cache_dir=None):
    """Return the embedded dataset for ``fname``, using a pickle cache if asked.

    With a truthy ``cache_dir`` the result is looked up in, and on a miss
    written to, ``<cache_dir>/<md5 of abspath(fname)>.p``.  Only the file
    path feeds the cache key; ``emb`` and the file contents do not.

    Args:
        fname: Path of the dataset file handed to ``loader.load_anssel``.
        emb: Embedding object forwarded to ``loader.load_embedded``.
        cache_dir: Directory for pickled results; falsy disables caching.
            Entries are loaded with ``pickle``, so the directory must be
            trusted.

    Returns:
        The tuple ``(e0, e1, y)`` from ``loader.load_embedded``, or the
        object previously pickled for this path.
    """
    save_cache = False
    if cache_dir:
        from hashlib import md5

        fname_abs = os.path.abspath(fname)
        cache_filename = "%s/%s.p" % (
            cache_dir, md5(fname_abs.encode("utf-8")).hexdigest())
        try:
            with open(cache_filename, "rb") as f:
                return pickle.load(f)
        except (IOError, TypeError, KeyError, EOFError,
                pickle.UnpicklingError):
            # A missing, truncated or otherwise unreadable entry counts as
            # a cache miss; it is regenerated and overwritten below rather
            # than making every later call fail.
            save_cache = True

    # The fourth value returned by load_anssel is not needed here.
    s0, s1, y, _ = loader.load_anssel(fname)
    e0, e1, s0, s1, y = loader.load_embedded(
        emb, s0, s1, y, balance=True, ndim=1)

    if save_cache:
        with open(cache_filename, "wb") as f:
            pickle.dump((e0, e1, y), f)
    return (e0, e1, y)
def load_set(glove, globmask, loadfun=loader.load_sts):
    """Load every file matching ``globmask`` and embed the combined dataset.

    Args:
        glove: Embedding object passed as the first argument to
            ``loader.load_embedded``.
        globmask: Glob pattern expanded with ``glob.glob``.
        loadfun: Callable applied to each matched path; its results are
            merged by ``loader.concat_datasets``.

    Returns:
        ``([e0, e1], labels)`` built from the first two and the last of the
        five values returned by ``loader.load_embedded``.
    """
    # Load each matched file in the order glob reports it.
    datasets = []
    for path in glob.glob(globmask):
        datasets.append(loadfun(path))

    sents0, sents1, labels = loader.concat_datasets(datasets)
    print('(%s) Loaded dataset: %d' % (globmask, len(sents0)))

    embedded = loader.load_embedded(glove, sents0, sents1, labels)
    e0, e1, _, _, labels = embedded
    return ([e0, e1], labels)
def load_set(glove, globmask, loadfun=loader.load_sts):
    """Embed the concatenation of all datasets whose path matches a glob.

    Args:
        glove: Embedding object handed to ``loader.load_embedded``.
        globmask: Pattern passed to ``glob.glob`` to find the input files.
        loadfun: Per-file loader; called once for each matched path.

    Returns:
        A pair ``([e0, e1], labels)`` taken from the result of
        ``loader.load_embedded`` (its first, second and fifth values).
    """
    per_file = list(map(loadfun, glob.glob(globmask)))
    first, second, labels = loader.concat_datasets(per_file)
    print('(%s) Loaded dataset: %d' % (globmask, len(first)))

    e0, e1, first, second, labels = loader.load_embedded(
        glove, first, second, labels)
    pair = [e0, e1]
    return (pair, labels)
def load_set(glove, fname, balance=False, subsample0=3):
    """Load one answer-selection file and return its embedded form.

    Args:
        glove: Embedding object handed to ``loader.load_embedded``.
        fname: Dataset path given to ``loader.load_anssel``.
        balance: Passed through to ``loader.load_embedded``.
        subsample0: Passed through to ``loader.load_anssel``.

    Returns:
        A pair ``([e0, e1], labels)`` taken from the result of
        ``loader.load_embedded`` (its first, second and fifth values).
    """
    raw = loader.load_anssel(fname, subsample0=subsample0)
    first, second, labels = raw
    print('(%s) Loaded dataset: %d' % (fname, len(first)))

    e0, e1, first, second, labels = loader.load_embedded(
        glove, first, second, labels, balance=balance)
    pair = [e0, e1]
    return (pair, labels)