def ctx2vec(gpu, skip_pip):
    """Fetch and set up the context2vec system under ``systems/context2vec``.

    Clones the upstream repository, copies the support files into the
    checkout, optionally installs the Pipenv environment, and finally
    downloads and unpacks the pre-trained model archive.

    Args:
        gpu: When true, the Pipfiles are taken from
            ``support/context2vec/gpu``; otherwise from
            ``support/context2vec/nogpu``.
        skip_pip: When true, the Pipfiles are not copied and
            ``pipenv install`` is not run.
    """
    os.makedirs("systems", exist_ok=True)
    with local.cwd("systems"):
        git("clone", "https://github.com/orenmel/context2vec.git")

    if not skip_pip:
        subdir = "gpu" if gpu else "nogpu"
        for fn in ["Pipfile", "Pipfile.lock"]:
            copyfile(
                "support/context2vec/{}/{}".format(subdir, fn),
                "systems/context2vec/{}".format(fn),
            )

    for fn in ["test.py", "train.py"]:
        copyfile(
            "support/context2vec/{}".format(fn),
            "systems/context2vec/context2vec/eval/wsd/{}".format(fn),
        )

    with local.cwd("systems/context2vec"), local.env(
        PIPENV_IGNORE_VIRTUALENVS="1"
    ):
        if not skip_pip:
            pipenv("install")
        tmp_zipped_model_fn = urlretrieve(
            "https://archive.org/download/ctx2vec-b100-3epoch/ctx2vec-b100-3epoch.zip"
        )
        try:
            # Close the archive before its backing file is deleted; an open
            # handle leaks and makes the removal fail on some platforms.
            with ZipFile(tmp_zipped_model_fn) as tmp_zip:
                tmp_zip.extractall(".")
        finally:
            os.remove(tmp_zipped_model_fn)
def _bootstrap(self, res):
    """Download the zipped Elmo word vectors and unpack them.

    The archive at ``self.URL`` is extracted into the path returned by
    ``self.get_res("")``. The downloaded zip file is removed afterwards,
    whether or not extraction succeeded.

    Args:
        res: Not used by this implementation.
    """
    logger.info("Downloading Elmo word vectors")
    zipped_tmp_fn = urlretrieve(self.URL)
    try:
        # Close the archive before deleting the file that backs it.
        with zipfile.ZipFile(zipped_tmp_fn) as tmp_zip:
            tmp_zip.extractall(self.get_res(""))
    finally:
        os.remove(zipped_tmp_fn)
def _download(self, lang, url, dest):
    """Fetch the word vectors at *url*, convert them and save them to *dest*.

    Args:
        lang: Label inserted into the progress log messages.
        url: Location the vectors are downloaded from.
        dest: Target passed to the loaded vectors' ``save`` method.

    The downloaded file is deleted once conversion has finished or failed.
    """
    logger.info("Downloading {} word vectors".format(lang))
    downloaded_path = urlretrieve(url)
    try:
        logger.info("Converting {} word vectors".format(lang))
        load_word2vec_format(downloaded_path).save(dest)
    finally:
        os.remove(downloaded_path)
def _bootstrap(self, res):
    """Download the zipped Word2Vec vectors and store them in converted form.

    ``model.txt`` is copied out of the archive at ``self.URL`` into a
    temporary file, loaded with ``load_word2vec_format`` (undecodable bytes
    are replaced) and saved to ``self._get_res_path("vecs")``. Both the
    temporary text file and the downloaded archive are removed afterwards.

    Args:
        res: Not used by this implementation.
    """
    logger.info("Downloading Word2Vec word vectors")
    zipped_tmp_fn = urlretrieve(self.URL)
    try:
        with zipfile.ZipFile(zipped_tmp_fn) as tmp_zip:
            tmp_fn = get_tmpfile("word2vec-fi.txt")
            try:
                # Close both streams explicitly so the extracted text is
                # fully flushed to disk before it is read back below.
                with tmp_zip.open("model.txt") as src, open(tmp_fn, "wb") as dst:
                    copyfileobj(src, dst)
                logger.info("Converting Word2Vec word vectors")
                fi = load_word2vec_format(tmp_fn, unicode_errors="replace")
                fi.save(self._get_res_path("vecs"))
            finally:
                os.remove(tmp_fn)
    finally:
        # The archive has been closed by the ``with`` block at this point.
        os.remove(zipped_tmp_fn)
def _bootstrap(self, res):
    """Download the FiWN ConceptNet archive and store its contents.

    ``outputVectors.txt`` is copied out of the archive at ``self.URL`` into
    a temporary file, loaded with ``load_word2vec_format`` and saved to
    ``self._get_res_path("vecs")``. ``synsets.txt`` is then copied verbatim
    to ``self._get_res_path("synsets")``. The temporary text file and the
    downloaded archive are removed afterwards.

    Args:
        res: Not used by this implementation.
    """
    logger.info("Downloading FiWN ConceptNet word vectors")
    zipped_tmp_fn = urlretrieve(self.URL)
    try:
        with zipfile.ZipFile(zipped_tmp_fn) as tmp_zip:
            tmp_fn = get_tmpfile("fiwn-conceptnet.txt")
            try:
                # Close both streams explicitly so the extracted text is
                # fully flushed to disk before it is read back below.
                with tmp_zip.open("outputVectors.txt") as src, open(
                    tmp_fn, "wb"
                ) as dst:
                    copyfileobj(src, dst)
                logger.info("Converting FiWN ConceptNet word vectors")
                fi = load_word2vec_format(tmp_fn)
                fi.save(self._get_res_path("vecs"))
            finally:
                os.remove(tmp_fn)
            # The synset list is stored as-is, without conversion.
            with tmp_zip.open("synsets.txt") as src, open(
                self._get_res_path("synsets"), "wb"
            ) as dst:
                copyfileobj(src, dst)
    finally:
        # The archive has been closed by the ``with`` block at this point.
        os.remove(zipped_tmp_fn)
def _bootstrap(self, _res):
    """Download gzipped word vectors and store them in converted form.

    The file at ``self.URL`` is decompressed into a temporary text file,
    loaded with ``load_word2vec_format`` and saved to
    ``self._get_res_path("vecs")``. Both temporary files are removed
    afterwards on a best-effort basis.

    Args:
        _res: Not used by this implementation.
    """
    logger.info("Downloading word vectors")
    gzipped_glove_tmp_fn = urlretrieve(self.URL)
    try:
        glove_tmp_fn = get_tmpfile("glove.txt")
        try:
            # Close both streams explicitly so the decompressed text is
            # fully flushed to disk before it is read back below.
            with gzip.open(gzipped_glove_tmp_fn) as src, open(
                glove_tmp_fn, "wb"
            ) as dst:
                copyfileobj(src, dst)
            logger.info("Converting word vectors")
            fi = load_word2vec_format(glove_tmp_fn)
            fi.save(self._get_res_path("vecs"))
        finally:
            # Deliberately best-effort: a failed cleanup must not hide an
            # error raised while decompressing or converting.
            try:
                os.remove(glove_tmp_fn)
            except OSError:
                pass
    finally:
        # Deliberately best-effort, for the same reason as above.
        try:
            os.remove(gzipped_glove_tmp_fn)
        except OSError:
            pass
# Script: download the parsed English Wiktionary data published with
# ConceptNet and hand it to conceptnet5's ``prepare_db``.
from finntk.utils import urlretrieve
from conceptnet5.readers.wiktionary import prepare_db

# Gzipped ``en.jsons.gz`` file in ConceptNet's precomputed-data bucket.
URL = "https://conceptnet.s3.amazonaws.com/precomputed-data/2016/wiktionary/parsed-2/en.jsons.gz"

# The return value is passed on as the input file name; presumably the
# download is stored as "en.jsons.gz" in the working directory -- confirm
# against finntk.utils.urlretrieve.
en_wiktionary_gz = urlretrieve(URL, filename="en.jsons.gz")

# prepare_db gets a list of input files and the name "wiktionary.db";
# presumably it builds that database from the inputs -- see conceptnet5.
prepare_db([en_wiktionary_gz], "wiktionary.db")