def build(opt):
    """Download and prepare the WNLI probing data unless it is already built.

    The archive is downloaded into ``<datapath>/probing/wnli`` and unpacked,
    the extracted ``WNLI`` folder is renamed to ``wnli_orig``, and
    ``create_probing_format`` is run on that folder before the data directory
    is marked as built.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    # get path to data directory
    dpath = os.path.join(opt['datapath'], 'probing', 'wnli')

    # check if data had been previously built
    if build_data.built(dpath):
        return

    print('[building data: ' + dpath + ']')
    build_data.make_dir(dpath)

    # Download data
    fname = 'wnli_orig.zip'
    url = (
        'https://firebasestorage.googleapis.com/'
        'v0/b/mtl-sentence-representations.appspot.com/'
        'o/data%2FWNLI.zip?alt=media&token=068ad0a0-ded7-4bd7-99a5-5e00222e0faf'
    )
    build_data.download(url, dpath, fname)
    build_data.unzip(dpath, fname)

    orig_dpath = os.path.join(dpath, 'wnli_orig')
    # A run interrupted after the rename but before mark_done leaves a
    # populated 'wnli_orig' behind. os.rename will not replace a non-empty
    # directory, so drop the leftover first to keep retries working.
    if os.path.exists(orig_dpath):
        build_data.remove_dir(orig_dpath)
    os.rename(os.path.join(dpath, 'WNLI'), orig_dpath)

    # Process the data
    create_probing_format(Path(orig_dpath))

    # mark the data as built
    build_data.mark_done(dpath)
def build(opt):
    """Build the clinical-trials text data under ``<datapath>/TASK_DIR``.

    When the task directory is not yet marked as built, this creates the
    XML / JSON / TXT sub-directories, downloads ``AllPublicXML.zip`` from
    clinicaltrials.gov into the XML directory, unpacks it, runs ``xml2json``
    and then ``json2txt``, and finally marks the task directory as built.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    task_root = os.path.join(opt['datapath'], TASK_DIR)
    xml_root = os.path.join(task_root, XML_DIR)
    json_root = os.path.join(task_root, JSON_DIR)
    txt_root = os.path.join(task_root, TXT_DIR)
    txt_target = os.path.join(txt_root, TXT_FILE)
    data_version = None

    # Nothing to do when a previous run already finished.
    if build_data.built(task_root, version_string=data_version):
        return

    print('[building data: ' + task_root + ']')

    # Make a clean directory if needed: an older build is wiped first.
    # NOTE(review): with the version set to None this check looks equivalent
    # to the guard above - confirm against build_data.built.
    if build_data.built(task_root):
        build_data.remove_dir(task_root)
    for directory in (task_root, xml_root, json_root, txt_root):
        build_data.make_dir(directory)

    # Download the archive into the XML directory.
    archive_name = "AllPublicXML.zip"
    source_url = "https://clinicaltrials.gov/" + archive_name
    build_data.download(source_url, xml_root, archive_name, True)

    # Unpack, then run the XML -> JSON -> TXT helpers in sequence.
    build_data.unzip(xml_root, archive_name)
    xml2json(xml_root, json_root)
    json2txt(json_root, txt_target)

    # Record completion so later calls return early.
    build_data.mark_done(task_root, version_string=data_version)
def build(opt):
    """Download and unpack the Dialogue NLI archive if version 1.0 is missing.

    The data lives in ``<datapath>/dialogue_nli``. A directory that is marked
    as built under a different version is removed before the fresh download.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    data_dir = os.path.join(opt['datapath'], 'dialogue_nli')
    data_version = '1.0'

    # Up to date already: nothing to build.
    if build_data.built(data_dir, version_string=data_version):
        return

    print('[building data: ' + data_dir + ']')

    # Built, but not at this version: discard the outdated files.
    if build_data.built(data_dir):
        build_data.remove_dir(data_dir)
    build_data.make_dir(data_dir)

    # Fetch the archive from Google Drive straight into the data directory.
    archive_name = 'dialogue_nli.zip'
    drive_file_id = '1WtbXCv3vPB5ql6w0FVDmAEMmWadbrCuG'
    archive_path = os.path.join(data_dir, archive_name)
    build_data.download_from_google_drive(drive_file_id, archive_path)

    # Uncompress it.
    build_data.unzip(data_dir, archive_name)

    # Record the build together with its version.
    build_data.mark_done(data_dir, version_string=data_version)
def build(opt):
    """Download and unpack MultiWOZ 2.1 if version 1.0 is not built yet.

    The data lives in ``<datapath>/multiwoz``. A directory that is marked as
    built under a different version is removed before the fresh download.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    data_dir = os.path.join(opt['datapath'], 'multiwoz')
    data_version = '1.0'

    # Up to date already: nothing to build.
    if build_data.built(data_dir, version_string=data_version):
        return

    print('[building data: ' + data_dir + ']')

    # Built, but not at this version: start over from an empty directory.
    if build_data.built(data_dir):
        build_data.remove_dir(data_dir)
    build_data.make_dir(data_dir)

    # Download the archive and unpack it in place.
    archive_name = 'MULTIWOZ2.1.zip'
    source_url = 'https://www.repository.cam.ac.uk/bitstream/handle/1810/294507/MULTIWOZ2.1.zip?sequence=1&isAllowed=y'
    build_data.download(source_url, data_dir, archive_name)
    build_data.unzip(data_dir, archive_name)

    # Record the build together with its version.
    build_data.mark_done(data_dir, version_string=data_version)
def build(opt):
    """Download and prepare the Dialogue NLI probing data unless already built.

    The archive is downloaded into ``<datapath>/probing/dialoguenli`` and
    unpacked, the extracted ``dnli`` folder is renamed to
    ``dialoguenli_orig``, and ``create_probing_format`` is run on that folder
    before the data directory is marked as built.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    # get path to data directory
    dpath = os.path.join(opt['datapath'], 'probing', 'dialoguenli')

    # check if data had been previously built
    if build_data.built(dpath):
        return

    print('[building data: ' + dpath + ']')
    build_data.make_dir(dpath)

    # Download and unpack the data
    fname = 'dnli.zip'
    url = 'https://www.dropbox.com/s/h65c5i8o7q9d2kk/dnli.zip?dl=1'
    build_data.download(url, dpath, fname)
    build_data.unzip(dpath, fname)

    orig_dpath = os.path.join(dpath, 'dialoguenli_orig')
    # A run interrupted after the rename but before mark_done leaves a
    # populated 'dialoguenli_orig' behind. os.rename will not replace a
    # non-empty directory, so drop the leftover first to keep retries working.
    if os.path.exists(orig_dpath):
        build_data.remove_dir(orig_dpath)
    os.rename(os.path.join(dpath, 'dnli'), orig_dpath)

    # Process the data
    create_probing_format(Path(orig_dpath))

    # mark the data as built
    build_data.mark_done(dpath)
def build(opt):
    """Download and prepare the ScenarioSA probing data unless already built.

    The archive is downloaded into ``<datapath>/probing/scenariosa`` and
    unpacked, the extracted ``ScenarioSA`` folder is renamed to
    ``scenariosa_orig``, and ``create_probing_format`` is run on that folder
    before the data directory is marked as built.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    # get path to data directory
    dpath = os.path.join(opt['datapath'], 'probing', 'scenariosa')

    # check if data had been previously built
    if build_data.built(dpath):
        return

    print('[building data: ' + dpath + ']')
    build_data.make_dir(dpath)

    # Download and unpack the data
    fname = 'ScenarioSA.zip'
    url = 'https://www.dropbox.com/s/h54k6oamrn4gizg/ScenarioSA.zip?dl=1'
    build_data.download(url, dpath, fname)
    build_data.unzip(dpath, fname)

    orig_dpath = os.path.join(dpath, 'scenariosa_orig')
    # A run interrupted after the rename but before mark_done leaves a
    # populated 'scenariosa_orig' behind. os.rename will not replace a
    # non-empty directory, so drop the leftover first to keep retries working.
    if os.path.exists(orig_dpath):
        build_data.remove_dir(orig_dpath)
    os.rename(os.path.join(dpath, 'ScenarioSA'), orig_dpath)

    # Process the data
    create_probing_format(Path(orig_dpath))

    # mark the data as built
    build_data.mark_done(dpath)
def build(opt):
    """Download and prepare the MultiWOZ probing data unless already built.

    The archive is downloaded into ``<datapath>/probing/multiwoz`` and
    unpacked, the extracted ``MultiWOZ_2.1`` folder is renamed to
    ``multiwoz_orig``, and ``create_probing_format`` is run on that folder
    before the data directory is marked as built.

    :param opt: options mapping; only ``opt['datapath']`` is read here.
    """
    # get path to data directory
    dpath = os.path.join(opt['datapath'], 'probing', 'multiwoz')

    # check if data had been previously built
    if build_data.built(dpath):
        return

    print('[building data: ' + dpath + ']')
    build_data.make_dir(dpath)

    # Download the data
    fname = 'multiwoz_orig.zip'
    url = 'https://github.com/budzianowski/multiwoz/raw/master/data/MultiWOZ_2.1.zip'
    build_data.download(url, dpath, fname)
    build_data.unzip(dpath, fname)

    orig_dpath = os.path.join(dpath, 'multiwoz_orig')
    # A run interrupted after the rename but before mark_done leaves a
    # populated 'multiwoz_orig' behind. os.rename will not replace a
    # non-empty directory, so drop the leftover first to keep retries working.
    if os.path.exists(orig_dpath):
        build_data.remove_dir(orig_dpath)
    os.rename(os.path.join(dpath, 'MultiWOZ_2.1'), orig_dpath)

    # Process the data
    create_probing_format(Path(orig_dpath))

    # mark the data as built
    build_data.mark_done(dpath)