Esempio n. 1
0
def build(opt):
    """
    Prepare the WNLI probing data under ``opt['datapath']/probing/wnli``.

    Returns immediately when ``build_data.built`` reports the directory as
    built. Otherwise the archive is downloaded and unzipped there, the
    ``WNLI`` entry is renamed to ``wnli_orig``, ``create_probing_format`` is
    run on the renamed path, and the directory is marked as done.

    :param opt: options mapping; only the ``'datapath'`` entry is read here.
    """
    data_dir = os.path.join(opt['datapath'], 'probing', 'wnli')

    # Nothing to do when a previous run already completed.
    if build_data.built(data_dir):
        return

    message = '[building data: ' + data_dir + ']'
    print(message)

    build_data.make_dir(data_dir)

    # Fetch the archive and unpack it inside the data directory.
    archive_name = 'wnli_orig.zip'
    archive_url = (
        'https://firebasestorage.googleapis.com/'
        'v0/b/mtl-sentence-representations.appspot.com/'
        'o/data%2FWNLI.zip?alt=media&token=068ad0a0-ded7-4bd7-99a5-5e00222e0faf'
    )
    build_data.download(archive_url, data_dir, archive_name)
    build_data.unzip(data_dir, archive_name)

    # Give the unpacked 'WNLI' entry its '*_orig' name before processing.
    unpacked_dir = os.path.join(data_dir, 'WNLI')
    original_dir = os.path.join(data_dir, 'wnli_orig')
    os.rename(unpacked_dir, original_dir)

    # Convert the raw files into the probing format.
    create_probing_format(Path(original_dir))

    # Record completion so later calls take the early return above.
    build_data.mark_done(data_dir)
def build(opt):
    """
    Download the ``AllPublicXML.zip`` archive and convert it to JSON and text.

    All paths live under ``opt['datapath']/TASK_DIR``. When
    ``build_data.built`` reports the task directory as built, nothing is
    done. Otherwise the XML, JSON and text sub-directories are created, the
    archive is downloaded and unzipped into the XML directory, ``xml2json``
    and ``json2txt`` are run in that order, and the task directory is marked
    as done.

    :param opt: options mapping; only the ``'datapath'`` entry is read here.
    """
    task_root = os.path.join(opt['datapath'], TASK_DIR)
    xml_root = os.path.join(task_root, XML_DIR)
    json_root = os.path.join(task_root, JSON_DIR)
    txt_root = os.path.join(task_root, TXT_DIR)
    txt_target = os.path.join(txt_root, TXT_FILE)
    version = None  # no version string is used for this dataset

    if build_data.built(task_root, version_string=version):
        return

    message = '[building data: ' + task_root + ']'
    print(message)

    # If a build without a version string is present, treat it as outdated
    # and remove it so the directories below start clean.
    if build_data.built(task_root):
        build_data.remove_dir(task_root)

    for directory in (task_root, xml_root, json_root, txt_root):
        build_data.make_dir(directory)

    # Download the archive into the XML directory.
    archive_name = "AllPublicXML.zip"
    archive_url = "https://clinicaltrials.gov/" + archive_name
    # NOTE(review): the fourth positional argument is presumably a
    # "re-download" flag -- confirm against build_data.download's signature.
    build_data.download(archive_url, xml_root, archive_name, True)

    # Unpack, then convert XML -> JSON -> text.
    build_data.unzip(xml_root, archive_name)
    xml2json(xml_root, json_root)
    json2txt(json_root, txt_target)

    # Record completion so later calls take the early return above.
    build_data.mark_done(task_root, version_string=version)
Esempio n. 3
0
def build(opt):
    """
    Download and unpack ``dialogue_nli.zip`` into ``opt['datapath']/dialogue_nli``.

    The build is tagged with version ``'1.0'``. When ``build_data.built``
    reports that version as present, nothing is done. If only a build
    without a matching version is present, the directory is removed first.

    :param opt: options mapping; only the ``'datapath'`` entry is read here.
    """
    data_dir = os.path.join(opt['datapath'], 'dialogue_nli')
    data_version = '1.0'

    # The requested version is already in place.
    if build_data.built(data_dir, version_string=data_version):
        return

    message = '[building data: ' + data_dir + ']'
    print(message)

    # Built, but not at this version: discard the outdated files.
    if build_data.built(data_dir):
        build_data.remove_dir(data_dir)
    build_data.make_dir(data_dir)

    # Fetch the archive from Google Drive by its file id.
    archive_name = 'dialogue_nli.zip'
    drive_file_id = '1WtbXCv3vPB5ql6w0FVDmAEMmWadbrCuG'
    archive_path = os.path.join(data_dir, archive_name)
    build_data.download_from_google_drive(drive_file_id, archive_path)

    # Unpack inside the data directory.
    build_data.unzip(data_dir, archive_name)

    # Record completion together with the version string.
    build_data.mark_done(data_dir, version_string=data_version)
Esempio n. 4
0
def build(opt):
    """
    Download and unpack ``MULTIWOZ2.1.zip`` into ``opt['datapath']/multiwoz``.

    The build is tagged with version ``'1.0'``. When ``build_data.built``
    reports that version as present, nothing is done. If only a build
    without a matching version is present, the directory is removed first.

    :param opt: options mapping; only the ``'datapath'`` entry is read here.
    """
    data_dir = os.path.join(opt['datapath'], 'multiwoz')
    data_version = '1.0'

    # The requested version is already in place.
    if build_data.built(data_dir, version_string=data_version):
        return

    message = '[building data: ' + data_dir + ']'
    print(message)

    # Built, but not at this version: discard the outdated files.
    if build_data.built(data_dir):
        build_data.remove_dir(data_dir)
    build_data.make_dir(data_dir)

    # Download the archive and unpack it inside the data directory.
    archive_name = 'MULTIWOZ2.1.zip'
    archive_url = 'https://www.repository.cam.ac.uk/bitstream/handle/1810/294507/MULTIWOZ2.1.zip?sequence=1&isAllowed=y'
    build_data.download(archive_url, data_dir, archive_name)
    build_data.unzip(data_dir, archive_name)

    # Record completion together with the version string.
    build_data.mark_done(data_dir, version_string=data_version)
Esempio n. 5
0
def build(opt):
    """
    Prepare the probing data under ``opt['datapath']/probing/dialoguenli``.

    Returns immediately when ``build_data.built`` reports the directory as
    built. Otherwise ``dnli.zip`` is downloaded and unzipped there, the
    ``dnli`` entry is renamed to ``dialoguenli_orig``,
    ``create_probing_format`` is run on the renamed path, and the directory
    is marked as done.

    :param opt: options mapping; only the ``'datapath'`` entry is read here.
    """
    data_dir = os.path.join(opt['datapath'], 'probing', 'dialoguenli')

    # Nothing to do when a previous run already completed.
    if build_data.built(data_dir):
        return

    message = '[building data: ' + data_dir + ']'
    print(message)

    build_data.make_dir(data_dir)

    # Download the archive and unpack it inside the data directory.
    archive_name = 'dnli.zip'
    archive_url = 'https://www.dropbox.com/s/h65c5i8o7q9d2kk/dnli.zip?dl=1'
    build_data.download(archive_url, data_dir, archive_name)
    build_data.unzip(data_dir, archive_name)

    # Give the unpacked 'dnli' entry its '*_orig' name before processing.
    unpacked_dir = os.path.join(data_dir, 'dnli')
    original_dir = os.path.join(data_dir, 'dialoguenli_orig')
    os.rename(unpacked_dir, original_dir)

    # Convert the raw files into the probing format.
    create_probing_format(Path(original_dir))

    # Record completion so later calls take the early return above.
    build_data.mark_done(data_dir)
Esempio n. 6
0
def build(opt):
    """
    Prepare the probing data under ``opt['datapath']/probing/scenariosa``.

    Returns immediately when ``build_data.built`` reports the directory as
    built. Otherwise ``ScenarioSA.zip`` is downloaded and unzipped there, the
    ``ScenarioSA`` entry is renamed to ``scenariosa_orig``,
    ``create_probing_format`` is run on the renamed path, and the directory
    is marked as done.

    :param opt: options mapping; only the ``'datapath'`` entry is read here.
    """
    data_dir = os.path.join(opt['datapath'], 'probing', 'scenariosa')

    # Nothing to do when a previous run already completed.
    if build_data.built(data_dir):
        return

    message = '[building data: ' + data_dir + ']'
    print(message)

    build_data.make_dir(data_dir)

    # Download the archive and unpack it inside the data directory.
    archive_name = 'ScenarioSA.zip'
    archive_url = 'https://www.dropbox.com/s/h54k6oamrn4gizg/ScenarioSA.zip?dl=1'
    build_data.download(archive_url, data_dir, archive_name)
    build_data.unzip(data_dir, archive_name)

    # Give the unpacked 'ScenarioSA' entry its '*_orig' name before processing.
    unpacked_dir = os.path.join(data_dir, 'ScenarioSA')
    original_dir = os.path.join(data_dir, 'scenariosa_orig')
    os.rename(unpacked_dir, original_dir)

    # Convert the raw files into the probing format.
    create_probing_format(Path(original_dir))

    # Record completion so later calls take the early return above.
    build_data.mark_done(data_dir)
Esempio n. 7
0
def build(opt):
    """
    Prepare the probing data under ``opt['datapath']/probing/multiwoz``.

    Returns immediately when ``build_data.built`` reports the directory as
    built. Otherwise the archive is downloaded (saved as
    ``multiwoz_orig.zip``) and unzipped there, the ``MultiWOZ_2.1`` entry is
    renamed to ``multiwoz_orig``, ``create_probing_format`` is run on the
    renamed path, and the directory is marked as done.

    :param opt: options mapping; only the ``'datapath'`` entry is read here.
    """
    data_dir = os.path.join(opt['datapath'], 'probing', 'multiwoz')

    # Nothing to do when a previous run already completed.
    if build_data.built(data_dir):
        return

    message = '[building data: ' + data_dir + ']'
    print(message)

    build_data.make_dir(data_dir)

    # Download the archive and unpack it inside the data directory.
    archive_name = 'multiwoz_orig.zip'
    archive_url = 'https://github.com/budzianowski/multiwoz/raw/master/data/MultiWOZ_2.1.zip'
    build_data.download(archive_url, data_dir, archive_name)
    build_data.unzip(data_dir, archive_name)

    # Give the unpacked 'MultiWOZ_2.1' entry its '*_orig' name before processing.
    unpacked_dir = os.path.join(data_dir, 'MultiWOZ_2.1')
    original_dir = os.path.join(data_dir, 'multiwoz_orig')
    os.rename(unpacked_dir, original_dir)

    # Convert the raw files into the probing format.
    create_probing_format(Path(original_dir))

    # Record completion so later calls take the early return above.
    build_data.mark_done(data_dir)