# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Base URL of the raw dataset files. It already ends with "/", so file names
# are appended to it directly.
ROOT_URL = (
    "https://raw.githubusercontent.com/xiul-msr/e2e_dialog_challenge/master/data/"
)

RESOURCES = [
    # raw data files
    # NOTE: the previous f"{ROOT_URL}/<name>" form produced ".../data//<name>"
    # (double slash) because ROOT_URL carries its own trailing slash.
    DownloadableFile(
        f"{ROOT_URL}movie_all.tsv",
        "movie_all.tsv",
        "d2291fd898d8c2d92d7c92affa5601a0561a28f07f6147e9c196c5a573a222d6",
        zipped=False,
    ),
    DownloadableFile(
        f"{ROOT_URL}restaurant_all.tsv",
        "restaurant_all.tsv",
        "0e297b2ac2e29f9771fed3cd348873b729eb079cc26f8c2333a28247671bdb28",
        zipped=False,
    ),
    DownloadableFile(
        f"{ROOT_URL}taxi_all.tsv",
        "taxi_all.tsv",
        "6d8ee9719b3d294b558eb53516c897108d1276e9dbcac0101d4e19a2ad801d20",
        zipped=False,
    ),
]
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import os

# Single archive holding the dataset.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/empatheticdialogues/empatheticdialogues.tar.gz',
        'empatheticdialogues.tar.gz',
        '56f234d77b7dd1f005fd365bb17769cfe346c3c84295b69bc069c8ccb83be03d',
    )
]


def build(opt):
    """
    Prepare the ``empatheticdialogues`` directory under ``opt['datapath']``.

    When ``build_data.built`` does not report version ``'1.0'`` for the
    directory, any previously built copy is removed and the directory is
    created afresh.
    """
    # NOTE(review): this snippet ends after the directory is created; any
    # further steps of the original function are not visible here.
    data_dir = os.path.join(opt['datapath'], 'empatheticdialogues')
    data_version = '1.0'

    if not build_data.built(data_dir, version_string=data_version):
        print(f'[building data: {data_dir}]')
        if build_data.built(data_dir):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(data_dir)
        build_data.make_dir(data_dir)
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single archive holding the dataset.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/mnist/mnist.tar.gz',
        'mnist.tar.gz',
        'c4e2f85cdae81ebf3a76d7ac0f0af8c4d91f4d1fb9bc2fd942b669a72b80585d',
    )
]


def build(opt):
    """
    Prepare the ``mnist`` directory under ``opt['datapath']``.

    No version string is used (``None``). If ``build_data.built`` reports
    the directory as not built, a stale copy is removed (when present) and
    the directory is recreated.
    """
    # NOTE(review): this snippet ends after the directory is created; any
    # further steps of the original function are not visible here.
    data_dir = os.path.join(opt['datapath'], 'mnist')
    data_version = None

    if not build_data.built(data_dir, version_string=data_version):
        print(f'[building data: {data_dir}]')
        if build_data.built(data_dir):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(data_dir)
        build_data.make_dir(data_dir)
# Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # # Download and build the data if it does not exist. import parlai.core.build_data as build_data import os import numpy from parlai.core.build_data import DownloadableFile from parlai.utils.io import PathManager RESOURCES = [ DownloadableFile( 'https://nlp.stanford.edu/projects/nmt/data/wmt14.en-de/train.en', 'train.en', '845ee390042259f7512eabc6458b0fdb30db28d254c83232d97d4161c1fdae51', zipped=False, ), DownloadableFile( 'https://nlp.stanford.edu/projects/nmt/data/wmt14.en-de/train.de', 'train.de', 'a2e292ad1b1f3fec6224dc043460ba6c453932f470109579b8c1ce6d4df65262', zipped=False, ), DownloadableFile( 'https://nlp.stanford.edu/projects/nmt/data/wmt14.en-de/newstest2014.en', 'newstest2014.en', '2db4575449877142aef9187e5e8f58ec10af73a2589ad7a4690208f5234901bb', zipped=False, ), DownloadableFile(
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import parlai.core.build_data as build_data
import os
from parlai.tasks.light_dialog.builder import build_from_db
from parlai.core.build_data import DownloadableFile

# Common URL prefix of the LIGHT pickles.
_LIGHT_DOWNLOADS = 'http://parl.ai/downloads/light'

# (remote file name, local file name, expected hash string)
_LIGHT_FILES = (
    (
        'light-dialog-processed-small7.pkl',
        'light_data.pkl',
        '7c83cf49818586db9999ea67a4a6ad087afbd91c26ed629a9f00e21d0b84058f',
    ),
    (
        'light-unseen-processed2.pkl',
        'light_unseen_data.pkl',
        '489b98d08dd94eaf1ba95439d04200ccc54623ade056839f87a5c4207bc5699c',
    ),
    (
        'light-environment.pkl',
        'light_environment.pkl',
        '162389202f22063e1c32af7f9261aac13d20fc05598388d1e9748735996ec016',
    ),
)

# The pickles are stored under a different local name than their remote
# name; all are declared with ``zipped=False``.
RESOURCES = [
    DownloadableFile(
        f'{_LIGHT_DOWNLOADS}/{remote_name}', local_name, checksum, zipped=False
    )
    for remote_name, local_name, checksum in _LIGHT_FILES
]
#!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # # Download and build the data if it does not exist. import parlai.core.build_data as build_data import os from parlai.core.build_data import DownloadableFile RESOURCES = [ DownloadableFile( 'http://nlp.cs.washington.edu/zeroshot/relation_splits.tar.bz2', 'relation_splits.tar.bz2', 'e33d0e367b6e837370da17a2d09d217e0a92f8d180f7abb3fd543a2d1726b2b4', ) ] def build(opt): dpath = os.path.join(opt['datapath'], 'QA-ZRE') version = None if not build_data.built(dpath, version_string=version): print('[building data: ' + dpath + ']') if build_data.built(dpath): # An older version exists, so remove these outdated files.
#!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # Download and build the data if it does not exist. import parlai.core.build_data as build_data import parlai.tasks.wikimovies.build as wikimovies_build import os from parlai.core.build_data import DownloadableFile RESOURCES = [ DownloadableFile( 'http://parl.ai/downloads/mturkwikimovies/mturkwikimovies.tar.gz', 'mturkwikimovies.tar.gz', '41a85a17e813bfecd975d448f9a08178f65aba32fc10eaa1a48c0bed65431361', ) ] def build(opt): # Depends upon another dataset, wikimovies, build that first. wikimovies_build.build(opt) dpath = os.path.join(opt['datapath'], 'MTurkWikiMovies') version = None if not build_data.built(dpath, version_string=version): print('[building data: ' + dpath + ']') if build_data.built(dpath):
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Common URL prefix of the SQuAD 2.0 JSON files.
_SQUAD_DATASET_URL = 'https://rajpurkar.github.io/SQuAD-explorer/dataset'

# (file name, expected hash string); the local name equals the remote name.
_SQUAD_FILES = (
    (
        'train-v2.0.json',
        '68dcfbb971bd3e96d5b46c7177b16c1a4e7d4bdef19fb204502738552dede002',
    ),
    (
        'dev-v2.0.json',
        '80a5225e94905956a6446d296ca1093975c4d3b3260f1d6c8f68bc2ab77182d8',
    ),
)

RESOURCES = [
    DownloadableFile(
        f'{_SQUAD_DATASET_URL}/{file_name}', file_name, checksum, zipped=False
    )
    for file_name, checksum in _SQUAD_FILES
]


def build(opt):
    """
    Compute the ``SQuAD2`` data directory under ``opt['datapath']``.
    """
    # NOTE(review): this snippet ends right after the two assignments below;
    # the rest of the original function is not visible here.
    data_dir = os.path.join(opt['datapath'], 'SQuAD2')
    data_version = None
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import os

# Single archive holding the dataset.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/cbt/cbt.tar.gz',
        'cbt.tar.gz',
        '932df0cadc1337b2a12b4c696b1041c1d1c6d4b6bd319874c6288f02e4a61e92',
    )
]


def build(opt):
    """
    Prepare the ``CBT`` directory under ``opt['datapath']``.

    No version string is used (``None``). If ``build_data.built`` reports
    the directory as not built, a stale copy is removed (when present) and
    the directory is recreated.
    """
    # NOTE(review): this snippet ends after the directory is created; any
    # further steps of the original function are not visible here.
    data_dir = os.path.join(opt['datapath'], 'CBT')
    data_version = None

    if not build_data.built(data_dir, version_string=data_version):
        print(f'[building data: {data_dir}]')
        if build_data.built(data_dir):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(data_dir)
        build_data.make_dir(data_dir)
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single JSON-lines file, declared with ``zipped=False``.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/sensitive_topics_evaluation/data_valid.jsonl',
        'data_valid.jsonl',
        'df3a71da78bd231402237fded6df530c80f91814f03a2c3e0581be14fe24633d',
        zipped=False,
    )
]


def build(opt):
    """
    Prepare the ``sensitive_topics_evaluation`` directory under
    ``opt['datapath']``.

    When ``build_data.built`` does not report version ``'v1.0'`` for the
    directory, any previously built copy is removed and the directory is
    created afresh.
    """
    # NOTE(review): this snippet ends after the directory is created; any
    # further steps of the original function are not visible here.
    data_version = 'v1.0'
    data_dir = os.path.join(opt['datapath'], 'sensitive_topics_evaluation')

    if not build_data.built(data_dir, data_version):
        print(f'[building data: {data_dir}]')
        if build_data.built(data_dir):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(data_dir)
        build_data.make_dir(data_dir)
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import jsonlines as jl
import numpy as np
import os
import csv

# The first argument of each entry is an opaque ID rather than a URL.
# NOTE(review): the hash string is empty and the trailing positional flags
# are presumably (zipped=False, from_google=True) -- confirm against the
# DownloadableFile signature.
RESOURCES = [
    DownloadableFile(
        '1FUv2qit9wQ21NV_dbW5HeZVEzng5CMHE',
        'train_self_original.txt',
        '',
        False,
        True,
    ),
    DownloadableFile(
        '1lnrgxXCc7Y-6Ic_zl7b3tAXonmuGkjI5',
        'valid_self_original.txt',
        '',
        False,
        True,
    ),
]


def build_fb_format(q, a, task, dpath):
    """
    Build the output for ``task`` from the sequences ``q`` and ``a``.

    For ``task == 'train'`` the value ``N`` is 80% of ``len(a)``, truncated
    to an integer.
    """
    # NOTE(review): this snippet ends right after N is computed; the rest of
    # the original function is not visible here.
    if task == 'train':
        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy (1.24+); the builtin gives the same truncation.
        N = int(len(a) * 0.8)
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import parlai.core.build_data as build_data
import os
import subprocess
import shutil
import csv
import time
from parlai.core.build_data import DownloadableFile

# Single zip archive of the upstream repository's master branch.
RESOURCES = [
    DownloadableFile(
        'https://github.com/deepmind/narrativeqa/archive/master.zip',
        'narrative_qa.zip',
        'd9fc92d5f53409f845ba44780e6689676d879c739589861b4805064513d1476b',
    )
]


def get_rows_for_set(reader, req_set):
    """
    Return the rows whose ``'set'`` field matches ``req_set``.

    :param reader: iterable of mapping-like rows, each with a ``'set'`` key
        (e.g. a ``csv.DictReader``).
    :param req_set: value to compare against; the row's field is stripped of
        surrounding whitespace before the comparison.
    :return: list of the matching rows, in iteration order.
    """
    return [row for row in reader if row['set'].strip() == req_set]


def read_csv_to_dict_list(filepath):
    """
    Open a comma-delimited CSV file for row-by-row reading.

    :param filepath: path of the CSV file.
    :return: ``(reader, f)`` where ``reader`` is a ``csv.DictReader`` over
        the open file object ``f``. The reader is lazy, so the file is left
        open on purpose; the caller must close ``f`` once done with
        ``reader``.
    """
    # The csv module requires files to be opened with newline='' so that
    # newlines embedded in quoted fields are handled correctly.
    f = open(filepath, 'r', newline='')
    return csv.DictReader(f, delimiter=','), f
#!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. import parlai.core.build_data as build_data import os from parlai.core.build_data import DownloadableFile RESOURCES = [ DownloadableFile( 'https://storage.googleapis.com/airdialogue/airdialogue_data.tar.gz', 'airdialogue.tar.gz', '7d2130cdde73a59afd6ad6c463a25453d8ed677c1b3a4a4aaa2406db9c9712cb', ) ] def build(opt): dpath = os.path.join(opt['datapath']) airdialogue_path = os.path.join(dpath, 'airdialogue_data') version = '1.0' if not build_data.built(airdialogue_path, version_string=version): print('[building data: ' + airdialogue_path + ']') if build_data.built(airdialogue_path): build_data.remove_dir(airdialogue_path) # Download the data. for downloadable_file in RESOURCES:
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# (opaque file ID, local file name, expected hash string)
_HOLL_E_FILES = (
    (
        '1u5zzfENGbRYVo-HsyFXZc3sJ9FgDTNx4',
        'raw_train_data.json',
        '7380e41ca8c65084140af997057eb9e8f974e08a19fdb40de73a9f96e4b5bd6d',
    ),
    (
        '1nRsAyuVZu7L2f2YcxNbxsT1gZzFnQy-P',
        'raw_test_data.json',
        '3fd2cc672fbae118f3545640fc4c4f45a2e9037c98eebd1e64ad2e0ce5d1fe35',
    ),
)

# Both files are declared with ``from_google=True`` and ``zipped=False``.
RESOURCES = [
    DownloadableFile(
        file_id, local_name, checksum, from_google=True, zipped=False
    )
    for file_id, local_name, checksum in _HOLL_E_FILES
]


def build(opt):
    """
    Compute the ``holl_e`` data directory under ``opt['datapath']``.
    """
    # NOTE(review): this snippet ends right after the assignment below; the
    # rest of the original function is not visible here.
    data_dir = os.path.join(opt['datapath'], 'holl_e')
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
import json
from parlai.core.build_data import DownloadableFile
from parlai.utils.io import PathManager

# Common URL prefix of the QuAC JSON files.
_QUAC_URL = 'https://s3.amazonaws.com/my89public/quac'

# (file name, expected hash string); the local name equals the remote name.
_QUAC_FILES = (
    (
        'train_v0.2.json',
        'ff5cca5a2e4b4d1cb5b5ced68b9fce88394ef6d93117426d6d4baafbcc05c56a',
    ),
    (
        'val_v0.2.json',
        '09e622916280ba04c9352acb1bc5bbe80f11a2598f6f34e934c51d9e6570f378',
    ),
)

RESOURCES = [
    DownloadableFile(f'{_QUAC_URL}/{file_name}', file_name, checksum, zipped=False)
    for file_name, checksum in _QUAC_FILES
]

VERSION = '0.2'

# Marker tokens (their use is not visible in this snippet).
SHOULD = '__SHOULD__'
MAYBE = '__MAYBE__'
SHOULD_NOT = '__SHOULDNOT__'
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import os
from parlai.tasks.personality_captions.download_images import download_images

# Single archive holding the dataset.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/image_chat/image_chat.tgz',
        'image_chat.tgz',
        'ad733e181de33f1085166bb7af17fcf228504bd48228ed8cc20c5e7a9fa5d259',
    )
]


def build(opt):
    """
    Prepare the ``image_chat`` directory under ``opt['datapath']``.

    When ``build_data.built`` does not report version ``'1.0'`` for the
    directory, any previously built copy is removed and the directory is
    created afresh.
    """
    # NOTE(review): this snippet ends at the "Download the data" step, so
    # the download itself and the use of ``image_path`` are not visible.
    datapath = opt['datapath']
    dpath = os.path.join(datapath, 'image_chat')
    image_path = os.path.join(datapath, 'yfcc_images')
    version = '1.0'

    if not build_data.built(dpath, version):
        print(f'[building data: {dpath}]')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single archive holding the dataset.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/personachat/personachat.tgz',
        'personachat.tgz',
        '507cf8641d333240654798870ea584d854ab5261071c5e3521c20d8fa41d5622',
    )
]


def build(opt):
    """
    Prepare the ``Persona-Chat`` directory under ``opt['datapath']``.

    When ``build_data.built`` does not report version ``'v1.0'`` for the
    directory, any previously built copy is removed and the directory is
    created afresh.
    """
    # NOTE(review): this snippet ends after the directory is created; any
    # further steps of the original function are not visible here.
    data_version = 'v1.0'
    data_dir = os.path.join(opt['datapath'], 'Persona-Chat')

    if not build_data.built(data_dir, data_version):
        print(f'[building data: {data_dir}]')
        if build_data.built(data_dir):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(data_dir)
        build_data.make_dir(data_dir)
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import os

# Single archive; it is stored locally as ``FVQA.zip``.
RESOURCES = [
    DownloadableFile(
        'https://dl.dropboxusercontent.com/s/iyz6l7jhbt6jb7q/new_dataset_release.zip',
        'FVQA.zip',
        '66d1831a61d1282fb0c95c01435eda9b465961d507c1e166e4c32b89687c3c26',
    )
]


def build(opt):
    """
    Prepare the ``FVQA`` directory under ``opt['datapath']``.

    No version string is used (``None``). If ``build_data.built`` reports
    the directory as not built, a stale copy is removed (when present) and
    the directory is recreated.
    """
    # NOTE(review): this snippet ends at the "Download the data" step; the
    # download itself is not visible here.
    data_dir = os.path.join(opt['datapath'], 'FVQA')
    data_version = None

    if not build_data.built(data_dir, version_string=data_version):
        print(f'[building data: {data_dir}]')
        # An older version exists, so remove these outdated files.
        if build_data.built(data_dir):
            build_data.remove_dir(data_dir)
        build_data.make_dir(data_dir)

        # Download the data.
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
import parlai.tasks.dbll_babi.build as dbll_babi_build
import parlai.tasks.wikimovies.build as wikimovies_build

# Single archive holding the dataset.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/dbll/dbll.tgz',
        'dbll.tgz',
        'd8c727dac498b652c7f5de6f72155dce711ff46c88401a303399d3fad4db1e68',
    )
]


def build(opt):
    """
    Build this task's data by delegating to two other build modules.

    The order matters: wikimovies is built first, then dbll_babi.
    """
    # Depends upon another dataset, wikimovies, build that first.
    for builder in (wikimovies_build, dbll_babi_build):
        builder.build(opt)
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import subprocess
from os.path import join as pjoin
from os.path import isfile, isdir

# Common URL prefix of the two ELI5 helper files.
_ELI5_URL = 'https://dl.fbaipublicfiles.com/eli5qa'

# pre-computed files
RESOURCES = [
    # wet.paths.gz is declared with zipped=False because the archive format
    # is not recognized. It gets unzipped with subprocess after RESOURCES
    # are downloaded.
    DownloadableFile(
        'https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-2018-34/wet.paths.gz',
        'wet.paths.gz',
        'e3a8addc6a33b54b1dd6488a98c875851ef1aca3b80133d39f6897330a8835fb',
        zipped=False,
    ),
    DownloadableFile(
        f'{_ELI5_URL}/explainlikeimfive_ccrawl_ids.json.gz',
        'explainlikeimfive_ccrawl_ids.json.gz',
        '59cd7b6a8580421aecae66cd33d065073f2abf21d86097b3262bd460a7a14f0d',
        zipped=False,
    ),
    DownloadableFile(
        f'{_ELI5_URL}/explainlikeimfive_unigram_counts.json',
        'explainlikeimfive_unigram_counts.json',
        '0433a4dda7532ba1dae2f5b6bf70cd5ab91fd2772f75e99b4c15c2e04ba80dfd',
        zipped=False,
    ),
]
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Common URL prefix of the Taskmaster-1 JSON files.
_TASKMASTER_URL = (
    'https://storage.googleapis.com/dialog-data-corpus/TASKMASTER-1-2019'
)

# (file name, expected hash string); the local name equals the remote name.
_TASKMASTER_FILES = (
    (
        'self-dialogs.json',
        '1e590ed0ccee279e40c2fb9e083d3b9417477c6bfe35ce5b2277167698dd858d',
    ),
    (
        'woz-dialogs.json',
        'cd3bc4e968487315d412c044d30af2bf0a4b33c3ef8b74c589f1e1fa832bf72f',
    ),
)

RESOURCES = [
    DownloadableFile(
        f'{_TASKMASTER_URL}/{file_name}', file_name, checksum, zipped=False
    )
    for file_name, checksum in _TASKMASTER_FILES
]


def build(opt):
    """
    Compute the ``taskmaster-1`` data directory under ``opt['datapath']``.
    """
    # NOTE(review): this snippet ends at the "define version" step; the rest
    # of the original function is not visible here.
    # get path to data directory
    data_dir = os.path.join(opt['datapath'], 'taskmaster-1')
    # define version if any
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import gzip
import json
import os
import tqdm
import parlai.core.build_data as build_data
from parlai.core.build_data import DownloadableFile

# Common URL prefix of the MS MARCO v2.1 files.
_MSMARCO_URL = 'https://msmarco.blob.core.windows.net/msmarco'

# (remote file name, local file name, expected hash string)
_MSMARCO_FILES = (
    (
        'train_v2.1.json.gz',
        'train.gz',
        'e91745411ca81e441a3bb75deb71ce000dc2fc31334085b7d499982f14218fe2',
    ),
    (
        'dev_v2.1.json.gz',
        'valid.gz',
        '5b3c9c20d1808ee199a930941b0d96f79e397e9234f77a1496890b138df7cb3c',
    ),
    (
        'eval_v2.1_public.json.gz',
        'test.gz',
        '05ac0e448450d507e7ff8e37f48a41cc2d015f5bd2c7974d2445f00a53625db6',
    ),
)

# The gzip files are declared with ``zipped=False`` and stored under short
# split names (train/valid/test).
RESOURCES = [
    DownloadableFile(
        f'{_MSMARCO_URL}/{remote_name}', local_name, checksum, zipped=False
    )
    for remote_name, local_name, checksum in _MSMARCO_FILES
]
# Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. import parlai.core.build_data as build_data import os import gzip import json from parlai.core.build_data import DownloadableFile from parlai.utils.io import PathManager RESOURCES = [ DownloadableFile( 'http://jmcauley.ucsd.edu/data/amazon/qa/qa_Appliances.json.gz', 'qa_Appliances.json.gz', '9c613a5dfedd1071431faa29de903b1b0e592c5ac1c7861c26d8b69dfda8ac78', zipped=False, ), DownloadableFile( 'http://jmcauley.ucsd.edu/data/amazon/qa/qa_Arts_Crafts_and_Sewing.json.gz', 'qa_Arts_Crafts_and_Sewing.json.gz', 'c9aad6d615294571c1be7ea6a88730829a68e701ca7d1168f4d6b5234c37ac65', zipped=False, ), DownloadableFile( 'http://jmcauley.ucsd.edu/data/amazon/qa/qa_Automotive.json.gz', 'qa_Automotive.json.gz', 'ca2da4b9d3afd3e6c915d69b34618bdcf9c6febadd7389f368fb51e9e1585009', zipped=False, ), DownloadableFile(
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single archive, identified by an opaque ID and declared with
# ``from_google=True``.
RESOURCES = [
    DownloadableFile(
        '0BwmD_VLjROrfN0xhTDVteGQ3eG8',
        'qadailymail.tar.gz',
        '77bfe0d91dbc9774991bbce59895743adfc984eafffc328a7b1d34a89e2b5646',
        from_google=True,
    )
]


def _process(fname, fout):
    """
    Turn the text file ``fname`` into a single example string.

    The string is ``'1 '`` + line index 2 (article) + a space + line index 4
    (question) + a tab + line index 6 (answer); indices are 0-based and
    trailing newlines are stripped.
    """
    # NOTE(review): this snippet ends before the candidates are appended and
    # before ``fout`` is used; that part is not visible here.
    with open(fname) as f:
        lines = [raw_line.strip('\n') for raw_line in f]

    article, question, answer = lines[2], lines[4], lines[6]
    # main article, then question, then (tab-separated) answer
    s = f'1 {article} {question}\t{answer}'
    # add candidates (and strip them of the real names)
#!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. import os from parlai.core.build_data import DownloadableFile import parlai.core.build_data as build_data RESOURCES = [ DownloadableFile( 'http://parl.ai/downloads/dialogue_safety/single_turn_safety.json', 'single_turn_safety.json', 'f3a46265aa639cfa4b55d2be4dca4be1c596acb5e8f94d7e0041e1a54cedd4cd', zipped=False, ), DownloadableFile( 'http://parl.ai/downloads/dialogue_safety/multi_turn_safety.json', 'multi_turn_safety.json', 'e3e577f456d63d51eb7b5f98ffd251ad695476f186d422fa8de1a177742fa7b6', zipped=False, ), ] def build(datapath): version = 'v1.0' dpath = os.path.join(datapath, 'dialogue_safety') if not build_data.built(dpath, version):
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
from parlai.core.build_data import DownloadableFile
from parlai.core import build_data

# Single zip archive, referenced by a direct-download URL.
RESOURCES = [
    DownloadableFile(
        'https://drive.google.com/uc?export=download&id=0B2MvoQfXtqZmMTJqclpBdGN2bmc',
        'dialog-bAbI-plus.zip',
        'e67dfecbde5e6250833143a6148150a313204237b765d39e7b8ebc111cb3204e',
    )
]


def build(opt):
    """
    Prepare the ``dialog-bAbI-plus`` directory under ``opt['datapath']``.

    No version string is used (``None``). If ``build_data.built`` reports
    the directory as not built, a stale copy is removed (when present) and
    the directory is recreated.
    """
    # NOTE(review): this snippet ends at the "Download the data" step; the
    # download itself is not visible here.
    data_dir = os.path.join(opt['datapath'], 'dialog-bAbI-plus')
    data_version = None

    if not build_data.built(data_dir, version_string=data_version):
        print(f'[building data: {data_dir}]')
        if build_data.built(data_dir):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(data_dir)
        build_data.make_dir(data_dir)

        # Download the data.
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
from parlai.utils.io import PathManager
import parlai.core.build_data as build_data
import codecs
import os

# Single archive holding the corpus.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/cornell_movie/cornell_movie_dialogs_corpus.tgz',
        'cornell_movie_dialogs_corpus.tgz',
        'ae77ab2e4743ce929087a4f529934059b920c4bdaa3143741b65b1e648ab45fd',
    )
]


def create_fb_format(lines_file, convo_file, outpath):
    """
    Open ``train.txt``, ``valid.txt`` and ``test.txt`` in ``outpath`` for
    writing, in that order.
    """
    # NOTE(review): this snippet ends right after ``lines`` is initialised;
    # how the inputs are read and what is written is not visible here.
    print('[building fbformat]')
    train_path = os.path.join(outpath, 'train.txt')
    valid_path = os.path.join(outpath, 'valid.txt')
    test_path = os.path.join(outpath, 'test.txt')

    with PathManager.open(train_path, 'w') as ftrain, PathManager.open(
        valid_path, 'w'
    ) as fvalid, PathManager.open(test_path, 'w') as ftest:
        lines = {}
from parlai.core.build_data import DownloadableFile
from parlai.utils.io import PathManager
import parlai.core.build_data as build_data
import os
import json

VERSION = '1'

# File names derived from VERSION (the train file has an extra ".1").
TRAIN_FILENAME = f'hotpot_train_v{VERSION}.1.json'
DEV_DISTRACTOR_FILENAME = f'hotpot_dev_distractor_v{VERSION}.json'
DEV_FULLWIKI_FILENAME = f'hotpot_dev_fullwiki_v{VERSION}.json'

# Common URL prefix of the HotpotQA JSON files.
_HOTPOT_URL = 'http://curtis.ml.cmu.edu/datasets/hotpot'

# (file name, expected hash string); the names are spelled out literally
# here rather than taken from the constants above.
_HOTPOT_FILES = (
    (
        'hotpot_train_v1.1.json',
        '26650cf50234ef5fb2e664ed70bbecdfd87815e6bffc257e068efea5cf7cd316',
    ),
    (
        'hotpot_dev_distractor_v1.json',
        '4e9ecb5c8d3b719f624d66b60f8d56bf227f03914f5f0753d6fa1b359d7104ea',
    ),
    (
        'hotpot_dev_fullwiki_v1.json',
        '2f1f3e594a3066a3084cc57950ca2713c24712adaad03af6ccce18d1846d5618',
    ),
)

RESOURCES = [
    DownloadableFile(
        f'{_HOTPOT_URL}/{file_name}', file_name, checksum, zipped=False
    )
    for file_name, checksum in _HOTPOT_FILES
]
#!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # Download and build the data if it does not exist. import parlai.core.build_data as build_data import os from parlai.core.build_data import DownloadableFile RESOURCES = [ DownloadableFile( 'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Train_mscoco.zip', 'v2_Questions_Train_mscoco.zip', '05a64b6e2582d06d7585f5429674a9a33851878be1bff9f8668cdcf792df611e', ), DownloadableFile( 'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Val_mscoco.zip', 'v2_Questions_Val_mscoco.zip', 'e71f6c5c3e97a51d050f28243e262b28cd0c48d11a6b4632d769d30d3f93222a', ), DownloadableFile( 'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Test_mscoco.zip', 'v2_Questions_Test_mscoco.zip', '982e2e687a86514b78ea83af356d151976c5e3fb4168a29ca543610574082ad7', ), DownloadableFile( 'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Val_mscoco.zip', 'v2_Annotations_Val_mscoco.zip', '0caae7c8d1dafd852727f5ac046bc1efca9b72026bd6ffa34fc489f3a7b3291e',
import os
import parlai.core.build_data as build_data
from parlai.core.build_data import DownloadableFile
from parlai.core.opt import Opt

BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION = 'v0.2'
HUMAN_SAFETY_EVAL_TESTSET_VERSION = 'v0.1'

TASK_FOLDER_NAME = 'bot_adversarial_dialogue'

# Common URL prefix of both archives.
_DOWNLOAD_URL = 'http://parl.ai/downloads/bot_adversarial_dialogue'

# Archive names embed the corresponding version constant.
_DIALOGUE_ARCHIVE = (
    f'dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz'
)
_SAFETY_EVAL_ARCHIVE = (
    f'human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz'
)

BOT_ADVERSARIAL_DIALOGUE_DATASETS_RESOURCES = [
    DownloadableFile(
        f'{_DOWNLOAD_URL}/{_DIALOGUE_ARCHIVE}',
        _DIALOGUE_ARCHIVE,
        '2178b022fac154ddd9b570f6386abc4cd3e7ceb4476f0bebfbce5941424461eb',
    )
]

HUMAN_SAFETY_EVAL_TESTSET_RESOURCES = [
    DownloadableFile(
        f'{_DOWNLOAD_URL}/{_SAFETY_EVAL_ARCHIVE}',
        _SAFETY_EVAL_ARCHIVE,
        'b8b351c3e5eefcd54fdd73cd6a04847cd1eeb9106fc53b92a87e2a4c7537a7b2',
    )
]


def get_adversarial_dialogue_folder(datapath: str) -> str:
    """
    Return ``<datapath>/bot_adversarial_dialogue/dialogue_datasets``.

    The path is only composed with ``os.path.join``; nothing is created or
    checked on disk.
    """
    path_parts = (datapath, TASK_FOLDER_NAME, 'dialogue_datasets')
    return os.path.join(*path_parts)