# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Download and build the data if it does not exist.

import os

from parlai.core import build_data
from parlai.core.opt import Opt

# Release tag embedded in the labeled-datasets archive name and download URL.
STYLE_LABELED_DATASETS_VERSION = 'v1.1'

# Sub-folder of the data path under which this task keeps its files.
TASK_FOLDER_NAME = 'style_gen'

# Versioned tarball of style-labeled datasets. The 64-hex-digit third argument
# is presumably the expected checksum of the download -- confirm against
# build_data.DownloadableFile.
STYLE_LABELED_DATASETS_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/style_gen/style_labeled_datasets__{STYLE_LABELED_DATASETS_VERSION}.tar.gz',
        f'style_labeled_datasets__{STYLE_LABELED_DATASETS_VERSION}.tar.gz',
        '19995a8957cb3e847d1c0ff18e6ce0c231ed711ae19ebaa624012e1782223445',
    )
]

# Unversioned personality list, flagged as not zipped.
PERSONALITY_LIST_RESOURCES = [
    build_data.DownloadableFile(
        'http://parl.ai/downloads/style_gen/personality_list.txt',
        'personality_list.txt',
        'f527d9315b9d10f8e65021577a7dc4b1777940cea735588485b1c4b5c8c9032a',
        zipped=False,
    )
]


def get_style_labeled_data_folder(datapath: str) -> str:
    """
    Return the folder that holds the style-labeled datasets.

    :param datapath: root data directory.
    :return: ``<datapath>/<TASK_FOLDER_NAME>/labeled_datasets``, joined with
        ``os.path.join``.
    """
    path_parts = (datapath, TASK_FOLDER_NAME, 'labeled_datasets')
    return os.path.join(*path_parts)
# Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # Download and build the data if it does not exist. import json import os from parlai.core import build_data from parlai.utils.io import PathManager RESOURCES = [ build_data.DownloadableFile( 'http://parl.ai/downloads/blended_skill_talk/blended_skill_talk.tar.gz', 'blended_skill_talk.tar.gz', '5fbed0068ee89e2d43b93c3ecb341e784617033efa5e8e911a219d4eda6134a6', ), build_data.DownloadableFile( 'http://parl.ai/downloads/blended_skill_talk/personas_list.txt', 'persona_list.txt', '59a51adedc78e806a380f16477de3740cefe3494d20f8a2a733841bedaaa3ee5', zipped=False, ), build_data.DownloadableFile( 'http://parl.ai/downloads/blended_skill_talk/topic_to_persona_list.txt', 'topic_to_persona_list.txt', '47cdb6cbee0516ca7400be35fa07761339b86c6c026425bf5dba00e5534e8182', zipped=False, ), build_data.DownloadableFile(
# Release tags embedded in the archive names and download URLs below.
BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION = 'v0.2'
HUMAN_SAFETY_EVAL_TESTSET_VERSION = 'v0.1'

# Sub-folder of the data path shared by everything this task stores.
TASK_FOLDER_NAME = 'bot_adversarial_dialogue'

# Versioned tarball of the dialogue datasets. DownloadableFile is referenced
# through build_data here, matching the other resource list in this module.
BOT_ADVERSARIAL_DIALOGUE_DATASETS_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        f'dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        '2178b022fac154ddd9b570f6386abc4cd3e7ceb4476f0bebfbce5941424461eb',
    )
]

# Versioned tarball of the human safety evaluation test set.
HUMAN_SAFETY_EVAL_TESTSET_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        f'human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        'b8b351c3e5eefcd54fdd73cd6a04847cd1eeb9106fc53b92a87e2a4c7537a7b2',
    )
]


def get_adversarial_dialogue_folder(datapath: str) -> str:
    """
    Return the folder that holds the dialogue datasets.

    :param datapath: root data directory.
    :return: ``<datapath>/<TASK_FOLDER_NAME>/dialogue_datasets``.
    """
    return os.path.join(datapath, TASK_FOLDER_NAME, 'dialogue_datasets')


def get_human_safety_eval_folder(datapath: str) -> str:
    """
    Return the folder that holds the human safety evaluation data.

    :param datapath: root data directory.
    :return: ``<datapath>/<TASK_FOLDER_NAME>/human_eval``.
    """
    return os.path.join(datapath, TASK_FOLDER_NAME, 'human_eval')


def build_dialogue_datasets(opt: Opt):
    dpath = get_adversarial_dialogue_folder(opt['datapath'])
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Download and build the data if it does not exist.

import json
import os

from parlai.core import build_data

# Files fetched for this task: one tarball plus two text files that are
# flagged as not zipped.
RESOURCES = [
    build_data.DownloadableFile(
        'http://parl.ai/downloads/blended_skill_talk/blended_skill_talk.tar.gz',
        'blended_skill_talk.tar.gz',
        '5fbed0068ee89e2d43b93c3ecb341e784617033efa5e8e911a219d4eda6134a6',
    ),
    # The remote file is named personas_list.txt (plural), while the second
    # argument uses the singular persona_list.txt.
    build_data.DownloadableFile(
        'http://parl.ai/downloads/blended_skill_talk/personas_list.txt',
        'persona_list.txt',
        '59a51adedc78e806a380f16477de3740cefe3494d20f8a2a733841bedaaa3ee5',
        zipped=False,
    ),
    build_data.DownloadableFile(
        'http://parl.ai/downloads/blended_skill_talk/topic_to_persona_list.txt',
        'topic_to_persona_list.txt',
        '47cdb6cbee0516ca7400be35fa07761339b86c6c026425bf5dba00e5534e8182',
        zipped=False,
    ),
]
# Release tags embedded in the evaluation archive names and download URLs.
HUMAN_SAFETY_EVAL_TESTSET_VERSION = 'v0.1'
HUMAN_NONADV_SAFETY_EVAL_TESTSET_VERSION = 'v0.1'

# Sub-folder of the data path shared by everything this task stores.
TASK_FOLDER_NAME = 'bot_adversarial_dialogue'

# Versioned tarball of the dialogue datasets. DownloadableFile is referenced
# through build_data here, matching the other resource lists in this module.
BOT_ADVERSARIAL_DIALOGUE_DATASETS_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        f'dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        '2178b022fac154ddd9b570f6386abc4cd3e7ceb4476f0bebfbce5941424461eb',
    )
]

# Versioned tarball of the human safety evaluation test set.
HUMAN_SAFETY_EVAL_TESTSET_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        f'human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        'b8b351c3e5eefcd54fdd73cd6a04847cd1eeb9106fc53b92a87e2a4c7537a7b2',
    )
]

# Versioned tarball of the "nonadv" human safety evaluation test set.
HUMAN_NONADV_SAFETY_EVAL_TESTSET_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/human_nonadv_safety_eval_{HUMAN_NONADV_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        f'human_nonadv_safety_eval_{HUMAN_NONADV_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        'dfa75cd2d101dafe73f94cc8d4be0af2cd0085ea0066c26cecc322b31fd996d6',
    )
]


def get_adversarial_dialogue_folder(datapath: str) -> str:
    """
    Return the folder that holds the dialogue datasets.

    :param datapath: root data directory.
    :return: ``<datapath>/<TASK_FOLDER_NAME>/dialogue_datasets``.
    """
    return os.path.join(datapath, TASK_FOLDER_NAME, 'dialogue_datasets')
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
import parlai.core.build_data as build_data
import parlai.utils.logging as logging
import parlai.tasks.wizard_of_internet.constants as CONST

# Tarball containing the dataset for this task.
DATASET_FILE = build_data.DownloadableFile(
    'http://parl.ai/downloads/wizard_of_internet/wizard_of_internet.tgz',
    'wizard_of_internet.tgz',
    'c2495b13ad00015e431d51738e02d37d2e80c8ffd6312f1b3d273dd908a8a12c',
)


def build(opt):
    """
    Prepare the data folder ``<opt['datapath']>/<CONST.DATASET_NAME>``.

    When ``build_data.built`` does not report version '1.0' for that folder,
    a notice is logged, the folder is removed if ``build_data.built`` reports
    any build there, and the folder is created again.

    NOTE(review): only the beginning of this function was visible during
    review; the steps after the folder is recreated are not described here.

    :param opt: options mapping; ``opt['datapath']`` is read.
    """
    dpath = os.path.join(opt['datapath'], CONST.DATASET_NAME)
    version = '1.0'
    if not build_data.built(dpath, version):
        logging.info(
            f'[building data: {dpath}]\nThis may take a while but only happens once.'
        )
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.