Example #1
0
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import os

from parlai.core import build_data
from parlai.core.opt import Opt

# Version tag interpolated into the style-labeled datasets archive URL and
# file name below.
STYLE_LABELED_DATASETS_VERSION = 'v1.1'

# Subfolder name for this task; get_style_labeled_data_folder() joins it onto
# the data path.
TASK_FOLDER_NAME = 'style_gen'
# Versioned tarball of the style-labeled datasets. The three positional
# arguments are the download URL, a file name, and a 64-hex-digit string
# (presumably the expected SHA-256 checksum -- confirm against
# build_data.DownloadableFile).
STYLE_LABELED_DATASETS_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/style_gen/style_labeled_datasets__{STYLE_LABELED_DATASETS_VERSION}.tar.gz',
        f'style_labeled_datasets__{STYLE_LABELED_DATASETS_VERSION}.tar.gz',
        '19995a8957cb3e847d1c0ff18e6ce0c231ed711ae19ebaa624012e1782223445',
    )
]
# Plain-text personality list. zipped=False is passed here, presumably because
# the file is not an archive and needs no extraction -- confirm.
PERSONALITY_LIST_RESOURCES = [
    build_data.DownloadableFile(
        'http://parl.ai/downloads/style_gen/personality_list.txt',
        'personality_list.txt',
        'f527d9315b9d10f8e65021577a7dc4b1777940cea735588485b1c4b5c8c9032a',
        zipped=False,
    )
]


def get_style_labeled_data_folder(datapath: str) -> str:
    """
    Return the 'labeled_datasets' folder for this task under ``datapath``.

    The result is ``<datapath>/<TASK_FOLDER_NAME>/labeled_datasets``, joined
    with the platform's path separator.
    """
    path_parts = (datapath, TASK_FOLDER_NAME, 'labeled_datasets')
    return os.path.join(*path_parts)
Example #2
0
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import json
import os

from parlai.core import build_data
from parlai.utils.io import PathManager


RESOURCES = [
    build_data.DownloadableFile(
        'http://parl.ai/downloads/blended_skill_talk/blended_skill_talk.tar.gz',
        'blended_skill_talk.tar.gz',
        '5fbed0068ee89e2d43b93c3ecb341e784617033efa5e8e911a219d4eda6134a6',
    ),
    build_data.DownloadableFile(
        'http://parl.ai/downloads/blended_skill_talk/personas_list.txt',
        'persona_list.txt',
        '59a51adedc78e806a380f16477de3740cefe3494d20f8a2a733841bedaaa3ee5',
        zipped=False,
    ),
    build_data.DownloadableFile(
        'http://parl.ai/downloads/blended_skill_talk/topic_to_persona_list.txt',
        'topic_to_persona_list.txt',
        '47cdb6cbee0516ca7400be35fa07761339b86c6c026425bf5dba00e5534e8182',
        zipped=False,
    ),
    build_data.DownloadableFile(
Example #3
0
# Version tags interpolated into the archive URLs and file names below.
BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION = 'v0.2'
HUMAN_SAFETY_EVAL_TESTSET_VERSION = 'v0.1'

# Subfolder name for this task; the get_*_folder() helpers join it onto the
# data path.
TASK_FOLDER_NAME = 'bot_adversarial_dialogue'

# Versioned tarball of the dialogue datasets. The three positional arguments
# are the download URL, a file name, and a 64-hex-digit string (presumably the
# expected SHA-256 checksum -- confirm against DownloadableFile).
# NOTE(review): this entry uses the bare name DownloadableFile while the list
# below uses build_data.DownloadableFile; both names must be imported at the
# top of the file -- confirm.
BOT_ADVERSARIAL_DIALOGUE_DATASETS_RESOURCES = [
    DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        f'dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        '2178b022fac154ddd9b570f6386abc4cd3e7ceb4476f0bebfbce5941424461eb',
    )
]
# Versioned tarball of the human safety evaluation test set.
HUMAN_SAFETY_EVAL_TESTSET_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        f'human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        'b8b351c3e5eefcd54fdd73cd6a04847cd1eeb9106fc53b92a87e2a4c7537a7b2',
    )
]


def get_adversarial_dialogue_folder(datapath: str) -> str:
    """
    Return the 'dialogue_datasets' folder for this task under ``datapath``.

    The result is ``<datapath>/<TASK_FOLDER_NAME>/dialogue_datasets``, joined
    with the platform's path separator.
    """
    path_parts = (datapath, TASK_FOLDER_NAME, 'dialogue_datasets')
    return os.path.join(*path_parts)


def get_human_safety_eval_folder(datapath: str) -> str:
    """
    Return the 'human_eval' folder for this task under ``datapath``.

    The result is ``<datapath>/<TASK_FOLDER_NAME>/human_eval``, joined with
    the platform's path separator.
    """
    path_parts = (datapath, TASK_FOLDER_NAME, 'human_eval')
    return os.path.join(*path_parts)


def build_dialogue_datasets(opt: Opt):
    dpath = get_adversarial_dialogue_folder(opt['datapath'])
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import json
import os

from parlai.core import build_data


# Files to download for the blended_skill_talk task. Each entry passes a
# download URL, a file name, and a 64-hex-digit string (presumably the
# expected SHA-256 checksum -- confirm against build_data.DownloadableFile).
RESOURCES = [
    # Main dataset tarball.
    build_data.DownloadableFile(
        'http://parl.ai/downloads/blended_skill_talk/blended_skill_talk.tar.gz',
        'blended_skill_talk.tar.gz',
        '5fbed0068ee89e2d43b93c3ecb341e784617033efa5e8e911a219d4eda6134a6',
    ),
    # Persona list, passed with zipped=False. Note that the URL ends in
    # 'personas_list.txt' while the file name argument is 'persona_list.txt'.
    build_data.DownloadableFile(
        'http://parl.ai/downloads/blended_skill_talk/personas_list.txt',
        'persona_list.txt',
        '59a51adedc78e806a380f16477de3740cefe3494d20f8a2a733841bedaaa3ee5',
        zipped=False,
    ),
    # Topic-to-persona list, passed with zipped=False.
    build_data.DownloadableFile(
        'http://parl.ai/downloads/blended_skill_talk/topic_to_persona_list.txt',
        'topic_to_persona_list.txt',
        '47cdb6cbee0516ca7400be35fa07761339b86c6c026425bf5dba00e5534e8182',
        zipped=False,
    ),
]
Example #5
0
# Version tags interpolated into the human-eval archive URLs and file names
# below.
HUMAN_SAFETY_EVAL_TESTSET_VERSION = 'v0.1'
HUMAN_NONADV_SAFETY_EVAL_TESTSET_VERSION = 'v0.1'

# Subfolder name for this task; get_adversarial_dialogue_folder() joins it
# onto the data path.
TASK_FOLDER_NAME = 'bot_adversarial_dialogue'

# Versioned tarball of the dialogue datasets. The three positional arguments
# are the download URL, a file name, and a 64-hex-digit string (presumably the
# expected SHA-256 checksum -- confirm against DownloadableFile).
# NOTE(review): BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION and the bare name
# DownloadableFile are not defined in this part of the file; they are assumed
# to be defined/imported above -- confirm.
BOT_ADVERSARIAL_DIALOGUE_DATASETS_RESOURCES = [
    DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        f'dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        '2178b022fac154ddd9b570f6386abc4cd3e7ceb4476f0bebfbce5941424461eb',
    )
]
# Versioned tarball of the human safety evaluation test set.
HUMAN_SAFETY_EVAL_TESTSET_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        f'human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        'b8b351c3e5eefcd54fdd73cd6a04847cd1eeb9106fc53b92a87e2a4c7537a7b2',
    )
]
# Versioned tarball of the "nonadv" human safety evaluation test set.
HUMAN_NONADV_SAFETY_EVAL_TESTSET_RESOURCES = [
    build_data.DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/human_nonadv_safety_eval_{HUMAN_NONADV_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        f'human_nonadv_safety_eval_{HUMAN_NONADV_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        'dfa75cd2d101dafe73f94cc8d4be0af2cd0085ea0066c26cecc322b31fd996d6',
    )
]


def get_adversarial_dialogue_folder(datapath: str) -> str:
    """
    Return the 'dialogue_datasets' folder for this task under ``datapath``.

    The result is ``<datapath>/<TASK_FOLDER_NAME>/dialogue_datasets``, joined
    with the platform's path separator.
    """
    path_parts = (datapath, TASK_FOLDER_NAME, 'dialogue_datasets')
    return os.path.join(*path_parts)
Example #6
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import parlai.core.build_data as build_data
import parlai.utils.logging as logging

import parlai.tasks.wizard_of_internet.constants as CONST

# Single archive for the wizard_of_internet dataset. The three positional
# arguments are the download URL, a file name, and a 64-hex-digit string
# (presumably the expected SHA-256 checksum -- confirm against
# build_data.DownloadableFile).
DATASET_FILE = build_data.DownloadableFile(
    'http://parl.ai/downloads/wizard_of_internet/wizard_of_internet.tgz',
    'wizard_of_internet.tgz',
    'c2495b13ad00015e431d51738e02d37d2e80c8ffd6312f1b3d273dd908a8a12c',
)


def build(opt):
    dpath = os.path.join(opt['datapath'], CONST.DATASET_NAME)
    version = '1.0'
    if not build_data.built(dpath, version):
        logging.info(
            f'[building data: {dpath}]\nThis may take a while but only heppens once.'
        )
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.