Ejemplo n.º 1
0
    def __init__(self):
        """Load the NLP pipeline, the database and the delexicalisation resources.

        Reads the annotated dialogue JSON out of its zip archive (lowercased as
        text), loads the reference-number table, and then either rebuilds the
        delexicalisation value dictionaries via ``get_delex_valdict`` or loads
        them from their JSON files. Finally creates the vocabulary, the slot
        list and the gating label mapping.

        Raises:
            FileNotFoundError: if the zip archive or a required JSON file is
                missing.
        """
        self.nlp = spacy.load('en_core_web_sm')
        self.db = MultiWozDB(cfg.dbs)

        data_path = 'data/multi-woz/annotated_user_da_with_span_full.json'
        # The archive member is looked up by the JSON file's base name.
        member_name = data_path.split('/')[-1]
        with zipfile.ZipFile(data_path + '.zip', 'r') as archive:
            with archive.open(member_name, 'r') as member:
                raw = member.read()
        # Decode before lowercasing so json.loads always receives text; raw
        # bytes are rejected by json.loads on Python < 3.6.
        self.convlab_data = json.loads(raw.decode('utf-8').lower())

        self.delex_sg_valdict_path = 'data/multi-woz-processed/delex_single_valdict.json'
        self.delex_mt_valdict_path = 'data/multi-woz-processed/delex_multi_valdict.json'
        self.ambiguous_val_path = 'data/multi-woz-processed/ambiguous_values.json'
        self.delex_refs_path = 'data/multi-woz-processed/reference_no.json'
        with open(self.delex_refs_path, 'r') as refs_file:
            self.delex_refs = json.load(refs_file)

        # NOTE(review): only the single-value dictionary file is checked; the
        # other two files are assumed to exist whenever it does.
        if not os.path.exists(self.delex_sg_valdict_path):
            (self.delex_sg_valdict,
             self.delex_mt_valdict,
             self.ambiguous_vals) = self.get_delex_valdict()
        else:
            with open(self.delex_sg_valdict_path, 'r') as sg_file:
                self.delex_sg_valdict = json.load(sg_file)
            with open(self.delex_mt_valdict_path, 'r') as mt_file:
                self.delex_mt_valdict = json.load(mt_file)
            with open(self.ambiguous_val_path, 'r') as ambiguous_file:
                self.ambiguous_vals = json.load(ambiguous_file)

        self.vocab = utils.Vocab(cfg.vocab_size)

        # "<domain>-<slot>" identifiers; the order is kept exactly as before.
        self.slot_list = [
            'hotel-pricerange', 'hotel-type', 'hotel-parking', 'hotel-stay',
            'hotel-day', 'hotel-people', 'hotel-area', 'hotel-stars',
            'hotel-internet', 'train-destination', 'train-day',
            'train-departure', 'train-arrive', 'train-people', 'train-leave',
            'attraction-area', 'restaurant-food', 'restaurant-pricerange',
            'restaurant-area', 'attraction-name', 'restaurant-name',
            'attraction-type', 'hotel-name', 'taxi-leave', 'taxi-destination',
            'taxi-departure', 'restaurant-time', 'restaurant-day',
            'restaurant-people', 'taxi-arrive', "hospital-department",
        ]
        self.gating_dict = {'ptr': 0, 'dontcare': 1, 'none': 2}
Ejemplo n.º 2
0
    def __init__(self):
        """Load the NLP pipeline, the database and the delexicalisation resources.

        Reads the annotated dialogue JSON out of its zip archive (decoded as
        UTF-8 and lowercased), loads the reference-number table, and then
        either rebuilds the delexicalisation value dictionaries via
        ``get_delex_valdict`` or loads them from their JSON files. Finally
        creates the vocabulary.

        Raises:
            FileNotFoundError: if the zip archive or a required JSON file is
                missing.
        """
        self.nlp = spacy.load('en_core_web_sm')
        self.db = MultiWozDB(cfg.dbs)

        data_path = 'data/multi-woz/annotated_user_da_with_span_full.json'
        # The archive member is looked up by the JSON file's base name.
        member_name = data_path.split('/')[-1]
        # Context managers close both the archive and the member stream.
        with zipfile.ZipFile(data_path + '.zip', 'r') as archive:
            with archive.open(member_name, 'r') as member:
                raw = member.read()
        self.convlab_data = json.loads(raw.decode('utf-8').lower())

        self.delex_sg_valdict_path = 'data/multi-woz-processed/delex_single_valdict.json'
        self.delex_mt_valdict_path = 'data/multi-woz-processed/delex_multi_valdict.json'
        self.ambiguous_val_path = 'data/multi-woz-processed/ambiguous_values.json'
        self.delex_refs_path = 'data/multi-woz-processed/reference_no.json'
        with open(self.delex_refs_path, 'r') as refs_file:
            self.delex_refs = json.load(refs_file)

        # NOTE(review): only the single-value dictionary file is checked; the
        # other two files are assumed to exist whenever it does.
        if not os.path.exists(self.delex_sg_valdict_path):
            (self.delex_sg_valdict,
             self.delex_mt_valdict,
             self.ambiguous_vals) = self.get_delex_valdict()
        else:
            with open(self.delex_sg_valdict_path, 'r') as sg_file:
                self.delex_sg_valdict = json.load(sg_file)
            with open(self.delex_mt_valdict_path, 'r') as mt_file:
                self.delex_mt_valdict = json.load(mt_file)
            with open(self.ambiguous_val_path, 'r') as ambiguous_file:
                self.ambiguous_vals = json.load(ambiguous_file)

        self.vocab = utils.Vocab(cfg.vocab_size)
Ejemplo n.º 3
0
    def __init__(self, vocab=None):
        """Load configuration-driven resources, the vocabulary and the data.

        Reads the domain-to-file mapping, the slot value set and (when
        ``cfg.multi_acts_training`` is set) the multi-act annotations, builds
        lookup dicts for the dev/test/experiment file names, sets up the
        vocabulary, loads the data and optionally builds the belief-span and
        action-span vocabulary masks.

        Args:
            vocab: optional pre-built vocabulary. When truthy it is used as-is
                and ``vocab.size`` becomes ``self.vocab_size``; otherwise the
                size is taken from ``self._build_vocab()``.

        Raises:
            ValueError: if a domain in ``cfg.exp_domains`` has no (or an empty)
                entry in the domain file mapping.
        """
        super().__init__()
        self.nlp = spacy.load('en_core_web_sm')
        self.db = MultiWozDB(cfg.dbs)

        with open(cfg.domain_file_path, 'r') as domain_file:
            self.domain_files = json.load(domain_file)
        with open(cfg.slot_value_set_path, 'r') as slot_value_file:
            self.slot_value_set = json.load(slot_value_file)
        if cfg.multi_acts_training:
            with open(cfg.multi_acts_path, 'r') as multi_acts_file:
                self.multi_acts = json.load(multi_acts_file)

        # One file name per line; normalised to lowercase without whitespace.
        with open(cfg.test_list, 'r') as test_list_file:
            test_list = [line.strip().lower() for line in test_list_file]
        with open(cfg.dev_list, 'r') as dev_list_file:
            dev_list = [line.strip().lower() for line in dev_list_file]

        # Dicts keyed by file name without the '.json' suffix; the value 1 is
        # only a membership marker.
        self.dev_files = {fn.replace('.json', ''): 1 for fn in dev_list}
        self.test_files = {fn.replace('.json', ''): 1 for fn in test_list}

        # Stays empty when 'all' is among the experiment domains.
        self.exp_files = {}
        if 'all' not in cfg.exp_domains:
            for domain in cfg.exp_domains:
                fn_list = self.domain_files.get(domain)
                if not fn_list:
                    raise ValueError('[%s] is an invalid experiment setting' %
                                     domain)
                for fn in fn_list:
                    self.exp_files[fn.replace('.json', '')] = 1

        if vocab:
            self.vocab = vocab
            self.vocab_size = vocab.size
        else:
            self.vocab_size = self._build_vocab()
        self._load_data()

        if cfg.limit_bspn_vocab:
            self.bspn_masks = self._construct_bspn_constraint()
        if cfg.limit_aspn_vocab:
            self.aspn_masks = self._construct_aspn_constraint()

        self.multi_acts_record = None