Example #1
    def _init_training(self, das_file, ttree_file, data_portion):
        """Initialize training.

        Store input data, initialize 1-hot feature representations for input and output and
        transform training data accordingly, initialize the classification neural network.
        """
        # read input
        log_info('Reading DAs from ' + das_file + '...')
        das = read_das(das_file)
        log_info('Reading t-trees from ' + ttree_file + '...')
        ttree_doc = read_ttrees(ttree_file)
        trees = trees_from_doc(ttree_doc, self.language, self.selector)

        # make training data smaller if necessary
        train_size = int(round(data_portion * len(trees)))
        self.train_trees = trees[:train_size]
        self.train_das = das[:train_size]

        # add empty tree + empty DA to training data
        # (i.e. forbid the network to keep any of its outputs "always-on")
        train_size += 1
        self.train_trees.append(TreeData())
        empty_da = DialogueAct()
        empty_da.parse('inform()')
        self.train_das.append(empty_da)

        self.train_order = range(len(self.train_trees))
        log_info('Using %d training instances.' % train_size)

        # initialize input features/embeddings
        if self.tree_embs:
            self.dict_size = self.tree_embs.init_dict(self.train_trees)
            self.X = np.array([self.tree_embs.get_embeddings(tree) for tree in self.train_trees])
        else:
            self.tree_feats = Features(['node: presence t_lemma formeme'])
            self.tree_vect = DictVectorizer(sparse=False, binarize_numeric=True)
            self.X = [self.tree_feats.get_features(tree, {}) for tree in self.train_trees]
            self.X = self.tree_vect.fit_transform(self.X)

        # initialize output features
        self.da_feats = Features(['dat: dat_presence', 'svp: svp_presence'])
        self.da_vect = DictVectorizer(sparse=False, binarize_numeric=True)
        self.y = [self.da_feats.get_features(None, {'da': da}) for da in self.train_das]
        self.y = self.da_vect.fit_transform(self.y)

        # initialize I/O shapes
        self.input_shape = [list(self.X[0].shape)]
        self.num_outputs = len(self.da_vect.get_feature_names())

        # initialize NN classifier
        self._init_neural_network()
Example #2
    def answer_confirm(self, state, res0, slots_to_confirm):
        conflicts = []
        selects = []
        noclues = []
        for slot, slot_value in slots_to_confirm.items():
            state.user_state_confirm[slot] = []

            if not slot in res0:
                noclues += [slot]
            elif not res0[slot].lower() in slot_value:
                conflicts += [slot]
                if len(slot_value) > 1:
                    selects += [(slot, res0[slot], )]

        if len(noclues) > 0:
            return DialogueAct(
                "&".join(["noclue(%s)" % n for n in noclues]))
        elif len(conflicts) == 0:
            if len(selects) == 0:
                return DialogueAct("affirm()")
            else:
                return DialogueAct(
                    "&".join(["confirm(%s=%s)" % (n, v, ) \
                              for n, v in selects]))
        else:
            conf_acts = []
            for conflict in conflicts:
                conf_acts += ["inform(%s='%s')" % (conflict, res0[conflict], )]
            res = DialogueAct("negate()")
            # XXX I understood sorting was not desired.  Substituted with
            # the non-sorting version.
            # res.merge(DialogueAct("&".join(conf_acts)))
            res.extend(conf_acts)
            return res
Example #3
    def filter(self, in_da):
        """Go through the input dialogue acts and pick only the ones
        that we can understand and that have good enough confidence."""

        new_nblist = DialogueActNBList()

        # for each dialogue act item check if it is of known type
        # and if it has good probability
        for item in in_da:
            da = item[1]
            new_da = DialogueAct()
            for dai in da:
                if dai.dat in ["inform", "request"]:
                    if dai.value is not None and not dai.value in self.policy.values:
                        continue
                if dai.dat in ["inform", "request", "confirm"]:
                    if not dai.name in self.policy.slots:
                        continue

                # check if the value is in our ontology
                #if type(dai.value) is str and \
                #        self.ontology_unknown_re.match(dai.value):
                #    continue

                if dai.dat in ["inform", "request", "other",
                               "confirm", "reqalts", "bye",
                               "restart"]:
                    new_da.append(dai)

            if item[0] >= 0.3:  # do not consider things below 0.3
                if len(new_da) > 0:
                    new_nblist.add(item[0], new_da)

        return new_nblist
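
A minimal usage sketch for the filter above (the probabilities and the slu_filter instance are hypothetical; DialogueActNBList.add is used exactly as in Examples #15 and #19): only understood act types with confidence of at least 0.3 survive.

nblist = DialogueActNBList()
nblist.add(0.8, DialogueAct("inform(food=chinese)"))  # kept, provided 'food'/'chinese' are in the policy's slots/values
nblist.add(0.1, DialogueAct("bye()"))                 # dropped: confidence below the 0.3 threshold
filtered = slu_filter.filter(nblist)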
Example #4
    def test_parse_X(self):
        from alex.components.slu.dainnclassifier import DAINNClassifier
        
        np.random.seed(0)

        cldb = CategoryLabelDatabase()
        class db:
            database = {
                "task": {
                    "find_connection": ["najít spojení", "najít spoj", "zjistit spojení",
                                        "zjistit spoj", "hledám spojení", 'spojení', 'spoj',
                                       ],
                    "find_platform": ["najít nástupiště", "zjistit nástupiště", ],
                    'weather': ['pocasi', 'jak bude', ],
                },
                "number": {
                    "1": ["jednu"]
                },
                "time": {
                    "now": ["nyní", "teď", "teďka", "hned", "nejbližší", "v tuto chvíli", "co nejdřív"],
                },
            }

        cldb.load(db_mod=db)

        preprocessing = SLUPreprocessing(cldb)
        clf = DAINNClassifier(cldb, preprocessing, features_size=4)

        # Train a simple classifier.
        das = {
            '1': DialogueAct('inform(task=weather)'),
            '2': DialogueAct('inform(time=now)'),
            '3': DialogueAct('inform(task=weather)'),
            '4': DialogueAct('inform(task=connection)'),
        }
        utterances = {
            '1': Utterance('pocasi pocasi pocasi pocasi pocasi'),
            '2': Utterance('hned ted nyni hned ted nyni'),
            '3': Utterance('jak bude jak bude jak bude jak bude'),
            '4': Utterance('kdy a odkat mi to jede'),
        }
        clf.extract_classifiers(das, utterances, verbose=False)
        clf.prune_classifiers(min_classifier_count=0)
        clf.gen_classifiers_data(min_pos_feature_count=0,
                                 min_neg_feature_count=0,
                                 verbose2=False)

        clf.train(inverse_regularisation=1e1, verbose=False)

        # Parse some sentences.
        utterance_list = UtteranceNBList()
        utterance_list.add(0.7, Utterance('pocasi'))
        utterance_list.add(0.7, Utterance('jak bude pocasi'))
        utterance_list.add(0.2, Utterance('hned'))
        utterance_list.add(0.2, Utterance('hned'))
        da_confnet = clf.parse_X(utterance_list, verbose=False)

        self.assertTrue(da_confnet.get_prob(DialogueActItem(dai='inform(task=weather)')) != 0.0)
        self.assertTrue(da_confnet.get_prob(DialogueActItem(dai='inform(time=now)')) != 0.0)
Example #5
def read_das(da_file):
    """Read dialogue acts from a file, one-per-line."""
    das = []
    with file_stream(da_file) as fh:
        for line in fh:
            da = DialogueAct()
            da.parse(line)
            das.append(da)
    return das
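
A short usage sketch (the file name and its contents are hypothetical): as the docstring says, the input file holds one dialogue act string per line.

# hypothetical contents of 'train-das.txt':
#   inform(task=find_connection)&inform(from_stop=Sparta)
#   request(to_stop)
das = read_das('train-das.txt')
print 'Read %d DAs, first one: %s' % (len(das), unicode(das[0]))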
Example #7
    def _zero_act_return(self):
        da = DialogueAct()
        fixed_slots_values = self.metadata['goals'][self.goal_id]['fixed_slots']
        for s, v in fixed_slots_values:
            da.append(DialogueActItem('inform', s, v))

        if self.slot_level_used == 0:
            self.slot_level_used = 1

        return [da]
Example #8
    def test_dialog(self):
        ontology_file = script_path(__file__, 'test_ruledm_data',
                                    'ontology.cfg')
        db_file = script_path(__file__, 'test_ruledm_data', 'data.txt')

        class TRuleDMPolicy(DRuleDMPolicy):
            db_cls = CamInfoDb

        class TRuleDM(RuleDM):
            policy_cls = TRuleDMPolicy

        dm = TRuleDM({
            'DM': {
                'ontology': ontology_file,
                'TRuleDM': {
                    'db_cfg': db_file,
                    'provide_code': False,
                    'code_submit_url': None
                },
                'dialogue_state': {
                    'type': DRuleDS,
                },
                'dialogue_policy': {
                    'type': TRuleDMPolicy,
                },
            },
            'Logging': {
                'system_logger': DummyLogger()
            }
        })

        turn_init = dm.da_out()
        self.assertEquals(turn_init.has_dat("hello"), True)
        turn_init = dm.da_out()
        self.assertEquals(turn_init.has_dat("hello"), True)

        dm.da_in([(1.0, DialogueAct("inform(food=chinese)"))])
        turn_1 = dm.da_out()
        self.assertEquals(turn_1.has_dat("inform"), True)

        dm.da_in([(1.0, DialogueAct("asdf(fdsa=asdf)"))])
        turn_2 = dm.da_out()
        self.assertEquals(turn_2.has_dat("notunderstood"), True)

        dm.da_in([(1.0, DialogueAct("inform(price=cheap)"))])
        turn_3 = dm.da_out()
        self.assertEquals(turn_3.has_dat("nomatch"), True)

        dm.da_in([(1.0, DialogueAct("bye()"))])
        turn_bye = dm.da_out()
        self.assertEquals(turn_bye.has_dat("bye"), True)
Example #9
    def parse_input_da(self, l):
        """Converts a text including a dialogue act and its probability into a dialogue act instance and float probability.

        The input text must have the following form:
            [prob] the dialogue act

        """
        ri = l.find(" ")

        prob = 1.0

        if ri != -1:
            da = l[ri + 1:]

            try:
                prob = float(l[:ri])
            except ValueError:
                # The first part of the input cannot be converted to a float,
                # so assume the whole input is a DA.
                da = l
        else:
            da = l

        try:
            da = DialogueAct(da)
        except (DialogueActException, DialogueActItemException):
            raise SemHubException("Invalid dialogue act: %s" % da)

        return prob, da
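
An illustration of the accepted input format (hypothetical values; hub stands for an instance of the class defining parse_input_da):

prob, da = hub.parse_input_da('0.9 inform(food=chinese)')  # leading number -> prob == 0.9
prob, da = hub.parse_input_da('inform(food=chinese)')      # no probability given -> prob defaults to 1.0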
Example #10
    def say_query(self, state):
        query = self.build_query(state)
        act = "&".join(["want(%s=%s)" % (
            k,
            v,
        ) for k, v in query.items()])
        return DialogueAct(act)
Example #11
    def test_tecto_template_nlg(self):
        # initialize
        cfg = Config.load_configs(config=CONFIG_DICT,
                                  use_default=False,
                                  log=False)
        nlg = TectoTemplateNLG(cfg)
        # test all cases
        for da, correct_text in zip(DAS, TEXTS):
            # try generation
            da = DialogueAct(da)
            generated_text = nlg.generate(da)
            # print output
            s = []
            s.append("")
            s.append("Input DA:")
            s.append(unicode(da))
            s.append("")
            s.append("Correct text:")
            s.append(correct_text)
            s.append("")
            s.append("Generated text:")
            s.append(generated_text)
            s.append("")

            # test the result
            self.assertEqual(correct_text, generated_text)
Example #12
def main(input_file, filter_threshold):

    data = [['type', 'abstr_utt', 'abstr_da', 'utt',
             'da']]  # create output headers

    with codecs.open(input_file, "r", 'UTF-8') as fh:
        for line in fh:
            print >> sys.stderr, 'Processing: ', line.strip()
            occ_num, utt, da = line.strip().split('\t')
            da = DialogueAct(da_str=da)
            occ_num = int(occ_num)

            if occ_num < filter_threshold:
                print >> sys.stderr, 'Input "%s" has only %d occurrences, skipping' % (
                    utt, occ_num)
                continue

            if re.match(r'^(\*[A-Z_]+)(\s+\*[A-Z_]+)*$', utt):
                print >> sys.stderr, 'Input "%s" only contains slots, skipping' % utt
                continue

            try:
                ret = process_utt(utt, da)
                print >> sys.stderr, 'Result:', "\n".join(
                    ["\t".join(i) for i in ret])
                print >> sys.stderr, ''
                data.extend(ret)
            except NotImplementedError as e:
                print >> sys.stderr, 'Error:', e

    with codecs.getwriter('utf-8')(sys.stdout) as fh:
        csvwrite = csv.writer(fh, delimiter=b"\t")
        for line in data:
            csvwrite.writerow(line)
Example #13
    def epilogue_final_code(self):

        data = None
        attempts = 0
        url_template = self.cfg['DM']['epilogue']['final_code_url']
        system_logger = self.cfg['Logging']['system_logger']

        # store a code on the server (try several times if not successful)
        while attempts < 10 and (not data or not data['response'] or data['response'] != 'success'):
            code = self.codes.popleft()
            self.codes.append(code)  # put the code back to the end of the queue for reuse
            attempts += 1
            # pull the URL
            url = url_template.format(code=code, logdir=system_logger.get_session_dir_name())
            data = urllib2.urlopen(url).read()
            data = json.loads(data, encoding='UTF-8')

        if attempts >= 10:
            # This shouldn't happen
            text = 'I am sorry. A valid code could not be generated'
        else:
            text = [c for c in code]
            text = ", ".join(text)
            text = self.cfg['DM']['epilogue']['final_code_text'].format(code=text)
            text = [text, ] * 3
            text = self.cfg['DM']['epilogue']['final_code_text_repeat'].join(text)

        da = DialogueAct('say(text="{text}")'.format(text=text))
        self.cfg['Logging']['session_logger'].dialogue_act("system", da)
        self.commands.send(DMDA(da, 'DM', 'HUB'))

        self.final_code_given = True
Example #14
    def say_inform(self, state, rec):
        islot_name = self.get_interesting_slot(state) or self.slots[0]
        if not islot_name in rec:
            islot_name = rec.keys()[0]
        slot_name = 'name'
        return DialogueAct(
            "inform(%s='%s')&inform(%s='%s')" % \
            (slot_name, rec[slot_name], islot_name, rec[islot_name])
        )
Example #15
    def test_swapping_merge_normalise(self):
        nblist1 = DialogueActNBList()
        nblist1.add(0.7, DialogueAct("hello()"))
        nblist1.add(0.2, DialogueAct("bye()"))
        nblist2 = deepcopy(nblist1)

        nblist1.merge().normalise()
        nblist2.normalise().merge()

        s = []
        s.append("")
        s.append("Using merge().normalise():")
        s.append(unicode(nblist1))
        s.append("")
        s.append("Using normalise().merge():")
        s.append(unicode(nblist2))
        s.append("")

        self.assertEqual(nblist1, nblist2)
Example #16
    def compose_utterance_greedy(self, da):
        """\
        Compose an utterance from templates by iteratively looking for
        the longest (up to self.compose_greedy_lookahead) matching
        sub-utterance at the current position in the DA.

        Returns the composed utterance.
        """
        composed_utt = []
        sub_start = 0
        # pass through the dialogue act
        while sub_start < len(da):
            dax_utt = None
            dax_len = None
            # greedily look for the longest template that will cover the next
            # dialogue act items (try longer templates first, from maximum
            # length given in settings down to 1).
            for sub_len in xrange(self.compose_greedy_lookahead, 0, -1):
                dax = DialogueAct()
                dax.extend(da[sub_start:sub_start + sub_len])
                try:
                    # try to find an exact match
                    dax_utt = self.random_select(self.templates[unicode(dax)])
                    dax_len = sub_len
                    break
                except KeyError:
                    # try to find a relaxed match
                    svsx = dax.get_slots_and_values()
                    try:
                        dax_utt = self.match_and_fill_generic(dax, svsx)
                        dax_len = sub_len
                        break
                    except TemplateNLGException:
                        # nothing found: look for shorter templates
                        continue
            if dax_utt is None:  # dummy backoff
                dax_utt = unicode(da[sub_start])
                dax_len = 1
            composed_utt.append(dax_utt)
            sub_start += dax_len
        return ' '.join(composed_utt)
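
To make the slicing in the greedy loop concrete, here is a small sketch with a hypothetical DA; the slice and extend calls mirror those used above:

da = DialogueAct("inform(pricerange='cheap')&inform(task='find')&inform(food='chinese')")
dax = DialogueAct()
dax.extend(da[0:2])   # sub-act covering the first two items, as tried with sub_len == 2
print unicode(dax)    # the first two items joined with '&'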
Example #18
def test_random_dialogues(user):
    metadata = get_metadata()
    for i in range(100):
        print '=======================Dialogue %i============================'%(i+1)
        user.new_dialogue()
        print 'Goal:', user.goal
        print '-'*60
        goal_des = metadata['goals'][user.goal['task']]
        ordered_acts = goal_des['acts']
        slots = goal_des['slots']
        for acts in ordered_acts:
            da = DialogueAct()
            for act in acts.split('&'):
                act_des = metadata['act_definitions'][act]
                slot = None
                if act_des['slot_included']:
                    slot = sample_from_list(slots)
                value = None
                if act_des['value_included']:
                    if slot not in user.goal.keys():
                        for s in get_equivalent_slots(goal_des, slot):
                            if s in user.goal.keys():
                                slot = s
                                break
                    if slot in user.goal.keys():
                        if sample_a_prob(0.5):
                            value = user.goal[slot]
                        else:
                            value = 'lct'
                    else:
                        value = 'lct'

                item = DialogueActItem(act, slot, value)
                da.append(item)
            print 'sys_da:\t\t', da
            user.da_in(da)
            da = user.da_out()
            print 'user_da:\t', da[0]
            if len(da[0]) == 0:
                raise RuntimeError("User simulator doesn't reply anything!")
                pdb.set_trace()
Example #19
    def test_merge_slu_nblists_full_nbest_lists(self):
        # make sure the alex.components.slu.da.merge_slu_nblists merges nblists correctly

        nblist1 = DialogueActNBList()
        nblist1.add(0.7, DialogueAct("hello()"))
        nblist1.add(0.2, DialogueAct("bye()"))
        nblist1.merge().normalise()
        # nblist1.normalise()

        nblist2 = DialogueActNBList()
        nblist2.add(0.6, DialogueAct("hello()"))
        nblist2.add(0.3, DialogueAct("restart()"))
        nblist2.merge().normalise()
        # nblist2.normalise()

        nblists = [[0.7, nblist1], [0.3, nblist2]]

        merged_nblists = merge_slu_nblists(nblists)

        correct_merged_nblists = DialogueActNBList()
        correct_merged_nblists.add(0.7 * 0.7, DialogueAct("hello()"))
        correct_merged_nblists.add(0.7 * 0.2, DialogueAct("bye()"))
        correct_merged_nblists.add(0.7 * 0.1, DialogueAct("other()"))
        correct_merged_nblists.add(0.3 * 0.6, DialogueAct("hello()"))
        correct_merged_nblists.add(0.3 * 0.3, DialogueAct("restart()"))
        correct_merged_nblists.add(0.3 * 0.1, DialogueAct("other()"))
        correct_merged_nblists.merge().normalise()
        # correct_merged_nblists.normalise()

        s = []
        s.append("")
        s.append("Merged nblists:")
        s.append(unicode(merged_nblists))
        s.append("")
        s.append("Correct merged results:")
        s.append(unicode(correct_merged_nblists))
        s.append("")
        print '\n'.join(s)

        self.assertEqual(unicode(merged_nblists),
                         unicode(correct_merged_nblists))
Example #20
    def say_slots(self, state, res0, slots_to_say):
        d_str = []
        for slot in slots_to_say:
            d_str += [
                "inform(%s='%s')" % (
                    slot,
                    res0.get(slot, 'dontknow'),
                )
            ]
            state.user_state_request[slot] = True

        return DialogueAct("&".join(d_str))
Example #21
def test_reply(user):
    user.new_dialogue()
    print 'GOAL', user.goal
    act_type = 'implconfirm'
    slots = ['from_stop', 'from_street', 'from_city']
    act_slot = None
    act_value = None
    for slot in slots:
        if slot in user.goal.keys():
            act_slot = slot
            act_value = user.goal[slot]
            break
    act_value='abc'
    da = DialogueAct()
    item = DialogueActItem(act_type, act_slot, act_value)
    da.append(item)
    item = DialogueActItem('request', 'to_stop')
    da.append(item)
    print 'sys_da:', da
    user.da_in(da)
    dao = user.da_out()
    print 'user_da:', dao[0]
Example #22
    def _build_da_nbest_list(self, i, da, prob):
        if i < len(self._sampled_da_items):
            da_items, probs = self._sampled_da_items[i]
            for dai_index in range(len(da_items)):
                if da is None:
                    da_new = DialogueAct()
                    da_new.append(da_items[dai_index])
                    self._build_da_nbest_list(i + 1, da_new, probs[dai_index])
                else:
                    da_new = DialogueAct()
                    da_new.extend(da)
                    da_new.append(da_items[dai_index])
                    # TODO check the equation and fix it when there are more types of confusion
                    self._build_da_nbest_list(i + 1, da_new, prob * probs[dai_index])
        else:
            self._nbest_list.add(da, prob)
Example #23
    def test_get_da_nblist(self):
        # Simple case with one good hypothesis.
        dacn = DialogueActConfusionNetwork()
        dacn.add(0.05, DialogueActItem(dai='inform(food=chinese)'))
        dacn.add(0.9, DialogueActItem(dai='inform(food=czech)'))
        dacn.add(0.05, DialogueActItem(dai='inform(food=russian)'))

        nblist = dacn.get_da_nblist()
        best_da = nblist.get_best_da()
        expected_da = DialogueAct(da_str='inform(food=czech)')
        self.assertEqual(best_da, expected_da)

        # More good hypotheses
        dacn = DialogueActConfusionNetwork()
        dacn.add(0.05, DialogueActItem(dai='inform(food=chinese)'))
        dacn.add(0.9, DialogueActItem(dai='inform(food=czech)'))
        dacn.add(0.9, DialogueActItem(dai='inform(food=russian)'))

        nblist = dacn.get_da_nblist()
        best_da = nblist.get_best_da()
        expected_da = DialogueAct(
            da_str='inform(food=czech)&inform(food=russian)')
        self.assertEqual(best_da, expected_da)
Example #24
    def compose_utterance_single(self, da):
        """\
        Compose an utterance from templates for single dialogue act items.
        Returns the composed utterance.
        """
        composed_utt = []
        # try to find a template for each single dialogue act item
        for dai in da:
            try:
                # look for an exact match
                dai_utt = self.random_select(self.templates[unicode(dai)])
            except KeyError:
                # try to find a relaxed match
                dax = DialogueAct()
                dax.append(dai)
                svsx = dax.get_slots_and_values()
                try:
                    dai_utt = self.match_and_fill_generic(dax, svsx)
                except TemplateNLGException:
                    dai_utt = unicode(dai)

            composed_utt.append(dai_utt)
        return ' '.join(composed_utt)
Example #26
    def _build_da_nbest_list(self, i, da, prob):
        '''Build all combinations of the DialogueActItems and probabilities saved in self._sampled_da_items.

        Currently not being used.
        '''
        if i < len(self._sampled_da_items):
            da_items, probs = self._sampled_da_items[i]
            for dai_index in range(len(da_items)):
                if da is None:
                    da_new = DialogueAct()
                    da_new.append(da_items[dai_index])
                    self._build_da_nbest_list(i + 1, da_new, probs[dai_index])
                else:
                    da_new = DialogueAct()
                    da_new.extend(da)
                    da_new.append(da_items[dai_index])
                    # TODO check the equation and fix it when there are more types of confusion
                    self._build_da_nbest_list(i + 1, da_new, prob * probs[dai_index])
        else:
            self._nbest_list.add(da, prob)
Example #27
File: dm.py Project: michlikv/alex
    def epilogue_final_code(self):
        code = self.codes.pop()

        # pull the url
        url = self.cfg['DM']['epilogue']['final_code_url'].format(code = code)
        urllib.urlopen(url)

        text = [c for c in code]
        text = ", ".join(text)
        text = self.cfg['DM']['epilogue']['final_code_text'].format(code = text)
        text = [text,]*3
        text = self.cfg['DM']['epilogue']['final_code_text_repeat'].join(text)

        da = DialogueAct('say(text="{text}")'.format(text=text))
        self.cfg['Logging']['session_logger'].dialogue_act("system", da)
        self.commands.send(DMDA(da, 'DM', 'HUB'))

        self.final_code_given = True
Example #28
    def _get_answer_da(self, da_in):
        '''Answer a system dialogue act.'''
        da_out = DialogueAct()
        out_of_patience = False

        reply_sys_acts = self.metadata['reply_system_acts']
        da_metadata = self._get_dialogue_act_metadata(da_in)
        for act_in in da_metadata.keys():
            #debug_print('------Handling the sys_act' +  act_in)
            #print '------Handling the sys_act', act_in
            reply = reply_sys_acts[act_in]
            if isinstance(reply, dict):  # this act has a different definition for each goal
                reply = reply[self.goal_id]
            answer = self._sample_element_from_list_dict(reply)
            if 'ordered_return_acts' in answer:  # process the list of answers in order, stop at the first applicable one
                for solution in answer['ordered_return_acts']:
                    case = self._sample_element_from_list_dict(solution)
                    da_items = self._build_one_answer(da_metadata[act_in], case, True)
                    if len(da_items) > 0:
                        answer = case  # for filtering acts with the add_to_da_prob property
                        break
            else:
                da_items = self._build_one_answer(da_metadata[act_in], answer)

            for item in da_items:  # each item may or may not be added to da_out (e.g. implconfirm)
                act_out_des = self._get_act_out_description(item.dat, answer)
                if 'add_to_da_prob' in act_out_des.keys():
                    if sample_a_prob(act_out_des['add_to_da_prob']) and item not in da_out:
                        da_out.append(item)
                else:
                    if item not in da_out:
                        da_out.append(item)
                # ------- update the patience history
                if item.name is not None:  # has a slot; the system act asked again about an already answered slot (over-answering is ignored)
                    if act_in not in self.patience_history.keys():
                        self.patience_history[act_in] = {}
                    if item.name not in self.patience_history[act_in]:
                        self.patience_history[act_in][item.name] = 1
                    else:
                        self.patience_history[act_in][item.name] += 1
                        if self.patience_level >= 1 and self.patience_history[act_in][item.name] > self.patience_level:
                            out_of_patience = True
                            break  # only break the inner loop
            #da_out.extend(da_items)
        if out_of_patience:
            if random.random() > 0.5:
                da_out = DialogueAct(self.config['out_of_patience_act'])
                print '!!!!ANGRY...'
            else:
                print '!!Almost ANGRY...'
        return da_out
Example #29
    def load_templates(self, file_name):
        """\
        Load templates from an external file, which is assumed to be a
        Python source which defines the variable 'templates' as a dictionary
        containing stringified dialog acts as keys and (lists of) templates
        as values.
        """
        try:
            templates = load_as_module(file_name, force=True).templates
            # normalize the templates
            self.templates = {}
            # generalised templates
            self.gtemplates = {}
            for k, v in templates.iteritems():
                da = DialogueAct(k)
                # k.sort()
                self.templates[unicode(da)] = v
                self.gtemplates[unicode(self.get_generic_da(da))] = (da, v)

        except Exception as e:
            raise TemplateNLGException('No templates loaded from %s -- %s!' % (file_name, e))
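
The docstring above describes the expected templates file: a Python module defining a dict named templates that maps stringified dialogue acts to a template or a list of templates. A hypothetical minimal file could look like this (the second entry reuses the DA/text pair from Example #30; the first is made up):

templates = {
    'hello()': ['Hello.', 'Hi, how may I help you?'],
    'affirm()&inform(num_transfers="2")': u'Ano. Na dané trase jsou dva přestupy.',
}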
Example #30
    def test_template_nlg(self):

        cfg = self.cfg
        nlg = TemplateNLG(cfg)

        da = DialogueAct('affirm()&inform(num_transfers="2")').sort()
        correct_text = u"Ano. Na dané trase jsou dva přestupy."
        generated_text = nlg.generate(da)

        s = []
        s.append("")
        s.append("Input DA:")
        s.append(unicode(da))
        s.append("")
        s.append("Correct text:")
        s.append(unicode(correct_text))
        s.append("")
        s.append("Generated text:")
        s.append(unicode(generated_text))
        s.append("")

        self.assertEqual(unicode(correct_text), unicode(generated_text))
Example #31
    def test_template_nlg_r(self):

        cfg = self.cfg
        nlg = TemplateNLG(cfg)

        da = DialogueAct('affirm()&inform(from_stop=Sparta)').sort()
        correct_text = "Ano, jede to ze zastávky Sparta."
        generated_text = nlg.generate(da)

        s = []
        s.append("")
        s.append("Input DA:")
        s.append(unicode(da))
        s.append("")
        s.append("Correct text:")
        s.append(unicode(correct_text))
        s.append("")
        s.append("Generated text:")
        s.append(unicode(generated_text))
        s.append("")

        self.assertEqual(unicode(correct_text), unicode(generated_text))
Example #32
    def answer_confirm(self, state, res0, slots_to_confirm):
        conflicts = []
        selects = []
        noclues = []
        for slot, slot_value in slots_to_confirm.items():
            state.user_state_confirm[slot] = []

            if not slot in res0:
                noclues += [slot]
            elif not res0[slot].lower() in slot_value:
                conflicts += [slot]
                if len(slot_value) > 1:
                    selects += [(
                        slot,
                        res0[slot],
                    )]

        if len(noclues) > 0:
            return DialogueAct("&".join(["noclue(%s)" % n for n in noclues]))
        elif len(conflicts) == 0:
            if len(selects) == 0:
                return DialogueAct("affirm()")
            else:
                return DialogueAct(
                    "&".join(["confirm(%s=%s)" % (n, v, ) \
                              for n, v in selects]))
        else:
            conf_acts = []
            for conflict in conflicts:
                conf_acts += [
                    "inform(%s='%s')" % (
                        conflict,
                        res0[conflict],
                    )
                ]
            res = DialogueAct("negate()")
            # XXX I understood sorting was not desired.  Substituted with
            # the non-sorting version.
            # res.merge(DialogueAct("&".join(conf_acts)))
            res.extend(conf_acts)
            return res
Example #33
    def test_template_nlg_r(self):

        cfg = self.cfg
        nlg = TemplateNLG(cfg)

        da = DialogueAct('affirm()&inform(pricerange="cheap")&inform(task="find")').sort()
        correct_text = "Ok, you are looking for something in the cheap price range."
        generated_text = nlg.generate(da)

        s = []
        s.append("")
        s.append("Input DA:")
        s.append(unicode(da))
        s.append("")
        s.append("Correct text:")
        s.append(unicode(correct_text))
        s.append("")
        s.append("Generated text:")
        s.append(unicode(generated_text))
        s.append("")
        print '\n'.join(s)

        self.assertEqual(unicode(correct_text), unicode(generated_text))
Example #34
    def parse_1_best(self, obs, verbose=False, *args, **kwargs):
        """Parse an utterance into a dialogue act.

        :rtype DialogueActConfusionNetwork
        """

        utterance = obs['utt']

        if isinstance(utterance, UtteranceHyp):
            # Parse just the utterance and ignore the confidence score.
            utterance = utterance.utterance

        if verbose:
            print 'Parsing utterance "{utt}".'.format(utt=utterance)

        res_cn = DialogueActConfusionNetwork()

        dict_da = self.utt2da.get(unicode(utterance), None)
        if dict_da:
            for dai in DialogueAct(dict_da):
                res_cn.add(1.0, dai)
            return res_cn

        utterance = self.preprocessing.normalise_utterance(utterance)
        abutterance, category_labels = self.abstract_utterance(utterance)

        if verbose:
            print 'After preprocessing: "{utt}".'.format(utt=abutterance)
            print category_labels

        self.parse_non_speech_events(utterance, res_cn)

        utterance = utterance.replace_all(['_noise_'], '').replace_all(
            ['_laugh_'], '').replace_all(['_ehm_hmm_'],
                                         '').replace_all(['_inhale_'], '')
        abutterance = abutterance.replace_all(['_noise_'], '').replace_all(
            ['_laugh_'], '').replace_all(['_ehm_hmm_'],
                                         '').replace_all(['_inhale_'], '')

        abutterance = self.handle_false_abstractions(abutterance)
        category_labels.add('CITY')
        category_labels.add('VEHICLE')
        category_labels.add('NUMBER')

        if len(res_cn) == 0:
            if 'STOP' in category_labels:
                self.parse_stop(abutterance, res_cn)
            if 'CITY' in category_labels:
                self.parse_city(abutterance, res_cn)
            if 'NUMBER' in category_labels:
                self.parse_number(abutterance)
                if any([word.startswith("TIME") for word in abutterance]):
                    category_labels.add('TIME')
            if 'TIME' in category_labels:
                self.parse_time(abutterance, res_cn)
            if 'DATE_REL' in category_labels:
                self.parse_date_rel(abutterance, res_cn)
            if 'AMPM' in category_labels:
                self.parse_ampm(abutterance, res_cn)
            if 'VEHICLE' in category_labels:
                self.parse_vehicle(abutterance, res_cn)
            if 'TASK' in category_labels:
                self.parse_task(abutterance, res_cn)

            self.parse_meta(utterance, res_cn)

        res_cn.merge()

        return res_cn
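
A hypothetical call sketch, assuming an initialised SLU component named slu and the Utterance class from Example #4; the returned confusion network can be turned into an n-best list as in Example #23:

cn = slu.parse_1_best({'utt': Utterance('chci jet na anděl')}, verbose=True)
nblist = cn.get_da_nblist()
print unicode(nblist.get_best_da())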
Example #35
def generate_task():

    task = []
    da = DialogueAct()

    # indicate that we're looking for connection
    da.append(DialogueActItem('inform', 'task', 'find_connection'))

    # get two distinct stops
    from_stop = random.choice(STOPS)
    to_stop = from_stop
    while to_stop == from_stop:
        to_stop = random.choice(STOPS)
    da.append(DialogueActItem('inform', 'from_stop', from_stop))
    da.append(DialogueActItem('inform', 'to_stop', to_stop))
    task.append(da)

    # generate random subsequent questions
    questions = random.sample(range(6), random.randint(5, 6) - len(task))

    query_change = False
    da = DialogueAct()
    for question in sorted(questions):
        dais = QUESTIONS[question]

        if dais[0].name in ['alternative', 'vehicle', 'time', 'to_stop'] and not query_change:
            query_change = True
            task.append(da)
            da = DialogueAct()

        if dais[0].name == 'to_stop':
            new_to_stop = random.choice(STOPS)
            while new_to_stop == from_stop or new_to_stop == to_stop:
                new_to_stop = random.choice(STOPS)
            dais[0].value = new_to_stop

        da.extend(dais)

    task.append(da)
    return task
Example #36
def main(args):

    data = []
    good_toks, good_types = 0, 0  # good contexts, useful for tasks
    fthr_toks, fthr_types = 0, 0  # filtered because of threshold
    fslt_toks, fslt_types = 0, 0  # filtered as they only contain slots
    frep_toks, frep_types = 0, 0  # filtered because no reply can be generated
    finished = {}

    with codecs.open(args.input_file, "r", 'UTF-8') as fh:
        for line in fh:
            print >> sys.stderr, 'Processing: ', line.strip()

            if line.count("\t") != 2:
                print >> sys.stderr, 'Invalid input format, skipping'
                continue

            occ_num, utt, da = line.strip().split('\t')
            da = DialogueAct(da_str=da)
            occ_num = int(occ_num)

            if occ_num < args.filter_threshold:
                print >> sys.stderr, 'Input "%s" has only %d occurrences, skipping' % (
                    utt, occ_num)
                fthr_toks += occ_num
                fthr_types += 1
                continue

            if re.match(r'^(\*[A-Z_]+)(\s+\*[A-Z_]+)*$', utt):
                print >> sys.stderr, 'Input "%s" only contains slots, skipping' % utt
                fslt_toks += occ_num
                fslt_types += 1
                continue

            try:
                ret = process_utt(utt, da)
                if not ret:
                    frep_toks += occ_num
                    frep_types += 1
                else:
                    good_toks += occ_num
                    good_types += 1
                print >> sys.stderr, 'Result:', "\n".join(
                    unicode(line) for line in ret)
                print >> sys.stderr, ''
                if args.occ_nums:
                    for ret_line in ret:
                        ret_line.occ_num = occ_num
                data.extend(ret)
            except NotImplementedError as e:
                frep_toks += occ_num
                frep_types += 1
                print >> sys.stderr, 'Error:', e

    if args.load_finished:
        with codecs.open(args.load_finished, "r", 'UTF-8') as fh:
            csvread = csv.reader(fh,
                                 delimiter=str(args.finished_csv_delim),
                                 quotechar=b'"')
            columns = DataLine.get_columns_from_header(csvread.next())
            for row in csvread:
                finished_line = DataLine.from_csv_line(row, columns)
                finished[finished_line.signature] = finished_line

    written = {}
    with codecs.getwriter('utf-8')(sys.stdout) as fh:
        # starting with the header
        csvwrite = csv.writer(fh, delimiter=b"\t", lineterminator="\n")
        csvwrite.writerow(DataLine.get_headers(args.occ_nums))
        for line in data:
            if line.signature in written:  # some lines may be duplicate, skip them
                print >> sys.stderr, 'Duplicate line:', line.signature
                continue
            # skip finished results (if they are loaded and if they should be skipped)
            if line.signature in finished:
                if finished[line.signature].slots != line.slots:
                    print >> sys.stderr, ('Slots changed for ', line.signature,
                                          '-- ignoring finished.')
                    csvwrite.writerow(line.as_tuple(args.occ_nums))
                elif not args.skip_finished:
                    finished[line.signature].occ_num = line.occ_num
                    csvwrite.writerow(finished[line.signature].as_tuple(
                        args.occ_nums))
            # default case: not found among finished
            else:
                csvwrite.writerow(line.as_tuple(args.occ_nums))

            written[line.signature] = line

    print >> sys.stderr, (
        "\n\nGood: %d / %d\nThreshold: %d / %d\nSlots: %d / %d\nReply: %d / %d"
        % (good_toks, good_types, fthr_toks, fthr_types, fslt_toks, fslt_types,
           frep_toks, frep_types))
Example #37
    def say_notunderstood(self):
        return DialogueAct("notunderstood()")
Example #38
def generate_task():

    task = []
    da = DialogueAct()

    # indicate that we're looking for connection
    da.append(DialogueActItem('inform', 'task', 'find_connection'))

    # get two distinct stops
    from_stop = random.choice(STOPS)
    to_stop = from_stop
    while to_stop == from_stop:
        to_stop = random.choice(STOPS)
    da.append(DialogueActItem('inform', 'from_stop', from_stop))
    da.append(DialogueActItem('inform', 'to_stop', to_stop))
    task.append(da)

    # generate random subsequent questions
    questions = random.sample(range(6), random.randint(5, 6) - len(task))

    query_change = False
    da = DialogueAct()
    for question in sorted(questions):
        dais = QUESTIONS[question]

        if dais[0].name in ['alternative', 'vehicle', 'time', 'to_stop'
                            ] and not query_change:
            query_change = True
            task.append(da)
            da = DialogueAct()

        if dais[0].name == 'to_stop':
            new_to_stop = random.choice(STOPS)
            while new_to_stop == from_stop or new_to_stop == to_stop:
                new_to_stop = random.choice(STOPS)
            dais[0].value = new_to_stop

        da.extend(dais)

    task.append(da)
    return task
Example #39
    def get_da(self, dialogue_state):
        # all slots being requested by the user
        requested_slots = dialogue_state.get_slots_being_requested()
        # all slots being confirmed by the user
        confirmed_slots = dialogue_state.get_slots_being_confirmed()
        # all slots which had been supplied by the user but have not been
        # implicitly confirmed
        non_informed_slots = dialogue_state.get_slots_being_noninformed()

        if len(self.das) == 0:
            # NLG("Thank you for calling. How may I help you?")
            self.last_system_dialogue_act = DialogueAct("hello()&thankyou()")
            dialogue_state.slots["ludait"] = "none"

        elif dialogue_state.slots["ludait"] == "bye":
            # NLG("Goodbye.")
            self.last_system_dialogue_act = DialogueAct("bye()")
            dialogue_state.slots["ludait"] = "none"

        elif dialogue_state.slots["ludait"] == "restart":
            # NLG("Let's start again from scratch. How may I help you?")
            dialogue_state.restart()
            self.last_system_dialogue_act = DialogueAct("restart()&hello()")
            dialogue_state.slots["ludait"] = "none"

        elif dialogue_state.slots["ludait"] == "repeat":
            # NLG - use the last dialogue act
            dialogue_state.slots["ludait"] = "none"

        elif dialogue_state.slots["ludait"] == "reqalts":
            # NLG("There is nothing else in the database.")
            self.last_system_dialogue_act = DialogueAct(
                "deny(alternatives=true)")
            dialogue_state.slots["ludait"] = "none"

        elif requested_slots:
            # inform about all requested slots
            self.last_system_dialogue_act = DialogueAct()
            for slot in requested_slots:
                dai = DialogueActItem("inform", slot, requested_slots[slot])
                self.last_system_dialogue_act.append(dai)
                dialogue_state.slots["rh_" + slot] = "none"

        elif confirmed_slots:
            # inform about all slots being confirmed by the user
            self.last_system_dialogue_act = DialogueAct()
            for slot in confirmed_slots:
                if confirmed_slots[slot] == dialogue_state.slots[slot]:
                    # it is as user expected
                    self.last_system_dialogue_act.append(
                        DialogueActItem("affirm"))
                    dai = DialogueActItem("inform", slot,
                                          dialogue_state.slots[slot])
                    self.last_system_dialogue_act.append(dai)
                else:
                    # it is something else to what user expected
                    self.last_system_dialogue_act.append(
                        DialogueActItem("negate"))
                    dai = DialogueActItem("deny", slot,
                                          dialogue_state.slots["ch_" + slot])
                    self.last_system_dialogue_act.append(dai)
                    dai = DialogueActItem("inform", slot,
                                          dialogue_state.slots[slot])
                    self.last_system_dialogue_act.append(dai)

                dialogue_state.slots["ch_" + slot] = "none"
        elif non_informed_slots:
            # implicitly confirm all slots provided but not yet implicitly
            # confirmed
            self.last_system_dialogue_act = DialogueAct()
            self.last_system_dialogue_act.append(DialogueActItem("affirm"))
            for slot in non_informed_slots:
                dai = DialogueActItem("inform", slot, non_informed_slots[slot])
                self.last_system_dialogue_act.append(dai)
        else:
            # NLG("Can I help you with anything else?")
            self.last_system_dialogue_act = DialogueAct("reqmore()")
            dialogue_state.slots["ludait"] = "none"

        # record the system dialogue acts
        self.das.append(self.last_system_dialogue_act)
        return self.last_system_dialogue_act
Example #41
File: dm.py Project: michlikv/alex
    def epilogue_final_question(self):
        da = DialogueAct('say(text="{text}")'.format(text=self.cfg['DM']['epilogue']['final_question']))
        self.cfg['Logging']['session_logger'].dialogue_act("system", da)
        self.commands.send(DMDA(da, 'DM', 'HUB'))
Example #42
File: dm.py Project: michlikv/alex
    def epilogue_final_apology(self):
        # apology for not reaching minimum number of turns
        text = self.cfg['DM']['epilogue']['final_code_text_min_turn_count_not_reached']
        da = DialogueAct('say(text="{text}")'.format(text=text))
        self.cfg['Logging']['session_logger'].dialogue_act("system", da)
        self.commands.send(DMDA(da, 'DM', 'HUB'))
Example #43
    def _init_training(self, das, trees, data_portion):
        """Initialize training.

        Store input data, initialize 1-hot feature representations for input and output and
        transform training data accordingly, initialize the classification neural network.

        @param das: name of source file with training DAs, or list of DAs
        @param trees: name of source file with corresponding trees/sentences, or list of trees
        @param data_portion: portion of the training data to be used (0.0-1.0)
        """
        # read input from files or take it directly from parameters
        if not isinstance(das, list):
            log_info('Reading DAs from ' + das + '...')
            das = read_das(das)
        if not isinstance(trees, list):
            log_info('Reading t-trees from ' + trees + '...')
            ttree_doc = read_ttrees(trees)
            if self.mode == 'tokens':
                tokens = tokens_from_doc(ttree_doc, self.language,
                                         self.selector)
                trees = self._tokens_to_flat_trees(tokens)
            elif self.mode == 'tagged_lemmas':
                tls = tagged_lemmas_from_doc(ttree_doc, self.language,
                                             self.selector)
                trees = self._tokens_to_flat_trees(tls, use_tags=True)
            else:
                trees = trees_from_doc(ttree_doc, self.language, self.selector)
        elif self.mode in ['tokens', 'tagged_lemmas']:
            trees = self._tokens_to_flat_trees(
                trees, use_tags=self.mode == 'tagged_lemmas')

        # make training data smaller if necessary
        train_size = int(round(data_portion * len(trees)))
        self.train_trees = trees[:train_size]
        self.train_das = das[:train_size]

        # ignore contexts, if they are contained in the DAs
        if isinstance(self.train_das[0], tuple):
            self.train_das = [da for (context, da) in self.train_das]

        # add empty tree + empty DA to training data
        # (i.e. forbid the network to keep any of its outputs "always-on")
        train_size += 1
        self.train_trees.append(TreeData())
        empty_da = DialogueAct()
        empty_da.parse('inform()')
        self.train_das.append(empty_da)

        self.train_order = range(len(self.train_trees))
        log_info('Using %d training instances.' % train_size)

        # initialize input features/embeddings
        if self.tree_embs:
            self.dict_size = self.tree_embs.init_dict(self.train_trees)
            self.X = np.array([
                self.tree_embs.get_embeddings(tree)
                for tree in self.train_trees
            ])
        else:
            self.tree_feats = Features(['node: presence t_lemma formeme'])
            self.tree_vect = DictVectorizer(sparse=False,
                                            binarize_numeric=True)
            self.X = [
                self.tree_feats.get_features(tree, {})
                for tree in self.train_trees
            ]
            self.X = self.tree_vect.fit_transform(self.X)

        # initialize output features
        self.da_feats = Features(['dat: dat_presence', 'svp: svp_presence'])
        self.da_vect = DictVectorizer(sparse=False, binarize_numeric=True)
        self.y = [
            self.da_feats.get_features(None, {'da': da})
            for da in self.train_das
        ]
        self.y = self.da_vect.fit_transform(self.y)

        # initialize I/O shapes
        if not self.tree_embs:
            self.input_shape = list(self.X[0].shape)
        else:
            self.input_shape = self.tree_embs.get_embeddings_shape()
        self.num_outputs = len(self.da_vect.get_feature_names())

        # initialize NN classifier
        self._init_neural_network()
        # initialize the NN variables
        self.session.run(tf.initialize_all_variables())
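As a quick illustration of the bookkeeping in _init_training above, the toy, standard-library-only sketch below walks through the data_portion slicing and the extra empty instance; the lists and values are made up.

    # Illustrative only: how data_portion trims the training data and how the
    # extra empty tree/DA instance is appended (toy data, no project imports).
    trees = ['tree1', 'tree2', 'tree3', 'tree4']
    das = ['inform(a=1)', 'inform(b=2)', 'inform(c=3)', 'inform(d=4)']
    data_portion = 0.5

    train_size = int(round(data_portion * len(trees)))   # -> 2
    train_trees = trees[:train_size]
    train_das = das[:train_size]

    # one extra "empty" instance keeps the classifier from learning
    # always-on outputs, exactly as in the method above
    train_size += 1
    train_trees.append(None)          # stands in for TreeData()
    train_das.append('inform()')      # stands in for the parsed empty DA
    print 'Using %d training instances.' % train_size    # -> 3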
Example #44
0
    def _init_training(self, das, trees, data_portion):
        """Initialize training.

        Store input data, initialize 1-hot feature representations for input and output and
        transform training data accordingly, initialize the classification neural network.

        @param das: name of source file with training DAs, or list of DAs
        @param trees: name of source file with corresponding trees/sentences, or list of trees
        @param data_portion: portion of the training data to be used (0.0-1.0)
        """
        # read input from files or take it directly from parameters
        if not isinstance(das, list):
            log_info('Reading DAs from ' + das + '...')
            das = read_das(das)
        if not isinstance(trees, list):
            log_info('Reading t-trees from ' + trees + '...')
            ttree_doc = read_ttrees(trees)
            if self.use_tokens:
                tokens = tokens_from_doc(ttree_doc, self.language, self.selector)
                trees = self._tokens_to_flat_trees(tokens)
            else:
                trees = trees_from_doc(ttree_doc, self.language, self.selector)
        elif self.use_tokens:
            trees = self._tokens_to_flat_trees(trees)

        # make training data smaller if necessary
        train_size = int(round(data_portion * len(trees)))
        self.train_trees = trees[:train_size]
        self.train_das = das[:train_size]

        # ignore contexts, if they are contained in the DAs
        if isinstance(self.train_das[0], tuple):
            self.train_das = [da for (context, da) in self.train_das]

        # add empty tree + empty DA to training data
        # (i.e. forbid the network to keep any of its outputs "always-on")
        train_size += 1
        self.train_trees.append(TreeData())
        empty_da = DialogueAct()
        empty_da.parse('inform()')
        self.train_das.append(empty_da)

        self.train_order = range(len(self.train_trees))
        log_info('Using %d training instances.' % train_size)

        # initialize input features/embeddings
        if self.tree_embs:
            self.dict_size = self.tree_embs.init_dict(self.train_trees)
            self.X = np.array([self.tree_embs.get_embeddings(tree) for tree in self.train_trees])
        else:
            self.tree_feats = Features(['node: presence t_lemma formeme'])
            self.tree_vect = DictVectorizer(sparse=False, binarize_numeric=True)
            self.X = [self.tree_feats.get_features(tree, {}) for tree in self.train_trees]
            self.X = self.tree_vect.fit_transform(self.X)

        # initialize output features
        self.da_feats = Features(['dat: dat_presence', 'svp: svp_presence'])
        self.da_vect = DictVectorizer(sparse=False, binarize_numeric=True)
        self.y = [self.da_feats.get_features(None, {'da': da}) for da in self.train_das]
        self.y = self.da_vect.fit_transform(self.y)

        # initialize I/O shapes
        if not self.tree_embs:
            self.input_shape = list(self.X[0].shape)
        else:
            self.input_shape = self.tree_embs.get_embeddings_shape()
        self.num_outputs = len(self.da_vect.get_feature_names())

        # initialize NN classifier
        self._init_neural_network()
        # initialize the NN variables
        self.session.run(tf.initialize_all_variables())
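Both _init_training variants finish with tf.initialize_all_variables(), which belongs to the pre-1.0 TensorFlow API; from TensorFlow 0.12 onwards it was deprecated in favour of tf.global_variables_initializer(). A minimal sketch of the same final step, assuming one of those newer graph-mode (1.x) versions:

    import tensorflow as tf

    # Assumption: TensorFlow >= 0.12 / 1.x, where the variable-initialisation
    # op is created with global_variables_initializer() instead of the
    # deprecated initialize_all_variables() used in the examples above.
    session = tf.Session()
    session.run(tf.global_variables_initializer())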
Example #45
0
File: dm.py Project: thanhlct/alex
    def process_pending_commands(self):
        """Process all pending commands.

        Available commands:
          stop() - stop processing and exit the process
          flush() - flush input buffers.
            Now it only flushes the input connection.

        Return True if the process should terminate.
        """

        while self.commands.poll():
            command = self.commands.recv()

            if self.cfg['DM']['debug']:
                self.cfg['Logging']['system_logger'].debug(command)

            if isinstance(command, Command):
                #Thanh:
                if command.parsed['__name__'] == 'print_log_dir':
                    print '===***===session-log-dir:', command.source

                if command.parsed['__name__'] == 'stop':
                    return True

                if command.parsed['__name__'] == 'flush':
                    # discard all data in the input buffers
                    while self.slu_hypotheses_in.poll():
                        data_in = self.slu_hypotheses_in.recv()

                    self.dm.end_dialogue()

                    self.commands.send(Command("flushed()", 'DM', 'HUB'))
                    
                    return False

                #if command.parsed['__name__'] == 'prepare_new_dialogue':
                    #self.dm.new_dialogue()

                if command.parsed['__name__'] == 'new_dialogue':
                    self.dm.new_dialogue()  # thanh change???

                    self.epilogue_state = None

                    self.cfg['Logging']['session_logger'].turn("system")
                    self.dm.log_state()

                    # generate the first DM output
                    da = self.dm.da_out()

                    if self.cfg['DM']['debug']:
                        s = []
                        s.append("DM Output")
                        s.append("-"*60)
                        s.append(unicode(da))
                        s.append("")
                        s = '\n'.join(s)
                        self.cfg['Logging']['system_logger'].debug(s)

                    self.cfg['Logging']['session_logger'].dialogue_act("system", da)

                    self.commands.send(DMDA(da, 'DM', 'HUB'))

                    return False

                if command.parsed['__name__'] == 'end_dialogue':
                    self.dm.end_dialogue()
                    return False

                if command.parsed['__name__'] == 'timeout':
                    # check whether there is a looong silence
                    # if yes then inform the DM

                    silence_time = command.parsed['silence_time']

                    cn = DialogueActConfusionNetwork()
                    cn.add(1.0, DialogueActItem('silence','time', silence_time))

                    # process the input DA
                    self.dm.da_in(cn)

                    self.cfg['Logging']['session_logger'].turn("system")
                    self.dm.log_state()

                    print '----Time out: ', self.epilogue_state, silence_time
                    '''Thanh
                    if self.epilogue_state == 'give_code':
                        # a cant_apply act has been chosen
                        self.cfg['Logging']['session_logger'].dialogue_act("system", self.epilogue_da)
                        self.commands.send(DMDA(self.epilogue_da, 'DM', 'HUB'))
                        self.commands.send(Command('hangup()', 'DM', 'HUB'))
                        return False
                    #'''
                        
                    if self.epilogue_state and float(silence_time) > 5.0: 
                        if self.epilogue_state == 'final_question': # and self.final_question_repeated<16:
                            da = DialogueAct('say(text="{text}")'.format(text="Sorry, did you get the correct information?"))
                            #self.final_question_repeated += 1
                            self.cfg['Logging']['session_logger'].dialogue_act("system", da)
                            self.commands.send(DMDA(da, 'DM', 'HUB'))
                        else:
                            # the user was silent for too long, so hang up
                            self.cfg['Logging']['session_logger'].dialogue_act("system", self.epilogue_da)
                            self.commands.send(DMDA(self.epilogue_da, 'DM', 'HUB'))
                            self.commands.send(Command('hangup()', 'DM', 'HUB'))
                    else:
                        da = self.dm.da_out()

                        if self.cfg['DM']['debug']:
                            s = []
                            s.append("DM Output")
                            s.append("-"*60)
                            s.append(unicode(da))
                            s.append("")
                            s = '\n'.join(s)
                            self.cfg['Logging']['system_logger'].debug(s)

                        self.cfg['Logging']['session_logger'].dialogue_act("system", da)
                        self.commands.send(DMDA(da, 'DM', 'HUB'))

                        if da.has_dat("bye"):
                            self.commands.send(Command('hangup()', 'DM', 'HUB'))

                    return False

        return False
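To make the timeout branch above easier to follow in isolation, here is a small standalone sketch of the silence confusion network it feeds to the dialogue manager; the import path is an assumption about the alex project layout and the silence_time value is made up.

    # Illustrative only: building the "silence" input that the timeout handler
    # passes to self.dm.da_in() (import path assumed from the alex project).
    from alex.components.slu.da import DialogueActConfusionNetwork, DialogueActItem

    silence_time = 12.0   # hypothetical number of seconds of user silence
    cn = DialogueActConfusionNetwork()
    cn.add(1.0, DialogueActItem('silence', 'time', silence_time))
    # cn would then be handed to the dialogue manager, e.g. self.dm.da_in(cn)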