Example #1
    def decode_sent(self, sentinfo, output_fname, config=None):
        if config is None:
            config = self.config
        t0 = time.time()
        self.X = {}
        self.y = {}
        self.baseXs = []
        self.baseX_pointers = {}
        self.fnames = {}
        log_input_key = "batch"
        if config.has_option("decode", "log_input_key"):
            log_input_key = config.get("decode", "log_input_key")

        self.extractFeatures2(sentinfo, log_input_key=log_input_key)
        decode_results = self.decode()
        counter = defaultdict(int)

        active_tuples = self.tuples.activeTuples_sent(sentinfo)
        tuple_distribution = {}
        for this_tuple in active_tuples:
            index = counter[this_tuple]
            assert len(decode_results[this_tuple]) == 1
            if len(decode_results[this_tuple]) - 1 < index:
                p = 0
            else:
                p = decode_results[this_tuple][index]
            # p = decode_results[this_tuple][index]
            tuple_distribution[Tuples.generic_to_specific(this_tuple)] = p
            # check we are decoding the right utterance
            counter[this_tuple] += 1
        slu_hyps = self.tuples.distributionToNbest(tuple_distribution)

        return slu_hyps
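The defaultdict counter in decode_sent indexes repeated occurrences of the same active tuple and falls back to a probability of 0 when the decoder returned fewer scores than occurrences. A stripped-down sketch of that bookkeeping, with toy tuple names and scores rather than real decoder output:

    from collections import defaultdict

    decode_results = {("inform", "food"): [0.9]}   # one score per decoded occurrence
    counter = defaultdict(int)

    tuple_distribution = {}
    for this_tuple in [("inform", "food"), ("inform", "food")]:  # active twice, decoded once
        index = counter[this_tuple]
        scores = decode_results[this_tuple]
        # same fallback as above: out-of-range occurrences get probability 0
        tuple_distribution[this_tuple] = scores[index] if index < len(scores) else 0
        counter[this_tuple] += 1

    print(tuple_distribution)   # {('inform', 'food'): 0} -- the second pass overwrote 0.9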
Example #2
    def __init__(self, config):
        # classifier type
        self.type = "svm"
        if config.has_option("classifier", "type"):
            self.type = config.get("classifier", "type")

        # min_examples
        self.min_examples = 10
        if config.has_option("classifier", "min_examples"):
            self.min_examples = int(config.get("classifier", "min_examples"))

        # features
        self.features = ["cnet"]
        if config.has_option("classifier", "features"):
            self.features = json.loads(config.get("classifier", "features"))
        self.feature_extractors = []
        for feature in self.features:
            self.feature_extractors.append(
                sutils.import_class(
                    "convlab.modules.nlu.multiwoz.svm.Features." +
                    feature)(config))
        print(self.feature_extractors)
        self.tuples = Tuples.tuples(config)
        self.config = config
        self.cnet_extractor = cnet_extractor(config)

        # store data:
        self.X = {}
        self.y = {}
        self.baseXs = []
        self.baseX_pointers = {}
        self.fnames = {}
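The constructor only consults the [classifier] section of the config it is given. A minimal sketch of a config object that exercises every option read above (section contents are illustrative and built in code rather than loaded from a real .cfg file):

    import configparser, json

    config = configparser.ConfigParser()
    config["classifier"] = {
        "type": "svm",
        "min_examples": "10",
        "features": json.dumps(["cnet"]),   # parsed back with json.loads in __init__
    }

    # Mirrors the option lookups done in __init__:
    assert config.has_option("classifier", "type")
    features = json.loads(config.get("classifier", "features"))   # -> ["cnet"]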
Example #3
    def export(self, models_fname, dictionary_fname, config_fname):
        print("exporting Classifier for Caesar to read")
        print("models to be saved in", models_fname)
        print("dictionary to be saved in", dictionary_fname)
        print("config to be saved in", config_fname)

        if self.type != "svm":
            print("Only know how to export SVMs")
            return
        lines = []
        for this_tuple in self.classifiers:
            if self.classifiers[this_tuple] is not None:
                t = this_tuple
                if Tuples.is_generic(this_tuple[-1]):
                    t = this_tuple[:-1] + ("<generic_value>", )
                lines += ['(' + ','.join(t) + ')']
                lines += utils.svm_to_libsvm(
                    self.classifiers[this_tuple].model)
                lines += [".", ""]
        models_savefile = open(models_fname, "w")
        for line in lines:
            models_savefile.write(line + "\n")
        models_savefile.close()

        # save dictionary
        json_dictionary = []
        dictionary_items = sorted(self.dictionary.items(), key=lambda x: x[1])
        assert [x[1] for x in dictionary_items] == list(range(len(self.dictionary)))
        keys = [list(x[0]) for x in dictionary_items]

        json.dump(keys, open(dictionary_fname, "w"))

        # save config
        config_savefile = open(config_fname, "w")
        config_savefile.write(
            "# Automatically generated by CNetTrain scripts\n")
        options = {
            "FEATURES": json.dumps(self.features),
            "MAX_ACTIVE_TUPLES": str(self.tuples.max_active),
            "TAIL_CUTOFF": str(self.tuples.tail_cutoff),
            "MODELS": os.path.join(os.getcwd(), models_fname),
            "DICTIONARY": os.path.join(os.getcwd(), dictionary_fname),
        }
        if "cnet" in self.features:
            index = self.features.index("cnet")
            cnf = self.feature_extractors[index]
            options["MAX_NGRAM_LENGTH"] = str(cnf.max_length)
            options["MAX_NGRAMS"] = str(cnf.max_ngrams)
        for key in options:
            this_line = "CNET   : %s" % key
            this_line = this_line.ljust(30)
            this_line += "= " + options[key]
            config_savefile.write("\t" + this_line + "\n")
        config_savefile.close()
        print("exported Classifier.")
Example #4
    def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
        final_ngrams = self.final_ngrams
        # do we need to add generic ngrams?
        new_ngrams = []
        if Tuples.is_generic(this_tuple[-1]):
            gvalue = this_tuple[-1]
            for ngram in final_ngrams:
                new_ngram = cn_ngram_replaced(ngram, gvalue.value.lower(), "<generic_value>")
                if new_ngram is not False:
                    new_ngrams.append(new_ngram)

        return dict([(ng.string_repn(), ng.score()) for ng in new_ngrams])
Example #5
    def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
        final_ngrams = self.final_ngrams
        # do we need to add generic ngrams?
        new_ngrams = []

        if Tuples.is_generic(this_tuple[-1]):
            gvalue = this_tuple[-1]
            for ngram, score in final_ngrams:
                if gvalue.value is not None and gvalue.value.lower() in ngram:
                    new_ngram = ngram.replace(gvalue.value.lower(), "<generic_value>")
                    new_ngrams.append((new_ngram, score))

        return dict(new_ngrams)
Example #6
 def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
     if Tuples.is_generic(this_tuple[-1]):
         return {"<generic_value=" + this_tuple[-1].value + ">": 1}
     else:
         return {}
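This extractor only fires on tuples whose last element is a generic value; the __main__ demo in Example #7 builds exactly such a tuple. Assuming the same Tuples module, the call would look roughly like this, with the return value inferred from the code above rather than verified against the corpus:

    tup = ("inform", "food", Tuples.genericValue("food", "modern european"))
    # tuple_calculate(tup, log_turn) -> {"<generic_value=modern european>": 1}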
Example #7
        return False
    out = cnNgram(new_words, ngram.logp, delta=len(searchwords) - 1)
    return out


if __name__ == '__main__':
    import math
    cn = [
        {"arcs": [{"word": "<s>", "score": 0.0}]},
        {"arcs": [{"word": "hi", "score": 0.0}]},
        {"arcs": [{"word": "there", "score": -math.log(2)}, {"word": "!null", "score": -math.log(2)}]},
        {"arcs": [{"word": "how", "score": 0.0}]},
        {"arcs": [{"word": "are", "score": 0.0}]},
        {"arcs": [{"word": "you", "score": 0.0}]},
        {"arcs": [{"word": "</s>", "score": 0.0}]}

    ]
    final_ngrams = get_cnngrams(cn, 200, 3)
    print(dict([(ng.string_repn(), ng.score()) for ng in final_ngrams]))
    import configparser, json, Tuples

    config = configparser.ConfigParser()
    config.read("output/experiments/feature_set/run_1.cfg")
    nb = cnet(config)
    log_file = json.load(open("corpora/data/Mar13_S2A0/voip-318851c80b-20130328_224811/log.json"))
    log_turn = log_file["turns"][2]
    print(nb.calculate(
        log_turn
    ))
    tup = ("inform", "food", Tuples.genericValue("food", "modern european"))
    print(nb.tuple_calculate(tup, log_turn))
Example #8
    def extractFeatures(self, dw, log_input_key="batch"):
        # given a dataset walker,
        # adds examples to self.X and self.y
        total_calls = len(dw.session_list)
        print(total_calls)
        # print(dw.session_list)
        self.keys = set([])
        for call_num, call in enumerate(dw):
            print('[%d/%d]' % (call_num, total_calls))
            for log_turn, label_turn in call:
                if label_turn is not None:
                    uacts = label_turn['semantics']['json']
                    these_tuples = self.tuples.uactsToTuples(uacts)
                    # check there aren't any tuples we were not expecting:
                    for this_tuple in these_tuples:
                        if this_tuple not in self.tuples.all_tuples:
                            print("Warning: unexpected tuple", this_tuple)
                    # convert tuples to specific tuples:
                    these_tuples = [
                        Tuples.generic_to_specific(tup) for tup in these_tuples
                    ]

                # which tuples would be considered (active) for this turn?
                active_tuples = self.tuples.activeTuples(log_turn)

                # calculate base features that are independent of the tuple
                baseX = defaultdict(float)
                for feature_extractor in self.feature_extractors:
                    feature_name = feature_extractor.__class__.__name__
                    new_feats = feature_extractor.calculate(
                        log_turn, log_input_key=log_input_key)
                    # if new_feats != {}:
                    #     print('base feat:',new_feats.keys())
                    for key in new_feats:
                        baseX[(feature_name, key)] += new_feats[key]
                        self.keys.add((feature_name, key))
                self.baseXs.append(baseX)

                # print('these_tuples',these_tuples)
                # print('active_tuples',active_tuples)

                for this_tuple in active_tuples:
                    # print(this_tuple)
                    if label_turn is not None:
                        y = (Tuples.generic_to_specific(this_tuple)
                             in these_tuples)

                    X = defaultdict(float)
                    for feature_extractor in self.feature_extractors:
                        feature_name = feature_extractor.__class__.__name__
                        new_feats = feature_extractor.tuple_calculate(
                            this_tuple, log_turn, log_input_key=log_input_key)
                        # if new_feats!={}:
                        #     print('tuple feat',new_feats.keys())
                        for key in new_feats:
                            X[(feature_name, key)] += new_feats[key]
                            self.keys.add((feature_name, key))

                    if this_tuple not in self.X:
                        self.X[this_tuple] = []
                    if this_tuple not in self.y:
                        self.y[this_tuple] = []
                    if this_tuple not in self.baseX_pointers:
                        self.baseX_pointers[this_tuple] = []
                    # if this_tuple not in self.fnames :
                    #     self.fnames[this_tuple] = []

                    self.X[this_tuple].append(X)
                    if label_turn is not None:
                        self.y[this_tuple].append(y)

                    self.baseX_pointers[this_tuple].append(
                        len(self.baseXs) - 1)
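After extractFeatures has walked the dataset, the per-tuple stores line up index for index: self.X[t][i] holds the tuple-specific features for occurrence i, self.y[t][i] the boolean label, and self.baseX_pointers[t][i] an index into the shared self.baseXs list. A minimal sketch of the shapes involved, with toy feature values not taken from any corpus:

    from collections import defaultdict

    X_example = defaultdict(float)
    X_example[("cnet", "<generic_value>")] += 0.5   # (extractor class name, feature key) -> value
    y_example = True                                 # the tuple appeared in the labelled semantics
    base_pointer = 0                                 # index of this turn's entry in self.baseXs

    # self.X[this_tuple][i], self.y[this_tuple][i] and self.baseX_pointers[this_tuple][i]
    # all describe the same turn i for a given tuple.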