def decode_sent(self, sentinfo, output_fname, config=None):
    """Decode one sentence into an n-best list of SLU hypotheses.

    :param sentinfo: sentence/turn structure consumed by extractFeatures2
        and activeTuples_sent
    :param output_fname: unused here; kept for interface compatibility
    :param config: optional ConfigParser; defaults to self.config
    :return: n-best list produced by tuples.distributionToNbest
    """
    if config is None:  # fixed: was `== None`
        config = self.config
    # Reset per-call feature stores before extracting fresh features.
    self.X = {}
    self.y = {}
    self.baseXs = []
    self.baseX_pointers = {}
    self.fnames = {}
    log_input_key = "batch"
    if config.has_option("decode", "log_input_key"):
        log_input_key = config.get("decode", "log_input_key")
    self.extractFeatures2(sentinfo, log_input_key=log_input_key)
    decode_results = self.decode()
    counter = defaultdict(int)
    active_tuples = self.tuples.activeTuples_sent(sentinfo)
    tuple_distribution = {}
    for this_tuple in active_tuples:
        # index counts repeated occurrences of the same tuple in this turn.
        index = counter[this_tuple]
        # NOTE(review): this assert forces exactly one decode result per
        # tuple, which makes the `p = 0` fallback below reachable only for
        # index > 0 — presumably a guard for repeated tuples; confirm.
        assert len(decode_results[this_tuple]) == 1
        if len(decode_results[this_tuple]) - 1 < index:
            p = 0
        else:
            p = decode_results[this_tuple][index]
        tuple_distribution[Tuples.generic_to_specific(this_tuple)] = p
        counter[this_tuple] += 1
    slu_hyps = self.tuples.distributionToNbest(tuple_distribution)
    return slu_hyps
def __init__(self, config):
    """Initialise the classifier from a ConfigParser object.

    Reads the [classifier] section (type, min_examples, features), builds
    one feature extractor per configured feature, and sets up empty
    per-tuple data stores.
    """
    # Options from the [classifier] section, each with a default.
    self.type = (config.get("classifier", "type")
                 if config.has_option("classifier", "type") else "svm")
    self.min_examples = (int(config.get("classifier", "min_examples"))
                         if config.has_option("classifier", "min_examples")
                         else 10)
    if config.has_option("classifier", "features"):
        self.features = json.loads(config.get("classifier", "features"))
    else:
        self.features = ["cnet"]
    # One extractor per feature name, imported by dotted path.
    self.feature_extractors = [
        sutils.import_class(
            "convlab.modules.nlu.multiwoz.svm.Features." + feature_name)(config)
        for feature_name in self.features
    ]
    print(self.feature_extractors)
    self.tuples = Tuples.tuples(config)
    self.config = config
    self.cnet_extractor = cnet_extractor(config)
    # Per-tuple training data, filled by the extractFeatures* methods.
    self.X = {}
    self.y = {}
    self.baseXs = []
    self.baseX_pointers = {}
    self.fnames = {}
def export(self, models_fname, dictionary_fname, config_fname):
    """Export the trained SVM models, feature dictionary and config for Caesar.

    :param models_fname: path for the libsvm-format model dump
    :param dictionary_fname: path for the JSON feature-key dictionary
    :param config_fname: path for the generated CNET config file
    :return: None; prints progress and returns early if type != "svm"
    """
    print("exporting Classifier for Caesar to read")
    print("models to be saved in", models_fname)
    print("dictionary to be saved in", dictionary_fname)
    print("config to be saved in", config_fname)
    if self.type != "svm":
        print("Only know how to export SVMs")
        return
    lines = []
    for this_tuple in self.classifiers:
        if self.classifiers[this_tuple] is not None:  # fixed: was `!= None`
            t = this_tuple
            if Tuples.is_generic(this_tuple[-1]):
                t = this_tuple[:-1] + ("<generic_value>", )
            lines += ['(' + ','.join(t) + ')']
            lines += utils.svm_to_libsvm(
                self.classifiers[this_tuple].model)
            lines += [".", ""]
    # Fixed: opened "wb" but wrote str lines — a TypeError on Python 3;
    # text mode with a context manager closes the file reliably.
    with open(models_fname, "w") as models_savefile:
        for line in lines:
            models_savefile.write(line + "\n")
    # save dictionary
    # Fixed: dict.items() is a view on Python 3 (no .sort()); sort a list
    # by feature index so position i holds the key with index i.
    dictionary_items = sorted(self.dictionary.items(), key=lambda x: x[1])
    # Fixed: list == range(...) is always False on Python 3; compare lists.
    assert [x[1] for x in dictionary_items] == list(range(len(self.dictionary)))
    keys = [list(x[0]) for x in dictionary_items]
    with open(dictionary_fname, "w") as dictionary_savefile:
        json.dump(keys, dictionary_savefile)
    # save config
    with open(config_fname, "w") as config_savefile:
        config_savefile.write(
            "# Automatically generated by CNetTrain scripts\n")
        options = {
            "FEATURES": json.dumps(self.features),
            "MAX_ACTIVE_TUPLES": str(self.tuples.max_active),
            "TAIL_CUTOFF": str(self.tuples.tail_cutoff),
            "MODELS": os.path.join(os.getcwd(), models_fname),
            "DICTIONARY": os.path.join(os.getcwd(), dictionary_fname),
        }
        if "cnet" in self.features:
            index = self.features.index("cnet")
            cnf = self.feature_extractors[index]
            options["MAX_NGRAM_LENGTH"] = str(cnf.max_length)
            options["MAX_NGRAMS"] = str(cnf.max_ngrams)
        for key in options:
            this_line = "CNET : %s" % key
            this_line = this_line.ljust(30)
            this_line += "= " + options[key]
            config_savefile.write("\t" + this_line + "\n")
    print("exported Classifier.")
def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
    """Confusion-network n-gram features for a generic-valued tuple.

    For a tuple whose last element is a generic value, re-emits every
    final n-gram with the value's surface form replaced by
    "<generic_value>"; otherwise returns an empty dict.
    """
    final_ngrams = self.final_ngrams
    # do we need to add generic ngrams?
    new_ngrams = []
    if Tuples.is_generic(this_tuple[-1]):
        gvalue = this_tuple[-1]
        # Guard against a missing surface form, consistent with the n-best
        # variant of this method (previously raised AttributeError).
        if gvalue.value is not None:
            needle = gvalue.value.lower()
            for ngram in final_ngrams:
                new_ngram = cn_ngram_replaced(ngram, needle, "<generic_value>")
                # cn_ngram_replaced signals "no occurrence" with False.
                if new_ngram is not False:  # fixed: was `!= False`
                    new_ngrams.append(new_ngram)
    return {ng.string_repn(): ng.score() for ng in new_ngrams}
def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
    """N-best n-gram features for a generic-valued tuple.

    When the tuple's last element is a generic value with a known surface
    form, every stored (ngram, score) whose text contains that form is
    re-emitted with the form replaced by "<generic_value>". Returns an
    empty dict for non-generic tuples.
    """
    replaced = {}
    last = this_tuple[-1]
    if Tuples.is_generic(last) and last.value is not None:
        needle = last.value.lower()
        for ngram, score in self.final_ngrams:
            if needle in ngram:
                replaced[ngram.replace(needle, "<generic_value>")] = score
    return replaced
def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
    """Single indicator feature for a generic-valued tuple.

    Emits one feature named after the generic value with weight 1, or an
    empty dict when the tuple's last element is not generic.
    """
    last = this_tuple[-1]
    if not Tuples.is_generic(last):
        return {}
    return {"<generic_value=" + last.value + ">": 1}
        # NOTE(review): this is the tail of a helper whose `def` lies above this
        # chunk (it appears to rebuild an n-gram after substituting a value
        # substring). The indentation of these three lines is reconstructed —
        # confirm against the full file.
        return False
    # Re-wrap the substituted words as a cnNgram, keeping the original log
    # probability; delta extends the effective length by the extra search words.
    out = cnNgram(new_words, ngram.logp, delta=len(searchwords) - 1)
    return out


# Demo / smoke test: run this module directly to exercise the cnet feature
# extractor on a tiny hand-built confusion network and one corpus log turn.
if __name__ == '__main__':
    # Minimal confusion network: one arc per slot, except the third slot where
    # "there" and "!null" split the mass (scores are log-probabilities).
    cn = [
        {"arcs": [{"word": "<s>", "score": 0.0}]},
        {"arcs": [{"word": "hi", "score": 0.0}]},
        {"arcs": [{"word": "there", "score": -math.log(2)},
                  {"word": "!null", "score": -math.log(2)}]},
        {"arcs": [{"word": "how", "score": 0.0}]},
        {"arcs": [{"word": "are", "score": 0.0}]},
        {"arcs": [{"word": "you", "score": 0.0}]},
        {"arcs": [{"word": "</s>", "score": 0.0}]}
    ]
    final_ngrams = get_cnngrams(cn, 200, 3)
    print(dict([(ng.string_repn(), ng.score()) for ng in final_ngrams]))
    import configparser, json, Tuples
    config = configparser.ConfigParser()
    config.read("output/experiments/feature_set/run_1.cfg")
    nb = cnet(config)
    # Hard-coded corpus paths: presumably run from the repository root — verify.
    log_file = json.load(open("corpora/data/Mar13_S2A0/voip-318851c80b-20130328_224811/log.json"))
    log_turn = log_file["turns"][2]
    print(nb.calculate( log_turn ))
    # A generic-valued tuple: tuple_calculate should emit <generic_value> n-grams.
    tup = ("inform", "food", Tuples.genericValue("food", "modern european"))
    print(nb.tuple_calculate(tup, log_turn))
def extractFeatures(self, dw, log_input_key="batch"):
    """Accumulate per-tuple training examples from a dataset walker.

    For each (log_turn, label_turn) pair in each call of *dw* this computes
    one tuple-independent base feature vector (stored once in self.baseXs)
    and, for every active tuple, a tuple-specific feature vector appended
    to self.X[tuple], a label appended to self.y[tuple] (labelled turns
    only), and an index into self.baseXs appended to
    self.baseX_pointers[tuple].

    :param dw: dataset walker; has a session_list and yields calls, each an
        iterable of (log_turn, label_turn) pairs — label_turn may be None,
        presumably for unlabelled decode-time data (confirm with callers)
    :param log_input_key: forwarded to every feature extractor
    """
    # given a dataset walker,
    # adds examples to self.X and self.y
    total_calls = len(dw.session_list)
    print(total_calls)
    # print(dw.session_list)
    self.keys = set([])  # every (extractor_class_name, feature_key) seen
    for call_num, call in enumerate(dw):
        print('[%d/%d]' % (call_num, total_calls))
        for log_turn, label_turn in call:
            if label_turn != None:
                uacts = label_turn['semantics']['json']
                these_tuples = self.tuples.uactsToTuples(uacts)
                # check there aren't any tuples we were not expecting:
                for this_tuple in these_tuples:
                    if this_tuple not in self.tuples.all_tuples:
                        print("Warning: unexpected tuple", this_tuple)
                # convert tuples to specific tuples:
                these_tuples = [
                    Tuples.generic_to_specific(tup) for tup in these_tuples
                ]
            # which tuples would be considered (active) for this turn?
            active_tuples = self.tuples.activeTuples(log_turn)
            # calculate base features that are independent of the tuple
            baseX = defaultdict(float)
            for feature_extractor in self.feature_extractors:
                feature_name = feature_extractor.__class__.__name__
                new_feats = feature_extractor.calculate(
                    log_turn, log_input_key=log_input_key)
                # if new_feats != {}:
                # print('base feat:',new_feats.keys())
                for key in new_feats:
                    # keys are namespaced by extractor so different
                    # extractors cannot collide
                    baseX[(feature_name, key)] += new_feats[key]
                    self.keys.add((feature_name, key))
            self.baseXs.append(baseX)
            # print('these_tuples',these_tuples)
            # print('active_tuples',active_tuples)
            for this_tuple in active_tuples:
                # print(this_tuple)
                if label_turn != None:
                    # positive iff the (specific) tuple occurs in the labels
                    y = (Tuples.generic_to_specific(this_tuple) in these_tuples)
                X = defaultdict(float)
                for feature_extractor in self.feature_extractors:
                    feature_name = feature_extractor.__class__.__name__
                    new_feats = feature_extractor.tuple_calculate(
                        this_tuple, log_turn, log_input_key=log_input_key)
                    # if new_feats!={}:
                    # print('tuple feat',new_feats.keys())
                    for key in new_feats:
                        X[(feature_name, key)] += new_feats[key]
                        self.keys.add((feature_name, key))
                if this_tuple not in self.X:
                    self.X[this_tuple] = []
                if this_tuple not in self.y:
                    self.y[this_tuple] = []
                if this_tuple not in self.baseX_pointers:
                    self.baseX_pointers[this_tuple] = []
                # if this_tuple not in self.fnames :
                # self.fnames[this_tuple] = []
                self.X[this_tuple].append(X)
                if label_turn != None:
                    self.y[this_tuple].append(y)
                # pointer back into self.baseXs for this turn's shared features
                self.baseX_pointers[this_tuple].append(
                    len(self.baseXs) - 1)