def export(self, models_fname, dictionary_fname, config_fname):
    """Export this classifier in the plain-text format Caesar reads.

    Writes three files:
      * ``models_fname``     -- each trained SVM, preceded by its tuple
                                header and terminated by ``.`` and a blank line;
      * ``dictionary_fname`` -- the feature dictionary as a JSON list of keys,
                                ordered by feature index;
      * ``config_fname``     -- a CNET config section pointing at the other two.

    Only SVM classifiers can be exported; for any other ``self.type`` the
    method prints a message and returns without writing anything.
    """
    print("exporting Classifier for Caesar to read")
    print("models to be saved in", models_fname)
    print("dictionary to be saved in", dictionary_fname)
    print("config to be saved in", config_fname)
    if self.type != "svm":
        print("Only know how to export SVMs")
        return

    # Serialise every trained classifier.  A generic final slot value is
    # replaced by the "<generic_value>" placeholder in the tuple header.
    lines = []
    for this_tuple in self.classifiers:
        if self.classifiers[this_tuple] is not None:
            t = this_tuple
            if Tuples.is_generic(this_tuple[-1]):
                t = this_tuple[:-1] + ("<generic_value>", )
            lines += ['(' + ','.join(t) + ')']
            lines += utils.svm_to_libsvm(
                self.classifiers[this_tuple].model)
            lines += [".", ""]
    # Text mode ("w"), not "wb": the lines are str, and writing str to a
    # binary-mode file raises TypeError under Python 3.
    with open(models_fname, "w") as models_savefile:
        for line in lines:
            models_savefile.write(line + "\n")

    # Save the dictionary, ordered by feature index.  sorted() is used
    # because dict.items() returns an unsortable view under Python 3.
    dictionary_items = sorted(self.dictionary.items(), key=lambda x: x[1])
    # Indices must form a contiguous 0..N-1 sequence; materialise range()
    # as a list so the comparison also works under Python 3.
    assert [x[1] for x in dictionary_items] == list(range(len(self.dictionary)))
    keys = [list(x[0]) for x in dictionary_items]
    # Context manager closes the handle (the old code leaked an open file).
    with open(dictionary_fname, "w") as dictionary_savefile:
        json.dump(keys, dictionary_savefile)

    # Save the config section consumed by Caesar.
    with open(config_fname, "w") as config_savefile:
        config_savefile.write(
            "# Automatically generated by CNetTrain scripts\n")
        options = {
            "FEATURES": json.dumps(self.features),
            "MAX_ACTIVE_TUPLES": str(self.tuples.max_active),
            "TAIL_CUTOFF": str(self.tuples.tail_cutoff),
            "MODELS": os.path.join(os.getcwd(), models_fname),
            "DICTIONARY": os.path.join(os.getcwd(), dictionary_fname),
        }
        if "cnet" in self.features:
            index = self.features.index("cnet")
            cnf = self.feature_extractors[index]
            options["MAX_NGRAM_LENGTH"] = str(cnf.max_length)
            options["MAX_NGRAMS"] = str(cnf.max_ngrams)
        for key in options:
            this_line = "CNET : %s" % key
            this_line = this_line.ljust(30)
            this_line += "= " + options[key]
            config_savefile.write("\t" + this_line + "\n")
    print("exported Classifier.")
def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
    """Score confusion-network n-grams for a generic-valued tuple.

    When the tuple's final element is generic, every final n-gram that
    mentions the generic value (lower-cased) is rewritten with the
    "<generic_value>" placeholder via ``cn_ngram_replaced``.  Returns a
    mapping of rewritten n-gram string representations to their scores;
    empty when the tuple is not generic.
    """
    substituted = []
    if Tuples.is_generic(this_tuple[-1]):
        gvalue = this_tuple[-1]
        needle = gvalue.value.lower()
        for ngram in self.final_ngrams:
            candidate = cn_ngram_replaced(ngram, needle, "<generic_value>")
            # cn_ngram_replaced signals "no occurrence" with False
            if candidate != False:
                substituted.append(candidate)
    return {ng.string_repn(): ng.score() for ng in substituted}
def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
    """Score plain n-grams for a generic-valued tuple.

    For a generic final slot value (with a non-None ``value``), every
    (ngram, score) pair in ``self.final_ngrams`` whose n-gram contains the
    lower-cased value is rewritten with the "<generic_value>" placeholder.
    Returns a {rewritten_ngram: score} dict; empty when the tuple is not
    generic or the value is None.
    """
    rewritten = []
    gvalue = this_tuple[-1]
    if Tuples.is_generic(gvalue) and gvalue.value is not None:
        needle = gvalue.value.lower()
        rewritten = [
            (ngram.replace(needle, "<generic_value>"), score)
            for ngram, score in self.final_ngrams
            if needle in ngram
        ]
    return dict(rewritten)
def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
    """Emit a single indicator feature for a generic-valued tuple.

    Returns {"<generic_value=VALUE>": 1} when the tuple's final element is
    generic, otherwise an empty dict.
    """
    last = this_tuple[-1]
    # Guard clause: non-generic tuples contribute no features.
    if not Tuples.is_generic(last):
        return {}
    return {"<generic_value=" + last.value + ">": 1}