def save(self, prefix="./"):
    """Write every non-``None`` vocab and embedding to files under ``prefix``.

    Vocabs are written to ``<prefix>vv_<name>.txt`` through their own
    ``write_to_file`` method; embeddings are pickled to
    ``<prefix>ve_<name>.pic`` with ``default_pickle_serializer``.

    Args:
        prefix: String prepended verbatim to each file name (no path
            separator is inserted, so include a trailing one if needed).
    """
    for key, vocab in self.vocabs.items():
        vocab_path = "".join((prefix, "vv_", key, ".txt"))
        if vocab is None:
            continue  # entry is unset: skip it
        vocab.write_to_file(vocab_path)
    for key, embed in self.embeds.items():
        embed_path = "".join((prefix, "ve_", key, ".pic"))
        if embed is None:
            continue  # entry is unset: skip it
        default_pickle_serializer.to_file(embed, embed_path)
def main(vocab_file: str, input_path: str, output_file='lt.pkl'):
    """Estimate a BIO label-transition matrix from annotated data and pickle it.

    For every event of every sentence, a BIO label sequence is built from the
    event's argument spans, and adjacent-label transitions are counted on top
    of an add-1 prior. The counts are row-normalized, log-transformed and
    saved with ``default_pickle_serializer``.

    Args:
        vocab_file: Pickled vocab pack; the argument vocab is taken from
            ``vocabs[0]['arg']``.
        input_path: Input location handed to the reader obtained from
            ``ReaderGetterConf().get_reader``.
        output_file: Destination for the pickled ``float32`` matrix of shape
            ``[len(bio_voc), len(bio_voc)]`` holding log transition
            probabilities (row = previous label, column = next label).
    """
    # first get vocab
    vocabs = default_pickle_serializer.from_file(vocab_file)
    arg_voc = vocabs[0]['arg']
    zlog(f"Read {arg_voc} from {vocab_file}")
    # make it to BIO-vocab
    bio_voc = SeqVocab(arg_voc)
    zlog(f"Build bio-voc of {bio_voc}")
    # read insts from input_path
    insts = list(ReaderGetterConf().get_reader(input_path=input_path))
    all_sents = list(yield_sents(insts))
    # --
    mat = np.ones([len(bio_voc), len(bio_voc)], dtype=np.float32)  # add-1 smoothing!
    cc = Counter()
    for sent in all_sents:
        for evt in sent.events:
            labels = ['O'] * len(sent)
            for arg in evt.args:
                widx, wlen = arg.mention.get_span()
                # The replacement holds exactly `wlen` labels, so the slice must
                # span `wlen` positions starting at `widx`; using `wlen` as the
                # end index would resize the list for any span with widx > 0.
                labels[widx:widx + wlen] = ["B-" + arg.role] + ["I-" + arg.role] * (wlen - 1)
            for a, b in zip(labels, labels[1:]):
                cc[f"{a}->{b}"] += 1
                mat[bio_voc[a], bio_voc[b]] += 1
    # --
    # report the most frequent transitions
    v = SimpleVocab()
    for name, count in cc.items():
        v.feed_one(name, count)
    v.build_sort()
    print(v.get_info_table()[:50].to_string())
    # OtherHelper.printd(cc)
    # --
    # normalize & log according to row and save
    mat = mat / mat.sum(-1, keepdims=True)
    mat = np.log(mat)
    default_pickle_serializer.to_file(mat, output_file)
def main(*args):
    """Load or read a frame collection, optionally dump it, then answer queries.

    Behaviour is driven by ``MainConf`` fields populated from ``args``:

    * ``load_pkl``: if set, load the collection from this pickle; otherwise
      read it with ``FrameReader().read_all(conf.dir, conf.onto)``.
    * ``save_pkl`` / ``save_txt``: if set, write the collection as a pickle /
      as text (one ``#--``-separated ``to_string()`` dump per frame).
    * ``debug``: if set, drop into the debugger.
    * ``query``: if set, run an interactive loop accepting
      ``frame <name>``, ``lu <key>`` or ``role <key>`` commands until stdin
      reaches end-of-file.
    """
    conf = MainConf()
    conf.update_from_args(args)
    # --
    if conf.load_pkl:
        collection = default_pickle_serializer.from_file(conf.load_pkl)
    else:
        reader = FrameReader()
        collection = reader.read_all(conf.dir, conf.onto)
    if conf.save_pkl:
        default_pickle_serializer.to_file(collection, conf.save_pkl)
    if conf.save_txt:
        with zopen(conf.save_txt, 'w') as fd:
            for f in collection.frames:
                fd.write("#--\n" + f.to_string() + "\n")
    # --
    if conf.debug:
        breakpoint()
    if conf.query:
        map_frame = {f.name: f for f in collection.frames}
        map_lu = ZFrameCollectionHelper.build_lu_map(
            collection, split_lu={"pb": "_", "fn": None}[conf.onto])
        map_role = ZFrameCollectionHelper.build_role_map(collection)
        # built once: the lookup tables do not change between queries
        query_maps = {'frame': map_frame, 'lu': map_lu, 'role': map_role}
        while True:
            try:
                line = input(">> ")
            except EOFError:
                break  # stdin closed (Ctrl-D / piped input done): leave cleanly
            fields = sh_split(line.strip())
            if not fields:
                continue
            try:
                # wrong field count -> ValueError; unknown kind -> KeyError
                query0, query1 = fields
                _map = query_maps[query0]
                answer = _map.get(query1, None)
                if isinstance(answer, ZFrame):
                    zlog(answer.to_string())
                else:
                    zlog(answer)
            except Exception:
                # Keep the loop best-effort for bad commands, but let
                # KeyboardInterrupt / SystemExit propagate so the user can quit.
                zlog(f"Wrong cmd: {fields}")
def save(self, prefix="./"):
    """Pickle ``[self.vocabs, self.embeds]`` into ``<prefix>zsfp.voc.pkl``.

    The directory part of the resulting path is passed to ``mkdir_p`` first,
    so a missing target directory is created before writing.

    Args:
        prefix: String prepended verbatim to the fixed file name.
    """
    fname = prefix + "zsfp.voc.pkl"
    parent_dir, _ = os.path.split(fname)
    mkdir_p(parent_dir)  # try to make dir if not there!
    payload = [self.vocabs, self.embeds]
    default_pickle_serializer.to_file(payload, fname)
def do_save(self, file: str):
    """Log the attempt, then pickle ``self.vars`` to ``file``.

    Args:
        file: Destination passed to ``default_pickle_serializer.to_file``.
    """
    notice = f"Try saving vars to {file}"
    zlog(notice)
    default_pickle_serializer.to_file(
        self.vars,
        file,
    )
def write(self, x):
    """Serialize ``x`` to this writer's ``fd`` with ``default_pickle_serializer``.

    Args:
        x: Object to serialize.
    """
    default_pickle_serializer.to_file(
        x,
        self.fd,
    )

# --
# b msp2/tasks/zmtl/modules/dec_help.py
def save_vocab(self, v_dir: str):
    """Pickle ``self.vpack`` into ``v_dir`` as ``v_<self.name>.pkl``.

    Nothing is written (and nothing is logged) when ``self.vpack`` is ``None``.

    Args:
        v_dir: Directory that receives the vocab file.
    """
    vp_file = os.path.join(v_dir, f"v_{self.name}.pkl")
    if self.vpack is None:
        return  # no vocab pack to persist
    default_pickle_serializer.to_file(self.vpack, vp_file)
    zlog(f"Save vocabs ``{self.vpack}'' for {self} to {vp_file}")