def __init__(self, node, node_last_dims):
    super().__init__(node, node_last_dims)
    self.size = self.input_ns[0]
    # note: message fixed; this is the gated wrapper, not AddNormWrapper (see below)
    zcheck(self.size == self.output_ns[0], "Gated wrapper meets unequal dims.")
    self.gate = self.add_sub_node("g", Affine(self.pc, self.size, self.size, act="linear"))
def _restart(self):
    if not self.input_is_fd:
        if self.fd is not None:
            self.fd.close()
        self.fd = zopen(self.file)
    else:
        zcheck(self.restart_times_ == 0, "Cannot restart a FdStreamer")
def __init__(self, node, node_last_dims, std_no_grad=False):
    super().__init__(node, node_last_dims)
    self.size = self.input_ns[0]
    zcheck(self.size == self.output_ns[0], "AddNormWrapper meets unequal dims.")
    self.normer = self.add_sub_node("n", LayerNorm(self.pc, self.size, std_no_grad=std_no_grad))
def __init__(self, pc: BK.ParamCollection, rconf: SL0Conf):
    super().__init__(pc, None, None)
    # concat enc and label if use-label
    input_dim = rconf._input_dim
    # todo(warn): always add label related params
    self.dim = input_dim + rconf.dim_label
    self.is_att, self.is_sum, self.is_ff = [rconf.chs_f == z for z in ["att", "sum", "ff"]]
    self.ff_reshape = [-1, self.dim * rconf.chs_num]  # only for ff
    # todo(+N): not elegant, but add the params to make most things compatible when reloading
    self.mha = self.add_sub_node("fn", MultiHeadAttention(pc, self.dim, input_dim, self.dim, rconf.chs_att))
    if self.is_att:
        # todo(+N): the only possibly inconsistent drop is the one for attention, may consider disabling it.
        self.fnode = self.mha
    elif self.is_sum:
        self.fnode = None
    elif self.is_ff:
        zcheck(rconf.chs_num > 0, "Err: Cannot ff with 0 child")
        self.fnode = self.add_sub_node("fn", Affine(pc, self.dim * rconf.chs_num, self.dim, act="elu"))
    else:
        raise NotImplementedError(f"UNK chs method: {rconf.chs_f}")
def __call__(self, word_arr: np.ndarray = None, char_arr: np.ndarray = None,
             extra_arrs: Iterable[np.ndarray] = (), aux_arrs: Iterable[np.ndarray] = ()):
    exprs = []
    # word/char/extras/posi
    seq_shape = None
    if self.has_word:
        # todo(warn): singleton-UNK-dropout should be done outside before
        seq_shape = word_arr.shape
        word_expr = self.dropmd_word(self.word_embed(word_arr))
        exprs.append(word_expr)
    if self.has_char:
        seq_shape = char_arr.shape[:-1]
        char_embeds = self.char_embed(char_arr)  # [*, seq-len, word-len, D]
        char_cat_expr = self.dropmd_char(BK.concat([z(char_embeds) for z in self.char_cnns]))
        exprs.append(char_cat_expr)
    zcheck(len(extra_arrs) == len(self.extra_embeds), "Unmatched extra fields.")
    for one_extra_arr, one_extra_embed, one_extra_dropmd in zip(extra_arrs, self.extra_embeds, self.dropmd_extras):
        seq_shape = one_extra_arr.shape
        exprs.append(one_extra_dropmd(one_extra_embed(one_extra_arr)))
    if self.has_posi:
        seq_len = seq_shape[-1]
        posi_idxes = BK.arange_idx(seq_len)
        posi_input0 = self.posi_embed(posi_idxes)
        for _ in range(len(seq_shape) - 1):
            posi_input0 = BK.unsqueeze(posi_input0, 0)
        posi_input1 = BK.expand(posi_input0, tuple(seq_shape) + (-1,))
        exprs.append(posi_input1)
    # assert len(aux_arrs) == len(self.drop_auxes)
    for one_aux_arr, one_aux_dim, one_aux_drop, one_fold, one_gamma, one_lambdas in \
            zip(aux_arrs, self.dim_auxes, self.drop_auxes, self.fold_auxes,
                self.aux_overall_gammas, self.aux_fold_lambdas):
        # fold and apply trainable lambdas
        input_aux_repr = BK.input_real(one_aux_arr)
        input_shape = BK.get_shape(input_aux_repr)
        # todo(note): assume the original concat is [fold/layer, D]
        reshaped_aux_repr = input_aux_repr.view(input_shape[:-1] + [one_fold, one_aux_dim])  # [*, slen, fold, D]
        # fix: softmax the per-fold lambdas (one_gamma is a scalar and cannot yield [fold, 1])
        lambdas_softmax = BK.softmax(one_lambdas, -1).unsqueeze(-1)  # [fold, 1]
        weighted_aux_repr = (reshaped_aux_repr * lambdas_softmax).sum(-2) * one_gamma  # [*, slen, D]
        one_aux_expr = one_aux_drop(weighted_aux_repr)
        exprs.append(one_aux_expr)
    #
    concated_exprs = BK.concat(exprs, dim=-1)
    # optional proj
    if self.has_proj:
        final_expr = self.final_layer(concated_exprs)
    else:
        final_expr = concated_exprs
    return final_expr
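# A minimal numpy sketch (hypothetical, not part of the library) of the ELMo-style
# aux weighting above: a [slen, fold*D] input is reshaped to [slen, fold, D],
# mixed with softmaxed per-fold lambdas, then scaled by an overall gamma.
import numpy as np

def fold_weight_demo(aux, fold, dim, lambdas, gamma):
    slen = aux.shape[0]
    reshaped = aux.reshape(slen, fold, dim)              # [slen, fold, D]
    w = np.exp(lambdas) / np.exp(lambdas).sum()          # softmax over folds
    return (reshaped * w[:, None]).sum(axis=1) * gamma   # [slen, D]

x = np.random.rand(4, 3 * 2).astype(np.float32)         # slen=4, fold=3, D=2
print(fold_weight_demo(x, 3, 2, np.ones(3, dtype=np.float32), 1.0).shape)  # (4, 2)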
def _restart(self):
    self.base_streamer_.restart()
    if isinstance(self.file, str):
        if self.fd is not None:
            self.fd.close()
        self.fd = zopen(self.file, mode='rb', encoding=None)
    else:
        zcheck(self.restart_times_ == 0, "Cannot restart a FdStreamer")
def save_txt(fd, words, vecs, sep):
    num_words = len(words)
    embed_size = len(vecs[0])
    zcheck(num_words == len(vecs), "Unmatched size!")
    fd.write(f"{num_words}{sep}{embed_size}\n")
    for w, vec in zip(words, vecs):
        zcheck(len(vec) == embed_size, "Unmatched dim!")
        print_list = [w] + ["%.6f" % float(z) for z in vec]
        fd.write(sep.join(print_list) + "\n")
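# Hypothetical usage sketch: writing two 3-d vectors in the word2vec txt layout
# (header "num_words<sep>embed_size", then one word per line). Assumes save_txt
# and the zcheck helper are importable as above; io.StringIO stands in for a file.
import io
buf = io.StringIO()
save_txt(buf, ["hello", "world"], [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], sep=" ")
print(buf.getvalue())
# 2 3
# hello 0.100000 0.200000 0.300000
# world 0.400000 0.500000 0.600000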
def __call__(self, input_exp):
    if not isinstance(input_exp, (list, tuple)):
        input_exp = [input_exp]
    if self.direct_matmul:
        h0 = BK.matmul(input_exp[0], BK.transpose(self.ws[0], 0, 1))
    else:
        zcheck(len(input_exp) == len(self.n_ins), "Unmatched input sizes!")
        input_lists = self._fill_input_list(input_exp)
        h0 = self._affine_f(input_lists)
    h1 = self._act_f(h0)
    h2 = self.drop_node(h1)
    return h2
def __init__(self, conf: TdParserConf, vpack: VocabPackage):
    super().__init__(conf, vpack)
    # ===== For decoding =====
    self.inferencer = TdInferencer(self.scorer, conf.iconf)
    # ===== For training =====
    sched_depth = ScheduledValue("depth", conf.tconf.sched_depth)
    self.add_scheduled_values(sched_depth)
    self.fber = TdFber(self.scorer, conf.iconf, conf.tconf, self.margin, self.sched_sampling, sched_depth)
    # todo(warn): not elegant, global flag!
    TdState.is_bfs = conf.is_bfs
    # =====
    zcheck(not self.bter.jpos_multitask_enabled(), "Not implemented for joint pos in this mode!!")
    zwarn("WARN: This topdown mode is deprecated!!")
def write_fields(self, fd, fields):
    zcheck(len(fields) == self.num_fileds, "Write: Unmatched number of fields.")
    sep = "\t" if self.separator is None else self.separator
    length = len(fields[0])
    zcheck(all(len(f) == length for f in fields), "Write: Unmatched length.")
    for idx in range(length):
        cur_line_fs = [str(z[idx]) for z in fields]
        cur_line = sep.join(cur_line_fs)
        fd.write(cur_line + "\n")
    fd.write("\n")
def __init__(self, rconf, model, vpack, dev_outfs, dev_goldfs, dev_out_format):
    super().__init__(rconf, model)
    self.vpack = vpack
    self.dev_out_format = dev_out_format
    #
    self.dev_goldfs = dev_goldfs
    if isinstance(dev_outfs, (list, tuple)):
        zcheck(len(dev_outfs) == len(dev_goldfs), "Mismatched number of output and gold!")
        self.dev_outfs = dev_outfs
    else:
        self.dev_outfs = [dev_outfs] * len(dev_goldfs)
def _build_check(v):
    # check specials are included
    zcheck(lambda: all(x in v.v for x in v.pre_list), "Not including pre_specials")
    zcheck(lambda: all(x in v.v for x in v.post_list), "Not including post_specials")
    # check special tokens
    zcheck(lambda: all(v.v[x] < len(v.pre_list) for x in v.pre_list),
           "Get unexpected pre_special words in plain words!!")
    zcheck(lambda: all(v.v[x] >= len(v.v) - len(v.post_list) for x in v.post_list),
           "Get unexpected post_special words in plain words!!")
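# Layout sketch (hypothetical values) of the index convention being checked:
# pre specials occupy the first indices, post specials the last ones, and the
# plain words sit in between.
#   pre_list  = ["<pad>", "<unk>"]   -> indices 0, 1
#   plain     = ["the", "cat", ...]  -> indices 2 .. len(v.v)-len(post_list)-1
#   post_list = ["<eos>"]            -> index  len(v.v)-1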
def replace_weights(self, npvec):
    num_words, num_dim = npvec.shape
    zcheck(num_dim == self.n_dim, "Cannot change embedding dimension!")
    # replace
    # todo(+N): simply add one param here, the original one is still around
    # todo(WARN): only use this for testing, since the new weights will not be added to the optimizer
    zlog(f"Replacing the embedding weights from ({self.n_words}, {num_dim}) to ({num_words}, {num_dim})")
    # here, we are adding params at the outside
    self.E = self.add_param("E", (num_words, num_dim), init=npvec, lookup=True, check_stack=False)
    self.n_words = num_words
    self.dropout_wordceil = self.dropout_wordceil_hp if self.dropout_wordceil_hp is not None else self.n_words
def merge_others(self, others):
    embed_size = self.embed_size
    for other in others:
        zcheck(embed_size == other.embed_size, "Cannot merge two diff-sized embeddings!")
        this_all_num = other.num_words
        this_added_num = 0
        for one_w, one_vec in zip(other.words, other.vecs):
            # keep the old one!
            if one_w not in self.wmap:
                # here, does not record as hits!
                this_added_num += 1
                self.wmap[one_w] = len(self.words)
                self.words.append(one_w)
                self.vecs.append(one_vec)
        zlog(f"Merge embed: add another with all={this_all_num}/add={this_added_num}")
    zlog(f"After merge, changed from {self.num_words} to {len(self.words)}")
    self.num_words = len(self.words)  # remember to change this one!
def i2w(dicts, ii, rm_eos=True, factor_split='|'):
    # Usage: list(Vocab), list(int)/list(list(int)) => list(str)
    if not isinstance(dicts, Iterable):
        dicts = [dicts]
    tmp = []
    # get real list
    real_ii = ii
    if len(ii) > 0 and rm_eos and ii[-1] == dicts[0].eos:
        real_ii = ii[:-1]
    # transform each token
    for one in real_ii:
        if not isinstance(one, Iterable):
            one = [one]
        zcheck(len(one) == len(dicts), "Unequal factors vs. dictionaries.")
        tmp.append(factor_split.join([v.idx2word(idx) for v, idx in zip(dicts, one)]))
    return tmp
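# Hypothetical round-trip sketch with a minimal stand-in vocab (the real Vocab
# class provides idx2word and an eos id); shows single- and two-factor decoding.
class _TinyVocab:
    def __init__(self, words, eos):
        self.words, self.eos = words, eos
    def idx2word(self, idx):
        return self.words[idx]

v = _TinyVocab(["<eos>", "a", "b"], eos=0)
print(i2w(v, [1, 2, 0]))              # ['a', 'b']  (trailing eos stripped)
print(i2w([v, v], [[1, 2], [2, 1]]))  # ['a|b', 'b|a']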
def combine_slices(slices, keys, skip_combine=False, skip_debug_check=False):
    # combine them
    if skip_combine:
        ew = slices[0].ew
        zcheck(len(slices) == ew.bsize, "At least get the same batch-size!")
        values, bidxes = [ew.val], None
        if not skip_debug_check:  # fix: only run the (costly) debug check when it is not skipped
            _, flag = SliceManager._arrange_idxes(slices)
            zcheck(flag is None, "Failed skip-combine!")
    else:
        values, bidxes = SliceManager._arrange_idxes(slices)
    # prepare with the keys
    return SliceManager._combine_recursive_keys(values, bidxes, keys)
def read_fields(self, fd):
    lines = self.read_lines(fd)
    if lines is None:
        return None
    else:
        if self.num_fileds is None:
            # first line of data
            self.num_fileds = len(self.split_line(lines[0]))
        ret = [[] for i in range(self.num_fileds)]  # list of list of fields
        for one in lines:
            fields = self.split_line(one)
            zcheck(len(fields) == self.num_fileds, "READ: Unmatched number of fields.")
            for i, f in enumerate(fields):
                ret[i].append(f)
        return ret
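# A standalone sketch (hypothetical, plain Python) of the row-to-column
# transposition read_fields performs on one sentence block:
lines = ["1\tJohn\tNNP", "2\truns\tVBZ"]
rows = [ln.split("\t") for ln in lines]
cols = [list(c) for c in zip(*rows)]
print(cols)  # [['1', '2'], ['John', 'runs'], ['NNP', 'VBZ']]
# write_fields above is the inverse: it zips the columns back into rows.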
def _run_validate(self, dev_streams):
    if not isinstance(dev_streams, Iterable):
        dev_streams = [dev_streams]
    dev_results = []
    zcheck(len(dev_streams) == len(self.dev_goldfs), "Mismatched number of streams!")
    dev_idx = 0
    for one_stream, one_dev_outf, one_dev_goldf in zip(dev_streams, self.dev_outfs, self.dev_goldfs):
        # todo(+2): overwrite previous ones?
        rr = ParserTestingRunner(self.model, self.vpack, one_dev_outf + ".dev" + str(dev_idx),
                                 one_dev_goldf, self.dev_out_format)
        x = rr.run(one_stream)
        dev_results.append(x)
        dev_idx += 1
    return ParsingDevResult(dev_results)
def _load_txt(fname, sep=" "):
    printing("Going to load pre-trained (txt) w2v from %s ..." % fname)
    one = WordVectors(sep=sep)
    repeated_count = 0
    with zopen(fname) as fd:
        # first line
        line = fd.readline()
        try:
            one.num_words, one.embed_size = [int(x) for x in line.split(sep)]
            printing("Reading w2v num_words=%d, embed_size=%d." % (one.num_words, one.embed_size))
            line = fd.readline()
        except:
            printing("Reading w2v.")
        # the rest
        while len(line) > 0:
            line = line.rstrip()
            fields = line.split(sep)
            word, vec = fields[0], [float(x) for x in fields[1:]]
            # zcheck(word not in one.wmap, "Repeated key.")
            # keep the old one
            if word in one.wmap:
                repeated_count += 1
                zwarn(f"Repeat key {word}")
                line = fd.readline()
                continue
            #
            if one.embed_size is None:
                one.embed_size = len(vec)
            else:
                zcheck(len(vec) == one.embed_size, "Unmatched embed dimension.")
            one.vecs.append(vec)
            one.wmap[word] = len(one.words)
            one.words.append(word)
            line = fd.readline()
    # final
    if one.num_words is not None:
        zcheck(one.num_words == len(one.vecs) + repeated_count, "Unmatched num of words.")
    one.num_words = len(one.vecs)
    printing(f"Read ok: w2v num_words={one.num_words:d}, embed_size={one.embed_size:d}, repeat={repeated_count:d}")
    return one
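# The two txt layouts _load_txt accepts (illustrative, hypothetical file contents):
#
#   with header:            without header:
#     2 3                     hello 0.1 0.2 0.3
#     hello 0.1 0.2 0.3       world 0.4 0.5 0.6
#     world 0.4 0.5 0.6
#
# The header, when present, is cross-checked against the number of rows actually
# read plus any repeated keys (which are skipped with a warning).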
def _combine_recursive(values, bidxes):
    v0 = values[0]
    if isinstance(v0, dict):
        ret = {}
        for name in v0:
            next_values = [z[name] for z in values]
            ret[name] = SliceManager._combine_recursive(next_values, bidxes)
    elif isinstance(v0, (list, tuple)):
        ret = []
        for idx in range(len(v0)):
            next_values = [z[idx] for z in values]
            ret.append(SliceManager._combine_recursive(next_values, bidxes))
        # todo(+2): need to revert back to tuple if tuple?
    else:
        zcheck(BK.is_expr(v0), "Illegal combine value type.")
        # todo(warn): first concat and then select, may use more memory
        ret = BK.concat(values, 0)
        if bidxes is not None:
            ret = BK.select(ret, bidxes, 0)
    return ret
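# A numpy sketch (hypothetical, standing in for the BK backend) of the leaf case:
# tensors from several slices are concatenated along the batch axis, then
# reordered/selected by bidxes.
import numpy as np
values = [np.array([[1.0], [2.0]]), np.array([[3.0]])]
bidxes = [2, 0, 1]
combined = np.concatenate(values, axis=0)[bidxes]
print(combined.ravel())  # [3. 1. 2.]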
def output_tags(self, scheme):
    ret = []
    if scheme == "BIO":
        begin_tag, end_tag, single_tag = "B", "I", "B"
    elif scheme == "BIOES":
        begin_tag, end_tag, single_tag = "B", "E", "S"
    else:
        zfatal("Unknown tagging scheme")
    #
    for one in self.all_chunks:
        start, end, type = one
        if type:
            if end - start == 1:
                ret.append(single_tag + "-" + type)
            else:
                ret.append(begin_tag + "-" + type)
                for _ in range(end - start - 2):
                    ret.append("I-" + type)
                ret.append(end_tag + "-" + type)
        else:
            ret.append("O")
    zcheck(len(ret) == self.length, "Err length.")
    return ret
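# Illustrative expected outputs for all_chunks = [(0,1,"PER"), (1,2,""), (2,5,"ORG")]:
#   BIO:   ['B-PER', 'O', 'B-ORG', 'I-ORG', 'I-ORG']
#   BIOES: ['S-PER', 'O', 'B-ORG', 'I-ORG', 'E-ORG']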
def filter_embed(self, wv: 'WordVectors', init_nohit=0., scale=1.0, assert_all_hit=False):
    if init_nohit <= 0.:
        get_nohit = lambda s: np.zeros((s,), dtype=np.float32)
    else:
        get_nohit = lambda s: (Random.random_sample((s,)).astype(np.float32) - 0.5) * (2 * init_nohit)
    #
    ret = []
    res = defaultdict(int)
    for w in self.final_words:
        hit, norm_name, norm_w = wv.norm_until_hit(w)
        if hit:
            value = np.asarray(wv.get_vec(norm_w, norm=False), dtype=np.float32)
            res[norm_name] += 1
        else:
            value = get_nohit(wv.embed_size)
            # value = np.zeros((wv.embed_size,), dtype=np.float32)
            res["no-hit"] += 1
        ret.append(value)
    #
    if assert_all_hit:
        zcheck(res["no-hit"] == 0, f"Filter-embed error: assert all-hit but get no-hit of {res['no-hit']}")
    printing("Filter pre-trained embed: %s, no-hit is inited with %s." % (res, init_nohit))
    return np.asarray(ret, dtype=np.float32) * scale
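# Quick sanity sketch (not library code) of the no-hit initializer above:
# init_nohit=0.1 yields uniform samples in [-0.1, 0.1).
import numpy as np
rng = np.random.random_sample((5,)).astype(np.float32)
nohit_vec = (rng - 0.5) * (2 * 0.1)
assert (nohit_vec >= -0.1).all() and (nohit_vec < 0.1).all()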
def add_chunk(self, start, end, type):
    new_chunk = (start, end, type)
    # fix: guard the index lookups so the empty-list case cannot raise IndexError
    zcheck((start >= 0) if len(self.chunks) == 0 else (start >= self.chunks[-1][1]), "Un-seq chunk!")
    zcheck((start == 0) if len(self.all_chunks) == 0 else (start == self.all_chunks[-1][1]), "Un-cont all-chunk!")
    if type:
        self.chunks.append(new_chunk)
    else:
        zcheck(end - start == 1, "Err: continued Outside tags.")
    self.all_chunks.append(new_chunk)
def __init__(self, pc: BK.ParamCollection, econf: EncConf):
    super().__init__(pc, None, None)
    self.conf = econf
    #
    self.input_dim = econf._input_dim
    self.enc_hidden = econf.enc_hidden
    # add the sublayers
    self.layers = []  # todo(0): allowing repeated names
    last_dim = self.input_dim
    for name in econf.enc_ordering:
        if name == "rnn":
            if econf.enc_rnn_layer > 0:
                rnn_bidirect, rnn_sep_bidirection = econf.enc_rnn_bidirect, econf.enc_rnn_sep_bidirection
                rnn_enc_size = self.enc_hidden // 2 if rnn_bidirect else self.enc_hidden
                rnn_layer = self.add_sub_node(
                    "rnn", RnnLayerBatchFirstWrapper(
                        pc, RnnLayer(pc, last_dim, rnn_enc_size, econf.enc_rnn_layer,
                                     node_type=econf.enc_rnn_type, bidirection=rnn_bidirect,
                                     sep_bidirection=rnn_sep_bidirection)))
                self.layers.append(rnn_layer)
        # todo(+2): different i/o sizes for cnn and att?
        elif name == "cnn":
            if econf.enc_cnn_layer > 0:
                per_cnn_size = self.enc_hidden // len(econf.enc_cnn_windows)
                cnn_layer = self.add_sub_node(
                    "cnn", Sequential(pc, [
                        CnnLayer(pc, last_dim, per_cnn_size, econf.enc_cnn_windows, act="elu")
                        for _ in range(econf.enc_cnn_layer)]))
                self.layers.append(cnn_layer)
        elif name == "att":
            if econf.enc_att_layer > 0:
                zcheck(last_dim == self.enc_hidden, "I/O should have same dim for Att-Enc")
                att_layer = self.add_sub_node(
                    "att", TransformerEncoder(
                        pc, econf.enc_att_layer, last_dim, econf.enc_att_ff,
                        econf.enc_att_add_wrapper, econf.enc_att_conf,
                        final_act=econf.enc_att_final_act,
                        fixed_range_vals=econf.enc_att_fixed_ranges))
                self.layers.append(att_layer)
        elif name == "att2":
            if econf.enc_att2_layer > 0:
                zcheck(last_dim == self.enc_hidden, "I/O should have same dim for Att-Enc")
                att2_layer = self.add_sub_node(
                    "att2", Transformer2Encoder(
                        pc, econf.enc_att2_layer, last_dim, econf.enc_att2_conf,
                        short_range=econf.enc_att2_short_range,
                        long_ranges=econf.enc_att2_long_ranges))
                self.layers.append(att2_layer)
        else:
            zfatal("Unknown encoder name: " + name)
        if len(self.layers) > 0:
            last_dim = self.layers[-1].get_output_dims()[-1]
    self.output_dim = last_dim
    #
    if econf.no_final_dropout:
        self.disable_final_dropout()
def merge_by(self, s: 'LinearState'):
    zcheck(s.merger is None, "Err: multiple level of merges!")
    self.status = LinearState.STATUS_MERGED
    self.merger = s
    s.add_merge(self)
def set_root(self, s: LinearState):
    zcheck(s.sg is self, "SGErr: State does not belong here")
    zcheck(self.root is None, "SGErr: Can only have one root")
    zcheck(s.is_start(), "SGErr: Only start node can be root")
    self.root = s
def __init__(self, pc: BK.ParamCollection, econf: EmbedConf, vpack: VocabPackage):
    super().__init__(pc, None, None)
    self.conf = econf
    #
    repr_sizes = []
    # word
    self.has_word = (econf.dim_word > 0)
    if self.has_word:
        npvec = vpack.get_emb("word") if econf.init_words_from_pretrain else None
        self.word_embed = self.add_sub_node(
            "ew", Embedding(self.pc, len(vpack.get_voc("word")), econf.dim_word,
                            npvec=npvec, name="word", freeze=econf.word_freeze))
        repr_sizes.append(econf.dim_word)
    # char
    self.has_char = (econf.dim_char > 0)
    if self.has_char:
        # todo(warn): cnns will also use emb's drop?
        self.char_embed = self.add_sub_node(
            "ec", Embedding(self.pc, len(vpack.get_voc("char")), econf.dim_char, name="char"))
        per_cnn_size = econf.char_cnn_hidden // len(econf.char_cnn_windows)
        self.char_cnns = [
            self.add_sub_node(
                "cnnc", CnnLayer(self.pc, econf.dim_char, per_cnn_size, z, pooling="max", act="tanh"))
            for z in econf.char_cnn_windows]
        repr_sizes.append(econf.char_cnn_hidden)
    # posi: absolute positional embeddings
    self.has_posi = (econf.dim_posi > 0)
    if self.has_posi:
        self.posi_embed = self.add_sub_node(
            "ep", PosiEmbedding(self.pc, econf.dim_posi, econf.posi_clip,
                                econf.posi_fix_sincos, econf.posi_freeze))
        repr_sizes.append(econf.dim_posi)
    # extras: like POS, ...
    self.dim_extras = econf.dim_extras
    self.extra_names = econf.extra_names
    zcheck(len(self.dim_extras) == len(self.extra_names), "Unmatched dims and names!")
    self.extra_embeds = []
    for one_extra_dim, one_name in zip(self.dim_extras, self.extra_names):
        self.extra_embeds.append(
            self.add_sub_node(
                "ext", Embedding(self.pc, len(vpack.get_voc(one_name)), one_extra_dim,
                                 npvec=vpack.get_emb(one_name, None), name="extra:" + one_name)))
        repr_sizes.append(one_extra_dim)
    # auxes
    self.dim_auxes = econf.dim_auxes
    self.fold_auxes = econf.fold_auxes
    self.aux_overall_gammas = []
    self.aux_fold_lambdas = []
    for one_aux_dim, one_aux_fold in zip(self.dim_auxes, self.fold_auxes):
        repr_sizes.append(one_aux_dim)
        # aux gamma and fold trainable lambdas
        self.aux_overall_gammas.append(self.add_param("AG", (), 1.))  # scalar
        self.aux_fold_lambdas.append(self.add_param(
            "AL", (), [1. / one_aux_fold for _ in range(one_aux_fold)]))  # [#fold]
    # =====
    # another projection layer? & set final dim
    if len(repr_sizes) <= 0:
        zwarn("No inputs??")
    # zcheck(len(repr_sizes)>0, "No inputs?")
    self.repr_sizes = repr_sizes
    self.has_proj = (econf.emb_proj_dim > 0)
    if self.has_proj:
        proj_layer = Affine(self.pc, sum(repr_sizes), econf.emb_proj_dim)
        if econf.emb_proj_norm:
            norm_layer = LayerNorm(self.pc, econf.emb_proj_dim)
            self.final_layer = self.add_sub_node("fl", Sequential(self.pc, [proj_layer, norm_layer]))
        else:
            self.final_layer = self.add_sub_node("fl", proj_layer)
        self.output_dim = econf.emb_proj_dim
    else:
        self.final_layer = None
        self.output_dim = sum(repr_sizes)
    # =====
    # special MdDropout: dropout the entire last dim (for word, char, extras, but not posi)
    self.dropmd_word = self.add_sub_node("md", DropoutLastN(pc, lastn=1))
    self.dropmd_char = self.add_sub_node("md", DropoutLastN(pc, lastn=1))
    self.dropmd_extras = [self.add_sub_node("md", DropoutLastN(pc, lastn=1)) for _ in self.extra_names]
    # dropouts for aux
    self.drop_auxes = [self.add_sub_node("aux", Dropout(pc, (one_aux_dim,))) for one_aux_dim in self.dim_auxes]
def split_tag_type(t):
    fields = t.split(sep="-", maxsplit=1)
    fields.append("")  # pad in case there is no type part (e.g., plain "O")
    tag, type = fields[:2]
    zcheck(tag in "BIOES", "Strange tag of %s." % tag)
    return tag, type
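# Hypothetical spot-check of split_tag_type on typical BIO-style tags:
print(split_tag_type("B-PER"))  # ('B', 'PER')
print(split_tag_type("O"))      # ('O', '')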
def end_prev(self):
    if self.cur_idx > self.prev_start:
        zcheck(self.prev_tag in "BI", "Strange continuing state.")
        self.add_chunk(self.prev_start, self.cur_idx, self.prev_type)
        self.renew_state(self.cur_idx)
def get_output_dims(self, *input_dims):
    xs = input_dims[0]  # -1 dimension
    out = max(xs)
    # fix: check the individual last-dims (xs), not the enclosing args tuple
    zcheck(all((one == out or one == 1) for one in xs),
           "Should sum with same-dim tensors or broadcastable!")
    return (out,)
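# Illustrative sketch (plain Python, hypothetical values) of the broadcasting rule
# above: last dims [300, 300, 1] are summable with output dim max(...) = 300,
# while a mix like [300, 200] would fail the zcheck.
dims = [300, 300, 1]
out = max(dims)
assert all(d == out or d == 1 for d in dims)
print((out,))  # (300,)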