def main():
    ck500 = "/tmp/model.ckpt-500"
    qck0 = "/tmp/model.ckpt-0"
    var_d1 = load_checkpoint_vars(ck500)
    var_d2 = load_checkpoint_vars(qck0)
    var_name1 = "bert/encoder/layer_9/output/dense_29/bias/adam_v"
    var_name2 = "SCOPE2/bert/encoder/layer_9/output/dense_66/bias/adam_v"
    print(var_d1[var_name1])
    print(var_d2[var_name2])

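# All of these utilities rely on load_checkpoint_vars, which is imported from
# elsewhere in the repo. A minimal sketch of the assumed behavior, built on
# tf.train.load_checkpoint (the body below is an illustration, not the repo's
# actual implementation):
def load_checkpoint_vars(checkpoint_path):
    # Map every variable name in the checkpoint to its numpy value.
    reader = tf.train.load_checkpoint(checkpoint_path)
    return {name: reader.get_tensor(name)
            for name in reader.get_variable_to_shape_map()}
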
def main():
    start = os.path.join(output_path, "model", "runs", "nli_ex_21", "model-73630")
    modified = os.path.join(output_path, "model", "runs", "nli_pairing_1", "model-11171")
    var_d1 = load_checkpoint_vars(start)
    var_d2 = load_checkpoint_vars(modified)
    for key in var_d1:
        if key in var_d2:
            v1 = var_d1[key]
            v2 = var_d2[key]
            print(key, np.sum(v1 - v2))

def work(model_path, save_path):
    model = load_checkpoint_vars(model_path)
    var_list = []
    source_prefix = dual_model_prefix2
    with tf.compat.v1.Session() as sess:
        for old_name in model:
            if old_name.startswith(source_prefix):
                # Drop the scope prefix and its trailing slash.
                drop_n = len(source_prefix) + 1
                new_name = old_name[drop_n:]
                # Shift the Keras layer-name suffixes back to their unscoped values.
                if "/dense" in new_name:
                    new_name = parse_shift_name(new_name, "dense", -37)
                if "/layer_normalization" in new_name:
                    new_name = parse_shift_name(new_name, "layer_normalization", -25)
                var_value = model[old_name]
                new_var = tf.Variable(var_value, name=new_name)
                var_list.append(new_var)
                print("Old: " + old_name)
                print("New: " + new_name)
            elif old_name.startswith("cls_dense_1/"):
                var_value = model[old_name]
                new_name = old_name.replace("cls_dense_1/", "cls_dense/")
                new_var = tf.Variable(var_value, name=new_name)
                var_list.append(new_var)
                print("Old: " + old_name)
                print("New: " + new_name)
            else:
                pass
        sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())
        saver.save(sess, save_path)

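# parse_shift_name is also defined elsewhere in the repo. Judging from the
# call sites above, it shifts the numeric suffix that Keras appends to
# repeated layer names, e.g. "dense_66" -> "dense_29" for shift=-37. A sketch
# of that assumed behavior, not the repo's verbatim implementation:
def parse_shift_name(name, layer_type, shift):
    def shift_suffix(m):
        idx = int(m.group(1)) if m.group(1) else 0  # bare "dense" counts as index 0
        new_idx = idx + shift
        return layer_type if new_idx == 0 else "{}_{}".format(layer_type, new_idx)
    return re.sub(r"{}(?:_(\d+))?".format(layer_type), shift_suffix, name)
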
def show_record():
    # Raw string so the Windows backslashes are not treated as escapes.
    path = r"C:\work\Code\Chair\output\uncased_L-12_H-768_A-12\bert_model.ckpt"
    vars = load_checkpoint_vars(path)
    for name, val in vars.items():
        print(name, val.shape)

def compare_before_after():
    tokenizer = get_tokenizer()
    ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("heavy metal"))
    dir_name = pjoin(pjoin(output_path, FileName("model")), FileName("alt_emb_heavy_metal_D"))
    before = pjoin(dir_name, FileName("model.ckpt-0"))
    after = pjoin(dir_name, FileName("model.ckpt-10000"))
    v1_d = load_checkpoint_vars(before)
    v2_d = load_checkpoint_vars(after)
    # Report variables whose values changed noticeably during training.
    for key in v1_d:
        if key in v2_d:
            s = np.sum(v1_d[key] - v2_d[key])
            if np.abs(s) > 0.01:
                print(key, s)

    ori_emb = v2_d['bert/embeddings/word_embeddings']
    alt_emb_before = v1_d['bert/embeddings/word_embeddings_alt']
    alt_emb_after = v2_d['bert/embeddings/word_embeddings_alt']

    def show_diff_from_ori(token_id):
        diff = np.sum(np.abs(ori_emb[token_id] - alt_emb_after[token_id]))
        print(token_id, diff)

    def show_diff_from_step0(token_id):
        diff = np.sum(np.abs(alt_emb_before[token_id] - alt_emb_after[token_id]))
        print(token_id, diff)

    print("Diff against original embedding")
    print("Target words")
    for token_id in ids:
        show_diff_from_ori(token_id)
    print("Random words")
    for token_id in [321, 598, 5854]:
        show_diff_from_ori(token_id)

    # Scan the whole vocabulary for tokens whose alt embedding moved from its
    # step-0 initialization.
    print("Diff against step0 random init embedding")
    print("Target words")
    for token_id in range(0, 30000):
        diff = np.sum(np.abs(alt_emb_before[token_id] - alt_emb_after[token_id]))
        if diff > 0.001:
            print(token_id, diff)

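# get_tokenizer is assumed to return a BERT wordpiece tokenizer. A sketch
# using the google-research/bert repo's FullTokenizer; the import path and
# vocab location are placeholders that depend on how the repo is set up:
def get_tokenizer():
    import tokenization  # from google-research/bert
    return tokenization.FullTokenizer(vocab_file="data/vocab.txt", do_lower_case=True)
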
def main():
    d = load_checkpoint_vars(sys.argv[1])
    var_list = [
        "dense_75/kernel",
        "dense_75/bias",
        "dense_77/kernel",
        "dense_77/bias",
        "k1",
        "k2",
        "bias",
    ]
    for var in var_list:
        print(var, d[var])

def show_record():
    # Raw strings keep the Windows backslashes intact; the earlier path is
    # kept commented out for reference.
    # path = r"C:\work\Code\Chair\output\model\BERT_Base_trained_on_MSMARCO\model.ckpt-100000"
    path = r"C:\work\Code\Chair\output\model\msmarco_2\msmarco_2"
    vars = load_checkpoint_vars(path)
    for name, val in vars.items():
        print(name, val.shape)

def combine_pdcd_pc_bert(pc_bert_checkpoint, pdcd_checkpoint, save_path):
    pc_bert = load_checkpoint_vars(pc_bert_checkpoint)
    pdcd = load_checkpoint_vars(pdcd_checkpoint)
    var_list = []
    name_set = set()
    with tf.compat.v1.Session() as sess:
        # Copy pc_bert variables under the first triple-model scope.
        for old_name in pc_bert:
            new_name = "{}/".format(triple_model_prefix1) + old_name
            new_var = tf.Variable(pc_bert[old_name], name=new_name)
            var_list.append(new_var)
            assert new_name not in name_set
            print(old_name, new_name)
            name_set.add(new_name)
        # Move the two pdcd scopes to the second and third triple-model
        # scopes, shifting the layer-name suffixes so they do not collide.
        for old_name in pdcd:
            if dual_model_prefix1 in old_name:
                new_name = old_name.replace(dual_model_prefix1, triple_model_prefix2)
            elif dual_model_prefix2 in old_name:
                new_name = old_name.replace(dual_model_prefix2, triple_model_prefix3)
            else:
                new_name = old_name
            if "/dense" in new_name:
                new_name = parse_shift_name(new_name, "dense", 37)
            elif "/layer_normalization" in new_name:
                new_name = parse_shift_name(new_name, "layer_normalization", 25)
            new_var = tf.Variable(pdcd[old_name], name=new_name)
            print(old_name, new_name)
            if new_name in name_set:
                print(new_name)
            assert new_name not in name_set
            name_set.add(new_name)
            var_list.append(new_var)
        sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())
        saver.save(sess, save_path)

def combine(nli_checkpoint, alt_emb_checkpoint, save_path):
    print("Combining...")
    # For names present in both checkpoints, take the value from
    # nli_checkpoint; otherwise keep the alt_emb_checkpoint value.
    nli_d = load_checkpoint_vars(nli_checkpoint)
    alt_emb_d = load_checkpoint_vars(alt_emb_checkpoint)
    var_list = []
    with tf.Session() as sess:
        for key in alt_emb_d:
            if key in nli_d:
                new_var = tf.Variable(nli_d[key], name=key)
            else:
                new_var = tf.Variable(alt_emb_d[key], name=key)
            var_list.append(new_var)
        # Also carry over variables that exist only in nli_checkpoint.
        for key in nli_d:
            if key not in alt_emb_d:
                new_var = tf.Variable(nli_d[key], name=key)
                var_list.append(new_var)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        saver.save(sess, save_path)

def rewrite(checkpoint, save_path):
    # Rename TF-style "LayerNorm"/"dense" variables to Keras-style
    # "layer_normalization_N"/"dense_N" names, numbering each distinct
    # name prefix in natural-sort order.
    checkpoint_vars = load_checkpoint_vars(checkpoint)
    var_list = []
    layer_norm_idx = 0
    layer_prefix_d = {}
    dense_prefix_d = {}
    dense_idx = 0
    with tf.Session() as sess:
        keys = list(checkpoint_vars.keys())
        keys.sort(key=natural_keys)
        for key in keys:
            print(key)
            if "LayerNorm" in key:
                prefix = key[:key.find("LayerNorm")]
                if prefix in layer_prefix_d:
                    layer_norm = layer_prefix_d[prefix]
                else:
                    layer_norm = "layer_normalization"
                    if layer_norm_idx > 0:
                        layer_norm += "_{}".format(layer_norm_idx)
                    layer_prefix_d[prefix] = layer_norm
                    layer_norm_idx += 1
                new_name = re.sub("LayerNorm", layer_norm, key)
            elif "dense" in key:
                prefix = key[:key.find("dense")]
                if prefix in dense_prefix_d:
                    dense = dense_prefix_d[prefix]
                else:
                    dense = "dense"
                    if dense_idx > 0:
                        dense += "_{}".format(dense_idx)
                    dense_idx += 1
                    dense_prefix_d[prefix] = dense
                new_name = re.sub("dense", dense, key)
            else:
                new_name = key
            new_var = tf.Variable(checkpoint_vars[key], name=new_name)
            var_list.append(new_var)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        saver.save(sess, save_path)

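# natural_keys (the sort key above) is imported from elsewhere; a sketch of
# the standard natural-sort behavior assumed here, so that "layer_2" sorts
# before "layer_10":
def natural_keys(text):
    return [int(tok) if tok.isdigit() else tok for tok in re.split(r"(\d+)", text)]
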
def work(model_path, save_path):
    # Rename Keras-style layer names back to the TF BERT naming convention.
    model = load_checkpoint_vars(model_path)
    var_list = []
    with tf.Session() as sess:
        for key in model:
            new_name = key
            new_name = re.sub(r"layer_normalization_?\d*", "LayerNorm", new_name)
            new_name = re.sub(r"dense_?\d*", "dense", new_name)
            new_name = re.sub(r"cls_dense/kernel", "output_weights", new_name)
            new_name = re.sub(r"cls_dense/bias", "output_bias", new_name)
            var_value = model[key]
            if new_name == "output_weights":
                # The Keras kernel is stored as [hidden, num_labels];
                # BERT's output_weights expects [num_labels, hidden].
                print(var_value.shape)
                var_value = np.transpose(var_value, [1, 0])
            new_var = tf.Variable(var_value, name=new_name)
            var_list.append(new_var)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        saver.save(sess, save_path)

def get_embedding_table(model_path):
    vars = load_checkpoint_vars(model_path)
    return vars['bert/embeddings/word_embeddings']

def show_checkpoint(lm_checkpoint, var_name):
    d = load_checkpoint_vars(lm_checkpoint)
    print(var_name)
    print(d[var_name])