def main(_):
    """Throughput benchmark: repeatedly run the training op on zero-filled
    dummy batches pinned to GPU 0.

    Takes the unused absl-style positional argv argument `_`.
    """
    disable_eager_execution()
    seq_length = 512
    batch_size = FLAGS.train_batch_size
    # All-zero dummy input reused for every feed. np.int was removed in
    # NumPy 1.24; int32 matches the placeholder dtype below.
    virtual_input_ids = np.zeros([batch_size, seq_length], np.int32)

    with tf.device("/device:gpu:0"):
        input_ids = tf1.placeholder(tf.int32, [batch_size, seq_length])
        input_mask = tf1.placeholder(tf.int32, [batch_size, seq_length])
        segment_ids = tf1.placeholder(tf.int32, [batch_size, seq_length])
        print("Defining graph...")
        train_op = define_graph(input_ids, input_mask, segment_ids)

    print("Initializing variables...")
    # allow_soft_placement lets ops without a GPU kernel fall back to CPU;
    # allow_growth avoids grabbing all GPU memory up front.
    config = tf1.ConfigProto(log_device_placement=False,
                             allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf1.Session(config=config)
    sess.run(tf1.global_variables_initializer())

    print("Now running...")
    for step in range(1000):
        # The same zero tensor is fed to all three inputs; only wall-clock
        # throughput matters here, not the loss.
        sess.run(
            [train_op],
            feed_dict={
                input_ids: virtual_input_ids,
                input_mask: virtual_input_ids,
                segment_ids: virtual_input_ids,
            },
        )
        print("step", step)
def boring():
    """Run a single traced training step on dummy input and write the
    summary plus full run metadata (op-level timings) to a FileWriter,
    for inspection in TensorBoard.
    """
    disable_eager_execution()
    seq_length = 512
    batch_size = 3
    # np.int was removed in NumPy 1.24; int32 matches the placeholder dtype.
    virtual_input_ids = np.zeros([batch_size, seq_length], np.int32)

    input_ids = tf1.placeholder(tf.int32, [batch_size, seq_length])
    input_mask = tf1.placeholder(tf.int32, [batch_size, seq_length])
    segment_ids = tf1.placeholder(tf.int32, [batch_size, seq_length])
    train_op = define_graph(input_ids, input_mask, segment_ids)
    # Dummy scalar so merge_all has at least one summary to emit.
    tf1.summary.scalar('accuracy', 0)
    merged = tf1.summary.merge_all()

    sess = init_session()
    sess.run(tf1.global_variables_initializer())
    # FULL_TRACE records per-op timing/memory into run_metadata.
    run_options = tf1.RunOptions(trace_level=tf1.RunOptions.FULL_TRACE)
    run_metadata = tf1.RunMetadata()
    train_writer = tf1.summary.FileWriter(
        os.path.join(cpath.output_path, "horizon_summary"), sess.graph)

    _, summary_out = sess.run(
        [train_op, merged],
        feed_dict={
            input_ids: virtual_input_ids,
            input_mask: virtual_input_ids,
            segment_ids: virtual_input_ids,
        },
        options=run_options,
        run_metadata=run_metadata)

    step = 0
    train_writer.add_run_metadata(run_metadata, 'step%03d' % step)
    train_writer.add_summary(summary_out, step)
def run_server(model_path):
    """Load the model at *model_path* and serve predictions on PORT_UKP."""
    disable_eager_execution()
    model = Predictor(model_path, 3, 300)

    def handle(payload):
        # Forward the request payload straight to the loaded predictor.
        return model.predict(payload)

    server = BertLikeServer(handle)
    print("server started")
    server.start(PORT_UKP)
def run_nli_w_path(run_name, step_name, model_path):
    """Fine-tune on NLI starting from *model_path*, then evaluate the saved
    model and record the accuracy.

    run_name / step_name only label the run ("{run_name}_{step_name}_NLI")
    and the saved report; training runs for 3 epochs/steps units as passed
    to train_nli.
    """
    disable_eager_execution()
    hp = HPBert()
    nli_setting = NLI()
    nli_setting.vocab_size = 30522  # BERT-base uncased vocab size
    nli_setting.vocab_filename = "bert_voca.txt"
    data_loader = nli.DataLoader(hp.seq_max, "bert_voca.txt", True)
    data = get_nli_batches_from_data_loader(data_loader, hp.batch_size)
    run_name = "{}_{}_NLI".format(run_name, step_name)
    saved_model = train_nli(hp, nli_setting, run_name, 3, data, model_path)
    # tf.reset_default_graph() is TF1-only and is gone under TF2; use the
    # compat alias (the rest of this file already uses tf1 for v1 APIs) so
    # the evaluation graph starts from a clean slate.
    tf1.reset_default_graph()
    avg_acc = test_nli(hp, nli_setting, run_name, data, saved_model)
    print("avg_acc: ", avg_acc)
    save_report("nli", run_name, step_name, avg_acc)
def load_bert_like():
    """Load BERT weights into a BertLike model, run it once, and render the
    per-layer attention probabilities (grouped by relative position offset)
    into position.html.

    NOTE(review): attention_prob is indexed as
    [batch, head, from_loc, to_loc] — assumes batch index 0 is the only
    example; confirm against BertLike's output shape.
    """
    disable_eager_execution()
    model = BertLike()
    sess = init_session()
    #sess.run(tf.compat.v1.global_variables_initializer())
    # Restore checkpoint weights directly (no variable initializer needed).
    load_v2_to_v2(sess, get_bert_full_path(), False)
    attention_prob_list, = sess.run([model.attention_probs_list])
    html = HtmlVisualizer("position.html")
    for layer_no, attention_prob in enumerate(attention_prob_list):
        html.write_headline("Layer {}".format(layer_no))
        # acc_dict: (offset, head_idx) -> list of attention probs at that
        # relative offset; zero_scores: per-head probs of attending to
        # token 0 (collected separately below).
        acc_dict = {}
        zero_scores = [list() for _ in range(12)]
        # Sample even source positions 2..38; targets limited to first 20.
        for loc in range(2, 40, 2):
            print("Source : ", loc)
            for target_loc in range(20):
                offset = target_loc - loc  # relative position of target
                print(offset, end=" ")
                # num_head: not defined in this view — presumably a
                # module-level constant (likely 12, matching zero_scores
                # and the loops below); TODO confirm.
                for head_idx in range(num_head):
                    key = offset, head_idx
                    if key not in acc_dict:
                        acc_dict[key] = []
                    e = attention_prob[0, head_idx, loc, target_loc]
                    if target_loc != 0:
                        acc_dict[key].append(e)
                    else:
                        # Attention to the first token is tracked apart
                        # from the offset statistics.
                        zero_scores[head_idx].append(e)
                    print("{0:.2f}".format(e * 100), end=" ")
                print()
        # Table of per-head average attention for offsets -7..6.
        rows = [[Cell("Loc")] + [Cell("Head{}".format(i)) for i in range(12)]]
        for offset in range(-7, +7):
            print(offset, end=" ")
            scores = []
            for head_idx in range(12):
                key = offset, head_idx
                try:
                    elems = acc_dict[key]
                    # Require at least 3 samples; otherwise treat the cell
                    # as missing (reuses the KeyError path below).
                    if len(elems) < 3:
                        raise KeyError
                    avg = average(elems)
                    scores.append(avg)
                    print("{0:.2f}".format(avg * 100), end=" ")
                except KeyError:
                    print("SKIP")
            print()
            rows.append([Cell(offset)]
                        + [Cell(float(v * 100), v * 1000) for v in scores])
        html.write_table(rows)
        html.write_paragraph("Attention to first token")
        # Per-head average attention mass on token 0.
        zero_scores = [average(l) for l in zero_scores]
        rows = [[Cell(" ")] + [Cell("Head{}".format(i)) for i in range(12)],
                [Cell(" ")] + [Cell(float(v * 100), v * 1000)
                               for v in zero_scores]]
        html.write_table(rows)
valid_freq, save_fn, save_interval, num_steps)
    # Tail of a function whose start is outside this view: the call above
    # completes, then the latest saved checkpoint path is returned.
    return save_fn()


def train_nil_from_v2_checkpoint(run_name, model_path):
    """Fine-tune NLI from a v2 checkpoint at *model_path*, caching the
    encoded training batches under the pickle key "nli_batch16".

    NOTE(review): "nil" in the name looks like a typo for "nli" — kept
    as-is since external callers may reference it.
    """
    hp = hyperparams.HPSENLI3()
    print(hp.batch_size)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522  # BERT-base uncased vocab size
    nli_setting.vocab_filename = "bert_voca.txt"
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    tokenizer = get_tokenizer()
    # Inject the special-token ids the loader needs for sequence packing.
    CLS_ID = tokenizer.convert_tokens_to_ids(["[CLS]"])[0]
    SEP_ID = tokenizer.convert_tokens_to_ids(["[SEP]"])[0]
    data_loader.CLS_ID = CLS_ID
    data_loader.SEP_ID = SEP_ID
    tf_logger.setLevel(logging.INFO)
    steps = 12271  # total training steps; presumably ~1 epoch — confirm
    data = load_cache("nli_batch16")
    if data is None:
        # Cache miss: encode from source data once and persist for reuse.
        tf_logger.info("Encoding data from csv")
        data = get_nli_batches_from_data_loader(data_loader, hp.batch_size)
        save_to_pickle(data, "nli_batch16")
    train_nli(hp, nli_setting, run_name, steps, data, model_path)


if __name__ == "__main__":
    # Usage: <script> <run_name> <model_path>
    disable_eager_execution()
    train_nil_from_v2_checkpoint(sys.argv[1], sys.argv[2])
def fn():
    """Kick off the NLI run "bert_nli"/step "0" from the full BERT checkpoint."""
    disable_eager_execution()
    run_nli_w_path("bert_nli", "0", get_bert_full_path())