def train_single(self, vec, masks1, masks2, labels, total_steps):
    """Build the training graph for the DGCNN model on a single GPU or CPU.

    :param vec: Bert Vector instance
    :param masks1: question masks
    :param masks2: evidence masks
    :param labels: labels, contain global, start, end
    :param total_steps: total number of training steps
    :return: (train op, loss, tensorflow summary, global step)
    """
    global_step = tf.train.get_or_create_global_step()

    # Warmup kicks in only when a positive warmup rate is configured;
    # warmup_steps is harmless to compute either way.
    use_warmup = self.hp.warmup_rate > 0.0
    warmup_steps = int(total_steps * self.hp.warmup_rate)
    lr = noam_scheme(global_step, warmup_steps, total_steps, self.hp.lr, use_warmup)
    optimizer = tf.train.AdamOptimizer(lr)

    # Embed question/evidence, attend over the question, then score span bounds.
    ques_embedd, evidence_embedd = get_embedding(vec, self.hp.maxlen1, masks1, masks2)
    ques_atten = self.question(ques_embedd)
    p_start, p_end = self.evidence(evidence_embedd, ques_atten, True)

    loss = self._calc_loss(labels, p_start, p_end)
    train_op = optimizer.minimize(loss, global_step=global_step)

    tf.summary.scalar("train_loss", loss)
    summaries = tf.summary.merge_all()
    return train_op, loss, summaries, global_step
def eval(self, vec, masks1, masks2, labels):
    """Build the evaluation graph (single device only).

    :param vec: Bert Vector instance
    :param masks1: question masks
    :param masks2: evidence masks
    :param labels: labels, contain global, start, end
    :return: (answer index pairs, loss, tensorflow summary)
    """
    # Forward pass with training flag disabled on the evidence module.
    ques_embedd, evidence_embedd = get_embedding(vec, self.hp.maxlen1, masks1, masks2)
    ques_atten = self.question(ques_embedd)
    p_start, p_end = self.evidence(evidence_embedd, ques_atten, False)

    loss = self._calc_loss(labels, p_start, p_end)

    # Decode the most likely start/end positions and pair them up: [N, 2].
    start_idx = tf.argmax(p_start, axis=1)  # [N]
    end_idx = tf.argmax(p_end, axis=1)      # [N]
    p = tf.stack([start_idx, end_idx], axis=-1)

    tf.summary.scalar('eval_loss', loss)
    summaries = tf.summary.merge_all()
    return p, loss, summaries
def train_multi(self, vec, masks1, masks2, labels, total_steps):
    """Build the training graph for the DGCNN model across multiple GPUs.

    Each GPU builds its own tower over a shard of the batch; gradients are
    averaged on the CPU and applied once per step.

    :param vec: Bert Vector instance
    :param masks1: question masks
    :param masks2: evidence masks
    :param labels: labels, contain global, start, end
    :param total_steps: total number of training steps
    :return: (train op, loss, tensorflow summary, scaled global step)
    """
    tower_grads = []
    global_step = tf.train.get_or_create_global_step()
    # One optimizer step consumes gpu_nums shards, so scale the step count
    # used for the learning-rate schedule accordingly.
    global_step_ = global_step * self.hp.gpu_nums

    warmup = False
    if self.hp.warmup_rate > 0.0:
        warmup = True
    warmup_steps = int(total_steps * self.hp.warmup_rate)
    lr = noam_scheme(global_step_, warmup_steps, total_steps, self.hp.lr, warmup)
    optimizer = tf.train.AdamOptimizer(lr)

    ques_embedd, evidence_embedd = get_embedding(vec, self.hp.maxlen1, masks1, masks2)
    # Shard embeddings and labels across the available GPUs.
    datas = split_inputs(self.hp.gpu_nums, ques_embedd, evidence_embedd, labels)
    losses = []
    with tf.variable_scope(tf.get_variable_scope()):
        for no in range(self.hp.gpu_nums):
            with tf.device("/gpu:%d" % no):
                with tf.name_scope("tower_%d" % no):
                    ques_atten = self.question(datas[0][no])
                    p_start, p_end = self.evidence(datas[1][no], ques_atten, True)
                    # Share variables between towers after they are first created.
                    tf.get_variable_scope().reuse_variables()
                    loss = self._calc_loss(datas[2][no], p_start, p_end)
                    losses.append(loss)
                    grads = optimizer.compute_gradients(loss)
                    tower_grads.append(grads)

    # Average per-tower gradients on the CPU and apply them once.
    with tf.device("/cpu:0"):
        grads = self._average_gradients(tower_grads)
        train_op = optimizer.apply_gradients(grads, global_step=global_step)
        loss = sum(losses) / len(losses)
        tf.summary.scalar("train_loss", loss)
        summaries = tf.summary.merge_all()
    return train_op, loss, summaries, global_step_
def build_cnn_model(num_vocab, dim_word, dim_fc, windows, dim_feature,
                    dropout_emb=0.0, dropout_fc=0.0, embedding_type=None,
                    vectors=None, freeze_emb=True, device=None):
    """Assemble a text-CNN classifier: embedding -> parallel convs -> FC head.

    :param num_vocab: vocabulary size
    :param dim_word: word-embedding dimension
    :param dim_fc: hidden size of the fully-connected classifier
    :param windows: iterable of convolution window heights
    :param dim_feature: number of feature maps per window size
    :param dropout_emb: dropout rate applied to embeddings
    :param dropout_fc: dropout rate applied in the classifier
    :param embedding_type: embedding variant passed through to get_embedding
    :param vectors: optional pretrained embedding vectors
    :param freeze_emb: whether pretrained embeddings stay frozen
    :param device: torch device the model is moved to
    :return: initialized CNNModel on the requested device
    """
    embedding = get_embedding(num_vocab, dim_word, vectors, freeze_emb,
                              embedding_type, dropout_emb)

    # One Conv2d per window size, each spanning the full word dimension.
    conv_layers = [nn.Conv2d(1, dim_feature, (w, dim_word)) for w in windows]
    cnns = nn.ModuleList(conv_layers)

    # Concatenated pooled features from all windows feed the classifier.
    dim_hidden = len(conv_layers) * dim_feature
    classifier = get_classifier(dim_hidden, dim_fc, dropout_fc)

    model = CNNModel(embedding, cnns, classifier)
    param_init(model)
    return model.to(device)
def build_model(data, args):
    """Construct a sentence classifier (embedding + encoder + FC head) from args.

    :param data: dataset object exposing `vocab` (with optional `vectors`)
    :param args: parsed hyperparameters; `args.model` selects the encoder
    :return: SentenceClassifier moved to `args.device`
    :raises ValueError: if `args.model` is not a supported encoder type
    """
    # Embedding layer; the transformer variant needs positional handling.
    is_transformer = args.model == 'transformer'
    embedding = get_embedding(len(data.vocab), args.dim_word,
                              data.vocab.vectors, args.freeze_emb,
                              args.embedding_type, args.dropout_emb,
                              transformer=is_transformer)

    # Sentence encoder + matching classifier input size.
    if args.model == 'rnn':
        sent_encoder = get_rnn_encoder(args.dim_word, args.dim_hidden,
                                       args.num_layers,
                                       attention=args.attention,
                                       mtype=args.mtype,
                                       dropout_rnn=args.dropout_rnn,
                                       dropout_attn=args.dropout_attn)
        # Bidirectional RNN doubles the hidden size seen by the classifier.
        classifier = get_classifier(args.dim_hidden * 2, args.dim_fc,
                                    args.dropout_fc)
    elif args.model == 'transformer':
        sent_encoder = get_transformer_encoder(args.dim_model, args.h, args.N,
                                               args.dim_ff, args.attention,
                                               args.dropout_transformer,
                                               args.dropout_attn)
        classifier = get_classifier(args.dim_model, args.dim_fc,
                                    args.dropout_fc)
    else:
        # Fail fast with a clear message instead of an UnboundLocalError below.
        raise ValueError("unsupported model type: %r" % (args.model,))

    return SentenceClassifier(args.model, embedding, sent_encoder, classifier)\
        .to(args.device)