コード例 #1
0
ファイル: model.py プロジェクト: xiongma/DGCNN
    def train_single(self, vec, masks1, masks2, labels, total_steps):
        """
        Train the DGCNN model with a single GPU or CPU.

        :param vec: Bert Vector instance
        :param masks1: question masks
        :param masks2: evidence masks
        :param labels: labels, contain global, start, end
        :param total_steps: total train steps
        :return: (train op, loss, tensorflow summary, global step) --
            note the summary comes before the global step in the tuple
        """
        global_step = tf.train.get_or_create_global_step()

        # Warmup is enabled only when a positive warmup rate is configured.
        warmup = False
        if self.hp.warmup_rate > 0.0:
            warmup = True

        warmup_steps = int(total_steps * self.hp.warmup_rate)
        lr = noam_scheme(global_step, warmup_steps, total_steps, self.hp.lr,
                         warmup)

        optimizer = tf.train.AdamOptimizer(lr)
        ques_embedd, evidence_embedd = get_embedding(vec, self.hp.maxlen1,
                                                     masks1, masks2)

        ques_atten = self.question(ques_embedd)
        # Third argument True selects training mode in the evidence network
        # (presumably toggles dropout/batch statistics -- confirm in evidence()).
        p_start, p_end = self.evidence(evidence_embedd, ques_atten, True)
        loss = self._calc_loss(labels, p_start, p_end)
        train_op = optimizer.minimize(loss, global_step=global_step)
        tf.summary.scalar("train_loss", loss)
        summaries = tf.summary.merge_all()

        return train_op, loss, summaries, global_step
コード例 #2
0
ファイル: model.py プロジェクト: xiongma/DGCNN
    def eval(self, vec, masks1, masks2, labels):
        """
        Evaluate the model on a single device (one GPU or CPU).

        :param vec: Bert Vector instance
        :param masks1: question masks
        :param masks2: evidence masks
        :param labels: labels, contain global, start, end
        :return: predicted (start, end) index pairs, loss, tensorflow summary
        """
        question_emb, evidence_emb = get_embedding(vec, self.hp.maxlen1,
                                                   masks1, masks2)

        attended_question = self.question(question_emb)
        # Third argument False selects inference mode in the evidence network.
        start_probs, end_probs = self.evidence(evidence_emb,
                                               attended_question, False)

        loss = self._calc_loss(labels, start_probs, end_probs)

        # Most likely answer span: per-sample argmax over start and end
        # distributions, paired into shape [N, 2].
        start_idx = tf.argmax(start_probs, axis=1)  # [N]
        end_idx = tf.argmax(end_probs, axis=1)  # [N]
        answer_spans = tf.stack([start_idx, end_idx], axis=-1)

        tf.summary.scalar('eval_loss', loss)
        merged_summaries = tf.summary.merge_all()

        return answer_spans, loss, merged_summaries
コード例 #3
0
ファイル: model.py プロジェクト: xiongma/DGCNN
    def train_multi(self, vec, masks1, masks2, labels, total_steps):
        """
        Train the DGCNN model with multiple GPUs (synchronous data parallel).

        :param vec: Bert Vector instance
        :param masks1: question masks
        :param masks2: evidence masks
        :param labels: labels, contain global, start, end
        :param total_steps: total train steps
        :return: (train op, loss, tensorflow summary, scaled global step)
        """
        tower_grads = []
        global_step = tf.train.get_or_create_global_step()
        # global_step counts apply_gradients calls; each call consumes
        # gpu_nums batches, so the schedule sees the scaled step count.
        global_step_ = global_step * self.hp.gpu_nums

        warmup = False
        if self.hp.warmup_rate > 0.0:
            warmup = True
        warmup_steps = int(total_steps * self.hp.warmup_rate)

        lr = noam_scheme(global_step_, warmup_steps, total_steps, self.hp.lr,
                         warmup)
        optimizer = tf.train.AdamOptimizer(lr)

        ques_embedd, evidence_embedd = get_embedding(vec, self.hp.maxlen1,
                                                     masks1, masks2)
        # Shard embeddings and labels across devices; datas[0]=question
        # embeddings, datas[1]=evidence embeddings, datas[2]=labels.
        datas = split_inputs(self.hp.gpu_nums, ques_embedd, evidence_embedd,
                             labels)
        losses = []

        with tf.variable_scope(tf.get_variable_scope()):
            for no in range(self.hp.gpu_nums):
                with tf.device("/gpu:%d" % no):
                    with tf.name_scope("tower_%d" % no):
                        ques_atten = self.question(datas[0][no])
                        p_start, p_end = self.evidence(datas[1][no],
                                                       ques_atten, True)

                        # Share model variables between towers; only the
                        # first tower creates them.
                        tf.get_variable_scope().reuse_variables()
                        loss = self._calc_loss(datas[2][no], p_start, p_end)
                        losses.append(loss)
                        grads = optimizer.compute_gradients(loss)
                        tower_grads.append(grads)

        # Reduce on CPU: average per-tower gradients, apply once, and
        # report the mean tower loss.
        with tf.device("/cpu:0"):
            grads = self._average_gradients(tower_grads)
            train_op = optimizer.apply_gradients(grads,
                                                 global_step=global_step)
            loss = sum(losses) / len(losses)
            tf.summary.scalar("train_loss", loss)
            summaries = tf.summary.merge_all()

        return train_op, loss, summaries, global_step_
コード例 #4
0
def build_cnn_model(num_vocab,
                    dim_word,
                    dim_fc,
                    windows,
                    dim_feature,
                    dropout_emb=0.0,
                    dropout_fc=0.0,
                    embedding_type=None,
                    vectors=None,
                    freeze_emb=True,
                    device=None):
    """
    Assemble a CNN sentence classifier: embedding layer, one Conv2d per
    window size, and a fully connected classifier head.

    :param num_vocab: vocabulary size
    :param dim_word: word-embedding dimension
    :param dim_fc: hidden size of the classifier
    :param windows: iterable of convolution window heights
    :param dim_feature: output channels per convolution
    :param dropout_emb: dropout rate on the embedding
    :param dropout_fc: dropout rate in the classifier
    :param embedding_type: embedding variant passed to get_embedding
    :param vectors: pretrained embedding vectors, or None
    :param freeze_emb: whether to freeze the embedding weights
    :param device: target device for the finished model
    :return: initialized CNNModel placed on ``device``
    """
    emb_layer = get_embedding(num_vocab, dim_word, vectors, freeze_emb,
                              embedding_type, dropout_emb)
    # One convolution per window size; each spans the full word dimension.
    conv_layers = nn.ModuleList(
        [nn.Conv2d(1, dim_feature, (window, dim_word)) for window in windows])
    # Features from every convolution are concatenated for the classifier.
    hidden_size = dim_feature * len(conv_layers)
    fc_head = get_classifier(hidden_size, dim_fc, dropout_fc)
    net = CNNModel(emb_layer, conv_layers, fc_head)
    param_init(net)
    return net.to(device)
コード例 #5
0
def build_model(data, args):
    """
    Build a sentence classifier from parsed command-line arguments.

    :param data: dataset object exposing ``vocab`` (with ``.vectors``)
    :param args: parsed arguments; ``args.model`` selects the encoder
        ('rnn' or 'transformer')
    :return: a SentenceClassifier moved to ``args.device``
    :raises ValueError: if ``args.model`` is not a supported encoder type
    """
    # The embedding factory needs to know it feeds a transformer
    # (presumably to add positional encodings -- confirm in get_embedding).
    is_transformer = args.model == 'transformer'
    embedding = get_embedding(len(data.vocab),
                              args.dim_word,
                              data.vocab.vectors,
                              args.freeze_emb,
                              args.embedding_type,
                              args.dropout_emb,
                              transformer=is_transformer)

    # sentence encoder + classifier head
    if args.model == 'rnn':
        sent_encoder = get_rnn_encoder(args.dim_word,
                                       args.dim_hidden,
                                       args.num_layers,
                                       attention=args.attention,
                                       mtype=args.mtype,
                                       dropout_rnn=args.dropout_rnn,
                                       dropout_attn=args.dropout_attn)
        # Bidirectional encoder: hidden size doubles before the classifier.
        classifier = get_classifier(args.dim_hidden * 2, args.dim_fc,
                                    args.dropout_fc)
    elif args.model == 'transformer':
        sent_encoder = get_transformer_encoder(args.dim_model, args.h, args.N,
                                               args.dim_ff, args.attention,
                                               args.dropout_transformer,
                                               args.dropout_attn)
        classifier = get_classifier(args.dim_model, args.dim_fc,
                                    args.dropout_fc)
    else:
        # Previously an unknown model fell through and the final line raised
        # an opaque NameError. The CNN variant lives in build_cnn_model and
        # is currently not wired up here.
        raise ValueError("unsupported model type: %r" % (args.model,))

    return SentenceClassifier(args.model, embedding, sent_encoder, classifier)\
        .to(args.device)