Example #1
def test_languageModel(args):
    args.dropout = 0.0
    data = pickleload("../data2/random_train_data.pkl", "traindata")
    dev_data = data[len(data) * 4 // 5:len(data)]
    # dev_data = data[2000: 4000]

    batch = Batch(args)
    word2index = pickleload("./word_vec/word2index.pkl", "word2index.pkl")
    input_vec = len(word2index)

    dev_batches = batch.lm_dev_batch(dev_data, args.context_limit)

    log_msg = "输入词空间大小:%d" % (input_vec)
    logger.info(log_msg)
    print(log_msg)

    transform = Transformer(args, input_vec)
    transform.load_state_dict(
        torch.load("./modelsave/" + "TransformModel0.pkl"))
    if torch.cuda.is_available():
        transform = transform.cuda()

    # Print parameters:
    log_msg = "Model name: %s \n" % (args.loadmodelName)
    logger.info(log_msg)
    print(log_msg)

    all_count = 0
    loss_func = torch.nn.NLLLoss()
    total_loss = 0
    for dev_step, dev_batch in enumerate(dev_batches):
        context_idxs = dev_batch['context_idxs']
        seg_indexs = dev_batch['seg_indexs']
        cit_targets = dev_batch['cit_targets']
        targets = dev_batch['targets']
        target_indexs = dev_batch['target_indexs']
        ref_labels = dev_batch['ref_labels']
        id = dev_batch['id']
        print(id)
        # Build a float mask over tokens via the module-level `function` helper
        context_mask = torch.Tensor(
            np.array(
                [list(map(function, xx)) for xx in context_idxs.data.numpy()],
                dtype=float)).cuda()

        context_idxs = Variable(context_idxs).cuda()
        seg_indexs = Variable(seg_indexs).cuda()
        targets = Variable(targets).cuda()
        out1, out2 = transform(context_idxs, seg_indexs, context_mask,
                               target_indexs)
        # Average the per-sample NLL over the batch, then accumulate
        batch_loss = 0
        for i in range(out1.size(0)):
            batch_loss += loss_func(out1[i], targets[i])
        total_loss += batch_loss.item() / out1.size(0)
        all_count += 1
        del out1, out2
    print(total_loss / all_count)
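
All four examples build their attention masks with a module-level helper named `function`, whose definition is not shown here. A minimal sketch consistent with how it is used (one mask value per token id), assuming index 0 is the padding token, might look like this:

def function(token_id):
    # Hypothetical helper, not part of the examples above: maps a token id
    # to a mask value, assuming index 0 is the padding token.
    return 0.0 if token_id == 0 else 1.0
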
Example #2
def all_doubletrainKey(args):
    data = pickleload(
        '../Retrieval/train_data/small_pairs_random_train_data.pkl',
        "small_pairs_random_train_data")
    dev_data = pickleload("../data2/random_train_data.pkl", "dev_data")
    train_data = data[0] + data[1] + data[2] + data[3]
    dev_data = dev_data[len(dev_data) * 4 // 5:len(dev_data)]

    batch = Batch(args)
    # source_embedding = pickleload(args.source_emb_mat_pkl, "source_emb_mat_pkl")
    word2index = pickleload("./word_vec/word2index.pkl", "word2index.pkl")
    input_vec = len(word2index)

    train_batches = batch.double_train_batch(train_data, args.context_limit,
                                             args.num_epoches, args.batch_size)

    log_msg = "输入词空间大小:%d" % (input_vec)
    logger.info(log_msg)
    print(log_msg)

    transform = Transformer(args, input_vec)

    if torch.cuda.is_available():
        transform = transform.cuda()

    transform.load_state_dict(
        torch.load("./modelsave/" + "TransformModel0.pkl"))

    model = AllClassifyGetKeyWords(args, transform)

    model = model.cuda()
    if args.loadmodel:
        model.load_state_dict(torch.load("./modelsave/" + args.loadmodelName))

    parameters_trainable = list(
        filter(lambda p: p.requires_grad, model.parameters()))

    if args.optim == "Adadelta":
        optimizer = torch.optim.Adadelta(parameters_trainable,
                                         lr=args.learning_rate,
                                         weight_decay=args.init_weight_decay)
    elif args.optim == "Adam":
        optimizer = torch.optim.Adam(parameters_trainable,
                                     lr=args.learning_rate,
                                     weight_decay=args.init_weight_decay)
    elif args.optim == "SGD":
        optimizer = torch.optim.SGD(parameters_trainable,
                                    lr=args.learning_rate,
                                    weight_decay=args.init_weight_decay)

    # Print hyperparameters:
    log_msg = "Optimizer: %s \n Learning rate: %s \n Hidden size: %s\n Saved model name: %s \n" % (
        args.optim, args.learning_rate, args.d_model, args.modelName)
    logger.info(log_msg)
    print(log_msg)

    set_epoch = 1
    pbar = tqdm(total=len(train_data) * args.num_epoches // args.batch_size +
                1)

    def loss_func(high_out, low_out, seleout11, seleout12, seleout21,
                  seleout22):
        # Margin-1 ranking term between high- and low-relevance scores,
        # plus squared penalties pulling each selection score toward 7.
        ones = torch.ones(high_out.size(0), 1).cuda()
        sevens = 7 * torch.ones(high_out.size(0), 1).cuda()
        rank_loss = torch.mean(ones - high_out + low_out)
        sele_loss = (torch.mean((sevens - seleout11) ** 2) +
                     torch.mean((sevens - seleout12) ** 2) +
                     torch.mean((sevens - seleout21) ** 2) +
                     torch.mean((sevens - seleout22) ** 2))
        return F.relu(rank_loss + sele_loss), rank_loss

    print_loss_total = 0
    print_loss_total2 = 0
    old_accu = 0
    best_epoch = 0
    for train_step, (train_batch, epoch) in enumerate(train_batches):
        pbar.update(1)
        high_context_idxs = train_batch['high_cit_context_idxs']
        high_seg_ids = train_batch['high_seg_indexs']
        low_context_idxs = train_batch['low_cit_context_idxs']
        low_seg_ids = train_batch['low_seg_indexs']
        high_source_context_idxs = train_batch['high_source_context_idxs']
        high_source_seg_indexs = train_batch['high_source_seg_indexs']
        low_source_context_idxs = train_batch['low_source_context_idxs']
        low_source_seg_indexs = train_batch['low_source_seg_indexs']

        high_context_mask = torch.Tensor(
            np.array([list(map(function, xx))
                      for xx in high_context_idxs.data.numpy()],
                     dtype=float)).cuda()
        low_context_mask = torch.Tensor(
            np.array([list(map(function, xx))
                      for xx in low_context_idxs.data.numpy()],
                     dtype=float)).cuda()
        high_source_context_mask = torch.Tensor(
            np.array([list(map(function, xx))
                      for xx in high_source_context_idxs.data.numpy()],
                     dtype=float)).cuda()
        low_source_context_mask = torch.Tensor(
            np.array([list(map(function, xx))
                      for xx in low_source_context_idxs.data.numpy()],
                     dtype=float)).cuda()

        high_context_idxs = Variable(high_context_idxs).cuda()
        high_seg_ids = Variable(high_seg_ids).cuda()
        low_context_idxs = Variable(low_context_idxs).cuda()
        low_seg_ids = Variable(low_seg_ids).cuda()
        high_source_context_idxs = Variable(high_source_context_idxs).cuda()
        high_source_seg_indexs = Variable(high_source_seg_indexs).cuda()
        low_source_context_idxs = Variable(low_source_context_idxs).cuda()
        low_source_seg_indexs = Variable(low_source_seg_indexs).cuda()

        out1, seleout11, seleout12 = model(high_context_idxs, high_seg_ids,
                                           high_context_mask,
                                           high_source_context_idxs,
                                           high_source_seg_indexs,
                                           high_source_context_mask)
        out2, seleout21, seleout22 = model(low_context_idxs, low_seg_ids,
                                           low_context_mask,
                                           low_source_context_idxs,
                                           low_source_seg_indexs,
                                           low_source_context_mask)
        # Get loss
        optimizer.zero_grad()
        # out1/out2: relevance scores for the high- and low-ranked pairs
        loss, loss2 = loss_func(out1, out2, seleout11, seleout12, seleout21,
                                seleout22)
        # Backward propagation
        loss.backward()
        optimizer.step()
        loss_value = loss.data.item()
        print_loss_total += loss_value
        print_loss_total2 += loss2.data.item()
        del out1, out2
        if train_step % 100 == 0:
            log_msg = 'Epoch: %d, Train_step %d  loss1: %.4f, loss2:%.4f' % (
                epoch, train_step, print_loss_total / 100,
                print_loss_total2 / 100)
            logger.debug(log_msg)
            print(log_msg)
            print_loss_total = 0
            print_loss_total2 = 0
        if epoch == set_epoch:
            set_epoch += 1
            dev_batches = batch.dev_batch(dev_data, args.context_limit)
            result_dic = {}
            true_label_dic = {}
            for dev_step, dev_batch in enumerate(dev_batches):
                context_idxs = dev_batch['context_idxs']
                source_context_idxs = dev_batch['source_context_idxs']
                seg_indexs = dev_batch['seg_indexs']
                source_seg_indexs = dev_batch['source_seg_indexs']
                ref_labels = dev_batch['ref_labels']
                id = dev_batch['id']

                context_mask = torch.Tensor(
                    np.array([list(map(function, xx))
                              for xx in context_idxs.data.numpy()],
                             dtype=float)).cuda()
                source_context_mask = torch.Tensor(
                    np.array([list(map(function, xx))
                              for xx in source_context_idxs.data.numpy()],
                             dtype=float)).cuda()

                context_idxs = Variable(context_idxs).cuda()
                seg_indexs = Variable(seg_indexs).cuda()
                source_context_idxs = Variable(source_context_idxs).cuda()
                source_seg_indexs = Variable(source_seg_indexs).cuda()
                out, seleout1, seleout2 = model(context_idxs, seg_indexs,
                                                context_mask,
                                                source_context_idxs,
                                                source_seg_indexs,
                                                source_context_mask)
                # Collect predictions and gold labels per query id
                if id not in result_dic:
                    result_dic[id] = []
                    true_label_dic[id] = ref_labels
                result_dic[id].append(out.cpu().data)
                del out
            picklesave(result_dic, "./modelsave/all_dev_result_dic22.pkl",
                       "./modelsave/result_dic.pkl")
            picklesave(true_label_dic,
                       "./modelsave/all_dev_true_label_dic22.pkl",
                       "./modelsave/true_label_dic.pkl")
            keys = result_dic.keys()
            MAPS = 0
            precisions = 0
            recalls = 0
            for key in keys:
                out = torch.cat(result_dic[key], dim=0)
                # Top-2 candidate indices across all batches for this id
                predict_index = torch.topk(out, 2,
                                           dim=0)[1].squeeze(1).data.numpy()
                # print("Predicted labels:", predict_index)
                precision, recall, MAP = cal_MAP(true_label_dic[key],
                                                 predict_index)
                MAPS += MAP
                precisions += precision
                recalls += recall

            MAPS /= len(dev_data)
            precisions /= len(dev_data)
            recalls /= len(dev_data)
            all_loss = MAPS
            if all_loss > old_accu:
                old_accu = all_loss
                torch.save(model.state_dict(),
                           "./modelsave/max" + args.modelName)
                best_epoch = epoch
            # else:
            #     args.learning_rate = args.learning_rate / 2.0
            #     if args.learning_rate <= 1e-6:
            #         args.learning_rate = 1e-6
            #     if args.optim == "Adadelta":
            #         optimizer = torch.optim.Adadelta(parameters_trainable, lr=args.learning_rate,
            #                                          weight_decay=args.init_weight_decay)
            #     elif args.optim == "Adam":
            #         optimizer = torch.optim.Adam(parameters_trainable, lr=args.learning_rate,
            #                                      weight_decay=args.init_weight_decay)
            #     elif args.optim == "SGD":
            #         optimizer = torch.optim.SGD(parameters_trainable, lr=args.learning_rate,
            #                                     weight_decay=args.init_weight_decay)
            log_msg = '\nDev set MAP: %.4f  P: %.4f  R: %.4f\n Best-MAP epoch so far: %d' % (
                all_loss, precisions, recalls, best_epoch)
            logger.info(log_msg)
            print(log_msg)
            # Save the model after every epoch
            torch.save(model.state_dict(), "./modelsave/" + args.modelName)
    torch.save(model.state_dict(), "./modelsave/" + args.modelName)
    pbar.close()
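
The pairwise term inside loss_func above is close to what torch.nn.MarginRankingLoss computes, except that loss_func applies relu() after averaging rather than per pair. A small self-contained sketch of the difference (the tensors here are illustrative, not taken from the examples):

import torch

high = torch.tensor([[3.0], [0.5]])   # scores for the preferred items
low = torch.tensor([[1.0], [0.8]])    # scores for the dispreferred items

# Mean of per-pair hinges: mean(relu(1 - high + low))
ranking = torch.nn.MarginRankingLoss(margin=1.0)
per_pair = ranking(high, low, torch.ones_like(high))

# loss_func's variant: relu of the mean, so pairs already outside the
# margin can cancel pairs still inside it
averaged = torch.relu(torch.mean(1 - high + low))

print(per_pair.item(), averaged.item())  # 0.65 vs 0.15
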
Example #3
def train(args):
    train_data = pickleload('../Retrieval/train_data/single_train_data.pkl',
                            "traindata")

    batch = Batch(args)
    # source_embedding = pickleload(args.source_emb_mat_pkl, "source_emb_mat_pkl")
    word2index = pickleload("./word_vec/word2index.pkl", "word2index.pkl")
    input_vec = len(word2index)

    train_batches = batch.train_batch(train_data, args.context_limit,
                                      args.num_epoches, args.batch_size)

    log_msg = "输入词空间大小:%d" % (input_vec)
    logger.info(log_msg)
    print(log_msg)

    model = Transformer(args, input_vec)

    if torch.cuda.is_available():
        model = model.cuda()

    # Uniform random initialization of all parameters
    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    parameters_trainable = list(
        filter(lambda p: p.requires_grad, model.parameters()))

    if args.optim == "Adadelta":
        optimizer = torch.optim.Adadelta(parameters_trainable,
                                         lr=args.learning_rate,
                                         weight_decay=args.init_weight_decay)
    elif args.optim == "Adam":
        optimizer = torch.optim.Adam(parameters_trainable,
                                     lr=args.learning_rate,
                                     weight_decay=args.init_weight_decay)
    elif args.optim == "SGD":
        optimizer = torch.optim.SGD(parameters_trainable,
                                    lr=args.learning_rate,
                                    weight_decay=args.init_weight_decay)

    if args.loadmodel:
        model.load_state_dict(torch.load("./modelsave/" + args.loadmodelName))

    # Print hyperparameters:
    log_msg = "Optimizer: %s \n Learning rate: %s \n Hidden size: %s\n Saved model name: %s \n" % (
        args.optim, args.learning_rate, args.d_model, args.modelName)
    logger.info(log_msg)
    print(log_msg)

    set_epoch = 0
    pbar = tqdm(total=len(train_data) * args.num_epoches // args.batch_size +
                1)

    loss_func = torch.nn.NLLLoss()
    print_loss_total = 0
    for train_step, (train_batch, epoch) in enumerate(train_batches):
        pbar.update(1)
        context_idxs = train_batch['context_idxs']
        seg_ids = train_batch['seg_indexs']
        target_indexs = train_batch['target_indexs']
        targets = train_batch['targets']
        labels = train_batch['labels']

        # print("up_context_idxs",up_context_idxs)
        # print("down_context_idxs",down_context_idxs)
        # print("target_idxs",target_idxs)
        # print("-----------------------------------------------------")

        context_mask = torch.Tensor(
            np.array(
                [list(map(function, xx)) for xx in context_idxs.data.numpy()],
                dtype=float)).cuda()

        context_idxs = Variable(context_idxs).cuda()
        seg_ids = Variable(seg_ids).cuda()
        targets = Variable(targets).cuda()
        labels = Variable(labels).cuda()

        out1, out2 = model(context_idxs, seg_ids, context_mask, target_indexs)
        # Get loss
        optimizer.zero_grad()
        #out1:batch * num_target * word_vec
        #out2:batch * 2
        loss1 = 0
        for i in range(out1.size(0)):
            loss1 += loss_func(out1[i], targets[i])
        loss2 = loss_func(out2, labels)
        loss = loss1 / out1.size(0) + loss2
        # Backward propagation
        loss.backward()
        optimizer.step()
        loss_value = loss.data.item()
        print_loss_total += loss_value

        if train_step % 200 == 0:
            log_msg = 'Epoch: %d, Train_step %d  loss: %.4f' % (
                epoch, train_step, print_loss_total / 200)
            logger.debug(log_msg)
            print(log_msg)
            print_loss_total = 0
        if epoch == set_epoch:
            set_epoch += 1
            # Save the model after every epoch
            torch.save(model.state_dict(), "./modelsave/" + args.modelName)
    torch.save(model.state_dict(), "./modelsave/" + args.modelName)
    pbar.close()
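
The per-sample loop that accumulates loss1 in train() can be vectorized by flattening the batch and target dimensions. A minimal sketch, with shapes inferred from the loop (an assumption, since the model is defined elsewhere):

import torch

# Hypothetical shapes inferred from the loop in train():
#   log_probs: (batch, num_target, vocab) log-probabilities
#   targets:   (batch, num_target) gold token indices
batch, num_target, vocab = 4, 3, 10
log_probs = torch.log_softmax(torch.randn(batch, num_target, vocab), dim=-1)
targets = torch.randint(0, vocab, (batch, num_target))

loss_func = torch.nn.NLLLoss()

# Loop version, as in train(): average of per-sample NLL losses.
loop_loss = sum(loss_func(log_probs[i], targets[i])
                for i in range(batch)) / batch

# Vectorized version: flatten the batch and target dimensions.
vec_loss = loss_func(log_probs.reshape(-1, vocab), targets.reshape(-1))

print(loop_loss.item(), vec_loss.item())  # identical up to float rounding
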
Example #4
def test(args):
    args.dropout = 0.0
    data = pickleload("../data2/random_train_data.pkl", "traindata")
    dev_data = data[len(data) * 4 // 5:len(data)]
    # dev_data = data[2000: 4000]

    batch = Batch(args)
    word2index = pickleload("./word_vec/word2index.pkl", "word2index.pkl")
    input_vec = len(word2index)

    dev_batches = batch.dev_batch(dev_data, args.context_limit)

    log_msg = "输入词空间大小:%d" % (input_vec)
    logger.info(log_msg)
    print(log_msg)

    transform = Transformer(args, input_vec)
    # transform.load_state_dict(torch.load("./modelsave/" + "TransformModel0.pkl"))
    if torch.cuda.is_available():
        transform = transform.cuda()

    model = Classify(args, transform)

    model.load_state_dict(torch.load("./modelsave/" + "maxclassifyModel2.pkl"))

    if torch.cuda.is_available():
        model = model.cuda()

    # Print parameters:
    log_msg = "Model name: %s \n" % (args.loadmodelName)
    logger.info(log_msg)
    print(log_msg)

    result_dic = {}
    true_label_dic = {}
    for dev_step, dev_batch in enumerate(dev_batches):
        context_idxs = dev_batch['context_idxs']
        seg_indexs = dev_batch['seg_indexs']
        cit_targets = dev_batch['cit_targets']
        target = dev_batch['targets']
        ref_labels = dev_batch['ref_labels']
        id = dev_batch['id']
        print(id)
        context_mask = torch.Tensor(
            np.array(
                [list(map(function, xx)) for xx in context_idxs.data.numpy()],
                dtype=float)).cuda()

        context_idxs = Variable(context_idxs).cuda()
        seg_indexs = Variable(seg_indexs).cuda()
        out = model(context_idxs, seg_indexs, context_mask)
        # Collect predictions and gold labels per query id
        if id not in result_dic:
            result_dic[id] = []
            true_label_dic[id] = ref_labels
        result_dic[id].append(out.cpu().data)
        del out
    picklesave(result_dic, "./modelsave/classifyModel2_predict.pkl",
               "./modelsave/result_dic.pkl")
    picklesave(true_label_dic, "./modelsave/classifyModel2_true.pkl",
               "./modelsave/true_label_dic.pkl")
    keys = result_dic.keys()
    MAPS = 0
    precisions = 0
    recalls = 0
    for key in keys:
        out = torch.cat(result_dic[key], dim=0)
        # Top-2 candidate indices across all batches for this id
        predict_index = torch.topk(out, 2, dim=0)[1].squeeze(1).data.numpy()
        # print("Predicted labels:", predict_index)
        precision, recall, MAP = cal_MAP(true_label_dic[key], predict_index)
        MAPS += MAP
        precisions += precision
        recalls += recall

    MAPS /= len(dev_data)
    precisions /= len(dev_data)
    recalls /= len(dev_data)
    print("MAP:%.4f  P:%.4f  R:%.4f" % (MAPS, precisions, recalls))