Beispiel #1
0
def model_test(**kwargs):
    '''
    Load the model selected by the passed-in options and write its
    predictions on the test set to ``opt.result_file``.

    :param kwargs: overrides for the default options in ``opt``
    '''
    opt.parse(kwargs)
    model = getattr(Nets, opt.model)().eval()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    # Select device once; default to CPU so `device` is always defined.
    device = torch.device("cpu")
    if opt.use_gpu:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model.to(device)

    # Prepare the test data.
    test_data = imageSentiment(opt.train_path, test=True, train=False)
    test_dataloader = DataLoader(test_data, batch_size=opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)
    results = []

    with torch.no_grad():
        # BUG FIX: the loop previously iterated an undefined name
        # `dataloader` (NameError); use the loader built above.
        for ii, data in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
            label, input = data
            if opt.use_gpu:
                label, input = label.to(device), input.to(device)
            score = model(input)
            _, predicted = torch.max(score.data, 1)
            # BUG FIX: previously zipped `label` with the raw `score` rows,
            # so int(label_) would fail on a multi-class score vector; pair
            # each ground-truth label with the predicted class index instead.
            batch_result = [(int(path_), int(label_))
                            for path_, label_ in zip(label, predicted)]
            results += batch_result
    write_csv(results, opt.result_file)  # dump (label, prediction) pairs to CSV
Beispiel #2
0
def test(**kwargs):
    '''
    Run the configured model on the test set and write (path, probability)
    pairs to ``opt.result_file``.

    :param kwargs: overrides for the default options in ``opt``
    :return: list of (image path, probability of class 0) tuples
    '''
    opt.parse(kwargs)

    # configure model
    model = getattr(models, opt.model)().eval()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # data
    test_data = DogCat(opt.test_data_root, test=True)
    test_dataloader = DataLoader(test_data, batch_size=opt.batch_size,
                                 shuffle=False, num_workers=opt.num_workers)
    results = []
    # BUG FIX: removed the unconditional `import ipdb; ipdb.set_trace()`
    # debug leftover that halted every run, and replaced the deprecated
    # Variable(..., volatile=True) with torch.no_grad().
    with t.no_grad():
        for ii, (data, path) in tqdm(enumerate(test_dataloader)):
            input = data
            if opt.use_gpu: input = input.cuda()
            score = model(input)
            # Probability of the first class; softmax needs an explicit dim
            # (implicit-dim softmax is deprecated and ambiguous).
            probability = t.nn.functional.softmax(score, dim=1)[:, 0].data.tolist()
            batch_results = [(path_, probability_)
                             for path_, probability_ in zip(path, probability)]
            results += batch_results
    write_csv(results, opt.result_file)

    return results
Beispiel #3
0
def train(**kwargs):
    """Train a facial-expression sentiment model.

    Options come from ``opt`` and may be overridden via *kwargs*.  The loss
    is plotted with visdom, and the model is saved and validated after every
    epoch.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    #step1: config model
    model = getattr(Nets,opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model.to(device)

    #step2: data
    train_data = imageSentiment(opt.train_path,train = True) # training set
    val_data = imageSentiment(opt.train_path,train = False) # validation set
    train_dataloader = DataLoader(train_data,batch_size = opt.batch_size,shuffle=True,num_workers = opt.num_workers)
    val_dataloader = DataLoader(val_data,batch_size = opt.batch_size,shuffle=False,num_workers = opt.num_workers)

    #step3: loss function and optimizer
    # criterion = nn.CrossEntropyLoss() # cross entropy; if used, the network must NOT apply softmax itself
    lr = opt.lr
    # optimizer = Optim.Adam(model.parameters(),lr = lr,weight_decay= opt.weight_decay)
    # NOTE(review): `lr` above is unused — SGD is built with a hard-coded 0.001.
    optimizer = Optim.SGD(model.parameters(),lr = 0.001,momentum=0.9,nesterov=True)
    #step4: metrics (average loss and a 7-class confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(7)
    # NOTE(review): previous_loss is never read — no lr decay happens here.
    previous_loss = 1e100

    # training loop
    for i in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        total_loss = 0.
        for ii,(label,data) in tqdm(enumerate(train_dataloader),total=len(train_dataloader)):
            if opt.use_gpu:
                label,data = label.to(device),data.to(device)

            optimizer.zero_grad()
            score = model(data)
            # NB: nll_loss / cross-entropy take class-index targets; no
            # one-hot encoding is needed.
            loss = F.nll_loss(score,label)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

            # update running statistics and plot
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data,label.data)

            if ii%opt.print_freq==opt.print_freq-1:
                vis.plot('loss',loss_meter.value()[0])

        vis.plot('mach avgloss', total_loss/len(train_dataloader))
        model.save()

        # compute validation-set accuracy
        val_accuracy = val(model,val_dataloader)

        vis.plot('val_accuracy',val_accuracy)
Beispiel #4
0
def train(**kwargs):
    """Fine-tune an ImageNet-pretrained DenseNet-201 for 2-class CAG data.

    Options come from ``opt`` and may be overridden via *kwargs*.  Training
    metrics are logged via ``logger``; every 5 epochs the model is validated
    and ``opt.flag`` records whether validation accuracy reached 0.96.
    """
    opt.parse(kwargs)

    # step1: configure model — pretrained DenseNet-201 with a fresh 2-class
    # head (1920 is DenseNet-201's final feature dimension).
    model = models.densenet201(pretrained=True)
    model.classifier = torch.nn.Linear(1920, 2)

    model = model.cuda()
    model = torch.nn.DataParallel(model)

    # step2: data
    train_data = CAG(opt.train_data_root, train=True)
    val_data = CAG(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    loss_func = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.SGD(model.parameters(), lr=lr)

    # train
    for epoch in range(opt.max_epoch):
        print('epoch {}'.format(epoch + 1))
        # BUG FIX: train_num started at 1, counting a phantom sample and
        # biasing the reported accuracy low.
        train_num = 0
        train_acc = 0
        batch_num = 0
        loss_sum = 0
        for ii, (data, label) in enumerate(train_dataloader):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            # softmax is only for the accuracy bookkeeping below
            probability = t.nn.functional.softmax(score, dim=1)
            _, result = torch.max(probability, 1)
            train_correct = (result == target).sum()
            train_acc += train_correct.item()
            train_num += target.size(0)
            # BUG FIX: CrossEntropyLoss applies log-softmax internally and
            # must receive the raw logits; it was previously fed the softmax
            # output, double-squashing the scores and weakening gradients.
            loss = loss_func(score, target)
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
            batch_num += 1

        # max(..., 1) guards against an empty dataloader
        print("当前loss:", loss_sum / max(batch_num, 1))
        logger.scalar_summary('train_loss', loss_sum / max(batch_num, 1), epoch)
        accuracy = train_acc / max(train_num, 1)
        logger.scalar_summary('train_accurancy', accuracy, epoch)

        # Decay the learning rate every 100 epochs.
        if (epoch + 1) % 100 == 0:
            lr = lr * opt.lr_decay
            print("当前学习率", lr)
            logger.scalar_summary('lr', lr, epoch)
            # Updating param_groups in place changes the rate without losing
            # optimizer state such as momentum.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        # validate and visualize every 5 epochs
        if (epoch + 1) % 5 == 0:
            val_accuracy, val_loss = val(model, val_dataloader)
            print("验证集上准确率为:", val_accuracy)
            # Record whether validation accuracy reached the 0.96 threshold.
            opt.flag = val_accuracy >= 0.96
            logger.scalar_summary('val_accurancy', val_accuracy, epoch)
            print("val_loss:", val_loss)
            logger.scalar_summary('val_loss', val_loss, epoch)
Beispiel #5
0
def train(**kwargs):
    """Train an FCN8s segmentation model on the Kaggle TGS Salt dataset.

    Options come from ``opt`` and may be overridden via *kwargs*.  The model
    is saved and validated every epoch; the learning rate is decayed when
    the epoch loss stops decreasing.
    """
    opt.parse(kwargs)

    # step1: configure model (defined in models.py)
    model = FCN8s()
    device = t.device('cpu')
    if opt.use_gpu:
        device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
    print(device)
    model.to(device)

    # step2: data preparation
    train_data = KaggleSalt(root=opt.train_data_root)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)

    val_data = KaggleSalt(opt.train_data_root)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.BCEWithLogitsLoss()
    lr = opt.lr
    optimizer = t.optim.RMSprop(model.parameters(),
                                lr=lr,
                                momentum=opt.momentum,
                                weight_decay=opt.weight_decay)

    loss_pre = 1e10

    # step4: training
    for epoch in range(opt.max_epoch):
        loss_now = 0
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            data = data.to(device)
            label = t.Tensor(label.float())
            label = label.to(device)

            # Forward pass: per-pixel logits, flattened so BCEWithLogitsLoss
            # treats every pixel as an independent binary decision.
            heatmap = model(data)
            loss = criterion(heatmap.reshape(-1), label.reshape(-1))

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # accumulate the epoch loss
            loss_now += loss.item()

        model.save()
        val_accuracy = val(model, val_dataloader)
        print('epoch:%d:   loss:%f / acc:%f' % (epoch, loss_now, val_accuracy))

        # Decay the learning rate when the epoch loss stopped decreasing.
        if loss_now > loss_pre:
            lr = lr * opt.lr_decay
            # BUG FIX: the decayed rate was computed but never handed to the
            # optimizer, making the decay a no-op; push it into every
            # param_group (preserves RMSprop state).
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        loss_pre = loss_now
Beispiel #6
0
def train(**kwargs):
    """Pre-train BERT with the masked-LM and next-sentence objectives.

    Options come from ``opt`` and may be overridden via *kwargs*.  Progress
    is logged to TensorBoard, and the BERT encoder weights are saved under
    ``opt.ckpt_path`` when training finishes.
    """
    # The received parameters update the configuration dict.
    opt.parse(kwargs)

    # Step 0: data and device
    inputs = get_inputs(data_dir=opt.data_dir,
                        corpus_file=opt.corpus_file,
                        vocab_file=opt.vocab_path)
    train_dataloader = DataLoader(dataset=BERTDataset(
        inputs, max_sen_len=opt.max_sen_len),
                                  shuffle=True,
                                  batch_size=opt.batch_size,
                                  collate_fn=BERTCollate_fn)
    # idiom: the `True if ... else False` wrapper was redundant
    use_cuda = opt.use_cuda and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.empty_cache()
    device = torch.device('cuda' if use_cuda else 'cpu')
    writer = SummaryWriter()

    # Step 1: model — shared BERT encoder plus the two pre-training heads
    bert = BERT(n_layers=opt.n_layers,
                d_model=opt.d_model,
                vocab_size=opt.max_vocab_size,
                max_len=opt.max_sen_len,
                n_heads=opt.n_heads,
                n_seg=opt.n_seg,
                ff_hidden=opt.n_ff_hidden,
                device=device).to(device)
    masked_lm = MaskedLM(d_model=opt.d_model,
                         vocab_size=opt.max_vocab_size,
                         bert=bert).to(device)
    next_pred = NextPred(d_model=opt.d_model).to(device)

    # Write the encoder graph to TensorBoard using dummy inputs.
    dummy_input_ids = torch.zeros(
        (opt.batch_size, opt.max_sen_len)).long().to(device)
    dummy_seg_ids = torch.zeros(
        (opt.batch_size, opt.max_sen_len)).long().to(device)

    writer.add_graph(bert, (dummy_input_ids, dummy_seg_ids), False)

    # Step 2: criterion and optimizer over all three modules' parameters
    criterion = nn.CrossEntropyLoss()

    num_paras = sum(p.numel() for model in (bert, masked_lm, next_pred)
                    for p in model.parameters() if p.requires_grad)
    paras = list(bert.parameters()) + list(masked_lm.parameters()) + list(
        next_pred.parameters())
    print("Total number of parameters is {}".format(num_paras))
    optimizer = torch.optim.Adam(paras,
                                 lr=0.0001,
                                 betas=(0.9, 0.999),
                                 weight_decay=0.01)

    # Step 3: train
    print("Start training ...")
    for epoch in range(opt.epochs):
        epoch_loss = 0
        for i, batch_data in enumerate(train_dataloader, 1):

            input_ids, seg_ids, masked_pos, masked_token, isnext = map(
                lambda x: x.to(device), batch_data)
            # Reset gradients and forward
            optimizer.zero_grad()
            bertout = bert(input_ids, seg_ids)
            logits_lm = masked_lm(bertout, masked_pos)
            logits_clsf = next_pred(bertout)

            # Flatten for the two cross-entropy losses
            logits_lm = logits_lm.view(
                -1, logits_lm.size(-1))  # (bz * len_mask, vocab)
            masked_token = masked_token.view(-1, )  # (bz * len_mask, )
            logits_clsf = logits_clsf.view(-1, logits_clsf.size(-1))
            # BUG FIX: view() is not in-place and its result was discarded,
            # so isnext was never actually flattened; assign it.
            isnext = isnext.view(-1, )  # (bz, )

            loss_lm = criterion(logits_lm, masked_token)
            loss_clsf = criterion(logits_clsf, isnext)
            loss = loss_lm + loss_clsf

            # Batch accuracies for both objectives
            _, mask_preds = torch.max(logits_lm, dim=-1)
            _, next_preds = torch.max(logits_clsf, dim=-1)
            mask_pred_acc = mask_preds.eq(
                masked_token).sum().item() / masked_token.size(0)
            next_pred_acc = next_preds.eq(isnext).sum().item() / isnext.size(0)

            if i % 20 == 0:
                # hoist the shared global-step expression
                step = i + epoch * len(train_dataloader)
                writer.add_scalar('loss_lm', loss_lm.item(), step)
                writer.add_scalar('loss_clsf', loss_clsf.item(), step)
                writer.add_scalar('lm_acc', mask_pred_acc, step)
                writer.add_scalar('next_acc', next_pred_acc, step)
                print(
                    'Epoch {}, Batch {}/{}, loss_lm={}, loss_next={}, lm_acc={}, next_acc={}'
                    .format(epoch + 1, i,
                            len(train_dataloader), loss_lm.item(),
                            loss_clsf.item(), mask_pred_acc, next_pred_acc))

            epoch_loss += loss.item()

            # Backward and update
            loss.backward()
            optimizer.step()

        # (the original guard `(1 + epoch) % 1 == 0` was always true)
        print('Epoch:', '%04d' % (epoch + 1), 'cost =',
              '{:.6f}'.format(epoch_loss))

    print('finished train')

    # Step 4: save only the encoder weights, timestamped
    ckpt_file_name = dt.strftime(dt.now(), '%Y-%m-%d %H: %M: %S.ckpt')
    save_path = os.path.join(opt.ckpt_path, ckpt_file_name)
    torch.save(bert.state_dict(), save_path)
Beispiel #7
0
def train(**kwargs):
    '''
    Train the configured model on the CWRU bearing-fault dataset.

    :param kwargs: option overrides; defaults come from the config (``opt``)
    :return: None (the model is saved to disk every epoch)
    '''

    # Update configuration from command-line arguments.
    opt.parse(kwargs)
    # visdom plotting helper
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: build the model
    # Pick the model class named by opt.model.
    model = getattr(models, opt.model)()
    # Optionally resume from saved weights.
    if opt.load_model_path:
        model.load(opt.load_model_path)

    # Select the GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    model.to(opt.device)

    # step2: data
    train_data = CWRUDataset2D(opt.train_data_root, train=True)
    # The test set doubles as the validation set; none of this data is used
    # for training.
    test_data = CWRUDataset2D(opt.train_data_root, train=False)

    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True)
    test_dataloader = DataLoader(test_data, opt.batch_size, shuffle=False)

    # step3: objective and optimizer
    # Loss: cross entropy.
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    # Optimizer: Adam.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: metrics — smoothed loss plus a confusion matrix
    # Running mean/std of the loss.
    loss_meter = meter.AverageValueMeter()
    # Confusion matrix over opt.category classes.
    confusion_matrix = meter.ConfusionMeter(opt.category)
    previous_loss = 1e10

    # training loop
    for epoch in range(opt.max_epoch):

        # reset per-epoch statistics
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            # one optimization step
            input = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # update statistics and visualization
            loss_meter.add(loss.item())
            # detach() is the safer choice here (no autograd history)
            confusion_matrix.add(score.detach(), target.detach())

            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

                # drop into the debugger when the debug file exists
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()

        # save the model every epoch
        model.save()

        # compute and visualize metrics on the test set
        val_cm, val_accuracy = val(model, test_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()),
            lr=lr))

        # If the loss stopped decreasing, lower the learning rate.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # Updating param_groups in place keeps optimizer state (momentum etc.).
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
Beispiel #8
0
def train(**kwargs):
    """Train a relation-classification model over pre-trained word vectors.

    Options come from ``opt`` and may be overridden via *kwargs*; training
    and validation metrics are plotted with visdom every epoch.
    """
    print(kwargs)
    start = time.time()
    # BUG FIX: opt.parse must run before the Visualizer is created —
    # previously an `env` override passed via kwargs was silently ignored.
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # Load the word vectors (vector.txt lives next to the training data).
    print("Loading word vectors...Please wait.")
    vector = KeyedVectors.load_word2vec_format(
        os.path.join(os.path.dirname(os.path.realpath(opt.train_data_root)), 'vector.txt')
    )
    print("Successfully loaded word vectors.")

    # step1: model
    model = getattr(models, opt.model)(input_size=vector.vector_size+2, output_size=opt.class_num)
    if opt.load_model_path:
        model.load(opt.load_model_path)  # resume from saved weights
    if opt.use_gpu and t.cuda.is_available():
        model = model.cuda()
    print(f"Structure of {model.model_name}:\n{model}\n")

    # step2: data
    train_data = Sentence(root=opt.train_data_root,
                          relations=opt.relations,
                          max_length=opt.max_length,
                          vector=vector,
                          train=True)   # training set
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True)

    val_data = Sentence(opt.train_data_root, opt.relations, opt.max_length, vector, train=False)  # validation set
    val_dataloader = DataLoader(val_data, opt.batch_size, shuffle=True)

    # step3: objective and optimizer
    loss_fn = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=model.parameters(),
                             lr=lr,
                             weight_decay = opt.weight_decay)

    # step4: metrics — smoothed loss and a confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(opt.class_num)
    previous_loss = 1e100

    # training loop
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):

            # one optimization step
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            prediction = model(input)
            loss = loss_fn(prediction, target)
            loss.backward()
            optimizer.step()

            # update running statistics
            loss_meter.add(loss.item())
            confusion_matrix.add(prediction.data, target.data)

        # training accuracy from the confusion-matrix diagonal
        cm_value = confusion_matrix.value()
        correct = 0
        for i in range(cm_value.shape[0]):
            correct += cm_value[i][i]
        accuracy = 100. * correct / (cm_value.sum())

        vis.plot('train loss', loss_meter.value()[0])
        vis.plot('train accuracy', accuracy)

        if epoch % opt.save_epoch == opt.save_epoch - 1:
            model.save()

        # validation metrics and visualization
        val_lm, val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val loss', val_lm.value()[0])
        vis.plot('val accuracy', val_accuracy)

        print("epoch:{epoch}, lr:{lr}, loss:{loss}\ntrain_cm:\n{train_cm}\nval_cm:\n{val_cm}"
                .format(epoch=epoch,
                        loss=loss_meter.value()[0],
                        val_cm=str(val_cm.value()),
                        train_cm=str(confusion_matrix.value()),
                        lr=lr)
        )

        # If the loss stopped decreasing, lower the learning rate.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]

    cost = int(time.time()) - int(start)
    print(f"Cost {int(cost/60)}min{cost%60}s.")
Beispiel #9
0
def train(**kwargs):
    """Train a dog/cat classifier, persisting progress to a parameter dict.

    Options come from ``opt`` and may be overridden via *kwargs*.  Metrics
    are checkpointed into the dict stored at ``opt.pars_path`` so training
    can resume with the last recorded loss.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if os.path.exists(opt.load_model_path):
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Resume the bookkeeping dict (and last recorded loss) if it exists.
    if os.path.exists(opt.pars_path):
        dic = load_dict(opt.pars_path)
        previous_loss = dic['loss'][-1] if 'loss' in dic.keys() else 1e100
    else:
        dic = {}
        # BUG FIX: previous_loss was left undefined on this branch, raising
        # NameError at the end of the first epoch on a fresh run.
        previous_loss = 1e100

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)

    # train (epoch numbering resumes from 5, matching the original schedule)
    for epoch in range(5, opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.data.item())
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                dic = save_dict(opt.pars_path,
                                dic,
                                loss_data=loss_meter.value()[0])
                vis.plot('loss', dic['loss_data'])
                name = model.save()
                if os.path.exists(opt.debug_file):
                    import ipdb
                    # BUG FIX: was misspelled set_trave(), which raised
                    # AttributeError whenever the debug file existed.
                    ipdb.set_trace()

        name = model.save()
        # update learning rate: reduce it when the loss no longer decreases
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # Updating param_groups in place preserves optimizer state.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
        dic = save_dict(opt.pars_path,
                        dic,
                        name=name,
                        epoch=epoch,
                        lr=lr,
                        loss=loss_meter.value()[0],
                        train_cm=confusion_matrix.value())

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        dic = save_dict(opt.pars_path,
                        dic,
                        val_accuracy=val_accuracy,
                        val_cm=val_cm.value())

        vis.log(dic)
Beispiel #10
0
def train(**kwargs):
    """Train a dog/cat classifier with visdom monitoring.

    Options come from ``opt`` and may be overridden via *kwargs*.  The model
    is saved and validated each epoch; the learning rate is decayed whenever
    the smoothed loss stops decreasing.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                        shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                        shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            # BUG FIX: loss.data[0] raises on PyTorch >= 0.5 (0-dim tensor
            # indexing); use item(), consistent with the rest of the file.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # drop into the debugger when the debug file exists
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
                    epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()), lr=lr))

        # If the loss stopped decreasing, lower the learning rate.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # Updating param_groups in place preserves optimizer state.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
Beispiel #11
0
def train(**kwargs):
    """Train a CIFAR-10 classifier with a staged learning-rate schedule.

    Options come from ``opt`` and may be overridden via *kwargs*.  Up to
    four (epoch-count, learning-rate) stages are taken from opt.stage1..4 /
    opt.lr1..4 (-1 disables a stage); after each epoch the model is
    evaluated on the validation and test splits and checkpointed.
    """
    opt.parse(kwargs)
    NUM_TRAIN = 49000  # first 49k train images; the remaining 1k validate

    transform = T.Compose([
        T.ToTensor(),
        T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    # Build the training transform, optionally with augmentation / Cutout.
    train_transform = T.Compose([])
    if opt.data_aug:  # idiom: was "== True"
        train_transform.transforms.append(T.RandomCrop(32, padding=4))
        train_transform.transforms.append(T.RandomHorizontalFlip())
    train_transform.transforms.append(T.ToTensor())
    train_transform.transforms.append(
        T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)))
    if opt.data_aug and opt.use_cutout:
        train_transform.transforms.append(Cutout(n_holes=1, length=16))

    cifar10_train = dset.CIFAR10('./datasets',
                                 train=True,
                                 download=True,
                                 transform=train_transform)
    loader_train = DataLoader(cifar10_train,
                              batch_size=opt.batch_size,
                              sampler=sampler.SubsetRandomSampler(
                                  range(NUM_TRAIN)))

    cifar10_val = dset.CIFAR10('./datasets',
                               train=True,
                               download=True,
                               transform=transform)
    loader_val = DataLoader(cifar10_val,
                            batch_size=opt.batch_size,
                            sampler=sampler.SubsetRandomSampler(
                                range(NUM_TRAIN, 50000)))

    cifar10_test = dset.CIFAR10('./datasets',
                                train=False,
                                download=True,
                                transform=transform)
    loader_test = DataLoader(cifar10_test, batch_size=opt.batch_size)

    device = torch.device('cuda') if opt.use_gpu else torch.device('cpu')

    model = getattr(models, opt.model)()
    if opt.use_trained_model:
        cp_name = opt.model
        if opt.checkpoint_load_name is not None:  # idiom: was "!= None"
            cp_name = opt.checkpoint_load_name
        model.load(opt.test_model_path + cp_name)
    model.to(device)

    lr = opt.lr
    optimizer = optim.SGD(model.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=opt.weight_decay,
                          nesterov=True)

    # Collect the enabled (epochs, lr) stages.
    stages = []
    lrs = []
    if opt.stage1 != -1:
        stages.append(opt.stage1)
        lrs.append(opt.lr1)
    if opt.stage2 != -1:
        stages.append(opt.stage2)
        lrs.append(opt.lr2)
    if opt.stage3 != -1:
        stages.append(opt.stage3)
        lrs.append(opt.lr3)
    if opt.stage4 != -1:
        stages.append(opt.stage4)
        lrs.append(opt.lr4)

    for i, stage_epoch in enumerate(stages):
        print(stage_epoch)
        print(lrs[i])
        # Apply this stage's learning rate without resetting optimizer state.
        for param_group in optimizer.param_groups:
            param_group['lr'] = lrs[i]

        for epoch in range(stage_epoch):
            # BUG FIX: initialise loss_now so update_epoch below cannot hit a
            # NameError if the training loader is empty.
            loss_now = 0.0
            for ii, (data, label) in tqdm(enumerate(loader_train)):
                # move the batch to the training device
                data = data.to(device=device, dtype=torch.float32)
                label = label.to(device=device, dtype=torch.long)

                # forward + loss
                scores = model(data)
                loss = F.cross_entropy(scores, label)

                # backprop, with optional gradient clipping
                optimizer.zero_grad()
                loss.backward()

                if opt.use_clip:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   opt.clip,
                                                   norm_type=2)

                optimizer.step()

                loss_now = loss.item()

                if ii == 0:
                    print('Epoch [{}/{}], Loss: {:.4f}, lr :{: f}'.format(
                        epoch + 1, stage_epoch, loss_now, lrs[i]))

            testacc = check_acc(loader_test, model)

            valacc = check_acc(loader_val, model)
            model.update_epoch(lrs[i], loss_now, valacc, testacc)
            model.save(opt.checkpoint_save_name)
def main(**kwargs):
    """Train a multi-label text classifier and track the best validation score.

    CLI keyword overrides are merged into the global ``opt``.  Progress is
    logged to visdom; every ``opt.decay_every`` batches the model is validated,
    the best checkpoint is kept, and when the score regresses the best
    checkpoint is reloaded and both learning rates are decayed.
    """
    # weight decay that is grown dynamically (x5 on every decay step below)
    origin_weight_decay = 1e-5

    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()

    # build the model from its registered name; optionally restore weights
    model = getattr(models, opt.model)(opt).cuda()
    if opt.model_path:
        model.load(opt.model_path)
    print(model)

    opt.parse(kwargs, print_=True)

    vis.reinit(opt.env)
    # NOTE(review): pre_loss is written below but never read — verify intent
    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2
    # the loss class is also resolved by name from the models module
    loss_function = getattr(models, opt.loss)()
    # if opt.all:dataset = ZhihuALLData(opt.train_data_path,opt.labels_path,type_=opt.type_)
    dataset = FoldData(opt.train_data_path,
                       opt.labels_path,
                       type_=opt.type_,
                       fold=opt.fold)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    optimizer = model.get_optimizer(lr, opt.lr2, opt.weight_decay)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0
    # pre_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            title, content, label = Variable(title.cuda()), Variable(
                content.cuda()), Variable(label.cuda())
            optimizer.zero_grad()
            score = model(title, content)
            # labels are multi-hot; opt.weight rescales the positive targets
            loss = loss_function(score, opt.weight * label.float())
            loss_meter.add(loss.data[0])
            loss.backward()
            optimizer.step()

            # every plot_every batches: compute a training score and plot it
            if ii % opt.plot_every == opt.plot_every - 1:
                # drop into the debugger when the sentinel debug file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

                # top-5 predicted label indices per sample
                predict = score.data.topk(5, dim=1)[1].cpu().tolist(
                )  #(dim=1,descending=True)[1][:,:5].tolist()
                true_target = label.data.float().topk(
                    5, dim=1)  #[1].cpu().tolist()#sort(dim=1,descending=True)
                true_index = true_target[1][:, :5]
                true_label = true_target[0][:, :5]
                predict_label_and_marked_label_list = []
                for jj in range(label.size(0)):
                    true_index_ = true_index[jj]
                    true_label_ = true_label[jj]
                    # keep only the indices whose target value is positive
                    true = true_index_[true_label_ > 0]
                    predict_label_and_marked_label_list.append(
                        (predict[jj], true.tolist()))
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss),
                             win='tmp')
                vis.plot('scores', score_meter.value()[0])

                #eval()
                vis.plot('loss', loss_meter.value()[0])
                # randomly pick one sample to inspect its output distribution
                # NOTE(review): k and output are computed but unused — the
                # histogram plot below is commented out
                k = t.randperm(label.size(0))[0]
                output = t.nn.functional.sigmoid(score)
                # vis.vis.histogram(
                #     output.data[k].view(-1).cpu(), win=u'output_hist', opts=dict
                #     (title='output_hist'))
                # print "epoch:%4d/%4d,time: %.8f,loss: %.8f " %(epoch,ii,time.time()-start,loss_meter.value()[0])

            # every decay_every batches: validate, checkpoint, maybe decay lr
            if ii % opt.decay_every == opt.decay_every - 1:
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)

                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })
                # score regressed: reload the best checkpoint and decay lrs
                if scores < best_score:
                    model.load(best_path, change_opt=False)
                    #lr = lr*opt.lr_decay
                    #optimizer = model.get_optimizer(lr)
                    lr = lr * opt.lr_decay
                    # second way to lower the lr — original note claims no loss
                    # of momentum state, but get_optimizer rebuilds the
                    # optimizer here; verify against the model class
                    if lr2 == 0: lr2 = 2e-4
                    else: lr2 = lr2 * 0.8
                    optimizer = model.get_optimizer(lr, lr2, 0)
                    origin_weight_decay = 5 * origin_weight_decay
                    # optimizer = model.get_optimizer(lr,lr2,0,weight_decay=origin_weight_decay)
                    # origin_weight_decay=5*origin_weight_decay
                    # for param_group in optimizer.param_groups:
                    #     param_group['lr']  *= opt.lr_decay
                    #     if param_group['lr'] ==0:
                    #         param_group['lr'] = 1e-4

                pre_loss = loss_meter.value()[0]
                # pre_score = score_meter.value()[0]
                # pre_score = scores
                loss_meter.reset()
                score_meter.reset()

                # stop this epoch once the lr has decayed below the floor
                if lr < opt.min_lr:
                    break
Beispiel #13
0
def train(**kwargs):
    """Fine-tune a pretrained ResNet-34 on the DogCat dataset.

    CLI keyword overrides are merged into the global ``opt``.  Trains with SGD
    and cross-entropy, checkpoints after every epoch, validates, logs to
    visdom, and decays the learning rate when the epoch loss stops decreasing.

    FIX: the Python-2 ``print "..."`` statements were syntax errors in
    Python 3 and inconsistent with the rest of this file; converted to
    ``print(...)`` calls.
    """
    # (1) update config from CLI kwargs and set up visdom logging
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # (2) model: pretrained ResNet-34 with a fresh 2-class head
    #model = getattr(models,opt.model)()
    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 2)
    #if opt.load_model_path:
    #	model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # (3) data: train and validation splits share the same root directory
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # (4) loss and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.SGD(model.parameters(),
                            lr=lr,
                            weight_decay=opt.weight_decay)

    # (5) meters: smoothed loss and a 2-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # (6) training loop
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):

            print("ii:", ii)
            input = Variable(data)
            target = Variable(label)

            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            # standard step: clear grads, forward, backward, update
            optimizer.zero_grad()
            score = model(input)

            loss = criterion(score, target)
            loss.backward()

            optimizer.step()

            # update running statistics for visualization
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), target.detach())

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # drop into the debugger when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # checkpoint after every epoch (state dict only)
        name = time.strftime('model' + '%m%d_%H:%M:%S.pth')
        t.save(model.state_dict(), 'checkpoints/' + name)

        # validation metrics and visualization
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        print("epoch:", epoch, "loss:", loss_meter.value()[0],
              "accuracy:", val_accuracy)

        # decay the learning rate if the epoch loss stopped decreasing;
        # mutating param_groups preserves the optimizer's internal state
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        previous_loss = loss_meter.value()[0]
Beispiel #14
0
def train(**kwargs):
    """Train the dog-breed classifier wrapped by ``Mymodel``.

    CLI keyword overrides are merged into the global ``opt``.  Uses Adam with
    cross-entropy, plots the smoothed loss to visdom, checkpoints every epoch,
    and decays the learning rate when the epoch loss stops decreasing.

    BUG FIX: the criterion used to be bound to the name ``loss`` and was then
    overwritten by the loss *tensor* on the first batch, so the second batch
    crashed trying to call a tensor.  The criterion now keeps its own name.
    Also, the lr decay used to restart from ``opt.lr`` every time
    (``lr = opt.lr * opt.lr_decay``) so repeated decays never compounded;
    ``lr`` is now tracked across epochs.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model (Mymodel wraps a pretrained backbone)
    model = Mymodel(pretrained=True)
    model = model.model

    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data
    train_data = DogBreedData(opt.train_data_root, train=True)
    val_data = DogBreedData(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=True, num_workers=opt.num_workers)

    # setp3: loss and optimizer
    criterion = t.nn.CrossEntropyLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)

    # step4: meters (running mean/std of the loss over one epoch)
    loss_meter = meter.AverageValueMeter()
    previous_loss = 1e100
    lr = opt.lr  # tracked so repeated decays compound

    # train
    for epoch in range(opt.max_epoch):  # one epoch = one full pass over the data
        loss_meter.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data)
            target = Variable(label.type(t.LongTensor))
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            # clear gradients, forward, loss, backward, update
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.data[0])

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

        model.save()

        # update learning rate when the smoothed loss stops improving;
        # mutating param_groups keeps Adam's moment buffers intact
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a classifier (cResNet51) on top of a frozen compression network.

    CLI keyword overrides are merged into the global ``opt``.  The compression
    model is loaded from a checkpoint, frozen, and used purely as a feature
    extractor; only ``c_resnet_51`` is optimized.  Loss/accuracy curves are
    written through ``PlotSaver`` (plots + a text log file).
    """
    opt.parse(kwargs)
    # log file
    ps = PlotSaver("FrozenCNN_ResNet50_RGB_" +
                   time.strftime("%m_%d_%H:%M:%S") + ".log.txt")

    # step1: Model — compression network restored from checkpoint, eval mode
    compression_model = getattr(models, opt.model)(
        use_imp=opt.use_imp,
        model_name="CWCNN_limu_ImageNet_imp_r={r}_γ={w}_for_resnet50".format(
            r=opt.rate_loss_threshold, w=opt.rate_loss_weight)
        if opt.use_imp else None)

    compression_model.load(None, opt.compression_model_ckpt)
    compression_model.eval()

    # if use_original_RGB:
    #     resnet_50 = resnet50()    # Official ResNet
    # else:
    #     resnet_50 = ResNet50()    # My ResNet
    c_resnet_51 = cResNet51()
    if opt.use_gpu:
        # compression_model.cuda()
        # resnet_50.cuda()
        # multiple_gpu_process presumably wraps in DataParallel — see helper
        compression_model = multiple_gpu_process(compression_model)
        c_resnet_51 = multiple_gpu_process(c_resnet_51)

    # freeze the compression network
    for param in compression_model.parameters():
        # print (param.requires_grad)
        param.requires_grad = False

    cudnn.benchmark = True

    # pdb.set_trace()

    # step2: Data — standard ImageNet-style augmentation and normalization
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_data_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])
    val_data_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ])

    train_data = datasets.ImageFolder(opt.train_data_root,
                                      train_data_transforms)
    val_data = datasets.ImageFolder(opt.val_data_root, val_data_transforms)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers,
                                  pin_memory=True)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers,
                                pin_memory=True)

    # step3: criterion and optimizer — only the classifier is optimized

    class_loss = t.nn.CrossEntropyLoss()
    lr = opt.lr
    # optimizer = t.optim.Adam(resnet_50.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=opt.weight_decay)
    optimizer = t.optim.SGD(c_resnet_51.parameters(),
                            lr=lr,
                            momentum=opt.momentum,
                            weight_decay=opt.weight_decay)
    start_epoch = 0

    # resume: restores epoch (and optimizer state unless finetuning)
    if opt.resume:
        start_epoch = c_resnet_51.module.load(
            None if opt.finetune else optimizer, opt.resume, opt.finetune)

        if opt.finetune:
            print('Finetune from model checkpoint file', opt.resume)
        else:
            print('Resume training from checkpoint file', opt.resume)
            print('Continue training at epoch %d.' % start_epoch)

    # step4: meters — running averages of loss and top-1/top-5 accuracy
    class_loss_meter = AverageValueMeter()

    class_acc_top5_meter = AverageValueMeter()
    class_acc_top1_meter = AverageValueMeter()

    # class_loss_meter = AverageMeter()
    # class_acc_top5_meter = AverageMeter()
    # class_acc_top1_meter = AverageMeter()

    # ps init — declare every curve before training starts

    ps.new_plot('train class loss',
                opt.print_freq,
                xlabel="iteration",
                ylabel="train_CE_loss")
    ps.new_plot('val class loss', 1, xlabel="epoch", ylabel="val_CE_loss")

    ps.new_plot('train top_5 acc',
                opt.print_freq,
                xlabel="iteration",
                ylabel="train_top5_acc")
    ps.new_plot('train top_1 acc',
                opt.print_freq,
                xlabel="iteration",
                ylabel="train_top1_acc")

    ps.new_plot('val top_5 acc', 1, xlabel="iteration", ylabel="val_top_5_acc")
    ps.new_plot('val top_1 acc', 1, xlabel="iteration", ylabel="val_top_1_acc")

    for epoch in range(start_epoch + 1, opt.max_epoch + 1):
        # per epoch avg loss meter
        class_loss_meter.reset()

        class_acc_top1_meter.reset()
        class_acc_top5_meter.reset()

        # cur_epoch_loss refresh every epoch

        ps.new_plot("cur epoch train class loss",
                    opt.print_freq,
                    xlabel="iteration in cur epoch",
                    ylabel="cur_train_CE_loss")

        c_resnet_51.train()

        for idx, (data, label) in enumerate(train_dataloader):
            ipt = Variable(data)
            label = Variable(label)

            if opt.use_gpu:
                ipt = ipt.cuda()
                label = label.cuda()

            optimizer.zero_grad()
            # if not use_original_RGB:
            # compressed_RGB = compression_model(ipt)
            # else:
            # compressed_RGB = ipt
            # We just wanna compressed features, not to decode this.
            compressed_feat = compression_model(ipt, need_decode=False)
            # print ('RGB', compressed_RGB.requires_grad)
            predicted = c_resnet_51(compressed_feat)

            class_loss_ = class_loss(predicted, label)

            class_loss_.backward()
            optimizer.step()

            class_loss_meter.add(class_loss_.data[0])
            # class_loss_meter.update(class_loss_.data[0], ipt.size(0))

            acc1, acc5 = accuracy(predicted.data, label.data, topk=(1, 5))
            # pdb.set_trace()

            class_acc_top1_meter.add(acc1[0])
            class_acc_top5_meter.add(acc5[0])
            # class_acc_top1_meter.update(acc1[0], ipt.size(0))
            # class_acc_top5_meter.update(acc5[0], ipt.size(0))

            # periodic logging: either the smoothed meter value or the
            # latest raw value, depending on opt.print_smooth
            if idx % opt.print_freq == opt.print_freq - 1:
                ps.add_point(
                    'train class loss',
                    class_loss_meter.value()[0]
                    if opt.print_smooth else class_loss_.data[0])
                ps.add_point(
                    'cur epoch train class loss',
                    class_loss_meter.value()[0]
                    if opt.print_smooth else class_loss_.data[0])
                ps.add_point(
                    'train top_5 acc',
                    class_acc_top5_meter.value()[0]
                    if opt.print_smooth else acc5[0])
                ps.add_point(
                    'train top_1 acc',
                    class_acc_top1_meter.value()[0]
                    if opt.print_smooth else acc1[0])

                ps.log(
                    'Epoch %d/%d, Iter %d/%d, class loss = %.4f, top 5 acc = %.2f %%, top 1 acc  = %.2f %%, lr = %.8f'
                    % (epoch, opt.max_epoch, idx, len(train_dataloader),
                       class_loss_meter.value()[0],
                       class_acc_top5_meter.value()[0],
                       class_acc_top1_meter.value()[0], lr))
                # enter debug mode when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    pdb.set_trace()

        # NOTE(review): use_data_parallel is not defined in this function —
        # presumably a module-level flag set by multiple_gpu_process; verify
        if use_data_parallel:
            c_resnet_51.module.save(optimizer, epoch)

        # plot before val can ease me
        ps.make_plot(
            'train class loss'
        )  # all epoch share a same img, so give ""(default) to epoch
        ps.make_plot('cur epoch train class loss', epoch)
        ps.make_plot("train top_5 acc")
        ps.make_plot("train top_1 acc")

        val_class_loss, val_top5_acc, val_top1_acc = val(
            compression_model, c_resnet_51, val_dataloader, class_loss, None,
            ps)

        ps.add_point('val class loss', val_class_loss)
        ps.add_point('val top_5 acc', val_top5_acc)
        ps.add_point('val top_1 acc', val_top1_acc)

        ps.make_plot('val class loss')
        ps.make_plot('val top_5 acc')
        ps.make_plot('val top_1 acc')

        ps.log(
            'Epoch:{epoch}, lr:{lr}, train_class_loss: {train_class_loss}, train_top5_acc: {train_top5_acc} %, train_top1_acc: {train_top1_acc} %, \
val_class_loss: {val_class_loss}, val_top5_acc: {val_top5_acc} %, val_top1_acc: {val_top1_acc} %'
            .format(epoch=epoch,
                    lr=lr,
                    train_class_loss=class_loss_meter.value()[0],
                    train_top5_acc=class_acc_top5_meter.value()[0],
                    train_top1_acc=class_acc_top1_meter.value()[0],
                    val_class_loss=val_class_loss,
                    val_top5_acc=val_top5_acc,
                    val_top1_acc=val_top1_acc))

        # adjust lr at the scheduled epochs (step decay)
        if epoch in opt.lr_decay_step_list:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
def train(**kwargs):
    """Train PCNN_ONE (at-least-one multi-instance learning) and track PR metrics.

    Seeds RNGs, forces the model name to 'PCNN_ONE', trains with Adadelta and
    cross-entropy, and after each epoch evaluates precision/recall on the test
    split, saving PR curves and the best model when both exceed 0.24.
    """
    setup_seed(opt.seed)

    # force the model name, then fold the remaining CLI overrides into opt
    kwargs.update({'model': 'PCNN_ONE'})
    opt.parse(kwargs)

    if opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = getattr(models, 'PCNN_ONE')(opt)
    if opt.use_gpu:
        model.cuda()

    # resolve the dataset class by name, e.g. opt.data == 'NYT' -> NYTData
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data, opt.batch_size, shuffle=True, num_workers=opt.num_workers, collate_fn=collate_fn)

    test_data = DataModel(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, collate_fn=collate_fn)
    print('train data: {}; test data: {}'.format(len(train_data), len(test_data)))

    criterion = nn.CrossEntropyLoss()
    # only optimize parameters that still require gradients
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(trainable_params, rho=1.0, eps=1e-6, weight_decay=opt.weight_decay)

    print("start training...")
    max_pre, max_rec = -1.0, -1.0
    for epoch in range(opt.num_epochs):

        total_loss = 0
        for step, (data, label_set) in enumerate(train_data_loader):
            # each bag keeps only its first label
            label = torch.LongTensor([pair[0] for pair in label_set])
            if opt.use_gpu:
                label = label.cuda()

            # pick the most confident instance per bag, then a normal SGD step
            data = select_instance(model, data, label)
            model.batch_size = opt.batch_size

            optimizer.zero_grad()
            out = model(data, train=True)
            loss = criterion(out, label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        if epoch < -1:  # never true; kept as a manual warm-up toggle
            continue

        # evaluate precision/recall on the test split
        true_y, pred_y, pred_p = predict(model, test_data_loader)
        all_pre, all_rec, fp_res = eval_metric(true_y, pred_y, pred_p)
        last_pre, last_rec = all_pre[-1], all_rec[-1]

        if last_pre > 0.24 and last_rec > 0.24:
            save_pr(opt.result_dir, model.model_name, epoch, all_pre, all_rec, fp_res, opt=opt.print_opt)
            print('{} Epoch {} save pr'.format(now(), epoch + 1))
            # keep the checkpoint only when both metrics improve together
            if last_pre > max_pre and last_rec > max_rec:
                print("save model")
                max_pre, max_rec = last_pre, last_rec
                model.save(opt.print_opt)

        print('{} Epoch {}/{}: train loss: {}; test precision: {}, test recall {}'.format(now(), epoch + 1, opt.num_epochs, total_loss, last_pre, last_rec))
def train(**kwargs):
    """Train PCNN_ATT (selective-attention variant) and report PR metrics after epoch 2.

    CLI keyword overrides are merged into the global ``opt`` (the model name is
    forced to 'PCNN_ATT').  The model computes its own loss from the raw bag
    labels, so no external criterion is used.
    """
    kwargs.update({'model': 'PCNN_ATT'})
    opt.parse(kwargs)

    if opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    model = getattr(models, 'PCNN_ATT')(opt)
    if opt.use_gpu:
        model.cuda()

    # loading data: resolve the dataset class by name (opt.data + 'Data')
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data,
                                   opt.batch_size,
                                   shuffle=True,
                                   num_workers=opt.num_workers,
                                   collate_fn=collate_fn)

    test_data = DataModel(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data,
                                  batch_size=opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('{} train data: {}; test data: {}'.format(now(), len(train_data),
                                                    len(test_data)))

    # the model returns its loss directly, so only an optimizer is needed
    optimizer = optim.Adadelta(model.parameters(), rho=0.95, eps=1e-6)

    for epoch in range(opt.num_epochs):
        total_loss = 0
        for idx, (data, label_set) in enumerate(train_data_loader):

            # each bag keeps only its first label; the model takes the raw list
            label = [l[0] for l in label_set]

            optimizer.zero_grad()
            model.batch_size = opt.batch_size
            loss = model(data, label)
            # BUG FIX: the original converted ``label`` to a (cuda) LongTensor
            # *after* the loss was computed and never used the result — dead
            # work removed.
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        if epoch > 2:
            # evaluate precision/recall on the test split
            pred_res, p_num = predict_var(model, test_data_loader)
            all_pre, all_rec = eval_metric_var(pred_res, p_num)

            last_pre, last_rec = all_pre[-1], all_rec[-1]
            if last_pre > 0.24 and last_rec > 0.24:
                save_pr(opt.result_dir,
                        model.model_name,
                        epoch,
                        all_pre,
                        all_rec,
                        opt=opt.print_opt)
                print('{} Epoch {} save pr'.format(now(), epoch + 1))

            print(
                '{} Epoch {}/{}: train loss: {}; test precision: {}, test recall {}'
                .format(now(), epoch + 1, opt.num_epochs, total_loss, last_pre,
                        last_rec))
        else:
            print('{} Epoch {}/{}: train loss: {};'.format(
                now(), epoch + 1, opt.num_epochs, total_loss))
Beispiel #18
0
def train(**kwargs):
    """Train a DogCat classifier selected by ``opt.model`` with Adam + cross-entropy.

    Checkpoints after every epoch, validates, logs to visdom, and decays the
    learning rate whenever the average epoch loss stops decreasing.

    FIX: ``loss_meter.add`` previously received ``loss.data`` (a tensor);
    meters expect plain Python numbers, so ``loss.item()`` is used, matching
    the other training loops in this file.
    """
    # NOTE(review): other examples in this file call opt.parse(kwargs) with
    # the dict itself — confirm which signature this project's parse expects
    opt.parse(**kwargs)
    # step1: configure model
    model = getattr(models, opt.model)(opt.num_class)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_path, transform=opt.train_transform, train=True)
    val_data = DogCat(opt.train_data_path, transform=opt.test_val_transform, train=False, test=False)
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=opt.shuffle, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=opt.shuffle, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: meters — average loss over an epoch, plus a confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(opt.num_class)
    previous_loss = 1e6
    # step5: train
    vis = Visualizer(opt.env)
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train model: forward, loss, backward, update
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters expect plain Python numbers, not tensors
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot(win='loss', y=loss_meter.value()[0])

        model.save()

        # step6: validate and visualize
        val_confusion_matrix, val_accuracy = val(model, val_dataloader)
        vis.plot(win='val_accuracy', y=val_accuracy)
        vis.log(win='log_text', info=
                'epoch:{epoch}, lr:{lr}, loss:{loss}, train_cm:{train_cm}, val_cm:{val_cm}'.format(
                    epoch=epoch, lr=lr, loss=loss_meter.value()[0], train_cm=str(confusion_matrix.value()), val_cm=str(val_confusion_matrix)
                )
                )

        # step7: decay the learning rate when the epoch loss stops improving;
        # mutating param_groups preserves Adam's moment buffers
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
Beispiel #19
0
def train(**kwargs):
    """Train a document-pair matching model with visdom-based monitoring.

    CLI keyword overrides are merged into the global ``opt``.  Builds a
    vocabulary from the training documents, trains with Adam + cross-entropy
    on one-hot targets, checkpoints every epoch, and decays the learning rate
    when the epoch loss stops decreasing.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # --- model ---------------------------------------------------------
    model = getattr(models, opt.model)(opt)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # --- data ----------------------------------------------------------
    parse_line = lambda x: x.strip().split(',')
    train_data = DocumentPair(opt.train_data_root,
                              doc_type='train',
                              suffix='txt',
                              load=parse_line)
    train_data.initialize(vocab_size=opt.vocab_size)
    # the validation split reuses the vocabulary built from training data
    val_data = DocumentPair(opt.validate_data_root,
                            doc_type='validate',
                            suffix='txt',
                            load=parse_line,
                            vocab=train_data.vocab)
    val_data.initialize()
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=False, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # --- criterion and optimizer ---------------------------------------
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # --- meters ---------------------------------------------------------
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # --- training loop ---------------------------------------------------
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for batch_idx, batch in enumerate(train_dataloader):
            data_left, data_right, label, num_pos = load_data(
                batch, opt, train_data.vocab)

            left_var = Variable(t.from_numpy(data_left))
            right_var = Variable(t.from_numpy(data_right))
            target = Variable(t.from_numpy(label))
            if opt.use_gpu:
                left_var = left_var.cuda()
                right_var = right_var.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            scores, predictions = model((left_var, right_var))
            # targets are one-hot; argmax over dim 1 recovers the class index
            loss = criterion(scores, target.max(1)[1])
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.data[0])
            confusion_matrix.add(predictions.data, target.max(1)[1].data)

            if batch_idx % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # decay the learning rate when the loss plateaus; mutating
        # param_groups keeps the optimizer's internal state intact
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """Train a PCNN_ONE relation-extraction model with multi-instance learning.

    Forces ``opt.model`` to ``'PCNN_ONE'``, trains with Adadelta on bags of
    instances (one instance selected per bag via ``select_instance``), and
    after each epoch evaluates macro precision/recall/F1 on the test split,
    saving the whole model whenever the macro F1 improves.

    Args:
        **kwargs: configuration overrides forwarded to ``opt.parse``.
    """

    # hard-wire the model choice regardless of caller-supplied kwargs
    kwargs.update({'model': 'PCNN_ONE'})
    opt.parse(kwargs)

    if opt.use_gpu:
        torch.cuda.set_device(opt.gpu_id)

    # torch.manual_seed(opt.seed)
    model = getattr(models, 'PCNN_ONE')(opt)
    if opt.use_gpu:
        # torch.cuda.manual_seed_all(opt.seed)
        model.cuda()
        #  model = nn.DataParallel(model)

    # loading data: dataset class is resolved dynamically, e.g. 'NYT' -> NYTData
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root, train=True)
    train_data_loader = DataLoader(train_data,
                                   opt.batch_size,
                                   shuffle=False,
                                   num_workers=opt.num_workers,
                                   collate_fn=collate_fn)

    test_data = DataModel(opt.data_root, train=False)
    test_data_loader = DataLoader(test_data,
                                  batch_size=opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('train data: {}; test data: {}'.format(len(train_data),
                                                 len(test_data)))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adadelta(model.parameters(),
                               rho=1.0,
                               eps=1e-6,
                               weight_decay=opt.weight_decay)

    # train
    print("start training...")
    max_f1_score = -1.
    for epoch in range(opt.num_epochs):
        #print epoch
        total_loss = 0
        for idx, (data, label_set) in enumerate(train_data_loader):
            #print idx
            #print data[0]
            # each bag may carry several labels; only the first is used
            label = [l[0] for l in label_set]

            if opt.use_gpu:
                label = torch.LongTensor(label).cuda()
            else:
                label = torch.LongTensor(label)

            # multi-instance step: pick the best instance per bag under the
            # current model before computing the supervised loss
            data = select_instance(model, data, label)

            model.batch_size = opt.batch_size

            optimizer.zero_grad()
            out = model(data)
            loss = criterion(out, Variable(label))

            loss.backward()
            optimizer.step()

            # legacy (pre-0.4) PyTorch scalar access; newer code uses .item()
            total_loss += loss.data[0]

        # dead guard: epoch >= 0 always, so this never skips — presumably a
        # leftover "skip evaluation for the first N epochs" switch
        if epoch < -3:
            continue
        true_y, pred_y, pred_p = predict(model, test_data_loader)
        # collapse multi-label ground truth to its first label, matching training
        single_true_y = []
        for t_y in true_y:
            single_true_y.append(t_y[0])
        #print single_true_y
        f1score = f1_score(single_true_y, pred_y, average='macro')
        precision = precision_score(single_true_y, pred_y, average='macro')
        recall = recall_score(single_true_y, pred_y, average='macro')
        f1score_class = f1_score(single_true_y, pred_y, average=None)
        precision_class = precision_score(single_true_y, pred_y, average=None)
        recall_class = recall_score(single_true_y, pred_y, average=None)
        #print pred_y
        # checkpoint the full model on macro-F1 improvement
        # NOTE(review): saves to opt.load_model_path (the *load* path) — confirm intent
        if f1score > max_f1_score:
            max_f1_score = f1score
            print('save the model')
            torch.save(model, opt.load_model_path)
        #print true_y[:10], pred_y[:10]
        '''
        all_pre, all_rec, fp_res = eval_metric(true_y, pred_y, pred_p)

        last_pre, last_rec = all_pre[-1], all_rec[-1]
        if last_pre > 0.24 and last_rec > 0.24:
            save_pr(opt.result_dir, model.model_name, epoch, all_pre, all_rec, fp_res, opt=opt.print_opt)
            print('{} Epoch {} save pr'.format(now(), epoch + 1))
            if last_pre > max_pre and last_rec > max_rec:
                print("save model")
                max_pre = last_pre
                max_rec = last_rec
                model.save(opt.print_opt)
        '''
        print(precision_class, recall_class, f1score_class)
        print(
            '{} Epoch {}/{}: train loss: {}; test precision: {}, test recall {}, f1_score {}'
            .format(now(), epoch + 1, opt.num_epochs, total_loss, precision,
                    recall, f1score))
Beispiel #21
0
def train(**kwargs):
    """Train a question-answer matching model on the Small dataset.

    Visits the dataset in a fixed random order, accumulates the KL-divergence
    loss example by example (the dataset yields single examples, not batches),
    steps the optimizer once per mini-batch, and checkpoints after every epoch.
    The learning rate is decayed whenever the last batch loss of an epoch did
    not improve.

    Args:
        **kwargs: configuration overrides forwarded to ``opt.parse``.
    """
    opt.parse(kwargs)
    if opt.vis:
        vis = Visualizer(opt.env)  # NOTE(review): created but never used below

    # step 1: configure model
    model = getattr(models, opt.model)(opt)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step 2: data
    train_data = Small(opt.train_root,
                       wv_path=opt.word2vec_path,
                       stopwords_path=opt.stopwords_path,
                       idf_path=opt.idf_train_path,
                       train=True)
    # val_data = Small(opt.train_root,
    #                  wv_path=opt.word2vec_path,
    #                  stopwords_path=opt.stopwords_path,
    #                  train=False)

    data_size = len(train_data)
    # one fixed random permutation, reused for every epoch
    indices = t.randperm(data_size)

    # step 3: criterion and optimizer
    criterion = t.nn.KLDivLoss()
    lr = opt.lr
    optimizer = Adamax(model.parameters(),
                       lr=lr,
                       weight_decay=opt.weight_decay)

    # step 4: meters
    previous_loss = float('inf')

    # train
    for epoch in range(opt.max_epoch):

        for i in tqdm(range(0, data_size, opt.batch_size)):

            # the last batch may be smaller than opt.batch_size
            batch_size = min(opt.batch_size, data_size - i)
            loss = 0.
            for j in range(0, batch_size):
                idx = indices[i + j]
                q, a, label, shallow_features = train_data[idx]
                input_q, input_a, shallow_features = Variable(q), Variable(
                    a), Variable(shallow_features)
                target = Variable(label)
                if opt.use_gpu:
                    input_q = input_q.cuda()
                    input_a = input_a.cuda()
                    shallow_features = shallow_features.cuda()
                    target = target.cuda()

                score = model(input_q, input_a, shallow_features)
                example_loss = criterion(score, target)
                loss += example_loss
            # bug fix: average over the actual batch size (was opt.batch_size),
            # so the final partial batch is not silently under-weighted
            loss /= batch_size

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        model.save(model.module_name + '_' + str(epoch) + '.pth')

        print('epoch:{epoch}, lr:{lr}, loss:{loss}'.format(epoch=epoch,
                                                           loss=loss.data,
                                                           lr=lr))

        # # validate and visualize
        # map, mrr = val(model, val_data)
        #
        # print('epoch:{epoch}, lr:{lr}, loss:{loss}, map:{map}, mrr:{mrr}'.format(
        #     epoch=epoch,
        #     loss=loss.data,
        #     map=map,
        #     mrr=mrr,
        #     lr=lr
        # ))

        # update learning rate when the last batch loss did not improve
        if (loss.data > previous_loss).all():
            lr = lr * opt.lr_decay
            # bug fix: the decayed lr was computed but never applied; push it
            # into the optimizer's param groups (keeps optimizer state intact)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss.data
def main(**kwargs):
    """Train a (boosted) multi-label Zhihu text classifier.

    Builds the model named by ``opt.model`` (typically an ensemble/boost model
    configured through ``opt.model_names``/``opt.model_paths``), trains it with
    the loss resolved from ``opt.loss``, periodically plots training score/loss
    to visdom, and every ``opt.decay_every`` iterations validates, checkpoints
    on improvement, reloads the best checkpoint on regression, and decays the
    learning rates.

    Args:
        **kwargs: configuration overrides forwarded to ``opt.parse``.
    """
    # dynamically increase weight decay over training ("全职" in the original
    # comment is a typo for "权重" / weight)
    origin_weight_decay = 1e-5

    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()

    # Historical ensemble configurations kept for reference:
    ###################################
    # opt.model_names=['MultiCNNTextBNDeep','CNNText_inception',
    # #'RCNN',
    # 'LSTMText','CNNText_inception']
    # opt.model_paths = ['checkpoints/MultiCNNTextBNDeep_word_0.410330780091','checkpoints/CNNText_tmp_word_0.41096749885',
    # #'checkpoints/RCNN_word_0.411511574999',
    # 'checkpoints/LSTMText_word_0.411994005382','checkpoints/CNNText_tmp_char_0.402429167301']
    ######################################
    #     opt.model_names=['MultiCNNTextBNDeep',
    #  #'CNNText_inception',
    #     #'RCNN',
    #     'LSTMText_boost',
    #     #'CNNText_inception_boost'
    #     ]
    #     opt.model_paths = ['checkpoints/MultiCNNTextBNDeep_word_0.410330780091',
    #     # 'checkpoints/CNNText_tmp_word_0.41096749885',
    #     #'checkpoints/RCNN_word_0.411511574999',
    #     'checkpoints/LSTMText_word_0.381833388089',
    #     #'checkpoints/CNNText_tmp_0.376364647145'
    #     ]
    #####################################################3
    # NOTE(review): this assignment is immediately overwritten below — only
    # the second opt.model_names / opt.model_paths pair takes effect
    opt.model_names = [
        'MultiCNNTextBNDeep',

        #'RCNN',
        'LSTMText',
        'CNNText_inception',
        'CNNText_inception-boost'
    ]
    opt.model_paths = [
        'checkpoints/MultiCNNTextBNDeep_word_0.410330780091',
        'checkpoints/LSTMText_word_0.381833388089',
        'checkpoints/CNNText_tmp_0.380390420742',
        #'checkpoints/RCNN_word_0.411511574999',
        'checkpoints/CNNText_tmp_0.376364647145'
    ]
    # opt.model_path='checkpoints/BoostModel_word_0.412524727048'

    #**************************************
    #############################################3

    # effective ensemble configuration (None path = train that member from scratch)
    opt.model_names = [
        'MultiCNNTextBNDeep', 'LSTMText', 'MultiCNNTextBNDeep-boost'
    ]
    opt.model_paths = [
        'checkpoints/MultiCNNTextBNDeep_word_0.410330780091',
        'checkpoints/LSTMText_word_0.411994005382', None
    ]
    opt.model_path = 'checkpoints/BoostModel2_word_0.410618920827'
    #*********************************************

    # opt.model_names=['MultiCNNTextBNDeep','LSTMText','CNNText_inception','RCNN']
    # opt.model_paths = ['checkpoints/MultiCNNTextBNDeep_0.37125473788','checkpoints/LSTMText_word_0.381833388089','checkpoints/CNNText_tmp_0.376364647145','checkpoints/RCNN_char_0.3456599248']

    model = getattr(models, opt.model)(opt).cuda()
    # if opt.model_path:
    # model.load(opt.model_path)
    print(model)

    # re-parse with printing enabled so the final effective config is logged
    opt.parse(kwargs, print_=True)

    vis.reinit(opt.env)
    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2
    # loss class is resolved dynamically from the models module
    loss_function = getattr(models, opt.loss)()
    if opt.all:
        dataset = ZhihuALLData(opt.train_data_path,
                               opt.labels_path,
                               type_=opt.type_)
    else:
        dataset = ZhihuData(opt.train_data_path,
                            opt.labels_path,
                            type_=opt.type_)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    # model supplies its own optimizer (separate lr for embedding vs rest)
    optimizer = model.get_optimizer(opt.lr, opt.lr2, 0)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0
    # pre_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        # each sample is ((title_word, title_char), (content_word, content_char), label)
        # — presumably; confirm against the ZhihuData collate format
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            title, content, label = (Variable(
                title[0].cuda()), Variable(title[1].cuda())), (Variable(
                    content[0].cuda()), Variable(content[1].cuda())), Variable(
                        label.cuda())
            optimizer.zero_grad()
            score = model(title, content)
            # positive labels are up-weighted by opt.weight
            loss = loss_function(score, opt.weight * label.float())
            loss_meter.add(loss.data[0])
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                # drop into the debugger when the trigger file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

                # top-5 predicted label indices per sample
                predict = score.data.topk(5, dim=1)[1].cpu().tolist(
                )  #(dim=1,descending=True)[1][:,:5].tolist()
                true_target = label.data.float().topk(
                    5, dim=1)  #[1].cpu().tolist()#sort(dim=1,descending=True)
                true_index = true_target[1][:, :5]
                true_label = true_target[0][:, :5]
                predict_label_and_marked_label_list = []
                for jj in range(label.size(0)):
                    true_index_ = true_index[jj]
                    true_label_ = true_label[jj]
                    # keep only indices whose ground-truth value is positive
                    true = true_index_[true_label_ > 0]
                    predict_label_and_marked_label_list.append(
                        (predict[jj], true.tolist()))
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss),
                             win='tmp')
                vis.plot('scores', score_meter.value()[0])

                #eval()
                vis.plot('loss', loss_meter.value()[0])
                # randomly pick one sample to show its output distribution
                # (the histogram plot is commented out, so k/output are unused)
                k = t.randperm(label.size(0))[0]
                output = t.nn.functional.sigmoid(score)
                # vis.vis.histogram(
                #     output.data[k].view(-1).cpu(), win=u'output_hist', opts=dict
                #     (title='output_hist'))
                # print "epoch:%4d/%4d,time: %.8f,loss: %.8f " %(epoch,ii,time.time()-start,loss_meter.value()[0])

            if ii % opt.decay_every == opt.decay_every - 1:
                # free the loss graph before the (memory-hungry) validation pass
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)

                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })
                # score regressed: roll back to the best checkpoint and decay lrs
                if scores < best_score:
                    model.load(best_path, change_opt=False)
                    #lr = lr*opt.lr_decay
                    #optimizer = model.get_optimizer(lr)
                    lr = lr * opt.lr_decay
                    # second way to lower the lr: rebuild the optimizer so no
                    # momentum/state information is lost
                    if lr2 == 0: lr2 = 2e-4
                    else: lr2 = lr2 * opt.lr_decay
                    optimizer = model.get_optimizer(lr, lr2, 0)
                    origin_weight_decay = 5 * origin_weight_decay
                    # optimizer = model.get_optimizer(lr,lr2,0,weight_decay=origin_weight_decay)
                    # origin_weight_decay=5*origin_weight_decay
                    # for param_group in optimizer.param_groups:
                    #     param_group['lr']  *= opt.lr_decay
                    #     if param_group['lr'] ==0:
                    #         param_group['lr'] = 1e-4

                pre_loss = loss_meter.value()[0]
                # pre_score = score_meter.value()[0]
                # pre_score = scores
                loss_meter.reset()
                score_meter.reset()

                if lr < opt.min_lr:
                    break
Beispiel #23
0
def train(**kwargs):
    """Train a DogCat classifier and validate after every epoch.

    Fixes relative to the original:
      * train data was loaded from ``opt.load_model_path`` instead of
        ``opt.train_data_root``;
      * the second DataLoader rebound ``train_dataloader`` from an undefined
        ``test_data`` — it now builds ``val_dataloader`` from ``val_data``;
      * ``t.optim.Adam(model)`` lacked ``.parameters()`` and the lr;
      * ``optimizer.stop()`` -> ``optimizer.step()``;
      * removed the bogus ``confusion_matrix.add(loss.data[0])`` call;
      * ``os.path.exist`` -> ``os.path.exists``;
      * checkpointing, validation and lr decay moved out of the batch loop to
        run once per epoch, and ``previous_loss`` is updated unconditionally.

    Args:
        **kwargs: configuration overrides forwarded to ``opt.parse``.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step 1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step 2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step 3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step 4: meters — smoothed loss plus a 2-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train on one batch
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # update meters and visualize
            loss_meter.add(loss.data[0])
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # drop into the debugger when the trigger file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize once per epoch
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            'epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}'
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # decay the learning rate when the epoch loss did not improve
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
Beispiel #24
0
def train(**kwargs):
    """Train a DogCat classifier, checkpoint and validate every epoch.

    Standard loop: configure model, build train/val loaders over the same
    root (the dataset class splits internally via ``train=``), optimize with
    Adam + CrossEntropyLoss, and decay the lr when the smoothed epoch loss
    stops improving.

    Args:
        **kwargs: configuration overrides forwarded to ``opt.parse``.
    """
    opt.parse(kwargs)
    # vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        # bug fix: tqdm's ``total`` must be the number of batches, not the
        # number of samples — len(train_data) made the bar total wrong
        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):

            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            # loss_meter.add(loss.data[0])
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            # if ii % opt.print_freq == opt.print_freq - 1:
            #     vis.plot('loss', loss_meter.value()[0])

            # enter debug mode
            # if os.path.exists(opt.debug_file):
            #     import ipdb;
            #     ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        # vis.plot('val_accuracy', val_accuracy)
        # vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
        #     epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()),
        #     lr=lr))

        # update learning rate without losing optimizer state (momentum etc.)
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    opt.parse(kwargs)
    # log file
    logfile_name = "Cmpr_with_YOLOv2_" + opt.exp_desc + time.strftime(
        "_%m_%d_%H:%M:%S") + ".log.txt"
    ps = PlotSaver(logfile_name)

    # step1: Model
    model = getattr(models, opt.model)(
        use_imp=opt.use_imp,
        n=opt.feat_num,
        input_4_ch=opt.input_4_ch,
        model_name="Cmpr_yolo_imp_" + opt.exp_desc + "_r={r}_gama={w}".format(
            r=opt.rate_loss_threshold, w=opt.rate_loss_weight)
        if opt.use_imp else "Cmpr_yolo_no_imp_" + opt.exp_desc)
    # pdb.set_trace()
    if opt.use_gpu:
        model = multiple_gpu_process(model)

    cudnn.benchmark = True

    # step2: Data
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    train_data_transforms = transforms.Compose([
        # transforms.RandomHorizontalFlip(),  TODO: try to reimplement by myself to simultaneous operate on label and data
        transforms.ToTensor(),
        normalize
    ])
    val_data_transforms = transforms.Compose(
        [transforms.ToTensor(), normalize])
    train_data = ImageCropWithBBoxMaskDataset(
        opt.train_data_list,
        train_data_transforms,
        contrastive_degree=opt.contrastive_degree,
        mse_bbox_weight=opt.input_original_bbox_weight)
    val_data = ImageCropWithBBoxMaskDataset(
        opt.val_data_list,
        val_data_transforms,
        contrastive_degree=opt.contrastive_degree,
        mse_bbox_weight=opt.input_original_bbox_weight)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers,
                                  pin_memory=True)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers,
                                pin_memory=True)

    # step3: criterion and optimizer

    mse_loss = t.nn.MSELoss(size_average=False)

    if opt.use_imp:
        # TODO: new rate loss
        rate_loss = RateLoss(opt.rate_loss_threshold, opt.rate_loss_weight)
        # rate_loss = LimuRateLoss(opt.rate_loss_threshold, opt.rate_loss_weight)

    def weighted_mse_loss(input, target, weight):
        """Weighted sum-of-squares reconstruction loss.

        Positions where ``weight`` equals ``opt.input_original_bbox_inner``
        (presumably the interior of a detected bounding box — confirm against
        the dataset's mask encoding) are scaled by ``opt.mse_bbox_weight``;
        all other positions get weight 1.  Returns the un-averaged sum; the
        caller divides by ``2 * opt.batch_size`` afterwards.
        """
        # Earlier experimental weighting schemes, kept for reference:
        # weight[weight!=opt.mse_bbox_weight] = 1
        # weight[weight==opt.mse_bbox_weight] = opt.mse_bbox_weight
        # print('max val', weight.max())
        # return mse_loss(input, target)
        # weight_clone = weight.clone()
        # weight_clone[weight_clone == opt.input_original_bbox_weight] = 0
        # return t.sum(weight_clone * (input - target) ** 2)
        weight_clone = t.ones_like(weight)
        weight_clone[weight ==
                     opt.input_original_bbox_inner] = opt.mse_bbox_weight
        return t.sum(weight_clone * (input - target)**2)

    def yolo_rate_loss(imp_map, mask_r):
        """Rate loss on the importance map.

        ``mask_r`` is currently unused — it would only be consumed by the
        commented-out V2 variant below.
        """
        return rate_loss(imp_map)
        # V2 contrastive_degree must be 0!
        # return YoloRateLossV2(mask_r, opt.rate_loss_threshold, opt.rate_loss_weight)(imp_map)

    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))

    start_epoch = 0
    decay_file_create_time = -1  # 为了避免同一个文件反复衰减学习率, 所以判断修改时间

    if opt.resume:
        if use_data_parallel:
            start_epoch = model.module.load(
                None if opt.finetune else optimizer, opt.resume, opt.finetune)
        else:
            start_epoch = model.load(None if opt.finetune else optimizer,
                                     opt.resume, opt.finetune)

        if opt.finetune:
            print('Finetune from model checkpoint file', opt.resume)
        else:
            print('Resume training from checkpoint file', opt.resume)
            print('Continue training at epoch %d.' % start_epoch)

    # step4: meters
    mse_loss_meter = AverageValueMeter()
    if opt.use_imp:
        rate_loss_meter = AverageValueMeter()
        rate_display_meter = AverageValueMeter()
        total_loss_meter = AverageValueMeter()

    previous_loss = 1e100
    tolerant_now = 0
    same_lr_epoch = 0

    # ps init

    ps.new_plot('train mse loss',
                opt.print_freq,
                xlabel="iteration",
                ylabel="train_mse_loss")
    ps.new_plot('val mse loss', 1, xlabel="epoch", ylabel="val_mse_loss")
    if opt.use_imp:
        ps.new_plot('train rate value',
                    opt.print_freq,
                    xlabel="iteration",
                    ylabel="train_rate_value")
        ps.new_plot('train rate loss',
                    opt.print_freq,
                    xlabel="iteration",
                    ylabel="train_rate_loss")
        ps.new_plot('train total loss',
                    opt.print_freq,
                    xlabel="iteration",
                    ylabel="train_total_loss")
        ps.new_plot('val rate value',
                    1,
                    xlabel="iteration",
                    ylabel="val_rate_value")
        ps.new_plot('val rate loss',
                    1,
                    xlabel="iteration",
                    ylabel="val_rate_loss")
        ps.new_plot('val total loss',
                    1,
                    xlabel="iteration",
                    ylabel="val_total_loss")

    for epoch in range(start_epoch + 1, opt.max_epoch + 1):

        same_lr_epoch += 1
        # per epoch avg loss meter
        mse_loss_meter.reset()
        if opt.use_imp:
            rate_display_meter.reset()
            rate_loss_meter.reset()
            total_loss_meter.reset()
        else:
            total_loss_meter = mse_loss_meter
        # cur_epoch_loss refresh every epoch
        # vis.refresh_plot('cur epoch train mse loss')
        ps.new_plot("cur epoch train mse loss",
                    opt.print_freq,
                    xlabel="iteration in cur epoch",
                    ylabel="train_mse_loss")
        # progress_bar = tqdm(enumerate(train_dataloader), total=len(train_dataloader), ascii=True)
        # progress_bar.set_description('epoch %d/%d, loss = 0.00' % (epoch, opt.max_epoch))

        # Init val
        if (epoch == start_epoch + 1) and opt.init_val:
            print('Init validation ... ')
            if opt.use_imp:
                mse_val_loss, rate_val_loss, total_val_loss, rate_val_display = val(
                    model, val_dataloader, weighted_mse_loss, yolo_rate_loss,
                    ps)
            else:
                mse_val_loss = val(model, val_dataloader, weighted_mse_loss,
                                   None, ps)

            ps.add_point('val mse loss', mse_val_loss)
            if opt.use_imp:
                ps.add_point('val rate value', rate_val_display)
                ps.add_point('val rate loss', rate_val_loss)
                ps.add_point('val total loss', total_val_loss)

            ps.make_plot('val mse loss')

            if opt.use_imp:
                ps.make_plot('val rate value')
                ps.make_plot('val rate loss')
                ps.make_plot('val total loss')

            # log sth.
            if opt.use_imp:
                ps.log(
                    'Init Val @ Epoch:{epoch}, lr:{lr}, val_mse_loss: {val_mse_loss}, val_rate_loss: {val_rate_loss}, val_total_loss: {val_total_loss}, val_rate_display: {val_rate_display} '
                    .format(epoch=epoch,
                            lr=lr,
                            val_mse_loss=mse_val_loss,
                            val_rate_loss=rate_val_loss,
                            val_total_loss=total_val_loss,
                            val_rate_display=rate_val_display))
            else:
                ps.log(
                    'Init Val @ Epoch:{epoch}, lr:{lr}, val_mse_loss:{val_mse_loss}'
                    .format(epoch=epoch, lr=lr, val_mse_loss=mse_val_loss))

        if opt.only_init_val:
            print('Only Init Val Over!')
            return

        model.train()

        if epoch == start_epoch + 1:
            print('Start training, please inspect log file %s!' % logfile_name)
        # mask is the detection bounding box mask
        for idx, (data, mask, o_mask) in enumerate(train_dataloader):

            # pdb.set_trace()

            data = Variable(data)
            mask = Variable(mask)
            o_mask = Variable(o_mask, requires_grad=False)

            if opt.use_gpu:
                data = data.cuda(async=True)
                mask = mask.cuda(async=True)
                o_mask = o_mask.cuda(async=True)

            # pdb.set_trace()

            optimizer.zero_grad()
            reconstructed, imp_mask_sigmoid = model(data, mask, o_mask)

            # print ('imp_mask_height', model.imp_mask_height)
            # pdb.set_trace()

            # print ('type recons', type(reconstructed.data))

            loss = weighted_mse_loss(reconstructed, data, o_mask)
            # loss = mse_loss(reconstructed, data)
            caffe_loss = loss / (2 * opt.batch_size)

            if opt.use_imp:
                rate_loss_display = imp_mask_sigmoid
                # rate_loss_ =  rate_loss(rate_loss_display)
                rate_loss_ = yolo_rate_loss(rate_loss_display, mask)
                total_loss = caffe_loss + rate_loss_
            else:
                total_loss = caffe_loss

            total_loss.backward()
            optimizer.step()

            mse_loss_meter.add(caffe_loss.data[0])

            if opt.use_imp:
                rate_loss_meter.add(rate_loss_.data[0])
                rate_display_meter.add(rate_loss_display.data.mean())
                total_loss_meter.add(total_loss.data[0])

            if idx % opt.print_freq == opt.print_freq - 1:
                ps.add_point(
                    'train mse loss',
                    mse_loss_meter.value()[0]
                    if opt.print_smooth else caffe_loss.data[0])
                ps.add_point(
                    'cur epoch train mse loss',
                    mse_loss_meter.value()[0]
                    if opt.print_smooth else caffe_loss.data[0])
                if opt.use_imp:
                    ps.add_point(
                        'train rate value',
                        rate_display_meter.value()[0]
                        if opt.print_smooth else rate_loss_display.data.mean())
                    ps.add_point(
                        'train rate loss',
                        rate_loss_meter.value()[0]
                        if opt.print_smooth else rate_loss_.data[0])
                    ps.add_point(
                        'train total loss',
                        total_loss_meter.value()[0]
                        if opt.print_smooth else total_loss.data[0])

                if not opt.use_imp:
                    ps.log('Epoch %d/%d, Iter %d/%d, loss = %.2f, lr = %.8f' %
                           (epoch, opt.max_epoch, idx, len(train_dataloader),
                            total_loss_meter.value()[0], lr))
                else:
                    ps.log(
                        'Epoch %d/%d, Iter %d/%d, loss = %.2f, mse_loss = %.2f, rate_loss = %.2f, rate_display = %.2f, lr = %.8f'
                        %
                        (epoch, opt.max_epoch, idx, len(train_dataloader),
                         total_loss_meter.value()[0],
                         mse_loss_meter.value()[0], rate_loss_meter.value()[0],
                         rate_display_meter.value()[0], lr))

                # 进入debug模式
                if os.path.exists(opt.debug_file):
                    pdb.set_trace()

        if epoch % opt.save_interval == 0:
            print('save checkpoint file of epoch %d.' % epoch)
            if use_data_parallel:
                model.module.save(optimizer, epoch)
            else:
                model.save(optimizer, epoch)

        ps.make_plot('train mse loss')
        ps.make_plot('cur epoch train mse loss', epoch)
        if opt.use_imp:
            ps.make_plot("train rate value")
            ps.make_plot("train rate loss")
            ps.make_plot("train total loss")

        if epoch % opt.eval_interval == 0:

            print('Validating ...')
            # val
            if opt.use_imp:
                mse_val_loss, rate_val_loss, total_val_loss, rate_val_display = val(
                    model, val_dataloader, weighted_mse_loss, yolo_rate_loss,
                    ps)
            else:
                mse_val_loss = val(model, val_dataloader, weighted_mse_loss,
                                   None, ps)

            ps.add_point('val mse loss', mse_val_loss)
            if opt.use_imp:
                ps.add_point('val rate value', rate_val_display)
                ps.add_point('val rate loss', rate_val_loss)
                ps.add_point('val total loss', total_val_loss)

            ps.make_plot('val mse loss')

            if opt.use_imp:
                ps.make_plot('val rate value')
                ps.make_plot('val rate loss')
                ps.make_plot('val total loss')

            # log sth.
            if opt.use_imp:
                ps.log(
                    'Epoch:{epoch}, lr:{lr}, train_mse_loss: {train_mse_loss}, train_rate_loss: {train_rate_loss}, train_total_loss: {train_total_loss}, train_rate_display: {train_rate_display} \n\
    val_mse_loss: {val_mse_loss}, val_rate_loss: {val_rate_loss}, val_total_loss: {val_total_loss}, val_rate_display: {val_rate_display} '
                    .format(epoch=epoch,
                            lr=lr,
                            train_mse_loss=mse_loss_meter.value()[0],
                            train_rate_loss=rate_loss_meter.value()[0],
                            train_total_loss=total_loss_meter.value()[0],
                            train_rate_display=rate_display_meter.value()[0],
                            val_mse_loss=mse_val_loss,
                            val_rate_loss=rate_val_loss,
                            val_total_loss=total_val_loss,
                            val_rate_display=rate_val_display))
            else:
                ps.log(
                    'Epoch:{epoch}, lr:{lr}, train_mse_loss:{train_mse_loss}, val_mse_loss:{val_mse_loss}'
                    .format(epoch=epoch,
                            lr=lr,
                            train_mse_loss=mse_loss_meter.value()[0],
                            val_mse_loss=mse_val_loss))

        # Adaptive adjust lr
        # 每个lr,如果有opt.tolerant_max次比上次的val_loss还高,
        # update learning rate
        # if loss_meter.value()[0] > previous_loss:
        if opt.use_early_adjust:
            if total_loss_meter.value()[0] > previous_loss:
                tolerant_now += 1
                if tolerant_now == opt.tolerant_max:
                    tolerant_now = 0
                    same_lr_epoch = 0
                    lr = lr * opt.lr_decay
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr
                    print('Due to early stop anneal lr to %.10f at epoch %d' %
                          (lr, epoch))
                    ps.log('Due to early stop anneal lr to %.10f at epoch %d' %
                           (lr, epoch))

            else:
                tolerant_now -= 1

        if epoch % opt.lr_anneal_epochs == 0:
            # if same_lr_epoch and same_lr_epoch % opt.lr_anneal_epochs == 0:
            same_lr_epoch = 0
            tolerant_now = 0
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            print('Anneal lr to %.10f at epoch %d due to full epochs.' %
                  (lr, epoch))
            ps.log('Anneal lr to %.10f at epoch %d due to full epochs.' %
                   (lr, epoch))

        if opt.use_file_decay_lr and os.path.exists(opt.lr_decay_file):
            cur_mtime = os.path.getmtime(opt.lr_decay_file)
            if cur_mtime > decay_file_create_time:
                decay_file_create_time = cur_mtime
                lr = lr * opt.lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                print(
                    'Anneal lr to %.10f at epoch %d due to decay-file indicator.'
                    % (lr, epoch))
                ps.log(
                    'Anneal lr to %.10f at epoch %d due to decay-file indicator.'
                    % (lr, epoch))

        previous_loss = total_loss_meter.value()[0]
Beispiel #26
0
def test(**kwargs):
    """Stub test entry point: only applies keyword overrides to the global config."""
    opt.parse(kwargs)
Beispiel #27
0
def train(**kwargs):
    """Fine-tune a pretrained VGG19-BN for 2-class classification on the CAG dataset.

    kwargs are forwarded to ``opt.parse`` and override the global config.
    Runs for ``opt.max_epoch`` epochs, logs train loss/accuracy per epoch,
    decays the learning rate every 100 epochs, validates every 5 epochs.
    Requires CUDA (the model is moved to GPU and wrapped in DataParallel).
    """
    opt.parse(kwargs)

    # step1: configure model — pretrained VGG19-BN with a fresh 2-class head.
    model = Pre_models.vgg19_bn(pretrained=True)
    model.classifier = torch.nn.Sequential(
        torch.nn.Linear(512 * 7 * 7, 4096),
        torch.nn.ReLU(True),
        torch.nn.Dropout(),
        torch.nn.Linear(4096, 4096),
        torch.nn.ReLU(True),
        torch.nn.Dropout(),
        torch.nn.Linear(4096, 2),
    )

    model = model.cuda()
    model = torch.nn.DataParallel(model)

    # step2: data — CAG provides both splits from the same root via `train=`.
    train_data = CAG(opt.train_data_root, train=True)
    val_data = CAG(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    loss_func = t.nn.CrossEntropyLoss()

    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # train
    for epoch in range(opt.max_epoch):
        print('epoch {}'.format(epoch + 1))
        # BUG FIX: the sample counter previously started at 1, which biased
        # the reported accuracy low; start at 0 and guard divisions below.
        train_num = 0
        train_acc = 0
        sum_loss = 0
        batch_num = 0
        for ii, (data, label) in enumerate(train_dataloader):
            # Forward / backward / update for one mini-batch.
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            probability = t.nn.functional.softmax(score, dim=1)
            _, result = torch.max(probability, 1)
            train_correct = (result == target.squeeze(0)).sum()
            train_acc += train_correct.item()
            train_num += target.size(0)
            loss = loss_func(score, target)
            sum_loss += loss.item()
            loss.backward()
            optimizer.step()
            batch_num += 1

        # Guard against an empty dataloader to avoid ZeroDivisionError.
        print("当前loss:", sum_loss / max(batch_num, 1))
        logger.scalar_summary('train_loss', sum_loss / max(batch_num, 1), epoch)
        accuracy = train_acc / max(train_num, 1)
        logger.scalar_summary('train_accurancy', accuracy, epoch)

        # Decay the learning rate every 100 epochs.
        if (epoch + 1) % 100 == 0:
            lr = lr * opt.lr_decay
            print("当前学习率", lr)
            logger.scalar_summary('lr', lr, epoch)
            # The optimizer manages params via param_groups; update each
            # group's 'lr' entry to change the effective learning rate.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        # validate and visualize every 5 epochs
        if (epoch + 1) % 5 == 0:
            val_accuracy, val_loss = val(model, val_dataloader)
            print("验证集上准确率为:", val_accuracy)
            logger.scalar_summary('val_accurancy', val_accuracy, epoch)
            print("val_loss:", val_loss)
            logger.scalar_summary('val_loss', val_loss, epoch)
Beispiel #28
0
def train(**kwargs):
    """Train a bag-level distant-supervision relation-extraction model.

    A sentence encoder (chosen by ``opt.encoder``: 'BERT', 'PCNN', or another
    class from ``encoder_models``) embeds each sentence in a bag, and a
    selector (``opt.selector``) aggregates the sentence embeddings into one
    bag-level prediction. Bags whose first label is 0 (NA) are skipped.
    kwargs override fields of the global ``opt`` config. After all epochs,
    both models' state dicts are saved under ``checkpoints/``.
    """
    # kwargs.update({'model': 'CNN'})
    opt.parse(kwargs)

    if (opt.use_gpu):
        torch.cuda.set_device(opt.gpu_id)

    if opt.encoder == 'BERT':
        encoder_model = BertForSequenceClassification.from_pretrained(
            "./downloaded_weights/downloaded_bert_base_uncased",
            num_labels=opt.rel_num)
        # BERT already projects to the label space, so the encoder output
        # dimension equals the number of relations.
        opt.encoder_out_dimension = opt.rel_num
    else:
        encoder_model = getattr(encoder_models, opt.encoder)(opt)
        opt.encoder_out_dimension = encoder_model.out_dimension
    selector_model = getattr(selector_models, opt.selector)(opt)
    # encoder_model = torch.nn.DataParallel(encoder_model, device_ids=[3,6])

    if (opt.use_gpu):
        encoder_model = encoder_model.cuda()
        selector_model = selector_model.cuda()

    # Loading data: dataset class is resolved by name, e.g. opt.data='NYT'
    # -> dataset.NYTData.
    DataModel = getattr(dataset, opt.data + 'Data')
    train_data = DataModel(opt.data_root,
                           train=True,
                           use_bert=opt.use_bert_tokenizer)
    train_data_loader = DataLoader(train_data,
                                   batch_size=opt.batch_size,
                                   shuffle=True,
                                   num_workers=opt.num_workers,
                                   collate_fn=collate_fn)
    print('train data: {}'.format(len(train_data)))

    test_data = DataModel(opt.data_root,
                          train=False,
                          use_bert=opt.use_bert_tokenizer)
    test_data_loader = DataLoader(test_data,
                                  batch_size=opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers,
                                  collate_fn=collate_fn)
    print('test data: {}'.format(len(test_data)))

    criterion = nn.CrossEntropyLoss()
    if opt.encoder == 'BERT':
        optimizer = AdamW(
            [{
                'params': encoder_model.parameters()
            }, {
                'params': selector_model.parameters()
            }],
            lr=opt.lr,
            correct_bias=True
        )  # To reproduce BertAdam specific behavior set correct_bias=False
    else:
        optimizer = optim.Adadelta([{
            'params': encoder_model.parameters()
        }, {
            'params': selector_model.parameters()
        }],
                                   lr=opt.lr,
                                   rho=1.0,
                                   eps=1e-6,
                                   weight_decay=opt.weight_decay)

    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=2,
                                     t_total=3)  # PyTorch scheduler

    # NOTE(review): the line below unconditionally REPLACES the AdamW /
    # Adadelta optimizer built above with plain SGD, while `scheduler` still
    # wraps the old (now unused) optimizer — so scheduler.step() will not
    # affect this SGD's learning rate. Looks like a leftover from the
    # commented-out experiment; confirm this is intentional.
    optimizer = optim.SGD([{
        'params': encoder_model.parameters()
    }, {
        'params': selector_model.parameters()
    }],
                          lr=opt.lr)

    max_pre = 0.0
    max_rec = 0.0
    for epoch in range(opt.num_epochs):
        encoder_model.train()
        selector_model.train()
        print("*" * 50)
        print("Epoch {}".format(epoch))
        total_loss = 0
        max_insNum = 0
        for batch_num, (data, label_set) in enumerate(train_data_loader):
            # label_set holds the label list of each bag; only the first
            # label of each bag is used for training.
            labels = []
            # Accumulator for bag-level logits.
            # NOTE(review): 53 is hard-coded — presumably the relation count
            # of the dataset; confirm it matches opt.rel_num.
            outs = torch.empty([0, 53])

            empty = True  # if all labels of bags in one batch are zeros, then it's empty, continue to avoid error
            for l in label_set:
                if (l[0] != 0):
                    labels.append(l[0])
                    empty = False
            if empty:
                continue

            if opt.use_gpu:
                labels = torch.LongTensor(labels).cuda()
                outs = outs.cuda()
            else:
                labels = torch.LongTensor(labels)

            optimizer.zero_grad()
            train_cor = 0
            for idx, bag in enumerate(data):
                # Bag layout (by position): bag[1]=instance count,
                # bag[2]=token ids; for non-BERT encoders bag[3]=position
                # features and bag[5]=masks.
                insNum = bag[1]
                label = label_set[idx][0]  # Label of the current bag
                if (label_set[idx][0] == 0):
                    continue

                ins_outs = torch.empty(0)
                instances = bag[2]
                pf_list = []
                mask_list = []
                if opt.encoder != 'BERT':
                    pf_list = bag[3]
                    mask_list = bag[5]

                ins_out = torch.empty(0)
                encoder_model.batch_size = insNum
                if opt.use_gpu:
                    instances = torch.LongTensor(instances).cuda()

                if opt.encoder == 'BERT':
                    # Encode at most opt.max_sentence_in_bag sentences per bag.
                    if insNum > opt.max_sentence_in_bag:
                        ins_outs = encoder_model(
                            instances[:opt.max_sentence_in_bag])[0]
                    else:
                        ins_outs = encoder_model(instances)[0]
                else:
                    # NOTE(review): this inner loop shadows the outer bag
                    # index `idx`; harmless only because the outer value is
                    # not read again after this point.
                    for idx, instance in enumerate(instances):
                        if opt.use_gpu:
                            pfs = torch.LongTensor(pf_list[idx]).cuda()
                            masks = torch.LongTensor(mask_list[idx]).cuda()
                        else:
                            pfs = torch.LongTensor(pf_list[idx])
                            masks = torch.LongTensor(mask_list[idx])

                        if opt.encoder == 'PCNN':
                            ins_out = encoder_model(instance, pfs, masks)
                        else:
                            ins_out = encoder_model(instance, pfs)

                        if (opt.use_gpu):
                            ins_out = ins_out.cuda()
                            ins_outs = ins_outs.cuda()

                        ins_outs = torch.cat((ins_outs, ins_out), 0)
                        del instance, ins_out

                        if idx >= opt.max_sentence_in_bag:
                            break

                # Aggregate sentence embeddings into one bag feature.
                bag_feature = selector_model(ins_outs)
                if opt.use_gpu: bag_feature = bag_feature.cuda()
                if (torch.max(bag_feature.squeeze(), 0)[1] == label):
                    train_cor += 1

                outs = torch.cat((outs, bag_feature), 0)
                del ins_outs, bag_feature

            loss = criterion(outs, labels)
            total_loss += loss.item()
            avg_loss = total_loss / (batch_num + 1)
            sys.stdout.write(
                "\rbatch number: {:6d}\tloss: {:7.4f}\ttrain_acc: {:7.2f}\t".
                format(batch_num, avg_loss, train_cor / len(labels)))
            sys.stdout.flush()

            loss.backward()
            if opt.encoder == 'BERT':
                scheduler.step()
            optimizer.step()
            del outs, labels

        # Evaluate on the test split after each epoch unless skipped.
        if (opt.skip_predict != True):
            with torch.no_grad():
                predict(encoder_model, selector_model, test_data_loader)

    # Save both components, timestamped (the '.pth' suffix is part of the
    # strftime format string and is passed through literally).
    t = time.strftime('%m_%d_%H_%M.pth')
    torch.save(encoder_model.state_dict(),
               'checkpoints/{}_{}'.format(opt.encoder, t))
    torch.save(selector_model.state_dict(),
               'checkpoints/{}_{}'.format(opt.selector, t))
Beispiel #29
0
                                  path=args.path4AffGraph,
                                  img_name=img_name,
                                  path4data=args.path4data)
        else:
            return None

    def __len__(self):
        """Dataset size: the number of entries in the training-file list."""
        sample_count = len(self.train_file)
        return sample_count


if __name__ == "__main__":
    # test the corecness of the dataset
    args.parse(hid_unit=40,
               max_epoch=250,
               drop_rate=.3,
               path4train_images=args.path4train_aug_images,
               path4AffGraph=os.path.join("..", "psa", "AFF_MAT_normalize"),
               path4partial_label_label=os.path.join(
                   "..", "psa", "RES38_PARTIAL_PSEUDO_LABEL_DN"),
               path4node_feat=os.path.join("..", "psa", "AFF_FEATURE_res38"))
    if getpass.getuser() == "u7577591":
        args.path4node_feat = os.path.join("/work/u7577591/",
                                           "irn/AFF_FEATURE_res50_W")
        args.path4partial_label_label = "data/partial_pseudo_label/" + "label/" + "RES_CAM_TRAIN_AUG_PARTIAL_PSEUDO_LABEL" + "@PIL_near@confident_ratio_" + "0.3_cam_DN_johnney"
        args.path4AffGraph = os.path.join("/work/u7577591/irn",
                                          "AFF_MAT_normalize_IRNet")

    dataset = graph_voc()
    import time
    from utils import show_timing
    t_start = time.time()
    for i, item in enumerate(dataset, start=1):
def main(**kwargs):
    '''
    Training entry point: trains a Zhihu multi-label text classifier
    (title + content inputs) with visdom-based visualization and
    score-driven learning-rate decay. kwargs override the global config.
    '''

    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()

    model = getattr(models, opt.model)(opt).cuda()
    if opt.model_path:
        model.load(opt.model_path)
    print(model)

    # Re-parse so the final (possibly model-adjusted) config gets printed.
    opt.parse(kwargs, print_=True)

    vis.reinit(opt.env)
    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()

    dataset = ZhihuData(opt.train_data_path,
                        opt.labels_path,
                        type_=opt.type_,
                        augument=opt.augument)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    optimizer = model.get_optimizer(lr, opt.lr2, opt.weight_decay)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            # Train step: forward, loss, backward, update.
            title, content, label = Variable(title.cuda()), Variable(
                content.cuda()), Variable(label.cuda())
            optimizer.zero_grad()
            score = model(title, content)
            loss = loss_function(score, opt.weight * label.float())
            # NOTE(review): `loss.data[0]` is pre-0.4 PyTorch; on modern
            # versions this raises — `loss.item()` is the replacement.
            loss_meter.add(loss.data[0])
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                # Visualization / debug hook: drop into ipdb if the debug
                # marker file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

                # Compare top-5 predictions against the (up to 5) marked
                # true labels of each sample, then score the batch.
                predict = score.data.topk(5, dim=1)[1].cpu().tolist()
                true_target = label.data.float().topk(5, dim=1)
                true_index = true_target[1][:, :5]
                true_label = true_target[0][:, :5]
                predict_label_and_marked_label_list = []
                for jj in range(label.size(0)):
                    true_index_ = true_index[jj]
                    true_label_ = true_label[jj]
                    true = true_index_[true_label_ > 0]
                    predict_label_and_marked_label_list.append(
                        (predict[jj], true.tolist()))
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss),
                             win='tmp')
                vis.plot('scores', score_meter.value()[0])

                vis.plot('loss', loss_meter.value()[0])
                # NOTE(review): `k` and `output` are computed but never used —
                # presumably leftovers from a removed sample printout.
                k = t.randperm(label.size(0))[0]
                output = t.nn.functional.sigmoid(score)

            if ii % opt.decay_every == opt.decay_every - 1:
                # Score on the validation set and adjust the learning rate
                # accordingly.
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })

                # Checkpoint on improvement; on regression, reload the best
                # checkpoint and decay both learning rates.
                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)

                if scores < best_score:
                    model.load(best_path, change_opt=False)
                    lr = lr * opt.lr_decay
                    lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
                    optimizer = model.get_optimizer(lr, lr2, 0)

                pre_loss = loss_meter.value()[0]
                loss_meter.reset()
                score_meter.reset()
Beispiel #31
0
def main(**kwargs):
    '''
    Training entry point for the ensemble model: a meta-model (opt.model)
    built on top of several pre-trained sub-model checkpoints, with
    dynamically decayed learning rates / weight decay.
    kwargs override the global config.
    '''
    opt.parse(kwargs, print_=False)
    if opt.debug:
        import ipdb
        ipdb.set_trace()
    # Several earlier ensemble configurations (alternative opt.model_names /
    # opt.model_paths checkpoint lists, with their scores) were kept here as
    # commented-out history; removed for readability — see VCS history.
    opt.model_names = [
        'MultiCNNTextBNDeep', 'FastText3', 'LSTMText', 'CNNText_inception'
    ]
    opt.model_paths = [
        'checkpoints/MultiCNNTextBNDeep_word_0.41124002492',
        'checkpoints/FastText3_word_0.40810787337',
        'checkpoints/LSTMText_word_0.413681107036',
        'checkpoints/CNNText_tmp_char_0.402429167301'
    ]

    model = getattr(models, opt.model)(opt).cuda()
    if opt.model_path:
        model.load(opt.model_path)
    print(model)

    # Re-parse so the final config gets printed.
    opt.parse(kwargs, print_=True)

    vis.reinit(opt.env)
    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()
    # NOTE(review): if opt.all is falsy, `dataset` is never assigned and the
    # DataLoader below raises NameError — the non-`all` branch is commented
    # out. Confirm opt.all is always True for this entry point.
    if opt.all:
        dataset = ZhihuALLData(opt.train_data_path,
                               opt.labels_path,
                               type_=opt.type_,
                               augument=opt.augument)
    # else :dataset = ZhihuData(opt.train_data_path,opt.labels_path,type_=opt.type_)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    optimizer = model.get_optimizer(opt.lr, opt.lr2)
    loss_meter = tnt.meter.AverageValueMeter()
    score_meter = tnt.meter.AverageValueMeter()
    best_score = 0

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        score_meter.reset()
        for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            # Here title and content are each (word, char) pairs of tensors.
            title, content, label = (Variable(
                title[0].cuda()), Variable(title[1].cuda())), (Variable(
                    content[0].cuda()), Variable(content[1].cuda())), Variable(
                        label.cuda())
            optimizer.zero_grad()
            score = model(title, content)
            loss = loss_function(score, label.float())
            # NOTE(review): `loss.data[0]` is pre-0.4 PyTorch; on modern
            # versions this raises — `loss.item()` is the replacement.
            loss_meter.add(loss.data[0])
            loss.backward()
            optimizer.step()

            if ii % opt.plot_every == opt.plot_every - 1:
                # Drop into ipdb if the debug marker file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

                # Compare top-5 predictions against the (up to 5) marked
                # true labels of each sample, then score the batch.
                predict = score.data.topk(5, dim=1)[1].cpu().tolist(
                )
                true_target = label.data.float().topk(
                    5, dim=1)
                true_index = true_target[1][:, :5]
                true_label = true_target[0][:, :5]
                predict_label_and_marked_label_list = []
                for jj in range(label.size(0)):
                    true_index_ = true_index[jj]
                    true_label_ = true_label[jj]
                    true = true_index_[true_label_ > 0]
                    predict_label_and_marked_label_list.append(
                        (predict[jj], true.tolist()))
                score_, prec_, recall_, _ss = get_score(
                    predict_label_and_marked_label_list)
                score_meter.add(score_)
                vis.vis.text('prec:%s,recall:%s,score:%s,a:%s' %
                             (prec_, recall_, score_, _ss),
                             win='tmp')
                vis.plot('scores', score_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])

            if ii % opt.decay_every == opt.decay_every - 1:
                # Score on the validation set and adjust learning rates.
                del loss
                scores, prec_, recall_, _ss = val(model, dataset)
                vis.log({
                    ' epoch:': epoch,
                    ' lr: ': lr,
                    'scores': scores,
                    'prec': prec_,
                    'recall': recall_,
                    'ss': _ss,
                    'scores_train': score_meter.value()[0],
                    'loss': loss_meter.value()[0]
                })

                # Checkpoint on improvement; on regression, reload the best
                # checkpoint and decay both learning rates.
                if scores > best_score:
                    best_score = scores
                    best_path = model.save(name=str(scores), new=True)

                if scores < best_score:
                    model.load(best_path, change_opt=False)
                    lr = lr * opt.lr_decay
                    if lr2 == 0: lr2 = 1e-4
                    else: lr2 = lr2 * 0.5
                    optimizer = model.get_optimizer(lr, lr2, 0)

                pre_loss = loss_meter.value()[0]
                loss_meter.reset()
                score_meter.reset()
Beispiel #32
0
    def __init__(self, **kwargs):
        """Set up the layer-decomposition planner.

        Parses config overrides, configures logging, loads the pre-trained
        CIFAR-10 model from ``opt.load_model_path``, builds the data loaders,
        estimates per-layer / whole-model runtime (predicted and measured on
        the target device via RPC), and derives per-layer budgets when a
        runtime constraint is set.
        """
        opt.parse(kwargs)
        # Use PyTorch as the tensor-decomposition backend
        # (tl is presumably tensorly — confirm against the file's imports).
        tl.set_backend('pytorch')
        self.dataset = "cifar"
        # Metadata of the layer currently being decomposed (-1 = unset).
        self.decomposed_layer_info = {'key': -1, 'image_size': -1, 'kernel_size': -1, 'stride': -1, 'padding': -1}
        self.layer_budget = {}
        self.origin_layer_runtime = {}
        self.origin_model_runtime = 0.0
        self.VBMF_layer_rank = {}
        self.constrain = opt.constrain
        self.conv_target_rate = 0.0
        self.fc_target_rate = 0.0
        self.user_budget = 1
        self.real_model_runtime = 0.0
        self.remain_budget = 0.0
        self.origin_model_constrain = 0.0
        self.search_runtime = {}
        self.bayesian_iter = {}

        # Configure Logger: root logger, DEBUG level, file output.
        self.logger = logging.getLogger()
        log_file = logging.FileHandler('result/log/test.log')
        self.logger.addHandler(log_file)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        log_file.setFormatter(formatter)
        self.logger.setLevel(logging.DEBUG)

        # Load the runtime-prediction (performance) model.
        self.perf_model = Estimator()

        # Load the pre-trained model; a dict checkpoint stores the net
        # under the 'net' key.
        if(opt.load_model_path is None):
            import sys
            print('set the model path')
            sys.exit(-1)
        else:
            checkpoint = torch.load(opt.load_model_path)
            if(type(checkpoint) is dict):
                checkpoint = checkpoint['net']
        self.model = checkpoint.cuda()
        print(self.model)

        # Preparing data: standard CIFAR-10 augmentation / normalization.
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])
        trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform_train)
        self.trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
        testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform_test)
        self.testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=4)

        print('\n{}Model Info'.format('\033[33m'))
        print('↓↓↓↓↓↓↓↓↓↓↓↓↓↓{}\n'.format('\033[0m'))

        # Set Criterion
        self.criterion = torch.nn.CrossEntropyLoss()

        # Calculate [input, output] spatial size for each conv / maxpool
        # layer of model.features, using the standard output-size formula
        # (in - k + 2p) // s + 1.
        self.model_image_size = {}
        if(self.dataset == 'cifar'):
            in_image_size = 32
        for i, key in enumerate(self.model.features._modules.keys()):
            if isinstance(self.model.features._modules[key], torch.nn.modules.conv.Conv2d):
                conv_layer = self.model.features._modules[key]
                after_image_size = ((in_image_size - conv_layer.kernel_size[0] + 2*conv_layer.padding[0]) // conv_layer.stride[0] )+ 1
                self.model_image_size[key] = [in_image_size, after_image_size]
                in_image_size = after_image_size
            elif isinstance(self.model.features._modules[key], torch.nn.modules.MaxPool2d):
                maxpool_layer = self.model.features._modules[key]
                after_image_size = ((in_image_size - maxpool_layer.kernel_size) // maxpool_layer.stride )+ 1
                self.model_image_size[key] = [in_image_size, after_image_size]
                in_image_size = after_image_size
        print('{}Image_Size{}: {}'.format('\033[36m', '\033[0m', self.model_image_size))

        # Get Origin MAC and Weight and runtime (predicted).
        self.origin_mac, self.origin_weight = self.get_model_mac_weight(self.model)

        self.origin_model_runtime, self.origin_layer_runtime = self.get_model_predict_runtime(self.model)
        self.origin_model_constrain, _ = self.get_model_predict_runtime_without_small(self.model)

        # Deploy to the target device via RPC and measure real runtime;
        # the *1000 scaling is presumably seconds -> ms — confirm against
        # deploy_by_rpc's return unit.
        save_model_name = export_onnx_model(self.model)
        decomp_runtime = deploy_by_rpc(save_model_name)
        self.real_model_runtime = decomp_runtime * 1000
        os.remove(save_model_name)

        print('{}Origin_MAC{}: {}, {}Origin_Weight{}: {}'.format('\033[36m', '\033[0m', self.origin_mac, '\033[36m', '\033[0m', self.origin_weight))
        print('{}Pred_Origin_Runtime{}: {}, {}Real_Origin_Runtime{}: {}'.format('\033[36m', '\033[0m', self.origin_model_runtime, '\033[36m', '\033[0m', self.real_model_runtime))
        print('{}Origin_Layer_Runtime{}: {}'.format('\033[36m', '\033[0m', self.origin_layer_runtime))
        print('{}Origin_Model_Constrain{}: {}'.format('\033[36m', '\033[0m', self.origin_model_constrain))

        self.VBMF_layer_rank = self.get_VBMF_layer_rank()

        if(self.constrain > 0):
            # Calculate importance for each layer
            self.layer_importance = self.get_layer_importance()
            print('{}Layer Importance{}: {}'.format('\033[36m', '\033[0m', self.layer_importance))

            # Get Layer Budget
            self.layer_budget = self.get_layer_budget()
def train(**kwargs):
    """Train the image-compression model, optionally with an importance map.

    ``kwargs`` are merged into the global ``opt`` config via ``opt.parse``.
    Each epoch minimizes an MSE reconstruction loss (Caffe-style, divided by
    ``2 * batch_size``) plus, when ``opt.use_imp`` is set, a rate loss on the
    importance mask.  Progress is logged/plotted through a ``PlotSaver``,
    checkpoints are saved every epoch, and the learning rate is annealed on a
    fixed epoch schedule and (optionally) on an early-stop tolerance counter.

    NOTE(review): relies on a module-level ``use_data_parallel`` flag and a
    module-level ``val`` function that are not defined in this block — confirm
    they exist at module scope.
    """
    opt.parse(kwargs)
    # vis = Visualizer(opt.env)
    # log file
    ps = PlotSaver("Train_ImageNet12_With_ImpMap_" +
                   time.strftime("%m_%d_%H:%M:%S") + ".log.txt")

    # step1: Model
    model = getattr(models, opt.model)(
        use_imp=opt.use_imp,
        model_name="CWCNN_limu_ImageNet_imp_r={r}_γ={w}".format(
            r=opt.rate_loss_threshold, w=opt.rate_loss_weight)
        if opt.use_imp else None)
    # if opt.use_imp else "test_pytorch")
    if opt.use_gpu:
        # model = multiple_gpu_process(model)
        model.cuda()


#    pdb.set_trace()

    cudnn.benchmark = True

    # step2: Data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_data_transforms = transforms.Compose([
        transforms.Resize(256),
        #transforms.Scale(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    val_data_transforms = transforms.Compose([
        transforms.Resize(256),
        #transforms.Scale(256),
        transforms.CenterCrop(224),
        # transforms.TenCrop(224),
        # transforms.Lambda(lambda crops: t.stack(([normalize(transforms.ToTensor()(crop)) for crop in crops]))),
        transforms.ToTensor(),
        normalize
    ])
    # train_data = ImageNet_200k(opt.train_data_root, train=True, transforms=data_transforms)
    # val_data = ImageNet_200k(opt.val_data_root, train = False, transforms=data_transforms)
    train_data = datasets.ImageFolder(opt.train_data_root,
                                      train_data_transforms)
    val_data = datasets.ImageFolder(opt.val_data_root, val_data_transforms)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers,
                                  pin_memory=True)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers,
                                pin_memory=True)

    # step3: criterion and optimizer

    # summed (not averaged) squared error; normalized manually below
    mse_loss = t.nn.MSELoss(size_average=False)

    if opt.use_imp:
        # rate_loss = RateLoss(opt.rate_loss_threshold, opt.rate_loss_weight)
        rate_loss = LimuRateLoss(opt.rate_loss_threshold, opt.rate_loss_weight)

    lr = opt.lr

    optimizer = t.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))

    start_epoch = 0

    if opt.resume:
        # When finetuning, the optimizer state is deliberately NOT restored.
        if hasattr(model, 'module'):
            start_epoch = model.module.load(
                None if opt.finetune else optimizer, opt.resume, opt.finetune)
        else:
            start_epoch = model.load(None if opt.finetune else optimizer,
                                     opt.resume, opt.finetune)

        if opt.finetune:
            print('Finetune from model checkpoint file', opt.resume)
        else:
            print('Resume training from checkpoint file', opt.resume)
            print('Continue training at epoch %d.' % start_epoch)

    # step4: meters
    mse_loss_meter = AverageValueMeter()
    if opt.use_imp:
        rate_loss_meter = AverageValueMeter()
        rate_display_meter = AverageValueMeter()
        total_loss_meter = AverageValueMeter()

    previous_loss = 1e100
    tolerant_now = 0
    same_lr_epoch = 0

    # ps init

    ps.new_plot('train mse loss',
                opt.print_freq,
                xlabel="iteration",
                ylabel="train_mse_loss")
    ps.new_plot('val mse loss', 1, xlabel="epoch", ylabel="val_mse_loss")
    if opt.use_imp:
        ps.new_plot('train rate value',
                    opt.print_freq,
                    xlabel="iteration",
                    ylabel="train_rate_value")
        ps.new_plot('train rate loss',
                    opt.print_freq,
                    xlabel="iteration",
                    ylabel="train_rate_loss")
        ps.new_plot('train total loss',
                    opt.print_freq,
                    xlabel="iteration",
                    ylabel="train_total_loss")
        ps.new_plot('val rate value',
                    1,
                    xlabel="iteration",
                    ylabel="val_rate_value")
        ps.new_plot('val rate loss',
                    1,
                    xlabel="iteration",
                    ylabel="val_rate_loss")
        ps.new_plot('val total loss',
                    1,
                    xlabel="iteration",
                    ylabel="val_total_loss")

    for epoch in range(start_epoch + 1, opt.max_epoch + 1):
        same_lr_epoch += 1
        # per epoch avg loss meter
        mse_loss_meter.reset()
        if opt.use_imp:
            rate_display_meter.reset()
            rate_loss_meter.reset()
            total_loss_meter.reset()
        else:
            # without a rate loss, total loss IS the mse loss (alias the meter)
            total_loss_meter = mse_loss_meter
        # cur_epoch_loss refresh every epoch

        ps.new_plot("cur epoch train mse loss",
                    opt.print_freq,
                    xlabel="iteration in cur epoch",
                    ylabel="train_mse_loss")

        model.train()

        # _ is corresponding Label, compression doesn't use it.
        for idx, (data, _) in enumerate(train_dataloader):
            ipt = Variable(data)

            if opt.use_gpu:
                ipt = ipt.cuda()

            optimizer.zero_grad()  # Don't forget to clear grad!
            reconstructed = model(ipt)

            # print ('reconstructed tensor size :', reconstructed.size())
            loss = mse_loss(reconstructed, ipt)
            # Caffe-style normalization: sum of squared errors / (2 * batch)
            caffe_loss = loss / (2 * opt.batch_size)

            if opt.use_imp:
                # print ('use data_parallel?',use_data_parallel)
                # pdb.set_trace()
                rate_loss_display = (model.module if use_data_parallel else
                                     model).imp_mask_sigmoid
                rate_loss_ = rate_loss(rate_loss_display)
                total_loss = caffe_loss + rate_loss_
            else:
                total_loss = caffe_loss

            total_loss.backward()

            optimizer.step()

            # .data[0] is the legacy (pre-0.4) scalar accessor
            mse_loss_meter.add(caffe_loss.data[0])

            if opt.use_imp:
                rate_loss_meter.add(rate_loss_.data[0])
                rate_display_meter.add(rate_loss_display.data.mean())
                total_loss_meter.add(total_loss.data[0])

            if idx % opt.print_freq == opt.print_freq - 1:
                ps.add_point(
                    'train mse loss',
                    mse_loss_meter.value()[0]
                    if opt.print_smooth else caffe_loss.data[0])
                ps.add_point(
                    'cur epoch train mse loss',
                    mse_loss_meter.value()[0]
                    if opt.print_smooth else caffe_loss.data[0])
                if opt.use_imp:
                    ps.add_point(
                        'train rate value',
                        rate_display_meter.value()[0]
                        if opt.print_smooth else rate_loss_display.data.mean())
                    ps.add_point(
                        'train rate loss',
                        rate_loss_meter.value()[0]
                        if opt.print_smooth else rate_loss_.data[0])
                    ps.add_point(
                        'train total loss',
                        total_loss_meter.value()[0]
                        if opt.print_smooth else total_loss.data[0])
                if not opt.use_imp:
                    ps.log('Epoch %d/%d, Iter %d/%d, loss = %.2f, lr = %.8f' %
                           (epoch, opt.max_epoch, idx, len(train_dataloader),
                            total_loss_meter.value()[0], lr))
                else:
                    ps.log(
                        'Epoch %d/%d, Iter %d/%d, loss = %.2f, mse_loss = %.2f, rate_loss = %.2f, rate_display = %.2f, lr = %.8f'
                        %
                        (epoch, opt.max_epoch, idx, len(train_dataloader),
                         total_loss_meter.value()[0],
                         mse_loss_meter.value()[0], rate_loss_meter.value()[0],
                         rate_display_meter.value()[0], lr))
                # drop into the debugger when the debug trigger file exists
                if os.path.exists(opt.debug_file):
                    pdb.set_trace()

        # data parallel
        # if hasattr(model, 'module'):
        if use_data_parallel:
            model.module.save(optimizer, epoch)
        else:
            model.save(optimizer, epoch)

        # plot before val can ease me
        ps.make_plot(
            'train mse loss'
        )  # all epoch share a same img, so give ""(default) to epoch
        ps.make_plot('cur epoch train mse loss', epoch)
        if opt.use_imp:
            ps.make_plot("train rate value")
            ps.make_plot("train rate loss")
            ps.make_plot("train total loss")

        # val
        if opt.use_imp:
            mse_val_loss, rate_val_loss, total_val_loss, rate_val_display = val(
                model, val_dataloader, mse_loss, rate_loss, ps)
        else:
            mse_val_loss = val(model, val_dataloader, mse_loss, None, ps)

        ps.add_point('val mse loss', mse_val_loss)
        if opt.use_imp:
            ps.add_point('val rate value', rate_val_display)
            ps.add_point('val rate loss', rate_val_loss)
            ps.add_point('val total loss', total_val_loss)

        ps.make_plot('val mse loss')

        if opt.use_imp:
            ps.make_plot('val rate value')
            ps.make_plot('val rate loss')
            ps.make_plot('val total loss')

        # log sth.
        if opt.use_imp:
            ps.log(
                'Epoch:{epoch}, lr:{lr}, train_mse_loss: {train_mse_loss}, train_rate_loss: {train_rate_loss}, train_total_loss: {train_total_loss}, train_rate_display: {train_rate_display} \n\
val_mse_loss: {val_mse_loss}, val_rate_loss: {val_rate_loss}, val_total_loss: {val_total_loss}, val_rate_display: {val_rate_display} '
                .format(epoch=epoch,
                        lr=lr,
                        train_mse_loss=mse_loss_meter.value()[0],
                        train_rate_loss=rate_loss_meter.value()[0],
                        train_total_loss=total_loss_meter.value()[0],
                        train_rate_display=rate_display_meter.value()[0],
                        val_mse_loss=mse_val_loss,
                        val_rate_loss=rate_val_loss,
                        val_total_loss=total_val_loss,
                        val_rate_display=rate_val_display))
        else:
            ps.log(
                'Epoch:{epoch}, lr:{lr}, train_mse_loss:{train_mse_loss}, val_mse_loss:{val_mse_loss}'
                .format(epoch=epoch,
                        lr=lr,
                        train_mse_loss=mse_loss_meter.value()[0],
                        val_mse_loss=mse_val_loss))

        # Adaptive adjust lr
        # Early-stop annealing: for each lr, if the epoch loss exceeds the
        # previous epoch's loss opt.tolerant_max times, decay the lr.
        if opt.use_early_adjust:
            if total_loss_meter.value()[0] > previous_loss:
                tolerant_now += 1
                if tolerant_now == opt.tolerant_max:
                    tolerant_now = 0
                    same_lr_epoch = 0
                    lr = lr * opt.lr_decay
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr
                    print('Anneal lr to', lr, 'at epoch', epoch,
                          'due to early stop.')
                    ps.log(
                        'Anneal lr to %.10f at epoch %d due to early stop.' %
                        (lr, epoch))

            else:
                # NOTE(review): decrementing (can go negative) rather than
                # resetting to 0 — confirm this is intended.
                tolerant_now -= 1

        # scheduled annealing after a fixed number of epochs at the same lr
        if same_lr_epoch and same_lr_epoch % opt.lr_anneal_epochs == 0:
            same_lr_epoch = 0
            tolerant_now = 0
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            print('Anneal lr to', lr, 'at epoch', epoch, 'due to full epochs.')
            ps.log('Anneal lr to %.10f at epoch %d due to full epochs.' %
                   (lr, epoch))

        previous_loss = total_loss_meter.value()[0]
Beispiel #34
0
def train(**kwargs):
    """Train the four cross-modal hashing networks (label, image, text, hash).

    ``kwargs`` are merged into the global ``opt`` config.  Each epoch runs an
    alternating optimization: the label net is trained first, then the image
    and text nets (against the label-net hash codes), then the hash net on the
    concatenated image/text features, and finally the image and text nets
    again with swapped quantization weights.  Losses combine a pairwise
    log-likelihood term, quantization penalties and a label-prediction
    penalty, weighted by ``opt.alpha`` / ``opt.beta`` / ``opt.gamma``.

    Validation (MAP in both retrieval directions) runs per epoch when
    ``opt.valid`` is set, and the best models are checkpointed.  The final
    ``result`` dict of losses and MAP scores is written via ``write_result``.
    """
    opt.parse(kwargs)
    # FIX: original literal `[0.2,0.5,0.8,1.0,1.3.1.5.1.8.2.0,2.5]` was a
    # syntax error (dots instead of commas).  Candidate hyper-parameter sweep
    # values; currently unused in this function.
    alpha = [0.2, 0.5, 0.8, 1.0, 1.3, 1.5, 1.8, 2.0, 2.5]
    images, tags, labels = load_data(opt.data_path)
    pretrain_model = load_pretrain_model(opt.pretrain_model_path)
    y_dim = tags.shape[1]
    label_num = labels.shape[1]
    X, Y, L = split_data(images, tags, labels)
    print('...loading and splitting data finish')
    img_model = ImgModule(opt.bit, pretrain_model)
    txt_model = TxtModule(y_dim, opt.bit)
    hash_model = HashModule(opt.bit)
    label_model = LabModule(label_num)
    if opt.use_gpu:
        img_model = img_model.cuda()
        txt_model = txt_model.cuda()
        hash_model = hash_model.cuda()
        label_model = label_model.cuda()
    train_L = torch.from_numpy(L['train'])
    train_x = torch.from_numpy(X['train'])
    train_y = torch.from_numpy(Y['train'])

    query_L = torch.from_numpy(L['query'])
    query_x = torch.from_numpy(X['query'])
    query_y = torch.from_numpy(Y['query'])

    retrieval_L = torch.from_numpy(L['retrieval'])
    retrieval_x = torch.from_numpy(X['retrieval'])
    retrieval_y = torch.from_numpy(Y['retrieval'])

    num_train = train_x.shape[0]

    # Buffers holding the latest network outputs for every training sample,
    # so loss terms can compare a minibatch against the whole training set.
    F_buffer = torch.randn(num_train, opt.bit)
    G_buffer = torch.randn(num_train, opt.bit)
    X_fea_buffer = torch.randn(num_train, opt.X_fea_nums)
    Y_fea_buffer = torch.randn(num_train, opt.Y_fea_nums)
    X_label_buffer = torch.randn(num_train, label_num)
    Y_label_buffer = torch.randn(num_train, label_num)

    Label_buffer = torch.randn(num_train, label_num)
    Label_hash_buffer = torch.randn(num_train, opt.bit)
    Label_label_buffer = torch.randn(num_train, label_num)

    if opt.use_gpu:
        train_L = train_L.cuda()
        F_buffer = F_buffer.cuda()
        G_buffer = G_buffer.cuda()
        X_fea_buffer = X_fea_buffer.cuda()
        Y_fea_buffer = Y_fea_buffer.cuda()
        Label_buffer = Label_buffer.cuda()
        X_label_buffer = X_label_buffer.cuda()
        Y_label_buffer = Y_label_buffer.cuda()
        Label_hash_buffer = Label_hash_buffer.cuda()
        Label_label_buffer = Label_label_buffer.cuda()
    Sim = calc_neighbor(train_L, train_L)
    # initialize binary codes from the (random) F/G buffers
    B = torch.sign(F_buffer + G_buffer)
    B_buffer = torch.sign(F_buffer + G_buffer)
    batch_size = opt.batch_size

    lr = opt.lr
    optimizer_img = SGD(img_model.parameters(), lr=lr)
    optimizer_txt = SGD(txt_model.parameters(), lr=lr)
    optimizer_hash = SGD(hash_model.parameters(), lr=lr)
    optimizer_label = SGD(label_model.parameters(), lr=lr)

    # linear lr schedule from opt.lr down to 1e-6 over max_epoch steps
    learning_rate = np.linspace(opt.lr, np.power(10, -6.), opt.max_epoch + 1)
    result = {
        'loss': [],
        'hash_loss': [],
        'total_loss': []
    }

    ones = torch.ones(batch_size, 1)
    ones_ = torch.ones(num_train - batch_size, 1)
    unupdated_size = num_train - batch_size

    max_mapi2t = max_mapt2i = 0.

    for epoch in range(opt.max_epoch):
        # train label net
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            label = Variable(train_L[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float))
            if opt.use_gpu:
                label = label.cuda()
                sample_L = sample_L.cuda()
            # similar matrix size: (batch_size, num_train)
            S = calc_neighbor(sample_L, train_L)
            label_hash, label_label = label_model(label)
            Label_hash_buffer[ind, :] = label_hash.data
            Label_label_buffer[ind, :] = label_label.data
            Label = Variable(train_L)
            Label_B = torch.sign(label_hash)
            Label_H = Variable(Label_hash_buffer)

            theta_l = 1.0 / 2 * torch.matmul(label_hash, Label_H.t())
            logloss_l = -torch.sum(S * theta_l - torch.log(1.0 + torch.exp(theta_l)))
            quantization_l = torch.sum(torch.pow(Label_hash_buffer[ind, :] - Label_B, 2))
            labelloss_l = torch.sum(torch.pow(Label[ind, :].float() - label_label, 2))
            loss_label = logloss_l + opt.beta * quantization_l + opt.alpha * labelloss_l  # + logloss_x_fea
            loss_label /= (batch_size * num_train)

            optimizer_label.zero_grad()
            loss_label.backward()
            optimizer_label.step()
        # train image net
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            image = Variable(train_x[ind].type(torch.float))
            if opt.use_gpu:
                image = image.cuda()
                sample_L = sample_L.cuda()
            # similar matrix size: (batch_size, num_train)
            S = calc_neighbor(sample_L, train_L)  # S: (batch_size, num_train)
            image_fea, cur_f, image_label = img_model(image)  # cur_f: (batch_size, bit)
            X_fea_buffer[ind, :] = image_fea.data
            F_buffer[ind, :] = cur_f.data
            X_label_buffer[ind, :] = image_label.data
            G = Variable(G_buffer)
            H_l = Variable(Label_hash_buffer)
            B_x = torch.sign(F_buffer)

            theta_x = 1.0 / 2 * torch.matmul(cur_f, H_l.t())
            logloss_x = -torch.sum(S * theta_x - torch.log(1.0 + torch.exp(theta_x)))
            quantization_xh = torch.sum(torch.pow(B_buffer[ind, :] - cur_f, 2))
            quantization_xb = torch.sum(torch.pow(B_x[ind, :] - cur_f, 2))
            labelloss_x = torch.sum(torch.pow(train_L[ind, :].float() - image_label, 2))
            loss_x = logloss_x + opt.beta * quantization_xh + opt.alpha * labelloss_x + opt.gamma * quantization_xb  # + logloss_x_fea
            loss_x /= (batch_size * num_train)

            optimizer_img.zero_grad()
            loss_x.backward()
            optimizer_img.step()
        # train txt net
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            text = train_y[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float)
            text = Variable(text)
            if opt.use_gpu:
                text = text.cuda()
                sample_L = sample_L.cuda()
            # similar matrix size: (batch_size, num_train)
            S = calc_neighbor(sample_L, train_L)  # S: (batch_size, num_train)
            txt_fea, cur_g, txt_label = txt_model(text)  # cur_f: (batch_size, bit)
            Y_fea_buffer[ind, :] = txt_fea.data
            G_buffer[ind, :] = cur_g.data
            Y_label_buffer[ind, :] = txt_label.data
            F = Variable(F_buffer)
            H_l = Variable(Label_hash_buffer)
            B_y = torch.sign(F)
            # calculate loss
            # theta_y: (batch_size, num_train)
            theta_y = 1.0 / 2 * torch.matmul(cur_g, H_l.t())
            logloss_y = -torch.sum(S * theta_y - torch.log(1.0 + torch.exp(theta_y)))
            quantization_yh = torch.sum(torch.pow(B_buffer[ind, :] - cur_g, 2))
            quantization_yb = torch.sum(torch.pow(B_y[ind, :] - cur_g, 2))
            labelloss_y = torch.sum(torch.pow(train_L[ind, :].float() - txt_label, 2))
            loss_y = logloss_y + opt.beta * quantization_yh + opt.alpha * labelloss_y + opt.gamma * quantization_yb  # + logloss_y_fea
            loss_y /= (num_train * batch_size)

            optimizer_txt.zero_grad()
            loss_y.backward()
            optimizer_txt.step()

        # train hash net
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)

            sample_L = Variable(train_L[ind, :])
            #W = norm(X_fea_buffer[ind, :], Y_fea_buffer[ind, :])
            #fea = 1.0 / 2 * (torch.matmul(W, X_fea_buffer[ind, :]) + torch.matmul(W, Y_fea_buffer[ind, :]))
            fea = torch.cat([X_fea_buffer[ind, :], Y_fea_buffer[ind, :]], dim=1)
            fea = Variable(fea)
            if opt.use_gpu:
                fea = fea.cuda()
                sample_L = sample_L.cuda()
            S = calc_neighbor(sample_L, train_L)
            A = caculateAdj(sample_L, sample_L)
            cur_B, label_hash = hash_model(fea, A)
            B_buffer[ind, :] = cur_B.data
            # calculate loss
            B = Variable(torch.sign(B_buffer))
            theta_hash = 1.0 / 2 * torch.matmul(cur_B, B_buffer.t())
            logloss_hash = -torch.sum(S * theta_hash - torch.log(1.0 + torch.exp(theta_hash)))
            label_loss = torch.sum(torch.pow(train_L[ind, :].float() - label_hash, 2))
            hashloss = torch.sum(torch.pow(B[ind, :] - cur_B, 2))
            loss_hash = logloss_hash + opt.alpha * label_loss + opt.beta * hashloss

            optimizer_hash.zero_grad()
            loss_hash.backward()
            optimizer_hash.step()
        # train image net (second pass: quantization weights beta/gamma swapped)
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            image = Variable(train_x[ind].type(torch.float))
            if opt.use_gpu:
                image = image.cuda()
                sample_L = sample_L.cuda()
            # similar matrix size: (batch_size, num_train)
            S = calc_neighbor(sample_L, train_L)  # S: (batch_size, num_train)
            image_fea, cur_f, image_label = img_model(image)  # cur_f: (batch_size, bit)
            X_fea_buffer[ind, :] = image_fea.data
            F_buffer[ind, :] = cur_f.data
            X_label_buffer[ind, :] = image_label.data
            G = Variable(G_buffer)
            H_l = Variable(Label_hash_buffer)
            B_x = torch.sign(F_buffer)

            theta_x = 1.0 / 2 * torch.matmul(cur_f, H_l.t())
            logloss_x = -torch.sum(S * theta_x - torch.log(1.0 + torch.exp(theta_x)))
            quantization_xh = torch.sum(torch.pow(B_buffer[ind, :] - cur_f, 2))
            quantization_xb = torch.sum(torch.pow(B_x[ind, :] - cur_f, 2))
            labelloss_x = torch.sum(torch.pow(train_L[ind, :].float() - image_label, 2))
            loss_x = logloss_x + opt.gamma * quantization_xh + opt.alpha * labelloss_x + opt.beta * quantization_xb  # + logloss_x_fea
            loss_x /= (batch_size * num_train)

            optimizer_img.zero_grad()
            loss_x.backward()
            optimizer_img.step()
        # train txt net (second pass: quantization weights beta/gamma swapped)
        for i in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            unupdated_ind = np.setdiff1d(range(num_train), ind)
            sample_L = Variable(train_L[ind, :])
            text = train_y[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float)
            text = Variable(text)
            if opt.use_gpu:
                text = text.cuda()
                sample_L = sample_L.cuda()
            # similar matrix size: (batch_size, num_train)
            S = calc_neighbor(sample_L, train_L)  # S: (batch_size, num_train)
            txt_fea, cur_g, txt_label = txt_model(text)  # cur_f: (batch_size, bit)
            Y_fea_buffer[ind, :] = txt_fea.data
            G_buffer[ind, :] = cur_g.data
            Y_label_buffer[ind, :] = txt_label.data
            F = Variable(F_buffer)
            H_l = Variable(Label_hash_buffer)
            B_y = torch.sign(F)
            # calculate loss
            # theta_y: (batch_size, num_train)
            theta_y = 1.0 / 2 * torch.matmul(cur_g, H_l.t())
            logloss_y = -torch.sum(S * theta_y - torch.log(1.0 + torch.exp(theta_y)))
            quantization_yh = torch.sum(torch.pow(B_buffer[ind, :] - cur_g, 2))
            quantization_yb = torch.sum(torch.pow(B_y[ind, :] - cur_g, 2))
            labelloss_y = torch.sum(torch.pow(train_L[ind, :].float() - txt_label, 2))
            loss_y = logloss_y + opt.gamma * quantization_yh + opt.alpha * labelloss_y + opt.beta * quantization_yb  # + logloss_y_fea
            loss_y /= (num_train * batch_size)

            optimizer_txt.zero_grad()
            loss_y.backward()
            optimizer_txt.step()

        # calculate total loss
        loss, hash_loss, total_loss = calc_loss(B, F, G, Variable(Sim), opt.alpha, opt.beta, Label_buffer, train_L, X_label_buffer, Y_label_buffer)

        print('...epoch: %3d, loss: %3.3f, lr: %f' % (epoch + 1, loss.data, lr))
        print('...epoch: %3d, hash_loss: %3.3f, lr: %f' % (epoch + 1, hash_loss.data, lr))
        print('...epoch: %3d, total_loss: %3.3f, lr: %f' % (epoch + 1, total_loss.data, lr))
        result['loss'].append(float(loss.data))
        result['hash_loss'].append(float(hash_loss.data))
        result['total_loss'].append(float(total_loss.data))

        if opt.valid:
            mapi2t, mapt2i = valid(img_model, txt_model, query_x, retrieval_x, query_y, retrieval_y,
                                   query_L, retrieval_L)
            print('...epoch: %3d, valid MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (epoch + 1, mapi2t, mapt2i))
            # checkpoint only when BOTH retrieval directions improve
            if mapt2i >= max_mapt2i and mapi2t >= max_mapi2t:
                max_mapi2t = mapi2t
                max_mapt2i = mapt2i
                img_model.save(img_model.module_name + '.pth')
                txt_model.save(txt_model.module_name + '.pth')
                hash_model.save(hash_model.module_name + '.pth')

        lr = learning_rate[epoch + 1]

        # set learning rate
        for param in optimizer_img.param_groups:
            param['lr'] = lr
        for param in optimizer_txt.param_groups:
            param['lr'] = lr

    print('...training procedure finish')
    if opt.valid:
        print('   max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (max_mapi2t, max_mapt2i))
        result['mapi2t'] = max_mapi2t
        result['mapt2i'] = max_mapt2i
    else:
        mapi2t, mapt2i = valid(img_model, txt_model, query_x, retrieval_x, query_y, retrieval_y,
                               query_L, retrieval_L)
        print('   max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (mapi2t, mapt2i))
        result['mapi2t'] = mapi2t
        result['mapt2i'] = mapt2i

    write_result(result)
Beispiel #35
0
def main(**kwargs):
    """Train a multi-label text classifier with f1-based lr annealing.

    ``kwargs`` are merged into the global ``opt`` config.  Every
    ``opt.plot_every`` batches the running macro-F1 is averaged; if it fails
    to improve more than 3 checks in a row, the learning rates decay, and
    once ``lr`` reaches ``opt.min_lr`` training stops.  The final weights are
    saved (on CPU) to ``cnn.pt``.

    Fixes vs. the original:
    * the early-stop ``break`` only exited the inner batch loop, so the
      epoch loop kept spinning (one batch per epoch) after ``lr`` bottomed
      out — a flag now propagates the stop to the epoch loop;
    * the per-row thresholding loop (hard-coded to 10 classes) is replaced
      by an equivalent vectorized expression that adapts to the number of
      output classes.
    """
    opt.parse(kwargs, print_=True)
    model = getattr(models, opt.model)(opt).cuda()
    print(model)
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()

    dataset = My_dataset(opt.seq_len, augment=opt.augument)
    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=True,
                                 num_workers=4,
                                 pin_memory=True)

    optimizer = model.get_optimizer(lr, lr2)

    batch_count = 0
    not_increase_count = 0
    pre_f1 = 0.
    f1 = 0.
    stop_training = False  # set when lr has reached opt.min_lr

    for epoch in range(opt.max_epoch):
        if stop_training:
            break
        for ii, (content, label, sen_id) in enumerate(dataloader):
            content, label = content.cuda(), label.cuda()

            optimizer.zero_grad()
            score = model(content)

            # binarize: a label is predicted when its logit is positive
            predict = score.detach().cpu().numpy()
            predict_ind = (predict > 0.).astype(np.int32)

            loss = loss_function(score, label)
            loss.backward()
            optimizer.step()

            # accumulate macro-F1; averaged every opt.plot_every batches
            f1 += f1_score(label.cpu().numpy(), predict_ind, average='macro')
            if batch_count % opt.plot_every == opt.plot_every - 1:
                f1 = f1 / opt.plot_every
                print('average f1: %f' % f1)
                if f1 < pre_f1:
                    not_increase_count += 1
                else:
                    not_increase_count = 0

                # anneal after 4 consecutive non-improving checks
                if not_increase_count > 3:
                    if lr <= opt.min_lr:
                        stop_training = True
                        break
                    lr *= opt.lr_decay
                    lr2 *= 0.8
                    optimizer = model.get_optimizer(lr, lr2)

                pre_f1 = f1
                f1 = 0.
            if lr <= opt.min_lr:
                stop_training = True
                break
            batch_count += 1
    torch.save(model.cpu().state_dict(), 'cnn.pt')