Ejemplo n.º 1
0
def test(**kwargs):
    """Print MAP(i->t) and MAP(t->i) for the image and text models.

    ``kwargs`` are handed to ``opt.parse``. Data comes pre-split from
    ``load_data``; only the query and retrieval parts are used. Model weights
    are loaded only when ``opt.load_img_path`` / ``opt.load_txt_path`` are
    truthy.
    """
    opt.parse(kwargs)

    # load_data returns nine values in (L, x, y) groups of
    # train/query/retrieval; the three train entries are not needed here.
    (_, query_L, retrieval_L,
     _, query_x, retrieval_x,
     _, query_y, retrieval_y) = load_data(opt.data_path)

    text_dim = query_y.shape[1]

    print('...loading and splitting data finish')

    image_net = ImgModule(opt.bit)
    text_net = TxtModule(text_dim, opt.bit)

    if opt.load_img_path:
        image_net.load(opt.load_img_path)

    if opt.load_txt_path:
        text_net.load(opt.load_txt_path)

    if opt.use_gpu:
        image_net = image_net.cuda()
        text_net = text_net.cuda()

    # Codes for both modalities, query side first, then retrieval side.
    query_img_codes = generate_image_code(image_net, query_x, opt.bit)
    query_txt_codes = generate_text_code(text_net, query_y, opt.bit)
    retr_img_codes = generate_image_code(image_net, retrieval_x, opt.bit)
    retr_txt_codes = generate_text_code(text_net, retrieval_y, opt.bit)

    if opt.use_gpu:
        query_L = query_L.cuda()
        retrieval_L = retrieval_L.cuda()

    # Image queries against text codes, then text queries against image codes.
    mapi2t = calc_map_k(query_img_codes, retr_txt_codes, query_L, retrieval_L)
    mapt2i = calc_map_k(query_txt_codes, retr_img_codes, query_L, retrieval_L)
    print('...test MAP: MAP(i->t): %3.3f, MAP(t->i): %3.3f' % (mapi2t, mapt2i))
Ejemplo n.º 2
0
def test(**kwargs):
    """Print MAP(i->t) and MAP(t->i) for the image and text models.

    ``kwargs`` are handed to ``opt.parse``. The raw data from ``load_data``
    is split with ``split_data``; the 'query' and 'retrieval' entries are
    converted to tensors and used for evaluation. Model weights are loaded
    only when ``opt.load_img_path`` / ``opt.load_txt_path`` are truthy.
    """
    opt.parse(kwargs)

    images, tags, labels = load_data(opt.data_path)
    text_dim = tags.shape[1]

    X, Y, L = split_data(images, tags, labels)
    print('...loading and splitting data finish')

    image_net = ImgModule(opt.bit)
    text_net = TxtModule(text_dim, opt.bit)

    if opt.load_img_path:
        image_net.load(opt.load_img_path)

    if opt.load_txt_path:
        text_net.load(opt.load_txt_path)

    if opt.use_gpu:
        image_net = image_net.cuda()
        text_net = text_net.cuda()
    print('-----------------------')

    # Convert labels, images and text (in that order) for each split.
    query_L, query_x, query_y = (
        torch.from_numpy(part['query']) for part in (L, X, Y))
    retrieval_L, retrieval_x, retrieval_y = (
        torch.from_numpy(part['retrieval']) for part in (L, X, Y))

    query_img_codes = generate_image_code(image_net, query_x, opt.bit)
    query_txt_codes = generate_text_code(text_net, query_y, opt.bit)
    retr_img_codes = generate_image_code(image_net, retrieval_x, opt.bit)
    retr_txt_codes = generate_text_code(text_net, retrieval_y, opt.bit)

    if opt.use_gpu:
        query_L = query_L.cuda()
        retrieval_L = retrieval_L.cuda()

    # Image queries against text codes, then text queries against image codes.
    mapi2t = calc_map_k(query_img_codes, retr_txt_codes, query_L, retrieval_L)
    mapt2i = calc_map_k(query_txt_codes, retr_img_codes, query_L, retrieval_L)
    print('...test MAP: MAP(i->t): %3.3f, MAP(t->i): %3.3f' % (mapi2t, mapt2i))
Ejemplo n.º 3
0
def debug(**kwargs):
    """Load trained image/text models and print their retrieval MAP.

    ``kwargs`` are handed to ``opt.parse``. Besides the regular data, a second
    label array is read from ``opt.l1_path`` and split alongside it. Weights
    are always loaded from ``opt.load_img_path`` and ``opt.load_txt_path``.
    """
    opt.parse(kwargs)

    # load data
    images, tags, labels = load_data(opt.data_path)
    y_dim = tags.shape[1]
    labels1 = np.load(opt.l1_path)

    X, Y, L, L1 = split_data(images, tags, labels, opt, labels1)

    print('...loading and splitting data finish')

    # init module
    img_model = ImgModule(opt.bit)
    txt_model = TxtModule(y_dim, opt.bit)

    if opt.use_gpu:
        img_model = img_model.cuda()
        txt_model = txt_model.cuda()

    print("load trained model from file..")
    # Forward the configured device flag rather than hard-coding True, so the
    # load step agrees with the opt.use_gpu check above.
    # NOTE(review): presumably `use_gpu` selects how the checkpoint is mapped
    # to a device -- confirm against the module's `load` implementation.
    img_model.load(opt.load_img_path, use_gpu=opt.use_gpu)
    txt_model.load(opt.load_txt_path, use_gpu=opt.use_gpu)

    # The train tensors are not used by the evaluation below; they are kept
    # available for inspection while debugging.
    train_L = torch.from_numpy(L['train'])
    train_L1 = torch.from_numpy(L1['train'])
    train_x = torch.from_numpy(X['train'])
    train_y = torch.from_numpy(Y['train'])

    query_L = torch.from_numpy(L['query'])
    query_x = torch.from_numpy(X['query'])
    query_y = torch.from_numpy(Y['query'])

    retrieval_L = torch.from_numpy(L['retrieval'])
    retrieval_x = torch.from_numpy(X['retrieval'])
    retrieval_y = torch.from_numpy(Y['retrieval'])

    mapi2t, mapt2i = evaluate(img_model, txt_model, query_x, query_y, retrieval_x, retrieval_y, query_L, retrieval_L, opt.bit)
    print('...test map: map(i->t): %3.3f, map(t->i): %3.3f' % (mapi2t, mapt2i))
Ejemplo n.º 4
0
def train(**kwargs):
    """Alternately train the label, image, text and hash networks.

    ``kwargs`` are passed to ``opt.parse`` first. Each epoch then runs six
    passes of ``num_train // batch_size`` randomly drawn mini-batches:
    label net, image net, text net, hash net, and finally the image and text
    nets again with the ``opt.beta`` / ``opt.gamma`` quantization weights
    swapped. After the passes the values returned by ``calc_loss`` are
    printed and stored, the models are optionally validated and saved, and
    the learning rate of the image and text optimizers is stepped along a
    linear schedule.

    Nothing is returned; the recorded losses and MAP scores are passed to
    ``write_result``.
    """
    opt.parse(kwargs)
    # Presumably candidate values for an alpha sweep; the list is not read
    # below (the loss terms use opt.alpha).
    alpha = [0.2, 0.5, 0.8, 1.0, 1.3, 1.5, 1.8, 2.0, 2.5]
    images, tags, labels = load_data(opt.data_path)
    pretrain_model = load_pretrain_model(opt.pretrain_model_path)
    y_dim = tags.shape[1]
    label_num = labels.shape[1]
    X, Y, L = split_data(images, tags, labels)
    print('...loading and splitting data finish')

    img_model = ImgModule(opt.bit, pretrain_model)
    txt_model = TxtModule(y_dim, opt.bit)
    hash_model = HashModule(opt.bit)
    label_model = LabModule(label_num)
    if opt.use_gpu:
        img_model = img_model.cuda()
        txt_model = txt_model.cuda()
        hash_model = hash_model.cuda()
        label_model = label_model.cuda()

    train_L = torch.from_numpy(L['train'])
    train_x = torch.from_numpy(X['train'])
    train_y = torch.from_numpy(Y['train'])

    query_L = torch.from_numpy(L['query'])
    query_x = torch.from_numpy(X['query'])
    query_y = torch.from_numpy(Y['query'])

    retrieval_L = torch.from_numpy(L['retrieval'])
    retrieval_x = torch.from_numpy(X['retrieval'])
    retrieval_y = torch.from_numpy(Y['retrieval'])

    num_train = train_x.shape[0]

    # Per-sample buffers holding the latest network outputs for the whole
    # training set; each mini-batch overwrites only its own rows.
    F_buffer = torch.randn(num_train, opt.bit)
    G_buffer = torch.randn(num_train, opt.bit)
    X_fea_buffer = torch.randn(num_train, opt.X_fea_nums)
    Y_fea_buffer = torch.randn(num_train, opt.Y_fea_nums)
    X_label_buffer = torch.randn(num_train, label_num)
    Y_label_buffer = torch.randn(num_train, label_num)

    # Label_buffer is never written after this point; it reaches calc_loss
    # with its random initial values.
    Label_buffer = torch.randn(num_train, label_num)
    Label_hash_buffer = torch.randn(num_train, opt.bit)
    Label_label_buffer = torch.randn(num_train, label_num)

    if opt.use_gpu:
        train_L = train_L.cuda()
        F_buffer = F_buffer.cuda()
        G_buffer = G_buffer.cuda()
        X_fea_buffer = X_fea_buffer.cuda()
        Y_fea_buffer = Y_fea_buffer.cuda()
        Label_buffer = Label_buffer.cuda()
        X_label_buffer = X_label_buffer.cuda()
        Y_label_buffer = Y_label_buffer.cuda()
        Label_hash_buffer = Label_hash_buffer.cuda()
        Label_label_buffer = Label_label_buffer.cuda()
    Sim = calc_neighbor(train_L, train_L)
    B = torch.sign(F_buffer + G_buffer)
    B_buffer = torch.sign(F_buffer + G_buffer)
    batch_size = opt.batch_size

    lr = opt.lr
    optimizer_img = SGD(img_model.parameters(), lr=lr)
    optimizer_txt = SGD(txt_model.parameters(), lr=lr)
    optimizer_hash = SGD(hash_model.parameters(), lr=lr)
    optimizer_label = SGD(label_model.parameters(), lr=lr)

    # Linear schedule from opt.lr down to 1e-6, one entry per epoch boundary.
    learning_rate = np.linspace(opt.lr, np.power(10, -6.), opt.max_epoch + 1)
    result = {
        'loss': [],
        'hash_loss': [],
        'total_loss': []
    }

    max_mapi2t = max_mapt2i = 0.

    for epoch in range(opt.max_epoch):
        # train label net
        for _ in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            sample_L = Variable(train_L[ind, :])
            label = Variable(train_L[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float))
            if opt.use_gpu:
                label = label.cuda()
                sample_L = sample_L.cuda()
            # S is multiplied element-wise with theta_l, which is
            # (batch_size, num_train).
            S = calc_neighbor(sample_L, train_L)
            label_hash, label_label = label_model(label)
            Label_hash_buffer[ind, :] = label_hash.data
            Label_label_buffer[ind, :] = label_label.data
            Label = Variable(train_L)
            Label_B = torch.sign(label_hash)
            Label_H = Variable(Label_hash_buffer)

            theta_l = 1.0 / 2 * torch.matmul(label_hash, Label_H.t())
            logloss_l = -torch.sum(S * theta_l - torch.log(1.0 + torch.exp(theta_l)))
            quantization_l = torch.sum(torch.pow(Label_hash_buffer[ind, :] - Label_B, 2))
            labelloss_l = torch.sum(torch.pow(Label[ind, :].float() - label_label, 2))
            loss_label = logloss_l + opt.beta * quantization_l + opt.alpha * labelloss_l
            loss_label /= (batch_size * num_train)

            optimizer_label.zero_grad()
            loss_label.backward()
            optimizer_label.step()

        # train image net (first pass: beta on B_buffer term, gamma on sign(F) term)
        for _ in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            sample_L = Variable(train_L[ind, :])
            image = Variable(train_x[ind].type(torch.float))
            if opt.use_gpu:
                image = image.cuda()
                sample_L = sample_L.cuda()
            S = calc_neighbor(sample_L, train_L)
            image_fea, cur_f, image_label = img_model(image)
            X_fea_buffer[ind, :] = image_fea.data
            F_buffer[ind, :] = cur_f.data
            X_label_buffer[ind, :] = image_label.data
            # G is not used in this loop's loss but is read by calc_loss
            # after the passes.
            G = Variable(G_buffer)
            H_l = Variable(Label_hash_buffer)
            B_x = torch.sign(F_buffer)

            theta_x = 1.0 / 2 * torch.matmul(cur_f, H_l.t())
            logloss_x = -torch.sum(S * theta_x - torch.log(1.0 + torch.exp(theta_x)))
            quantization_xh = torch.sum(torch.pow(B_buffer[ind, :] - cur_f, 2))
            quantization_xb = torch.sum(torch.pow(B_x[ind, :] - cur_f, 2))
            labelloss_x = torch.sum(torch.pow(train_L[ind, :].float() - image_label, 2))
            loss_x = logloss_x + opt.beta * quantization_xh + opt.alpha * labelloss_x + opt.gamma * quantization_xb
            loss_x /= (batch_size * num_train)

            optimizer_img.zero_grad()
            loss_x.backward()
            optimizer_img.step()

        # train txt net (first pass: beta on B_buffer term, gamma on sign(F) term)
        for _ in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            sample_L = Variable(train_L[ind, :])
            text = train_y[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float)
            text = Variable(text)
            if opt.use_gpu:
                text = text.cuda()
                sample_L = sample_L.cuda()
            S = calc_neighbor(sample_L, train_L)
            txt_fea, cur_g, txt_label = txt_model(text)
            Y_fea_buffer[ind, :] = txt_fea.data
            G_buffer[ind, :] = cur_g.data
            Y_label_buffer[ind, :] = txt_label.data
            F = Variable(F_buffer)
            H_l = Variable(Label_hash_buffer)
            # Note: the sign target for the text net is taken from the image
            # buffer F, not from G.
            B_y = torch.sign(F)

            theta_y = 1.0 / 2 * torch.matmul(cur_g, H_l.t())
            logloss_y = -torch.sum(S * theta_y - torch.log(1.0 + torch.exp(theta_y)))
            quantization_yh = torch.sum(torch.pow(B_buffer[ind, :] - cur_g, 2))
            quantization_yb = torch.sum(torch.pow(B_y[ind, :] - cur_g, 2))
            labelloss_y = torch.sum(torch.pow(train_L[ind, :].float() - txt_label, 2))
            loss_y = logloss_y + opt.beta * quantization_yh + opt.alpha * labelloss_y + opt.gamma * quantization_yb
            loss_y /= (num_train * batch_size)

            optimizer_txt.zero_grad()
            loss_y.backward()
            optimizer_txt.step()

        # train hash net
        for _ in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]

            sample_L = Variable(train_L[ind, :])
            # Input is the concatenation of the buffered image and text
            # features of the batch.
            fea = torch.cat([X_fea_buffer[ind, :], Y_fea_buffer[ind, :]], dim=1)
            fea = Variable(fea)
            if opt.use_gpu:
                fea = fea.cuda()
                sample_L = sample_L.cuda()
            S = calc_neighbor(sample_L, train_L)
            A = caculateAdj(sample_L, sample_L)
            cur_B, label_hash = hash_model(fea, A)
            B_buffer[ind, :] = cur_B.data
            # calculate loss
            B = Variable(torch.sign(B_buffer))
            theta_hash = 1.0 / 2 * torch.matmul(cur_B, B_buffer.t())
            logloss_hash = -torch.sum(S * theta_hash - torch.log(1.0 + torch.exp(theta_hash)))
            label_loss = torch.sum(torch.pow(train_L[ind, :].float() - label_hash, 2))
            hashloss = torch.sum(torch.pow(B[ind, :] - cur_B, 2))
            # Unlike the other passes, this loss is not divided by
            # batch_size * num_train.
            loss_hash = logloss_hash + opt.alpha * label_loss + opt.beta * hashloss

            optimizer_hash.zero_grad()
            loss_hash.backward()
            optimizer_hash.step()

        # train image net (second pass: gamma on B_buffer term, beta on sign(F) term)
        for _ in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            sample_L = Variable(train_L[ind, :])
            image = Variable(train_x[ind].type(torch.float))
            if opt.use_gpu:
                image = image.cuda()
                sample_L = sample_L.cuda()
            S = calc_neighbor(sample_L, train_L)
            image_fea, cur_f, image_label = img_model(image)
            X_fea_buffer[ind, :] = image_fea.data
            F_buffer[ind, :] = cur_f.data
            X_label_buffer[ind, :] = image_label.data
            G = Variable(G_buffer)
            H_l = Variable(Label_hash_buffer)
            B_x = torch.sign(F_buffer)

            theta_x = 1.0 / 2 * torch.matmul(cur_f, H_l.t())
            logloss_x = -torch.sum(S * theta_x - torch.log(1.0 + torch.exp(theta_x)))
            quantization_xh = torch.sum(torch.pow(B_buffer[ind, :] - cur_f, 2))
            quantization_xb = torch.sum(torch.pow(B_x[ind, :] - cur_f, 2))
            labelloss_x = torch.sum(torch.pow(train_L[ind, :].float() - image_label, 2))
            loss_x = logloss_x + opt.gamma * quantization_xh + opt.alpha * labelloss_x + opt.beta * quantization_xb
            loss_x /= (batch_size * num_train)

            optimizer_img.zero_grad()
            loss_x.backward()
            optimizer_img.step()

        # train txt net (second pass: gamma on B_buffer term, beta on sign(F) term)
        for _ in tqdm(range(num_train // batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: batch_size]
            sample_L = Variable(train_L[ind, :])
            text = train_y[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float)
            text = Variable(text)
            if opt.use_gpu:
                text = text.cuda()
                sample_L = sample_L.cuda()
            S = calc_neighbor(sample_L, train_L)
            txt_fea, cur_g, txt_label = txt_model(text)
            Y_fea_buffer[ind, :] = txt_fea.data
            G_buffer[ind, :] = cur_g.data
            Y_label_buffer[ind, :] = txt_label.data
            F = Variable(F_buffer)
            H_l = Variable(Label_hash_buffer)
            B_y = torch.sign(F)

            theta_y = 1.0 / 2 * torch.matmul(cur_g, H_l.t())
            logloss_y = -torch.sum(S * theta_y - torch.log(1.0 + torch.exp(theta_y)))
            quantization_yh = torch.sum(torch.pow(B_buffer[ind, :] - cur_g, 2))
            quantization_yb = torch.sum(torch.pow(B_y[ind, :] - cur_g, 2))
            labelloss_y = torch.sum(torch.pow(train_L[ind, :].float() - txt_label, 2))
            loss_y = logloss_y + opt.gamma * quantization_yh + opt.alpha * labelloss_y + opt.beta * quantization_yb
            loss_y /= (num_train * batch_size)

            optimizer_txt.zero_grad()
            loss_y.backward()
            optimizer_txt.step()

        # calculate total loss
        loss, hash_loss, total_loss = calc_loss(B, F, G, Variable(Sim), opt.alpha, opt.beta, Label_buffer, train_L, X_label_buffer, Y_label_buffer)

        print('...epoch: %3d, loss: %3.3f, lr: %f' % (epoch + 1, loss.data, lr))
        print('...epoch: %3d, hash_loss: %3.3f, lr: %f' % (epoch + 1, hash_loss.data, lr))
        print('...epoch: %3d, total_loss: %3.3f, lr: %f' % (epoch + 1, total_loss.data, lr))
        result['loss'].append(float(loss.data))
        result['hash_loss'].append(float(hash_loss.data))
        result['total_loss'].append(float(total_loss.data))

        if opt.valid:
            mapi2t, mapt2i = valid(img_model, txt_model, query_x, retrieval_x, query_y, retrieval_y,
                                   query_L, retrieval_L)
            print('...epoch: %3d, valid MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (epoch + 1, mapi2t, mapt2i))
            # Checkpoint only when neither direction got worse than the best so far.
            if mapt2i >= max_mapt2i and mapi2t >= max_mapi2t:
                max_mapi2t = mapi2t
                max_mapt2i = mapt2i
                img_model.save(img_model.module_name + '.pth')
                txt_model.save(txt_model.module_name + '.pth')
                hash_model.save(hash_model.module_name + '.pth')

        lr = learning_rate[epoch + 1]

        # set learning rate
        # NOTE(review): only the image and text optimizers follow the
        # schedule; the hash and label optimizers stay at opt.lr -- confirm
        # this is intended.
        for param in optimizer_img.param_groups:
            param['lr'] = lr
        for param in optimizer_txt.param_groups:
            param['lr'] = lr

    print('...training procedure finish')
    if opt.valid:
        print('   max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (max_mapi2t, max_mapt2i))
        result['mapi2t'] = max_mapi2t
        result['mapt2i'] = max_mapt2i
    else:
        mapi2t, mapt2i = valid(img_model, txt_model, query_x, retrieval_x, query_y, retrieval_y,
                               query_L, retrieval_L)
        print('   max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (mapi2t, mapt2i))
        result['mapi2t'] = mapi2t
        result['mapt2i'] = mapt2i

    write_result(result)
Ejemplo n.º 5
0
def train(**kwargs):
    """Train the image and text hashing nets together with a shared classifier.

    ``kwargs`` are passed to ``opt.parse`` first. Each epoch runs one pass of
    ``num_train // opt.batch_size`` random mini-batches for the image net and
    one for the text net; both passes also update the classifier that is
    applied to the produced codes. The binary codes ``B`` are then refreshed
    from the two code buffers. When ``opt.valid`` is set the models are
    evaluated after every epoch and saved to ``opt.load_img_path`` /
    ``opt.load_txt_path`` whenever MAP(i->t) improves. A final evaluation is
    printed after training.
    """
    opt.parse(kwargs)

    # Load data
    images, tags, labels = load_data(opt.data_path)
    y_dim = tags.shape[1]

    X, Y, L = split_data(images, tags, labels, opt)
    print('Loading and splitting data finished.')

    # Init module
    img_model = ImgModule(opt.bit)
    txt_model = TxtModule(y_dim, opt.bit)
    cls_model = ClassifierModule(opt.bit, opt.num_class)

    if opt.use_gpu:
        img_model = img_model.cuda()
        txt_model = txt_model.cuda()
        cls_model = cls_model.cuda()

    # Data to torch tensor.
    # train_L is the BCEWithLogitsLoss target below, so it is cast to float
    # on every device, not only on the GPU path.
    train_L = torch.from_numpy(L['train']).float()
    train_x = torch.from_numpy(X['train'])
    train_y = torch.from_numpy(Y['train'])

    query_L = torch.from_numpy(L['query'])
    query_x = torch.from_numpy(X['query'])
    query_y = torch.from_numpy(Y['query'])

    retrieval_L = torch.from_numpy(L['retrieval'])
    retrieval_x = torch.from_numpy(X['retrieval'])
    retrieval_y = torch.from_numpy(Y['retrieval'])

    num_train = train_x.shape[0]

    F_buffer = torch.randn(num_train, opt.bit)  # tensor (num_train, bit)
    G_buffer = torch.randn(num_train, opt.bit)  # tensor (num_train, bit)

    if opt.use_gpu:
        train_L = train_L.cuda()
        F_buffer = F_buffer.cuda()
        G_buffer = G_buffer.cuda()

    B = torch.sign(F_buffer + G_buffer)  # tensor (num_train, bit)

    lr = opt.lr
    optimizer_img = Adam(img_model.parameters(), lr=lr)
    optimizer_txt = Adam(txt_model.parameters(), lr=lr)
    optimizer_cls = Adam(cls_model.parameters(), lr=lr)

    # Multi-label classification (binary cross entropy on logits)
    cls_criterion = nn.BCEWithLogitsLoss()

    best_mapi2t = 0.0
    print('...training procedure starts')

    for epoch in range(opt.max_epoch):

        # Part 1: train image net & update F & classifier
        for _ in tqdm(range(num_train // opt.batch_size)):
            # Random samples
            index = np.random.permutation(num_train)
            ind = index[0: opt.batch_size]

            sample_L = Variable(train_L[ind, :])
            image = Variable(train_x[ind].type(torch.float))
            if opt.use_gpu:
                image = image.cuda()
                sample_L = sample_L.cuda()

            cur_f = img_model(image)
            F_buffer[ind, :] = cur_f.data  # update F

            pred_x = cls_model(cur_f)
            cls_x = cls_criterion(pred_x, sample_L)

            # Squared distance between the batch codes and their rows of B.
            quantization_x = torch.sum(torch.pow(B[ind, :] - cur_f, 2)) / (opt.batch_size * num_train)

            loss_x = cls_x + quantization_x

            optimizer_img.zero_grad()
            optimizer_cls.zero_grad()
            loss_x.backward()
            optimizer_img.step()
            # Step the classifier as well; clearing its gradients here
            # instead would discard the update from the image pass.
            optimizer_cls.step()

        # Part 2: train txt net & update G & classifier
        for _ in tqdm(range(num_train // opt.batch_size)):
            index = np.random.permutation(num_train)
            ind = index[0: opt.batch_size]

            sample_L = Variable(train_L[ind, :])
            text = train_y[ind, :].unsqueeze(1).unsqueeze(-1).type(torch.float)
            text = Variable(text)
            if opt.use_gpu:
                text = text.cuda()
                sample_L = sample_L.cuda()

            cur_g = txt_model(text)
            G_buffer[ind, :] = cur_g.data  # update G

            pred_y = cls_model(cur_g)
            cls_y = cls_criterion(pred_y, sample_L)
            quantization_y = torch.sum(torch.pow(B[ind, :] - cur_g, 2)) / (num_train * opt.batch_size)

            loss_y = cls_y + quantization_y

            optimizer_txt.zero_grad()
            optimizer_cls.zero_grad()
            loss_y.backward()
            optimizer_txt.step()
            optimizer_cls.step()

        # Update B
        B = torch.sign(F_buffer + G_buffer)

        if opt.valid:
            mapi2t, mapt2i = evaluate(img_model, txt_model, query_x, query_y, retrieval_x, retrieval_y, query_L, retrieval_L, opt.bit)

            # save best model (selected on MAP(i->t) only)
            if mapi2t > best_mapi2t:
                print("best mapi2t, save model...")
                best_mapi2t = mapi2t
                txt_model.save(opt.load_txt_path)
                img_model.save(opt.load_img_path)

            print("{}".format(datetime.now()))
            print('%d...eval map: map(i->t): \033[1;32;40m%3.3f\033[0m, map(t->i): \033[1;32;40m%3.3f\033[0m' % (
                    epoch, mapi2t, mapt2i))

    print('...training procedure finish')
    mapi2t, mapt2i = evaluate(img_model, txt_model, query_x, query_y, retrieval_x, retrieval_y, query_L, retrieval_L, opt.bit)
    print('...test map: map(i->t): %3.3f, map(t->i): %3.3f' % (mapi2t, mapt2i))
Ejemplo n.º 6
0
def train(**kwargs):
    """Alternately train the image and text hashing networks.

    ``kwargs`` are passed to ``opt.parse`` first. Every epoch runs one pass of
    ``num_train // batch_size`` random mini-batches over the image net and one
    over the text net, refreshes the binary codes from both code buffers,
    records the value returned by ``calc_loss``, optionally validates and
    saves the models, and moves both optimizers along a linear learning-rate
    schedule.

    Nothing is returned; the recorded losses and MAP scores are passed to
    ``write_result``.
    """
    opt.parse(kwargs)

    images, tags, labels = load_data(opt.data_path)
    pretrain_model = load_pretrain_model(opt.pretrain_model_path)
    text_dim = tags.shape[1]

    X, Y, L = split_data(images, tags, labels)
    print('...loading and splitting data finish')

    img_model = ImgModule(opt.bit, pretrain_model)
    txt_model = TxtModule(text_dim, opt.bit)
    if opt.use_gpu:
        img_model = img_model.cuda()
        txt_model = txt_model.cuda()

    # Labels, images and text (in that order) for each split.
    train_L, train_x, train_y = (
        torch.from_numpy(part['train']) for part in (L, X, Y))
    query_L, query_x, query_y = (
        torch.from_numpy(part['query']) for part in (L, X, Y))
    retrieval_L, retrieval_x, retrieval_y = (
        torch.from_numpy(part['retrieval']) for part in (L, X, Y))

    num_train = train_x.shape[0]

    # Latest image / text codes for every training sample; each mini-batch
    # overwrites only its own rows.
    img_codes = torch.randn(num_train, opt.bit)
    txt_codes = torch.randn(num_train, opt.bit)

    if opt.use_gpu:
        train_L = train_L.cuda()
        img_codes = img_codes.cuda()
        txt_codes = txt_codes.cuda()

    full_sim = calc_neighbor(train_L, train_L)
    binary_codes = torch.sign(img_codes + txt_codes)

    batch_size = opt.batch_size
    rest_size = num_train - batch_size
    steps_per_epoch = num_train // batch_size
    norm = batch_size * num_train

    lr = opt.lr
    optimizer_img = SGD(img_model.parameters(), lr=lr)
    optimizer_txt = SGD(txt_model.parameters(), lr=lr)

    # Linear schedule from opt.lr down to 1e-6, one entry per epoch boundary.
    lr_schedule = np.linspace(opt.lr, np.power(10, -6.), opt.max_epoch + 1)
    result = {'loss': []}

    # Column vectors of ones used to sum codes over the batch and over the
    # remaining samples in the balance terms.
    ones = torch.ones(batch_size, 1)
    ones_ = torch.ones(rest_size, 1)

    max_mapi2t = max_mapt2i = 0.

    for epoch in range(opt.max_epoch):
        # ---- image net pass ----
        for _ in tqdm(range(steps_per_epoch)):
            batch_idx = np.random.permutation(num_train)[0:batch_size]
            rest_idx = np.setdiff1d(range(num_train), batch_idx)

            sample_L = Variable(train_L[batch_idx, :])
            image = Variable(train_x[batch_idx].type(torch.float))
            if opt.use_gpu:
                image = image.cuda()
                sample_L = sample_L.cuda()
                ones = ones.cuda()
                ones_ = ones_.cuda()

            # batch_sim is combined element-wise with theta_x, which is
            # (batch_size, num_train).
            batch_sim = calc_neighbor(sample_L, train_L)
            cur_f = img_model(image)
            img_codes[batch_idx, :] = cur_f.data
            F = Variable(img_codes)
            G = Variable(txt_codes)

            theta_x = 1.0 / 2 * torch.matmul(cur_f, G.t())
            logloss_x = -torch.sum(
                batch_sim * theta_x - torch.log(1.0 + torch.exp(theta_x)))
            quantization_x = torch.sum(
                torch.pow(binary_codes[batch_idx, :] - cur_f, 2))
            code_sum_x = cur_f.t().mm(ones) + F[rest_idx].t().mm(ones_)
            balance_x = torch.sum(torch.pow(code_sum_x, 2))
            loss_x = logloss_x + opt.gamma * quantization_x + opt.eta * balance_x
            loss_x /= norm

            optimizer_img.zero_grad()
            loss_x.backward()
            optimizer_img.step()

        # ---- text net pass ----
        for _ in tqdm(range(steps_per_epoch)):
            batch_idx = np.random.permutation(num_train)[0:batch_size]
            rest_idx = np.setdiff1d(range(num_train), batch_idx)

            sample_L = Variable(train_L[batch_idx, :])
            text = Variable(
                train_y[batch_idx, :].unsqueeze(1).unsqueeze(-1).type(torch.float))
            if opt.use_gpu:
                text = text.cuda()
                sample_L = sample_L.cuda()

            batch_sim = calc_neighbor(sample_L, train_L)
            cur_g = txt_model(text)
            txt_codes[batch_idx, :] = cur_g.data
            F = Variable(img_codes)
            G = Variable(txt_codes)

            # theta_y pairs the batch text codes with all image codes.
            theta_y = 1.0 / 2 * torch.matmul(cur_g, F.t())
            logloss_y = -torch.sum(
                batch_sim * theta_y - torch.log(1.0 + torch.exp(theta_y)))
            quantization_y = torch.sum(
                torch.pow(binary_codes[batch_idx, :] - cur_g, 2))
            code_sum_y = cur_g.t().mm(ones) + G[rest_idx].t().mm(ones_)
            balance_y = torch.sum(torch.pow(code_sum_y, 2))
            loss_y = logloss_y + opt.gamma * quantization_y + opt.eta * balance_y
            loss_y /= norm

            optimizer_txt.zero_grad()
            loss_y.backward()
            optimizer_txt.step()

        # Refresh the binary codes from both buffers.
        binary_codes = torch.sign(img_codes + txt_codes)

        # Loss over the whole training set.
        loss = calc_loss(binary_codes, F, G, Variable(full_sim), opt.gamma, opt.eta)

        print('...epoch: %3d, loss: %3.3f, lr: %f' % (epoch + 1, loss.data, lr))
        result['loss'].append(float(loss.data))

        if opt.valid:
            mapi2t, mapt2i = valid(img_model, txt_model, query_x, retrieval_x,
                                   query_y, retrieval_y, query_L, retrieval_L)
            print('...epoch: %3d, valid MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f'
                  % (epoch + 1, mapi2t, mapt2i))
            # Checkpoint only when neither direction got worse than the best so far.
            if mapt2i >= max_mapt2i and mapi2t >= max_mapi2t:
                max_mapi2t, max_mapt2i = mapi2t, mapt2i
                img_model.save(img_model.module_name + '.pth')
                txt_model.save(txt_model.module_name + '.pth')

        # Apply the next scheduled learning rate to both optimizers.
        lr = lr_schedule[epoch + 1]
        for optimizer in (optimizer_img, optimizer_txt):
            for group in optimizer.param_groups:
                group['lr'] = lr

    print('...training procedure finish')
    if opt.valid:
        final_i2t, final_t2i = max_mapi2t, max_mapt2i
    else:
        final_i2t, final_t2i = valid(img_model, txt_model, query_x, retrieval_x,
                                     query_y, retrieval_y, query_L, retrieval_L)
    print('   max MAP: MAP(i->t): %3.4f, MAP(t->i): %3.4f' % (final_i2t, final_t2i))
    result['mapi2t'] = final_i2t
    result['mapt2i'] = final_t2i

    write_result(result)