def siam_train(vectors, seq2seq_model, batch_size, layers, directory):
    """Build and train a SiameseNetwork on encoder states of the training pairs.

    The three values returned by ``readBcb(vectors + '/train')`` are treated
    as two input collections and their targets.  Both input collections are
    passed through ``seq2seq_model.get_encoder_status`` and the results,
    together with the targets and ``directory``, are handed to the network's
    ``train`` method.

    Args:
        vectors: Path prefix; ``'/train'`` is appended to locate the data.
        seq2seq_model: Object exposing ``get_encoder_status``.
        batch_size: Forwarded to the ``SiameseNetwork`` constructor.
        layers: Forwarded to the ``SiameseNetwork`` constructor.
        directory: Forwarded to ``SiameseNetwork.train``.

    Returns:
        The trained ``SiameseNetwork`` instance.
    """
    left_inputs, right_inputs, targets = readBcb(vectors + '/train')

    encode = seq2seq_model.get_encoder_status
    left_states = encode(left_inputs)
    right_states = encode(right_inputs)

    # The network's first constructor argument is taken from the second
    # dimension of the first encoded element.
    feature_width = left_states[0].shape[1]
    network = SiameseNetwork(feature_width, batch_size, layers)
    network.train(left_states, right_states, targets, directory)
    return network
Exemple #2
0
def train(args):
    """Train a Siamese network with a contrastive loss and periodic evaluation.

    Builds the network, a two-group SGD optimizer with an exponential
    learning-rate schedule, and the CUB train/eval data loaders, then runs
    ``args.num_epochs`` epochs.  The model is evaluated once before the first
    training epoch; every ``args.eval_step`` epochs it is evaluated again and
    its weights are written to ``args.ckpt_dir``.

    Args:
        args: Object exposing the attributes read below: ``ngpu``, ``margin``,
            ``num_epochs``, ``train_batch_size``, ``test_batch_size``,
            ``gamma``, ``root_dir``, ``image_txt``, ``train_test_split_txt``,
            ``label_txt``, ``ckpt_dir``, ``eval_step``, ``pretrained`` and
            ``aux_logits``.

    Returns:
        None.
    """
    # basic arguments.
    ngpu = args.ngpu
    margin = args.margin
    num_epochs = args.num_epochs
    train_batch_size = args.train_batch_size
    test_batch_size = args.test_batch_size
    gamma = args.gamma  # for learning rate decay

    root_dir = args.root_dir
    image_txt = args.image_txt
    train_test_split_txt = args.train_test_split_txt
    label_txt = args.label_txt
    ckpt_dir = args.ckpt_dir
    eval_step = args.eval_step

    pretrained = args.pretrained
    aux_logits = args.aux_logits
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    kargs = {'ngpu': ngpu, 'pretrained': pretrained, 'aux_logits': aux_logits}

    # network and loss
    # Keep a handle on the bare network: sub-module access and checkpointing
    # must work whether or not the model ends up wrapped in DataParallel.
    base_network = SiameseNetwork(**kargs)
    siamese_network = base_network
    gpu_number = torch.cuda.device_count()
    if device.type == 'cuda' and gpu_number > 1:
        siamese_network = nn.DataParallel(base_network, list(range(gpu_number)))
    siamese_network.to(device)
    contrastive_loss = ContrastiveLoss(margin=margin)

    # using different lr: the inception_v3 group uses the optimizer default,
    # the `main` group overrides it with a larger rate.
    optimizer = optim.SGD([
        {'params': base_network.inception_v3.parameters()},
        {'params': base_network.main.parameters(), 'lr': 1e-2},
    ], lr=0.00001, momentum=0.9)

    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma, last_epoch=-1)

    transform = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    cub_dataset = CubDataset(root_dir, image_txt, train_test_split_txt, label_txt,
                             transform=transform, is_train=True, offset=1)
    dataloader = DataLoader(dataset=cub_dataset, batch_size=train_batch_size,
                            shuffle=True, num_workers=4)

    cub_dataset_eval = CubDataset(root_dir, image_txt, train_test_split_txt, label_txt,
                                  transform=transform, is_train=False, offset=1)
    dataloader_eval = DataLoader(dataset=cub_dataset_eval, batch_size=test_batch_size,
                                 shuffle=False, num_workers=4)

    # torch.save does not create missing directories.
    os.makedirs(ckpt_dir, exist_ok=True)

    for epoch in range(num_epochs):
        if epoch == 0:
            # Baseline evaluation before any weight update.
            feature_set, label_set = get_feature_and_label(siamese_network, dataloader_eval, device)
            evaluation(feature_set, label_set)
        siamese_network.train()
        for i, data in enumerate(dataloader, 0):
            img_1 = data['img_1'].to(device)
            img_2 = data['img_2'].to(device)
            sim_label = data['sim_label'].float().to(device)
            optimizer.zero_grad()
            output_1, output_2 = siamese_network(img_1, img_2)
            loss = contrastive_loss(output_1, output_2, sim_label)
            loss.backward()
            optimizer.step()

            if i % 20 == 0 and i > 0:
                print("{}, Epoch [{:3d}/{:3d}], Iter [{:3d}/{:3d}], Current loss: {}".format(
                      datetime.datetime.now(), epoch, num_epochs, i, len(dataloader), loss.item()))
        if epoch % eval_step == 0:
            print("Start evalution")
            feature_set, label_set = get_feature_and_label(siamese_network, dataloader_eval, device)
            evaluation(feature_set, label_set)
            # Save the unwrapped weights so the checkpoint loads without DataParallel.
            torch.save(base_network.state_dict(),
                       os.path.join(ckpt_dir, 'model_' + str(epoch) + '_.pth'))

        # Apply the exponential decay once per epoch; without this call the
        # scheduler (and therefore `gamma`) has no effect.
        scheduler.step()
Exemple #3
0
                        num_workers=4)

# Evaluation split: same dataset class and transform as used elsewhere in
# this script, but with is_train=False.
cub_dataset_eval = CubDataset(root_dir,
                              image_txt,
                              train_test_split_txt,
                              label_txt,
                              transform=transform,
                              is_train=False,
                              offset=1)
# Evaluation batches are read in a fixed order (shuffle=False).
dataloader_eval = DataLoader(dataset=cub_dataset_eval,
                             batch_size=test_batch_size,
                             shuffle=False,
                             num_workers=4)

# Main training loop: one pass over `dataloader` per epoch.
for epoch in range(num_epochs):
    siamese_network.train()
    # Evaluation is disabled here; the call is kept for reference:
    # feature_set, label_set = get_feature_and_label(siamese_network, dataloader_eval, device)
    for i, data in enumerate(dataloader, 0):
        # Each batch is a mapping with 'img_1', 'img_2' and 'sim_label'
        # entries; the label is converted to float before moving to `device`.
        img_1, img_2, sim_label = data['img_1'].to(device), data['img_2'].to(
            device), data['sim_label'].type(torch.FloatTensor).to(device)
        # Standard step: clear gradients, forward both inputs, compute the
        # loss on the two outputs, backpropagate, update.
        optimizer.zero_grad()
        output_1, output_2 = siamese_network(img_1, img_2)
        loss = contrastive_loss(output_1, output_2, sim_label)
        loss.backward()
        optimizer.step()

        # Log every 20th iteration, skipping iteration 0.
        # NOTE(review): the backslash continuation sits inside the string
        # literal, so the next line's leading spaces are part of the printed
        # message.
        if i % 20 == 0 and i > 0:
            print("Epoch [{:3d}/{:3d}], Iter [{:3d}/{:3d}], \
                  Current loss: {}".format(epoch, num_epochs, i,
                                           len(dataloader), loss.item()))
    if epoch % 10 == 0:
Exemple #4
0
    # Report the trainable-parameter count computed earlier in this function.
    print("num of trainable_ parameter :", trainable_params)
    print("------------------------------------------------------------")

    # train
    for epoch in range(0, num_epochs):
        # Timestamp taken once per epoch; all `elapsed` values below are
        # measured from this point.
        time_ = datetime.datetime.now()

        # sample train
        # The loader is rebuilt at the start of every epoch; the second
        # returned value is discarded.
        train_dataloader, _ = data_loader(root=DATASET_PATH,
                                          phase='train',
                                          batch_size=batch)
        for iter_, data in enumerate(train_dataloader, 0):
            # Each batch unpacks into five items; iter1_ and iter2_ are not
            # used in the code visible here.
            iter1_, img0, iter2_, img1, label = data
            # Unconditional .cuda() calls: this loop requires a CUDA device.
            img0, img1, label = img0.cuda(), img1.cuda(), label.cuda()
            optimizer.zero_grad()
            # Training mode is (re)set on every iteration.
            model.train()

            output1, output2 = model(img0, img1)
            loss_contrastive = criterion(output1, output2, label)
            loss_contrastive.backward()
            optimizer.step()
            # cosine scheduler
            # The scheduler is stepped once per batch, not once per epoch.
            scheduler.step()
            if iter_ % print_iter == 0:
                # NOTE(review): `elapsed` is time since the start of the
                # epoch, not since the previous print, so scaling it by
                # num_batches / print_iter looks like it would overestimate
                # the per-epoch time after the first report — confirm intent.
                elapsed = datetime.datetime.now() - time_
                expected = elapsed * (num_batches / print_iter)
                # Fractional epoch progress for display.
                _epoch = epoch + ((iter_ + 1) / num_batches)
                print('[{:.3f}/{:d}] loss({}) '
                      'elapsed {} expected per epoch {}'.format(
                          _epoch, num_epochs, loss_contrastive.item(), elapsed,
                          expected))
Exemple #5
0
def train(args):
    """Train or evaluate a Siamese embedding network with a selectable loss.

    Reads its configuration from ``args``, builds the network, a two-group
    optimizer and the train/eval data loaders for the chosen dataset.  When
    ``args.mode == 'evaluation'`` it loads ``args.weight_file``, runs one
    evaluation, saves the features to ``car196_fea_label_path.npy`` and exits
    the process.  Otherwise it trains for ``args.num_epochs`` epochs,
    evaluating once before the first epoch and, every ``args.eval_step``
    epochs, evaluating again and saving the weights under
    ``<ckpt_dir>/<dataset_name>/<loss_type>/<embedding_size>/``.

    Args:
        args: Object exposing the attributes read below (``ngpu``, ``margin``,
            ``manual_seed``, ``mean_value``, ``std_value``, ``num_epochs``,
            ``train_batch_size``, ``test_batch_size``, ``gamma``,
            ``learning_rate``, ``learning_rate2``, ``loss_type``,
            ``dataset_name``, ``pair_type``, ``mode``, ``weight_file``,
            ``root_dir``, ``image_txt``, ``train_test_split_txt``,
            ``label_txt``, ``ckpt_dir``, ``eval_step``, ``display_step``,
            ``embedding_size``, ``pretrained``, ``aux_logits``,
            ``optimizer``).

    Returns:
        None.

    Raises:
        ValueError: If ``args.dataset_name`` is not one of ``'cub200'``,
            ``'online_product'`` or ``'car196'``.
        SystemExit: After a one-shot evaluation, or when ``args.loss_type``
            is not a supported loss name.
    """
    # basic arguments.
    ngpu = args.ngpu
    margin = args.margin
    manual_seed = args.manual_seed
    torch.manual_seed(manual_seed)
    mean_value = args.mean_value
    std_value = args.std_value
    print("margin = {:5.2f}".format(margin))
    print("manual_seed = {:5.2f}".format(manual_seed))
    print("mean_value = {:5.2f}".format(mean_value))
    print("std_value = {:5.2f}".format(std_value))
    num_epochs = args.num_epochs
    train_batch_size = args.train_batch_size
    test_batch_size = args.test_batch_size
    gamma = args.gamma  # for learning rate decay (unused: no scheduler is created here)
    learning_rate = args.learning_rate
    learning_rate2 = args.learning_rate2

    loss_type = args.loss_type
    dataset_name = args.dataset_name
    pair_type = args.pair_type
    mode = args.mode
    weight_file = args.weight_file
    print("pair_type = {}".format(pair_type))
    print("loss_type = {}".format(loss_type))
    print("mode = {}".format(mode))
    print("weight_file = {}".format(weight_file))

    root_dir = args.root_dir
    image_txt = args.image_txt
    train_test_split_txt = args.train_test_split_txt
    label_txt = args.label_txt
    ckpt_dir = args.ckpt_dir
    eval_step = args.eval_step
    display_step = args.display_step
    embedding_size = args.embedding_size

    pretrained = args.pretrained
    aux_logits = args.aux_logits
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    kargs = {'ngpu': ngpu, 'pretrained': pretrained, 'aux_logits': aux_logits,
             'embedding_size': embedding_size}

    # create directory (exist_ok avoids the check-then-create race)
    model_dir = os.path.join(ckpt_dir, dataset_name, loss_type, str(int(embedding_size)))
    print("model_dir = {}".format(model_dir))
    os.makedirs(model_dir, exist_ok=True)

    # network and loss
    # Keep a handle on the bare network: loading and saving weights must work
    # whether or not the model ends up wrapped in DataParallel.
    base_network = SiameseNetwork(**kargs)
    siamese_network = base_network

    # Two parameter groups, each trained with its own learning rate.
    first_group, second_group = base_network.separate_parameter_group()
    param_lr_dict = [
        {'params': first_group, 'lr': learning_rate2},
        {'params': second_group, 'lr': learning_rate},
    ]

    gpu_number = torch.cuda.device_count()
    if device.type == 'cuda' and gpu_number > 1:
        siamese_network = nn.DataParallel(base_network, list(range(gpu_number)))
    siamese_network.to(device)

    print("args.optimizer = {:10s}".format(args.optimizer))
    print("learning_rate = {:5.5f}".format(learning_rate))
    print("learning_rate2 = {:5.5f}".format(learning_rate2))
    optimizer = configure_optimizer(param_lr_dict, optimizer=args.optimizer)

    transform = transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    if dataset_name == 'cub200':
        dataset_train = CubDataset(root_dir, image_txt, train_test_split_txt, label_txt,
                                   transform=transform, is_train=True, offset=1)
        dataset_eval = CubDataset(root_dir, image_txt, train_test_split_txt, label_txt,
                                  transform=transform, is_train=False, offset=1)
    elif dataset_name == 'online_product':
        dataset_train = OnlineProductDataset(root_dir, train_txt=image_txt,
                                             test_txt=train_test_split_txt,
                                             transform=transform, is_train=True, offset=1)
        dataset_eval = OnlineProductDataset(root_dir, train_txt=image_txt,
                                            test_txt=train_test_split_txt,
                                            transform=transform, is_train=False, offset=1)
    elif dataset_name == "car196":
        print("dataset_name = {}".format(dataset_name))
        dataset_train = CarDataset(root_dir, image_info_mat=image_txt,
                                   transform=transform, is_train=True, offset=1)
        dataset_eval = CarDataset(root_dir, image_info_mat=image_txt,
                                  transform=transform, is_train=False, offset=1)
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError("Unknown dataset_name: {!r}".format(dataset_name))

    # The training loader is deliberately left unshuffled, as in the original.
    dataloader = DataLoader(dataset=dataset_train, batch_size=train_batch_size,
                            shuffle=False, num_workers=4)
    dataloader_eval = DataLoader(dataset=dataset_eval, batch_size=test_batch_size,
                                 shuffle=False, num_workers=4)

    log_for_loss = []

    if mode == 'evaluation':
        print("Do one time evluation and exit")
        print("Load pretrained model")
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        base_network.load_state_dict(torch.load(weight_file, map_location=device))
        print("Finish loading")
        print("Calculting features")
        feature_set, label_set, path_set = get_feature_and_label(
            siamese_network, dataloader_eval, device)
        rec_pre = evaluation(feature_set, label_set)
        # for visualization
        sum_dict = {'feature': feature_set, 'label': label_set, 'path': path_set}
        np.save('car196_fea_label_path.npy', sum_dict)
        sys.exit()
    print("Finish eval")

    for epoch in range(num_epochs):
        if epoch == 0:
            # Baseline evaluation before any weight update.
            feature_set, label_set, _ = get_feature_and_label(
                siamese_network, dataloader_eval, device)
            # distance_type: Euclidean or cosine
            rec_pre = evaluation(feature_set, label_set, distance_type='cosine')
        siamese_network.train()
        for i, data in enumerate(dataloader, 0):
            img_1 = data['img_1'].to(device)
            img_2 = data['img_2'].to(device)
            label_1 = data['label_1'].to(device)
            label_2 = data['label_2'].to(device)
            optimizer.zero_grad()
            output_1, output_2 = siamese_network(img_1, img_2)
            pair_dist, pair_sim_label = calculate_distance_and_similariy_label(
                output_1, output_2, label_1, label_2, sqrt=True, pair_type=pair_type)
            if loss_type == "contrastive_loss":
                loss, positive_loss, negative_loss = contrastive_loss(
                    pair_dist, pair_sim_label, margin)
            elif loss_type == "focal_contrastive_loss":
                loss, positive_loss, negative_loss = focal_contrastive_loss(
                    pair_dist, pair_sim_label, margin, mean_value, std_value)
            elif loss_type == "triplet_loss":
                loss, positive_loss, negative_loss = triplet_loss(
                    pair_dist, pair_sim_label, margin)
            elif loss_type == "focal_triplet_loss":
                loss, positive_loss, negative_loss = focal_triplet_loss(
                    pair_dist, pair_sim_label, margin, mean_value, std_value)
            elif loss_type == "angular_loss":
                # Second distance set is measured from the midpoint of the
                # two embeddings to the second embedding.
                center_output = (output_1 + output_2) / 2.
                pair_dist_2, _ = calculate_distance_and_similariy_label(
                    center_output, output_2, label_1, label_2, sqrt=True, pair_type=pair_type)
                # angle margin is 45^o
                loss, positive_loss, negative_loss = angular_loss(
                    pair_dist, pair_dist_2, pair_sim_label, 45)
            else:
                print("Unknown loss function")
                sys.exit()

            loss.backward()
            optimizer.step()
            log_for_loss.append(loss.item())
            if i % display_step == 0 and i > 0:
                print("{}, Epoch [{:3d}/{:3d}], Iter [{:3d}/{:3d}], Loss: {:6.5f}, Positive loss: {:6.5f}, Negative loss: {:6.5f}".format(
                      datetime.datetime.now(), epoch, num_epochs, i, len(dataloader), loss.item(), positive_loss.item(), negative_loss.item()))
        if epoch % eval_step == 0:
            print("Start evalution")
            feature_set, label_set, _ = get_feature_and_label(
                siamese_network, dataloader_eval, device)
            # distance_type: Euclidean or cosine
            rec_pre = evaluation(feature_set, label_set, distance_type='cosine')
            # Save the unwrapped weights so the checkpoint loads without DataParallel.
            torch.save(base_network.state_dict(),
                       os.path.join(model_dir, 'model_' + str(epoch) + '_.pth'))
Exemple #6
0
#  Dataset
# Load the Quora data wrapper and size the model from its text vocabulary.
print('loading Quora data...')
data = Quora(batch_size, input_size)
word_vocab_size = len(data.TEXT.vocab)
siamese = SiameseNetwork(input_size, word_vocab_size, hidden_size, num_layers,
                         data)

# Only parameters with requires_grad=True are handed to the optimizer.
# NOTE: `filter` returns a one-shot iterator; it is consumed by the optimizer
# constructor below.
parameters = filter(lambda p: p.requires_grad, siamese.parameters())

# Loss and Optimizer
optimizer = torch.optim.Adam(parameters, lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Summary writer logging under runs/<model_time>.
writer = SummaryWriter(log_dir='runs/' + model_time)

siamese.train()
# Running loss accumulator and the last epoch index seen (-1 = none yet).
loss, last_epoch = 0, -1
# Best accuracies seen so far (initialised to 0).
max_dev_acc, max_test_acc = 0, 0

# Snapshot of the model, taken as a deep copy of the freshly built network.
best_model = copy.deepcopy(siamese)

# Train the Model
print('training start!')
iterator = data.train_iter
# presumably `iterator` keeps yielding batches across epochs and exposes a
# running `epoch` counter — TODO confirm against the Quora data wrapper.
for i, batch in enumerate(iterator):
    present_epoch = int(iterator.epoch)
    # Stop once the configured number of epochs has been completed.
    if present_epoch == num_epochs:
        break
    # Announce each new epoch once (1-based in the message).
    if present_epoch > last_epoch:
        print('epoch:', present_epoch + 1)
    last_epoch = present_epoch