def siam_train(vectors, seq2seq_model, batch_size, layers, directory):
    first, sec, answ = readBcb(vectors + '/train')
    first_enc = seq2seq_model.get_encoder_status(first)
    sec_enc = seq2seq_model.get_encoder_status(sec)
    siam_model = SiameseNetwork(first_enc[0].shape[1], batch_size, layers)
    siam_model.train(first_enc, sec_enc, answ, directory)
    return siam_model
def train(args):
    # Basic arguments.
    ngpu = args.ngpu
    margin = args.margin
    num_epochs = args.num_epochs
    train_batch_size = args.train_batch_size
    test_batch_size = args.test_batch_size
    gamma = args.gamma  # for learning rate decay
    root_dir = args.root_dir
    image_txt = args.image_txt
    train_test_split_txt = args.train_test_split_txt
    label_txt = args.label_txt
    ckpt_dir = args.ckpt_dir
    eval_step = args.eval_step
    pretrained = args.pretrained
    aux_logits = args.aux_logits

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    kargs = {'ngpu': ngpu, 'pretrained': pretrained, 'aux_logits': aux_logits}

    # Network and loss.
    siamese_network = SiameseNetwork(**kargs)
    gpu_number = torch.cuda.device_count()
    if device.type == 'cuda' and gpu_number > 1:
        siamese_network = nn.DataParallel(siamese_network, list(range(torch.cuda.device_count())))
    siamese_network.to(device)
    contrastive_loss = ContrastiveLoss(margin=margin)

    # params = siamese_network.parameters()
    # optimizer = optim.Adam(params, lr=0.0005)
    # optimizer = optim.SGD(params, lr=0.01, momentum=0.9)
    # Use different learning rates for the backbone and the new layers.
    optimizer = optim.SGD([
        {'params': siamese_network.module.inception_v3.parameters() if gpu_number > 1
                   else siamese_network.inception_v3.parameters()},
        {'params': siamese_network.module.main.parameters() if gpu_number > 1
                   else siamese_network.main.parameters(), 'lr': 1e-2}
    ], lr=0.00001, momentum=0.9)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma, last_epoch=-1)

    transform = transforms.Compose([transforms.Resize((299, 299)),
                                    transforms.CenterCrop(299),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406],
                                                         [0.229, 0.224, 0.225])])
    cub_dataset = CubDataset(root_dir, image_txt, train_test_split_txt, label_txt,
                             transform=transform, is_train=True, offset=1)
    dataloader = DataLoader(dataset=cub_dataset, batch_size=train_batch_size, shuffle=True, num_workers=4)
    cub_dataset_eval = CubDataset(root_dir, image_txt, train_test_split_txt, label_txt,
                                  transform=transform, is_train=False, offset=1)
    dataloader_eval = DataLoader(dataset=cub_dataset_eval, batch_size=test_batch_size, shuffle=False, num_workers=4)

    for epoch in range(num_epochs):
        if epoch == 0:
            # Baseline evaluation before any training.
            feature_set, label_set = get_feature_and_label(siamese_network, dataloader_eval, device)
            evaluation(feature_set, label_set)
        siamese_network.train()
        for i, data in enumerate(dataloader, 0):
            img_1, img_2, sim_label = data['img_1'].to(device), data['img_2'].to(device), \
                data['sim_label'].type(torch.FloatTensor).to(device)
            optimizer.zero_grad()
            output_1, output_2 = siamese_network(img_1, img_2)
            loss = contrastive_loss(output_1, output_2, sim_label)
            loss.backward()
            optimizer.step()
            if i % 20 == 0 and i > 0:
                print("{}, Epoch [{:3d}/{:3d}], Iter [{:3d}/{:3d}], Current loss: {}".format(
                    datetime.datetime.now(), epoch, num_epochs, i, len(dataloader), loss.item()))
        if epoch % eval_step == 0:
            print("Start evaluation")
            feature_set, label_set = get_feature_and_label(siamese_network, dataloader_eval, device)
            evaluation(feature_set, label_set)
            torch.save(siamese_network.module.state_dict(),
                       os.path.join(ckpt_dir, 'model_' + str(epoch) + '_.pth'))
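# ContrastiveLoss is referenced above but not defined in this snippet. The class
# below is a minimal sketch of the standard contrastive-loss formulation
# (similar pairs are pulled together, dissimilar pairs are pushed beyond the
# margin); the class name, the mean reduction, and the label convention
# (1 = similar pair) are assumptions for illustration, not the project's
# actual implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F

class ContrastiveLossSketch(nn.Module):
    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, output_1, output_2, sim_label):
        # Euclidean distance between the two embeddings of each pair.
        dist = F.pairwise_distance(output_1, output_2)
        # Similar pairs (label 1) contribute d^2; dissimilar pairs (label 0)
        # contribute max(margin - d, 0)^2.
        positive = sim_label * dist.pow(2)
        negative = (1.0 - sim_label) * torch.clamp(self.margin - dist, min=0.0).pow(2)
        return 0.5 * (positive + negative).mean()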
dataloader = DataLoader(dataset=cub_dataset, batch_size=train_batch_size, shuffle=True, num_workers=4)
cub_dataset_eval = CubDataset(root_dir, image_txt, train_test_split_txt, label_txt,
                              transform=transform, is_train=False, offset=1)
dataloader_eval = DataLoader(dataset=cub_dataset_eval, batch_size=test_batch_size, shuffle=False, num_workers=4)

for epoch in range(num_epochs):
    siamese_network.train()
    # feature_set, label_set = get_feature_and_label(siamese_network, dataloader_eval, device)
    for i, data in enumerate(dataloader, 0):
        img_1, img_2, sim_label = data['img_1'].to(device), data['img_2'].to(device), \
            data['sim_label'].type(torch.FloatTensor).to(device)
        optimizer.zero_grad()
        output_1, output_2 = siamese_network(img_1, img_2)
        loss = contrastive_loss(output_1, output_2, sim_label)
        loss.backward()
        optimizer.step()
        if i % 20 == 0 and i > 0:
            print("Epoch [{:3d}/{:3d}], Iter [{:3d}/{:3d}], Current loss: {}".format(
                epoch, num_epochs, i, len(dataloader), loss.item()))
    if epoch % 10 == 0:
print("num of trainable_ parameter :", trainable_params) print("------------------------------------------------------------") # train for epoch in range(0, num_epochs): time_ = datetime.datetime.now() # sample train train_dataloader, _ = data_loader(root=DATASET_PATH, phase='train', batch_size=batch) for iter_, data in enumerate(train_dataloader, 0): iter1_, img0, iter2_, img1, label = data img0, img1, label = img0.cuda(), img1.cuda(), label.cuda() optimizer.zero_grad() model.train() output1, output2 = model(img0, img1) loss_contrastive = criterion(output1, output2, label) loss_contrastive.backward() optimizer.step() # cosine scheduler scheduler.step() if iter_ % print_iter == 0: elapsed = datetime.datetime.now() - time_ expected = elapsed * (num_batches / print_iter) _epoch = epoch + ((iter_ + 1) / num_batches) print('[{:.3f}/{:d}] loss({}) ' 'elapsed {} expected per epoch {}'.format( _epoch, num_epochs, loss_contrastive.item(), elapsed, expected))
def train(args):
    # Basic arguments.
    ngpu = args.ngpu
    margin = args.margin
    manual_seed = args.manual_seed
    torch.manual_seed(manual_seed)
    mean_value = args.mean_value
    std_value = args.std_value
    print("margin = {:5.2f}".format(margin))
    print("manual_seed = {:5.2f}".format(manual_seed))
    print("mean_value = {:5.2f}".format(mean_value))
    print("std_value = {:5.2f}".format(std_value))

    num_epochs = args.num_epochs
    train_batch_size = args.train_batch_size
    test_batch_size = args.test_batch_size
    gamma = args.gamma  # for learning rate decay
    learning_rate = args.learning_rate
    learning_rate2 = args.learning_rate2
    loss_type = args.loss_type
    dataset_name = args.dataset_name
    pair_type = args.pair_type
    mode = args.mode
    weight_file = args.weight_file
    print("pair_type = {}".format(pair_type))
    print("loss_type = {}".format(loss_type))
    print("mode = {}".format(mode))
    print("weight_file = {}".format(weight_file))

    root_dir = args.root_dir
    image_txt = args.image_txt
    train_test_split_txt = args.train_test_split_txt
    label_txt = args.label_txt
    ckpt_dir = args.ckpt_dir
    eval_step = args.eval_step
    display_step = args.display_step
    embedding_size = args.embedding_size
    pretrained = args.pretrained
    aux_logits = args.aux_logits

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    kargs = {'ngpu': ngpu, 'pretrained': pretrained, 'aux_logits': aux_logits,
             'embedding_size': embedding_size}

    # Create the checkpoint directory.
    model_dir = os.path.join(ckpt_dir, dataset_name, loss_type, str(int(embedding_size)))
    print("model_dir = {}".format(model_dir))
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # Network and loss.
    siamese_network = SiameseNetwork(**kargs)
    first_group, second_group = siamese_network.separate_parameter_group()
    param_lr_dict = [
        {'params': first_group, 'lr': learning_rate2},
        {'params': second_group, 'lr': learning_rate}
    ]

    gpu_number = torch.cuda.device_count()
    if device.type == 'cuda' and gpu_number > 1:
        siamese_network = nn.DataParallel(siamese_network, list(range(torch.cuda.device_count())))
    siamese_network.to(device)

    # contrastive_loss = ContrastiveLoss(margin=margin)
    # params = siamese_network.parameters()
    print("args.optimizer = {:10s}".format(args.optimizer))
    print("learning_rate = {:5.5f}".format(learning_rate))
    print("learning_rate2 = {:5.5f}".format(learning_rate2))
    # Use different learning rates for the two parameter groups.
    optimizer = configure_optimizer(param_lr_dict, optimizer=args.optimizer)
    # scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma, last_epoch=-1)

    transform = transforms.Compose([transforms.Resize((299, 299)),
                                    transforms.CenterCrop(299),
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406],
                                                         [0.229, 0.224, 0.225])])

    if dataset_name == 'cub200':
        """
        print("dataset_name = {:10s}".format(dataset_name))
        print(root_dir)
        print(image_txt)
        print(train_test_split_txt)
        print(label_txt)
        """
        dataset_train = CubDataset(root_dir, image_txt, train_test_split_txt, label_txt,
                                   transform=transform, is_train=True, offset=1)
        dataset_eval = CubDataset(root_dir, image_txt, train_test_split_txt, label_txt,
                                  transform=transform, is_train=False, offset=1)
    elif dataset_name == 'online_product':
        """
        print("dataset_name = {:10s}".format(dataset_name))
        """
        dataset_train = OnlineProductDataset(root_dir, train_txt=image_txt, test_txt=train_test_split_txt,
                                             transform=transform, is_train=True, offset=1)
        dataset_eval = OnlineProductDataset(root_dir, train_txt=image_txt, test_txt=train_test_split_txt,
                                            transform=transform, is_train=False, offset=1)
    elif dataset_name == "car196":
        print("dataset_name = {}".format(dataset_name))
        dataset_train = CarDataset(root_dir, image_info_mat=image_txt,
                                   transform=transform, is_train=True, offset=1)
        dataset_eval = CarDataset(root_dir, image_info_mat=image_txt,
                                  transform=transform, is_train=False, offset=1)

    dataloader = DataLoader(dataset=dataset_train, batch_size=train_batch_size, shuffle=False, num_workers=4)
    dataloader_eval = DataLoader(dataset=dataset_eval, batch_size=test_batch_size, shuffle=False, num_workers=4)
    log_for_loss = []

    if mode == 'evaluation':
        print("Do one-time evaluation and exit")
        print("Load pretrained model")
        siamese_network.module.load_state_dict(torch.load(weight_file))
        print("Finish loading")
        print("Calculating features")
        feature_set, label_set, path_set = get_feature_and_label(siamese_network, dataloader_eval, device)
        rec_pre = evaluation(feature_set, label_set)
        # np.save("car196_rec_pre_ftl.npy", rec_pre)
        # For visualization.
        sum_dict = {'feature': feature_set, 'label': label_set, 'path': path_set}
        np.save('car196_fea_label_path.npy', sum_dict)
        sys.exit()
        print("Finish eval")

    for epoch in range(num_epochs):
        if epoch == 0:
            # Baseline evaluation before any training.
            feature_set, label_set, _ = get_feature_and_label(siamese_network, dataloader_eval, device)
            # distance_type: Euclidean or cosine
            rec_pre = evaluation(feature_set, label_set, distance_type='cosine')
        siamese_network.train()
        for i, data in enumerate(dataloader, 0):
            # img_1, img_2, sim_label = data['img_1'].to(device), data['img_2'].to(device), data['sim_label'].type(torch.FloatTensor).to(device)
            img_1, img_2, label_1, label_2 = data['img_1'].to(device), data['img_2'].to(device), \
                data['label_1'].to(device), data['label_2'].to(device)
            optimizer.zero_grad()
            output_1, output_2 = siamese_network(img_1, img_2)
            pair_dist, pair_sim_label = calculate_distance_and_similariy_label(
                output_1, output_2, label_1, label_2, sqrt=True, pair_type=pair_type)
            if loss_type == "contrastive_loss":
                loss, positive_loss, negative_loss = contrastive_loss(pair_dist, pair_sim_label, margin)
            elif loss_type == "focal_contrastive_loss":
                loss, positive_loss, negative_loss = focal_contrastive_loss(pair_dist, pair_sim_label,
                                                                            margin, mean_value, std_value)
            elif loss_type == "triplet_loss":
                loss, positive_loss, negative_loss = triplet_loss(pair_dist, pair_sim_label, margin)
            elif loss_type == "focal_triplet_loss":
                loss, positive_loss, negative_loss = focal_triplet_loss(pair_dist, pair_sim_label,
                                                                        margin, mean_value, std_value)
            elif loss_type == "angular_loss":
                center_output = (output_1 + output_2) / 2.
                pair_dist_2, _ = calculate_distance_and_similariy_label(
                    center_output, output_2, label_1, label_2, sqrt=True, pair_type=pair_type)
                # The angle margin is 45 degrees.
                loss, positive_loss, negative_loss = angular_loss(pair_dist, pair_dist_2, pair_sim_label, 45)
            else:
                print("Unknown loss function")
                sys.exit()
            # Try my own customized loss function.
            # loss = contrastive_loss(output_1, output_2, pair_sim_label)
            loss.backward()
            optimizer.step()
            log_for_loss.append(loss.detach().item())

            if i % display_step == 0 and i > 0:
                print("{}, Epoch [{:3d}/{:3d}], Iter [{:3d}/{:3d}], Loss: {:6.5f}, "
                      "Positive loss: {:6.5f}, Negative loss: {:6.5f}".format(
                          datetime.datetime.now(), epoch, num_epochs, i, len(dataloader),
                          loss.item(), positive_loss.item(), negative_loss.item()))

        if epoch % eval_step == 0:
            print("Start evaluation")
            # np.save(loss_type + '.npy', log_for_loss)
            feature_set, label_set, _ = get_feature_and_label(siamese_network, dataloader_eval, device)
            # distance_type: Euclidean or cosine
            rec_pre = evaluation(feature_set, label_set, distance_type='cosine')
            torch.save(siamese_network.module.state_dict(),
                       os.path.join(model_dir, 'model_' + str(epoch) + '_.pth'))
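# calculate_distance_and_similariy_label is defined elsewhere in the project.
# The helper below sketches one plausible reading for the simplest pair_type:
# row i of output_1 is paired with row i of output_2, and the pair counts as
# "similar" when the two class labels match. The function name, the handling
# of pair_type, and the sqrt semantics are assumptions for illustration.
import torch

def pairwise_distance_and_label(output_1, output_2, label_1, label_2, sqrt=True):
    # Squared Euclidean distance per pair, optionally followed by a square root.
    dist = (output_1 - output_2).pow(2).sum(dim=1)
    if sqrt:
        dist = torch.sqrt(dist + 1e-12)  # epsilon keeps the gradient finite at zero
    sim_label = (label_1 == label_2).float()  # 1 = same class, 0 = different class
    return dist, sim_label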
# Dataset
print('loading Quora data...')
data = Quora(batch_size, input_size)
word_vocab_size = len(data.TEXT.vocab)

siamese = SiameseNetwork(input_size, word_vocab_size, hidden_size, num_layers, data)
parameters = filter(lambda p: p.requires_grad, siamese.parameters())

# Loss and Optimizer
optimizer = torch.optim.Adam(parameters, lr=learning_rate)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter(log_dir='runs/' + model_time)

siamese.train()
loss, last_epoch = 0, -1
max_dev_acc, max_test_acc = 0, 0
best_model = copy.deepcopy(siamese)

# Train the Model
print('training start!')
iterator = data.train_iter
for i, batch in enumerate(iterator):
    present_epoch = int(iterator.epoch)
    if present_epoch == num_epochs:
        break
    if present_epoch > last_epoch:
        print('epoch:', present_epoch + 1)
    last_epoch = present_epoch