def main():
    # parameters
    learning_rate = 0.001
    num_epochs = 100
    batch_size = 20
    batch_size_test = 20
    feature_size = 2048 * 1 * 1

    # create the save/log directories
    print("Create the directory")
    if not os.path.exists("./save"):
        os.makedirs("./save")
    if not os.path.exists("./logfile"):
        os.makedirs("./logfile")
    if not os.path.exists("./logfile/MTL"):
        os.makedirs("./logfile/MTL")

    # load my Dataset
    inf_csv_path = [
        "./dataset_public/infograph/infograph_train.csv",
        "./dataset_public/infograph/infograph_test.csv"
    ]
    qdr_csv_path = [
        "./dataset_public/quickdraw/quickdraw_train.csv",
        "./dataset_public/quickdraw/quickdraw_test.csv"
    ]
    skt_csv_path = [
        "./dataset_public/sketch/sketch_train.csv",
        "./dataset_public/sketch/sketch_test.csv"
    ]
    rel_csv_path = ["./dataset_public/real/real_train.csv"]
    test_path = "./dataset_public/test"

    inf_train_dataset = Dataset.Dataset(csv_path=inf_csv_path[0], argu=True)
    inf_train_loader = DataLoader(inf_train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    qdr_train_dataset = Dataset.Dataset(csv_path=qdr_csv_path[0], argu=True)
    qdr_train_loader = DataLoader(qdr_train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    skt_train_dataset = Dataset.Dataset(csv_path=skt_csv_path[0], argu=True)
    skt_train_loader = DataLoader(skt_train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    rel_train_dataset = Dataset.Dataset(csv_path=rel_csv_path[0], sample=True, argu=True)
    rel_train_loader = DataLoader(rel_train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    valid_dataset = Dataset.Valid_Dataset(csv_path=rel_csv_path[0], sample=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    test_dataset = Dataset.Dataset(csv_path=test_path, mode="test", filename=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=False, num_workers=1)

    print('the source dataset has %d size.' % (len(rel_train_dataset)))
    print('the valid dataset has %d size.' % (len(valid_dataset)))
    print('the target dataset has %d size.' % (len(test_dataset)))
    print('the batch_size is %d' % (batch_size))

    # pre-trained models: ResNet-152 backbone as the shared encoder,
    # three per-source classifiers, an MoE gate, and three domain classifiers
    modules = list(models.resnet152(pretrained=True).children())[:-1]
    encoder = nn.Sequential(*modules)
    #encoder = model.Encoder()
    classifier_0 = model.Classifier(feature_size)
    classifier_1 = model.Classifier(feature_size)
    classifier_2 = model.Classifier(feature_size)
    moe_classifier = model.Moe_Classifier()
    domain_classifier_0 = model.Domain_classifier(feature_size, number_of_domain)
    domain_classifier_1 = model.Domain_classifier(feature_size, number_of_domain)
    domain_classifier_2 = model.Domain_classifier(feature_size, number_of_domain)

    # GPU enable
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Device used:', device)
    if torch.cuda.is_available():
        encoder = encoder.to(device)
        classifier_0 = classifier_0.to(device)
        classifier_1 = classifier_1.to(device)
        classifier_2 = classifier_2.to(device)
        moe_classifier = moe_classifier.to(device)
        domain_classifier_0 = domain_classifier_0.to(device)
        domain_classifier_1 = domain_classifier_1.to(device)
        domain_classifier_2 = domain_classifier_2.to(device)

    # setup optimizer
    """
    optimizer_encoder = optim.SGD(
        list(encoder.parameters()), lr=learning_rate, momentum=0.9)
    optimizer_classifier = optim.SGD(
        list(classifier_0.parameters()) + list(classifier_1.parameters()) +
        list(classifier_2.parameters()) + list(moe_classifier.parameters()),
        lr=learning_rate, momentum=0.9)
    optimizer_domain = optim.SGD([{'params': domain_classifier_0.parameters()},
                                  {'params': domain_classifier_1.parameters()},
                                  {'params': domain_classifier_2.parameters()}],
                                 lr=learning_rate, momentum=0.9)
    """
    optimizer_encoder = optim.Adam([
        {'params': encoder.parameters()},
    ], weight_decay=1e-4, lr=learning_rate)
    optimizer_classifier = optim.Adam(
        list(classifier_0.parameters()) + list(classifier_1.parameters()) +
        list(classifier_2.parameters()) + list(moe_classifier.parameters()),
        weight_decay=1e-4, lr=learning_rate)
    optimizer_domain = optim.Adam([
        {'params': domain_classifier_0.parameters()},
        {'params': domain_classifier_1.parameters()},
        {'params': domain_classifier_2.parameters()}
    ], lr=learning_rate, weight_decay=1e-4)

    # loss functions
    moe_criterion = nn.BCELoss()
    mtl_criterion = nn.NLLLoss()

    D_loss_list = []
    L_loss_list = []
    sum_src_acc_list = []
    sum_trg_acc_list = []
    sum_label_acc_list = []
    sum_test_acc_list = []

    print("Starting training...")
    best_acc = 0.
    valid_acc = 0.0
    for epoch in range(num_epochs):
        print("Epoch:", epoch + 1)
        encoder.train()
        classifier_0.train()
        classifier_1.train()
        classifier_2.train()
        moe_classifier.train()
        domain_classifier_0.train()
        domain_classifier_1.train()
        domain_classifier_2.train()

        epoch_D_loss = 0.0
        epoch_C_loss = 0.0
        epoch_C_moe_loss = 0.0
        sum_src_acc = 0.0
        sum_trg_acc = 0.0
        sum_label_acc = 0.0
        sum_test_acc = 0.0
        tmp_valid_acc = 0.0

        # decay all learning rates every 5 epochs
        if (epoch + 1) in [5, 10, 15, 20, 25, 30, 35, 40, 45]:
            for optimizer_t in optimizer_domain.param_groups:
                optimizer_t['lr'] /= 1.7
            optimizer_encoder.param_groups[0]['lr'] /= 1.7
            optimizer_classifier.param_groups[0]['lr'] /= 1.7

        train_loader = [inf_train_loader, qdr_train_loader, skt_train_loader, rel_train_loader]
        len_loader = min([
            len(train_loader[0]),
            len(train_loader[1]),
            len(train_loader[2]),
            len(train_loader[3])
        ])

        for index, (inf, qdr, skt, rel, test) in enumerate(
                zip(train_loader[0], train_loader[1], train_loader[2],
                    train_loader[3], valid_loader)):
            optimizer_classifier.zero_grad()
            optimizer_encoder.zero_grad()
            optimizer_domain.zero_grad()

            # calculate the adaptation weight lambda_
            p = (index + len_loader * epoch) / (len_loader * num_epochs)
            lambda_ = 5.0 / (1. + np.exp(-10 * p)) - 1.0

            s1_imgs, s1_labels = skt
            s2_imgs, s2_labels = qdr
            s3_imgs, s3_labels = inf
            t1_imgs, _ = rel

            from_s1_labels = Variable(torch.zeros(len(s1_imgs))).to(device)
            from_s2_labels = Variable(torch.zeros(len(s2_imgs))).to(device)
            from_s3_labels = Variable(torch.zeros(len(s3_imgs))).to(device)
            from_t1_labels = Variable(torch.ones(len(t1_imgs))).to(device)

            s1_imgs = Variable(s1_imgs).to(device)
            s1_labels = Variable(s1_labels.view(-1)).to(device)
            s2_imgs = Variable(s2_imgs).to(device)
            s2_labels = Variable(s2_labels.view(-1)).to(device)
            s3_imgs = Variable(s3_imgs).to(device)
            s3_labels = Variable(s3_labels.view(-1)).to(device)
            t1_imgs = Variable(t1_imgs).to(device)

            s1_feature = encoder(s1_imgs)
            s2_feature = encoder(s2_imgs)
            s3_feature = encoder(s3_imgs)
            t1_feature = encoder(t1_imgs)

            # Testing on the validation batch
            test_imgs, test_labels = test
            test_imgs = Variable(test_imgs).to(device)
            test_labels = Variable(test_labels.view(-1)).to(device)
            test_feature = encoder(test_imgs)
            test_output_0 = classifier_0(test_feature)
            test_output_1 = classifier_1(test_feature)
            test_output_2 = classifier_2(test_feature)
            test_output = moe_classifier(test_output_0, test_output_1, test_output_2)
            test_preds = test_output.argmax(1).cpu()
            test_acc = np.mean((test_preds.detach().cpu() == test_labels.cpu()).numpy())
            tmp_valid_acc += test_acc

            # Training Classifier network (loss_mtl)
            s1_output = classifier_0(s1_feature)
            s2_output = classifier_1(s2_feature)
            s3_output = classifier_2(s3_feature)
            s1_preds = s1_output.argmax(1).cpu()
            s2_preds = s2_output.argmax(1).cpu()
            s3_preds = s3_output.argmax(1).cpu()
            s1_acc = np.mean((s1_preds.detach().cpu() == s1_labels.cpu()).numpy())
            s2_acc = np.mean((s2_preds.detach().cpu() == s2_labels.cpu()).numpy())
            s3_acc = np.mean((s3_preds.detach().cpu() == s3_labels.cpu()).numpy())
            s1_c_loss = mtl_criterion(s1_output, s1_labels)
            s2_c_loss = mtl_criterion(s2_output, s2_labels)
            s3_c_loss = mtl_criterion(s3_output, s3_labels)
            mtl_loss = s1_c_loss + s2_c_loss + s3_c_loss

            # Domain_classifier network with source domain (loss_adv)
            s1_domain_output = domain_classifier_0(s1_feature, lambda_)
            s2_domain_output = domain_classifier_1(s2_feature, lambda_)
            s3_domain_output = domain_classifier_2(s3_feature, lambda_)
            s1_domain_acc = np.mean((s1_domain_output.detach().cpu() <= 0.5).numpy())
            s2_domain_acc = np.mean((s2_domain_output.detach().cpu() <= 0.5).numpy())
            s3_domain_acc = np.mean((s3_domain_output.detach().cpu() <= 0.5).numpy())
            s1_d_loss = moe_criterion(s1_domain_output, from_s1_labels)
            s2_d_loss = moe_criterion(s2_domain_output, from_s2_labels)
            s3_d_loss = moe_criterion(s3_domain_output, from_s3_labels)
            D_loss_src = s1_d_loss + s2_d_loss + s3_d_loss
            #print(D_loss_src.item())

            # Domain_classifier network with target domain (loss_adv)
            t1_domain_0_output = domain_classifier_0(t1_feature, lambda_)
            t1_domain_1_output = domain_classifier_1(t1_feature, lambda_)
            t1_domain_2_output = domain_classifier_2(t1_feature, lambda_)
            t1_domain_0_acc = np.mean((t1_domain_0_output.detach().cpu() > 0.5).numpy())
            t1_domain_1_acc = np.mean((t1_domain_1_output.detach().cpu() > 0.5).numpy())
            t1_domain_2_acc = np.mean((t1_domain_2_output.detach().cpu() > 0.5).numpy())
            D0_loss_trg = moe_criterion(t1_domain_0_output, from_t1_labels)
            D1_loss_trg = moe_criterion(t1_domain_1_output, from_t1_labels)
            D2_loss_trg = moe_criterion(t1_domain_2_output, from_t1_labels)
            D_loss_trg = D0_loss_trg + D1_loss_trg + D2_loss_trg
            adv_loss = D_loss_src + D_loss_trg

            # MoE combination: every classifier scores every source batch, the gate mixes the three outputs
            s1_output_0 = classifier_0(s1_feature)
            s1_output_1 = classifier_1(s1_feature)
            s1_output_2 = classifier_2(s1_feature)
            s2_output_0 = classifier_0(s2_feature)
            s2_output_1 = classifier_1(s2_feature)
            s2_output_2 = classifier_2(s2_feature)
            s3_output_0 = classifier_0(s3_feature)
            s3_output_1 = classifier_1(s3_feature)
            s3_output_2 = classifier_2(s3_feature)
            s1_output_moe = moe_classifier(s1_output_0, s1_output_1, s1_output_2)
            s2_output_moe = moe_classifier(s2_output_0, s2_output_1, s2_output_2)
            s3_output_moe = moe_classifier(s3_output_0, s3_output_1, s3_output_2)
            s1_preds_moe = s1_output_moe.argmax(1).cpu()
            s2_preds_moe = s2_output_moe.argmax(1).cpu()
            s3_preds_moe = s3_output_moe.argmax(1).cpu()
            s1_acc = np.mean((s1_preds_moe.detach().cpu() == s1_labels.cpu()).numpy())
            s2_acc = np.mean((s2_preds_moe.detach().cpu() == s2_labels.cpu()).numpy())
            s3_acc = np.mean((s3_preds_moe.detach().cpu() == s3_labels.cpu()).numpy())
            s1_c_loss_moe = mtl_criterion(s1_output_moe, s1_labels)
            s2_c_loss_moe = mtl_criterion(s2_output_moe, s2_labels)
            s3_c_loss_moe = mtl_criterion(s3_output_moe, s3_labels)
            moe_loss = s1_c_loss_moe + s2_c_loss_moe + s3_c_loss_moe
            moe_acc = (s1_acc + s2_acc + s3_acc) / 3.

            loss = 0.4 * mtl_loss + 0.4 * moe_loss + 2 * adv_loss
            epoch_D_loss += adv_loss.item()
            epoch_C_loss += mtl_loss.item()
            epoch_C_moe_loss += moe_loss.item()
            D_trg_acc = (t1_domain_0_acc + t1_domain_1_acc + t1_domain_2_acc) / 3.
            D_src_acc = (s1_domain_acc + s2_domain_acc + s3_domain_acc) / 3.

            loss.backward()
            optimizer_domain.step()
            optimizer_classifier.step()
            optimizer_encoder.step()

            if (index + 1) % 10 == 0:
                print('Epoch [%d/%d], Iter [%d/%d] C_loss %.4f , D_loss %.4f ,LR: %.5f'
                      % (epoch + 1, num_epochs, index + 1, len_loader,
                         mtl_loss.item(), adv_loss.item(),
                         optimizer_encoder.param_groups[0]['lr']))
                print("====> Acc %.4f %.4f %.4f Domain Acc: %.4f %.4f ,Test Acc: %.4f"
                      % (s1_acc, s2_acc, s3_acc, D_src_acc, D_trg_acc, test_acc))
                print("====> Loss_moe %.4f ,Moe acc %.4f" % (moe_loss.item(), moe_acc))

        print('Validating: Acc %.4f ' % (tmp_valid_acc / len_loader))
        if tmp_valid_acc / len_loader > best_acc:
            best_acc = tmp_valid_acc / len_loader
            print('Found best: Acc %.6f ' % (best_acc))
        print('Best: Acc %.6f ' % (best_acc))

        if tmp_valid_acc / len_loader >= best_acc and epoch + 1 > 5 and tmp_valid_acc / len_loader > 0.3:
            best_acc = tmp_valid_acc / len_loader
            print('Found best: Acc %.6f ' % (best_acc))
            # save each module under its own name so the checkpoints do not overwrite each other
            save_checkpoint('./save/encoder-multiD-rel-%.4f-%03i.pth' % (best_acc, epoch), encoder)
            save_checkpoint('./save/moe_classifier-multiD-rel-%.4f-%03i.pth' % (best_acc, epoch), moe_classifier)
            save_checkpoint('./save/classifier_0-multiD-rel-%.4f-%03i.pth' % (best_acc, epoch), classifier_0)
            save_checkpoint('./save/classifier_1-multiD-rel-%.4f-%03i.pth' % (best_acc, epoch), classifier_1)
            save_checkpoint('./save/classifier_2-multiD-rel-%.4f-%03i.pth' % (best_acc, epoch), classifier_2)

    # inference on the target test set with the MoE head
    test_acc = 0.
    test_loss = 0.
    encoder.eval()
    moe_classifier.eval()
    classifier_0.eval()
    classifier_1.eval()
    classifier_2.eval()
    domain_classifier_0.eval()
    domain_classifier_1.eval()
    domain_classifier_2.eval()

    filename_save = []
    output_save = []
    for index, (imgs, filenames) in enumerate(test_loader):
        output_list = []
        loss_mtl = []
        imgs = Variable(imgs).to(device)
        #labels = Variable(labels.view(-1)).to(device)
        hidden = encoder(imgs)
        tmp_0 = classifier_0(hidden)
        tmp_1 = classifier_1(hidden)
        tmp_2 = classifier_2(hidden)
        output = moe_classifier(tmp_0, tmp_1, tmp_2)
        preds = output.argmax(1).cpu()
        #acc = np.mean((preds.detach().cpu() == labels.cpu()).numpy())
        for filename in filenames:
            filename_save.append(filename[-10:])
        for out in preds.detach().cpu():
            output_save.append(out)
        #test_acc += acc
        #if index % 500 == 0:
        #    print(acc)

    # save csv
    file = open("./submission/multi_01_rel.csv", "w")
    file.write("image_name,label\n")
    for i, (filename, data) in enumerate(zip(filename_save, output_save)):
        file.write("test/%s,%d\n" % (filename, int(data)))
    file.close()
    return 0
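# model.Moe_Classifier is defined in model.py, which is not shown in this file.
# The class below is only an illustrative sketch (an assumption, not the actual
# implementation) of how a gate could combine the three per-source classifier
# outputs that main() passes to moe_classifier(...).
class MoeClassifierSketch(nn.Module):
    """Hypothetical gate: a learned convex combination of the three expert outputs."""

    def __init__(self, num_experts=3):
        super().__init__()
        # one learnable mixing weight per expert head
        self.gate = nn.Parameter(torch.zeros(num_experts))

    def forward(self, out_0, out_1, out_2):
        # out_* are (batch, num_classes) scores from classifier_0/1/2
        w = torch.softmax(self.gate, dim=0)
        return w[0] * out_0 + w[1] * out_1 + w[2] * out_2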
def main():
    batch_size = 2
    batch_size_test = 2
    feature_size = 2048 * 1 * 1
    print("Check img size!!!!!!!!!!!!")

    # load my Dataset
    inf_csv_path = [
        "./dataset_public/infograph/infograph_train.csv",
        "./dataset_public/infograph/infograph_test.csv"
    ]
    qdr_csv_path = [
        "./dataset_public/quickdraw/quickdraw_train.csv",
        "./dataset_public/quickdraw/quickdraw_test.csv"
    ]
    skt_csv_path = [
        "./dataset_public/sketch/sketch_train.csv",
        "./dataset_public/sketch/sketch_test.csv"
    ]
    rel_csv_path = ["./dataset_public/real/real_train.csv"]
    test_path = "./dataset_public/test"

    test_dataset = Dataset.Dataset(csv_path=qdr_csv_path[1], filename=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=False, num_workers=1)
    print('the target dataset has %d size.' % (len(test_loader)))
    print('the batch_size is %d' % (batch_size))

    # pre-trained models: ResNet-50 backbone plus a single classifier head
    modules = list(models.resnet50(pretrained=True).children())[:-1]
    encoder = nn.Sequential(*modules)
    #encoder = model.Encoder()
    classifier = model.Classifier(feature_size)

    # GPU enable
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Device used:', device)
    if torch.cuda.is_available():
        encoder = encoder.to(device)
        classifier = classifier.to(device)

    # load trained models from the checkpoint paths given on the command line
    encoder_path = sys.argv[1]
    classifier_path = sys.argv[2]
    load_checkpoint(encoder_path, encoder)
    load_checkpoint(classifier_path, classifier)

    encoder.eval()
    classifier.eval()
    filename_save = []
    output_save = []
    total_acc = 0.
    with torch.no_grad():
        for index, (imgs, labels, filenames) in enumerate(test_loader):
            print("\r%d/%d" % (index + 1, len(test_loader)), end="")
            output_list = []
            imgs = Variable(imgs).to(device)
            labels = Variable(labels.view(-1)).to(device)
            hidden = encoder(imgs)
            output = classifier(hidden)
            preds = output.argmax(1).cpu()
            acc = np.mean((preds.detach().cpu() == labels.cpu()).numpy())
            total_acc += acc
            for filename in filenames:
                filename_save.append(filename[-10:])
            for out in preds.detach().cpu():
                output_save.append(out)

    total_acc = total_acc / len(test_loader)
    print("Acc = ", total_acc)
    print(len(filename_save))
    print(len(output_save))

    # save csv
    file = open("./submission/pred_qdr.csv", "w")
    file.write("image_name,label\n")
    for i, (filename, data) in enumerate(zip(filename_save, output_save)):
        file.write("test/%s,%d\n" % (filename, int(data)))
    file.close()
    return 0
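# save_checkpoint / load_checkpoint are used throughout these scripts but are not
# defined in this section. The helpers below are a minimal sketch of what they are
# assumed to do (state_dict round-tripping with torch.save / torch.load); the
# project's real helpers may differ.
def save_checkpoint_sketch(checkpoint_path, model):
    # persist only the parameters, not the whole module object
    torch.save(model.state_dict(), checkpoint_path)
    print('model saved to %s' % checkpoint_path)


def load_checkpoint_sketch(checkpoint_path, model):
    # map_location lets a GPU-trained checkpoint load on a CPU-only machine
    state = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state)
    print('model loaded from %s' % checkpoint_path)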
def main():
    # parameters
    learning_rate = 0.001
    num_epochs = 30
    batch_size = 30
    batch_size_test = 30
    feature_size = 2048 * 1 * 1
    #learning_rate = float(sys.argv[1])

    # create the save/log directories
    print("Create the directory")
    if not os.path.exists("./save"):
        os.makedirs("./save")
    if not os.path.exists("./logfile"):
        os.makedirs("./logfile")
    if not os.path.exists("./logfile/MTL"):
        os.makedirs("./logfile/MTL")

    # load my Dataset
    inf_csv_path = [
        "./dataset_public/infograph/infograph_train.csv",
        "./dataset_public/infograph/infograph_test.csv"
    ]
    qdr_csv_path = [
        "./dataset_public/quickdraw/quickdraw_train.csv",
        "./dataset_public/quickdraw/quickdraw_test.csv"
    ]
    skt_csv_path = [
        "./dataset_public/sketch/sketch_train.csv",
        "./dataset_public/sketch/sketch_test.csv"
    ]
    rel_csv_path = ["./dataset_public/real/real_train.csv"]
    test_path = "./dataset_public/test"

    inf_train_dataset = Dataset.Dataset(csv_path=inf_csv_path[0], argu=True)
    inf_train_loader = DataLoader(inf_train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    qdr_train_dataset = Dataset.Dataset(csv_path=qdr_csv_path[0], argu=True)
    qdr_train_loader = DataLoader(qdr_train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    skt_train_dataset = Dataset.Dataset(csv_path=skt_csv_path[0], argu=True)
    skt_train_loader = DataLoader(skt_train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    rel_train_dataset = Dataset.Dataset(csv_path=rel_csv_path[0], sample=True, argu=True)
    rel_train_loader = DataLoader(rel_train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    valid_dataset = Dataset.Valid_Dataset(csv_path=rel_csv_path[0], sample=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    test_dataset = Dataset.Dataset(csv_path=test_path, mode="test", filename=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=False, num_workers=4)

    print('the source dataset has %d size.' % (len(rel_train_dataset)))
    print('the valid dataset has %d size.' % (len(valid_dataset)))
    print('the target dataset has %d size.' % (len(test_loader)))
    print('the batch_size is %d' % (batch_size))

    # pre-trained models: ResNet-152 backbone, one shared classifier,
    # and three per-source domain classifiers
    modules = list(models.resnet152(pretrained=True).children())[:-1]
    encoder = nn.Sequential(*modules)
    #encoder = model.Encoder()
    classifier = model.Classifier(feature_size)
    domain_classifier_0 = model.Domain_classifier_0(feature_size, number_of_domain)
    domain_classifier_1 = model.Domain_classifier_1(feature_size, number_of_domain)
    domain_classifier_2 = model.Domain_classifier_2(feature_size, number_of_domain)

    # GPU enable
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Device used:', device)
    if torch.cuda.is_available():
        encoder = encoder.to(device)
        classifier = classifier.to(device)
        domain_classifier_0 = domain_classifier_0.to(device)
        domain_classifier_1 = domain_classifier_1.to(device)
        domain_classifier_2 = domain_classifier_2.to(device)

    # setup optimizer
    optimizer_encoder = optim.SGD(list(encoder.parameters()) + list(classifier.parameters()),
                                  lr=learning_rate, momentum=0.9)
    optimizer_classifier = optim.SGD(list(classifier.parameters()), lr=learning_rate, momentum=0.9)
    optimizer_domain = optim.Adam([
        {'params': domain_classifier_0.parameters()},
        {'params': domain_classifier_1.parameters()},
        {'params': domain_classifier_2.parameters()}
    ], lr=learning_rate, weight_decay=1e-4)
    """
    optimizer_encoder = optim.Adam([{'params': encoder.parameters()},
                                    {'params': classifier.parameters()}],
                                   weight_decay=1e-4, lr=learning_rate)
    optimizer_classifier = optim.Adam(classifier.parameters(), weight_decay=1e-4, lr=learning_rate)
    optimizer_domain = optim.Adam([{'params': domain_classifier_0.parameters()},
                                   {'params': domain_classifier_1.parameters()},
                                   {'params': domain_classifier_2.parameters()}],
                                  lr=learning_rate, weight_decay=1e-4)
    """

    # loading models
    #if input("Loading pre-trained file? (T/F)") == "T":
    #    encoder_path = sys.argv[2]
    #    classifier_path = sys.argv[3]
    #    load_checkpoint(encoder_path, encoder)
    #    load_checkpoint(classifier_path, classifier)

    # loss functions
    moe_criterion = nn.CrossEntropyLoss()
    mtl_criterion = nn.NLLLoss()

    D_loss_list = []
    L_loss_list = []
    sum_src_acc_list = []
    sum_trg_acc_list = []
    sum_label_acc_list = []
    sum_test_acc_list = []

    print("Starting training...")
    best_acc = 0.
    valid_acc = 0.0
    for epoch in range(num_epochs):
        print("Epoch:", epoch + 1)
        encoder.train()
        classifier.train()
        domain_classifier_0.train()
        domain_classifier_1.train()
        domain_classifier_2.train()

        epoch_D_loss = 0.0
        epoch_C_loss = 0.0
        domain_acc_src = 0.0
        domain_acc_trg = 0.0
        sum_trg_acc = 0.0
        sum_label_acc = 0.0
        sum_test_acc = 0.0
        tmp_valid_acc = 0.0

        # decay all learning rates every 5 epochs
        if (epoch + 1) in [5, 10, 15, 20, 25, 30, 35, 40, 45]:
            for optimizer_t in optimizer_domain.param_groups:
                optimizer_t['lr'] /= 1.7
            optimizer_encoder.param_groups[0]['lr'] /= 1.7
            optimizer_classifier.param_groups[0]['lr'] /= 1.7

        train_loader = [inf_train_loader, qdr_train_loader, skt_train_loader, rel_train_loader]
        len_loader = min([
            len(train_loader[0]),
            len(train_loader[1]),
            len(train_loader[2]),
            len(train_loader[3])
        ])

        sum_acc_1 = 0.
        sum_acc_2 = 0.
        sum_acc_3 = 0.
        for index, (inf, qdr, skt, rel, test) in enumerate(
                zip(train_loader[0], train_loader[1], train_loader[2],
                    train_loader[3], valid_loader)):
            optimizer_classifier.zero_grad()
            optimizer_encoder.zero_grad()
            optimizer_domain.zero_grad()

            # calculate the adaptation weight lambda_
            p = (index + len_loader * epoch) / (len_loader * num_epochs)
            lambda_ = 2.0 / (1. + np.exp(-10 * p)) - 1.0

            s1_imgs, s1_labels = skt
            s2_imgs, s2_labels = inf
            s3_imgs, s3_labels = qdr
            t1_imgs, _ = rel

            from_s1_labels = Variable(torch.LongTensor([0 for i in range(len(s1_imgs))])).to(device)
            from_s2_labels = Variable(torch.LongTensor([0 for i in range(len(s2_imgs))])).to(device)
            from_s3_labels = Variable(torch.LongTensor([0 for i in range(len(s3_imgs))])).to(device)
            from_t1_labels = Variable(torch.LongTensor([1 for i in range(len(t1_imgs))])).to(device)

            s1_imgs = Variable(s1_imgs).to(device)
            s1_labels = Variable(s1_labels.view(-1)).to(device)
            s2_imgs = Variable(s2_imgs).to(device)
            s2_labels = Variable(s2_labels.view(-1)).to(device)
            s3_imgs = Variable(s3_imgs).to(device)
            s3_labels = Variable(s3_labels.view(-1)).to(device)
            t1_imgs = Variable(t1_imgs).to(device)

            s1_feature = encoder(s1_imgs)
            s2_feature = encoder(s2_imgs)
            s3_feature = encoder(s3_imgs)
            t1_feature = encoder(t1_imgs)

            # skip degenerate last batches
            if len(s1_feature) < 2 or len(s2_feature) < 2 or len(s3_feature) < 2 or len(t1_feature) < 2:
                break

            # Training Classifier network (loss_mtl)
            s1_output = classifier(s1_feature)
            s2_output = classifier(s2_feature)
            s3_output = classifier(s3_feature)
            s1_preds = s1_output.argmax(1).cpu()
            s2_preds = s2_output.argmax(1).cpu()
            s3_preds = s3_output.argmax(1).cpu()
            s1_acc = np.mean((s1_preds.detach().cpu() == s1_labels.cpu()).numpy())
            s2_acc = np.mean((s2_preds.detach().cpu() == s2_labels.cpu()).numpy())
            s3_acc = np.mean((s3_preds.detach().cpu() == s3_labels.cpu()).numpy())
            s1_c_loss = mtl_criterion(s1_output, s1_labels)
            s2_c_loss = mtl_criterion(s2_output, s2_labels)
            s3_c_loss = mtl_criterion(s3_output, s3_labels)
            mtl_loss = 1 * (1 * s1_c_loss + 1. * s2_c_loss + 1. * s3_c_loss)
            s_acc = s1_acc + s2_acc + s3_acc

            # Domain_classifier network with source domain (loss_adv)
            s1_domain_output = domain_classifier_0(s1_feature, lambda_)
            s2_domain_output = domain_classifier_1(s2_feature, lambda_)
            s3_domain_output = domain_classifier_2(s3_feature, lambda_)
            #if index == 10:
            #    print(s1_domain_preds)
            s1_domain_acc = np.mean((s1_domain_output.argmax(1).cpu() == from_s1_labels.cpu()).numpy())
            s2_domain_acc = np.mean((s2_domain_output.argmax(1).cpu() == from_s2_labels.cpu()).numpy())
            s3_domain_acc = np.mean((s3_domain_output.argmax(1).cpu() == from_s3_labels.cpu()).numpy())
            #print(s1_domain_output.shape)
            #print(s1_domain_output[0])
            s1_d_loss = moe_criterion(s1_domain_output, from_s1_labels)
            s2_d_loss = moe_criterion(s2_domain_output, from_s2_labels)
            s3_d_loss = moe_criterion(s3_domain_output, from_s3_labels)
            #D_loss_src = 1 * s1_d_loss + s2_d_loss + 1 * s3_d_loss
            #print(D_loss_src.item())

            # Domain_classifier network with target domain (loss_adv)
            t1_domain_0_output = domain_classifier_0(t1_feature, lambda_)
            t1_domain_1_output = domain_classifier_1(t1_feature, lambda_)
            t1_domain_2_output = domain_classifier_2(t1_feature, lambda_)
            t1_domain_0_acc = np.mean((t1_domain_0_output.argmax(1).cpu() == from_t1_labels.cpu()).numpy())
            t1_domain_1_acc = np.mean((t1_domain_1_output.argmax(1).cpu() == from_t1_labels.cpu()).numpy())
            t1_domain_2_acc = np.mean((t1_domain_2_output.argmax(1).cpu() == from_t1_labels.cpu()).numpy())
            D0_loss_trg = moe_criterion(t1_domain_0_output, from_t1_labels)
            D1_loss_trg = moe_criterion(t1_domain_1_output, from_t1_labels)
            D2_loss_trg = moe_criterion(t1_domain_2_output, from_t1_labels)
            D_loss_trg = (1 * D0_loss_trg + D1_loss_trg + 1 * D2_loss_trg) / 2.9

            # keep only the largest source-domain loss in the adversarial term
            D_s1t1_loss = s1_d_loss
            D_s2t1_loss = s2_d_loss
            D_s3t1_loss = s3_d_loss
            if D_s1t1_loss > D_s2t1_loss and D_s1t1_loss > D_s3t1_loss:
                adv_loss = D_s1t1_loss + D_loss_trg
            elif D_s2t1_loss > D_s1t1_loss and D_s2t1_loss > D_s3t1_loss:
                adv_loss = D_s2t1_loss + D_loss_trg
            elif D_s3t1_loss > D_s1t1_loss and D_s3t1_loss > D_s2t1_loss:
                adv_loss = D_s3t1_loss + D_loss_trg
            #adv_loss = D_loss_src + D_loss_trg

            loss = 1. * mtl_loss + adv_loss
            epoch_D_loss += adv_loss.item()
            epoch_C_loss += mtl_loss.item()
            D_trg_acc = (t1_domain_0_acc + t1_domain_1_acc + t1_domain_2_acc) / 3.
            D_src_acc = (s1_domain_acc + s2_domain_acc + s3_domain_acc) / 3.
            sum_acc_1 += s1_acc
            sum_acc_2 += s2_acc
            sum_acc_3 += s3_acc
            domain_acc_src += D_src_acc
            domain_acc_trg += D_trg_acc

            #mtl_loss.backward()
            #adv_loss.backward()
            loss.backward()
            optimizer_classifier.step()
            optimizer_encoder.step()
            optimizer_domain.step()

            if (index + 1) % 10 == 0:
                print('Epoch [%d/%d], Iter [%d/%d] C_loss %.4f , D_loss %.4f ,LR: %.5f'
                      % (epoch + 1, num_epochs, index + 1, len_loader,
                         epoch_C_loss / (index + 1), epoch_D_loss / (index + 1),
                         optimizer_domain.param_groups[0]['lr']))
                print("====> Acc %.4f %.4f %.4f Domain Acc: %.4f %.4f ,Test Acc: %.4f"
                      % (sum_acc_1 / (index + 1), sum_acc_2 / (index + 1),
                         sum_acc_3 / (index + 1), domain_acc_src / (index + 1),
                         domain_acc_trg / (index + 1), tmp_valid_acc / (index + 1)))

            # Testing on the validation batch
            encoder.eval()
            classifier.eval()
            with torch.no_grad():
                test_imgs, test_labels = test
                test_imgs = Variable(test_imgs).to(device)
                test_labels = Variable(test_labels.view(-1)).to(device)
                test_feature = encoder(test_imgs)
                test_output = classifier(test_feature)
                test_preds = test_output.argmax(1).cpu()
                test_acc = np.mean((test_preds.detach().cpu() == test_labels.cpu()).numpy())
                tmp_valid_acc += test_acc
            encoder.train()
            classifier.train()

        s1_avg_acc = sum_acc_1 / len_loader
        s2_avg_acc = sum_acc_2 / len_loader
        s3_avg_acc = sum_acc_3 / len_loader
        print('Validating: Acc %.4f ' % (tmp_valid_acc / len_loader))
        print('Avg src: Acc %.4f,%.4f,%.4f ' % (s1_avg_acc, s2_avg_acc, s3_avg_acc))
        if tmp_valid_acc / len_loader > best_acc:
            best_acc = tmp_valid_acc / len_loader
            print('Found best: Acc %.6f ' % (best_acc))
            save_checkpoint('./save/encoder-%.4f-%.4f-%.4f.pth' % (s1_avg_acc, s2_avg_acc, s3_avg_acc), encoder)
            save_checkpoint('./save/domain-%.4f-%.4f.pth'
                            % (domain_acc_src / (index + 1), domain_acc_trg / (index + 1)),
                            domain_classifier_0)
            save_checkpoint('./save/classifier-%.4f-%.4f-%.4f.pth' % (s1_avg_acc, s2_avg_acc, s3_avg_acc), classifier)
        print('Best: Acc %.6f Avg src: Acc %.4f,%.4f,%.4f' % (best_acc, s1_avg_acc, s2_avg_acc, s3_avg_acc))
        if tmp_valid_acc / len_loader > 0.3:
            best_acc = tmp_valid_acc / len_loader
            print('Found best: Acc %.6f ' % (best_acc))
            save_checkpoint('./save/encoder--%.4f.pth' % (best_acc), encoder)
            save_checkpoint('./save/classifier--%.4f.pth' % (best_acc), classifier)

    # inference on the target test set
    test_acc = 0.
    test_loss = 0.
    encoder.eval()
    classifier.eval()
    domain_classifier_0.eval()
    domain_classifier_1.eval()
    domain_classifier_2.eval()

    filename_save = []
    output_save = []
    with torch.no_grad():
        for index, (imgs, filenames) in enumerate(test_loader):
            output_list = []
            loss_mtl = []
            imgs = Variable(imgs).to(device)
            #labels = Variable(labels.view(-1)).to(device)
            hidden = encoder(imgs)
            output = classifier(hidden)
            preds = output.argmax(1).cpu()
            #acc = np.mean((preds.detach().cpu() == labels.cpu()).numpy())
            for filename in filenames:
                filename_save.append(filename[-10:])
            for out in preds.detach().cpu():
                output_save.append(out)
            #test_acc += acc
            #if index % 500 == 0:
            #    print(acc)

    # save csv
    file = open("./submission/multi_01_rel.csv", "w")
    file.write("image_name,label\n")
    for i, (filename, data) in enumerate(zip(filename_save, output_save)):
        file.write("test/%s,%d\n" % (filename, int(data)))
    file.close()
    return 0
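# The domain classifiers above take (feature, lambda_), which suggests a DANN-style
# gradient reversal layer inside model.py. model.py is not shown in this section,
# so the classes below are only an illustrative sketch of the usual GRL trick:
# identity in the forward pass, gradient negated and scaled by lambda_ on the way back.
class GradReverseSketch(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, lambda_):
        ctx.lambda_ = lambda_
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reverse and scale the gradient flowing back into the encoder
        return grad_output.neg() * ctx.lambda_, None


class DomainClassifierSketch(nn.Module):
    """Hypothetical domain head: GRL followed by a linear domain predictor."""

    def __init__(self, feature_size, number_of_domain):
        super().__init__()
        self.fc = nn.Linear(feature_size, number_of_domain)

    def forward(self, feature, lambda_):
        feature = feature.view(feature.size(0), -1)
        reversed_feature = GradReverseSketch.apply(feature, lambda_)
        return self.fc(reversed_feature)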
def main():
    # parameters
    learning_rate = 0.001
    num_epochs = 50
    batch_size_train = 200
    batch_size_test = 100
    feature_size = 512 * 1 * 1

    # load my Dataset
    inf_csv_path = [
        "./dataset_public/infograph/infograph_train.csv",
        "./dataset_public/infograph/infograph_test.csv"
    ]
    qdr_csv_path = [
        "./dataset_public/quickdraw/quickdraw_train.csv",
        "./dataset_public/quickdraw/quickdraw_test.csv"
    ]
    skt_csv_path = [
        "./dataset_public/sketch/sketch_train.csv",
        "./dataset_public/sketch/sketch_test.csv"
    ]
    rel_csv_path = ["./dataset_public/real/real_train.csv"]
    test_path = "./dataset_public/test"

    source = rel_csv_path[0]
    test = rel_csv_path[0]
    print("source from : ", source)
    print("test from : ", test)

    inf_train_dataset = Dataset.Dataset(csv_path=source, sample=True)
    s_train_loader = DataLoader(inf_train_dataset, batch_size=batch_size_train, shuffle=True, num_workers=1)
    test_dataset = Dataset.Valid_Dataset(csv_path=test)
    #test_dataset = Dataset.Dataset(csv_path=test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=True, num_workers=1)
    print('the source dataset has %d size.' % (len(inf_train_dataset)))
    print('the target dataset has %d size.' % (len(test_dataset)))
    print('the batch_size is %d' % (batch_size_train))

    # models setting: ResNet-18 backbone plus a label predictor (source-only baseline)
    modules = list(models.resnet18(pretrained=True).children())[:-1]
    feature_extractor = nn.Sequential(*modules)
    #feature_extractor = model.Encoder()
    label_predictor = model.Classifier(feature_size)

    # GPU enable
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Device used:', device)
    if torch.cuda.is_available():
        feature_extractor = feature_extractor.to(device)
        label_predictor = label_predictor.to(device)

    # setup optimizer
    optimizer = optim.Adam([
        {'params': label_predictor.parameters()},
        {'params': feature_extractor.parameters()}
    ], lr=learning_rate)

    # loss function
    L_criterion = nn.NLLLoss()

    print("Starting training...")
    best_acc = 0.
    for epoch in range(num_epochs):
        feature_extractor.train()
        label_predictor.train()
        print("Epoch:", epoch + 1)
        len_dataloader = len(s_train_loader)
        epoch_L_loss = 0.0

        # halve the learning rate at fixed epochs (see the MultiStepLR sketch below)
        if (epoch + 1) == 8:
            for optimizer_t in optimizer.param_groups:
                optimizer_t['lr'] /= 2
            # F_optimizer.param_groups[0]['lr'] /= 2
            # L_optimizer.param_groups[0]['lr'] /= 2
            # D_optimizer.param_groups[0]['lr'] /= 2
        if (epoch + 1) == 15:
            for optimizer_t in optimizer.param_groups:
                optimizer_t['lr'] /= 2
        if (epoch + 1) == 20:
            for optimizer_t in optimizer.param_groups:
                optimizer_t['lr'] /= 2
        if (epoch + 1) == 25:
            for optimizer_t in optimizer.param_groups:
                optimizer_t['lr'] /= 2
        if (epoch + 1) == 30:
            for optimizer_t in optimizer.param_groups:
                optimizer_t['lr'] /= 2
            # F_optimizer.param_groups[0]['lr'] /= 2
            # L_optimizer.param_groups[0]['lr'] /= 2
            # D_optimizer.param_groups[0]['lr'] /= 2

        for i, source_data in enumerate(s_train_loader):
            source_img, source_label = source_data
            source_img = Variable(source_img).to(device)
            source_label = Variable(source_label).to(device)

            # train the feature_extractor
            optimizer.zero_grad()
            #optimizer_label.zero_grad()
            source_feature = feature_extractor(source_img)

            # Label_Predictor network
            src_label_output = label_predictor(source_feature)
            _, src_pred_arg = torch.max(src_label_output, 1)
            src_acc = np.mean(np.array(src_pred_arg.cpu()) == np.array(source_label.view(-1).cpu()))
            loss = L_criterion(src_label_output, source_label.view(-1))
            epoch_L_loss += loss.item()

            loss.backward()
            #optimizer_label.step()
            optimizer.step()

            if (i % 20 == 0):
                print('Epoch [%d/%d], Iter [%d/%d] loss %.4f , LR = %.6f , Acc = %.4f'
                      % (epoch + 1, num_epochs, i + 1, len_dataloader, loss.item(),
                         optimizer.param_groups[0]['lr'], src_acc))

        # epoch done
        print('-' * 80)
        feature_extractor.eval()
        label_predictor.eval()
        total_acc = 0.
        for i, test_data in enumerate(test_loader):
            imgs, labels = test_data
            imgs = Variable(imgs).to(device)
            labels = Variable(labels).to(device)
            feature = feature_extractor(imgs)

            # Label_Predictor network
            output = label_predictor(feature)
            _, pred_arg = torch.max(output, 1)
            acc = np.mean(np.array(pred_arg.cpu()) == np.array(labels.view(-1).cpu()))
            if i % 100 == 0:
                print(acc)
            total_acc += acc

        total_acc = total_acc / len(test_loader)
        if total_acc > best_acc:
            best_acc = total_acc
            print("Best accuracy : ", best_acc)
        print("<" + "=" * 40 + ">")

    print("Best accuracy : ", best_acc)
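# A design note on the learning-rate decay above: the repeated "divide by 2 at
# epochs 8/15/20/25/30" blocks can be expressed with torch's built-in MultiStepLR.
# The helper below is only a sketch of that alternative, not the code used above;
# make_step_scheduler and its defaults are illustrative.
def make_step_scheduler(optimizer, milestones=(8, 15, 20, 25, 30), gamma=0.5):
    # scheduler.step() would then be called once per epoch after the optimizer updates
    return optim.lr_scheduler.MultiStepLR(optimizer, milestones=list(milestones), gamma=gamma)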
def main():
    # parameters
    learning_rate = 0.001
    num_epochs = 100
    batch_size = 50
    batch_size_test = 50
    feature_size = 2048 * 1 * 1

    # create the save/log directories
    print("Create the directory")
    if not os.path.exists("./save"):
        os.makedirs("./save")
    if not os.path.exists("./logfile"):
        os.makedirs("./logfile")
    if not os.path.exists("./logfile/MTL"):
        os.makedirs("./logfile/MTL")

    # load my Dataset
    inf_csv_path = [
        "./dataset_public/infograph/infograph_train.csv",
        "./dataset_public/infograph/infograph_test.csv"
    ]
    qdr_csv_path = [
        "./dataset_public/quickdraw/quickdraw_train.csv",
        "./dataset_public/quickdraw/quickdraw_test.csv"
    ]
    skt_csv_path = [
        "./dataset_public/sketch/sketch_train.csv",
        "./dataset_public/sketch/sketch_test.csv"
    ]
    rel_csv_path = ["./dataset_public/real/real_train.csv"]
    test_path = "./dataset_public/test"

    inf_train_dataset = Dataset.Dataset(csv_path=inf_csv_path[0], argu=True)
    inf_train_loader = DataLoader(inf_train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    qdr_train_dataset = Dataset.Dataset(csv_path=qdr_csv_path[0], argu=True)
    qdr_train_loader = DataLoader(qdr_train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    skt_train_dataset = Dataset.Dataset(csv_path=skt_csv_path[0], argu=True)
    skt_train_loader = DataLoader(skt_train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    rel_train_dataset = Dataset.Dataset(csv_path=rel_csv_path[0], sample=True, argu=True)
    rel_train_loader = DataLoader(rel_train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    valid_dataset = Dataset.Valid_Dataset(csv_path=rel_csv_path[0], sample=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
    test_dataset = Dataset.Dataset(csv_path=test_path, mode="test", filename=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=False, num_workers=1)

    print('the source dataset has %d size.' % (len(rel_train_dataset)))
    print('the valid dataset has %d size.' % (len(valid_dataset)))
    print('the target dataset has %d size.' % (len(test_dataset)))
    print('the batch_size is %d' % (batch_size))

    # pre-trained models: ResNet-50 backbone, one classifier, one domain classifier
    modules = list(models.resnet50(pretrained=True).children())[:-1]
    encoder = nn.Sequential(*modules)
    #encoder = model.Encoder()
    classifier = model.Classifier(feature_size)
    domain_classifier = model.Domain_classifier_0(feature_size, number_of_domain)

    # GPU enable
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Device used:', device)
    if torch.cuda.is_available():
        encoder = encoder.to(device)
        classifier = classifier.to(device)
        domain_classifier = domain_classifier.to(device)

    # setup optimizer
    """
    optimizer_encoder = optim.SGD(
        list(encoder.parameters()) + list(classifier.parameters()), lr=learning_rate, momentum=0.9)
    optimizer_classifier = optim.SGD(list(classifier.parameters()), lr=learning_rate, momentum=0.9)
    optimizer = optim.SGD([{'params': domain_classifier.parameters()}], lr=learning_rate, momentum=0.9)
    """
    optimizer_encoder = optim.Adam([{'params': encoder.parameters()},
                                    {'params': classifier.parameters()}],
                                   weight_decay=1e-4, lr=learning_rate)
    optimizer_classifier = optim.Adam(classifier.parameters(), weight_decay=1e-4, lr=learning_rate)
    optimizer = optim.Adam([{'params': domain_classifier.parameters()}],
                           lr=learning_rate, weight_decay=1e-4)

    # loss functions
    moe_criterion = nn.CrossEntropyLoss()
    mtl_criterion = nn.NLLLoss()

    D_loss_list = []
    L_loss_list = []
    sum_src_acc_list = []
    sum_trg_acc_list = []
    sum_label_acc_list = []
    sum_test_acc_list = []

    print("Starting training...")
    best_acc = 0.
    valid_acc = 0.0
    for epoch in range(num_epochs):
        print("Epoch:", epoch + 1)
        encoder.train()
        classifier.train()
        domain_classifier.train()

        epoch_D_loss = 0.0
        epoch_C_loss = 0.0
        sum_src_acc = 0.0
        sum_trg_acc = 0.0
        sum_label_acc = 0.0
        sum_test_acc = 0.0
        tmp_valid_acc = 0.0

        # decay the domain-classifier learning rate every 5 epochs
        if (epoch + 1) in [5, 10, 15, 20, 25, 30, 35, 40, 45]:
            for optimizer_t in optimizer.param_groups:
                optimizer_t['lr'] /= 1.7

        train_loader = [inf_train_loader, qdr_train_loader, skt_train_loader, rel_train_loader]
        len_loader = min([
            len(train_loader[0]),
            len(train_loader[1]),
            len(train_loader[2]),
            len(train_loader[3])
        ])

        for index, (inf, qdr, skt, rel, test) in enumerate(
                zip(train_loader[0], train_loader[1], train_loader[2],
                    train_loader[3], valid_loader)):
            optimizer_encoder.zero_grad()
            optimizer_classifier.zero_grad()
            optimizer.zero_grad()

            # calculate the adaptation weight lambda_
            p = (index + len_loader * epoch) / (len_loader * num_epochs)
            lambda_ = 2.0 / (1. + np.exp(-10 * p)) - 1.0

            s1_imgs, s1_labels = skt
            s2_imgs, s2_labels = inf
            s3_imgs, s3_labels = inf
            test_imgs, test_labels = test
            t1_imgs, _ = rel

            from_s2_labels = Variable(torch.LongTensor([0 for i in range(len(s3_imgs))])).to(device)
            from_t1_labels = Variable(torch.LongTensor([1 for i in range(len(t1_imgs))])).to(device)

            s1_imgs = Variable(s1_imgs).to(device)
            s1_labels = Variable(s1_labels.view(-1)).to(device)
            s2_imgs = Variable(s2_imgs).to(device)
            s2_labels = Variable(s2_labels.view(-1)).to(device)
            s3_imgs = Variable(s3_imgs).to(device)
            s3_labels = Variable(s3_labels.view(-1)).to(device)
            test_imgs = Variable(test_imgs).to(device)
            test_labels = Variable(test_labels.view(-1)).to(device)
            t1_imgs = Variable(t1_imgs).to(device)

            s2_feature = encoder(s2_imgs)
            test_feature = encoder(test_imgs)
            t1_feature = encoder(t1_imgs)

            # validation batch
            test_output = classifier(test_feature)
            test_preds = test_output.argmax(1).cpu()
            test_acc = np.mean((test_preds.detach().cpu() == test_labels.cpu()).numpy())
            # accumulate validation accuracy over the epoch
            tmp_valid_acc += test_acc

            # classification loss on the single source batch
            s2_output = classifier(s2_feature)
            s2_preds = s2_output.argmax(1).cpu()
            s2_acc = np.mean((s2_preds.detach().cpu() == s2_labels.cpu()).numpy())
            s2_c_loss = mtl_criterion(s2_output, s2_labels)
            mtl_loss = s2_c_loss

            # Domain_classifier network with source domain (loss_adv)
            s2_domain_output = domain_classifier(s2_feature, lambda_)
            s2_domain_acc = np.mean((s2_domain_output.argmax(1).cpu() <= 0.5).numpy())
            s2_d_loss = moe_criterion(s2_domain_output, from_s2_labels)
            D_loss_src = s2_d_loss
            #print(D_loss_src.item())

            # Domain_classifier network with target domain (loss_adv)
            t1_domain_0_output = domain_classifier(t1_feature, lambda_)
            t1_domain_0_acc = np.mean((t1_domain_0_output.argmax(1).cpu() > 0.5).numpy())
            D0_loss_trg = moe_criterion(t1_domain_0_output, from_t1_labels)
            D_loss_trg = D0_loss_trg

            if (index + 1) % 100 == 0:
                print(s2_domain_output.argmax(1).cpu())
                print(t1_domain_0_output.argmax(1).cpu())

            adv_loss = D_loss_src + D_loss_trg
            loss = 1 * mtl_loss + 1 * adv_loss
            D_trg_acc = t1_domain_0_acc
            D_src_acc = s2_domain_acc

            #mtl_loss.backward()
            #adv_loss.backward()
            loss.backward()
            optimizer.step()
            optimizer_encoder.step()
            optimizer_classifier.step()

            if (index + 1) % 10 == 0:
                print('Epoch [%d/%d], Iter [%d/%d] C_loss : %.4f D_loss %.4f src: %.4f trg: %.4f ,LR: %.5f'
                      % (epoch + 1, num_epochs, index + 1, len_loader, mtl_loss.item(),
                         adv_loss.item(), D_loss_src.item(), D_loss_trg.item(),
                         optimizer.param_groups[0]['lr']))
                print("====> Domain Acc: %.4f %.4f Test: %.4f" % (D_src_acc, D_trg_acc, test_acc))

        print('Validating: Acc %.4f ' % (tmp_valid_acc / len_loader))
        if tmp_valid_acc / len_loader > best_acc:
            best_acc = tmp_valid_acc / len_loader
            print('Found best: Acc %.6f ' % (best_acc))
        print('Best: Acc %.6f ' % (best_acc))

    return 0
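# The adaptation weight lambda_ computed inline in the training loops above follows
# the usual DANN ramp-up 2 / (1 + exp(-10 * p)) - 1 over training progress p in [0, 1]
# (the first main() uses a scale of 5 instead of 2). The helper below is only a small
# illustrative restatement of that schedule; dann_lambda and its defaults are not part
# of the original scripts.
def dann_lambda(progress, gamma=10.0, scale=2.0):
    # progress: fraction of total training iterations completed, in [0, 1]
    return scale / (1.0 + np.exp(-gamma * progress)) - 1.0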