def run(config_file='config/large_margin.cfg'):
    # Entry point: train a triplet-loss embedding model, driven entirely by an
    # INI config file (sections MAIN / CE_PRETRAIN / EMBEDDING).
    set_seed()
    config = configparser.ConfigParser()
    config.read(os.path.join(ROOT_DIR, config_file))
    config_logging(config)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(config['MAIN']['device'])

    # get the mean and std of dataset train/a
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))
    test_dataset = DevSet(mode='test', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))

    # network class is selected by name from the `networks` module
    model = getattr(networks, config['MAIN']['net'])()
    model = model.cuda()

    # optional cross-entropy pre-training before the metric-learning phase
    if config['CE_PRETRAIN'].getboolean('enable'):
        model = cross_entropy_pretrain(config, model, train_dataset,
                                       test_dataset)

    # balanced batches (n_classes x n_samples) for online triplet mining
    train_batch_sampler = BalanceBatchSampler(
        dataset=train_dataset,
        n_classes=int(config['EMBEDDING']['n_classes']),
        n_samples=int(config['EMBEDDING']['n_samples']))
    train_balanced_loader = DataLoader(dataset=train_dataset,
                                       batch_sampler=train_batch_sampler,
                                       num_workers=1)
    # plain unshuffled loaders, used for evaluation / embedding extraction
    train_loader = DataLoader(dataset=train_dataset, batch_size=int(
        config['EMBEDDING']['batch_size']), shuffle=False, num_workers=1)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=int(config['EMBEDDING']['batch_size']),
                             shuffle=False, num_workers=1)

    model = train_triplet(config, model, train_balanced_loader, train_loader,
                          test_loader)
def classification_baseline_exp(device='2', lr=1e-3, n_epochs=300,
                                batch_size=128, log_interval=50):
    """Plain cross-entropy classification baseline on the device-a subset."""
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # normalize both splits with the statistics of train/a
    scaler = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mean, std = scaler.load_mu_sigma(mode='train', device='a')
    preprocess = Compose([Normalize(mean=mean, std=std), ToTensor()])

    train_set = DevSet(mode='train', device='a', transform=preprocess)
    test_set = DevSet(mode='test', device='a', transform=preprocess)
    train_loader = DataLoader(dataset=train_set, batch_size=batch_size,
                              shuffle=True, num_workers=1)
    test_loader = DataLoader(dataset=test_set, batch_size=batch_size,
                             shuffle=False, num_workers=1)

    net = vggish_bn().cuda()
    criterion = torch.nn.CrossEntropyLoss()
    opt = optim.Adam(net.parameters(), lr=lr)
    sched = lr_scheduler.StepLR(optimizer=opt, step_size=30, gamma=0.5,
                                last_epoch=-1)

    fit(train_loader=train_loader, val_loader=test_loader, model=net,
        loss_fn=criterion, optimizer=opt, scheduler=sched, n_epochs=n_epochs,
        log_interval=log_interval, metrics=[AccumulatedAccuracyMetric()])
def triplet_loss_with_knn_exp(device='3', ckpt_prefix='Run01', lr=1e-3,
                              pretrain_epochs=50, batch_all_epochs=30,
                              batch_hard_epochs=80, n_classes=10, n_samples=12,
                              margin=0.3, log_interval=50, log_level="INFO",
                              k=3, squared=False, embed_dims=64,
                              embed_net='vgg', is_train_embedding_model=False,
                              using_pretrain=False, batch_size=128,
                              select_method='batch_all_and_hard',
                              soft_margin=True):
    """
    Train a triplet embedding (optionally CE-pretrained), track accuracy with
    kNN over the embeddings each epoch, then classify the best embeddings
    with xgboost.

    :param device: CUDA device id (string).
    :param ckpt_prefix: run name used for log/checkpoint files.
    :param lr: learning rate (pretraining and triplet phases).
    :param pretrain_epochs: epochs of cross-entropy pretraining.
    :param batch_all_epochs: epochs trained with the batch-all loss.
    :param batch_hard_epochs: epochs trained with the batch-hard loss.
    :param n_classes: classes per balanced batch.
    :param n_samples: samples per class per balanced batch.
    :param margin: triplet margin.
    :param k: kNN parameter.
    :param embed_dims: embedding dimensionality.
    :param embed_net: 'vgg' or 'shallow'.
    :param select_method: 'batch_all', 'batch_hard', 'random_hard' or
        'batch_all_and_hard'.
    :param soft_margin: use the soft-margin triplet formulation.
    :return: None
    """
    # fix all RNG seeds for reproducibility
    SEED = 0
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
    kwargs = locals()

    log_file = '{}/ckpt/{}_with_knn_exp/{}.log'.format(ROOT_DIR,
                                                       select_method,
                                                       ckpt_prefix)
    if not os.path.exists(os.path.dirname(log_file)):
        os.makedirs(os.path.dirname(log_file))
    logging.basicConfig(filename=log_file,
                        level=getattr(logging, log_level.upper(), None))
    logging.info(str(kwargs))

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of dataset train/a
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # get the normalized train/test datasets
    train_dataset = DevSet(mode='train', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))
    test_dataset = DevSet(mode='test', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))

    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=1)

    if embed_net == 'vgg':
        model = networks.vggish_bn(classify=using_pretrain)
    elif embed_net == 'shallow':
        model = networks.embedding_net_shallow()
    else:
        print("{} doesn't exist!".format(embed_net))
        return
    model = model.cuda()

    if is_train_embedding_model:
        if using_pretrain:
            # --- cross-entropy pretraining of the classifying backbone ---
            pt_loss_fn = nn.CrossEntropyLoss()
            pt_optimizer = optim.Adam(model.parameters(), lr=lr)
            pt_scheduler = lr_scheduler.StepLR(optimizer=pt_optimizer,
                                               step_size=30, gamma=0.5)
            pt_train_hist = History(name='pretrain_train/a')
            pt_val_hist = History(name='pretrain_test/a')
            pt_ckpter = CheckPoint(model=model, optimizer=pt_optimizer,
                                   path='{}/ckpt/{}_with_knn_exp'.format(
                                       ROOT_DIR, select_method),
                                   prefix=(ckpt_prefix + 'pretrain'),
                                   interval=1, save_num=1)
            for epoch in range(1, pretrain_epochs + 1):
                pt_scheduler.step()
                train_loss, metrics = train_epoch(
                    train_loader=train_batch_loader, model=model,
                    loss_fn=pt_loss_fn, optimizer=pt_optimizer,
                    log_interval=log_interval,
                    metrics=[AccumulatedAccuracyMetric()])
                train_logs = {'loss': train_loss}
                for metric in metrics:
                    train_logs[metric.name()] = metric.value()
                pt_train_hist.add(logs=train_logs, epoch=epoch)

                test_loss, metrics = test_epoch(
                    val_loader=test_loader, model=model, loss_fn=pt_loss_fn,
                    metrics=[AccumulatedAccuracyMetric()])
                test_logs = {'loss': test_loss}
                for metric in metrics:
                    test_logs[metric.name()] = metric.value()
                pt_val_hist.add(logs=test_logs, epoch=epoch)

                pt_train_hist.clear()
                pt_train_hist.plot()
                pt_val_hist.plot()
                logging.info('Epoch{:04d}, {:15}, {}'.format(
                    epoch, pt_train_hist.name, str(pt_train_hist.recent)))
                logging.info('Epoch{:04d}, {:15}, {}'.format(
                    epoch, pt_val_hist.name, str(pt_val_hist.recent)))
                pt_ckpter.check_on(epoch=epoch, monitor='acc',
                                   loss_acc=pt_val_hist.recent)

            # reload the best pretrained weights and drop the classify head
            best_pt_model_filename = Reporter(
                ckpt_root=os.path.join(ROOT_DIR, 'ckpt'),
                exp='{}_with_knn_exp'.format(select_method)).select_best(
                    run=(ckpt_prefix + 'pretrain')).selected_ckpt
            model.load_state_dict(
                torch.load(best_pt_model_filename)['model_state_dict'])
            model.set_classify(False)

        # BUG FIX: previously only 'batch_all_and_hard' defined the loss
        # objects (loss_fn_ba / loss_fn_bh) that the training loop actually
        # uses; every other select_method crashed with a NameError.  Each
        # branch now defines both phase losses (identical for single-loss
        # methods, so both phases train with the selected loss).
        if select_method == 'batch_all':
            loss_fn_ba = loss_fn_bh = BatchAllTripletLoss(
                margin=margin, squared=squared, soft_margin=soft_margin)
        elif select_method == 'batch_hard':
            loss_fn_ba = loss_fn_bh = BatchHardTripletLoss(
                margin=margin, squared=squared, soft_margin=soft_margin)
        elif select_method == 'random_hard':
            loss_fn_ba = loss_fn_bh = RandomHardTripletLoss(
                margin=margin,
                triplet_selector=RandomNegativeTripletSelector(margin=margin),
                squared=squared, soft_margin=soft_margin)
        elif select_method == 'batch_all_and_hard':
            loss_fn_ba = BatchAllTripletLoss(margin=margin, squared=squared,
                                             soft_margin=soft_margin)
            loss_fn_bh = BatchHardTripletLoss(margin=margin, squared=squared,
                                              soft_margin=soft_margin)
        else:
            print("{} is not defined!".format(select_method))
            return

        optimizer = optim.Adam(model.parameters(), lr=lr)
        scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=30,
                                        gamma=0.5)
        train_hist = History(name='train/a')
        val_hist = History(name='test/a')
        ckpter = CheckPoint(model=model, optimizer=optimizer,
                            path='{}/ckpt/{}_with_knn_exp'.format(
                                ROOT_DIR, select_method),
                            prefix=ckpt_prefix, interval=1, save_num=1)

        for epoch in range(1, batch_all_epochs + batch_hard_epochs + 1):
            scheduler.step()
            # first phase uses the batch-all loss, second phase batch-hard
            cur_loss_fn = (loss_fn_ba if epoch <= batch_all_epochs
                           else loss_fn_bh)
            train_loss, metrics = train_epoch(
                train_loader=train_batch_loader, model=model,
                loss_fn=cur_loss_fn, optimizer=optimizer,
                log_interval=log_interval,
                metrics=[AverageNoneZeroTripletsMetric()])
            train_logs = dict()
            train_logs['loss'] = train_loss
            for metric in metrics:
                train_logs[metric.name()] = metric.value()
            train_hist.add(logs=train_logs, epoch=epoch)

            # TODO sklearn knn
            test_acc = kNN(model=model, train_loader=train_batch_loader,
                           test_loader=test_loader, k=k)
            test_logs = {'acc': test_acc}
            val_hist.add(logs=test_logs, epoch=epoch)

            train_hist.clear()
            train_hist.plot()
            val_hist.plot()
            logging.info('Epoch{:04d}, {:15}, {}'.format(
                epoch, train_hist.name, str(train_hist.recent)))
            logging.info('Epoch{:04d}, {:15}, {}'.format(
                epoch, val_hist.name, str(val_hist.recent)))
            ckpter.check_on(epoch=epoch, monitor='acc',
                            loss_acc=val_hist.recent)

    # reload best embedding model
    best_model_filename = Reporter(
        ckpt_root=os.path.join(ROOT_DIR, 'ckpt'),
        exp='{}_with_knn_exp'.format(select_method)).select_best(
            run=ckpt_prefix).selected_ckpt
    model.load_state_dict(torch.load(best_model_filename)['model_state_dict'])

    train_embedding, train_labels = extract_embeddings(train_batch_loader,
                                                       model, embed_dims)
    test_embedding, test_labels = extract_embeddings(test_loader, model,
                                                     embed_dims)
    xgb_cls(train_data=train_embedding, train_label=train_labels,
            val_data=test_embedding, val_label=test_labels,
            exp_dir=os.path.dirname(log_file))

    # TODO plot all curve
    # BUG FIX: also require is_train_embedding_model — the pretrain histories
    # only exist when pretraining actually ran above.
    if is_train_embedding_model and using_pretrain:
        pt_train_hist.plot()
        pt_val_hist.plot()
def batch_hard_with_knn_exp(device='3', ckpt_prefix='Run01', lr=1e-3,
                            embedding_epochs=10, classify_epochs=100,
                            n_classes=10, n_samples=12, margin=0.3,
                            log_interval=50, log_level="INFO", k=3,
                            squared=False):
    """
    Train a shallow embedding net with the hardest-triplet loss, monitor it
    each epoch with a kNN classifier over the embeddings, then train a
    separate linear classifier on the best embeddings.

    :param device: CUDA device id (string).
    :param ckpt_prefix: run name used for log/checkpoint files.
    :param lr: learning rate (both phases).
    :param embedding_epochs: epochs for the embedding (triplet) phase.
    :param classify_epochs: epochs for the classifier phase.
    :param n_classes: classes per balanced batch.
    :param n_samples: samples per class per balanced batch.
    :param margin: triplet margin.
    :param k: kNN parameter.
    :param squared: use squared distances in the triplet loss.
    :return: None
    """
    # fix all RNG seeds for reproducibility; capture kwargs for the log
    SEED = 0
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
    kwargs = locals()

    log_file = '{}/ckpt/batch_hard_with_knn_exp/{}.log'.format(
        ROOT_DIR, ckpt_prefix)
    if not os.path.exists(os.path.dirname(log_file)):
        os.makedirs(os.path.dirname(log_file))
    logging.basicConfig(filename=log_file,
                        level=getattr(logging, log_level.upper(), None))
    logging.info(str(kwargs))

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of dataset train/a
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))
    test_dataset = DevSet(mode='test', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))

    # balanced (n_classes x n_samples) batches for online triplet mining;
    # the test split is batch-sampled the same way (consumed by kNN below)
    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)
    test_batch_sampler = BalanceBatchSampler(dataset=test_dataset,
                                             n_classes=n_classes,
                                             n_samples=n_samples)
    test_batch_loader = DataLoader(dataset=test_dataset,
                                   batch_sampler=test_batch_sampler,
                                   num_workers=1)

    model = networks.embedding_net_shallow()
    model = model.cuda()
    # hardest=True -> batch-hard mining within each balanced batch
    loss_fn = HardTripletLoss(margin=margin, hardest=True, squared=squared)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=30,
                                    gamma=0.5)
    # fit(train_loader=train_batch_loader, val_loader=test_batch_loader, model=model, loss_fn=loss_fn,
    #     optimizer=optimizer, scheduler=scheduler, n_epochs=embedding_epochs, log_interval=log_interval,
    #     metrics=[AverageNoneZeroTripletsMetric()])

    train_hist = History(name='train/a')
    val_hist = History(name='test/a')
    ckpter = CheckPoint(
        model=model, optimizer=optimizer,
        path='{}/ckpt/batch_hard_with_knn_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix, interval=1, save_num=1)

    for epoch in range(1, embedding_epochs + 1):
        # NOTE(review): scheduler stepped before the epoch's training —
        # matches the rest of this file; confirm against the PyTorch version
        # in use before changing the order.
        scheduler.step()
        train_loss, metrics = train_epoch(
            train_loader=train_batch_loader, model=model, loss_fn=loss_fn,
            optimizer=optimizer, log_interval=log_interval,
            metrics=[AverageNoneZeroTripletsMetric()])
        train_logs = dict()
        train_logs['loss'] = train_loss
        for metric in metrics:
            train_logs[metric.name()] = metric.value()
        train_hist.add(logs=train_logs, epoch=epoch)

        # validation metric: kNN accuracy over the current embeddings
        test_acc = kNN(model=model, train_loader=train_batch_loader,
                       test_loader=test_batch_loader, k=k)
        test_logs = {'acc': test_acc}
        val_hist.add(logs=test_logs, epoch=epoch)

        train_hist.clear()
        train_hist.plot()
        val_hist.plot()
        logging.info('Epoch{:04d}, {:15}, {}'.format(epoch, train_hist.name,
                                                     str(train_hist.recent)))
        logging.info('Epoch{:04d}, {:15}, {}'.format(epoch, val_hist.name,
                                                     str(val_hist.recent)))
        # keep the checkpoint with the best kNN accuracy
        ckpter.check_on(epoch=epoch, monitor='acc', loss_acc=val_hist.recent)

    # train classifier using learned embeddings.
    classify_model = networks.classifier()
    classify_model = classify_model.cuda()
    classify_loss_fn = nn.CrossEntropyLoss()
    classify_optimizer = optim.Adam(classify_model.parameters(), lr=lr)
    classify_scheduler = lr_scheduler.StepLR(optimizer=classify_optimizer,
                                             step_size=30, gamma=0.5)
    classify_train_hist = History(name='classify_train/a')
    classify_val_hist = History(name='classify_val/a')
    classify_ckpter = CheckPoint(
        model=classify_model, optimizer=classify_optimizer,
        path='{}/ckpt/batch_hard_with_knn_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix, interval=1, save_num=1)

    # reload best embedding model
    best_model_filename = Reporter(ckpt_root=os.path.join(ROOT_DIR, 'ckpt'),
                                   exp='batch_hard_with_knn_exp').select_best(
                                       run=ckpt_prefix).selected_ckpt
    model.load_state_dict(torch.load(best_model_filename)['model_state_dict'])

    # extract the (128-d) embeddings once, then train the classifier on them
    train_embedding, train_labels = extract_embeddings(train_batch_loader,
                                                       model, 128)
    test_embedding, test_labels = extract_embeddings(test_batch_loader, model,
                                                     128)
    classify_train_dataset = DatasetWrapper(data=train_embedding,
                                            labels=train_labels,
                                            transform=ToTensor())
    classify_test_dataset = DatasetWrapper(data=test_embedding,
                                           labels=test_labels,
                                           transform=ToTensor())
    classify_train_loader = DataLoader(dataset=classify_train_dataset,
                                       batch_size=128, shuffle=True,
                                       num_workers=1)
    classify_test_loader = DataLoader(dataset=classify_test_dataset,
                                      batch_size=128, shuffle=False,
                                      num_workers=1)
    fit(train_loader=classify_train_loader, val_loader=classify_test_loader,
        model=classify_model, loss_fn=classify_loss_fn,
        optimizer=classify_optimizer, scheduler=classify_scheduler,
        n_epochs=classify_epochs, log_interval=log_interval,
        metrics=[AccumulatedAccuracyMetric()],
        train_hist=classify_train_hist, val_hist=classify_val_hist,
        ckpter=classify_ckpter, logging=logging)
def classification_baseline_exp(device='2', ckpt_prefix='Run01', lr=1e-3,
                                n_epochs=300, batch_size=128, log_interval=50,
                                classify=True, log_level='INFO'):
    """
    Cross-entropy classification baseline on the device-a subset, with file
    logging and checkpointing of the best model.

    :param device: CUDA device id (string).
    :param ckpt_prefix: run name used for log/checkpoint files.
    :param lr: learning rate.
    :param n_epochs: training epochs.
    :param batch_size: mini-batch size.
    :param log_interval: batches between progress logs.
    :param classify: forwarded to vggish_bn.
    :param log_level: python logging level name.
    :return: None
    """
    kwargs = locals()
    log_file = '{}/ckpt/classification_exp/{}.log'.format(ROOT_DIR,
                                                          ckpt_prefix)
    if not os.path.exists(os.path.dirname(log_file)):
        os.makedirs(os.path.dirname(log_file))
    logging.basicConfig(filename=log_file,
                        level=getattr(logging, log_level.upper(), None))
    logging.info(str(kwargs))

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # normalize with train/a statistics
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    train_dataset = DevSet(mode='train', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))
    test_dataset = DevSet(mode='test', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=1)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=1)

    model = vggish_bn(classify)
    model = model.cuda()
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=30,
                                    gamma=0.5, last_epoch=-1)
    train_hist = History(name='train/a')
    val_hist = History(name='val/a')
    # BUG FIX: the checkpoint path was formatted with ckpt_prefix instead of
    # ROOT_DIR ('{}/ckpt/...'.format(ckpt_prefix)), so checkpoints landed
    # under a '<run-name>/ckpt/...' directory instead of next to the log file
    # like every other experiment in this module.
    ckpter = CheckPoint(model=model, optimizer=optimizer,
                        path='{}/ckpt/classification_exp'.format(ROOT_DIR),
                        prefix=ckpt_prefix, interval=1, save_num=1)
    fit(train_loader=train_loader, val_loader=test_loader, model=model,
        loss_fn=loss_fn, optimizer=optimizer, scheduler=scheduler,
        n_epochs=n_epochs, log_interval=log_interval,
        metrics=[AccumulatedAccuracyMetric()], train_hist=train_hist,
        val_hist=val_hist, ckpter=ckpter, logging=logging)
def hard_triplet_baseline_exp(device='3', ckpt_prefix='Run01', lr=1e-3,
                              n_epochs=300, n_classes=10, n_samples=12,
                              margin=0.3, log_interval=50, log_level="INFO"):
    """
    Random-hard triplet baseline: learn an embedding with
    RandomHardTripletLoss, then train a linear classifier on the frozen
    embeddings.

    :param device: CUDA device id (string).
    :param ckpt_prefix: run name used for log/checkpoint files.
    :param lr: learning rate (both phases).
    :param n_epochs: training epochs (both phases).
    :param n_classes: classes per balanced batch.
    :param n_samples: samples per class per balanced batch.
    :param margin: triplet margin.
    :param log_interval: batches between progress logs.
    :param log_level: python logging level name.
    :return: None
    """
    kwargs = locals()
    log_file = '{}/ckpt/hard_triplet_baseline_exp/{}.log'.format(
        ROOT_DIR, ckpt_prefix)
    if not os.path.exists(os.path.dirname(log_file)):
        os.makedirs(os.path.dirname(log_file))
    logging.basicConfig(filename=log_file,
                        level=getattr(logging, log_level.upper(), None))
    logging.info(str(kwargs))

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of dataset train/a
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='a')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))
    test_dataset = DevSet(mode='test', device='a', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))

    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)
    test_batch_sampler = BalanceBatchSampler(dataset=test_dataset,
                                             n_classes=n_classes,
                                             n_samples=n_samples)
    test_batch_loader = DataLoader(dataset=test_dataset,
                                   batch_sampler=test_batch_sampler,
                                   num_workers=1)

    model = networks.embedding_net_shallow()
    model = model.cuda()
    loss_fn = RandomHardTripletLoss(
        margin=margin,
        triplet_selector=RandomNegativeTripletSelector(margin=margin))
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=30,
                                    gamma=0.5)

    # phase 1: metric learning
    fit(train_loader=train_batch_loader, val_loader=test_batch_loader,
        model=model, loss_fn=loss_fn, optimizer=optimizer,
        scheduler=scheduler, n_epochs=n_epochs, log_interval=log_interval,
        metrics=[AverageNoneZeroTripletsMetric()])
    verification(model=model)

    # extract 64-d embeddings from the trained embedding net
    train_embedding_tl, train_labels_tl = extract_embeddings(
        train_batch_loader, model, 64)
    # utils.plot_embeddings(embeddings=train_embedding_tl, targets=train_labels_tl, title='train set')
    test_embedding_tl, test_labels_tl = extract_embeddings(
        test_batch_loader, model, 64)
    # utils.plot_embeddings(embeddings=test_embedding_tl, targets=test_labels_tl, title='test set')

    # phase 2: linear classifier on the frozen embeddings
    model2 = networks.classifier()
    model2 = model2.cuda()
    loss_fn2 = nn.CrossEntropyLoss()
    optimizer2 = optim.Adam(model2.parameters(), lr=lr)
    scheduler2 = lr_scheduler.StepLR(optimizer=optimizer2, step_size=30,
                                     gamma=0.5)

    train_dataset2 = DatasetWrapper(data=train_embedding_tl,
                                    labels=train_labels_tl,
                                    transform=ToTensor())
    test_dataset2 = DatasetWrapper(data=test_embedding_tl,
                                   labels=test_labels_tl,
                                   transform=ToTensor())
    train_loader2 = DataLoader(dataset=train_dataset2, batch_size=128,
                               shuffle=True, num_workers=1)
    test_loader2 = DataLoader(dataset=test_dataset2, batch_size=128,
                              shuffle=False, num_workers=1)

    train_hist = History(name='train/a')
    val_hist = History(name='val/a')
    # BUG FIX: the checkpoint was created with the *embedding* net and its
    # optimizer (model/optimizer) although this fit() trains model2, so the
    # saved "best" checkpoints never contained the classifier weights.
    # Checkpoint the model that is actually being trained.
    ckpter = CheckPoint(
        model=model2, optimizer=optimizer2,
        path='{}/ckpt/hard_triplet_baseline_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix, interval=1, save_num=1)
    fit(train_loader=train_loader2, val_loader=test_loader2, model=model2,
        loss_fn=loss_fn2, optimizer=optimizer2, scheduler=scheduler2,
        n_epochs=n_epochs, log_interval=log_interval,
        metrics=[AccumulatedAccuracyMetric()], train_hist=train_hist,
        val_hist=val_hist, ckpter=ckpter, logging=logging)
def hard_triplet_baseline_exp(device='3', lr=1e-3, n_epochs=300, n_classes=10,
                              n_samples=12, margin=0.3, log_interval=50):
    """
    Online-triplet-loss baseline: train a shallow embedding net on balanced
    batches and visualise the learned train/test embeddings.

    :param device: CUDA device id (string).
    :param lr: learning rate.
    :param n_epochs: training epochs.
    :param n_classes: classes per balanced batch.
    :param n_samples: samples per class per balanced batch.
    :param margin: triplet margin.
    :param log_interval: batches between progress logs.
    :return: None
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # normalize both splits with the statistics of train/a
    scaler = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mean, std = scaler.load_mu_sigma(mode='train', device='a')
    preprocess = Compose([Normalize(mean=mean, std=std), ToTensor()])

    # balanced (n_classes x n_samples) batch loaders for both splits
    loaders = {}
    for split in ('train', 'test'):
        dataset = DevSet(mode=split, device='a', transform=preprocess)
        sampler = BalanceBatchSampler(dataset=dataset, n_classes=n_classes,
                                      n_samples=n_samples)
        loaders[split] = DataLoader(dataset=dataset, batch_sampler=sampler,
                                    num_workers=1)

    net = networks.embedding_net_shallow().cuda()
    criterion = OnlineTripletLoss(
        margin=margin,
        triplet_selector=utils.RandomNegativeTripletSelector(margin=margin))
    opt = optim.Adam(net.parameters(), lr=lr)
    sched = lr_scheduler.StepLR(optimizer=opt, step_size=30, gamma=0.5)

    fit(train_loader=loaders['train'], val_loader=loaders['test'], model=net,
        loss_fn=criterion, optimizer=opt, scheduler=sched, n_epochs=n_epochs,
        log_interval=log_interval, metrics=[AverageNoneZeroTripletsMetric()])

    # project each split through the trained net and plot the embeddings
    for split in ('train', 'test'):
        embeddings, labels = utils.extract_embeddings(loaders[split], net)
        utils.plot_embeddings(embeddings, labels)
def batch_all_total_with_knn_exp(device='0', ckpt_prefix='Run01', lr=1e-3,
                                 embedding_epochs=10, classify_epochs=100,
                                 n_classes=10, n_samples=12, batch_size=128,
                                 margin=0.3, log_interval=50,
                                 log_level="INFO", k=3, squared=False,
                                 embed_dims=64, embed_net='vgg'):
    """
    Using the entire data set, including device A, B, C.
    Using the batch all method to select the triplets, kNN as the
    verification. Finally, using a linear classifier to classify the
    extracted embeddings.

    :param device: CUDA device id (string).
    :param ckpt_prefix: run name used for log/checkpoint files.
    :param lr: learning rate (both phases).
    :param embedding_epochs: epochs for the embedding (triplet) phase.
    :param classify_epochs: epochs for the classifier phase.
    :param n_classes: classes per balanced batch.
    :param n_samples: samples per class per balanced batch.
    :param batch_size: batch size for evaluation loaders.
    :param margin: triplet margin.
    :param k: kNN parameter.
    :param embed_dims: embedding dimensionality.
    :param embed_net: 'vgg' or 'shallow'.
    :return: None
    """
    # fix all RNG seeds for reproducibility
    SEED = 0
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    np.random.seed(SEED)
    kwargs = locals()

    log_file = '{}/ckpt/batch_all_total_with_knn_exp/{}.log'.format(
        ROOT_DIR, ckpt_prefix)
    if not os.path.exists(os.path.dirname(log_file)):
        os.makedirs(os.path.dirname(log_file))
    logging.basicConfig(filename=log_file,
                        level=getattr(logging, log_level.upper(), None))
    logging.info(str(kwargs))

    os.environ['CUDA_VISIBLE_DEVICES'] = str(device)

    # get the mean and std of dataset train/abc
    standarizer = TaskbStandarizer(data_manager=Dcase18TaskbData())
    mu, sigma = standarizer.load_mu_sigma(mode='train', device='abc')

    # get the normalized train dataset
    train_dataset = DevSet(mode='train', device='abc', transform=Compose(
        [Normalize(mean=mu, std=sigma), ToTensor()]))

    # get the normalized test dataset, one per recording device.
    # BUG FIX: the loops below used `device` as their loop variable, which
    # rebound (shadowed) the `device` *parameter* — the CUDA id — inside this
    # function; they now use a distinct name `dev`.
    test_dataset = {}
    device_list = ['a', 'b', 'c', 'bc']
    for dev in device_list:
        test_dataset[dev] = DevSet(
            mode='test', device=dev,
            transform=Compose([Normalize(mean=mu, std=sigma), ToTensor()]))

    train_batch_sampler = BalanceBatchSampler(dataset=train_dataset,
                                              n_classes=n_classes,
                                              n_samples=n_samples)
    train_batch_loader = DataLoader(dataset=train_dataset,
                                    batch_sampler=train_batch_sampler,
                                    num_workers=1)
    test_loader = {}
    for dev in device_list:
        test_loader[dev] = DataLoader(dataset=test_dataset[dev],
                                      batch_size=batch_size, shuffle=False,
                                      num_workers=1)

    # network architecture
    if embed_net == 'vgg':
        model = networks.vggish_bn()
    elif embed_net == 'shallow':
        model = networks.embedding_net_shallow()
    else:
        print("{} network doesn't exist.".format(embed_net))
        return

    # to gpu
    model = model.cuda()

    loss_fn = BatchAllTripletLoss(margin=margin, squared=squared,
                                  soft_margin=False)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=30,
                                    gamma=0.5)

    train_hist = History(name='train/a')
    val_hist = {}
    for dev in device_list:
        val_hist[dev] = History(name=('test/' + str(dev)))

    # learning embedding checkpointer.
    ckpter = CheckPoint(
        model=model, optimizer=optimizer,
        path='{}/ckpt/batch_all_total_with_knn_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix, interval=1, save_num=1)

    # training embedding network
    for epoch in range(1, embedding_epochs + 1):
        scheduler.step()
        train_loss, metrics = train_epoch(
            train_loader=train_batch_loader, model=model, loss_fn=loss_fn,
            optimizer=optimizer, log_interval=log_interval,
            metrics=[AverageNoneZeroTripletsMetric()])
        train_logs = {'loss': train_loss}
        for metric in metrics:
            train_logs[metric.name()] = metric.value()
        train_hist.add(logs=train_logs, epoch=epoch)

        # per-device validation: kNN accuracy over the current embeddings
        for dev in device_list:
            test_acc = kNN(model=model, train_loader=train_batch_loader,
                           test_loader=test_loader[dev], k=k)
            test_logs = {'acc': test_acc}
            val_hist[dev].add(logs=test_logs, epoch=epoch)

        train_hist.clear()
        train_hist.plot()
        logging.info('Epoch{:04d}, {:15}, {}'.format(epoch, train_hist.name,
                                                     str(train_hist.recent)))
        for dev in device_list:
            val_hist[dev].plot()
            logging.info('Epoch{:04d}, {:15}, {}'.format(
                epoch, val_hist[dev].name, str(val_hist[dev].recent)))
        # model selection monitors accuracy on the combined b+c devices
        ckpter.check_on(epoch=epoch, monitor='acc',
                        loss_acc=val_hist['bc'].recent)

    # train classifier using learned embeddings.
    classify_model = networks.classifier()
    classify_model = classify_model.cuda()
    classify_loss_fn = nn.CrossEntropyLoss()
    classify_optimizer = optim.Adam(classify_model.parameters(), lr=lr)
    classify_scheduler = lr_scheduler.StepLR(optimizer=classify_optimizer,
                                             step_size=30, gamma=0.5)
    classify_train_hist = History(name='classify_train/a')
    classify_val_hist = {}
    for dev in device_list:
        classify_val_hist[dev] = History(name=('classify_val/' + str(dev)))
    classify_ckpter = CheckPoint(
        model=classify_model, optimizer=classify_optimizer,
        path='{}/ckpt/batch_all_total_with_knn_exp'.format(ROOT_DIR),
        prefix=ckpt_prefix, interval=1, save_num=1)

    # reload best embedding model
    best_model_filename = Reporter(
        ckpt_root=os.path.join(ROOT_DIR, 'ckpt'),
        exp='batch_all_total_with_knn_exp').select_best(
            run=ckpt_prefix).selected_ckpt
    model.load_state_dict(torch.load(best_model_filename)['model_state_dict'])

    # learned best embeddings
    train_embedding, train_labels = extract_embeddings(train_batch_loader,
                                                       model, embed_dims)
    test_embedding, test_labels = {}, {}
    for dev in device_list:
        test_embedding[dev], test_labels[dev] = extract_embeddings(
            test_loader[dev], model, embed_dims)

    # wrap embeddings(numpy) to Dataset
    classify_train_dataset = DatasetWrapper(data=train_embedding,
                                            labels=train_labels,
                                            transform=ToTensor())
    classify_test_dataset = {}
    for dev in device_list:
        classify_test_dataset[dev] = DatasetWrapper(
            data=test_embedding[dev], labels=test_labels[dev],
            transform=ToTensor())
    classify_train_loader = DataLoader(dataset=classify_train_dataset,
                                       batch_size=batch_size, shuffle=True,
                                       num_workers=1)
    classify_test_loader = {}
    for dev in device_list:
        classify_test_loader[dev] = DataLoader(
            dataset=classify_test_dataset[dev], batch_size=batch_size,
            shuffle=False, num_workers=1)

    # NOTE(review): fit() receives dicts of loaders/histories here — assumes
    # fit supports per-device dict validation; confirm against its signature.
    fit(train_loader=classify_train_loader, val_loader=classify_test_loader,
        model=classify_model, loss_fn=classify_loss_fn,
        optimizer=classify_optimizer, scheduler=classify_scheduler,
        n_epochs=classify_epochs, log_interval=log_interval,
        metrics=[AccumulatedAccuracyMetric()],
        train_hist=classify_train_hist, val_hist=classify_val_hist,
        ckpter=classify_ckpter, logging=logging)