Example #1
    def __init__(self, net, device, query, gallery, batch_size=64, top_k=100):
        self.top_k = top_k

        self.net = net
        self.device = device

        dataset = getter.get_instance(query)
        self.query_dataloader = DataLoader(dataset, batch_size=batch_size)

        dataset = getter.get_instance(gallery)
        self.gallery_dataloader = DataLoader(dataset, batch_size=batch_size)

        self.reset()
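
Every snippet on this page resolves its components through get_instance / getter.get_instance, which is not shown in the listing. Below is a minimal, hypothetical sketch of a registry-based factory that matches the call pattern visible in these examples (a config dict naming the class, plus optional keyword overrides). The registry, the 'name'/'args' layout, and DummyDataset are assumptions for illustration, not the actual utils.getter implementation.

# Hypothetical sketch of a config-driven factory in the spirit of get_instance.
REGISTRY = {}

def register(cls):
    """Register a class under its own name so configs can refer to it."""
    REGISTRY[cls.__name__] = cls
    return cls

def get_instance(config, **kwargs):
    """Instantiate the class named in config['name'] with config['args'] plus overrides."""
    args = dict(config.get('args') or {})
    args.update(kwargs)  # e.g. dataset=..., optimizer=..., params=...
    return REGISTRY[config['name']](**args)

@register
class DummyDataset:
    def __init__(self, root, train=True):
        self.root, self.train = root, train

ds = get_instance({'name': 'DummyDataset', 'args': {'root': './data'}}, train=False)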
Example #2
    def train_dataloader(self):

        train_dataloader = get_instance(
            self.config['dataset']['train']['loader'],
            dataset=self.train_dataset,
            num_workers=4)
        return train_dataloader
Example #3
    def __init__(self, dataset):
        self.dataset = getter.get_instance(dataset)

        self.train = self.dataset.train
        self.transform = self.dataset.transform

        self.labels = self.dataset.labels
        self.data = self.dataset
        self.labels_set = set(self.labels.numpy())
        self.label_to_indices = {label: np.where(self.labels.numpy() == label)[0]
                                 for label in self.labels_set}

        if not self.train:
            # Pre-generate fixed (anchor, positive, negative) index triplets for
            # evaluation: the positive shares the anchor's label, the negative is
            # drawn from a randomly chosen different label.
            triplets = [[i,
                         np.random.choice(
                             self.label_to_indices[self.labels[i].item()]),
                         np.random.choice(self.label_to_indices[
                             np.random.choice(
                                 list(
                                     self.labels_set - set([self.labels[i].item()]))
                             )
                         ])
                         ]
                        for i in range(len(self.data))]
            self.triplets = triplets
Example #4
    def __init__(self, dataset):
        self.dataset = getter.get_instance(dataset)

        self.train = self.dataset.train
        self.transform = self.dataset.transform

        self.labels = self.dataset.labels
        self.data = self.dataset
        self.labels_set = set(self.labels.numpy())
        self.label_to_indices = {label: np.where(self.labels.numpy() == label)[0]
                                 for label in self.labels_set}

        if not self.train:
            # Pre-generate fixed evaluation pairs: even indices form positive
            # pairs (same label, target 1), odd indices form negative pairs
            # (different label, target 0).
            positive_pairs = [[i,
                               np.random.choice(
                                   self.label_to_indices[self.labels[i].item()]),
                               1]
                              for i in range(0, len(self.data), 2)]

            negative_pairs = [[i,
                               np.random.choice(self.label_to_indices[
                                   np.random.choice(
                                       list(
                                           self.labels_set - set([self.labels[i].item()]))
                                   )
                               ]),
                               0]
                              for i in range(1, len(self.data), 2)]
            self.pairs = positive_pairs + negative_pairs
Example #5
    def __init__(self, dataset, niters, nimgs):
        self.dataset = getter.get_instance(dataset)
        self.niters = niters
        self.nimgs = nimgs

        if len(self.dataset) != len(self):
            print("Random getitem")
        else:
            print("Get all items")
Example #6
def evaluate(mode, gpus, weight_path,
             query_dir, query_label,
             gallery_dir, gallery_label,
             top_k, cmc_rank):
    dev_id = 'cuda:{}'.format(gpus) \
        if torch.cuda.is_available() and gpus is not None \
        else 'cpu'
    device = torch.device(dev_id)

    # Get pretrained model
    assert os.path.exists(weight_path)
    pretrained = torch.load(weight_path, map_location=dev_id)

    # 1: Define network
    net = get_instance(pretrained['config']['model']).to(device)
    net.load_state_dict(pretrained['model_state_dict'])

    # 2: Load datasets
    print('Load queries...')
    dataset = AIC2020Track2(query_dir, query_label, train=True)
    dataloader = DataLoader(dataset, batch_size=64)

    print('Extract queries...')
    q_embs, q_labels = extract_embeddings(dataloader, net, device)

    print('Load gallery...')
    dataset = AIC2020Track2(root=gallery_dir, path=gallery_label, train=True)
    dataloader = DataLoader(dataset, batch_size=64)

    print('Extract gallery...')
    g_embs, g_labels = extract_embeddings(dataloader, net, device)

    if mode == 'finetune':
        import optuna

        def objective(trial):
            k1 = trial.suggest_int('k1', 10, 60)
            k2 = trial.suggest_int('k2', 1, 6)
            l = trial.suggest_uniform('lambda', 0.1, 0.9)

            mAP, _ = reid_evaluate(q_embs, g_embs, q_labels, g_labels,
                                   cmc_rank, top_k,
                                   k1, k2, l)
            return -mAP

        study = optuna.create_study()
        study.optimize(objective, n_trials=100)

        print(study.best_params)
    elif mode == 'eval':
        print('Evaluate...')
        cmc_rank = 1
        top_k = 100
        mAP, cmc = reid_evaluate(q_embs, g_embs, q_labels, g_labels,
                                 cmc_rank, top_k, 55, 6, 0.1)
        print(f'mAP@{top_k}={mAP}, cmc@{cmc_rank}={cmc}')
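
extract_embeddings is called above but not defined in this listing. A plausible sketch, assuming the dataloader yields (image, label) batches and the network returns one embedding per image; the repository's real helper may differ in details:

import torch

@torch.no_grad()
def extract_embeddings(dataloader, net, device):
    # Run the network over the whole loader and collect embeddings and labels on CPU.
    net.eval()
    embs, labels = [], []
    for imgs, lbls in dataloader:
        embs.append(net(imgs.to(device)).cpu())
        labels.append(lbls)
    return torch.cat(embs), torch.cat(labels)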
Example #7
 def __init__(self, losses, weights=None):
     super().__init__()
     self.loss_fns = nn.ModuleList(
         [getter.get_instance(loss) for loss in losses])
     if weights is None:
         weights = torch.ones(len(losses))
     elif isinstance(weights, list):
         weights = torch.FloatTensor(weights)
     # Normalize so the loss weights sum to one
     weights /= weights.sum()
     self.register_buffer('weights', weights)
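
The snippet above only shows the constructor of a weighted multi-loss module. A minimal, self-contained sketch of how such a module's forward pass could combine the wrapped criteria with the normalized weights; the class name and the example criteria are hypothetical, not the repository's code:

import torch
import torch.nn as nn

class WeightedSumLoss(nn.Module):
    def __init__(self, loss_fns, weights=None):
        super().__init__()
        self.loss_fns = nn.ModuleList(loss_fns)
        if weights is None:
            weights = torch.ones(len(loss_fns))
        elif isinstance(weights, list):
            weights = torch.FloatTensor(weights)
        weights /= weights.sum()
        self.register_buffer('weights', weights)

    def forward(self, output, target):
        # Apply each criterion to the same (output, target) pair and take the weighted sum.
        losses = torch.stack([fn(output, target) for fn in self.loss_fns])
        return (self.weights * losses).sum()

criterion = WeightedSumLoss([nn.CrossEntropyLoss(), nn.MultiMarginLoss()], weights=[0.7, 0.3])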
Example #8
def generate_submission(gpus, weight_path, save_dir,
                        query_dir, gallery_dir, top_k):
    dev_id = 'cuda:{}'.format(gpus) \
        if torch.cuda.is_available() and gpus is not None \
        else 'cpu'
    device = torch.device(dev_id)

    # Get pretrained model
    assert os.path.exists(weight_path)
    pretrained = torch.load(weight_path, map_location=dev_id)

    model_id = pretrained['config']['id'] + '-' + \
        datetime.now().strftime('%Y_%m_%d-%H_%M_%S')

    # 1: Define network
    net = get_instance(pretrained['config']['model']).to(device)
    net.load_state_dict(pretrained['model_state_dict'])

    # 2: Load datasets
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    print('Load queries...')
    dataset = ImageFolderDataset(query_dir, transform)
    dataloader = DataLoader(dataset, batch_size=64)

    print('Extract queries...')
    q_embs, q_ids = extract_embeddings(dataloader, net, device)

    print('Load gallery...')
    dataset = ImageFolderDataset(gallery_dir, transform)
    dataloader = DataLoader(dataset, batch_size=64)

    print('Extract gallery...')
    g_embs, g_ids = extract_embeddings(dataloader, net, device)

    print('Generate...')
    submission = ranking(q_embs, q_ids,
                         g_embs, g_ids,
                         top_k)
    create_submission_file(model_id, save_dir, submission)
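
ranking is only called here, not defined. One plausible implementation consistent with this call, assuming plain Euclidean nearest-neighbour search over the embeddings; the actual function may use a different distance or add re-ranking:

import torch

def ranking(q_embs, q_ids, g_embs, g_ids, top_k):
    # For each query, keep the ids of the top_k closest gallery embeddings.
    dists = torch.cdist(q_embs, g_embs)          # (num_queries, num_gallery)
    order = dists.argsort(dim=1)[:, :top_k]
    return {qid: [g_ids[j] for j in row.tolist()]
            for qid, row in zip(q_ids, order)}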
Example #9
def predict(config):

    device = xm.xla_device()
    config['dataset']['train']['args']['data_root_dir'] = './data/clean/'
    config['dataset']['train']['args']['infer'] = True
    test_dataset = get_instance(config['dataset']['train'])
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=64,
                                 shuffle=False,
                                 drop_last=False)
    model = pipeline(config)
    checkpoint_path = config['trainer']['cp_dir']
    print('Loading model ...')
    checkpoint = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])

    model.to(device)
    model.eval()
    model.freeze()
    print('generating submission ... ')
    res = torch.Tensor().long().cpu()
    for x in tqdm(test_dataloader):
        inps = x['input_ids'].to(device)
        mask = x['attention_mask'].to(device)
        tmp = model(inps, mask)

        tmp = tmp.argmax(-1).cpu()
        res = torch.cat((res, tmp), 0)
    length = res.shape[0]
    if config['model']['args']['nclasses'] == 3:
        res = res + torch.Tensor([3] * length).long()
    else:
        res = res + torch.Tensor([1] * length).long()

    res_csv = res.tolist()
    idx = [x + 1 for x in range(length)]
    df = pd.DataFrame(zip(idx, res_csv), columns=['review_id', 'rating'])
    print(length)
    df.to_csv(config['out'], index=False)
Example #10
 def configure_optimizers(self):
     optimizer = get_instance(self.config['optimizer'],
                              params=self.model.parameters())
     return optimizer
Example #11
 def val_dataloader(self):
     val_dataloader = get_instance(self.config['dataset']['val']['loader'],
                                   dataset=self.val_dataset,
                                   num_workers=4)
     return val_dataloader
Example #12
from utils.getter import get_instance

if __name__ == "__main__":

    # config_path = '/content/shopee-contest2/configs/train/baseline_colab.yaml'
    cp_model_dir = './cp/bert_multi3/best_metric_Accuracy.pth'
    csv_test_dir = './data/clean/test.csv'

    config = torch.load(cp_model_dir, map_location='cpu')['config']

    dev_id = 'cuda:{}'.format(config['gpus']) \
        if torch.cuda.is_available() and config.get('gpus', None) is not None \
        else 'cpu'
    device = torch.device(dev_id)
    model = get_instance(config['model']).to(device)
    model.load_state_dict(torch.load(cp_model_dir)['model_state_dict'])

    print('load weights-----------------------')

    # Classify
    print('generate submission----------------')

    dataset = get_instance(config['dataset']['infer'])
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=5, shuffle=False)

    # tbar.set_description_str(f'{iter_id}: {loss.item()}')

    model.eval()
    with torch.no_grad():
Example #13
 def prepare_data(self):
     self.train_dataset = get_instance(self.config['dataset']['train'])
     self.val_dataset = get_instance(self.config['dataset']['val'])
Example #14
 def __init__(self, config):
     super().__init__()
     self.config = config
     self.model = get_instance(self.config['model'])
     self.loss = get_instance(self.config['loss'])
Example #15
def train(config):
    assert config is not None, "Do not have config file!"

    pprint.PrettyPrinter(indent=2).pprint(config)

    dev_id = 'cuda:{}'.format(config['gpus']) \
        if torch.cuda.is_available() and config.get('gpus', None) is not None \
        else 'cpu'
    device = torch.device(dev_id)

    # Get pretrained model
    pretrained_path = config["pretrained"]

    pretrained = None
    if pretrained_path is not None:
        pretrained = torch.load(pretrained_path, map_location=dev_id)
        for item in ["model"]:
            config[item] = pretrained["config"][item]

    # 1: Load datasets
    set_seed()
    train_dataset = get_instance(config['dataset']['train'])
    train_dataloader = get_instance(config['dataset']['train']['loader'],
                                    dataset=train_dataset)

    val_dataset = get_instance(config['dataset']['val'])
    val_dataloader = get_instance(config['dataset']['val']['loader'],
                                  dataset=val_dataset)

    # 2: Define network
    set_seed()
    model = get_instance(config['model']).to(device)

    # Train from pretrained if it is not None
    if pretrained is not None:
        model.load_state_dict(pretrained['model_state_dict'])

    # 3: Define loss
    criterion = get_instance(config['loss']).to(device)

    # 4: Define Optimizer
    optimizer = get_instance(config['optimizer'],
                             params=model.parameters())
    if pretrained is not None:
        optimizer.load_state_dict(pretrained['optimizer_state_dict'])

    # 5: Define Scheduler
    set_seed()
    scheduler = get_instance(config['scheduler'],
                             optimizer=optimizer)

    # 6: Define metrics
    set_seed()
    metric = {mcfg['name']: get_instance(mcfg,
                                         net=model, device=device)
              for mcfg in config['metric']}

    # 7: Create trainer
    trainer = Trainer(device=device,
                      config=config,
                      model=model,
                      criterion=criterion,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      metric=metric)

    # 8: Start to train
    set_seed()
    trainer.train(train_dataloader=train_dataloader,
                  val_dataloader=val_dataloader)
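
set_seed is called before every stage above but not shown in the listing. A conventional implementation, assuming the usual Python/NumPy/PyTorch RNGs; the repository's helper may also configure cuDNN determinism:

import random
import numpy as np
import torch

def set_seed(seed=0):
    # Seed all RNGs so each training stage starts from a reproducible state.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)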
Example #16
 def __init__(self, extractor):
     super().__init__()
     self.embedding_net = getter.get_instance(extractor)
     self.feature_dim = self.embedding_net.feature_dim
Example #17
def train(config):
    assert config is not None, "Do not have config file!"

    pprint.PrettyPrinter(indent=2).pprint(config)

    # Get device
    dev_id = 'cuda:{}'.format(config['gpus']) \
        if torch.cuda.is_available() and config.get('gpus', None) is not None \
        else 'cpu'
    device = torch.device(dev_id)

    # Get pretrained model
    pretrained_path = config["pretrained"]

    pretrained = None
    if (str(pretrained_path) != 'None'):
        pretrained = torch.load(pretrained_path, map_location=dev_id)
        # for item in ["model"]:
        #     config[item] = pretrained["config"][item]

    # 1: Load datasets
    train_dataloader, val_dataloader = \
        get_data(config['dataset'], config['seed'])

    # 2: Define network
    set_seed(config['seed'])
    model = get_instance(config['model']).to(device)

    # if config['parallel']:
    #     print("Load parallel model")
    #     model = nn.DataParallel(model)

    # Train from pretrained if it is not None
    if pretrained is not None:
        pretrained = torch.load(pretrained_path)
        if 'model_state_dict' in pretrained:
            model.load_state_dict(pretrained['model_state_dict'])
        else:
            print("Load model case 2")
            try:
                ret = model.load_state_dict(pretrained, strict=False)
            except RuntimeError as e:
                print(f'[Warning] Ignoring {e}')
                print(
                    '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
                )

    # 3: Define loss
    set_seed(config['seed'])
    criterion = get_instance(config['loss']).to(device)
    criterion.device = device

    # 4: Define Optimizer
    set_seed(config['seed'])
    optimizer = get_instance(config['optimizer'], params=model.parameters())
    # 5: Define Scheduler
    set_seed(config['seed'])
    scheduler = get_instance(config['scheduler'], optimizer=optimizer)

    # 6: Define metrics
    set_seed(config['seed'])
    metric = {mcfg['name']: get_instance(mcfg) for mcfg in config['metric']}

    # 7: Create trainer
    set_seed(config['seed'])
    trainer = Trainer(device=device,
                      config=config,
                      model=model,
                      criterion=criterion,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      metric=metric)

    # 8: Start to train
    set_seed(config['seed'])
    trainer.train(train_dataloader=train_dataloader,
                  val_dataloader=val_dataloader)
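
Both train() variants expect a nested config in which every component is a small dict handed to get_instance. A purely illustrative example of that shape; all keys, class names, and values here are assumptions, not taken from the repository:

config = {
    'gpus': 0,
    'seed': 42,
    'pretrained': None,
    'model':     {'name': 'BaselineModel', 'args': {'nclasses': 42}},
    'loss':      {'name': 'CrossEntropyLoss', 'args': {}},
    'optimizer': {'name': 'Adam', 'args': {'lr': 1e-3}},
    'scheduler': {'name': 'StepLR', 'args': {'step_size': 10}},
    'metric':    [{'name': 'Accuracy', 'args': {}}],
    'dataset': {
        'train': {'name': 'MyDataset', 'args': {'root': './data', 'train': True},
                  'loader': {'name': 'DataLoader', 'args': {'batch_size': 64, 'shuffle': True}}},
        'val':   {'name': 'MyDataset', 'args': {'root': './data', 'train': False},
                  'loader': {'name': 'DataLoader', 'args': {'batch_size': 64}}},
    },
}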
Example #18
 def __init__(self, extractor_cfg, nclasses):
     super().__init__()
     self.nclasses = nclasses
     self.extractor = getter.get_instance(extractor_cfg)
     self.feature_dim = self.extractor.feature_dim
     self.classifier = nn.Linear(self.feature_dim, self.nclasses)
Example #19
 def __init__(self, margin, pair_selector):
     super().__init__()
     self.margin = margin
     self.pair_selector = getter.get_instance(pair_selector)
Example #20
 def __init__(self, margin, selector):
     super().__init__()
     self.margin = margin
     self.triplet_selector = getter.get_instance(selector)
     self.cls = nn.Linear(1280, 333)
Example #21
 def __init__(self, margin, selector):
     super(OnlineTripletLoss, self).__init__()
     self.margin = margin
     self.triplet_selector = getter.get_instance(selector)
Example #22
                    help='(single) GPU to use (default: None)')
parser.add_argument('-b',
                    type=int,
                    default=64,
                    help='batch size (default: 64)')
args = parser.parse_args()

# Device
dev_id = 'cuda:{}'.format(args.g) \
    if torch.cuda.is_available() and args.g is not None \
    else 'cpu'
device = torch.device(dev_id)

# Load model
config = torch.load(args.w, map_location=dev_id)
model = get_instance(config['config']['model']).to(device)
model.load_state_dict(config['model_state_dict'])

# Load data
dataset = ShopeeDataset(img_dir=args.d, csv_path=args.c, is_train=False)
dataloader = DataLoader(dataset, batch_size=args.b)

# Metrics
metrics = {
    'Accuracy': Accuracy(),
    'ConfusionMatrix': ConfusionMatrix(nclasses=42),
}

with torch.no_grad():
    for m in metrics.values():
        m.reset()
Example #23
 def __init__(self, extractor_cfg):
     super().__init__()
     self.extractor = getter.get_instance(extractor_cfg)
     self.feature_dim = self.extractor.feature_dim