Example #1
def get_coefs(datacfg, darknetcfg, learnetcfg, weightfile):
    options = read_data_cfg(datacfg)
    metadict = options['meta']

    m = Darknet(darknetcfg, learnetcfg)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    kwargs = {'num_workers': 4, 'pin_memory': True}

    metaset = dataset.MetaDataset(metafiles=metadict,
                                  train=False,
                                  ensemble=True,
                                  with_ids=True)
    metaloader = torch.utils.data.DataLoader(metaset,
                                             batch_size=64,
                                             shuffle=False,
                                             **kwargs)
    n_cls = len(metaset.classes)

    coef = [[[] for j in range(n_cls)] for i in range(3)]
    cnt = [0.0] * n_cls
    print('===> Generating dynamic weights...')
    kkk = 0
    for metax, mask, clsids in metaloader:
        print('===> {}/{}'.format(kkk, len(metaset) // 64))
        kkk += 1
        metax, mask = metax.cuda(), mask.cuda()
        metax, mask = Variable(metax, volatile=True), Variable(mask,
                                                               volatile=True)
        dws = m.meta_forward(metax, mask)
        for ci, c in enumerate(clsids):
            for i in range(3):
                coef[i][c].append(dws[i][ci].data.squeeze().cpu().numpy())

    return coef
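Usage sketch (not from the original repo): get_coefs returns, for each of the three reweighting layers, one list per class of per-sample weight vectors as NumPy arrays. The snippet below averages them into a single prototype vector per class and layer; the cfg and checkpoint paths are placeholder assumptions.

import numpy as np

# Placeholder paths -- substitute your own data/darknet/learnet cfgs and checkpoint.
coef = get_coefs('cfg/metatune.data', 'cfg/darknet_dynamic.cfg',
                 'cfg/reweighting_net.cfg', 'backup/000010.weights')
# coef[i][c] is a list of per-sample vectors for layer i, class c.
prototypes = [[np.mean(np.stack(vecs), axis=0) if vecs else None
               for vecs in layer]
              for layer in coef]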
Example #2
def valid(datacfg,
          darknetcfg,
          learnetcfg,
          weightfile,
          outfile,
          traindict,
          use_baserw=False):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    metadict = traindict  #options['meta']
    # name_list = options['names']
    # backup = cfg.backup
    ckpt = weightfile.split('/')[-1].split('.')[0]
    backup = weightfile.split('/')[-2]
    ckpt_pre = '/ene_' if use_baserw else '/ene'
    prefix = 'results/' + backup.split('/')[-1] + ckpt_pre + ckpt
    print('saving to: ' + prefix)
    # prefix = 'results/' + weightfile.split('/')[1]
    # names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(darknetcfg, learnetcfg)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert (valid_batchsize > 1)

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False,
                                               **kwargs)

    if False:
        metaset = dataset.MetaDataset(metafiles=metadict,
                                      train=False,
                                      ensemble=True)
        metaloader = torch.utils.data.DataLoader(metaset,
                                                 batch_size=len(metaset),
                                                 shuffle=False,
                                                 **kwargs)
        metaloader = iter(metaloader)
        n_cls = len(metaset.classes)

        print('===> Generating dynamic weights...')
        metax, mask = next(metaloader)
        metax, mask = metax.cuda(), mask.cuda()
        metax, mask = Variable(metax, volatile=True), Variable(mask,
                                                               volatile=True)
        dynamic_weights = m.meta_forward(metax, mask)

        for i in range(len(dynamic_weights)):
            assert dynamic_weights[i].size(0) == sum(metaset.meta_cnts)
            inds = np.cumsum([0] + metaset.meta_cnts)
            new_weight = []
            for j in range(len(metaset.meta_cnts)):
                new_weight.append(
                    torch.mean(dynamic_weights[i][inds[j]:inds[j + 1]], dim=0))
            dynamic_weights[i] = torch.stack(new_weight)
            print(dynamic_weights[i].shape)
    else:

        metaset = dataset.MetaDataset(metafiles=metadict,
                                      train=False,
                                      ensemble=True,
                                      with_ids=True)
        metaloader = torch.utils.data.DataLoader(metaset,
                                                 batch_size=64,
                                                 shuffle=False,
                                                 **kwargs)
        # metaloader = iter(metaloader)
        n_cls = len(metaset.classes)
        print(n_cls)

        enews = [0.0] * n_cls
        cnt = [0.0] * n_cls
        print('===> Generating dynamic weights...')
        kkk = 0
        #         import pdb; pdb.set_trace
        for metax, mask, clsids in metaloader:
            print('===> {}/{}'.format(kkk, len(metaset) // 64))
            kkk += 1
            metax, mask = metax.cuda(), mask.cuda()
            metax, mask = Variable(metax,
                                   volatile=True), Variable(mask,
                                                            volatile=True)
            dws = m.meta_forward(metax, mask)
            dw = dws[0]
            for ci, c in enumerate(clsids):
                #                 print(ci, c, enews[c], cnt[c], dw[ci])
                enews[c] = enews[c] * cnt[c] / (cnt[c] +
                                                1) + dw[ci] / (cnt[c] + 1)
                cnt[c] += 1
        dynamic_weights = [torch.stack(enews)]
        #         import pdb; pdb.set_trace()
        #         import pickle
        #         with open('dynamic_weights.pkl', 'wb') as f:
        #             tmp = [x.data.cpu().numpy() for x in dynamic_weights]
        #             pickle.dump(tmp, f)
        #         import pdb; pdb.set_trace()

        if use_baserw:
            import pickle
            # f = 'data/rws/voc_novel{}_.pkl'.format(cfg.novelid)
            f = 'dynamic_weights.pkl'
            print('===> Loading from {}...'.format(f))
            with open(f, 'rb') as fh:
                # with open('data/rws/voc_novel0_.pkl', 'rb') as fh:
                rws = pickle.load(fh)
                dynamic_weights = [
                    Variable(torch.from_numpy(rw)).cuda() for rw in rws
                ]
        #             tki = cfg._real_base_ids
        #             for i in range(len(rws)):
        #                 dynamic_weights[i][tki] = rws[i][tki]
        # dynamic_weights[i] = rws[i]

    if not os.path.exists(prefix):
        # os.mkdir(prefix)
        os.makedirs(prefix)

    fps = [0] * n_cls
    for i, cls_name in enumerate(metaset.classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, cls_name)
        fps[i] = open(buf, 'w')

    lineId = -1

    conf_thresh = 0.005
    nms_thresh = 0.45
    for batch_idx, (data, target) in enumerate(valid_loader):
        #         import pdb; pdb.set_trace()
        data = data.cuda()
        data = Variable(data, volatile=True)
        output = m.detect_forward(data, dynamic_weights)

        if isinstance(output, tuple):
            output = (output[0].data, output[1].data)
        else:
            output = output.data

        # import pdb; pdb.set_trace()
        batch_boxes = get_region_boxes_v2(output, n_cls, conf_thresh,
                                          m.num_classes, m.anchors,
                                          m.num_anchors, 0, 1)

        if isinstance(output, tuple):
            bs = output[0].size(0)
        else:
            assert output.size(0) % n_cls == 0
            bs = output.size(0) // n_cls

        # import pdb; pdb.set_trace()
        for b in range(bs):
            lineId = lineId + 1
            imgpath = valid_dataset.lines[lineId].rstrip()
            print(imgpath)
            imgid = os.path.basename(imgpath).split('.')[0]
            width, height = get_image_size(imgpath)
            for i in range(n_cls):
                # oi = i * bs + b
                oi = b * n_cls + i
                boxes = batch_boxes[oi]
                boxes = nms(boxes, nms_thresh)
                for box in boxes:
                    x1 = (box[0] - box[2] / 2.0) * width
                    y1 = (box[1] - box[3] / 2.0) * height
                    x2 = (box[0] + box[2] / 2.0) * width
                    y2 = (box[1] + box[3] / 2.0) * height

                    det_conf = box[4]
                    for j in range((len(box) - 5) // 2):
                        cls_conf = box[5 + 2 * j]
                        cls_id = box[6 + 2 * j]
                        prob = det_conf * cls_conf
                        fps[i].write('%s %f %f %f %f %f\n' %
                                     (imgid, prob, x1, y1, x2, y2))

    for i in range(n_cls):
        fps[i].close()
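A note on the enews update above: enews[c] = enews[c] * cnt[c] / (cnt[c] + 1) + dw[ci] / (cnt[c] + 1) is an incremental (running) mean over all reweighting vectors seen for class c, so torch.stack(enews) holds one averaged weight vector per class. A minimal self-contained check of that identity:

import torch

torch.manual_seed(0)
samples = [torch.randn(4) for _ in range(7)]
mean, cnt = torch.zeros(4), 0.0
for dw in samples:
    # Same update as the loop above: old mean weighted by cnt/(cnt+1),
    # new sample weighted by 1/(cnt+1).
    mean = mean * cnt / (cnt + 1) + dw / (cnt + 1)
    cnt += 1
assert torch.allclose(mean, torch.stack(samples).mean(dim=0), atol=1e-6)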
Example #3
def valid(datacfg, darknetcfg, learnetcfg, weightfile, outfile):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    metadict = options['meta']
    # name_list = options['names']
    # backup = cfg.backup
    ckpt = weightfile.split('/')[-1].split('.')[0]
    backup = weightfile.split('/')[-2]
    prefix = 'results/' + backup.split('/')[-1] + '/e' + ckpt
    print('saving to: ' + prefix)
    # prefix = 'results/' + weightfile.split('/')[1]
    # names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]
    
    m = Darknet(darknetcfg, learnetcfg)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()
    torch.set_grad_enabled(False)

    valid_dataset = dataset.listDataset(valid_images, shape=(m.width, m.height),
                       shuffle=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                       ]))
    valid_batchsize = 2
    assert(valid_batchsize > 1)

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs) 

    metaset = dataset.MetaDataset(metafiles=metadict, train=False)
    metaloader = torch.utils.data.DataLoader(
        metaset,
        batch_size=metaset.batch_size,
        shuffle=False,
        **kwargs
    )
    metaloader = iter(metaloader)
    n_cls = len(metaset.classes)

    if not os.path.exists(prefix):
        # os.mkdir(prefix)
        os.makedirs(prefix)

    fps = [0]*n_cls
    for i, cls_name in enumerate(metaset.classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, cls_name)
        fps[i] = open(buf, 'w')
   
    lineId = -1
    
    conf_thresh = 0.005
    nms_thresh = 0.45
    for batch_idx, (data, target) in enumerate(valid_loader):
        metax, mask = next(metaloader)
        # print(ids)
        data = data.cuda()
        mask = mask.cuda()
        metax = metax.cuda()
        data = Variable(data)
        mask = Variable(mask)
        metax = Variable(metax)
        output = m(data, metax, mask)

        if isinstance(output, tuple):
            output = (output[0].data, output[1].data)
        else:
            output = output.data
 
        batch_boxes = get_region_boxes_v2(output, n_cls, conf_thresh, m.num_classes, m.anchors, m.num_anchors, 0, 1)

        if isinstance(output, tuple):
            bs = output[0].size(0)
        else:
            assert output.size(0) % n_cls == 0
            bs = output.size(0) // n_cls

        for b in range(bs):
            lineId = lineId + 1
            imgpath = valid_dataset.lines[lineId].rstrip()
            print(imgpath)
            imgid = os.path.basename(imgpath).split('.')[0]
            width, height = get_image_size(imgpath)
            for i in range(n_cls):
                # oi = i * bs + b
                oi = b * n_cls + i
                boxes = batch_boxes[oi]
                boxes = nms(boxes, nms_thresh)
                for box in boxes:
                    x1 = (box[0] - box[2]/2.0) * width
                    y1 = (box[1] - box[3]/2.0) * height
                    x2 = (box[0] + box[2]/2.0) * width
                    y2 = (box[1] + box[3]/2.0) * height

                    det_conf = box[4]
                    for j in range((len(box) - 5) // 2):
                        cls_conf = box[5 + 2 * j]
                        cls_id = box[6 + 2 * j]
                        prob = det_conf * cls_conf
                        fps[i].write('%s %f %f %f %f %f\n' % (imgid, prob, x1, y1, x2, y2))

    for i in range(n_cls):
        fps[i].close()
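The corner computation in the loop above converts YOLO-style relative (cx, cy, w, h) boxes to absolute pixel corners. A small helper capturing the same arithmetic (an illustrative sketch, not a function from the repo):

def box_to_corners(box, width, height):
    """Convert (cx, cy, w, h) in [0, 1] image-relative units to pixel corners."""
    cx, cy, w, h = box[0], box[1], box[2], box[3]
    x1 = (cx - w / 2.0) * width
    y1 = (cy - h / 2.0) * height
    x2 = (cx + w / 2.0) * width
    y2 = (cy + h / 2.0) * height
    return x1, y1, x2, y2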
Example #4
def train(epoch):
    global processed_batches
    t0 = time.time()
    if ngpus > 1:
        cur_model = model.module
    else:
        cur_model = model

    train_loader = torch.utils.data.DataLoader(dataset.listDataset(
        trainlist,
        shape=(init_width, init_height),
        shuffle=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=True,
        seen=cur_model.seen,
        batch_size=batch_size,
        num_workers=num_workers),
                                               batch_size=batch_size,
                                               shuffle=False,
                                               **kwargs)

    metaset = dataset.MetaDataset(metafiles=metadict, train=True)
    metaloader = torch.utils.data.DataLoader(metaset,
                                             batch_size=metaset.batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             pin_memory=True)
    metaloader = iter(metaloader)

    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d/%d, processed %d samples, lr %f' %
            (epoch, max_epochs, epoch * len(train_loader.dataset), lr))

    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    for batch_idx, (data, target) in enumerate(train_loader):
        metax, mask = next(metaloader)
        t2 = time.time()
        adjust_learning_rate(optimizer, processed_batches)
        processed_batches = processed_batches + 1

        if use_cuda:
            data = data.cuda()
            metax = metax.cuda()
            mask = mask.cuda()
            #target= target.cuda()
        t3 = time.time()
        data, target = Variable(data), Variable(target)
        metax, mask = Variable(metax), Variable(mask)
        t4 = time.time()
        optimizer.zero_grad()
        t5 = time.time()
        output = model(data, metax, mask)
        t6 = time.time()
        region_loss.seen = region_loss.seen + data.data.size(0)
        loss = region_loss(output, target)
        t7 = time.time()
        loss.backward()
        t8 = time.time()
        optimizer.step()
        t9 = time.time()
        if False and batch_idx > 1:
            avg_time[0] = avg_time[0] + (t2 - t1)
            avg_time[1] = avg_time[1] + (t3 - t2)
            avg_time[2] = avg_time[2] + (t4 - t3)
            avg_time[3] = avg_time[3] + (t5 - t4)
            avg_time[4] = avg_time[4] + (t6 - t5)
            avg_time[5] = avg_time[5] + (t7 - t6)
            avg_time[6] = avg_time[6] + (t8 - t7)
            avg_time[7] = avg_time[7] + (t9 - t8)
            avg_time[8] = avg_time[8] + (t9 - t1)
            print('-------------------------------')
            print('       load data : %f' % (avg_time[0] / (batch_idx)))
            print('     cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
            print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
            print('       zero_grad : %f' % (avg_time[3] / (batch_idx)))
            print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
            print('    forward loss : %f' % (avg_time[5] / (batch_idx)))
            print('        backward : %f' % (avg_time[6] / (batch_idx)))
            print('            step : %f' % (avg_time[7] / (batch_idx)))
            print('           total : %f' % (avg_time[8] / (batch_idx)))
        t1 = time.time()
    print('')
    t1 = time.time()
    logging('training with %f samples/s' % (len(train_loader.dataset) /
                                            (t1 - t0)))

    if (epoch + 1) % cfg.save_interval == 0:
        logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
        cur_model.seen = (epoch + 1) * len(train_loader.dataset)
        cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))
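adjust_learning_rate is defined elsewhere in the repo; a typical darknet-style step schedule it might implement looks like the sketch below. The steps/scales values are illustrative assumptions, not the repo's configuration.

def adjust_learning_rate_sketch(optimizer, processed_batches, base_lr=1e-3,
                                steps=(-1, 100, 20000, 30000),
                                scales=(0.1, 10.0, 0.1, 0.1)):
    # Multiply the base LR by each scale whose step threshold has been passed.
    lr = base_lr
    for step, scale in zip(steps, scales):
        if processed_batches >= step:
            lr = lr * scale
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr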
Example #5
print(num_workers)

kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
test_loader = torch.utils.data.DataLoader(dataset.listDataset(
    testlist,
    shape=(init_width, init_height),
    shuffle=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
    ]),
    train=False),
                                          batch_size=batch_size,
                                          shuffle=False,
                                          **kwargs)

test_metaset = dataset.MetaDataset(metafiles=metadict, train=True)
test_metaloader = torch.utils.data.DataLoader(
    test_metaset,
    batch_size=test_metaset.batch_size,
    shuffle=False,
    num_workers=num_workers // 2,
    pin_memory=True)

# Adjust learning rate
factor = len(test_metaset.classes)
if cfg.neg_ratio == 'full':
    factor = 15.
elif cfg.neg_ratio == 1:
    factor = 3.0
elif cfg.neg_ratio == 0:
    factor = 1.5
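Equivalently, the branching above picks a fixed multiplier for the recognized cfg.neg_ratio values and falls back to the number of meta classes otherwise; a compact restatement (same logic, assuming neg_ratio only takes the values shown):

def lr_factor(neg_ratio, n_classes):
    # 'full' and the numeric ratios map to fixed factors; anything else
    # keeps the class-count default from above.
    return {'full': 15.0, 1: 3.0, 0: 1.5}.get(neg_ratio, float(n_classes))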
Example #6
def valid(datacfg, darknetcfg, learnetcfg, weightfile, outfile, use_baserw=False):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    metadict = options['meta']
    # name_list = options['names']
    # backup = cfg.backup
    ckpt = weightfile.split('/')[-1].split('.')[0]
    backup = weightfile.split('/')[-2]
    ckpt_pre = '/ene_' if use_baserw else '/ene'
    prefix = 'results/' + backup.split('/')[-1] + ckpt_pre + ckpt
    print('saving to: ' + prefix)
    # prefix = 'results/' + weightfile.split('/')[1]
    # names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(darknetcfg, learnetcfg)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    kwargs = {'num_workers': 4, 'pin_memory': True}

    metaset = dataset.MetaDataset(metafiles=metadict, train=False, ensemble=True, with_ids=True)
    metaloader = torch.utils.data.DataLoader(
        metaset,
        batch_size=64,
        shuffle=False,
        **kwargs
    )
    # metaloader = iter(metaloader)
    n_cls = len(metaset.classes)

    coef = [[[] for j in range(n_cls)] for i in range(3)]
    cnt = [0.0] * n_cls
    print('===> Generating dynamic weights...')
    kkk = 0
    for metax, mask, clsids in metaloader:
        print('===> {}/{}'.format(kkk, len(metaset) // 64))
        kkk += 1
        metax, mask = metax.cuda(), mask.cuda()
        metax, mask = Variable(metax, volatile=True), Variable(mask, volatile=True)
        dws = m.meta_forward(metax, mask)
        for ci, c in enumerate(clsids):
            for i in range(3):
                coef[i][c].append(dws[i][ci])

    outfile = './reweight_coef.data'
    with open(outfile, 'w') as f:
        for c in range(n_cls):
            print('processing %s' % metaset.classes[c])
            f.write(metaset.classes[c] + '\n')

            for i in range(3):
                f.write('coef%d\n' % i)
                for dw in coef[i][c]:
                    for n in dw:
                        f.write('%e ' % n.data[0])
                    f.write('\n')
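A hypothetical reader for the './reweight_coef.data' layout written above: one class-name line, then for each of the three layers a coef<i> marker line followed by one whitespace-separated vector per meta sample. It assumes class names start with a letter (and never with the literal prefix 'coef').

import numpy as np

def load_coef(path='./reweight_coef.data'):
    out, cls_name = {}, None
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line.startswith('coef'):
                out[cls_name].append([])          # start a new layer block
            elif line[0].isalpha():               # class-name line
                cls_name = line
                out[cls_name] = []
            else:                                 # one '%e'-formatted vector
                out[cls_name][-1].append([float(x) for x in line.split()])
    return {c: [np.asarray(block) for block in layers]
            for c, layers in out.items()}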
Example #7
def train(epoch):
    global processed_batches

    ic("Training...")
    t0 = time.time()
    if ngpus > 1:
        cur_model = model.module
    else:
        cur_model = model

    train_loader = torch.utils.data.DataLoader(dataset.listDataset(
        trainlist,
        shape=(init_width, init_height),
        shuffle=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=True,
        seen=cur_model.seen,
        batch_size=actual_bs,
        num_workers=num_workers),
                                               batch_size=actual_bs,
                                               shuffle=False,
                                               **kwargs)

    metaset = dataset.MetaDataset(metafiles=metadict, train=True)
    metaloader = torch.utils.data.DataLoader(metaset,
                                             batch_size=metaset.batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             pin_memory=True)
    metaloader = iter(metaloader)

    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d/%d, processed %d samples, lr %.8f' %
            (epoch, max_epochs, epoch * len(train_loader.dataset), lr))
    logging('processed_batches %d' % (processed_batches))

    # AA I can't fit batch size 64 on my GPU, so I'm using gradient accumulation
    # to account for this
    accumulate_gradients = actual_bs != batch_size
    ic(accumulate_gradients)
    # Check that our effective bs is evenly divisible by our actual
    assert (batch_size % actual_bs == 0)
    accumulate_step = batch_size // actual_bs
    ic(accumulate_step)

    # B/c we're not validating, use loss as the LR scheduler trigger
    losses = []

    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    optimizer.zero_grad()
    for batch_idx, (data, target) in enumerate(train_loader):
        #ic("iter")
        metax, mask = next(metaloader)
        t2 = time.time()
        adjust_learning_rate(optimizer, processed_batches)
        if (batch_idx + 1) % accumulate_step == 0:
            processed_batches = processed_batches + 1

        if use_cuda:
            data = data.cuda()
            metax = metax.cuda()
            mask = mask.cuda()
            #target= target.cuda()
        t3 = time.time()
        data, target = Variable(data), Variable(target)
        metax, mask = Variable(metax), Variable(mask)
        t4 = time.time()
        t5 = time.time()
        output = model(data, metax, mask)
        t6 = time.time()
        region_loss.seen = region_loss.seen + data.data.size(0)
        loss = region_loss(output, target)
        t7 = time.time()
        loss.backward()
        losses.append(loss.item())
        t8 = time.time()
        if (batch_idx + 1) % accumulate_step == 0:
            #ic("step")
            optimizer.step()
            optimizer.zero_grad()
        t9 = time.time()
        if False and batch_idx > 1:
            avg_time[0] = avg_time[0] + (t2 - t1)
            avg_time[1] = avg_time[1] + (t3 - t2)
            avg_time[2] = avg_time[2] + (t4 - t3)
            avg_time[3] = avg_time[3] + (t5 - t4)
            avg_time[4] = avg_time[4] + (t6 - t5)
            avg_time[5] = avg_time[5] + (t7 - t6)
            avg_time[6] = avg_time[6] + (t8 - t7)
            avg_time[7] = avg_time[7] + (t9 - t8)
            avg_time[8] = avg_time[8] + (t9 - t1)
            print('-------------------------------')
            print('       load data : %f' % (avg_time[0] / (batch_idx)))
            print('     cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
            print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
            print('       zero_grad : %f' % (avg_time[3] / (batch_idx)))
            print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
            print('    forward loss : %f' % (avg_time[5] / (batch_idx)))
            print('        backward : %f' % (avg_time[6] / (batch_idx)))
            print('            step : %f' % (avg_time[7] / (batch_idx)))
            print('           total : %f' % (avg_time[8] / (batch_idx)))
        t1 = time.time()
    print('')
    t1 = time.time()
    logging('training with %f samples/s' % (len(train_loader.dataset) /
                                            (t1 - t0)))

    avg_loss = np.mean(losses)
    logging('Average epoch loss: %f' % avg_loss)
    scheduler.step(avg_loss)

    if (epoch + 1) % cfg.save_interval == 0:
        logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
        cur_model.seen = (epoch + 1) * len(train_loader.dataset)
        cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))

    if epoch == max_epochs - 1:
        print("Writing final model weights")
        cur_model.save_weights('%s/model_final.weights' % backupdir)
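A minimal self-contained sketch of the gradient-accumulation pattern used above: step the optimizer only every accumulate_step micro-batches, so the effective batch is actual_bs * accumulate_step. One deliberate difference: the sketch divides the loss by accumulate_step so the accumulated gradient matches the large-batch mean, which the loop above does not do.

import torch
import torch.nn.functional as F

net = torch.nn.Linear(8, 1)
opt = torch.optim.SGD(net.parameters(), lr=0.1)
accumulate_step = 4                      # effective batch = 2 * 4 = 8

opt.zero_grad()
for batch_idx in range(16):
    x, y = torch.randn(2, 8), torch.randn(2, 1)
    loss = F.mse_loss(net(x), y)
    (loss / accumulate_step).backward()  # gradients accumulate across calls
    if (batch_idx + 1) % accumulate_step == 0:
        opt.step()
        opt.zero_grad()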
Example #8
def train(epoch):
    global processed_batches
    t0 = time.time()
    if ngpus > 1:
        cur_model = model.module
    else:
        cur_model = model

    train_loader = torch.utils.data.DataLoader(
        dataset.listDataset(
            trainlist,
            shape=(init_width, init_height),
            shuffle=True,
            transform=transforms.Compose([
                # transforms.Resize([448, 448]),
                transforms.ToTensor(),
            ]),
            train=True,
            seen=cur_model.seen,
            batch_size=batch_size,
            num_workers=num_workers),
        batch_size=batch_size,
        shuffle=False,
        **kwargs)
    # print("block b nw is: ", batch_size, num_workers)
    metaset = dataset.MetaDataset(metafiles=metadict,
                                  train=True,
                                  num_workers=num_workers)
    metaloader = torch.utils.data.DataLoader(
        metaset,
        batch_size=metaset.batch_size,
        # batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True  ####################
    )
    # print("meta b nw is: ", batch_size, num_workers)
    metaloader = iter(metaloader)

    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d/%d, processed %d samples, lr %f' %
            (epoch, max_epochs, epoch * len(train_loader.dataset), lr))

    model.train()
    # t1 = time.time()
    avg_time = torch.zeros(9)
    _len = len(train_loader)
    for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
        # t_data = time.time()
        metax, mask = next(metaloader)
        # t2 = time.time()
        adjust_learning_rate(optimizer, processed_batches)
        processed_batches = processed_batches + 1

        if use_cuda:
            data = data.cuda()
            metax = metax.cuda()
            mask = mask.cuda()
            #target= target.cuda()
        # t3 = time.time()
        data, target = Variable(data), Variable(target)
        metax, mask = Variable(metax), Variable(mask)
        # t4 = time.time()
        optimizer.zero_grad()
        # t5 = time.time()
        # print("input data shape: ", [data.shape, metax.shape, mask.shape])
        output = model(data.float(), metax.float(),
                       mask.float())  # torch.Size([1, 30, 13, 13])

        # t6 = time.time()
        region_loss.seen = region_loss.seen + data.data.size(0)
        # ("target shape :", target.shape)
        loss_total, loss, printout, cur_step = region_loss(
            output, target.float(), use_cuda)

        # t7 = time.time()
        loss_total.backward()
        # t8 = time.time()
        optimizer.step()
        # t9 = time.time()
        # print(f"t_data:{t_data - t1}, t_meta:{t2 - t_data}")
        if False and batch_idx > 1:
            avg_time[0] = avg_time[0] + (t2 - t1)
            avg_time[1] = avg_time[1] + (t3 - t2)
            avg_time[2] = avg_time[2] + (t4 - t3)
            avg_time[3] = avg_time[3] + (t5 - t4)
            avg_time[4] = avg_time[4] + (t6 - t5)
            avg_time[5] = avg_time[5] + (t7 - t6)
            avg_time[6] = avg_time[6] + (t8 - t7)
            avg_time[7] = avg_time[7] + (t9 - t8)
            avg_time[8] = avg_time[8] + (t9 - t1)
            print('-------------------------------')
            print('       load data : %f' % (avg_time[0] / (batch_idx)))
            print('     cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
            print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
            print('       zero_grad : %f' % (avg_time[3] / (batch_idx)))
            print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
            print('    forward loss : %f' % (avg_time[5] / (batch_idx)))
            print('        backward : %f' % (avg_time[6] / (batch_idx)))
            print('            step : %f' % (avg_time[7] / (batch_idx)))
            print('           total : %f' % (avg_time[8] / (batch_idx)))

        # t1 = time.time()

        writer.add_scalar("scalar/trainLoss", loss_total.item(), cur_step)
        writer.add_scalars(
            "scalar/separatedLoss", {
                "loss_conf": loss["loss_conf"].item(),
                "loss_cls": loss["loss_cls"].item()
            }, cur_step)
        writer.add_scalar("scalar/trainLr", lr, cur_step)
        if batch_idx % 301 == 300:
            print(str(epoch + 1) + '->' + printout)

            logging('save weights to %s/%06d_%06d.weights' %
                    (backupdir, epoch + 1, batch_idx))
            cur_model.seen = (epoch + 1) * len(train_loader.dataset)
            cur_model.save_weights('%s/%06d_%06d.weights' %
                                   (backupdir, epoch + 1, batch_idx))
            print("save checkpoint finished!")

        # del loss_total, loss, cur_step
        # torch.cuda.empty_cache()

    # print('')
    t1 = time.time()
    logging('training with %f samples/s' % (len(train_loader.dataset) /
                                            (t1 - t0)))

    if (epoch + 1) % save_interval == 0:

        # torch.save({'epoch': epoch + 1, 'state_dict': model.state_dict(),
        #             'optimizer': optimizer.state_dict()},
        #            checkpoint_path + '/m-' + launchTimestamp + '-' + str(epoch+1) + 'epoch-' + str("%.4f" % loss_total.data) + '.pth.tar')

        logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
        cur_model.seen = (epoch + 1) * len(train_loader.dataset)
        cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))
        print("save checkpoint finished!")
Example #9
def train(epoch):
    global processed_batches
    t0 = time.time()
    if ngpus > 1:
        cur_model = model.module
    else:
        cur_model = model

    train_loader = torch.utils.data.DataLoader(
        dataset.listDataset(
            trainlist,
            shape=(init_width, init_height),
            shuffle=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                # lambda x: 2 * (x - 0.5)
            ]),
            train=True,
            seen=cur_model.seen,
            batch_size=batch_size,
            num_workers=num_workers),
        batch_size=batch_size,
        shuffle=False,
        **kwargs)

    metaset = dataset.MetaDataset(metafiles=metadict, train=True)
    metaloader = torch.utils.data.DataLoader(metaset,
                                             batch_size=metaset.batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             pin_memory=True)
    metaloader = iter(metaloader)

    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d/%d, processed %d samples, lr %f' %
            (epoch, max_epochs, epoch * len(train_loader.dataset), lr))
    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    device = torch.device('cuda')

    pbar = tqdm(dynamic_ncols=True, total=int(len(train_loader)))
    for batch_idx, (data, target) in enumerate(train_loader):
        metax, mask = next(metaloader)
        db.printTensor(metax)
        db.printTensor(data)
        t2 = time.time()
        adjust_learning_rate(optimizer, processed_batches)
        processed_batches = processed_batches + 1
        data = data.to(device, non_blocking=True)
        metax = metax.to(device, non_blocking=True)
        mask = mask.to(device, non_blocking=True)
        t3 = time.time()
        t4 = time.time()
        optimizer.zero_grad()
        t5 = time.time()
        output = model(data, metax, mask)
        t6 = time.time()
        region_loss.seen = region_loss.seen + data.data.size(0)
        del data, metax, mask
        loss = region_loss(output, target)
        del output, target
        t7 = time.time()
        loss.backward()
        t8 = time.time()
        optimizer.step()
        t9 = time.time()
        status = '{} :: E: {} / {} :: iter: {} :: lr: {:.1e} :: L: {:.4f} '.format(
            'train', epoch, max_epochs, region_loss.seen, lr, loss.item())
        pbar.set_description(status, refresh=False)
        pbar.update(1)
        del loss
        if False and batch_idx > 1:
            avg_time[0] = avg_time[0] + (t2 - t1)
            avg_time[1] = avg_time[1] + (t3 - t2)
            avg_time[2] = avg_time[2] + (t4 - t3)
            avg_time[3] = avg_time[3] + (t5 - t4)
            avg_time[4] = avg_time[4] + (t6 - t5)
            avg_time[5] = avg_time[5] + (t7 - t6)
            avg_time[6] = avg_time[6] + (t8 - t7)
            avg_time[7] = avg_time[7] + (t9 - t8)
            avg_time[8] = avg_time[8] + (t9 - t1)
            print('-------------------------------')
            print('       load data : %f' % (avg_time[0] / (batch_idx)))
            print('     cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
            print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
            print('       zero_grad : %f' % (avg_time[3] / (batch_idx)))
            print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
            print('    forward loss : %f' % (avg_time[5] / (batch_idx)))
            print('        backward : %f' % (avg_time[6] / (batch_idx)))
            print('            step : %f' % (avg_time[7] / (batch_idx)))
            print('           total : %f' % (avg_time[8] / (batch_idx)))
        t1 = time.time()
    pbar.close()
    print('')
    t1 = time.time()
    logging('training with %f samples/s' % (len(train_loader.dataset) /
                                            (t1 - t0)))

    if (epoch + 1) % cfg.save_interval == 0:
        logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
        cur_model.seen = (epoch + 1) * len(train_loader.dataset)
        cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))
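The disabled "if False" blocks in these training loops all accumulate per-stage wall-clock deltas by hand. A reusable restatement of that instrumentation (purely illustrative, not part of the repo):

import time
from collections import defaultdict

class StageTimer:
    """Accumulate wall-clock time between named stages of a loop body."""
    def __init__(self):
        self.totals = defaultdict(float)
        self.mark = time.time()

    def tick(self, name):
        # Charge the elapsed time since the last tick to this stage.
        now = time.time()
        self.totals[name] += now - self.mark
        self.mark = now

    def report(self, n_batches):
        for name, total in self.totals.items():
            print('%16s : %f' % (name, total / n_batches))

# Usage inside the batch loop, mirroring the t1..t9 deltas above:
#   timer.tick('load data'); ...; timer.tick('forward'); ...; timer.report(batch_idx)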