Example 1
def train(config):
    config.check_constraint()
    cudnn.benchmark = True
    assert config.model_type in ['U_Net']

    # Create directories if not exist
    if not os.path.exists(config.model_path):
        os.makedirs(config.model_path)
    if not os.path.exists(config.result_path):
        os.makedirs(config.result_path)

    logging.info(config)

    train_loader = get_loader(config, mode='train')
    valid_loader = get_loader(config, mode='valid')
    test_loader = get_loader(config, mode='test')

    solve = utils.solver.Solver(config, train_loader, valid_loader,
                                test_loader)

    # Train and sample the images
    if config.mode == 'train':
        solve.run()
    elif config.mode == 'test':
        solve.test()
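
The `get_loader` factory this example imports is not shown. A minimal sketch of a compatible implementation, assuming `config` exposes `data_path`, `batch_size`, and `num_workers` (hypothetical field names inferred from the call sites):

import os
import torch
from torchvision import datasets, transforms

def get_loader(config, mode='train'):
    # hypothetical factory: one image folder per split, shuffled only for training
    dataset = datasets.ImageFolder(os.path.join(config.data_path, mode),
                                   transform=transforms.ToTensor())
    return torch.utils.data.DataLoader(dataset,
                                       batch_size=config.batch_size,
                                       shuffle=(mode == 'train'),
                                       num_workers=config.num_workers)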
Example 2
    def __init_data_loader(self, data_json):
        data_loader = get_loader(image_dir=self.args.image_dir,
                                 caption_json=self.args.caption_json,
                                 data_json=data_json,
                                 vocabulary=self.vocab,
                                 transform=self.transform,
                                 batch_size=self.args.batch_size,
                                 shuffle=True)
        return data_loader
Example 3
    def __init__(self, args):
        super(Trainer, self).__init__()
        self.args = args
        train_dataset = data_factory[args.dataset](self.args, 'train')
        self.train_loader = get_loader(train_dataset, args, 'train')
        self.num_classes = train_dataset.num_classes

        val_dataset = data_factory[args.dataset](self.args, 'val')
        self.val_loader = get_loader(val_dataset, args, 'val')

        self.model = model_factory[args.model](self.args, self.num_classes)
        self.model.cuda()

        trainable_parameters = filter(lambda param: param.requires_grad,
                                      self.model.parameters())
        if self.args.optimizer == 'Adam':
            self.optimizer = Adam(trainable_parameters, lr=self.args.lr)
        elif self.args.optimizer == 'SGD':
            self.optimizer = SGD(trainable_parameters, lr=self.args.lr)

        self.lr_scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                           patience=2,
                                                           verbose=True)
        if self.args.loss == 'BCElogitloss':
            self.criterion = nn.BCEWithLogitsLoss()
        elif self.args.loss == 'tencentloss':
            self.criterion = TencentLoss(self.num_classes)
        elif self.args.loss == 'focalloss':
            self.criterion = FocalLoss()
        self.early_stopping = EarlyStopping(patience=5)

        self.voc12_mAP = VOC12mAP(self.num_classes)
        self.average_loss = AverageLoss(self.args.batch_size)
        self.average_topk_meter = TopkAverageMeter(self.num_classes,
                                                   topk=self.args.topk)
        self.average_threshold_meter = ThresholdAverageMeter(
            self.num_classes, threshold=self.args.threshold)

        self.global_step = 0
        self.writer = get_summary_writer(self.args)
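
A hypothetical construction of this class, assuming `args` is the argparse namespace the repository passes in (field names exactly as referenced above):

trainer = Trainer(args)  # wires loaders, model, optimizer, scheduler, criterion, and meters in one step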
Example 4
def main(args):
    # dataset setting
    image_root = args.image_root
    ann_path = args.ann_path
    vocab_path = args.vocab_path
    batch_size = args.batch_size
    shuffle = args.shuffle
    num_workers = args.num_workers

    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    dataloader = get_loader(image_root, ann_path, vocab, batch_size,
                            shuffle=shuffle, num_workers=num_workers)

    # model setting
    vis_dim = args.vis_dim
    vis_num = args.vis_num
    embed_dim = args.embed_dim
    hidden_dim = args.hidden_dim
    vocab_size = args.vocab_size
    num_layers = args.num_layers
    dropout_ratio = args.dropout_ratio

    model = Decoder(vis_dim=vis_dim,
                    vis_num=vis_num,
                    embed_dim=embed_dim,
                    hidden_dim=hidden_dim,
                    vocab_size=vocab_size,
                    num_layers=num_layers,
                    dropout_ratio=dropout_ratio)

    # optimizer setting
    lr = args.lr
    num_epochs = args.num_epochs
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # criterion
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():
        model.cuda()
        criterion.cuda()

    model.train()

    print('Number of epochs:', num_epochs)
    for epoch in range(num_epochs):
        train(dataloader=dataloader, model=model, optimizer=optimizer,
              criterion=criterion, epoch=epoch, total_epoch=num_epochs)
        torch.save(model, './checkpoints/model_%d.pth' % epoch)
Example 5
def main(opt):
    with open(opt.infos_path, 'rb') as f:
        infos = pickle.load(f)

    #override and collect parameters
    if len(opt.input_h5) == 0:
        opt.input_h5 = infos['opt'].input_h5

    if len(opt.input_json) == 0:
        opt.input_json = infos['opt'].input_json

    if opt.batch_size == 0:
        opt.batch_size = infos['opt'].batch_size

    if len(opt.id) == 0:
        opt.id = infos['opt'].id
    ignore = ['id', 'batch_size', 'beam_size', 'start_from', 'language_eval']

    for key, value in vars(infos['opt']).items():
        if key not in ignore:
            if key in vars(opt):
                assert vars(opt)[key] == vars(infos['opt'])[key], \
                    key + " option not consistent"
            else:
                vars(opt).update({key: value})
    vocab = infos['vocab']
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    encoder = Encoder()
    decoder = Decoder(opt)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    decoder.load_state_dict(torch.load(opt.model, map_location=str(device)))
    encoder.eval()
    decoder.eval()
    criterion = utils.LanguageModelCriterion().to(device)
    if len(opt.image_folder) == 0:
        loader = get_loader(opt, 'test')
        loader.ix_to_word = vocab
        loss, split_predictions, lang_stats = \
            eval_utils.eval_split(encoder, decoder, criterion, opt, vars(opt))
        print('loss: ', loss)
        print(lang_stats)

        result_json_path = os.path.join(opt.checkpoint_path, "captions_"+opt.split+"2014_"+opt.id+"_results.json")
        with open(result_json_path, "w") as f:
            json.dump(split_predictions, f)
Example 6
    def __init__(self, args):
        super(Tester, self).__init__()
        self.args = args

        test_dataset = data_factory[args.dataset](self.args, 'test')
        self.test_loader = get_loader(test_dataset, args, 'test')
        self.num_classes = test_dataset.num_classes

        self.model = model_factory[args.model](self.args, self.num_classes)
        self.model.cuda()

        if self.args.loss == 'BCElogitloss':
            self.criterion = nn.BCEWithLogitsLoss()
        elif self.args.loss == 'tencentloss':
            self.criterion = TencentLoss(self.num_classes)

        self.voc12_mAP = VOC12mAP(self.num_classes)
        self.average_loss = AverageLoss(self.args.batch_size)
        self.average_topk_meter = TopkAverageMeter(self.num_classes,
                                                   topk=self.args.topk)
        self.average_threshold_meter = ThresholdAverageMeter(
            self.num_classes, threshold=self.args.threshold)
Example 7
def evaluate_NMD(NMD, param=0.5, mode='alpha'):
    NMD.eval()
    data_loader = get_loader(train=False, batch_size=1)
    data_iter = iter(data_loader)
    right = 0
    count = 0
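    # each loop iteration scores one real HR image and one synthetically
    # degraded image, so `count` advances by 2 per pass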
    for _ in range(25):
        try:
            lr, hr = next(data_iter)
        except StopIteration:
            data_iter = iter(data_loader)
            lr, hr = next(data_iter)
        lr = lr.to(device)
        hr = hr.to(device)
        
        if NMD(hr)[:,0] > 0.5:
            right += 1
            
        if mode == 'alpha':
            A = data_gen_alpha(lr, hr, param)
            if NMD(A)[:,0] < 0.5:
                right += 1
                
        elif mode == 'sigma':
            B = data_gen_sigma(lr, hr, param)
            if NMD(B)[:,0] < 0.5:
                right += 1
                
        else:
            raise ValueError(f'wrong mode: {mode}. It should be alpha or sigma')
            
        count += 2
        
    return right / count
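
A usage sketch for the evaluator above, assuming the trained `NMD` network and the data-generation helpers are in scope (the `param` values are hypothetical):

acc_alpha = evaluate_NMD(NMD, param=0.5, mode='alpha')  # real vs. alpha-blended pairs
acc_sigma = evaluate_NMD(NMD, param=0.1, mode='sigma')  # real vs. noise-degraded pairs
print('alpha acc:', acc_alpha, 'sigma acc:', acc_sigma)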
Example 8
def main():

    config = configparser.ConfigParser()
    config.read('config.ini')
    params = config['EVAL']
    encoder_path = params['encoder_path']
    decoder_path = params['decoder_path']
    crop_size = int(params['crop_size'])
    vocab_path = params['vocab_path']
    image_dir = params['image_dir']
    caption_path = params['caption_path']
    embed_size = int(params['embed_size'])
    hidden_size = int(params['hidden_size'])
    num_layers = int(params['num_layers'])
    batch_size = int(params['batch_size'])
    num_workers = int(params['num_workers'])

    # Image preprocessing
    transform = transforms.Compose([
        transforms.Resize(229),
        transforms.RandomCrop(crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    # Load vocabulary wrapper
    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build models
    encoder = EncoderCNN(
        embed_size).eval()  # eval mode (batchnorm uses moving mean/variance)
    decoder = DecoderRNN(embed_size, hidden_size, len(vocab),
                         num_layers).eval()
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))

    data_loader = get_loader(image_dir, caption_path, vocab, transform,
                             batch_size, True, num_workers)

    bleu_score = 0
    # `c` below is assumed to be an NLTK smoothing helper; defining it here
    # makes the snippet self-contained
    c = nltk.translate.bleu_score.SmoothingFunction()

    def id_to_word(si):
        s = []
        for word_id in si:
            word = vocab.idx2word[word_id]
            s.append(word)
            if word == '<end>':
                break
        return s

    for i, (images, captions, lengths) in enumerate(data_loader):
        # Generate a caption from the image
        images = images.to(device)
        feature = encoder(images)
        sampled_ids = decoder.sample(feature)
        sampled_ids = sampled_ids[0].cpu().numpy()  # (1, max_seq_length) -> (max_seq_length,)
        captions = captions.detach().cpu().numpy()
        references = []
        for cap in captions:
            references.append(id_to_word(cap))
        gen_cap = id_to_word(sampled_ids)

        bleu_score = bleu_score + nltk.translate.bleu_score.sentence_bleu(
            references, gen_cap, smoothing_function=c.method7)

        if i % 500 == 0:
            print(i + 1, ' bleu_score ', bleu_score / (i + 1))
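
    # a closing summary after the loop would report the corpus-level mean
    # (hypothetical addition, reusing the loop's final `i`)
    print('final average bleu_score ', bleu_score / (i + 1))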
Example 9
                        type=int,
                        default=1,
                        help='the channel of out img, decide the num of class')
    parser.add_argument('--gpu_avaiable',
                        type=str,
                        default='0',
                        help='the gpu used')
    parser.add_argument('--checkpoints',
                        type=str,
                        default='./weights/model_best.pth',
                        help="weight's path")
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_avaiable
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Loading test data
    # test_loader = get_loader(
    #     args.data_path, args.crop_size, args.resize, args.batch_size, mode='test')
    test_loader = get_loader(args.data_path,
                             None,
                             args.resize,
                             args.batch_size,
                             mode='test')

    # Load model
    net = HybridNet(input_size=args.resize[0], n_classes=args.n_class)
    net, _ = load_pretrained(net, args.checkpoints)

    eval(net, test_loader, device)
Example 10
def test(encoder,
         decoder,
         vocab,
         num_samples,
         num_hints,
         debug=False,
         c_step=0.0,
         no_avg=True):
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    rt_image = './data/val_resized2014'
    annotations = args.caption or './data/annotations/captions_val2014.json'
    shuffle = False
    batch_size = 2 if args.adapt else 1  # If inputting random caption as gt, use batch of 2 and swap gts
    data_loader = get_loader(rt_image, annotations, vocab, transform,
                             batch_size, shuffle, 1)
    assert len(vocab) == decoder.linear.out_features

    avg_gt_score = torch.zeros(args.compare_steps, 1)
    avg_gt_score_hint = torch.zeros(args.compare_steps, 1)
    gt_scores, gt_scores_hint = [], []

    avg_crossEnloss = torch.zeros(args.compare_steps, 1)
    avg_crossEnloss_hint = torch.zeros(args.compare_steps, 1)
    crossEnlosses, crossEnlosses_hint = [], []

    num_sampled = 0
    data_points = []
    coco_json = CocoJson('data/captions_val2014.json',
                         'data/captions_val2014_results.json')
    coco_json_update = CocoJson('data/captions_val2014.json',
                                'data/captions_val2014_results_u.json')

    for i, (images, captions, lengths, img_ids,
            ann_ids) in enumerate(data_loader):
        if i >= num_samples or (args.adapt and i * 2 >= num_samples):
            break

        for k in range(batch_size):
            image, length, img_id, ann_id = images[k:k+1], lengths[k:k+1], \
                                            img_ids[k:k+1], ann_ids[k:k+1]

            caption = captions[k:k + 1]
            if args.adapt:  # use the other image's caption for gt input
                gt_input = captions[(k + 1) % batch_size, :args.num_hints + 1]
            else:
                gt_input = captions[k, :args.num_hints + 1]

            image_tensor = to_var(image, volatile=True)
            feature = encoder(image_tensor)

            # Compute probability score
            if args.msm == "ps":
                gt_score, gt_score_hint, num_compare = probabilityScore(
                    caption, feature, vocab, num_hints, decoder, c_step,
                    args.compare_steps)
                if not no_avg:
                    avg_gt_score = avg_gt_score.index_add_(
                        0, torch.LongTensor(range(num_compare)), gt_score)
                    avg_gt_score_hint = avg_gt_score_hint.index_add_(
                        0, torch.LongTensor(range(num_compare)), gt_score_hint)
                else:
                    gt_scores.append(gt_score[:num_compare])
                    gt_scores_hint.append(gt_score_hint[:num_compare])
            # Compute cross entropy loss
            elif args.msm == 'ce':
                crossEnloss, crossEnloss_hint, num_compare = crsEntropyLoss(
                    caption, length, feature, vocab, num_hints, decoder,
                    c_step, args.compare_steps)
                if crossEnloss is None:
                    continue
                if not no_avg:
                    avg_crossEnloss = avg_crossEnloss.index_add_(
                        0, torch.LongTensor(range(num_compare)), crossEnloss)
                    avg_crossEnloss_hint = avg_crossEnloss_hint.index_add_(
                        0, torch.LongTensor(range(num_compare)),
                        crossEnloss_hint)
                else:
                    crossEnlosses.append(crossEnloss)
                    crossEnlosses_hint.append(crossEnloss_hint)
            # Evaluate with pycoco tools
            elif args.msm == "co":
                no_update, pred_caption, _ = decode_beta(feature, gt_input, decoder, \
                                              vocab, c_step, args.prop_steps)

                caption = [vocab.idx2word[c] for c in caption[0, 1:-1]]
                gt_input = [vocab.idx2word[c] for c in gt_input[:-1]]
                no_update = ' '.join(gt_input) + ' ' + ' '.join(
                    no_update.split()[num_hints:])
                pred_caption = ' '.join(gt_input) + ' ' + ' '.join(
                    pred_caption.split()[num_hints:])
                caption = ' '.join(caption)

                if args.load_val:
                    caption = None

                coco_json_update.add_entry(img_id[0], ann_id[0], caption,
                                           pred_caption)
                coco_json.add_entry(img_id[0], ann_id[0], caption, no_update)

            if debug and not args.test_c_step:
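                # NOTE: `hypothesis`, `hypothesis_hint`, `gt_score`, and
                # `gt_score_hint` are assumed to come from the scoring helpers
                # above; only the "ps" branch defines the score variables here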
                print(
                    "Ground Truth: {}\nNo hint: {}\nHint: {}\
                      \nGround Truth Score: {}\nGround Truth Score Improve {}\
                      ".format(caption, hypothesis, hypothesis_hint, gt_score,
                               gt_score_hint))

    if args.test_c_step:
        return data_points

    if args.msm == "ps":
        avg_gt_score /= i
        avg_gt_score_hint /= i
        if not no_avg:
            return (avg_gt_score, avg_gt_score_hint)
        else:
            return (gt_scores, gt_scores_hint)
    elif args.msm == "ce":
        avg_crossEnloss /= i
        avg_crossEnloss_hint /= i
        if not no_avg:
            return (avg_crossEnloss, avg_crossEnloss_hint)
        else:
            return (crossEnlosses, crossEnlosses_hint)
    elif args.msm == "co":
        coco_json.create_json()
        coco_json_update.create_json()
        return None
Example 11
from models.hmnet_heavy_x1_ab_fea_0310 import hmnet
from utils.data_loader import get_loader
import trainer_hmnet_Flickr2K as trainer
torch.manual_seed(0)
scale_factor = 4

batch_size = 1
epoch_start = 0
num_epochs = 40
model = hmnet(scale=scale_factor)
#model.load_state_dict(torch.load('./weights/HMNET_x4_Heavy_REDS_JPEG.pth'))

train_loader = get_loader(data='Flickr2K',
                          mode='train',
                          batch_size=batch_size,
                          height=0,
                          width=0,
                          scale_factor=1,
                          augment=True,
                          force_size=True)
test_loader = get_loader(data='Flickr2K',
                         mode='test',
                         batch_size=batch_size,
                         height=0,
                         width=0,
                         scale_factor=1,
                         augment=True,
                         force_size=True)
trainer.train(model,
              train_loader,
              test_loader,
              mode='HMNET_Flickr2K_ablation_fea',
Example 12
def test(encoder,
         decoder,
         vocab,
         num_samples,
         num_hints,
         debug=False,
         c_step=0.0,
         no_avg=True):
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    rt_image = 'data/val_resized2014'
    annotations = args.caption or 'data/annotations/captions_val2014.json'
    shuffle = False
    data_loader = get_loader(rt_image, annotations, vocab, transform, 1,
                             shuffle, 1)
    assert len(vocab) == decoder.linear.out_features

    avg_gt_score = torch.zeros(args.compare_steps, 1)
    avg_gt_score_hint = torch.zeros(args.compare_steps, 1)
    gt_scores, gt_scores_hint = [], []

    avg_crossEnloss = torch.zeros(args.compare_steps, 1)
    avg_crossEnloss_hint = torch.zeros(args.compare_steps, 1)
    crossEnlosses, crossEnlosses_hint = [], []

    num_sampled = 0
    data_points = []
    coco_json = CocoJson('data/captions_val2014.json',
                         'data/captions_val2014_results.json')
    coco_json_update = CocoJson('data/captions_val2014.json',
                                'data/captions_val2014_results_u.json')

    for i, (image, caption, length, img_id, ann_id) in enumerate(data_loader):
        if num_sampled > num_samples or i > num_samples:
            break
        image_tensor = to_var(image, volatile=True)
        feature = encoder(image_tensor)

        # Compute optimal c_step by (pred, ce)
        if args.msm == "co":
            no_update, pred_caption, _ = decode_beta(feature, caption[0,:num_hints+1], decoder, \
                                          vocab, c_step, args.prop_steps)
            # print caption
            # no_hint, _, _ = decode_beta(feature,caption[0,:1], decoder, \
            #                                           vocab, c_step, args.prop_steps)

            caption = [vocab.idx2word[c] for c in caption[0, 1:-1]]

            no_update = ' '.join(caption[:num_hints]) + ' ' + ' '.join(
                no_update.split()[num_hints:])
            pred_caption = ' '.join(caption[:num_hints]) + ' ' + ' '.join(
                pred_caption.split()[num_hints:])
            caption = ' '.join(caption)

            if args.load_val:
                caption = None

            coco_json_update.add_entry(img_id[0], ann_id[0], caption,
                                       pred_caption)
            coco_json.add_entry(img_id[0], ann_id[0], caption, no_update)

        if debug:
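            # NOTE: `hypothesis`, `hypothesis_hint`, `gt_score`, and
            # `gt_score_hint` are assumed to come from scoring code elided
            # from this excerpt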
            print("Ground Truth: {}\nNo hint: {}\nHint: {}\
                  \nGround Truth Score: {}\nGround Truth Score Improve {}\
                  ".format(caption, hypothesis, hypothesis_hint, gt_score,
                           gt_score_hint))

    if args.msm == "co":
        coco_json.create_json()
        coco_json_update.create_json()
        return None
Example 13
from models.hmnet_heavy_ablation_fea import hmnet
from utils.data_loader import get_loader
import trainer_0426 as trainer
torch.manual_seed(0)
scale_factor = 4

batch_size = 1
epoch_start = 0
num_epochs = 5
model = hmnet(scale=scale_factor)
today = datetime.datetime.now().strftime('%Y.%m.%d')

size = 256
train_loader = get_loader(data='REDS', mode='train', batch_size=batch_size,
                          height=size, width=size, scale_factor=4, augment=True)
test_loader = get_loader(data='REDS', mode='test', height=256, width=256,
                         scale_factor=4)
trainer.train(model, train_loader, test_loader, mode='HMNET_REDS_ab_fea',
              epoch_start=0, num_epochs=num_epochs, save_model_every=1,
              test_model_every=1, today=today, refresh=False)



from models.hmnet_heavy import hmnet
from utils.data_loader import get_loader
import trainer_0426 as trainer
torch.manual_seed(0)
scale_factor = 4

batch_size = 1
epoch_start = 0
num_epochs = 5
model = hmnet(scale=scale_factor)
Example 14
def main():
    logger = LogMaster.get_logger('eval')

    if not os.path.isfile(args.ckpt_path):
        print('checkpoint not found: ', args.ckpt_path)
        exit(-1)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    logger.info('building data loader...')
    # Build data loader
    data_loader, image_ids = get_loader(args.val_image_dir,
                                        args.val_caption_path,
                                        vocab,
                                        transform,
                                        args.batch_size,
                                        shuffle=False,
                                        num_workers=args.num_workers,
                                        is_eval=True)

    logger.info('building model...')
    # Build the models
    vocab_size = len(vocab)

    if args.model == 'ssa':
        net = SSA(embed_dim=args.embed_size,
                  lstm_dim=args.hidden_size,
                  vocab_size=vocab_size)
    elif args.model == 'nic':
        net = NIC(embed_dim=args.embed_size,
                  lstm_dim=args.hidden_size,
                  vocab_size=vocab_size)
    elif args.model == 'scacnn':
        net = SCACNN(embed_dim=args.embed_size,
                     lstm_dim=args.hidden_size,
                     vocab_size=vocab_size)
    else:
        net = None
        print('model name not found: ' + args.model)
        exit(-2)

    net.eval()

    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
        net.cuda()

    net.zero_grad()
    logger.info('restoring pretrained model...')
    checkpoint = torch.load(args.ckpt_path)
    try:
        args_dict = checkpoint['args']
        args.batch_size = args_dict['batch_size']
        args.learning_rate = args_dict['learning_rate']
        args.att_mode = args_dict['att_mode']
        args.model = args_dict['model']
        args.embed_size = args_dict['embed_size']
        args.hidden_size = args_dict['hidden_size']
        args.num_layers = args_dict['num_layers']
        net.load_state_dict(checkpoint['net_state'])
        epoch = checkpoint['epoch']
        print('using loaded args from checkpoint:')
        pprint(args)
    except (KeyError, TypeError):  # plain state_dict checkpoint without metadata
        net.load_state_dict(checkpoint)
        epoch = 0

    logger.info('start generating captions...')
    total_step = len(data_loader)
    start_token = vocab('<start>')
    end_token = vocab('<end>')
    syn_captions = []
    keys = {}
    for i, (images, inputs, targets, masks, lengths,
            img_ids) in tqdm(enumerate(data_loader),
                             total=total_step,
                             leave=False,
                             ncols=80,
                             unit='b'):
        images = to_var(images, requires_grad=False)
        if args.beam_width == 1:
            results = net.greedy_search(images, start_token).data.cpu().numpy()
        else:
            results = net.beam_search(
                images, start_token,
                beam_width=args.beam_width).data.cpu().numpy()

        results = list(results)  # each element is [seq_len, 1]

        for j in range(len(results)):  # avoid shadowing the batch index `i`
            sentence = ''
            res = list(results[j])
            img_id = img_ids[j]
            for w in res:
                if w == start_token:
                    continue
                elif w == end_token:
                    break
                word = vocab.idx2word[w]
                sentence += (' ' + word)
            # only keep one caption for each image
            if img_id not in keys:
                keys[img_id] = 1
                item = {'image_id': img_id, 'caption': sentence}
                syn_captions.append(item)

    res_dir = Path(args.result_dir)
    if not res_dir.is_dir():
        res_dir.mkdir()
    result_path = res_dir / Path(args.model + '-' + str(epoch) +
                                 '-predictions.json')
    with open(str(result_path), 'w') as fout:
        json.dump(syn_captions, fout)
    logger.info(f'captions saved: {str(result_path)}')
Example 15
def main(args):
    args.model_path = os.path.join(args.model_path, str(datetime.date.today()))
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    loss_fn = nn.CrossEntropyLoss()
    softmax = nn.Softmax(dim=1)

    # Build DDxNet with 4 DDx blocks of convolutions
    model = DDxNet(args.num_channels,
                   args.num_timesteps,
                   DDx_block, [2, 6, 8, 4],
                   args.output_dim,
                   causal=True,
                   use_dilation=True).to(device)

    # multi-gpu training if available
    if (torch.cuda.device_count() > 1):
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.98))
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'min',
                                                           patience=2,
                                                           factor=0.1**0.75,
                                                           verbose=True)

    train_loader = get_loader(args.data_dir, 'train', args.batch_size,
                              args.shuffle)
    test_loader = get_loader(args.data_dir,
                             'test',
                             args.batch_size,
                             shuffle=False)
    best_val_acc = 0.

    if not os.path.exists(os.path.join('./logs', str(datetime.date.today()))):
        os.makedirs(os.path.join('./logs', str(datetime.date.today())))

    results_file = os.path.join('./logs', str(datetime.date.today()),
                                args.results_file)
    results = ResultsLog(results_file)

    for epoch in range(args.num_epochs):
        avg_loss = 0.
        total_predlabs = []
        total_truelabs = []
        total_probs = []

        for itr, (X, y_true) in enumerate(train_loader):
            model.train()
            X = X.to(device).float()
            y_true = y_true.to(device).long()

            y_pred = model(X)
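            # y_true arrives one-hot encoded; CrossEntropyLoss expects class
            # indices, hence the torch.max(..., 1)[1] conversion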
            loss = loss_fn(y_pred, torch.max(y_true, 1)[1])
            avg_loss += loss.item() / len(train_loader)

            optimizer.zero_grad()
            loss.backward()
            if args.clip_grad > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.clip_grad)
            optimizer.step()

            probs = softmax(y_pred)
            _, predlabs = torch.max(probs.data, 1)
            total_probs.extend(probs.data.cpu().numpy())
            total_predlabs.extend(predlabs.data.cpu().numpy())
            total_truelabs.extend(torch.max(y_true, 1)[1].data.cpu().numpy())
            batch_acc = accuracy_score(
                torch.max(y_true, 1)[1].data.cpu().numpy(),
                predlabs.data.cpu().numpy())

            if (itr + 1) % 50 == 0:
                print(('Epoch: {} Iter: {}/{} Loss: {} Acc: {}').format(
                    epoch, itr + 1, len(train_loader), loss.item(), batch_acc))

            if ((itr + 1) % len(train_loader)) == 0:
                total_truelabs = np.array(total_truelabs)
                total_predlabs = np.array(total_predlabs)
                total_probs = np.array(total_probs)
                total_train_acc = accuracy_score(total_truelabs,
                                                 total_predlabs)

                f1 = f1_score(total_truelabs, total_predlabs, average='macro')
                res = {
                    'epoch': epoch + (itr * 1.0 + 1.0) / len(train_loader),
                    'steps': epoch * len(train_loader) + itr + 1,
                    'train_loss': avg_loss,
                    'train_f1': f1,
                    'train_acc': total_train_acc
                }

                model.eval()

                with torch.no_grad():
                    total_predlabs = []
                    total_probs = []
                    total_truelabs = []
                    total_val_loss = 0.

                    for i, (dat, labs) in enumerate(test_loader):
                        dat = dat.to(device).float()
                        labs = labs.to(device).long()
                        y_pred = model(dat)
                        val_loss = loss_fn(y_pred, torch.max(labs, 1)[1])

                        probs = softmax(y_pred)
                        _, predlabs = torch.max(probs.data, 1)
                        total_probs.extend(probs.data.cpu().numpy())
                        total_predlabs.extend(predlabs.data.cpu().numpy())
                        total_truelabs.extend(
                            torch.max(labs, 1)[1].data.cpu().numpy())
                        total_val_loss += (val_loss.item() / len(test_loader))

                    total_truelabs = np.array(total_truelabs)
                    total_predlabs = np.array(total_predlabs)
                    total_probs = np.array(total_probs)
                    total_val_acc = accuracy_score(total_truelabs,
                                                   total_predlabs)

                    total_val_f1 = f1_score(total_truelabs,
                                            total_predlabs,
                                            average='macro')
                    print("At Epoch: {}, Iter: {}, val_loss: {}, val_acc: {}".
                          format(epoch, itr + 1, total_val_loss,
                                 total_val_acc))
                    print("Confusion Matrix: ")
                    print(confusion_matrix(total_truelabs, total_predlabs))
                    if (total_val_acc > best_val_acc):
                        best_val_acc = total_val_acc
                        print("saving model")
                        torch.save(
                            model.state_dict(),
                            os.path.join(args.model_path,
                                         args.results_file + '_model.pth'))
                        np.savetxt(os.path.join(
                            args.model_path, args.results_file + '_prob.txt'),
                                   total_probs,
                                   delimiter=',')
                        np.savetxt(os.path.join(
                            args.model_path, args.results_file + '_pred.txt'),
                                   total_predlabs,
                                   delimiter=',')
                        np.savetxt(os.path.join(
                            args.model_path, args.results_file + '_true.txt'),
                                   total_truelabs,
                                   delimiter=',')

                    res['val_loss'] = total_val_loss
                    res['val_acc'] = total_val_acc
                    res['val_f1'] = total_val_f1

                    plot_loss = ['train_loss']
                    plot_acc = ['train_acc']
                    plot_f1 = ['train_f1']

                    plot_loss += ['val_loss']
                    plot_acc += ['val_acc']
                    plot_f1 += ['val_f1']

                    results.add(**res)
                    results.plot(x='epoch',
                                 y=plot_loss,
                                 title='Multi-Class Loss',
                                 ylabel='CE Loss')
                    results.plot(x='epoch',
                                 y=plot_acc,
                                 title='Accuracy',
                                 ylabel='Accuracy')
                    results.plot(x='epoch',
                                 y=plot_f1,
                                 title='F1-Score (Macro)',
                                 ylabel='F1-Score')
                    results.save()

        scheduler.step(total_val_loss)
Example 16
def main():
    logger = LogMaster.get_logger('main')

    # Create checkpoint directory
    if not os.path.exists(args.ckpt_dir):
        os.makedirs(args.ckpt_dir)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    if args.restore_train:
        if not os.path.isfile(args.ckpt_path):
            print('checkpoint not found: ', args.ckpt_path)
            exit(-1)
        checkpoint = torch.load(args.ckpt_path)
        args_dict = checkpoint['args']
        args.batch_size = args_dict['batch_size']
        args.learning_rate = args_dict['learning_rate']
        args.att_mode = args_dict['att_mode']
        args.model = args_dict['model']
        args.embed_size = args_dict['embed_size']
        args.hidden_size = args_dict['hidden_size']
        args.num_layers = args_dict['num_layers']
        cur_epoch = checkpoint['epoch']
        print('restore training from existing checkpoint')
        pprint.pprint(args_dict)
    else:
        cur_epoch = 0
        checkpoint = None

    logger.info('building data loader...')
    # Build data loader
    data_loader = get_loader(args.train_image_dir,
                             args.train_caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    logger.info(f'building model {args.model}...')
    # Build the models
    vocab_size = len(vocab)

    if args.model == 'ssa':
        net = SSA(embed_dim=args.embed_size,
                  lstm_dim=args.hidden_size,
                  vocab_size=vocab_size,
                  dropout=args.dropout,
                  fine_tune=args.fine_tune)
    elif args.model == 'nic':
        net = NIC(embed_dim=args.embed_size,
                  lstm_dim=args.hidden_size,
                  vocab_size=vocab_size,
                  dropout=args.dropout,
                  fine_tune=args.fine_tune)
    elif args.model == 'scacnn':
        net = SCACNN(embed_dim=args.embed_size,
                     lstm_dim=args.hidden_size,
                     vocab_size=vocab_size,
                     dropout=args.dropout,
                     att_mode=args.att_mode,
                     fine_tune=args.fine_tune)
    else:
        net = None
        print('model name not found: ' + args.model)
        exit(-2)

    net.train()
    net.zero_grad()
    params = net.train_params

    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1 and args.model == 'scacnn':
            net = nn.DataParallel(net)
        net.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss(reduction='none')
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    if args.restore_train:
        print('restoring from checkpoint...')
        net.load_state_dict(checkpoint['net_state'])
        optimizer.load_state_dict(checkpoint['opt_state'])

    logger.info('start training...')
    # Train the Models
    total_step = len(data_loader)
    running_loss = 0
    for epoch in range(args.num_epochs):
        for i, (images, inputs, targets, masks, lengths,
                img_ids) in tqdm(enumerate(data_loader),
                                 total=total_step,
                                 leave=False,
                                 ncols=80,
                                 unit='b'):
            # Set mini-batch data
            if args.fine_tune:
                images = to_var(images, requires_grad=True)
            else:
                images = to_var(images, requires_grad=False)
            inputs = to_var(inputs, requires_grad=False)
            targets = to_var(targets, requires_grad=False)
            targets = targets.view(-1)
            masks = to_var(masks, requires_grad=False).view(-1)

            net.zero_grad()
            # Forward, Backward and Optimize
            outputs = net.forward(images, inputs, lengths)
            outputs = outputs.contiguous().view(-1, vocab_size)
            loss = criterion(outputs, targets)
            loss = torch.mean(loss * masks)

            loss.backward()
            optimizer.step()
            running_loss += loss.data.item()

            # Make sure python releases GPU memory
            del loss, outputs, images, inputs, targets, masks, lengths, img_ids

        running_loss /= total_step
        logger.info('Epoch [%d/%d], Loss: %.4f, Perplexity: %5.4f' %
                    (cur_epoch + epoch + 1, args.num_epochs, running_loss,
                     np.exp(running_loss)))
        running_loss = 0
        # Save the model
        if (epoch + 1) % args.save_step == 0:
            if args.model == 'scacnn':
                save_file = '{}-{}-model-{}.ckpt'.format(
                    args.model, args.att_mode, cur_epoch + epoch + 1)
            else:
                save_file = '{}-model-{}.ckpt'.format(
                    args.model, cur_epoch + epoch + 1)
            save_path = os.path.join(args.ckpt_dir, save_file)
            args_dict = vars(args)
            opt_state = optimizer.state_dict()
            net_state = net.state_dict()
            epoch_id = epoch + 1
            save_data = {
                'net_state': net_state,
                'opt_state': opt_state,
                'args': args_dict,
                'epoch': epoch_id
            }
            torch.save(save_data, save_path)
            logger.info(f'model saved: {save_path}')
Example 17
        ssims.append(get_ssim(hr[b].unsqueeze(0), sr[b].unsqueeze(0)).item())
        if h > 160 and w > 160:
            msssim = get_msssim(hr[b].unsqueeze(0), sr[b].unsqueeze(0)).item()
        else:
            msssim = 0
        msssims.append(msssim)
    return (np.array(psnrs).mean(), np.array(ssims).mean(),
            np.array(msssims).mean())


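# map a float tensor in [0, 1] onto the 256 valid 8-bit pixel levels and back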
quantize = lambda x: x.mul(255).clamp(0, 255).round().div(255)

torch.manual_seed(0)

train_loader = get_loader(mode='train',
                          batch_size=16,
                          scale_factor=4,
                          augment=True)
test_loader = get_loader(mode='test')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
up = torch.nn.UpsamplingBilinear2d(scale_factor=4).to(device)


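# blend a bilinear upsample of `lr` with the ground truth `hr`; alpha controls
# how close the synthetic "SR" image is to the real one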
def get_sr(lr, hr, alpha=0.1):
    return quantize(up(lr) * (1 - alpha) + hr * alpha)


model = VGG(pretrained=True).to(device)

weight_dir = './weights/Discriminator'
os.makedirs(weight_dir, exist_ok=True)
Example 18
test_loader = get_loader(mode='test', height=256, width=256, scale_factor=4)
trainer.train(model, train_loader, test_loader, mode=f'ref_denoise_FFDNet', epoch_start=epoch_start, num_epochs=num_epochs, save_model_every=20, test_model_every=1, today=today)
"""

today = '2021.03.05'


#model = DnCNN()
#train_loader = get_loader(data='SIDD', mode='train', batch_size=batch_size, height=192, width=192, scale_factor=1, augment=True)
#test_loader = get_loader(data='SIDD', mode='test', height=256, width=256, scale_factor=1)
#trainer.train(model, train_loader, test_loader, mode=f'ref_denoise_sidd_DNCNN', epoch_start=epoch_start, num_epochs=num_epochs, save_model_every=20, test_model_every=1, today=today)


#model = MemNet(in_channels=3, channels=64, num_memblock=6, num_resblock=6)
#train_loader = get_loader(data='SIDD', mode='train', batch_size=batch_size, height=192, width=192, scale_factor=1, augment=True)
#test_loader = get_loader(data='SIDD', mode='test', height=256, width=256, scale_factor=1)
#trainer.train(model, train_loader, test_loader, mode=f'ref_denoise_sidd_MemNet', epoch_start=epoch_start, num_epochs=num_epochs, save_model_every=20, test_model_every=1, today=today)


#model = DHDN()
#train_loader = get_loader(data='SIDD', mode='train', batch_size=batch_size, height=192, width=192, scale_factor=1, augment=True)
#test_loader = get_loader(data='SIDD', mode='test', height=256, width=256, scale_factor=1)
#trainer.train(model, train_loader, test_loader, mode=f'ref_denoise_sidd_DHDN', epoch_start=epoch_start, num_epochs=num_epochs, save_model_every=20, test_model_every=1, today=today)


model = FFDNet()
train_loader = get_loader(data='SIDD', mode='train', batch_size=batch_size,
                          height=192, width=192, scale_factor=1, augment=True)
test_loader = get_loader(data='SIDD', mode='test', height=256, width=256,
                         scale_factor=1)
trainer.train(model, train_loader, test_loader, mode='ref_denoise_sidd_FFDNet',
              epoch_start=epoch_start, num_epochs=num_epochs,
              save_model_every=20, test_model_every=1, today=today)

Example 19
model = NMDiscriminator().to(device)

# torchsummary(model, input_size=(3, 448, 448))

model.load_state_dict(torch.load('./models/weights/NMD.pth'))

# Hyper-parameters
num_epochs = 1000
learning_rate = 1e-4
eps = 1e-7

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

loader = get_loader(batch_size=24)

total_iter = len(loader)
current_lr = learning_rate

alpha = 0.5
sigma = 0.1

stime = time.time()
total_epoch_iter = total_iter * num_epochs

iter_count = 0
for epoch in range(num_epochs):
    for i, (lr, hr) in enumerate(loader):
        iter_count += 1
        lr = lr.to(device)
Example 20
import trainer_DRN as trainer
from models.DRN import DRN
from utils.data_loader import get_loader
import torch

torch.manual_seed(0)

model = DRN()

# train_loader = get_loader(mode='train', height=192, width=192, scale_factor=4, batch_size=4)
train_loader = get_loader(mode='train',
                          height=196,
                          width=196,
                          scale_factor=4,
                          batch_size=4,
                          augment=True)
test_loader = get_loader(mode='test', scale_factor=4)

trainer.train(model, train_loader, test_loader, mode='DRN_Baseline')
Example 21
def main(args):
    # create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # image preprocessing
    transform = transforms.Compose([
        transforms.Resize(args.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # load vocab wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    print("cluster sizes: ", vocab.get_shapes())

    with open(args.annotation_path, 'rb') as f:
        annotation = pickle.load(f)
    print("annotations size:", len(annotation))

    # build data loader
    data_loader = get_loader(annotation,
                             args.image_dir,
                             args.h_dir,
                             args.openpose_dir,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers,
                             seq_length=args.seq_length)

    upp_size, low_size = vocab.get_shapes()
    encoder = EncoderCNN(args.embed_size).to(device)

    if args.upp:
        decoder = DecoderRNN(args.embed_size, args.hidden_size, upp_size + 1,
                             args.num_layers).to(device)
    elif args.low:
        decoder = DecoderRNN(args.embed_size, args.hidden_size, low_size + 1,
                             args.num_layers).to(device)
    else:
        print('Please specify upper/lower body model to train')
        exit(1)

    # loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # train the models
    total_step = len(data_loader)
    print("total iter", total_step)
    for epoch in range(args.num_epochs):
        for i, (images, poses, homography, poses2,
                lengths) in enumerate(data_loader):
            images = images.to(device)
            poses = poses.to(device)
            targets = pack_padded_sequence(poses, lengths, batch_first=True)[0]

            # forward, backward, optimize
            features = encoder(images)
            outputs = decoder(features, homography, poses2, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args.num_epochs, i, total_step, loss.item(),
                            np.exp(loss.item())))

            if ((i + 1) % args.save_step == 0) or (i == total_step - 1):
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
Example 22
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing, normalization for the pretrained resnet
    transform = None

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    vocab_size = vocab.__len__()

    # Build data loader
    data_loader = get_loader(args.training_feat_dir,
                             args.training_captions,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    val_data_loader = get_loader(args.validation_feat_dir,
                                 args.validation_captions,
                                 vocab,
                                 transform,
                                 args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers)

    # Build the models
    encoder = EncoderRNN(args.video_size, args.embed_size,
                         args.input_dropout_p, args.rnn_dropout_p,
                         args.num_layers, args.bidirectional).to(device)
    decoder = DecoderRNN(
        vocab_size,
        args.max_seq_length,
        args.embed_size,
        args.word_size,
    ).to(device)
    model = S2VTAttentionModel(encoder, decoder)

    # Loss and optimizer
    criterion = CustomLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=0)
    exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       step_size=25,
                                                       gamma=0.8)

    # Train the models
    total_step = len(data_loader)
    best_loss = 999

    for epoch in range(args.num_epochs):
        model.train()
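        # note: recent PyTorch expects scheduler.step() after the epoch's
        # optimizer steps; calling it first matches the older API used here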
        exp_lr_scheduler.step()
        for i, (features, captions, lengths) in enumerate(data_loader):

            # Set teacher forcing and scheduled sampling
            teacher_forcing_ratio = 0.7
            use_teacher_forcing = random.random() < teacher_forcing_ratio

            # Set mini-batch dataset
            features = features.to(device)
            captions = captions.to(device)

            # Forward, backward and optimize
            if use_teacher_forcing:
                # Teacher forcing: Feed the target as the next input
                seq_probs, _ = model(features,
                                     captions,
                                     mode='teacher_forcing')
            else:
                # Without teacher forcing: use its own predictions as the next input
                seq_probs, _ = model(features, mode='no_teacher')

            loss = criterion(seq_probs, captions[:, 1:],
                             lengths)  # eliminate <SOS>

            optimizer.zero_grad()
            loss.backward()
            # clip_gradient(optimizer, grad_clip=0.1)
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch + 1, args.num_epochs, i, total_step,
                            loss.item(), np.exp(loss.item())))

        # Evaluation
        model.eval()
        val_loss = []
        for i, (features, captions, lengths) in enumerate(val_data_loader):

            # Set mini-batch dataset
            features = features.to(device)
            captions = captions.to(device)

            # Forward, backward and optimize
            with torch.no_grad():
                seq_probs, _ = model(features, mode='no_teacher')
            loss = criterion(seq_probs, captions[:, 1:],
                             lengths)  # eliminate <SOS>
            val_loss.append(loss.item())

        # Print validation info
        val_loss = np.mean(val_loss)
        print('Epoch [{}/{}], VAL_Loss: {:.4f}, Perplexity: {:5.4f}'.format(
            epoch + 1, args.num_epochs, val_loss, np.exp(val_loss)))

        # Save the model checkpoints
        if (epoch + 1) % args.save_step == 0:
            torch.save(
                model.state_dict(),
                os.path.join(args.model_path,
                             'checkpoint-{}.ckpt'.format(epoch + 1)))

            if best_loss > val_loss:
                torch.save(
                    model.state_dict(),
                    os.path.join(args.model_path, 'best.pth'))
                best_loss = val_loss
Example 23
    return loss1 + loss2


if __name__ == "__main__":

    netname = 'iternet'
    num_epochs = 1000
    eps = 1e-6
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = IterNet().to(device)
    # model.load_state_dict(torch.load('./models/weights/iternet_model_final.pth'))
    # from torchsummary import summary
    # summary(model, input_size=(3, 512, 512))

    loader = get_loader(image_dir='./data/', batch_size=2, mode='train')
    total_iter = len(loader)

    lr = 1e-3
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    stime = time.time()
    total_epoch_iter = total_iter * num_epochs
    iter_count = 0

    bce = nn.BCELoss()

    losses = []
    summary = SummaryWriter()
    for epoch in range(num_epochs):
        for i, (ximg, yimg) in enumerate(loader):
Example 24
                        default='0',
                        help='the gpu used')
    parser.add_argument('--checkpoints',
                        type=str,
                        default='./weights/DRIVE1/model_best.pth',
                        help="weight's path")
    parser.add_argument('--save_path',
                        type=str,
                        required=True,
                        choices=['./results/prob', './results/binary'],
                        help="where to save the test outputs")
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_avaiable
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Loading test data
    test_loader = get_loader(args.data_path,
                             args.resize,
                             args.batch_size,
                             mode='test',
                             dataset_name=args.dataset)

    # Load model
    net = MDMNet(input_size=args.resize, n_classes=args.n_class)

    # Load weights
    net, _ = load_pretrained(net, args.checkpoints)

    run_test(net, test_loader, device, args.save_path)
Example 25
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))

from utils.data_loader import get_loader
import torch

torch.manual_seed(0)

from models.RCAN import RCAN
model = RCAN()

scale_factor = 4

if scale_factor == 4:
    train_loader = get_loader(mode='train',
                              batch_size=16,
                              height=192,
                              width=192,
                              scale_factor=4,
                              augment=True)
    test_loader = get_loader(mode='test',
                             height=256,
                             width=256,
                             scale_factor=4)
elif scale_factor == 2:
    train_loader = get_loader(mode='train', batch_size=16, augment=True)
    test_loader = get_loader(mode='test')

# import trainer as trainer
# trainer.train(model, train_loader, test_loader, mode='RCAN_x2_Baseline')

# import trainer_v6_from_shallow as trainer
# from models.RCAN_train_from_shallow import RCAN
Example No. 26
import trainer
from models.RCAN import RCAN
from utils.data_loader import get_loader
import torch

torch.manual_seed(0)

model = RCAN()

train_loader = get_loader(mode='train', batch_size=16, augment=True)
test_loader = get_loader(mode='test')

trainer.train(model, train_loader, test_loader, mode='RCAN_Baseline')
Example No. 27
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
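    # Validation loader: batch_size=1, shuffle=False, num_workers=1 (passed positionally)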
    val_loader = get_loader('./data/val_resized2014/',
                            './data/annotations/captions_val2014.json', vocab,
                            transform, 1, False, 1)

    start_epoch = 0

    encoder_state = args.encoder
    decoder_state = args.decoder

    # Build the models
    encoder = EncoderCNN(args.embed_size)
    if not args.train_encoder:
        encoder.eval()
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    if args.restart:
        encoder_state, decoder_state = 'new', 'new'

    if encoder_state == '':
        encoder_state = 'new'
    if decoder_state == '':
        decoder_state = 'new'

    if decoder_state != 'new':
        start_epoch = int(decoder_state.split('-')[1])

    print("Using encoder: {}".format(encoder_state))
    print("Using decoder: {}".format(decoder_state))

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)
    """ Make logfile and log output """
    with open(args.model_path + args.logfile, 'a+') as f:
        f.write("Training on vanilla loss (using new model). Started {} .\n".
                format(str(datetime.now())))
        f.write("Using encoder: new\nUsing decoder: new\n\n")

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
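    # Train only the decoder plus the encoder's final linear and batch-norm layers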
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    batch_loss = []
    batch_acc = []

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(start_epoch, args.num_epochs):
        for i, (images, captions, lengths, _, _) in enumerate(data_loader):

            # Set mini-batch dataset
            images = to_var(images)  # volatile=True here would block gradients to the encoder's trainable layers
            captions = to_var(captions)
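            # Flatten the padded captions into one token stream so CrossEntropyLoss scores every position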
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            out = decoder(features, captions, lengths)
            loss = criterion(out, targets)
            batch_loss.append(loss.item())  # .item() replaces the deprecated loss.data[0]

            loss.backward()
            optimizer.step()

            # # Print log info
            # if i % args.log_step == 0:
            #     print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f, Val: %.5f, %.5f'
            #           %(epoch, args.num_epochs, i, total_step,
            #             loss.data[0], np.exp(loss.data[0]), acc, gt_acc))

            #     with open(args.model_path + args.logfile, 'a') as f:
            #         f.write('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f, Val: %.5f, %.5f\n'
            #               %(epoch, args.num_epochs, i, total_step,
            #                 loss.data[0], np.exp(loss.data[0]), acc, gt_acc))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                # pickle requires binary file mode
                with open(args.model_path + 'training_loss.pkl', 'wb') as f:
                    pickle.dump(batch_loss, f)
                with open(args.model_path + 'training_val.pkl', 'wb') as f:
                    pickle.dump(batch_acc, f)
    with open(args.model_path + args.logfile, 'a') as f:
        f.write("Training finished at {} .\n\n".format(str(datetime.now())))
Example No. 28
    args = parser.parse_args()

    os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_avaiable

    # cudnn related setting
    cudnn.benchmark = True
    cudnn.deterministic = False
    cudnn.enabled = True

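    # rm_mkdir presumably removes any stale checkpoint directory before recreating it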
    rm_mkdir(args.checkpoints)

    # Data iteration generation
    train_loader = get_loader(args.data_path,
                              args.resize,
                              args.batch_size,
                              shuffle=True,
                              dataset_name=args.dataset,
                              num_workers=4)
    val_loader = get_loader(args.data_path,
                            args.resize,
                            args.batch_size,
                            shuffle=False,
                            dataset_name=args.dataset,
                            mode='test',
                            num_workers=4)

    net = MDMNet(input_size=args.resize, n_classes=args.n_class)

    if args.resume:
        net, _ = load_pretrained(net, args.pretrained)
Example No. 29
        '%Y%m%d%H') + '.log'
    train_logger = setup_logger('train_logger', log_file)
    model = ResNetPD().cuda()
    print('Params: ', get_n_params(model))
    train_logger.info(f'Params: {get_n_params(model)}')
    train_logger.info(
        f'optimizer: {optimizer_type}, lr: {lr}, batch_size: {batch_size}, image_size: {train_size}'
    )
    params = model.parameters()
    if optimizer_type == 'Adam':
        optimizer = torch.optim.Adam(params, lr)
    else:
        optimizer = torch.optim.SGD(params,
                                    lr,
                                    weight_decay=1e-4,
                                    momentum=0.9)

    image_root = '{}/images/'.format(train_path)
    gt_root = '{}/masks/'.format(train_path)

    train_loader = get_loader(image_root,
                              gt_root,
                              batchsize=batch_size,
                              trainsize=train_size,
                              augmentation=augumentation)
    total_step = len(train_loader)

    print('#' * 20, 'Start Training', '#' * 20)
    train(train_loader, model, optimizer, epochs, batch_size, train_size, clip,
          test_kvasir_path)


def eval_split(encoder, decoder, crit, opt, eval_kwargs=None):
    eval_kwargs = eval_kwargs or {}  # avoid the mutable-default-argument pitfall
    verbose = eval_kwargs.get('verbose', True)
    num_images = eval_kwargs.get('num_images',
                                 eval_kwargs.get('val_images_use', -1))
    split = eval_kwargs.get('split', 'val')
    lang_eval = eval_kwargs.get('language_eval', 0)
    dataset = eval_kwargs.get('dataset', 'coco')
    beam_size = eval_kwargs.get('beam_size', 1)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loader = get_loader(opt, split)
    decoder.eval()
    with torch.no_grad():
        loss = 0
        loss_sum = 0
        loss_evals = 1e-8
        predictions = []
        total_step = len(loader)
        start = time.time()
        for i, data in enumerate(loader, 0):
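            # Apply ImageNet mean/std normalization image-by-image before encoding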
            transform = transforms.Normalize((0.485, 0.456, 0.406),
                                             (0.229, 0.224, 0.225))
            imgs = []
            for k in range(data['imgs'].shape[0]):
                img = torch.tensor(data['imgs'][k], dtype=torch.float)
                img = transform(img)
                imgs.append(img)
            imgs = torch.stack(imgs, dim=0).to(device)
            labels = torch.tensor(data['labels'].astype(np.int32),
                                  dtype=torch.long).to(device)
            masks = torch.tensor(data['masks'], dtype=torch.float).to(device)

            features = encoder(imgs)
            seqs = decoder(features, labels)
            loss = crit(seqs, labels[:, 1:], masks[:, 1:])
            loss_sum += loss.item()  # accumulate as a Python float rather than a tensor
            loss_evals += 1
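            # Sample a caption for each image from its encoded features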
            seq, _ = decoder.sample(features)

            sents = utils.decode_sequence(
                loader.ix_to_word,
                seq[torch.arange(loader.batch_size, dtype=torch.long) *
                    loader.seq_per_img])
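            # decode_sequence above receives only the first of each image's seq_per_img sequences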

            print("batch [{} / {}] cost: {}".format(i, total_step,
                                                    utils.get_duration(start)))
            for k, sent in enumerate(sents):
                entry = {"image_id": data['infos'][k]['id'], "caption": sent}
                predictions.append(entry)

                if verbose:
                    print("image: %s: %s" %
                          (entry['image_id'], entry['caption']))

            if num_images >= 0 and (i + 1) * loader.batch_size >= num_images:
                break

    lang_stats = None
    if lang_eval == 1:
        lang_stats = language_eval(dataset, predictions, eval_kwargs['id'],
                                   split)
    decoder.train()
    return loss_sum / loss_evals, predictions, lang_stats