Example #1
def init_model(gpu_ids, model_name):
    # model_name = 'pnasnet5large'
    # could be fbresnet152 or inceptionresnetv2
    model = pretrainedmodels.__dict__[model_name](num_classes=1000,
                                                  pretrained='imagenet')
    model.eval()
    load_img = utils.LoadImage()

    # transformations depending on the model
    # rescale, center crop, normalize, and others (ex: ToBGR, ToRange255)
    tf_img = utils.TransformImage(model)
    """
    TODO(WG): Would be nice to use something like DataParallel, but that only does a forward pass on a given module.
    Need to stop before the logits step.
    Should create a wrapper for pretrainedmodels that does the MPI-like ops across GPUs on model.features modules:
    1) replicate
    2) scatter
    3) parallel_apply
    4) gather
    Would have to know what layers are being used on each model. 
    """
    if torch.cuda.is_available():
        model = model.cuda(device=gpu_ids[0])

    return load_img, tf_img, model
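The TODO above maps directly onto the torch.nn.parallel primitives that DataParallel is built from. A minimal sketch of that idea, assuming the model already sits on gpu_ids[0] and exposes a .features module as in the pretrainedmodels API; parallel_features is a hypothetical helper, not part of the library:

from torch.nn.parallel import replicate, scatter, parallel_apply, gather

def parallel_features(model, batch, gpu_ids):
    # Run only model.features across GPUs, stopping before the logits step:
    # 1) replicate the feature extractor, 2) scatter the batch,
    # 3) apply each replica to its shard, 4) gather the outputs.
    replicas = replicate(model.features, gpu_ids)
    inputs = scatter(batch, gpu_ids)
    outputs = parallel_apply(replicas[:len(inputs)], inputs)
    return gather(outputs, gpu_ids[0])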
Example #2
def main(opt):
    dataset = VideoDataset(opt, 'inference')
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len

    if opt['beam_size'] != 1:
        assert opt["batch_size"] == 1
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          opt['dim_vid'],
                          n_layers=opt['num_layers'],
                          rnn_cell=opt['rnn_type'],
                          bidirectional=opt["bidirectional"],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    else:
        return

    # if torch.cuda.device_count() > 1:
    #     print("{} devices detected, switch to parallel model.".format(torch.cuda.device_count()))
    #     model = nn.DataParallel(model)

    convnet = 'nasnetalarge'
    vocab = dataset.get_vocab()
    full_decoder = ConvS2VT(convnet, model, opt)

    tf_img_fn = ptm_utils.TransformImage(full_decoder.conv)
    load_img_fn = PIL.Image.fromarray

    for video_path in opt['videos']:
        print(video_path)
        with torch.no_grad():
            frames = skvideo.io.vread(video_path)
            # bp ---
            batches = create_batches(frames, load_img_fn, tf_img_fn)
            seq_prob, seq_preds = full_decoder(batches, mode='inference')
            sents = utils.decode_sequence(vocab, seq_preds)

            for sent in sents:
                print(sent)
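create_batches is not defined in this snippet. A plausible minimal version, assuming frames is the N x H x W x 3 uint8 array returned by skvideo.io.vread and that batch_size is a hypothetical parameter:

import torch

def create_batches(frames, load_img_fn, tf_img_fn, batch_size=32):
    # Convert each decoded frame to a PIL image, apply the model's
    # transform, and stack the results into fixed-size batches.
    tensors = [tf_img_fn(load_img_fn(frame)) for frame in frames]
    return torch.stack(tensors).split(batch_size)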
Example #3
def run_eval(img_path, model):
    model.eval()  # a resnet50 model by default

    # Load and Transform one input image
    tf_img = utils.TransformImage(model)
    img = img_path
    input_data = Image.open(img)  # 3x400x225
    input_data = tf_img(input_data)  # 3x299x299
    input_data = input_data.unsqueeze(0)  # 1x3x299x299
    input = torch.autograd.Variable(input_data)

    # Load Imagenet Synsets
    with open('data/imagenet_synsets.txt', 'r') as f:
        synsets = f.readlines()

    synsets = [x.strip() for x in synsets]
    splits = [line.split(' ') for line in synsets]
    key_to_classname = {spl[0]: ' '.join(spl[1:]) for spl in splits}

    with open('data/imagenet_classes.txt', 'r') as f:
        class_id_to_key = f.readlines()

    class_id_to_key = [x.strip() for x in class_id_to_key]

    # Make predictions
    output = model(input)  # size(1, 1000)
    max_, argmax = output.data.squeeze().max(0)
    class_id = argmax.item()
    class_key = class_id_to_key[class_id]
    classname = key_to_classname[class_key]
    return img, classname, max_, class_key
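A hedged usage sketch for run_eval, assuming pretrainedmodels is importable and 'croco.jpg' is a hypothetical image path next to the data/ files the function reads:

import pretrainedmodels

model = pretrainedmodels.__dict__['resnet50'](num_classes=1000,
                                              pretrained='imagenet')
img, classname, score, class_key = run_eval('croco.jpg', model)
print("'{}' is a '{}' ({}), score {:.2f}".format(img, classname, class_key,
                                                 score.item()))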
Example #4
def test_pm_imagenet(model_name, pretrained):
    if set_grad_enabled: set_grad_enabled(False)

    print('test_pm_imagenet("{}")'.format(model_name))
    net = pm.__dict__[model_name](num_classes=1000, pretrained=pretrained)
    net.eval()

    tensor = utils.TransformImage(net)(img)
    tensor = tensor.unsqueeze(0)
    x = Variable(tensor, requires_grad=False)

    out_logits = net(x)
    if 'squeezenet' in model_name:
        # Conv2d without view at the end
        assert out_logits.shape == torch.Size([1, 1000, 1, 1])
        return

    assert out_logits.shape == torch.Size([1, 1000])

    out_feats = net.features(x)
    out_logits_2 = net.logits(out_feats)
    assert equal(out_logits, out_logits_2)

    if 'dpn' in model_name:
        # Conv2d instead of Linear
        return
    net.last_linear = nn.Linear(net.last_linear.in_features, 10)

    out_logits_3 = net.logits(out_feats)
    assert out_logits_3.shape == torch.Size([1, 10])

    if set_grad_enabled: set_grad_enabled(True)
Example #5
def train():
    opt._parse()
    model = Inceptionv4(n_class=2,
                        use_drop=opt.use_drop,
                        model_name=opt.model_name,
                        pre_trained=opt.pretrained_model).cuda()
    print('model construct completed')
    tf_img = utils.TransformImage(model.inception_model)
    train_dataset = PathologyDataset(opt.data_dir,
                                     mode="train",
                                     transform=tf_img)
    test_dataset = PathologyDataset(opt.data_dir,
                                    mode="test",
                                    transform=tf_img)
    print('load data')
    train_dataloader = data_.DataLoader(train_dataset,
                                        batch_size=opt.train_batch_size,
                                        shuffle=True,
                                        num_workers=opt.num_workers)
    test_dataloader = data_.DataLoader(test_dataset,
                                       batch_size=opt.test_batch_size,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    if opt.load_path:
        model.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    lr_ = opt.lr
    optimizer = model.get_optimizer()
    avg_loss = 0

    for epoch in range(opt.epoch):
        for ii, (img, label) in tqdm(enumerate(train_dataloader)):
            img, label = img.cuda().float(), label.cuda()
            label = label.view(len(label))
            img, label = Variable(img), Variable(label)
            output = model(img)
            cls_loss = nn.CrossEntropyLoss()(output, label)
            optimizer.zero_grad()
            cls_loss.backward()
            optimizer.step()
            avg_loss += cls_loss.item()
            if (ii + 1) % opt.plot_every == 0:
                print("cls_loss=" + str(avg_loss / opt.plot_every))
                avg_loss = 0
        eval_result = test_model(test_dataloader, model, test_num=opt.test_num)
        if eval_result > best_map:
            best_map = eval_result
            best_path = model.save(best_map=best_map)
        if epoch == opt.epoch // 5:
            model.load(best_path)
            model.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        if epoch == opt.epoch - 1:
            break
Example #6
 def __init__(self, cnn_model, image_dir, index_file, caption_file, vocab):
     self.load_img = utils.LoadImage()
     self.tf_img = utils.TransformImage(cnn_model)
     self.image_dir = image_dir
     self.image_indices = codecs.open(index_file,
                                      encoding="utf-8").readlines()
     self.captions = codecs.open(caption_file, encoding="utf-8").readlines()
     self.vocab = vocab
     self.vocab_keys = vocab.keys()
     self.max_len = 50
Example #7
 def __init__(self, img_paths, model, img_size=224, augment=False):
     self.load_img = utils.LoadImage()
     additional_args = {}
     if augment:
         additional_args = {
             'random_crop': True, 'random_hflip': False,
             'random_vflip': False
         }
     self.tf_img = utils.TransformImage(
         model, scale=img_size / 256, **additional_args)
     self.img_paths = img_paths
Example #8
 def __init__(self, use_gpu: bool = True):
     super().__init__(use_gpu)
     self.use_gpu = use_gpu
     self.cnn = vgg16()
     self.trans = utils.TransformImage(self.cnn)
     self.trans = transforms.Compose([transforms.ToPILImage(), self.trans])
     if use_gpu:
         self.cnn = self.cnn.cuda()
     self.cnn.eval()
     for param in self.cnn.parameters():
         param.requires_grad = False
Example #9
def load_data(model):
    trainTransform = utils.TransformImage(model)
    validTransform = utils.TransformImage(model)

    # build MyDataset instances
    train_data = MyDataset(data_path=os.path.join(data_dir, 'train'),
                           transform=trainTransform)
    valid_data = MyDataset(data_path=os.path.join(data_dir, 'val'),
                           transform=validTransform)

    # build the DataLoaders
    train_loader = DataLoader(dataset=train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=4)
    valid_loader = DataLoader(dataset=valid_data,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=4)

    return train_loader, valid_loader
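A hedged usage sketch for load_data, assuming data_dir and batch_size are the module-level globals it relies on and that pretrainedmodels is available:

import pretrainedmodels

model = pretrainedmodels.__dict__['resnet18'](num_classes=1000,
                                              pretrained='imagenet')
train_loader, valid_loader = load_data(model)
images, labels = next(iter(train_loader))
print(images.shape)  # e.g. torch.Size([batch_size, 3, 224, 224])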
Example #10
 def __init__(self, pil_images, model, img_size=224, augment=False):
     additional_args = {}
     if augment:
         additional_args = {
             'random_crop': True,
             'random_hflip': False,
             'random_vflip': False
         }
     self.tf_img = utils.TransformImage(model,
                                        scale=img_size / 256,
                                        **additional_args)
     self.pil_images = pil_images
Example #11
    def __init__(self, use_gpu: bool = True, transform: bool = True):
        super().__init__()

        self.cnn = pretrainedmodels.vgg16()
        self.tf_image = utils.TransformImage(self.cnn)
        self.transform = transform
        self.use_gpu = use_gpu

        if self.use_gpu:
            self.cnn = self.cnn.cuda()
        self.cnn.eval()

        for param in self.cnn.parameters():
            param.requires_grad = False
Example #12
    def __init__(self, use_gpu: bool = True, transform: bool = True):
        super().__init__()
        print('USING InceptionV3Extractor')
        self.cnn = pretrainedmodels.inceptionv3()

        self.tf_image = utils.TransformImage(self.cnn)
        self.transform = transform
        self.use_gpu = use_gpu
        if self.use_gpu:
            self.cnn = self.cnn.cuda()
        self.cnn.eval()
        self.features_size = 2048
        self.regions_count = 64
        self.regions_features_size = 2048

        for param in self.cnn.parameters():
            param.requires_grad = False
Example #13
    def __init__(self,
                 classifier=LogisticRegression(),
                 feature_model=pretrainedmodels.inceptionv4(num_classes=1000, pretrained='imagenet'), 
                 image_transformer=None, image_type=None, padding=0):
        """
        Arguments
        =========
        classifier - Class that implements fit and predict
            The classifier that will be used.  Is assumed to follow the scikit-learn API
        
        feature_model - Class that implements "eval" and "features"
            The feature extraction model.  Assumed to follow the pretrainedmodels API 
        
        image_transformer - None or callable
            The feature model is almost certainly assuming something about the size or pixel
            distribution of the data.  The image transformer should be callable and convert
            an image to the format the feature_model is expecting.  This is IMPORTANT!

        image_type - string
            The format the image should be in.  RGB or something else

        padding - int
            The amount of padding to put around the bounding box (i.e., extra pixels to retain)
            
        """

        self.classifier = classifier
        self.feature_model = feature_model
        self.feature_model.eval()  # Setup the model
        
        # If None, build a transformer via pretrainedmodels' utils; otherwise the caller must supply one
        if image_transformer is None: 
            transformer = utils.TransformImage(self.feature_model)
            self.image_transformer = lambda x: torch.autograd.Variable(transformer(x).unsqueeze(0), requires_grad=False)
        else:
            self.image_transformer = image_transformer
        
        if image_type is None:
            self.image_type = self.feature_model.input_space
        else:
            self.image_type = image_type

        self.padding = padding

        # Map strings to integers
        self.label_mapping = []
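The docstring above requires image_transformer to map an image to exactly what feature_model expects. A minimal sketch of such a callable, mirroring the default branch; my_transformer is hypothetical:

import torch
import pretrainedmodels
from pretrainedmodels import utils

feature_model = pretrainedmodels.inceptionv4(num_classes=1000,
                                             pretrained='imagenet')
tf = utils.TransformImage(feature_model)

def my_transformer(pil_image):
    # 3xHxW tensor -> 1x3xHxW batch, no gradients needed for inference
    return torch.autograd.Variable(tf(pil_image).unsqueeze(0),
                                   requires_grad=False)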
Example #14
def test_multi_process_infer():
    model = pretrainedmodels.__dict__["resnet18"](num_classes=1000,
                                                  pretrained='imagenet')
    tf_img = utils.TransformImage(model)
    load_img = utils.LoadImage()
    img_list = [
        "010.jpg", "004.jpg", "005.jpg", "011.jpg", "012.jpg", "boy.jpg"
    ]
    res = {}
    t0 = time.time()
    p_list = []
    my_queue = Queue()
    with Manager() as manager:
        pred_list = manager.list()
        my_pool = Pool(4)
        for img in img_list:
            input_img = load_img(img)
            input_tensor = tf_img(
                input_img)  # 3x400x225 -> 3x299x299 size may differ
            input_tensor = input_tensor.unsqueeze(
                0)  # 3x299x299 -> 1x3x299x299
            input_img = torch.autograd.Variable(input_tensor,
                                                requires_grad=False)
            print("image:", img)
            p = Process(target=infer_process,
                        args=(my_queue, input_img, model))
            p.start()
            p_list.append(p)
            # p.start()
            # p.join()
            # xx = my_pool.apply_async(infer_process,args=(pred_list,input_img,model,),callback=infer_callback)
            # res[img] = xx
        # my_pool.close()
        # my_pool.join()
    for p in p_list:
        p.join()

    while not my_queue.empty():
        value = my_queue.get(True)
        print("queue get a predict result:", value)
        # time.sleep(random.random())

    print("Time cost:", time.time() - t0)
    print("模型推理完毕...")
Example #15
def main():
    global args
    args = parser.parse_args()

    for arch in args.arch:
        # Load Model
        model = pretrainedmodels.__dict__[arch](num_classes=1000,
                                                pretrained='imagenet')
        model.eval()

        path_img = args.path_img
        # Load and Transform one input image
        load_img = utils.LoadImage()
        tf_img = utils.TransformImage(model)

        input_data = load_img(args.path_img)  # 3x400x225
        input_data = tf_img(input_data)  # 3x299x299
        input_data = input_data.unsqueeze(0)  # 1x3x299x299
        input = torch.autograd.Variable(input_data)

        # Load Imagenet Synsets
        with open('data/imagenet_synsets.txt', 'r') as f:
            synsets = f.readlines()

        # len(synsets)==1001
        # synsets[0] == background
        synsets = [x.strip() for x in synsets]
        splits = [line.split(' ') for line in synsets]
        key_to_classname = {spl[0]: ' '.join(spl[1:]) for spl in splits}

        with open('data/imagenet_classes.txt', 'r') as f:
            class_id_to_key = f.readlines()

        class_id_to_key = [x.strip() for x in class_id_to_key]

        # Make predictions
        output = model(input)  # size(1, 1000)
        max_, argmax = output.data.squeeze().max(0)
        class_id = argmax.item()
        class_key = class_id_to_key[class_id]
        classname = key_to_classname[class_key]

        print("'{}': '{}' is a '{}'".format(arch, path_img, classname))
Example #16
def test_batch_infer():
    model = pretrainedmodels.__dict__["resnet18"](num_classes=1000,
                                                  pretrained='imagenet')
    tf_img = utils.TransformImage(model)
    load_img = utils.LoadImage()
    img_list = [
        "010.jpg", "004.jpg", "005.jpg", "011.jpg", "012.jpg", "boy.jpg"
    ]
    t0 = time.time()
    for img in img_list:
        input_img = load_img(img)
        input_tensor = tf_img(
            input_img)  # 3x400x225 -> 3x299x299 size may differ
        input_tensor = input_tensor.unsqueeze(0)  # 3x299x299 -> 1x3x299x299
        input_img = torch.autograd.Variable(input_tensor, requires_grad=False)
        output = model(input_img)
        _, predicted = torch.max(output.data, 1)
        print("model predict result is:", predicted.numpy()[0])
    print("Time cost:", time.time() - t0)
Example #17
def createFeatures3(encoder, img_dir, lbl_train, lbl_test):
    import pretrainedmodels.utils as utils
    load_img = utils.LoadImage()
    tf_img = utils.TransformImage(encoder)

    train_dir = '%s/natural-image_training' % img_dir
    featuresTrain = []

    for i in range(len(lbl_train)):
        file = '%s/%s' % (train_dir, lbl_train[i])
        sample = transformFile2(load_img, tf_img, file)
        featuresTrain.append(encoder(sample))

    test_dir = '%s/natural-image_test' % img_dir
    featuresTest = []
    for i in range(len(lbl_test)):
        file = '%s/%s' % (test_dir, lbl_test[i])
        sample = transformFile2(load_img, tf_img, file)
        featuresTest.append(encoder(sample))
    return featuresTrain, featuresTest
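transformFile2 is not defined in this snippet. A plausible minimal version, assuming it loads a single file and returns a 1x3xHxW batch ready for the encoder:

def transformFile2(load_img, tf_img, file):
    input_img = load_img(file)        # PIL image
    input_tensor = tf_img(input_img)  # 3xHxW tensor
    return input_tensor.unsqueeze(0)  # 1x3xHxW batch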
Example #18
def extract():
    cuda_is_available = torch.cuda.is_available()
    print('CUDA is available' if cuda_is_available else 'CUDA is NOT available')
    xception_model = xception_with_pooling_features(num_classes=1000,
                                                    pretrained='imagenet')
    if cuda_is_available:
        xception_model = xception_model.to(torch.device('cuda:0'))
    photo_df = pd.read_json('photos/photo.json', lines=True)

    load_img = utils.LoadImage()

    # transformations depending on the model
    # rescale, center crop, normalize, and others (ex: ToBGR, ToRange255)
    tf_img = utils.TransformImage(xception_model)

    photo_df['features'] = None
    # Extract features just for a small subset.
    #photo_df = photo_df.loc[:100-1,:]
    for index, row in tqdm(photo_df.iterrows(),
                           total=photo_df.shape[0],
                           desc="Extracting features from photos"):
        photo_id = row['photo_id']
        file_name = os.path.join('photos', 'photos', photo_id + '.jpg')
        input_img = load_img(file_name)
        input_tensor = tf_img(input_img)  # 3x?x? -> 3x299x299 size may differ
        input_tensor = input_tensor.unsqueeze(0)  # 3x299x299 -> 1x3x299x299
        if cuda_is_available:
            input_tensor = input_tensor.cuda()
        with torch.no_grad():
            #input = torch.autograd.Variable(input_tensor, requires_grad=False)
            output_features = xception_model.features(
                input_tensor).cpu().numpy()  # 1x2048x1x1
        output_features = np.reshape(output_features, (-1, ))
        photo_df.at[index, 'features'] = output_features  # assigning via `row` would not modify the DataFrame

    os.makedirs('.cache', exist_ok=True)
    photo_df.to_pickle('.cache/photo.pkl')
Example #19
    for name in features_names:
        model._modules.get(name).register_forward_hook(hook_resnet)
    return model


W_attribute = np.load(
    './model/imageFeatureModel/W_sceneattribute_wideresnet18.npy')
features_vgg = []
features_resnet = []

objectModel = load_object_model()
sceneModel = load_scene_model()

tf = returnTF()  # for resnet
load_img = utils.LoadImage()
tf_img = utils.TransformImage(objectModel)  # for vgg


def getObjectFeature(img_url):
    input_img = load_img(img_url)
    input_tensor = tf_img(input_img)
    input_tensor = input_tensor.unsqueeze(0)
    input = torch.autograd.Variable(input_tensor, requires_grad=False)

    logit = objectModel.forward(input)
    h_x = F.softmax(logit, 1).data.squeeze()
    probs, idx = h_x.sort(0, True)
    probs = probs.numpy()
    idx = idx.numpy()

    objectRes = []
Example #20
def pd_bninception_v3():
    import torch

    model_name = 'pd_bninception_v3'

    device = 'cuda:0'

    torch.backends.cudnn.benchmark = True

    # In[2]:

    import numpy as np
    import pandas as pd
    from sklearn.model_selection import KFold, StratifiedKFold
    from sklearn.utils import shuffle

    import matplotlib.pyplot as plt
    plt.style.use('seaborn-white')
    import seaborn as sns
    sns.set_style("white")

    from skimage.transform import resize
    from skimage.color import rgb2gray, gray2rgb

    from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score

    from tqdm import tqdm_notebook

    import gc
    import math
    import sys

    from fastai import *
    from fastai.vision import *

    np.random.seed(42)

    data_dir = '../input/'
    submit_l1_dir = "../submits/"
    weights_dir = "weights/"
    results_dir = '../results/'

    name_label_dict = {
        0: 'Nucleoplasm',
        1: 'Nuclear membrane',
        2: 'Nucleoli',
        3: 'Nucleoli fibrillar center',
        4: 'Nuclear speckles',
        5: 'Nuclear bodies',
        6: 'Endoplasmic reticulum',
        7: 'Golgi apparatus',
        8: 'Peroxisomes',
        9: 'Endosomes',
        10: 'Lysosomes',
        11: 'Intermediate filaments',
        12: 'Actin filaments',
        13: 'Focal adhesion sites',
        14: 'Microtubules',
        15: 'Microtubule ends',
        16: 'Cytokinetic bridge',
        17: 'Mitotic spindle',
        18: 'Microtubule organizing center',
        19: 'Centrosome',
        20: 'Lipid droplets',
        21: 'Plasma membrane',
        22: 'Cell junctions',
        23: 'Mitochondria',
        24: 'Aggresome',
        25: 'Cytosol',
        26: 'Cytoplasmic bodies',
        27: 'Rods & rings'
    }

    def twenty_kfold_threshold(y_true, y_pred):
        n_classes = len(name_label_dict)
        classes_thresholds = []
        classes_scores = []
        for i in range(n_classes):
            kf_class_thresholds = []
            for j in range(20):
                kf = StratifiedKFold(n_splits=5,
                                     shuffle=True,
                                     random_state=239 + j * 101)
                for _, tst_inx in kf.split(y_true, y_true[:, i]):
                    t_min = np.min(y_pred[tst_inx, i])
                    t_max = np.max(y_pred[tst_inx, i])
                    thresholds = np.linspace(t_min, t_max, 50)
                    scores = np.array([
                        f1_score(y_true[tst_inx, i],
                                 np.int32(y_pred[tst_inx, i] >= threshold))
                        for threshold in thresholds
                    ])
                    threshold_best_index = np.argmax(scores)
                    kf_class_thresholds.append(
                        thresholds[threshold_best_index])
            threshold = np.mean(kf_class_thresholds)
            classes_thresholds.append(threshold)
            f1 = f1_score(y_true[:, i], np.int32(y_pred[:, i] >= threshold))
            classes_scores.append(f1)
        return classes_thresholds, classes_scores

    # In[3]:

    import pretrainedmodels

    pretrainedmodels.__dict__['model_names']

    # In[4]:

    import pretrainedmodels
    import pretrainedmodels.utils as pqutils

    _model_name = 'bninception'
    model = pretrainedmodels.__dict__[_model_name](num_classes=1000,
                                                   pretrained='imagenet')
    tf_img = pqutils.TransformImage(model)
    tf_mean = list(map(float, tf_img.__dict__['mean']))
    tf_std = list(map(float, tf_img.__dict__['std']))
    model_stats = (tf_mean, tf_std)
    model_stats

    # In[5]:

    data_dir = '../input/'
    valid_df = pd.read_csv('../input/' + 'val_id.csv',
                           header=None,
                           names=['idx', 'Id'])
    train_df = pd.read_csv(data_dir + 'train.csv')
    len(train_df)

    # In[6]:

    from PIL import Image as QImage
    ids = []
    labels = []

    def file_jpg_to_png(path):
        global ids
        gclasses = set(list(range(28))) - set([0, 25])
        f1 = '../input/new_data/' + path + '.jpg'
        f2 = '../input/train_png/' + path + '.png'
        xs = path.split('_')
        q = xs.index('classes') + 1
        xs = xs[q:]
        if len(gclasses & set([int(x) for x in xs])) == 0:
            return
        xs = ' '.join(xs)
        if not os.path.isfile(f2):
            try:
                im = QImage.open(f1)
                im = im.resize((512, 512), QImage.NEAREST)
                im.save(f2)
                ids.append(path)
                labels.append(xs)
            except Exception:
                pass
        else:
            ids.append(path)
            labels.append(xs)

    need_to_prepare_extra = False
    if need_to_prepare_extra:
        for filename in tqdm_notebook(os.listdir('../input/new_data/'),
                                      total=64447):
            if filename.endswith(".jpg"):
                file_jpg_to_png(filename[:-4])

    # In[7]:

    if need_to_prepare_extra:
        xtra_data = pd.DataFrame()
        xtra_data['Id'] = ids
        xtra_data['Target'] = labels
        xtra_data.to_csv(data_dir + 'xtra_train.csv', index=False)
        xtra_data.head(n=3)

    # In[8]:

    test_matches = pd.read_csv('../input/test_matches.csv')
    test_matches.Extra = test_matches['Extra'].apply(
        lambda x: "_".join(x.split("_")[2:]))

    # In[9]:

    xtra_data = pd.read_csv(data_dir + 'xtra_train.csv')
    xtra_data['Extra'] = xtra_data.Id.apply(lambda x: x[:x.find("_classes")])

    # In[10]:

    xtra_matches_ids = test_matches.Extra.values.tolist()
    xtra_data_train = xtra_data.loc[~xtra_data.Extra.isin(xtra_matches_ids),
                                    ['Id', 'Target']].reset_index(drop=True)
    xtra_data_valid = xtra_data.loc[xtra_data.Extra.isin(xtra_matches_ids),
                                    ['Id', 'Target']].reset_index(drop=True)

    # In[11]:

    data = xtra_data_train
    labels = np.zeros((data.shape[0], 28), dtype=np.int32)
    if "Target" in data:
        for i, lbls in data['Target'].str.split().iteritems():
            for j in map(int, lbls):
                labels[i, j] = 1
    for j in range(28):
        print(j, '\t', name_label_dict[j], '\t', labels[:, j].sum(), '\t',
              labels[:, j].sum() / labels.shape[0])

    # In[12]:

    xtra_matches_ids = [
        '1054_E4_1_classes_25_16_0', '1762_G4_5_classes_27',
        '1335_C6_2_classes_3', '935_D5_2_classes_22_0', '27_H9_2_classes_10',
        '669_D8_1_classes_16_2', '1178_D4_2_classes_19_16_14',
        '791_A9_1_classes_10_9', '759_F9_9_classes_25_21_19_16',
        '1283_F10_2_classes_16_0', '688_E7_10_classes_23',
        '1772_F9_7_classes_25_17', '454_E5_1_classes_14_0',
        '1020_C5_3_classes_23', '1386_G4_2_classes_8', '681_G8_5_classes_13',
        '1609_C4_2_classes_16_0', '690_D3_5_classes_22_21_1_0',
        '1245_B2_2_classes_21_0', '1335_C10_4_classes_16_0',
        '693_A11_3_classes_23', '1139_A12_4_classes_23',
        '916_F8_1_classes_25_2_0', '694_C1_2_classes_18_1',
        '929_B8_1_classes_25_19', '340_F5_3_classes_13', '138_B12_1_classes_8',
        '932_G11_2_classes_25_16', '28_H9_1_classes_10',
        '924_F12_1_classes_27', '682_F12_2_classes_25_4',
        '1147_D3_13_classes_16_0', '346_A5_1_classes_12', '616_F1_4_classes_8',
        '73_A10_1_classes_27_25', '663_A9_2_classes_16_14',
        '859_C8_4_classes_16_14', '933_C10_4_classes_22_21',
        '1207_B10_7_classes_12', '694_F10_1_classes_25_21',
        '908_E3_1_classes_4', '1758_C9_4_classes_17_2',
        '1335_D2_2_classes_2_0', '929_H2_2_classes_23',
        '1717_G8_34_classes_25_17', '1150_H4_7_classes_13',
        '1054_E4_2_classes_25_16_0', '504_B1_3_classes_25_16_0',
        '747_B5_4_classes_10_9', '1020_B1_7_classes_23_5',
        '918_H10_2_classes_25_15', '532_H3_1_classes_25_16_0',
        '757_C6_3_classes_16_2', '1346_H6_3_classes_16_5_0',
        '496_D1_1_classes_16_0', '1042_C3_3_classes_27', '929_B12_1_classes_3',
        '684_C4_2_classes_23_0', '696_C9_5_classes_25_21_0',
        '1144_A10_4_classes_2', '846_A8_2_classes_16_14',
        '903_F12_2_classes_23_5', '1264_G1_1_classes_27',
        '925_H8_2_classes_1_0', '121_C6_2_classes_10_9',
        '1657_E10_3_classes_25_17', '932_G11_1_classes_25_16',
        '704_G4_1_classes_25_12', '1039_C3_2_classes_19_16',
        '906_H7_2_classes_25_6', '19_H7_2_classes_8',
        '725_G10_2_classes_16_14', '681_B2_4_classes_4',
        '697_A6_4_classes_19_0', '1581_B12_2_classes_16_14',
        '926_F7_2_classes_5_0', '1770_D2_4_classes_21_17_4',
        '1037_F4_3_classes_19', '1413_F11_6_classes_21_16',
        '694_A2_1_classes_2', '1049_D11_2_classes_25_16_0',
        '1276_C3_2_classes_21_0', '346_B12_3_classes_14_0',
        '1773_G12_3_classes_16_12', '1183_F4_2_classes_15',
        '1158_H11_8_classes_16_5', '380_C6_1_classes_16_0',
        '792_B6_7_classes_13_0', '682_C9_6_classes_25_12_2',
        '906_A9_4_classes_20_0', '400_D3_2_classes_25_7',
        '1237_G1_4_classes_21_6', '793_B1_1_classes_25_22_0',
        '1308_A5_4_classes_5', '800_E1_1_classes_16_14',
        '1421_G5_7_classes_17', '906_A9_6_classes_20_0',
        '1245_B2_3_classes_21_0', '626_D7_6_classes_25_21_12',
        '344_G2_4_classes_11', '901_E12_1_classes_25_6_2',
        '1050_F6_6_classes_16_0', '240_G8_1_classes_8',
        '933_C2_1_classes_23_2_0', '556_B9_1_classes_25_18_0',
        '1335_C10_2_classes_16_0', '1125_F6_3_classes_4',
        '1495_F7_3_classes_7_0', '694_C1_1_classes_18_1',
        '918_B3_4_classes_14', '1762_E6_5_classes_7', '915_C6_5_classes_4',
        '820_G4_3_classes_10_9', '927_F12_12_classes_18_0',
        '901_D10_2_classes_12_0', '1642_G7_34_classes_25_16',
        '928_G1_2_classes_14_7', '682_G9_1_classes_7_0',
        '903_F2_1_classes_2_0', '1645_E1_32_classes_16_14',
        '685_G10_5_classes_12_0', '927_A9_10_classes_25_5',
        '957_G6_4_classes_16', '757_C6_2_classes_16_2', '1213_C4_2_classes_4',
        '909_A6_1_classes_2', '694_D6_2_classes_1_0', '480_D6_3_classes_25_16',
        '1050_F1_3_classes_25_16_0', '692_A1_5_classes_25_14_0',
        '1772_H1_5_classes_18_17_16_0', '991_G6_7_classes_10_9',
        '782_F8_2_classes_25_16', '693_H4_1_classes_7',
        '1259_A11_4_classes_19_16', '1414_D12_2_classes_21_0',
        '1139_D5_5_classes_5', '930_H3_2_classes_1',
        '901_G9_5_classes_25_19_0', '1754_G2_34_classes_5',
        '353_A9_1_classes_21_13', '1179_H7_1_classes_25_16_0',
        '1423_A4_2_classes_16_14', '686_F4_2_classes_22_21',
        '1693_E1_2_classes_23_16', '400_H8_2_classes_23',
        '1680_G4_4_classes_16', '935_G3_1_classes_5', '838_E8_1_classes_3',
        '1030_D8_2_classes_7_0', '684_D12_4_classes_18',
        '812_C10_2_classes_13_0', '1416_D10_6_classes_21_16_0',
        '1293_E3_2_classes_1_0', '480_D6_2_classes_25_16',
        '700_H6_2_classes_25_2', '1773_E10_4_classes_16_0',
        '611_E10_1_classes_25_13', '346_B12_4_classes_14_0',
        '523_A9_4_classes_5', '1581_B12_3_classes_16_14',
        '684_D8_6_classes_25_12_0', '927_F12_11_classes_18_0',
        '353_E4_2_classes_5', '556_C1_5_classes_25_22_16',
        '1179_H7_2_classes_25_16_0', '1711_B12_3_classes_26_21_4',
        '449_G8_2_classes_4_2', '544_A8_5_classes_22_21_7',
        '1772_H1_3_classes_18_17_16_0', '1772_G2_6_classes_25_19_16_0',
        '909_C11_2_classes_2_0', '930_C12_1_classes_18_14_6',
        '690_C10_2_classes_13', '1009_B6_2_classes_10_9',
        '757_E10_5_classes_12', '88_D7_2_classes_8', '383_E8_7_classes_25_17',
        '1432_F2_2_classes_6', '505_C10_1_classes_25_15',
        '1104_E7_2_classes_16_14', '699_E8_1_classes_1', '1213_C4_3_classes_4',
        '690_H5_1_classes_4', '1169_D3_6_classes_16_0',
        '686_F4_1_classes_22_21', '532_D1_1_classes_16_0',
        '896_G8_3_classes_5_0', '934_G4_3_classes_21', '344_G2_1_classes_11',
        '369_C9_1_classes_18_14_0', '682_F12_1_classes_25_4',
        '683_E1_2_classes_25_1_0', '697_G3_6_classes_13_7',
        '1772_A6_7_classes_5', '933_C4_6_classes_5', '1231_F9_5_classes_7',
        '802_D5_9_classes_16_0', '682_G10_1_classes_7',
        '850_C1_9_classes_21_0', '929_B12_2_classes_3',
        '1339_D3_3_classes_2_1', '858_D4_2_classes_4', '334_B12_2_classes_4',
        '622_F1_7_classes_8', '908_G5_2_classes_2_0',
        '778_G6_2_classes_25_16_14', '1027_C4_1_classes_7',
        '886_C10_5_classes_23_0', '807_C2_3_classes_4',
        '1314_D2_2_classes_25_16_0', '1770_B5_1_classes_21_16_11',
        '1105_F10_2_classes_16_0', '1283_B2_10_classes_16_0',
        '583_E11_1_classes_25_16', '820_G4_7_classes_10_9',
        '928_H3_2_classes_14_0', '970_H1_4_classes_25_18',
        '1751_A7_32_classes_27', '701_H10_2_classes_25_14',
        '1773_B6_11_classes_23_17_16', '1736_G7_31_classes_25_16',
        '928_H3_1_classes_14_0', '1645_E5_34_classes_17',
        '539_B3_1_classes_25_21_0', '683_E1_1_classes_25_1_0',
        '484_G6_3_classes_22', '928_A1_1_classes_4',
        '1773_B6_7_classes_23_17_16', '1255_A3_4_classes_16_0',
        '698_C6_2_classes_25_21_4', '1773_D5_6_classes_17',
        '681_G8_4_classes_13', '935_H11_2_classes_22_0',
        '1125_B9_4_classes_25_7', '698_F11_1_classes_13_0',
        '344_F7_1_classes_25_21', '906_C11_1_classes_4',
        '1656_F5_2_classes_19_17', '1761_A10_3_classes_23_17_14',
        '1772_H5_7_classes_17_7', '910_B8_1_classes_12_0',
        '1283_F10_4_classes_16_0', '508_C10_1_classes_25_15',
        '681_B2_3_classes_4', '868_E8_2_classes_17_16_0',
        '1339_B9_2_classes_16_0', '856_A2_4_classes_2_0',
        '700_C3_6_classes_21', '869_B3_1_classes_16_0',
        '701_B9_2_classes_21_13_0', '1178_F9_6_classes_16_0',
        '542_G1_1_classes_11_2_0'
    ]
    exclude_valid = [
        '5ae3db3a-bbc4-11e8-b2bc-ac1f6b6435d0',
        'e6d0b648-bbbc-11e8-b2ba-ac1f6b6435d0',
        '3202385a-bbca-11e8-b2bc-ac1f6b6435d0',
        '0cf36c82-bbca-11e8-b2bc-ac1f6b6435d0',
        '7cb0006e-bbaf-11e8-b2ba-ac1f6b6435d0',
        '87b77dd2-bba2-11e8-b2b9-ac1f6b6435d0',
        '62c88efa-bbc8-11e8-b2bc-ac1f6b6435d0',
        '44d819c2-bbbb-11e8-b2ba-ac1f6b6435d0',
        'b1ca2b40-bbbd-11e8-b2ba-ac1f6b6435d0',
        '8cd67266-bbbe-11e8-b2ba-ac1f6b6435d0',
        'cead83ec-bb9a-11e8-b2b9-ac1f6b6435d0',
        'a166d11a-bbca-11e8-b2bc-ac1f6b6435d0',
        '91a0a67e-bb9e-11e8-b2b9-ac1f6b6435d0',
        '2be24582-bbb1-11e8-b2ba-ac1f6b6435d0'
    ]
    exclude_train = [
        '7138c4aa-bb9b-11e8-b2b9-ac1f6b6435d0',
        '8a10533e-bba6-11e8-b2ba-ac1f6b6435d0',
        'be92e108-bbb5-11e8-b2ba-ac1f6b6435d0',
        'abfa727e-bba4-11e8-b2ba-ac1f6b6435d0',
        '2384acac-bbae-11e8-b2ba-ac1f6b6435d0',
        'c7a7a462-bbb1-11e8-b2ba-ac1f6b6435d0',
        '559f7ce0-bbb2-11e8-b2ba-ac1f6b6435d0'
    ]

    # In[13]:

    xtra_data_train = xtra_data.loc[~xtra_data.Id.isin(xtra_matches_ids),
                                    ['Id', 'Target']].reset_index(drop=True)
    xtra_data_valid = xtra_data.loc[xtra_data.Id.isin(xtra_matches_ids),
                                    ['Id', 'Target']].reset_index(drop=True)

    # In[14]:

    valid_df = pd.read_csv('../input/' + 'val_id.csv',
                           header=None,
                           names=['idx', 'Id'])
    valid_df = valid_df.loc[~valid_df.Id.isin(exclude_valid), :]
    train_df = pd.read_csv(data_dir + 'train.csv')
    train_df = train_df.loc[~train_df.Id.isin(exclude_train), :]

    test_df = pd.read_csv('../input/' + "sample_submission.csv")
    train = train_df.loc[
        ~train_df.Id.isin(valid_df.Id.values.tolist()), :].reset_index(
            drop=True)
    train = pd.concat([train, xtra_data_train], axis=0, sort=False)
    valid = train_df.loc[train_df.Id.isin(valid_df.Id.values.tolist()
                                          ), :].reset_index(drop=True)
    valid = pd.concat([valid, xtra_data_valid], axis=0, sort=False)
    test = test_df

    # In[15]:

    train.shape

    # In[16]:

    def zero_25(x):
        return x in ['0', '25', '25 0', '0 25']

    train = train[~((train['Id'].str.contains('classes')) &
                    (train['Target'].apply(zero_25)))]
    train.shape

    # In[17]:

    del train_df, valid_df, test_df, xtra_data_valid, xtra_data_train
    gc.collect()

    train_files = train.Id.apply(
        lambda s: '../input/' + 'train_png/' + s + '.png')
    train_labels = train.Target.astype(str).apply(
        lambda s: [name_label_dict[int(q)] for q in s.split(' ')])
    train_ds = ImageMultiDataset(fns=train_files,
                                 labels=train_labels,
                                 classes=list(name_label_dict.values()))
    del train_files, train_labels

    valid_files = valid.Id.apply(
        lambda s: '../input/' + 'train_png/' + s + '.png')
    valid_labels = valid.Target.astype(str).apply(
        lambda s: [name_label_dict[int(q)] for q in s.split(' ')])
    valid_ds = ImageMultiDataset(fns=valid_files,
                                 labels=valid_labels,
                                 classes=list(name_label_dict.values()))
    del valid_files, valid_labels

    test_files = test.Id.apply(
        lambda s: '../input/' + 'test_png/' + s + '.png')
    test_labels = test.Predicted.astype(str).apply(
        lambda s: [name_label_dict[int(q)] for q in s.split(' ')])
    test_ds = ImageMultiDataset(fns=test_files,
                                labels=test_labels,
                                classes=list(name_label_dict.values()))
    del test_files, test_labels

    xtra = [RandTransform(squish, {})]
    tfms = get_transforms(do_flip=True,
                          flip_vert=True,
                          max_rotate=180.0,
                          max_zoom=1.25,
                          max_lighting=0.25,
                          max_warp=0.05,
                          p_affine=0.9,
                          p_lighting=0.7,
                          xtra_tfms=xtra)
    data = ImageDataBunch.create(train_ds,
                                 valid_ds,
                                 test_ds,
                                 path=data_dir,
                                 device=device,
                                 size=512,
                                 bs=28,
                                 ds_tfms=tfms,
                                 padding_mode='zeros')
    data.normalize(model_stats)

    # In[18]:

    data.show_batch(rows=2, figsize=(12, 8))

    # In[19]:

    class FocalLoss(nn.Module):
        def __init__(self, gamma=2):
            super().__init__()
            self.gamma = gamma

        def forward(self, input, target):
            if not (target.size() == input.size()):
                raise ValueError(
                    "Target size ({}) must be the same as input size ({})".
                    format(target.size(), input.size()))

            max_val = (-input).clamp(min=0)
            loss = input - input * target + max_val + (
                (-max_val).exp() + (-input - max_val).exp()).log()

            invprobs = F.logsigmoid(-input * (target * 2.0 - 1.0))
            loss = (invprobs * self.gamma).exp() * loss

            return loss.sum(dim=1).mean()

    # In[20]:

    def create_head(nf: int,
                    nc: int,
                    lin_ftrs: Optional[Collection[int]] = None,
                    ps: Floats = 0.5):
        lin_ftrs = [nf, nc] if lin_ftrs is None else [nf] + lin_ftrs + [nc]
        ps = listify(ps)

        if len(ps) == 1: ps = [ps[0] / 2] * (len(lin_ftrs) - 2) + ps

        actns = [nn.ReLU(inplace=True)] * (len(lin_ftrs) - 2) + [None]
        layers = [AdaptiveConcatPool2d(), Flatten()]
        for ni, no, p, actn in zip(lin_ftrs[:-1], lin_ftrs[1:], ps, actns):
            layers += bn_drop_lin(ni, no, True, p, actn)

        return nn.Sequential(*layers)

    # In[21]:

    from pretrainedmodels import bninception

    class Stub(nn.Module):
        def __init__(self):
            super().__init__()

        def forward(self, x):
            return x.view(-1, 1024, 16, 16)

    model = bninception()
    model.global_pool = Stub()
    model.last_linear = Stub()

    def create_cnn(data: DataBunch,
                   arch: Callable,
                   cut: Union[int, Callable] = None,
                   pretrained: bool = True,
                   lin_ftrs: Optional[Collection[int]] = None,
                   ps: Floats = 0.5,
                   custom_head: Optional[nn.Module] = None,
                   split_on: Optional[SplitFuncOrIdxList] = None,
                   classification: bool = True,
                   **kwargs: Any) -> Learner:
        "Build convnet style learners."
        assert classification, 'Regression CNN not implemented yet, bug us on the forums if you want this!'
        #     meta = cnn_config(arch)
        body = create_body(arch, 0)
        nf = 2048
        head = custom_head or create_head(nf, data.c, lin_ftrs, ps)
        model = nn.Sequential(body, head)
        learner_cls = ifnone(data.learner_type(), ClassificationLearner)
        learn = learner_cls(data, model, **kwargs)
        learn.split(ifnone(split_on, (model[0], model[1])))
        if pretrained: learn.freeze()
        learn.freeze_to(0)
        apply_init(model[1], nn.init.kaiming_normal_)
        return learn

    # In[22]:

    from fastai.vision import models
    from sklearn import metrics
    import torchvision

    from fastai.vision.learner import cnn_config

    def body(pretrained=True):
        return pretrainedmodels.bninception(pretrained='imagenet').to(device)

    learner = create_cnn(data,
                         arch=model,
                         cut=-1,
                         custom_head=create_head(2048,
                                                 len(data.classes),
                                                 ps=0.5))
    learner.loss_fn = FocalLoss()

    # In[23]:

    from fastai.torch_core import split_model_idx

    learner.split(
        split_model_idx(learner.model, idxs=[3, 10, 70, 174, 215, 221]))
    layers = [0] * 7

    # In[24]:

    def get_lrs(lr, base=16):
        return np.logspace(np.log(lr / base),
                           np.log(lr),
                           len(layers),
                           base=np.e)

    # In[25]:

    learner.freeze()
    lr = 2e-3
    learner.fit_one_cycle(6, get_lrs(lr))

    # In[26]:

    learner.save('bninception-stage-1-1')

    # In[27]:

    learner.lr_find(num_it=1000)
    learner.recorder.plot()

    # In[28]:

    learner.load('bninception-stage-1-1')
    learner.unfreeze()
    lr = 1e-3
    learner.fit_one_cycle(16, max_lr=get_lrs(lr))

    # In[ ]:

    lr = 5e-4
    learner.fit_one_cycle(16, max_lr=get_lrs(lr))

    # In[30]:

    y_pred_solo, avg_preds1, y = learner.TTA(beta=None)
    y = y.cpu().numpy().copy()
    _, avg_preds2, _ = learner.TTA(beta=None)
    _, avg_preds3, _ = learner.TTA(beta=None)
    _, avg_preds4, _ = learner.TTA(beta=None)

    avg_preds = y_pred_solo.cpu().numpy().copy() * 0.4 + torch.stack([
        avg_preds1, avg_preds2, avg_preds3, avg_preds4
    ]).mean(0).cpu().numpy().copy() * 0.6

    # In[31]:

    classes_thresholds, classes_scores = twenty_kfold_threshold(y, avg_preds)
    n_classes = len(name_label_dict)
    yp = avg_preds.copy()
    for i in range(n_classes):
        yp[:, i] = avg_preds[:, i] >= classes_thresholds[i]
    yp = yp.astype(np.uint8)
    sc = f1_score(y, yp, average='macro')
    print('val F1 macro:', f1_score(y, yp, average='macro'))
    s = ''
    for i in range(n_classes):
        s += name_label_dict[i] + ':' + ('{:.4f}, {:.4f}  ').format(
            classes_scores[i], classes_thresholds[i])

    learner.save(model_name + '_{:.4f}.pnt'.format(sc))

    # In[25]:

    learner = learner.load('pd_bninception_v3_0.6672.pnt')
    learner.unfreeze()

    # In[26]:

    for i, c in enumerate(learner.model.children()):
        if i == 0:
            continue

        for j, c1 in enumerate(c):
            if j == 3:
                c1.p = 0.6

    # In[27]:

    xtra = [RandTransform(squish, {})]
    tfms = get_transforms(do_flip=True,
                          flip_vert=True,
                          max_rotate=180.0,
                          max_zoom=1.25,
                          max_lighting=0.25,
                          max_warp=0.05,
                          p_affine=0.9,
                          p_lighting=0.7,
                          xtra_tfms=xtra)
    data = ImageDataBunch.create(train_ds,
                                 valid_ds,
                                 test_ds,
                                 path=data_dir,
                                 device=device,
                                 size=512,
                                 bs=28,
                                 ds_tfms=tfms,
                                 padding_mode='zeros')
    data.normalize(model_stats)

    learner.data = data

    lr = 1e-4
    learner.fit_one_cycle(16, max_lr=get_lrs(lr))

    # In[28]:

    y_pred_solo, avg_preds1, y = learner.TTA(beta=None)
    y = y.cpu().numpy().copy()
    _, avg_preds2, _ = learner.TTA(beta=None)
    _, avg_preds3, _ = learner.TTA(beta=None)
    _, avg_preds4, _ = learner.TTA(beta=None)

    avg_preds = y_pred_solo.cpu().numpy().copy() * 0.4 + torch.stack([
        avg_preds1, avg_preds2, avg_preds3, avg_preds4
    ]).mean(0).cpu().numpy().copy() * 0.6

    # In[29]:

    classes_thresholds, classes_scores = twenty_kfold_threshold(y, avg_preds)
    n_classes = len(name_label_dict)
    yp = avg_preds.copy()
    for i in range(n_classes):
        yp[:, i] = avg_preds[:, i] >= classes_thresholds[i]
    yp = yp.astype(np.uint8)
    sc = f1_score(y, yp, average='macro')
    print('val F1 macro:', f1_score(y, yp, average='macro'))
    s = ''
    for i in range(n_classes):
        s += name_label_dict[i] + ':' + ('{:.4f}, {:.4f}  ').format(
            classes_scores[i], classes_thresholds[i])

    learner.save(model_name + '_{:.4f}.pnt'.format(sc))

    # In[30]:

    xtra = [RandTransform(squish, {})]
    tfms = get_transforms(do_flip=True,
                          flip_vert=True,
                          max_rotate=180.0,
                          max_zoom=1.27,
                          max_lighting=0.28,
                          max_warp=0.07,
                          p_affine=0.9,
                          p_lighting=0.73,
                          xtra_tfms=xtra)
    data = ImageDataBunch.create(train_ds,
                                 valid_ds,
                                 test_ds,
                                 path=data_dir,
                                 device=device,
                                 size=512,
                                 bs=28,
                                 ds_tfms=tfms,
                                 padding_mode='zeros')
    data.normalize(model_stats)

    learner.data = data

    lr = 1e-4
    learner.fit_one_cycle(8, max_lr=get_lrs(lr))

    # In[31]:

    lr = 1e-4
    learner.fit_one_cycle(8, max_lr=get_lrs(lr), div_factor=50)

    # In[32]:

    y_pred_solo, avg_preds1, y = learner.TTA(beta=None)
    y = y.cpu().numpy().copy()
    _, avg_preds2, _ = learner.TTA(beta=None)
    _, avg_preds3, _ = learner.TTA(beta=None)
    _, avg_preds4, _ = learner.TTA(beta=None)

    avg_preds = y_pred_solo.cpu().numpy().copy() * 0.4 + torch.stack([
        avg_preds1, avg_preds2, avg_preds3, avg_preds4
    ]).mean(0).cpu().numpy().copy() * 0.6

    # In[33]:

    classes_thresholds, classes_scores = twenty_kfold_threshold(y, avg_preds)
    n_classes = len(name_label_dict)
    yp = avg_preds.copy()
    for i in range(n_classes):
        yp[:, i] = avg_preds[:, i] >= classes_thresholds[i]
    yp = yp.astype(np.uint8)
    sc = f1_score(y, yp, average='macro')
    print('val F1 macro:', f1_score(y, yp, average='macro'))
    s = ''
    for i in range(n_classes):
        s += name_label_dict[i] + ':' + ('{:.4f}, {:.4f}  ').format(
            classes_scores[i], classes_thresholds[i])

    learner.save(model_name + '_{:.4f}.pnt'.format(sc))

    # In[34]:

    lr = 5e-5
    learner.fit_one_cycle(8, max_lr=get_lrs(lr))

    # In[35]:

    y_pred_solo, avg_preds1, y = learner.TTA(beta=None)
    y = y.cpu().numpy().copy()
    _, avg_preds2, _ = learner.TTA(beta=None)
    _, avg_preds3, _ = learner.TTA(beta=None)
    _, avg_preds4, _ = learner.TTA(beta=None)

    avg_preds = y_pred_solo.cpu().numpy().copy() * 0.4 + torch.stack([
        avg_preds1, avg_preds2, avg_preds3, avg_preds4
    ]).mean(0).cpu().numpy().copy() * 0.6

    # In[36]:

    classes_thresholds, classes_scores = twenty_kfold_threshold(y, avg_preds)
    n_classes = len(name_label_dict)
    yp = avg_preds.copy()
    for i in range(n_classes):
        yp[:, i] = avg_preds[:, i] >= classes_thresholds[i]
    yp = yp.astype(np.uint8)
    sc = f1_score(y, yp, average='macro')
    print('val F1 macro:', f1_score(y, yp, average='macro'))
    s = ''
    for i in range(n_classes):
        s += name_label_dict[i] + ':' + ('{:.4f}, {:.4f}  ').format(
            classes_scores[i], classes_thresholds[i])

    learner.save(model_name + '_{:.4f}.pnt'.format(sc))

    # In[42]:

    learner = learner.load(model_name + '_0.6750.pnt')

    # In[43]:

    xtra = [RandTransform(squish, {})]
    tfms = get_transforms(do_flip=True,
                          flip_vert=True,
                          max_rotate=180.0,
                          max_zoom=1.25,
                          max_lighting=0.25,
                          max_warp=0.05,
                          p_affine=0.9,
                          p_lighting=0.7,
                          xtra_tfms=xtra)
    data = ImageDataBunch.create(train_ds,
                                 valid_ds,
                                 test_ds,
                                 path=data_dir,
                                 device=device,
                                 size=512,
                                 bs=28,
                                 ds_tfms=tfms,
                                 padding_mode='zeros')
    data.normalize(model_stats)

    learner.data = data

    lr = 1e-4
    learner.fit_one_cycle(8, max_lr=get_lrs(lr))

    # In[44]:

    y_pred_solo, avg_preds1, y = learner.TTA(beta=None)
    y = y.cpu().numpy().copy()
    _, avg_preds2, _ = learner.TTA(beta=None)
    _, avg_preds3, _ = learner.TTA(beta=None)
    _, avg_preds4, _ = learner.TTA(beta=None)

    avg_preds = y_pred_solo.cpu().numpy().copy() * 0.4 + torch.stack([
        avg_preds1, avg_preds2, avg_preds3, avg_preds4
    ]).mean(0).cpu().numpy().copy() * 0.6

    # In[45]:

    classes_thresholds, classes_scores = twenty_kfold_threshold(y, avg_preds)
    n_classes = len(name_label_dict)
    yp = avg_preds.copy()
    for i in range(n_classes):
        yp[:, i] = avg_preds[:, i] >= classes_thresholds[i]
    yp = yp.astype(np.uint8)
    sc = f1_score(y, yp, average='macro')
    print('val F1 macro:', f1_score(y, yp, average='macro'))
    s = ''
    for i in range(n_classes):
        s += name_label_dict[i] + ':' + ('{:.4f}, {:.4f}  ').format(
            classes_scores[i], classes_thresholds[i])

    learner.save(model_name + '_{:.4f}.pnt'.format(sc))

    # ^ saved

    # In[25]:

    learner = learner.load(model_name + '_0.6815.pnt')
    lr = 9e-5
    learner.fit_one_cycle(8, max_lr=get_lrs(lr))

    # In[26]:

    y_pred_solo, avg_preds1, y = learner.TTA(beta=None)
    y = y.cpu().numpy().copy()
    _, avg_preds2, _ = learner.TTA(beta=None)
    _, avg_preds3, _ = learner.TTA(beta=None)
    _, avg_preds4, _ = learner.TTA(beta=None)

    avg_preds = y_pred_solo.cpu().numpy().copy() * 0.4 + torch.stack([
        avg_preds1, avg_preds2, avg_preds3, avg_preds4
    ]).mean(0).cpu().numpy().copy() * 0.6

    # In[27]:

    classes_thresholds, classes_scores = twenty_kfold_threshold(y, avg_preds)
    n_classes = len(name_label_dict)
    yp = avg_preds.copy()
    for i in range(n_classes):
        yp[:, i] = avg_preds[:, i] >= classes_thresholds[i]
    yp = yp.astype(np.uint8)
    sc = f1_score(y, yp, average='macro')
    print('val F1 macro:', f1_score(y, yp, average='macro'))
    s = ''
    for i in range(n_classes):
        s += name_label_dict[i] + ':' + ('{:.4f}, {:.4f}  ').format(
            classes_scores[i], classes_thresholds[i])

    learner.save(model_name + '_{:.4f}.pnt'.format(sc))

    # In[28]:

    v = len('../input/test_png/')

    ids = []
    dd = data.test_ds.ds.__dict__['x']
    for i in dd:
        ids.append(i[v:-4])

    # In[29]:

    avg_tests1 = learner.TTA(ds_type=DatasetType.Test, beta=0.4)
    avg_tests2 = learner.TTA(ds_type=DatasetType.Test, beta=0.4)
    avg_tests3 = learner.TTA(ds_type=DatasetType.Test, beta=0.4)
    avg_tests4 = learner.TTA(ds_type=DatasetType.Test, beta=0.4)

    # In[30]:

    preds = torch.stack(
        [avg_tests1[0], avg_tests2[0], avg_tests3[0],
         avg_tests4[0]]).mean(0).cpu().numpy().copy()

    # In[31]:

    results_dir = '../results/'
    np.save(results_dir + model_name + '_test.npy', preds.copy())

    # In[32]:

    results_dir = '../results/'
    np.save(results_dir + model_name + '_y.npy', y)
    np.save(results_dir + model_name + '_ids.npy', valid.Id.values)
    np.save(results_dir + model_name + '_holdout_1.npy', avg_preds)
Example #21
if not os.path.exists(FilePathManager.resolve("models")):
    os.makedirs(FilePathManager.resolve("models"))


def round_list(l, f=6):
    return [round(x, f) for x in l]


extractor = Vgg16Extractor(transform=False)

max_length = 17
captions_per_image = 1
corpus = Corpus.load(FilePathManager.resolve("data/corpus.pkl"), max_length)
evaluator = Evaluator(corpus).cuda()
dataset = ECocoDataset(corpus,
                       tranform=utils.TransformImage(extractor.cnn),
                       captions_per_image=captions_per_image)
generator = ConditionalGenerator(corpus, max_sentence_length=max_length).cuda()
state_dict = torch.load('./models/generator.pth')
generator.load_state_dict(state_dict['state_dict'])
generator.eval()

# model2 = "./models/evaluator-old2.pth"
# model3 = "./models/evaluator-c49.pth"
# model4 = "./models/evaluator-c99.pth"
# models = [model1, model2, model3, model4]
models = [
    "./models/evaluator.pth",
    "./models/evaluator-0.pth",
    "./models/evaluator-1.pth",
    "./models/evaluator-2.pth",
Example #22
        
        if (j+1)%2 == 0:
            
            input_file.write("fitness at iteration " + str(j+1) + ' ' + str(result[1]))
            input_file.write("\n")
            input_file.write("Target probability: " +str(target_prob) + ":: Original probability: " +str(orig_prob))
            input_file.write("\n")
            input_file.write('Top scorer: '+ str(labels[int(np.argmax(probabilities))]) +
                   ', probability:: ' + str(np.max(probabilities)))
            input_file.write("\n")

    return j, history, probabilities_all, orig_probs, target_probs


load_img = utils.LoadImage()
tf_img = utils.TransformImage(model)

NUMBER_OF_LINE_PERMUTATIONS = 10

import random
for iterations in range(NUMBER_OF_FILES):
    target_class = random.randint(0, 999)  # ImageNet class ids are 0-999; randint is inclusive
    for permutation in range(NUMBER_OF_LINE_PERMUTATIONS):

        xpoint = random.randint(0,224)
        ypoint = random.randint(0,224)
        xpoint2 = random.randint(0,224)
        ypoint2 = random.randint(0,224)
    #     xpoint,ypoint,xpoint2,ypoint2 = 13,5,200,210
        print('Random points:', xpoint, ypoint, xpoint2, ypoint2)
Example #23
      )        
  else:
    transform_train = transform
elif args.image_model == 'res34':
  image_model = Resnet34(n_class=args.n_class, pretrained=True) 
  if args.random_crop:
    transform_train = transforms.Compose(
      [transforms.RandomResizedCrop(224),
       transforms.ToTensor(),
       transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]
      )        
  else:
    transform_train = transform
elif args.image_model == 'inceptionresnetv2':
  image_model = InceptionResnetv2(n_class=args.n_class)
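  # use the pretrained model's own preprocessing pipeline here instead of the
  # hand-written torchvision transforms used for the resnet branches above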
  transform_train = transform = utils.TransformImage(
      pretrainedmodels.__dict__[args.image_model](num_classes=1000,
                                                  pretrained='imagenet'))


print(args.exp_dir)
tasks = [1, 3]

if args.dataset == 'mscoco_130k' or args.dataset == 'mscoco_2k':
  data_path = '/home/lwang114/data/mscoco/val2014/'
  args.class2id_file = '/ws/ifp-53_2/hasegawa/lwang114/data/mscoco/concept2idx_65class.json'
  with open(args.class2id_file, 'r') as f:
    class2idx = json.load(f)  
  args.n_class = len(class2idx.keys())
elif args.dataset == 'mscoco_train':
  data_path = '/ws/ifp-53_2/hasegawa/lwang114/data/mscoco/'
  args.class2id_file = '{}/mscoco_class2idx.json'.format(data_path)
  args.n_class = 80
Exemple #24
0
    os.mkdir(rootpath + 'dataset0_pre/')
for class_name in class_names:
    if not os.path.exists(rootpath + 'dataset0_pre/' + class_name):
        os.mkdir(rootpath + 'dataset0_pre/' + class_name)
for i in range(len(labels_init)):
    true_class_name = class_id_to_key[labels_init.TrueLabel[i]]
    shutil.copy(rootpath + 'dataset0/' + labels_init.ImageId[i] + '.png',
                rootpath + 'dataset0_pre/' + true_class_name)

# In[ ]:

model = pretrainedmodels.__dict__['inceptionv4'](
    num_classes=1000, pretrained='imagenet')  # .to(device)
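# deterministic evaluation transform below: random crops/flips disabled, aspect
# ratio preserved, and scale=1 so no extra resize margin is left before the crop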
tf_img = utils.TransformImage(model,
                              scale=1,
                              random_crop=False,
                              random_hflip=False,
                              random_vflip=False,
                              preserve_aspect_ratio=True)
net = model
# state = {
#     'net': model.state_dict(),
#     'acc': 0,
#     'epoch': 0,
# }
# if not os.path.isdir('checkpoint'):
#     os.mkdir('checkpoint')
# torch.save(state, 'checkpoint/ImageNet.pth')
total_epoch = -1
checkpoint = torch.load('checkpoint/ImageNet.pth')
net.load_state_dict(checkpoint['net'])
best_acc = checkpoint['acc']
Exemple #25
0
def main(opt):

    dataset = VideoDataset(opt, 'inference')
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len

    if opt['beam_size'] != 1:
        assert opt["batch_size"] == 1
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          opt['dim_vid'],
                          n_layers=opt['num_layers'],
                          rnn_cell=opt['rnn_type'],
                          bidirectional=opt["bidirectional"],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    else:
        return

    # if torch.cuda.device_count() > 1:
    #     print("{} devices detected, switch to parallel model.".format(torch.cuda.device_count()))
    #     model = nn.DataParallel(model)

    # model, videopath, targetcap, dataset, config, optimizer, crit, window

    # config: batch_size, c, learning_rate, num_iterations, input_shape

    config = {
        # earlier runs used lr=0.005, dimensions=224, c=100; the best settings
        # for Show-and-Fool were lr=0.06, c=1
        "batch_size": BATCH_SIZE,
        "c": 10000,
        "learning_rate": 0.2,
        "num_iterations": 1000,
        "input_shape": (224, 224),
        "num_frames": 288,
        "dimensions": 224,
        "k": 0.1,
        # "attack_algorithm": "showandfool"
        "attack_algorithm": "carliniwagner"
    }
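
    # "c" weights the caption loss against the perturbation penalty and "k" is the
    # confidence margin, in the Carlini-Wagner formulation selected above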

    convnet = 'vgg16'
    # convnet = 'nasnetalarge'
    # convnet = 'resnet152'
    full_decoder = ConvS2VT(convnet, model, opt)
    '''
    Layer freezing experiment.

    Top 10 contributing layers:
    conv.cell_stem_1.comb_iter_0_right.separable_1.depthwise_conv2d.weight
    conv.cell_stem_1.comb_iter_2_right.separable_2.depthwise_conv2d.weight
    conv.cell_stem_1.comb_iter_1_right.separable_1.depthwise_conv2d.weight
    conv.cell_16.comb_iter_4_left.separable_1.depthwise_conv2d.weight
    conv.cell_17.comb_iter_4_left.separable_1.depthwise_conv2d.weight
    conv.cell_16.comb_iter_4_left.separable_1.pointwise_conv2d.weight
    conv.cell_13.comb_iter_4_left.bn_sep_1.weight
    conv.reduction_cell_0.conv_prev_1x1.bn.weight
    conv.cell_17.comb_iter_4_left.separable_2.depthwise_conv2d.weight
    conv.cell_13.comb_iter_0_left.bn_sep_1.weight
    '''

    top = open("top_layers.txt", "r")
    top_layers = top.readlines()
    top.close()
    print(top_layers)

    # zero the gradients of the layers that should not contribute
    top_layers = []
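    # with top_layers emptied, nothing below is exempt: every parameter is frozen
    # and any existing gradient is zeroed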

    for name, parameters in full_decoder.named_parameters():
        reset = True
        for f in top_layers:
            if name in f:
                reset = False

        if reset:
            parameters.requires_grad = False
            if parameters.grad is not None:
                print(name)
                parameters.grad.data.zero_()

    # for name, parameters in full_decoder.named_parameters():
    #     for f in top_layers:
    #         if name not in f:
    #             print(name)
    #             parameters.require_grad = False
    #             if parameters.grad is not None:
    #                 # parameters.data = 0
    #                 parameters.grad.data.zero_()
    #         else:
    #             # print(name)
    #             continue

    #'A woman is cutting a green onion'
    video_path = opt['videos'][0]

    tf_img_fn = ptm_utils.TransformImage(full_decoder.conv)
    load_img_fn = PIL.Image.fromarray
    vocab = dataset.get_vocab()

    vid_id = video_path.split('/')[-1]
    vid_id = vid_id.split('.')[0]

    viable_ids = dataset.splits['test'] + dataset.splits['val']
    viable_target_captions = []
    for v_id in viable_ids:
        if v_id == vid_id:
            continue
        plausible_caps = [
            ' '.join(toks)
            for toks in dataset.vid_to_meta[v_id]['final_captions']
        ]
        viable_target_captions.extend(plausible_caps)

    #target_caption = np.random.choice(viable_target_captions)
    # 5 captions:
    '''
    <sos> A person is typing into a laptop computer <eos>
    <sos> A boy is kicking a soccer ball into the goal <eos>
    <sos> Someone is frying fish <eos>
    <sos> A dog is running with a ball <eos>
    <sos> The cat approaches on grass <eos>
    
    '''
    captions = {
        1: '<sos> A woman is talking <eos>',
        2: '<sos> A boy is kicking a soccer ball into the goal <eos>',
        3: '<sos> A man is frying fish <eos>',
        4: '<sos> A dog is running with a ball <eos>',
        5: '<sos> A cat is walking on grass <eos>'
    }

    # video 1 doesn't work
    videos = {

        # 2 was too high-resolution (or similar); replaced X6uJyuD_Zso_3_17.avi with nc8hwLaOyZU_1_19.avi
        # 5 ('ceOXCFUmxzA_100_110.avi') ran out of memory; replaced with 'X7sQq-Iu1gQ_12_22'
        # 1: 'RSx5G0_xH48_12_17.avi',
        2: 'nc8hwLaOyZU_1_19.avi',
        3: 'O2qiPS2NCeY_2_18.avi',
        4: 'kI6MWZrl8v8_149_161.avi',
        5: 'X7sQq-Iu1gQ_12_22.avi',
        6: '77iDIp40m9E_159_181.avi',
        7: 'SaYwh6chmiw_15_40.avi',
        8: 'pFSoWsocv0g_8_17.avi',
        9: 'HmVPxs4ygMc_44_53.avi',
        10: 'glii-kazad8_21_29.avi',
        11: 'AJJ-iQkbRNE_97_109.avi'
    }
    #"D:\College\Research\December 2018 Video Captioning Attack\video captioner\YouTubeClips\AJJ-iQkbRNE_97_109.avi"
    # video_path = ''

    video_path = ('D:\\College\\Research\\December 2018 Video Captioning Attack'
                  '\\video captioner\\YouTubeClips\\' + videos[2])
    # target_caption = '<sos> A man is moving a toy <eos>'
    # target_caption = '<sos> A boy is kicking a soccer ball into the goal <eos>'

    # switch the number to select a target caption
    target_caption = captions[1]

    # should use the same caption function as the attack does, because the scaling is slightly different
    with torch.no_grad():
        frames = skvideo.io.vread(video_path, num_frames=config["num_frames"])

        # bp ---
        batches = create_batches(frames, load_img_fn, tf_img_fn)
        seq_prob, seq_preds = full_decoder(batches, mode='inference')
        sents = utils.decode_sequence(vocab, seq_preds)

        original_caption = sents[0]

    #video_path = 'D:\\College\Research\\December 2018 Video Captioning Attack\\video captioner\\YouTubeClips\\ACOmKiJDkA4_49_54.avi'

    #/96 gives 3 frames
    # length = math.ceil(len(skvideo.io.vread(video_path,num_frames=config["num_frames"]))/96)
    #12 frames
    length = 3
    print("Total number of frames: {}".format(length))
    adv_frames = []
    iteration = 1
    frame_counter = 0

    total_iterations = np.ceil(length / BATCH_SIZE)

    #model is full_decoder

    optimizer = ['Adam', (0.9, 0.999)]

    crit = utils.LanguageModelCriterion()
    seq_decoder = utils.decode_sequence

    # model, videopath, targetcap, dataset, config, optimizer, crit, window
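    # attack the video in windows of BATCH_SIZE frames: perturb one window at a
    # time and collect the adversarial frames until the whole clip is covered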

    while frame_counter < length:
        print("\n\n\nIteration {}/{}".format(iteration, int(total_iterations)))
        iteration = iteration + 1
        if length - frame_counter < BATCH_SIZE:
            window = [frame_counter, length]
            frame_counter = frame_counter + (length - frame_counter)
            print("Using frames {}".format(window))
            print("Frame counter at: {}\nTotal length is: {}\n".format(
                frame_counter, length))
            attack_package = S2VT_Attack(model=full_decoder,
                                         video_path=video_path,
                                         target=target_caption,
                                         dataset=dataset,
                                         config=config,
                                         optimizer=optimizer,
                                         crit=crit,
                                         seq_decoder=seq_decoder,
                                         window=window)
            carlini = Attack(attack_package=attack_package)
            finished_frames = carlini.execute(functional=True)
            adv_frames.append(finished_frames.detach().cpu().numpy())

        else:
            window = [frame_counter, frame_counter + BATCH_SIZE - 1]
            print("Using frames {}".format(window))
            print("Frame counter at: {}\nTotal length is: {}\n".format(
                frame_counter, length))

            attack_package = S2VT_Attack(model=full_decoder,
                                         video_path=video_path,
                                         target=target_caption,
                                         dataset=dataset,
                                         config=config,
                                         optimizer=optimizer,
                                         crit=crit,
                                         seq_decoder=seq_decoder,
                                         window=window)
            carlini = Attack(attack_package=attack_package)
            finished_frames = carlini.execute(functional=True)
            adv_frames.append(finished_frames.detach().cpu().numpy())
            frame_counter = frame_counter + BATCH_SIZE

    base_toks = video_path.split('/')
    base_dir_toks = base_toks[:-1]
    base_filename = base_toks[-1]
    base_name = ''.join(base_filename.split('.')[:-1])
    adv_path = os.path.join('/'.join(base_dir_toks),
                            base_name + '_adversarialWINDOW.avi')

    print("\nSaving to: {}".format(adv_path))
    # adv_frames_1 = np.concatenate(adv_frames, axis=0)
    # # batches = create_batches(adv_frames[0].astype(np.uint8), load_img_fn, tf_img_fn)
    # batches = exp_create_batches(adv_frames_1.astype(np.uint8), 3)
    # seq_prob, seq_preds = full_decoder(batches, mode='inference')
    # sents = utils.decode_sequence(vocab, seq_preds)

    # print("Adversarial Frames 1: {}".format(sents[0]))
    adv_frames = np.concatenate(adv_frames, axis=0)
    # batches = create_batches(adv_frames, load_img_fn, tf_img_fn)
    # seq_prob, seq_preds = full_decoder(batches, mode='inference')
    # sents = utils.decode_sequence(vocab, seq_preds)
    #
    # print("Adversarial Frames 2: {}".format(sents[0]))

    outputfile = adv_path

    writer = skvideo.io.FFmpegWriter(
        outputfile,
        outputdict={
            # huffyuv is lossless; r10k is also very good

            # '-c:v': 'libx264',  # the h.264 codec
            '-c:v': 'huffyuv',  # alternatives: r210, r10k
            # '-pix_fmt': 'rgb32',
            # '-crf': '0',  # constant rate factor 0 is lossless (for libx264)
            # '-preset': 'ultrafast'  # slower presets compress better, in principle
        })
    for f in adv_frames:
        writer.writeFrame(f)

    writer.close()

    # np_path = os.path.join('/'.join(base_dir_toks), base_name + '_adversarialWINDOW')
    # np.save(np_path, adv_frames)
    #ffv1 0.215807946043995
    #huffyuv 0.21578424050191813
    #libx264 0.2341074901578537
    #r210 -0.7831487262059795, -0.7833399258537526
    #gif 0.6889478809555243
    #png 0.2158991440582696 0.21616862708842177
    #qtrle  0.21581286337807626
    #flashsv 0.21610510459932186 0.21600030673323545
    #ffvhuff 0.21620682250167533
    #r10k similar to r210
    #rawvideo 0.21595001

    with torch.no_grad():

        # check how the attack actually performs now; optionally re-instantiate the model:
        # full_decoder = ConvS2VT(convnet, model, opt)
        full_decoder = full_decoder.eval()

        frames = skvideo.io.vread(adv_path)

        frames = np.float32(frames)
        plt.imshow(frames[0] / 255.)
        plt.show()
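
        # measure how much the codec round trip changed the adversarial frames;
        # adding the difference back onto the decoded frames reconstructs the
        # exact pre-encoding frames for comparison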

        difference = np.array(adv_frames) - np.array(frames)
        np.save('difference_tmp', difference)
        # use np.load to reload the saved array

        exp = np.load('difference_tmp.npy')

        # numpy_frames = np.load(np_path+'.npy')
        # print("Are numpy frames == adv frames: ", np.array_equal(numpy_frames, adv_frames))
        # print("Is the saved array equal to loaded array for difference: ", np.array_equal(exp, difference))

        frames = frames + difference

        # batches = exp_create_batches(numpy_frames, BATCH_SIZE)
        # feats = full_decoder.conv_forward((batches.unsqueeze(0)))
        # seq_prob, seq_preds = full_decoder.encoder_decoder_forward(feats, mode='inference')
        #
        # # seq_prob, seq_preds = full_decoder(batches, mode='inference')
        # sents = utils.decode_sequence(vocab, seq_preds)
        # numpy_caption = sents[0]
        #
        # print("Numpy Frames exp: {}".format(numpy_caption))
        #

        # numpy_frames_tensor = torch.tensor(numpy_frames)
        # numpy_frames_tensor = numpy_frames_tensor.float()
        # batches = exp_create_batches(numpy_frames_tensor, BATCH_SIZE)
        # feats = full_decoder.conv_forward((batches.unsqueeze(0)))
        # seq_prob, seq_preds = full_decoder.encoder_decoder_forward(feats, mode='inference')
        #
        # # seq_prob, seq_preds = full_decoder(batches, mode='inference')
        # sents = utils.decode_sequence(vocab, seq_preds)
        # numpy_caption_tensor = sents[0]
        #
        # print("Numpy Frames tensor: {}".format(numpy_caption_tensor))

        # numpy_frames = numpy_frames.astype(np.uint8)
        # batches = create_batches(numpy_frames, load_img_fn, tf_img_fn)
        #
        # # batches = exp_create_batches(adv_frames, BATCH_SIZE)
        # # feats = full_decoder.conv_forward((batches.unsqueeze(0)))
        # # seq_prob, seq_preds = full_decoder.encoder_decoder_forward(feats, mode='inference')
        #
        # seq_prob, seq_preds = full_decoder(batches, mode='inference')
        # sents = utils.decode_sequence(vocab, seq_preds)
        #
        # print("Numpy Frames originalscale: {}".format(sents[0]))
        # # bp ---
        adv_frames = adv_frames.astype(np.uint8)
        batches = create_batches(adv_frames, load_img_fn, tf_img_fn)

        # batches = exp_create_batches(adv_frames, BATCH_SIZE)
        # feats = full_decoder.conv_forward((batches.unsqueeze(0)))
        # seq_prob, seq_preds = full_decoder.encoder_decoder_forward(feats, mode='inference')

        seq_prob, seq_preds = full_decoder(batches, mode='inference')
        sents = utils.decode_sequence(vocab, seq_preds)

        print("Adversarial Frames old: {}".format(sents[0]))

        batches = exp_create_batches(adv_frames, BATCH_SIZE)
        feats = full_decoder.conv_forward((batches.unsqueeze(0)))
        seq_prob, seq_preds = full_decoder.encoder_decoder_forward(
            feats, mode='inference')

        # seq_prob, seq_preds = full_decoder(batches, mode='inference')
        sents = utils.decode_sequence(vocab, seq_preds)

        print("Adversarial Frames new: {}".format(sents[0]))

        frames = frames.astype(np.uint8)
        batches = create_batches(frames, load_img_fn, tf_img_fn)

        # batches = exp_create_batches(frames, BATCH_SIZE)
        # feats = full_decoder.conv_forward((batches.unsqueeze(0)))
        # seq_prob, seq_preds = full_decoder.encoder_decoder_forward(feats, mode='inference')

        seq_prob, seq_preds = full_decoder(batches, mode='inference')
        sents = utils.decode_sequence(vocab, seq_preds)
        print("frames old caption: ", sents[0])

        # frames = frames.astype(np.uint8)
        # batches = create_batches(frames, load_img_fn, tf_img_fn)

        batches = exp_create_batches(frames, BATCH_SIZE)
        feats = full_decoder.conv_forward((batches.unsqueeze(0)))
        seq_prob, seq_preds = full_decoder.encoder_decoder_forward(
            feats, mode='inference')

        # seq_prob, seq_preds = full_decoder(batches, mode='inference')
        sents = utils.decode_sequence(vocab, seq_preds)
        adv_caption = sents[0]

    print(
        "\nOriginal Caption: {}\nTarget Caption: {}\nAdversarial Caption: {}".
        format(original_caption, target_caption, adv_caption))
Exemple #26
0
def main():
    path = '/home/ubuntu/sda/pjx/submit/clouds/classification'
    train_path = '/home/ubuntu/sda/pjx/submit/clouds/data/train/'
    # current file name, used to create the model and result directories
    file_name = 'resnet34'
    # create folders for saving models and results
    if not os.path.exists(path + '/model/%s' % file_name):
        os.makedirs(path + '/model/%s' % file_name)
    if not os.path.exists(path + '/result/%s' % file_name):
        os.makedirs(path + '/result/%s' % file_name)
    # create the log file
    if not os.path.exists(path + '/result/%s.txt' % file_name):
        with open(path + '/result/%s.txt' % file_name, 'w') as acc_file:
            pass
    with open(path + '/result/%s.txt' % file_name, 'a') as acc_file:
        acc_file.write('\n%s %s\n' % (time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(time.time())), file_name))

    print_freq = 5
    best_precision = 0
    lowest_loss = 100
    os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'

    batch_size = 24
    # number of worker processes; best kept below the machine's core count. On Windows, set workers=0 if errors occur
    workers = 12

    # epochs per stage; after a stage finishes, the learning rate is decayed and the next stage begins
    stage_epochs = [20, 10, 10]
    # initial learning rate
    lr = 1e-4
    # learning-rate decay factor (new_lr = lr / lr_decay)
    lr_decay = 5
    # regularization (weight decay) coefficient
    weight_decay = 1e-4

    # parameter initialization
    stage = 0
    start_epoch = 0
    total_epochs = sum(stage_epochs)

    # print frequency, i.e. how many steps between prints, for watching loss/acc in real time;
    # in the printed output, values before the parentheses are the current loss/acc,
    # and values inside the parentheses are running averages over the epoch
    print_freq = 1
    # validation-set ratio
    val_ratio = 0.12
    # whether to only validate, without training
    evaluate = False
    is_train = True
    # whether to resume from a checkpoint
    resume = False

    def default_loader(path):
        # return Image.open(path)
        return Image.open(train_path + path).convert('RGB')
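
    # note: the class below is named SteelDataset but serves the cloud data here;
    # each image gets a 4-way multi-label target (Fish/Flower/Gravel/Sugar), set
    # to 1 when an RLE mask exists for that class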

    class SteelDataset(Dataset):
        def __init__(self,
                     image_list,
                     df,
                     transform=None,
                     target_transform=None,
                     loader=default_loader):
            self.imgs = image_list
            self.df = df
            self.transform = transform
            self.target_transform = target_transform
            self.loader = loader

        def __getitem__(self, index):
            filename = self.imgs[index]
            img = self.loader(filename)
            cls_dict = ['Fish', 'Flower', 'Gravel', 'Sugar']
            cls_label = np.array([0, 0, 0, 0])
            for i in range(4):
                # print(self.df.query('Image_Label=="'+filename+'_'+cls_dict[i]+'"')['EncodedPixels'].values)
                rle_0 = self.df.query('Image_Label=="' + filename + '_' +
                                      cls_dict[i] +
                                      '"')['EncodedPixels'].values[0]

                if rle_0 is not np.nan:
                    cls_label[i] = 1
            if self.transform is not None:
                img = self.transform(img)
            label = cls_label
            return img, label

        def __len__(self):
            return len(self.imgs)

    def train(train_loader, model, criterion, optimizer, epoch):
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # switch to train mode
        model.train()
        train_iter = iter(train_loader)

        end = time.time()
        # fetch training data from the training-set iterator
        for i, (images, target) in enumerate(train_iter):
            # measure image-loading time
            size = images.shape[0]
            data_time.update(time.time() - end)
            # move the images and labels onto the GPU
            image_var = images.cuda()
            label = target.cuda().float()

            # forward the images through the network to produce predictions
            y_pred = torch.sigmoid(model(image_var))
            # compute the loss

            #label = torch.zeros(size, 2).scatter_(1,label,1).type(torch.FloatTensor).cuda()
            loss = criterion(y_pred, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.update(loss.item(), images.size(0))
            y_pred = y_pred.detach().cpu().numpy()

            # compute element-wise multi-label accuracy
            #print(y_pred.data.shape, target.shape)
            #prec, PRED_COUNT = accuracy(y_pred.data, target, topk=(1, 1))
            y_pred[y_pred >= 0.5] = 1
            y_pred[y_pred < 0.5] = 0
            target = target.numpy()
            prec = np.sum(y_pred == target) / (4 * size)

            acc.update(prec, size * 4)

            # measure training time per batch
            batch_time.update(time.time() - end)
            end = time.time()

            # print timing and metrics
            if i % print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch,
                          i,
                          len(train_loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          acc=acc))

    def validate(val_loader, model, criterion):
        batch_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # switch to evaluate mode
        model.eval()

        end = time.time()
        for i, (images, labels) in enumerate(val_loader):
            size = images.shape[0]
            image_var = torch.tensor(images).cuda()
            target = torch.tensor(labels, dtype=torch.float).cuda()
            #target = torch.zeros(image_var.shape[0], 2).scatter_(1,target,1).type(torch.FloatTensor).cuda()
            # forward pass; weights are not updated during validation/testing, so torch.no_grad() disables gradient computation
            with torch.no_grad():
                y_pred = torch.sigmoid(model(image_var))
                loss = criterion(y_pred, target)

            y_pred = y_pred.detach().cpu().numpy()
            # measure accuracy and record loss
            #prec, PRED_COUNT = accuracy(y_pred.data, labels, topk=(1, 1))
            y_pred[y_pred > 0.5] = 1
            y_pred[y_pred <= 0.5] = 0
            target = target.cpu().numpy()
            prec = np.sum(y_pred == target) / (4 * size)

            losses.update(loss.item(), images.size(0))
            acc.update(prec, size)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('TrainVal: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          acc=acc))

        print(' * Accuracy {acc.avg:.3f}'.format(acc=acc),
              '(Previous Best Acc: %.3f)' % best_precision,
              ' * Loss {loss.avg:.3f}'.format(loss=losses),
              '(Previous Lowest Loss: %.3f)' % lowest_loss)
        return acc.avg, losses.avg

    def test(test_loader, model):
        csv_map = OrderedDict({'filename': [], 'label': []})
        # switch to evaluate mode
        model.eval()
        for i, (images, filepath) in enumerate(tqdm(test_loader)):
            # bs, ncrops, c, h, w = images.size()
            filepath = [os.path.basename(i) for i in filepath]
            image_var = torch.tensor(images,
                                     requires_grad=False)  # for pytorch 0.4

            with torch.no_grad():
                y_pred = torch.sigmoid(model(image_var))

            # record file names and binarized predictions
            y_pred[y_pred > 0.5] = 1
            y_pred[y_pred <= 0.5] = 0
            y_pred = y_pred.cpu().numpy()
            csv_map['filename'].extend(filepath)
            for output in y_pred:
                csv_map['label'].append(output)

        result = pd.DataFrame(csv_map)

        # convert to the format of the sample submission
        sub_filename, sub_label = [], []
        for index, row in result.iterrows():
            for i in range(4):
                sub_filename.append(row['filename'] + '_' + str(i + 1))
                pred_label = row['label'][i]
                sub_label.append(pred_label)

        # write the result file to the result folder, ready for direct submission
        submission = pd.DataFrame({
            'filename': sub_filename,
            'label': sub_label
        })
        submission.to_csv(path + '/result/%s/submission.csv' % file_name,
                          header=None,
                          index=False)
        return

    def save_checkpoint(state,
                        is_best,
                        is_lowest_loss,
                        filename=path +
                        '/model/%s/checkpoint.pth.tar' % file_name):
        torch.save(state, filename)
        if is_best:
            shutil.copyfile(filename,
                            path + '/model/%s/model_best.pth.tar' % file_name)
        if is_lowest_loss:
            shutil.copyfile(filename,
                            path + '/model/%s/lowest_loss.pth.tar' % file_name)

    class AverageMeter(object):
        """Computes and stores the average and current value"""
        def __init__(self):
            self.reset()

        def reset(self):
            self.val = 0
            self.avg = 0
            self.sum = 0
            self.count = 0

        def update(self, val, n=1):
            self.val = val
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count

    # learning-rate decay: lr = lr / lr_decay
    def adjust_learning_rate():
        nonlocal lr
        lr = lr / lr_decay
        return optim.Adam(model.parameters(),
                          lr,
                          weight_decay=weight_decay,
                          amsgrad=True)

    # create the pretrained resnet34 model
    print('3.5 Creating model...')
    model_name = 'resnet34'
    model = pretrainedmodels.__dict__[model_name](num_classes=1000,
                                                  pretrained='imagenet')
    tf_img = utils.TransformImage(model)
    fc_features = model.last_linear.in_features
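    # swap the 1000-way ImageNet head for a 4-way head matching the cloud labels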
    model.last_linear = nn.Linear(fc_features, 4)
    #checkpoint = torch.load('gdrive/My Drive/Colab Notebooks/kaggle/data/weights/inceptionv4-8e4777a0.pth')
    #model = model.load_state_dict(checkpoint)
    gpu_num = torch.cuda.device_count()
    if gpu_num > 1:
        model = torch.nn.DataParallel(model, list(range(gpu_num))).cuda()

    # optionally resume from a checkpoint
    if resume:
        checkpoint_path = path + '/model/%s/checkpoint.pth.tar' % file_name
        if os.path.isfile(checkpoint_path):
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path)
            start_epoch = checkpoint['epoch'] + 1
            best_precision = checkpoint['best_precision']
            lowest_loss = checkpoint['lowest_loss']
            stage = checkpoint['stage']
            lr = checkpoint['lr']
            model.load_state_dict(checkpoint['state_dict'])
            # if the checkpoint falls exactly on a stage boundary, special handling is needed
            if start_epoch in np.cumsum(stage_epochs)[:-1]:
                stage += 1
                optimizer = adjust_learning_rate()
                model.load_state_dict(
                    torch.load(path + '/model/%s/model_best.pth.tar' %
                               file_name)['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(checkpoint_path))

    # read the list of training images
    image_list = os.listdir(train_path)
    df = pd.read_csv('/home/ubuntu/sda/pjx/submit/clouds/data/train.csv')
    # split into training and validation sets (no stratified sampling is applied here)
    print(df.head())
    train_data_list, val_data_list = train_test_split(image_list,
                                                      test_size=val_ratio,
                                                      random_state=666)
    print('train_data:', len(train_data_list))
    print('valid_data:', len(val_data_list))
    '''
    train_data_list = all_data[0:int(len(all_data)*0.85)]
    print(len(train_data_list))
    val_data_list = all_data[int(len(all_data)*0.85)+1:len(all_data)]
    '''
    # read the list of test images
    #test_data_list = pd.read_csv('/home/ubuntu/sda/pjx/submit/clouds/data/test.csv')

    # ImageNet normalization statistics, since an ImageNet-pretrained network is used
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_data = SteelDataset(train_data_list, df, transform=tf_img)
    # validation-set image transform
    val_data = SteelDataset(val_data_list, df, transform=tf_img)

    # test-set image transform
    '''
    test_data = SteelDataset(val_data_list,df,
                          transform=tf_img
                          )
    '''

    # build the data loaders
    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=workers)
    val_loader = DataLoader(val_data,
                            batch_size=batch_size * 2,
                            shuffle=False,
                            pin_memory=False,
                            num_workers=workers)
    #test_loader = DataLoader(test_data, batch_size=batch_size*2, shuffle=False, pin_memory=False, num_workers=workers)

    # binary cross-entropy loss; train/validate already apply sigmoid to the model
    # output, so BCELoss is used (BCEWithLogitsLoss would apply a second sigmoid)
    criterion = nn.BCELoss().cuda()
    # Adam optimizer with amsgrad
    optimizer = optim.Adam(model.parameters(),
                           lr,
                           weight_decay=weight_decay,
                           amsgrad=True)

    if evaluate:
        validate(val_loader, model, criterion)
    elif is_train:
        # start training
        for epoch in range(start_epoch, total_epochs):
            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch)
            # evaluate on validation set
            precision, avg_loss = validate(val_loader, model, criterion)

            # log each epoch's precision and loss to the log file
            with open(path + '/result/%s.txt' % file_name, 'a') as acc_file:
                acc_file.write('Epoch: %2d, Precision: %.8f, Loss: %.8f\n' %
                               (epoch, precision, avg_loss))

            # track the best precision and lowest loss; save the latest and the best model
            is_best = precision > best_precision
            is_lowest_loss = avg_loss < lowest_loss
            best_precision = max(precision, best_precision)
            lowest_loss = min(avg_loss, lowest_loss)
            state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_precision': best_precision,
                'lowest_loss': lowest_loss,
                'stage': stage,
                'lr': lr,
            }
            save_checkpoint(state, is_best, is_lowest_loss)

            # check whether to step into the next stage
            if (epoch + 1) in np.cumsum(stage_epochs)[:-1]:
                stage += 1
                optimizer = adjust_learning_rate()
                model.load_state_dict(
                    torch.load(path + '/model/%s/model_best.pth.tar' %
                               file_name)['state_dict'])
                print('Step into next stage')
                with open(path + '/result/%s.txt' % file_name,
                          'a') as acc_file:
                    acc_file.write(
                        '---------------Step into next stage----------------\n'
                    )

        # record the best offline score
        with open(path + '/result/%s.txt' % file_name, 'a') as acc_file:
            acc_file.write('* best acc: %.8f  %s\n' %
                           (best_precision, os.path.basename(__file__)))
        with open(path + '/result/best_acc.txt', 'a') as acc_file:
            acc_file.write(
                '%s  * best acc: %.8f  %s\n' %
                (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(
                    time.time())), best_precision, os.path.basename(__file__)))

    # load the best model, predict on the test set, and produce a submission-ready result file
    best_model = torch.load(path + '/model/%s/model_best.pth.tar' % file_name)
    model.load_state_dict(best_model['state_dict'])
    # test(test_loader=test_loader, model=model)

    # release cached GPU memory
    torch.cuda.empty_cache()
Exemple #27
0
"""The data set class that prepares the images to be inputted into the model."""

import numpy as np
import torch
from torchvision import datasets, transforms
from skimage.color import rgb2lab, rgb2gray
from pretrainedmodels import utils
from model import inception

load_img = utils.LoadImage()
tf_img = utils.TransformImage(inception) 
encoder_transform = transforms.Compose([transforms.CenterCrop(224)])
inception_transform = transforms.Compose([transforms.CenterCrop(299)])
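
# two inputs are prepared per image: a 299x299 crop formatted for Inception via
# TransformImage, and a 224x224 center crop for the colorization encoder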

class ImageDataset(datasets.ImageFolder):
  """
  Subclass of ImageFolder that separates LAB channels into L and AB channels.
  It also transforms the image into the correctly formatted input for Inception.
  """
  def __getitem__(self, index):
    img_path, _ = self.imgs[index]

    img_inception = tf_img(inception_transform(load_img(img_path)))
    img = self.loader(img_path)

    img_original = encoder_transform(img)
    img_original = np.asarray(img_original)

    img_lab = rgb2lab(img_original)
    img_lab = (img_lab + 128) / 255
    
Exemple #28
0
from models import Encoder


print(pretrainedmodels.model_names)
test_models = ['pnasnet5large', 'nasnetalarge', 'senet154', 'polynet', 'inceptionv4', 'xception', 'resnet152']
attr = {model_name:{} for model_name in test_models}


for model_name in test_models:
    model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
    model.cuda()
    model.eval()
    with torch.no_grad():
        load_img = pretrained_utils.LoadImage()
        tf_img = pretrained_utils.TransformImage(model)
        path_img = '../test/2.png'
        input_img = load_img(path_img)
        input_tensor = tf_img(input_img)
        input_tensor = input_tensor.unsqueeze(0).cuda()
        time_used_per_model = []
        for i in range(100):
            s = time.time()
            output_features = model.features(input_tensor)
            e = time.time()
            time_used_per_model.append(e - s)
            print(e - s, model_name)
        attr[model_name]['time_used'] = time_used_per_model
        attr[model_name]['size'] = input_tensor.size()
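
# A small follow-up, not in the original snippet: summarize the timings
# collected above (statistics is from the standard library).
import statistics

for model_name in test_models:
    times = attr[model_name]['time_used']
    print('{}: mean {:.4f}s, min {:.4f}s over {} runs'.format(
        model_name, statistics.mean(times), min(times), len(times)))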

def main():
    csv_path = '/scratch/group/xqian-group/ISIC2018/ISIC2018_Task3_Training_GroundTruth'
    csv_file = 'ISIC2018_Task3_Training_GroundTruth.csv'
    csv = ReadCSV(os.path.join(csv_path, csv_file))
    
    img_path = '/scratch/group/xqian-group/ISIC2018/ISIC2018_Task3_Training_Input'
    
    data_file = csv.data.T
    
    n_classes = len(np.unique(data_file[:, 1]))
    
    split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    strat_fileset = {}
    for train_idx, val_idx in split.split(data_file, data_file[:, 1]):
        strat_fileset['train'] = data_file[train_idx]
        strat_fileset['val'] = data_file[val_idx]
    
    batch_sizes = {'train': 10, 'val': 10}
    num_epochs = 10
    
    model_list = ['vgg19_bn']  # , 'vgg11', 'vgg13', 'vgg16', 'vgg19'
    # models_loaded = [pretrainedmodels.__dict__[mdl](num_classes=1000,
    #                  pretrained='imagenet') for mdl in model_list]
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    result_path = '/scratch/user/jinqing/ISIC2018/results'
    
    for mdl_name in tqdm(model_list):
        mdl = pretrainedmodels.__dict__[mdl_name](num_classes=1000,
                                                  pretrained='imagenet')
        since = time.time()
        
        dim_features = mdl.last_linear.in_features
        mdl.last_linear = nn.Linear(dim_features, n_classes)
        mdl = mdl.to(device)
        
        tf_img = putils.TransformImage(mdl)
        
        strat_datasets = {k: ImageFilelist(img_path, strat_fileset[k], tf_img)
                            for k in ['train', 'val']}
        
        strat_dataloader = {k: torch.utils.data.DataLoader(strat_datasets[k],
                                                           batch_size=batch_sizes[k],
                                                           shuffle=True,
                                                           num_workers=4)
                            for k in ['train', 'val']}
        
        best_acc = 0.0
        
        mdl_path = os.path.join(result_path, mdl_name)
        if os.path.isdir(mdl_path):
            mdl_file = GetLatestFile(mdl_path)
            mdl.load_state_dict(torch.load(mdl_file))
            best_acc = FindSubstring(mdl_file, '_Acc_', '.pt')
            best_acc = float(best_acc)
        
        criterion = nn.CrossEntropyLoss(reduction='sum')
        optimizer_ft = optim.SGD(mdl.parameters(), lr=0.001, momentum=0.9)
        exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft,
                                                     step_size=7,
                                                     gamma=0.1)
        
        for epoch in range(num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)
            train(mdl, device, strat_dataloader['train'], criterion,
                  optimizer_ft, exp_lr_scheduler)
            val_acc = validate(mdl, device, strat_dataloader['val'], criterion)
            if val_acc > best_acc:
                best_acc = val_acc
                best_weight_file = '{:%Y%m%d%H%M%S}_Acc_{:.4f}.pt'.format(
                        datetime.now(), best_acc)
                if not os.path.isdir(mdl_path):
                    os.makedirs(mdl_path)
                best_weight_file = os.path.join(mdl_path, best_weight_file)
                torch.save(mdl.state_dict(), best_weight_file)
        
        t_d = timedelta(seconds=time.time() - since)
        t_d = [int(float(x)) for x in str(t_d).split(':')]
        print('Time Elapsed for {}: {}:{}:{}'.format(mdl_name, *t_d))
import os
from multiprocessing import cpu_count

import torch.nn as nn
from pretrainedmodels import utils
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence
from torch.optim import Adam
from torch.utils.data import DataLoader

from dataset.corpus import Corpus
# torch.backends.cudnn.enabled = False
from dataset.generator_coco_dataset import GeneratorCocoDataset
from extractor.vgg_extractor import VggExtractor
from file_path_manager import FilePathManager
from generator.conditional_generator import ConditionalGenerator

if not os.path.exists(FilePathManager.resolve("models")):
    os.makedirs(FilePathManager.resolve("models"))
extractor = VggExtractor(use_gpu=True)
tf_img = utils.TransformImage(extractor.cnn)
corpus = Corpus.load(FilePathManager.resolve("data/corpus.pkl"))
print("Corpus loaded")

batch_size = 96
dataset = GeneratorCocoDataset(corpus)
dataloader = DataLoader(dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=cpu_count())

generator = ConditionalGenerator(corpus).cuda()
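
# mask padding tokens out of the cross-entropy loss via ignore_index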
criterion = nn.CrossEntropyLoss(
    ignore_index=corpus.word_index(corpus.PAD)).cuda()
optimizer = Adam(generator.parameters(), lr=0.0001, weight_decay=1e-5)
epochs = 20