Esempio n. 1
0
    def __init__(self, opt):
        """Raw-image data loader.

        Collects image paths either from a coco-style json file or by
        walking a folder, and builds a pretrained ResNet feature
        extractor on the GPU.

        Args:
            opt: dict-like options object (must support ``.get``) with
                keys 'coco_json', 'folder_path', 'batch_size' and
                'cnn_model' (all optional, with defaults below).
        """
        self.opt = opt
        self.coco_json = opt.get('coco_json', '')
        self.folder_path = opt.get('folder_path', '')

        self.batch_size = opt.get('batch_size', 1)
        self.seq_per_img = 1

        # Load resnet. Keep the constructed network in a local name so the
        # `resnet` module is not shadowed (the original rebound `resnet`).
        self.cnn_model = opt.get('cnn_model', 'resnet101')
        cnn = getattr(resnet, self.cnn_model)()
        cnn.load_state_dict(
            torch.load('./data/imagenet_weights/' + self.cnn_model + '.pth'))
        self.my_resnet = myResnet(cnn)
        self.my_resnet.cuda()
        self.my_resnet.eval()

        # load the json file which contains additional information about the dataset
        print('DataLoaderRaw loading images from folder: ', self.folder_path)

        self.files = []
        self.ids = []

        if len(self.coco_json) > 0:
            # Fixed: was `opt.coco_json` (attribute access), inconsistent
            # with the dict-style opt.get() used everywhere else.
            print('reading from ' + self.coco_json)
            # read in filenames from the coco-style json file
            self.coco_annotation = json.load(open(self.coco_json))
            for v in self.coco_annotation['images']:
                self.files.append(
                    os.path.join(self.folder_path, v['file_name']))
                self.ids.append(v['id'])
        else:
            # read in all the filenames from the folder
            print('listing all images in directory ' + self.folder_path)

            def isImage(f):
                # endswith with a tuple is equivalent to the original
                # rfind-based suffix check.
                return f.endswith(('.jpg', '.JPG', '.jpeg', '.JPEG',
                                   '.png', '.PNG', '.ppm', '.PPM'))

            n = 1
            for root, dirs, files in os.walk(self.folder_path, topdown=False):
                for file in files:
                    # Fixed: join with `root`, not the top-level folder,
                    # so images inside subdirectories get a valid path.
                    fullpath = os.path.join(root, file)
                    if isImage(fullpath):
                        self.files.append(fullpath)
                        self.ids.append(str(n))  # just order them sequentially
                        n = n + 1

        self.N = len(self.files)
        print('DataLoaderRaw found ', self.N, ' images')

        self.iterator = 0
def main(params):
    data = json.load(open(params['input_json'], 'r'))
    #imgs = imgs['images']

    seed(123)  # make reproducible
    #shuffle(imgs)  # shuffle the order
    imgs = data["images"]
    prepro_captions(imgs)

    # create the vocab
    vocab = build_vocab(imgs, params)
    itow = {i + 1: w
            for i, w in enumerate(vocab)
            }  # a 1-indexed vocab translation table
    wtoi = {w: i + 1 for i, w in enumerate(vocab)}  # inverse table

    # done

    # assign the splits
    assign_splits(imgs, params)

    # encode captions in large arrays, ready to ship to hdf5 file
    L, label_start_ix, label_end_ix, label_length = encode_captions(
        imgs, params, wtoi)

    import misc.resnet as resnet
    resnet_type = 'resnet151'
    if resnet_type == 'resnet101':
        #resnet = resnet.resnet101()
        #resnet.load_state_dict(torch.load('resnet/resnet101.pth'))
        resnet = models.resnet101()

    else:
        resnet = resnet.resnet152()
        resnet.load_state_dict(torch.load('resnet/resnet152.pth'))
    my_resnet = myResnet(resnet)
    my_resnet.cuda()
    my_resnet.eval()

    # create output h5 file
    N = len(imgs)
    f_lb = h5py.File(params['output_h5'] + '_' + resnet_type + '_label.h5',
                     "w")
    f_fc = h5py.File(params['output_h5'] + '_' + resnet_type + '_fc.h5', "w")
    f_att = h5py.File(params['output_h5'] + '_' + resnet_type + '_att.h5', "w")
    f_lb.create_dataset("labels", dtype='uint32', data=L)
    f_lb.create_dataset("label_start_ix", dtype='uint32', data=label_start_ix)
    f_lb.create_dataset("label_end_ix", dtype='uint32', data=label_end_ix)
    f_lb.create_dataset("label_length", dtype='uint32', data=label_length)
    f_lb.close()

    exit()
    ### extract features
    dset_fc = f_fc.create_dataset("fc", (N, 2048), dtype='float32')
    dset_att = f_att.create_dataset("att", (N, 14, 14, 2048), dtype='float32')
    for i, img in enumerate(imgs):
        # load the image
        real_path = img['filepath'] + "/" + img['filename']
        I = skimage.io.imread(
            os.path.join(params['images_root'] + "/",
                         real_path))  # note the path
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = Variable(preprocess(I), volatile=True)
        tmp_fc, tmp_att = my_resnet(I)
        # write to h5
        dset_fc[i] = tmp_fc.data.cpu().float().numpy()
        dset_att[i] = tmp_att.data.cpu().float().numpy()
        if i % 1000 == 0:
            print 'processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N)
    f_fc.close()
    f_att.close()
    print 'wrote ', params['output_h5']

    # create output json file
    out = {}
    out['ix_to_word'] = itow  # encode the (1-indexed) vocab
    out['images'] = []
    for i, img in enumerate(imgs):

        jimg = {}
        jimg['split'] = img['split']
        if 'filepath' in img:
            jimg['filepath'] = img['filepath']  # copy it over, might need
        if 'id' in img:
            jimg['id'] = img[
                'id']  # copy over & mantain an id, if present (e.g. coco ids, useful)

        out['images'].append(jimg)

    json.dump(out, open(params['output_json'], 'w'))
    print 'wrote ', params['output_json']
Esempio n. 3
0
from torch.autograd import Variable
import skimage
import skimage.io
import scipy.misc

from torchvision import transforms as trn
# ImageNet channel mean/std normalization. ToTensor is commented out:
# the caller is expected to supply an already-scaled float CHW tensor.
preprocess = trn.Compose([
    #trn.ToTensor(),
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

from misc.resnet_utils import myResnet
import misc.resnet as resnet

# Eagerly build a pretrained ResNet-101 feature extractor on the GPU at
# module import time (the weight path is machine-specific and contains a
# spelling oddity, 'pretrianed' — presumably intentional on that host).
# NOTE(review): `torch` is used below but not imported in this fragment —
# presumably imported elsewhere in the full file; confirm.
resnet = resnet.resnet101()
resnet.load_state_dict(
    torch.load('/datadrive/resnet_pretrianed_t7/resnet101.pth'))
my_resnet = myResnet(resnet)
my_resnet.cuda()
my_resnet.eval()


class DataLoaderRaw():
    def __init__(self, opt):
        """Store loader options, falling back to a default for any key
        missing from the dict-like *opt*."""
        self.opt = opt
        for key, default in (('coco_json', ''),
                             ('folder_path', ''),
                             ('batch_size', 1)):
            setattr(self, key, opt.get(key, default))
        self.seq_per_img = 1

        # load the json file which contains additional information about the dataset
def main(params):
    data = json.load(open(params['input_json'], 'r'))
    #imgs = imgs['images']

    seed(123)  # make reproducible
    #shuffle(imgs)  # shuffle the order
    imgs = data["images"]
    prepro_captions(imgs)

    # create the vocab
    vocab = build_vocab(imgs, params)
    itow = {i + 1: w for i, w in enumerate(vocab)}  # a 1-indexed vocab translation table
    wtoi = {w: i + 1 for i, w in enumerate(vocab)}  # inverse table

    # done 

    # assign the splits
    assign_splits(imgs, params)

    # encode captions in large arrays, ready to ship to hdf5 file
    L, label_start_ix, label_end_ix, label_length = encode_captions(imgs, params, wtoi)

    
    import misc.resnet as resnet
    resnet_type = 'resnet151'
    if resnet_type == 'resnet101':
        resnet = resnet.resnet101()
        resnet.load_state_dict(torch.load('resnet/resnet101.pth'))
    else:
        resnet = resnet.resnet152()
        resnet.load_state_dict(torch.load('resnet/resnet152.pth'))
    my_resnet = myResnet(resnet)
    my_resnet.cuda()
    my_resnet.eval()

    # create output h5 file
    N = len(imgs)
    f_lb = h5py.File(params['output_h5'] + '_'+ resnet_type +'_label.h5', "w")
    f_fc = h5py.File(params['output_h5'] + '_'+ resnet_type +'_fc.h5', "w")
    f_att = h5py.File(params['output_h5'] + '_'+ resnet_type +'_att.h5', "w")
    f_lb.create_dataset("labels", dtype='uint32', data=L)
    f_lb.create_dataset("label_start_ix", dtype='uint32', data=label_start_ix)
    f_lb.create_dataset("label_end_ix", dtype='uint32', data=label_end_ix)
    f_lb.create_dataset("label_length", dtype='uint32', data=label_length)
    f_lb.close()

    #exit()
    ### extract features
    dset_fc = f_fc.create_dataset("fc", (N, 2048), dtype='float32')
    dset_att = f_att.create_dataset("att", (N, 14, 14, 2048), dtype='float32')
    for i, img in enumerate(imgs):
        # load the image
        real_path = img['filepath'] + "/" + img['filename']
        I = skimage.io.imread(os.path.join(params['images_root'],real_path))  # note the path 
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = Variable(preprocess(I), volatile=True)
        tmp_fc, tmp_att = my_resnet(I)
        # write to h5
        dset_fc[i] = tmp_fc.data.cpu().float().numpy()
        dset_att[i] = tmp_att.data.cpu().float().numpy()
        if i % 1000 == 0:
            print 'processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N)
    f_fc.close()
    f_att.close()
    print 'wrote ', params['output_h5']

    # create output json file
    out = {}
    out['ix_to_word'] = itow  # encode the (1-indexed) vocab
    out['images'] = []
    for i, img in enumerate(imgs):

        jimg = {}
        jimg['split'] = img['split']
        if 'filepath' in img: jimg['filepath'] = img['filepath']  # copy it over, might need
        if 'id' in img: jimg['id'] = img['id']  # copy over & mantain an id, if present (e.g. coco ids, useful)

        out['images'].append(jimg)

    json.dump(out, open(params['output_json'], 'w'))
    print 'wrote ', params['output_json']
Esempio n. 5
0
from torch.autograd import Variable
import skimage
import skimage.io
import scipy.misc

from torchvision import transforms as trn
# ImageNet channel mean/std normalization. ToTensor is commented out:
# the caller is expected to supply an already-scaled float CHW tensor.
preprocess = trn.Compose([
        #trn.ToTensor(),
        trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

from misc.resnet_utils import myResnet
import misc.resnet as resnet

# Eagerly build a pretrained ResNet-101 feature extractor on the GPU at
# module import time.
# NOTE(review): `torch` is used below but not imported in this fragment —
# presumably imported elsewhere in the full file; confirm.
resnet = resnet.resnet101()
resnet.load_state_dict(torch.load('data/model/resnet101.pth'))
my_resnet = myResnet(resnet)
my_resnet.cuda()
my_resnet.eval()

class DataLoaderRaw():

    def __init__(self, opt):
        """Initialise from a dict-like options object; options that are
        absent fall back to their defaults."""
        self.opt = opt
        option_defaults = [('coco_json', ''), ('folder_path', '')]
        for name, fallback in option_defaults:
            setattr(self, name, opt.get(name, fallback))
        self.batch_size = opt.get('batch_size', 1)
        self.seq_per_img = 1

        # load the json file which contains additional information about the dataset
Esempio n. 6
0
import torch
import torchvision.models as models
from torch.autograd import Variable
import skimage.io

from torchvision import transforms as trn
# ImageNet channel mean/std normalization. ToTensor is commented out:
# the caller is expected to supply an already-scaled float CHW tensor.
preprocess = trn.Compose([
    #trn.ToTensor(),
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

from misc.resnet_utils import myResnet
import misc.resnet as resnet

# Eagerly build a pretrained ResNet-101 feature extractor on the GPU at
# module import time; the weight path is specific to the author's host.
# Note the module name `resnet` is rebound to the model instance below.
resnet = resnet.resnet101()
resnet.load_state_dict(
    torch.load('/home-nfs/rluo/rluo/model/pytorch-resnet/resnet101.pth'))
my_resnet = myResnet(resnet)
my_resnet.cuda()
my_resnet.eval()


def build_vocab(imgs, params):
    count_thr = params['word_count_threshold']

    # count up the number of words
    counts = {}
    for img in imgs:
        for sent in img['sentences']:
            for w in sent['tokens']:
                counts[w] = counts.get(w, 0) + 1
    cw = sorted([(count, w) for w, count in counts.iteritems()], reverse=True)