def __init__(self, opt):
    self.opt = opt
    self.coco_json = opt.get('coco_json', '')
    self.folder_path = opt.get('folder_path', '')
    self.batch_size = opt.get('batch_size', 1)
    self.seq_per_img = 1

    # Load the ResNet used to extract features.
    # Use a local name other than `resnet`: assigning to `resnet` inside this method
    # would make it a local variable and getattr(resnet, ...) would raise UnboundLocalError.
    self.cnn_model = opt.get('cnn_model', 'resnet101')
    cnn = getattr(resnet, self.cnn_model)()
    cnn.load_state_dict(
        torch.load('./data/imagenet_weights/' + self.cnn_model + '.pth'))
    self.my_resnet = myResnet(cnn)
    self.my_resnet.cuda()
    self.my_resnet.eval()

    # load the json file which contains additional information about the dataset
    print('DataLoaderRaw loading images from folder: ', self.folder_path)

    self.files = []
    self.ids = []

    if len(self.coco_json) > 0:
        print('reading from ' + self.coco_json)
        # read in filenames from the coco-style json file
        self.coco_annotation = json.load(open(self.coco_json))
        for k, v in enumerate(self.coco_annotation['images']):
            fullpath = os.path.join(self.folder_path, v['file_name'])
            self.files.append(fullpath)
            self.ids.append(v['id'])
    else:
        # read in all the filenames from the folder
        print('listing all images in directory ' + self.folder_path)

        def isImage(f):
            supportedExt = ['.jpg', '.JPG', '.jpeg', '.JPEG',
                            '.png', '.PNG', '.ppm', '.PPM']
            for ext in supportedExt:
                start_idx = f.rfind(ext)
                if start_idx >= 0 and start_idx + len(ext) == len(f):
                    return True
            return False

        n = 1
        for root, dirs, files in os.walk(self.folder_path, topdown=False):
            for file in files:
                fullpath = os.path.join(self.folder_path, file)
                if isImage(fullpath):
                    self.files.append(fullpath)
                    self.ids.append(str(n))  # just order them sequentially
                    n = n + 1

    self.N = len(self.files)
    print('DataLoaderRaw found ', self.N, ' images')

    self.iterator = 0
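# A minimal usage sketch, not from the original file: this __init__ belongs to the
# DataLoaderRaw class shown later in this section, and it reads options with opt.get(),
# so a plain dict works. The folder path below is a hypothetical placeholder.
opt = {
    'folder_path': './vis/images',   # hypothetical directory of raw .jpg/.png files
    'coco_json': '',                 # empty string -> scan folder_path for images instead
    'batch_size': 1,
    'cnn_model': 'resnet101',        # must match a weight file under ./data/imagenet_weights/
}
loader = DataLoaderRaw(opt)
print('DataLoaderRaw indexed %d images' % loader.N)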
def main(params):
    data = json.load(open(params['input_json'], 'r'))
    seed(123)  # make reproducible
    imgs = data["images"]

    prepro_captions(imgs)

    # create the vocab
    vocab = build_vocab(imgs, params)
    itow = {i + 1: w for i, w in enumerate(vocab)}  # a 1-indexed vocab translation table
    wtoi = {w: i + 1 for i, w in enumerate(vocab)}  # inverse table

    # assign the splits
    assign_splits(imgs, params)

    # encode captions in large arrays, ready to ship to hdf5 file
    L, label_start_ix, label_end_ix, label_length = encode_captions(imgs, params, wtoi)

    import misc.resnet as resnet
    resnet_type = 'resnet152'
    if resnet_type == 'resnet101':
        resnet = resnet.resnet101()
        resnet.load_state_dict(torch.load('resnet/resnet101.pth'))
    else:
        resnet = resnet.resnet152()
        resnet.load_state_dict(torch.load('resnet/resnet152.pth'))
    my_resnet = myResnet(resnet)
    my_resnet.cuda()
    my_resnet.eval()

    # create output h5 file
    N = len(imgs)
    f_lb = h5py.File(params['output_h5'] + '_' + resnet_type + '_label.h5', "w")
    f_fc = h5py.File(params['output_h5'] + '_' + resnet_type + '_fc.h5', "w")
    f_att = h5py.File(params['output_h5'] + '_' + resnet_type + '_att.h5', "w")
    f_lb.create_dataset("labels", dtype='uint32', data=L)
    f_lb.create_dataset("label_start_ix", dtype='uint32', data=label_start_ix)
    f_lb.create_dataset("label_end_ix", dtype='uint32', data=label_end_ix)
    f_lb.create_dataset("label_length", dtype='uint32', data=label_length)
    f_lb.close()

    ### extract features
    dset_fc = f_fc.create_dataset("fc", (N, 2048), dtype='float32')
    dset_att = f_att.create_dataset("att", (N, 14, 14, 2048), dtype='float32')

    for i, img in enumerate(imgs):
        # load the image
        real_path = img['filepath'] + "/" + img['filename']
        I = skimage.io.imread(os.path.join(params['images_root'], real_path))
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = Variable(preprocess(I), volatile=True)
        tmp_fc, tmp_att = my_resnet(I)
        # write to h5
        dset_fc[i] = tmp_fc.data.cpu().float().numpy()
        dset_att[i] = tmp_att.data.cpu().float().numpy()
        if i % 1000 == 0:
            print 'processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N)
    f_fc.close()
    f_att.close()
    print 'wrote ', params['output_h5']

    # create output json file
    out = {}
    out['ix_to_word'] = itow  # encode the (1-indexed) vocab
    out['images'] = []
    for i, img in enumerate(imgs):
        jimg = {}
        jimg['split'] = img['split']
        if 'filepath' in img:
            jimg['filepath'] = img['filepath']  # copy it over, might need
        if 'id' in img:
            jimg['id'] = img['id']  # copy over & maintain an id, if present (e.g. coco ids, useful)
        out['images'].append(jimg)

    json.dump(out, open(params['output_json'], 'w'))
    print 'wrote ', params['output_json']
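# Tiny illustrative sketch, not from the original script: the vocab tables built in
# main() are 1-indexed, presumably so index 0 stays free for the padding/stop token
# in the encoded label arrays.
_toy_vocab = ['a', 'dog', 'runs']
_itow = {i + 1: w for i, w in enumerate(_toy_vocab)}  # {1: 'a', 2: 'dog', 3: 'runs'}
_wtoi = {w: i + 1 for i, w in enumerate(_toy_vocab)}  # {'a': 1, 'dog': 2, 'runs': 3}
assert all(_wtoi[_itow[ix]] == ix for ix in _itow)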
import torch
from torch.autograd import Variable

import skimage
import skimage.io
import scipy.misc

from torchvision import transforms as trn
preprocess = trn.Compose([
    #trn.ToTensor(),
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

from misc.resnet_utils import myResnet
import misc.resnet as resnet

resnet = resnet.resnet101()
resnet.load_state_dict(
    torch.load('/datadrive/resnet_pretrianed_t7/resnet101.pth'))
my_resnet = myResnet(resnet)
my_resnet.cuda()
my_resnet.eval()


class DataLoaderRaw():
    def __init__(self, opt):
        self.opt = opt
        self.coco_json = opt.get('coco_json', '')
        self.folder_path = opt.get('folder_path', '')
        self.batch_size = opt.get('batch_size', 1)
        self.seq_per_img = 1

        # load the json file which contains additional information about the dataset
def main(params):
    data = json.load(open(params['input_json'], 'r'))
    seed(123)  # make reproducible
    imgs = data["images"]

    prepro_captions(imgs)

    # create the vocab
    vocab = build_vocab(imgs, params)
    itow = {i + 1: w for i, w in enumerate(vocab)}  # a 1-indexed vocab translation table
    wtoi = {w: i + 1 for i, w in enumerate(vocab)}  # inverse table

    # assign the splits
    assign_splits(imgs, params)

    # encode captions in large arrays, ready to ship to hdf5 file
    L, label_start_ix, label_end_ix, label_length = encode_captions(imgs, params, wtoi)

    import misc.resnet as resnet
    resnet_type = 'resnet152'
    if resnet_type == 'resnet101':
        resnet = resnet.resnet101()
        resnet.load_state_dict(torch.load('resnet/resnet101.pth'))
    else:
        resnet = resnet.resnet152()
        resnet.load_state_dict(torch.load('resnet/resnet152.pth'))
    my_resnet = myResnet(resnet)
    my_resnet.cuda()
    my_resnet.eval()

    # create output h5 file
    N = len(imgs)
    f_lb = h5py.File(params['output_h5'] + '_' + resnet_type + '_label.h5', "w")
    f_fc = h5py.File(params['output_h5'] + '_' + resnet_type + '_fc.h5', "w")
    f_att = h5py.File(params['output_h5'] + '_' + resnet_type + '_att.h5', "w")
    f_lb.create_dataset("labels", dtype='uint32', data=L)
    f_lb.create_dataset("label_start_ix", dtype='uint32', data=label_start_ix)
    f_lb.create_dataset("label_end_ix", dtype='uint32', data=label_end_ix)
    f_lb.create_dataset("label_length", dtype='uint32', data=label_length)
    f_lb.close()

    ### extract features
    dset_fc = f_fc.create_dataset("fc", (N, 2048), dtype='float32')
    dset_att = f_att.create_dataset("att", (N, 14, 14, 2048), dtype='float32')

    for i, img in enumerate(imgs):
        # load the image
        real_path = img['filepath'] + "/" + img['filename']
        I = skimage.io.imread(os.path.join(params['images_root'], real_path))
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = Variable(preprocess(I), volatile=True)
        tmp_fc, tmp_att = my_resnet(I)
        # write to h5
        dset_fc[i] = tmp_fc.data.cpu().float().numpy()
        dset_att[i] = tmp_att.data.cpu().float().numpy()
        if i % 1000 == 0:
            print 'processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N)
    f_fc.close()
    f_att.close()
    print 'wrote ', params['output_h5']

    # create output json file
    out = {}
    out['ix_to_word'] = itow  # encode the (1-indexed) vocab
    out['images'] = []
    for i, img in enumerate(imgs):
        jimg = {}
        jimg['split'] = img['split']
        if 'filepath' in img:
            jimg['filepath'] = img['filepath']  # copy it over, might need
        if 'id' in img:
            jimg['id'] = img['id']  # copy over & maintain an id, if present (e.g. coco ids, useful)
        out['images'].append(jimg)

    json.dump(out, open(params['output_json'], 'w'))
    print 'wrote ', params['output_json']
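# A hedged sketch of a command-line entry point for main(); the flag names mirror the
# params keys referenced above (input_json, output_json, output_h5, images_root) and in
# build_vocab (word_count_threshold). Defaults are illustrative, not the repository's
# actual ones, and helpers such as assign_splits()/encode_captions() may read additional
# keys that are not listed here.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_json', required=True,
                        help='input json with an "images" list of captioned images')
    parser.add_argument('--output_json', default='data.json',
                        help='output json holding ix_to_word and per-image split info')
    parser.add_argument('--output_h5', default='data',
                        help='prefix for the *_label.h5 / *_fc.h5 / *_att.h5 outputs')
    parser.add_argument('--images_root', default='',
                        help='root folder joined with each image filepath/filename')
    parser.add_argument('--word_count_threshold', default=5, type=int,
                        help='words occurring at or below this count are treated as rare')
    args = parser.parse_args()
    main(vars(args))  # main() indexes params like a plain dict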
import torch
from torch.autograd import Variable

import skimage
import skimage.io
import scipy.misc

from torchvision import transforms as trn
preprocess = trn.Compose([
    #trn.ToTensor(),
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

from misc.resnet_utils import myResnet
import misc.resnet as resnet

resnet = resnet.resnet101()
resnet.load_state_dict(torch.load('data/model/resnet101.pth'))
my_resnet = myResnet(resnet)
my_resnet.cuda()
my_resnet.eval()


class DataLoaderRaw():
    def __init__(self, opt):
        self.opt = opt
        self.coco_json = opt.get('coco_json', '')
        self.folder_path = opt.get('folder_path', '')
        self.batch_size = opt.get('batch_size', 1)
        self.seq_per_img = 1

        # load the json file which contains additional information about the dataset
import torch
import torchvision.models as models
from torch.autograd import Variable
import skimage.io

from torchvision import transforms as trn
preprocess = trn.Compose([
    #trn.ToTensor(),
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

from misc.resnet_utils import myResnet
import misc.resnet as resnet

resnet = resnet.resnet101()
resnet.load_state_dict(
    torch.load('/home-nfs/rluo/rluo/model/pytorch-resnet/resnet101.pth'))
my_resnet = myResnet(resnet)
my_resnet.cuda()
my_resnet.eval()


def build_vocab(imgs, params):
    count_thr = params['word_count_threshold']

    # count up the number of words
    counts = {}
    for img in imgs:
        for sent in img['sentences']:
            for w in sent['tokens']:
                counts[w] = counts.get(w, 0) + 1
    cw = sorted([(count, w) for w, count in counts.iteritems()], reverse=True)
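# Standalone sketch, not part of build_vocab(): a toy check of the word-count logic above
# and of how count_thr would typically split frequent from rare words (rare words being
# the candidates for UNK replacement). The threshold value here is hypothetical.
_toy_counts = {'a': 7, 'dog': 3, 'runs': 1}
_thr = 2
_kept = sorted(w for w, c in _toy_counts.items() if c > _thr)   # ['a', 'dog']
_rare = sorted(w for w, c in _toy_counts.items() if c <= _thr)  # ['runs']
assert _kept == ['a', 'dog'] and _rare == ['runs']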