def coco(mode="dev", n_captions=1, test_size=None):
    """Load COCO image features and captions for the train and val splits.

    Parameters
    ----------
    mode: string
        when equal to "dev", only a random subset of the filenames is
        loaded (256 from train2014, 128 from val2014) for quick iteration
    n_captions: int
        forwarded to loadFeaturesTargets for both splits
    test_size: int or None
        when truthy, the val2014 filenames are shuffled again and cut to
        this many entries; in dev mode this happens after the 128-file
        reduction

    Returns
    -------
    trX, teX, trY, teY: train features, test features, train targets and
        test targets, exactly as produced by loadFeaturesTargets
    """
    dev_mode = mode == "dev"

    # --- training split ---
    train_split = 'train2014'
    train_files = os.listdir("%s/features/%s" % (COCO_DIR, train_split))
    if dev_mode:
        # keep a small random subset
        train_files = shuffle(train_files)[:256]
    train_features, train_targets = loadFeaturesTargets(
        train_files, train_split, n_captions)

    # --- validation split (used as the test set) ---
    val_split = 'val2014'
    val_files = os.listdir("%s/features/%s" % (COCO_DIR, val_split))
    if dev_mode:
        val_files = shuffle(val_files)[:128]
    if test_size:
        val_files = shuffle(val_files)[:test_size]
    test_features, test_targets = loadFeaturesTargets(
        val_files, val_split, n_captions)

    return train_features, test_features, train_targets, test_targets
def coco(mode="dev", n_captions=1, test_size=None):
    """Return COCO train/test features and caption targets.

    The train2014 feature directory provides the training data and the
    val2014 feature directory provides the test data.

    Parameters
    ----------
    mode: string
        "dev" limits loading to a random 256 training files and a random
        128 validation files; any other value loads every listed file
    n_captions: int
        passed through to loadFeaturesTargets
    test_size: int or None
        if truthy, the validation filenames are reshuffled and truncated
        to this length (applied after any dev-mode reduction)

    Returns
    -------
    trX, teX, trY, teY: in that order, as returned by loadFeaturesTargets
        for each split
    """
    is_dev = mode == "dev"

    # training filenames
    split = 'train2014'
    filenames = os.listdir("%s/features/%s" % (COCO_DIR, split))
    if is_dev:
        filenames = shuffle(filenames)[:256]
    trX, trY = loadFeaturesTargets(filenames, split, n_captions)

    # validation filenames
    split = 'val2014'
    filenames = os.listdir("%s/features/%s" % (COCO_DIR, split))
    if is_dev:
        filenames = shuffle(filenames)[:128]
    if test_size:
        filenames = shuffle(filenames)[:test_size]
    teX, teY = loadFeaturesTargets(filenames, split, n_captions)

    return trX, teX, trY, teY
def iterXY(self, X, Y):
    """Yield transformed (x, y) minibatches from X and Y.

    When self.shuffle is truthy, X and Y are first passed together through
    shuffle(). Minibatches come from iter_data with size=self.size; each
    x batch is passed through self.trXt and each y batch through
    self.trYt before being yielded.
    """
    inputs, targets = shuffle(X, Y) if self.shuffle else (X, Y)
    for x_batch, y_batch in iter_data(inputs, targets, size=self.size):
        # x transform runs before y transform, as a tuple evaluates left to right
        yield self.trXt(x_batch), self.trYt(y_batch)
def coco(mode="dev", batch_size=64, n_captions=1):
    """Load COCO features and captions for the train2014 and val2014 splits.

    Parameters
    ----------
    mode: string
        when "dev", only a random subset of filenames is loaded:
        batch_size*50 for training and batch_size*25 for validation
    batch_size: int
        only used to size the dev-mode subsets
    n_captions: int
        forwarded to loadFeaturesTargets

    Returns
    -------
    trX, teX, trY, teY: train features, test features, train targets and
        test targets, as returned by loadFeaturesTargets
    """
    dev_mode = mode == "dev"

    # training split
    train_split = 'train2014'
    train_files = os.listdir("%s/features/%s" % (dataDir, train_split))
    if dev_mode:
        train_files = shuffle(train_files)[:batch_size * 50]
    train_features, train_targets = loadFeaturesTargets(
        train_files, train_split, n_captions)

    # validation split
    val_split = 'val2014'
    val_files = os.listdir("%s/features/%s" % (dataDir, val_split))
    if dev_mode:
        val_files = shuffle(val_files)[:batch_size * 25]
    test_features, test_targets = loadFeaturesTargets(
        val_files, val_split, n_captions)

    return train_features, test_features, train_targets, test_targets
def iterXY(self, X, Y):
    """Yield (x, y) minibatches, with x batches supplied by a Loader.

    When self.shuffle is truthy, X and Y are shuffled together first. A
    Loader is built over X (using self.train_load, self.train_transform and
    self.size) and stored on self.loader; its load method is started as the
    target of a Process stored on self.proc. For every batch of Y produced
    by iter_data, one x batch is fetched with self.loader.get() and yielded
    together with floatX(y batch).

    NOTE(review): the started process is not joined here; presumably it is
    cleaned up elsewhere or exits once loading finishes — confirm.
    """
    if self.shuffle:
        X, Y = shuffle(X, Y)

    # Start the loader for the x batches before iterating over the targets.
    loader = Loader(X, self.train_load, self.train_transform, self.size)
    self.loader = loader
    self.proc = Process(target=self.loader.load)
    self.proc.start()

    for y_batch in iter_data(Y, size=self.size):
        # x is fetched before y is converted, matching the original order
        yield self.loader.get(), floatX(y_batch)
def iterXY(self, X, Y):
    """Yield transformed minibatches whose x items have similar lengths.

    When self.shuffle is truthy, X and Y are shuffled together first. Data
    is then requested from iter_data in chunks of size self.size*20. Within
    each chunk the items are ordered by len() of the x item, cut into
    consecutive slices of self.size, and the slices are shuffled with
    py_rng before being passed through self.trXt / self.trYt and yielded.
    """
    if self.shuffle:
        X, Y = shuffle(X, Y)

    for x_chunk, y_chunk in iter_data(X, Y, size=self.size*20):
        # order the chunk by the length of each x item
        order = np.argsort([len(item) for item in x_chunk])
        sorted_x = [x_chunk[i] for i in order]
        sorted_y = [y_chunk[i] for i in order]

        # cut the sorted chunk into minibatch-sized [x, y] pairs
        step = self.size
        minibatches = []
        for start in range(len(sorted_x))[::step]:
            stop = start + step
            minibatches.append([sorted_x[start:stop], sorted_y[start:stop]])

        # randomise minibatch order so length does not grow monotonically
        py_rng.shuffle(minibatches)

        for x_batch, y_batch in minibatches:
            yield self.trXt(x_batch), self.trYt(y_batch)
def loadFeaturesTargets(fns, dataType, n_captions=1):
    """Load the stored feature array and sampled captions for each file.

    Note: every filename must belong to the given dataType, e.g. filenames
    taken from val2014 must be loaded with dataType 'val2014'.

    Parameters
    ----------
    fns: filenames, strings
        names of the feature files inside COCO_DIR/features/<dataType>
    dataType: string
        split folder, i.e. train2014, val2014; also selects the
        captions_<dataType>.json annotation file
    n_captions: int
        number of captions to take per image

    Returns
    -------
    X: list of feature arrays, one per filename, as returned by np.load
        (the original documentation states shape (4096, ); not checked here)
    Y: list of lists of captions, one sublist per filename, each of
        length n_captions

    Notes
    -----
    The annotations of an image are shuffled and the first n_captions are
    picked by index, so an image with fewer than n_captions annotations
    makes the lookup fail.
    """
    annotation_path = '%s/annotations/captions_%s.json' % (COCO_DIR, dataType)
    caption_api = COCO(annotation_path)

    features = []
    targets = []
    for filename in fns:
        # image features
        feature = np.load('%s/features/%s/%s' % (COCO_DIR, dataType, filename))

        # annotations for the image this file belongs to, in random order
        ann_ids = caption_api.getAnnIds(imgIds=getImageId(filename))
        candidates = shuffle(caption_api.loadAnns(ann_ids))

        # take the first n_captions of the shuffled annotations
        picked = []
        for idx in range(n_captions):
            picked.append(getCaption(candidates[idx]))

        features.append(feature)
        targets.append(picked)

    return features, targets
def loadFeaturesTargets(fns, dataType, n_captions=1):
    """Load feature arrays and randomly sampled captions for the given files.

    Note: filenames and dataType must refer to the same split; filenames
    from val2014, for example, need dataType 'val2014'.

    Parameters
    ----------
    fns: filenames, strings
        feature file names located in dataDir/features/<dataType>
    dataType: string
        split folder, i.e. train2014, val2014; also picks the
        captions_<dataType>.json annotation file
    n_captions: int
        number of captions to load for each image

    Returns
    -------
    X: list with one np.load result per filename
        (the original documentation states shape (4096, ); not checked here)
    Y: list of caption lists, one per filename, each of length n_captions

    Notes
    -----
    Captions are chosen by shuffling an image's annotations and indexing
    the first n_captions entries; fewer available annotations than
    n_captions makes that indexing fail.
    """
    caption_api = COCO('%s/annotations/captions_%s.json' % (dataDir, dataType))

    features, targets = [], []
    for filename in fns:
        # features
        feature = np.load('%s/features/%s/%s' % (dataDir, dataType, filename))

        # targets: annotations of the matching image, shuffled for sampling
        image_id = getImageId(filename)
        ann_ids = caption_api.getAnnIds(imgIds=image_id)
        shuffled_anns = shuffle(caption_api.loadAnns(ann_ids))
        sampled = []
        for position in range(n_captions):
            sampled.append(getCaption(shuffled_anns[position]))

        features.append(feature)
        targets.append(sampled)

    return features, targets
def cocoXYFilenames(n_captions=5, dataType='val2014'):
    """Load features and targets together with their source filenames.

    Useful for evaluation, where each feature/target pair has to be traced
    back to the file it came from.

    Parameters
    ----------
    n_captions: integer
        how many captions to load for each image
    dataType: 'val2014' or 'train2014'
        split folder under COCO_DIR/features to read

    Returns
    -------
    X: the features
    Y: the targets
    filenames: the shuffled filenames that were handed to
        loadFeaturesTargets to produce X and Y
    """
    feature_dir = "%s/features/%s" % (COCO_DIR, dataType)
    filenames = shuffle(os.listdir(feature_dir))
    features, targets = loadFeaturesTargets(filenames, dataType, n_captions)
    return features, targets, filenames
def cocoXYFilenames(n_captions=5, dataType='val2014'):
    """Return features, targets and the filenames they were loaded from.

    Intended for evaluation code that needs to know which file each
    feature/target pair belongs to.

    Parameters
    ----------
    n_captions: integer
        how many captions to load for each image
    dataType: 'val2014' or 'train2014'
        split folder under dataDir/features to read

    Returns
    -------
    X: the features
    Y: the targets
    filenames: the shuffled filenames passed to loadFeaturesTargets
    """
    listing = os.listdir("%s/features/%s" % (dataDir, dataType))
    shuffled_names = shuffle(listing)
    loaded = loadFeaturesTargets(shuffled_names, dataType, n_captions)
    features, targets = loaded
    return features, targets, shuffled_names
def get_data(self, request=None):
    """Return the next item of the child epoch iterator, shuffled.

    Parameters
    ----------
    request: must be None; any other value is rejected

    Returns
    -------
    The result of shuffle() applied to the unpacked elements of the next
    item taken from self.child_epoch_iterator.

    Raises
    ------
    ValueError
        if request is not None
    """
    if request is None:
        batch = next(self.child_epoch_iterator)
        return shuffle(*batch)
    # explicit requests are not supported
    raise ValueError