コード例 #1
0
def coco(mode="dev", n_captions=1, test_size=None):
    """Load COCO features and captions for the train2014 and val2014 splits.

    For each split the filenames found in ``<COCO_DIR>/features/<split>``
    are passed to ``loadFeaturesTargets`` together with ``n_captions``.

    Parameters
    ----------
    mode: string
        when equal to "dev", only a shuffled subset of the filenames is
        loaded (256 for train2014, 128 for val2014) for a quick load.

    n_captions: int, forwarded to loadFeaturesTargets for both splits

    test_size: int or None
        when truthy, the val2014 filenames are shuffled and truncated to
        at most this many entries (applied after the dev-mode cut).

    Returns
    -------
    trX, teX, trY, teY: train features, test features, train targets and
        test targets, in that order
    """
    # (split name, dev-mode cap, optional extra cap), in load order
    plan = (
        ('train2014', 256, None),
        ('val2014', 128, test_size),
    )

    loaded = []
    for split, dev_cap, extra_cap in plan:
        fns = os.listdir("%s/features/%s" % (COCO_DIR, split))

        # reduce it to a dev set
        if mode == "dev":
            fns = shuffle(fns)[:dev_cap]

        # only the val split carries an extra cap (test_size)
        if extra_cap:
            fns = shuffle(fns)[:extra_cap]

        features, targets = loadFeaturesTargets(fns, split, n_captions)
        loaded.append((features, targets))

    (trX, trY), (teX, teY) = loaded
    return trX, teX, trY, teY
コード例 #2
0
def coco(mode="dev", n_captions=1, test_size=None):
    """Return train/test features and targets for the COCO caption data.

    Training data comes from the 'train2014' feature folder and test data
    from the 'val2014' feature folder, both located under ``COCO_DIR``.

    Parameters
    ----------
    mode: string
        "dev" restricts loading to a shuffled subset: 256 training files
        and 128 validation files. Any other value loads every file.

    n_captions: int, passed through to loadFeaturesTargets

    test_size: int or None
        if truthy, the validation filenames are shuffled and cut to
        ``test_size`` entries; this happens after the dev-mode reduction.

    Returns
    -------
    trX, teX, trY, teY
    """
    # --- training split ---
    train_split = 'train2014'
    train_dir = "%s/features/%s" % (COCO_DIR, train_split)
    train_fns = os.listdir(train_dir)
    if mode == "dev":
        train_fns = shuffle(train_fns)[:256]
    trX, trY = loadFeaturesTargets(train_fns, train_split, n_captions)

    # --- validation split, used as the test set ---
    val_split = 'val2014'
    val_dir = "%s/features/%s" % (COCO_DIR, val_split)
    test_fns = os.listdir(val_dir)
    if mode == "dev":
        test_fns = shuffle(test_fns)[:128]
    if test_size:
        test_fns = shuffle(test_fns)[:test_size]
    teX, teY = loadFeaturesTargets(test_fns, val_split, n_captions)

    return trX, teX, trY, teY
コード例 #3
0
    def iterXY(self, X, Y):
        """Yield transformed (x, y) minibatches drawn from X and Y.

        When ``self.shuffle`` is truthy, X and Y are first passed together
        through ``shuffle``. Batches are produced by ``iter_data`` with
        ``size=self.size``; every x batch goes through ``self.trXt`` and
        every y batch through ``self.trYt`` before being yielded.
        """
        data_x, data_y = shuffle(X, Y) if self.shuffle else (X, Y)

        for batch_x, batch_y in iter_data(data_x, data_y, size=self.size):
            yield self.trXt(batch_x), self.trYt(batch_y)
コード例 #4
0
    def iterXY(self, X, Y):
        """Iterate over X and Y in minibatches, applying both transforms.

        X and Y are shuffled together first if ``self.shuffle`` is truthy.
        ``iter_data`` splits them using ``size=self.size``; the x part of
        each batch is transformed by ``self.trXt`` and the y part by
        ``self.trYt``.

        Yields
        ------
        (transformed x batch, transformed y batch)
        """
        if self.shuffle:
            X, Y = shuffle(X, Y)

        minibatches = iter_data(X, Y, size=self.size)
        for x_batch, y_batch in minibatches:
            x_out = self.trXt(x_batch)
            y_out = self.trYt(y_batch)
            yield x_out, y_out
コード例 #5
0
ファイル: dataset.py プロジェクト: youralien/MLFun
def coco(mode="dev", batch_size=64, n_captions=1):
    """Load train2014 / val2014 features and captions from ``dataDir``.

    Parameters
    ----------
    mode: string
        when equal to "dev", only a shuffled subset of the filenames is
        loaded: ``batch_size*50`` for train2014 and ``batch_size*25`` for
        val2014.

    batch_size: int, only used to size the dev subsets

    n_captions: int, forwarded to loadFeaturesTargets for both splits

    Returns
    -------
    trX, teX, trY, teY: train features, test features, train targets and
        test targets, in that order
    """
    # --- training split ---
    train_split = 'train2014'
    train_fns = os.listdir("%s/features/%s" % (dataDir, train_split))
    if mode == "dev":
        n_train_dev = batch_size * 50
        train_fns = shuffle(train_fns)[:n_train_dev]
    trX, trY = loadFeaturesTargets(train_fns, train_split, n_captions)

    # --- validation split, used as the test set ---
    val_split = 'val2014'
    test_fns = os.listdir("%s/features/%s" % (dataDir, val_split))
    if mode == "dev":
        n_test_dev = batch_size * 25
        test_fns = shuffle(test_fns)[:n_test_dev]
    teX, teY = loadFeaturesTargets(test_fns, val_split, n_captions)

    return trX, teX, trY, teY
コード例 #6
0
    def iterXY(self, X, Y):
        """Yield (x batch, y batch) pairs, taking x batches from a Loader.

        X and Y are shuffled together first if ``self.shuffle`` is truthy.
        A ``Loader`` is built from X, ``self.train_load``,
        ``self.train_transform`` and ``self.size`` and stored on
        ``self.loader``; a ``Process`` targeting ``self.loader.load`` is
        stored on ``self.proc`` and started before iteration begins.

        For every batch of Y produced by ``iter_data`` (``size=self.size``)
        one x batch is fetched with ``self.loader.get()`` and yielded next
        to ``floatX`` of the y batch.

        NOTE(review): the process is started here but never joined in this
        method -- confirm it is cleaned up elsewhere.
        """
        if self.shuffle:
            X, Y = shuffle(X, Y)

        self.loader = Loader(
            X, self.train_load, self.train_transform, self.size)
        self.proc = Process(target=self.loader.load)
        self.proc.start()

        for y_batch in iter_data(Y, size=self.size):
            # x is fetched before y is converted, one fetch per y batch
            yield self.loader.get(), floatX(y_batch)
コード例 #7
0
    def iterXY(self, X, Y):
        """Iterate over minibatches, pairing loader output with targets.

        Steps, in order:
          1. shuffle X and Y together when ``self.shuffle`` is truthy;
          2. create ``self.loader`` (a ``Loader`` over X using
             ``self.train_load``, ``self.train_transform``, ``self.size``);
          3. create and start ``self.proc``, a ``Process`` whose target is
             ``self.loader.load``;
          4. for each batch of Y from ``iter_data``, fetch one x batch via
             ``self.loader.get()``.

        Yields
        ------
        (x batch from the loader, ``floatX`` of the y batch)

        NOTE(review): ``self.proc`` is not joined here -- verify that the
        owner of this object takes care of it.
        """
        if self.shuffle:
            X, Y = shuffle(X, Y)

        self.loader = Loader(X, self.train_load, self.train_transform,
                             self.size)
        self.proc = Process(target=self.loader.load)
        self.proc.start()

        label_batches = iter_data(Y, size=self.size)
        for labels in label_batches:
            inputs = self.loader.get()
            yield inputs, floatX(labels)
コード例 #8
0
    def iterXY(self, X, Y):
        """Yield transformed minibatches grouped by similar x length.

        X and Y are shuffled together first if ``self.shuffle`` is truthy.
        The data is then read in chunks of ``self.size*20`` items via
        ``iter_data``. Within a chunk the items are ordered by ``len(x)``
        and sliced into consecutive minibatches of ``self.size`` items, so
        each minibatch holds x's of neighbouring lengths. The minibatches
        of a chunk are shuffled with ``py_rng`` before being yielded.

        Yields
        ------
        (``self.trXt`` of the x minibatch, ``self.trYt`` of the y minibatch)
        """
        if self.shuffle:
            X, Y = shuffle(X, Y)

        chunk_size = self.size*20
        for xs, ys in iter_data(X, Y, size=chunk_size):
            # reorder the chunk so that x's of similar length are adjacent
            order = np.argsort([len(x) for x in xs])
            xs = [xs[i] for i in order]
            ys = [ys[i] for i in order]

            # cut the ordered chunk into minibatches of self.size items
            starts = range(len(xs))[::self.size]
            minibatches = [
                [xs[s:s+self.size], ys[s:s+self.size]] for s in starts
            ]

            # visit the minibatches in random order, not sorted by length
            py_rng.shuffle(minibatches)
            for xmb, ymb in minibatches:
                yield self.trXt(xmb), self.trYt(ymb)
コード例 #9
0
    def iterXY(self, X, Y):
        """Iterate over length-bucketed minibatches of X and Y.

        After an optional joint shuffle (``self.shuffle``), ``iter_data``
        delivers chunks of ``self.size*20`` items. Each chunk is sorted by
        the length of its x entries, split into consecutive minibatches of
        ``self.size`` items, and the list of minibatches is shuffled with
        ``py_rng``. Each minibatch is passed through ``self.trXt`` /
        ``self.trYt`` and yielded as an (x, y) pair.
        """
        if self.shuffle:
            X, Y = shuffle(X, Y)

        for x_chunk, y_chunk in iter_data(X, Y, size=self.size*20):
            lengths = [len(x) for x in x_chunk]
            by_length = np.argsort(lengths)
            x_sorted = [x_chunk[pos] for pos in by_length]
            y_sorted = [y_chunk[pos] for pos in by_length]

            # consecutive slices of the sorted chunk form the minibatches
            minibatches = []
            for start in range(len(x_sorted))[::self.size]:
                x_part = x_sorted[start:start+self.size]
                y_part = y_sorted[start:start+self.size]
                minibatches.append([x_part, y_part])

            py_rng.shuffle(minibatches)
            for x_part, y_part in minibatches:
                x_out = self.trXt(x_part)
                y_out = self.trYt(y_part)
                yield x_out, y_out
コード例 #10
0
def loadFeaturesTargets(fns, dataType, n_captions=1):
    """
    Load the saved feature array and sampled captions for each filename.

    Note: filenames should come from the same type of dataType.

    filenames from val2014, for example, should have dataType val2014
    Parameters
    ----------
    fns: filenames, strings

    dataType: string folder, i.e. train2014, val2014

    n_captions: int, number of captions for each image to load

    Returns
    -------
    X: list of im_vects
        1st list length = len(fns)
        vectors are expected to be shape (4096, ) (not checked here)

    Y: list of list of captions.
        1st list length = len(fns)
        sublist length = n_captions

    Raises
    ------
    IndexError: if an image has fewer than n_captions annotations
    """
    annFile = '%s/annotations/captions_%s.json' % (COCO_DIR, dataType)
    caps = COCO(annFile)

    # every file of this split lives in the same folder; build it once
    featureDir = '%s/features/%s' % (COCO_DIR, dataType)

    X = []
    Y = []

    for fn in fns:
        # Features
        x = np.load('%s/%s' % (featureDir, fn))

        # Targets
        annIds = caps.getAnnIds(imgIds=getImageId(fn))
        anns = caps.loadAnns(annIds)

        # fail with context instead of a bare "list index out of range"
        if len(anns) < n_captions:
            raise IndexError(
                "%s has %d captions but n_captions=%r was requested"
                % (fn, len(anns), n_captions))

        # sample n_captions per image
        anns = shuffle(anns)
        captions = [getCaption(anns[i]) for i in range(n_captions)]

        X.append(x)
        Y.append(captions)

    return X, Y
コード例 #11
0
ファイル: dataset.py プロジェクト: youralien/MLFun
def loadFeaturesTargets(fns, dataType, n_captions=1):
    """
    Load the saved feature array and sampled captions for each filename.

    Note: filenames should come from the same type of dataType.

    filenames from val2014, for example, should have dataType val2014
    Parameters
    ----------
    fns: filenames, strings

    dataType: string folder, i.e. train2014, val2014

    n_captions: int, number of captions for each image to load

    Returns
    -------
    X: list of im_vects
        1st list length = len(fns)
        vectors are expected to be shape (4096, ) (not checked here)

    Y: list of list of captions.
        1st list length = len(fns)
        sublist length = n_captions

    Raises
    ------
    IndexError: if an image has fewer than n_captions annotations
    """
    annFile = '%s/annotations/captions_%s.json' % (dataDir, dataType)
    caps = COCO(annFile)

    # every file of this split lives in the same folder; build it once
    featureDir = '%s/features/%s' % (dataDir, dataType)

    X = []
    Y = []

    for fn in fns:
        # Features
        x = np.load('%s/%s' % (featureDir, fn))

        # Targets
        annIds = caps.getAnnIds(imgIds=getImageId(fn))
        anns = caps.loadAnns(annIds)

        # fail with context instead of a bare "list index out of range"
        if len(anns) < n_captions:
            raise IndexError(
                "%s has %d captions but n_captions=%r was requested"
                % (fn, len(anns), n_captions))

        # sample n_captions per image
        anns = shuffle(anns)
        captions = [getCaption(anns[i]) for i in range(n_captions)]

        X.append(x)
        Y.append(captions)

    return X, Y
コード例 #12
0
def cocoXYFilenames(n_captions=5, dataType='val2014'):
    """Load features and targets together with their filenames.

    Useful for evaluation, when each prediction has to be traced back to
    the file it came from. Every file in ``<COCO_DIR>/features/<dataType>``
    is loaded, in shuffled order.

    Parameters
    ----------
    n_captions: integer
        how many captions to load for the image

    dataType: 'val2014' or 'train2014'

    Returns
    -------
    X: the features
    Y: the targets
    filenames: the shuffled filenames, in the order they were handed to
        loadFeaturesTargets
    """
    feature_dir = "%s/features/%s" % (COCO_DIR, dataType)
    shuffled_fns = shuffle(os.listdir(feature_dir))

    features, targets = loadFeaturesTargets(shuffled_fns, dataType, n_captions)
    return features, targets, shuffled_fns
コード例 #13
0
ファイル: dataset.py プロジェクト: youralien/MLFun
def cocoXYFilenames(n_captions=5, dataType='val2014'):
    """Return features, targets and the filenames they were loaded from.

    Intended for evaluation runs where the filenames behind the features
    and targets are needed. All files listed in
    ``<dataDir>/features/<dataType>`` are shuffled and then loaded.

    Parameters
    ----------
    n_captions: integer
        how many captions to load for the image

    dataType: 'val2014' or 'train2014'

    Returns
    -------
    X: the features
    Y: the targets
    filenames: the shuffled filenames, in the order they were handed to
        loadFeaturesTargets
    """
    listing = os.listdir("%s/features/%s" % (dataDir, dataType))
    filenames = shuffle(listing)

    loaded = loadFeaturesTargets(filenames, dataType, n_captions)
    X, Y = loaded
    return X, Y, filenames
コード例 #14
0
 def get_data(self, request=None):
     """Return the next item of the child epoch iterator after shuffling.

     The item pulled from ``self.child_epoch_iterator`` is unpacked so
     that all of its members are handed to a single ``shuffle`` call.

     Raises ValueError when ``request`` is anything other than None.
     """
     if request is not None:
         raise ValueError
     return shuffle(*next(self.child_epoch_iterator))
コード例 #15
0
ファイル: dataset.py プロジェクト: youralien/MLFun
 def get_data(self, request=None):
     """Fetch the next batch from the child iterator and shuffle it.

     Only request-free access is supported: passing any ``request`` other
     than None raises ValueError. Otherwise the members of the next item
     of ``self.child_epoch_iterator`` are passed together to ``shuffle``
     and its result is returned.
     """
     if request is None:
         batch = next(self.child_epoch_iterator)
         return shuffle(*batch)
     raise ValueError