예제 #1
0
    def split(self, train_ratio, valid_ratio):
        """Randomly partition the samples into training, validation and test sets.

        The row indices of ``self.X`` are permuted with ``numpy_rng``. The
        first ``int(m * train_ratio)`` permuted indices become the training
        set, the next ``int(m * valid_ratio)`` the validation set, and all
        remaining indices the test set (``m`` is the number of rows of
        ``self.X``).

        The chosen indices are stored in ``self.train_ind``,
        ``self.valid_ind`` and ``self.test_ind``; the matching rows of
        ``self.X`` and ``self.Y`` are passed through ``self._create_shared``
        and stored in ``self.train_x``, ``self.train_y``, ``self.valid_x``,
        ``self.valid_y``, ``self.test_x`` and ``self.test_y``.

        Args:
            train_ratio: The ratio between training and all the samples
            valid_ratio: The ratio between validation and all the samples
        """

        n_samples = self.X.shape[0]
        train_end = int(n_samples * train_ratio)
        valid_end = train_end + int(n_samples * valid_ratio)

        order = numpy_rng.permutation(n_samples)

        self.train_ind = order[:train_end]
        self.valid_ind = order[train_end:valid_end]
        self.test_ind = order[valid_end:]

        # Build the x/y pair of every subset, in train, valid, test order.
        for subset in ('train', 'valid', 'test'):
            rows = getattr(self, subset + '_ind')
            setattr(self, subset + '_x', self._create_shared(self.X[rows, :]))
            setattr(self, subset + '_y', self._create_shared(self.Y[rows, :]))
예제 #2
0
파일: dataset.py 프로젝트: Cysu/Person-Reid
    def split(self, train_ratio, valid_ratio):
        """Shuffle the samples and divide them into train / valid / test parts.

        A permutation of the row indices of ``self.X`` is drawn from
        ``numpy_rng`` and cut into three consecutive pieces: the first
        ``int(m * train_ratio)`` indices (training), the following
        ``int(m * valid_ratio)`` indices (validation) and the remainder
        (testing), where ``m`` is the number of rows of ``self.X``.

        Side effects: sets ``self.train_ind``, ``self.valid_ind`` and
        ``self.test_ind`` to the index pieces, and sets
        ``self.{train,valid,test}_{x,y}`` to the result of
        ``self._create_shared`` applied to the corresponding rows of
        ``self.X`` / ``self.Y``.

        Args:
            train_ratio: The ratio between training and all the samples
            valid_ratio: The ratio between validation and all the samples
        """

        total = self.X.shape[0]
        n_train = int(total * train_ratio)
        n_valid = int(total * valid_ratio)
        cut = n_train + n_valid

        shuffled = numpy_rng.permutation(total)

        self.train_ind, self.valid_ind, self.test_ind = (
            shuffled[:n_train],
            shuffled[n_train:cut],
            shuffled[cut:],
        )

        def shared_rows(matrix, rows):
            # Select the given rows (all columns) and wrap them.
            return self._create_shared(matrix[rows, :])

        self.train_x = shared_rows(self.X, self.train_ind)
        self.train_y = shared_rows(self.Y, self.train_ind)
        self.valid_x = shared_rows(self.X, self.valid_ind)
        self.valid_y = shared_rows(self.Y, self.valid_ind)
        self.test_x = shared_rows(self.X, self.test_ind)
        self.test_y = shared_rows(self.Y, self.test_ind)
예제 #3
0
def sample(indices, pos_downsample=1.0, neg_pos_ratio=1.0,
           train_ratio=0.7, valid_ratio=0.2):
    """Sample positive and negative image pairs, split by person id.

    If ``cachem.load('sample')`` returns something other than ``None`` that
    value is returned unchanged, whatever the arguments are.

    Otherwise the person ids ``0 .. max(pid)`` are randomly split into
    training, validation and test groups, and pairs are sampled inside each
    group:

    * positive pairs ``(i, j, True)``: ``i < j`` and every entry from ``i``
      to ``j`` in ``indices`` carries the same pid (images of one person
      must therefore be contiguous in ``indices``); each such pair is kept
      with probability ``pos_downsample``;
    * negative pairs ``(i, j, False)``: two images of different persons of
      the group, drawn uniformly without repetition (``(i, j)`` and
      ``(j, i)`` count as the same pair).

    Args:
        indices: [(pid, vid)]
        pos_downsample: Probability of keeping each positive pair
        neg_pos_ratio: #neg / #pos. The number of negatives is capped at the
            number of distinct cross-person pairs that exist in the group.
        train_ratio: Fraction of the person ids used for training
        valid_ratio: Fraction of the person ids used for validation; the
            remaining ids form the test group

    Returns:
        (train, valid, test, train_pids, valid_pids, test_pids). The first
        three are shuffled lists of (i, j, True/False) where i and j index
        into ``indices``; the last three are the slices of the pid
        permutation assigned to each group.
    """

    # Single-argument parenthesised form prints the same text on Python 2
    # and is also valid Python 3 syntax.
    print("Sampling ...")

    data = cachem.load('sample')

    if data is not None:
        return data

    import random
    from reid.utils.math_utils import numpy_rng

    n_imgs = len(indices)

    def gensamples(pids):
        # A set gives O(1) membership tests instead of scanning the pid
        # array for every image.
        pid_set = set(int(pid) for pid in pids)

        # Indices of the images whose person belongs to this group.
        members = [i for i, entry in enumerate(indices)
                   if entry[0] in pid_set]

        samples = []

        # Positive samples
        for i in members:
            pid = indices[i][0]
            j = i + 1
            while j < n_imgs and indices[j][0] == pid:
                if numpy_rng.rand() < pos_downsample:
                    samples.append((i, j, True))
                j += 1

        # Negative samples
        n_neg = int(len(samples) * neg_pos_ratio)

        # Number of distinct unordered cross-person pairs available: all
        # ordered pairs minus the same-person ordered pairs, halved.
        # Requesting more than that could never be satisfied and would make
        # the rejection loop below spin forever, so cap the request.
        per_pid = {}
        for i in members:
            per_pid[indices[i][0]] = per_pid.get(indices[i][0], 0) + 1
        n_members = len(members)
        same_pid_pairs = sum(c * (c - 1) for c in per_pid.values())
        max_neg = (n_members * (n_members - 1) - same_pid_pairs) // 2
        n_neg = min(n_neg, max_neg)

        # Pairs already drawn, stored order-independently for O(1) lookup.
        drawn = set()
        while len(drawn) < n_neg:
            # Draw only among images of this group rather than rejecting
            # draws over every image.
            a, b = numpy_rng.randint(0, n_members, 2)
            i, j = members[a], members[b]
            if indices[i][0] == indices[j][0]:
                continue
            key = (i, j) if i < j else (j, i)
            if key in drawn:
                continue
            drawn.add(key)
            samples.append((i, j, False))

        random.shuffle(samples)
        return samples

    # Split by pid
    # max() instead of the last entry so the pid range does not depend on
    # the ordering of ``indices``; an empty input yields empty groups.
    m = max(entry[0] for entry in indices) + 1 if n_imgs else 0
    m_train = int(m * train_ratio)
    m_valid = int(m * valid_ratio)

    p = numpy_rng.permutation(m)

    train_pids = p[0:m_train]
    valid_pids = p[m_train:m_train + m_valid]
    test_pids = p[m_train + m_valid:]

    train = gensamples(train_pids)
    valid = gensamples(valid_pids)
    test = gensamples(test_pids)

    return (train, valid, test, train_pids, valid_pids, test_pids)
예제 #4
0
def sample(indices, pos_downsample=1.0, neg_pos_ratio=1.0,
           train_ratio=0.7, valid_ratio=0.2):
    """Sample positive and negative image pairs, split by person id.

    If ``cachem.load('sample')`` returns something other than ``None`` that
    value is returned unchanged, whatever the arguments are.

    Otherwise the person ids ``0 .. max(pid)`` are randomly split into
    training, validation and test groups, and pairs are sampled inside each
    group:

    * positive pairs ``(i, j, True)``: ``i < j`` and every entry from ``i``
      to ``j`` in ``indices`` carries the same pid (images of one person
      must therefore be contiguous in ``indices``); each such pair is kept
      with probability ``pos_downsample``;
    * negative pairs ``(i, j, False)``: two images of different persons of
      the group, drawn uniformly without repetition (``(i, j)`` and
      ``(j, i)`` count as the same pair).

    Args:
        indices: [(pid, vid)]
        pos_downsample: Probability of keeping each positive pair
        neg_pos_ratio: #neg / #pos. The number of negatives is capped at the
            number of distinct cross-person pairs that exist in the group.
        train_ratio: Fraction of the person ids used for training
        valid_ratio: Fraction of the person ids used for validation; the
            remaining ids form the test group

    Returns:
        (train, valid, test, train_pids, valid_pids, test_pids). The first
        three are shuffled lists of (i, j, True/False) where i and j index
        into ``indices``; the last three are the slices of the pid
        permutation assigned to each group.
    """

    # Single-argument parenthesised form prints the same text on Python 2
    # and is also valid Python 3 syntax.
    print("Sampling ...")

    data = cachem.load('sample')

    if data is not None:
        return data

    import random
    from reid.utils.math_utils import numpy_rng

    n_imgs = len(indices)

    def gensamples(pids):
        # A set gives O(1) membership tests instead of scanning the pid
        # array for every image.
        pid_set = set(int(pid) for pid in pids)

        # Indices of the images whose person belongs to this group.
        members = [i for i, entry in enumerate(indices)
                   if entry[0] in pid_set]

        samples = []

        # Positive samples
        for i in members:
            pid = indices[i][0]
            j = i + 1
            while j < n_imgs and indices[j][0] == pid:
                if numpy_rng.rand() < pos_downsample:
                    samples.append((i, j, True))
                j += 1

        # Negative samples
        n_neg = int(len(samples) * neg_pos_ratio)

        # Number of distinct unordered cross-person pairs available: all
        # ordered pairs minus the same-person ordered pairs, halved.
        # Requesting more than that could never be satisfied and would make
        # the rejection loop below spin forever, so cap the request.
        per_pid = {}
        for i in members:
            per_pid[indices[i][0]] = per_pid.get(indices[i][0], 0) + 1
        n_members = len(members)
        same_pid_pairs = sum(c * (c - 1) for c in per_pid.values())
        max_neg = (n_members * (n_members - 1) - same_pid_pairs) // 2
        n_neg = min(n_neg, max_neg)

        # Pairs already drawn, stored order-independently for O(1) lookup.
        drawn = set()
        while len(drawn) < n_neg:
            # Draw only among images of this group rather than rejecting
            # draws over every image.
            a, b = numpy_rng.randint(0, n_members, 2)
            i, j = members[a], members[b]
            if indices[i][0] == indices[j][0]:
                continue
            key = (i, j) if i < j else (j, i)
            if key in drawn:
                continue
            drawn.add(key)
            samples.append((i, j, False))

        random.shuffle(samples)
        return samples

    # Split by pid
    # max() instead of the last entry so the pid range does not depend on
    # the ordering of ``indices``; an empty input yields empty groups.
    m = max(entry[0] for entry in indices) + 1 if n_imgs else 0
    m_train = int(m * train_ratio)
    m_valid = int(m * valid_ratio)

    p = numpy_rng.permutation(m)

    train_pids = p[0:m_train]
    valid_pids = p[m_train:m_train + m_valid]
    test_pids = p[m_train + m_valid:]

    train = gensamples(train_pids)
    valid = gensamples(valid_pids)
    test = gensamples(test_pids)

    return (train, valid, test, train_pids, valid_pids, test_pids)