Ejemplo n.º 1
0
    def split_data(root_dir, domain, r_val, r_data, rand_seed):
        """Build the train / validation / test splits for a single domain.

        The sample lists are read from ``<root_dir>/<domain>/<domain>_test.txt``
        and ``<root_dir>/<domain>/<domain>_train.txt`` through ``read``. Every
        sample entry is joined onto ``root_dir`` before being returned.

        Args:
            root_dir: Dataset root; converted to ``Path``.
            domain: Name of the domain sub-directory / list-file prefix.
            r_val: Forwarded to ``data.split_data`` to carve the validation
                part out of the training list. ``None`` means no validation
                split is made and the test set is returned as validation.
            r_data: Forwarded to ``data.split_data`` to pick the training
                subset (only the first returned part is kept).
            rand_seed: Seed forwarded to every ``data.split_data`` call.

        Returns:
            Tuple ``(x, y, xv, yv, xt, yt)``: training, validation and test
            samples with their labels.
        """
        root_dir = Path(root_dir)
        domain_dir = root_dir / domain

        def anchor(entries):
            # Join each listed entry onto root_dir and repack as an array.
            # NOTE(review): presumably the entries are relative file paths
            # as returned by ``read`` -- confirm.
            return np.array([root_dir / entry for entry in entries.tolist()])

        # test
        xt, yt = read(domain_dir / f'{domain}_test.txt')
        xt = anchor(xt)

        train_x, train_y = read(domain_dir / f'{domain}_train.txt')

        # val
        if r_val is None:
            # Without a validation ratio the test split doubles as validation.
            xv, yv = xt, yt
        else:
            val_part, rest_part = data.split_data(train_x.copy(),
                                                  train_y.copy(), rand_seed,
                                                  r_val)
            xv, yv = val_part
            train_x, train_y = rest_part
            xv = anchor(xv)

        # train
        x, y = data.split_data(train_x.copy(), train_y.copy(), rand_seed,
                               r_data)[0]
        x = anchor(x)

        return x, y, xv, yv, xt, yt
Ejemplo n.º 2
0
    def split_data(root_dir,
                   tgt_domains,
                   src_domains,
                   r_val,
                   r_lab,
                   r_unlab,
                   w_unlab,
                   rand_seed,
                   r_data=None):
        """Load the ``.mat`` train/test files and split them for training.

        Reads ``test_32x32.mat`` and ``train_32x32.mat`` from ``root_dir``,
        then divides the training data into validation, labeled and unlabeled
        parts with ``data.split_data``.

        Args:
            root_dir: Directory holding the two ``.mat`` files.
            tgt_domains: Not used by this implementation.
            src_domains: Not used by this implementation.
            r_val: Forwarded to ``data.split_data`` to carve out validation
                data; ``None`` means the test set is returned as validation.
            r_lab: Forwarded to ``data.split_data`` to separate labeled from
                unlabeled data.
            r_unlab: Not used by this implementation.
            w_unlab: Not used by this implementation.
            rand_seed: Seed forwarded to every ``data.split_data`` call.
            r_data: When not ``None``, forwarded to ``data.split_data`` to
                shrink the unlabeled part (first returned part is kept).

        Returns:
            Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The labels of the
            unlabeled part are computed but not returned.
        """
        root_dir = Path(root_dir)

        def load_mat(filename):
            contents = sio.loadmat(root_dir / filename)
            # Move the last axis of 'X' to the front.
            images = np.transpose(contents['X'], (3, 0, 1, 2))
            # SVHN labels are 1-10; shift them down by one.
            labels = contents['y'].reshape(-1).astype(int) - 1
            return images, labels

        # test
        xt, yt = load_mat('test_32x32.mat')

        # val, lab, unlab
        pool_x, pool_y = load_mat('train_32x32.mat')
        if r_val is None:
            xv, yv = xt, yt
        else:
            val_part, rest_part = data.split_data(pool_x.copy(),
                                                  pool_y.copy(), rand_seed,
                                                  r_val)
            xv, yv = val_part
            pool_x, pool_y = rest_part
        lab_part, unlab_part = data.split_data(pool_x.copy(), pool_y.copy(),
                                               rand_seed, r_lab)
        xl, yl = lab_part
        xu, yu = unlab_part

        # reduce data
        if r_data is not None:
            reduced = data.split_data(xu.copy(), yu.copy(), rand_seed, r_data)
            xu, yu = reduced[0]

        return xl, yl, xu, xv, yv, xt, yt
Ejemplo n.º 3
0
    def split_data(root_dir,
                   tgt_domains,
                   src_domains,
                   r_val,
                   r_lab,
                   r_unlab,
                   w_unlab,
                   rand_seed,
                   r_data=None):
        """Load the pickled batch files and split them for training.

        Reads ``test_batch`` and ``data_batch_1`` .. ``data_batch_5`` from
        ``root_dir``, then divides the training data into validation, labeled
        and unlabeled parts with ``data.split_data``.

        Args:
            root_dir: Directory holding the pickled batch files.
            tgt_domains: Not used by this implementation.
            src_domains: Not used by this implementation.
            r_val: Forwarded to ``data.split_data`` to carve out validation
                data; ``None`` means the test set is returned as validation.
            r_lab: Forwarded to ``data.split_data`` to separate labeled from
                unlabeled data.
            r_unlab: Not used by this implementation.
            w_unlab: Not used by this implementation.
            rand_seed: Seed forwarded to every ``data.split_data`` call.
            r_data: When not ``None``, forwarded to ``data.split_data`` to
                shrink the unlabeled part (first returned part is kept).

        Returns:
            Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The labels of the
            unlabeled part are computed but not returned.
        """
        root_dir = Path(root_dir)

        def load_batch(path):
            # NOTE(security): unpickling can execute arbitrary code; only
            # point root_dir at trusted dataset files.
            # The context manager closes the handle even if loading fails.
            with open(path, 'rb') as handle:
                return pickle.load(handle, encoding='latin1')

        def to_images(batch):
            # Reshape the flat rows to (-1, 3, 32, 32), then move the size-3
            # axis to the end.
            return np.transpose(batch['data'].reshape((-1, 3, 32, 32)),
                                (0, 2, 3, 1))

        def to_labels(batch):
            # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
            # exact type it used to alias.
            return np.array(batch['labels'], dtype=int)

        # test
        test_batch = load_batch(root_dir / 'test_batch')
        xt = to_images(test_batch)
        yt = to_labels(test_batch)

        # val, lab, unlab
        batches = [
            load_batch(root_dir / f'data_batch_{i}') for i in range(1, 6)
        ]
        x = np.concatenate([to_images(batch) for batch in batches])
        y = np.concatenate([to_labels(batch) for batch in batches])
        if r_val is not None:
            (xv, yv), (x, y) = data.split_data(x.copy(), y.copy(), rand_seed,
                                               r_val)
        else:
            xv, yv = xt, yt
        (xl, yl), (xu, yu) = data.split_data(x.copy(), y.copy(), rand_seed,
                                             r_lab)

        # reduce data
        if r_data is not None:
            xu, yu = data.split_data(xu.copy(), yu.copy(), rand_seed,
                                     r_data)[0]

        return xl, yl, xu, xv, yv, xt, yt
Ejemplo n.º 4
0
    def split_data(root_dir,
                   tgt_domains,
                   src_domains,
                   r_val,
                   r_lab,
                   r_unlab,
                   w_unlab,
                   rand_seed,
                   r_data=None):
        """Load the ``.npy`` train/test arrays and split them for training.

        Reads ``xtest.npy`` / ``ytest.npy`` and ``xtrain.npy`` /
        ``ytrain.npy`` from ``root_dir``. Labels are remapped to consecutive
        integers using the sorted unique values found in the test labels. The
        training data is shuffled and truncated to its first 50000 samples
        before being divided with ``data.split_data``.

        Args:
            root_dir: Directory holding the four ``.npy`` files.
            tgt_domains: Not used by this implementation.
            src_domains: Not used by this implementation.
            r_val: Forwarded to ``data.split_data`` to carve out validation
                data; ``None`` means the test set is returned as validation.
            r_lab: Forwarded to ``data.split_data`` to separate labeled from
                unlabeled data.
            r_unlab: Not used by this implementation.
            w_unlab: Not used by this implementation.
            rand_seed: Seed forwarded to ``data.shuffle_data`` and to every
                ``data.split_data`` call.
            r_data: When not ``None``, forwarded to ``data.split_data`` to
                shrink the unlabeled part (first returned part is kept).

        Returns:
            Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The labels of the
            unlabeled part are computed but not returned.
        """
        root_dir = Path(root_dir)

        # test
        xt = np.load(root_dir / 'xtest.npy')
        raw_test_labels = np.load(root_dir / 'ytest.npy')
        classes = np.unique(raw_test_labels)
        # Class value -> consecutive index, in np.unique's sorted order.
        name2num = {
            name: num
            for name, num in zip(classes, np.arange(len(classes)))
        }
        yt = np.array([name2num[label] for label in raw_test_labels])

        # val, lab, unlab
        pool_x = np.load(root_dir / 'xtrain.npy')
        pool_y = np.load(root_dir / 'ytrain.npy')
        pool_x, pool_y = data.shuffle_data([pool_x, pool_y], rand_seed)
        # Only the first 50000 shuffled samples are used.
        pool_x = pool_x[:50000]
        pool_y = pool_y[:50000]
        pool_y = np.array([name2num[label] for label in pool_y])
        if r_val is None:
            xv, yv = xt, yt
        else:
            val_part, rest_part = data.split_data(pool_x.copy(),
                                                  pool_y.copy(), rand_seed,
                                                  r_val)
            xv, yv = val_part
            pool_x, pool_y = rest_part
        lab_part, unlab_part = data.split_data(pool_x.copy(), pool_y.copy(),
                                               rand_seed, r_lab)
        xl, yl = lab_part
        xu, yu = unlab_part

        # reduce data
        if r_data is not None:
            reduced = data.split_data(xu.copy(), yu.copy(), rand_seed, r_data)
            xu, yu = reduced[0]

        return xl, yl, xu, xv, yv, xt, yt
Ejemplo n.º 5
0
    def split_data(root_dir,
                   tgt_domains,
                   src_domains,
                   r_val,
                   r_lab,
                   r_unlab,
                   w_unlab,
                   rand_seed,
                   r_data=None):
        """Load the pickled ``train`` / ``test`` files and split them.

        Reads the ``test`` and ``train`` pickles from ``root_dir`` (using the
        ``'data'`` and ``'fine_labels'`` entries), then divides the training
        data into validation, labeled and unlabeled parts with
        ``data.split_data``.

        Args:
            root_dir: Directory holding the ``train`` and ``test`` pickles.
            tgt_domains: Not used by this implementation.
            src_domains: Not used by this implementation.
            r_val: Forwarded to ``data.split_data`` to carve out validation
                data; ``None`` means the test set is returned as validation.
            r_lab: Forwarded to ``data.split_data`` to separate labeled from
                unlabeled data.
            r_unlab: Not used by this implementation.
            w_unlab: Not used by this implementation.
            rand_seed: Seed forwarded to every ``data.split_data`` call.
            r_data: When not ``None``, forwarded to ``data.split_data`` to
                shrink the unlabeled part (first returned part is kept).

        Returns:
            Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The labels of the
            unlabeled part are computed but not returned.
        """
        root_dir = Path(root_dir)

        def load_split(filename):
            # NOTE(security): unpickling can execute arbitrary code; only
            # point root_dir at trusted dataset files.
            # The context manager closes the handle even if loading fails.
            with open(root_dir / filename, 'rb') as handle:
                batch = pickle.load(handle, encoding='latin1')
            # Reshape the flat rows to (-1, 3, 32, 32), then move the size-3
            # axis to the end.
            images = np.transpose(batch['data'].reshape((-1, 3, 32, 32)),
                                  (0, 2, 3, 1))
            # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
            # exact type it used to alias.
            labels = np.array(batch['fine_labels'], dtype=int)
            return images, labels

        # test
        xt, yt = load_split('test')

        # val, lab, unlab
        x, y = load_split('train')
        if r_val is not None:
            (xv, yv), (x, y) = data.split_data(x.copy(), y.copy(), rand_seed,
                                               r_val)
        else:
            xv, yv = xt, yt
        (xl, yl), (xu, yu) = data.split_data(x.copy(), y.copy(), rand_seed,
                                             r_lab)

        # reduce data
        if r_data is not None:
            xu, yu = data.split_data(xu.copy(), yu.copy(), rand_seed,
                                     r_data)[0]

        return xl, yl, xu, xv, yv, xt, yt
Ejemplo n.º 6
0
def split_data(root_dir, tgt_domains, src_domains, r_val, r_lab, r_unlab,
               w_unlab, rand_seed, r_data):
    """Build labeled / unlabeled / validation / test splits across domains.

    Target domains supply the test, validation, labeled and part of the
    unlabeled data. For each target domain, the second part returned by
    ``data.split_data(..., 1. - r_unlab)`` is left out of the unlabeled pool;
    the total number of left-out samples is then drawn from the source
    domains instead, shared between them in proportion to ``w_unlab``.

    Args:
        root_dir: Dataset root; each domain's list files are read from
            ``<root_dir>/<domain>/<domain>_{train,test}.txt`` through
            ``read``, and every sample entry is joined onto ``root_dir``.
        tgt_domains: Target domain names.
        src_domains: Source domain names; must not overlap ``tgt_domains``.
        r_val: Forwarded to ``data.split_data`` to carve validation data out
            of each target training list; ``None`` means the test set is
            returned as validation.
        r_lab: Forwarded to ``data.split_data`` to separate labeled from
            unlabeled target data.
        r_unlab: ``1. - r_unlab`` is forwarded to ``data.split_data`` to
            decide how much target unlabeled data is kept.
        w_unlab: Per-source-domain weights, indexed like ``src_domains``;
            normalised to sum to one.
        rand_seed: Seed forwarded to every ``data.split_data`` call.
        r_data: When not ``None``, forwarded to ``data.split_data`` to shrink
            both the labeled and the unlabeled sets.

    Returns:
        Tuple ``(xl, yl, xu, xv, yv, xt, yt)``. The unlabeled labels ``yu``
        are computed but not returned.

    Raises:
        AttributeError: If ``tgt_domains`` and ``src_domains`` share a domain.
    """
    root_dir = Path(root_dir)
    w_unlab = np.array(w_unlab) / np.sum(w_unlab)
    if set(tgt_domains) & set(src_domains):
        # Same exception type as before, but the explanation now travels
        # with the exception instead of being printed to stdout.
        raise AttributeError(
            'tgt_domains should not overlap with src_domains')

    def with_root(entries):
        # Join every listed entry onto root_dir and pack into an array.
        return np.array([root_dir / entry for entry in entries])

    # target test
    xt, yt = [], []
    for domain in tgt_domains:
        xd, yd = read(root_dir / domain / f'{domain}_test.txt')
        xt.extend(xd.tolist())
        yt.extend(yd.tolist())
    xt, yt = with_root(xt), np.array(yt)

    # target val, target lab, target unlab
    xv, yv, xl, yl, xu, yu = [], [], [], [], [], []
    # Number of target unlabeled samples left out; refilled from sources.
    Nu = 0
    for domain in tgt_domains:
        xd, yd = read(root_dir / domain / f'{domain}_train.txt')
        # target val
        if r_val is not None:
            (xvd, yvd), (xd, yd) = data.split_data(xd.copy(), yd.copy(),
                                                   rand_seed, r_val)
            xv.extend(xvd.tolist())
            yv.extend(yvd.tolist())
        # target lab
        (xld, yld), (xud, yud) = data.split_data(xd.copy(), yd.copy(),
                                                 rand_seed, r_lab)
        xl.extend(xld.tolist())
        yl.extend(yld.tolist())
        # target unlab
        (xdu, ydu), (xres, _) = data.split_data(xud.copy(), yud.copy(),
                                                rand_seed, 1. - r_unlab)
        xu.extend(xdu.tolist())
        yu.extend(ydu.tolist())
        Nu += len(xres)
    if r_val is not None:
        xv, yv = with_root(xv), np.array(yv)
    else:
        # Without a validation ratio the test split doubles as validation.
        xv, yv = xt, yt
    xl, yl = with_root(xl), np.array(yl)

    # source unlab
    for i, domain in enumerate(src_domains):
        xd, yd = read(root_dir / domain / f'{domain}_train.txt')
        Ndu = int(round(Nu * w_unlab[i]))
        # NOTE(review): the 4th argument here is an integer count, whereas
        # the other calls pass a ratio -- presumably data.split_data accepts
        # both; confirm.
        xd, yd = data.split_data(xd.copy(), yd.copy(), rand_seed, Ndu)[0]
        xu.extend(xd.tolist())
        yu.extend(yd.tolist())
    xu, yu = with_root(xu), np.array(yu)

    # reduce data
    if r_data is not None:
        xl, yl = data.split_data(xl.copy(), yl.copy(), rand_seed, r_data)[0]
        xu, yu = data.split_data(xu.copy(), yu.copy(), rand_seed, r_data)[0]

    return xl, yl, xu, xv, yv, xt, yt