Example #1
def dataset_read(source, target, batch_size, scale=False, all_use='no'):
    S = {}
    S_test = {}
    T = {}
    T_test = {}
    usps = False
    if source == 'usps' or target == 'usps':
        usps = True

    train_source, s_label_train, test_source, s_label_test = return_dataset(
        source, scale=scale, usps=usps, all_use=all_use)
    train_target, t_label_train, test_target, t_label_test = return_dataset(
        target, scale=scale, usps=usps, all_use=all_use)

    S['imgs'] = train_source
    S['labels'] = s_label_train
    T['imgs'] = train_target
    T['labels'] = t_label_train

    # Use target test samples for both loaders; source test accuracy is not evaluated.
    S_test['imgs'] = test_target
    S_test['labels'] = t_label_test
    T_test['imgs'] = test_target
    T_test['labels'] = t_label_test
    # Input resolution: 40 for 'synth', 28 when either domain is USPS, else 32.
    scale = 40 if source == 'synth' else 28 if source == 'usps' or target == 'usps' else 32
    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size, scale=scale)
    dataset = train_loader.load_data()
    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size, scale=scale)
    dataset_test = test_loader.load_data()
    return dataset, dataset_test
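UnalignedDataLoader and return_dataset are project-local helpers, so the snippet is not runnable on its own. Below is a hedged, self-contained sketch of what an "unaligned" pairing does, assuming nothing beyond PyTorch (make_pair_loader is an illustrative name, not the project's API):

import itertools

import torch
from torch.utils.data import DataLoader, TensorDataset


def make_pair_loader(source, target, batch_size):
    # Pair batches from two differently sized datasets by cycling the shorter stream.
    s_loader = DataLoader(TensorDataset(*source), batch_size=batch_size, shuffle=True)
    t_loader = DataLoader(TensorDataset(*target), batch_size=batch_size, shuffle=True)
    if len(s_loader) < len(t_loader):
        return zip(itertools.cycle(s_loader), t_loader)
    return zip(s_loader, itertools.cycle(t_loader))


S = (torch.randn(128, 3, 32, 32), torch.randint(0, 10, (128,)))
T = (torch.randn(64, 3, 32, 32), torch.randint(0, 10, (64,)))
for (img_s, y_s), (img_t, y_t) in make_pair_loader(S, T, batch_size=32):
    print(img_s.shape, img_t.shape)  # torch.Size([32, 3, 32, 32]) from each stream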
Example #2
def dataset_read(target, batch_size):
    # One dict per source domain (four sources, one held-out target).
    S = [{} for _ in range(4)]
    S_test = [{} for _ in range(4)]

    T = {}
    T_test = {}
    domain_all = ['mnistm', 'mnist', 'usps', 'svhn', 'syn']
    domain_all.remove(target)
    dataset_size = []

    target_train, target_train_label, target_test, target_test_label = return_dataset(target)
    dataset_size.append(target_train.shape[0])
    
    for i in range(len(domain_all)):
        source_train, source_train_label, source_test, source_test_label = return_dataset(domain_all[i])
        S[i]['imgs'] = source_train
        S[i]['labels'] = source_train_label
        # Unlike Example #5, this variant keeps the source test splits here
        # rather than substituting target samples.
        S_test[i]['imgs'] = source_test
        S_test[i]['labels'] = source_test_label
        dataset_size.append(source_train.shape[0])

    T['imgs'] = target_train
    T['labels'] = target_train_label

    # Target test samples for evaluation.
    T_test['imgs'] = target_test
    T_test['labels'] = target_test_label

    scale = 32

    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size, scale=scale)
    dataset = train_loader.load_data()

    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size, scale=scale)
    dataset_test = test_loader.load_data()

    # min(dataset_size): the smallest training set bounds a joint epoch.
    return dataset, dataset_test, min(dataset_size)
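Unlike Example #1, this variant also returns min(dataset_size). A plausible use, sketched with illustrative sizes (not measured values), is bounding the number of joint batches per epoch by the smallest training set:

dataset_size = [60000, 59001, 7438, 73257, 25000]  # illustrative domain sizes
batch_size = 128
steps_per_epoch = min(dataset_size) // batch_size
print(steps_per_epoch)  # 58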
Example #3
import numpy as np


def dataset_read(source, target, batch_size, scale=False, all_use='no'):
    S = {}
    S_test = {}
    T = {}
    T_test = {}
    usps = False
    if source == 'usps' or target == 'usps':
        usps = True

    domain_all = ['mnistm', 'mnist', 'usps', 'svhn', 'syn']
    domain_all.remove(source)
    train_source, s_label_train, test_source, s_label_test = return_dataset(
        source, scale=scale, usps=usps, all_use=all_use)

    train_target, t_label_train, test_target, t_label_test = return_dataset(
        domain_all[0], scale=scale, usps=usps, all_use=all_use)
    for i in range(1, len(domain_all)):
        train_target_, t_label_train_, test_target_, t_label_test_ = return_dataset(
            domain_all[i], scale=scale, usps=usps, all_use=all_use)
        train_target = np.concatenate((train_target, train_target_), axis=0)
        t_label_train = np.concatenate((t_label_train, t_label_train_), axis=0)
        test_target = np.concatenate((test_target, test_target_), axis=0)
        t_label_test = np.concatenate((t_label_test, t_label_test_), axis=0)

    print('Source Training: ', train_source.shape)
    print('Source Training label: ', s_label_train.shape)
    print('Source Test: ', test_source.shape)
    print('Source Test label: ', s_label_test.shape)

    print('Target Training: ', train_target.shape)
    print('Target Training label: ', t_label_train.shape)
    print('Target Test: ', test_target.shape)
    print('Target Test label: ', t_label_test.shape)

    S['imgs'] = train_source
    S['labels'] = s_label_train
    T['imgs'] = train_target
    T['labels'] = t_label_train

    # Use target test samples for both loaders; source test accuracy is not evaluated.
    S_test['imgs'] = test_target
    S_test['labels'] = t_label_test
    T_test['imgs'] = test_target
    T_test['labels'] = t_label_test
    scale = 32  # all domain combinations use 32x32 inputs here
    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size, scale=scale)
    dataset = train_loader.load_data()
    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size, scale=scale)
    dataset_test = test_loader.load_data()
    return dataset, dataset_test
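The distinctive move in Example #3 is pooling every non-source domain into a single target set via np.concatenate. The same pattern in isolation, with random stand-ins for return_dataset's outputs:

import numpy as np

rng = np.random.default_rng(0)
# Four stand-in domains with different sample counts.
pools = [rng.normal(size=(n, 3, 32, 32)) for n in (100, 80, 120, 90)]
labels = [rng.integers(0, 10, size=p.shape[0]) for p in pools]

train_target = np.concatenate(pools, axis=0)
t_label_train = np.concatenate(labels, axis=0)
print(train_target.shape, t_label_train.shape)  # (390, 3, 32, 32) (390,)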
Example #4
import numpy as np
import torch


def dataset_read(source,
                 target,
                 batch_size,
                 pixel_norm=True,
                 scale=False,
                 all_use=False):
    S = {}
    S_test = {}
    T = {}
    T_test = {}
    usps = False
    if source == 'usps' or target == 'usps':
        usps = True

    train_source, s_label_train, test_source, s_label_test = return_dataset(
        source, scale=scale, usps=usps, all_use=all_use)
    train_target, t_label_train, test_target, t_label_test = return_dataset(
        target, scale=scale, usps=usps, all_use=all_use)
    # Normalize with the mean pixel image of the pooled training sets;
    # test_source is unused downstream, so only the target test split is normalized.
    if pixel_norm:
        pixel_mean = np.vstack([train_source, train_target]).mean(0)
        train_source = (train_source - pixel_mean) / 255.0
        train_target = (train_target - pixel_mean) / 255.0
        test_target = (test_target - pixel_mean) / 255.0

    S['imgs'] = torch.from_numpy(train_source).float()
    S['labels'] = torch.from_numpy(s_label_train).long()

    T['imgs'] = torch.from_numpy(train_target).float()
    T['labels'] = torch.from_numpy(t_label_train).long()

    # Use target test samples for both loaders; source test accuracy is not evaluated.
    S_test['imgs'] = torch.from_numpy(test_target).float()
    S_test['labels'] = torch.from_numpy(t_label_test).long()
    T_test['imgs'] = torch.from_numpy(test_target).float()
    T_test['labels'] = torch.from_numpy(t_label_test).long()
    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size)
    dataset = train_loader.load_data()
    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size)
    dataset_test = test_loader.load_data()
    return dataset, dataset_test
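Example #4 adds pixel-mean normalization: a single mean image computed over the pooled source and target training sets, subtracted from each split, then scaled by 255 so values land in roughly [-1, 1]. A standalone sketch on random data:

import numpy as np

rng = np.random.default_rng(0)
train_source = rng.integers(0, 256, size=(100, 3, 32, 32)).astype(np.float64)
train_target = rng.integers(0, 256, size=(80, 3, 32, 32)).astype(np.float64)

# np.vstack joins along the sample axis; mean(0) leaves one (3, 32, 32) image.
pixel_mean = np.vstack([train_source, train_target]).mean(0)
normed = (train_source - pixel_mean) / 255.0
print(pixel_mean.shape, normed.min() >= -1.0, normed.max() <= 1.0)  # (3, 32, 32) True True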
Example #5
import numpy as np


def dataset_read(target, batch_size):
    # One dict per source domain (four sources, one held-out target).
    S = [{} for _ in range(4)]
    S_test = [{} for _ in range(4)]

    T = {}
    T_test = {}
    T_val = {}
    domain_all = ['mnistm', 'mnist', 'usps', 'svhn', 'syn']
    domain_all.remove(target)

    target_train, target_train_label, target_test, target_test_label = return_dataset(
        target)
    indices_tar = np.arange(target_test.shape[0])

    np.random.seed(42)  # fixed seed: the 5% validation split is reproducible
    np.random.shuffle(indices_tar)
    val_split = int(0.05 * target_test.shape[0])
    target_val = target_test[indices_tar[:val_split]]
    target_val_label = target_test_label[indices_tar[:val_split]]
    target_test = target_test[indices_tar[val_split:]]
    target_test_label = target_test_label[indices_tar[val_split:]]

    for i in range(len(domain_all)):
        source_train, source_train_label, source_test, source_test_label = return_dataset(
            domain_all[i])
        S[i]['imgs'] = source_train
        S[i]['labels'] = source_train_label
        # Use target test samples: source test performance is not evaluated.
        S_test[i]['imgs'] = target_test
        S_test[i]['labels'] = target_test_label

    T['imgs'] = target_train
    T['labels'] = target_train_label

    # Target test samples for evaluation.
    T_test['imgs'] = target_test
    T_test['labels'] = target_test_label

    T_val['imgs'] = target_val
    T_val['labels'] = target_val_label

    scale = 32

    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size, scale=scale)
    dataset = train_loader.load_data()

    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size, scale=scale)

    dataset_test = test_loader.load_data()

    # Dummy source placeholder: only the target validation stream is consumed.
    S_val = {}
    S_val['imgs'] = np.zeros((20, 3, 32, 32))
    S_val['labels'] = np.zeros(20)
    val_loader = UnalignedDataLoader()
    val_loader.initialize([S_val], T_val, batch_size, batch_size, scale=scale)

    dataset_valid = val_loader.load_data()
    return dataset, dataset_test, dataset_valid
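The seeded shuffle above carves a fixed 5% of the target test split into a validation set. The same pattern in isolation, on toy arrays:

import numpy as np

X = np.arange(1000).reshape(200, 5)
y = np.arange(200)

indices = np.arange(X.shape[0])
np.random.seed(42)  # fixed seed: identical split on every run
np.random.shuffle(indices)
n_val = int(0.05 * X.shape[0])
X_val, y_val = X[indices[:n_val]], y[indices[:n_val]]
X_test, y_test = X[indices[n_val:]], y[indices[n_val:]]
print(X_val.shape, X_test.shape)  # (10, 5) (190, 5)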
Example #6
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA


def dataset_hard_cluster(target, batch_size, num_clus):
    # Number of principal components kept before k-means.
    n_comp = 50

    T = {}
    T_test = {}
    T_val = {}
    domain_all = ['mnistm', 'mnist', 'usps', 'svhn', 'syn']
    domain_all.remove(target)

    target_train, target_train_label, target_test, target_test_label = return_dataset(
        target)

    indices_tar = np.arange(target_test.shape[0])
    np.random.seed(42)  # fixed seed: the 5% validation split is reproducible
    np.random.shuffle(indices_tar)
    val_split = int(0.05 * target_test.shape[0])
    target_val = target_test[indices_tar[:val_split]]
    target_val_label = target_test_label[indices_tar[:val_split]]
    target_test = target_test[indices_tar[val_split:]]
    target_test_label = target_test_label[indices_tar[val_split:]]

    T['imgs'] = target_train
    T['labels'] = target_train_label
    # input target samples for both
    T_test['imgs'] = target_test
    T_test['labels'] = target_test_label

    T_val['imgs'] = target_val
    T_val['labels'] = target_val_label

    S_train = []
    S_train_labels = []

    # Read the respective source domain datasets
    for i in range(len(domain_all)):

        source_train, source_train_label, source_test, source_test_label = return_dataset(
            domain_all[i])

        # Bring every dataset to (3, 28, 28), i.e. a 2352-dimensional feature vector.

        # Broadcast single-channel images to three channels.
        if source_train.shape[1] == 1:
            source_train = np.repeat(source_train, 3, 1)
        # Center-crop 32x32 images to 28x28.
        if source_train.shape[2] == 32:
            source_train = source_train[:, :, 2:30, 2:30]
        S_train.append(source_train)
        S_train_labels.append(source_train_label)


    X_combined = np.concatenate(S_train, axis=0)
    X_labels = np.concatenate(S_train_labels, axis=0)
    source_num_train_ex = X_combined.shape[0]
    X_vec = X_combined.reshape(source_num_train_ex, -1)

    # Center, project to n_comp dimensions with PCA, then hard-assign with k-means.
    mean = X_vec.mean(0)
    X_vec = X_vec - mean
    pca_transformed = PCA(n_components=n_comp).fit_transform(X_vec)
    kmeans = KMeans(n_clusters=num_clus, n_init=1)
    predict = kmeans.fit_predict(pca_transformed)

    print("Hard Clustering Ends")
    S = []
    S_test = []
    for i in range(num_clus):
        S.append({})
        S[i]['imgs'] = X_combined[predict == i]
        S[i]['labels'] = X_labels[predict == i]

        # Use target test samples: source test performance is not evaluated.
        S_test.append({})
        S_test[i]['imgs'] = target_test
        S_test[i]['labels'] = target_test_label

    scale = 32

    train_loader = UnalignedDataLoader()
    train_loader.initialize(S, T, batch_size, batch_size, scale=scale)
    dataset = train_loader.load_data()

    test_loader = UnalignedDataLoader()
    test_loader.initialize(S_test, T_test, batch_size, batch_size, scale=scale)

    dataset_test = test_loader.load_data()

    # Dummy source placeholder: only the target validation stream is consumed.
    S_val = {}
    S_val['imgs'] = np.zeros((20, 3, 32, 32))
    S_val['labels'] = np.zeros(20)
    val_loader = UnalignedDataLoader()
    val_loader.initialize([S_val], T_val, batch_size, batch_size, scale=scale)

    dataset_valid = val_loader.load_data()
    return dataset, dataset_test, dataset_valid
def dataset_read(target, batch_size, args):
    # DomainNet-style variant: three domains, the named one held out as target.
    domain_all = ['real', 'sketch', 'painting']
    domain_all.remove(target)

    print('source domains:', domain_all)
    print('target domain:', target)

    scale = 32

    # In this variant return_dataset builds the loaders itself from args.
    source_loader1, source_loader2, target_loader, class_list1, class_list2 = return_dataset(args)

    train_loader = UnalignedDataLoader()
    train_loader.initialize(source_loader1,
                            source_loader2,
                            target_loader,
                            batch_size,
                            batch_size,
                            scale=scale)
    dataset = train_loader.load_data()
    return dataset
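The clustering core of dataset_hard_cluster above (flatten, center, PCA to 50 dimensions, k-means hard assignment) works in isolation. A self-contained sketch on random data, for shapes only:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3, 28, 28)).reshape(500, -1)  # 2352-dim vectors
X = X - X.mean(0)                                       # center the features

X_pca = PCA(n_components=50).fit_transform(X)
labels = KMeans(n_clusters=4, n_init=1).fit_predict(X_pca)
print(np.bincount(labels))  # samples per cluster; counts sum to 500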