Example #1
def get_new_batches(net, batch_size, train_batches, train_data, train_labels, M):
    # score every training sample with the current network
    scores = get_scores(net, train_batches)
    # override the scores of the first 50000 samples with a constant
    scores[0:50000] = 10
    # keep the M easiest samples, balanced across classes
    train_data2, train_labels2 = get_easy_data_balanced(train_data, train_labels, scores, M)
    # shuffle the selected subset and rebuild the mini-batches
    order = range(M)
    np.random.shuffle(order)
    train_data2 = train_data2[:, order]
    train_labels2 = train_labels2[order]
    train_batches2 = data_loader.prepare_batches(train_data2, train_labels2, batch_size)
    print '# train:', train_data2.shape[1], 'samples', len(train_batches2), 'batches'
    return train_batches2
	
def get_new_batches(net, batch_size, train_batches, train_data, train_labels,
                    M):
    scores = get_scores(net, train_batches)
    #scores[0:20000] = 10
    train_data2, train_labels2 = get_easy_data_balanced(
        train_data, train_labels, scores, M)
    order = range(M)
    np.random.shuffle(order)
    train_data2 = train_data2[:, order]
    train_labels2 = train_labels2[order]
    train_batches2 = data_loader.prepare_batches(train_data2, train_labels2,
                                                 batch_size)
    print '# train:', train_data2.shape[1], 'samples', len(train_batches2), 'batches'
    return train_batches2
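# A hedged usage sketch (not part of the original example): between training
# rounds, get_new_batches can rebuild the training set from the M currently
# easiest samples, e.g. in a self-paced loop. The round count, M, and the
# net_trainer.train arguments are illustrative only, copied from how that
# call appears elsewhere in these examples:
#
# for round in range(5):
#     net_trainer.train(net, 20, train_batches, val_batches, test_batches)
#     train_batches = get_new_batches(net, batch_size, train_batches,
#                                     train_data, train_labels, M)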
train_data = np.concatenate(
    (train_data[:, 0:pure_sz], noisy_data[:, 0:noise_sz],
     back_data[:, 0:back_sz]),
    axis=1)
train_labels = np.concatenate(
    (train_labels[0:pure_sz], noisy_labels[0:noise_sz],
     back_labels[0:back_sz]))

# shuffle data
order = range(pure_sz + back_sz + noise_sz)
np.random.shuffle(order)
train_data = train_data[:, order]
train_labels = train_labels[order]

train_batches = data_loader.prepare_batches(train_data, train_labels,
                                            batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)

print '# train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print '# test:', test_data.shape[1], 'samples', len(test_batches), 'batches'

s = np.zeros((22, 11))
s[:11, :11] = np.eye(11)
s[11:, :11] = np.eye(11)
net.layers[-2].weight = data_loader.copy_to_gpu(s)

w = np.eye(22)
w[11:21, 11:21] = 0.5 * np.eye(10) + (np.ones((10, 10)) - np.eye(10)) * 0.5 / 9.0
net.W_denoise = data_loader.copy_to_gpu(w)
net.label_tmp = data_loader.copy_to_gpu(np.zeros((22, 128)))
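# Reading of the two matrices above (note added here, not in the source):
# s stacks two 11x11 identities into a 22x11 matrix, tying each of the 22
# units to one of the 11 classes; in W_denoise the 10x10 block for units
# 11-20 has 0.5 on the diagonal and 0.5/9 off it, so each of its rows sums
# to 1. A quick check on the numpy arrays built above:
assert np.allclose(w[11:21, 11:21].sum(axis=1), 1.0)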
Example #4
pure_sz = int(sys.argv[1])

# setting
batch_size = 128
param_file = '/home/sainbar/fastnet-confussion-layer/config/cifar-10-18pct-confussion10.cfg'
learning_rate = 1
image_color = 3
image_size = 32
image_shape = (image_color, image_size, image_size, batch_size)
init_model = parser.parse_config_file(param_file)
net = fastnet.net.FastNet(learning_rate, image_shape, init_model)
net.layers[-2].weight = data_loader.copy_to_gpu(np.eye(10))

# prepare data
train_data, train_labels, test_data, test_labels = data_loader.load_cifar10()
data_mean = train_data.mean(axis=1,keepdims=True)
train_data = train_data - data_mean
test_data = test_data - data_mean

# add noise to label
W = np.load('mixing-matrix-' + sys.argv[2] + '.npy')
train_labels_noisy = confusion_matrix.mix_labels(W, train_labels)
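# A minimal sketch of what confusion_matrix.mix_labels is assumed to do here
# (the real implementation may differ): for each clean label y, draw a noisy
# label from the mixing distribution that W stores for y. Whether that
# distribution sits in W's rows or columns depends on how the matrices were
# saved; this hypothetical helper assumes each column W[:, y] sums to 1.
def mix_labels_sketch(W, labels, rng=np.random):
    noisy = np.empty_like(labels)
    for i, y in enumerate(labels.astype(int)):
        noisy[i] = rng.choice(W.shape[0], p=W[:, y])
    return noisy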

train_batches = data_loader.prepare_batches(train_data[:,:pure_sz], train_labels_noisy[:pure_sz], batch_size)
train_batches2 = data_loader.prepare_batches(train_data[:,:pure_sz], train_labels[:pure_sz], batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)

print 'train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print 'test:', test_data.shape[1], 'samples', len(test_batches), 'batches'
pure_sz2 = pure_sz + int(1. * val_sz * pure_sz/(pure_sz + noisy_sz + back_sz))
noisy_sz2 = noisy_sz + int(1. * val_sz * noisy_sz/(pure_sz + noisy_sz + back_sz))
back_sz2 = back_sz + int(1. * val_sz * back_sz/(pure_sz + noisy_sz + back_sz))
assert pure_sz2 <= clean_data.shape[1]
assert noisy_sz2 <= noisy_data.shape[1]
assert back_sz2 <= back_data.shape[1]
val_data = np.concatenate((clean_data[:,pure_sz:pure_sz2], noisy_data[:,noisy_sz:noisy_sz2], back_data[:,back_sz:back_sz2]), axis=1)
val_labels = np.concatenate((clean_labels[pure_sz:pure_sz2], noisy_labels[noisy_sz:noisy_sz2], back_labels[back_sz:back_sz2]))
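# Worked check of the proportional split above (illustrative numbers, not the
# script's actual arguments): with pure_sz=30000, noisy_sz=20000, back_sz=10000
# and val_sz=6000, the extra validation slices are 3000/2000/1000, so
# pure_sz2=33000, noisy_sz2=22000, back_sz2=11000 and the validation set keeps
# the same clean/noisy/background ratio as the training mix.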

# shuffle data
order = range(train_data.shape[1])
np.random.shuffle(order)
train_data = train_data[:,order]
train_labels = train_labels[order]

train_batches = data_loader.prepare_batches(train_data, train_labels, batch_size)
val_batches = data_loader.prepare_batches(val_data, val_labels, batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)

print '# train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print '# val:', val_data.shape[1], 'samples', len(val_batches), 'batches'
print '# test:', test_data.shape[1], 'samples', len(test_batches), 'batches'

# confusion matrix
w = np.eye(10) * alpha + (1 - alpha) / 10.0
net.layers[-2].weight = data_loader.copy_to_gpu(w)
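# Sanity check (added note, not in the original script): w = alpha * I +
# (1 - alpha)/10 * ones((10, 10)), so every row and every column sums to
# alpha + 10 * (1 - alpha)/10 = 1, i.e. a doubly stochastic confusion matrix.
assert np.allclose(w.sum(axis=0), 1.0) and np.allclose(w.sum(axis=1), 1.0)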

if not net_checkpoint.try_load(net):
    # net.adjust_learning_rate(2)
    # net.adjust_learning_rate(1. + (1-alpha)/alpha/10)
    # net_trainer.train(net, 300, train_batches, val_batches, test_batches)
    pass
back_labels[0:40000] = np.random.randint(0, 10, [40000])
noisy_data = np.concatenate((train_data[:,10000:20000], back_data[:,0:50000]), axis=1)
noisy_labels = np.concatenate((train_labels[10000:20000], back_labels[0:50000]))

w = np.zeros([11, 14])
w[:10,:10] = np.eye(10) / 6.0
w[:10,:10] += 4.0 / 60.0
w[10,:10] = 1.0 / 6.0
w[:10,10:] = 4.0 / 50.0
w[10,10:] = 1.0 / 5.0
net.layers[-2].weight = data_loader.copy_to_gpu(w)
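# Added check (not in the source): each column of w sums to 1 -- columns 0-9
# give 10 * 4/60 + 1/6 + 1/6 = 1 and columns 10-13 give 10 * 4/50 + 1/5 = 1 --
# so every column is a distribution over the 11 outputs (10 classes plus the
# background class).
assert np.allclose(w.sum(axis=0), 1.0)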

# shuffle data
order = range(10000)
np.random.shuffle(order)
train_data = train_data[:,order]
train_labels = train_labels[order]

order = range(60000)
np.random.shuffle(order)
noisy_data = noisy_data[:,order]
noisy_labels = noisy_labels[order]

train_batches = data_loader.prepare_batches(train_data, train_labels, batch_size)
noisy_batches = data_loader.prepare_batches(noisy_data, noisy_labels, batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)

print 'train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print 'noisy:', noisy_data.shape[1], 'samples', len(noisy_batches), 'batches'
print 'test:', test_data.shape[1], 'samples', len(test_batches), 'batches'
# background noise
back_data = data_loader.load_noise()
back_data = back_data - data_mean
back_labels = np.ones(back_data.shape[1]) * 10

# confusion matrix
w = np.eye(11)
net.layers[-2].weight = data_loader.copy_to_gpu(w)

# shuffle data
order = range(pure_sz)
np.random.shuffle(order)
train_data = train_data[:,order]
train_labels = train_labels[order]
order = range(back_sz)
np.random.shuffle(order)
back_data2 = back_data[:,back_sz:back_sz+10000]
back_labels2 = back_labels[back_sz:back_sz+10000]
back_data = back_data[:,order]
back_labels = back_labels[order]

train_batches = data_loader.prepare_batches(train_data, train_labels, batch_size)
back_batches = data_loader.prepare_batches(back_data, back_labels, batch_size)
back_batches2 = data_loader.prepare_batches(back_data2, back_labels2, batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)

print 'train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print 'back:', back_data.shape[1], 'samples', len(back_batches), 'batches'
print 'test:', test_data.shape[1], 'samples', len(test_batches), 'batches'
Example #8
pure_sz = int(sys.argv[1])

# setting
batch_size = 128
param_file = '/home/sainbar/fastnet-confussion-layer/config/svhn.cfg'
learning_rate = 1
image_color = 3
image_size = 32
image_shape = (image_color, image_size, image_size, batch_size)
init_model = parser.parse_config_file(param_file)
net = fastnet.net.FastNet(learning_rate, image_shape, init_model)
net.layers[-2].weight = data_loader.copy_to_gpu(np.eye(10))

# prepare data
train_data, train_labels, test_data, test_labels = data_loader.load_svhn100k()
train_data = train_data * 100
test_data = test_data * 100
data_mean = train_data.mean(axis=1, keepdims=True)
train_data = train_data - data_mean
test_data = test_data - data_mean

train_batches = data_loader.prepare_batches(train_data[:, :pure_sz],
                                            train_labels[:pure_sz], batch_size)
test_batches = data_loader.prepare_batches(test_data[:, :10000],
                                           test_labels[:10000], batch_size)
# test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)

print 'train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print 'test:', test_data.shape[1], 'samples', len(test_batches), 'batches'
image_color = 3
image_size = 32
image_shape = (image_color, image_size, image_size, batch_size)
init_model = parser.parse_config_file(param_file)
net = fastnet.net.FastNet(learning_rate, image_shape, init_model)
net.layers[-2].weight = data_loader.copy_to_gpu(np.eye(10))

# prepare data
train_data, train_labels, test_data, test_labels = data_loader.load_svhn100k()
train_data = train_data * 100
test_data = test_data * 100
data_mean = train_data.mean(axis=1,keepdims=True)
train_data = train_data - data_mean
test_data = test_data - data_mean

# add noise to label
W = np.load('mixing-matrix-' + sys.argv[2] + '.npy')
train_labels_noisy = confusion_matrix.mix_labels(W, train_labels)

train_batches = data_loader.prepare_batches(train_data[:,:pure_sz], train_labels_noisy[:pure_sz], batch_size)
test_batches = data_loader.prepare_batches(test_data[:,:10000], test_labels[:10000], batch_size)
# test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)

print 'train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print 'test:', test_data.shape[1], 'samples', len(test_batches), 'batches'

net_trainer_noisy.train_mixed(net, 5, train_batches, test_batches)
net.layers[-2].epsW = 0.001
net.layers[-2].wc = float(sys.argv[3])
net_trainer_noisy.train_mixed(net, 100, train_batches, test_batches)
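# Reading of the schedule above (added note, not in the source): the first
# train_mixed call runs 5 warm-up epochs; the next two lines then set the
# confusion layer's learning rate to 0.001 and its weight cost from
# sys.argv[3], and the longer 100-epoch run lets that layer adapt to the
# injected label noise.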
# prepare data
train_data, train_labels, test_data, test_labels = data_loader.load_cifar10()
data_mean = train_data.mean(axis=1,keepdims=True)
train_data = train_data - data_mean
test_data = test_data - data_mean

# background noise
back_data = data_loader.load_noise()
back_data = back_data - data_mean
back_labels = np.ones(back_data.shape[1]) * 10

train_data = np.concatenate((train_data[:,0:pure_sz], back_data[:,0:back_sz]), axis=1)
train_labels = np.concatenate((train_labels[0:pure_sz], back_labels[0:back_sz]))
test_data = np.concatenate((test_data, back_data[:,back_sz:back_sz+1000]), axis=1)
test_labels = np.concatenate((test_labels, back_labels[back_sz:back_sz+1000]))
order = range(pure_sz + back_sz)
np.random.shuffle(order)
train_data = train_data[:,order]
train_labels = train_labels[order]

# add noise to label
W = np.load('mixing-matrix-back-' + sys.argv[3] + '.npy')
train_labels_noisy = confusion_matrix.mix_labels(W, train_labels)

train_batches = data_loader.prepare_batches(train_data, train_labels_noisy, batch_size)
train_batches2 = data_loader.prepare_batches(train_data, train_labels, batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)

print 'train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print 'test:', test_data.shape[1], 'samples', len(test_batches), 'batches'
back_data = data_loader.load_noise()
back_data = back_data - data_mean
back_labels = np.ones(back_data.shape[1]) * 10

# confusion matrix
w = np.eye(11)
net.layers[-2].weight = data_loader.copy_to_gpu(w)

# shuffle data
order = range(pure_sz)
np.random.shuffle(order)
train_data = train_data[:, order]
train_labels = train_labels[order]
order = range(back_sz)
np.random.shuffle(order)
back_data2 = back_data[:, back_sz:back_sz + 10000]
back_labels2 = back_labels[back_sz:back_sz + 10000]
back_data = back_data[:, order]
back_labels = back_labels[order]

train_batches = data_loader.prepare_batches(train_data, train_labels,
                                            batch_size)
back_batches = data_loader.prepare_batches(back_data, back_labels, batch_size)
back_batches2 = data_loader.prepare_batches(back_data2, back_labels2,
                                            batch_size)
test_batches = data_loader.prepare_batches(test_data, test_labels, batch_size)

print 'train:', train_data.shape[1], 'samples', len(train_batches), 'batches'
print 'back:', back_data.shape[1], 'samples', len(back_batches), 'batches'
print 'test:', test_data.shape[1], 'samples', len(test_batches), 'batches'