import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import Chain, Variable, optimizers


class FaceNet():

    def __init__(self):
        # Small convolutional regressor: four 3x3 conv layers followed by
        # two fully connected layers that output 4 values per image.
        self.model = Chain(conv1=L.Convolution2D(3, 20, 3, 1, 1),
                           conv2=L.Convolution2D(20, 20, 3, 1, 1),
                           conv3=L.Convolution2D(20, 40, 3, 1, 1),
                           conv4=L.Convolution2D(40, 40, 3, 1, 1),
                           linear1=L.Linear(None, 100),
                           linear2=L.Linear(100, 4))
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model)

    def forward(self, x):
        out = self.model.conv1(x)
        out = F.elu(out)
        out = self.model.conv2(out)
        out = F.max_pooling_2d(out, 2)
        out = F.elu(out)
        out = self.model.conv3(out)
        out = F.elu(out)
        out = self.model.conv4(out)
        out = F.elu(out)
        out = F.average_pooling_2d(out, 6)
        out = F.dropout(out)
        out = self.model.linear1(out)
        out = F.elu(out)
        out = F.dropout(out)
        out = self.model.linear2(out)
        return out

    def predict(self, X, step=100):
        # Run inference in chunks of `step` samples with dropout disabled.
        with chainer.using_config('train', False):
            with chainer.no_backprop_mode():
                output = []
                for i in range(0, len(X), step):
                    x = Variable(X[i:i + step])
                    output.append(self.forward(x).data)
                return np.vstack(output)

    def score(self, X, Y, step=100):
        predicted = self.predict(X, step)
        score = F.r2_score(predicted, Y).data
        return score

    def fit(self, X, Y, batchsize=100, n_epoch=10):
        with chainer.using_config('train', True):
            learning_curve = []
            for epoch in range(n_epoch):
                print('epoch ', epoch)
                index = np.random.permutation(len(X))
                for i in range(0, len(index), batchsize):
                    self.model.cleargrads()
                    print(i)
                    x = X[index[i:i + batchsize]]
                    y = Y[index[i:i + batchsize]]
                    # augment(x, y)
                    x = Variable(x)
                    y = Variable(y)
                    output = self.forward(x)
                    loss = F.mean_squared_error(y, output)
                    loss.backward()
                    learning_curve.append(float(loss.data))
                    self.optimizer.update()
            return learning_curve
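# Minimal usage sketch (not part of the original code): runs FaceNet on random
# dummy data just to illustrate the expected shapes and dtypes. X_dummy and
# Y_dummy are placeholders; real images and 4-value targets would go here.
X_dummy = np.random.rand(20, 3, 24, 24).astype(np.float32)  # NCHW float32 images
Y_dummy = np.random.rand(20, 4).astype(np.float32)          # 4 regression targets each
net = FaceNet()
curve = net.fit(X_dummy, Y_dummy, batchsize=10, n_epoch=1)
print('last batch loss:', curve[-1], 'R2:', net.score(X_dummy, Y_dummy))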
# ======================================================================
# Training the autoencoder
print("******* Learning AutoEncoder *******")
for epoch in range(1, n_epoch + 1):
    print('epoch', epoch)

    # Training pass over a shuffled copy of the training set
    perm = np.random.permutation(N)
    sum_loss = 0
    for i in range(0, N, batchsize):
        x_batch = x_train[perm[i:i + batchsize]]
        y_batch = y_train[perm[i:i + batchsize]]
        model_ae.cleargrads()
        loss = forward(x_batch, y_batch)
        loss.backward()
        optimizer.update()
        train_loss.append(loss.data)
        sum_loss += float(loss.data) * batchsize
    print('\ttrain mean loss={0:.3f}'.format(sum_loss / N))

    # Evaluation (evaluating the test data only once, after the training loop, is also fine)
    sum_loss = 0
    for i in range(0, N_test, batchsize):
        x_batch = x_test[i:i + batchsize]
        y_batch = y_test[i:i + batchsize]
        loss = forward(x_batch, y_batch, train=False)
        sum_loss += float(loss.data) * batchsize
    print('\ttest mean loss={0:.3f}'.format(sum_loss / N_test))
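# Sketch (assumption, not from the original snippet): one way the objects the
# loop above relies on (model_ae, forward, optimizer) might be defined. The
# 784/100 layer sizes are placeholders for illustration only.
model_ae = Chain(enc=L.Linear(784, 100), dec=L.Linear(100, 784))
optimizer = optimizers.Adam()
optimizer.setup(model_ae)

def forward(x_data, y_data, train=True):
    # Encode, decode, and return the reconstruction error.
    with chainer.using_config('train', train):
        h = F.dropout(F.relu(model_ae.enc(Variable(x_data))))
        y = model_ae.dec(h)
        return F.mean_squared_error(y, Variable(y_data))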
# Requires cupy plus the project-specific `one_hot_encoding` helper and the
# `SoftKMeansLayer` link, both defined elsewhere.
import cupy


class SoftSeqKmeans():

    def __init__(self, n_centroid, centroid_length, alphabet, use_gpu=True, tau=2):
        self.model = None
        self.optimizer = None
        self.centroid_length = centroid_length
        self.n_centroid = n_centroid
        self.tau = tau
        self.use_gpu = use_gpu
        self.alphabet = alphabet
        self.dict_alphabet = {alphabet[i]: i for i in range(len(alphabet))}
        self.max_length = None

    def fit(self, X, batchsize=100, n_iter=100, init_smooth=0.8,
            init_scale=0.1, lr=0.01, optimizer='Momentum'):
        L = np.array([len(seq) for seq in X])
        self.max_length = np.max(L)
        # Initialize the centroids from unique sequences of exactly centroid_length.
        init = X[np.where(L == self.centroid_length)[0]]
        init = np.unique(init)
        init = init[np.random.choice(len(init), self.n_centroid, replace=False)]
        print(init)
        init_seq = one_hot_encoding(init, self.dict_alphabet,
                                    self.max_length, init_smooth)
        # Log-transform the smoothed one-hot profiles and perturb them with Gumbel noise.
        init_seq[np.where(init_seq != 0)] = np.log(init_seq[np.where(init_seq != 0)])
        noise = np.random.gumbel(0, 1, init_seq.shape)
        init_seq[np.where(init_seq != 0)] += noise[np.where(init_seq != 0)]
        init_seq *= init_scale
        # Center each profile across the alphabet dimension.
        init_seq = np.transpose(
            np.transpose(init_seq, (1, 0, 2)) - np.mean(init_seq, axis=1),
            (1, 0, 2))
        self.model = Chain(kmeans=SoftKMeansLayer(self.n_centroid,
                                                  self.centroid_length,
                                                  init_W=init_seq,
                                                  tau1=self.tau))
        self.optimizer = {
            'Adam': optimizers.Adam(lr),
            'Momentum': optimizers.MomentumSGD(lr),
            'SGD': optimizers.SGD(lr)
        }[optimizer]
        self.optimizer.setup(self.model)
        self.optimizer.add_hook(chainer.optimizer.WeightDecay(1e-6))
        if self.use_gpu:
            self.model.to_gpu()
        with chainer.using_config('train', True):
            lcurve = []
            for i in range(n_iter):
                self.model.cleargrads()
                indexes = np.random.choice(len(X), batchsize)
                x = X[indexes]
                x = one_hot_encoding(x, self.dict_alphabet, self.max_length)
                if self.use_gpu:
                    x = cupy.array(x)
                # x already holds the encoded mini-batch, so pass it directly.
                loss = self.model.kmeans(x)
                loss.backward()
                lcurve.append(float(loss.data))
                self.optimizer.update()
                print(i, np.mean(lcurve[-10:]))
            return np.array(lcurve)

    def transform(self, X, batchsize=1000):
        labels = []
        with chainer.using_config('train', False):
            with chainer.no_backprop_mode():
                for i in range(0, len(X), batchsize):
                    print(i)
                    x = X[i:i + batchsize]
                    x = one_hot_encoding(x, self.dict_alphabet, self.max_length)
                    if self.use_gpu:
                        x = cupy.array(x)
                    loss, indexes = self.model.kmeans(x, inference=True)
                    labels.append(indexes)
        return np.concatenate(labels)

    def get_centroid(self):
        return cupy.asnumpy(self.model.kmeans.get_centroid())
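# Usage sketch (assumption): clusters a toy set of DNA-like strings on the CPU.
# It presumes the project-specific `one_hot_encoding` helper and `SoftKMeansLayer`
# link used inside the class are importable; the sequences below are made up.
sequences = np.array(['ACGTACGT', 'ACGTTCGT', 'TTGACCAA', 'TTGACGAA'] * 25)
soft_km = SoftSeqKmeans(n_centroid=2, centroid_length=8, alphabet='ACGT',
                        use_gpu=False)
lcurve = soft_km.fit(sequences, batchsize=20, n_iter=50, optimizer='Adam')
labels = soft_km.transform(sequences)
print(labels[:8])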
Nh = int(0.5 * M)  # M: dimensionality of the 2D space (defined elsewhere)

# Two convolution layers followed by two fully connected layers;
# C (number of classes), Ntrain, xtrain, and ytrain are defined elsewhere.
NNset = Chain(conv1=L.Convolution2D(1, 32, 5),
              conv2=L.Convolution2D(32, 32, 5),
              l1=L.Linear(13 * 13 * 32, Nh),
              l2=L.Linear(Nh, C))

# Optimizer setup (assumed; not shown in the original snippet).
optimizer = optimizers.Adam()
optimizer.setup(NNset)


def model(x):
    # 64x64 input -> conv5 -> pool2 -> conv5 -> pool2 gives 13x13x32 features.
    h = F.max_pooling_2d(F.relu(NNset.conv1(x)), 2)
    h = F.max_pooling_2d(F.relu(NNset.conv2(h)), 2)
    h = F.relu(NNset.l1(h))
    y = NNset.l2(h)
    return y


Tall = 100  # number of epochs
mb = 100    # mini-batch size
train_loss = []
for i in range(Tall):
    index = np.random.permutation(range(Ntrain))
    for j in range(0, Ntrain, mb):
        x = Variable(xtrain[index[j:j + mb]].reshape(mb, 1, 64, 64))
        t = Variable(ytrain[index[j:j + mb]])
        NNset.cleargrads()
        y = model(x)
        loss = F.softmax_cross_entropy(y, t)
        loss.backward()
        optimizer.update()
        train_loss.append(loss.data)
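# Sketch (assumption): evaluating the trained network on held-out data. `xtest`
# and `ytest` are presumed to exist alongside xtrain/ytrain with the same layout.
with chainer.using_config('train', False), chainer.no_backprop_mode():
    y_pred = model(Variable(xtest.reshape(len(xtest), 1, 64, 64)))
    acc = F.accuracy(y_pred, ytest)
    print('test accuracy: {0:.3f}'.format(float(acc.data)))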
# Requires cupy and the project-specific `one_hot_encoding`, `edit_distance`,
# and `KMeansLayer` definitions from elsewhere.
import random


class SeqKmeans():

    def __init__(self, n_centroid, centroid_length, alphabet, use_gpu=True, tau=2):
        self.model = None
        self.optimizer = None
        self.centroid_length = centroid_length
        self.n_centroid = n_centroid
        self.tau = tau
        self.use_gpu = use_gpu
        self.alphabet = alphabet
        self.dict_alphabet = {alphabet[i]: i for i in range(len(alphabet))}
        self.max_length = None

    def get_initialize_points(self, X, smooth, n_centroid):
        # k-means++-style initialization: pick the first centroid at random,
        # then draw the remaining ones with probability proportional to the
        # edit distance from the centroids chosen so far.
        X = cupy.array(one_hot_encoding(X, self.dict_alphabet,
                                        self.max_length, smooth),
                       dtype=np.float32)
        I = np.ravel(np.broadcast_to(np.arange(len(X)), (len(X), len(X))).T)
        J = np.ravel(np.broadcast_to(np.arange(len(X)), (len(X), len(X))))
        d = edit_distance(X[I], X[J]).reshape((len(X), len(X)))
        d = cupy.asnumpy(d)
        out = [random.randint(0, len(X) - 1)]
        for i in range(n_centroid - 1):
            min_d = np.min(d[:, out], axis=1)
            new_point = np.random.choice(len(min_d), p=min_d / np.sum(min_d))
            out.append(int(new_point))
        return cupy.asnumpy(X)[out, :, :]

    def fit(self, X, mini_batch=1000, subsample_batch=100, n_iter=100,
            step_per_iter=10, init_smooth=0.8, init_scale=0.1, lr=0.1,
            optimizer='SGD'):
        L = np.array([len(seq) for seq in X])
        self.max_length = np.max(L)
        init = X[np.where(L == self.centroid_length)[0]]
        init = np.unique(init)
        if len(init) > self.n_centroid * 100:
            init = init[np.random.choice(len(init), self.n_centroid * 100,
                                         replace=False)]
        init_seq = self.get_initialize_points(init, init_smooth, self.n_centroid)
        # Alternative (disabled) noisy one-hot initialization:
        """init_seq = one_hot_encoding(init, self.dict_alphabet,
                                       self.max_length, init_smooth)
        init_seq[np.where(init_seq != 0)] = np.log(init_seq[np.where(init_seq != 0)])
        noise = np.random.gumbel(0, 1, init_seq.shape)
        init_seq[np.where(init_seq != 0)] += noise[np.where(init_seq != 0)]
        init_seq *= init_scale"""
        # Center each profile across the alphabet dimension.
        init_seq = np.transpose(
            np.transpose(init_seq, (1, 0, 2)) - np.mean(init_seq, axis=1),
            (1, 0, 2))
        self.model = Chain(kmeans=KMeansLayer(self.n_centroid,
                                              self.centroid_length,
                                              init_W=init_seq,
                                              tau=self.tau))
        self.optimizer = {
            'Adam': optimizers.Adam(lr),
            'Momentum': optimizers.MomentumSGD(lr),
            'SGD': optimizers.SGD(lr)
        }[optimizer]
        self.optimizer.setup(self.model)
        self.optimizer.add_hook(chainer.optimizer.WeightDecay(1e-6))
        if self.use_gpu:
            self.model.to_gpu()
        with chainer.using_config('train', True):
            lcurve = []
            for i in range(n_iter):
                self.model.cleargrads()
                indexes = np.random.choice(len(X), mini_batch)
                x = X[indexes]
                x = one_hot_encoding(x, self.dict_alphabet, self.max_length)
                if self.use_gpu:
                    x = cupy.array(x)
                # Assign the whole mini-batch to clusters without backprop...
                with chainer.no_backprop_mode():
                    _, labels = self.model.kmeans(x, inference=True)
                labels_indexes = [np.where(labels == u)[0]
                                  for u in np.unique(labels)]
                # ...then take several optimization steps on cluster-balanced subsamples.
                for j in range(step_per_iter):
                    indexes = []
                    for row in labels_indexes:
                        indexes += np.random.choice(
                            row, subsample_batch // len(labels_indexes)).tolist()
                    loss = self.model.kmeans(x[indexes], indexes=labels[indexes])
                    loss = F.mean(loss)
                    loss.backward()
                    lcurve.append(float(loss.data))
                    self.optimizer.update()
                    print(i, j, np.mean(lcurve[-10:]))
            return np.array(lcurve)

    def transform(self, X, batchsize=1000):
        labels = []
        with chainer.using_config('train', False):
            with chainer.no_backprop_mode():
                for i in range(0, len(X), batchsize):
                    print(i)
                    x = X[i:i + batchsize]
                    x = one_hot_encoding(x, self.dict_alphabet, self.max_length)
                    if self.use_gpu:
                        x = cupy.array(x)
                    loss, indexes = self.model.kmeans(x, inference=True)
                    labels.append(indexes)
        return np.concatenate(labels)

    def get_centroid(self):
        return cupy.asnumpy(self.model.kmeans.get_centroid())
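# Usage sketch (assumption): the same kind of toy data as above, but with the
# hard-assignment SeqKmeans. It presumes `one_hot_encoding`, `edit_distance`,
# and `KMeansLayer` are importable and that a GPU is available, because
# get_initialize_points always moves the candidate centroids to CuPy.
sequences = np.array(['ACGTACGT', 'ACGTTCGT', 'TTGACCAA', 'TTGACGAA'] * 250)
seq_km = SeqKmeans(n_centroid=2, centroid_length=8, alphabet='ACGT', use_gpu=True)
lcurve = seq_km.fit(sequences, mini_batch=200, subsample_batch=50, n_iter=20)
labels = seq_km.transform(sequences)
print(labels[:8], seq_km.get_centroid().shape)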