import pickle
from unittest import TestCase

import numpy as np

from chainer import FunctionSet
from chainer.functions import Linear
from chainer.testing import attr


class TestFunctionSet(TestCase):

    def setUp(self):
        self.fs = FunctionSet(
            a=Linear(3, 2),
            b=Linear(3, 2),
        )

    def test_get_sorted_funcs(self):
        # assertCountEqual is the Python 3 name of assertItemsEqual
        self.assertCountEqual(
            [k for (k, v) in self.fs._get_sorted_funcs()], ('a', 'b'))

    def check_equal_fs(self, fs1, fs2):
        self.assertTrue((fs1.a.W == fs2.a.W).all())
        self.assertTrue((fs1.a.b == fs2.a.b).all())
        self.assertTrue((fs1.b.W == fs2.b.W).all())
        self.assertTrue((fs1.b.b == fs2.b.b).all())

    def test_pickle_cpu(self):
        s = pickle.dumps(self.fs)
        fs2 = pickle.loads(s)
        self.check_equal_fs(self.fs, fs2)

    @attr.gpu
    def test_pickle_gpu(self):
        self.fs.to_gpu()
        s = pickle.dumps(self.fs)
        fs2 = pickle.loads(s)
        self.fs.to_cpu()
        fs2.to_cpu()
        self.check_equal_fs(self.fs, fs2)
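# The nested tests below rely on a MockFunction helper that is not defined in
# this section. The following is a minimal sketch of what they appear to
# assume, using the old chainer.Function parameter conventions
# (``parameter_names``/``gradient_names``); the original helper may differ.
from chainer import Function


class MockFunction(Function):

    parameter_names = ('p',)
    gradient_names = ('gp',)

    def __init__(self, shape):
        # parameter ``p`` starts at zeros and its gradient ``gp`` at ones,
        # matching the arrays that test_collect_parameters compares against
        self.p = np.zeros(shape).astype(np.float32)
        self.gp = np.ones(shape).astype(np.float32)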
class TestNestedFunctionSet(TestCase):

    def setUp(self):
        self.fs1 = FunctionSet(a=MockFunction((1, 2)))
        self.fs2 = FunctionSet(fs1=self.fs1, b=MockFunction((3, 4)))

    def test_get_sorted_funcs(self):
        self.assertCountEqual(
            [k for (k, v) in self.fs2._get_sorted_funcs()], ('b', 'fs1'))

    def test_collect_parameters(self):
        p_b = np.zeros((3, 4)).astype(np.float32)
        p_a = np.zeros((1, 2)).astype(np.float32)
        gp_b = np.ones((3, 4)).astype(np.float32)
        gp_a = np.ones((1, 2)).astype(np.float32)

        actual = self.fs2.collect_parameters()
        # map() returns an iterator on Python 3, so materialize it first
        self.assertTrue(list(map(len, actual)) == [2, 2])
        self.assertTrue((actual[0][0] == p_b).all())
        self.assertTrue((actual[0][1] == p_a).all())
        self.assertTrue((actual[1][0] == gp_b).all())
        self.assertTrue((actual[1][1] == gp_a).all())

    def test_pickle_cpu(self):
        fs2_serialized = pickle.dumps(self.fs2)
        fs2_loaded = pickle.loads(fs2_serialized)
        self.assertTrue((self.fs2.b.p == fs2_loaded.b.p).all())
        self.assertTrue((self.fs2.fs1.a.p == fs2_loaded.fs1.a.p).all())

    @attr.gpu
    def test_pickle_gpu(self):
        self.fs2.to_gpu()
        fs2_serialized = pickle.dumps(self.fs2)
        fs2_loaded = pickle.loads(fs2_serialized)
        fs2_loaded.to_cpu()
        self.fs2.to_cpu()
        self.assertTrue((self.fs2.b.p == fs2_loaded.b.p).all())
        self.assertTrue((self.fs2.fs1.a.p == fs2_loaded.fs1.a.p).all())
            print '\n Max accuracy was {} in epoch {}'.format(
                max_accuracy, max_accuracy_epoch)
            log_string = str(l_current_parameters) + ', ' + str(max_accuracy)
            logging.info(log_string)

            # record this trial and refit the surrogate model on the search
            # history before moving on to the next parameter set
            parameter_search4.history_x.append(l_current_parameters)
            parameter_search4.history_y.append(max_accuracy)
            parameter_search4.clf.fit(parameter_search4.history_x,
                                      parameter_search4.history_y)

            if max_accuracy > best_accuracy:
                best_accuracy = max_accuracy
                l_best_params = l_current_parameters
        except KeyboardInterrupt:
            evaluate_results(x_test, y_test, N_test, batchsize, max_len)
            print '\n Max accuracy was {} in epoch {}'.format(
                max_accuracy, max_accuracy_epoch)
            sys.exit(0)
        except Exception as e:
            print '\n'
            traceback.print_exc()
            print e
            print '\n'
            continue
        finally:
            # always bring the model back to the CPU and checkpoint it,
            # even when the trial failed
            model.to_cpu()
            pickle.dump(
                (model, max_pool_window_1, max_pool_stride_1,
                 avg_pool_window_2, avg_pool_stride_2,
                 max_pool_window_2, max_pool_stride_2),
                open('./models/' + str(datetime.datetime.now()), 'wb'))
class DA(object):

    def __init__(self, rng, data, n_inputs=784, n_hidden=784,
                 corruption_level=0.3, optimizer=optimizers.AdaDelta, gpu=-1):
        """Denoising AutoEncoder.

        data: pair of (train, test) arrays
        n_inputs: number of units in the input and output layers
        n_hidden: number of units in the hidden layer
        corruption_level: ratio of masking noise
        """
        self.model = FunctionSet(
            encoder=F.Linear(n_inputs, n_hidden),
            decoder=F.Linear(n_hidden, n_inputs),
        )
        if gpu >= 0:
            self.model.to_gpu()

        self.gpu = gpu
        self.x_train, self.x_test = data
        self.n_train = len(self.x_train)
        self.n_test = len(self.x_test)
        self.n_inputs = n_inputs
        self.n_hidden = n_hidden

        self.optimizer = optimizer()
        self.optimizer.setup(self.model)
        self.corruption_level = corruption_level
        self.rng = rng

        self.train_losses = []
        self.test_losses = []

    @property
    def xp(self):
        return cuda.cupy if self.gpu >= 0 else numpy

    def forward(self, x_data, train=True):
        y_data = x_data
        # add masking noise to the input; the reconstruction target stays clean
        x_data = self.get_corrupted_inputs(x_data, train=train)
        x, t = Variable(x_data), Variable(y_data)
        # encode, decode, and compute the reconstruction loss
        h = self.encode(x)
        y = self.decode(h)
        loss = F.mean_squared_error(y, t)
        return loss

    def compute_hidden(self, x_data):
        x = Variable(x_data)
        h = self.encode(x)
        return h.data

    def predict(self, x_data):
        x = Variable(x_data)
        h = self.encode(x)
        y = self.decode(h)
        return cuda.to_cpu(y.data)

    def encode(self, x):
        return F.relu(self.model.encoder(x))

    def decode(self, h):
        return F.relu(self.model.decoder(h))

    def encoder(self):
        initialW = self.model.encoder.W
        initial_bias = self.model.encoder.b
        return F.Linear(self.n_inputs, self.n_hidden,
                        initialW=initialW,
                        initial_bias=initial_bias)

    def decoder(self):
        return self.model.decoder

    def to_cpu(self):
        self.model.to_cpu()
        # `xp` is a read-only property derived from `self.gpu`, so update the
        # flag instead of assigning to `self.xp`
        self.gpu = -1

    def to_gpu(self):
        if self.gpu < 0:
            logging.error("GPU is disabled for this model")
            raise RuntimeError('GPU is disabled for this model')
        self.model.to_gpu()

    # masking noise
    def get_corrupted_inputs(self, x_data, train=True):
        if train and self.corruption_level != 0.0:
            mask = self.rng.binomial(size=x_data.shape, n=1,
                                     p=1.0 - self.corruption_level)
            mask = mask.astype(numpy.float32)
            mask = self.xp.asarray(mask)
            return mask * x_data
        else:
            return x_data

    def train_and_test(self, n_epoch=5, batchsize=100):
        for epoch in xrange(1, n_epoch + 1):
            logging.info('epoch: {}'.format(epoch))

            perm = self.rng.permutation(self.n_train)
            sum_loss = 0
            for i in xrange(0, self.n_train, batchsize):
                x_batch = self.xp.asarray(self.x_train[perm[i:i + batchsize]])
                real_batchsize = len(x_batch)

                self.optimizer.zero_grads()
                loss = self.forward(x_batch)
                loss.backward()
                self.optimizer.update()

                sum_loss += float(loss.data) * real_batchsize
            logging.info('train mean loss={}'.format(sum_loss / self.n_train))

            # evaluation
            sum_loss = 0
            for i in xrange(0, self.n_test, batchsize):
                x_batch = self.xp.asarray(self.x_test[i:i + batchsize])
                real_batchsize = len(x_batch)
                loss = self.forward(x_batch, train=False)
                sum_loss += float(loss.data) * real_batchsize
            logging.info('test mean loss={}'.format(sum_loss / self.n_test))
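# A minimal usage sketch for the DA class above (the data arrays are
# illustrative; any float32 pair of (n_samples, 784) arrays works):
#
#   rng = numpy.random.RandomState(0)
#   da = DA(rng, (x_train, x_test), n_inputs=784, n_hidden=256,
#           corruption_level=0.3, gpu=-1)
#   da.train_and_test(n_epoch=5, batchsize=100)
#   codes = da.compute_hidden(x_test[:10])  # hidden representations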
class Inception(Function):

    """Inception module of GoogLeNet.

    It applies four different functions to the input array and concatenates
    their outputs along the channel dimension. Three of them are 2D
    convolutions of sizes 1x1, 3x3 and 5x5. Convolution paths of 3x3 and 5x5
    sizes have 1x1 convolutions (called projections) ahead of them. The other
    path consists of 1x1 convolution (projection) and 3x3 max pooling.

    The output array has the same spatial size as the input. In order to
    satisfy this, Inception module uses appropriate padding for each
    convolution and pooling.

    See: `Going Deeper with Convolutions <http://arxiv.org/abs/1409.4842>`_.

    Args:
        in_channels (int): Number of channels of input arrays.
        out1 (int): Output size of 1x1 convolution path.
        proj3 (int): Projection size of 3x3 convolution path.
        out3 (int): Output size of 3x3 convolution path.
        proj5 (int): Projection size of 5x5 convolution path.
        out5 (int): Output size of 5x5 convolution path.
        proj_pool (int): Projection size of max pooling path.

    Returns:
        Variable: Output variable. Its array has the same spatial size and
        the same minibatch size as the input array. The channel dimension has
        size ``out1 + out3 + out5 + proj_pool``.

    .. note::

       This function inserts the full computation graph of the Inception
       module behind the input array. This function itself is not inserted
       into the computation graph.

    """

    def __init__(self, in_channels, out1, proj3, out3, proj5, out5,
                 proj_pool):
        self.f = FunctionSet(
            conv1=Convolution2D(in_channels, out1, 1),
            proj3=Convolution2D(in_channels, proj3, 1),
            conv3=Convolution2D(proj3, out3, 3, pad=1),
            proj5=Convolution2D(in_channels, proj5, 1),
            conv5=Convolution2D(proj5, out5, 5, pad=2),
            projp=Convolution2D(in_channels, proj_pool, 1),
        )

    def forward(self, x):
        self.x = Variable(x[0])
        out1 = self.f.conv1(self.x)
        out3 = self.f.conv3(relu(self.f.proj3(self.x)))
        out5 = self.f.conv5(relu(self.f.proj5(self.x)))
        pool = self.f.projp(max_pooling_2d(self.x, 3, stride=1, pad=1))
        self.y = relu(concat((out1, out3, out5, pool), axis=1))
        return self.y.data,

    def backward(self, x, gy):
        self.y.grad = gy[0]
        self.y.backward()
        return self.x.grad,

    def to_gpu(self, device=None):
        return self.f.to_gpu(device)

    def to_cpu(self):
        return self.f.to_cpu()

    @property
    def parameters(self):
        return self.f.parameters

    @parameters.setter
    def parameters(self, params):
        self.f.parameters = params

    @property
    def gradients(self):
        return self.f.gradients

    @gradients.setter
    def gradients(self, grads):
        self.f.gradients = grads
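# A minimal usage sketch for the module above (the channel sizes follow the
# inception(3a) block of GoogLeNet; the input shape is illustrative):
#
#   inc = Inception(192, 64, 96, 128, 16, 32, 32)
#   x = Variable(np.zeros((10, 192, 28, 28), dtype=np.float32))
#   y = inc(x)  # y.data.shape == (10, 64 + 128 + 32 + 32, 28, 28)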
class GaussianEmbedding(object):

    def __init__(self):
        pass

    def _init_parameters(self, args):
        if not hasattr(self, '_model'):
            print('\x1b[4;31mERROR:\x1b[0;31m model not defined!',
                  file=sys.stderr)
            sys.exit()
        self._window = args.window
        if args.gpu >= 0:
            self._use_gpu = True
            self._model.to_gpu()
        else:
            self._use_gpu = False
            self._model.to_cpu()
        self._init_optimizer()

    def _init_optimizer(self):
        self._opt = optimizers.Adam()
        self._opt.setup(self._model)

    def _forward(self, dataset, position):
        # `xp` is assumed to be a module-level numpy/cupy alias
        d = xp.asarray(dataset[position], dtype=xp.int32)
        t = Variable(d)
        # sample a window size in [1, self._window - 1] and sum the loss over
        # all context positions within it
        w = numpy.random.randint(self._window - 1) + 1
        loss = None
        for offset in range(-w, w + 1):
            if offset == 0:
                continue
            x = Variable(xp.asarray(dataset[position + offset]))
            mean_x, cov_x = self._model.embed(x)
            loss_i = self._model.loss(mean_x, cov_x, t)
            loss = loss_i if loss is None else loss + loss_i
        return loss

    def _regularize_parameters(self):
        if self._use_gpu:
            # self._model.embed.regularize_gpu()
            self._model.loss.regularize_gpu()
        else:
            # self._model.embed.regularize_cpu()
            self._model.loss.regularize_cpu()

    @staticmethod
    def make_model(args, count_list):
        self = GaussianEmbedding()
        if args.covariance == 'diagonal':
            self._covariance_type = CovarianceType.diagonal
        elif args.covariance == 'spherical':
            self._covariance_type = CovarianceType.spherical
        cov = self._covariance_type
        self._model = FunctionSet(
            embed=EmbedIDGaussian(len(count_list), args.size, cov),
            loss=PairwiseSampling(args.size, count_list, cov),
        )
        self._init_parameters(args)
        return self

    # @classmethod
    # def from_pickle(cls, model_file):
    #     self = GaussianEmbedding()
    #     with open(model_file, 'rb') as fp:
    #         self._model = pickle.load(fp)
    #     return self

    def train(self, dataset, position):
        self._opt.zero_grads()
        loss = self._forward(dataset, position)
        loss.backward()
        self._opt.update()
        self._regularize_parameters()
        return loss

    def dump_model(self, model_name, index_word, word_index):
        self._model.to_cpu()
        obj = (self._model, index_word, word_index)
        with open(model_name, 'wb') as fp:
            pickle.dump(obj, fp)
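# Sketch of how this class appears to be driven (the args object, dataset
# layout, and vocabulary maps are assumptions inferred from the methods
# above, not part of the original code):
#
#   model = GaussianEmbedding.make_model(args, count_list)
#   for position in range(args.window, len(dataset) - args.window):
#       loss = model.train(dataset, position)
#   model.dump_model('gaussian.model', index_word, word_index)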
    CM = confusion_matrix(shoken_true, shoken_pred)
    Conf_matrix_list.append(CM)
    for d in xrange(3):
        # per-class sensitivity: true positives over all samples of class d
        Sensitivity_matrix[c_f, epoch, d] = \
            1.0 * CM[d, d] / (CM[d, 0] + CM[d, 1] + CM[d, 2])
        # per-class specificity: drop row and column d, then true negatives
        # over all samples not of class d
        new = np.delete(CM, d, 0)
        TN = np.delete(new, d, 1)
        Specificity_matrix[c_f, epoch, d] = 1.0 * np.sum(TN) / np.sum(new)
    print 'Test shoken accuracy: %.4f' % (shoken_acc)

    c_f += 1

# model output
model.to_cpu()  # move the model to the CPU

# save the model in npz format
save_model_path = os.path.join(save_dir, 'model_5disease_MEG.npz')
serializers.save_npz(save_model_path, model)

# output the classification results per finding (shoken = clinical finding)
log_txt += '-----------acc for test shoken----------\n'
for a in xrange(FOLD_N):
    log_txt += '['
    log_txt += 'Fold {0} '.format(a)
    for b in xrange(EPOCH_N):
        log_txt += '{0:f},'.format(shoken_acc_matrix[a, b])
    log_txt += ']\n'

log_txt += '-----------sensitivity for HV----------\n'
for a in xrange(FOLD_N):
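# Worked example of the sensitivity/specificity computation above for a
# hypothetical 3-class confusion matrix (numbers are illustrative only):
#
#   CM = np.array([[8, 1, 1],
#                  [2, 6, 2],
#                  [0, 1, 9]])
#   for class d = 0:
#     sensitivity = CM[0, 0] / CM[0].sum() = 8 / 10 = 0.8
#     new = CM without row 0 -> [[2, 6, 2], [0, 1, 9]], sum = 20
#     TN  = new without column 0 -> [[6, 2], [1, 9]],   sum = 18
#     specificity = 18 / 20 = 0.9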
class CNN:
    file_names = None

    def __init__(self):
        self.optimizer = optimizers.Adam()
        self.model_name = "cnn_nantyara"
        if os.path.exists(self.model_name):
            self.load_model()
        else:
            self.create_model()
        self.optimizer.setup(self.model.collect_parameters())

    def create_model(self):
        self.model = FunctionSet(
            conv1=F.Convolution2D(3, 32, 3),
            bn1=F.BatchNormalization(32),
            conv2=F.Convolution2D(32, 64, 3, pad=1),
            bn2=F.BatchNormalization(64),
            conv3=F.Convolution2D(64, 64, 3, pad=1),
            fl4=F.Linear(1024, 256),
            fl5=F.Linear(256, 2),
        )

    def get_data(self, ifpath, image_categories, reshape_size=(3, 32, 32)):
        x = []
        x_apd = x.append
        y = []
        y_apd = y.append
        for i_category, category in enumerate(image_categories):
            for i_num in xrange(1, self.get_num_of_images(ifpath, category)):
                image = np.array(
                    Image.open(ifpath + "/" + category + str(i_num) + ".jpeg"),
                    dtype=np.float32).reshape(reshape_size)
                x_apd(image)
                y_apd(i_category)
        self.N = len(x)
        return x, np.array(y, dtype=np.int32)

    def get_data_for_predict(self, ifpath, image_name,
                             reshape_size=(3, 32, 32)):
        image = np.array(Image.open(ifpath + "/" + image_name),
                         dtype=np.float32)
        image = cv2.resize(image, (reshape_size[1], reshape_size[2]))
        image = image.reshape(reshape_size)
        return [image]

    def forward(self, x_data, y_data, train=True):
        x, t = Variable(np.array(x_data)), Variable(y_data)
        h1 = F.max_pooling_2d(F.relu(self.model.bn1(self.model.conv1(x))), 2)
        h2 = F.max_pooling_2d(F.relu(self.model.bn2(self.model.conv2(h1))), 2)
        h3 = F.max_pooling_2d(F.relu(self.model.conv3(h2)), 2)
        h4 = F.dropout(F.relu(self.model.fl4(h3)), train=train)
        y = self.model.fl5(h4)
        if train:
            return F.softmax_cross_entropy(y, t), F.accuracy(y, t)
        else:
            res = [d for data in F.softmax(y).data for d in data]
            # return the argmax only when some class is confident enough
            if len([r for r in res if r > 0.5]) > 0:
                return np.array(res).argmax()
            return "unknown"

    def get_num_of_images(self, path, image_name):
        cmd = "ls %s | grep %s | wc -l" % (path, image_name)
        return int(subprocess.check_output(cmd, shell=True))

    def dump_model(self):
        self.model.to_cpu()
        with open(self.model_name, "wb") as f:
            pickle.dump(self.model, f, -1)

    def load_model(self):
        with open(self.model_name, "rb") as f:
            self.model = pickle.load(f)

    def fit(self, x_train, y_train, epoch=20, batchsize=100):
        for epoch in xrange(1, epoch + 1):
            print "epoch", epoch
            for i in xrange(0, self.N, batchsize):
                self.optimizer.zero_grads()
                loss, acc = self.forward(x_train[i:i + batchsize],
                                         y_train[i:i + batchsize])
                loss.backward()
                self.optimizer.update()
                print "train mean loss=%s, accuracy=%s" % (str(loss.data),
                                                           str(acc.data))
        self.dump_model()

    def predict(self, x):
        y = self.forward(x, np.zeros(1, dtype=np.int32), train=False)
        sys.stdout.write(
            str(self.file_names[y]) if y != "unknown" else "unknown")
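# Minimal usage sketch for the class above (directory layout and category
# names are illustrative; get_data expects files named like
# images/cat1.jpeg, images/cat2.jpeg, ...):
#
#   cnn = CNN()
#   cnn.file_names = ['cat', 'dog']
#   x, y = cnn.get_data('./images', ['cat', 'dog'])
#   cnn.fit(x, y, epoch=20, batchsize=100)
#   cnn.predict(cnn.get_data_for_predict('./images', 'query.jpeg'))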
        sum_loss += float(cuda.to_cpu(loss.data)) * batchsize
        sum_accuracy += float(cuda.to_cpu(acc.data)) * batchsize

    # print loss and accuracy on the test data
    print 'test mean loss={}, accuracy={}'.format(
        sum_loss / N_test, sum_accuracy / N_test)
    test_loss.append(sum_loss / N_test)
    test_acc.append(sum_accuracy / N_test)
    sys.stdout.flush()

    # optimizer.lr *= DECAY_FACTOR

# save the model and the optimizer
print('save the model')
if args.gpu >= 0:
    model.to_cpu()
serializers.save_hdf5('model/fine-tuning-mlp.model', model)
if args.gpu >= 0:
    model.to_gpu()
print('save the optimizer')
serializers.save_hdf5('model/fine-tuning-mlp.state', optimizer)

# plot accuracy and loss
if args.plot:
    plt.figure(figsize=(8, 6))
    plt.subplot(1, 2, 1)
    plt.ylim(0., 1.)
    plt.plot(range(len(train_acc)), train_acc)
    plt.plot(range(len(test_acc)), test_acc)
    plt.legend(['train_acc', 'test_acc'], loc=4)
    plt.title('Accuracy of cnn recognition.')
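# Counterpart sketch for restoring what is saved above (the model and
# optimizer objects must first be constructed with matching shapes):
#
#   serializers.load_hdf5('model/fine-tuning-mlp.model', model)
#   serializers.load_hdf5('model/fine-tuning-mlp.state', optimizer)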
class CommentNetwork:
    def __init__(self, n, saveFile, opt, lossFunc, mod=None, use_gpu=False,
                 numDirectIterations=1, defaultOutputTruncation=10):
        self.n = n
        self.saveFile = saveFile
        self.use_gpu = use_gpu
        self.lossFunc = lossFunc
        self.numDirectIterations = numDirectIterations
        self.defaultOutputTruncation = defaultOutputTruncation

        if mod is None:
            # construct network model
            self.model = FunctionSet(
                x_to_h=F.Linear(7, n),
                h_to_h=F.Linear(n, n),
                h_to_y=F.Linear(n, 7),
            )
        else:
            self.model = mod
        if self.use_gpu:
            self.model.to_gpu()
        else:
            self.model.to_cpu()

        self.optimizer = opt
        self.optimizer.setup(self.model)

        # constants
        self.null_byte = np.array([[0]*7], dtype=np.float32)
        if self.use_gpu:
            self.null_byte = cuda.to_gpu(self.null_byte)
        self.null_byte = Variable(self.null_byte)

    def forward_one_step(self, h, x, computeOutput=True):
        h = F.sigmoid(self.model.x_to_h(x) + self.model.h_to_h(h))
        if computeOutput:
            y = F.sigmoid(self.model.h_to_y(h))
            return h, y
        else:
            return h

    def forward(self, input_string, output_string, truncateSize=None,
                volatile=False):
        if truncateSize is None:
            truncateSize = self.defaultOutputTruncation

        # feed the input in, ignoring output until the model has seen the
        # whole input string
        h = np.zeros((1, self.n), dtype=np.float32)
        if self.use_gpu:
            h = cuda.to_gpu(h)
        h = Variable(h, volatile=volatile)
        for c in input_string:
            bits = np.array([[bool(ord(c) & (2**i)) for i in range(7)]],
                            dtype=np.float32)
            if self.use_gpu:
                bits = cuda.to_gpu(bits)
            # 7 bits, never all 0 for ascii text
            bits = Variable(bits, volatile=volatile)
            h = self.forward_one_step(h, bits, computeOutput=False)

        # prep for training
        self.optimizer.zero_grads()
        y = ''  # output string
        nullEnd = False
        loss = 0

        def yc_translation(yc, y, nullEnd, truncateSize):
            # translate the predicted bit vector back to an int
            yc = sum([bool(round(bit)) * (2**i_bit)
                      for i_bit, bit in enumerate(cuda.to_cpu(yc.data[0]))])
            if not yc:  # null byte signifies end of sequence
                nullEnd = True
            if not nullEnd:
                y += chr(yc)  # translate to character
                truncateSize -= 1
            return y, nullEnd, truncateSize

        # read output by prompting with null bytes; train with the training
        # output
        for c in output_string:
            bits = np.array([[bool(ord(c) & (2**i)) for i in range(7)]],
                            dtype=np.float32)
            if self.use_gpu:
                bits = cuda.to_gpu(bits)
            bits = Variable(bits, volatile=volatile)
            h, yc = self.forward_one_step(h, self.null_byte)
            loss += self.lossFunc(yc, bits)
            y, nullEnd, truncateSize = yc_translation(yc, y, nullEnd,
                                                      truncateSize)

        # reinforce null byte as end of sequence
        h, yc = self.forward_one_step(h, self.null_byte)
        loss += self.lossFunc(yc, self.null_byte)
        y, nullEnd, truncateSize = yc_translation(yc, y, nullEnd,
                                                  truncateSize)

        # continue reading out as long as the network does not terminate and
        # we have not hit truncateSize
        while not nullEnd and truncateSize > 0:
            h, yc = self.forward_one_step(h, self.null_byte)
            y, nullEnd, truncateSize = yc_translation(yc, y, nullEnd,
                                                      truncateSize)

        # train
        loss.backward()
        self.optimizer.update()

        # nullEnd is True if the network terminated its output sequence,
        # False if the output was truncated
        return y, nullEnd
    def trainTree(self, tree, maxCommentLength=float('inf')):  # DFS training
        if 'children' in tree:
            allPass = True
            for child in tree['children']:
                self.trainTree(child, maxCommentLength)
                prompt = tree['body']
                trainResponse = child['body']
                if (prompt != '[deleted]' and trainResponse != '[deleted]'
                        and prompt and trainResponse
                        and len(prompt) <= maxCommentLength
                        and len(trainResponse) <= maxCommentLength):
                    for i in range(self.numDirectIterations):
                        givenResponse, nullEnd = self.forward(prompt,
                                                              trainResponse)
                        print '<#'+str(i)+'--prompt--'+str(len(prompt))+'chars-->\n', repr(prompt), '\n<--trainResponse--'+str(len(trainResponse))+'chars-->\n', repr(trainResponse), '\n<--givenResponse--'+str(len(givenResponse))+'chars'+('' if nullEnd else ', truncated')+'-->\n', repr(givenResponse)+'\n'
                        if givenResponse == trainResponse:
                            break
                    else:
                        # for-else: no break means the response never matched
                        allPass = False
            return allPass

    # loop over lines in a file, identifying whether they contain a tree
    # after parsing the JSON
    def trainFile(self, openFile, maxCommentLength=float('inf')):
        allPass = True
        for i, treeText in enumerate(openFile):
            # throw away whitespace
            if treeText.strip():
                tree = json.loads(treeText.strip())
                # it's a tree, let's train
                if 'children' in tree:
                    print 'training #'+str(i)+' '+openFile.name
                    allPass &= self.trainTree(tree, maxCommentLength)
        return allPass

    def saveModel(self):
        print 'Stopped computation, saving model. Please wait...'
        # pickle needs a binary-mode file handle
        f = open(self.saveFile, 'wb')
        pickle.dump(self.model, f)
        f.close()
        print 'Saved model'

    def sig_exit(self, _1, _2):
        self.saveModel()
        exit()
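# Minimal driving sketch for CommentNetwork (the file name and
# hyperparameters are illustrative; lossFunc can be any Chainer loss over
# bit vectors, e.g. F.mean_squared_error):
#
#   net = CommentNetwork(100, 'comment_net.pkl', optimizers.Adam(),
#                        F.mean_squared_error)
#   signal.signal(signal.SIGINT, net.sig_exit)
#   with open('comment_trees.json') as f:
#       net.trainFile(f, maxCommentLength=200)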
    sum_acc = 0
    for i in range(0, N_test, batch_size):
        x_batch = xp.asarray(x_test[i:i + batch_size])
        y_batch = xp.asarray(y_test[i:i + batch_size])

        acc = forward(x_batch, y_batch, train=False)
        sum_acc += float(acc.data) * len(y_batch)

    epoch_acc = sum_acc / N_test
    log.info("test accuracy: {0}".format(epoch_acc))
    fp_acc.write("{0}\t{1}\n".format(epoch, epoch_acc))
    fp_acc.flush()

    # save intermediate model parameters (checkpoint)
    if check_interval > 0 and (epoch % check_interval) == 0:
        check_file = "result/{0:0>3}_".format(epoch) + args.model
        log.info("Checkpoint : {0}".format(check_file))
        param = model.to_cpu().parameters
        np.save(check_file, param)

end_time = time.clock()
log.info("Training time: {0} [s]".format(end_time - start_time))

fp_acc.close()
fp_loss.close()

# save the model parameters
if n_epoch > 0:
    log.info("Save model parameters : {0}".format(param_file))
    param = model.to_cpu().parameters
    np.save(param_file, param)

if plot_model == 1:
    # plot the weights
    plot_mlp_weight(model, "l1", shape=(28, 28))
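# Counterpart sketch for loading the parameters saved above (assumptions:
# np.save stores the tuple of parameter arrays as an object array, and the
# old FunctionSet API provides copy_parameters_from):
#
#   param = np.load(param_file, allow_pickle=True)
#   model.copy_parameters_from(param)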