class Model1:
    def __init__(self, model):
        if isinstance(model, tuple):
            input_dims, n_units, output_dims = model
            self.model = FunctionSet(l1=F.Linear(input_dims, n_units),
                                     l2=F.Linear(n_units, n_units),
                                     l3=F.Linear(n_units, output_dims))
        else:
            self.model = model

    def __call__(self):
        return self.model

    # Neural net architecture
    def forward(self, x_data, y_data, train=True):
        x = Variable(x_data)
        if y_data is not None:
            t = Variable(y_data)
        h1 = F.dropout(F.relu(self.model.l1(x)), train=train)
        h2 = F.dropout(F.relu(self.model.l2(h1)), train=train)
        y = self.model.l3(h2)
        if y_data is not None:
            # Multi-class classification, so use softmax cross entropy
            # as the loss function.
            return F.softmax_cross_entropy(y, t), F.accuracy(y, t), y
        else:
            return y

    def evaluate(self, x_data):
        return self.forward(x_data, None, train=False)
def main():
    if P.use_mean_var:
        conv6_output = 126
    else:
        conv6_output = 128

    if P.model_name is None:
        model = FunctionSet(
            conv1=F.Convolution2D(  1, 128, 3, stride=1),
            conv2=F.Convolution2D(128, 128, 3, stride=1),
            conv3=F.Convolution2D(128, 128, 3, stride=1),
            conv4=F.Convolution2D(128, 128, 3, stride=1),
            conv5=F.Convolution2D(128, 128, 3, stride=1),
            conv6=F.Convolution2D(128, conv6_output, 3, stride=1),
            conv7=F.Convolution2D(128, 128, 1, stride=1),
            conv8=F.Convolution2D(128, 1, 1, stride=1)
        )
        if P.gpu >= 0:
            cuda.init(P.gpu)
            model.to_gpu()
    else:
        if P.gpu >= 0:
            cuda.init(P.gpu)
        model = pickle.load(open(os.path.join(P.model_dir, P.model_name), 'rb'))

    optimizer = optimizers.MomentumSGD(lr=P.lr, momentum=P.momentum)
    optimizer.setup(model.collect_parameters())
    train(model, optimizer)
    return
def main(args):
    def forward(x_data, y_data):
        x = Variable(x_data)
        t = Variable(y_data)
        h1 = F.relu(model.l1(x))   # activation function
        h2 = F.relu(model.l2(h1))  # ReLU does not have parameters to optimize
        y = model.l3(h2)
        # the loss function of softmax regression
        return F.softmax_cross_entropy(y, t), F.accuracy(y, t)  # current accuracy

    def evaluate():
        sum_loss, sum_accuracy = 0, 0
        for i in xrange(0, 10000, batchsize):
            x_batch = x_test[i:i + batchsize]
            y_batch = y_test[i:i + batchsize]
            loss, accuracy = forward(x_batch, y_batch)
            sum_loss += loss.data * batchsize
            sum_accuracy += accuracy.data * batchsize
        mean_loss = sum_loss / 10000
        mean_accuracy = sum_accuracy / 10000
        print(mean_loss[0], mean_accuracy)
        return

    global debug, verbose
    debug = args.debug
    if debug:
        verbose = True
    else:
        verbose = args.verbose

    mnist = fetch_mldata('MNIST original')
    x_all = mnist.data.astype(np.float32) / 255  # Scale features to [0, 1]
    y_all = mnist.target.astype(np.int32)
    x_train, x_test = np.split(x_all, [60000])  # 60000 for training, 10000 for test
    y_train, y_test = np.split(y_all, [60000])

    # Simple three-layer rectifier network
    model = FunctionSet(
        l1=F.Linear(784, 100),  # 784 pixels -> 100 units
        l2=F.Linear(100, 100),  # 100 units -> 100 units
        l3=F.Linear(100, 10),   # 100 units -> 10 digits
    )
    optimizer = optimizers.SGD()
    optimizer.setup(model.collect_parameters())

    batchsize = 100
    for epoch in xrange(20):
        if verbose:
            logger.info('epoch: {}'.format(epoch))
        indexes = np.random.permutation(60000)
        for i in xrange(0, 60000, batchsize):
            x_batch = x_train[indexes[i:i + batchsize]]
            y_batch = y_train[indexes[i:i + batchsize]]
            optimizer.zero_grads()                      # Initialize gradient arrays
            loss, accuracy = forward(x_batch, y_batch)  # loss function
            loss.backward()                             # Backpropagation
            optimizer.update()
        evaluate()
    return 0
class AutoEncoder:
    """Construct an AutoEncoder from #input and #hidden units."""

    def __init__(self, xn, hn):
        self.model = FunctionSet(encode=F.Linear(xn, hn),
                                 decode=F.Linear(hn, xn))

    def encode(self, x, train=True):
        h = F.dropout(F.relu(self.model.encode(x)), train=train)
        return h

    def decode(self, h, train=True):
        y = F.dropout(F.relu(self.model.decode(h)), train=train)
        return y

    def train_once(self, x_data):
        x = Variable(x_data)
        h = self.encode(x)
        y = self.decode(h)
        return F.mean_squared_error(x, y)

    def reconstruct(self, x_data):
        x = Variable(x_data)
        h = self.encode(x, train=False)
        y = self.decode(h, train=False)
        return y.data
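# Usage sketch (not part of the original snippet): a minimal training loop
# for the AutoEncoder above, using the old-style Chainer (pre-1.5) optimizer
# API. `x_train` is placeholder data; swap in real float32 features.
import numpy as np
from chainer import optimizers

ae = AutoEncoder(xn=784, hn=100)
optimizer = optimizers.Adam()
optimizer.setup(ae.model.collect_parameters())

x_train = np.random.rand(1000, 784).astype(np.float32)  # placeholder data
batchsize = 100
for epoch in range(10):
    perm = np.random.permutation(len(x_train))
    for i in range(0, len(x_train), batchsize):
        x_batch = x_train[perm[i:i + batchsize]]
        optimizer.zero_grads()
        loss = ae.train_once(x_batch)
        loss.backward()
        optimizer.update()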
def main(log_file, h_sizes, improve_loss_min=0.001):
    x_train, y_train, x_test, y_test = generate_cases(log_file)

    in_size = LINE_MAX_CHAR
    out_size = 2
    layers = [in_size] + h_sizes + [out_size]
    model = FunctionSet()
    for li in range(1, len(layers)):
        setattr(model, "l%d" % li, F.Linear(layers[li - 1], layers[li]))
    optimizer = optimizers.SGD()
    optimizer.setup(model.collect_parameters())

    last_loss = None
    for epoch in range(3000000):
        optimizer.zero_grads()
        loss, accuracy = forward(model, x_train, y_train)
        loss.backward()
        if epoch % 100 == 0:
            print "epoch: %s, loss: %s, accuracy: %s" % (epoch, loss.data, accuracy.data)
            if last_loss is not None and last_loss - improve_loss_min < loss.data:
                print "Finish Training"
                break
            last_loss = loss.data
        optimizer.update()
        if epoch % 1000 == 0:
            loss, accuracy = forward(model, x_test, y_test)
            print "epoch: %s, Try Test Result: loss: %s, accuracy: %s" % (epoch, loss.data, accuracy.data)

    # result
    loss, accuracy = forward(model, x_test, y_test)
    print "epoch: %s, Test Result: loss: %s, accuracy: %s" % (epoch, loss.data, accuracy.data)
    return epoch, accuracy.data
class CNN3_Model(ModelBase):
    """see: http://aidiary.hatenablog.com/entry/20151007/1444223445"""

    def __init__(self, input_size=32):
        super(CNN3_Model, self).__init__()
        # F.Convolution2D(in_channels, out_channels, filter_size)
        self.model = FunctionSet(
            # 1*32*32 -(conv)-> 20*28*28 -(pool)-> 20*14*14
            conv1=F.Convolution2D(1, 20, 5),
            # 20*14*14 -(conv)-> 50*10*10 -(pool)-> 50*5*5 = 1250
            conv2=F.Convolution2D(20, 50, 5),
            l1=F.Linear(1250, 300),
            l2=F.Linear(300, 2))

    def forward(self, x_data, y_data, train=True):
        """Return loss and accuracy."""
        x, t = Variable(x_data), Variable(y_data)
        h1 = F.max_pooling_2d(F.relu(self.model.conv1(x)), 2)
        h2 = F.max_pooling_2d(F.relu(self.model.conv2(h1)), 2)
        h3 = F.dropout(F.relu(self.model.l1(h2)), train=train)
        y = self.model.l2(h3)
        # Multi-class classification, so use softmax cross entropy as the
        # loss function; at minimum "loss" is required.
        return {
            "loss": F.softmax_cross_entropy(y, t),
            "accuracy": F.accuracy(y, t)
        }
class TestFunctionSet(TestCase):
    def setUp(self):
        self.fs = FunctionSet(
            a=Linear(3, 2),
            b=Linear(3, 2)
        )

    def test_get_sorted_funcs(self):
        self.assertItemsEqual(
            [k for (k, v) in self.fs._get_sorted_funcs()], ('a', 'b'))

    def check_equal_fs(self, fs1, fs2):
        self.assertTrue((fs1.a.W == fs2.a.W).all())
        self.assertTrue((fs1.a.b == fs2.a.b).all())
        self.assertTrue((fs1.b.W == fs2.b.W).all())
        self.assertTrue((fs1.b.b == fs2.b.b).all())

    def test_pickle_cpu(self):
        s = pickle.dumps(self.fs)
        fs2 = pickle.loads(s)
        self.check_equal_fs(self.fs, fs2)

    @attr.gpu
    def test_pickle_gpu(self):
        self.fs.to_gpu()
        s = pickle.dumps(self.fs)
        fs2 = pickle.loads(s)
        self.fs.to_cpu()
        fs2.to_cpu()
        self.check_equal_fs(self.fs, fs2)
def __init__(self, in_channels=1, n_hidden=100, n_outputs=10):
    FunctionSet.__init__(
        self,
        conv1=F.Convolution2D(in_channels, 32, 5),
        conv2=F.Convolution2D(32, 32, 5),
        l3=F.Linear(288, n_hidden),
        l4=F.Linear(n_hidden, n_outputs)
    )
def setup_model(n_dimention, n_units):
    model = FunctionSet(l1=F.Linear(n_dimention, n_units),
                        l2=F.Linear(n_units, n_dimention))
    # Setup optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model.collect_parameters())
    return model, optimizer
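# Usage sketch (assumption, not from the original): one reconstruction-loss
# update step with the autoencoder-shaped model returned above. The forward
# pass is assumed; the original snippet does not define one.
import numpy as np
from chainer import Variable
import chainer.functions as F

model, optimizer = setup_model(n_dimention=784, n_units=100)
x = Variable(np.random.rand(32, 784).astype(np.float32))  # placeholder batch
optimizer.zero_grads()
y = model.l2(F.relu(model.l1(x)))  # assumed encoder/decoder pass
loss = F.mean_squared_error(y, x)
loss.backward()
optimizer.update()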
class FTCS_Y:
    def __init__(self):
        self.model0 = FunctionSet(l=F.Convolution2D(1, 1, 3, pad=1, nobias=True))
        self.model0.l.W[0, 0, :, :] = np.array(
            [[0, -1, 0], [0, 0, 0], [0, 1, 0]]).astype(np.float32) / 2
        self.model0.to_gpu()

    def forward(self, x_data):
        y0 = self.model0.l(x_data)
        return y0
class ConvolutionalDenoisingAutoencoder():
    def __init__(self, imgsize, n_in_channels, n_out_channels, ksize,
                 stride=1, pad=0, use_cuda=False):
        self.model = FunctionSet(
            encode=F.Convolution2D(n_in_channels, n_out_channels, ksize, stride, pad),
            decode=F.Linear(
                n_out_channels * (math.floor((imgsize + 2 * pad - ksize) / stride) + 1)**2,
                n_in_channels * imgsize**2)
        )
        self.use_cuda = use_cuda
        if self.use_cuda:
            self.model.to_gpu()
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model.collect_parameters())

    def encode(self, x_var):
        return F.sigmoid(self.model.encode(x_var))

    def decode(self, x_var):
        return self.model.decode(x_var)

    def predict(self, x_data):
        if self.use_cuda:
            x_data = cuda.to_gpu(x_data)
        x = Variable(x_data)
        p = self.encode(x)
        if self.use_cuda:
            return cuda.to_cpu(p.data)
        else:
            return p.data

    def cost(self, x_data):
        x = Variable(x_data)
        t = Variable(x_data.reshape(
            x_data.shape[0], x_data.shape[1] * x_data.shape[2] * x_data.shape[3]))
        h = F.dropout(x)
        h = self.encode(h)
        y = self.decode(h)
        return F.mean_squared_error(y, t)

    def train(self, x_data):
        if self.use_cuda:
            x_data = cuda.to_gpu(x_data)
        self.optimizer.zero_grads()
        loss = self.cost(x_data)
        loss.backward()
        self.optimizer.update()
        if self.use_cuda:
            return float(cuda.to_cpu(loss.data))
        else:
            return loss.data

    def test(self, x_data):
        if self.use_cuda:
            x_data = cuda.to_gpu(x_data)
        loss = self.cost(x_data)
        return float(cuda.to_cpu(loss.data))
class DenoisingAutoencoder(SuperClass):
    def __init__(self, n_in, n_hidden, n_epoch=20, batchsize=100, use_cuda=False):
        super().__init__(n_epoch, batchsize, use_cuda)
        self.model = FunctionSet(
            encode=F.Linear(n_in, n_hidden),
            decode=F.Linear(n_hidden, n_in)
        )
        self.registModel()

    def encode(self, x_var):
        return F.sigmoid(self.model.encode(x_var))

    def decode(self, x_var):
        return self.model.decode(x_var)

    def predict(self, x_data):
        x_data = self.procInput(x_data)
        x = Variable(x_data)
        p = self.encode(x)
        return self.procOutput(p.data)

    def cost(self, x_data):
        x_data = self.procInput(x_data)
        x = Variable(x_data)
        t = Variable(x_data)
        h = self.encode(F.dropout(t))
        y = self.decode(h)
        return self.procOutput(F.mean_squared_error(y, x))

    def test(self, x_data):
        x_data = self.procInput(x_data)
        x = Variable(x_data)
        t = Variable(x_data)
        h = self.encode(t)
        y = self.decode(h)
        return self.procOutput(F.mean_squared_error(y, x))

    def save(self, filedir, n_hidden, n_epoch, batchsize):
        name = "SdA_" + "layer" + str(n_hidden) + "_epoch" + str(n_epoch)
        param = {}
        param['W'] = self.model.encode.parameters[0]
        param['b'] = self.model.encode.parameters[1]
        pickle.dump(param, open(filedir + '/' + name + '.pkl', 'wb'),
                    pickle.HIGHEST_PROTOCOL)
        return

    def load(self, filename):
        if filename.find('.pkl') == -1:
            filename = filename + '.pkl'
        param = pickle.load(open(filename, 'rb'))
        self.model.encode.parameters = (param['W'], param['b'])
        return
def getResult(self, data, batch_size=100):
    """Feed input data to the network and fetch the result.

    batch_size is the number of samples sent to the network at once;
    tune it to the machine's capacity.
    """
    self.logger.info("Get result start.")

    # Model setup
    model = FunctionSet()
    for num, f_layer in enumerate(self.f_layers, 1):
        name = "l_f{0}".format(num)
        model.__setattr__(name, f_layer)
    if self.use_gpu:
        model = model.to_gpu()
    self.optimizer.setup(model)

    # Forward pass
    def forward(x_data):
        x = Variable(x_data)
        t = Variable(x_data)
        h = x
        for num in xrange(1, len(self.f_layers)):
            h = self.activation(model.__getitem__("l_f{0}".format(num))(h))
        y = model.__getitem__("l_f{0}".format(num + 1))(h)
        return y.data

    # Fetch results
    test_data = data
    test_size = len(test_data)
    batch_max = int(math.ceil(test_size / float(batch_size)))
    y_data = np.zeros((test_size, self.layer_sizes[len(self.layer_sizes) - 1]),
                      dtype=test_data.dtype)
    for i in xrange(batch_max):
        start = i * batch_size
        end = (i + 1) * batch_size
        x_batch = test_data[start:end]
        self.logger.debug("Index {0} => {1}, data count = {2}".format(
            start, end, len(x_batch)))
        if self.use_gpu:
            x_batch = cuda.to_gpu(x_batch)
        y_batch = forward(x_batch)
        if self.use_gpu:
            y_batch = cuda.to_cpu(y_batch)
        y_data[start:end] = y_batch
    self.logger.info("Complete get result.")
    return y_data
class DenoisingAutoencoder:
    def __init__(
        self,
        n_input,
        n_hidden,
        tied=True,
        noise=None,
        ratio=None,
        optimizer=optimizers.Adam(),
        loss_function=F.sigmoid_cross_entropy,
        activation_function=F.sigmoid,
    ):
        self.model = FunctionSet(encoder=F.Linear(n_input, n_hidden),
                                 decoder=F.Linear(n_hidden, n_input))
        if tied:
            self.model.decoder.W = self.model.encoder.W.T
        self.noise = noise
        self.ratio = ratio
        self.optimizer = optimizer
        self.optimizer.setup(self.model.collect_parameters())
        self.loss_function = loss_function
        self.activation_function = activation_function

    def train(self, x_data):
        self.optimizer.zero_grads()
        loss = self.autoencode(x_data, train=True)
        loss.backward()
        self.optimizer.update()
        return loss

    def test(self, x_data):
        return self.autoencode(x_data, train=False)

    def autoencode(self, x_data, train=True):
        x = Variable(x_data)
        if self.noise and train:
            nx = Variable(self.noise.noise(x_data))
        else:
            nx = Variable(x_data)
        if self.ratio:
            h = F.dropout(self.encode(nx), ratio=self.ratio, train=train)
        else:
            h = self.encode(nx)
        y = self.decode(h)
        return self.loss_function(y, x)

    def encode(self, x):
        return self.activation_function(self.model.encoder(x))

    def decode(self, x):
        return self.activation_function(self.model.decoder(x))
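# Hypothetical noise source (assumption; the original does not define one):
# the `noise` argument above only needs a `noise(x_data)` method, e.g.
# masking noise that zeroes a random fraction of the inputs.
import numpy as np

class MaskingNoise(object):
    def __init__(self, ratio=0.3):
        self.ratio = ratio

    def noise(self, x_data):
        # keep each input with probability (1 - ratio), zero the rest
        mask = np.random.rand(*x_data.shape) >= self.ratio
        return (x_data * mask).astype(np.float32)

# dae = DenoisingAutoencoder(784, 100, noise=MaskingNoise(0.3))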
def setup_model(gpu_id, n_channel, n_output):
    model = FunctionSet(
        conv1=F.Convolution2D(n_channel, 32, 5, pad=2),
        conv2=F.Convolution2D(32, 32, 5, pad=2),
        conv3=F.Convolution2D(32, 64, 5, pad=2),
        fl5=F.Linear(960, 64),
        fl6=F.Linear(64, n_output),
    )
    # optimizer = optimizers.MomentumSGD(lr=1e-03)
    optimizer = optimizers.AdaGrad()
    optimizer.setup(model.collect_parameters())
    mlp = ChainerModel(model, optimizer, forward_function=forward)
    return mlp
class DeepLearning:
    def __init__(self, input_size, hidden_size, output_size):
        self.model = FunctionSet(l1=F.Linear(input_size, hidden_size),
                                 l2=F.Linear(hidden_size, hidden_size),
                                 l3=F.Linear(hidden_size, output_size))
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model.collect_parameters())

    def batch(self, X_train, y_train, batch_size, perm):
        train_size = X_train.shape[0]
        for i in xrange(0, train_size, batch_size):
            X_batch = X_train[perm[i:i + batch_size]]
            y_batch = y_train[perm[i:i + batch_size]]
            # Convert to Chainer Variables
            x = Variable(X_batch)
            t = Variable(y_batch)
            self.optimizer.zero_grads()
            y = self.forward(x)  # prediction
            loss = F.softmax_cross_entropy(y, t)
            loss.backward()
            self.optimizer.update()

    def forward(self, x, train=True):
        h1 = F.dropout(F.sigmoid(self.model.l1(x)), train=train)
        h2 = F.dropout(F.sigmoid(self.model.l2(h1)), train=train)
        return self.model.l3(h2)

    def predicate(self, x_data):
        x = np.array([x_data], dtype=np.float32)
        x = Variable(x)
        y = self.forward(x, train=False)
        return np.argmax(y.data)

    def save(self, fpath):
        pickle.dump(self.model, open(fpath, 'wb'), -1)

    def load(self, fpath):
        self.model = pickle.load(open(fpath, 'rb'))
def init_model(model_params):
    wscale1 = model_params.wscale1  # math.sqrt(5 * 5 * 3) * 0.0001
    wscale2 = model_params.wscale2  # math.sqrt(5 * 5 * 32) * 0.01
    wscale3 = model_params.wscale3  # math.sqrt(5 * 5 * 32) * 0.01
    wscale4 = model_params.wscale4  # math.sqrt(576) * 0.1
    wscale5 = model_params.wscale5  # math.sqrt(64) * 0.1
    # wscale1, wscale2, wscale3, wscale4, wscale5 = [math.sqrt(2)] * 5
    model = FunctionSet(conv1=F.Convolution2D(3, 32, 5, wscale=wscale1, stride=1, pad=2),
                        conv2=F.Convolution2D(32, 32, 5, wscale=wscale2, stride=1, pad=2),
                        conv3=F.Convolution2D(32, 64, 5, wscale=wscale3, stride=1, pad=2),
                        fl4=F.Linear(576, 64, wscale=wscale4),
                        fl5=F.Linear(64, 10, wscale=wscale5))
    if params.gpu_flag:
        model.to_gpu()
    return model
def __init__(self, enable_controller=[0, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"

    print "Initializing DQN..."
    # Initialization for Chainer 1.1.0 or older.
    # print "CUDA init"
    # cuda.init()

    print "Model Building"
    self.model = FunctionSet(
        l1=F.Convolution2D(4, 16, ksize=8, stride=4, wscale=np.sqrt(2)),
        l2=F.Convolution2D(16, 32, ksize=4, stride=2, wscale=np.sqrt(2)),
        l3=F.Linear(2592, 256),
        q_value=F.Linear(256, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 256),
                                           dtype=np.float32))
    ).to_gpu()

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.0002, alpha=0.3, momentum=0.2)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data :  D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, n, saveFile, opt, lossFunc, mod=None, use_gpu=False,
             numDirectIterations=1, defaultOutputTruncation=10):
    self.n = n
    self.saveFile = saveFile
    self.use_gpu = use_gpu
    self.lossFunc = lossFunc
    self.numDirectIterations = numDirectIterations
    self.defaultOutputTruncation = defaultOutputTruncation

    if mod is None:
        # construct network model
        self.model = FunctionSet(
            x_to_h=F.Linear(7, n),
            h_to_h=F.Linear(n, n),
            h_to_y=F.Linear(n, 7)
        )
    else:
        self.model = mod
    if self.use_gpu:
        self.model.to_gpu()
    else:
        self.model.to_cpu()
    self.optimizer = opt
    self.optimizer.setup(self.model)

    # constants
    self.null_byte = np.array([[0] * 7], dtype=np.float32)
    if self.use_gpu:
        self.null_byte = cuda.to_gpu(self.null_byte)
    self.null_byte = Variable(self.null_byte)
class LinearModel(object):
    UNIT_NUM = 10
    BATCH_SIZE = 32
    EPOCH = 100

    def __init__(self, optimizer):
        self.model = FunctionSet(
            l=Linear(self.UNIT_NUM, 2)
        )
        self.optimizer = optimizer
        # true parameters
        self.w = np.random.uniform(-1, 1, (self.UNIT_NUM, 1)).astype(np.float32)
        self.b = np.random.uniform(-1, 1, (1, )).astype(np.float32)

    def _train_linear_classifier(self, model, optimizer, gpu):
        def _make_label(x):
            a = (np.dot(x, self.w) + self.b).reshape((self.BATCH_SIZE, ))
            t = np.empty_like(a).astype(np.int32)
            t[a >= 0] = 0
            t[a < 0] = 1
            return t

        def _make_dataset(batch_size, unit_num, gpu):
            x_data = np.random.uniform(
                -1, 1, (batch_size, unit_num)).astype(np.float32)
            t_data = _make_label(x_data)
            if gpu:
                x_data = cuda.to_gpu(x_data)
                t_data = cuda.to_gpu(t_data)
            x = Variable(x_data)
            t = Variable(t_data)
            return x, t

        for epoch in xrange(self.EPOCH):
            x, t = _make_dataset(self.BATCH_SIZE, self.UNIT_NUM, gpu)
            optimizer.zero_grads()
            y = model.l(x)
            loss = softmax_cross_entropy(y, t)
            loss.backward()
            optimizer.update()

        x_test, t_test = _make_dataset(self.BATCH_SIZE, self.UNIT_NUM, gpu)
        y_test = model.l(x_test)
        return accuracy(y_test, t_test)

    def _accuracy_cpu(self):
        self.optimizer.setup(self.model.collect_parameters())
        return self._train_linear_classifier(self.model, self.optimizer, False)

    def _accuracy_gpu(self):
        model = self.model
        optimizer = self.optimizer
        model.to_gpu()
        optimizer.setup(model.collect_parameters())
        return self._train_linear_classifier(model, optimizer, True)

    def accuracy(self, gpu):
        if gpu:
            return cuda.to_cpu(self._accuracy_gpu().data)
        else:
            return self._accuracy_cpu().data
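# Usage sketch (assumption, not from the original): train the toy linear
# classifier on CPU and print the held-out accuracy. The learning rate is a
# placeholder.
from chainer import optimizers

lm = LinearModel(optimizers.SGD(lr=0.1))
acc = lm.accuracy(gpu=False)
print("final accuracy: {}".format(acc))  # should approach 1.0 on this toy task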
def __init__(self, data, target, n_inputs=784, n_hidden=784, n_outputs=10,
             gpu=-1):
    self.model = FunctionSet(
        l1=F.Linear(n_inputs, n_hidden),
        l2=F.Linear(n_hidden, n_hidden),
        l3=F.Linear(n_hidden, n_outputs)
    )
    if gpu >= 0:
        self.model.to_gpu()

    self.x_train, self.x_test = data
    self.y_train, self.y_test = target

    self.n_train = len(self.y_train)
    self.n_test = len(self.y_test)

    self.gpu = gpu
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self.model)

    self.train_accuracies = []
    self.train_losses = []
    self.test_accuracies = []
    self.test_losses = []
def __init__(self, data=None, target=None, n_inputs=784, n_hidden=784,
             n_outputs=10, gpu=-1):
    self.excludes.append('xp')
    self.model = FunctionSet(l1=F.Linear(n_inputs, n_hidden),
                             l2=F.Linear(n_hidden, n_hidden),
                             l3=F.Linear(n_hidden, n_outputs))
    if gpu >= 0:
        self.model.to_gpu()
        self.xp = cuda.cupy
    else:
        self.xp = np

    if data is not None:
        self.x_train, self.x_test = data
    else:
        self.x_train, self.x_test = None, None

    if target is not None:
        self.y_train, self.y_test = target
        self.n_train = len(self.y_train)
        self.n_test = len(self.y_test)
    else:
        self.y_train, self.y_test = None, None
        self.n_train = 0
        self.n_test = 0

    self.gpu = gpu
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self.model)
def __init__(self, enable_controller=[0, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"

    print "Initializing DQN..."

    print "Model Building"
    self.model = FunctionSet(
        l1=F.Convolution2D(4, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
        l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
        l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
        l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
        q_value=F.Linear(512, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 512),
                                           dtype=np.float32))
    ).to_gpu()

    self.model_target = copy.deepcopy(self.model)

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95,
                                              momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data :  D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, input_vector_length, enable_controller=[0, 1, 2]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"
    self.input_vector_length = input_vector_length

    print "Initializing DQN..."
    # Initialization for Chainer 1.1.0 or older.
    # print "CUDA init"
    # cuda.init()

    # inputs --> 5 * 14 (with 10 temporality) + 5 (of last one hour) + 5 (of last 24 hours)
    print "Model Building"
    self.model = FunctionSet(
        l1=F.Linear(input_vector_length, 500),
        l2=F.Linear(500, 250),
        l3=F.Linear(250, 80),
        q_value=F.Linear(80, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 80),
                                           dtype=np.float32))
    ).to_gpu()

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.0002, alpha=0.3, momentum=0.2)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data :  D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, self.input_vector_length), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.input_vector_length), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def _finetune(self, X, y):
    utils.disp('*** finetune ***', self.verbose)

    # construct model and setup optimizer
    params = {'l{}'.format(layer + 1): dA.encoder
              for layer, dA in enumerate(self.dAs)}
    params.update({'l{}'.format(len(self.dAs) + 1):
                   F.Linear(self.dAs[-1].n_hidden, self.n_output)})
    self.model = FunctionSet(**params)
    self.optimizer.setup(self.model)

    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.model.to_gpu()
    xp = cuda.cupy if self.gpu >= 0 else np

    n = len(X)
    for epoch in range(self.n_epoch_finetune):
        utils.disp('epoch: {}'.format(epoch + 1), self.verbose)

        perm = np.random.permutation(n)
        sum_loss = 0
        for i in range(0, n, self.batch_size):
            X_batch = xp.asarray(X[perm[i:i + self.batch_size]])
            y_batch = xp.asarray(y[perm[i:i + self.batch_size]])

            self.optimizer.zero_grads()
            y_var = self._forward(X_batch)
            loss = self._loss_func(y_var, Variable(y_batch))
            loss.backward()
            self.optimizer.update()

            sum_loss += float(loss.data) * len(X_batch)
        utils.disp('fine tune mean loss={}'.format(sum_loss / n), self.verbose)
def __init__(self, in_channels, out1, proj3, out3, proj33, out33,
             pooltype, proj_pool=None, stride=1):
    if out1 > 0:
        assert stride == 1
        assert proj_pool is not None

    self.f = FunctionSet(
        proj3=F.Convolution2D(in_channels, proj3, 1, nobias=True),
        conv3=F.Convolution2D(proj3, out3, 3, pad=1, stride=stride, nobias=True),
        proj33=F.Convolution2D(in_channels, proj33, 1, nobias=True),
        conv33a=F.Convolution2D(proj33, out33, 3, pad=1, nobias=True),
        conv33b=F.Convolution2D(out33, out33, 3, pad=1, stride=stride, nobias=True),
        proj3n=F.BatchNormalization(proj3),
        conv3n=F.BatchNormalization(out3),
        proj33n=F.BatchNormalization(proj33),
        conv33an=F.BatchNormalization(out33),
        conv33bn=F.BatchNormalization(out33),
    )
    if out1 > 0:
        self.f.conv1 = F.Convolution2D(in_channels, out1, 1,
                                       stride=stride, nobias=True)
        self.f.conv1n = F.BatchNormalization(out1)
    if proj_pool is not None:
        self.f.poolp = F.Convolution2D(in_channels, proj_pool, 1, nobias=True)
        self.f.poolpn = F.BatchNormalization(proj_pool)
    if pooltype == 'max':
        self.f.pool = MaxPooling2D(3, stride=stride, pad=1)
    elif pooltype == 'avg':
        self.f.pool = AveragePooling2D(3, stride=stride, pad=1)
    else:
        raise NotImplementedError()
def __init__(self, use_gpu, num_of_action_type, num_of_pad, dim):
    self.use_gpu = use_gpu
    self.num_of_action_type = num_of_action_type
    self.num_of_pad = num_of_pad
    self.num_of_actions = num_of_action_type * num_of_pad
    self.dim = dim

    print("Initializing Q-Network...\n")

    self.q_net_filename = "q_net.pickle"
    if os.path.exists(self.q_net_filename):
        print("Loading Q-Network Model...\n")
        self.model = self.load_model()
    else:
        hidden_dim = 256
        self.model = FunctionSet(
            l4=F.Linear(self.dim * self.hist_size, hidden_dim, wscale=np.sqrt(2)),
            q_value=F.Linear(hidden_dim, self.num_of_actions,
                             initialW=np.zeros((self.num_of_actions, hidden_dim),
                                               dtype=np.float32)))
    if self.use_gpu >= 0:
        self.model.to_gpu()

    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95,
                                              momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data :  D=[s, a, r, s_dash, end_episode_flag]
    self.d = [
        np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
        np.zeros((self.data_size, self.num_of_pad), dtype=np.uint8),
        np.zeros((self.data_size, 1), dtype=np.int8),
        np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
        np.zeros((self.data_size, 1), dtype=np.bool)
    ]
class DQN_CNN(object):
    def __init__(self, n_act):
        N_output = n_act
        self.model = FunctionSet(
            conv1=F.Convolution2D(1, 16, 3, pad=1),
            conv2=F.Convolution2D(16, 16, 3, pad=1),
            l1=F.Linear(1024, 256),
            l2=F.Linear(256, N_output))

    def Q_func(self, x):
        N, h, w = x.shape
        x = x.reshape(N, 1, h, w)
        x = Variable(x)
        h = F.relu(self.model.conv1(x))
        h = F.max_pooling_2d(F.relu(self.model.conv2(h)), 2)
        h = F.relu(self.model.l1(h))
        y = self.model.l2(h)
        return y
def __init__(self, caption_model_place, cnn_model_place, index2word_place,
             gpu_id=-1, beamsize=3):
    # basic parameters you may need to modify
    self.gpu_id = gpu_id  # GPU ID; use -1 for CPU
    self.beamsize = beamsize

    # GPU setting
    global xp
    if self.gpu_id >= 0:
        xp = cuda.cupy
        cuda.get_device(gpu_id).use()
    else:
        xp = np

    # Prepare dataset
    with open(index2word_place, 'r') as f:
        self.index2word = pickle.load(f)
    vocab = self.index2word

    # Load Caffe model
    with open(cnn_model_place, 'r') as f:
        self.func = pickle.load(f)

    # Model preparation
    image_feature_dim = 1024  # dimension of the image feature
    self.n_units = 512        # number of units per layer
    n_units = 512
    self.model = FunctionSet()
    # corresponds to the last layer of CNN(I); parameters W, b
    self.model.img_feature2vec = F.Linear(image_feature_dim, n_units)
    self.model.embed = F.EmbedID(len(vocab), n_units)  # W_e * S_t; parameter W
    self.model.l1_x = F.Linear(n_units, 4 * n_units)   # parameters W, b
    self.model.l1_h = F.Linear(n_units, 4 * n_units)   # parameters W, b
    self.model.out = F.Linear(n_units, len(vocab))     # parameters W, b
    serializers.load_hdf5(caption_model_place, self.model)  # read pre-trained model

    # To GPU
    if gpu_id >= 0:
        self.model.to_gpu()
        self.func.to_gpu()

    # To avoid overflow:
    # for unknown reasons this model overflows the first time, but only on CPU,
    # so intentionally trigger the overflow once here so it never happens again.
    if gpu_id < 0:
        numpy_image = np.ones((3, 224, 224), dtype=np.float32)
        self.generate(numpy_image)
def __init__(self):
    self.model = FunctionSet(l1_x=F.Linear(go.SIZE, 4 * go.SIZE),
                             l1_h=F.Linear(4 * go.SIZE, 4 * go.SIZE),
                             last=F.Linear(4 * go.SIZE, go.SIZE))
    # keep a reference to the loss function; F.softmax_cross_entropy takes
    # (y, t) and cannot be called without arguments
    self.loss = F.softmax_cross_entropy
def __init__(self, logging=False):
    model = FunctionSet(l1=F.Linear(784, 100),
                        l2=F.Linear(100, 100),
                        l3=F.Linear(100, 10))
    optimizer = optimizers.SGD()
    lossFunction = F.softmax_cross_entropy
    params = {'epoch': 20, 'batchsize': 100, 'logging': logging}
    NNmanager.__init__(self, model, optimizer, lossFunction, **params)
def create_model(n_units):
    '''Prepare a multi-layer perceptron model.

    784-dimensional input, 10-dimensional output.
    '''
    model = FunctionSet(l1=F.Linear(784, n_units),
                        l2=F.Linear(n_units, n_units),
                        l3=F.Linear(n_units, 10))
    return model
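# Usage sketch (assumption, not from the original): wire the model to an
# optimizer with the old-style Chainer (pre-1.5) API.
from chainer import optimizers

model = create_model(n_units=1000)
optimizer = optimizers.Adam()
optimizer.setup(model.collect_parameters())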
def __init__(self, num_inputs, num_units, dropout_ratio, corruption_level,
             optimizer, gpu):
    model = FunctionSet(
        encode=F.Linear(num_inputs, num_units),
        decode=F.Linear(num_units, num_inputs),
    )
    self.layers = [model.encode, model.decode]
    super(DenoisingAutoEncoder, self).__init__(
        model, optimizer, dropout_ratio, corruption_level, gpu)
def CreateNNs(self):
    assert (len(self.Options['n_units']) >= 2)
    assert (self.Options['n_units_err'] is None
            or len(self.Options['n_units_err']) >= 2)

    # Mean model
    n_units = self.Options['n_units']
    self.f_names = ['l%d' % i for i in range(len(n_units) - 1)]
    funcs = {}
    for i in range(len(n_units) - 1):
        funcs[self.f_names[i]] = F.Linear(n_units[i], n_units[i + 1])
    self.model = FunctionSet(**funcs)

    # Error model
    if self.Options['n_units_err'] is not None:
        n_units = self.Options['n_units_err']
        self.f_names_err = ['l%d' % i for i in range(len(n_units) - 1)]
        funcs = {}
        for i in range(len(n_units) - 1):
            funcs[self.f_names_err[i]] = F.Linear(n_units[i], n_units[i + 1])
        self.model_err = FunctionSet(**funcs)
def __init__(self):
    self.models = []
    for i in xrange(3):
        model = FunctionSet(conv1=L.Convolution2D(4, 8, 3, stride=1, pad=1),
                            conv2=L.Convolution2D(8, 8, 3, stride=1, pad=1),
                            fc3=L.Linear(2040, 512),
                            fc4=L.Linear(512, 512),
                            fc5=L.Linear(512, 15 * 15))
        self.models.append(model)
class DQN_NN(object):
    def __init__(self, n_act):
        self.N_input = 64
        N_output = n_act
        # N_unit = (self.N_input - 1) * 2
        N_unit = 64
        self.model = FunctionSet(
            l1=F.Linear(self.N_input, N_unit),
            # l2=F.Linear(N_unit, N_unit),
            # l3=F.Linear(N_unit, N_unit),
            l4=F.Linear(N_unit, N_output,
                        initialW=np.zeros((N_output, N_unit), dtype=np.float32)))

    def Q_func(self, x):
        N, h, w = x.shape
        x = x.reshape(N, h * w)
        x = Variable(x)
        h = F.leaky_relu(self.model.l1(x))
        # h = F.leaky_relu(self.model.l2(h))
        # h = F.leaky_relu(self.model.l3(h))
        y = self.model.l4(h)
        return y
class NN3_Model(ModelBase):
    def __init__(self, input_dim=748, n_units=1000):
        super(NN3_Model, self).__init__()
        self.n_units = n_units
        self.model = FunctionSet(l1=F.Linear(input_dim, n_units),
                                 l2=F.Linear(n_units, n_units),
                                 l3=F.Linear(n_units, 2))

    def forward(self, x_data, y_data, train=True):
        """Return loss and accuracy."""
        x, t = Variable(x_data), Variable(y_data)
        h1 = F.dropout(F.relu(self.model.l1(x)), train=train)
        h2 = F.dropout(F.relu(self.model.l2(h1)), train=train)
        y = self.model.l3(h2)
        # Multi-class classification, so use softmax cross entropy as the
        # loss function; at minimum "loss" is required.
        return {
            "loss": F.softmax_cross_entropy(y, t),
            "accuracy": F.accuracy(y, t)
        }
def __init__(self, src_vocab, trg_vocab, n_embed=256, n_hidden=512,
             algorithm='Adam'):
    self.src_vocab = src_vocab
    self.trg_vocab = trg_vocab
    self.n_embed = n_embed
    self.n_hidden = n_hidden
    self.algorithm = algorithm
    self.model = FunctionSet(embed_x=F.EmbedID(len(src_vocab), n_embed),
                             en_x_to_h=F.Linear(n_embed, 4 * n_hidden),
                             en_h_to_h=F.Linear(n_hidden, 4 * n_hidden),
                             en_h_to_de_h=F.Linear(n_hidden, 4 * n_hidden),
                             de_h_to_embed_y=F.Linear(n_hidden, n_embed),
                             embed_y_to_y=F.Linear(n_embed, len(trg_vocab)),
                             y_to_h=F.EmbedID(len(trg_vocab), 4 * n_hidden),
                             de_h_to_h=F.Linear(n_hidden, 4 * n_hidden))
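# Usage sketch (assumption, not from the original snippet): one encoder step
# for the seq2seq parameter set above, combining the input and recurrent LSTM
# contributions. Vocab size and batch contents are placeholders.
import numpy as np
from chainer import FunctionSet, Variable
import chainer.functions as F

n_embed, n_hidden, V = 256, 512, 1000
m = FunctionSet(embed_x=F.EmbedID(V, n_embed),
                en_x_to_h=F.Linear(n_embed, 4 * n_hidden),
                en_h_to_h=F.Linear(n_hidden, 4 * n_hidden))

x_t = Variable(np.array([3, 7], dtype=np.int32))         # batch of word ids
c = Variable(np.zeros((2, n_hidden), dtype=np.float32))  # LSTM cell state
h = Variable(np.zeros((2, n_hidden), dtype=np.float32))  # LSTM hidden state
e = m.embed_x(x_t)
c, h = F.lstm(c, m.en_x_to_h(e) + m.en_h_to_h(h))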
class Perceptron():
    def __init__(self, n_in, n_out, use_cuda=False):
        self.model = FunctionSet(transform=F.Linear(n_in, n_out))
        self.use_cuda = use_cuda
        if self.use_cuda:
            self.model.to_gpu()
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model.collect_parameters())

    def predict(self, x_data):
        if self.use_cuda:
            x_data = cuda.to_gpu(x_data)
        x = Variable(x_data)
        y = F.softmax(self.model.transform(x))
        return cuda.to_cpu(y.data)

    def cost(self, x_data, y_data):
        x = Variable(x_data)
        t = Variable(y_data)
        y = self.model.transform(x)
        return F.softmax_cross_entropy(y, t), F.accuracy(y, t)

    def train(self, x_data, y_data):
        if self.use_cuda:
            x_data = cuda.to_gpu(x_data)
            y_data = cuda.to_gpu(y_data)
        self.optimizer.zero_grads()
        loss, acc = self.cost(x_data, y_data)
        loss.backward()
        self.optimizer.update()
        return float(cuda.to_cpu(loss.data)), float(cuda.to_cpu(acc.data))

    def test(self, x_data, y_data):
        if self.use_cuda:
            x_data = cuda.to_gpu(x_data)
            y_data = cuda.to_gpu(y_data)
        loss, acc = self.cost(x_data, y_data)
        return float(cuda.to_cpu(loss.data)), float(cuda.to_cpu(acc.data))
def CreateModel(dof, Dx, Dy1, Dy2, n_units, n_units2, n_units3):
    if dof == '2':
        model1 = FunctionSet(l1=F.Linear(Dx, n_units),
                             l2=F.Linear(n_units, Dy1))
        model2 = FunctionSet(l1=F.Linear(Dx, n_units),
                             l2=F.Linear(n_units, Dy2))
    elif dof == '3':
        model1 = FunctionSet(l1=F.Linear(Dx, n_units),
                             l2=F.Linear(n_units, n_units),
                             l3=F.Linear(n_units, Dy1))
        model2 = FunctionSet(l1=F.Linear(Dx, n_units),
                             l2=F.Linear(n_units, n_units),
                             l3=F.Linear(n_units, Dy2))
    elif dof == '7':
        model1 = FunctionSet(l1=F.Linear(Dx, n_units),
                             l2=F.Linear(n_units, n_units),
                             l3=F.Linear(n_units, n_units),
                             l4=F.Linear(n_units, n_units),
                             l5=F.Linear(n_units, n_units),
                             l6=F.Linear(n_units, n_units),
                             l7=F.Linear(n_units, Dy1))
        model2 = FunctionSet(l1=F.Linear(Dx, n_units),
                             l2=F.Linear(n_units, n_units),
                             l3=F.Linear(n_units, n_units),
                             l4=F.Linear(n_units, n_units),
                             l5=F.Linear(n_units, n_units),
                             l6=F.Linear(n_units, n_units),
                             l7=F.Linear(n_units, Dy2))
    return model1, model2
def __init__(self, n_visible, n_hidden, noise_level=0.0, dropout_ratio=0.3,
             batch_size=100, n_epoch=20, optimizer=optimizers.Adam(),
             activation_func=F.relu, verbose=False, gpu=-1):
    self.n_visible = n_visible
    self.n_hidden = n_hidden
    self.noise_level = noise_level
    self.dropout_ratio = dropout_ratio
    self.batch_size = batch_size
    self.n_epoch = n_epoch

    # construct model and setup optimizer
    self.model = FunctionSet(
        encoder=F.Linear(n_visible, n_hidden),
        decoder=F.Linear(n_hidden, n_visible)
    )
    self.optimizer = optimizer
    self.optimizer.setup(self.model)
    self.activation_func = activation_func
    self.verbose = verbose

    # set gpu
    self.gpu = gpu
    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.model.to_gpu()
class MNISTNet():
    def __init__(self):
        n_in = 28 * 28
        n_hidden = 100
        self.model = FunctionSet(encode=F.Linear(n_in, n_hidden),
                                 decode=F.Linear(n_hidden, n_in))
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model.collect_parameters())

    def encode(self, x_var):
        return F.sigmoid(self.model.encode(x_var))

    def decode(self, x_var):
        return F.sigmoid(self.model.decode(x_var))

    def predict(self, x_data):
        x = Variable(x_data)
        p = self.encode(x)
        return p.data

    def cost(self, x_data, dropout=True):
        x = Variable(x_data)
        t = Variable(x_data)
        if dropout:
            x_n = F.dropout(x, ratio=0.4)
        else:
            x_n = x
        h = self.encode(x_n)
        y = self.decode(h)
        return F.mean_squared_error(y, t)

    def train(self, x_data):
        self.optimizer.zero_grads()
        loss = self.cost(x_data)
        loss.backward()
        self.optimizer.update()
        return float(loss.data)
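# Usage sketch (assumption, not from the original): denoising-style training
# of MNISTNet. `x_train` is placeholder data standing in for flattened MNIST
# digits scaled to [0, 1].
import numpy as np

net = MNISTNet()
x_train = np.random.rand(600, 28 * 28).astype(np.float32)  # placeholder data
batchsize = 100
for epoch in range(5):
    perm = np.random.permutation(len(x_train))
    losses = [net.train(x_train[perm[i:i + batchsize]])
              for i in range(0, len(x_train), batchsize)]
    print("epoch {}: mean loss {}".format(epoch, sum(losses) / len(losses)))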
def load_mlp_model(path):
    n_units = 1000
    n_labels = 25
    model = FunctionSet(l1=F.Linear(4096, n_units),
                        l2=F.Linear(n_units, n_units),
                        l3=F.Linear(n_units, n_labels))
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    print('Load model from', path)
    serializers.load_hdf5(path, model)
    return model
def init_model(vocab_size, char_type_size):
    model = FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        char_type_embed=F.EmbedID(char_type_size, char_type_embed_units),
        # dict_embed=F.Linear(12, dict_embed_units),
        hidden1=F.Linear(window * (embed_units + char_type_embed_units) * 3,
                         hidden_units),
        output=F.Linear(hidden_units + 12, label_num),
    )
    if opt_selection == 'Adagrad':
        opt = optimizers.AdaGrad(lr=learning_rate)
    elif opt_selection == 'SGD':
        opt = optimizers.SGD()
    elif opt_selection == 'Adam':
        opt = optimizers.Adam()
    else:
        opt = optimizers.AdaGrad(lr=learning_rate)
        print('AdaGrad is chosen as default')
    opt.setup(model)
    return model, opt
class SimpleNet():
    def __init__(self):
        n_in = 1
        n_hidden_1 = 5
        n_hidden_2 = 5
        self.model = FunctionSet(
            en1=L.Linear(n_in, n_hidden_1),
            en2_mu=L.Linear(n_hidden_1, n_hidden_2),
            en2_var=L.Linear(n_hidden_1, n_hidden_2),
            de1=L.Linear(n_hidden_2, n_hidden_1),
            de2=L.Linear(n_hidden_1, n_in)
        )
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model.collect_parameters())

    def encode(self, x_var):
        h1 = F.tanh(self.model.en1(x_var))
        mu = self.model.en2_mu(h1)
        var = self.model.en2_var(h1)
        return mu, var

    def decode(self, z, sigmoid=True):
        h1 = F.tanh(self.model.de1(z))
        h2 = self.model.de2(h1)
        if sigmoid:
            return F.sigmoid(h2)
        return h2

    def cost(self, x_var, C=1.0, k=1):
        mu, ln_var = self.encode(x_var)
        batchsize = len(mu.data)
        rec_loss = 0
        for l in six.moves.range(k):
            z = F.gaussian(mu, ln_var)
            rec_loss += F.bernoulli_nll(x_var, self.decode(z, sigmoid=False)) \
                / (k * batchsize)
        self.rec_loss = rec_loss
        self.loss = self.rec_loss + \
            C * gaussian_kl_divergence(mu, ln_var) / batchsize
        return self.loss
class Kawamura_Y:
    def __init__(self):
        self.model0 = FunctionSet(
            l=F.Convolution2D(1, 1, 5, stride=1, pad=2, nobias=True))
        self.model1 = FunctionSet(
            l=F.Convolution2D(1, 1, 5, stride=1, pad=2, nobias=True))
        self.model0.l.W[0, 0, :, :] = np.array(
            [[0, 0, 2, 0, 0], [0, 0, -12, 0, 0], [0, 0, 6, 0, 0],
             [0, 0, 4, 0, 0], [0, 0, 0, 0, 0]]).astype(np.float32) / 12.0
        self.model1.l.W[0, 0, :, :] = np.array(
            [[0, 0, 0, 0, 0], [0, 0, -4, 0, 0], [0, 0, -6, 0, 0],
             [0, 0, 12, 0, 0], [0, 0, -2, 0, 0]]).astype(np.float32) / 12.0
        self.model0.to_gpu()
        self.model1.to_gpu()

    def forward(self, x_data):
        y0 = self.model0.l(x_data)
        y1 = self.model1.l(x_data)
        return y0, y1
class TestNestedFunctionSet(TestCase):
    def setUp(self):
        self.fs1 = FunctionSet(a=MockFunction((1, 2)))
        self.fs2 = FunctionSet(fs1=self.fs1, b=MockFunction((3, 4)))

    def test_get_sorted_funcs(self):
        assertCountEqual(self, [k for (k, v) in self.fs2._get_sorted_funcs()],
                         ('b', 'fs1'))

    def test_collect_parameters(self):
        p_b = np.zeros((3, 4)).astype(np.float32)
        p_a = np.zeros((1, 2)).astype(np.float32)
        gp_b = np.ones((3, 4)).astype(np.float32)
        gp_a = np.ones((1, 2)).astype(np.float32)

        actual = self.fs2.collect_parameters()
        self.assertTrue(list(map(len, actual)) == [2, 2])
        self.assertTrue((actual[0][0] == p_b).all())
        self.assertTrue((actual[0][1] == p_a).all())
        self.assertTrue((actual[1][0] == gp_b).all())
        self.assertTrue((actual[1][1] == gp_a).all())

    def test_pickle_cpu(self):
        fs2_serialized = pickle.dumps(self.fs2)
        fs2_loaded = pickle.loads(fs2_serialized)
        self.assertTrue((self.fs2.b.p == fs2_loaded.b.p).all())
        self.assertTrue((self.fs2.fs1.a.p == fs2_loaded.fs1.a.p).all())

    @attr.gpu
    def test_pickle_gpu(self):
        self.fs2.to_gpu()
        fs2_serialized = pickle.dumps(self.fs2)
        fs2_loaded = pickle.loads(fs2_serialized)
        fs2_loaded.to_cpu()
        self.fs2.to_cpu()
        self.assertTrue((self.fs2.b.p == fs2_loaded.b.p).all())
        self.assertTrue((self.fs2.fs1.a.p == fs2_loaded.fs1.a.p).all())
        with open(join(prefix, file), 'rb') as f:
            data_dic = cPickle.load(f)
            data.append(data_dic['data'])
            label = np.append(label, data_dic['labels'])
    data = np.vstack(data)
    return data, label

train_data, train_labels = load_data()
train_labels = train_labels.astype(np.int32)

""" network definition """
model = FunctionSet(conv1=F.Convolution2D(3, 32, 5, stride=1, pad=2),
                    norm1=F.BatchNormalization(32),
                    conv2=F.Convolution2D(32, 32, 5, stride=1, pad=2),
                    norm2=F.BatchNormalization(32),
                    conv3=F.Convolution2D(32, 16, 5, stride=1, pad=2),
                    norm3=F.BatchNormalization(16),
                    conv4=F.Convolution2D(16, 2, 5, stride=1, pad=0),
                    ip1=F.Linear(2, 2))


def forward(x_data, y_data, train=True):
    x, t = Variable(cuda.to_gpu(x_data)), chainer.Variable(cuda.to_gpu(y_data))
    h = model.conv1(x)
    h = model.norm1(h)
    h = F.relu(h)
    h = F.max_pooling_2d(h, 3, stride=2)
    h = model.conv2(h)
    h = model.norm2(h)
class QNet:
    # Hyper-Parameters
    gamma = 0.99  # Discount factor
    initial_exploration = 10**4  # Initial exploration. original: 5x10^4
    replay_size = 32  # Replay (batch) size
    target_model_update_freq = 10**4  # Target update frequency. original: 10^4
    data_size = 10**5  # Data size of history. original: 10^6
    hist_size = 1  # original: 4

    def __init__(self, use_gpu, enable_controller, dim):
        self.use_gpu = use_gpu
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller
        self.dim = dim

        print("Initializing Q-Network...")

        hidden_dim = 256
        self.model = FunctionSet(
            l4=F.Linear(self.dim * self.hist_size, hidden_dim, wscale=np.sqrt(2)),
            q_value=F.Linear(hidden_dim, self.num_of_actions,
                             initialW=np.zeros((self.num_of_actions, hidden_dim),
                                               dtype=np.float32)))
        if self.use_gpu >= 0:
            self.model.to_gpu()

        self.model_target = copy.deepcopy(self.model)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95,
                                                  momentum=0.95, eps=0.0001)
        self.optimizer.setup(self.model.collect_parameters())

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.d = [
            np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.int8),
            np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]

    def forward(self, state, action, reward, state_dash, episode_end):
        num_of_batch = state.shape[0]
        s = Variable(state)
        s_dash = Variable(state_dash)

        q = self.q_func(s)  # Get Q-value

        # Generate Target Signals
        tmp = self.q_func_target(s_dash)  # Q(s',*)
        if self.use_gpu >= 0:
            tmp = list(map(np.max, tmp.data.get()))  # max_a Q(s',a)
        else:
            tmp = list(map(np.max, tmp.data))  # max_a Q(s',a)

        max_q_dash = np.asanyarray(tmp, dtype=np.float32)
        if self.use_gpu >= 0:
            target = np.asanyarray(q.data.get(), dtype=np.float32)
        else:
            # make new array
            target = np.array(q.data, dtype=np.float32)

        for i in xrange(num_of_batch):
            if not episode_end[i][0]:
                tmp_ = np.sign(reward[i]) + self.gamma * max_q_dash[i]
            else:
                tmp_ = np.sign(reward[i])

            action_index = self.action_to_index(action[i])
            target[i, action_index] = tmp_

        # TD-error clipping
        if self.use_gpu >= 0:
            target = cuda.to_gpu(target)
        td = Variable(target) - q  # TD error
        td_tmp = td.data + 1000.0 * (abs(td.data) <= 1)  # Avoid zero division
        td_clip = td * (abs(td.data) <= 1) + td / abs(td_tmp) * (abs(td.data) > 1)

        zero_val = np.zeros((self.replay_size, self.num_of_actions),
                            dtype=np.float32)
        if self.use_gpu >= 0:
            zero_val = cuda.to_gpu(zero_val)
        zero_val = Variable(zero_val)
        loss = F.mean_squared_error(td_clip, zero_val)
        return loss, q

    def stock_experience(self, time, state, action, reward, state_dash,
                         episode_end_flag):
        data_index = time % self.data_size

        if episode_end_flag is True:
            self.d[0][data_index] = state
            self.d[1][data_index] = action
            self.d[2][data_index] = reward
        else:
            self.d[0][data_index] = state
            self.d[1][data_index] = action
            self.d[2][data_index] = reward
            self.d[3][data_index] = state_dash
        self.d[4][data_index] = episode_end_flag

    def experience_replay(self, time):
        if self.initial_exploration < time:
            # Pick up replay_size number of samples from the Data
            if time < self.data_size:  # during the first sweep of the History Data
                replay_index = np.random.randint(0, time, (self.replay_size, 1))
            else:
                replay_index = np.random.randint(0, self.data_size,
                                                 (self.replay_size, 1))

            s_replay = np.ndarray(shape=(self.replay_size, self.hist_size,
                                         self.dim), dtype=np.float32)
            a_replay = np.ndarray(shape=(self.replay_size, 1), dtype=np.uint8)
            r_replay = np.ndarray(shape=(self.replay_size, 1), dtype=np.float32)
            s_dash_replay = np.ndarray(shape=(self.replay_size, self.hist_size,
                                              self.dim), dtype=np.float32)
            episode_end_replay = np.ndarray(shape=(self.replay_size, 1),
                                            dtype=np.bool)
            for i in xrange(self.replay_size):
                s_replay[i] = np.asarray(self.d[0][replay_index[i]],
                                         dtype=np.float32)
                a_replay[i] = self.d[1][replay_index[i]]
                r_replay[i] = self.d[2][replay_index[i]]
                s_dash_replay[i] = np.array(self.d[3][replay_index[i]],
                                            dtype=np.float32)
                episode_end_replay[i] = self.d[4][replay_index[i]]

            if self.use_gpu >= 0:
                s_replay = cuda.to_gpu(s_replay)
                s_dash_replay = cuda.to_gpu(s_dash_replay)

            # Gradient-based update
            self.optimizer.zero_grads()
            loss, _ = self.forward(s_replay, a_replay, r_replay,
                                   s_dash_replay, episode_end_replay)
            loss.backward()
            self.optimizer.update()

    def q_func(self, state):
        # scale the raw input to [0, 1] before l4, matching q_func_target
        h4 = F.relu(self.model.l4(state / 255.0))
        q = self.model.q_value(h4)
        return q

    def q_func_target(self, state):
        h4 = F.relu(self.model_target.l4(state / 255.0))
        q = self.model_target.q_value(h4)
        return q

    def e_greedy(self, state, epsilon):
        s = Variable(state)
        q = self.q_func(s)
        q = q.data

        if np.random.rand() < epsilon:
            index_action = np.random.randint(0, self.num_of_actions)
            print(" Random"),
        else:
            if self.use_gpu >= 0:
                index_action = np.argmax(q.get())
            else:
                index_action = np.argmax(q)
            print("#Greedy"),
        return self.index_to_action(index_action), q

    def target_model_update(self):
        self.model_target = copy.deepcopy(self.model)

    def index_to_action(self, index_of_action):
        return self.enable_controller[index_of_action]

    def action_to_index(self, action):
        return self.enable_controller.index(action)
import chainer
from chainer import Function, FunctionSet, Variable, optimizers, serializers, utils
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from progressbar import ProgressBar
import matplotlib.pyplot as plt

prefix = '../data'

model = FunctionSet(
    conv1=F.Convolution2D(1, 20, 5),
    norm1=F.BatchNormalization(20),
    conv2=F.Convolution2D(20, 50, 5),
    norm2=F.BatchNormalization(50),
    ip1=F.Linear(4050, 1000),
    ip2=F.Linear(1000, 799),
)


def forward(x_data, y_data, train=False, normalized=False):
    x, t = Variable(x_data), chainer.Variable(y_data)
    h = model.conv1(x)
    h = model.norm1(h)
    h = F.relu(h)
    h = F.max_pooling_2d(h, 3, stride=2)
    h = model.conv2(h)
    h = model.norm2(h)
    h = F.relu(h)
    h = F.max_pooling_2d(h, 3, stride=2)