Example #1
def Main():
  import argparse
  import math
  import numpy as np
  from chainer import cuda, Variable, FunctionSet, optimizers, utils
  import chainer.functions as F
  from loss_for_error import loss_for_error1, loss_for_error2
  import six.moves.cPickle as pickle

  parser = argparse.ArgumentParser(description='Chainer example: regression')
  parser.add_argument('--gpu', '-g', default=-1, type=int,
                      help='GPU ID (negative value indicates CPU)')
  args = parser.parse_args()

  n_units   = 200  #NOTE: must be the same value as in regression2c

  # FRange1, Bound, and TrueFunc are helpers defined elsewhere in the original script.
  N_test= 100
  x_test= np.array([[x] for x in FRange1(*Bound,num_div=N_test)]).astype(np.float32)
  y_test= np.array([[TrueFunc(x[0])] for x in x_test]).astype(np.float32)
  y_err_test= np.array([[0.0] for x in x_test]).astype(np.float32)

  # Dump data for plot:
  fp1= open('/tmp/smpl_test.dat','w')
  for x,y in zip(x_test,y_test):
    fp1.write('%s #%i# %s\n' % (' '.join(map(str,x)),len(x)+1,' '.join(map(str,y))))
  fp1.close()

  # Prepare multi-layer perceptron model
  model = FunctionSet(l1=F.Linear(1, n_units),
                      l2=F.Linear(n_units, n_units),
                      l3=F.Linear(n_units, 1))
  # Error model
  model_err = FunctionSet(l1=F.Linear(1, n_units),
                          l2=F.Linear(n_units, n_units),
                          l3=F.Linear(n_units, 1))
  # Load parameters from file:
  model.copy_parameters_from(pickle.load(open('datak/reg2c_mean.dat', 'rb')))
  model_err.copy_parameters_from(pickle.load(open('datak/reg2c_err.dat', 'rb')))
  #model.copy_parameters_from(map(lambda e:np.array(e,np.float32),pickle.load(open('/tmp/nn_model.dat', 'rb')) ))
  #model_err.copy_parameters_from(map(lambda e:np.array(e,np.float32),pickle.load(open('/tmp/nn_model_err.dat', 'rb')) ))
  if args.gpu >= 0:
    cuda.init(args.gpu)
    model.to_gpu()
    model_err.to_gpu()

  # Neural net architecture
  def forward(x_data, y_data, train=True):
    #train= False  #TEST: Turn off dropout
    dratio= 0.2  #0.5  #TEST: Dropout ratio
    x, t = Variable(x_data), Variable(y_data)
    h1 = F.dropout(F.relu(model.l1(x)),  ratio=dratio, train=train)
    h2 = F.dropout(F.relu(model.l2(h1)), ratio=dratio, train=train)
    y  = model.l3(h2)
    return F.mean_squared_error(y, t), y

  # Neural net architecture
  def forward_err(x_data, y_data, train=True):
    #train= False  #TEST: Turn off dropout
    dratio= 0.2  #0.5  #TEST: Dropout ratio
    x, t = Variable(x_data), Variable(y_data)
    h1 = F.dropout(F.relu(model_err.l1(x)),  ratio=dratio, train=train)
    h2 = F.dropout(F.relu(model_err.l2(h1)), ratio=dratio, train=train)
    y  = model_err.l3(h2)
    #return F.mean_squared_error(y, t), y
    #return loss_for_error1(y, t, 0.1), y  #TEST
    return loss_for_error2(y, t, 0.1), y  #TEST

  #ReLU whose input is normal distribution variable.
  #  mu: mean, var: variance (square of std-dev).
  #  cut_sd: if abs(mu)>cut_sd*sigma, an approximation is used.  Set None to disable this.
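  #  Implements the closed-form moments of a rectified Gaussian; with z = mu/(sqrt(2)*sigma):
  #    E[max(0,X)]   = sigma*exp(-z^2)/sqrt(2*pi) + mu*(1+erf(z))/2
  #    Var[max(0,X)] = (1+erf(z))/4*(mu^2*(1-erf(z))+2*var)
  #                    - sigma*exp(-z^2)/sqrt(2*pi)*(sigma*exp(-z^2)/sqrt(2*pi)+mu*erf(z))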
  def relu_gauss(mu, var, epsilon=1.0e-6, cut_sd=4.0):
    cast= type(mu)
    sigma= math.sqrt(var)
    if sigma<epsilon:  return cast(max(0.0,mu)), cast(0.0)
    #Approximation to speedup for abs(mu)>cut_sd*sigma.
    if cut_sd is not None and mu>cut_sd*sigma:   return cast(mu), cast(var)
    if cut_sd is not None and mu<-cut_sd*sigma:  return cast(0.0), cast(0.0)
    sqrt2= math.sqrt(2.0)
    sqrt2pi= math.sqrt(2.0*math.pi)
    z= mu/(sqrt2*sigma)
    E= math.erf(z)
    X= math.exp(-z*z)
    mu_out= sigma/sqrt2pi*X + mu/2.0*(1.0+E)
    var_out= (1.0+E)/4.0*(mu*mu*(1.0-E)+2.0*var) - sigma*X/sqrt2pi*(sigma*X/sqrt2pi+mu*E)
    if var_out<0.0:
      if var_out>-epsilon:  return mu_out, 0.0
      else:
        msg= 'ERROR in relu_gauss: %f, %f, %f, %f'%(mu, sigma, mu_out, var_out)
        print msg
        raise Exception(msg)
    return cast(mu_out), cast(var_out)

  relu_gaussv= np.vectorize(relu_gauss)  #Vector version

  #Gradient of ReLU whose input is normal distribution variable.
  #  mu: mean, var: variance (square of std-dev).
  #  cut_sd: if abs(mu)>cut_sd*sigma, an approximation is used.  Set None to disable this.
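  #  The expected gradient is the Gaussian CDF of the input: E[step(X)] = (1+erf(mu/(sqrt(2)*sigma)))/2.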
  def relu_gauss_grad(mu, var, epsilon=1.0e-6, cut_sd=4.0):
    cast= type(mu)
    sigma= math.sqrt(var)
    if sigma<epsilon:  return cast(1.0 if mu>0.0 else 0.0)
    #Approximation to speedup for abs(mu)>cut_sd*sigma.
    if cut_sd is not None and mu>cut_sd*sigma:   return cast(1.0)
    if cut_sd is not None and mu<-cut_sd*sigma:  return cast(0.0)
    sqrt2= math.sqrt(2.0)
    z= mu/(sqrt2*sigma)
    return cast(0.5*(1.0+math.erf(z)))

  relu_gauss_gradv= np.vectorize(relu_gauss_grad)  #Vector version


  #Forward computation of neural net considering input distribution.
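  #  Propagates the mean through W h + b and the variance through diag(W Sigma W^T),
  #  applying relu_gauss moment matching at each hidden layer; also accumulates the
  #  input-output Jacobian g and adds the error model's squared output as extra variance.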
  def forward_x(x, x_var=None):
    zero= np.float32(0)
    x= np.array(x,np.float32); x= x.reshape(x.size,1)

    #Error model:
    h0= x
    for l in (model_err.l1, model_err.l2):
      hl1= l.W.dot(h0) + l.b.reshape(l.b.size,1)  #W h0 + b
      h1= np.maximum(zero, hl1)  #ReLU(hl1)
      h0= h1
    l= model_err.l3
    y_err0= l.W.dot(h0) + l.b.reshape(l.b.size,1)
    y_var0= np.diag((y_err0*y_err0).ravel())

    if x_var is None or (np.isscalar(x_var) and x_var==0.0):
      g= None  #Gradient
      h0= x
      for l in (model.l1, model.l2):
        hl1= l.W.dot(h0) + l.b.reshape(l.b.size,1)  #W h0 + b
        h1= np.maximum(zero, hl1)  #ReLU(hl1)
        g2= l.W.T.dot(np.diag((hl1>0.0).ravel().astype(np.float32)))  #W diag(step(hl1))
        g= g2 if g is None else g.dot(g2)
        h0= h1
      l= model.l3
      y= l.W.dot(h0) + l.b.reshape(l.b.size,1)
      g= l.W.T if g is None else g.dot(l.W.T)
      return y, y_var0, g

    else:
      if isinstance(x_var, (float, np.float_, np.float16, np.float32, np.float64)):
        x_var= np.diag(np.array([x_var]*x.size).astype(np.float32))
      elif x_var.size==x.size:
        x_var= np.diag(np.array(x_var.ravel(),np.float32))
      else:
        x_var= np.array(x_var,np.float32); x_var= x_var.reshape(x.size,x.size)
      g= None  #Gradient
      h0= x
      h0_var= x_var
      for l in (model.l1, model.l2):
        hl1= l.W.dot(h0) + l.b.reshape(l.b.size,1)  #W h0 + b
        #print 'l.W',l.W.shape
        #print 'h0_var',h0_var.shape
        hl1_dvar= np.diag( l.W.dot(h0_var.dot(l.W.T)) ).reshape(hl1.size,1)  #diag(W h0_var W^T)
        #print 'hl1',hl1.shape
        #print 'hl1_dvar',hl1_dvar.shape
        h1,h1_dvar= relu_gaussv(hl1,hl1_dvar)  #ReLU_gauss(hl1,hl1_dvar)
        #print 'h1_dvar',h1_dvar.shape
        h1_var= np.diag(h1_dvar.ravel())  #To a full matrix
        #print 'h1_var',h1_var.shape
        #print 'relu_gauss_gradv(hl1,hl1_dvar)',relu_gauss_gradv(hl1,hl1_dvar).shape
        g2= l.W.T.dot(np.diag(relu_gauss_gradv(hl1,hl1_dvar).ravel()))
        g= g2 if g is None else g.dot(g2)
        h0= h1
        h0_var= h1_var
      l= model.l3
      y= l.W.dot(h0) + l.b.reshape(l.b.size,1)
      y_var= l.W.dot(h0_var.dot(l.W.T))
      g= l.W.T if g is None else g.dot(l.W.T)
      return y, y_var+y_var0, g

  '''
  # testing all data
  preds = []
  x_batch = x_test[:]
  y_batch = y_test[:]
  y_err_batch = y_err_test[:]
  if args.gpu >= 0:
    x_batch = cuda.to_gpu(x_batch)
    y_batch = cuda.to_gpu(y_batch)
    y_err_batch = cuda.to_gpu(y_err_batch)
  loss, pred = forward(x_batch, y_batch, train=False)
  loss_err, pred_err = forward_err(x_batch, y_err_batch, train=False)
  preds = cuda.to_cpu(pred.data)
  preds_err = cuda.to_cpu(pred_err.data)
  sum_loss = float(cuda.to_cpu(loss.data)) * len(y_test)
  sum_loss_err = float(cuda.to_cpu(loss_err.data)) * len(y_test)
  pearson = np.corrcoef(np.asarray(preds).reshape(len(preds),), np.asarray(y_test).reshape(len(preds),))

  print 'test mean loss={}, corrcoef={}, error loss={}'.format(
      sum_loss / N_test, pearson[0][1], sum_loss_err / N_test)

  # Dump data for plot:
  fp1= open('/tmp/nn_test0001.dat','w')
  for x,y,yerr in zip(x_test,preds,preds_err):
    fp1.write('%s #%i# %s %s\n' % (' '.join(map(str,x)),len(x)+1,' '.join(map(str,y)),' '.join(map(str,yerr))))
  fp1.close()
  #'''

  # Dump data for plot:
  fp1= open('/tmp/nn_test0001.dat','w')
  for x in x_test:
    y, var, g= forward_x(x, 0.0)
    y, var, g= y.ravel(), var.ravel(), g.ravel()
    yerr= np.sqrt(var)
    fp1.write('%s %s %s %s\n' % (' '.join(map(str,x)),' '.join(map(str,y)),' '.join(map(str,yerr)),' '.join(map(str,g))))
  fp1.close()

  # Dump data for plot:
  fp1= open('/tmp/nn_test0002.dat','w')
  for x in x_test:
    y, var, g= forward_x(x, 0.5**2)
    y, var, g= y.ravel(), var.ravel(), g.ravel()
    yerr= np.sqrt(var)
    fp1.write('%s %s %s %s\n' % (' '.join(map(str,x)),' '.join(map(str,y)),' '.join(map(str,yerr)),' '.join(map(str,g))))
  fp1.close()
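
A quick way to sanity-check the relu_gauss moment matching used above is to compare it against Monte Carlo estimates for a Gaussian input. A minimal standalone sketch (the mu/var values here are arbitrary test inputs, not taken from the original script):

import math
import numpy as np

def relu_gauss_check(mu=0.3, var=0.5**2, n=1000000):
  # Closed-form moments, mirroring relu_gauss above.
  sigma= math.sqrt(var)
  z= mu/(math.sqrt(2.0)*sigma)
  E= math.erf(z); X= math.exp(-z*z)
  sq2pi= math.sqrt(2.0*math.pi)
  mu_out= sigma/sq2pi*X + mu/2.0*(1.0+E)
  var_out= (1.0+E)/4.0*(mu*mu*(1.0-E)+2.0*var) - sigma*X/sq2pi*(sigma*X/sq2pi+mu*E)
  # Monte Carlo estimates of the same moments.
  s= np.maximum(0.0, np.random.normal(mu, sigma, size=n))
  print('analytic: mean=%f var=%f' % (mu_out, var_out))
  print('sampled:  mean=%f var=%f' % (s.mean(), s.var()))

relu_gauss_check()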
Example #2
class ChainerAgent(Agent):
	def __init__(self, epsilon=1.0, frames_per_action=4):
		super(ChainerAgent, self).__init__()
		cuda.init()
		self.epsilon = epsilon
		self.gamma = 0.99
		self.iterations = 0
		
		self.model = FunctionSet(
			l1 = F.Linear(9 * frames_per_action, 256),
			l2 = F.Linear(256, 256),
			l3 = F.Linear(256, 256),
			l4 = F.Linear(256, 2),
		).to_gpu()

		self.optimizer = optimizers.RMSprop(lr=1e-5)
		self.optimizer.setup(self.model)
		self.update_target()

		self.num_frames = 0
		self.frames_per_action = frames_per_action
		self.prev_reward = 0.0

		self.history = ChainHistory(state_len=(9 * frames_per_action))

	def forward(self, state, action, reward, new_state, is_terminal):
		q = self.get_q(Variable(state))
		q_target = self.get_target_q(Variable(new_state))

		max_target_q = cp.max(q_target.data, axis=1)

		target = cp.copy(q.data)

		for i in xrange(target.shape[0]):
			curr_action = int(action[i])
			if is_terminal[i]:
				target[i, curr_action] = reward[i]
			else:
				target[i, curr_action] = reward[i] + self.gamma * max_target_q[i]
		
		loss = F.mean_squared_error(Variable(target), q)
		return loss, 0.0 #cp.mean(q.data[:, action[i]])

	def get_q(self, state):
		h1 = F.relu(self.model.l1(state))
		h2 = F.relu(self.model.l2(h1))
		h3 = F.relu(self.model.l3(h2))
		return self.model.l4(h3)

	def get_target_q(self, state):
		h1 = F.relu(self.target_model.l1(state))
		h2 = F.relu(self.target_model.l2(h1))
		h3 = F.relu(self.target_model.l3(h2))
		return self.target_model.l4(h3)

	def accept_reward(self, state, action, reward, new_state, is_terminal):
		self.prev_reward += reward

		if not (is_terminal or self.num_frames % self.frames_per_action == 0):
			return

		if self.num_frames == self.frames_per_action:
			self.prev_reward = 0.0
			self.prev_action = action
			return

		self.history.add((self.prev_state, self.prev_action, self.prev_reward,
			self.curr_state, is_terminal))
		self.prev_reward = 0.0
		self.prev_action = action

		self.iterations += 1
		if self.iterations % 10000 == 0:
			print '*** UPDATING TARGET NETWORK ***'
			self.update_target()
		
		state, action, reward, new_state, is_terminal = self.history.get(num=32)

		state = cuda.to_gpu(state)
		action = cuda.to_gpu(action)
		new_state = cuda.to_gpu(new_state)
		reward = cuda.to_gpu(reward)

		loss, q = self.forward(state, action, reward, new_state, is_terminal)
		self.optimizer.zero_grads()
		loss.backward()
		self.optimizer.update()

	def update_state_vector(self, state):
		if self.num_frames < self.frames_per_action:
			if self.num_frames == 0:
				self.curr_state = state
			else:
				self.curr_state = np.hstack((self.curr_state, state))
		else:
			if self.num_frames < 2 * self.frames_per_action:
				if self.num_frames == self.frames_per_action:
					self.prev_state = np.copy(self.curr_state[:, :9])
				else:
					self.prev_state = np.hstack((self.prev_state, self.curr_state[:, :9]))
			else:
				self.prev_state[:, :-9] = self.prev_state[:, 9:]
				self.prev_state[:, -9:] = self.curr_state[:, :9]

			self.curr_state[:, :-9] = self.curr_state[:, 9:]
			self.curr_state[:, -9:] = state

		self.num_frames += 1

	def act(self, state):
		self.update_state_vector(state)

		if self.num_frames < self.frames_per_action - 1 or self.num_frames % self.frames_per_action != 0:
			return None

		if self.epsilon > 0.05:
			self.epsilon -= (0.95 / 1000000)

		if random.random() < 0.0001:
			print 'Epsilon greedy strategy current epsilon: {}'.format(self.epsilon)

		if random.random() < self.epsilon:
			return random.random() > 0.375

		q = self.get_q(Variable(cuda.to_gpu(self.curr_state)))

		if random.random() < 0.01:
			if q.data[0,1] > q.data[0,0]:
				print 'On: {}'.format(q.data)
			else:
				print 'Off: {}'.format(q.data)

		return q.data[0,1] > q.data[0,0]

	def save(self, file_name):
		with open(file_name, 'wb') as out_file:
			pickle.dump(self.model, out_file)

	def load(self, file_name):
		self.epsilon = 0.0

		with open(file_name, 'rb') as in_file:
			model = pickle.load(in_file)
			self.model.copy_parameters_from(model.parameters)

	def update_target(self):
		self.target_model = copy.deepcopy(self.model)
		self.target_model = self.target_model.to_gpu()

	def start_new_game(self):
		self.num_frames = 0
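
In forward above, the Q-learning target is assembled row by row; the same Bellman update can be written vectorized. A minimal CPU sketch with plain numpy and hypothetical batch data (in the original, cp is assumed to be the GPU array module, with the arrays living on the GPU):

import numpy as np

def bellman_targets(q, q_target, action, reward, is_terminal, gamma=0.99):
	# Start from the current Q-values so non-chosen actions contribute zero loss.
	target = q.copy()
	max_target_q = q_target.max(axis=1)
	rows = np.arange(q.shape[0])
	# r for terminal transitions, r + gamma * max_a' Q_target(s') otherwise.
	target[rows, action] = reward + gamma * max_target_q * (1.0 - is_terminal)
	return target

q = np.random.randn(32, 2).astype(np.float32)
q_target = np.random.randn(32, 2).astype(np.float32)
action = np.random.randint(0, 2, size=32)
reward = np.random.randn(32).astype(np.float32)
is_terminal = np.random.randint(0, 2, size=32).astype(np.float32)
print(bellman_targets(q, q_target, action, reward, is_terminal).shape)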
Example #3
)
N_train = y_train.size
N_test = y_test.size

# Build the training model
model = FunctionSet(
    l1=F.Linear(784, h_unit),
    l2=F.Linear(h_unit, h_unit),
    l3=F.Linear(h_unit, 10)
)

# Load parameters from file
if os.path.exists(param_file):
    log.info("Load model parameters : {0}".format(param_file))
    param = np.load(param_file)
    model.copy_parameters_from(param)

# Transfer to the GPU
if gpu >= 0:
    cuda.get_device(gpu).use()
    model.to_gpu()


# Forward propagation
def forward(x_data, y_data, train=True):
    x = chainer.Variable(x_data)
    t = chainer.Variable(y_data)
    h1 = F.dropout(F.relu(model.l1(x)), ratio=0.5, train=train)
    h2 = F.dropout(F.relu(model.l2(h1)), ratio=0.5, train=train)
    y = model.l3(h2)
    # The original snippet is truncated here; the conventional 10-class return:
    return F.softmax_cross_entropy(y, t), F.accuracy(y, t)
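
The original excerpt was cut off inside forward (the return above is the conventional completion). A typical training loop around it, using the same zero_grads/backward/update sequence as the other examples, might look like the sketch below (batchsize, the epoch count, and the Adam choice are assumptions; x_train/y_train come from the truncated preamble, and optimizers is assumed imported as in the other examples):

optimizer = optimizers.Adam()
optimizer.setup(model)
batchsize = 100
for epoch in range(20):
    perm = np.random.permutation(N_train)
    for i in range(0, N_train, batchsize):
        x_batch = x_train[perm[i:i + batchsize]]
        y_batch = y_train[perm[i:i + batchsize]]
        if gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)
        optimizer.zero_grads()
        loss, acc = forward(x_batch, y_batch)
        loss.backward()
        optimizer.update()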
Example #4
class ConvQAgent(Agent):
	def __init__(self, frames_per_action=4):
		super(ConvQAgent, self).__init__()
		cuda.init()
		self.epsilon = 1.0
		self.gamma = 0.99
		self.iterations = 0
		
		self.model = FunctionSet(
			l1 = F.Convolution2D(frames_per_action, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
			l2 = F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
			l3 = F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
			l4 = F.Linear(64 * 7 * 7, 512),
			l5 = F.Linear(512, 2)
		).to_gpu()

		self.optimizer = optimizers.RMSprop(lr=1e-5)
		self.optimizer.setup(self.model)
		self.update_target()

		self.num_frames = 0
		self.frames_per_action = frames_per_action
		self.prev_reward = 0.0

		self.history = ConvHistory((frames_per_action, 84, 84))

	def update_target(self):
		self.target_model = copy.deepcopy(self.model)
		self.target_model = self.target_model.to_gpu()

	def act(self, state):
		self.update_state_vector(state)

		if self.num_frames < self.frames_per_action - 1 or self.num_frames % self.frames_per_action != 0:
			return None

		if random.random() < 0.001:
			print 'Epsilon: {}'.format(self.epsilon)

		if self.epsilon > 0.05:
			self.epsilon -= (0.95 / 300000)

		if random.random() < self.epsilon:
			return random.random() > 0.375

		q = self.get_q(Variable(cuda.to_gpu(self.curr_state[np.newaxis, :, :, :])))

		if random.random() < 0.01:
			if q.data[0,1] > q.data[0,0]:
				print 'On: {}'.format(q.data)
			else:
				print 'Off: {}'.format(q.data)

		return q.data[0,1] > q.data[0,0]

	def update_state_vector(self, state):
		if self.num_frames < self.frames_per_action:
			if self.num_frames == 0:
				self.curr_state = np.zeros((self.frames_per_action, 84, 84), dtype=np.float32)
			self.curr_state[self.num_frames, :, :] = state
		else:
			if self.num_frames == self.frames_per_action:
				self.prev_state = np.zeros((self.frames_per_action, 84, 84), dtype=np.float32)
			self.prev_state[1:, :, :] = self.prev_state[:-1, :, :]
			self.prev_state[0, :, :] = self.curr_state[-1, :, :]

			self.curr_state[1:, :, :] = self.curr_state[:-1, :, :]
			self.curr_state[0, :, :] = state

		self.num_frames += 1

	def accept_reward(self, state, action, reward, new_state, is_terminal):
		self.prev_reward += reward

		if not (is_terminal or self.num_frames % self.frames_per_action == 0):
			return

		if self.num_frames == self.frames_per_action:
			self.prev_reward = 0.0
			self.prev_action = action
			return

		self.history.add((self.prev_state, self.prev_action, self.prev_reward,
			self.curr_state, is_terminal))
		self.prev_reward = 0.0
		self.prev_action = action

		self.iterations += 1
		if self.iterations % 10000 == 0:
			print '*** UPDATING TARGET NETWORK ***'
			self.update_target()
		
		state, action, reward, new_state, is_terminal = self.history.get(num=32)

		state = cuda.to_gpu(state)
		action = cuda.to_gpu(action)
		new_state = cuda.to_gpu(new_state)
		reward = cuda.to_gpu(reward)

		loss, q = self.forward(state, action, reward, new_state, is_terminal)
		self.optimizer.zero_grads()
		loss.backward()
		self.optimizer.update()

	def forward(self, state, action, reward, new_state, is_terminal):
		q = self.get_q(Variable(state))
		q_target = self.get_target_q(Variable(new_state))

		max_target_q = cp.max(q_target.data, axis=1)

		target = cp.copy(q.data)

		for i in xrange(target.shape[0]):
			curr_action = int(action[i, 0])
			if is_terminal[i]:
				target[i, curr_action] = reward[i]
			else:
				target[i, curr_action] = reward[i] + self.gamma * max_target_q[i]
		
		loss = F.mean_squared_error(Variable(target), q)
		return loss, 0.0 #cp.mean(q.data[:, action[i]])

	def get_q(self, state):
		h1 = F.relu(self.model.l1(state))
		h2 = F.relu(self.model.l2(h1))
		h3 = F.relu(self.model.l3(h2))
		h4 = self.model.l4(h3)
		return self.model.l5(h4)

	def get_target_q(self, state):
		h1 = F.relu(self.target_model.l1(state))
		h2 = F.relu(self.target_model.l2(h1))
		h3 = F.relu(self.target_model.l3(h2))
		h4 = self.target_model.l4(h3)
		return self.target_model.l5(h4)

	def save(self, file_name):
		with open(file_name, 'wb') as out_file:
			pickle.dump((self.model, self.optimizer), out_file)

	def load(self, file_name):
		self.epsilon = 0.0

		with open(file_name, 'rb') as in_file:
			model, optimizer = pickle.load(in_file)
			self.model.copy_parameters_from(model.parameters)
			self.optimizer = optimizer

	def start_new_game(self):
		self.num_frames = 0
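
The 64 * 7 * 7 input size of l4 follows from running the conv stack over 84x84 frames: with no padding, each layer maps spatial size n to (n - ksize) // stride + 1. A quick standalone check of that arithmetic:

def conv_out(n, ksize, stride):
	# Valid (unpadded) convolution output size.
	return (n - ksize) // stride + 1

n = 84
for ksize, stride in [(8, 4), (4, 2), (3, 1)]:
	n = conv_out(n, ksize, stride)
	print(n)  # 20, then 9, then 7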