def main(_):
    # exactly one of the three modes must be selected
    assert sum([FLAGS.train, FLAGS.predict, FLAGS.eval]) == 1

    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)

    # config = tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
    # config.gpu_options.allow_growth = True
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    # with tf.Session(config=config) as sess:
    with tf.Session() as sess:
        dnn = DNN(sess, FLAGS)
        if FLAGS.train:
            # os.environ["CUDA_VISIBLE_DEVICES"] = '1'
            dnn.fit()
        elif FLAGS.predict:
            dnn.load_network()
            samples = np.array(
                pd.read_csv('dataset/gen_samples.csv', header=None))
            gen_y = samples[:, -1]  # the last column holds the labels
            predict = dnn.predict(np.delete(samples, -1, 1))
            # assert gen_y.shape[0] == predict.shape[0]
            # print('Accuracy: {}%'.format((predict == gen_y).sum() / float(predict.shape[0]) * 100))
            # keep only the misclassified samples and write them back
            samples = samples[gen_y != predict]
            pd.DataFrame(samples).to_csv('dataset/gen_samples.csv',
                                         index=False,
                                         header=None)
        elif FLAGS.eval:
            dnn.load_network()
            dnn.eval()
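
# The FLAGS object used above is defined elsewhere in the source file. A
# minimal sketch of compatible flag definitions, assuming the TF 1.x flags
# API (the defaults and help strings are illustrative guesses):
flags = tf.app.flags
flags.DEFINE_boolean('train', False, 'train a new model')
flags.DEFINE_boolean('predict', False, 'filter generated samples by prediction')
flags.DEFINE_boolean('eval', False, 'evaluate a trained model')
flags.DEFINE_string('checkpoint_dir', 'checkpoint', 'where checkpoints are written')
flags.DEFINE_string('log_dir', 'logs', 'where logs are written')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # parses the flags, then calls main(_)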
Example #2
def main():
    options = {
        'learning_rate': 0.1,
        'beta1': 0.9,
        'optimizer': 'gd',
        'loss': 'crossentropy'
    }

    (train_x, test_x, train_set_x_orig, train_set_y_orig,
     test_set_x_orig, test_set_y_orig, classes) = load_data()

    X = np.array([[1, 2], [1, 2], [4, 2]])  # toy arrays, unused below
    Y = np.array([[0], [0], [0]])

    print(train_x.shape)
    print(test_x.shape)
    print(train_set_y_orig.shape)

    print(train_set_y_orig[0, 0:10])

    layers = [
        Dense(32, activation='relu'),
        Dense(5, activation='relu'),
        Dense(1, activation='sigmoid')
    ]

    print(len(layers))

    dnn = DNN(train_x, train_set_y_orig, layers, options)

    print(dnn.params.keys())

    # for param in sorted(dnn.params):
    #     print(param, dnn.params[param].shape)

    print(dnn)
    print(dnn.loss(dnn.predict(test_x), test_set_y_orig))

    dnn.train()
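
# load_data() is not shown in this example. Judging from the seven return
# values and the (1, m)-shaped label prints above, it plausibly loads an
# HDF5-style image dataset and returns flattened, scaled copies alongside
# the originals. A hypothetical sketch (file names and keys are assumptions):
def load_data():
    import h5py
    train = h5py.File('datasets/train.h5', 'r')   # assumed path
    test = h5py.File('datasets/test.h5', 'r')     # assumed path

    train_set_x_orig = np.array(train['train_set_x'])             # (m, h, w, 3)
    train_set_y_orig = np.array(train['train_set_y']).reshape(1, -1)
    test_set_x_orig = np.array(test['test_set_x'])
    test_set_y_orig = np.array(test['test_set_y']).reshape(1, -1)
    classes = np.array(test['list_classes'])

    # flatten to (features, m) and scale pixel values into [0, 1]
    train_x = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T / 255.
    test_x = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T / 255.

    return (train_x, test_x, train_set_x_orig, train_set_y_orig,
            test_set_x_orig, test_set_y_orig, classes)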
Example #3
#print "mrse mean {0}".format(mrse(mean, target))
#print "rmse mean {0}".format(rmse(mean, target))

#for i in range(10):
#    d = matrix[i]
#    t = target[i]
#    pred = full.activate(d)
    #print "Prediction: {0} for data {1} target: {2}".format(pred, d, t)
#    print "Prediction: {0} for target: {1}".format(pred, t)

print "\n"

for i in range(10):
    d = matrix[i]
    t = target[i]
    pred = dnn.predict(d)
    #print("Prediction: {0} for data {1} target: {2}".format(pred, d, t))
    print("Prediction: {0} for target: {1}".format(pred, t))


#####
# baseline: ordinary least squares on the same data
from sklearn.linear_model import LinearRegression

r = LinearRegression()
r.fit(matrix, target)
preds = r.predict(matrix)
print("mrse preds {0}".format(mrse(preds, target)))
print("rmse preds {0}".format(rmse(preds, target)))
for i in range(10):
    d = matrix[i]
    t = target[i]
    pred = r.predict(d.reshape(1, -1))[0]  # sklearn expects a 2-D array per call
    #print("Prediction: {0} for data {1} target: {2}".format(pred, d, t))
    print("Prediction: {0} for target: {1}".format(pred, t))
Example #4
class MountainCar:
    """
    Wrapper around MountainCarContinuous whose dynamics are predicted
    by a learned model.
    """
    def __init__(self, name='Goodone', net=None, train=0):
        """
        Initialize.
        net:   the neural network used as the dynamics model
        train: verification vs. use phase -- in the verification phase the
               network is untrained; in the use phase it is already trained
        """
        self.env = gym.make("MountainCarContinuous-v0")
        self.name = name
        self.simulation_step = 0.1
        self.units = 50
        self.ratio = 200
        self.reset()
        if net:
            self.net = net
        else:
            self.net = DNN(1, 1, self.units, train=train, name=self.name)

    def save_samples(self, big_epis=100):
        """
        Save the data collected from random rollouts
        (big_epis * 10000 rows in total).
        """
        record = []
        for big_epi in range(big_epis):
            # initialize; scale the sampled action so the car can reach the goal
            a = 0.0025
            change = 100
            observation = self.reset()
            for epi in range(10000):
                if epi % change == 0:
                    u = self.action_sample() * 3
                    print(big_epi, int(20 * epi / 10000) * '=')
                observation_old = observation.copy()
                observation, _, done, _ = self.env.step(u)
                target = self._get_target(observation_old, observation, u)
                x = observation_old[0]
                # save the computed target (and optionally the true value) for later comparison
                # record.append([x, target, -a * math.cos(3 * x)])
                record.append([x, target])
        data = np.array(record)
        np.save(os.path.join(self.net.model_path0, 'memory.npy'), data)
        return data

    def verity_data(self):
        """
        Check the dataset: plot the computed target values against the true
        values. Note: this expects the three-column records produced by the
        commented-out line in save_samples, i.e. [position, target_dot, real_dot].
        """
        import matplotlib.pyplot as plt
        import pandas as pd
        import seaborn as sns
        sns.set()

        self.data = self._load_data()
        data_size = len(self.data)
        indices = np.random.choice(data_size, size=int(data_size / 10))
        df = pd.DataFrame(self.data[indices, :],
                          columns=['position', 'target_dot', 'real_dot'])
        plt.figure()
        plt.scatter(df['position'],
                    df['target_dot'] * 1.1,
                    s=5,
                    label='target')  # scaled by 1.1 so the two sets stay distinguishable
        plt.scatter(df['position'], df['real_dot'], s=5, label='real')
        plt.legend()
        plt.show()

    def train_model(self):
        """
        Train the model on the collected data: scale the targets first,
        then fit the neural network.
        """
        # training
        data = self._load_data()
        data[:, 1:] = data[:, 1:] * self.ratio
        self.net.learn_data(data)
        self.net.store_net()

    def verity_net_1(self):
        """
        Check the network by comparing its predictions with the analytic values.
        """

        a = 0.0025
        x_ = np.arange(-1.1, 0.5, 0.001)
        y_tru = -a * np.cos(3 * x_)
        y_pre = self.net.predict(x_.reshape((-1, 1))) / self.ratio
        # check the fit over the whole range of x
        fig = plt.figure()
        plt.plot(x_, y_tru, label='x_tru')
        plt.plot(x_, y_pre, label='x_pre')
        plt.legend()

        y_tru_dot = 3 * a * np.sin(3 * x_)
        y_pre_dot = self.net.predict_dot(x_.reshape(
            (-1, 1)))[:, 0] / self.ratio
        # y_pre_dot = self.net.predict_dot(x_.reshape((-1, 1)))[:, 0]
        # check the fit of x_dot over the whole range of x
        fig = plt.figure()
        plt.plot(x_, y_tru_dot, label='x_dot_tru')
        plt.plot(x_, y_pre_dot, label='x_dot_pre')
        plt.legend()

        plt.show()

    def verity_net_2(self):
        """
        Check the network a second way: roll out the learned model and the
        real system side by side and compare the trajectories.
        """
        observation_record = []
        observation_record_net = []
        time_record = []
        observation = self.reset()
        observation_net = observation

        change = 100
        time = 0
        epi = 0
        while True:
            observation_record.append(observation)
            observation_record_net.append(observation_net)
            time_record.append(time)
            if epi % change == 0:
                action = self.action_sample() * 3
            epi += 1
            observation, _, done, info = self.env.step(action)
            observation_net, _, done_net, info_net = self.step(action)
            time += self.simulation_step
            print(observation, observation_net)
            if done_net:
                break

        observation_record = np.array(observation_record)
        observation_record_net = np.array(observation_record_net)
        time_record = np.array(time_record)

        plt.figure(1)
        plt.plot(time_record, observation_record[:, 0], label='x_true')
        plt.plot(time_record, observation_record_net[:, 0], label='x_pre')
        plt.xlabel('Time(s)')
        plt.ylabel('Xposition')
        plt.plot(time_record, 0.45 * np.ones(len(observation_record)), 'r')
        plt.legend()

        plt.figure(2)
        plt.plot(time_record, observation_record[:, 1], label='v_true')
        plt.plot(time_record, observation_record_net[:, 1], label='v_pre')
        plt.xlabel('Time(s)')
        plt.ylabel('Vspeed')
        plt.legend()
        plt.show()

    def _load_data(self):
        """
        Load the data saved earlier.
        :return: the recorded samples
        """
        data = np.load(os.path.join(self.net.model_path0, 'memory.npy'))
        return data

    def action_sample(self):
        """
        Sample a random action that is valid for the environment.
        """
        return self.env.action_space.sample()

    def reset(self):
        """
        Reset using the original problem's (random) initialization.
        """
        self.state = self.env.reset()
        return self.state

    def step(self, action):
        """
        One step of the identified model (the network stands in for the dynamics).
        """
        action = np.clip(action, -1.0, 1.0)
        x, v = self.state
        # derivative predicted by the neural network
        dot = self.get_dot(self.state)
        v_dot = 0.0015 * action + dot[0]
        v = v + v_dot * self.simulation_step
        v = np.clip(v, -0.07, 0.07)

        # integrate v to get x
        x = x + self.simulation_step * v
        x = np.clip(x, -1.2, 0.6)
        X = np.array([x, v])
        if X.ndim == 2:
            X = X.reshape((2, ))
        self.state = X
        # return values
        info = {}
        done = False
        reward = 0.0
        if x >= 0.45:
            done = True
        return self.state, reward, done, info

    def step_true(self, action):
        """
        One step of the true model, for comparison.
        """
        action = np.clip(action, -1.0, 1.0)
        x, v = self.state
        # analytic derivative instead of the network's
        # dot = self.get_dot(self.state)
        v_dot = 0.0015 * action - 0.0025 * math.cos(3 * x)
        v = v + v_dot * self.simulation_step
        v = np.clip(v, -0.07, 0.07)

        # integrate v to get x
        x = x + self.simulation_step * v
        x = np.clip(x, -1.2, 0.6)
        X = np.array([x, v])
        if X.ndim == 2:
            X = X.reshape((2, ))
        self.state = X
        # return values
        info = {}
        done = False
        reward = 0.0
        if x >= 0.45:
            done = True
        return self.state, reward, done, info

    def get_dot(self, X):
        return self.net.predict(X[0:1])[0] / self.ratio

    def get_dot2(self, X):
        return self.net.predict_dot(X[0:1])[0] / self.ratio

    def _get_target(self, X, X_new, u):
        """
        Compute the training target for the network: the true derivative from
        finite differences, minus the known control term.
        """
        u = np.clip(u, -1.0, 1.0)
        return (((X_new - X) / self.simulation_step)[1] - u * 0.0015)
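
# A possible end-to-end use of the class above; the method order follows the
# workflow implied by the docstrings (collect, fit, verify) and is an
# assumption, not part of the original source:
if __name__ == '__main__':
    car = MountainCar(name='Goodone', train=1)  # untrained network
    car.save_samples(big_epis=100)              # collect (x, target) samples
    car.train_model()                           # scale targets and fit the net
    car.verity_net_1()                          # compare against -a*cos(3x)
    car.verity_net_2()                          # roll out model vs. real env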
Example #5
class DQN:
  def __init__(self, num_actions, observation_shape, dqn_params, cnn_params, folder):
    self.num_actions = num_actions
    self.observation_shape = observation_shape
    self.cnn_params = cnn_params
    self.folder = folder
    self.epsilon = dqn_params['epsilon']
    self.gamma = dqn_params['gamma']
    self.mini_batch_size = dqn_params['mini_batch_size']
    self.time_step = 0
    self.decay_rate = dqn_params['decay_rate']
    self.epsilon_min = dqn_params['epsilon_min']
    self.current_epsilon = self.epsilon

    self.use_ddqn = dqn_params['use_ddqn']
    self.print_obs = dqn_params['print_obs']
    self.print_reward = dqn_params['print_reward']

    self.startTraining = False

    #memory for printing reward and observations  
    self.memory = deque(maxlen=1000)

    #PER memory
    self.per_memory = Memory(dqn_params['memory_capacity'])

    #initialize network
    self.model = DNN(folder, num_actions, observation_shape, cnn_params)
    print("model initialized")

    #extra network for Double DQN
    if self.use_ddqn == 1:
        self.target_model = CNN(folder, num_actions, observation_shape, cnn_params)

  def select_action(self, observation, iterations):
    # epsilon decay
    if iterations % 1000 == 0:
        self.current_epsilon = self.epsilon_min + (self.epsilon - self.epsilon_min) * np.exp(-self.decay_rate * self.time_step)
        self.time_step += 1
        #ue.log('Trainable TF conv variables: ' + str(self.model.conv_kernels))
        
    if random.random() < self.current_epsilon:
      # with epsilon probability select a random action 
      action = np.random.randint(0, self.num_actions)
    else:
      # select the action a which maximizes the Q value
      obs = np.array([observation])
      q_values = self.model.predict(obs)
      action = np.argmax(q_values)

    return action

  def update_state(self, action, observation, new_observation, reward, done):
    # this is an experience that we save in memory
    transition = {'action': action,
                  'observation': observation,
                  'new_observation': new_observation,
                  'reward': reward,
                  'is_done': done}
    #memory for observation and reward saving for outprints
    self.memory.append(transition)
    #PER memory for observation used to train
    self.per_memory.store(transition)

  def train_step(self):
    """
    Updates the model based on the mini batch from PER
    """
    if self.startTraining:
      tree_idx, mini_batch = self.per_memory.sample(self.mini_batch_size)
      
      new_states = np.zeros((self.mini_batch_size, self.observation_shape[0]))
      old_states = np.zeros((self.mini_batch_size, self.observation_shape[0]))
      actionsBatch = []

      for i in range(self.mini_batch_size):
        new_states[i] = mini_batch[i]['new_observation']
        old_states[i] = mini_batch[i]['observation']
        actionsBatch.append(mini_batch[i]['action'])
        
      target = self.model.predict(old_states)
      target_old = np.array(target)
      target_next = self.model.predict(new_states)
      target_val = 0
      if self.use_ddqn:
        target_val = self.target_model.predict(new_states)
      
      Xs = []
      ys = []
      actions = []

      for i in range(self.mini_batch_size):
        y_j = mini_batch[i]['reward']

        if not mini_batch[i]['is_done']:
          # best next action according to the online network
          actionIndex = np.argmax(target_next[i])

          if self.use_ddqn == 1:
            # Double DQN: evaluate that action with the target network
            y_j += self.gamma*target_val[i][actionIndex]
          else:
            y_j += self.gamma*target_next[i][actionIndex]

        # one-hot encoding of the action actually taken
        action = np.zeros(self.num_actions)
        action[mini_batch[i]['action']] = 1

        observation = mini_batch[i]['observation']

        Xs.append(observation.copy())
        ys.append(y_j)
        actions.append(action.copy())

      #Setting up for training
      Xs = np.array(Xs)
      ys = np.array(ys)
      actions = np.array(actions)

      #Updating the PER sum tree
      indices = np.arange(self.mini_batch_size, dtype=np.int32)

      actionsInt = np.array(actionsBatch, dtype=int)

      absolute_errors = np.abs(target_old[indices, actionsInt]-ys[indices])

      # Update priority
      self.per_memory.batch_update(tree_idx, absolute_errors)

      self.model.train_step(Xs, ys, actions)

  def saveBatchReward(self, iterations):

    #Need this on set iteration update
    #self.target_model = CNN(self.folder, self.num_actions, self.observation_shape, self.cnn_params)

    obs_lines = ""  # renamed from `os` to avoid shadowing the os module
    r = 0
    index = 0
    if self.print_obs == 1 or self.print_reward == 1:
        for x in range(len(self.memory)):
            t = self.memory[x]
            r += t['reward']
            #For writing observations to file, used to calculate means and standard deviations
            if self.print_obs == 1 and iterations > 1:
                for obs in t['observation']:
                    obs_lines += str(obs) + ","
                obs_lines += "\n"
        if self.print_reward == 1:
            plot_file = self.model.model_directory + "/plot.txt"
            try:
                f = open(plot_file, "r")
                lines = f.read().splitlines()
                last_line = lines[-1]

                index = int(last_line.split(",")[0]) + 1

                f.close()
            except (IOError, IndexError, ValueError):
                index = 1
            ue.log("Saved: " + str(index) + ", Reward: " + str(r) + ", Epsilon: " + str(self.current_epsilon))
            f = open(plot_file, "a+")
            f.write(str(index) + "," + str(r) + "\n")
            f.close()

        #For writing observations to file, used to calculate means and standard deviations
        if self.print_obs == 1:
            f = open(self.model.model_directory + "/observations.txt", "a+")
            f.write(obs_lines)
            f.close()
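
# The Memory class used above comes from elsewhere; the interface exercised
# here is store(transition), sample(n) -> (tree_idx, batch), and
# batch_update(tree_idx, abs_errors). A minimal proportional-prioritization
# sketch with that interface (the constants and the flat-array storage are
# assumptions, standing in for the usual SumTree):
class Memory:
  PER_e = 0.01  # keeps every priority strictly positive
  PER_a = 0.6   # how strongly sampling skews toward high-error transitions

  def __init__(self, capacity):
    self.capacity = capacity
    self.data = []
    self.priorities = np.zeros(capacity, dtype=np.float64)
    self.pos = 0

  def store(self, transition):
    # new transitions get the current max priority so each is seen at least once
    max_p = self.priorities[:len(self.data)].max() if self.data else 1.0
    if len(self.data) < self.capacity:
      self.data.append(transition)
    else:
      self.data[self.pos] = transition
    self.priorities[self.pos] = max_p
    self.pos = (self.pos + 1) % self.capacity

  def sample(self, n):
    p = self.priorities[:len(self.data)] ** self.PER_a
    p = p / p.sum()
    idx = np.random.choice(len(self.data), size=n, p=p)
    return idx, [self.data[i] for i in idx]

  def batch_update(self, tree_idx, abs_errors):
    self.priorities[tree_idx] = np.abs(abs_errors) + self.PER_e

# A rough driving loop for the DQN class, assuming a gym-style env and that
# dqn_params/cnn_params/folder are configured elsewhere (num_episodes and
# warmup_steps are illustrative settings, not from the original source):
dqn = DQN(env.action_space.n, env.observation_space.shape,
          dqn_params, cnn_params, folder)
iterations = 0
for episode in range(num_episodes):
  observation = env.reset()
  done = False
  while not done:
    action = dqn.select_action(observation, iterations)
    new_observation, reward, done, _ = env.step(action)
    dqn.update_state(action, observation, new_observation, reward, done)
    if iterations > warmup_steps:
      dqn.startTraining = True
      dqn.train_step()
    observation = new_observation
    iterations += 1
  if episode % 10 == 0:
    dqn.saveBatchReward(iterations)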
Example #6
    # training mode
    X = memory_norm[:, [0, 1]].copy()
    Y = memory_norm[:, 2:].copy()
    losses = []
    for i in range(40000):
        sample_index = np.random.choice(len(X), size=500)
        batch_x = X[sample_index, :]
        batch_y = Y[sample_index, :]
        loss, mae = net.learn(batch_x, batch_y)
        losses.append(loss)
        print(i + 1, 'mean range-prediction error:', mae * 100, 'km')
    plt.plot(losses)
    sample_index = np.random.choice(len(X), size=100)
    batch_x = X[sample_index, :]
    batch_y = Y[sample_index, :]
    batch_y_pre = net.predict(batch_x)
    error_y = net.unorm(np.hstack((batch_x, batch_y - batch_y_pre)))[:, -1]
    print(np.mean(np.abs(error_y)))
    net.store()
    plt.show()
else:
    X = memory_norm[:, [0, 1]].copy()
    Y = memory_norm[:, 2:].copy()
    tes_num = 100
    sample_index = np.random.choice(len(X), size=tes_num)
    batch_x = X[sample_index, :]
    batch_y = Y[sample_index, :]
    batch_y_pre = net.predict(batch_x)
    for i in range(tes_num):
        print(
            "sample %d: w = %f, v = %f, range_real = %f, range_pre = %f, delta_range = %f"
Example #7
def train(data, path):
    os.makedirs(path, exist_ok=True)
    x_train, y_train = data['x'], data['y']
    #pdb.set_trace()
    x = tf.placeholder(tf.float32, [None, 11, input_dim], name='x')

    y = tf.placeholder(tf.float32, [None, input_dim], name='y')

    model = DNN()
    loss = model.loss(x, y)
    pred = model.predict(x)
    tf.add_to_collection('pred', pred)
    tf.add_to_collection('loss', loss)

    optimize = tf.train.AdamOptimizer(learning_rate=5e-5,
                                      beta1=0.9,
                                      beta2=0.999).minimize(loss)
    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        start = time.time()
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(path + 'logs', sess.graph)

        err_old = float('inf')  # best epoch-average loss seen so far
        for i in range(iter_num):
            err_new = 0
            count = 0
            for j in range(len(x_train)):
                idx = random.randint(0, len(x_train) - 1)
                #pdb.set_trace()
                xt = next_batch(x_train[idx])
                yt = y_train[idx % len(y_train)][5:-5, :]
                k = 0  # keeps the remainder slice below defined for short utterances
                for k in range(0, len(xt), batch_num):
                    xb = xt[k:k + batch_num]
                    yb = yt[k:k + batch_num]
                    _ = sess.run([optimize], feed_dict={x: xb, y: yb})
                xb = xt[k:]
                yb = yt[k:]
                err, result = sess.run([loss, merged],
                                       feed_dict={
                                           x: xb,
                                           y: yb
                                       })
                err_new += err
                count += 1
                if j % 100 == 0:
                    writer.add_summary(result, len(x_train) * i + j)
            if err_new / count < err_old:
                err_old = err_new / count
                saver.save(sess, path + 'test_best_model')
                #print('Epoch [%4d] Iter [%4d] Time [%5.4f] \nLoss: [%.4f]' %
                #  (i+1, j, time.time() - start, err))
                print('Epoch [%4d] Time [%10.4f] Loss: [%.4f]: Saved ' %
                      (i + 1, time.time() - start, err_new / count))
            else:
                print('Epoch [%4d] Time [%5.4f] Loss: [%.4f]: No save ' %
                      (i + 1, time.time() - start, err_new / count))
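
# next_batch() is defined outside this example. Given the placeholder shape
# [None, 11, input_dim] and the y slice [5:-5] above, it plausibly builds
# 11-frame sliding windows centred on each target frame; a hypothetical
# reconstruction:
import numpy as np

def next_batch(utterance, context=5):
    # stack each frame with its +/- `context` neighbours; the 5 edge frames
    # on each side have no full window, matching the y[5:-5] trim in train()
    frames = len(utterance)
    windows = [utterance[i - context:i + context + 1]
               for i in range(context, frames - context)]
    return np.array(windows)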
Example #8
    opts.W_init = W_init

print "Build DNN structure..."
dnn = DNN(opts)

# training
print "Start DNN training...(lr_decay = %s)" % (str(opts.lr_decay))

acc_all = []
lr = opts.learning_rate
ts = time.time()
for i in range(opts.epoch):

    dnn.train(X_train, Y_train, opts.batch_size, lr)

    acc = np.mean(np.argmax(Y_valid, axis=1) == dnn.predict(X_valid))
    acc_all.append(acc)

    print "Epoch %d, lr = %.4f, accuracy = %f" % (i + 1, lr, acc)

    # dump intermediate model and log every 100 epochs
    if (i + 1) % 100 == 0:
        model_filename = os.path.join(opts.model_dir,
                                      "epoch%d.model" % (i + 1))
        dnn_save_model(model_filename, dnn)

        log_filename = '../log/%s.log' % parameters
        print "Save %s" % log_filename
        np.savetxt(log_filename, acc_all, fmt='%.7f')

    lr *= opts.lr_decay
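
# Since lr is multiplied by opts.lr_decay once per epoch, the schedule is
# geometric: lr_i = learning_rate * lr_decay**i. For instance, the number of
# epochs needed to halve the rate (illustrative, assuming lr_decay < 1):
import math
epochs_to_halve = math.log(0.5) / math.log(opts.lr_decay)  # ~138 for lr_decay=0.995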