def build_model(self):
    """Build and compile the Q-network.

    Architecture: two conv+pool blocks over channels-first frame stacks,
    then three 256-unit ReLU layers and a linear head with one output
    per action. Returns the compiled Keras model.
    """
    layers = [
        Conv2D(24, (3, 3), activation='relu',
               input_shape=(MEMORY_FRAMES, 80, 80),
               data_format='channels_first'),
        MaxPooling2D((2, 2), data_format='channels_first'),
        Conv2D(32, (3, 3), activation='relu', data_format='channels_first'),
        MaxPooling2D((2, 2), data_format='channels_first'),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(256, activation='relu'),
        Dense(256, activation='relu'),
        Dense(self.action_size, activation='linear'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='mse', optimizer=Adam(lr=0.0001))
    model.summary()
    # needed to use TF+keras in multiple threads
    model._make_predict_function()
    model._make_train_function()
    self.graph = tf.get_default_graph()
    return model
def test_pickling_right_after_compilation():
    """A freshly compiled model whose train function has been built
    must survive a pickle round-trip."""
    model = Sequential([
        Dense(2, input_shape=(3,)),
        Dense(3),
    ])
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
    model._make_train_function()
    model = pickle.loads(pickle.dumps(model))
def test_pickling_right_after_compilation():
    """Pickling must work immediately after compile + train-function build."""
    model = Sequential()
    for layer in (Dense(2, input_shape=(3,)), Dense(3)):
        model.add(layer)
    model.compile(optimizer='sgd', loss='mse', metrics=['acc'])
    model._make_train_function()
    serialized = pickle.dumps(model)
    model = pickle.loads(serialized)
def test_saving_right_after_compilation():
    """HDF5 save/load must work on a model compiled moments ago,
    including its built train function."""
    model = Sequential([
        Dense(2, input_shape=(3,)),
        Dense(3),
    ])
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
    model._make_train_function()
    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)
    model = load_model(fname)
    os.remove(fname)
def test_saving_right_after_compilation():
    """Round-trip a just-compiled model through an .h5 file."""
    model = Sequential()
    for layer in (Dense(2, input_shape=(3,)), Dense(3)):
        model.add(layer)
    model.compile(optimizer='sgd', loss='mse', metrics=['acc'])
    model._make_train_function()
    _, fname = tempfile.mkstemp('.h5')
    try:
        save_model(model, fname)
        model = load_model(fname)
    finally:
        os.remove(fname)
# --- Classifier head: flatten conv features into a 512-unit ReLU layer
# with dropout, then a masked-softmax output over nb_classes. ---
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# model.add(Dense(nb_classes))
model.add(Dense(nb_classes, kernel_initializer='zero', activation=masked_softmax))
# Define our training protocol
# (path-integral consolidation protocol; `protocols` is a project module —
# semantics of omega_decay/xi not visible here, TODO confirm against its docs)
protocol_name, protocol = protocols.PATH_INT_PROTOCOL(omega_decay='sum', xi=1e-3 )
opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999)
# opt = RMSprop(lr=1e-3)
# opt = SGD(1e-3)
# Wrap the base optimizer with the consolidation protocol's optimizer.
oopt = KOOptimizer(opt, model=model, **protocol)
model.compile(loss='categorical_crossentropy', optimizer=oopt, metrics=['accuracy'])
model._make_train_function()
history = LossHistory()
callbacks = [history]
# Output file encodes the protocol, learning rate, and epochs per task.
datafile_name = "split_cifar10_data_%s_lr%.2e_ep%i.pkl.gz"%(protocol_name, learning_rate, epochs_per_task)
def run_fits(cvals, training_data, valid_data, nstats=1):
    # Sweep the consolidation strength values `cvals`, repeating each
    # setting `nstats` times from a fresh initialization.
    # NOTE(review): this function appears truncated in this chunk — the
    # body below ends mid-run; the remainder is not visible here.
    acc_mean = dict()
    acc_std = dict()
    for cidx, cval_ in enumerate(cvals):
        runs = []
        for runid in range(nstats):
            evals = []
            # Re-initialize all variables so each repeat starts fresh.
            sess.run(tf.global_variables_initializer())
class ADQN:
    """Per-router deep Q-network agent for packet routing.

    Each router holds an online `model` and a `target_model` (both small
    dense networks mapping a one-hot destination to one Q value per port).
    Weights are exchanged between the two networks through a per-node
    HDF5 file named ``<node>_weights.h5``. All Keras calls run inside the
    shared module-level ``graph`` context (TF1 multi-thread pattern).

    Convention in this class: LOWER Q values are better (the agent picks
    the minimum-Q port), and ports are numbered from 1.
    """

    def __init__(self, n_nodes, node, n_port):
        # n_nodes = total number of routers; node = this router's id;
        # n_port = number of ports on this router
        self.n_nodes = n_nodes
        self.node = node
        self.n_port = n_port
        with graph.as_default():
            # Online network: one-hot destination (n_nodes + 1 wide,
            # index 0 presumably unused since node ids start at 1 —
            # TODO confirm) -> Q value per port.
            self.model = Sequential()
            self.model.add(
                Dense(16, activation='relu', input_shape=(self.n_nodes + 1, )))
            self.model.add(Dropout(0.2))
            self.model.add(Dense(8, activation='relu'))
            self.model.add(Dropout(0.1))
            self.model.add(Dense(n_port, activation='linear'))
            self.model.compile(loss='mean_squared_error',
                               optimizer=RMSprop(lr=0.01),
                               metrics=['accuracy'])
            # Warm-up predict on a random destination so the predict
            # function is built before any other thread uses the model.
            a = random.randint(1, self.n_nodes)
            state_input = to_categorical(a, num_classes=self.n_nodes + 1)
            #state_input[0] = 1
            state_input_array = np.array(state_input)
            state_input_array = state_input_array.reshape(1, self.n_nodes + 1)
            self.model.predict(state_input_array)
            self.model._make_train_function()
            # Seed the weight file so target() can load it immediately.
            weight_name = str(self.node) + '_weights.h5'
            self.model.save_weights(weight_name)
        with graph.as_default():
            # Target network: identical architecture; synced from the
            # weight file written by update_network().
            self.target_model = Sequential()
            self.target_model.add(
                Dense(16, activation='relu', input_shape=(self.n_nodes + 1, )))
            self.target_model.add(Dropout(0.2))
            self.target_model.add(Dense(8, activation='relu'))
            self.target_model.add(Dropout(0.1))
            self.target_model.add(Dense(n_port, activation='linear'))
            self.target_model.compile(loss='mean_squared_error',
                                      optimizer=RMSprop(lr=0.01),
                                      metrics=['accuracy'])
            a = random.randint(1, self.n_nodes)
            state_input = to_categorical(a, num_classes=self.n_nodes + 1)
            #state_input[0] = 1
            state_input_array = np.array(state_input)
            state_input_array = state_input_array.reshape(1, self.n_nodes + 1)
            self.target_model.predict(state_input_array)

    def estimate(self, dest, receive_port, epsilon):
        # Returns: (1) the port with the minimum Q value, (2) the port
        # chosen by the epsilon-greedy policy, and (3) the raw Q-value
        # list — never routing back out the port the packet arrived on.
        state_input = to_categorical(dest, num_classes=self.n_nodes + 1)
        #state_input[0] = 1
        state_input_array = np.array(state_input)
        state_input_array = state_input_array.reshape(1, self.n_nodes + 1)
        with graph.as_default():
            Q_estimate_array = self.target_model.predict(state_input_array)
        Q_estimate_list = Q_estimate_array.tolist()
        #print('node',self.node)
        #i = 0
        # Find the min-Q port, skipping receive_port by temporarily
        # overwriting its Q value with (max + 1). NOTE: this mutates
        # Q_estimate_list, which is also returned below.
        while True:
            Q_min_port = Q_estimate_list[0].index(min(Q_estimate_list[0])) + 1
            if Q_min_port != receive_port:
                break
            else:
                index = Q_estimate_list[0].index(min(Q_estimate_list[0]))
                Q_estimate_list[0][index] = max(Q_estimate_list[0]) + 1
        #print('node',self.node,'length = Q _es',len(Q_estimate_list[0]))
        # Epsilon-greedy distribution: greedy port gets 1 - eps + eps/k,
        # every other port gets eps/k.
        p_greedy = 1 - epsilon + epsilon / len(Q_estimate_list[0])
        p_not_greedy = epsilon / len(Q_estimate_list[0])
        p_list = []
        for i in range(1, self.n_port + 1):
            if i != Q_min_port:
                p_list.append(p_not_greedy)
            else:
                p_list.append(p_greedy)
        #print(p_list)
        # asd =0
        # for i in range(self.n_port):
        #     asd += p_list[i]
        # print(self.node,Q_estimate_list,Q_min_port,dest,asd)
        p = np.array(p_list)
        port_list = []
        for i in range(self.n_port):
            port_list.append(i + 1)
        # Resample until the draw differs from receive_port.
        # NOTE(review): if epsilon == 0 and the greedy port equals
        # receive_port this could loop forever — verify callers' epsilon.
        while True:
            Q_egreddy_port = np.random.choice(port_list, p=p.ravel())
            if Q_egreddy_port != receive_port:
                break
        return Q_min_port, Q_egreddy_port, Q_estimate_list[0]

    def target(self, dest, MinQ_port_eval):
        # Return the target network's Q value (value estimate) for the
        # port the online network selected, after refreshing the target
        # weights from this node's weight file.
        weight_name = str(self.node) + '_weights.h5'
        with graph.as_default():
            self.target_model.load_weights(weight_name)
        state_input = to_categorical(dest, num_classes=self.n_nodes + 1)
        # state_input[0] = 1
        state_input_array = np.array(state_input)
        state_input_array = state_input_array.reshape(1, self.n_nodes + 1)
        with graph.as_default():
            Q_estimate_array = self.target_model.predict(state_input_array)
        Q_estimate_list = Q_estimate_array.tolist()
        Q_min_actual = Q_estimate_list[0][MinQ_port_eval - 1]
        return Q_min_actual

    def learn(self, sample_list):
        # Train the online network on a batch of transitions. Each sample
        # is (state, port, reward, Q_eval, MinQ, importance_weight) —
        # inferred from the unpacking below, TODO confirm with the caller.
        state_list = []
        port_list = []
        reward_list = []
        MinQ_list = []
        Q_eval_list = []
        Q_actual_list = []
        impotance_sampling_list = []
        for i in range(len(sample_list)):
            state_list.append(sample_list[i][0])
            port_list.append(sample_list[i][1])
            reward_list.append(sample_list[i][2])
            Q_eval_list.append(sample_list[i][3])
            MinQ_list.append(sample_list[i][4])
            impotance_sampling_list.append((sample_list[i][5]))
        # TD target with discount factor 0.9.
        for i in range(len(sample_list)):
            Q_actual_list.append(reward_list[i] + 0.9 * MinQ_list[i])
        state_input = to_categorical(state_list, num_classes=self.n_nodes + 1)
        # for i in range(len(state_input)):
        #     state_input[i][0] = 1
        state_input_array = np.array(state_input)
        state_input_array = state_input_array.reshape(len(state_list),
                                                      self.n_nodes + 1)
        # Labels start from the stored Q estimates; only the taken port's
        # entry is replaced with the TD target. NOTE: aliases Q_eval_list.
        label_list = Q_eval_list
        for i in range(len(sample_list)):
            change_port = port_list[i]
            change_port_index = change_port - 1
            label_list[i][change_port_index] = Q_actual_list[i]
        label_list_array = np.array(label_list)
        label_list_array = label_list_array.reshape(len(state_list),
                                                    len(Q_eval_list[0]))
        sample_weights = np.array(impotance_sampling_list)
        with graph.as_default():
            #print(self.node,'learning start')
            #reduce_lr = ReduceLROnPlateau(monitor='loss',factor=0.1, patience=2, mode='auto')
            # First sample uses the stored label and importance weight.
            self.model.fit(
                np.array(state_input_array[0]).reshape(1, self.n_nodes + 1),
                np.array(label_list_array[0]).reshape(1, len(Q_eval_list[0])),
                batch_size=1,
                epochs=5,
                verbose=0,
                sample_weight=np.atleast_1d(sample_weights[0]))
            #print(self.node,'learning end')
            # Remaining samples re-predict with the freshly updated model
            # and patch only the taken port's Q value before fitting.
            # NOTE(review): unlike the first sample, these fits do not
            # apply the importance-sampling weight (line commented out).
            for i in range(1, len(state_input)):
                label = self.model.predict(
                    np.array(state_input_array[i]).reshape(1, self.n_nodes + 1))
                # if self.node == 1:
                #     print(state_input_array[i].reshape(1,self.n_nodes+1))
                #     print('ex label',label)
                change_port = port_list[i]
                change_port_index = change_port - 1
                label[0][change_port_index] = Q_actual_list[i]
                label = np.array(label)
                label = label.reshape(1, len(Q_eval_list[0]))
                # if self.node == 1:
                #     print('af label',label)
                #reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=2, mode='auto')
                self.model.fit(
                    np.array(state_input_array[i]).reshape(1, self.n_nodes + 1),
                    label,
                    batch_size=1,
                    epochs=5,
                    verbose=0)
                    #sample_weight=np.atleast_1d(sample_weights[i]))
        # if self.node == 1:
        #     print('after learning')
        #     test_list = []
        #     for i in range(1,self.n_nodes+1):
        #         if i != self.node:
        #             test_list.append(i)
        #     state_input = to_categorical(test_list, num_classes=self.n_nodes+1)
        #     # for i in range(len(test_list)):
        #     #     state_input[i][0] = 1
        #     state_input_array = np.array(state_input)
        #     state_input_array = state_input_array.reshape(len(state_input), self.n_nodes+1)
        #     Q_estimate_array = self.model.predict(state_input_array)
        #     print(state_input_array)
        #     print(Q_estimate_array)

    def update_network(self, beta):
        # Publish the online network's weights to this node's weight file
        # (picked up by target()/show_routing_table()), then decay beta.
        # NOTE(review): decrement 0.3 vs threshold 0.1 can push beta
        # below 0.1 or negative — confirm this is intended.
        weight_name = str(self.node) + '_weights.h5'
        self.model.save_weights(weight_name)
        if beta > 0.1:
            beta -= 0.3
        return beta

    def update_epsilon(self, epsilon):
        # Decay the exploration rate by 0.2 until it reaches 0.1.
        if epsilon > 0.1:
            epsilon -= 0.2
        return epsilon

    def show_routing_table(self):
        # Debug helper: print the target network's Q values for every
        # other node as destination (after syncing from the weight file).
        weight_name = str(self.node) + '_weights.h5'
        with graph.as_default():
            self.target_model.load_weights(weight_name)
        test_list = []
        for i in range(1, self.n_nodes + 1):
            if i != self.node:
                test_list.append(i)
        for i in range(len(test_list)):
            state_input = to_categorical(test_list[i],
                                         num_classes=self.n_nodes + 1)
            # NOTE(review): index 0 is forced to 1 here but the analogous
            # lines elsewhere in this class are commented out — confirm
            # which encoding the trained network actually saw.
            state_input[0] = 1
            state_input_array = np.array(state_input)
            state_input_array = state_input_array.reshape(1, self.n_nodes + 1)
            with graph.as_default():
                Q_estimate_array = self.target_model.predict(state_input_array)
            print(state_input_array)
            print(Q_estimate_array)