class Agent(object):
    def __init__(self, name='model', input_num=None, output_num=None):
        """A learning agent that uses tensorflow to create a neural network"""
        assert input_num is not None
        assert output_num is not None
        self.input_num = input_num
        self.output_num = output_num
        self._build_net()

    def _build_net(self):
        """Construct the neural network"""
        # Change the network structure here
        S = Input(shape=[self.input_num])
        h0 = Dense(300, activation="sigmoid")(S)
        h1 = Dense(600, activation="sigmoid")(h0)
        h2 = Dense(29, activation="sigmoid")(h1)
        V = Dense(self.output_num, activation="sigmoid")(h2)
        self.model = Model(inputs=S, outputs=V)
        self.model.compile(optimizer="adam", loss='mse')

    def train(self, x, y, n_epoch=100, batch=32):
        """Train the network"""
        self.model.fit(x=x, y=y, epochs=n_epoch, batch_size=batch)

    def predict(self, x):
        """Input values to the neural network and return the result"""
        a = self.model.predict(x)
        return a
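# --- Sketch (not from the original source): minimal usage of the Agent wrapper above. ---
# The feature/target dimensions and the random data are hypothetical, for illustration only.
import numpy as np

x = np.random.rand(256, 4)   # 4 input features per sample
y = np.random.rand(256, 2)   # 2 regression targets per sample (in [0, 1], matching the sigmoid output)

agent = Agent(input_num=4, output_num=2)
agent.train(x, y, n_epoch=10, batch=32)
print(agent.predict(x[:5]))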
def test_add_entropy_loss_on_functional_model(self):
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)
    model.add_loss(losses.binary_crossentropy(targets, outputs))
    model.compile('sgd', run_eagerly=testing_utils.should_run_eagerly())
    with test.mock.patch.object(logging, 'warning') as mock_log:
        model.fit([self.x, self.y], batch_size=3, epochs=5)
        self.assertNotIn('Gradients do not exist for variables',
                         str(mock_log.call_args))
class NN:
    """
    The NN class wraps a keras model to reduce the interface methods
    Notice: Difference to dqn is just the setter and getter methods for the weights
    """

    def __init__(self, env, atoms, alpha: float = 0.001, decay: float = 0.0001):
        """
        We initialize our functional model, therefore we need Input Shape and Output Shape
        :param env:
        :param alpha:
        :param decay:
        """
        self.alpha = alpha
        self.decay = decay
        self.model = None
        self.atoms = atoms  # new to D-DDQN
        self.init_model(env.observation_space.shape[0], env.action_space.n)

    def init_model(self, input_shape: int, n_actions: int):
        """
        Initializing our keras functional model
        :return: initialized model
        """
        inputs = Input(shape=(input_shape,))
        h1 = Dense(64, activation='relu')(inputs)
        h2 = Dense(64, activation='relu')(h1)
        outputs = []
        for _ in range(n_actions):
            outputs.append(Dense(self.atoms, activation='softmax')(h2))
        self.model = Model(inputs, outputs)

    def predict(self, *args, **kwargs):
        """
        By wrapping the keras predict method we can handle our net as a standalone object
        :param args: interface to keras.model.predict
        :return: prediction
        """
        return self.model.predict(*args, **kwargs)

    def fit(self, *args, **kwargs):
        """
        By wrapping the keras fit method we can handle our net as a standalone object
        :param args: interface to keras.model.fit
        :return: history object
        """
        return self.model.fit(*args, **kwargs)

    def get_weights(self):
        """ Passing the arguments to keras get_weights """
        return self.model.get_weights()

    def set_weights(self, *args, **kwargs):
        """ Passing the arguments to keras set_weights """
        self.model.set_weights(*args, **kwargs)
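# --- Sketch (not from the original source): collapsing the distributional heads of NN above into Q-values. ---
# Each head returns a softmax over `atoms` support points; the expected return is the dot product
# with a support vector. The C51-style support `z` (and its v_min/v_max bounds) is an assumption,
# it is not defined anywhere in the snippet above.
import numpy as np

def expected_q_values(nn, states, v_min=-10.0, v_max=10.0):
    z = np.linspace(v_min, v_max, nn.atoms)                  # hypothetical value support
    dists = nn.predict(states)                               # list of (batch, atoms) arrays, one per action
    return np.stack([dist @ z for dist in dists], axis=1)    # (batch, n_actions) expected returns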
def test_loss_with_sample_weight_in_layer_call(self):
    class MyLayer(layers.Layer):
        def __init__(self):
            super(MyLayer, self).__init__()
            self.bias = testing_utils.Bias()

        def call(self, inputs):
            out = self.bias(inputs[0])
            self.add_loss(MAE()(inputs[1], out, inputs[2]))
            self.add_loss(math_ops.reduce_mean(inputs[2] * mae(inputs[1], out)))
            return out

    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    sw = Input(shape=(1,))
    outputs = MyLayer()([inputs, targets, sw])
    model = Model([inputs, targets, sw], outputs)
    model.predict([self.x, self.y, self.w])
    model.compile(optimizer_v2.gradient_descent.SGD(0.05),
                  run_eagerly=testing_utils.should_run_eagerly(),
                  experimental_run_tf_function=testing_utils.should_run_tf_function())

    history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5)
    self.assertAllClose(history.history['loss'], [2., 1.8, 1.6, 1.4, 1.2], 1e-3)

    output = model.evaluate([self.x, self.y, self.w])
    self.assertAlmostEqual(output, 1.0, 3)

    output = model.test_on_batch([self.x, self.y, self.w])
    self.assertAlmostEqual(output, 1.0, 3)
def frozen():
    x = Input(shape=(32,))
    layer = Dense(32)
    layer.trainable = False
    y = layer(x)

    frozen_model = Model(x, y)
    # In the model below, the weights of `layer` will not be updated during training.
    frozen_model.compile(optimizer='rmsprop', loss='mse')

    layer.trainable = True
    trainable_model = Model(x, y)
    # With this model, the weights of `layer` will be updated during training
    # (which also affects the model above, since it uses the same layer instance).
    trainable_model.compile(optimizer='rmsprop', loss='mse')

    frozen_model.fit(data, labels)     # this does NOT update the weights of `layer`
    trainable_model.fit(data, labels)  # this updates the weights of `layer`
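# --- Sketch (not from the original source): a check that could be appended inside frozen() above to
# verify the behaviour its comments describe. `data` and `labels` are assumed to be NumPy arrays of
# shape (n, 32); they are not defined in the original snippet either.
import numpy as np

data = np.random.rand(64, 32)
labels = np.random.rand(64, 32)

kernel_before = layer.get_weights()[0].copy()
frozen_model.fit(data, labels, epochs=1, verbose=0)
kernel_after = layer.get_weights()[0]
# Expected True per the comments above: frozen_model was compiled while layer.trainable was False.
print(np.allclose(kernel_before, kernel_after))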
def train_model(model: Model, images):
    time_str = datetime.datetime.now().strftime("%y-%m-%d_%H-%M-%S")
    callbacks = [
        keras.callbacks.TensorBoard(f'{LOGS_DIR}{time_str}'),
        TensorBoardImage(f'{LOGS_DIR}{time_str}', "Emojis", images, period=100),
        CheckpointCallback(f'{LOGS_DIR}{time_str}', period=100),
    ]

    model.fit(images,
              images,
              epochs=100000,
              batch_size=len(images),
              # validation_data=(images, images),
              callbacks=callbacks,
              verbose=0)
    model.save(f"../logs/{time_str}/model.h5")
def attn_many_to_one(dataset_object: LSTM_data): X_train, X_test, Y_train, Y_test = dataset_object.get_memory() X_train, X_test = X_train[:, :, :-12], X_test[:, :, :-12] i = Input(shape=(X_train.shape[1], X_train.shape[2])) att_in = LSTM(NEURONS, return_sequences=True, activation=ACTIVATION, recurrent_activation="sigmoid", activity_regularizer=regularizers.l2(L2), bias_regularizer=regularizers.l2(BIAIS_REG), )(i) att_in = LSTM(NEURONS, return_sequences=True, activation=ACTIVATION, recurrent_activation="sigmoid", activity_regularizer=regularizers.l2(L2), bias_regularizer=regularizers.l2(BIAIS_REG), )(att_in) att_in = LSTM(NEURONS, return_sequences=True, activation=ACTIVATION, recurrent_activation="sigmoid", activity_regularizer=regularizers.l2(L2), bias_regularizer=regularizers.l2(BIAIS_REG), )(att_in) att_out = attention()(att_in) att_out = Dropout(DROPOUT)(att_out) outputs = Dense(1, activation='relu', trainable=True, bias_regularizer=regularizers.l2(BIAIS_REG), activity_regularizer=regularizers.l2(L2) )(att_out) model = Model(inputs=[i], outputs=[outputs]) optim = Adam() model.compile(optimizer=optim, loss=['mean_squared_error'] ) # Fitting the RNN to the Training set history = model.fit(X_train, Y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_test, Y_test), callbacks=[EARLY_STOP, REDUCE_LR] ) model.save("data/weights/attn_based_lstm_no_senti") plot_train_loss(history) evaluate(model,X_test,Y_test, dataset_object,name="attn_evaluate", senti="no")
def run_model(model_fn, optimizer='adam', loss='binary_crossentropy',
              steps_per_epoch=1, epochs=1):
    inputs, outputs = model_fn(128, 128, 1)
    _model = Model(inputs=[inputs], outputs=[outputs])
    _model.compile(optimizer=optimizer, loss=loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Write the session graph to the logs.
        tf.summary.FileWriter('./logs/', sess.graph)
        _model.fit(TensorFeed().build_dataset().dataset,
                   steps_per_epoch=steps_per_epoch,
                   epochs=epochs)
    return _model
def test_loss_on_model_fit(self):
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)
    model.add_loss(MAE()(targets, outputs))
    model.add_loss(math_ops.reduce_mean(mae(targets, outputs)))
    model.compile(
        optimizer_v2.gradient_descent.SGD(0.05),
        run_eagerly=testing_utils.should_run_eagerly())

    history = model.fit([self.x, self.y], batch_size=3, epochs=5)
    self.assertAllClose(history.history['loss'], [2., 1.8, 1.6, 1.4, 1.2], 1e-3)
def test_loss_with_sample_weight_on_model_fit(self):
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    sw = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets, sw], outputs)
    model.add_loss(MAE()(targets, outputs, sw))
    model.add_loss(3 * math_ops.reduce_mean(sw * mae(targets, outputs)))
    model.compile(
        optimizer_v2.gradient_descent.SGD(0.025),
        run_eagerly=testing_utils.should_run_eagerly())

    history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5)
    self.assertAllClose(history.history['loss'], [4., 3.6, 3.2, 2.8, 2.4], 1e-3)
def train_model(model: Model, epochs: int) -> dict:
    model.compile(optimizer='adam',
                  loss='mean_squared_error',
                  metrics=[metrics.CategoricalAccuracy(),
                           metrics.Recall(),
                           metrics.Precision()])
    y_train = to_categorical(globals()['TRAINING_LABELS'])
    y_validate = to_categorical(globals()['VALIDATION_LABELS'])
    print(y_train.shape)
    history = model.fit(x=globals()['TRAINING_DATA'],
                        y=y_train,
                        batch_size=32,
                        epochs=epochs,
                        validation_data=(globals()['VALIDATION_DATA'], y_validate),
                        validation_batch_size=32,
                        validation_freq=1)
    return history.history
def train_autoencoder(x: np.ndarray, cfg: dict, autoencoder: Model) -> History:
    r"""Train an already built AE model on new data.

    :param x: The data the AE shall be trained on.
    :param cfg: ConfigurationSpace values that were used to construct this AE.
    :param autoencoder: The constructed AE model.
    :return: The training history.
    """
    callbacks = None
    if cfg['ae_type'] == 'deep_ksparse':
        callbacks = [UpdateKSparseLevel()]

    return autoencoder.fit(x, x,
                           callbacks=callbacks,
                           epochs=cfg['epochs'],
                           batch_size=BATCH_SIZE)
class GenericModel: @staticmethod def load_from(path): model = GenericModel() model.model = load_model(path) return model def __init__(self): self.model = None self.registered_callbacks = [] self.id = 'generic_model' self.time = round(time()) self.desc = None """config = ConfigProto() config.gpu_options.per_process_gpu_memory_fraction = 0.40 config.gpu_options.allow_growth = True session = InteractiveSession(config=config)""" def build_model(self): img_input = Input(self.get_input_shape()) last_layer = self.model_structure(img_input) self.model = Model(img_input, last_layer) self.model.summary() def compile(self, loss_function, metric_functions=None, optimizer=Adam(1e-3, epsilon=1e-6)): self.require_model_loaded() return self.model.compile(loss=loss_function, optimizer=optimizer, metrics=metric_functions) def model_structure(self, input_img): raise NotImplementedError def get_input_shape(self): raise NotImplementedError def register_std_callbacks(self, tensorboard_logs_folder=None, checkpoint_path=None): self.require_model_loaded() run_id = str(time()) if self.desc is not None: run_id += "_" + self.desc folder_id = os.path.join(self.id, run_id) if tensorboard_logs_folder is not None: self.registered_callbacks.append( TensorBoard(log_dir=os.path.join(tensorboard_logs_folder, folder_id), histogram_freq=0, write_graph=True, write_images=True)) if checkpoint_path is not None: store_path = os.path.join(checkpoint_path, folder_id) if not os.path.exists(store_path): os.makedirs(store_path) store_path = os.path.join( store_path, 'e{epoch:02d}-l{loss:.4f}-v{val_loss:.4f}.ckpt') print("Storing to %s" % store_path) self.registered_callbacks.append( ModelCheckpoint(store_path, monitor='val_loss', verbose=1, period=1, save_best_only=False, mode='min')) def train_with_generator(self, training_data_generator, epochs, steps_per_epoch, validation_data=None): self.model.fit(training_data_generator, use_multiprocessing=True, workers=4, steps_per_epoch=steps_per_epoch, callbacks=self.registered_callbacks, epochs=epochs, verbose=1, **({} if validation_data is None else { "validation_data": validation_data })) def require_model_loaded(self): if self.model is None: raise ValueError("Model is not build yet") def load_weights(self, path): self.require_model_loaded() return self.model.load_weights(path) def predict(self, batch): self.require_model_loaded() return self.model.predict(batch)
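# --- Sketch (not from the original source): a hypothetical concrete subclass of GenericModel above. ---
# GenericModel leaves model_structure() and get_input_shape() abstract; a subclass only has to
# describe the graph between the input tensor and the last layer. The Conv2D/MaxPooling2D/UpSampling2D
# layers are assumed to be imported from keras.layers alongside the imports the original file already uses.
class TinyAutoencoder(GenericModel):
    def __init__(self):
        super().__init__()
        self.id = 'tiny_autoencoder'   # used to build the TensorBoard/checkpoint folder id

    def get_input_shape(self):
        return (64, 64, 3)

    def model_structure(self, input_img):
        x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
        x = MaxPooling2D((2, 2))(x)
        x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
        x = UpSampling2D((2, 2))(x)
        return Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

# net = TinyAutoencoder()
# net.build_model()
# net.compile(loss_function='mse')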
def train(review_data): ################################################################ # declare input embeddings to the model #User input user_id_input = Input(shape=[1], name='user') #Item Input item_id_input = Input(shape=[1], name='item') price_id_input = Input(shape=[1], name='price') title_id_input = Input(shape=[1], name='title') # define the size of embeddings as a parameter # ****H: size_of_embedding - 5, 10 , 15, 20, 50 size_of_embedding = 15 user_embedding_size = size_of_embedding item_embedding_size = size_of_embedding price_embedding_size = size_of_embedding title_embedding_size = size_of_embedding # apply an embedding layer to all inputs user_embedding = Embedding(output_dim=user_embedding_size, input_dim=users.shape[0], input_length=1, name='user_embedding')(user_id_input) item_embedding = Embedding(output_dim=item_embedding_size, input_dim=items_reviewed.shape[0], input_length=1, name='item_embedding')(item_id_input) price_embedding = Embedding(output_dim=price_embedding_size, input_dim=price.shape[0], input_length=1, name='price_embedding')(price_id_input) title_embedding = Embedding(output_dim=title_embedding_size, input_dim=titles.shape[0], input_length=1, name='title_embedding')(title_id_input) # reshape from shape (batch_size, input_length,embedding_size) to (batch_size, embedding_size). user_vecs = Reshape([user_embedding_size])(user_embedding) item_vecs = Reshape([item_embedding_size])(item_embedding) price_vecs = Reshape([price_embedding_size])(price_embedding) title_vecs = Reshape([title_embedding_size])(title_embedding) ################################################################ # Concatenate the item embeddings : item_vecs_complete = Concatenate()([item_vecs, price_vecs, title_vecs]) # Concatenate user and item embeddings and use them as features for the neural network: input_vecs = Concatenate()([user_vecs, item_vecs_complete ]) # can be changed by Multiply #input_vecs = Concatenate()([user_vecs, item_vecs]) # can be changed by Multiply # Multiply user and item embeddings and use them as features for the neural network: #input_vecs = Multiply()([user_vecs, item_vecs]) # can be changed by concat # Dropout is a technique where randomly selected neurons are ignored during training to prevent overfitting input_vecs = Dropout(0.1)(input_vecs) # Check one dense 128 or two dense layers (128,128) or (128,64) or three denses layers (128,64,32)) # First layer # Dense(128) is a fully-connected layer with 128 hidden units. # Use rectified linear units (ReLU) f(x)=max(0,x) as an activation function. 
x = Dense(128, activation='relu')(input_vecs) x = Dropout(0.1)(x) # Add droupout or not # To improve the performance # Next Layers #x = Dense(128, activation='relu')(x) # Add dense again or not x = Dense(64, activation='relu')(x) # Add dense again or not x = Dropout(0.1)(x) # Add droupout or not # To improve the performance x = Dense(32, activation='relu')(x) # Add dense again or not # x = Dropout(0.1)(x) # Add droupout or not # To improve the performance # The output y = Dense(1)(x) ################################################################ model = Model( inputs=[user_id_input, item_id_input, price_id_input, title_id_input], outputs=y) ################################################################ # ****H: loss # ****H: optimizer model.compile(loss='mse', optimizer="adam") ################################################################ save_path = "./" mytime = time.strftime("%Y_%m_%d_%H_%M") # modname = 'dense_2_15_embeddings_2_epochs' + mytime modname = 'dense_2_15_embeddings_2_epochs' thename = save_path + '/' + modname + '.h5' mcheck = ModelCheckpoint(thename, monitor='val_loss', save_best_only=True) ################################################################ # ****H: batch_size # ****H: epochs # ****H: # ****H: history = model.fit([ ratings_train["user_id"], ratings_train["item_id"], ratings_train["price_id"], ratings_train["title_id"] ], ratings_train["score"], batch_size=64, epochs=2, validation_split=0.2, callbacks=[mcheck], shuffle=True) print("MSE: ", history.history) return model
class jyHEDModelV1(jyModelBase): def __init__(self): super(jyHEDModelV1, self).__init__() self.__listLayerName = [] self.__pVisualModel = None def structureModel(self): Inputs = layers.Input(shape=self._inputShape, batch_size=self._iBatchSize) Con1 = layers.Conv2D(64, (3, 3), name='Con1', activation='relu', padding='SAME', input_shape=self._inputShape, strides=1)(Inputs) Con2 = layers.Conv2D(64, (3, 3), name='Con2', activation='relu', padding='SAME', strides=1)(Con1) Side1 = sideBranch(Con2, 1) MaxPooling1 = layers.MaxPooling2D((2, 2), name='MaxPooling1', strides=2, padding='SAME')(Con2) # outputs1 Con3 = layers.Conv2D(128, (3, 3), name='Con3', activation='relu', padding='SAME', strides=1)(MaxPooling1) Con4 = layers.Conv2D(128, (3, 3), name='Con4', activation='relu', padding='SAME', strides=1)(Con3) Side2 = sideBranch(Con4, 2) MaxPooling2 = layers.MaxPooling2D((2, 2), name='MaxPooling2', strides=2, padding='SAME')(Con4) # outputs2 Con5 = layers.Conv2D(256, (3, 3), name='Con5', activation='relu', padding='SAME', strides=1)(MaxPooling2) Con6 = layers.Conv2D(256, (3, 3), name='Con6', activation='relu', padding='SAME', strides=1)(Con5) Con7 = layers.Conv2D(256, (3, 3), name='Con7', activation='relu', padding='SAME', strides=1)(Con6) Side3 = sideBranch(Con7, 4) MaxPooling3 = layers.MaxPooling2D((2, 2), name='MaxPooling3', strides=2, padding='SAME')(Con7) # outputs3 Con8 = layers.Conv2D(512, (3, 3), name='Con8', activation='relu', padding='SAME', strides=1)(MaxPooling3) Con9 = layers.Conv2D(512, (3, 3), name='Con9', activation='relu', padding='SAME', strides=1)(Con8) Con10 = layers.Conv2D(512, (3, 3), name='Con10', activation='relu', padding='SAME', strides=1)(Con9) Side4 = sideBranch(Con10, 8) MaxPooling4 = layers.MaxPooling2D((2, 2), name='MaxPooling4', strides=2, padding='SAME')(Con10) # outputs4 Con11 = layers.Conv2D(512, (3, 3), name='Con11', activation='relu', padding='SAME', strides=1)(MaxPooling4) Con12 = layers.Conv2D(512, (3, 3), name='Con12', activation='relu', padding='SAME', strides=1)(Con11) Con13 = layers.Conv2D(512, (3, 3), name='Con13', activation='relu', padding='SAME', strides=1)(Con12) Side5 = sideBranch(Con13, 16) Fuse = layers.Concatenate(axis=-1)([Side1, Side2, Side3, Side4, Side5]) # learn fusion weight Fuse = layers.Conv2D(1, (1, 1), name='Fuse', padding='SAME', use_bias=False, activation=None)(Fuse) output1 = layers.Activation('sigmoid', name='output1')(Side1) output2 = layers.Activation('sigmoid', name='output2')(Side2) output3 = layers.Activation('sigmoid', name='output3')(Side3) output4 = layers.Activation('sigmoid', name='output4')(Side4) output5 = layers.Activation('sigmoid', name='output5')(Side5) output6 = layers.Activation('sigmoid', name='output6')(Fuse) outputs = [output1, output2, output3, output4, output5, output6] self._pModel = Model(inputs=Inputs, outputs=outputs) pAdam = optimizers.adam(lr=0.0001) self._pModel.compile(loss={ 'output6': classBalancedSigmoidCrossEntropy }, optimizer=pAdam) # self._pModel.summary() def startTrain(self, listDS, iMaxLen, iBatchSize): itrTrain = tf.compat.v1.data.make_one_shot_iterator(listDS[0]) itrValid = tf.compat.v1.data.make_one_shot_iterator(listDS[1]) iStepsPerEpochTrain = int(iMaxLen[0] / iBatchSize[0]) iStepsPerEpochValid = int(iMaxLen[1] / iBatchSize[1]) self._pModel.fit(itrTrain, validation_data=itrValid, epochs=self._iEpochs, callbacks=[self._pSaveModel, self._pTensorboard], steps_per_epoch=iStepsPerEpochTrain, validation_steps=iStepsPerEpochValid) def loadWeights(self, strPath): # last = 
tf.train.latest_checkpoint(strPath) # checkPoint = tf.train.load_checkpoint(strPath) self._pModel.load_weights(strPath) # visual model outputs = [] for myLayer in self._pModel.layers: self.__listLayerName.append(myLayer.name) outputs.append(myLayer.output) # print(self.__pModel.layers[0]) # self.__pVisualModel = Model(self.__pModel.inputs, outputs=outputs) self.__pVisualModel = Model(self._pModel.inputs, outputs=self._pModel.outputs) return self.__pVisualModel def predict(self, IMG): # pImage = open(IMG, 'rb').read() # tensorIMG = tf.image.decode_jpeg(pImage) pIMG = image.array_to_img(IMG)# .resize((256, 144)) tensorIMG = image.img_to_array(pIMG) x = np.array(tensorIMG / 255.0) # show image iColumn = 4 # generate window plt.figure(num='Input') # plt.subplot(1, 1, 1) plt.imshow(x) # imagetest = x x = np.expand_dims(x, axis=0) # pyplot.imshow(x) time1 = datetime.datetime.now() outputs = self.__pVisualModel.predict(x) time2 = datetime.datetime.now() print(time2 - time1) i = 100 listOutput = [] for i in range(len(outputs)): outputShape = outputs[i].shape singleOut = outputs[i].reshape(outputShape[1], outputShape[2], outputShape[3]) # singleOut *= 255 listOutput.append(singleOut) singleOut = listOutput[-1] singleOut[singleOut > 0.5] = 1 listOutput[-1] = singleOut return listOutput ''' for output in outputs: # plt.figure(num='%s' % str(i)) outputShape = output.shape singleOut = output.reshape(outputShape[1], outputShape[2], outputShape[3]) singleOut *= 255 if outputShape[3] == 1: # test = x - output # test = np.abs(test) # return mysum # plt.subplot(1, 1, 1) # plt.imshow(singleOut, camp='gray') # cv2.imwrite('D:\wyc\Projects\TrainDataSet\HED\Result/%s.jpg' % str(i), singleOut) return singleOut # i += 1 # plt.show() ''' def getModelConfig(self): return self._iBatchSize
class PPO: def __init__(self, n_actions, n_features, actor_lr=0.0001, critic_lr=0.0001, reward_decay=0.9, l2=0.001, loss_clipping=0.2, target_update_alpha=0.9): self.n_actions = n_actions self.n_features = n_features self.actor_lr = actor_lr # 学习率 self.critic_lr = critic_lr self.gamma = reward_decay # reward 递减率 self.states, self.actions, self.rewards, self.states_, self.dones, self.v_by_trace = [], [], [], [], [], [] # V(s)=r+g*V(s_) self.l2 = l2 self.loss_clipping = loss_clipping self.target_update_alpha = target_update_alpha # 模型参数平滑因子 self._build_critic() self.actor = self._build_actor() self.actor_old = self._build_actor() self.actor_old.set_weights(self.actor.get_weights()) self.dummy_advantage = np.zeros((1, 1)) self.dummy_old_prediction = np.zeros((1, self.n_actions)) def _build_critic(self): inputs = Input(shape=(self.n_features, )) x = Dense(32, activation='relu', kernel_regularizer=l2(self.l2))(inputs) x = Dense(16, activation='relu', kernel_regularizer=l2(self.l2))(x) output = Dense(1, kernel_regularizer=l2(self.l2))(x) self.critic = Model(inputs=inputs, outputs=output) self.critic.compile(optimizer=Adam(lr=self.critic_lr), loss='mean_squared_error', metrics=['accuracy']) def _build_actor(self): state = Input(shape=(self.n_features, ), name="state") advantage = Input(shape=(1, ), name="Advantage") old_prediction = Input(shape=(self.n_actions, ), name="Old_Prediction") x = Dense(32, activation='relu', kernel_regularizer=l2(self.l2))(state) x = Dense(16, activation='relu', kernel_regularizer=l2(self.l2))(x) policy = Dense(self.n_actions, activation='softmax', kernel_regularizer=l2(self.l2))(x) model = Model(inputs=[state, advantage, old_prediction], outputs=policy) model.compile(optimizer=Adam(lr=self.actor_lr), loss=self.proximal_policy_optimization_loss( advantage=advantage, old_prediction=old_prediction)) return model def proximal_policy_optimization_loss(self, advantage, old_prediction): def loss(y_true, y_pred): prob = y_true * y_pred old_prob = y_true * old_prediction r = prob / (old_prob + 1e-10) return -K.mean( K.minimum( r * advantage, K.clip(r, min_value=1 - self.loss_clipping, max_value=1 + self.loss_clipping) * advantage)) # + 0.2 * (prob * K.log(prob + 1e-10))) return loss def choose_action(self, observation, is_train_mode=True): observation = np.array(observation) observation = observation[np.newaxis, :] action_probs = self.actor.predict( [observation, self.dummy_advantage, self.dummy_old_prediction]) # print('action_probs', action_probs) if is_train_mode: action = int( np.random.choice(range(action_probs.shape[1]), p=np.squeeze(action_probs))) # 加入了随机性 else: action = int(np.squeeze(np.argmax(action_probs, axis=1))) return action def store_transition(self, s, a, r, s_, d): self.states.append(s) self.actions.append(a) self.rewards.append(r) self.states_.append(s_) self.dones.append(d) def learn(self): # print('learn: sample length-',len(self.actions)) # print('learn: states-',self.states) # print('learn: actions-',self.actions) self.cal_v_by_traceback() b_s, b_a, b_vt = np.array(self.states), np.array( self.actions), np.array(self.v_by_trace) b_v = self.get_v(b_s) # print('b_s:{}'.format(self.states)) # print('b_a:{}'.format(self.actions)) # print('b_r:{}'.format(self.rewards)) # print('b_d:{}'.format(self.dones)) # print('b_vt:{}'.format(self.v_by_trace)) # print('b_v:{}'.format(b_v)) b_adv = b_vt - b_v # 可以对adv做标准化 # b_adv = (b_adv - np.mean(b_adv)) / (np.std(b_adv) + 1e-10) b_old_prediction = self.get_old_prediction(b_s) b_a_onehot = np.zeros((b_a.shape[0], 
self.n_actions)) b_a_onehot[:, b_a.flatten()] = 1 # print('b_adv:{}'.format(b_adv)) # print('b_old_prediction:{}'.format(b_old_prediction)) history = self.actor.fit(x=[b_s, b_adv, b_old_prediction], y=b_a_onehot, epochs=5, verbose=0) # print('actor_loss_mean:{}'.format(history.history['loss'])) actor_loss_mean = np.mean(history.history['loss']) self.critic.fit(x=b_s, y=b_vt, epochs=5, verbose=0) # critic目标就是让td-error尽可能小 self.states, self.actions, self.rewards, self.states_, self.dones, self.v_by_trace = [], [], [], [], [], [] self.update_target_network() return actor_loss_mean def update_target_network(self): self.actor_old.set_weights(self.target_update_alpha * np.array(self.actor.get_weights()) + (1 - self.target_update_alpha) * np.array(self.actor_old.get_weights())) def get_old_prediction(self, s): s = np.reshape(s, (-1, self.n_features)) v = self.actor_old.predict([ s, np.tile(self.dummy_advantage, (s.shape[0], 1)), np.tile(self.dummy_old_prediction, (s.shape[0], 1)) ]) return v def get_v(self, s): s = np.reshape(s, (-1, self.n_features)) v = np.squeeze(self.critic.predict(s)) return v def cal_v_by_traceback(self): ''' 截断后或episode结束后,通过回溯计算V(s)=r+g*V(s_) :return: ''' # self.v_by_traceback = np.zeros_like(self.rewards) if self.dones[-1]: v = 0 else: s = np.array(self.states_[-1]) v = self.get_v(s) for t in reversed(range(0, len(self.rewards))): v = v * self.gamma + self.rewards[t] self.v_by_trace.append(v) self.v_by_trace.reverse()
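# --- Sketch (not from the original source): driving the PPO agent above with a Gym-style environment. ---
# `env` is hypothetical. Transitions are stored until the episode ends, then learn() computes the
# discounted returns (cal_v_by_traceback), applies the clipped surrogate update
#   L_clip = E[ min(r_t * A_t, clip(r_t, 1 - eps, 1 + eps) * A_t) ],  r_t = pi(a|s) / pi_old(a|s),
# and syncs the old policy via update_target_network().
agent = PPO(n_actions=2, n_features=4)
for episode in range(100):
    s = env.reset()
    done = False
    while not done:
        a = agent.choose_action(s)
        s_, r, done, _ = env.step(a)
        agent.store_transition(s, a, r, s_, done)
        s = s_
    actor_loss = agent.learn()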
data = np.array(
    pandas.read_csv(filepath_or_buffer="iris.data", header=None, nrows=150))
output = []
train_output = []
for i in range(150):
    if data[i, 4] == "Iris-setosa":
        output.append([1, 0, 0])
    if data[i, 4] == "Iris-versicolor":
        output.append([0, 1, 0])
    if data[i, 4] == "Iris-virginica":
        output.append([0, 0, 1])

train_input = np.concatenate(
    (data[0:40, 0:4], data[50:90, 0:4], data[100:140, 0:4]), axis=0)
train_output = np.concatenate((output[0:40], output[50:90], output[100:140]),
                              axis=0)
test_input = np.concatenate(
    (data[40:50, 0:4], data[90:100, 0:4], data[140:150, 0:4]), axis=0)

print(train_input.shape)
print(train_output.shape)

model.fit(x=train_input, y=train_output, epochs=10000)
model.save("model.hdf5")

# predict = model.predict(x=test_input)
#
# # Get the maximum values of each column i.e. along axis 0
# maxInColumns = np.amax(predict, axis=0)
# print('Max value of every column: ', maxInColumns)
# print(predict)
class NN: """ The biggest change for the duelling DQN is, that we must define a more complex DQN architecture The architecture must define our Value and Advantage Layers The way we define our model: <<<>>> The Keras functional API is a way to create models that are more flexible than the tf.keras.Sequential API. The functional API can handle models with NON-LINEAR topology, SHARED layers, and even MULTIPLE inputs or outputs. Read more here: https://keras.io/guides/functional_api/ <<<>>> """ def __init__(self, env, alpha: float = 0.001, decay: float = 0.0001): """ We initialize our functional model, therefore we need Input Shape and Output Shape :param env: :param alpha: :param decay: """ self.alpha = alpha self.decay = decay self.model = None # new to D-DDQN self.init_model(env.observation_space.shape[0], env.action_space.n) def init_model(self, input_shape: int, n_actions: int): inp = Input(shape=(input_shape, )) layer_shared1 = Dense(64, activation='relu')(inp) layer_shared1 = BatchNormalization()(layer_shared1) layer_shared2 = Dense(64, activation='relu')(layer_shared1) layer_shared2 = BatchNormalization()(layer_shared2) layer_v1 = Dense(64, activation='relu')(layer_shared2) layer_v1 = BatchNormalization()(layer_v1) layer_a1 = Dense(64, activation='relu')(layer_shared2) layer_a1 = BatchNormalization()(layer_a1) # the value layer ouput is a scalar value layer_v2 = Dense(1, activation='linear')(layer_v1) # The advantage function subtracts the value of the state from the Q # function to obtain a relative measure of the importance of each action. layer_a2 = Dense(n_actions, activation='linear')(layer_a1) # the q layer combines the two streams of value and advantage function # the lambda functional layer can perform lambda expressions on keras layers # read more here : https://keras.io/api/layers/core_layers/lambda/ # the lambda equation is defined in https://arxiv.org/pdf/1511.06581.pdf on equation (9) layer_q = Lambda(lambda x: x[0][:] + x[1][:] - K.mean(x[1][:]), output_shape=(n_actions, ))([layer_v2, layer_a2]) self.model = Model(inp, layer_q) self.model.compile(optimizer=Adam(lr=self.alpha), loss='mse') def predict(self, *args, **kwargs): """ By wrapping the keras predict method we can handle our net as a standalone object :param args: interface to keras.model.predict :return: prediction """ return self.model.predict(*args, **kwargs) def fit(self, *args, **kwargs): """ By wrapping the keras fit method we can handle our net as a standalone object :param args: interface to keras.model.fit :return: history object """ return self.model.fit(*args, **kwargs) def get_weights(self): """ Passing the arguments to keras get_weights """ return self.model.get_weights() def set_weights(self, *args, **kwargs): """ Passing the arguments to keras set_weights """ self.model.set_weights(*args, *kwargs)
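# --- Note (not from the original source): K.mean(x[1][:]) in the Lambda above averages the advantages
# over every element of the batch as well as over the actions. Equation (9) of the dueling-DQN paper
# subtracts the per-sample mean over actions only. A hedged per-sample variant, using the same Lambda,
# K and layer names as the snippet above:
layer_q = Lambda(
    lambda x: x[0] + x[1] - K.mean(x[1], axis=1, keepdims=True),
    output_shape=(n_actions,))([layer_v2, layer_a2])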
class VBNChromosome: """ Class that wraps the neural network. Includes functionality for Virtual Batch Normalization and the mutation of weights.""" def __init__(self, number_actions=6, input_channels=4): self.input_channels = input_channels self.number_actions = number_actions inputs, outputs = self.construct_layers() self.model = Model(inputs=inputs, outputs=outputs) def construct_layers(self): """ Construct the Mnih et al. DQN architecture.""" inputs = Input(shape=(84, 84, self.input_channels)) layer1 = Conv2D(32, [8, 8], strides=(4, 4), activation="relu")(inputs) layer1 = BatchNormalization(momentum=0.95, center=False, scale=False)(layer1) layer2 = Conv2D(64, [4, 4], strides=(2, 2), activation="relu")(layer1) layer2 = BatchNormalization(momentum=0.95, center=False, scale=False)(layer2) layer3 = Conv2D(64, [3, 3], strides=(1, 1), activation="relu")(layer2) layer3 = BatchNormalization(momentum=0.95, center=False, scale=False)(layer3) layer4 = Flatten()(layer3) layer5 = Dense(512, activation="relu")(layer4) layer5 = BatchNormalization(momentum=0.95, center=False, scale=False)(layer5) action = Dense(self.number_actions, activation="softmax")(layer5) return [inputs], action def virtual_batch_norm(self, samples): """ We apply Batch Normalization on a number of samples. By setting the learning rate to 0 we make sure that the weights and biases are not affected. This method is only ment to be used at the start of training.""" optimizer = tf.keras.optimizers.SGD(learning_rate=0) loss = tf.keras.losses.MeanSquaredError() self.model.compile(loss=loss, optimizer=optimizer) fake_y = np.zeros((len(samples), self.number_actions)) self.model.fit(np.array(samples), fake_y) def get_weights(self, layers=None): """ Retrieve all the weights of the network. """ layers = layers if layers else self.model.layers layer_weights = chain(*[layer.get_weights() for layer in layers]) flat_weights = [weights.flatten() for weights in layer_weights] return np.concatenate(flat_weights) def set_weights(self, flat_weights, layers=None): """ Set all the weights of the network. """ i = 0 layers = layers if layers else self.model.layers for layer in layers: new_weights = [] for sub_layer in layer.get_weights(): reshaped = flat_weights[i: i + sub_layer.size].reshape(sub_layer.shape) new_weights.append(reshaped) i += sub_layer.size layer.set_weights(new_weights) def get_perturbable_layers(self): """ Get all the perturbable layers of the network. This excludes the BatchNorm layers. """ return [layer for layer in self.model.layers if not isinstance(layer, BatchNormalization)] def get_perturbable_weights(self): """ Get all the perturbable weights of the network. This excludes the BatchNorm weights. """ return self.get_weights(self.get_perturbable_layers()) def set_perturbable_weights(self, flat_weights): """ Set all the perturbable weights of the network. This excludes setting the BatchNorm weights. """ self.set_weights(flat_weights, self.get_perturbable_layers()) def mutate(self, mutation_power): """ Mutate the current weights by adding a normally distributed vector of noise to the current weights. """ weights = self.get_perturbable_weights() noise = np.random.normal(loc=0.0, scale=mutation_power, size=weights.shape) self.set_perturbable_weights(weights + noise) return noise def determine_actions(self, inputs): """ Choose an action based on the pixel inputs. We do this by simply selecting the action with the highest outputted value. """ actions = self.model(inputs) return [np.argmax(action_set) for action_set in actions]
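# --- Sketch (not from the original source): the intended warm-up and mutation flow for VBNChromosome above. ---
# `reference_frames` is a hypothetical stack of (84, 84, input_channels) observations gathered once at the
# start of training, as the docstring of virtual_batch_norm() describes; numpy is assumed imported as np.
chromosome = VBNChromosome(number_actions=6, input_channels=4)
chromosome.virtual_batch_norm(reference_frames)           # fix the BatchNorm statistics with a zero-LR fit
noise = chromosome.mutate(mutation_power=0.02)            # perturb only the non-BatchNorm weights
actions = chromosome.determine_actions(np.expand_dims(reference_frames[0], 0))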
    patience=5, verbose=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                 factor=0.2,
                                                 patience=3,
                                                 min_lr=lr,
                                                 verbose=True)
file_name = 'models/weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5'
save_model = tf.keras.callbacks.ModelCheckpoint('{}'.format(file_name),
                                                monitor='val_loss')
log_dir = "logs/fit/" + dt.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

model_resnet.fit(
    ds_img_train,
    epochs=epochs,
    callbacks=[reduce_lr, early_stopping, save_model, tensorboard],
    validation_data=ds_img_valid,
    validation_steps=len(train_dg) // batch_size)

# print('DenseNet121')
# runs with batch size of <32
# base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=input_shape)
# x2 = Flatten()(base_model.get_output_at(-1))
# x2 = Dense(32, activation='relu')(x2)
# output2 = Dense(lab_dim, activation='sigmoid')(x2)
# model_resnet = Model(base_model.input, output2)
# model_resnet.compile(optimizer=tf.optimizers.Adam(learning_rate=lr),
#                      loss=tf.nn.sigmoid_cross_entropy_with_logits, metrics=['accuracy'])
# early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
# reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=lr)
# file_name = 'weights-improvement-{epoch:02d}-{val_loss:.2f}.hdf5'
# save_model = tf.keras.callbacks.ModelCheckpoint('{}'.format(file_name), monitor='val_loss')
question_enc = np.array([
    x for x in df['question'].map(
        lambda x: encode_sentence(x, max_question)).values
])
answer_enc = np.array([
    x for x in df['answer'].map(lambda x: encode_sentence(x, max_answer)).values
])
print(df)

a = Input(shape=(max_question, WORD_SIZE))
b = Dense(1024)(a)
b = Dense(2048)(b)
b = Dense(4096)(b)
c = Dense(WORD_SIZE)(b)

model = Model(inputs=a, outputs=c)
model.compile(loss='mean_squared_error', optimizer='sgd')
model.fit(question_enc, answer_enc, epochs=10)

results = model.predict(question_enc)


def get_words_from_vecs(x):
    return [get_word(vec) for vec in x]


print("\n".join([" ".join(get_words_from_vecs(x)) for x in results]))
y_test = df2['label'].values

inputt = Input(shape=(21,))
x = Dense(units=20, activation='sigmoid')(inputt)
x = Dense(units=18, activation='sigmoid')(x)
x = Dense(units=14, activation='sigmoid')(x)
x = Dense(units=5, activation='softmax')(x)
model = Model(inputs=inputt, outputs=x)

optimizer = SGD(lr=0.01)
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])

print(X_train.shape)
print(y_train.shape)
history = model.fit(x=X_train,
                    y=y_train,
                    epochs=500,
                    validation_split=0.1,
                    shuffle=True,
                    batch_size=512)
print(history)

y_pred = model.predict(testData)
y_pred_bool = np.argmax(y_pred, axis=1)
print(classification_report(y_test, y_pred_bool))
def run_single_test(algorithm_def, gen_train, gen_val, load_weights, freeze_weights, x_test, y_test, lr, batch_size, epochs, epochs_warmup, model_checkpoint, scores, loss, metrics, logging_path, kwargs, clipnorm=None, clipvalue=None, model_callback=None): print(metrics) print(loss) metrics = make_custom_metrics(metrics) loss = make_custom_loss(loss) if load_weights: enc_model = algorithm_def.get_finetuning_model(model_checkpoint) else: enc_model = algorithm_def.get_finetuning_model() pred_model = apply_prediction_model( input_shape=enc_model.outputs[0].shape[1:], algorithm_instance=algorithm_def, **kwargs) outputs = pred_model(enc_model.outputs) model = Model(inputs=enc_model.inputs[0], outputs=outputs) print_flat_summary(model) if epochs > 0: callbacks = [TerminateOnNaN()] logging_csv = False if logging_path is not None: logging_csv = True logging_path.parent.mkdir(exist_ok=True, parents=True) logger_normal = CSVLogger(str(logging_path), append=False) logger_after_warmup = LogCSVWithStart( str(logging_path), start_from_epoch=epochs_warmup, append=True) if freeze_weights or load_weights: enc_model.trainable = False if freeze_weights: print(("-" * 10) + "LOADING weights, encoder model is completely frozen") if logging_csv: callbacks.append(logger_normal) elif load_weights: assert epochs_warmup < epochs, "warmup epochs must be smaller than epochs" print(("-" * 10) + "LOADING weights, encoder model is trainable after warm-up") print(("-" * 5) + " encoder model is frozen") w_callbacks = list(callbacks) if logging_csv: w_callbacks.append(logger_normal) model.compile(optimizer=get_optimizer(clipnorm, clipvalue, lr), loss=loss, metrics=metrics) model.fit( x=gen_train, validation_data=gen_val, epochs=epochs_warmup, callbacks=w_callbacks, ) epochs = epochs - epochs_warmup enc_model.trainable = True print(("-" * 5) + " encoder model unfrozen") if logging_csv: callbacks.append(logger_after_warmup) else: print(("-" * 10) + "RANDOM weights, encoder model is fully trainable") if logging_csv: callbacks.append(logger_normal) # recompile model model.compile(optimizer=get_optimizer(clipnorm, clipvalue, lr), loss=loss, metrics=metrics) model.fit(x=gen_train, validation_data=gen_val, epochs=epochs, callbacks=callbacks) model.compile(optimizer=get_optimizer(clipnorm, clipvalue, lr), loss=loss, metrics=metrics) y_pred = model.predict(x_test, batch_size=batch_size) scores_f = make_scores(y_test, y_pred, scores) if model_callback: model_callback(model) # cleanup del pred_model del enc_model del model algorithm_def.purge() K.clear_session() for i in range(15): gc.collect() for s in scores_f: print("{} score: {}".format(s[0], s[1])) return scores_f
    save_best_only=True)
lr_scheduler = LearningRateScheduler(lr_schedule)
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)
callbacks = [checkpoint, lr_reducer, lr_scheduler]

# choose training configs
sgd = SGD(lr=0.001, momentum=0.9)
model.compile(optimizer=sgd,
              loss=categorical_crossentropy,
              metrics=['accuracy'])
model.summary()

# train
hist = model.fit(x_train, y_train,
                 batch_size=100,
                 epochs=10,
                 shuffle=True,
                 verbose=1,
                 validation_split=0.1,
                 callbacks=callbacks)

# test
model.evaluate(x_test, y_test, verbose=1)
def main(arg): directory = Path('./saved_predictions/') directory.mkdir(exist_ok=True) directory = Path('./saved_models/') directory.mkdir(exist_ok=True) directory = Path('./training_checkpoints/') directory.mkdir(exist_ok=True) input_yx_size = tuple(args.input_yx_size) batch_size = args.batch_size epochs = args.epochs learning_rate = args.learning_rate num_test_samples = args.num_test_samples save_weights = args.save_weights every = args.every num_samples = args.num_samples save_train_prediction = args.save_train_prediction save_test_prediction = args.save_test_prediction verbose = args.verbose validation_ratio = args.validation_ratio y_axis_len, x_axis_len = input_yx_size decay = args.decay decay = args.decay load_weights = args.load_weights y_axis_len, x_axis_len = input_yx_size num_points = y_axis_len * x_axis_len is_flat_channel_in = args.is_flat_channel_in input_points = Input(shape=(num_points, 4)) x = input_points x = Convolution1D(64, 1, activation='relu', input_shape=(num_points, 4))(x) x = BatchNormalization()(x) x = Convolution1D(128, 1, activation='relu')(x) x = BatchNormalization()(x) x = Convolution1D(512, 1, activation='relu')(x) x = BatchNormalization()(x) x = MaxPooling1D(pool_size=num_points)(x) x = Dense(512, activation='relu')(x) x = BatchNormalization()(x) x = Dense(256, activation='relu')(x) x = BatchNormalization()(x) x = Dense(16, weights=[ np.zeros([256, 16]), np.array([1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1]).astype(np.float32) ])(x) input_T = Reshape((4, 4))(x) # forward net g = Lambda(mat_mul, arguments={'B': input_T})(input_points) g = Convolution1D(64, 1, input_shape=(num_points, 3), activation='relu')(g) g = BatchNormalization()(g) g = Convolution1D(64, 1, input_shape=(num_points, 3), activation='relu')(g) g = BatchNormalization()(g) # feature transformation net f = Convolution1D(64, 1, activation='relu')(g) f = BatchNormalization()(f) f = Convolution1D(128, 1, activation='relu')(f) f = BatchNormalization()(f) f = Convolution1D(128, 1, activation='relu')(f) f = BatchNormalization()(f) f = MaxPooling1D(pool_size=num_points)(f) f = Dense(512, activation='relu')(f) f = BatchNormalization()(f) f = Dense(256, activation='relu')(f) f = BatchNormalization()(f) f = Dense(64 * 64, weights=[ np.zeros([256, 64 * 64]), np.eye(64).flatten().astype(np.float32) ])(f) feature_T = Reshape((64, 64))(f) # forward net g = Lambda(mat_mul, arguments={'B': feature_T})(g) seg_part1 = g g = Convolution1D(64, 1, activation='relu')(g) g = BatchNormalization()(g) g = Convolution1D(32, 1, activation='relu')(g) g = BatchNormalization()(g) g = Convolution1D(32, 1, activation='relu')(g) g = BatchNormalization()(g) # global_feature global_feature = MaxPooling1D(pool_size=num_points)(g) global_feature = Lambda(exp_dim, arguments={'num_points': num_points})(global_feature) # point_net_seg c = concatenate([seg_part1, global_feature]) """ c = Convolution1D(512, 1, activation='relu')(c) c = BatchNormalization()(c) c = Convolution1D(256, 1, activation='relu')(c) c = BatchNormalization()(c) c = Convolution1D(128, 1, activation='relu')(c) c = BatchNormalization()(c) c = Convolution1D(128, 1, activation='relu')(c) c = BatchNormalization()(c) """ c = Convolution1D(256, 1, activation='relu')(c) c = BatchNormalization()(c) c = Convolution1D(128, 4, activation='relu', strides=4)(c) c = BatchNormalization()(c) c = Convolution1D(128, 4, activation='relu', strides=4)(c) c = BatchNormalization()(c) c = Convolution1D(128, 4, activation='relu', strides=4)(c) c = BatchNormalization()(c) c = 
Convolution1D(64, 4, activation='relu', strides=4)(c) c = BatchNormalization()(c) c = Convolution1D(64, 4, activation='relu', strides=4)(c) c = BatchNormalization()(c) c = Convolution1D(32, 1, activation='relu')(c) c = BatchNormalization()(c) """ c = Convolution1D(128, 4, activation='relu',strides=4)(c) c = Convolution1D(64, 4, activation='relu',strides=4)(c) c = Convolution1D(32, 4, activation='relu',strides=4)(c) c = Convolution1D(16, 1, activation='relu')(c) c = Convolution1D(1, 1, activation='relu')(c) """ #c = tf.keras.backend.squeeze(c,3); c = CuDNNLSTM(64, return_sequences=False)(c) #c =CuDNNLSTM(784, return_sequences=False)) #c =CuDNNLSTM(256, return_sequences=False)) #c = Reshape([16,16,1])(c) c = Reshape([8, 8, 1])(c) c = Conv2DTranspose(8, (3, 3), padding="same", activation="relu", strides=(2, 2))(c) c = Conv2DTranspose(8, (3, 3), padding="valid", activation="relu")(c) #c =Dropout(0.4)) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(16, (3, 3), padding="valid", activation="relu")(c) #c =Dropout(0.4)) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(32, (3, 3), padding="valid", activation="relu")(c) #c =Dropout(0.4)) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(32, (3, 3), padding="valid", activation="relu")(c) #c =Dropout(0.4)) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(32, (3, 3), padding="valid", activation="relu")(c) #c =Dropout(0.4)) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(64, (3, 3), padding="valid", activation="relu")(c) #c =Dropout(0.4)) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(64, (3, 3), padding="valid", activation="relu")(c) #c =Dropout(0.4)) c = tf.keras.layers.BatchNormalization()(c) #c =Dropout(0.4)) c = Conv2DTranspose(128, (3, 3), padding="same", activation="relu", strides=(2, 2))(c) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(128, (3, 3), padding="valid", activation="relu")(c) #c =Dropout(0.4)) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(128, (3, 3), padding="same", activation="relu", strides=(2, 2))(c) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(128, (3, 3), padding="valid", activation="relu")(c) c = tf.keras.layers.BatchNormalization()(c) #c =Dropout(0.4)) #c =tf.keras.layers.BatchNormalization()) c = Conv2DTranspose(64, (3, 3), padding="same", strides=(4, 2))(c) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(32, (3, 3), padding="valid", activation="relu")(c) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(32, (3, 3), padding="valid", activation="relu")(c) c = tf.keras.layers.BatchNormalization()(c) #c =Dropout(0.4)) c = Conv2DTranspose(32, (3, 3), padding="same", activation="relu", strides=(1, 1))(c) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(32, (3, 1), padding="valid", activation="relu")(c) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(32, (3, 1), padding="valid", activation="relu")(c) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(16, (1, 1), padding="valid", activation="relu")(c) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(8, (1, 1), padding="valid", activation="relu")(c) c = tf.keras.layers.BatchNormalization()(c) c = Conv2DTranspose(1, (1, 1), padding="valid")(c) """ c =Conv2DTranspose(4, (1,1),padding="same",activation="relu")) c =Conv2DTranspose(2, (1,1),padding="same",activation="relu")) #c =Dropout(0.4)) c =Conv2DTranspose(1, (1,1),padding="same")) """ prediction = 
tf.keras.layers.Reshape([512, 256])(c) """ c1 ,c2 = tf.split(c,[256,256],axis=1,name="split") complexNum = tf.dtypes.complex( c1, c2, name=None ) complexNum =tf.signal.ifft2d( complexNum, name="IFFT" ) real = tf.math.real(complexNum) imag = tf.math.imag(complexNum) con = concatenate([real,imag]) prediction =tf.keras.layers.Reshape([ 512, 256])(con) """ # define model model = Model(inputs=input_points, outputs=prediction) opt = tf.keras.optimizers.Adam(lr=learning_rate, decay=decay) loss = tf.keras.losses.MeanSquaredError() mertric = ['mse'] if args.loss is "MAE": loss = tf.keras.losses.MeanAbsoluteError() mertric = ['mae'] model.compile( loss=loss, optimizer=opt, metrics=mertric, ) model.summary() if load_weights: model.load_weights('./training_checkpoints/cp-best_loss.ckpt') #edit data_loader.py if you want to play with data input_ks, ground_truth = load_data(num_samples, is_flat_channel_in=is_flat_channel_in) input_ks = input_ks / np.max(input_ks) checkpoint_path = "./training_checkpoints/cp-{epoch:04d}.ckpt" checkpoint_dir = os.path.dirname(checkpoint_path) # Create checkpoint callback #do you want to save the model's wieghts? if so set this varaible to true cp_callback = [] NAME = "NUFFT_NET" tensorboard = TensorBoard(log_dir="logs/{}".format(NAME)) cp_callback.append(tensorboard) if save_weights: cp_callback.append( tf.keras.callbacks.ModelCheckpoint(checkpoint_dir, save_weights_only=True, verbose=verbose, period=every)) if args.is_train: model.fit(input_ks, ground_truth, batch_size=batch_size, epochs=epochs, validation_split=validation_ratio, callbacks=cp_callback) if args.name_model is not "": model.save('./saved_mdoels/' + args.name_model) dict_name = './saved_predictions/' #return to image size x_axis_len = int(x_axis_len / 4) np.random.seed(int(time())) if save_train_prediction <= num_samples: rand_ix = np.random.randint(0, num_samples - 1, save_train_prediction) #kspace = np.zeros((save_train_prediction, #y_axis_len,input_ks[rand_ix].shape[1])) kspace = input_ks[rand_ix] if args.save_input: np.save("./saved_predictions/inputs.npy", input_ks[rand_ix]) ground_truth = ground_truth[rand_ix] preds = model.predict(kspace, batch_size=save_train_prediction) for i in range(save_train_prediction): output = np.reshape(preds[i], (y_axis_len * 2, x_axis_len)) output = output * 255 output[np.newaxis, ...] output_gt = ground_truth[i] output_gt[np.newaxis, ...] output = np.concatenate([output, output_gt], axis=0) np.save(dict_name + 'prediction%d.npy' % (i + 1), output) input_ks, ground_truth = load_data( num_test_samples, 'test', is_flat_channel_in=is_flat_channel_in) input_ks = input_ks / np.max(input_ks) if args.is_eval: model.evaluate(input_ks, ground_truth, batch_size, verbose, callbacks=cp_callback) if save_test_prediction <= num_test_samples: rand_ix = np.random.randint(0, num_test_samples - 1, save_test_prediction) kspace = input_ks[rand_ix] if args.save_input: np.save("./saved_predictions/test_inputs.npy", input_ks[rand_ix]) ground_truth = ground_truth[rand_ix] preds = model.predict(kspace, batch_size=save_test_prediction) for i in range(save_test_prediction): output = np.reshape(preds[i], (y_axis_len * 2, x_axis_len)) output = output * 255 output[np.newaxis, ...] output_gt = ground_truth[i] output_gt[np.newaxis, ...] output = np.concatenate([output, output_gt], axis=0) np.save(dict_name + 'test_prediction%d.npy' % (i + 1), output)
def knowledge_transfer(current_student: Model, method: Method, loss: Union[LossType, List[LossType]]) -> \ Tuple[Model, History]: """ Performs KT. :param current_student: the student to be used for the current KT method. :param method: the method to be used for the KT. :param loss: the KT loss to be used. :return: Tuple containing a student Keras model and its training History object. """ kt_logging.debug('Configuring student...') weights = None y_train_adapted = y_train_concat y_val_adapted = y_val_concat metrics = {} if method == Method.DISTILLATION: # Adapt student current_student = kd_student_adaptation(current_student, temperature) # Create KT metrics. metrics = generate_supervised_metrics(method) monitoring_metric = 'val_accuracy' elif method == Method.PKT_PLUS_DISTILLATION: # Adapt student current_student = pkt_plus_kd_student_adaptation(current_student, temperature) # Create importance weights for the different losses. weights = [kd_importance_weight, pkt_importance_weight] if selective_learning: selective_learning_weights = [] for _ in range(n_submodels): selective_learning_weights.extend(weights) weights = selective_learning_weights # Adapt the labels. y_train_adapted.extend(y_train_adapted) y_val_adapted.extend(y_val_adapted) else: # Adapt the labels. y_train_adapted = [y_train_concat, y_train_concat] y_val_adapted = [y_val_concat, y_val_concat] # Create KT metrics. metrics = generate_supervised_metrics(method) monitoring_metric = 'val_concatenate_accuracy' else: # PKT performs KT, but also rotates the space, thus evaluating results has no meaning, # since the neurons representing the classes are not the same anymore. monitoring_metric = 'val_loss' if selective_learning: current_student = selective_learning_student_adaptation(current_student, n_submodels) monitoring_metric = 'val_loss' # Create optimizer. optimizer = initialize_optimizer(optimizer_name, learning_rate, decay, beta1, beta2, rho, momentum, clip_norm, clip_value) # Compile student. current_student.compile(optimizer, loss, metrics, weights) # Initialize callbacks list. kt_logging.debug('Initializing Callbacks...') # Create a temp file, in order to save the model, if needed. tmp_weights_path = None if use_best_model: tmp_weights_path = join(gettempdir(), next(mktemp()) + '.h5') callbacks_list = init_callbacks(monitoring_metric, lr_patience, lr_decay, lr_min, early_stopping_patience, verbosity, tmp_weights_path, selective_learning) # Train student. history = current_student.fit(x_train, y_train_adapted, batch_size=batch_size, callbacks=callbacks_list, epochs=epochs, validation_data=(x_val, y_val_adapted), verbose=verbosity) if exists(tmp_weights_path): # Load best weights and delete the temp file. current_student.load_weights(tmp_weights_path) remove(tmp_weights_path) # Rewind student to its normal state, if necessary. if selective_learning: current_student = selective_learning_student_rewind(current_student, optimizer=optimizer, loss=loss[0], metrics=metrics) if method == Method.DISTILLATION: current_student = kd_student_rewind(current_student) elif method == Method.PKT_PLUS_DISTILLATION: current_student = pkt_plus_kd_rewind(current_student) return current_student, history
    internal = self.FC_1(attention_vector)
    # internal = self.FC_2(internal)
    final_output = self.classification_layer(internal)
    return final_output


# create the model
recurrent_fusion_model = Model()
recurrent_fusion_model.compile(optimizer=keras.optimizers.Adam(lr=lr),
                               loss=sparse_categorical_cross_entropy_loss,
                               metrics=[acc_top_1, acc_top_5])
# build internal tensors
recurrent_fusion_model.fit(*next(train_generator()), batch_size=1, epochs=1, verbose=0)

# get tensorflow saver ready > will be used if a checkpoint found on drive
saver = tf.train.Saver(recurrent_fusion_model.variables)

if checkpoint_found:
    # restore the model from the checkpoint
    log("Model restored")
    eval_globals.best_video_level_accuracy_1 = float(zip_file_name.split("-")[1])
    log("Current Best", eval_globals.best_video_level_accuracy_1)

    saver.restore(tf.keras.backend.get_session(), checkpoints)  # use tensorflow saver
    initial_epoch = int(zip_file_name.split("-")[0])  # get epoch number
y = tf.keras.layers.Dense(units=32,
                          activation='elu',
                          kernel_initializer='he_uniform')(y)
y = tf.keras.layers.Dense(units=2,
                          activation='softmax',
                          kernel_initializer='he_uniform')(y)
wenz_model = Model(inputs=[input1, input2], outputs=y)

adam = Adam(lr=0.02, decay=0.01)
wenz_model.compile(optimizer='adam',
                   loss=tf.keras.losses.BinaryCrossentropy(),
                   metrics=['accuracy'])

checkpoint_path = "training/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

wenz_model.fit(XY, epochs=10, callbacks=[cp_callback])  # add validation training_data?

test_loss, test_acc = wenz_model.evaluate(XYt, verbose=2)
print('\nTest accuracy:', test_acc)

wenz_model.save(
    '/home/pirate/PycharmProjects/SchafkopfAI/models/trained_models/test-wenz-prediction6'
)
store.close()
x2 = []
for i in x:
    x1.append(i[0])
    x2.append(i[1])

y = np.array([7.8, 8.6, 8.7, 7.9, 8.4, 8.9, 10.4, 11.6, 13.9, 15.8])
print(x.shape)
print(y.shape)

with tf.device('/cpu:0'):
    inputs = layers.Input(shape=(2,))
    out = layers.Dense(1,
                       use_bias=False,
                       kernel_initializer=initializers.RandomUniform())(inputs)
    model = Model(inputs=inputs, outputs=out)

dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(3).repeat()
# print(dataset.take(1))

model.compile(
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.0001),
    loss='mean_squared_error')

print_weights = callbacks.LambdaCallback(
    on_epoch_end=lambda batch, logs: print(model.layers[1].get_weights()))

model.summary()
model.fit(dataset, epochs=20, steps_per_epoch=1, callbacks=[print_weights])
# model.fit(dataset,epochs=20,steps_per_epoch=1)
    shared_model(right_input)])
model = Model(inputs=[left_input, right_input], outputs=[malstm_distance])

if gpus >= 2:
    # `multi_gpu_model()` is quite buggy: it breaks the saved model.
    model = tf.keras.utils.multi_gpu_model(model, gpus=gpus)
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.Adam(),
              metrics=['accuracy'])
model.summary()
shared_model.summary()

# Start training
training_start_time = time()
malstm_trained = model.fit([X_train['left'], X_train['right']],
                           Y_train,
                           batch_size=batch_size,
                           epochs=n_epoch)
training_end_time = time()
logging.info("Training time finished.\n%d epochs in %12.2f" %
             (n_epoch, training_end_time - training_start_time))

saver = tf.compat.v1.train.Saver()
session = tf.compat.v1.keras.backend.get_session()
saver.save(session, SESSION_PATH)

model.save(get_model_path(n_epoch, embedding_dim, max_seq_length, n_hidden))

# plot(malstm_trained)