import pickle

import numpy as np
from keras.layers import Input
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import to_categorical


def train_model(build_fn, valid_x, valid_y, epochs, n_layers=1):
    print(epochs, n_layers)
    # dictionaries.p is loaded for parity with the original script but is
    # unused inside this function.
    with open('./dictionaries.p', 'rb') as f:
        dicts = pickle.load(f)
    with open('./train_dataset.p', 'rb') as f:
        training_set = pickle.load(f)

    # INPUT_SHAPE and losses() are module-level definitions not shown here.
    inputs = Input(shape=INPUT_SHAPE)
    preds, maps = build_fn(inputs, n_layers)
    model = Model(inputs=[inputs], outputs=preds)
    optimizer = Adam()
    if not maps:
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
    else:
        model.compile(optimizer=optimizer,
                      loss=losses(maps),
                      metrics=['accuracy'])

    train_x = []
    train_y = []
    for x in training_set:
        train_x.append(x['text'])
        train_y.append(x['author'])
    train_x = np.asarray(train_x)
    train_y = np.expand_dims(to_categorical(np.asarray(train_y)), 1)

    model.summary()
    # shuffle='batch' shuffles in batch-sized chunks (mainly useful for
    # HDF5-backed data); plain shuffle=True is the usual choice for arrays.
    model.fit(train_x, train_y,
              epochs=epochs,
              batch_size=100,
              shuffle='batch',
              validation_data=(valid_x, valid_y))
    model.save('./saved_model')
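# Usage sketch (illustrative only): build_dense_stack is a hypothetical
# build_fn, N_AUTHORS mirrors the constant used later in this file, and
# valid_x / valid_y stand in for a held-out split.
from keras.layers import Dense, Flatten, Reshape

def build_dense_stack(inputs, n_layers):
    x = Flatten()(inputs)
    for _ in range(n_layers):
        x = Dense(128, activation='relu')(x)
    # train_model shapes labels as (batch, 1, N_AUTHORS), so match that here
    preds = Reshape((1, N_AUTHORS))(Dense(N_AUTHORS, activation='softmax')(x))
    return preds, None  # no attention maps -> plain cross-entropy branch

train_model(build_dense_stack, valid_x, valid_y, epochs=15, n_layers=2)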
def create_input_layers(num_inputs, input_size):
    if num_inputs == 1:
        inputs = Input(shape=(input_size,))
        return inputs, inputs
    inputs = [Input(shape=(input_size,)) for _ in range(num_inputs)]
    layer = concatenate(inputs)
    return inputs, layer
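# Usage sketch (names illustrative): the function returns both the raw Input
# tensors, which Model() needs, and the merged tensor to build on.
from keras.layers import Dense
from keras.models import Model

branch_inputs, merged = create_input_layers(num_inputs=3, input_size=64)
x = Dense(32, activation='relu')(merged)
out = Dense(1, activation='sigmoid')(x)
model = Model(inputs=branch_inputs, outputs=out)  # a list when num_inputs > 1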
def build(self, mode, input_shape, config=None, image_nums=5):
    # NOTE: KL / KM are keras.layers / keras.models aliases. In the training
    # and inference branches below, rois, rpn_* tensors, mrcnn_feature_map,
    # global_parsing_map and the input_* tensors are built earlier in this
    # method; that part is elided from this excerpt.
    if not config:
        config = VideoModelConfig()
    assert mode in ["training", "inference"]

    h, w, c = input_shape
    inputs = Input(shape=(h, w, c), name="input_image")
    inputs_image_keys = []
    inputs_mask_keys = []
    for i in range(image_nums):
        inputs_image_keys.append(Input(shape=(h, w, c),
                                       name="input_image_key" + str(i)))
        inputs_mask_keys.append(Input(shape=(h, w, c),
                                      name="input_mask_key" + str(i)))

    outputs = temporal_and_mask_single_propagation(inputs, inputs_image_keys,
                                                   inputs_mask_keys)
    # outputs = temporal_and_mask_multi_propagation(inputs, inputs_image_keys, inputs_mask_keys)
    model = Model(inputs=[inputs] + inputs_image_keys + inputs_mask_keys,
                  outputs=outputs, name="triple_model")
    # model = Model([inputs, inputs_image_keys, inputs_mask_keys], [outputs])
    print(model.summary())
    plot_model(model, "triple_single_model.png")
    # plot_model(model, "triple_multi_model.png")

    if mode == "training":
        # Network Heads
        # TODO: verify that this handles zero padded ROIs
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = \
            fpn_classifier_graph(rois, mrcnn_feature_map, config.IMAGE_SHAPE,
                                 config.POOL_SIZE, config.NUM_CLASSES)
        mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_map,
                                          config.IMAGE_SHAPE,
                                          config.MASK_POOL_SIZE,
                                          config.NUM_CLASSES)

        # TODO: clean up (use tf.identity if necessary)
        output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)

        global_parsing_loss = KL.Lambda(
            lambda x: mrcnn_global_parsing_loss_graph(config.NUM_PART_CLASS, *x),
            name="mrcnn_global_parsing_loss")([input_gt_part, global_parsing_map])

        # Losses
        rpn_class_loss = KL.Lambda(
            lambda x: rpn_class_loss_graph(*x),
            name="rpn_class_loss")([input_rpn_match, rpn_class_logits])
        rpn_bbox_loss = KL.Lambda(
            lambda x: rpn_bbox_loss_graph(config, *x),
            name="rpn_bbox_loss")([input_rpn_bbox, input_rpn_match, rpn_bbox])
        class_loss = KL.Lambda(
            lambda x: mrcnn_class_loss_graph(*x),
            name="mrcnn_class_loss")([target_class_ids, mrcnn_class_logits,
                                      active_class_ids])
        bbox_loss = KL.Lambda(
            lambda x: mrcnn_bbox_loss_graph(*x),
            name="mrcnn_bbox_loss")([target_bbox, target_class_ids, mrcnn_bbox])
        mask_loss = KL.Lambda(
            lambda x: mrcnn_mask_loss_graph(*x),
            name="mrcnn_mask_loss")([target_mask, target_class_ids, mrcnn_mask])

        # Model
        inputs = [input_image, input_image_key1, input_image_key2,
                  input_image_key3, input_key_identity, input_image_meta,
                  input_rpn_match, input_rpn_bbox, input_gt_class_ids,
                  input_gt_boxes, input_gt_masks, input_gt_part]
        if not config.USE_RPN_ROIS:
            inputs.append(input_rois)
        outputs = [rpn_class_logits, rpn_class, rpn_bbox,
                   mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask,
                   rpn_rois, output_rois,
                   rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss,
                   mask_loss, global_parsing_loss]
        model = KM.Model(inputs, outputs, name='aten')
    else:
        # Network Heads
        # Proposal classifier and BBox regressor heads
        mrcnn_class_logits, mrcnn_class, mrcnn_bbox = \
            fpn_classifier_graph(rpn_rois, mrcnn_feature_map,
                                 config.IMAGE_SHAPE, config.POOL_SIZE,
                                 config.NUM_CLASSES)

        # Detections
        # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)]
        # in image coordinates
        detections = DetectionLayer(config, name="mrcnn_detection")(
            [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

        # Convert boxes to normalized coordinates
        # TODO: let DetectionLayer return normalized coordinates to avoid
        # unnecessary conversions
        h, w = config.IMAGE_SHAPE[:2]
        detection_boxes = KL.Lambda(
            lambda x: x[..., :4] / np.array([h, w, h, w]))(detections)

        # Create masks for detections
        mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_map,
                                          config.IMAGE_SHAPE,
                                          config.MASK_POOL_SIZE,
                                          config.NUM_CLASSES)

        global_parsing_prob = KL.Lambda(
            lambda x: post_processing_graph(*x))([global_parsing_map,
                                                  input_image])

        model = KM.Model([input_image, input_image_key1, input_image_key2,
                          input_image_key3, input_key_identity,
                          input_image_meta],
                         [detections, mrcnn_class, mrcnn_bbox, mrcnn_mask,
                          rpn_rois, rpn_class, rpn_bbox, global_parsing_prob],
                         name='aten')

    # Add multi-GPU support.
    if config.GPU_COUNT > 1:
        from utils.parallel_model import ParallelModel
        model = ParallelModel(model, config.GPU_COUNT)

    import platform
    # Renamed from `sys` to avoid shadowing the sys module.
    system = platform.system()
    # if system == "Windows":
    # Use the local `mode` argument (self.mode may not be set at this point).
    if mode == "training":
        plot_model(model, "aten_training.jpg")
    else:
        plot_model(model, "aten_test.png")
    return model
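# Hypothetical driver for build(); the owning class is not named in this
# excerpt, so VideoModel is an assumed wrapper exposing the method.
config = VideoModelConfig()
net = VideoModel()
model = net.build(mode="inference", input_shape=(512, 512, 3),
                  config=config, image_nums=5)
# Inference consumes the current frame plus an image and a mask tensor for
# each of the image_nums key frames.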
label_list = y
left_input = []
right_input = []
targets = []

# Number of pairs per image
pairs = 5

# Let's create the new dataset to train on
for i in range(len(label_list)):
    for _ in range(pairs):
        compare_to = i
        while compare_to == i:  # Make sure it's not comparing to itself
            # Draw from the whole dataset rather than a hard-coded 0-999 range
            compare_to = random.randint(0, len(label_list) - 1)
        left_input.append(image_list[i])
        right_input.append(image_list[compare_to])
        if label_list[i] == label_list[compare_to]:  # They are the same
            targets.append(1.)
        else:  # Not the same
            targets.append(0.)

left_input = np.squeeze(np.array(left_input))
right_input = np.squeeze(np.array(right_input))
targets = np.squeeze(np.array(targets))

# NOTE: validation_data reuses the training pairs here, so the reported
# validation metrics do not measure generalization.
siamese_net.fit([left_input, right_input], targets,
                batch_size=16,
                epochs=10,
                verbose=1,
                validation_data=([left_input, right_input], targets))
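# Sketch: rather than validating on the training pairs, hold out a slice of
# the generated pairs (train_test_split from scikit-learn is assumed to be
# available).
from sklearn.model_selection import train_test_split

idx = np.arange(len(targets))
train_idx, val_idx = train_test_split(idx, test_size=0.2, random_state=42)
siamese_net.fit([left_input[train_idx], right_input[train_idx]],
                targets[train_idx],
                batch_size=16, epochs=10, verbose=1,
                validation_data=([left_input[val_idx], right_input[val_idx]],
                                 targets[val_idx]))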
def create_output_graph(n_features=8, n_features_cat=3, n_graph_layers=0,
                        n_dense_layers=3, n_dense_per_graph_net=1,
                        activation='tanh', do_weighted_sum=True,
                        with_bias=True):
    # Input feature order:
    # [b'PF_dxy', b'PF_dz', b'PF_eta', b'PF_mass', b'PF_puppiWeight',
    #  b'PF_charge', b'PF_fromPV', b'PF_pdgId', b'PF_px', b'PF_py']
    inputs = Input(shape=(maxNPF, n_features), name='input')
    # `slice` here is keras.backend.slice (not the Python builtin): keep the
    # last two features (px, py) of every particle.
    pxpy = Lambda(
        lambda x: slice(x, (0, 0, n_features - 2), (-1, -1, -1)))(inputs)
    if activation == 'prelu':
        activation = PReLU()

    if opt.embedding:
        embeddings = []
        for i_emb in range(n_features_cat):
            input_cat = Input(shape=(maxNPF, 1),
                              name='input_cat{}'.format(i_emb))
            if i_emb == 0:
                inputs = [inputs, input_cat]
            else:
                inputs.append(input_cat)
            embedding = Embedding(
                input_dim=emb_input_dim[i_emb],
                output_dim=emb_out_dim,
                embeddings_initializer=initializers.RandomNormal(
                    mean=0., stddev=0.4 / emb_out_dim),
                name='embedding{}'.format(i_emb))(input_cat)
            # The hard-coded 8 is presumably emb_out_dim.
            embedding = Reshape((maxNPF, 8))(embedding)
            embeddings.append(embedding)
        throughput = Concatenate()([inputs[0]] + [emb for emb in embeddings])

    # x = GlobalExchange()(inputs if not opt.embedding else throughput)
    if opt.embedding:
        x = throughput
    else:
        # Without embeddings, feed the raw input tensor forward (this branch
        # was missing in the original, leaving x undefined).
        x = inputs

    for i_graph in range(n_graph_layers):
        if i_graph > 0:
            x = GlobalExchange()(x)
        for __ in range(n_dense_per_graph_net):
            x = Dense(64, activation=activation,
                      kernel_initializer='lecun_uniform')(x)
            # x = BatchNormalization(momentum=0.8)(x)
        x = GravNet(n_neighbours=20, n_dimensions=4,
                    n_filters=42, n_propagate=18)(x)
        x = BatchNormalization(momentum=0.8)(x)

    dense_layers = []  # [4]
    if do_weighted_sum:
        for i_dense in range(n_dense_layers):
            x = Dense(64 // 2**i_dense, activation=activation,
                      kernel_initializer='lecun_uniform')(x)
            x = BatchNormalization(momentum=0.95)(x)
        # List of weights. Increase to 3 when operating with biases
        # x = Dense(3 if with_bias else 1, activation='linear', kernel_initializer='lecun_uniform')(x)
        # Expect typical weights to not be of order 1 but somewhat smaller,
        # so apply explicit scaling
        x = Dense(3 if with_bias else 1, activation='linear',
                  kernel_initializer=initializers.VarianceScaling(scale=0.02))(x)
        print('Shape of last dense layer', x.shape)
        x = Concatenate()([x, pxpy])
        # x = Flatten()(x)
        x = weighted_sum_layer(with_bias)(x)
    else:
        for i_dense in range(n_dense_layers):
            x = Dense(64 // 2**i_dense, activation=activation,
                      kernel_initializer='lecun_uniform')(x)
        x = Flatten()(x)
        dense_layers = [32, 16, 8]
        dense_activation = 'relu'
        for dense_size in dense_layers:
            x = Dense(dense_size, activation=dense_activation,
                      kernel_initializer='lecun_uniform')(x)

    x = Dense(2, activation='linear', name='output')(x)
    return inputs, x
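# Sketch: wiring the returned graph into a trainable model. maxNPF, opt and
# the embedding settings are module-level values in the surrounding script;
# the optimizer and loss here are placeholders.
from keras.models import Model

graph_inputs, graph_outputs = create_output_graph(n_graph_layers=2)
met_model = Model(inputs=graph_inputs, outputs=graph_outputs)
met_model.compile(optimizer='adam', loss='mse')
met_model.summary()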
import math

import numpy
import keras
from keras import backend as K
from keras.constraints import maxnorm
from keras.layers import (LSTM, BatchNormalization, Conv2D, Dense, Dropout,
                          Flatten, Input, MaxPooling2D)
from keras.models import Sequential, load_model

# createDiscreteActionsSquare / createDiscreteActionsCircle are project-local
# helpers assumed to be imported from the surrounding package.


class Network(object):
    # NOTE: several attributes referenced below (hiddenLayer1..3,
    # targetNetworkMaxSteps) are assumed to be set elsewhere from the
    # parameters object; they do not appear in this excerpt.
    def __init__(self, parameters, modelName=None):
        self.parameters = parameters
        if parameters.SQUARE_ACTIONS:
            self.actions = createDiscreteActionsSquare(
                self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT,
                self.parameters.ENABLE_EJECT)
        else:
            self.actions = createDiscreteActionsCircle(
                self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT,
                self.parameters.ENABLE_EJECT)
        self.num_actions = len(self.actions)
        self.loadedModelName = None
        self.gpus = self.parameters.GPUS

        # Q-learning
        self.discount = self.parameters.DISCOUNT
        self.epsilon = self.parameters.EPSILON
        self.frameSkipRate = self.parameters.FRAME_SKIP_RATE
        self.gridSquaresPerFov = self.parameters.GRID_SQUARES_PER_FOV

        # CNN
        if self.parameters.CNN_REPR:
            # (KernelSize, stride, filterNum)
            self.kernel_1 = self.parameters.CNN_L1
            self.kernel_2 = self.parameters.CNN_L2
            self.kernel_3 = self.parameters.CNN_L3
            if self.parameters.CNN_USE_L1:
                self.stateReprLen = self.parameters.CNN_INPUT_DIM_1
            elif self.parameters.CNN_USE_L2:
                self.stateReprLen = self.parameters.CNN_INPUT_DIM_2
            else:
                self.stateReprLen = self.parameters.CNN_INPUT_DIM_3
        else:
            self.stateReprLen = self.parameters.STATE_REPR_LEN

        # ANN
        self.learningRate = self.parameters.ALPHA
        self.optimizer = self.parameters.OPTIMIZER
        if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu":
            # ELU is added as a separate layer (keras.layers.ELU) further
            # down, so the Dense layers themselves stay linear.
            self.activationFuncHidden = "linear"
        else:
            self.activationFuncHidden = self.parameters.ACTIVATION_FUNC_HIDDEN
        self.activationFuncLSTM = self.parameters.ACTIVATION_FUNC_LSTM
        self.activationFuncOutput = self.parameters.ACTIVATION_FUNC_OUTPUT

        self.layers = parameters.Q_LAYERS

        if self.parameters.USE_ACTION_AS_INPUT:
            inputDim = self.stateReprLen + 4
            outputDim = 1
        else:
            inputDim = self.stateReprLen
            outputDim = self.num_actions

        if self.parameters.EXP_REPLAY_ENABLED:
            input_shape_lstm = (self.parameters.MEMORY_TRACE_LEN, inputDim)
            stateful_training = False
            self.batch_len = self.parameters.MEMORY_BATCH_LEN
        else:
            input_shape_lstm = (1, inputDim)
            stateful_training = True
            self.batch_len = 1

        if self.parameters.INITIALIZER == "glorot_uniform":
            initializer = keras.initializers.glorot_uniform()
        elif self.parameters.INITIALIZER == "glorot_normal":
            initializer = keras.initializers.glorot_normal()
        else:
            weight_initializer_range = math.sqrt(
                6 / (self.stateReprLen + self.num_actions))
            initializer = keras.initializers.RandomUniform(
                minval=-weight_initializer_range,
                maxval=weight_initializer_range, seed=None)

        # CNN
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_P_REPR:
                if self.parameters.CNN_P_INCEPTION:
                    self.input = Input(shape=(self.stateReprLen,
                                              self.stateReprLen, 3))
                    tower_1 = Conv2D(self.kernel_2[2], (1, 1), padding='same',
                                     activation='relu')(self.input)
                    tower_1 = Conv2D(self.kernel_2[2], (3, 3), padding='same',
                                     activation='relu')(tower_1)
                    tower_2 = Conv2D(self.kernel_2[2], (1, 1), padding='same',
                                     activation='relu')(self.input)
                    tower_2 = Conv2D(self.kernel_2[2], (5, 5), padding='same',
                                     activation='relu')(tower_2)
                    tower_3 = MaxPooling2D((3, 3), strides=(1, 1),
                                           padding='same')(self.input)
                    tower_3 = Conv2D(self.kernel_2[2], (1, 1), padding='same',
                                     activation='relu')(tower_3)
                    self.valueNetwork = keras.layers.concatenate(
                        [tower_1, tower_2, tower_3], axis=3)
                    self.valueNetwork = keras.layers.Flatten()(
                        self.valueNetwork)
                # DQN approach
                else:
                    # RGB
                    if self.parameters.CNN_P_RGB:
                        channels = 3
                    # GrayScale
                    else:
                        channels = 1
                    if self.parameters.CNN_LAST_GRID:
                        channels = channels * 2
                    if self.parameters.COORDCONV:
                        channels += 2
                    self.input = Input(shape=(self.stateReprLen,
                                              self.stateReprLen, channels))
                    conv = self.input
                    if self.parameters.CNN_USE_L1:
                        conv = Conv2D(
                            self.kernel_1[2],
                            kernel_size=(self.kernel_1[0], self.kernel_1[0]),
                            strides=(self.kernel_1[1], self.kernel_1[1]),
                            activation='relu',
                            data_format='channels_last')(conv)
                    if self.parameters.CNN_USE_L2:
                        conv = Conv2D(
                            self.kernel_2[2],
                            kernel_size=(self.kernel_2[0], self.kernel_2[0]),
                            strides=(self.kernel_2[1], self.kernel_2[1]),
                            activation='relu',
                            data_format='channels_last')(conv)
                    if self.parameters.CNN_USE_L3:
                        conv = Conv2D(
                            self.kernel_3[2],
                            kernel_size=(self.kernel_3[0], self.kernel_3[0]),
                            strides=(self.kernel_3[1], self.kernel_3[1]),
                            activation='relu',
                            data_format='channels_last')(conv)
                    self.valueNetwork = Flatten()(conv)
            # Not pixel input
            else:
                if self.parameters.CNN_TOWER:
                    tower = []
                    self.input = []
                    self.towerModel = []
                    for grid in range(self.parameters.NUM_OF_GRIDS):
                        self.input.append(Input(shape=(1, self.stateReprLen,
                                                       self.stateReprLen)))
                        if self.parameters.CNN_USE_L1:
                            tower.append(Conv2D(
                                self.kernel_1[2],
                                kernel_size=(self.kernel_1[0],
                                             self.kernel_1[0]),
                                strides=(self.kernel_1[1], self.kernel_1[1]),
                                activation='relu',
                                data_format='channels_first')(
                                    self.input[grid]))
                        if self.parameters.CNN_USE_L2:
                            if self.parameters.CNN_USE_L1:
                                tower[grid] = Conv2D(
                                    self.kernel_2[2],
                                    kernel_size=(self.kernel_2[0],
                                                 self.kernel_2[0]),
                                    strides=(self.kernel_2[1],
                                             self.kernel_2[1]),
                                    activation='relu',
                                    data_format='channels_first')(tower[grid])
                            else:
                                tower.append(Conv2D(
                                    self.kernel_2[2],
                                    kernel_size=(self.kernel_2[0],
                                                 self.kernel_2[0]),
                                    strides=(self.kernel_2[1],
                                             self.kernel_2[1]),
                                    activation='relu',
                                    data_format='channels_first')(
                                        self.input[grid]))
                        if self.parameters.CNN_USE_L3:
                            if self.parameters.CNN_USE_L2:
                                tower[grid] = Conv2D(
                                    self.kernel_3[2],
                                    kernel_size=(self.kernel_3[0],
                                                 self.kernel_3[0]),
                                    strides=(self.kernel_3[1],
                                             self.kernel_3[1]),
                                    activation='relu',
                                    data_format='channels_first')(tower[grid])
                            else:
                                tower.append(Conv2D(
                                    self.kernel_3[2],
                                    kernel_size=(self.kernel_3[0],
                                                 self.kernel_3[0]),
                                    strides=(self.kernel_3[1],
                                             self.kernel_3[1]),
                                    activation='relu',
                                    data_format='channels_first')(
                                        self.input[grid]))
                        tower[grid] = Flatten()(tower[grid])
                    self.valueNetwork = keras.layers.concatenate(
                        [i for i in tower], axis=1)
                # Vision grid merging
                else:
                    self.input = Input(shape=(self.parameters.NUM_OF_GRIDS,
                                              self.stateReprLen,
                                              self.stateReprLen))
                    conv = self.input
                    if self.parameters.CNN_USE_L1:
                        conv = Conv2D(
                            self.kernel_1[2],
                            kernel_size=(self.kernel_1[0], self.kernel_1[0]),
                            strides=(self.kernel_1[1], self.kernel_1[1]),
                            activation='relu',
                            data_format='channels_first')(conv)
                    if self.parameters.CNN_USE_L2:
                        conv = Conv2D(
                            self.kernel_2[2],
                            kernel_size=(self.kernel_2[0], self.kernel_2[0]),
                            strides=(self.kernel_2[1], self.kernel_2[1]),
                            activation='relu',
                            data_format='channels_first')(conv)
                    if self.parameters.CNN_USE_L3:
                        conv = Conv2D(
                            self.kernel_3[2],
                            kernel_size=(self.kernel_3[0], self.kernel_3[0]),
                            strides=(self.kernel_3[1], self.kernel_3[1]),
                            activation='relu',
                            data_format='channels_first')(conv)
                    self.valueNetwork = Flatten()(conv)

        # Fully connected layers
        if self.parameters.NEURON_TYPE == "MLP":
            layerIterable = iter(self.layers)
            regularizer = keras.regularizers.l2(self.parameters.Q_WEIGHT_DECAY)
            if self.parameters.DROPOUT:
                constraint = maxnorm(self.parameters.MAXNORM)
            else:
                constraint = None

            if parameters.CNN_REPR:
                # NOTE: with EXTRA_INPUT == 0 the dense stack is built
                # directly on self.input, bypassing the convolutional
                # features in self.valueNetwork (kept as in the original).
                previousLayer = self.input
                extraInputSize = self.parameters.EXTRA_INPUT
                if extraInputSize > 0:
                    extraInput = Input(shape=(extraInputSize,))
                    self.input = [self.input, extraInput]
                    denseInput = keras.layers.concatenate(
                        [self.valueNetwork, extraInput])
                    previousLayer = Dense(
                        next(layerIterable),
                        activation=self.activationFuncHidden,
                        bias_initializer=initializer,
                        kernel_initializer=initializer,
                        kernel_regularizer=regularizer)(denseInput)
            else:
                self.input = Input(shape=(inputDim,))
                previousLayer = self.input

            for layer in layerIterable:
                if layer > 0:
                    if self.parameters.DROPOUT:
                        previousLayer = Dropout(
                            self.parameters.DROPOUT)(previousLayer)
                    previousLayer = Dense(
                        layer,
                        activation=self.activationFuncHidden,
                        bias_initializer=initializer,
                        kernel_initializer=initializer,
                        kernel_regularizer=regularizer,
                        kernel_constraint=constraint)(previousLayer)
                    if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu":
                        previousLayer = keras.layers.ELU(
                            alpha=self.parameters.ELU_ALPHA)(previousLayer)
                    if self.parameters.BATCHNORM:
                        previousLayer = BatchNormalization()(previousLayer)

            if self.parameters.DROPOUT:
                previousLayer = Dropout(self.parameters.DROPOUT)(previousLayer)
            output = Dense(outputDim,
                           activation=self.activationFuncOutput,
                           bias_initializer=initializer,
                           kernel_initializer=initializer,
                           kernel_regularizer=regularizer,
                           kernel_constraint=constraint)(previousLayer)
            self.valueNetwork = keras.models.Model(inputs=self.input,
                                                   outputs=output)

        elif self.parameters.NEURON_TYPE == "LSTM":
            # The original snippet called .add() without ever creating the
            # Sequential model; it is created here so the branch runs.
            self.valueNetwork = Sequential()
            # Hidden Layer 1
            # TODO: Use CNN with LSTM
            # if self.parameters.CNN_REPR:
            #     hidden1 = LSTM(self.hiddenLayer1, return_sequences=True,
            #                    stateful=stateful_training,
            #                    batch_size=self.batch_len)
            # else:
            #     hidden1 = LSTM(self.hiddenLayer1,
            #                    input_shape=input_shape_lstm,
            #                    return_sequences=True,
            #                    stateful=stateful_training,
            #                    batch_size=self.batch_len)
            hidden1 = LSTM(self.hiddenLayer1, input_shape=input_shape_lstm,
                           return_sequences=True, stateful=stateful_training,
                           batch_size=self.batch_len,
                           bias_initializer=initializer,
                           kernel_initializer=initializer)
            self.valueNetwork.add(hidden1)
            # Hidden 2
            if self.hiddenLayer2 > 0:
                hidden2 = LSTM(self.hiddenLayer2, return_sequences=True,
                               stateful=stateful_training,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.valueNetwork.add(hidden2)
            # Hidden 3
            if self.hiddenLayer3 > 0:
                hidden3 = LSTM(self.hiddenLayer3, return_sequences=True,
                               stateful=stateful_training,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.valueNetwork.add(hidden3)
            # Output layer
            output = LSTM(outputDim, activation=self.activationFuncOutput,
                          return_sequences=True, stateful=stateful_training,
                          batch_size=self.batch_len,
                          bias_initializer=initializer,
                          kernel_initializer=initializer)
            self.valueNetwork.add(output)

        # Create target network
        self.targetNetwork = keras.models.clone_model(self.valueNetwork)
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())

        if self.parameters.OPTIMIZER == "Adam":
            if self.parameters.GRADIENT_CLIP_NORM:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate,
                    clipnorm=self.parameters.GRADIENT_CLIP_NORM,
                    amsgrad=self.parameters.AMSGRAD)
            elif self.parameters.GRADIENT_CLIP:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate,
                    clipvalue=self.parameters.GRADIENT_CLIP,
                    amsgrad=self.parameters.AMSGRAD)
            else:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate, amsgrad=self.parameters.AMSGRAD)
        elif self.parameters.OPTIMIZER == "Nadam":
            optimizer = keras.optimizers.Nadam(lr=self.learningRate)
        elif self.parameters.OPTIMIZER == "Adamax":
            optimizer = keras.optimizers.Adamax(lr=self.learningRate)
        elif self.parameters.OPTIMIZER == "SGD":
            if self.parameters.NESTEROV:
                optimizer = keras.optimizers.SGD(
                    lr=self.learningRate,
                    momentum=self.parameters.NESTEROV,
                    nesterov=True)
            else:
                optimizer = keras.optimizers.SGD(lr=self.learningRate)
        self.optimizer = optimizer

        self.valueNetwork.compile(loss='mse', optimizer=optimizer)
        self.targetNetwork.compile(loss='mse', optimizer=optimizer)
        self.model = self.valueNetwork

        if self.parameters.NEURON_TYPE == "LSTM":
            # We predict using only one state
            input_shape_lstm = (1, self.stateReprLen)
            self.actionNetwork = Sequential()
            hidden1 = LSTM(self.hiddenLayer1, input_shape=input_shape_lstm,
                           return_sequences=True, stateful=True, batch_size=1,
                           bias_initializer=initializer,
                           kernel_initializer=initializer)
            self.actionNetwork.add(hidden1)
            if self.hiddenLayer2 > 0:
                hidden2 = LSTM(self.hiddenLayer2, return_sequences=True,
                               stateful=True, batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.actionNetwork.add(hidden2)
            if self.hiddenLayer3 > 0:
                hidden3 = LSTM(self.hiddenLayer3, return_sequences=True,
                               stateful=True, batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.actionNetwork.add(hidden3)
            self.actionNetwork.add(
                LSTM(self.num_actions,
                     activation=self.activationFuncOutput,
                     return_sequences=False, stateful=True,
                     batch_size=self.batch_len,
                     bias_initializer=initializer,
                     kernel_initializer=initializer))
            self.actionNetwork.compile(loss='mse', optimizer=optimizer)

        print(self.valueNetwork.summary())
        print("\n")

        if modelName is not None:
            self.load(modelName)

    def reset_general(self, model):
        session = K.get_session()
        for layer in model.layers:
            for v in layer.__dict__:
                v_arg = getattr(layer, v)
                if hasattr(v_arg, 'initializer'):
                    initializer_method = getattr(v_arg, 'initializer')
                    initializer_method.run(session=session)
                    print('reinitializing layer {}.{}'.format(layer.name, v))

    def reset_weights(self):
        self.reset_general(self.valueNetwork)
        self.reset_general(self.targetNetwork)

    def reset_hidden_states(self):
        self.actionNetwork.reset_states()
        self.valueNetwork.reset_states()
        self.targetNetwork.reset_states()

    def load(self, modelName):
        path = modelName
        self.loadedModelName = modelName
        self.valueNetwork = keras.models.load_model(path + "model.h5")
        self.targetNetwork = load_model(path + "model.h5")

    def trainOnBatch(self, inputs, targets, importance_weights):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED:
                    return self.valueNetwork.train_on_batch(
                        inputs, targets, sample_weight=importance_weights)
                else:
                    return self.valueNetwork.train_on_batch(inputs, targets)
            else:
                return self.valueNetwork.train_on_batch(
                    numpy.array([numpy.array([inputs])]),
                    numpy.array([numpy.array([targets])]))
        else:
            if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED:
                return self.valueNetwork.train_on_batch(
                    inputs, targets, sample_weight=importance_weights)
            else:
                return self.valueNetwork.train_on_batch(inputs, targets)

    def updateActionNetwork(self):
        self.actionNetwork.set_weights(self.valueNetwork.get_weights())

    def updateTargetNetwork(self):
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())

    def predict(self, state, batch_len=1):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                return self.valueNetwork.predict(state, batch_size=batch_len)
            else:
                return self.valueNetwork.predict(
                    numpy.array([numpy.array([state])]))[0][0]
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_TOWER:
                stateRepr = numpy.zeros(
                    (len(state), 1, 1, len(state[0]), len(state[0])))
                for gridIdx, grid in enumerate(state):
                    stateRepr[gridIdx][0][0] = grid
                state = list(stateRepr)
            else:
                if len(state) == 2:
                    grid = numpy.array([state[0]])
                    extra = numpy.array([state[1]])
                    state = [grid, extra]
                else:
                    state = numpy.array([state])
        return self.valueNetwork.predict(state)[0]

    def predictTargetQValues(self, state):
        if self.parameters.USE_ACTION_AS_INPUT:
            return [
                self.predict_target_network(
                    numpy.array([numpy.concatenate((state[0], act))]))[0]
                for act in self.actions
            ]
        else:
            return self.predict_target_network(state)

    def predict_target_network(self, state, batch_len=1):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                return self.targetNetwork.predict(state, batch_size=batch_len)
            else:
                return self.targetNetwork.predict(
                    numpy.array([numpy.array([state])]))[0][0]
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_TOWER:
                stateRepr = numpy.zeros(
                    (len(state), 1, 1, len(state[0]), len(state[0])))
                for gridIdx, grid in enumerate(state):
                    stateRepr[gridIdx][0][0] = grid
                stateRepr = list(stateRepr)
                return self.targetNetwork.predict(stateRepr)[0]
            else:
                if len(state) == 2:
                    grid = numpy.array([state[0]])
                    extra = numpy.array([state[1]])
                    state = [grid, extra]
                else:
                    state = numpy.array([state])
                return self.targetNetwork.predict(state)[0]
        else:
            return self.targetNetwork.predict(state)[0]

    def predict_action_network(self, trace):
        return self.actionNetwork.predict(
            numpy.array([numpy.array([trace])]))[0]

    def predict_action(self, state):
        if self.parameters.USE_ACTION_AS_INPUT:
            return [
                self.predict(
                    numpy.array([numpy.concatenate((state[0], act))]))[0]
                for act in self.actions
            ]
        else:
            if self.parameters.NEURON_TYPE == "MLP":
                return self.predict(state)
            else:
                return self.predict_action_network(state)

    def saveModel(self, path, name=""):
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())
        self.targetNetwork.save(path + name + "model.h5")

    def setEpsilon(self, val):
        self.epsilon = val

    def setFrameSkipRate(self, value):
        self.frameSkipRate = value

    def getParameters(self):
        return self.parameters

    def getNumOfActions(self):
        return self.num_actions

    def getEpsilon(self):
        return self.epsilon

    def getDiscount(self):
        return self.discount

    def getFrameSkipRate(self):
        return self.frameSkipRate

    def getGridSquaresPerFov(self):
        return self.gridSquaresPerFov

    def getTargetNetworkMaxSteps(self):
        return self.targetNetworkMaxSteps

    def getStateReprLen(self):
        return self.stateReprLen

    def getHiddenLayer1(self):
        return self.hiddenLayer1

    def getHiddenLayer2(self):
        return self.hiddenLayer2

    def getHiddenLayer3(self):
        return self.hiddenLayer3

    def getNumActions(self):
        return self.num_actions

    def getLearningRate(self):
        return self.learningRate

    def getActivationFuncHidden(self):
        return self.activationFuncHidden

    def getActivationFuncOutput(self):
        return self.activationFuncOutput

    def getOptimizer(self):
        return self.optimizer

    def getLoadedModelName(self):
        return self.loadedModelName

    def getActions(self):
        return self.actions

    def getTargetNetwork(self):
        return self.targetNetwork

    def getValueNetwork(self):
        return self.valueNetwork
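# Usage sketch: Parameters is a hypothetical config object; only a few of the
# many attributes the constructor reads are shown here.
params = Parameters()
params.NEURON_TYPE = "MLP"
params.CNN_REPR = False
params.Q_LAYERS = (256, 256)

net = Network(params)
state = numpy.random.rand(params.STATE_REPR_LEN)
q_values = net.predict(numpy.array([state]))      # one Q-value per action
best_action = net.getActions()[numpy.argmax(q_values)]
net.updateTargetNetwork()                         # periodic target sync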
    # (tail of build_cnn_return_preds: combine the head predictions)
    confidence_layer = Dense(N_AUTHORS, activation='sigmoid')
    c_out = Lambda(lambda x: K.sum(x, axis=0))(confidence_layer(mp2_outs))

    # get how much each attention head should contribute
    attn_attributions = calculate_attention_weight(attn_confidences, attn_preds)

    # apply the confidence to the predictions
    gated_out = Multiply()([c_out, fc_final_out])

    # add the contributions from the attention heads
    attention_weighted_out = Add()([gated_out, attn_attributions])
    return (attention_weighted_out, attn_maps)


# construct the model
inputs = Input(shape=INPUT_SHAPE)
preds, maps = build_cnn_return_preds(inputs)
model = Model(inputs=[inputs], outputs=preds)
optimizer = adam(lr=0.003)
model.compile(optimizer=optimizer, loss=losses(maps), metrics=['accuracy'])

# hydrate the inputs
inputs = []
labels = []
for _, v in training_set.items():
    for x in v:
        inputs.append(x['text'])
        labels.append(x['author'])
inputs = np.asarray(inputs)
labels = np.expand_dims(to_categorical(np.asarray(labels)), 1)

# train the model
model.fit(inputs, labels, epochs=15, batch_size=100, shuffle='batch')
model.save('./saved_model')
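# losses() is not defined in this excerpt. A plausible, purely illustrative
# shape for it: a closure that adds a small penalty on the attention maps to
# the usual categorical cross-entropy.
from keras import backend as K

def losses(maps):
    def loss(y_true, y_pred):
        ce = K.categorical_crossentropy(y_true, y_pred)
        attn_penalty = sum(K.mean(K.square(m)) for m in maps)
        return ce + 0.01 * attn_penalty  # weighting factor is a guess
    return loss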
        # (tail of the fit call for the previous model_no branch)
        validation_steps=nb_videos_val // video_b_s,
        callbacks=[
            EarlyStopping(patience=60),
            ModelCheckpoint(model_path + save_name,
                            save_weights_only=True,
                            save_best_only=True)
        ])
elif model_no == 1:
    xgaus_bshape = (video_b_s, shape_r_gaus, shape_c_gaus, nb_gaussian)
    ximgs_ops_bshape = (video_b_s, num_frames, shape_r, shape_c,
                        3 + 2 * opt_num)
    input_tensors = [Input(batch_shape=xgaus_bshape)
                     for _ in range(num_frames)]
    input_tensors.append(Input(batch_shape=ximgs_ops_bshape))
    m = TD_model_prior_masks(input_tensors, f1_train=True, stateful=stateful)
    save_name = 'vap_model/UHD_dcross_res_matt_res.{epoch:02d}-{val_loss:.4f}.h5'
    # Four output branches, each scored with KL divergence, CC, NSS and SIM.
    m.compile(Adam(lr=1e-5),
              loss=[kl_divergence, correlation_coefficient, nss, sim,
                    kl_divergence, correlation_coefficient, nss, sim,
                    kl_divergence, correlation_coefficient, nss, sim,
                    kl_divergence, correlation_coefficient, nss, sim],
              loss_weights=[