def train_model(build_fn, valid_x, valid_y, epochs, n_layers=1):
    print(epochs, n_layers)
    # Load the author dictionaries and the pickled training set.
    with open('./dictionaries.p', 'rb') as f:
        dicts = pickle.load(f)
    with open('./train_dataset.p', 'rb') as f:
        training_set = pickle.load(f)
    inputs = Input(shape=INPUT_SHAPE)
    preds, maps = build_fn(inputs, n_layers)
    model = Model(inputs=[inputs], outputs=preds)
    optimizer = adam()
    if not maps:
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
    else:
        model.compile(optimizer=optimizer,
                      loss=losses(maps),
                      metrics=['accuracy'])
    # Hydrate the training arrays (note: this rebinds `inputs`,
    # shadowing the Input tensor defined above).
    inputs = []
    labels = []
    for x in training_set:
        inputs.append(x['text'])
        labels.append(x['author'])
    inputs = np.asarray(inputs)
    labels = np.expand_dims(to_categorical(np.asarray(labels)), 1)
    model.summary()
    model.fit(inputs,
              labels,
              epochs=epochs,
              batch_size=100,
              shuffle='batch',  # HDF5-style chunked shuffling; True shuffles sample-wise
              validation_data=(valid_x, valid_y))
    model.save('./saved_model')
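
A minimal `build_fn` compatible with this signature takes the Input tensor and `n_layers` and returns `(preds, maps)`; returning a falsy `maps` selects the plain categorical cross-entropy branch above. The sketch below is illustrative only: the layer choices and the `n_classes` size are assumptions, not from the original source.

from keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Reshape

def simple_build_fn(inputs, n_layers, n_classes=4):  # n_classes is a placeholder
    x = inputs
    for _ in range(n_layers):
        x = Conv1D(64, 3, activation='relu', padding='same')(x)
    x = GlobalMaxPooling1D()(x)
    preds = Dense(n_classes, activation='softmax')(x)
    # The labels above get an extra axis via expand_dims, so mirror it here.
    preds = Reshape((1, n_classes))(preds)
    return preds, None  # no attention maps -> plain cross-entropy branch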
Example No. 2
def create_input_layers(num_inputs, input_size):
    # Returns (Input tensor(s), tensor to build on): a single Input when
    # num_inputs == 1, otherwise a list of Inputs plus their concatenation.
    if num_inputs == 1:
        inputs = Input(shape=(input_size,))
        return inputs, inputs
    else:
        inputs = []
        for i in range(num_inputs):
            inputs.append(Input(shape=(input_size,)))
        layer = concatenate(inputs)
        return inputs, layer
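
A short usage sketch (sizes assumed): the helper hands back both the raw Input tensor(s) for `Model(inputs=...)` and the tensor to keep building on, so both branches wire up the same way.

from keras.layers import Dense
from keras.models import Model

inputs, layer = create_input_layers(num_inputs=3, input_size=128)
x = Dense(64, activation='relu')(layer)
model = Model(inputs=inputs, outputs=Dense(1, activation='sigmoid')(x))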
Example No. 3
    def build(self, mode, input_shape, config=None, image_nums=5):
        if not config:
            config = VideoModelConfig()
        assert mode in ["training", "inference"]
        h, w, c = input_shape
        inputs = Input(shape=(h, w, c), name="input_image")
        inputs_image_keys = []
        inputs_mask_keys = []
        for i in range(image_nums):
            inputs_image_keys.append(Input(shape=(h, w, c), name="input_image_key" + str(i)))
            inputs_mask_keys.append(Input(shape=(h, w, c), name="input_mask_key" + str(i)))

        outputs = temporal_and_mask_single_propagation(inputs, inputs_image_keys, inputs_mask_keys)
        # outputs = temporal_and_mask_multi_propagation(inputs, inputs_image_keys, inputs_mask_keys)
        model = Model(inputs=[inputs] + inputs_image_keys + inputs_mask_keys, outputs=outputs, name="triple_model")
        # model = Model([inputs, inputs_image_keys, inputs_mask_keys], [outputs])
        model.summary()
        plot_model(model, "triple_single_model.png")
        # plot_model(model, "triple_multi_model.png")
        if mode == "training":

            # Network Heads
            # TODO: verify that this handles zero padded ROIs
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = \
                fpn_classifier_graph(rois, mrcnn_feature_map, config.IMAGE_SHAPE,
                                     config.POOL_SIZE, config.NUM_CLASSES)

            mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_map,
                                              config.IMAGE_SHAPE,
                                              config.MASK_POOL_SIZE,
                                              config.NUM_CLASSES)

            # TODO: clean up (use tf.identify if necessary)
            output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)

            global_parsing_loss = KL.Lambda(lambda x: mrcnn_global_parsing_loss_graph(config.NUM_PART_CLASS, *x),
                                            name="mrcnn_global_parsing_loss")(
                [input_gt_part, global_parsing_map])

            # Losses
            rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss")(
                [input_rpn_match, rpn_class_logits])
            rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")(
                [input_rpn_bbox, input_rpn_match, rpn_bbox])
            class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")(
                [target_class_ids, mrcnn_class_logits, active_class_ids])
            bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")(
                [target_bbox, target_class_ids, mrcnn_bbox])
            mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")(
                [target_mask, target_class_ids, mrcnn_mask])

            # Model
            inputs = [input_image, input_image_key1, input_image_key2, input_image_key3,
                      input_key_identity, input_image_meta, input_rpn_match, input_rpn_bbox,
                      input_gt_class_ids, input_gt_boxes, input_gt_masks, input_gt_part]
            if not config.USE_RPN_ROIS:
                inputs.append(input_rois)
            outputs = [rpn_class_logits, rpn_class, rpn_bbox,
                       mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask,
                       rpn_rois, output_rois,
                       rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss,
                       global_parsing_loss]
            model = KM.Model(inputs, outputs, name='aten')
        else:
            # Network Heads
            # Proposal classifier and BBox regressor heads
            mrcnn_class_logits, mrcnn_class, mrcnn_bbox = \
                fpn_classifier_graph(rpn_rois, mrcnn_feature_map, config.IMAGE_SHAPE,
                                     config.POOL_SIZE, config.NUM_CLASSES)

            # Detections
            # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in image coordinates
            detections = DetectionLayer(config, name="mrcnn_detection")(
                [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])

            # Convert boxes to normalized coordinates
            # TODO: let DetectionLayer return normalized coordinates to avoid
            #       unnecessary conversions
            h, w = config.IMAGE_SHAPE[:2]
            detection_boxes = KL.Lambda(
                lambda x: x[..., :4] / np.array([h, w, h, w]))(detections)

            # Create masks for detections
            mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_map,
                                              config.IMAGE_SHAPE,
                                              config.MASK_POOL_SIZE,
                                              config.NUM_CLASSES)

            global_parsing_prob = KL.Lambda(lambda x: post_processing_graph(*x))([global_parsing_map, input_image])

            model = KM.Model([input_image, input_image_key1, input_image_key2, input_image_key3,
                              input_key_identity, input_image_meta],
                             [detections, mrcnn_class, mrcnn_bbox,
                              mrcnn_mask, rpn_rois, rpn_class, rpn_bbox, global_parsing_prob],
                             name='aten')

        # Add multi-GPU support.
        if config.GPU_COUNT > 1:
            from utils.parallel_model import ParallelModel
            model = ParallelModel(model, config.GPU_COUNT)
        # Plot the final graph (a platform check for Windows was disabled here).
        if mode == "training":
            plot_model(model, "aten_training.jpg")
        else:
            plot_model(model, "aten_test.png")
        return model
Example No. 4
label_list = y


left_input = []
right_input = []
targets = []

# Number of pairs per image
pairs = 5
# Let's create the new dataset to train on
for i in range(len(label_list)):
    for _ in range(pairs):
        compare_to = i
        while compare_to == i:  # Make sure it's not comparing to itself
            compare_to = random.randint(0, len(label_list) - 1)
        left_input.append(image_list[i])
        right_input.append(image_list[compare_to])
        if label_list[i] == label_list[compare_to]:  # They are the same
            targets.append(1.)
        else:  # Not the same
            targets.append(0.)

left_input = np.squeeze(np.array(left_input))
right_input = np.squeeze(np.array(right_input))
targets = np.squeeze(np.array(targets))

siamese_net.fit([left_input, right_input], targets,
                batch_size=16,
                epochs=10,
                verbose=1,
                # Note: validation here reuses the training pairs, so these
                # metrics do not measure generalization.
                validation_data=([left_input, right_input], targets))
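
Once fit, a pair can be scored directly. This sketch assumes `siamese_net` ends in a single sigmoid unit, as the 0/1 targets above imply:

# Score the first pair: an output near 1.0 means "same class".
score = siamese_net.predict([left_input[:1], right_input[:1]])
print('same-class probability:', float(score[0]))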
Example No. 5
def create_output_graph(n_features=8,
                        n_features_cat=3,
                        n_graph_layers=0,
                        n_dense_layers=3,
                        n_dense_per_graph_net=1,
                        activation='tanh',
                        do_weighted_sum=True,
                        with_bias=True):
    # [b'PF_dxy', b'PF_dz', b'PF_eta', b'PF_mass', b'PF_puppiWeight', b'PF_charge', b'PF_fromPV', b'PF_pdgId',  b'PF_px', b'PF_py']
    inputs = Input(shape=(maxNPF, n_features), name='input')
    # `slice` here is tf.slice: keep only the last two features
    # (PF_px, PF_py in the list above) for the weighted sum.
    pxpy = Lambda(lambda x: slice(x, (0, 0, n_features - 2), (-1, -1, -1)))(inputs)

    if activation == 'prelu':
        activation = PReLU()

    if opt.embedding:
        embeddings = []
        for i_emb in range(n_features_cat):
            input_cat = Input(shape=(maxNPF, 1),
                              name='input_cat{}'.format(i_emb))
            if i_emb == 0:
                inputs = [inputs, input_cat]
            else:
                inputs.append(input_cat)
            embedding = Embedding(
                input_dim=emb_input_dim[i_emb],
                output_dim=emb_out_dim,
                embeddings_initializer=initializers.RandomNormal(mean=0.,
                                                                 stddev=0.4 /
                                                                 emb_out_dim),
                name='embedding{}'.format(i_emb))(input_cat)
            embedding = Reshape((maxNPF, 8))(embedding)
            embeddings.append(embedding)

        throughput = Concatenate()([inputs[0]] + [emb for emb in embeddings])

    # x = GlobalExchange()(inputs if not opt.embedding else throughput)
    x = throughput if opt.embedding else inputs

    for i_graph in range(n_graph_layers):
        if i_graph > 0:
            x = GlobalExchange()(x)
        for __ in range(n_dense_per_graph_net):
            x = Dense(64,
                      activation=activation,
                      kernel_initializer='lecun_uniform')(x)
            # x = BatchNormalization(momentum=0.8)(x)
        x = GravNet(n_neighbours=20,
                    n_dimensions=4,
                    n_filters=42,
                    n_propagate=18)(x)
        x = BatchNormalization(momentum=0.8)(x)

    dense_layers = []  # [4]
    if do_weighted_sum:
        for i_dense in range(n_dense_layers):
            x = Dense(64 // 2**i_dense,
                      activation=activation,
                      kernel_initializer='lecun_uniform')(x)
            x = BatchNormalization(momentum=0.95)(x)
        # List of weights. Increase to 3 when operating with biases
        # x = Dense(3 if with_bias else 1, activation='linear', kernel_initializer='lecun_uniform')(x)

        # Expect typical weights to not be of order 1 but somewhat smaller, so apply explicit scaling
        x = Dense(
            3 if with_bias else 1,
            activation='linear',
            kernel_initializer=initializers.VarianceScaling(scale=0.02))(x)
        print('Shape of last dense layer', x.shape)
        x = Concatenate()([x, pxpy])
        #x = Flatten()(x)
        x = weighted_sum_layer(with_bias)(x)
    else:
        for i_dense in range(n_dense_layers):
            x = Dense(64 // 2**i_dense,
                      activation=activation,
                      kernel_initializer='lecun_uniform')(x)
        x = Flatten()(x)
        dense_layers = [32, 16, 8]

    dense_activation = 'relu'
    for dense_size in dense_layers:
        x = Dense(dense_size,
                  activation=dense_activation,
                  kernel_initializer='lecun_uniform')(x)

    x = Dense(2, activation='linear', name='output')(x)
    return inputs, x
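
The function returns (inputs, output tensor) rather than a compiled model. A wiring sketch follows; the optimizer and loss here are placeholders, not from the original training script:

from keras.models import Model

graph_inputs, graph_output = create_output_graph(n_features=8, n_features_cat=3)
model = Model(inputs=graph_inputs, outputs=graph_output)
model.compile(optimizer='adam', loss='mse')
model.summary()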
Example No. 6
class Network(object):
    def __init__(self, parameters, modelName=None):
        self.parameters = parameters

        if parameters.SQUARE_ACTIONS:
            self.actions = createDiscreteActionsSquare(
                self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT,
                self.parameters.ENABLE_EJECT)
        else:
            self.actions = createDiscreteActionsCircle(
                self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT,
                self.parameters.ENABLE_EJECT)
        self.num_actions = len(self.actions)

        self.loadedModelName = None

        self.gpus = self.parameters.GPUS

        # Q-learning
        self.discount = self.parameters.DISCOUNT
        self.epsilon = self.parameters.EPSILON
        self.frameSkipRate = self.parameters.FRAME_SKIP_RATE
        self.gridSquaresPerFov = self.parameters.GRID_SQUARES_PER_FOV

        # CNN
        if self.parameters.CNN_REPR:
            # (kernel_size, stride, num_filters)
            self.kernel_1 = self.parameters.CNN_L1
            self.kernel_2 = self.parameters.CNN_L2
            self.kernel_3 = self.parameters.CNN_L3

            if self.parameters.CNN_USE_L1:
                self.stateReprLen = self.parameters.CNN_INPUT_DIM_1
            elif self.parameters.CNN_USE_L2:
                self.stateReprLen = self.parameters.CNN_INPUT_DIM_2
            else:
                self.stateReprLen = self.parameters.CNN_INPUT_DIM_3
        else:
            self.stateReprLen = self.parameters.STATE_REPR_LEN

        # ANN
        self.learningRate = self.parameters.ALPHA
        self.optimizer = self.parameters.OPTIMIZER
        if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu":
            # Keep the Dense layers linear; a separate ELU layer is applied below.
            self.activationFuncHidden = "linear"
        else:
            self.activationFuncHidden = self.parameters.ACTIVATION_FUNC_HIDDEN
        self.activationFuncLSTM = self.parameters.ACTIVATION_FUNC_LSTM
        self.activationFuncOutput = self.parameters.ACTIVATION_FUNC_OUTPUT

        self.layers = parameters.Q_LAYERS

        if self.parameters.USE_ACTION_AS_INPUT:
            inputDim = self.stateReprLen + 4
            outputDim = 1
        else:
            inputDim = self.stateReprLen
            outputDim = self.num_actions

        if self.parameters.EXP_REPLAY_ENABLED:
            input_shape_lstm = (self.parameters.MEMORY_TRACE_LEN, inputDim)
            stateful_training = False
            self.batch_len = self.parameters.MEMORY_BATCH_LEN

        else:
            input_shape_lstm = (1, inputDim)
            stateful_training = True
            self.batch_len = 1

        if self.parameters.INITIALIZER == "glorot_uniform":
            initializer = keras.initializers.glorot_uniform()
        elif self.parameters.INITIALIZER == "glorot_normal":
            initializer = keras.initializers.glorot_normal()
        else:
            weight_initializer_range = math.sqrt(
                6 / (self.stateReprLen + self.num_actions))
            initializer = keras.initializers.RandomUniform(
                minval=-weight_initializer_range,
                maxval=weight_initializer_range,
                seed=None)

        # CNN
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_P_REPR:
                if self.parameters.CNN_P_INCEPTION:

                    self.input = Input(shape=(self.stateReprLen,
                                              self.stateReprLen, 3))

                    tower_1 = Conv2D(self.kernel_2[2], (1, 1),
                                     padding='same',
                                     activation='relu')(self.input)
                    tower_1 = Conv2D(self.kernel_2[2], (3, 3),
                                     padding='same',
                                     activation='relu')(tower_1)

                    tower_2 = Conv2D(self.kernel_2[2], (1, 1),
                                     padding='same',
                                     activation='relu')(self.input)
                    tower_2 = Conv2D(self.kernel_2[2], (5, 5),
                                     padding='same',
                                     activation='relu')(tower_2)

                    tower_3 = MaxPooling2D((3, 3),
                                           strides=(1, 1),
                                           padding='same')(self.input)
                    tower_3 = Conv2D(self.kernel_2[2], (1, 1),
                                     padding='same',
                                     activation='relu')(tower_3)

                    self.valueNetwork = keras.layers.concatenate(
                        [tower_1, tower_2, tower_3], axis=3)
                    self.valueNetwork = keras.layers.Flatten()(
                        self.valueNetwork)

                # DQN approach
                else:
                    # RGB
                    if self.parameters.CNN_P_RGB:
                        channels = 3
                    # GrayScale
                    else:
                        channels = 1
                    if self.parameters.CNN_LAST_GRID:
                        channels = channels * 2
                    if self.parameters.COORDCONV:
                        channels += 2
                    self.input = Input(shape=(self.stateReprLen,
                                              self.stateReprLen, channels))
                    conv = self.input

                    if self.parameters.CNN_USE_L1:
                        conv = Conv2D(self.kernel_1[2],
                                      kernel_size=(self.kernel_1[0],
                                                   self.kernel_1[0]),
                                      strides=(self.kernel_1[1],
                                               self.kernel_1[1]),
                                      activation='relu',
                                      data_format='channels_last')(conv)

                    if self.parameters.CNN_USE_L2:
                        conv = Conv2D(self.kernel_2[2],
                                      kernel_size=(self.kernel_2[0],
                                                   self.kernel_2[0]),
                                      strides=(self.kernel_2[1],
                                               self.kernel_2[1]),
                                      activation='relu',
                                      data_format='channels_last')(conv)

                    if self.parameters.CNN_USE_L3:
                        conv = Conv2D(self.kernel_3[2],
                                      kernel_size=(self.kernel_3[0],
                                                   self.kernel_3[0]),
                                      strides=(self.kernel_3[1],
                                               self.kernel_3[1]),
                                      activation='relu',
                                      data_format='channels_last')(conv)

                    self.valueNetwork = Flatten()(conv)

            # Not pixel input
            else:
                if self.parameters.CNN_TOWER:
                    tower = []
                    self.input = []
                    self.towerModel = []
                    for grid in range(self.parameters.NUM_OF_GRIDS):
                        self.input.append(
                            Input(shape=(1, self.stateReprLen,
                                         self.stateReprLen)))
                        if self.parameters.CNN_USE_L1:
                            tower.append(
                                Conv2D(self.kernel_1[2],
                                       kernel_size=(self.kernel_1[0],
                                                    self.kernel_1[0]),
                                       strides=(self.kernel_1[1],
                                                self.kernel_1[1]),
                                       activation='relu',
                                       data_format='channels_first')(
                                           self.input[grid]))

                        if self.parameters.CNN_USE_L2:
                            if self.parameters.CNN_USE_L1:
                                tower[grid] = Conv2D(
                                    self.kernel_2[2],
                                    kernel_size=(self.kernel_2[0],
                                                 self.kernel_2[0]),
                                    strides=(self.kernel_2[1],
                                             self.kernel_2[1]),
                                    activation='relu',
                                    data_format='channels_first')(tower[grid])
                            else:
                                tower.append(
                                    Conv2D(self.kernel_2[2],
                                           kernel_size=(self.kernel_2[0],
                                                        self.kernel_2[0]),
                                           strides=(self.kernel_2[1],
                                                    self.kernel_2[1]),
                                           activation='relu',
                                           data_format='channels_first')(
                                               self.input[grid]))

                        if self.parameters.CNN_USE_L3:
                            if self.parameters.CNN_USE_L2:
                                tower[grid] = Conv2D(
                                    self.kernel_3[2],
                                    kernel_size=(self.kernel_3[0],
                                                 self.kernel_3[0]),
                                    strides=(self.kernel_3[1],
                                             self.kernel_3[1]),
                                    activation='relu',
                                    data_format='channels_first')(tower[grid])
                            else:
                                tower.append(
                                    Conv2D(self.kernel_3[2],
                                           kernel_size=(self.kernel_3[0],
                                                        self.kernel_3[0]),
                                           strides=(self.kernel_3[1],
                                                    self.kernel_3[1]),
                                           activation='relu',
                                           data_format='channels_first')(
                                               self.input[grid]))
                            tower[grid] = Flatten()(tower[grid])

                    self.valueNetwork = keras.layers.concatenate(tower, axis=1)

                # Vision grid merging
                else:
                    self.input = Input(shape=(self.parameters.NUM_OF_GRIDS,
                                              self.stateReprLen,
                                              self.stateReprLen))
                    conv = self.input
                    if self.parameters.CNN_USE_L1:
                        conv = Conv2D(self.kernel_1[2],
                                      kernel_size=(self.kernel_1[0],
                                                   self.kernel_1[0]),
                                      strides=(self.kernel_1[1],
                                               self.kernel_1[1]),
                                      activation='relu',
                                      data_format='channels_first')(conv)

                    if self.parameters.CNN_USE_L2:
                        conv = Conv2D(self.kernel_2[2],
                                      kernel_size=(self.kernel_2[0],
                                                   self.kernel_2[0]),
                                      strides=(self.kernel_2[1],
                                               self.kernel_2[1]),
                                      activation='relu',
                                      data_format='channels_first')(conv)

                    if self.parameters.CNN_USE_L3:
                        conv = Conv2D(self.kernel_3[2],
                                      kernel_size=(self.kernel_3[0],
                                                   self.kernel_3[0]),
                                      strides=(self.kernel_3[1],
                                               self.kernel_3[1]),
                                      activation='relu',
                                      data_format='channels_first')(conv)

                    self.valueNetwork = Flatten()(conv)

        # Fully connected layers
        if self.parameters.NEURON_TYPE == "MLP":
            layerIterable = iter(self.layers)

            regularizer = keras.regularizers.l2(self.parameters.Q_WEIGHT_DECAY)
            if self.parameters.DROPOUT:
                constraint = maxnorm(self.parameters.MAXNORM)
            else:
                constraint = None

            if parameters.CNN_REPR:
                previousLayer = self.input
                extraInputSize = self.parameters.EXTRA_INPUT
                if extraInputSize > 0:
                    extraInput = Input(shape=(extraInputSize, ))
                    self.input = [self.input, extraInput]
                    denseInput = keras.layers.concatenate(
                        [self.valueNetwork, extraInput])
                    previousLayer = Dense(
                        next(layerIterable),
                        activation=self.activationFuncHidden,
                        bias_initializer=initializer,
                        kernel_initializer=initializer,
                        kernel_regularizer=regularizer)(denseInput)
            else:
                self.input = Input(shape=(inputDim, ))
                previousLayer = self.input

            for layer in layerIterable:
                if layer > 0:
                    if self.parameters.DROPOUT:
                        previousLayer = Dropout(
                            self.parameters.DROPOUT)(previousLayer)
                    previousLayer = Dense(
                        layer,
                        activation=self.activationFuncHidden,
                        bias_initializer=initializer,
                        kernel_initializer=initializer,
                        kernel_regularizer=regularizer,
                        kernel_constraint=constraint)(previousLayer)
                    if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu":
                        previousLayer = (keras.layers.ELU(
                            alpha=self.parameters.ELU_ALPHA))(previousLayer)
                    if self.parameters.BATCHNORM:
                        previousLayer = BatchNormalization()(previousLayer)
            if self.parameters.DROPOUT:
                previousLayer = Dropout(self.parameters.DROPOUT)(previousLayer)

            output = Dense(outputDim,
                           activation=self.activationFuncOutput,
                           bias_initializer=initializer,
                           kernel_initializer=initializer,
                           kernel_regularizer=regularizer,
                           kernel_constraint=constraint)(previousLayer)

            self.valueNetwork = keras.models.Model(inputs=self.input,
                                                   outputs=output)

        elif self.parameters.NEURON_TYPE == "LSTM":
            # Hidden Layer 1
            # TODO: Use CNN with LSTM
            # if self.parameters.CNN_REPR:
            #     hidden1 = LSTM(self.hiddenLayer1, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len)
            # else:
            #     hidden1 = LSTM(self.hiddenLayer1, input_shape=input_shape_lstm, return_sequences = True,
            #                    stateful= stateful_training, batch_size=self.batch_len)
            # The Sequential container is not created anywhere else in this
            # excerpt, so build it here before stacking the LSTM layers.
            self.valueNetwork = Sequential()
            hidden1 = LSTM(self.hiddenLayer1,
                           input_shape=input_shape_lstm,
                           return_sequences=True,
                           stateful=stateful_training,
                           batch_size=self.batch_len,
                           bias_initializer=initializer,
                           kernel_initializer=initializer)
            self.valueNetwork.add(hidden1)
            # Hidden 2
            if self.hiddenLayer2 > 0:
                hidden2 = LSTM(self.hiddenLayer2,
                               return_sequences=True,
                               stateful=stateful_training,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.valueNetwork.add(hidden2)
            # Hidden 3
            if self.hiddenLayer3 > 0:
                hidden3 = LSTM(self.hiddenLayer3,
                               return_sequences=True,
                               stateful=stateful_training,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.valueNetwork.add(hidden3)
            # Output layer
            output = LSTM(outputDim,
                          activation=self.activationFuncOutput,
                          return_sequences=True,
                          stateful=stateful_training,
                          batch_size=self.batch_len,
                          bias_initializer=initializer,
                          kernel_initializer=initializer)
            self.valueNetwork.add(output)

        # Create target network
        self.targetNetwork = keras.models.clone_model(self.valueNetwork)
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())

        if self.parameters.OPTIMIZER == "Adam":
            if self.parameters.GRADIENT_CLIP_NORM:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate,
                    clipnorm=self.parameters.GRADIENT_CLIP_NORM,
                    amsgrad=self.parameters.AMSGRAD)
            elif self.parameters.GRADIENT_CLIP:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate,
                    clipvalue=self.parameters.GRADIENT_CLIP,
                    amsgrad=self.parameters.AMSGRAD)

            else:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate, amsgrad=self.parameters.AMSGRAD)
        elif self.parameters.OPTIMIZER == "Nadam":
            optimizer = keras.optimizers.Nadam(lr=self.learningRate)
        elif self.parameters.OPTIMIZER == "Adamax":
            optimizer = keras.optimizers.Adamax(lr=self.learningRate)
        elif self.parameters.OPTIMIZER == "SGD":
            if self.parameters.NESTEROV:
                optimizer = keras.optimizers.SGD(
                    lr=self.learningRate,
                    momentum=self.parameters.NESTEROV,
                    nesterov=True)
            else:
                optimizer = keras.optimizers.SGD(lr=self.learningRate)

        self.optimizer = optimizer

        self.valueNetwork.compile(loss='mse', optimizer=optimizer)
        self.targetNetwork.compile(loss='mse', optimizer=optimizer)

        self.model = self.valueNetwork

        if self.parameters.NEURON_TYPE == "LSTM":
            # We predict using only one state
            input_shape_lstm = (1, self.stateReprLen)
            self.actionNetwork = Sequential()
            hidden1 = LSTM(self.hiddenLayer1,
                           input_shape=input_shape_lstm,
                           return_sequences=True,
                           stateful=True,
                           batch_size=1,
                           bias_initializer=initializer,
                           kernel_initializer=initializer)
            self.actionNetwork.add(hidden1)

            if self.hiddenLayer2 > 0:
                hidden2 = LSTM(self.hiddenLayer2,
                               return_sequences=True,
                               stateful=True,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.actionNetwork.add(hidden2)
            if self.hiddenLayer3 > 0:
                hidden3 = LSTM(self.hiddenLayer3,
                               return_sequences=True,
                               stateful=True,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.actionNetwork.add(hidden3)
            self.actionNetwork.add(
                LSTM(self.num_actions,
                     activation=self.activationFuncOutput,
                     return_sequences=False,
                     stateful=True,
                     batch_size=self.batch_len,
                     bias_initializer=initializer,
                     kernel_initializer=initializer))
            self.actionNetwork.compile(loss='mse', optimizer=optimizer)

        self.valueNetwork.summary()
        print("\n")

        if modelName is not None:
            self.load(modelName)

    def reset_general(self, model):
        session = K.get_session()
        for layer in model.layers:
            for v in layer.__dict__:
                v_arg = getattr(layer, v)
                if hasattr(v_arg, 'initializer'):
                    initializer_method = getattr(v_arg, 'initializer')
                    initializer_method.run(session=session)
                    print('reinitializing layer {}.{}'.format(layer.name, v))

    def reset_weights(self):
        self.reset_general(self.valueNetwork)
        self.reset_general(self.targetNetwork)

    def reset_hidden_states(self):
        self.actionNetwork.reset_states()
        self.valueNetwork.reset_states()
        self.targetNetwork.reset_states()

    def load(self, modelName):
        path = modelName
        self.loadedModelName = modelName
        self.valueNetwork = keras.models.load_model(path + "model.h5")
        self.targetNetwork = keras.models.load_model(path + "model.h5")

    def trainOnBatch(self, inputs, targets, importance_weights):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED:
                    return self.valueNetwork.train_on_batch(
                        inputs, targets, sample_weight=importance_weights)
                else:
                    return self.valueNetwork.train_on_batch(inputs, targets)
            else:
                return self.valueNetwork.train_on_batch(
                    numpy.array([numpy.array([inputs])]),
                    numpy.array([numpy.array([targets])]))
        else:
            if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED:
                return self.valueNetwork.train_on_batch(
                    inputs, targets, sample_weight=importance_weights)
            else:
                return self.valueNetwork.train_on_batch(inputs, targets)

    def updateActionNetwork(self):
        self.actionNetwork.set_weights(self.valueNetwork.get_weights())

    def updateTargetNetwork(self):
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())

    def predict(self, state, batch_len=1):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                return self.valueNetwork.predict(state, batch_size=batch_len)
            else:
                return self.valueNetwork.predict(
                    numpy.array([numpy.array([state])]))[0][0]
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_TOWER:
                stateRepr = numpy.zeros(
                    (len(state), 1, 1, len(state[0]), len(state[0])))

                for gridIdx, grid in enumerate(state):
                    stateRepr[gridIdx][0][0] = grid
                state = list(stateRepr)

            else:
                if len(state) == 2:
                    grid = numpy.array([state[0]])
                    extra = numpy.array([state[1]])

                    state = [grid, extra]
                else:
                    state = numpy.array([state])
        return self.valueNetwork.predict(state)[0]

    def predictTargetQValues(self, state):
        if self.parameters.USE_ACTION_AS_INPUT:
            return [
                self.predict_target_network(
                    numpy.array([numpy.concatenate((state[0], act))]))[0]
                for act in self.actions
            ]
        else:
            return self.predict_target_network(state)

    def predict_target_network(self, state, batch_len=1):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                return self.targetNetwork.predict(state, batch_size=batch_len)
            else:
                return self.targetNetwork.predict(
                    numpy.array([numpy.array([state])]))[0][0]
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_TOWER:
                stateRepr = numpy.zeros(
                    (len(state), 1, 1, len(state[0]), len(state[0])))

                for gridIdx, grid in enumerate(state):
                    stateRepr[gridIdx][0][0] = grid
                stateRepr = list(stateRepr)

                return self.targetNetwork.predict(stateRepr)[0]
            else:
                if len(state) == 2:
                    grid = numpy.array([state[0]])
                    extra = numpy.array([state[1]])

                    state = [grid, extra]
                else:
                    state = numpy.array([state])
                return self.targetNetwork.predict(state)[0]
        else:
            return self.targetNetwork.predict(state)[0]

    def predict_action_network(self, trace):
        return self.actionNetwork.predict(numpy.array([numpy.array([trace])]))[0]

    def predict_action(self, state):
        if self.parameters.USE_ACTION_AS_INPUT:
            return [
                self.predict(numpy.array([numpy.concatenate(
                    (state[0], act))]))[0] for act in self.actions
            ]
        else:
            if self.parameters.NEURON_TYPE == "MLP":
                return self.predict(state)
            else:
                return self.predict_action_network(state)

    def saveModel(self, path, name=""):
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())
        self.targetNetwork.save(path + name + "model.h5")

    def setEpsilon(self, val):
        self.epsilon = val

    def setFrameSkipRate(self, value):
        self.frameSkipRate = value

    def getParameters(self):
        return self.parameters

    def getNumOfActions(self):
        return self.num_actions

    def getEpsilon(self):
        return self.epsilon

    def getDiscount(self):
        return self.discount

    def getFrameSkipRate(self):
        return self.frameSkipRate

    def getGridSquaresPerFov(self):
        return self.gridSquaresPerFov

    def getTargetNetworkMaxSteps(self):
        return self.targetNetworkMaxSteps

    def getStateReprLen(self):
        return self.stateReprLen

    def getHiddenLayer1(self):
        return self.hiddenLayer1

    def getHiddenLayer2(self):
        return self.hiddenLayer2

    def getHiddenLayer3(self):
        return self.hiddenLayer3

    def getNumActions(self):
        return self.num_actions

    def getLearningRate(self):
        return self.learningRate

    def getActivationFuncHidden(self):
        return self.activationFuncHidden

    def getActivationFuncOutput(self):
        return self.activationFuncOutput

    def getOptimizer(self):
        return self.optimizer

    def getLoadedModelName(self):
        return self.loadedModelName

    def getActions(self):
        return self.actions

    def getTargetNetwork(self):
        return self.targetNetwork

    def getValueNetwork(self):
        return self.valueNetwork
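
A typical DQN-style training loop around this class syncs the target network periodically via updateTargetNetwork(). A hypothetical sketch, where `parameters`, `batches`, and TARGET_SYNC_STEPS are placeholders:

net = Network(parameters)
for step, (inputs, targets, weights) in enumerate(batches):
    net.trainOnBatch(inputs, targets, weights)
    if step % TARGET_SYNC_STEPS == 0:
        net.updateTargetNetwork()  # copy value-network weights into the target network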
Example No. 7
    # (Excerpt: `mp2_outs`, `attn_confidences`, `attn_preds`, `fc_final_out`
    #  and `attn_maps` are defined in code elided above.)
    confidence_layer = Dense(N_AUTHORS, activation='sigmoid')
    c_out = Lambda(lambda x: K.sum(x, axis=0))(confidence_layer(mp2_outs))
    # get how much each attention head should contribute
    attn_attributions = calculate_attention_weight(attn_confidences,
                                                   attn_preds)
    # apply the confidence to the predictions
    gated_out = Multiply()([c_out, fc_final_out])
    # add the contributions from the attention heads
    attention_weighted_out = Add()([gated_out, attn_attributions])
    return (attention_weighted_out, attn_maps)


# construct the model
inputs = Input(shape=INPUT_SHAPE)
preds, maps = build_cnn_return_preds(inputs)
model = Model(inputs=[inputs], outputs=preds)
optimizer = adam(lr=0.003)
model.compile(optimizer=optimizer, loss=losses(maps), metrics=['accuracy'])
# hydrate the inputs
inputs = []
labels = []
for _, v in training_set.items():
    for x in v:
        inputs.append(x['text'])
        labels.append(x['author'])
inputs = np.asarray(inputs)
labels = np.expand_dims(to_categorical(np.asarray(labels)), 1)
# train the model
model.fit(inputs, labels, epochs=15, batch_size=100, shuffle='batch')
model.save('./saved_model')
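
Reload sketch: because the model was compiled with the custom losses(maps) objective, restoring it for inference is simplest with compile=False (an assumption about the intended use; the original does not show reloading):

from keras.models import load_model

model = load_model('./saved_model', compile=False)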
Example No. 8
                validation_steps=nb_videos_val // video_b_s,
                callbacks=[
                    EarlyStopping(patience=60),
                    ModelCheckpoint(model_path + save_name,
                                    save_weights_only=True,
                                    save_best_only=True)
                ])

        elif model_no == 1:
            xgaus_bshape = (video_b_s, shape_r_gaus, shape_c_gaus, nb_gaussian)
            ximgs_ops_bshape = (video_b_s, num_frames, shape_r, shape_c,
                                3 + 2 * opt_num)
            input_tensors = [
                Input(batch_shape=xgaus_bshape) for _ in range(num_frames)
            ]
            input_tensors.append(Input(batch_shape=ximgs_ops_bshape))

            m = TD_model_prior_masks(input_tensors,
                                     f1_train=True,
                                     stateful=stateful)

            save_name = 'vap_model/UHD_dcross_res_matt_res.{epoch:02d}-{val_loss:.4f}.h5'

            m.compile(Adam(lr=1e-5),
                      loss=[
                          kl_divergence, correlation_coefficient, nss, sim,
                          kl_divergence, correlation_coefficient, nss, sim,
                          kl_divergence, correlation_coefficient, nss, sim,
                          kl_divergence, correlation_coefficient, nss, sim
                      ],
                      loss_weights=[