Example #1
class ModelA(ModelBase):
    def __init__(self, wv, maxlen=50, max_num_deptag=50):
        super().__init__(wv, maxlen, max_num_deptag)
        embedding_layer = self.wv.model.wv.get_embedding_layer()

        sequence_input = Input(shape=(self.maxlen,), dtype='int32')
        mask = Masking(mask_value=-1)(sequence_input)
        embedded_sequences = embedding_layer(mask)
        x = (Conv1D(filters=32, kernel_size=3, activation='relu'))(embedded_sequences)
        x = (MaxPooling1D(pool_size=2))(x)
        x = (Bidirectional(GRU(100, dropout=0.15)))(x)
        x = (Dense(16))(x)
        x = (Dropout(0.2))(x)
        preds = (Dense(1, activation='tanh'))(x)
        self.model = Model(sequence_input, preds, name='ConvBiGRUModelA')
        self.compile_model()

    def compile_model(self):
        self.model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['mae'])

    def fit(self, x, y, batch_size, epochs, verbose, callbacks):
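        # Each row of x is assumed to be a sequence of (word index, dependency-tag index)
        # tuples (compare the two-input ModelE further down); this model keeps only t[0].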
        x_preprocessed = np.array([[t[0] for t in row] for row in x])
        x_preprocessed = sequence.pad_sequences(x_preprocessed, maxlen=self.maxlen, value=-1)

        y_preprocessed = np.array(y)

        self.model.fit(x_preprocessed, y_preprocessed, batch_size=batch_size, epochs=epochs, verbose=verbose,
                       callbacks=callbacks)

    def predict(self, x):
        x_preprocessed = np.array([[t[0] for t in row] for row in x])
        x_preprocessed = sequence.pad_sequences(x_preprocessed, maxlen=self.maxlen, value=-1)

        return self.model.predict(x_preprocessed)
def fit_model(X_train, y_train):
    # define model (admiss_data and main_output are layer tensors defined elsewhere)
    model = Model(inputs=[admiss_data], outputs=main_output)
    #
    print(model.summary())
    #
    adam = optimizers.Adam(lr=0.0001,
                           beta_1=0.9,
                           beta_2=0.999,
                           epsilon=1e-08,
                           decay=0.0)
    #
    model.compile(optimizer=adam,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    #
    class_weight = {
        0: 1.,
        1: cw,  # 1: 20.
    }
    histories = my_callbacks.Histories()
    # model fit
    model.fit([X_train],
              y_train,
              epochs=n_epochs,
              batch_size=n_batch_size,
              validation_data=([X_val], y_val),
              class_weight=class_weight,
              callbacks=[histories])
    model.save('base_nn.h5')
    return model
    def train(self, classdict, nb_topics, *args, **kwargs):
        """ Train the autoencoder.

        :param classdict: training data
        :param nb_topics: number of topics, i.e., the number of encoding dimensions
        :param args: arguments to be passed to keras model fitting
        :param kwargs: arguments to be passed to keras model fitting
        :return: None
        :type classdict: dict
        :type nb_topics: int
        """
        self.nb_topics = nb_topics
        self.generate_corpus(classdict)
        vecsize = len(self.dictionary)

        # define all the layers of the autoencoder
        input_vec = Input(shape=(vecsize, ))
        encoded = Dense(self.nb_topics, activation='relu')(input_vec)
        decoded = Dense(vecsize, activation='sigmoid')(encoded)

        # define the autoencoder model
        autoencoder = Model(input=input_vec, output=decoded)

        # define the encoder
        encoder = Model(input=input_vec, output=encoded)

        # define the decoder
        encoded_input = Input(shape=(self.nb_topics, ))
        decoder_layer = autoencoder.layers[-1]
        decoder = Model(input=encoded_input,
                        output=decoder_layer(encoded_input))

        # compile the autoencoder
        autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

        # process training data
        embedvecs = np.array(
            reduce(add, [
                list(map(
                    lambda shorttext: self.retrieve_bow_vector(
                        shorttext, normalize=True), classdict[classtype]))
                for classtype in classdict
            ]))

        # fit the model
        autoencoder.fit(embedvecs, embedvecs, *args, **kwargs)

        # store the autoencoder models
        self.autoencoder = autoencoder
        self.encoder = encoder
        self.decoder = decoder

        # flag setting
        self.trained = True

        # classes topic vector precomputation
        self.classtopicvecs = {}
        for label in classdict:
            self.classtopicvecs[label] = self.precalculate_liststr_topicvec(
                classdict[label])
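For reference, a self-contained sketch of the same dense autoencoder wiring on random bag-of-words vectors; the layer sizes, random data, and any names not mirrored from the method above are illustrative assumptions, not part of the original code.

import numpy as np
from keras.layers import Input, Dense
from keras.models import Model

# Illustrative stand-in for the bag-of-words matrix built from classdict.
vecsize, nb_topics = 100, 8
bow = np.random.rand(64, vecsize)

input_vec = Input(shape=(vecsize,))
encoded = Dense(nb_topics, activation='relu')(input_vec)
decoded = Dense(vecsize, activation='sigmoid')(encoded)

autoencoder = Model(input_vec, decoded)   # full autoencoder
encoder = Model(input_vec, encoded)       # encoder sub-model
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.fit(bow, bow, epochs=5, verbose=0)
print(encoder.predict(bow[:2]).shape)     # (2, nb_topics)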
Example #4
def test_works():
    x = Input(shape=(30, 1), name="input")
    e = GRU(128, return_sequences=True)(x)
    s = Slice("[-1,:]")(e)
    # s = Slice('[-1,:]')(e)
    # s = theano.printing.Print("s")(s)
    r = RepeatVector(30)(s)
    m = Merge(mode='concat', concat_axis=2)([r, x])
    d = GRU(128, return_sequences=True)(m)
    p = Ptr_Layer(30)([x, e, d])

    model = Model(input=x, output=p, name='test')

    # print(Sort(nb_out=5).get_output_shape_for((1,2,3)))

    inp = np.random.randint(size=(10000, 30, 1), low=0, high=100)
    indicies = np.argsort(inp[:, :, 0])
    # print(indicies)
    target = np.array(
        [np.take(inp[i], indicies[i], axis=-2) for i in range(inp.shape[0])])
    # print("Input")
    # print(inp)
    # print("Target")
    # print(target)
    model.compile(optimizer=optimizers.Adam(), loss='mse')
    model.fit(inp, target, nb_epoch=500, batch_size=100)
Example #5
def train():
    # extracting file saved by data_prep.py
    data = np.load('face_data.npz')
    x, y = data['x'], data['y']
    # categorical conversion of the data labels
    y = keras.utils.to_categorical(y, 6)
    # using transfer learning to reduce the time required to train the algo
    resnet = VGGFace(model='resnet50', input_shape=(224, 224, 3))

    layer_name = resnet.layers[-2].name
    # adding our own custom layer to make the model work on our dataset
    out = resnet.get_layer(layer_name).output
    out = Dense(6, activation='softmax')(out)
    resnet_4 = Model(resnet.input, out)
    # freeze every pre-trained layer so that only the newly added layer is trained
    for layer in resnet_4.layers[:-1]:
        layer.trainable = False

    resnet_4.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # checking the final created model
    print(resnet_4.summary())
    # training the model we have created with our own dataset
    resnet_4.fit(x, y, batch_size=10, epochs=10, shuffle=True)
    # saving the trained model so that it can be used afterwards
    resnet_4.save("C:\\Users\\hseth\\Desktop\\face recogination\\model_save_face.h5")
    # checking the accuracy of the model on training data only, as a very small dataset was used
    scores = resnet_4.evaluate(x, y, verbose=1)
    print('Test accuracy:', scores[1])
Example #6
class ModelB(ModelBase):
    def __init__(self, wv, maxlen=50, max_num_deptag=50):
        super().__init__(wv, maxlen, max_num_deptag)
        embedding_layer = self.wv.model.wv.get_embedding_layer()

        sequence_input = Input(shape=(self.maxlen,), dtype='int32')
        mask = Masking(mask_value=-1)(sequence_input)
        embedded_sequences = embedding_layer(mask)
        x = Dense(50, activation='tanh')(embedded_sequences)
        x = (Dropout(0.2))(x)
        x = Flatten()(x)
        preds = Dense(1)(x)
        self.model = Model(sequence_input, preds, name='MLPModelB')
        self.compile_model()

    def compile_model(self):
        self.model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['mae'])

    def fit(self, x, y, batch_size, epochs, verbose, callbacks):
        x_preprocessed = np.array([[t[0] for t in row] for row in x])
        x_preprocessed = sequence.pad_sequences(x_preprocessed, maxlen=self.maxlen, value=-1)

        y_preprocessed = np.array(y)

        self.model.fit(x_preprocessed, y_preprocessed, batch_size=batch_size, epochs=epochs, verbose=verbose,
                       callbacks=callbacks)

    def predict(self, x):
        x_preprocessed = np.array([[t[0] for t in row] for row in x])
        x_preprocessed = sequence.pad_sequences(x_preprocessed, maxlen=self.maxlen, value=-1)

        return self.model.predict(x_preprocessed)
def my_layer():
    """test one specify layer"""
    a = Input(shape=(3, 3, 2))
    b = WeightedAdd()(a)
    model = Model(inputs=a, outputs=b)
    data = np.ones((1, 3, 3, 2))
    print(model.predict_on_batch(data))
    model.compile(optimizer='Adam', loss=mean_squared_error)
    model.fit(data, data, epochs=1000)
    print(model.predict_on_batch(data))
Example #8
class ModelE(ModelBase):
    def __init__(self, wv, maxlen=50, max_num_deptag=50):
        super().__init__(wv, maxlen, max_num_deptag)
        wv_layer = self.wv.model.wv.get_embedding_layer()

        wv_input = Input(shape=(self.maxlen,), dtype='int32')
        wv_mask = Masking(mask_value=-1)(wv_input)
        wv_sequences = wv_layer(wv_mask)

        deptag_input = Input(shape=(self.maxlen,), dtype='int32')
        deptag_mask = Masking(mask_value=0)(deptag_input)
        deptag_sequences = Embedding(input_dim=max_num_deptag, output_dim=10, input_length=self.maxlen)(deptag_mask)

        x = concatenate([wv_sequences, deptag_sequences])

        x = (Conv1D(filters=128, kernel_size=3, activation='relu'))(x)
        # x = (AveragePooling1D(pool_size=3))(x)
        x = (Dropout(0.2))(x)
        x = (Conv1D(filters=32, kernel_size=17, activation='relu'))(x)
        x = (Dropout(0.2))(x)
        x = (GlobalMaxPooling1D())(x)
        x = (Dense(16))(x)
        x = (Dropout(0.2))(x)
        preds = (Dense(1, activation='tanh'))(x)
        self.model = Model(inputs=[wv_input, deptag_input], outputs=preds, name='OnlyConvolutions')
        self.compile_model()

    def compile_model(self):
        self.model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['mae'])

    def fit(self, x, y, batch_size, epochs, verbose, callbacks):
        x1_preprocessed = np.array([[t[0] for t in row] for row in x])
        x1_preprocessed = sequence.pad_sequences(x1_preprocessed, maxlen=self.maxlen, value=-1)

        x2_preprocessed = np.array([[t[1] for t in row] for row in x])
        x2_preprocessed = sequence.pad_sequences(x2_preprocessed, maxlen=self.maxlen, value=-1)

        y_preprocessed = np.array(y)

        self.model.fit([x1_preprocessed, x2_preprocessed], y_preprocessed, batch_size=batch_size, epochs=epochs,
                       verbose=verbose,
                       callbacks=callbacks)

    def predict(self, x):
        x1_preprocessed = np.array([[t[0] for t in row] for row in x])
        x1_preprocessed = sequence.pad_sequences(x1_preprocessed, maxlen=self.maxlen, value=-1)

        x2_preprocessed = np.array([[t[1] + 1 for t in row] for row in x])
        x2_preprocessed = sequence.pad_sequences(x2_preprocessed, maxlen=self.maxlen, value=0)

        return self.model.predict([x1_preprocessed, x2_preprocessed])
class CNNSigmoid(ModelBase):
    def __init__(self, wv, maxlen=50, max_num_deptag=50):
        super().__init__(wv, maxlen, max_num_deptag)
        embedding_layer = self.wv.model.wv.get_embedding_layer()

        sequence_input = Input(shape=(self.maxlen, ), dtype='int32')
        mask = Masking(mask_value=-1)(sequence_input)
        embedded_sequences = embedding_layer(mask)
        x = (Conv1D(filters=128, kernel_size=3,
                    activation='relu'))(embedded_sequences)
        # x = (AveragePooling1D(pool_size=3))(x)
        x = (Dropout(0.2))(x)
        x = (Conv1D(filters=32, kernel_size=17, activation='relu'))(x)
        x = (Dropout(0.2))(x)
        x = (GlobalMaxPooling1D())(x)
        x = (Dense(16))(x)
        x = (Dropout(0.2))(x)
        preds = Dense(1, activation='sigmoid')(x)
        self.model = Model(sequence_input, preds, name='CNNSigmoid')
        self.compile_model()

    def compile_model(self):
        self.model.compile(loss='mean_squared_error',
                           optimizer='sgd',
                           metrics=['mae'])

    def fit(self, x, y, batch_size, epochs, verbose, callbacks):
        x_preprocessed = np.array(x)
        x_preprocessed = sequence.pad_sequences(x_preprocessed,
                                                maxlen=self.maxlen,
                                                value=-1)

        y_preprocessed = np.array(y)

        self.model.fit(x_preprocessed,
                       y_preprocessed,
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=verbose,
                       callbacks=callbacks)

    def predict(self, x):
        x_preprocessed = np.array(x)
        x_preprocessed = sequence.pad_sequences(x_preprocessed,
                                                maxlen=self.maxlen,
                                                value=-1)

        return self.model.predict(x_preprocessed)
Example #10
def embedding_binary_classification():
    docs = [
        'Well done!', 'Good work', 'Great effort', 'nice work', 'Excellent!',
        'Weak', 'Poor effort!', 'not good', 'poor work',
        'Could have done better.'
    ]

    # define class labels
    labels = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
    vocab_size = 50
    encoded_docs = [one_hot(d, vocab_size)
                    for d in docs]  # one_hot encodes into [1, n], excluding 0
    print(encoded_docs)

    max_length = 4
    padded_docs = pad_sequences(encoded_docs,
                                maxlen=max_length,
                                padding='post')
    print(padded_docs)

    input = Input(shape=(4, ))
    x = Embedding(vocab_size, 8,
                  input_length=max_length)(input)  # this layer has 50*8 parameters
    x = Flatten()(x)
    x = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=input, outputs=x)

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['acc'])
    model.summary()

    model.fit(padded_docs, labels, epochs=100, verbose=0)
    loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)
    print('loss: {0},accuracy:{1}'.format(loss, accuracy))

    loss_test, accuracy_test = model.evaluate(padded_docs, labels, verbose=0)
    print('loss_test: {0},accuracy_test:{1}'.format(loss_test, accuracy_test))

    test = one_hot('Weak', 50)
    padded_test = pad_sequences([test], maxlen=max_length, padding='post')
    print(model.predict(padded_test))
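As an aside, a tiny standalone demonstration of the one_hot / pad_sequences preprocessing used above; one_hot is hash-based, so the exact integers are illustrative and will vary.

from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences

# one_hot uses a hashing trick, so these indices are examples only.
encoded = one_hot('Could have done better.', 50)
padded = pad_sequences([encoded], maxlen=4, padding='post')
print(encoded)        # four word indices in [1, 50)
print(padded.shape)   # (1, 4)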
Example #11
def train(model: Model,
          x_train,
          y_train,
          x_test,
          y_test,
          batch_size=128,
          epochs=20):
    time = int(datetime.now().timestamp())
    name = "{}_{}".format("conv", time)
    chkp_path = "./checkpoints/{}".format(name)
    os.makedirs(chkp_path, exist_ok=True)

    model.compile(loss=K.categorical_crossentropy,
                  optimizer=Adam(),
                  metrics=[metrics.categorical_accuracy])

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_test, y_test),
        callbacks=[
            TensorBoard(log_dir='/tmp/tensorflow/{}'.format(name)),
            ModelCheckpoint(os.path.join(
                chkp_path,
                "weights-improvement-{epoch:02d}-{val_categorical_accuracy:.2f}.hdf5"
            ),
                            monitor='val_categorical_accuracy',
                            verbose=1,
                            save_best_only=True,
                            mode='auto')
        ])

    export_model(tf.train.Saver(), ['conv2d_1_input'], 'dense_2/Softmax', name)

    score = model.evaluate(x_test, y_test)

    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
Example #12
    def train_auto_encoder(self,
                           train_x,
                           test_x,
                           input_dim,
                           out_model_file='encoder_cnn.h5',
                           monitor='val_loss',
                           patience=4):
        '''
        Train a dense autoencoder on train_x (validated against test_x) and
        return the output tensor of its last encoder layer.
        '''

        early_stop = EarlyStopping(monitor=monitor, patience=patience)
        checkpoint = ModelCheckpoint(out_model_file, monitor='val_loss', verbose=1,
                                     save_best_only=True, mode='min')

        input_data = Input(shape=(input_dim,), name='Input')

        encoder = Dense(512, activation='relu', name='Encoder1')(input_data)
        encoder = Dense(256, activation='relu', name='Encoder2')(encoder)
        encoder = Dense(128, activation='relu', name='Encoder3')(encoder)

        decoder = Dense(256, activation='relu', name='Decoder1')(encoder)
        decoder = Dense(512, activation='relu', name='Decoder2')(decoder)
        decoder = Dense(input_dim, activation='linear', name='Output')(decoder)

        autoencoder = Model(input_data, decoder)
        autoencoder.compile(optimizer='adam', loss='mse')
        autoencoder.fit(train_x,
                        train_x,
                        epochs=200,
                        batch_size=512,
                        callbacks=[checkpoint, early_stop],
                        shuffle=True,
                        validation_data=(test_x, test_x),
                        verbose=1)

        autoencoder.save(out_model_file)

        return encoder
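Note that the value returned above is the Keras tensor produced by the last encoder layer, not a standalone model. A self-contained sketch (made-up sizes and random data, not from the original code) of how an encoder sub-model is typically extracted after training:

import numpy as np
from keras.layers import Input, Dense
from keras.models import Model

input_dim = 64
x = np.random.rand(256, input_dim)

input_data = Input(shape=(input_dim,), name='Input')
encoded = Dense(16, activation='relu', name='Encoder1')(input_data)
decoded = Dense(input_dim, activation='linear', name='Output')(encoded)

autoencoder = Model(input_data, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(x, x, epochs=3, batch_size=32, verbose=0)

# Reuse the already-trained layers to obtain a standalone encoder model.
encoder_model = Model(inputs=input_data, outputs=encoded)
print(encoder_model.predict(x[:5]).shape)   # (5, 16)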
Example #13
def sda_training(features, labels):
    encoder_dims = [1600, 1024, 768]
    stacked_encoder = []
    int_labels = label_to_category(labels=labels, type='training')
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=10)
    y_train = to_categorical([int_labels[i] for i in y_train])
    y_test = to_categorical([int_labels[i] for i in y_test])

    for encoder_dim in encoder_dims:
        input_dim = X_train.shape[1]
        input_img = Input(shape=(input_dim,))
        n_layer = noise.GaussianNoise(0.3)(input_img)
        encode = Dense(encoder_dim, activation='sigmoid')(n_layer)
        decode = Dense(input_dim, activation='sigmoid')(encode)

        ae = Model(input_img, decode)
        ae.compile(optimizer='adam', loss='mape')
        ae.fit(X_train, X_train, epochs=10, batch_size=32, shuffle=True, validation_data=(X_test, X_test))

        encoder = Model(input_img, encode)
        X_train = encoder.predict(X_train)
        X_test = encoder.predict(X_test)
        stacked_encoder.append(encoder.layers[-1])
Example #14
    def test_lkrelu(self):
        batch_size = 32
        num_classes = 10

        (x_train, y_train), (x_test, y_test) = load_cifar10()

        inputs = Input(shape=x_train.shape[1:])
        x = Conv2D(self.width, (3, 3))(inputs)
        x = LKReLU()(x)
        x = Flatten()(x)
        x = Dense(num_classes, activation='softmax', name='fc1000')(x)
        model = Model(inputs=inputs, outputs=x)
        print(model.summary())

        opt = keras.optimizers.sgd()

        model.compile(loss='categorical_crossentropy',
                      optimizer=opt,
                      metrics=self.metrics)

        log_dir = 'summaries/width-lkrelu-{}-cifar10-{}-{}'.format(
            self.width, self.seed, datetime.datetime.now())
        model.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            epochs=self.epochs,
            validation_data=(x_test, y_test),
            shuffle=False,
            callbacks=[
                TensorBoard(log_dir=log_dir),
                # ModelCheckpoint(
                # 'checkpoints/width-lkrelu-cifar10-{epoch:02d}-{val_loss:.2f}.hdf5')
            ])
        score = model.evaluate(x_test, y_test, verbose=0)
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
Example #15
def vgg16_model(img_width, img_height, nb_epoch, nb_classes):
    base_model = VGG16(weights='imagenet',
                       include_top=False,
                       input_shape=(img_width, img_height, 3))

    # load dataset
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    y_train = np_utils.to_categorical(y_train, nb_classes)

    y_test = np_utils.to_categorical(y_test, nb_classes)

    # Extract the output of the last pooling layer (block5_pool) of the VGG16 model
    last = base_model.get_layer('block5_pool').output

    # Add classification layers on top of it
    x = Flatten()(last)
    x = BatchNormalization()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(nb_classes, activation='softmax')(x)

    model = Model(base_model.input, output)
    # model.summary()

    # model compile & fit
    # categorical_crossentropy matches the multi-class softmax output
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.SGD(lr=1e-3, momentum=0.9),
                  metrics=['accuracy'])

    model.fit(X_train,
              y_train,
              validation_data=(X_test, y_test),
              epochs=nb_epoch,
              batch_size=100,
              verbose=1)
    return model
class CTCModel:
    def __init__(self,
                 inputs,
                 outputs,
                 greedy=True,
                 beam_width=100,
                 top_paths=1,
                 padding=-1,
                 charset=None):
        """
        A override ou réécrire. C'est dans cette fonction qu'il faudra
        affecter self.inputs et self.outputs avec les listes des layers
        d'entrée et de sortie du réseau
        """
        self.model_train = None
        self.model_pred = None
        self.model_eval = None
        self.inputs = inputs
        self.outputs = outputs

        self.greedy = greedy
        self.beam_width = beam_width
        self.top_paths = top_paths
        self.padding = padding
        self.charset = charset

    def compile(self, optimizer):
        """
        A appeler une fois le modèle créé. Compile le modèle en ajoutant
        la loss CTC

        :param optimizer: L'optimizer a utiliser pendant l'apprentissage
        """
        # Calcul du CTC
        labels = Input(name='labels', shape=[None])
        input_length = Input(name='input_length', shape=[1])
        label_length = Input(name='label_length', shape=[1])

        # Lambda layer for computing the loss function
        loss_out = Lambda(self.ctc_loss_lambda_func,
                          output_shape=(1, ),
                          name='CTCloss')(self.outputs +
                                          [labels, input_length, label_length])

        # Lambda layer for the decoding function
        out_decoded_dense = Lambda(self.ctc_complete_decoding_lambda_func,
                                   output_shape=(None, None),
                                   name='CTCdecode',
                                   arguments={
                                       'greedy': self.greedy,
                                       'beam_width': self.beam_width,
                                       'top_paths': self.top_paths
                                   },
                                   dtype="float32")(self.outputs +
                                                    [input_length])

        # Lambda layer for computing the label error rate
        out_analysis = Lambda(
            self.ctc_complete_analysis_lambda_func,
            output_shape=(None, ),
            name='CTCanalysis',
            arguments={
                'greedy': self.greedy,
                'beam_width': self.beam_width,
                'top_paths': self.top_paths
            },
            dtype="float32")(self.outputs +
                             [labels, input_length, label_length])

        # create Keras models
        self.model_init = Model(inputs=self.inputs, outputs=self.outputs)
        self.model_train = Model(inputs=self.inputs +
                                 [labels, input_length, label_length],
                                 outputs=loss_out)
        self.model_pred = Model(inputs=self.inputs + [input_length],
                                outputs=out_decoded_dense)
        self.model_eval = Model(inputs=self.inputs +
                                [labels, input_length, label_length],
                                outputs=out_analysis)

        # Compile models
        self.model_train.compile(loss={
            'CTCloss': lambda yt, yp: yp
        },
                                 optimizer=optimizer)
        self.model_pred.compile(loss={
            'CTCdecode': lambda yt, yp: yp
        },
                                optimizer=optimizer)
        self.model_eval.compile(loss={
            'CTCanalysis': lambda yt, yp: yp
        },
                                optimizer=optimizer)

    def get_model_train(self):
        """
        :return: Modèle utilisé en interne pour l'entraînement
        """
        return self.model_train

    def get_model_pred(self):
        """
        :return: Modèle utilisé en interne pour la prédiction
        """
        return self.model_pred

    def get_model_eval(self):
        """
        :return: Model used to evaluate a data set
        """
        return self.model_eval

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.0,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None):
        """

                Permet de lire les données sur un device (CPU) et en
                parallèle de s'entraîner sur un autre device (GPU)

                Les données d'entrée doivent être de la forme :
                  [input_sequences, label_sequences, inputs_lengths, labels_length]

                :param: Paramètres identiques à ceux de keras.engine.Model.fit()
                :return: L'objet History correspondant à l'entrainement
        """

        out = self.model_train.fit(x=x,
                                   y=y,
                                   batch_size=batch_size,
                                   epochs=epochs,
                                   verbose=verbose,
                                   callbacks=callbacks,
                                   validation_split=validation_split,
                                   validation_data=validation_data,
                                   shuffle=shuffle,
                                   class_weight=class_weight,
                                   sample_weight=sample_weight,
                                   initial_epoch=initial_epoch,
                                   steps_per_epoch=steps_per_epoch,
                                   validation_steps=validation_steps)

        self.model_pred.set_weights(self.model_train.get_weights())
        self.model_eval.set_weights(self.model_train.get_weights())
        return out

    def predict(self, x, batch_size=None, verbose=0):
        """ CTC prediction

        Inputs:
            x = Input data as a 3D Tensor (batch_size, max_input_len, dim_features)
            x_len = 1D array with the length of each data in batch_size
            y = Input data as a 2D Tensor (batch_size, max_label_len)
            y_len = 1D array with the length of each labeling
            label_array = list of labels
            pred = return predictions from the ctc (from model_pred)
            eval = return an analysis of ctc prediction (from model_eval)

        Outputs: a list containing:
            out_pred = output of model_pred
            out_eval = output of model_eval
        """

        #model_out = self.model_pred.evaluate(x=x, y=np.zeros(x[0].shape[0]), batch_size=batch_size, verbose=verbose)
        model_out = self.model_pred.predict(x,
                                            batch_size=batch_size,
                                            verbose=verbose)

        return model_out

    def predict2(self, x, batch_size=None, verbose=0, steps=None):
        """
        The same function as in the Keras Model but with a different function predict_loop for dealing with variable length predictions

        Generates output predictions for the input samples.

                Computation is done in batches.

                # Arguments
                    x: The input data, as a Numpy array
                        (or list of Numpy arrays if the model has multiple outputs).
                    batch_size: Integer. If unspecified, it will default to 32.
                    verbose: Verbosity mode, 0 or 1.
                    steps: Total number of steps (batches of samples)
                        before declaring the prediction round finished.
                        Ignored with the default value of `None`.

                # Returns
                    Numpy array(s) of predictions.

                # Raises
                    ValueError: In case of mismatch between the provided
                        input data and the model's expectations,
                        or in case a stateful model receives a number of samples
                        that is not a multiple of the batch size.
                """
        #[x, x_len] = x
        # Backwards compatibility.
        if batch_size is None and steps is None:
            batch_size = 32
        if x is None and steps is None:
            raise ValueError('If predicting from data tensors, '
                             'you should specify the `steps` '
                             'argument.')
        # Validate user data.
        x = _standardize_input_data(x,
                                    self.model_pred._feed_input_names,
                                    self.model_pred._feed_input_shapes,
                                    check_batch_axis=False)
        if self.model_pred.stateful:
            if x[0].shape[0] > batch_size and x[0].shape[0] % batch_size != 0:
                raise ValueError('In a stateful network, '
                                 'you should only pass inputs with '
                                 'a number of samples that can be '
                                 'divided by the batch size. Found: ' +
                                 str(x[0].shape[0]) + ' samples. '
                                 'Batch size: ' + str(batch_size) + '.')

        # Prepare inputs, delegate logic to `_predict_loop`.
        if self.model_pred.uses_learning_phase and not isinstance(
                K.learning_phase(), int):
            #ins = [x, x_len] + [0.]
            ins = x + [0.]
        else:
            #ins = [x, x_len]
            ins = x
        self.model_pred._make_predict_function()
        f = self.model_pred.predict_function
        out_pred = self._predict_loop(f,
                                      ins,
                                      batch_size=batch_size,
                                      verbose=verbose,
                                      steps=steps)

        list_pred = []
        for elmt in out_pred:
            pred = []
            for val in elmt:
                if val != -1:
                    pred.append(val)
            list_pred.append(pred)

        return list_pred

    @staticmethod
    def ctc_loss_lambda_func(args):
        """
        Function for computing the ctc loss (can be put in a Lambda layer)
        :param args:
            y_pred, labels, input_length, label_length
        :return: CTC loss
        """

        y_pred, labels, input_length, label_length = args
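        # Per the Keras backend docs, the expected shapes are:
        #   labels        (batch, max_label_length)
        #   y_pred        (batch, time_steps, num_classes)
        #   input_length  (batch, 1)
        #   label_length  (batch, 1)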
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

    @staticmethod
    def ctc_complete_decoding_lambda_func(args, **arguments):
        """
        Complete CTC decoding using Keras (function K.ctc_decode)
        :param args:
            y_pred, input_length
        :param arguments:
            greedy, beam_width, top_paths
        :return:
            K.ctc_decode with dtype='float32'
        """

        y_pred, input_length = args
        my_params = arguments

        assert (K.backend() == 'tensorflow')

        return K.cast(K.ctc_decode(y_pred,
                                   tf.squeeze(input_length),
                                   greedy=my_params['greedy'],
                                   beam_width=my_params['beam_width'],
                                   top_paths=my_params['top_paths'])[0][0],
                      dtype='float32')

    @staticmethod
    def ctc_complete_analysis_lambda_func(args, **arguments):
        """
        Complete CTC analysis using Keras and tensorflow
        WARNING : tf is required
        :param args:
            y_pred, labels, input_length, label_len
        :param arguments:
            greedy, beam_width, top_paths
        :return:
            ler = label error rate
        """

        y_pred, labels, input_length, label_len = args
        my_params = arguments

        assert (K.backend() == 'tensorflow')

        batch = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)
        input_length = tf.to_int32(tf.squeeze(input_length))

        greedy = my_params['greedy']
        beam_width = my_params['beam_width']
        top_paths = my_params['top_paths']

        if greedy:
            (decoded,
             log_prob) = ctc.ctc_greedy_decoder(inputs=batch,
                                                sequence_length=input_length)
        else:
            (decoded, log_prob) = ctc.ctc_beam_search_decoder(
                inputs=batch,
                sequence_length=input_length,
                beam_width=beam_width,
                top_paths=top_paths)

        cast_decoded = tf.cast(decoded[0], tf.float32)

        sparse_y = K.ctc_label_dense_to_sparse(
            labels, tf.cast(tf.squeeze(label_len), tf.int32))
        ed_tensor = tf_edit_distance(cast_decoded, sparse_y, norm=True)
        ler_per_seq = Kreshape_To1D(ed_tensor)

        return K.cast(ler_per_seq, dtype='float32')

    def _predict_loop(self,
                      f,
                      ins,
                      max_len=20,
                      max_value=-1,
                      batch_size=32,
                      verbose=0,
                      steps=None):
        """Abstract method to loop over some data in batches.

        # Arguments
            f: Keras function returning a list of tensors.
            ins: list of tensors to be fed to `f`.
            batch_size: integer batch size.
            verbose: verbosity mode.
            steps: Total number of steps (batches of samples)
                before declaring `_predict_loop` finished.
                Ignored with the default value of `None`.

        # Returns
            Array of predictions (if the model has a single output)
            or list of arrays of predictions
            (if the model has multiple outputs).
        """
        num_samples = self.model_pred._check_num_samples(
            ins, batch_size, steps, 'steps')

        if steps is not None:
            # Step-based predictions.
            # Since we do not know how many samples
            # we will see, we cannot pre-allocate
            # the returned Numpy arrays.
            # Instead, we store one array per batch seen
            # and concatenate them upon returning.
            unconcatenated_outs = []
            for step in range(steps):
                batch_outs = f(ins)
                if not isinstance(batch_outs, list):
                    batch_outs = [batch_outs]
                if step == 0:
                    for batch_out in batch_outs:
                        unconcatenated_outs.append([])
                for i, batch_out in enumerate(batch_outs):
                    unconcatenated_outs[i].append(batch_out)

            if len(unconcatenated_outs) == 1:
                return np.concatenate(unconcatenated_outs[0], axis=0)
            return [
                np.concatenate(unconcatenated_outs[i], axis=0)
                for i in range(len(unconcatenated_outs))
            ]
        else:
            # Sample-based predictions.
            outs = []
            batches = _make_batches(num_samples, batch_size)
            index_array = np.arange(num_samples)
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                if ins and isinstance(ins[-1], float):
                    # Do not slice the training phase flag.
                    ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
                else:
                    ins_batch = _slice_arrays(ins, batch_ids)
                batch_outs = f(ins_batch)
                if not isinstance(batch_outs, list):
                    batch_outs = [batch_outs]
                if batch_index == 0:
                    # Pre-allocate the results arrays.
                    for batch_out in batch_outs:
                        # Changed from the upstream Keras predict loop: allocate a
                        # fixed-length (num_samples, max_len) buffer instead of
                        # (num_samples,) + batch_out.shape[1:].
                        shape = (num_samples, max_len)
                        outs.append(np.zeros(shape, dtype=batch_out.dtype))
                for i, batch_out in enumerate(batch_outs):
                    #outs[i][batch_start:batch_end] = batch_out # WARNING
                    outs[i][batch_start:batch_end] = sequence.pad_sequences(
                        batch_out,
                        value=float(max_value),
                        maxlen=max_len,
                        dtype=batch_out.dtype,
                        padding="post")

            if len(outs) == 1:
                return outs[0]
            return outs

    def save_model(self, path_dir, charset=None):
        """ Save a model in path_dir
        save model_train, model_pred and model_eval in json
        save inputs and outputs in json
        save model CTC parameters in a pickle """

        model_json = self.model_train.to_json()
        with open(path_dir + "/model_train.json", "w") as json_file:
            json_file.write(model_json)

        model_json = self.model_pred.to_json()
        with open(path_dir + "/model_pred.json", "w") as json_file:
            json_file.write(model_json)

        model_json = self.model_eval.to_json()
        with open(path_dir + "/model_eval.json", "w") as json_file:
            json_file.write(model_json)

        model_json = self.model_init.to_json()
        with open(path_dir + "/model_init.json", "w") as json_file:
            json_file.write(model_json)

        param = {
            'greedy': self.greedy,
            'beam_width': self.beam_width,
            'top_paths': self.top_paths,
            'charset': self.charset
        }

        output = open(path_dir + "/model_param.pkl", 'wb')
        p = pickle.Pickler(output)
        p.dump(param)
        output.close()

    def load_model(self, path_dir, optimizer, initial_epoch=None):
        """ Load a model in path_dir
        load model_train, model_pred and model_eval from json
        load inputs and outputs from json
        load model CTC parameters from a pickle """

        json_file = open(path_dir + '/model_train.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        self.model_train = model_from_json(loaded_model_json)

        json_file = open(path_dir + '/model_pred.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        self.model_pred = model_from_json(loaded_model_json,
                                          custom_objects={"tf": tf})

        json_file = open(path_dir + '/model_eval.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        self.model_eval = model_from_json(loaded_model_json,
                                          custom_objects={
                                              "tf": tf,
                                              "ctc": ctc,
                                              "tf_edit_distance":
                                              tf_edit_distance,
                                              "Kreshape_To1D": Kreshape_To1D
                                          })

        json_file = open(path_dir + '/model_init.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        self.model_init = model_from_json(loaded_model_json,
                                          custom_objects={"tf": tf})

        self.inputs = self.model_init.inputs
        self.outputs = self.model_init.outputs

        input = open(path_dir + "/model_param.pkl", 'rb')
        p = pickle.Unpickler(input)
        param = p.load()
        input.close()

        self.greedy = param.get('greedy', self.greedy)
        self.beam_width = param.get('beam_width', self.beam_width)
        self.top_paths = param.get('top_paths', self.top_paths)
        self.charset = param.get('charset', self.charset)

        self.compile(optimizer)

        if initial_epoch:
            file_weight = path_dir + 'weights.' + "%02d" % (
                initial_epoch) + '.hdf5'
            print(file_weight)
            if os.path.exists(file_weight):
                self.model_train.load_weights(file_weight)
                self.model_pred.set_weights(self.model_train.get_weights())
                self.model_eval.set_weights(self.model_train.get_weights())
            else:
                print("Weights for epoch", initial_epoch, "cannot be loaded.")
        else:
            print("Training will start from the beginning.")
class AttentionSumReader(object):
    def __init__(self,
                 word_dict,
                 embedding_matrix,
                 d_len,
                 q_len,
                 embedding_dim,
                 hidden_size,
                 num_layers,
                 weight_path,
                 use_lstm=False):
        """
        初始化模型
        b ... batch_size
        t ... d_len
        f ... hidden_size*2
        i ... candidate_len 
        """
        self.weight_path = weight_path
        self.word_dict = word_dict
        self.vocab_size = len(embedding_matrix)
        self.d_len = d_len
        self.q_len = q_len
        self.A_len = 10

        logging.info("Embedding matrix shape:%d x %d" %
                     (len(embedding_matrix), embedding_dim))

        self.rnn_cell = LSTM if use_lstm else GRU
        self.cell_name = "LSTM" if use_lstm else "GRU"

        # model inputs
        q_input = Input(batch_shape=(None, self.q_len),
                        dtype="int32",
                        name="q_input")
        d_input = Input(batch_shape=(
            None,
            self.d_len,
        ),
                        dtype="int32",
                        name="d_input")
        context_mask = Input(batch_shape=(None, self.d_len),
                             dtype="float32",
                             name="context_mask")
        candidates_bi = Input(batch_shape=(None, self.A_len),
                              dtype="int32",
                              name="candidates_bi")

        # question encoder model
        # output shape: (None, max_q_length, embedding_dim)
        q_encode = Embedding(
            input_dim=self.vocab_size,
            output_dim=embedding_dim,
            weights=[embedding_matrix],
            mask_zero=True,
        )(q_input)
        for i in range(1, num_layers):
            q_encode = Bidirectional(self.rnn_cell(
                units=hidden_size,
                name="{}-{}-{}".format("q-encoder", self.cell_name, i),
                kernel_initializer="glorot_uniform",
                recurrent_initializer="glorot_uniform",
                bias_initializer='zeros',
                return_sequences=True),
                                     merge_mode="concat",
                                     dtype="float32")(q_encode)
        # q_encoder output shape: (None, hidden_size * 2)
        # TODO: represent q with the hidden state of the last step
        q_encode = Bidirectional(self.rnn_cell(
            units=hidden_size,
            name="{}-{}-{}".format("q-encoder", self.cell_name, num_layers),
            kernel_initializer="glorot_uniform",
            recurrent_initializer="glorot_uniform",
            bias_initializer='zeros',
            return_sequences=False),
                                 merge_mode="concat",
                                 dtype="float32")(q_encode)

        # context document encoder model
        # output shape: (None, max_d_length, embedding_dim)
        d_encode = Embedding(input_dim=self.vocab_size,
                             output_dim=embedding_dim,
                             weights=[embedding_matrix],
                             mask_zero=True,
                             input_length=self.d_len)(d_input)

        # d_encoder output shape: (None, max_d_length, hidden_size * 2)
        for i in range(1, num_layers + 1):
            d_encode = Bidirectional(self.rnn_cell(
                units=hidden_size,
                name="{}-{}-{}".format("d-encoder", self.cell_name, i),
                kernel_initializer="glorot_uniform",
                recurrent_initializer="glorot_uniform",
                bias_initializer='zeros',
                return_sequences=True),
                                     merge_mode="concat",
                                     dtype="float32")(d_encode)

        # noinspection PyUnusedLocal
        def my_dot(x):
            """注意力点乘函数,原始版本"""
            c = [
                tf.reduce_sum(tf.multiply(x[0][:, inx, :], x[1]),
                              -1,
                              keep_dims=True) for inx in range(self.d_len)
            ]
            return tf.concat(c, -1)

        def my_dot_v2(x):
            """注意力点乘函数,快速版本"""
            d_btf, q_bf = x
            res = K.batch_dot(tf.expand_dims(q_bf, -1), d_btf, (1, 2))
            return K.reshape(res, [-1, self.d_len])

        mem_attention_pre_soft_bt = Lambda(
            my_dot_v2, name="attention")([d_encode, q_encode])
        mem_attention_pre_soft_masked_bt = Multiply(name="mask")(
            [mem_attention_pre_soft_bt, context_mask])
        mem_attention_bt = Activation(
            activation="softmax",
            name="softmax")(mem_attention_pre_soft_masked_bt)

        # attention-sum step
        # TODO: Get rid of sentence-by-sentence processing?
        # TODO: Rewrite into matrix notation instead of scans?
        def sum_prob_of_word(word_ix, sentence_ixs, sentence_attention_probs):
            word_ixs_in_sentence = tf.where(tf.equal(sentence_ixs, word_ix))
            return tf.reduce_sum(
                tf.gather(sentence_attention_probs, word_ixs_in_sentence))

        # noinspection PyUnusedLocal
        def sum_probs_single_sentence(prev, cur):
            candidate_indices_i, sentence_ixs_t, sentence_attention_probs_t = cur
            result = tf.scan(fn=lambda previous, x: sum_prob_of_word(
                x, sentence_ixs_t, sentence_attention_probs_t),
                             elems=[candidate_indices_i],
                             initializer=K.constant(0., dtype="float32"),
                             swap_memory=True)
            return result

        def sum_probs_batch(candidate_indices_bi, sentence_ixs_bt,
                            sentence_attention_probs_bt):
            result = tf.scan(fn=sum_probs_single_sentence,
                             elems=[
                                 candidate_indices_bi, sentence_ixs_bt,
                                 sentence_attention_probs_bt
                             ],
                             initializer=K.variable([0] * self.A_len,
                                                    dtype="float32"),
                             swap_memory=True)
            return result

        # output shape: (None, i) i = max_candidate_length = 10
        y_hat = Lambda(lambda x: sum_probs_batch(x[0], x[1], x[2]),
                       name="attention_sum")(
                           [candidates_bi, d_input, mem_attention_bt])
        self.model = Model(
            inputs=[q_input, d_input, context_mask, candidates_bi],
            outputs=y_hat)
        plot_model(self.model,
                   to_file=__file__ + ".png",
                   show_shapes=True,
                   show_layer_names=True)
        self.model.summary()

    # noinspection PyUnusedLocal
    def train(self, train_data, valid_data, batch_size, epochs, opt_name, lr,
              grad_clip):
        """
        模型训练。
        """
        def save_weight_on_epoch_end(epoch, e_logs):
            filename = "{}weight-epoch{}-{}-{}.h5".format(
                self.weight_path,
                time.strftime("%Y-%m-%d-(%H-%M)", time.localtime()), epoch,
                e_logs['val_acc'])
            self.model.save_weights(filepath=filename)

        checkpointer = LambdaCallback(on_epoch_end=save_weight_on_epoch_end)

        # tensorboard = TensorBoard(log_dir="./logs", histogram_freq=1, write_images=True)
        earlystopping = EarlyStopping(monitor="val_loss",
                                      patience=3,
                                      verbose=1)

        # preprocess the inputs
        questions_ok, documents_ok, context_mask, candidates_ok, y_true = self.preprocess_input_sequences(
            train_data)
        v_questions, v_documents, v_context_mask, v_candidates, v_y_true = self.preprocess_input_sequences(
            valid_data)
        if opt_name == "SGD":
            optimizer = SGD(lr=lr, decay=1e-6, clipvalue=grad_clip)
        elif opt_name == "ADAM":
            optimizer = Adam(lr=lr, clipvalue=grad_clip)
        else:
            raise NotImplementedError("Other Optimizer Not Implemented.-_-||")
        self.model.compile(optimizer=optimizer,
                           loss="categorical_crossentropy",
                           metrics=["accuracy"])

        # load previously trained weights
        self.load_weight()

        data = {
            "q_input": questions_ok,
            "d_input": documents_ok,
            "context_mask": context_mask,
            "candidates_bi": candidates_ok
        }
        v_data = {
            "q_input": v_questions,
            "d_input": v_documents,
            "context_mask": v_context_mask,
            "candidates_bi": v_candidates
        }
        logs = self.model.fit(x=data,
                              y=y_true,
                              batch_size=batch_size,
                              epochs=epochs,
                              validation_data=(v_data, v_y_true),
                              callbacks=[checkpointer, earlystopping])

    def test(self, test_data, batch_size):
        # preprocess the inputs
        questions_ok, documents_ok, context_mask, candidates_ok, y_true = self.preprocess_input_sequences(
            test_data)
        data = {
            "q_input": questions_ok,
            "d_input": documents_ok,
            "context_mask": context_mask,
            "candidates_bi": candidates_ok
        }

        y_pred = self.model.predict(x=data, batch_size=batch_size)
        acc_num = np.count_nonzero(
            np.equal(np.argmax(y_pred, axis=-1), np.zeros(len(y_pred))))
        test_acc = acc_num / len(y_pred)
        logging.info("Test accuracy is {}".format(test_acc))
        return acc_num, test_acc

    def load_weight(self, weight_path=None):
        weight_file = self.weight_path if not weight_path else weight_path
        if os.path.exists(weight_file + "weight.h5"):
            logging.info("Load pre-trained weights:{}".format(weight_file +
                                                              "weight.h5"))
            self.model.load_weights(filepath=weight_file + "weight.h5",
                                    by_name=True)

    @staticmethod
    def union_shuffle(data):
        d, q, a, A = data
        c = list(zip(d, q, a, A))
        random.shuffle(c)
        return zip(*c)

    def preprocess_input_sequences(self, data, shuffle=True):
        """
        预处理输入:
        shuffle
        PAD/TRUNC到固定长度的序列
        y_true是长度为self.A_len的向量,index=0为正确答案,one-hot编码
        """
        documents, questions, answer, candidates = self.union_shuffle(
            data) if shuffle else data
        d_lens = [len(i) for i in documents]

        questions_ok = pad_sequences(questions,
                                     maxlen=self.q_len,
                                     dtype="int32",
                                     padding="post",
                                     truncating="post")
        documents_ok = pad_sequences(documents,
                                     maxlen=self.d_len,
                                     dtype="int32",
                                     padding="post",
                                     truncating="post")
        context_mask = K.eval(
            tf.sequence_mask(d_lens, self.d_len, dtype=tf.float32))
        candidates_ok = pad_sequences(candidates,
                                      maxlen=self.A_len,
                                      dtype="int32",
                                      padding="post",
                                      truncating="post")
        y_true = np.zeros_like(candidates_ok)
        y_true[:, 0] = 1
        return questions_ok, documents_ok, context_mask, candidates_ok, y_true
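To see the attention-sum idea used above in isolation, a small NumPy sketch (not part of the original class) that sums the attention probabilities over every position where a candidate token occurs:

import numpy as np

document = np.array([7, 3, 7, 9, 3])                # token ids of one document
attention = np.array([0.1, 0.2, 0.3, 0.15, 0.25])   # softmax attention over positions
candidates = np.array([7, 3, 9])                    # answer candidates

scores = np.array([attention[document == c].sum() for c in candidates])
print(scores)   # [0.4  0.45 0.15] -> candidate 3 has the largest summed attention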
Example #18
def fit_cnn():
    X_train = train_embed
    y_train = np.array(train_labels)
    X_test = test_embed
    y_test = test_labels
    X_train = X_train.reshape(
        (X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
    X_test = X_test.reshape(
        (X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))
    y_train, y_test = convert_one_hot(y_train, y_test)
    sequence_length = train_embed.shape[1]  # 60
    embedding_dim = train_embed.shape[2]
    filter_sizes = [2, 3, 4, 5, 6, 7, 8]
    num_filters = 64
    drop = 0.6
    input_shape = train_embed[0].shape
    epochs = 300
    batch_size = 32
    inputs = Input(shape=(sequence_length, embedding_dim, 1), dtype='float32')
    #batch_norm = BatchNormalization(input_shape = input_shape)(inputs)
    # build one Conv2D + MaxPool2D branch per filter size and concatenate every branch
    pooled_outputs = []
    for filter_size in filter_sizes:
        conv = Conv2D(num_filters,
                      kernel_size=(filter_size, embedding_dim),
                      padding='valid',
                      kernel_initializer='normal',
                      activation='relu')(inputs)
        pooled = MaxPool2D(pool_size=(sequence_length - filter_size + 1, 1),
                           strides=(1, 1),
                           padding='valid')(conv)
        pooled_outputs.append(pooled)

    concatenated_tensor = Concatenate(axis=1)(pooled_outputs)
    flatten = Flatten()(concatenated_tensor)
    dropout = Dropout(drop)(flatten)
    output = Dense(units=3, activation='softmax')(dropout)

    # this creates a model that maps the input tensor to the softmax output
    model = Model(inputs=inputs, outputs=output)
    adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print("Traning Model...")
    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(X_test, y_test))
    model.save("model/cnn.h5")
    return model
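# Hedged usage sketch (not part of the original): assuming fit_cnn() has run and "model/cnn.h5"
# exists, the saved model can be reloaded and class indices recovered from the softmax output.
# `new_embed` is a hypothetical array shaped like train_embed (samples, sequence_length, dim).
from keras.models import load_model

cnn = load_model("model/cnn.h5")
new_x = new_embed.reshape((new_embed.shape[0], new_embed.shape[1], new_embed.shape[2], 1))
pred_classes = cnn.predict(new_x).argmax(axis=-1)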
Example #19
0
    def start_training(self):
        # Vectorize the data.
        input_texts = []
        target_texts = []
        input_characters = set()
        target_characters = set()
        lines = open(self.data_path, encoding='UTF-8').read().split('\n')
        for line in lines[:min(self.num_samples, len(lines) - 1)]:
            input_text, target_text = line.split('\t')
            # We use "tab" as the "start sequence" character
            # for the targets, and "\n" as "end sequence" character.
            target_text = '\t' + target_text + '\n'
            input_texts.append(input_text)
            target_texts.append(target_text)
            for char in input_text:
                if char not in input_characters:
                    input_characters.add(char)
            for char in target_text:
                if char not in target_characters:
                    target_characters.add(char)

        input_characters = sorted(list(input_characters))
        target_characters = sorted(list(target_characters))
        num_encoder_tokens = len(input_characters)
        num_decoder_tokens = len(target_characters)
        max_encoder_seq_length = max([len(txt) for txt in input_texts])
        max_decoder_seq_length = max([len(txt) for txt in target_texts])

        print('Number of samples:', len(input_texts))
        print('Number of unique input tokens:', num_encoder_tokens)
        print('Number of unique output tokens:', num_decoder_tokens)
        print('Max sequence length for inputs:', max_encoder_seq_length)
        print('Max sequence length for outputs:', max_decoder_seq_length)

        input_token_index = dict([(char, i)
                                  for i, char in enumerate(input_characters)])
        target_token_index = dict([
            (char, i) for i, char in enumerate(target_characters)
        ])

        encoder_input_data = np.zeros(
            (len(input_texts), max_encoder_seq_length, num_encoder_tokens),
            dtype='float32')
        decoder_input_data = np.zeros(
            (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
            dtype='float32')
        decoder_target_data = np.zeros(
            (len(input_texts), max_decoder_seq_length, num_decoder_tokens),
            dtype='float32')

        for i, (input_text,
                target_text) in enumerate(zip(input_texts, target_texts)):
            for t, char in enumerate(input_text):
                encoder_input_data[i, t, input_token_index[char]] = 1.
            for t, char in enumerate(target_text):
                # decoder_target_data is ahead of decoder_input_data by one timestep
                decoder_input_data[i, t, target_token_index[char]] = 1.
                if t > 0:
                    # decoder_target_data will be ahead by one timestep
                    # and will not include the start character.
                    decoder_target_data[i, t - 1,
                                        target_token_index[char]] = 1.

        # Define an input sequence and process it.
        encoder_inputs = Input(shape=(None, num_encoder_tokens))
        encoder = LSTM(self.latent_dim, return_state=True)
        encoder_outputs, state_h, state_c = encoder(encoder_inputs)
        # We discard `encoder_outputs` and only keep the states.
        encoder_states = [state_h, state_c]

        # Set up the decoder, using `encoder_states` as initial state.
        decoder_inputs = Input(shape=(None, num_decoder_tokens))
        # We set up our decoder to return full output sequences,
        # and to return internal states as well. We don't use the
        # return states in the training model, but we will use them in inference.
        decoder_lstm = LSTM(self.latent_dim,
                            return_sequences=True,
                            return_state=True)
        decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                             initial_state=encoder_states)
        decoder_dense = Dense(num_decoder_tokens, activation='softmax')
        decoder_outputs = decoder_dense(decoder_outputs)

        # Define the model that will turn
        # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
        model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

        #
        # Run training
        model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
        model.summary()

        tbCallBack = callbacks.TensorBoard(log_dir='./data/Graph',
                                           histogram_freq=0,
                                           write_graph=True,
                                           write_images=True)
        model.fit([encoder_input_data, decoder_input_data],
                  decoder_target_data,
                  batch_size=self.batch_size,
                  epochs=self.epochs,
                  validation_split=0.2,
                  verbose=1,
                  callbacks=[tbCallBack])
        #
        # # Save model
        model.save('./data/s2s2.h5')
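        # Hedged follow-up sketch (not part of the original): the comments above note that the
        # decoder states are kept for inference. A standalone encoder and a step-wise decoder
        # can be rebuilt from the trained layers roughly like this.
        encoder_model = Model(encoder_inputs, encoder_states)

        decoder_state_input_h = Input(shape=(self.latent_dim,))
        decoder_state_input_c = Input(shape=(self.latent_dim,))
        decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs,
                                                         initial_state=decoder_states_inputs)
        decoder_states = [state_h, state_c]
        decoder_outputs = decoder_dense(decoder_outputs)
        decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                              [decoder_outputs] + decoder_states)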
Example #20
0
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)

y_train = keras.utils.to_categorical(y_train, 4)
y_test = keras.utils.to_categorical(y_test, 4)

resnet = VGGFace(model='resnet50',input_shape=(224, 224, 3))

layer_name = resnet.layers[-2].name

out = resnet.get_layer(layer_name).output
out = Dense(4,activation='softmax')(out)
resnet_4 = Model(resnet.input, out)

for layer in resnet_4.layers[:-1]:
	layer.trainable = False

resnet_4.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print (resnet_4.summary())

resnet_4.fit(x_train, y_train,batch_size=16,epochs=5,validation_data=(x_test, y_test),shuffle=True)

scores = resnet_4.evaluate(x_test, y_test, verbose=1)
print('Test accuracy:', scores[1])
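# Hedged follow-up sketch (not part of the original): once the new head has converged, a common
# second stage is to unfreeze the last few backbone layers and fine-tune them with a much lower
# learning rate; unfreezing the last 10 layers here is only an assumption.
from keras import optimizers

for layer in resnet_4.layers[-10:]:
    layer.trainable = True
resnet_4.compile(loss='categorical_crossentropy',
                 optimizer=optimizers.Adam(lr=1e-5),
                 metrics=['accuracy'])
resnet_4.fit(x_train, y_train, batch_size=16, epochs=5,
             validation_data=(x_test, y_test), shuffle=True)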




Example #21
0
out1, out2, out3 = c3d(inputs)

model_c3d = Model(inputs=inputs, outputs=[out1, out2, out3])
# model_c3d.summary()

model_c3d.compile(optimizer='adam',
              loss='categorical_crossentropy', 
              metrics=['accuracy'])


# %% Network training

print('Not using data augmentation.')
logging.debug("Running training...")
hist = model_c3d.fit(X_train_c3d, [y_train_frame, y_train_frame, y_train_frame], 
                     batch_size=32, epochs=100, verbose=1, 
                     shuffle=True, callbacks=[mc]) 


# %% Prediction    

logging.debug("Running test...")
model_c3d.load_weights('/home/kdh/Desktop/c3da/checkpoint/c3da_weights.h5')
proba_c3d = model_c3d.predict(X_test_c3d, batch_size=4, verbose=1)
y_pred = [np.argmax(prob) for prob in proba_c3d[2]]
y_true = [np.argmax(true) for true in y_test_frame]
    
count = 0
for i in range(len(y_pred)):
    if y_true[i] == y_pred[i]:
        count += 1
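# Equivalent vectorized sketch (not part of the original): the counting loop above reduces to a
# single comparison of the predicted and true class indices.
accuracy = np.mean(np.array(y_pred) == np.array(y_true))
print('c3d test accuracy: {:.4f}'.format(accuracy))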
Example #22
0
for layer in range(num_layers - 1):
    final_model.layers[layer].trainable = False

final_model.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

# get training data

rootdir_train = '/training'
filenames = load_filenames('all_train', rootdir_train)
#print(filenames)

X, Y = compile_images(filenames, (250, 250, 3))
# fit updated model on senators
final_model.fit(X, Y, batch_size=8, epochs=10, verbose=1, validation_split=0.2)
# test

rootdir_test = '/test'
filenames_test = load_filenames('all_test', rootdir_test)
X_test, Y_test = compile_images(filenames_test, (250, 250, 3))

print("Test results: ", final_model.test_on_batch(X_test, Y_test))

predictions = final_model.predict(X_test)

for i, f in enumerate(filenames_test):
    print(f, " with truth value: ", Y_test[i], predictions[i])

final_model.save("/output/transfer_modle120.h5")
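# Hedged sketch (not part of the original): with a binary_crossentropy head the predictions are
# probabilities, so thresholding at 0.5 yields hard labels; this assumes Y_test holds 0/1 labels.
import numpy as np

pred_labels = (predictions > 0.5).astype(int).ravel()
print("Thresholded test accuracy: ", np.mean(pred_labels == np.array(Y_test).ravel()))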
Example #23
0
    print('model compile time: {}'.format(time.time() - start_time))
    print('')

    #############################################################################
    # TRAINING

    batch_size = 256
    nb_epoch = 50

    # Model saving callback
    checkpointer = ModelCheckpoint(filepath=WEIGHTS_PRESENCE_FILEPATH,
                                   verbose=1,
                                   save_best_only=True)

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=5)

    history = model_presence_a.fit(data_images_train,
                                   data_presence_train,
                                   batch_size=batch_size,
                                   nb_epoch=nb_epoch,
                                   verbose=2,
                                   validation_data=(data_images_val,
                                                    data_presence_val),
                                   shuffle=True,
                                   callbacks=[checkpointer, early_stopping])

    with open(HISTORY_PRESENCE_FILEPATH, 'w') as f_out:
        json.dump(history.history, f_out)
    print('elapsed time after presence training: {}'.format(time.time() - start_time))
    print('')

    #############################################################################
    # TRAINING

    batch_size = 48
    nb_epoch = 100

    # Model saving callback
    checkpointer = ModelCheckpoint(filepath=WEIGHTS_SEGMENT_FILEPATH,
                                   verbose=1,
                                   save_best_only=True)

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=5)

    history = model_segment.fit(data_images_train,
                                data_masks_train,
                                batch_size=batch_size,
                                nb_epoch=nb_epoch,
                                verbose=2,
                                shuffle=True,
                                validation_data=(data_images_val,
                                                 data_masks_val),
                                callbacks=[checkpointer, early_stopping])

    with open(HISTORY_SEGMENT_FILEPATH, 'w') as f_out:
        json.dump(history.history, f_out)
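    # Hedged follow-up sketch (not part of the original): the checkpoints above use
    # save_best_only=True, so the weights left in memory after training are not necessarily
    # the best ones; reloading the saved checkpoint before evaluating is the safer route.
    model_segment.load_weights(WEIGHTS_SEGMENT_FILEPATH)
    val_scores = model_segment.evaluate(data_images_val, data_masks_val, verbose=0)
    print('segmentation model validation scores: {}'.format(val_scores))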
def main(i, RNN_TYPE):
    # TODO: change to GRU, Recurrent, and SimpleRNN
    # print(i, RNN_TYPE)
    RNN = recurrent.LSTM
    if RNN_TYPE == "gru":
        # print("Starting a GRU: ")
        RNN = recurrent.GRU
        file_name = "history_gru_" + str(i) + ".csv"

    elif RNN_TYPE == "recurrent":
        # print("Starting a Recurrent Unit: ")
        RNN = recurrent.Recurrent
        file_name = "history_recurrent_" + str(i) + ".csv"

    elif RNN_TYPE == "simplernn":
        # print("Starting a SimpleRNN: ")
        RNN = recurrent.SimpleRNN
        file_name = "history_simple_" + str(i) + ".csv"
    else:
        # print("Starting an LSTM")
        file_name = "history_lstm_" + str(i) + ".csv"

    EMBED_HIDDEN_SIZE = 100
    SENT_HIDDEN_SIZE = 100
    QUERY_HIDDEN_SIZE = 100
    BATCH_SIZE = 50
    EPOCHS = 100
    # print('RNN / Embed / Sent / Query = {}, {}, {}, {}'.format(RNN,
    #                                                            EMBED_HIDDEN_SIZE,
    #                                                            SENT_HIDDEN_SIZE,
    #                                                            QUERY_HIDDEN_SIZE))
    # print(os.getcwd())
    file_list = (os.getcwd())
    base_file = os.getcwd() + "/tasks_1-20_v1-2/en/"
    file_list = (os.listdir(base_file))
    # print(file_list)
    test_file = ""
    train_file = ""
    for file in file_list:
        if file.startswith("qa" + str(i) + "_"):
            if file.endswith("_test.txt"):
                test_file = file
                # print(test_file)
            elif file.endswith("_train.txt"):
                train_file = file
                # print(train_file)
    print(train_file)
    print(test_file)

    f_train = open(base_file + train_file)
    f_test = open(base_file + test_file)

    # try:
    #     path = get_file('babi-tasks-v1-2.tar.gz',
    #                     origin='https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz')
    # except:
    #     print('Error downloading dataset, please download it manually:\n'
    #           '$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz\n'
    #           '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz')
    #     raise
    # tar = tarfile.open(path)
    # # Default QA1 with 1000 samples
    # # challenge = 'tasks_1-20_v1-2/en/qa1_single-supporting-fact_{}.txt'
    # # QA1 with 10,000 samples
    # # challenge = 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt'
    # # QA2 with 1000 samples
    # challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt'
    # # QA2 with 10,000 samples
    # # challenge = 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt'

    # train = get_stories(tar.extractfile(challenge.format('train')))
    # test = get_stories(tar.extractfile(challenge.format('test')))
    # print("training stories:")
    train = get_stories(f_train)
    # print(len(train))
    # print("testing stories:")
    test = get_stories(f_test)
    # print(len(test))
    vocab = set()
    for story, q, answer in train + test:
        vocab |= set(story + q + [answer])
    vocab = sorted(vocab)
    # check_existence(vocab)
    # get_word_vectors_from_pretr_embeddings(train, test, vocab)

    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1
    print("Vocabulary size: ", vocab_size)
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
    story_maxlen = max(map(len, (x for x, _, _ in train + test)))
    query_maxlen = max(map(len, (x for _, x, _ in train + test)))

    x, xq, y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
    tx, txq, ty = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
    print('vocab = {}'.format(vocab))
    print('x.shape = {}'.format(x.shape))
    print('xq.shape = {}'.format(xq.shape))
    print('y.shape = {}'.format(y.shape))
    print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, query_maxlen))
    print('Build model...')

    pre_trained_emb_weights = get_pre_trained_emb(vocab)
    sentence = layers.Input(shape=(story_maxlen,), dtype='float32')
    encoded_sentence = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
    encoded_sentence = layers.Dropout(0.3)(encoded_sentence)
    question = layers.Input(shape=(query_maxlen,), dtype='float32')
    encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
    encoded_question = layers.Dropout(0.3)(encoded_question)

    encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
    encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)
    merged = layers.add([encoded_sentence, encoded_question])
    merged = RNN(EMBED_HIDDEN_SIZE)(merged)
    merged = layers.Dropout(0.3)(merged)
    preds = layers.Dense(vocab_size, activation='softmax')(merged)
    model = Model([sentence, question], preds)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print('Training')
    history = model.fit([x, xq], y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.05)
    pandas.DataFrame(history.history).to_csv("__pre_"+file_name)

    loss, acc = model.evaluate([tx, txq], ty,
                               batch_size=BATCH_SIZE)

    pandas.DataFrame([str(loss)+"_"+ str(acc)]).to_csv("__test_"+RNN_TYPE+"_"+str(i)+".csv")
    print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc))
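    # Hedged sketch (not part of the original): mapping a single test prediction back to a word.
    # word_idx values start at 1 because index 0 is reserved for padding (see above).
    idx_word = dict((i, w) for w, i in word_idx.items())
    pred = model.predict([tx[:1], txq[:1]])
    print('Predicted answer:', idx_word.get(int(pred[0].argmax()), '<pad>'))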
Example #26
0
def main(_):

    # The data, shuffled and split between train and test sets:
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    inputs = Input(shape=(32, 32, 3))

    out1, out2 = simple_cnn(inputs)

    model = Model(inputs=inputs, outputs=[out1, out2])
    # model.summary()

    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
    model.compile(
        optimizer=opt,
        loss=['categorical_crossentropy', 'categorical_crossentropy'],
        metrics=['accuracy'])

    if not data_augmentation:
        print('Not using data augmentation.')
        model.fit(x_train, [y_train, y_train],
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(x_test, [y_test, y_test]),
                  shuffle=True)
    else:
        print('Using real-time data augmentation.')

        def train_generator(x, y, batch_size):
            train_datagen = ImageDataGenerator(
                width_shift_range=
                0.1,  # randomly shift images horizontally (fraction of total width)
                height_shift_range=
                0.1,  # randomly shift images vertically (fraction of total height)
                horizontal_flip=True)  # randomly flip images
            generator = train_datagen.flow(x, y, batch_size=batch_size)
            while 1:
                x_batch, y_batch = generator.next()
                yield (x_batch, [y_batch, y_batch])

        # Fit the model on the batches generated by datagen.flow().
        model.fit_generator(generator=train_generator(x_train, y_train,
                                                      batch_size),
                            steps_per_epoch=int(y_train.shape[0] / batch_size),
                            epochs=epochs,
                            validation_data=(x_test, [y_test, y_test]),
                            callbacks=[])

    # Save model and weights
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    model_path = os.path.join(save_dir, model_name)
    model.save(model_path)
    print('Saved trained model at %s ' % model_path)

    # Score trained model. With two outputs, evaluate() returns
    # [total_loss, out1_loss, out2_loss, out1_acc, out2_acc].
    scores = model.evaluate(x_test, [y_test, y_test], verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy (first output):', scores[3])
# Start with a relatively high learning rate to learn quickly, then reduce it when the
# validation loss stops improving so the model can keep gaining precision

rp_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                     factor=0.5,
                                                     patience=2,
                                                     min_lr=0.000001,
                                                     verbose=1)
callbacks.append(rp_callback) 


model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
model.fit(x=train_dataset, epochs=50, steps_per_epoch=len(train_gen), validation_data=valid_dataset, validation_steps=len(valid_gen), callbacks=callbacks)


# Save the model

now = datetime.now().strftime('%b%d_%H-%M-%S')
print(str(now))
model_name = os.path.join(models_dir, str(now))
classification_name = str(prediction_dir) + '/' +str(now)

model.save(model_name)

Example #28
0
def train_label_none_label_classification(label_folder, non_label_folder, model_file=None):

    c = Config()

    #  Build or load model
    if model_file is None:
        # create model
        img_input = Input(shape=(28, 28, 3))
        # prediction = model_cnn_2_layer.nn_classify_label_non_label(img_input)
        # prediction = model_cnn_3_layer.nn_classify_label_non_label(img_input)
        prediction = nn_cnn_3_layer.nn_classify_label_non_label(img_input)
        model = Model(inputs=img_input, outputs=prediction)
        model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])
    else:
        model = load_model(model_file)

    model.summary()

    # Load and normalize data
    x_train, y_train, x_test, y_test = load_train_validation_data(label_folder, non_label_folder)

    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    x_train[:, :, :, 0] -= c.img_channel_mean[0]
    x_train[:, :, :, 1] -= c.img_channel_mean[1]
    x_train[:, :, :, 2] -= c.img_channel_mean[2]
    x_test[:, :, :, 0] -= c.img_channel_mean[0]
    x_test[:, :, :, 1] -= c.img_channel_mean[1]
    x_test[:, :, :, 2] -= c.img_channel_mean[2]

    x_train /= 255
    x_test /= 255
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # x_train.reshape(x_train.shape[0], 28, 28, 3)
    # x_test.reshape(x_test.shape[0], 28, 28, 3)

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, 2)
    y_test = keras.utils.to_categorical(y_test, 2)

    # Checkpointing saves the network weights only when classification accuracy on the
    # validation dataset improves (monitor='val_acc' and mode='max').
    file_path = "weights-improvement-{epoch:04d}-{val_acc:.4f}.hdf5"
    checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    callbacks_list = [checkpoint]

    model.fit(x_train, y_train,
              batch_size=128,
              epochs=100,
              verbose=1,
              callbacks=callbacks_list,
              validation_data=(x_test, y_test)
              )
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    model.save('final_model.h5')
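# Hedged inference sketch (not part of the original): a new 28x28x3 image has to receive the
# same preprocessing as the training data above (channel-mean subtraction, then /255) before
# the label / non-label prediction is meaningful. `img` is a hypothetical HxWx3 uint8 array.
def predict_label(model, img, c):
    import numpy as np
    x = img.astype('float32').copy()
    for ch in range(3):
        x[:, :, ch] -= c.img_channel_mean[ch]
    x /= 255
    return model.predict(x[np.newaxis, ...])[0]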
embedded_sequences = embedding_layer(sequence_input)

cnns = []

for filter_length in filter_lengths:
    x = Conv1D(nb_filter=nb_filter,
               filter_length=filter_length,
               border_mode='valid',
               activation='relu',
               W_constraint=maxnorm(3),
               W_regularizer=l2(0.0001),
               subsample_length=1)(embedded_sequences)
    x = MaxPooling1D(pool_length=MAX_SEQUENCE_LENGTH - filter_length + 1)(x)
    x = Flatten()(x)
    cnns.append(x)

x = merge(cnns, mode='concat')
x = Dropout(0.2)(x)
x = Dense(128, activation='relu')(x)
preds = Dense(len(labels_index), activation='softmax')(x)

model = Model(sequence_input, preds)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# happy learning!
model.fit(x_train, y_train, validation_data=(x_val, y_val),
          nb_epoch=5, batch_size=128)
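# Hedged Keras 2 sketch (not part of the original): the snippet above uses the Keras 1 API; under
# Keras 2 one branch would read roughly as follows (nb_filter -> filters, filter_length ->
# kernel_size, border_mode -> padding, W_* -> kernel_*, subsample_length -> strides,
# pool_length -> pool_size, merge(mode='concat') -> Concatenate, nb_epoch -> epochs),
# assuming Concatenate is imported from keras.layers.
x = Conv1D(filters=nb_filter,
           kernel_size=filter_length,
           padding='valid',
           activation='relu',
           kernel_constraint=maxnorm(3),
           kernel_regularizer=l2(0.0001),
           strides=1)(embedded_sequences)
x = MaxPooling1D(pool_size=MAX_SEQUENCE_LENGTH - filter_length + 1)(x)
# ...and the branch outputs would be joined with Concatenate()(cnns) instead of merge().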
def ent_rel_pred_nn(cv_dat, drop, max_len, embedding_length, rdf2vec_model,
                    rev_char_dict):

    print "We are into the training block"

    train_x, train_y, test_x, test_y = cv_dat
    print train_x.shape, test_x.shape
    print "getting validation data"
    train_x, val_x, train_y, val_y = train_test_split(train_x,
                                                      train_y,
                                                      test_size=0.1,
                                                      random_state=666,
                                                      stratify=train_y)

    print "Validation shapes:" + str(val_x.shape)
    print "Data distributions:"
    print Counter([np.argmax(a) for a in train_y])
    print Counter([np.argmax(a) for a in test_y])
    print Counter([np.argmax(a) for a in val_y])

    #get the rdf2vec_vectors for all
    train_rdf2vec = get_rdf_vecs_phrases(train_x, rdf2vec_model, rev_char_dict)
    val_rdf2vec = get_rdf_vecs_phrases(val_x, rdf2vec_model, rev_char_dict)

    train_x = np.array(sequence.pad_sequences(train_x, maxlen=max_len),
                       dtype=np.int)
    test_x = np.array(sequence.pad_sequences(test_x, maxlen=max_len),
                      dtype=np.int)
    val_x = np.array(sequence.pad_sequences(val_x, maxlen=max_len),
                     dtype=np.int)
    print "Training the neural net now"
    print train_x[0]

    #define the neural net model

    input_vec = Input(shape=(max_len, ), dtype='int32', name="inp_vec")
    embedded_l = Embedding(embedding_length,
                           128,
                           mask_zero=False,
                           input_length=max_len,
                           trainable=True)(input_vec)

    lstm_1 = LSTM(128, return_sequences=False, dropout_W=0.3,
                  dropout_U=0.3)(embedded_l)
    dense_1 = Dense(512,
                    activation='tanh',
                    kernel_initializer="glorot_uniform")(lstm_1)
    drop_l = Dropout(drop)(dense_1)
    dense_2 = Dense(256,
                    activation='tanh',
                    kernel_initializer="glorot_uniform")(drop_l)
    drop_l_2 = Dropout(drop)(dense_2)
    #batch_norm = BatchNormalization()(drop_l)

    output_l = Dense(2, activation='softmax', name="output_layer")(drop_l_2)
    output_rdf2vec = Dense(500, activation='softmax',
                           name="output_layer_2")(drop_l_2)

    model = Model([input_vec], output=[output_l, output_rdf2vec])

    adam = Adam(lr=0.0001)

    # weighted categorical cross entropy

    w_array = np.ones((2, 2))
    w_array[0, 1] = 1.2

    ncce = functools.partial(w_categorical_crossentropy, weights=w_array)
    ncce.__name__ = 'w_categorical_crossentropy'
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'],
                  loss_weights={
                      'output_layer': 1.,
                      'output_layer_2': 0.4
                  })

    earlystop = EarlyStopping(monitor='val_output_layer_loss',
                              min_delta=0.0001,
                              patience=3,
                              verbose=1,
                              mode='auto')

    callbacks_list = [earlystop]
    print model.summary()
    model.fit([train_x], [train_y, train_rdf2vec],
              batch_size=128,
              epochs=30,
              verbose=1,
              shuffle=True,
              callbacks=callbacks_list,
              validation_data=[[val_x], [val_y, val_rdf2vec]])

    model_json = model.to_json()
    with open(model_j, "w") as json_file:
        json_file.write(model_json)
    model.save_weights(model_wgt)
    print "Saved the model to disk."

    test_predictions = model.predict([test_x], verbose=False)
    test_pred = [np.argmax(pred) for pred in test_predictions[0]]
    test_y = [np.argmax(label) for label in test_y]

    f1_value = f1_score(test_y, test_pred, average="macro")
    print f1_value
    return f1_value
model = Model(inputs=base_model.input, outputs=preds)

for layer in model.layers[:18]:
    layer.trainable = False
for layer in model.layers[18:]:
    layer.trainable = True

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(X_train,
          y_train,
          batch_size=32,
          epochs=3,
          validation_split=0.3,
          callbacks=[tensorboard])

model.save('emotion_cnn_transfer-vgg.model')

# get the predictions for the test data as class indices
predicted_classes = model.predict(X_test)
predicted_classes = predicted_classes.argmax(axis=-1)
true_classes = y_test.argmax(axis=-1) if y_test.ndim > 1 else np.asarray(y_test)

# get the indices of correctly and incorrectly classified samples
correct = np.where(predicted_classes == true_classes)[0]
incorrect = np.where(predicted_classes != true_classes)[0]

target_names = ["Class {}".format(i) for i in range(num_classes)]
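# Hedged sketch (not part of the original): `target_names` above is presumably intended for a
# per-class report; scikit-learn's classification_report over the class indices computed above
# is one way to use it.
from sklearn.metrics import classification_report
print(classification_report(true_classes, predicted_classes, target_names=target_names))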
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
batch_size = 128
nb_epoch = 10
data_augmentation = True

# Model saving callback
#checkpointer = ModelCheckpoint(filepath='stochastic_depth_cifar10.hdf5', verbose=1, save_best_only=True)

if not data_augmentation:
    print('Not using data augmentation.')
    history = model.fit(x_train,
                        y_train,
                        batch_size=batch_size,
                        nb_epoch=nb_epoch,
                        verbose=1,
                        validation_data=(x_test, y_test),
                        shuffle=True,
                        callbacks=[])
else:
    print('Using real-time data augmentation.')

    # realtime data augmentation
    datagen_train = ImageDataGenerator(featurewise_center=False,
                                       samplewise_center=False,
                                       featurewise_std_normalization=False,
                                       samplewise_std_normalization=False,
                                       zca_whitening=False,
                                       rotation_range=0,
                                       width_shift_range=0.125,
                                       height_shift_range=0.125,