Example #1
def test_model_trainability_switch():
    # a non-trainable model has no trainable weights
    x = Input(shape=(1,))
    y = Dense(2)(x)
    model = Model(x, y)
    model.trainable = False
    assert model.trainable_weights == []

    # same for Sequential
    model = Sequential()
    model.add(Dense(2, input_dim=1))
    model.trainable = False
    assert model.trainable_weights == []
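
A common use of this switch is freezing a pretrained base for fine-tuning. Below is a minimal sketch (hedged: Keras 1.x-era functional API to match the tests here; the shapes and layer sizes are illustrative, not from the source):

from keras.layers import Input, Dense
from keras.models import Model

x = Input(shape=(4,))
base = Model(x, Dense(8, activation='relu')(x))   # stands in for a pretrained model
base.trainable = False                            # freezes every weight in `base`

inp = Input(shape=(4,))
clf = Model(inp, Dense(1, activation='sigmoid')(base(inp)))
clf.compile(optimizer='adam', loss='binary_crossentropy')
assert len(clf.trainable_weights) == 2            # only the new head's kernel and bias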
Example #2
def test_trainable_weights():
    a = Input(shape=(2,))
    b = Dense(1)(a)
    model = Model(a, b)

    weights = model.weights
    assert model.trainable_weights == weights
    assert model.non_trainable_weights == []

    model.trainable = False
    assert model.trainable_weights == []
    assert model.non_trainable_weights == weights

    model.trainable = True
    assert model.trainable_weights == weights
    assert model.non_trainable_weights == []

    model.layers[1].trainable = False
    assert model.trainable_weights == []
    assert model.non_trainable_weights == weights

    # sequential model
    model = Sequential()
    model.add(Dense(1, input_dim=2))
    weights = model.weights

    assert model.trainable_weights == weights
    assert model.non_trainable_weights == []

    model.trainable = False
    assert model.trainable_weights == []
    assert model.non_trainable_weights == weights

    model.trainable = True
    assert model.trainable_weights == weights
    assert model.non_trainable_weights == []

    model.layers[0].trainable = False
    assert model.trainable_weights == []
    assert model.non_trainable_weights == weights
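
One caveat worth noting alongside this test (hedged, per Keras's documented behavior): the trainable_weights/non_trainable_weights lists update immediately when the flag is flipped, but an already-compiled model keeps its old training function until compile() is called again.

model.layers[0].trainable = True
model.compile('sgd', 'mse')  # re-compile so the change is picked up by fit()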
Example #3
class RationaleCNN:

    def __init__(self, preprocessor, filters=None, n_filters=100, dropout=0.0):
        '''
        Parameters
        ----------
        preprocessor: an instance of the Preprocessor class, defined below
        '''
        self.preprocessor = preprocessor

        if filters is None:
            self.ngram_filters = [3, 4, 5]
        else:
            self.ngram_filters = filters 

        self.nb_filter = n_filters 
        self.dropout = dropout
        self.sentence_model_trained = False 

        #self.build_model() # build model
        #self.train_sentence_model()

    @staticmethod
    def weighted_sum(X):
        # @TODO.. add sentence preds!
        # NB: for a (batch, max_doc_len, num_features) tensor, axis 0 is the
        # batch axis; the per-document sum would be axis=1.
        return K.sum(X, axis=0) # I *think* axis 0 is correct...

    @staticmethod
    def weighted_sum_output_shape(input_shape):
        # expects something like (None, max_doc_len, num_features) 
        shape = list(input_shape)
        # output is (1 x num_features)
        return (1, shape[-1])

    @staticmethod
    def balanced_sample(X, y):
        _, pos_rationale_indices = np.where([y[:,0] > 0]) 
        _, neg_rationale_indices = np.where([y[:,1] > 0]) 
        _, non_rationale_indices = np.where([y[:,2] > 0]) 

        # sample a number of non-rationales equal to the total
        # number of pos/neg rationales. 
        m = pos_rationale_indices.shape[0] + neg_rationale_indices.shape[0]
        sampled_non_rationale_indices = np.array(random.sample(non_rationale_indices, m))

        train_indices = np.concatenate([pos_rationale_indices, neg_rationale_indices, sampled_non_rationale_indices])
        np.random.shuffle(train_indices) # why not
        return X[train_indices,:], y[train_indices]
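
    # A toy check of balanced_sample (hedged; the data below is invented for
    # illustration): two pos and one neg rationale should draw three
    # non-rationale rows, six samples total.
    #   y = np.array([[1,0,0],[1,0,0],[0,1,0],
    #                 [0,0,1],[0,0,1],[0,0,1],[0,0,1]])
    #   X = np.arange(len(y)*2).reshape(len(y), 2)
    #   Xb, yb = RationaleCNN.balanced_sample(X, y)
    #   assert len(yb) == 6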

    # r_CNN.sentence_model.predict(X[:10], batch_size=128)
    def train_sentence_model(self, train_documents, nb_epoch=5, downsample=True, batch_size=128, optimizer='adam'):
        # assumes sentence sequences have been generated!
        assert(train_documents[0].sentence_sequences is not None)

        X, y = [], []
        # flatten sentences/sentence labels
        for d in train_documents:
            X.extend(d.sentence_sequences)
            y.extend(d.sentences_y)

        # @TODO sub-sample magic?
        X, y = np.asarray(X), np.asarray(y)
        
        # downsample
        if downsample:
            X, y = RationaleCNN.balanced_sample(X, y)

        #self.train(X[:1000], y[:1000])
        self.train(X, y)

        self.sentence_model_trained = True


    def train(self, X_train, y_train, X_val=None, y_val=None,
                nb_epoch=5, batch_size=32, optimizer='adam'):
        ''' 
        Accepts an X matrix (presumably some slice of self.X) and corresponding
        vector of labels. May want to revisit this. 

        X_val and y_val are to be used to validate during training. 
        '''
        checkpointer = ModelCheckpoint(filepath="weights.hdf5", 
                                       verbose=1, 
                                       save_best_only=(X_val is not None))

        if X_val is not None:
            self.sentence_model.fit({'input': X_train, 'output': y_train},
                batch_size=batch_size, nb_epoch=nb_epoch,
                validation_data={'input': X_val, 'output': y_val},
                verbose=2, callbacks=[checkpointer])
        else: 
            print("no validation data provided!")
            #self.sentence_model.fit({'input': X_train, 'output': y_train},
            #    batch_size=batch_size, nb_epoch=nb_epoch, 
            #    verbose=2, callbacks=[checkpointer])
            self.sentence_model.fit(X_train, y_train,
                batch_size=batch_size, nb_epoch=nb_epoch, 
                verbose=2, callbacks=[checkpointer])
        

    '''
    def predict(self, X_test, batch_size=32, binarize=False):
        raw_preds = self.model.predict({'input': X_test}, batch_size=batch_size)['output']

        #np.array(self.model.predict({'input': X_test}, 
                    #              batch_size=batch_size)['output'])
        if binarize:
          return np.round(raw_preds)
        return raw_preds
    '''


    def build_sentence_model(self):
        ''' 
        Build the *sentence*-level model, which operates over individual sentences. 
        The task is to predict which sentences are pos/neg rationales.
        '''
        tokens_input = Input(name='input', shape=(self.preprocessor.max_sent_len,), dtype='int32')
        x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, 
                      input_length=self.preprocessor.max_sent_len, 
                      weights=self.preprocessor.init_vectors)(tokens_input)
        
        x = Dropout(0.1)(x)

        convolutions = []
        for n_gram in self.ngram_filters:
            cur_conv = Convolution1D(nb_filter=self.nb_filter,
                                         filter_length=n_gram,
                                         border_mode='valid',
                                         activation='relu',
                                         subsample_length=1,
                                         input_dim=self.preprocessor.embedding_dims,
                                         input_length=self.preprocessor.max_sent_len)(x)
            # pool
            one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv)
            flattened = Flatten()(one_max)
            convolutions.append(flattened)

        # NB: Keras 1.x merge() defaults to mode='sum'; concat is what stacks
        # the n-gram branches into one long sentence vector.
        sentence_vector = merge(convolutions, mode='concat', name="sentence_vector") # hang on to this layer!
        output = Dense(3, activation="softmax")(sentence_vector)

        self.sentence_model = Model(input=tokens_input, output=output)
        print("model built")
        print(self.sentence_model.summary())
        self.sentence_model.compile(loss='categorical_crossentropy', optimizer="adam")

        self.sentence_embedding_dim = self.sentence_model.layers[-2].output_shape[1]

        return self.sentence_model 
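
    # Hedged usage sketch (`preprocessor` is an instance of the Preprocessor
    # class referenced in __init__, which is not shown in this excerpt):
    #   r_cnn = RationaleCNN(preprocessor, n_filters=100, dropout=0.1)
    #   sentence_model = r_cnn.build_sentence_model()
    #   r_cnn.train_sentence_model(train_documents)  # docs must carry .sentence_sequences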


   

    def build_doc_model_fixed(self):
        # no magic here.
        #input_layer = Dense(1, batch_input_shape=(None, self.sentence_embedding_dim))#input_shape=(self.sentence_embedding_dim, ))
        #output_layer = Activation('sigmoid')(input_layer)

        self.document_model = Sequential() 
        self.document_model.add(Dense(1, input_dim=self.sentence_embedding_dim))
        self.document_model.add(Activation("sigmoid"))

        #self.document_model = Model(input=tokens_input, output=output)
        self.document_model.compile(loss='binary_crossentropy', optimizer="adam")


    def train_doc_model_fixed(self, train_documents):
        conv_f = K.function(
                        [self.sentence_model.layers[0].input, K.learning_phase()], 
                        [self.sentence_model.layers[-2].output])
        
        X, y = [], []

        for d in train_documents:
            sentence_vectors = np.matrix([conv_f([np.matrix(sent_seq),1])[0][0] for 
                                sent_seq in d.sentence_sequences])

            sentence_predictions = self.sentence_model.predict(d.sentence_sequences)
            weights = np.amax(sentence_predictions[:,0:2],axis=1)
            weighted = np.dot(weights, sentence_vectors)
            X.append(weighted)
            y.append(d.doc_y)
        #train_sequences = 

        X = np.vstack(X)
        y = np.array(y)
        #import pdb; pdb.set_trace()
        self.document_model.fit(X, y)
        

        #return np.matrix(np.dot(weights, vecs))

    def train_document_model(self, train_documents, 
                                nb_epoch=5, downsample=True, 
                                batch_size=128, optimizer='adam'):
        # assumes sentence sequences have been generated!
        assert(train_documents[0].sentence_sequences is not None)

        X, y = [], []
        # flatten sentences/sentence labels
        for d in train_documents:
            X.extend(d.sentence_sequences)
            y.extend(d.sentences_y)

        # @TODO sub-sample magic?
        X, y = np.asarray(X), np.asarray(y)
        
        # downsample
        if downsample:
            X, y = RationaleCNN.balanced_sample(X, y)

        #self.train(X[:1000], y[:1000])
        self.train(X, y)

        self.sentence_model_trained = True


    def build_doc_model_concat(self):
        # the idea is here is to concatenate the sentence inputs; so represent each document
        # by one very long row
        doc_len = self.preprocessor.max_sent_len * self.preprocessor.max_doc_len
        tokens_input = Input(name='input', 
                            shape=(doc_len,), dtype='int32')

        x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, 
                      input_length=doc_len, 
                      weights=self.preprocessor.init_vectors)(tokens_input)


    def build_sequential_doc_model(self):
        #self.document_model = Sequential()
        m = Sequential()

        # input layer. this is a matrix with dimensions:
        #       (max_doc_length x max_sent_length)
        #
        m.add(Dense(100, input_shape=(self.preprocessor.max_sent_len,)))

        #pass 

    def build_doc_model3(self):

        # 32 is just n_filters; 1 is n_gram
        nb_feature_maps = n_filters = 32
        
        maxlen = self.preprocessor.max_sent_len
        
        conv_filters = []
        for n_gram in self.ngram_filters:
            sequential = Sequential()
            conv_filters.append(sequential)

            sequential.add(Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims))
            sequential.add(Reshape(1, maxlen, self.preprocessor.embedding_dims))
            sequential.add(Convolution2D(nb_feature_maps, 1, n_gram, self.preprocessor.embedding_dims))
            sequential.add(Activation("relu"))
            sequential.add(MaxPooling2D(poolsize=(maxlen - n_gram + 1, 1)))
            sequential.add(Flatten())

        model = Sequential()
        model.add(Merge(conv_filters, mode='concat'))
        model.add(Dropout(0.5))
        model.add(Dense(nb_feature_maps * len(conv_filters), 1))
        model.add(Activation("sigmoid"))

        '''
        convolutions = []
        for n_gram in self.ngram_filters:
            cur_conv = Convolution2D(n_filters, 1, n_gram, 
                                        input_shape=(1, p.max_doc_len, p.max_sent_len),
                                        activation='relu', border_mode='valid')

            #Convolution1D(nb_filter=self.nb_filter,
            #                             filter_length=n_gram,
            #                             border_mode='valid',
            #                             activation='relu',
            #                             subsample_length=1,
            #                             input_dim=self.preprocessor.embedding_dims,
            #                             input_length=self.preprocessor.max_sent_len)(x)
            # pool
            one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv)
            flattened = Flatten()(one_max)
            convolutions.append(flattened)

        '''

        #model.add(
        #    Convolution2D(n_filters, 1, n_gram, 
        #    input_shape=(1, p.max_doc_len, p.max_sent_len))

        # get vectors for each sentence
        #MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)



        #one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv)

        '''
        document_input = Input(name='input', 
            shape=(None, self.preprocessor.max_doc_len, 
                   self.preprocessor.max_sent_len), dtype='int32')

        # filter, nb_rows, nb_cols
        n_gram = 1
        cur_conv = Convolution2D(32, 
                             n_gram, self.preprocessor.embedding_dims, 

                             activation='relu',
                             # samples, channels, rows, cols
                             input_shape=(1,
                                self.preprocessor.max_doc_len,
                                self.preprocessor.embedding_dims,
                             ))(document_input)
        '''
    def build_doc_model2(self):
        document_input = Input(name='input', 
            shape=(self.preprocessor.max_doc_len, 
                   self.preprocessor.max_sent_len,), dtype='int32')

        document_vector = WeightedSumSentenceVector(self.sentence_model)(document_input)

        # sentence_vectors = 

        # 
        #conv_f = K.function([self.sentence_model.layers[0].input, K.learning_phase()], 
        #                [self.sentence_model.layers[-2].output])
        # test_sent.shape
        #   (1,50) ### this is the list of token indices!
        # sentence_v = conv_f([test_sent,1])[0]

        ''' 
        Re-construct the (start of) the *sentence*-level model, which operates 
        over individual sentences. The task is to predict which sentences are 
        pos/neg rationales.
        '''
        # 



        '''
        tokens_input = Input(name='input', shape=(self.preprocessor.max_sent_len,), dtype='int32')
        x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, 
                      input_length=self.preprocessor.max_sent_len, 
                      weights=self.preprocessor.init_vectors)(tokens_input)
        
        x = Dropout(0.1)(x)

        convolutions = []
        for n_gram in self.ngram_filters:
            cur_conv = Convolution1D(nb_filter=self.nb_filter,
                                         filter_length=n_gram,
                                         border_mode='valid',
                                         activation='relu',
                                         subsample_length=1,
                                         input_dim=self.preprocessor.embedding_dims,
                                         input_length=self.preprocessor.max_sent_len)(x)
            # pool
            one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv)
            flattened = Flatten()(one_max)
            convolutions.append(flattened)

        sentence_vector = merge(convolutions, name="sentence_vector") # hang on to this layer!
        '''

        # ok initialize each layer with parameters!


        ###
        # 
        '''
        output = Dense(3, activation="softmax")(self.penultimate_layer)

        self.sentence_model = Model(input=tokens_input, output=output)
        '''


    '''
    In [137]: model.summary()
    ____________________________________________________________________________________________________
    Layer (type)                       Output Shape        Param #     Connected to                     
    ====================================================================================================
    input (InputLayer)                 (None, 500, 50)     0                                            
    ____________________________________________________________________________________________________
    reshape_16 (Reshape)               (None, 25000)       0           input[0][0]                      
    ____________________________________________________________________________________________________
    embedding_12 (Embedding)           (None, 25000, 200)  2000000     reshape_16[0][0]                 
    ____________________________________________________________________________________________________
    reshape_17 (Reshape)               (None, 500, 10000)  0           embedding_12[0][0]               
    ____________________________________________________________________________________________________
    reshape_18 (Reshape)               (None, 1, 500, 10000) 0          reshape_17[0][0]                 
    ____________________________________________________________________________________________________
    convolution2d_4 (Convolution2D)    (None, 32, 500, 50) 6432        reshape_18[0][0]                 
    ____________________________________________________________________________________________________
    maxpooling2d_1 (MaxPooling2D)      (None, 32, 500, 1)  0           convolution2d_4[0][0]            
    ____________________________________________________________________________________________________
    permute_2 (Permute)                (None, 1, 500, 32)  0           maxpooling2d_1[0][0]             
    ____________________________________________________________________________________________________
    reshape_19 (Reshape)               (None, 500, 32)     0           permute_2[0][0]                  
    =====================================================================================
    '''
    def build_doc_model_clean(self, n_filters=32):
        # input dim is (max_doc_len x max_sent_len) -- eliding the batch size
        tokens_input = Input(name='input', 
                            shape=(self.preprocessor.max_doc_len, self.preprocessor.max_sent_len), 
                            dtype='int32')
        # flatten; create a very wide matrix to hand to embedding layer
        tokens_reshaped = Reshape([self.preprocessor.max_doc_len*self.preprocessor.max_sent_len])(tokens_input)
        # embed the tokens; output will be (p.max_doc_len*p.max_sent_len x embedding_dims)
        x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, 
                        weights=self.preprocessor.init_vectors)(tokens_reshaped)

        # reshape to preserve document structure; each doc will now be
        # a row in this matrix
        x = Reshape((1, self.preprocessor.max_doc_len, 
                     self.preprocessor.max_sent_len*self.preprocessor.embedding_dims))(x)

        #x = Reshape((1, p.max_doc_len, p.max_sent_len*p.embedding_dims))(x)

        x = Dropout(0.1)(x)

        ####
        # @TODO wrap in loop to include all n_grams!
        n_gram = 1 # tmp
        

        cur_conv = Convolution2D(n_filters, 1, 
                                 n_gram*self.preprocessor.embedding_dims, 
                                 subsample=(1, self.preprocessor.embedding_dims))(x)
        # model = Model(input=tokens_input, output=cur_conv)

        # this output (n_filters x max_doc_len x 1)
        one_max = MaxPooling2D(pool_size=(1, self.preprocessor.max_sent_len - n_gram + 1))(cur_conv)
        # flip around, to get (1 x max_doc_len x n_filters)
        permuted = Permute((3, 2, 1))(one_max)
        # drop extra dimension
        r = Reshape((self.preprocessor.max_doc_len, n_filters))(permuted)
        # now we want to average the sentence vectors!
        x_doc = Lambda(RationaleCNN.weighted_sum, 
                        output_shape=RationaleCNN.weighted_sum_output_shape)(r)

        # finally, a sigmoid unit for classification (the original wired the
        # model to x_doc and left y_hat dangling, with a one-unit softmax)
        y_hat = Dense(1, activation="sigmoid")(x_doc)
        model = Model(input=tokens_input, output=y_hat)
        return model 
        #model.summary()

    def build_doc_model(self):
        '''
        Builds the *document* level model, which uses the sentence level model to inform
        its predictions.
        '''
        p = self.preprocessor  # local alias; `p` is otherwise undefined in this method
        #tokens_input = Input(name='input', shape=(None, 
        #                        self.preprocessor.max_doc_len, 
        #                        self.preprocessor.max_sent_len), dtype='int32')
        tokens_input = Input(name='input', shape=(p.max_doc_len, p.max_sent_len), dtype='int32')

        tokens_reshaped = Reshape([p.max_doc_len*p.max_sent_len])(tokens_input)

        x = Embedding(p.max_features, p.embedding_dims, weights=p.init_vectors)(tokens_reshaped)
        #tokens_reshaped = Reshape((self.preprocessor.max_doc_len, 
        #                           self.preprocessor.max_sent_len*self.preprocessor.embedding_dims))(tokens_input)

        # so this will be (max_doc_len, max_sent_len, wv_size), i think
        #x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, 
        #        weights=self.preprocessor.init_vectors)(tokens_input)
                      #input_length=self.preprocessor.max_sent_len, 
                      #weights=self.preprocessor.init_vectors)(tokens_input)
        
        x = Reshape((p.max_doc_len, p.max_sent_len*p.embedding_dims))(x)
        x = Dropout(0.1)(x)

        #  (max_doc_len, max_sent_len, wv_size) -> (max_doc_len, max_sent_len * wv_size)
        #r = Reshape(self.preprocessor.max_doc_len, 
        #            self.preprocessor.max_sent_len * self.preprocessor.embedding_dims)(x)
        convolutions = []
        for n_gram in self.ngram_filters:
            #cur_conv = Convolution1D(nb_filter=self.nb_filter, filter_length=n_gram)

            '''
            # filter, nb_rows, nb_cols
            cur_conv = Convolution2D(self.nb_filter, 
                             1, self.preprocessor.embedding_dims, 
                             filter_length=n_gram,
                             activation='relu',
                             input_dim=self.preprocessor.embedding_dims,
                             input_length=self.preprocessor.max_sent_len)(x)
            '''

            # cur_conv = Convolution2D(32, p.embedding_dims, n_gram, input_shape=(1, p.embedding_dims,  p.max_sent_len))(x)
            cur_conv = Convolution1D(nb_filter=self.nb_filter,
                                         filter_length=n_gram,
                                         border_mode='valid',
                                         activation='relu',
                                         subsample_length=1,
                                         input_dim=self.preprocessor.embedding_dims,
                                         input_length=self.preprocessor.max_sent_len)(x)
            

            # pool
            #one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv)
            one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv)
            flattened = Flatten()(one_max)
            convolutions.append(flattened)

        penultimate_layer = merge(convolutions)
        
        output = Dense(1, activation="sigmoid")(penultimate_layer)

        self.document_model = Model(input=tokens_input, output=output)
        
        print(self.document_model.summary())
        self.document_model.compile(loss='binary_crossentropy', optimizer="adam")

        return self.document_model 


Example #4
def test_sequential_regression():
    from keras.models import Sequential, Model
    from keras.layers import Merge, Embedding, BatchNormalization, LSTM, InputLayer, Input

    # start with a basic example of using a Sequential model
    # inside the functional API
    seq = Sequential()
    seq.add(Dense(input_dim=10, output_dim=10))

    x = Input(shape=(10, ))
    y = seq(x)
    model = Model(x, y)
    model.compile('rmsprop', 'mse')
    weights = model.get_weights()

    # test serialization
    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')
    model.set_weights(weights)

    # more advanced model with multiple branches

    branch_1 = Sequential(name='branch_1')
    branch_1.add(
        Embedding(input_dim=100, output_dim=10, input_length=2,
                  name='embed_1'))
    branch_1.add(LSTM(32, name='lstm_1'))

    branch_2 = Sequential(name='branch_2')
    branch_2.add(Dense(32, input_shape=(8, ), name='dense_2'))

    branch_3 = Sequential(name='branch_3')
    branch_3.add(Dense(32, input_shape=(6, ), name='dense_3'))

    branch_1_2 = Sequential([Merge([branch_1, branch_2], mode='concat')],
                            name='branch_1_2')
    branch_1_2.add(Dense(16, name='dense_1_2-0'))
    # test whether impromptu input_shape breaks the model
    branch_1_2.add(Dense(16, input_shape=(16, ), name='dense_1_2-1'))

    model = Sequential([Merge([branch_1_2, branch_3], mode='concat')],
                       name='final')
    model.add(Dense(16, name='dense_final'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    x = (100 * np.random.random((100, 2))).astype('int32')
    y = np.random.random((100, 8))
    z = np.random.random((100, 6))
    labels = np.random.random((100, 16))
    model.fit([x, y, z], labels, nb_epoch=1)

    # test if Sequential can be called in the functional API

    a = Input(shape=(2, ), dtype='int32')
    b = Input(shape=(8, ))
    c = Input(shape=(6, ))
    o = model([a, b, c])

    outer_model = Model([a, b, c], o)
    outer_model.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    outer_model.fit([x, y, z], labels, nb_epoch=1)

    # test serialization
    config = outer_model.get_config()
    outer_model = Model.from_config(config)
    outer_model.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    outer_model.fit([x, y, z], labels, nb_epoch=1)
Example #5
class KerasMnist(object):
    def __init__(self, hidden_layers, skips, epochs, batch_size):
        assert len(hidden_layers) > 0
        self.hidden_layer_dims = hidden_layers
        self.skips = skips
        self.num_classes = 10
        self.input_dim = 784
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = None

        self.x_train = None
        self.y_train = None
        self.x_test = None
        self.y_test = None

    def load_data(self):
        # the data, split between train and test sets
        (self.x_train, self.y_train), (self.x_test,
                                       self.y_test) = mnist.load_data()

        self.x_train = self.x_train.reshape(60000, 784)
        self.x_test = self.x_test.reshape(10000, 784)
        self.x_train = self.x_train.astype('float32')
        self.x_test = self.x_test.astype('float32')
        self.x_train /= 255
        self.x_test /= 255

        # convert class vectors to binary class matrices
        self.y_train = keras.utils.to_categorical(self.y_train,
                                                  self.num_classes)
        self.y_test = keras.utils.to_categorical(self.y_test, self.num_classes)

    def build_model(self):
        if self.skips > 1:
            self.build_model_skip()
        else:
            self.build_model_no_skip()

    def build_model_no_skip(self):
        '''
        MLP network with ReLU activations. For the last
        layer use the softmax activation. Initialize self.model
        as a Sequential model and add layers to it according to
        the class variables input_dim, hidden_layer_dims and num_classes.
        '''

        self.model = Sequential()
        input_dim = self.input_dim
        for layer in self.hidden_layer_dims:
            self.model.add(
                Dense(units=layer, activation='relu', input_dim=input_dim))
            input_dim = layer
        self.model.add(Dense(units=self.num_classes, activation='softmax'))

        self.model.compile(loss='categorical_crossentropy',
                           optimizer=SGD(),
                           metrics=['accuracy'])
        self.model.summary()

    def build_model_skip(self):
        '''
        MLP with skip connections. Using the Model functional API,
        create layers as before, with ReLU as the activation function,
        and softmax for the last layer. 
        In addition, create skip connections between every n layers, 
        where n is defined by the class parameter skips.
        Make sure to:
         1) Define the variable x as the input to the network.
         2) Define the variable out as the output of the network.
        '''

        x = Input(shape=(self.input_dim, ))
        prev = x
        tensors = [x]
        for index, layer in enumerate(self.hidden_layer_dims):
            if index >= self.skips and index % self.skips == 0:
                hidden_layer = Dense(units=layer, activation='relu')(prev)
                n_skip_back_layer = tensors[index - self.skips + 1]
                prev = keras.layers.add([hidden_layer, n_skip_back_layer])

            else:
                hidden_layer = Dense(units=layer, activation='relu')(prev)
                prev = hidden_layer
            tensors.append(prev)

        out = Dense(units=self.num_classes, activation='softmax')(prev)

        self.model = Model([x], out)
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=SGD(),
                           metrics=['accuracy'])
        self.model.summary()
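
    # Hedged trace of the wiring above for skips=2 and four equal hidden
    # widths: the Dense at index 2 is summed with the output of the Dense at
    # index 0 (tensors[1]); keras.layers.add requires matching shapes, so
    # skip-connected layers must share the same width.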

    def train_eval_model(self):
        history = self.model.fit(self.x_train,
                                 self.y_train,
                                 batch_size=self.batch_size,
                                 epochs=self.epochs,
                                 verbose=0,
                                 validation_data=(self.x_test, self.y_test))
        score_train = self.model.evaluate(self.x_train,
                                          self.y_train,
                                          verbose=0)
        score_test = self.model.evaluate(self.x_test, self.y_test, verbose=0)

        return history, score_train, score_test

    @staticmethod
    def plot_curves(history, figpath):
        history_dict = history.history
        for metric in ['loss', 'acc']:
            plt.clf()
            metric_values = history_dict[metric]
            val_metric_values = history_dict['val_' + metric]
            epochs = range(1, len(metric_values) + 1)
            plt.plot(epochs, metric_values, 'bo')
            plt.plot(epochs, val_metric_values, 'b+')
            plt.xlabel('epochs')
            plt.ylabel(metric)
            plt.savefig(figpath + '_' + metric + '.png')
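
A hedged end-to-end sketch of driving the class above (the hyperparameters are illustrative, not from the source):

net = KerasMnist(hidden_layers=[128, 64, 64], skips=2, epochs=5, batch_size=128)
net.load_data()
net.build_model()  # skips > 1, so the skip-connection variant is built
history, score_train, score_test = net.train_eval_model()
KerasMnist.plot_curves(history, 'mnist_mlp')  # writes mnist_mlp_loss.png / mnist_mlp_acc.png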
Example #6
# In[72]:

# fetch the third layer from the end of the base model (a batch-normalization layer)
print(base_model.layers[-3].output)

# In[97]:

import tensorflow
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import InputLayer
base_model = tensorflow.keras.applications.resnet50.ResNet50(
    weights='imagenet', pooling='max', include_top=False)
#base_model = ResNet50(weights='imagenet', pooling=max, include_top = False)
#input = Input(shape=(32,32,3),name = 'img')
model = tensorflow.keras.Sequential()
model.add(InputLayer(input_shape=(32, 32, 3), name='img'))
for layer in base_model.layers[0:176]:
    model.add(layer)

#intermediate_layer_model = Model(inputs=input,outputs = base_model.layers[-3].output)

# In[ ]:

features = model.predict(img)
features

# In[74]:

#from keras.models import Model
#base_model = ResNet50(weights='imagenet', pooling=max, include_top = False)
#model = base_model  # include here your original model
preds = model.predict_generator(test2)

pred = []
for i in preds:
  if i>0.45:
    pred.append(1)
  else:
    pred.append(0)

print(classification_report(test2.classes,pred))

print(confusion_matrix(test2.classes,pred))
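
# Equivalent vectorized form of the thresholding loop above (same 0.45 cut-off):
#   pred = (preds > 0.45).astype(int).ravel().tolist()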

model = Sequential()

model.add(Conv2D(32,(3,3), activation ='relu',input_shape = (256,256,3)))
model.add(Conv2D(32, (3,3),activation ='relu'))
model.add(Conv2D(32, (3,3),activation ='relu'))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(0.25))

model.add(Conv2D(64,(3,3), activation ='relu'))
model.add(Conv2D(64, (3,3),activation ='relu'))
model.add(Conv2D(64, (3,3),activation ='relu'))
model.add(MaxPooling2D(pool_size = (3,3)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(Dropout(0.4))
model.add(Dense(64, activation = "relu"))
   EPOCS = 50
   input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
   base_model = VGG16(weights='imagenet', include_top=False,input_tensor=input_tensor)
   x = base_model.output
   x = GlobalAveragePooling2D()(x)
   x = Dense(1024, activation='relu')(x)
   predictions = Dense(N_CATEGORIES, activation='softmax')(x)
   model = Model(inputs=base_model.input, outputs=predictions)
   for layer in base_model.layers[:15]:
      layer.trainable = False
elif MODELS == 'small_cnn':
   IMAGE_SIZE = 32
   EPOCS = 50
   model = Sequential()
   input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)
   model.add(InputLayer(input_shape=input_shape))
   model.add(Convolution2D(96, 3, 3, border_mode='same'))
   model.add(Activation('relu'))
   model.add(Convolution2D(128, 3, 3))
   model.add(Activation('relu'))
   model.add(Dropout(0.5))
   model.add(Flatten())
   model.add(Dense(1024))
   model.add(Activation('relu'))
   model.add(Dropout(0.5))
   model.add(Dense(N_CATEGORIES))
   model.add(Activation('softmax',name='predictions'))
elif MODELS == 'simple_cnn':
   IMAGE_SIZE = 48
   EPOCS = 50
   input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)
Example #9
class QNetwork():

    # This class essentially defines the network architecture.
    # The network should take in state of the world as an input,
    # and output Q values of the actions available to the agent as the output.

    def __init__(self, env, replay, deep, duel):
        # Define your network architecture here. It is also a good idea to define any training operations
        # and optimizers here, initialize your variables, or alternately compile your model here.
        self.learning_rate = 0.001  #HYPERPARAMETER1

        #linear network
        if not deep and not duel:
            print("Setting up linear network....")
            self.model = Sequential()
            # self.model.add(Dense(env.action_space.n, input_dim = env.observation_space.shape[0], activation='linear', kernel_initializer='he_uniform', use_bias = True))
            self.model.add(
                Dense(32,
                      input_dim=env.observation_space.shape[0] * 2,
                      activation='linear',
                      kernel_initializer='he_uniform',
                      use_bias=True))
            self.model.add(
                Dense(env.action_space.n,
                      input_dim=32,
                      activation='linear',
                      kernel_initializer='he_uniform',
                      use_bias=True))
            self.model.compile(optimizer=Adam(lr=self.learning_rate),
                               loss='mse')
            # plot_model(self.model, to_file='graphs/Linear.png', show_shapes = True)
            self.model.summary()

        #deep network
        elif deep:
            print("Setting up DDQN network....")
            self.model = Sequential()
            self.model.add(
                Dense(32,
                      input_dim=env.observation_space.shape[0] * 2,
                      activation='relu',
                      kernel_initializer='he_uniform',
                      use_bias=True))
            # self.model.add(BatchNormalization())
            self.model.add(
                Dense(32,
                      input_dim=32,
                      activation='relu',
                      kernel_initializer='he_uniform',
                      use_bias=True))
            # self.model.add(BatchNormalization())
            self.model.add(
                Dense(32,
                      input_dim=32,
                      activation='relu',
                      kernel_initializer='he_uniform',
                      use_bias=True))
            # self.model.add(BatchNormalization())
            self.model.add(
                Dense(env.action_space.n,
                      input_dim=32,
                      activation='linear',
                      kernel_initializer='he_uniform',
                      use_bias=True))
            print("Q-Network initialized.... :)\n")

            self.model.compile(optimizer=Adam(lr=self.learning_rate),
                               loss='mse')
            # plot_model(self.model, to_file='graphs/DDQN.png', show_shapes = True)
            self.model.summary()

        #dueling network
        elif duel:
            print("Setting up Dueling DDQN network....")
            inp = Input(shape=(env.observation_space.shape[0] * 2, ))
            layer_shared1 = Dense(32,
                                  activation='relu',
                                  kernel_initializer='he_uniform',
                                  use_bias=True)(inp)
            # layer_shared1 = BatchNormalization()(layer_shared1)
            layer_shared2 = Dense(32,
                                  activation='relu',
                                  kernel_initializer='he_uniform',
                                  use_bias=True)(layer_shared1)
            # layer_shared2 = BatchNormalization()(layer_shared2)
            print("Shared layers initialized....")

            # layer_v1 = Dense(16,activation='relu',kernel_initializer='he_uniform',use_bias = True)(layer_shared2)
            # # layer_v1 = BatchNormalization()(layer_v1)
            # layer_a1 = Dense(16,activation='relu',kernel_initializer='he_uniform',use_bias = True)(layer_shared2)
            # layer_a1 = BatchNormalization()(layer_a1)
            layer_v2 = Dense(1,
                             activation='linear',
                             kernel_initializer='he_uniform',
                             use_bias=True)(layer_shared2)
            layer_a2 = Dense(env.action_space.n,
                             activation='linear',
                             kernel_initializer='he_uniform',
                             use_bias=True)(layer_shared2)
            print("Value and Advantage Layers initialised....")

            layer_mean = Lambda(lambda x: K.mean(x, axis=-1, keepdims=True))(
                layer_a2)
            temp = layer_v2
            temp2 = layer_mean

            for i in range(env.action_space.n - 1):
                layer_v2 = keras.layers.concatenate([layer_v2, temp], axis=-1)
                layer_mean = keras.layers.concatenate([layer_mean, temp2],
                                                      axis=-1)

            # layer_q = Lambda(lambda x: K.expand_dims(x[0],axis=-1)  + x[1] - K.mean(x[1],keepdims=True), output_shape=(env.action_space.n,))([layer_v2, layer_a2])
            layer_q = Subtract()([layer_a2, layer_mean])
            layer_q = Add()([layer_q, layer_v2])

            print("Q-function layer initialized.... :)\n")

            self.model = Model(inp, layer_q)
            self.model.summary()
            self.model.compile(optimizer=Adam(lr=self.learning_rate),
                               loss='mse')
            # plot_model(self.model, to_file='graphs/Duel_DQN.png', show_shapes = True)
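
            # Hedged alternative (a sketch, not from the original): the same
            # dueling aggregation in one Lambda, relying on broadcasting:
            #   layer_q = Lambda(lambda va: va[0] + va[1] - K.mean(va[1], axis=-1, keepdims=True),
            #                    output_shape=(env.action_space.n,))([layer_v2, layer_a2])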

    def save_model_weights(self, suffix):
        # Helper function to save your model / weights.
        self.model.save_weights(suffix)

    def load_model(self, model_file):
        # Helper function to load an existing model.
        self.model = keras.models.load_model(model_file)

    def load_model_weights(self, weight_file):
        # Helper function to load model weights from a weights file.
        self.model.load_weights(weight_file)

    def visualise_weights(self):
        print("Current Weights\n")
        for layer in self.model.layers:
            temp = layer.get_weights()
            print(temp)
Example #10
        # bx = GlobalMaxPooling1D()(bx)
        # bx = Dense(128, activation='relu')(bx)

        # seq_features = merge([ax, bx], mode='concat')
        # # seq_features = Dense(128, activation='relu')(seq_features)
        # preds = Dense(len(ys_index), activation='softmax')(seq_features)

        # model = Model(inputs=[asequence_input, bsequence_input], outputs=preds)
        # model.compile(loss='categorical_crossentropy',
        #               optimizer='rmsprop',
        #               metrics=['acc'])

        cx = Merge([ax, bx], mode='mul')

        model = Sequential()
        model.add(Merge([ax, bx, cx], mode='concat'))
        # model.add(Dense(len(ys_index), activation='softmax'))
        model.add(Dense(2, activation='softmax'))

        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['acc'])

        print(model.summary())
        # print y_train.shape

        # happy learning!
        count = 0
        while count < EPOCH:
            model.fit([x_train_a, x_train_b],
                      y_train_prov,
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

activation = Activation('relu')
activation = my_nl

nw_input = Input(input_shape)
nw = nw_input
nw = Dropout(0.25)(nw)
nw = Conv2D(32, kernel_size=(3, 3), activation='linear')(nw)
Example #12
class DEEPVSA(object):
    def __init__(self, inst, label, train_label, seq_len, inst_len,
                 model_option, use_attention):
        self.train_label = train_label
        self.seq_len = seq_len
        self.model_option = model_option
        self.inst_len = inst_len
        self.X, self.Y, self.Y_one_hot, self.n_class = self.list2np(
            inst, label, seq_len, model_option)
        self.build_model(use_attention)
        self.use_attention = use_attention

    def predict_classes(self, proba):
        if proba.shape[-1] > 1:
            return proba.argmax(axis=-1)
        else:
            return (proba > 0.5).astype('int32')

    def list2np(self, inst, label, seq_len, model_option):
        label_all = [10, 20, 30, 40]
        label_right = int((self.train_label + 1) * 10)
        label[label == label_right] = 1
        label_all.remove(label_right)
        for ii in label_all:
            label[label == ii] = 0
        n_class = 2

        num_sample = inst.shape[0] // seq_len  # integer division: number of full sequences
        if model_option == 3:
            X = inst[0:(num_sample * seq_len), ].reshape(
                num_sample, seq_len, inst.shape[1])
        else:
            X = inst[0:(num_sample * seq_len), ].reshape(num_sample, seq_len)

        Y = label[0:(num_sample * seq_len), ].reshape(num_sample, seq_len)

        Y_one_hot = to_categorical(Y).astype('int32')
        return X, Y, Y_one_hot, n_class
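
    # Toy trace of the remapping in list2np (hedged; values invented): with
    # train_label == 1, label_right == 20, so
    #   [10, 20, 30, 40, 20]  ->  [0, 1, 0, 0, 1]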

    def build_model(self, use_attention):
        if self.model_option == 0:
            print "Using Bi-SimpleRNN >>>>>>>>>>>>>>>>>>"
            self.model = Sequential()
            self.model.add(
                Embedding(input_dim=256,
                          output_dim=64,
                          input_length=self.seq_len))
            self.model.add(
                Bidirectional(
                    SimpleRNN(units=32,
                              activation='tanh',
                              return_sequences=True)))
            self.model.add(Dropout(0.5))
            self.model.add(
                Bidirectional(
                    SimpleRNN(units=16,
                              activation='tanh',
                              return_sequences=True)))
            self.model.add(Dropout(0.5))
            self.model.add(
                Bidirectional(
                    SimpleRNN(units=8,
                              activation='tanh',
                              return_sequences=True)))
            self.model.add(
                TimeDistributed(Dense(self.n_class, activation='softmax'),
                                input_shape=(self.seq_len, 16)))
            self.model.summary()

        elif self.model_option == 1:
            print "Using Bi-GRU >>>>>>>>>>>>>>>>>>>>>>>>"
            self.model = Sequential()
            self.model.add(
                Embedding(input_dim=256,
                          output_dim=64,
                          input_length=self.seq_len))
            self.model.add(
                Bidirectional(
                    GRU(units=32, activation='tanh', return_sequences=True)))
            self.model.add(Dropout(0.5))
            self.model.add(
                Bidirectional(
                    GRU(units=16, activation='tanh', return_sequences=True)))
            self.model.add(Dropout(0.5))
            self.model.add(
                Bidirectional(
                    GRU(units=8, activation='tanh', return_sequences=True)))
            self.model.add(
                TimeDistributed(Dense(self.n_class, activation='softmax'),
                                input_shape=(self.seq_len, 16)))
            self.model.summary()

        elif self.model_option == 2:
            print "Using Bi-LSTM >>>>>>>>>>>>>>>>>>>>>>>"
            self.model = Sequential()
            self.model.add(
                Embedding(input_dim=256,
                          output_dim=64,
                          input_length=self.seq_len))
            self.model.add(
                Bidirectional(
                    LSTM(units=32, activation='tanh', return_sequences=True)))
            self.model.add(Dropout(0.5))
            self.model.add(
                Bidirectional(
                    LSTM(units=16, activation='tanh', return_sequences=True)))
            self.model.add(Dropout(0.5))
            self.model.add(
                Bidirectional(
                    LSTM(units=8, activation='tanh', return_sequences=True)))
            self.model.add(
                TimeDistributed(Dense(self.n_class, activation='softmax'),
                                input_shape=(self.seq_len, 16)))
            self.model.summary()

        elif self.model_option == 3:
            print "Using hierarchical attention networks >>>>>>>>>>"
            X_input = Input(shape=(self.seq_len, self.inst_len),
                            name='X_input')
            inst_input = Input(shape=(self.inst_len, ), name='inst_input')

            bin_embedded = Embedding(input_dim=257,
                                     output_dim=64,
                                     input_length=self.inst_len)(inst_input)
            inst_embedded = Bidirectional(
                LSTM(units=32, dropout=0.5,
                     return_sequences=True))(bin_embedded)
            if use_attention:
                inst_embedded = Bidirectional(
                    LSTM(units=16, dropout=0.5,
                         return_sequences=True))(inst_embedded)
                inst_embedded = AttLayer(16)(inst_embedded)
            else:
                inst_embedded = Bidirectional(LSTM(units=16,
                                                   dropout=0.5))(inst_embedded)

            inst_model = Model(inst_input, inst_embedded)

            seq_embedded = TimeDistributed(inst_model)(X_input)
            if use_attention and False:
                lstm_out_f = (AttentionLSTM(
                    units=8,
                    seq_len=self.seq_len,
                    seq_input=seq_embedded,
                    dropout=0.25,
                    recurrent_dropout=0.25,
                    return_sequences=True))(seq_embedded)
                lstm_out_b = (AttentionLSTM(units=8,
                                            seq_len=self.seq_len,
                                            seq_input=seq_embedded,
                                            dropout=0.25,
                                            recurrent_dropout=0.25,
                                            return_sequences=True,
                                            go_backwards=True))(seq_embedded)
                lstm_out = concatenate([lstm_out_f, lstm_out_b])

                lstm_out_f = (AttentionLSTM(units=32,
                                            seq_len=self.seq_len,
                                            seq_input=lstm_out,
                                            dropout=0.25,
                                            recurrent_dropout=0.25,
                                            return_sequences=True))(lstm_out)
                lstm_out_b = (AttentionLSTM(units=8,
                                            seq_len=self.seq_len,
                                            seq_input=lstm_out,
                                            dropout=0.25,
                                            recurrent_dropout=0.25,
                                            return_sequences=True,
                                            go_backwards=True))(lstm_out)
                lstm_out = concatenate([lstm_out_f, lstm_out_b])

            else:
                lstm_out = Bidirectional(
                    LSTM(units=32, dropout=0.5,
                         return_sequences=True))(seq_embedded)
                lstm_out = Bidirectional(
                    LSTM(units=16, dropout=0.5,
                         return_sequences=True))(lstm_out)

            model_out = TimeDistributed(Dense(self.n_class,
                                              activation='softmax'),
                                        name='model_out')(lstm_out)

            self.model = Model([X_input], model_out)
            self.model.summary()

            # inst_embedded = Bidirectional(GRU(units=32, dropout=0.5, return_sequences=True))(bin_embedded)
            # if use_attention:
            #     inst_embedded = Bidirectional(GRU(units=16, dropout=0.5, return_sequences=True))(inst_embedded)
            #     inst_embedded = AttLayer(16)(inst_embedded)
            # else:
            #     inst_embedded = Bidirectional(GRU(units=16, dropout=0.5))(inst_embedded)
            #
            # inst_model = Model(inst_input, inst_embedded)
            #
            # seq_embedded = TimeDistributed(inst_model)(X_input)
            # if use_attention and False:
            #     lstm_out_f = (AttentionLSTM(units=8, seq_len=self.seq_len, seq_input=seq_embedded, dropout=0.25,
            #                                 recurrent_dropout=0.25, return_sequences=True))(seq_embedded)
            #     lstm_out_b = (AttentionLSTM(units=8, seq_len=self.seq_len, seq_input=seq_embedded, dropout=0.25,
            #                                 recurrent_dropout=0.25, return_sequences=True,
            #                                 go_backwards=True))(seq_embedded)
            #     lstm_out = concatenate([lstm_out_f, lstm_out_b])
            #
            #     lstm_out_f = (AttentionLSTM(units=32, seq_len=self.seq_len, seq_input=lstm_out, dropout=0.25,
            #                                 recurrent_dropout=0.25, return_sequences=True))(lstm_out)
            #     lstm_out_b = (AttentionLSTM(units=8, seq_len=self.seq_len, seq_input=lstm_out, dropout=0.25,
            #                                 recurrent_dropout=0.25, return_sequences=True,
            #                                 go_backwards=True))(lstm_out)
            #     lstm_out = concatenate([lstm_out_f, lstm_out_b])
            #
            # else:
            #     lstm_out = Bidirectional(GRU(units=32, dropout=0.5, return_sequences=True))(seq_embedded)
            #     lstm_out = Bidirectional(GRU(units=16, dropout=0.5, return_sequences=True))(lstm_out)
            #
            # model_out = TimeDistributed(Dense(self.n_class, activation='softmax'), name='model_out')(lstm_out)
            #
            # self.model = Model([X_input], model_out)
            # self.model.summary()

    def fit(self, batch_size, epoch_1, epoch_2, save_model, save_dir,
            truncate):
        # self.X = self.X[0:1000, ]
        # self.Y = self.Y[0:1000, ]
        # self.Y_one_hot = self.Y_one_hot[0:1000, ]
        print('================================================')
        print("Data shape...")
        print(self.X.shape)
        print(self.Y_one_hot.shape)
        print("Counting the number of data in each category...")
        print(collections.Counter(self.Y.flatten()))
        print('================================================')

        print('Starting training...')
        if self.train_label == 0 or self.train_label == 1:
            sample_weights = class_weight.compute_sample_weight(
                'balanced', self.Y.flatten()).reshape(self.Y.shape)
            self.model.compile(optimizer=Adam(lr=0.005),
                               loss='categorical_crossentropy',
                               metrics=['accuracy'],
                               sample_weight_mode="temporal")
            self.model.fit(self.X,
                           self.Y_one_hot,
                           batch_size=batch_size,
                           epochs=epoch_1,
                           verbose=1,
                           sample_weight=sample_weights)

        # if save_model:
        #     if truncate:
        #         if self.model_option == 0:
        #             name = str(self.train_label) + '_bi_rnn_truncate_1.h5'
        #         elif self.model_option == 1:
        #             name = str(self.train_label) + '_bigru_truncate_1.h5'
        #         elif self.model_option == 2:
        #             name = str(self.train_label) +  '_bilstm_truncate_1.h5'
        #         else:
        #             name = str(self.train_label) + '_han_truncate_1.h5'
        #     else:
        #         if self.model_option == 0:
        #             name = str(self.train_label) + 'bi_rnn_1.h5'
        #         elif self.model_option == 1:
        #             name = str(self.train_label) + '_bigru_1.h5'
        #         elif self.model_option == 2:
        #             name = str(self.train_label) +  '_bilstm_1.h5'
        #         else:
        #             name = str(self.train_label) + '_han_1.h5'
        #     save_dir_1 = os.path.join(save_dir, name)
        #     if model_option==3 and self.use_attention:
        #         weights = self.model.get_weights()
        #         io.savemat(save_dir_1, {'weights':weights})
        #     else:
        #         self.model.save(save_dir_1)

        if self.train_label == 3 or self.train_label == 2:
            self.model.compile(optimizer=Adam(lr=0.005),
                               loss='categorical_crossentropy',
                               metrics=['accuracy'],
                               sample_weight_mode="temporal")
            self.model.fit(self.X,
                           self.Y_one_hot,
                           batch_size=batch_size,
                           epochs=epoch_2,
                           verbose=1)

        if save_model:
            if truncate:
                if self.model_option == 0:
                    name = str(self.train_label) + '_birnn_truncate.h5'
                elif self.model_option == 1:
                    name = str(self.train_label) + '_bigru_truncate.h5'
                elif self.model_option == 2:
                    name = str(self.train_label) + '_bilstm_truncate.h5'
                else:
                    name = str(self.train_label) + '_han_truncate.h5'
            else:
                if self.model_option == 0:
                    name = str(self.train_label) + '_birnn.h5'
                elif self.model_option == 1:
                    name = str(self.train_label) + '_bigru.h5'
                elif self.model_option == 2:
                    name = str(self.train_label) + '_bilstm.h5'
                else:
                    name = str(self.train_label) + '_han.h5'
            save_dir = os.path.join(save_dir, name)
            if self.model_option == 3 and self.use_attention:
                weights = self.model.get_weights()
                io.savemat(save_dir, {'weights': weights})
            else:
                self.model.save(save_dir)
        return 0

    def evaluate(self, batch_size=128):
        # batch_size was previously read from an undefined global; it is now
        # an explicit parameter (128 is an assumed default).
        y_pred = self.predict_classes(
            self.model.predict(self.X, batch_size=batch_size))

        print('Evaluating training results')
        precision, recall, f1, _ = precision_recall_fscore_support(
            self.Y.flatten(),
            y_pred.flatten(),
            labels=[0, 1],
            average='weighted')
        print("Precision: %s Recall: %s F1: %s" % (precision, recall, f1))
        print('================================================')

        for i in range(2):
            print('Evaluating training results of positive labels at region '
                  + str(i))
            precision, recall, f1, _ = precision_recall_fscore_support(
                self.Y.flatten(),
                y_pred.flatten(),
                labels=[i],
                average='weighted')
            print("Precision: %s Recall: %s F1: %s" % (precision, recall, f1))
            print('================================================')
        return 0

    def predict(self, inst_test, label_test, batch_size=128):
        # batch_size was previously read from an undefined global; it is now
        # an explicit parameter (128 is an assumed default).
        X_test, Y_test, _, _ = self.list2np(inst_test, label_test,
                                            self.seq_len, self.model_option)

        y_pred = self.predict_classes(
            self.model.predict(X_test, batch_size=batch_size))

        print('Evaluating testing results')
        precision, recall, f1, _ = precision_recall_fscore_support(
            Y_test.flatten(),
            y_pred.flatten(),
            labels=[0, 1],
            average='weighted')
        print("Precision: %s Recall: %s F1: %s" % (precision, recall, f1))
        print('================================================')

        for i in range(2):
            print('Evaluating testing results of positive labels at region '
                  + str(i))
            precision, recall, f1, _ = precision_recall_fscore_support(
                Y_test.flatten(),
                y_pred.flatten(),
                labels=[i],
                average='weighted')
            print("Precision: %s Recall: %s F1: %s" % (precision, recall, f1))
            print('================================================')

        return y_pred
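For context on the weighting used in fit() above: with sample_weight_mode="temporal", Keras expects one weight per label position, and sklearn's 'balanced' mode makes each weight inversely proportional to its class frequency. A minimal standalone sketch with toy data, not part of the original class:

import numpy as np
from sklearn.utils import class_weight

# toy (batch, timesteps) integer labels; class 1 is the rarer class
Y = np.array([[0, 0, 1, 0],
              [0, 1, 1, 0]])
weights = class_weight.compute_sample_weight(
    'balanced', Y.flatten()).reshape(Y.shape)
print(weights)  # label-1 positions get 8/(2*3) ~= 1.33, label-0 positions get 8/(2*5) = 0.8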
예제 #13
0
# -

# ### Visualize the model architecture

# +
import sys

sys.path.append('../../python_lib/convnet-drawer')

# +
from keras.models import Sequential
from convnet_drawer import Model, Conv2D, MaxPooling2D, Flatten, Dense

drawer_model = Model(input_shape=(28, 28, 3))

drawer_model.add(Flatten())
drawer_model.add(Dense(1500))
drawer_model.add(Dense(1000))
drawer_model.add(Dense(500))
drawer_model.add(Dense(10))
drawer_model.add(Dense(500))
drawer_model.add(Dense(1000))
drawer_model.add(Dense(1500))
drawer_model.add(Dense(784))

drawer_model.save_fig('alexnet.svg')
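# Note: convnet_drawer mimics the Keras Sequential API but only records layer
# shapes to render the SVG; no weights are built and nothing is trained.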

# + {"colab_type": "text", "id": "mvFdHRtk2GGY", "cell_type": "markdown"}
# ### Display the model summary

# + {"colab": {"base_uri": "https://localhost:8080/", "height": 442}, "colab_type": "code", "id": "cr3WlpfZBd7a", "outputId": "75a74782-6a78-4ed2-bbb1-34dc3d770b83"}
예제 #14
0
class BiblioEater:

    # Class to design, train and validate two topologies of NN

    DROPOUT_PROB = 0.2
    DROPOUT_PROB_OUT = 0.3
    NUM_WRITERS = 2
    NUM_FILTERS_1 = 8
    NUM_FILTERS_2 = 16
    NUM_FILTERS_3 = 32
    HIDDEN_DIMS = 16
    RECEPTIVE_FIELD = 4
    STRIDES = 1
    KERNEL_SIZE_2 = 3
    KERNEL_SIZE_3 = 2
    POOL_SIZE = 2
    POOL_SIZE_2 = 2
    POOL_SIZE_3 = 2
    NUM_EPOCHS = 12
    BATCH_SIZE = 8

    def __init__(self):
        self.model = None
        self.max_tokens_per_paragraph = 0
        self.pos_vector_length = 0

    def design_sequential_net(self, max_tokens_per_paragraph,
                              pos_vector_length):

        # This is the sequential model featured in the article
        self.max_tokens_per_paragraph = max_tokens_per_paragraph
        self.pos_vector_length = pos_vector_length

        input_shape = (max_tokens_per_paragraph, pos_vector_length)

        self.model = Sequential()

        # Block 1
        self.model.add(
            Conv1D(filters=self.NUM_FILTERS_1,
                   kernel_size=self.RECEPTIVE_FIELD,
                   strides=self.STRIDES,
                   input_shape=input_shape,
                   activation='relu'))
        self.model.add(Dropout(self.DROPOUT_PROB))
        self.model.add(MaxPooling1D(pool_size=self.POOL_SIZE))

        # Block 2
        self.model.add(
            Conv1D(filters=self.NUM_FILTERS_2,
                   kernel_size=self.KERNEL_SIZE_2,
                   activation='relu'))
        self.model.add(MaxPooling1D(pool_size=self.POOL_SIZE_2))

        # Block 3
        self.model.add(
            Conv1D(filters=self.NUM_FILTERS_3,
                   kernel_size=self.KERNEL_SIZE_3,
                   activation='relu'))
        self.model.add(MaxPooling1D(pool_size=self.POOL_SIZE_3))

        # Final block
        self.model.add(Flatten())
        self.model.add(Dropout(self.DROPOUT_PROB_OUT))
        self.model.add(Dense(self.HIDDEN_DIMS, activation="relu"))
        self.model.add(Dense(self.NUM_WRITERS, activation='softmax'))

        self.model.compile(loss='categorical_crossentropy',
                           optimizer='adam',
                           metrics=['acc'])

        print(self.model.summary())
        # plot_model(self.model, to_file=os.path.join(OUT_FOLDER, "model.png"))

    def train_sequential_net(self, pos_training_set, writer_labels):

        # Sequential model training
        # fit_generator not really needed as the dataset is small. It works all the same.

        # We train with the whole set. We will validate with other books
        model_steps = round(len(pos_training_set) / self.BATCH_SIZE)

        self.model.fit_generator(self.generate_training_batch(
            pos_training_set, writer_labels, self.BATCH_SIZE),
                                 epochs=self.NUM_EPOCHS,
                                 steps_per_epoch=model_steps,
                                 verbose=2)

        # serialize to disk
        with open(MODEL_FILE, "wb") as outfile:
            pickle.dump(self.model, outfile)

        nlp_logger.info("Sequential model written to file")

    def design_multi_sentence_net(self, max_tokens_per_sentence,
                                  pos_vector_length):

        # Alternative design. Although more complex, it does not yield better results consistently

        # The Keras Functional API is required for non-sequential networks
        sentence_input_1 = Input(shape=(
            max_tokens_per_sentence,
            pos_vector_length,
        ),
                                 name='sentence_input_1')
        sentence_input_2 = Input(shape=(
            max_tokens_per_sentence,
            pos_vector_length,
        ),
                                 name='sentence_input_2')
        sentence_input_3 = Input(shape=(
            max_tokens_per_sentence,
            pos_vector_length,
        ),
                                 name='sentence_input_3')

        shared_conv = Conv1D(filters=self.NUM_FILTERS_1,
                             kernel_size=self.RECEPTIVE_FIELD,
                             strides=2,
                             activation='relu')
        shared_max_pooling = MaxPooling1D(pool_size=self.POOL_SIZE)
        x1 = shared_conv(sentence_input_1)
        x1 = shared_max_pooling(x1)

        x2 = shared_conv(sentence_input_2)
        x2 = shared_max_pooling(x2)

        x3 = shared_conv(sentence_input_3)
        x3 = shared_max_pooling(x3)

        # Concatenate the three shared-branch outputs as input to the next layer
        x = concatenate([x1, x2, x3], axis=-1)

        # Block 2
        x = Conv1D(filters=self.NUM_FILTERS_3,
                   kernel_size=self.KERNEL_SIZE_2,
                   activation='relu')(x)
        x = Dropout(self.DROPOUT_PROB)(x)
        x = MaxPooling1D(pool_size=self.POOL_SIZE_2)(x)

        # Final block
        x = Flatten()(x)
        x = Dense(self.HIDDEN_DIMS, activation="relu")(x)
        main_output = Dense(1, activation='sigmoid', name='main_output')(x)

        self.model = Model(
            inputs=[sentence_input_1, sentence_input_2, sentence_input_3],
            outputs=[main_output])
        self.model.compile(optimizer='adam',
                           loss='binary_crossentropy',
                           metrics=['accuracy'])

        print(self.model.summary())

    def train_multi_sentence_net(self, pos_training_set, writer_labels):

        # Non-sequential model training
        # fit_generator not used this time

        self.model.fit(pos_training_set,
                       writer_labels,
                       batch_size=self.BATCH_SIZE,
                       epochs=self.NUM_EPOCHS,
                       verbose=2)

        # serialize to disk
        with open(MULTI_MODEL_FILE, "wb") as outfile:
            pickle.dump(self.model, outfile)

    def generate_training_batch(self, training_set, labels, batch_size):

        # A generator function avoids memory problems with big training sets;
        # it adds little value here because the dataset is small.

        batch_features = np.zeros((batch_size, self.max_tokens_per_paragraph,
                                   self.pos_vector_length))
        batch_labels = np.zeros((batch_size, self.NUM_WRITERS))

        while True:
            for i in range(batch_size):
                # choose random index in features
                index = randint(0, len(training_set) - 1)

                batch_features[i] = training_set[index]
                if labels[index] == 1:
                    batch_labels[i] = [0, 1]
                else:
                    batch_labels[i] = [1, 0]

            yield batch_features, batch_labels
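A minimal standalone sketch of the weight-sharing pattern used in design_multi_sentence_net above: a single Conv1D instance applied to several inputs reuses one set of weights (shapes are illustrative, not from the source):

from keras.layers import Input, Conv1D
from keras.models import Model

inp_a = Input(shape=(20, 8))
inp_b = Input(shape=(20, 8))
shared = Conv1D(filters=4, kernel_size=3, activation='relu')
out_a = shared(inp_a)  # first call builds the kernel and bias
out_b = shared(inp_b)  # second call reuses exactly the same weights
m = Model([inp_a, inp_b], [out_a, out_b])
assert len(m.trainable_weights) == 2  # one shared kernel + one shared bias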
예제 #15
0
#Allows us to create the model knowing only the inputs
model = t
model = Conv2D(64, kernel_size=3, activation='relu', strides=2)(model)
model = Conv2D(128, kernel_size=3, activation='relu', strides=2)(model)
model = Conv2D(256, kernel_size=3, activation='relu', strides=2)(model)

#Generate Latent Layer
model = Flatten()(model)
latent = Dense(latent_dim, name="latent_vector")(model)

# make encoder model
encoder = Model(t, latent, name="encoder_layers")
encoder.summary()

#encoder model
'''encoder=Sequential()
encoder.add(Conv2D(64,kernel_size=3,activation='relu',input_shape=(400,350,1),strides=2))
encoder.add(Conv2D(128,kernel_size=3,activation='relu',strides=2))
encoder.add(Conv2D(256,kernel_size=3,activation='relu',strides=2))
encoder.add(Flatten())
encoder.add(Dense(latent_dim,name="latent_vector"))

# make encoder model
#encoder=Model(t,latent,name="encoder_layers")
encoder.summary()


# Decoder Model
decoder=Sequential()
decoder.add(Dense(400*350*1))
decoder.add(Reshape((400,350,1)))
예제 #16
0
    base_model = VGG16(weights='imagenet',
                       include_top=False,
                       input_tensor=input_tensor)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(N_CATEGORIES, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
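    # freeze the first 15 VGG16 layers; only the deeper blocks get fine-tuned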
    for layer in base_model.layers[:15]:
        layer.trainable = False
elif (MODELS == 'small_cnn'):
    IMAGE_SIZE = 32
    EPOCS = 50
    model = Sequential()
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
    model.add(InputLayer(input_shape=input_shape))
    model.add(Convolution2D(96, 3, 3, border_mode='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(128, 3, 3))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(N_CATEGORIES))
    model.add(Activation('softmax', name='predictions'))
elif (MODELS == 'simple_cnn'):
    IMAGE_SIZE = 48
    EPOCS = 50
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
예제 #17
0
TensorBoard panel. The code below runs, but passing histogram_freq=1 asks for
embeddings_data, and it was unclear what data should be supplied.
Let's demonstrate these features on a simple example: you'll train a 1D convnet
on the IMDB sentiment-analysis task.
'''
#text-classification model to use with TensorBoard
max_features = 2000
max_len = 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)  # pad every sequence to length 500

model = keras.models.Sequential()
model.add(
    layers.Embedding(max_features, 128, input_length=max_len, name='embed'))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.MaxPool1D(5))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.GlobalMaxPool1D())
model.add(layers.Dense(1))
model.summary()
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

#creating a directory for TensorBoard log files
#training the model with a TensorBoard callback
callbacks = [
    keras.callbacks.TensorBoard(
        log_dir='/Users/zhaolei/Desktop/dataset/my_log_dir',
        #                histogram_freq=1,   #records activation histograms every epoch
        #                embeddings_freq = 1,    #records embedding data every epoch
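        #                Note: with histogram_freq=1 Keras needs validation data
        #                passed to fit(), and embeddings_freq=1 additionally
        #                needs embeddings_data, a numpy array of model inputs to
        #                embed (e.g. x_test[:100]).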
예제 #18
0
# print(base_model.summary())
last_layer = base_model.get_layer('mixed7')
last_output = last_layer.output
x = Flatten()(last_output)
x = Dense(1024, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)
model = Model(base_model.input, x)
"""
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
preds = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=preds)
"""
"""
# Initialising the CNN
model = Sequential()

# Create convolutional layer. There are 3 dimensions for input shape
model.add(Conv2D(32, kernel_size=(3, 3), activation = 'relu', input_shape=(299 ,299, 3)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(2, activation='softmax'))
"""
"""
# Pooling layer
model.add(MaxPooling2D((2, 2)))
# Convolutional layer
예제 #19
0
    def create_network(self,
                       architecture=Architecture.DIRECT,
                       algorithm=Algorithm.DDQN):
        if algorithm == Algorithm.DRQN:
            network_type = "recurrent"
        else:
            network_type = "sequential"

        if architecture == Architecture.DIRECT:
            if network_type == "inception":
                print("Built an inception DQN")
                input_img = Input(shape=(self.history_length,
                                         self.state_height, self.state_width))
                tower_1 = Convolution2D(16,
                                        1,
                                        1,
                                        border_mode='same',
                                        activation='relu')(input_img)
                tower_1 = Convolution2D(16,
                                        3,
                                        3,
                                        border_mode='same',
                                        activation='relu')(tower_1)
                tower_2 = Convolution2D(16,
                                        1,
                                        1,
                                        border_mode='same',
                                        activation='relu')(input_img)
                tower_2 = Convolution2D(16,
                                        5,
                                        5,
                                        border_mode='same',
                                        activation='relu')(tower_2)
                tower_3 = MaxPooling2D((3, 3),
                                       strides=(1, 1),
                                       border_mode='same')(input_img)
                tower_3 = Convolution2D(16,
                                        1,
                                        1,
                                        border_mode='same',
                                        activation='relu')(tower_3)
                output1 = merge([tower_1, tower_2, tower_3],
                                mode='concat',
                                concat_axis=1)
                avgpool = AveragePooling2D((7, 7), strides=(8, 8))(output1)
                flatten = Flatten()(avgpool)
                output = Dense(len(self.environment.actions))(flatten)
                model = Model(input=input_img, output=output)
                model.compile(rmsprop(lr=self.learning_rate), "mse")
                #model.summary()
            elif network_type == "sequential":
                print("Built a sequential DQN")
                model = Sequential()
                # print self.history_length, self.state_height, self.state_width
                # model.add(Convolution2D(16, 3, 3, subsample=(2,2), activation='relu', input_shape=(self.history_length, self.state_height, self.state_width), init='uniform', trainable=True))
                # model.add(Convolution2D(32, 3, 3, subsample=(2,2), activation='relu', init='uniform', trainable=True))
                # model.add(Convolution2D(64, 3, 3, subsample=(2,2), activation='relu', init='uniform', trainable=True))
                # model.add(Convolution2D(128, 3, 3, subsample=(1,1), activation='relu', init='uniform'))
                # model.add(Convolution2D(256, 3, 3, subsample=(1,1), activation='relu', init='uniform'))

                model.add(
                    Convolution2D(16,
                                  8,
                                  8,
                                  subsample=(4, 4),
                                  activation='relu',
                                  name='conv1_agent',
                                  input_shape=(self.history_length,
                                               self.state_height,
                                               self.state_width),
                                  init='uniform',
                                  trainable=True))
                model.add(
                    Convolution2D(32,
                                  4,
                                  4,
                                  subsample=(2, 2),
                                  activation='relu',
                                  name='conv2_agent',
                                  init='uniform',
                                  trainable=True))

                model.add(Flatten())
                model.add(
                    Dense(512,
                          activation='relu',
                          name='FC1_agent',
                          init='uniform'))
                model.add(Dense(len(self.environment.actions), init='uniform'))

                model.compile(rmsprop(lr=self.learning_rate), "mse")
            elif network_type == "recurrent":
                print("Built a recurrent DQN")
                model = Sequential()
                model.add(
                    TimeDistributed(Convolution2D(16,
                                                  3,
                                                  3,
                                                  subsample=(2, 2),
                                                  activation='relu',
                                                  init='uniform',
                                                  trainable=True),
                                    input_shape=(self.history_length, 1,
                                                 self.state_height,
                                                 self.state_width)))
                model.add(
                    TimeDistributed(
                        Convolution2D(32,
                                      3,
                                      3,
                                      subsample=(2, 2),
                                      activation='relu',
                                      init='uniform',
                                      trainable=True)))
                model.add(
                    TimeDistributed(
                        Convolution2D(64,
                                      3,
                                      3,
                                      subsample=(2, 2),
                                      activation='relu',
                                      init='uniform',
                                      trainable=True)))
                model.add(
                    TimeDistributed(
                        Convolution2D(128,
                                      3,
                                      3,
                                      subsample=(1, 1),
                                      activation='relu',
                                      init='uniform')))
                model.add(
                    TimeDistributed(
                        Convolution2D(256,
                                      3,
                                      3,
                                      subsample=(1, 1),
                                      activation='relu',
                                      init='uniform')))
                model.add(TimeDistributed(Flatten()))
                model.add(
                    LSTM(512, activation='relu', init='uniform', unroll=True))
                model.add(Dense(len(self.environment.actions), init='uniform'))
                model.compile(rmsprop(lr=self.learning_rate), "mse")
                #model.summary()
        elif architecture == Architecture.DUELING:
            if network_type == "sequential":
                print("Built a dueling sequential DQN")
                input = Input(shape=(self.history_length, self.state_height,
                                     self.state_width))
                x = Convolution2D(16,
                                  3,
                                  3,
                                  subsample=(2, 2),
                                  activation='relu',
                                  input_shape=(self.history_length,
                                               self.state_height,
                                               self.state_width),
                                  init='uniform',
                                  trainable=True)(input)
                x = Convolution2D(32,
                                  3,
                                  3,
                                  subsample=(2, 2),
                                  activation='relu',
                                  init='uniform',
                                  trainable=True)(x)
                x = Convolution2D(64,
                                  3,
                                  3,
                                  subsample=(2, 2),
                                  activation='relu',
                                  init='uniform',
                                  trainable=True)(x)
                x = Convolution2D(128,
                                  3,
                                  3,
                                  subsample=(1, 1),
                                  activation='relu',
                                  init='uniform')(x)
                x = Convolution2D(256,
                                  3,
                                  3,
                                  subsample=(1, 1),
                                  activation='relu',
                                  init='uniform')(x)
                x = Flatten()(x)
                # state value tower - V
                state_value = Dense(256, activation='relu', init='uniform')(x)
                state_value = Dense(1, init='uniform')(state_value)
                state_value = Lambda(
                    lambda s: K.expand_dims(s[:, 0], dim=-1),
                    output_shape=(len(
                        self.environment.actions), ))(state_value)
                # action advantage tower - A
                action_advantage = Dense(256,
                                         activation='relu',
                                         init='uniform')(x)
                action_advantage = Dense(len(self.environment.actions),
                                         init='uniform')(action_advantage)
                action_advantage = Lambda(
                    lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
                    output_shape=(len(
                        self.environment.actions), ))(action_advantage)
                # merge to state-action value function Q
                state_action_value = merge([state_value, action_advantage],
                                           mode='sum')
                model = Model(input=input, output=state_action_value)
                model.compile(rmsprop(lr=self.learning_rate), "mse")
                #model.summary()
            else:
                print("ERROR: not implemented")
                exit()
        elif architecture == Architecture.SEQUENCE:
            print("Built a recurrent DQN")
            """
            state_model = Sequential()
            state_model.add(Convolution2D(16, 3, 3, subsample=(2, 2), activation='relu',
                                    input_shape=(self.history_length, self.state_height, self.state_width),
                                    init='uniform', trainable=True))
            state_model.add(Convolution2D(32, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True))
            state_model.add(Convolution2D(64, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True))
            state_model.add(Convolution2D(128, 3, 3, subsample=(1, 1), activation='relu', init='uniform'))
            state_model.add(Convolution2D(256, 3, 3, subsample=(1, 1), activation='relu', init='uniform'))
            state_model.add(Flatten())
            state_model.add(Dense(512, activation='relu', init='uniform'))
            state_model.add(RepeatVector(self.max_action_sequence_length))

            action_model = Sequential()
            action_model.add(Masking(mask_value=self.end_token, input_shape=(self.max_action_sequence_length,)))
            action_model.add(Embedding(input_dim=self.input_action_space_size, output_dim=100, init='uniform', input_length=self.max_action_sequence_length))
            action_model.add(TimeDistributed(Dense(100, init='uniform', activation='relu')))

            model = Sequential()
            model.add(Merge([state_model, action_model], mode='concat', concat_axis=-1))
            model.add(LSTM(512, return_sequences=True, activation='relu', init='uniform'))
            model.add(TimeDistributed(Dense(len(self.environment.actions), init='uniform')))
            model.compile(rmsprop(lr=self.learning_rate), "mse")
            model.summary()
            """
            state_model_input = Input(shape=(self.history_length,
                                             self.state_height,
                                             self.state_width))
            state_model = Convolution2D(16,
                                        3,
                                        3,
                                        subsample=(2, 2),
                                        activation='relu',
                                        input_shape=(self.history_length,
                                                     self.state_height,
                                                     self.state_width),
                                        init='uniform',
                                        trainable=True)(state_model_input)
            state_model = Convolution2D(32,
                                        3,
                                        3,
                                        subsample=(2, 2),
                                        activation='relu',
                                        init='uniform',
                                        trainable=True)(state_model)
            state_model = Convolution2D(64,
                                        3,
                                        3,
                                        subsample=(2, 2),
                                        activation='relu',
                                        init='uniform',
                                        trainable=True)(state_model)
            state_model = Convolution2D(128,
                                        3,
                                        3,
                                        subsample=(1, 1),
                                        activation='relu',
                                        init='uniform')(state_model)
            state_model = Convolution2D(256,
                                        3,
                                        3,
                                        subsample=(1, 1),
                                        activation='relu',
                                        init='uniform')(state_model)
            state_model = Flatten()(state_model)
            state_model = Dense(512, activation='relu',
                                init='uniform')(state_model)
            state_model = RepeatVector(
                self.max_action_sequence_length)(state_model)

            action_model_input = Input(
                shape=(self.max_action_sequence_length, ))
            action_model = Masking(
                mask_value=self.end_token,
                input_shape=(
                    self.max_action_sequence_length, ))(action_model_input)
            action_model = Embedding(
                input_dim=self.input_action_space_size,
                output_dim=100,
                init='uniform',
                input_length=self.max_action_sequence_length)(action_model)
            action_model = TimeDistributed(
                Dense(100, init='uniform', activation='relu'))(action_model)

            x = merge([state_model, action_model],
                      mode='concat',
                      concat_axis=-1)
            x = LSTM(512,
                     return_sequences=True,
                     activation='relu',
                     init='uniform')(x)

            # state value tower - V
            state_value = TimeDistributed(
                Dense(256, activation='relu', init='uniform'))(x)
            state_value = TimeDistributed(Dense(1,
                                                init='uniform'))(state_value)
            state_value = Lambda(lambda s: K.repeat_elements(
                s, rep=len(self.environment.actions), axis=2))(state_value)

            # action advantage tower - A
            action_advantage = TimeDistributed(
                Dense(256, activation='relu', init='uniform'))(x)
            action_advantage = TimeDistributed(
                Dense(len(self.environment.actions),
                      init='uniform'))(action_advantage)
            action_advantage = TimeDistributed(
                Lambda(lambda a: a - K.mean(a, keepdims=True, axis=-1)))(
                    action_advantage)

            # merge to state-action value function Q
            state_action_value = merge([state_value, action_advantage],
                                       mode='sum')

            model = Model(input=[state_model_input, action_model_input],
                          output=state_action_value)
            model.compile(rmsprop(lr=self.learning_rate), "mse")
            model.summary()

        return model
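The dueling branches above implement Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)); a tiny numeric check of that aggregation with illustrative values:

import numpy as np

V = np.array([1.0])             # state value V(s)
A = np.array([0.5, -0.5, 1.0])  # advantages A(s, a) for three actions
Q = V + (A - A.mean())          # what the Lambda layers + merge(mode='sum') compute
print(Q)                        # [1.1667 0.1667 1.6667]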
예제 #20
0
def DoubleCNNWordEmbed(nb_labels,
                       wvmodel=None,
                       nb_filters_1=1200,
                       nb_filters_2=600,
                       n_gram=2,
                       filter_length_2=10,
                       maxlen=15,
                       vecsize=100,
                       cnn_dropout_1=0.0,
                       cnn_dropout_2=0.0,
                       final_activation='softmax',
                       dense_wl2reg=0.0,
                       dense_bl2reg=0.0,
                       optimizer='adam',
                       with_gensim=False):
    """ Returns the double-layered convolutional neural network (CNN/ConvNet) for word-embedded vectors.

    :param nb_labels: number of class labels
    :param wvmodel: pre-trained Gensim word2vec model
    :param nb_filters_1: number of filters for the first CNN/ConvNet layer (Default: 1200)
    :param nb_filters_2: number of filters for the second CNN/ConvNet layer (Default: 600)
    :param n_gram: n-gram, or window size of first CNN/ConvNet (Default: 2)
    :param filter_length_2: window size for second CNN/ConvNet layer (Default: 10)
    :param maxlen: maximum number of words in a sentence (Default: 15)
    :param vecsize: length of the embedded vectors in the model (Default: 100)
    :param cnn_dropout_1: dropout rate for the first CNN/ConvNet layer (Default: 0.0)
    :param cnn_dropout_2: dropout rate for the second CNN/ConvNet layer (Default: 0.0)
    :param final_activation: activation function. Options: softplus, softsign, relu, tanh, sigmoid, hard_sigmoid, linear. (Default: 'softmax')
    :param dense_wl2reg: L2 regularization coefficient (Default: 0.0)
    :param dense_bl2reg: L2 regularization coefficient for bias (Default: 0.0)
    :param optimizer: optimizer for gradient descent. Options: sgd, rmsprop, adagrad, adadelta, adam, adamax, nadam. (Default: adam)
    :param with_gensim: boolean flag indicating whether the word embeddings come from a Gensim Word2Vec model. (Default: False)
    :return: keras model (`Sequential` or `Model`) for CNN/ConvNet for word embeddings
    :type nb_labels: int
    :type wvmodel: gensim.models.keyedvectors.KeyedVectors
    :type nb_filters_1: int
    :type nb_filters_2: int
    :type n_gram: int
    :type filter_length_2: int
    :type maxlen: int
    :type vecsize: int
    :type cnn_dropout_1: float
    :type cnn_dropout_2: float
    :type final_activation: str
    :type dense_wl2reg: float
    :type dense_bl2reg: float
    :type optimizer: str
    :type with_gensim: bool
    :rtype: keras.models.Sequential or keras.models.Model
    """
    if with_gensim == True:
        embedding_layer = wvmodel.get_embedding_layer()
        sequence_input = Input(shape=(maxlen, ), dtype='int32')
        x = embedding_layer(sequence_input)
        x = Conv1D(filters=nb_filters_1,
                   kernel_size=n_gram,
                   padding='valid',
                   activation='relu',
                   input_shape=(maxlen, vecsize))(x)
        if cnn_dropout_1 > 0.0:
            x = Dropout(cnn_dropout_1)(x)
        x = Conv1D(filters=nb_filters_2,
                   kernel_size=filter_length_2,
                   padding='valid',
                   activation='relu')(x)
        if cnn_dropout_2 > 0.0:
            x = Dropout(cnn_dropout_2)(x)
        x = MaxPooling1D(pool_size=maxlen - n_gram - filter_length_2 + 1)(x)
        x = Flatten()(x)
        x = Dense(nb_labels,
                  activation=final_activation,
                  kernel_regularizer=l2(dense_wl2reg),
                  bias_regularizer=l2(dense_bl2reg))(x)

        model = Model(sequence_input, x)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    else:
        model = Sequential()
        model.add(
            Conv1D(filters=nb_filters_1,
                   kernel_size=n_gram,
                   padding='valid',
                   activation='relu',
                   input_shape=(maxlen, vecsize)))
        if cnn_dropout_1 > 0.0:
            model.add(Dropout(cnn_dropout_1))
        model.add(
            Conv1D(filters=nb_filters_2,
                   kernel_size=filter_length_2,
                   padding='valid',
                   activation='relu'))
        if cnn_dropout_2 > 0.0:
            model.add(Dropout(cnn_dropout_2))
        model.add(MaxPooling1D(pool_size=maxlen - n_gram - filter_length_2 +
                               1))
        model.add(Flatten())
        model.add(
            Dense(nb_labels,
                  activation=final_activation,
                  kernel_regularizer=l2(dense_wl2reg),
                  bias_regularizer=l2(dense_bl2reg)))
        model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return model
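A quick length check on the pool_size used above, with the defaults maxlen=15, n_gram=2 and filter_length_2=10:

L1 = 15 - 2 + 1    # 14: sequence length after the first 'valid' Conv1D
L2 = L1 - 10 + 1   # 5: length after the second 'valid' Conv1D
# pool_size = maxlen - n_gram - filter_length_2 + 1 = 4, so the single pooling
# window spans 4 of those 5 positions and the last position is dropped; a
# pool_size of L2 (i.e. "+ 2" in the formula) would give an exact global max-pool.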
예제 #21
0
def test_convolutional_recurrent():
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    for data_format in ['channels_first', 'channels_last']:

        if data_format == 'channels_first':
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_channel,
                                    input_num_row, input_num_col)
        else:
            inputs = np.random.rand(num_samples, sequence_len,
                                    input_num_row, input_num_col,
                                    input_channel)

        for return_sequences in [True, False]:

            # test for return state:
            x = Input(batch_shape=inputs.shape)
            kwargs = {'data_format': data_format,
                      'return_sequences': return_sequences,
                      'return_state': True,
                      'stateful': True,
                      'filters': filters,
                      'kernel_size': (num_row, num_col),
                      'padding': 'valid'}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            layer.build(inputs.shape)
            outputs = layer(x)
            output, states = outputs[0], outputs[1:]
            assert len(states) == 2
            model = Model(x, states[0])
            state = model.predict(inputs)
            np.testing.assert_allclose(
                K.eval(layer.states[0]), state, atol=1e-4)

            # test for output shape:
            output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                kwargs={'data_format': data_format,
                                        'return_sequences': return_sequences,
                                        'filters': filters,
                                        'kernel_size': (num_row, num_col),
                                        'padding': 'valid'},
                                input_shape=inputs.shape)

            # No need to check following tests for both data formats
            if data_format == 'channels_first' or return_sequences:
                continue

            # Tests for statefulness
            model = Sequential()
            kwargs = {'data_format': data_format,
                      'return_sequences': return_sequences,
                      'filters': filters,
                      'kernel_size': (num_row, num_col),
                      'stateful': True,
                      'batch_input_shape': inputs.shape,
                      'padding': 'same'}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)

            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(inputs))

            # train once so that the states change
            model.train_on_batch(np.ones_like(inputs),
                                 np.random.random(out1.shape))
            out2 = model.predict(np.ones_like(inputs))

            # if the state is not reset, output should be different
            assert(out1.max() != out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(inputs))
            assert(out2.max() != out3.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(inputs))
            assert_allclose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(inputs))
            assert(out4.max() != out5.max())

            # cntk doesn't support eval convolution with static
            # variable, will enable it later
            if K.backend() != 'cntk':
                # check regularizers
                kwargs = {'data_format': data_format,
                          'return_sequences': return_sequences,
                          'kernel_size': (num_row, num_col),
                          'stateful': True,
                          'filters': filters,
                          'batch_input_shape': inputs.shape,
                          'kernel_regularizer': regularizers.L1L2(l1=0.01),
                          'recurrent_regularizer': regularizers.L1L2(l1=0.01),
                          'bias_regularizer': 'l2',
                          'activity_regularizer': 'l2',
                          'kernel_constraint': 'max_norm',
                          'recurrent_constraint': 'max_norm',
                          'bias_constraint': 'max_norm',
                          'padding': 'same'}

                layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
                layer.build(inputs.shape)
                assert len(layer.losses) == 3
                assert layer.activity_regularizer
                output = layer(K.variable(np.ones(inputs.shape)))
                assert len(layer.losses) == 4
                K.eval(output)

            # check dropout
            layer_test(convolutional_recurrent.ConvLSTM2D,
                       kwargs={'data_format': data_format,
                               'return_sequences': return_sequences,
                               'filters': filters,
                               'kernel_size': (num_row, num_col),
                               'padding': 'same',
                               'dropout': 0.1,
                               'recurrent_dropout': 0.1},
                       input_shape=inputs.shape)

            # check state initialization
            layer = convolutional_recurrent.ConvLSTM2D(filters=filters,
                                                       kernel_size=(num_row, num_col),
                                                       data_format=data_format,
                                                       return_sequences=return_sequences)
            layer.build(inputs.shape)
            x = Input(batch_shape=inputs.shape)
            initial_state = layer.get_initial_state(x)
            y = layer(x, initial_state=initial_state)
            model = Model(x, y)
            assert model.predict(inputs).shape == layer.compute_output_shape(inputs.shape)
예제 #22
0
def CNNWordEmbed(nb_labels,
                 wvmodel=None,
                 nb_filters=1200,
                 n_gram=2,
                 maxlen=15,
                 vecsize=100,
                 cnn_dropout=0.0,
                 final_activation='softmax',
                 dense_wl2reg=0.0,
                 dense_bl2reg=0.0,
                 optimizer='adam',
                 with_gensim=False):
    """ Returns the convolutional neural network (CNN/ConvNet) for word-embedded vectors.

    Reference: Yoon Kim, "Convolutional Neural Networks for Sentence Classification,"
    *EMNLP* 2014, 1746-1751 (arXiv:1408.5882). [`arXiv
    <https://arxiv.org/abs/1408.5882>`_]

    :param nb_labels: number of class labels
    :param wvmodel: pre-trained Gensim word2vec model
    :param nb_filters: number of filters (Default: 1200)
    :param n_gram: n-gram, or window size of CNN/ConvNet (Default: 2)
    :param maxlen: maximum number of words in a sentence (Default: 15)
    :param vecsize: length of the embedded vectors in the model (Default: 100)
    :param cnn_dropout: dropout rate for CNN/ConvNet (Default: 0.0)
    :param final_activation: activation function. Options: softplus, softsign, relu, tanh, sigmoid, hard_sigmoid, linear. (Default: 'softmax')
    :param dense_wl2reg: L2 regularization coefficient (Default: 0.0)
    :param dense_bl2reg: L2 regularization coefficient for bias (Default: 0.0)
    :param optimizer: optimizer for gradient descent. Options: sgd, rmsprop, adagrad, adadelta, adam, adamax, nadam. (Default: adam)
    :param with_gensim: boolean flag indicating whether the word embeddings come from a Gensim Word2Vec model. (Default: False)
    :return: keras model (`Sequential` or `Model`) for CNN/ConvNet for word embeddings
    :type nb_labels: int
    :type wvmodel: gensim.models.keyedvectors.KeyedVectors
    :type nb_filters: int
    :type n_gram: int
    :type maxlen: int
    :type vecsize: int
    :type cnn_dropout: float
    :type final_activation: str
    :type dense_wl2reg: float
    :type dense_bl2reg: float
    :type optimizer: str
    :type with_gensim: bool
    :rtype: keras.models.Sequential or keras.models.Model
    """
    if with_gensim == True:
        embedding_layer = wvmodel.get_embedding_layer()
        sequence_input = Input(shape=(maxlen, ), dtype='int32')
        x = embedding_layer(sequence_input)
        x = Conv1D(filters=nb_filters,
                   kernel_size=n_gram,
                   padding='valid',
                   activation='relu',
                   input_shape=(maxlen, vecsize))(x)
        if cnn_dropout > 0.0:
            x = Dropout(cnn_dropout)(x)
        x = MaxPooling1D(pool_size=maxlen - n_gram + 1)(x)
        x = Flatten()(x)
        x = Dense(nb_labels,
                  activation=final_activation,
                  kernel_regularizer=l2(dense_wl2reg),
                  bias_regularizer=l2(dense_bl2reg))(x)

        model = Model(sequence_input, x)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    else:
        model = Sequential()
        model.add(
            Conv1D(filters=nb_filters,
                   kernel_size=n_gram,
                   padding='valid',
                   activation='relu',
                   input_shape=(maxlen, vecsize)))
        if cnn_dropout > 0.0:
            model.add(Dropout(cnn_dropout))
        model.add(MaxPooling1D(pool_size=maxlen - n_gram + 1))
        model.add(Flatten())
        model.add(
            Dense(nb_labels,
                  activation=final_activation,
                  kernel_regularizer=l2(dense_wl2reg),
                  bias_regularizer=l2(dense_bl2reg)))
        model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return model
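A hypothetical usage sketch for the default (with_gensim=False) branch; the shapes follow from input_shape=(maxlen, vecsize) and the softmax output, while the sample count and epochs here are made up:

import numpy as np

model = CNNWordEmbed(nb_labels=3)           # defaults: maxlen=15, vecsize=100
X = np.random.rand(32, 15, 100)             # 32 sentences of pre-embedded word vectors
Y = np.eye(3)[np.random.randint(0, 3, 32)]  # one-hot labels, shape (32, 3)
model.fit(X, Y, epochs=1)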
예제 #23
0
def BuildDiscriminator(summary=True,
                       spectral_normalization=True,
                       batch_normalization=False,
                       bn_momentum=0.9,
                       bn_epsilon=0.00002,
                       resnet=True,
                       name='Discriminator',
                       plot=False):
    if resnet:
        model_input = Input(shape=(32, 32, 3))
        resblock_1 = ResBlock(input_shape=(32, 32, 3),
                              channels=128,
                              sampling='down',
                              batch_normalization=True,
                              spectral_normalization=spectral_normalization,
                              name='Discriminator_resblock_Down_1')
        h = resblock_1(model_input)
        resblock_2 = ResBlock(input_shape=(16, 16, 128),
                              channels=128,
                              sampling='down',
                              batch_normalization=True,
                              spectral_normalization=spectral_normalization,
                              name='Discriminator_resblock_Down_2')
        h = resblock_2(h)
        resblock_3 = ResBlock(input_shape=(8, 8, 128),
                              channels=128,
                              sampling=None,
                              batch_normalization=True,
                              spectral_normalization=spectral_normalization,
                              trainable_sortcut=False,
                              name='Discriminator_resblock_1')
        h = resblock_3(h)
        resblock_4 = ResBlock(input_shape=(8, 8, 128),
                              channels=128,
                              sampling=None,
                              batch_normalization=True,
                              spectral_normalization=spectral_normalization,
                              trainable_sortcut=False,
                              name='Discriminator_resblock_2')
        h = resblock_4(h)
        h = Activation('relu')(h)
        h = GlobalSumPooling2D()(h)
        model_output = DenseSN(1, kernel_initializer='glorot_uniform')(h)

        model = Model(model_input, model_output, name=name)

    else:
        if spectral_normalization:
            model = Sequential(name=name)
            model.add(
                ConvSN2D(64,
                         kernel_size=3,
                         strides=1,
                         kernel_initializer='glorot_uniform',
                         padding='same',
                         input_shape=(32, 32, 3)))
            model.add(LeakyReLU(0.1))
            model.add(
                ConvSN2D(64,
                         kernel_size=4,
                         strides=2,
                         kernel_initializer='glorot_uniform',
                         padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                ConvSN2D(128,
                         kernel_size=3,
                         strides=1,
                         kernel_initializer='glorot_uniform',
                         padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                ConvSN2D(128,
                         kernel_size=4,
                         strides=2,
                         kernel_initializer='glorot_uniform',
                         padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                ConvSN2D(256,
                         kernel_size=3,
                         strides=1,
                         kernel_initializer='glorot_uniform',
                         padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                ConvSN2D(256,
                         kernel_size=4,
                         strides=2,
                         kernel_initializer='glorot_uniform',
                         padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                ConvSN2D(512,
                         kernel_size=3,
                         strides=1,
                         kernel_initializer='glorot_uniform',
                         padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(GlobalSumPooling2D())
            model.add(DenseSN(1, kernel_initializer='glorot_uniform'))
        else:
            model = Sequential(name=name)
            model.add(
                Conv2D(64,
                       kernel_size=3,
                       strides=1,
                       kernel_initializer='glorot_uniform',
                       padding='same',
                       input_shape=(32, 32, 3)))
            model.add(LeakyReLU(0.1))
            model.add(
                Conv2D(64,
                       kernel_size=4,
                       strides=2,
                       kernel_initializer='glorot_uniform',
                       padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                Conv2D(128,
                       kernel_size=3,
                       strides=1,
                       kernel_initializer='glorot_uniform',
                       padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                Conv2D(128,
                       kernel_size=4,
                       strides=2,
                       kernel_initializer='glorot_uniform',
                       padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                Conv2D(256,
                       kernel_size=3,
                       strides=1,
                       kernel_initializer='glorot_uniform',
                       padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                Conv2D(256,
                       kernel_size=4,
                       strides=2,
                       kernel_initializer='glorot_uniform',
                       padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(
                Conv2D(512,
                       kernel_size=3,
                       strides=1,
                       kernel_initializer='glorot_uniform',
                       padding='same'))
            model.add(LeakyReLU(0.1))
            model.add(GlobalSumPooling2D())
            model.add(Dense(1, kernel_initializer='glorot_uniform'))
    if plot:
        plot_model(model, name + '.png', show_layer_names=True)

    if summary:
        print('Discriminator')
        print('Spectral Normalization: {}'.format(spectral_normalization))
        model.summary()
    return model
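A hypothetical smoke test of the non-ResNet discriminator; ConvSN2D, DenseSN and GlobalSumPooling2D are custom layers assumed to be importable from this project:

import numpy as np

D = BuildDiscriminator(summary=False, plot=False, resnet=False)
scores = D.predict(np.random.rand(4, 32, 32, 3))
print(scores.shape)  # (4, 1): unbounded real-valued critic scores, no sigmoid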
예제 #24
0
def CLSTMWordEmbed(nb_labels,
                   wvmodel=None,
                   nb_filters=1200,
                   n_gram=2,
                   maxlen=15,
                   vecsize=100,
                   cnn_dropout=0.0,
                   nb_rnnoutdim=1200,
                   rnn_dropout=0.2,
                   final_activation='softmax',
                   dense_wl2reg=0.0,
                   dense_bl2reg=0.0,
                   optimizer='adam',
                   with_gensim=False):
    """ Returns the C-LSTM neural networks for word-embedded vectors.

    Reference: Chunting Zhou, Chonglin Sun, Zhiyuan Liu, Francis Lau,
    "A C-LSTM Neural Network for Text Classification,"
    (arXiv:1511.08630). [`arXiv
    <https://arxiv.org/abs/1511.08630>`_]

    :param nb_labels: number of class labels
    :param wvmodel: pre-trained Gensim word2vec model
    :param nb_filters: number of filters (Default: 1200)
    :param n_gram: n-gram, or window size of CNN/ConvNet (Default: 2)
    :param maxlen: maximum number of words in a sentence (Default: 15)
    :param vecsize: length of the embedded vectors in the model (Default: 100)
    :param cnn_dropout: dropout rate for CNN/ConvNet (Default: 0.0)
    :param nb_rnnoutdim: output dimension for the LSTM networks (Default: 1200)
    :param rnn_dropout: dropout rate for LSTM (Default: 0.2)
    :param final_activation: activation function. Options: softplus, softsign, relu, tanh, sigmoid, hard_sigmoid, linear. (Default: 'softmax')
    :param dense_wl2reg: L2 regularization coefficient (Default: 0.0)
    :param dense_bl2reg: L2 regularization coefficient for bias (Default: 0.0)
    :param optimizer: optimizer for gradient descent. Options: sgd, rmsprop, adagrad, adadelta, adam, adamax, nadam. (Default: adam)
    :param with_gensim: boolean flag indicating whether the word embeddings come from a Gensim Word2Vec model. (Default: False)
    :return: keras model (`Sequential` or `Model`) for the C-LSTM for word embeddings
    :type nb_labels: int
    :type wvmodel: gensim.models.keyedvectors.KeyedVectors
    :type nb_filters: int
    :type n_gram: int
    :type maxlen: int
    :type vecsize: int
    :type cnn_dropout: float
    :type nb_rnnoutdim: int
    :type rnn_dropout: float
    :type final_activation: str
    :type dense_wl2reg: float
    :type dense_bl2reg: float
    :type optimizer: str
    :type with_gensim: bool
    :rtype: keras.models.Sequential or keras.models.Model
    """
    if with_gensim == True:
        embedding_layer = wvmodel.get_embedding_layer()
        sequence_input = Input(shape=(maxlen, ), dtype='int32')
        x = embedding_layer(sequence_input)
        x = Conv1D(filters=nb_filters,
                   kernel_size=n_gram,
                   padding='valid',
                   activation='relu',
                   input_shape=(maxlen, vecsize))(x)
        if cnn_dropout > 0.0:
            x = Dropout(cnn_dropout)(x)
        x = MaxPooling1D(pool_size=maxlen - n_gram + 1)(x)
        x = LSTM(nb_rnnoutdim)(x)
        if rnn_dropout > 0.0:
            x = Dropout(rnn_dropout)(x)
        x = Dense(
            nb_labels,
            activation=final_activation,
            kernel_regularizer=l2(dense_wl2reg),
            bias_regularizer=l2(dense_bl2reg),
        )(x)

        model = Model(sequence_input, x)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer)
    else:
        model = Sequential()
        model.add(
            Conv1D(filters=nb_filters,
                   kernel_size=n_gram,
                   padding='valid',
                   activation='relu',
                   input_shape=(maxlen, vecsize)))
        if cnn_dropout > 0.0:
            model.add(Dropout(cnn_dropout))
        model.add(MaxPooling1D(pool_size=maxlen - n_gram + 1))
        model.add(LSTM(nb_rnnoutdim))
        if rnn_dropout > 0.0:
            model.add(Dropout(rnn_dropout))
        model.add(
            Dense(
                nb_labels,
                activation=final_activation,
                kernel_regularizer=l2(dense_wl2reg),
                bias_regularizer=l2(dense_bl2reg),
            ))
        model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    return model
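
# A quick usage sketch for CLSTMWordEmbed (shapes are illustrative): train on
# batches of word-vector sequences of shape (n_samples, maxlen, vecsize) with
# one-hot labels of shape (n_samples, nb_labels).
import numpy as np

clstm = CLSTMWordEmbed(nb_labels=3, maxlen=15, vecsize=100)
X_demo = np.random.rand(32, 15, 100)
y_demo = np.eye(3)[np.random.randint(0, 3, size=32)]
clstm.fit(X_demo, y_demo, epochs=1)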
예제 #25
0
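# tilted_loss is a custom objective passed to load_model in the Model class
# below; a common quantile ("pinball") loss sketch is shown here as an
# assumption about its definition, since the module that defines it is not
# part of this example.
import keras.backend as K

def tilted_loss(q):
    def loss(y_true, y_pred):
        e = y_true - y_pred
        # penalize under- and over-prediction asymmetrically by quantile q
        return K.mean(K.maximum(q * e, (q - 1) * e), axis=-1)
    return loss
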
class Model(ModelHelper):
    """
    Class that encapsulates the LSTM model built in this
    project and provides a convenient interface to the
    functions used to train, evaluate, and run it.
    
    Parameters
    ----------
    path: str
        Location to load model from.

    data: pandas DataFrame
        Pandas DataFrame containing the column named by
        `variable`. This is used to train and run the model.

    variable: str
        Column of `data` to use.

    predicted_period_size: int
        Number of time periods to predict.

    holdout: int, default 0
        Number of periods to hold out from the
        training set.

    """
    def __init__(self,
                 data,
                 variable,
                 predicted_period_size,
                 path=None,
                 holdout=0,
                 normalize=True,
                 model_type='sequential'):

        self.path = path
        self.data = data
        self.variable = variable
        self.predicted_period_size = predicted_period_size
        self.holdout = holdout
        self.normalize = normalize  # stored so train() can re-prepare data
        if model_type in ['sequential', 'functional']:
            self.model_type = model_type
        else:
            raise ValueError(
                'Wrong model type: It can be either "sequential" or "functional"'
            )

        if path:
            self.model = load_model(self.path,
                                    custom_objects={'loss': tilted_loss(0.5)})

        self.X, self.Y = self.__prepare_data(normalize=normalize)
        self.__extract_last_series_value()

        super().__init__()

    def __extract_last_series_value(self):
        """
        Method for extracting the last value from
        a series prior to normalization. This value
        is then used for denormalizing the set.
        """
        if self.remainder:
            self.last_value = self.data.sort_values('date', ascending=False)\
                                [:-self.remainder][self.variable].values[0]

            self.last_date = self.data.sort_values('date', ascending=False)\
                                [:-self.remainder]['date'].values[0]
        else:
            self.last_value = self.data.sort_values('date', ascending=False)\
                                [self.variable].values[0]

            self.last_date = self.data.sort_values('date', ascending=False)\
                                ['date'].values[0]

    def __prepare_data(self, normalize):
        """
        Prepares data for model.

        Parameters
        ----------
        normalize: bool
            If the method should normalize data or not.
            Normalization is done using 

                normalizations.point_relative_normalization()

        Returns
        -------
        X and Y prepared for training.
        """
        series = self.data[self.variable].values
        self.remainder = len(series) % self.predicted_period_size

        groups = self.create_groups(data=series,
                                    group_size=self.predicted_period_size,
                                    normalize=normalize)

        if self.holdout == 0:
            self.holdout_groups = []
        else:
            # keep the last `holdout` groups out of the training data
            self.holdout_groups = groups[-self.holdout:]
            groups = groups[:-self.holdout]

        self.default_number_of_periods = groups.shape[1] - 1

        return self.split_lstm_input(groups)

    def build(self,
              number_of_periods=None,
              period_length=7,
              batch_size=1,
              loss="mse"):
        """
        Builds an LSTM model using Keras. This function
        works as a simple wrapper for a manually created
        model.
        
        Parameters
        ----------
        period_length: int
            The size of each observation used as input.
        
        number_of_periods: int, default None
            The number of periods available in the 
            dataset. If None, the model will be built
            using all available periods - 1 (used for validation).
        
        batch_size: int
            The size of the batch used in each training
            period.
        
        loss: str, default "mse"
            Loss function used to compile the model.
        
        Returns
        -------
        model: Keras model
            Compiled Keras model that can be trained
            and stored in disk.
        """
        if not number_of_periods:
            number_of_periods = self.default_number_of_periods

        if self.model_type == 'sequential':
            self.model = Sequential()
            self.model.add(
                LSTM(units=period_length,
                     batch_input_shape=(batch_size, number_of_periods,
                                        period_length),
                     return_sequences=False,
                     stateful=False))

            self.model.add(Dense(units=period_length))
            self.model.add(Activation("linear"))

            self.model.compile(loss=loss, optimizer="rmsprop")
        else:
            inputs = Input(shape=(number_of_periods, period_length))
            x = LSTM(units=period_length,
                     return_sequences=False,
                     stateful=False)(inputs)
            x0 = Dense(units=period_length, activation='linear')(x)
            x1 = Dense(units=period_length, activation='linear')(x)
            x2 = Dense(units=period_length, activation='linear')(x)
            self.model = Functional_Model(inputs, [x0, x1, x2])
            self.model.compile(loss=loss, optimizer="rmsprop")

        return self.model

    def save(self, path):
        """
        Stores trained model in disk. Useful
        for storing trained models.
        
        Parameters
        ----------
        path: str
            Location of where to store model.
        """
        return self.model.save(path)

    def predict(self, output=None, denormalized=False, return_dict=False):
        """
        Makes a prediction based on input data.

        Parameters
        ----------
        output: int, default None
            Output index in a multi-output model.
            It is unused in a single-output model.
        denormalized: bool, default False
            If the method should denormalize data, using
            normalizations.point_relative_normalization()
        
        return_dict: bool, default False
            If should return dict that can be serializable
            as JSON. Useful for returning prediction
            results with dates as keys.

        """
        if self.model_type == 'sequential':
            predictions = self.model.predict(x=self.X)
        else:
            predictions = self.model.predict(x=self.X)[output]

        if denormalized:
            predictions = point_relative_normalization(
                series=predictions, reverse=True, last_value=self.last_value)

        dates = []
        base_date = datetime.strptime(self.last_date, '%Y-%m-%d')
        for i in range(1, len(predictions[0]) + 1):
            d = (base_date + timedelta(days=i)).strftime('%Y-%m-%d')
            dates.append(d)

        results = []
        for d, p in zip(dates, predictions[0].tolist()):
            results.append({'date': d, 'prediction': round(p, 2)})

        if return_dict:
            return results

        else:
            return predictions[0]

    def train(self, data=None, epochs=300, verbose=0):
        """
        Trains model using data from class. 

        Parameters
        ----------
        data: pandas DataFrame, default None
            Pandas DataFrame with `variable` used to
            fit the model for the first time.

        epochs: int
            Number of epochs to train model for.
        
        verbose: int, default 0
            Verbosity level to use. The default (0)
            means that nothing is printed on the
            screen.
        
        Returns
        -------
        Metrics from the model history.
        """
        if data is not None:
            self.data = data
            self.X, self.Y = self.__prepare_data(normalize=self.normalize)
            self.__extract_last_series_value()

        if self.model_type == 'sequential':
            self.train_history = self.model.fit(x=self.X,
                                                y=self.Y,
                                                batch_size=1,
                                                epochs=epochs,
                                                verbose=verbose,
                                                shuffle=False)
        else:
            self.train_history = self.model.fit(x=self.X,
                                                y=[self.Y, self.Y, self.Y],
                                                batch_size=1,
                                                epochs=epochs,
                                                verbose=verbose,
                                                shuffle=False)

        self.last_trained = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        return self.train_history

    def evaluate(self, metrics=['mse', 'rmse', 'mape']):
        """
        Evaluates model using provided metrics. The evaluation
        """
        y = point_relative_normalization(series=self.Y[0],
                                         reverse=True,
                                         last_value=self.last_value)

        results = {}
        for metric in metrics:
            if metric == 'mse':
                r = round(self.mse(A=self.Y[0], B=self.predict()), 2)

            else:
                r = round(
                    getattr(self, metric)(A=self.predict(denormalized=True)[0],
                                          B=y), 2)

            results[metric] = r

        return results
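
# The class above leans on normalizations.point_relative_normalization(); a
# hedged sketch of that helper (normalize each series relative to its first
# point, reverse with the stored last value), plus a minimal driver, follow.
# The column names ('date', 'close') and the DataFrame `df` are illustrative.
import numpy as np

def point_relative_normalization(series, reverse=False, last_value=None):
    if reverse:
        return last_value * (series + 1)
    return (series / series[0]) - 1

m = Model(data=df, variable='close', predicted_period_size=7)
m.build()
m.train(epochs=100)
print(m.predict(denormalized=True))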
예제 #26
0
def test_sequential_regression():
    from keras.models import Sequential, Model
    from keras.layers import Merge, Embedding, BatchNormalization, LSTM, InputLayer, Input

    # start with a basic example of using a Sequential model
    # inside the functional API
    seq = Sequential()
    seq.add(Dense(input_dim=10, output_dim=10))

    x = Input(shape=(10,))
    y = seq(x)
    model = Model(x, y)
    model.compile('rmsprop', 'mse')
    weights = model.get_weights()

    # test serialization
    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')
    model.set_weights(weights)

    # more advanced model with multiple branches

    branch_1 = Sequential(name='branch_1')
    branch_1.add(Embedding(input_dim=100,
                           output_dim=10,
                           input_length=2,
                           name='embed_1'))
    branch_1.add(LSTM(32, name='lstm_1'))

    branch_2 = Sequential(name='branch_2')
    branch_2.add(Dense(32, input_shape=(8,), name='dense_2'))

    branch_3 = Sequential(name='branch_3')
    branch_3.add(Dense(32, input_shape=(6,), name='dense_3'))

    branch_1_2 = Sequential([Merge([branch_1, branch_2], mode='concat')], name='branch_1_2')
    branch_1_2.add(Dense(16, name='dense_1_2-0'))
    # test whether impromptu input_shape breaks the model
    branch_1_2.add(Dense(16, input_shape=(16,), name='dense_1_2-1'))

    model = Sequential([Merge([branch_1_2, branch_3], mode='concat')], name='final')
    model.add(Dense(16, name='dense_final'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    x = (100 * np.random.random((100, 2))).astype('int32')
    y = np.random.random((100, 8))
    z = np.random.random((100, 6))
    labels = np.random.random((100, 16))
    model.fit([x, y, z], labels, nb_epoch=1)

    # test if Sequential can be called in the functional API

    a = Input(shape=(2,), dtype='int32')
    b = Input(shape=(8,))
    c = Input(shape=(6,))
    o = model([a, b, c])

    outer_model = Model([a, b, c], o)
    outer_model.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    outer_model.fit([x, y, z], labels, nb_epoch=1)

    # test serialization
    config = outer_model.get_config()
    outer_model = Model.from_config(config)
    outer_model.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    outer_model.fit([x, y, z], labels, nb_epoch=1)
예제 #27
0
def make_cifar10_model(**args):
    nb_classes = 10
    img_rows, img_cols = 32, 32
    
    # use 1 kernel size for all convolutional layers
    ks = args.get('kernel_size', 3)
    
    # tune the number of filters for each convolution layer
    nb_filters1 = args.get('nb_filters1', 48)
    nb_filters2 = args.get('nb_filters2', 96)
    nb_filters3 = args.get('nb_filters3', 192)
    
    # tune the pool size once
    ps = args.get('pool_size', 2)
    pool_size = (ps,ps)
    
    # tune the dropout rates independently
    #do1 = args.get('dropout1', 0.25)
    #do2 = args.get('dropout2', 0.25)
    #do3 = args.get('dropout3', 0.25)
    do4 = args.get('dropout1', 0.25)
    do5 = args.get('dropout2', 0.5)
    #do1 = args.get('dropout1', 0.)
    #do2 = args.get('dropout2', 0.)
    #do3 = args.get('dropout3', 0.)
    #do4 = args.get('dropout4', 0.)
    #do5 = args.get('dropout5', 0.)
    
    # tune the dense layers independently
    dense1 = args.get('dense1', 512)
    dense2 = args.get('dense2', 256)
    
    if K.image_dim_ordering() == 'th':
        input_shape = (3, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 3)

    #act = 'sigmoid'
    act = 'relu'
        
    i = Input(input_shape)
    l = Conv2D(nb_filters1, (ks, ks), padding='same', activation=act)(i)
    #l = Conv2D(nb_filters1, (ks, ks), activation=act)(l)
    l = MaxPooling2D(pool_size=pool_size)(l)
    #l = Dropout(do1)(l)

    l = Conv2D(nb_filters2, (ks, ks), padding='same', activation=act)(l)
    #l = Conv2D(nb_filters2, (ks, ks))(l)
    l = MaxPooling2D(pool_size=pool_size)(l)
    #l = Dropout(do2)(l)

    l = Conv2D(nb_filters3, (ks, ks), padding='same', activation=act)(l)
    #l = Conv2D(nb_filters3, (ks, ks))(l)
    l = MaxPooling2D(pool_size=pool_size)(l)
    #l = Dropout(do3)(l)

    l = Flatten()(l)
    l = Dense(dense1, activation=act)(l)
    l = Dropout(do4)(l)
    l = Dense(dense2, activation=act)(l)
    l = Dropout(do5)(l)
    
    o = Dense(nb_classes, activation='softmax')(l)

    model = Model(inputs=i, outputs=o)
    model.summary()
    
    return model
    
    # NOTE: everything below is unreachable legacy Keras 1 code kept for
    # reference; it also uses do1/do2/do3, which are commented out above.
    model = Sequential()
    model.add(Convolution2D(nb_filters1, ks, ks,
                            border_mode='same',
                            input_shape=input_shape))
    model.add(Activation(act))
    model.add(Convolution2D(nb_filters1, ks, ks))
    model.add(Activation(act))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(do1))
    
    model.add(Convolution2D(nb_filters2, ks, ks, border_mode='same'))
    model.add(Activation(act))
    model.add(Convolution2D(nb_filters2, ks, ks))
    model.add(Activation(act))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(do2))
    
    model.add(Convolution2D(nb_filters3, ks, ks, border_mode='same'))
    model.add(Activation(act))
    model.add(Convolution2D(nb_filters3, ks, ks))
    model.add(Activation(act))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(do3))
    
    model.add(Flatten())
    model.add(Dense(dense1))
    model.add(Activation(act))
    model.add(Dropout(do4))
    model.add(Dense(dense2))
    model.add(Activation(act))
    model.add(Dropout(do5))
    
    model.add(Dense(nb_classes, activation='softmax'))
    
    return model
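
# Example call; the keyword names match the args.get() keys used above.
model = make_cifar10_model(kernel_size=3, nb_filters1=48, dropout1=0.3)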
예제 #28
0
    def buildModel(self):

        B = Input(shape=(3, ))
        b = Dense(5, activation="relu")(B)
        inputs = [B]
        merges = [b]
        S = Input(shape=[2, 60, 1])
        inputs.append(S)
        h = Conv2D(2048, (3, 1), padding="same")(S)
        h = LeakyReLU(0.001)(h)
        merges.append(h)
        m = concatenate(merges, axis=1)
        m = Dense(1024)(m)
        m = LeakyReLU(0.001)(m)
        V = Dense(2, activation='softmax')(m)
        model = Model(inputs=inputs, outputs=V)
        model.summary()

        # the functional model above is only summarized; it is discarded and
        # replaced by the Sequential network that is actually returned
        model = Sequential()
        model.add(
            Dense(70, input_shape=(42, ), kernel_initializer="lecun_uniform"))
        model.add(Activation('relu'))

        model.add(Dense(28))
        model.add(Activation('relu'))

        model.add(Dense(12))
        model.add(Activation('relu'))

        # model.add(Dense(2000))
        # model.add(Dropout(0.2))
        # model.add(Activation('relu'))

        # model.add(Dense(5000))
        # model.add(Dropout(0.2))
        # model.add(Activation('relu'))

        model.add(Dense(3, kernel_initializer="lecun_uniform"))
        model.add(Activation('linear'))
        return model
    # [fragment] tail of a YOLO head function (cf. yolo_head in keras-yolo3);
    # the definition line and grid-construction code above this were cut off.
    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])


    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs

def weather(feats, num_classes, input_shape):
    # The original snippet had an invalid signature and mixed the functional
    # and Sequential APIs (calling .add() on a functional Model); rewritten
    # here as a plain Sequential classifier so it actually runs.
    model = Sequential()
    model.add(Conv2D(feats, (3, 3), padding='same', input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'))
    model.add(Flatten())
    model.add(Dense(512, kernel_constraint=maxnorm(3)))
    model.add(Activation('relu'))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    return model


def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    '''Get corrected boxes'''
    box_yx = box_xy[..., ::-1]
    box_hw = box_wh[..., ::-1]
예제 #30
0
class UrlDetector:
    def __init__(self, model="simple_nn", vocab_size=87, max_length=200):
        """
        Initiates URL detector model. Default parameters values are taken from J. Saxe et al. - eXpose: A Character-
        Level Convolutional Neural Network with Embeddings For Detecting Malicious URLs, File Paths and Registry Keys

        Parameters
        ----------
        model: {"simple_nn", "big_conv_nn"}
            Path of csv file containing the dataset.
        max_length:
            Maximum length of considered URL (crops longer URL).
        vocab_size:
            Size of alphabet (letters, digits, symbols...).
        """
        self.max_length = max_length
        self.vocab_size = vocab_size
        self.model = Model()
        self.build_model(model)

    def build_model(self, model: str):
        """
        Builds given model.

        Parameters
        ----------
        model: {"simple_nn", "big_conv_nn"}
            Path of csv file containing the dataset.
        """
        if model == "simple_nn":
            self._build_simple_nn()
        elif model == "big_conv_nn":
            self._build_big_conv_nn()

    def _build_simple_nn(self):
        """Defines and compiles a simple NN."""
        self.model = Sequential()
        self.model.add(
            Embedding(self.vocab_size, 32, input_length=self.max_length))
        self.model.add(Flatten())
        self.model.add(Dense(1, activation='sigmoid'))

        self.model.compile(optimizer='adam',
                           loss='binary_crossentropy',
                           metrics=['acc'])
        print(self.model.summary())

    def _get_complete_conv_layer(self, filter_length, nb_filter):
        """Wrapper for a convolutional layer followed by a summing pooling
        layer and dropout (batch normalization is left commented out)."""
        model = Sequential()
        model.add(
            Conv1D(filters=nb_filter,
                   input_shape=(self.max_length, 32),
                   kernel_size=filter_length,
                   padding='same',
                   activation='relu',
                   strides=1))
        # model.add(BatchNormalization())
        model.add(Lambda(self._sum_1d, output_shape=(nb_filter, )))
        # model.add(BatchNormalization(mode=0))
        model.add(Dropout(0.5))
        return model

    @staticmethod
    def _sum_1d(x):
        """Sum layers on column axis."""
        return K.sum(x, axis=1)

    def _build_big_conv_nn(self):
        """Defines and compiles same CNN as J. Saxe et al. - eXpose: A Character-Level Convolutional Neural Network with
        Embeddings For Detecting Malicious URLs, File Paths and Registry Keys."""
        main_input = Input(shape=(self.max_length, ),
                           dtype='int32',
                           name='main_input')
        embedding = Embedding(input_dim=self.vocab_size,
                              output_dim=32,
                              input_length=self.max_length)(main_input)

        conv1 = self._get_complete_conv_layer(2, 256)(embedding)
        conv2 = self._get_complete_conv_layer(3, 256)(embedding)
        conv3 = self._get_complete_conv_layer(4, 256)(embedding)
        conv4 = self._get_complete_conv_layer(5, 256)(embedding)

        merged = merge.Concatenate()([conv1, conv2, conv3, conv4])
        merged = BatchNormalization()(merged)

        middle = Dense(1024, activation='relu')(merged)
        middle = BatchNormalization()(middle)
        middle = Dropout(0.5)(middle)

        middle = Dense(1024, activation='relu')(middle)
        middle = BatchNormalization()(middle)
        middle = Dropout(0.5)(middle)

        middle = Dense(1024, activation='relu')(middle)
        middle = BatchNormalization()(middle)
        middle = Dropout(0.5)(middle)

        output = Dense(1, activation='sigmoid')(middle)

        self.model = Model(inputs=main_input, outputs=output)
        optimizer = Adam(lr=0.001,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=None,
                         decay=0.0,
                         amsgrad=False)
        self.model.compile(loss='binary_crossentropy',
                           optimizer=optimizer,
                           metrics=['acc', self.f1])
        self.model.summary()

    def _get_padded_docs(self, encoded_docs: list) -> np.ndarray:
        """Makes the data readable for the model."""
        padded_docs = pad_sequences(encoded_docs,
                                    maxlen=self.max_length,
                                    padding='post')
        return padded_docs

    def fit(self,
            encoded_docs: list,
            labels: list,
            batch_size=128,
            epochs=5,
            verbose=1,
            training_logs="training_logs",
            validation_data=None,
            validation_split=0.2):
        """
        Trains the model with Tensorboard monitoring. Data should be shuffled before calling this function because the
        validation set is taken from the last samples of the provided dataset.

        Parameters
        ----------
        encoded_docs
            One-hot encoded URLs.
        labels
            Labels (0/1) of URLs.
        batch_size
            Number of samples per gradient update.
        epochs
            Number of epochs to train on.
        verbose
            Whether to display information (loss, accuracy...) during training.
        training_logs
            Directory where to store Tensorboard logs.
        validation_data
            Tuple with the validation data (X_val, y_val)
        validation_split
            % of data to put in the validation set. Only used if 'validation_data=None'.
        """
        if not os.path.exists(training_logs):
            os.makedirs(training_logs)
        tensorboard = TensorBoard(log_dir=training_logs)
        padded_docs = self._get_padded_docs(encoded_docs)
        if validation_data is None:
            self.model.fit(padded_docs,
                           labels,
                           batch_size=batch_size,
                           epochs=epochs,
                           validation_split=validation_split,
                           verbose=verbose,
                           callbacks=[tensorboard])
        else:
            one_hot_val_urls, y_val = validation_data
            X_val = self._get_padded_docs(one_hot_val_urls)
            self.model.fit(padded_docs,
                           labels,
                           batch_size=batch_size,
                           epochs=epochs,
                           validation_data=(X_val, y_val),
                           verbose=verbose,
                           callbacks=[tensorboard])

    def evaluate(self, encoded_docs: list, labels: list):
        """Computes the accuracy and F1-score of given data (assumes the
        'big_conv_nn' model, which tracks both metrics)."""
        padded_docs = self._get_padded_docs(encoded_docs)
        loss, accuracy, f1score = self.model.evaluate(padded_docs,
                                                      labels,
                                                      verbose=0)
        print('Accuracy: %f' % (accuracy * 100))
        print('F1-score: %f' % f1score)

    def predict_proba(self, encoded_docs: list) -> np.ndarray:
        """Predicts the probabilities of given data."""
        padded_docs = self._get_padded_docs(encoded_docs)
        probabilities = self.model.predict(padded_docs)
        return probabilities

    def plot_roc_curve(self, encoded_docs: list, labels: list):
        """Plots the ROC curve and computes its AUC."""
        probabilities = self.predict_proba(encoded_docs)
        fpr, tpr, thresholds = roc_curve(labels, probabilities)
        roc_auc = auc(fpr, tpr)
        # Figure
        plt.figure()
        lw = 2
        plt.plot(fpr,
                 tpr,
                 color='darkorange',
                 lw=lw,
                 label='ROC curve (AUC = %0.2f)' % roc_auc)
        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic')
        plt.legend(loc="lower right")

    @staticmethod
    def f1(y_true: np.ndarray, y_pred: np.ndarray):
        """Computes F1-score metric. Code taken from: https://stackoverflow.com/a/45305384"""
        def compute_recall(y_true, y_pred):
            """Recall metric.

            Only computes a batch-wise average of recall.

            Computes the recall, a metric for multi-label classification of
            how many relevant items are selected.
            """
            true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
            possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
            recall_score = true_positives / (possible_positives + K.epsilon())
            return recall_score

        def compute_precision(y_true, y_pred):
            """Precision metric.

            Only computes a batch-wise average of precision.

            Computes the precision, a metric for multi-label classification of
            how many selected items are relevant.
            """
            true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
            predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
            precision_score = true_positives / (predicted_positives +
                                                K.epsilon())
            return precision_score

        precision = compute_precision(y_true, y_pred)
        recall = compute_recall(y_true, y_pred)
        return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
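
# A minimal usage sketch for UrlDetector; the character-to-integer encoding
# below is illustrative, the class only assumes integer-encoded URLs with at
# most vocab_size distinct symbols.
urls = ["http://example.com", "http://malicious.example/evil"]
labels = np.array([0, 1])
encoded_docs = [[min(ord(c), 86) for c in url] for url in urls]  # vocab_size=87

detector = UrlDetector(model="simple_nn")
detector.fit(encoded_docs, labels, epochs=1, validation_split=0.5)
print(detector.predict_proba(encoded_docs))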
예제 #31
0
                                                    test_size = 0.2, 
                                                    stratify=labels)


yTrain = np_utils.to_categorical(yTrain, 2)
yTest = np_utils.to_categorical(yTest, 2)

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import SGD
model = Sequential()
# Dense layers expect flat vectors, so flatten the (w, h, 3) image tensors
# first; the first layer must specify the expected input data shape.
model.add(Flatten(input_shape=(w, h, 3)))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

model.fit(Xtrain, yTrain, 
          batch_size=w, nb_epoch=10, verbose=1)
score = model.evaluate(Xtest, yTest, batch_size=128)

예제 #32
0
File: our.py  Project: cuilimeng/CONAN
noise_dim = EMBEDDING_DIM * 2


pretrain(G, D, lstm_output, y_train, n_samples, noise_dim)

d_loss, g_loss = train(GAN, G, D, lstm_output, y_train, n_samples, noise_dim, verbose=True)

data_and_gen, _ = sample_data_and_gen(G, lstm_output, y_train, n_samples, noise_dim)

X_train = np.concatenate((lstm_output, data_and_gen))
new_y_train = [[0, 1] for _ in range(n_samples * 2)]
y_train = np.concatenate((y_train, np.array(new_y_train)))
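
# focal_loss used below is a custom objective; a common Keras sketch of it is
# shown here as an assumption, not necessarily this project's exact code.
import keras.backend as K

def focal_loss(gamma=2., alpha=.25):
    def loss(y_true, y_pred):
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)
        cross_entropy = -y_true * K.log(y_pred)
        # down-weight easy, well-classified examples
        weight = alpha * K.pow(1. - y_pred, gamma)
        return K.sum(weight * cross_entropy, axis=-1)
    return loss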

model = Sequential()
model.add(Dense(2, activation='softmax', name='Dense_5'))
model.compile(loss=[focal_loss(gamma=2., alpha=.25)], optimizer='rmsprop', metrics=['acc'])

model.fit(X_train, y_train, epochs=5, batch_size=1024)
X_test = lstm_test_output

"""
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import random
random.seed(45)
index = random.sample(range(len(lstm_output)), 10000)
X = lstm_output[index,:]
X = np.concatenate((X, np.array(data_and_gen)[n_samples:,].astype(int)))
Y = y_train[index,1]
for i in range(n_samples): Y = np.concatenate((Y, [2]))
예제 #33
0
pickle.dump(hidden_rep, reps)
reps.close()

weights = np.array(((model.layers[1]).get_weights())[0])

new_file = open('weights.txt', 'wb')
pickle.dump(weights, new_file)
new_file.close()

#print data
val = model.predict(data)
#print emotion
#print np.max(hidden_rep), np.min(hidden_rep)
#print hidden_rep.shape
#print val.shape
#print data.shape
#print e
#imsave('original.jpg', np.reshape(data[0, :], (64, 64)))
#imsave('predicted.jpg', np.reshape(val[0, :], (64, 64)))
#print np.array(model.predict(data)).shape

#new = open('AN_10_rep.txt', 'rb')
#print pickle.load(new)
'''
model = Sequential()
model.add(Dense(30, input_dim = 4096, init = 'uniform'))
model.add(Activation('linear'))
model.add(Dense(4096, init = data[0]))
model.add(Activation('linear'))
'''
def get_model(name, X_train, y_train, embeddings, batch_size, nb_epoch, max_len, max_features, nb_classes):
    print('Building model', name)

    # get correct loss
    loss_function = 'categorical_crossentropy'

    if name == 'LSTM+ATT':
        # this is the placeholder tensor for the input sequences
        sequence = Input(shape=(max_len,), dtype='int32')
        # this embedding layer will transform the sequences of integers
        # into vectors of size embeddings.shape[1]
        embedded = Embedding(embeddings.shape[0], embeddings.shape[1], input_length=max_len, weights=[embeddings])(
            sequence)

        # 4 convolution layers (each 1000 filters)
        cnns = [Convolution1D(filter_length=filters, nb_filter=1000, border_mode='same') for filters in [2, 3, 5, 7]]

        # concatenate
        question = merge([cnn(embedded) for cnn in cnns], mode='concat')

        # create attention vector from max-pooled convoluted
        maxpool = Lambda(lambda x: keras_backend.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]))
        attention_vector = maxpool(question)

        forwards = AttentionLSTM(64, attention_vector)(embedded)
        backwards = AttentionLSTM(64, attention_vector, go_backwards=True)(embedded)

        # concatenate the outputs of the 2 LSTMs
        answer_rnn = merge([forwards, backwards], mode='concat', concat_axis=-1)

        after_dropout = Dropout(0.5)(answer_rnn)
        # we have nb_classes classes (17 in the original setup)
        output = Dense(nb_classes, activation='softmax')(after_dropout)

        model = Model(input=sequence, output=output)

        # try using different optimizers and different optimizer configs
        model.compile('adam', loss_function, metrics=['accuracy'])
        # model.compile('adam', 'hinge', metrics=['hinge'])

        model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, validation_split=0.1, verbose=0)

        return model

    if name == 'LSTM':
        # this is the placeholder tensor for the input sequences
        sequence = Input(shape=(max_len,), dtype='int32')
        # this embedding layer will transform the sequences of integers
        # into vectors of size embeddings.shape[1]
        embedded = Embedding(embeddings.shape[0], embeddings.shape[1], input_length=max_len, weights=[embeddings])(
            sequence)

        # apply forwards and backward LSTM
        forwards = LSTM(64)(embedded)
        backwards = LSTM(64, go_backwards=True)(embedded)

        # concatenate the outputs of the 2 LSTMs
        answer_rnn = merge([forwards, backwards], mode='concat', concat_axis=-1)

        after_dropout = Dropout(0.5)(answer_rnn)
        # we have nb_classes classes (17 in the original setup)
        output = Dense(nb_classes, activation='softmax')(after_dropout)

        model = Model(input=sequence, output=output)

        # try using different optimizers and different optimizer configs
        model.compile('adam', loss_function, metrics=['accuracy'])

        model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, validation_split=0.1, verbose=0)

        return model

    if name == 'MLP':
        model = Sequential()
        model.add(Dense(512, input_shape=(max_len,)))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        model.add(Dense(nb_classes))
        model.add(Activation('softmax'))
        model.compile(loss=loss_function, optimizer='adam', metrics=['accuracy'])

        model.fit(X_train, y_train, nb_epoch=nb_epoch, batch_size=batch_size, validation_split=0.1, verbose=0)

        return model
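
# Example invocation (embeddings is a (vocab_size, dim) numpy matrix of
# pre-trained vectors; nb_classes=17 per the comment above, other values
# illustrative):
model = get_model('LSTM', X_train, y_train, embeddings, batch_size=32,
                  nb_epoch=5, max_len=100, max_features=None, nb_classes=17)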
예제 #35
0
# dim in BatchNormalization:
# for an ordinary np multi-dim array reduction, specifying an axis means
# collapsing that axis while keeping all the other axes the same.
# When we compute a BatchNormalization along an axis, by contrast, we preserve
# the dimensions of the array and normalize with respect to the mean and
# standard deviation over every other axis, i.e. normalization is carried out
# within separate channels.
X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
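
# A quick numpy illustration of "normalize over every other axis": with
# axis=3 (channels-last), each channel gets its own mean and std computed
# over batch, height and width.
import numpy as np
x = np.random.rand(8, 32, 32, 3)                 # (batch, H, W, channels)
mean = x.mean(axis=(0, 1, 2), keepdims=True)     # one mean per channel
std = x.std(axis=(0, 1, 2), keepdims=True)
x_norm = (x - mean) / (std + 1e-5)               # shape preserved: (8, 32, 32, 3)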

# ----------------------------------------------------------------------------------------
#Udacity
from keras.models import Sequential

# Create the Sequential model
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(100))
model.add(Activation('relu'))
# compile the model: specify how to train it; full signature for reference:
# compile(optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None)
# e.g.
model.compile('adam', 'categorical_crossentropy', ['accuracy'])


# batch_size is 32 by default; evaluate's full signature for reference:
# model.evaluate(x=None, y=None, batch_size=None, verbose=1, sample_weight=None, steps=None, callbacks=None)

class SeqModel:
    # network model as described by happynoom
    def __init__(self,
                 input_shape=None,
                 learning_rate=0.001,
                 n_layers=2,
                 n_hidden=8,
                 rate_dropout=0.2,
                 loss=risk_estimation):
        self.input_shape = input_shape
        self.learning_rate = learning_rate
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.rate_dropout = rate_dropout
        self.loss = loss
        self.model = None

    def lstmModel(self):
        self.model = Sequential()
        self.model.add(GaussianNoise(stddev=0.01,
                                     input_shape=self.input_shape))
        for i in range(0, self.n_layers - 1):
            self.model.add(
                LSTM(self.n_hidden * 4,
                     return_sequences=True,
                     activation='softsign',
                     recurrent_activation='hard_sigmoid',
                     kernel_initializer='glorot_uniform',
                     recurrent_initializer='orthogonal',
                     bias_initializer=initializers.RandomNormal(mean=0.0,
                                                                stddev=0.05,
                                                                seed=None),
                     dropout=self.rate_dropout,
                     recurrent_dropout=self.rate_dropout))

        self.model.add(
            LSTM(self.n_hidden,
                 return_sequences=False,
                 activation='softsign',
                 recurrent_activation='hard_sigmoid',
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer=initializers.RandomNormal(mean=0.0,
                                                            stddev=0.05,
                                                            seed=None),
                 dropout=self.rate_dropout,
                 recurrent_dropout=self.rate_dropout))
        self.model.add(Dense(256, activation='relu'))
        self.model.add(Dropout(self.rate_dropout))
        self.model.add(
            BatchNormalization(axis=-1,
                               beta_initializer=initializers.RandomNormal(
                                   mean=0.0, stddev=0.05, seed=None)))
        self.model.add(Dense(256, activation='relu'))
        self.model.add(Dropout(self.rate_dropout))
        self.model.add(
            BatchNormalization(axis=-1,
                               beta_initializer=initializers.RandomNormal(
                                   mean=0.0, stddev=0.05, seed=None)))
        self.model.add(Dense(output_size, activation='softmax'))
        opt = RMSprop(lr=self.learning_rate)
        self.model.compile(loss=risk_estimation_sum,
                           optimizer=opt,
                           metrics=['accuracy'])
        self.model.summary()
        return self.model

    def attention_3d_block(self, inputs):
        # inputs has shape (batch, time_step, input_dim); learn a softmax
        # weighting over the time steps and rescale the inputs with it
        input_dim = int(inputs.shape[2])
        a = Permute((2, 1))(inputs)  # -> (batch, input_dim, time_step)
        a = Reshape((input_dim, time_step))(a)
        a = Dense(time_step, activation='softmax')(a)  # attention weights
        # single_attention_vector
        # a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        # a = RepeatVector(input_dim)(a)
        a_probs = Permute((2, 1), name='attention_vec')(a)  # back to input layout
        output_attention_mul = Multiply()([inputs, a_probs])
        return output_attention_mul

    def lstmAttentionModel(self):
        K.clear_session()  # clear the previous model to avoid filling memory
        inputs = Input(shape=(
            time_step,
            input_size,
        ))
        attention_mul = self.attention_3d_block(inputs)
        for i in range(0, self.n_layers - 1):
            attention_mul = LSTM(
                self.n_hidden * 4,
                return_sequences=True,
                activation='softsign',
                recurrent_activation='hard_sigmoid',
                kernel_initializer='glorot_uniform',
                recurrent_initializer='orthogonal',
                bias_initializer='zeros',
                dropout=self.rate_dropout,
                recurrent_dropout=self.rate_dropout)(attention_mul)
        attention_mul = LSTM(
            self.n_hidden,
            return_sequences=False,
            activation='softsign',
            recurrent_activation='hard_sigmoid',
            kernel_initializer='glorot_uniform',
            recurrent_initializer='orthogonal',
            bias_initializer='zeros',
            dropout=self.rate_dropout,
            recurrent_dropout=self.rate_dropout)(attention_mul)
        attention_mul = Dense(256,
                              kernel_initializer=initializers.glorot_uniform(),
                              activation='relu')(attention_mul)
        attention_mul = Dropout(self.rate_dropout)(attention_mul)
        attention_mul = BatchNormalization(
            axis=-1, beta_initializer='ones')(attention_mul)
        attention_mul = Dense(256,
                              kernel_initializer=initializers.glorot_uniform(),
                              activation='relu')(attention_mul)
        attention_mul = Dropout(self.rate_dropout)(attention_mul)
        attention_mul = BatchNormalization(
            axis=-1, beta_initializer='ones')(attention_mul)
        outputs = Dense(output_size, activation='softmax')(attention_mul)
        self.model = Model(inputs=[inputs], outputs=outputs)
        opt = RMSprop(lr=self.learning_rate)
        self.model.compile(loss=risk_estimation,
                           optimizer=opt,
                           metrics=['accuracy'])
        self.model.summary()
        return self.model

    def train(self):
        # fit network
        history = self.model.fit(train_x,
                                 train_y,
                                 epochs=2000,
                                 batch_size=2048,
                                 verbose=1,
                                 shuffle=True,
                                 validation_data=(test_x, test_y))
        # plot history
        plt.plot(history.history['loss'], label='train')
        plt.legend()
        plt.show()

    def save(self, path=model_path, type='evaluate', name=None):
        if name:
            self.model.save(path + name)
            return
        if type == 'evaluate':
            file = 'lstm_evaluate_' + timestamp + '.h5'
        else:
            file = 'lstm_' + timestamp + '.h5'
        self.model.save(path + file)
        return

    def load(self,
             path=model_path,
             type='evaluate',
             version='latest',
             model_name=None):
        if model_name:
            self.model = load_model(path + model_name,
                                    custom_objects={
                                        'risk_estimation': risk_estimation,
                                        'risk_estimation_sum':
                                        risk_estimation_sum
                                    })
        else:
            file_names = os.listdir(path)
            model_files = []
            eval_files = []
            if version == 'latest':
                for file in file_names:
                    if re.search('eval', file) is not None:
                        eval_files.append(file)
                    else:
                        model_files.append(file)
                if type == 'evaluate':
                    eval_files.sort(reverse=True)
                    model_name = eval_files[0]
                else:
                    model_files.sort(reverse=True)
                    model_name = model_files[0]
                print(model_name, 'has loaded')
                self.model = load_model(path + model_name,
                                        custom_objects={
                                            'risk_estimation':
                                            risk_estimation,
                                            'risk_estimation_sum':
                                            risk_estimation_sum
                                        })
            elif version == 'softmax':
                for file in file_names:
                    if re.search('softmax', file) is not None:
                        model_files.append(file)
                model_files.sort(reverse=True)
                model_name = model_files[0]
                self.model = load_model(path + model_name,
                                        custom_objects={
                                            'risk_estimation':
                                            risk_estimation,
                                            'risk_estimation_sum':
                                            risk_estimation_sum
                                        })
            else:
                self.model = load_model(path + version,
                                        custom_objects={
                                            'risk_estimation':
                                            risk_estimation,
                                            'risk_estimation_sum':
                                            risk_estimation_sum
                                        })

    def predict(self, test):
        predict = []
        for sample_index in range(test.shape[0]):
            test_data = test[sample_index].reshape(1, time_step, input_size)
            prev = self.model.predict(test_data)
            predict.append(prev)
        return np.array(predict)
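
# risk_estimation / risk_estimation_sum are custom objectives from the
# original project; a hedged sketch of the kind of return-weighted loss they
# stand for, plus a minimal driver. time_step, input_size, train_x, train_y,
# test_x and test_y are module-level globals in the original code.
import keras.backend as K

def risk_estimation(y_true, y_pred):
    # negative mean position-weighted return: minimizing it maximizes profit
    return -100. * K.mean(y_true * y_pred)

seq = SeqModel(input_shape=(time_step, input_size), n_layers=2, n_hidden=8)
seq.lstmModel()
seq.train()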
예제 #37
0
lrs = []
for l in model.layers[:12]:
    lrs.append(l)

a = Input(shape = input_shape)

x = lrs[0](a)
for l in lrs[1:]: 
    x = l(x)

x = conv2d_bn_cpd(x, 512, 10, 6, 6, name="blabla")
x = Conv2D(num_classes, kernel_size=(1,1), activation=None, name="final_conv")(x)
x = Flatten()(x)
x = (Activation('softmax', name="final_softmax"))(x)
model = Model(inputs=a, outputs=x)

'''
model.add(Conv2D(num_classes, kernel_size=(1,1)))
model.add(Flatten())
model.add(Activation('softmax'))
''' 
# load model weights, if saved
# model.load_weights("weights.best.hdf5")
# print("loaded weights!")

model.summary()
for l in model.layers[:12]:
    l.trainable = False
    print("Freezing: " + l.name)

# initiate RMSprop optimizer
# opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
예제 #38
0
def BuildGenerator(summary=True,
                   resnet=True,
                   bn_momentum=0.9,
                   bn_epsilon=0.00002,
                   name='Generator',
                   plot=False):
    if resnet:
        model_input = Input(shape=(128, ))
        h = Dense(4 * 4 * 256,
                  kernel_initializer='glorot_uniform')(model_input)
        h = Reshape((4, 4, 256))(h)
        resblock_1 = ResBlock(input_shape=(4, 4, 256),
                              sampling='up',
                              bn_epsilon=bn_epsilon,
                              bn_momentum=bn_momentum,
                              name='Generator_resblock_1')
        h = resblock_1(h)
        resblock_2 = ResBlock(input_shape=(8, 8, 256),
                              sampling='up',
                              bn_epsilon=bn_epsilon,
                              bn_momentum=bn_momentum,
                              name='Generator_resblock_2')
        h = resblock_2(h)
        resblock_3 = ResBlock(input_shape=(16, 16, 256),
                              sampling='up',
                              bn_epsilon=bn_epsilon,
                              bn_momentum=bn_momentum,
                              name='Generator_resblock_3')
        h = resblock_3(h)
        h = BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum)(h)
        h = Activation('relu')(h)
        model_output = Conv2D(3,
                              kernel_size=3,
                              strides=1,
                              padding='same',
                              activation='tanh')(h)
        model = Model(model_input, model_output, name=name)

    else:
        model = Sequential(name=name)
        model.add(
            Dense(4 * 4 * 512,
                  kernel_initializer='glorot_uniform',
                  input_dim=128))
        model.add(Reshape((4, 4, 512)))
        model.add(
            Conv2DTranspose(256,
                            kernel_size=4,
                            strides=2,
                            padding='same',
                            activation='relu',
                            kernel_initializer='glorot_uniform'))
        model.add(BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum))
        model.add(
            Conv2DTranspose(128,
                            kernel_size=4,
                            strides=2,
                            padding='same',
                            activation='relu',
                            kernel_initializer='glorot_uniform'))
        model.add(BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum))
        model.add(
            Conv2DTranspose(64,
                            kernel_size=4,
                            strides=2,
                            padding='same',
                            activation='relu',
                            kernel_initializer='glorot_uniform'))
        model.add(BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum))
        model.add(
            Conv2DTranspose(3,
                            kernel_size=3,
                            strides=1,
                            padding='same',
                            activation='tanh'))

    if plot:
        plot_model(model, name + '.png', show_layer_names=True)
    if summary:
        print("Generator")
        model.summary()
    return model
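
# ResBlock above is a custom block; a minimal up-sampling residual block
# sketch consistent with how it is called (an assumption about its internals,
# not the project's exact definition):
from keras.layers import (Input, Conv2D, BatchNormalization, Activation,
                          UpSampling2D, Add)
from keras.models import Model

def ResBlock(input_shape, sampling='up', bn_epsilon=2e-5, bn_momentum=0.9,
             name='resblock'):
    channels = input_shape[-1]
    x_in = Input(shape=input_shape)
    h = BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum)(x_in)
    h = Activation('relu')(h)
    if sampling == 'up':
        h = UpSampling2D()(h)
    h = Conv2D(channels, 3, padding='same')(h)
    h = BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum)(h)
    h = Activation('relu')(h)
    h = Conv2D(channels, 3, padding='same')(h)
    shortcut = UpSampling2D()(x_in) if sampling == 'up' else x_in
    return Model(x_in, Add()([h, shortcut]), name=name)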
out_a = vision_model(digit_a)
out_b = vision_model(digit_b)

concatenated = keras.layers.concatenate([out_a, out_b])
out = Dense(1, activation='sigmoid')(concatenated)

classification_model = Model([digit_a, digit_b], out)

""" visual question answer model """
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.layers import Input, LSTM, Embedding, Dense
from keras.models import Model, Sequential

# define a vision model that encodes our image into a vector
vision_model = Sequential()
vision_model.add(Conv2D(64, (3,3), activation='relu', padding='same', input_shape=(224,224,3)))
vision_model.add(Conv2D(64, (3,3), activation='relu'))
vision_model.add(MaxPooling2D((2,2)))
vision_model.add(Conv2D(128, (3,3), activation='relu', padding='same'))
vision_model.add(Conv2D(128, (3,3), activation='relu'))
vision_model.add(MaxPooling2D((2,2)))
vision_model.add(Conv2D(256, (3,3), activation='relu', padding='same'))
vision_model.add(Conv2D(256, (3,3), activation='relu'))
vision_model.add(Conv2D(256, (3,3), activation='relu'))
vision_model.add(MaxPooling2D((2,2)))
vision_model.add(Flatten())

# let's get a tensor with the output of our vision model
image_input = Input(shape=(224,224,3))
encoded_image = vision_model(image_input)
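
# Continuing the standard Keras visual question answering recipe: encode the
# question with an LSTM and concatenate it with the image vector (a sketch;
# the vocabulary size and question length are assumptions).
from keras.layers import concatenate

question_input = Input(shape=(100,), dtype='int32')
embedded_question = Embedding(input_dim=10000, output_dim=256,
                              input_length=100)(question_input)
encoded_question = LSTM(256)(embedded_question)

merged = concatenate([encoded_question, encoded_image])
output = Dense(1000, activation='softmax')(merged)  # one score per answer word
vqa_model = Model(inputs=[image_input, question_input], outputs=output)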
예제 #40
-1
def test_nested_model_trainability():
    # a Sequential inside a Model
    inner_model = Sequential()
    inner_model.add(Dense(2, input_dim=1))

    x = Input(shape=(1,))
    y = inner_model(x)
    outer_model = Model(x, y)
    assert outer_model.trainable_weights == inner_model.trainable_weights
    inner_model.trainable = False
    assert outer_model.trainable_weights == []
    inner_model.trainable = True
    inner_model.layers[-1].trainable = False
    assert outer_model.trainable_weights == []

    # a Sequential inside a Sequential
    inner_model = Sequential()
    inner_model.add(Dense(2, input_dim=1))
    outer_model = Sequential()
    outer_model.add(inner_model)
    assert outer_model.trainable_weights == inner_model.trainable_weights
    inner_model.trainable = False
    assert outer_model.trainable_weights == []
    inner_model.trainable = True
    inner_model.layers[-1].trainable = False
    assert outer_model.trainable_weights == []

    # a Model inside a Model
    x = Input(shape=(1,))
    y = Dense(2)(x)
    inner_model = Model(x, y)
    x = Input(shape=(1,))
    y = inner_model(x)
    outer_model = Model(x, y)
    assert outer_model.trainable_weights == inner_model.trainable_weights
    inner_model.trainable = False
    assert outer_model.trainable_weights == []
    inner_model.trainable = True
    inner_model.layers[-1].trainable = False
    assert outer_model.trainable_weights == []

    # a Model inside a Sequential
    x = Input(shape=(1,))
    y = Dense(2)(x)
    inner_model = Model(x, y)
    outer_model = Sequential()
    outer_model.add(inner_model)
    assert outer_model.trainable_weights == inner_model.trainable_weights
    inner_model.trainable = False
    assert outer_model.trainable_weights == []
    inner_model.trainable = True
    inner_model.layers[-1].trainable = False
    assert outer_model.trainable_weights == []