def buildModel(embeddingMatrix):
    """Constructs the architecture of the modelEMOTICONS_TOKEN[list_str[index]]
    Input:
        embeddingMatrix : The embedding matrix to be loaded in the embedding layer.
    Output:
        model : A basic LSTM model
    """
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')  # token indices
    embeddingLayer = Embedding(embeddingMatrix.shape[0],
                               EMBEDDING_DIM,
                               weights=[embeddingMatrix],
                               input_length=MAX_SEQUENCE_LENGTH,
                               trainable=False)(sequence)
    embedded = Highway()(embeddingLayer)
    # embedded = Dropout(0.25)(embedded)

    embedded = Bidirectional(
        LSTM(LSTM_DIM, dropout=DROPOUT, return_sequences=True))(embedded)
    enc = Bidirectional(LSTM(LSTM_DIM, dropout=DROPOUT))(embedded)

    fc1 = Dense(128, activation="relu")(enc)
    fc2_dropout = Dropout(0.25)(fc1)

    output = Dense(NUM_CLASSES, activation='softmax')(fc2_dropout)  # softmax pairs with categorical_crossentropy below
    rmsprop = optimizers.RMSprop(lr=LEARNING_RATE)
    model = Model(inputs=sequence, outputs=output)

    model.compile(loss='categorical_crossentropy',
                  optimizer=rmsprop,
                  metrics=['acc'])
    return model
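All of these examples use the Highway layer from Keras 1.x (keras.layers.Highway), which was removed in Keras 2. A minimal re-implementation sketch for tf.keras is shown below; the class name HighwayDense and the gate-bias default are our own choices, not part of any released API.

from tensorflow.keras import layers, activations, initializers

class HighwayDense(layers.Layer):
    # Highway transform: y = t * g(W_h x + b_h) + (1 - t) * x,
    # with transform gate t = sigmoid(W_t x + b_t) (Srivastava et al., 2015).
    def __init__(self, activation='relu', gate_bias=-2.0, **kwargs):
        super().__init__(**kwargs)
        self.activation = activations.get(activation)
        self.gate_bias = gate_bias

    def build(self, input_shape):
        dim = int(input_shape[-1])           # highway layers preserve the feature dim
        self.transform = layers.Dense(dim)   # W_h, b_h
        self.gate = layers.Dense(            # W_t, b_t; negative bias favours carry
            dim, activation='sigmoid',
            bias_initializer=initializers.Constant(self.gate_bias))
        super().build(input_shape)

    def call(self, x):
        t = self.gate(x)
        h = self.activation(self.transform(x))
        return t * h + (1.0 - t) * x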
Example No. 2
def naics_hw_noproj_encoder(input_layer, n_layers=2):
    layers = [input_layer]
    for n in range(n_layers):
        previous_layer = layers[n]
        layer = Highway(activation='relu',
                        name='naics_hw%s' % n)(previous_layer)
        layers.append(layer)
    return layers[-1]
Example No. 3
    def _enhance_D(self, c):
        # add another enhancing layer on top of the D features
        if self.enhanceD == "LSTM":
            l = int(c.get_shape()[-1])
            c = Reshape((1, l))(c)
            c = LSTM(l)(c)
        elif self.enhanceD == "HW":
            c = Highway()(c)
        return c
Ejemplo n.º 4
0
def blink_net(shape=(64, 64), nb_channels=1):

    logger.info('generating net with input shape ({})'.format(', '.join(
        str(s) for s in shape)))

    img_width, img_height = shape

    eye = Input(shape=(nb_channels, img_width, img_height))

    eye_model = Sequential()
    eye_model.add(
        Convolution2D(64,
                      3,
                      3,
                      border_mode='valid',
                      activation='relu',
                      input_shape=(nb_channels, img_width, img_height)))
    eye_model.add(Dropout(0.25))
    eye_model.add(
        Convolution2D(32, 3, 3, border_mode='valid', activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Flatten(input_shape=(nb_channels, img_width, img_height)))
    eye_model.add(Dense(1024, activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))

    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))

    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))

    eye_model.add(Dense(512, activation='relu'))
    eye_model.add(Dropout(0.2))

    eye_model.add(Dense(128, activation='relu'))
    eye_model.add(Dropout(0.2))

    eye_model.add(Dense(2, activation='softmax', name='pose'))

    logger.info('compiling with Adam and categorical crossentropy')
    eye_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])

    return eye_model
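Convolution2D(64, 3, 3, border_mode='valid') is the Keras 1 signature. Under Keras 2 the same layer would be written roughly as below (assuming the channels-first shapes used in this example):

# Keras 2 sketch of the first convolution above; not from the original source.
from tensorflow.keras.layers import Conv2D
conv = Conv2D(64, (3, 3), padding='valid', activation='relu',
              input_shape=(nb_channels, img_width, img_height),
              data_format='channels_first')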
Example No. 5
def lstm():
    model = kr.Sequential()
    model.add(BatchNormalization(input_shape=(1, 465)))
    model.add(
        LSTM(64,
             return_sequences=True,
             kernel_initializer='he_normal',
             use_bias=True,
             bias_initializer=kr.initializers.one(),
             unit_forget_bias=True,
             kernel_regularizer=kr.regularizers.l1_l2(0.001, 0.0001)))
    model.add(LeakyReLU())
    model.add(
        LSTM(64,
             return_sequences=False,
             go_backwards=True,
             kernel_initializer='he_normal'))
    model.add(Highway())
    model.add(GaussianDropout(0.5))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Dense(32))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Highway())
    model.add(Dense(64))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Highway())
    model.add(Dense(128))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Highway())
    model.add(Dense(256))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Dense(1))
    sgd = kr.optimizers.sgd(lr=0.1,
                            momentum=0.1,
                            decay=0.001,
                            nesterov=True,
                            clipnorm=3)
    model.compile(loss='mape', optimizer=sgd, metrics=['mae', 'mse'])
    return model
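A quick smoke test of the model above on random data in the (samples, 1, 465) shape expected by the first BatchNormalization; the array sizes here are arbitrary:

import numpy as np

model = lstm()
X = np.random.rand(256, 1, 465).astype('float32')
y = np.random.rand(256, 1).astype('float32')  # single regression target
model.fit(X, y, epochs=1, batch_size=32)      # nb_epoch=1 on Keras 1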
Example No. 6
def build_network(concept_dic, embeddings_file, EMBEDDING_DIM=100, MAX_SENSE_LENGTH = 5, CONTEXT_WINDOW_SIZE = 5,
                  PRE_TRAINED=True, UPDATABLE=True,
                  dropout_rate=0.3,
                  hidden_activation="relu", highway_activation="sigmoid", output_activation="linear",
                  optimizer="adam", print_model=False):

    INPUTS = []
    LEFT_RIGHT_CENTER = []

    embedding_layer = create_embedding(concept_dic, embeddings_file,
                                       EMBEDDING_DIM, MAX_SENSE_LENGTH, PRE_TRAINED, UPDATABLE)

    for i in range(2 * CONTEXT_WINDOW_SIZE + 1):
        """Creating network's pipes one-by-one (from left to right)"""

        context_term_input = Input(shape=(MAX_SENSE_LENGTH,), dtype='int32')
        INPUTS.append(context_term_input)

        context_term_embedding = embedding_layer(context_term_input)

        pipe = MaxPooling1D(pool_size=MAX_SENSE_LENGTH)(context_term_embedding)
        pipe = Flatten()(pipe)
        LEFT_RIGHT_CENTER.append(pipe)

    left = Merge(mode='max')(LEFT_RIGHT_CENTER[0:CONTEXT_WINDOW_SIZE])
    left_dense = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(left)
    left_dense_dropout = Dropout(dropout_rate)(left_dense)

    right = Merge(mode='max')(LEFT_RIGHT_CENTER[CONTEXT_WINDOW_SIZE:CONTEXT_WINDOW_SIZE * 2])
    right_dense = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(right)
    right_dense_dropout = Dropout(dropout_rate)(right_dense)
    
    context = Merge(mode='max')([left_dense_dropout, right_dense_dropout])

    centre = LEFT_RIGHT_CENTER[-1]
    #centre_dense = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(centre)
    #centre__dense_dropout = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(centre_dense)
    
    merge_instance = Concatenate(axis=-1)([context, centre])
    merge_instance = Highway(activation=highway_activation)(merge_instance)
    # merge_instance = Dense(units=EMBEDDING_DIM * 2, activation=hidden_activation)(merge_instance)
    # merge_instance = Dropout(dropout_rate)(merge_instance)

    merge_instance = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(merge_instance)
    merge_instance = Dropout(dropout_rate)(merge_instance)

    prediction = Dense(units=1, activation=output_activation)(merge_instance)

    model = Model(inputs=INPUTS, outputs=prediction)

    model.compile(loss='mean_squared_error', optimizer=optimizer)
    
    if print_model:
        print(model.summary())
        
    return model, embedding_layer
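Note that this function mixes APIs: Merge(mode='max') is Keras 1, while Concatenate is Keras 2. Under Keras 2 the element-wise maxima would instead be written with the Maximum layer, e.g.:

# Keras 2 equivalents of the Merge(mode='max') calls above (sketch):
from tensorflow.keras.layers import Maximum
left = Maximum()(LEFT_RIGHT_CENTER[0:CONTEXT_WINDOW_SIZE])
context = Maximum()([left_dense_dropout, right_dense_dropout])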
Example No. 7
    def _add_skip_layers(self, tensor_list, layer_count, activation="relu", regularization=None):
        for _ in range(layer_count):
            new_list = []
            for item in tensor_list:
                item = TimeDistributed(
                    Highway(activation=activation, W_regularizer=regularization, b_regularizer=regularization))(item)
                new_list.append(item)
            tensor_list = new_list

        return tensor_list
Example No. 8
    def default_model(self):
        product_ecfp4 = Input(shape=(16384, ))
        reaction_ecfp4 = Input(shape=(2048, ))
        product = Dense(activation='elu', units=1024)(product_ecfp4)
        reaction = Dense(activation='elu', units=1024)(reaction_ecfp4)
        product = Dropout(0.3)(product)

        product = Highway(activation='elu')(product)
        product = Highway(activation='elu')(product)
        product = Highway(activation='elu')(product)
        product = Highway(activation='elu')(product)
        product = Highway(activation='elu')(product)

        cosine_similarities = Dot(normalize=True, axes=-1)([product, reaction])

        Y = Activation('sigmoid')(cosine_similarities)

        model = Model(input=[product_ecfp4, reaction_ecfp4], output=Y)
        return model
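Dot(normalize=True, axes=-1) L2-normalizes both inputs before taking the dot product, so the layer outputs the cosine similarity of the product and reaction representations. The identity it computes, checked with plain numpy:

import numpy as np

a, b = np.random.rand(1024), np.random.rand(1024)
cosine = a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))
# Dot(normalize=True, axes=-1) yields this value for each pair in the batch.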
Example No. 9
def get_fc_model():
    width = 40
    depth = 6
    model = Sequential()
    model.add(Dense(width, input_dim=3))
    model.add(PReLU())
    for d in range(depth):
        model.add(Highway())
        model.add(PReLU())
    model.add(Dense(3))
    model.compile(loss='mae', optimizer='rmsprop')
    return model
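Since the network maps 3 inputs to 3 outputs, a toy fit looks like this (data sizes and epoch counts are arbitrary):

import numpy as np

model = get_fc_model()
X = np.random.rand(1000, 3)
model.fit(X, X * 2.0, epochs=5, batch_size=32)  # learn a simple scaling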
Example No. 10
    def default_model(self):
        product_ecfp4 = Input(shape=(self.n_feats, ))
        product = Dense(512, activation='elu')(product_ecfp4)
        product = Dropout(0.3)(product)

        product = Highway(activation='elu')(product)
        product = Dropout(rate=0.1)(product)
        product = Highway(activation='elu')(product)
        product = Dropout(rate=0.1)(product)
        product = Highway(activation='elu')(product)
        product = Dropout(rate=0.1)(product)
        product = Highway(activation='elu')(product)
        product = Dropout(rate=0.1)(product)
        product = Highway(activation='elu')(product)
        product = Dropout(rate=0.1)(product)

        product = Dense(self.n_classes, activation="relu")(product)

        Y = Activation('softmax')(product)

        model = Model(input=product_ecfp4, output=Y)
        return model
Example No. 11
def build_model(config):
    config = get_data(config)

    base_name = 'out'
    if config['hedge']:
        outs = [''] * config['n_layers']
        out_name = [''] * config['n_layers']
        N = config['n_layers']
        for i in range(len(outs)):
            outs[i] = base_name + str(i)
            out_name[i] = base_name + str(i)
    else:
        outs = base_name
        out_name = [base_name]
        N = config['n_layers'] - 1
    in_name = 'in0'

    inputs = Input(config['input_size'], name=in_name)

    for j in range(N):
        if j == 0:
            layer = Dense(config['hidden_num'])(inputs)
            layer = Activation(config['activation'])(layer)

            if config['hedge']:
                outs[j] = Dense(config['output_size'],
                                activation='softmax',
                                name=outs[j])(layer)
            continue
        if not config['Highway']:
            layer = Dense(config['hidden_num'])(layer)
            layer = Activation(config['activation'])(layer)
        else:
            layer = Highway(activation=config['activation'])(layer)

        if config['hedge']:
            outs[j] = Dense(config['output_size'],
                            activation='softmax',
                            name=outs[j])(layer)
    if not config['hedge']:
        outs = Dense(config['output_size'], activation='softmax',
                     name=outs)(layer)
    model = Model(input=inputs, output=outs)

    return (model, in_name, out_name)
Example No. 12
def add_layer(model, width, activation="tanh"):
    model.add(Highway(bias=True))
    #model.add(Dense(bias=True, output_dim=width))
    model.add(Activation(activation))
    return model
Example No. 13
def naics_hw3_encoder(input_layer, output_dim):
    h1 = Highway(activation='relu')(input_layer)
    h2 = Highway(activation='relu')(h1)
    h3 = Highway(activation='relu')(h2)
    return Dense(output_dim, name='naics_linear_proj')(h3)
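These NAICS encoder helpers return tensors rather than models, so wiring one into a standalone Model is just (the input width 300 and output_dim below are illustrative):

from keras.layers import Input
from keras.models import Model

inp = Input(shape=(300,))
encoded = naics_hw3_encoder(inp, output_dim=128)
encoder = Model(inputs=inp, outputs=encoded)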
Example No. 14
def PPI_model_builder(EMBEDDING_DIM,
                      model_ind,
                      MAX_SEQUENCE_LENGTH,
                      WORD_EMBEDDINGS,
                      SUB_ONTOLOGY_work,
                      word_indeces,
                      ACTIVATION_HIDDEN,
                      ACTIVATION_HIGHWAY,
                      ACTIVATION_OUTPUT,
                      DROPOUT,
                      OPTIMIZER,
                      TRANSFER_LEARNING=False,
                      PRE_TRAINED=True,
                      UPDATABLE=True,
                      PRINT_deepSimDEF_SUMMARY=False):
    EMBEDDINGS = {}
    INPUTS = []
    DENSES = []
    CHANNELS = []
    CHANNELS2 = []

    Dense1_weights = []
    if TRANSFER_LEARNING:
        # load json and create model
        # json_file = open('model_repository/model_PPI_' + str(ind) + '.json', 'r')
        json_file = open('model_repository/model_0.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        # loaded_model.load_weights('model_repository/model_PPI_' + str(ind) + '.h5')
        loaded_model.load_weights('model_repository/model_0.h5')
        # Dense1_weights = loaded_model.get_layer('gene_product_dense').get_weights()
        print("Loaded model from disk")
        model = loaded_model
        model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER, metrics=[fmeasure])
        return model, 0

    for i in range(2):
        for sbo in SUB_ONTOLOGY_work:

            protein_input = Input(shape=(MAX_SEQUENCE_LENGTH[sbo],), dtype='int32')
            INPUTS.append(protein_input)

            if sbo in EMBEDDINGS:
                embedding_layer = EMBEDDINGS[sbo]
            else:
                if PRE_TRAINED:
                    # use pre-trained word embeddings
                    file_reader = open(WORD_EMBEDDINGS[sbo])
                    word_embeddings = {}
                    for line in file_reader:
                        values = line.split()
                        word = values[0]
                        vector = np.asarray(values[1:], dtype='float32')
                        word_embeddings[word] = vector
                    file_reader.close()

                    print('Loaded', len(word_embeddings), 'word vectors for', sbo, '(Model ' + str(model_ind + 1) + ')')

                    embedding_size = len(word_embeddings[np.random.choice(list(word_embeddings.keys()))])
                    embedding_matrix = np.zeros((len(word_indeces[sbo]) + 1, embedding_size)) - 300.0
                    for word, idx in word_indeces[sbo].items():  # idx, to avoid shadowing the outer loop's i
                        embedding_vector = word_embeddings.get(word)
                        if embedding_vector is not None:
                            # words without a pre-trained vector keep the -300.0 fill value
                            embedding_matrix[idx] = embedding_vector

                    embedding_layer = Embedding(input_dim=len(word_indeces[sbo]) + 1,
                                                output_dim=embedding_size,
                                                weights=[embedding_matrix],
                                                input_length=MAX_SEQUENCE_LENGTH[sbo],
                                                trainable=UPDATABLE)
                else:
                    # without pre-trained word embeddings
                    embedding_layer = Embedding(input_dim=len(word_indeces[sbo]) + 1,
                                                output_dim=EMBEDDING_DIM,
                                                input_length=MAX_SEQUENCE_LENGTH[sbo])

                EMBEDDINGS[sbo] = embedding_layer

            # protein_input = Input(shape=(MAX_SEQUENCE_LENGTH[sbo],), dtype='int32')
            # INPUTS.append(protein_input)

            GO_term = embedding_layer(protein_input)

            Ch = MaxPooling1D(pool_size=MAX_SEQUENCE_LENGTH[sbo])(GO_term)
            Ch = Flatten()(Ch)
            CHANNELS.append(Ch)

    num_pair = 2
    for i in range(num_pair):
        # for j in range(len(CHANNELS)/2):
        if len(SUB_ONTOLOGY_work) > 1:
            Mrg = Concatenate(axis=-1)(CHANNELS[i * len(SUB_ONTOLOGY_work):len(SUB_ONTOLOGY_work) * (i + 1)])
        else:
            Mrg = CHANNELS[i]

        if len(DENSES) == 1:
            Dns = DENSES[0]
        else:
            Dns = Dense(units=EMBEDDING_DIM * len(SUB_ONTOLOGY_work), activation=ACTIVATION_HIDDEN)
            # Dns = Dense(units = EMBEDDING_DIM * len(SUB_ONTOLOGY_work), activation = ACTIVATION_HIDDEN, name='gene_product_dense',weights=Dense1_weights, trainable=UPDATABLE)
            DENSES.append(Dns)
        Ch = Dns(Mrg)

        DrpOut = Dropout(DROPOUT)
        Ch = DrpOut(Ch)

        CHANNELS2.append(Ch)

    merge = Concatenate(axis=-1)(CHANNELS2)
    merge = Highway(activation=ACTIVATION_HIGHWAY, name="highway_layer")(merge)
    merge = Dropout(DROPOUT)(merge)

    merge = Dense(units=EMBEDDING_DIM * len(SUB_ONTOLOGY_work),
                  activation=ACTIVATION_HIDDEN)(merge)
    merge = Dropout(DROPOUT)(merge)

    preds = Dense(units=1, activation=ACTIVATION_OUTPUT)(merge)

    model = Model(inputs=INPUTS, outputs=preds)

    model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER, metrics=[fmeasure])

    if PRINT_deepSimDEF_SUMMARY:
        print(model.summary())

    print "Model for Fold Number", model_ind + 1, "Instantiated!!\n"

    return model, EMBEDDINGS
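Note how EMBEDDINGS[sbo] caches a single Embedding layer per sub-ontology and reuses it for both proteins of the pair, so the two inputs are encoded with shared weights (a siamese setup). The same pattern in isolation, with illustrative sizes and placeholder input names:

# Calling one layer object on two inputs shares its weights (sketch):
shared_embedding = Embedding(input_dim=10000, output_dim=100)
vec_a = shared_embedding(protein_a_input)  # both calls read and train
vec_b = shared_embedding(protein_b_input)  # the same weight matrix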
Example No. 15
def naics_lp_hw_encoder(input_layer, output_dim):
    l1 = Dense(output_dim, name='naics_linear_proj')(input_layer)
    h1 = Highway(activation='relu')(l1)
    return h1
Example No. 16
def naics_hw_only_encoder(input_layer):
    return Highway(activation='relu')(input_layer)
Example No. 17
def deep_neural_net_gru(train_data_1, train_data_2, train_labels, test_data_1,
                        test_data_2, test_labels, max_len, len_chars,
                        bidirectional, hidden_units, selfattention, maxpooling,
                        alignment, shortcut, multiplerlu, onlyconcat, n):
    early_stop = EarlyStopping(monitor='loss', patience=0, verbose=1)
    checkpointer = ModelCheckpoint(
        filepath="/home/amarinho/data-amarinho/checkpoint" + str(n) + ".hdf5",
        verbose=1,
        save_best_only=True)
    gru1 = GRU(hidden_units, consume_less='gpu', return_sequences=True)
    gru2 = GRU(hidden_units,
               consume_less='gpu',
               return_sequences=(alignment or selfattention or maxpooling))
    if bidirectional:
        gru1 = Bidirectional(gru1)
        gru2 = Bidirectional(gru2)
    # definition for left branch of the network
    left_branch = Sequential()
    left_branch.add(Masking(mask_value=0, input_shape=(max_len, len_chars)))
    if shortcut:
        left_branch_aux1 = Sequential()
        left_branch_aux1.add(left_branch)
        left_branch_aux1.add(gru1)
        left_branch_aux2 = Sequential()
        left_branch_aux2.add(
            Merge([left_branch, left_branch_aux1], mode='concat'))
        left_branch = left_branch_aux2
    else:
        left_branch.add(gru1)
    left_branch.add(Dropout(0.01))
    left_branch.add(gru2)
    left_branch.add(Dropout(0.01))
    # definition for right branch of the network
    right_branch = Sequential()
    right_branch.add(Masking(mask_value=0, input_shape=(max_len, len_chars)))
    if shortcut:
        right_branch_aux1 = Sequential()
        right_branch_aux1.add(right_branch)
        right_branch_aux1.add(gru1)
        right_branch_aux2 = Sequential()
        right_branch_aux2.add(
            Merge([right_branch, right_branch_aux1], mode='concat'))
        right_branch = right_branch_aux2
    else:
        right_branch.add(gru1)
    right_branch.add(Dropout(0.01))
    right_branch.add(gru2)
    right_branch.add(Dropout(0.01))
    # mechanisms used for building representations from the GRU states (e.g., through attention)
    if alignment:
        left_branch, right_branch = AlignmentAttention(left_branch,
                                                       right_branch)
    if selfattention:
        att = SelfAttLayer()
        left_branch.add(att)
        right_branch.add(att)
    elif maxpooling:
        left_branch.add(GlobalMaxPooling1DMasked())
        right_branch.add(GlobalMaxPooling1DMasked())
    elif alignment:
        gru3 = GRU(hidden_units, consume_less='gpu', return_sequences=False)
        if bidirectional: gru3 = Bidirectional(gru3)
        left_branch.add(gru3)
        right_branch.add(gru3)
    # combine the two representations and produce the final classification
    con_layer = Sequential(name="con_layer")
    con_layer.add(
        Merge([left_branch, right_branch], mode='concat', name="merge_con"))
    mul_layer = Sequential(name="mul_layer")
    mul_layer.add(
        Merge([left_branch, right_branch], mode='mul', name="merge_mul"))
    dif_layer = Sequential(name="dif_layer")
    dif_layer.add(
        Merge([left_branch, right_branch],
              mode=lambda x: x[0] - x[1],
              output_shape=lambda x: x[0],
              name="merge_dif"))
    final_model = Sequential(name="final_model")
    if onlyconcat: final_model.add(con_layer)
    else:
        final_model.add(
            Merge([con_layer, mul_layer, dif_layer],
                  mode='concat',
                  name="merge_threeconcat"))
    final_model.add(Dropout(0.01))
    final_model.add(Dense(hidden_units, activation='relu'))
    final_model.add(Dropout(0.01))
    if multiplerlu:
        final_model.add(Highway(activation='relu'))
        final_model.add(Dropout(0.01))
        final_model.add(Highway(activation='relu'))
        final_model.add(Dropout(0.01))
    final_model.add(Dense(1, activation='sigmoid'))
    print('Compiling...')
    final_model.compile(optimizer='adam',
                        loss='binary_crossentropy',
                        metrics=['accuracy'])
    print('Fitting...')
    final_model.fit([train_data_1, train_data_2],
                    train_labels,
                    verbose=0,
                    validation_data=([test_data_1, test_data_2], test_labels),
                    callbacks=[early_stop, checkpointer],
                    nb_epoch=20)
    start_time = time.time()
    print("Evaluating ...")
    aux = final_model.predict_classes([test_data_1, test_data_2]).ravel()
    return aux, (time.time() - start_time)
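The Merge layers above (modes 'concat', 'mul', and the difference lambda) exist only in Keras 1. On tensor outputs rather than Sequential models, the Keras 2 counterparts would be:

# Keras 2 sketch of the three merge branches; left and right stand for the
# two branch output tensors and are placeholders here.
from tensorflow.keras.layers import Concatenate, Multiply, Subtract
con = Concatenate(name='merge_con')([left, right])
mul = Multiply(name='merge_mul')([left, right])
dif = Subtract(name='merge_dif')([left, right])  # x[0] - x[1]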
Example No. 18
def main(MODEL_FILE):

    print "Loading hdf5's..."
    test_dict = io.load('./data/test_dict_IPConv_ntuple_'+ RUN_NAME +'.h5')
    train_dict = io.load('./data/train_dict_IPConv_ntuple_'+ RUN_NAME +'.h5')
    
    X_train = train_dict['X']
    y_train = train_dict['y']    

    X_test = test_dict['X']
    y_test = test_dict['y']
    n_features = X_test.shape[2]

    # this is a df
    ip3d = test_dict['ip3d'] 

    print('Building model...')
    
    if (MODEL_FILE == 'CRNN'):
        graph = build_graph(n_features)

        model = Sequential()

        model.add(graph)
        # remove Maxout for tensorflow
        model.add(MaxoutDense(64, 5, input_shape=graph.nodes['dropout'].output_shape[1:]))
        model.add(Dense(64))

    elif (MODEL_FILE == 'RNN'):

        model = Sequential()
        model.add(Masking(mask_value=-999, input_shape=(N_TRACKS, n_features)))
        model.add(GRU(25))#, input_shape=(N_TRACKS, n_features))) #GRU
        model.add(Dropout(0.2)) #0.2
    
        # remove Maxout for tensorflow
        model.add(MaxoutDense(64, 5))  #, input_shape=graph.nodes['dropout'].output_shape[1:]))
        model.add(Dense(64))

  
    model.add(Dropout(0.4))

    model.add(Highway(activation = 'relu'))

    model.add(Dropout(0.3))
    model.add(Dense(4))

    model.add(Activation('softmax'))

    print('Compiling model...')
    model.compile('adam', 'categorical_crossentropy')
    model.summary()

    print('Training:')
    try:
        model.fit(X_train, y_train, batch_size=512,
            callbacks = [
                EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
                ModelCheckpoint(MODEL_FILE + RUN_NAME +'-progress', monitor='val_loss', verbose=True, save_best_only=True)
            ],
        nb_epoch=100, 
        validation_split = 0.2, 
        show_accuracy=True) 
        
    except KeyboardInterrupt:
        print('Training ended early.')

    # -- load in best network
    model.load_weights(MODEL_FILE + RUN_NAME +'-progress')
    
    if (SAVE_PROTOBUF):
        print('Saving protobuf')
        # write out to a new directory called models
        # the actual graph file is graph.pb
        # the graph def is in the global session
        import tensorflow as tf
        import keras.backend.tensorflow_backend as tfbe

        sess = tfbe._SESSION

        saver = tf.train.Saver()
        tf.train.write_graph(sess.graph_def, 'models/', 'graph.pb', as_text=False)    

        save_path = saver.save(sess, "./model-weights.ckpt")
        print "Model saved in file: %s" % save_path
        
        print(saver.as_saver_def().filename_tensor_name)
        print(saver.as_saver_def().restore_op_name)

        print(model.get_output())

    print('Saving weights...')
    model.save_weights('./weights/ip3d-replacement_' + MODEL_FILE + RUN_NAME +'.h5', overwrite=True)

    json_string = model.to_json()
    open(MODEL_FILE + RUN_NAME +'.json', 'w').write(json_string)

    print('Testing...')
    yhat = model.predict(X_test, verbose = True, batch_size = 512) 
    io.save('yhat'+ RUN_NAME +'.h5', yhat) 
     
    print('Plotting ROC...')
    fg = plot_ROC(y_test, yhat, ip3d, MODEL_FILE)
    #plt.show()
    fg.savefig('./plots/roc' + MODEL_FILE + RUN_NAME +'.pdf')
Example No. 19
def columbia_net(shape=(64, 64), nb_channels=1):

    logger.info('generating net with input shape ({})'.format(', '.join(
        str(s) for s in shape)))

    img_width, img_height = shape

    nb_poses = 5
    nb_vertical = 3
    nb_horiz = 7

    face = Input(shape=(nb_channels, img_width, img_height))
    left_eye = Input(shape=(nb_channels, img_width, img_height))
    right_eye = Input(shape=(nb_channels, img_width, img_height))

    face_model = Sequential()
    face_model.add(Flatten(input_shape=(nb_channels, img_width, img_height)))
    face_model.add(Dense(1024, activation='relu'))
    face_model.add(Dropout(0.25))
    face_model.add(Highway(activation='relu'))

    face_model.add(Dropout(0.25))
    face_model.add(Highway(activation='relu'))

    face_model.add(Dropout(0.25))
    face_model.add(Highway(activation='relu'))

    face_model.add(Dense(512, activation='relu'))

    face_h = face_model(face)

    eye_model = Sequential()
    eye_model.add(Flatten(input_shape=(nb_channels, img_width, img_height)))
    eye_model.add(Dense(1024, activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))

    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))

    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))

    eye_model.add(Dense(512, activation='relu'))

    # eye_model.add(Flatten())

    left_eye_h = eye_model(left_eye)
    right_eye_h = eye_model(right_eye)

    # combined = merge([face_h, left_eye_h, right_eye_h], mode='concat', concat_axis=1)
    eyes = merge([left_eye_h, right_eye_h], mode='sum')
    combined = merge([face_h, eyes], mode='concat', concat_axis=1)

    h = Dense(128)(combined)
    h = Activation('relu')(h)
    h = Dropout(0.2)(h)
    out_pose = Dense(nb_poses, activation='softmax', name='pose')(h)

    h = Dense(128)(combined)
    h = Activation('relu')(h)
    h = Dropout(0.2)(h)
    out_vertical = Dense(nb_vertical, activation='softmax', name='vertical')(h)

    h = Dense(128)(combined)
    h = Activation('relu')(h)
    h = Dropout(0.2)(h)
    out_horiz = Dense(nb_horiz, activation='softmax', name='horizontal')(h)

    model = Model(input=[face, left_eye, right_eye],
                  output=[out_pose, out_vertical, out_horiz])

    logger.info('compiling with Adam and sparse categorical crossentropy')
    model.compile('adam',
                  3 * ['sparse_categorical_crossentropy'],
                  metrics=['acc'])

    return model
Example No. 20
def LSTMCNN(opt):
    # opt.seq_length = number of time steps (words) in each batch
    # opt.rnn_size = dimensionality of hidden layers
    # opt.num_layers = number of layers
    # opt.dropout = dropout probability
    # opt.word_vocab_size = num words in the vocab
    # opt.word_vec_size = dimensionality of word embeddings
    # opt.char_vocab_size = num chars in the character vocab
    # opt.char_vec_size = dimensionality of char embeddings
    # opt.feature_maps = table of feature map sizes for each kernel width
    # opt.kernels = table of kernel widths
    # opt.length = max length of a word
    # opt.use_words = 1 if use word embeddings, otherwise not
    # opt.use_chars = 1 if use char embeddings, otherwise not
    # opt.highway_layers = number of highway layers to use, if any
    # opt.batch_size = number of sequences in each batch

    if opt.use_words:
        word = Input(batch_shape=(opt.batch_size, opt.seq_length),
                     dtype='int32',
                     name='word')
        word_vecs = Embedding(opt.word_vocab_size,
                              opt.word_vec_size,
                              input_length=opt.seq_length)(word)

    if opt.use_chars:
        chars = Input(batch_shape=(opt.batch_size, opt.seq_length,
                                   opt.max_word_l),
                      dtype='int32',
                      name='chars')
        chars_embedding = TimeDistributed(
            Embedding(opt.char_vocab_size,
                      opt.char_vec_size,
                      name='chars_embedding'))(chars)
        cnn = CNN(opt.seq_length, opt.max_word_l, opt.char_vec_size,
                  opt.feature_maps, opt.kernels, chars_embedding)
        if opt.use_words:
            x = Merge(mode='concat')([cnn, word_vecs])
            inputs = [chars, word]
        else:
            x = cnn
            inputs = chars
    else:
        x = word_vecs
        inputs = word

    if opt.batch_norm:
        x = BatchNormalization()(x)

    for l in range(opt.highway_layers):
        x = TimeDistributed(Highway(activation='relu'))(x)

    for l in range(opt.num_layers):
        x = LSTM(opt.rnn_size,
                 activation='tanh',
                 inner_activation='sigmoid',
                 return_sequences=True,
                 stateful=True)(x)

        if opt.dropout > 0:
            x = Dropout(opt.dropout)(x)

    output = TimeDistributed(Dense(opt.word_vocab_size,
                                   activation='softmax'))(x)

    model = sModel(input=inputs, output=output)
    print(model.summary())

    optimizer = sSGD(lr=opt.learning_rate,
                     clipnorm=opt.max_grad_norm,
                     scale=float(opt.seq_length))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer)

    return model
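Because the LSTMs are built with stateful=True and fixed batch_shape inputs, training generally has to feed batches in sequence order and reset states at sequence boundaries, roughly:

# Sketch of a stateful training loop; x_batches/y_batches are assumed to be
# ordered, fixed-size batches prepared elsewhere, and num_epochs is arbitrary.
for epoch in range(num_epochs):
    for x_batch, y_batch in zip(x_batches, y_batches):
        model.train_on_batch(x_batch, y_batch)
    model.reset_states()  # clear LSTM state between passes over the data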
Example No. 21
model.add(merged)
# model.add(BatchNormalization())
# multiply passage by the dot product
# add softmax here

# model.add(Dropout(.5))
# model.add(Dense(100, activation='softmax'))
# model.add(MaxPooling1D(pool_length=4, stride=None, border_mode='valid'))
# model.add(Activation('relu'))
# model.add(Permute((2, 1)))
# model.add(AveragePooling1D(pool_length=5, stride=None, border_mode='valid'))
# model.add(MaxPooling1D(pool_length=MAX_QUESTION_LENGTH/5, stride=None, border_mode='valid'))
# model.add(Permute((2, 1)))
model.add(Flatten())
model.add(Dropout(.2))
model.add(Highway())  # looks like this kind of worked
model.add(Dropout(.2))
model.add(Dense(MAX_PASSAGE_LENGTH, activation='softmax'))

plot(model, to_file='model.png', show_shapes=True)

# train a 1D convnet with global maxpooling
# adam = Adam(lr=.0001, clipnorm=10)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc', 'recall'])
# metrics=['recall'])

# happy learning!
# model.fit(x=[passages, questions], y=labels, nb_epoch=2, batch_size=128)
model.fit([p_train, q_train],
Example No. 22
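The snippet below relies on names defined earlier in its source file; a plausible Keras 1-style setup for MNIST would be:

# Assumed context, not from the original source: typical MNIST constants.
from keras.datasets import mnist
from keras.layers import Input, Flatten, Dense, Highway  # Highway: Keras 1.x only
from keras.models import Model
from keras.utils import np_utils

img_rows, img_cols, img_channels = 28, 28, 1
nb_classes = 10
nb_layer = 10     # number of highway layers (illustrative)
batch_size = 128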
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, img_channels, img_rows, img_cols)
X_test = X_test.reshape(10000, img_channels, img_rows, img_cols)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

inp_img = Input(shape=(
    img_channels,
    img_rows,
    img_cols,
))
x = Flatten()(inp_img)
x = Dense(32, activation='relu')(x)
for _ in range(nb_layer):
    x = Highway(activation='relu')(x)
y = Dense(nb_classes, activation='softmax')(x)

classifier = Model(input=inp_img, output=y)
classifier.compile(optimizer='rmsprop',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
classifier.fit(X_train,
               Y_train,
               nb_epoch=100,
               batch_size=batch_size,
               verbose=1)
Example No. 23
def build_GRU(dim, lsize, bsize, cost = 'mse'):
    def filt(args):
        return subsample(args[0]) * K.abs(subsample(args[1]))

    def subtract(args):
        return K.abs(args[0] - args[1])

    def myfun(args):
        return K.abs(subsample(args[0]))

    def rect(args):
        return K.abs(args[0])

    def flipsum(args):
        return args[0][:, ::-1, :] + args[1]

    def subsample(args):
        return args[:, 3:15, :]

    xin = Input(batch_shape = (bsize, lsize, dim))

    # Encoding Part using bi-directional GRUs
    rnnAF = GRU(dim, init='glorot_normal', activation = 'tanh',
        inner_activation='hard_sigmoid', W_regularizer=None,
               U_regularizer=None, stateful = False, return_sequences = True, consume_less = 'gpu') (xin)
    mr = merge([xin, rnnAF], mode = 'sum')

    rnnBF = GRU(dim, init='glorot_normal', activation = 'tanh',
        inner_activation='hard_sigmoid', W_regularizer=None,
               U_regularizer=None, go_backwards = True, return_sequences = True, consume_less = 'mem') (xin)
    mrB = merge([xin, rnnBF], mode = flipsum, output_shape = (lsize, dim))

    # The return of Bi-GRUs
    mrBDIR = merge([mr, mrB], mode = 'concat')

    # Decoding part
    rnnDsv = GRU(dim, init='glorot_normal', activation = 'tanh',
        inner_activation='hard_sigmoid', W_regularizer=None, U_regularizer=None,
                 stateful = False, return_sequences = True, consume_less = 'gpu') (mrBDIR)
    svout = merge([xin, rnnDsv], output_shape = (lsize/2, dim), mode = filt)

    # Post filtering with sparsity constraint
    hC = TimeDistributed(Highway(input_dim = dim, activation='relu', activity_regularizer=activity_l2(1e-4))) (svout)

    model = Model(xin, [svout, hC, rnnDsv])

    # Cost Functions
    def mseloss(ytrue, ypred):
        # Update of the true
        ytrue = xin * (K.pow(ytrue, 1.) + K.epsilon())/(K.pow(xin, 1.) + K.epsilon())

        return K.sum(K.pow(ytrue-ypred,2.), axis = -1)

    def KL(ytrue, ypred):
        # Update of the true
        ytrue = subsample(xin) * (K.pow(ytrue, 1.) + K.epsilon())/(K.pow(subsample(xin), 1.) + K.epsilon()) + 1e-6
        ypred += 1e-6

        return K.sum(ytrue * (K.log(ytrue) - K.log(ypred)) + (ypred - ytrue), axis=-1)


    def KLbkg(ytrue, ypred):
        # Update of the true
        ytrue = subsample(xin) * K.abs(1. - ((K.pow(ytrue, 1.) + K.epsilon())/(K.pow(subsample(xin), 1.) + K.epsilon()))) + 1e-6
        ypred += 1e-6

        return K.sum(ytrue * (K.log(ytrue) - K.log(ypred)) + (ypred - ytrue), axis=-1)


    if cost == 'mse':
        print('MSE')
        model.compile(optimizer = opt, loss = [mseloss, mseloss])
    elif cost == 'kl':
        print('Kullback-Leibler')
        model.compile(optimizer = opt, loss = [KL, KL])
    elif cost == 'klbkg':
        print('Kullback-Leibler for Accompaniment Instrument')
        model.compile(optimizer = opt, loss = [KLbkg, KLbkg])

    return model
Example No. 24
model_left = Sequential()
model_left.add(Dense(5, input_dim=4, init='glorot_uniform'))
model_left.add(BatchNormalization(mode=2))
model_left.add(Activation('relu'))
model_left.add(Dense(5))
model_left.add(BatchNormalization(mode=2))
model_left.add(Activation('relu'))
model_left.add(Dense(3))
model_left.add(Activation('relu'))
model_left.add(Dense(4))

for i in range(0, 6):
    print(i, model_left.layers[i].name)

model_right = Sequential()
model_right.add(Dense(4, input_shape=(4, )))
model_right.add(Highway())
model_right.add(BatchNormalization(mode=2))

model2 = Sequential()
model2.add(Merge([model_left, model_right], mode='concat'))
model2.add(Activation('relu'))
model2.add(Reshape((8, )))
model2.add(Dense(5))
model2.add(BatchNormalization(mode=2))
model2.add(Activation('relu'))
model2.add(Dense(3))
model2.add(Activation('relu'))
model2.add(Dense(4))

for i in range(0, 6):
    print(i, model2.layers[i].name)
Example No. 25
    def __init__(self,
                 n_in,
                 hidden_layer_size,
                 n_out,
                 L1_reg,
                 L2_reg,
                 hidden_layer_type,
                 output_type='LINEAR',
                 dropout_rate=0.0):

        logger = logging.getLogger("DNN initialization")

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.n_layers = len(hidden_layer_size)
        self.dropout_rate = dropout_rate
        self.L1_reg = L1_reg
        self.L2_reg = L2_reg
        self.optimizer = 'adam'
        # fix random seed for reproducibility
        seed = 123
        np.random.seed(seed=seed)
        # The model must have at least one hidden layer
        assert self.n_layers > 0, 'Model must have at least one hidden layer'
        # Number of hidden layers and their types should be equal
        assert len(hidden_layer_size) == len(hidden_layer_type)
        ### Create model graph ###
        self.model = Sequential()
        self.model.add(
            Dense(output_dim=128,
                  input_dim=n_in,
                  init='glorot_uniform',
                  activation='relu',
                  W_regularizer=l1l2(l1=self.L1_reg, l2=self.L2_reg)))
        self.model.add(Dropout(self.dropout_rate))
        num_layers = 15
        for i in range(num_layers):
            self.model.add(Highway(activation='relu'))
            self.model.add(Dropout(self.dropout_rate))
        #self.model.add(Dropout(dropout))

        # add output layer
        if output_type.lower() == 'linear':
            self.final_layer = self.model.add(
                Dense(output_dim=n_out,
                      input_dim=hidden_layer_size[-1],
                      init='glorot_uniform',
                      activation='linear',
                      W_regularizer=l1l2(l1=self.L1_reg, l2=self.L2_reg)))
        elif output_type.lower() == 'sigmoid':
            self.final_layer = self.model.add(
                Dense(output_dim=n_out,
                      input_dim=hidden_layer_size[-1],
                      init='glorot_uniform',
                      activation='sigmoid',
                      W_regularizer=l1l2(l1=self.L1_reg, l2=self.L2_reg)))
        else:
            logger.critical(
                "This output activation function: %s is not supported right now!"
                % (output_type))
            sys.exit(1)

        # Compile the model
        self.model.compile(loss='mse', optimizer=self.optimizer)
Example No. 26
data = pd.DataFrame(np.concatenate((X_train2, Y_train), axis=1))
data2 = shuffle(data)

X_train2 = np.array(data2.iloc[:, 0:4])  # .ix[:, 0:3] was label-inclusive; .iloc needs 0:4
Y_train = np.array(pd.get_dummies(data2.iloc[:, 4]))

sgd = SGD(lr=learning_rate,momentum=momentum, decay=decay_rate, nesterov=False)

np.var(X_train2.T)

model = Sequential()
model.add(Dense(7, input_dim=4, init='glorot_uniform'))
model.add(Activation('relu'))
model.add(Dense(7, init='glorot_uniform'))
model.add(Highway())
model.add(Dense(3, init='glorot_uniform'))
model.add(Activation('sigmoid'))
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,metrics=['accuracy'])


model.fit(X_train2, Y_train, 
           batch_size = 30, nb_epoch = 1000, verbose = 1,validation_split=0.9)

res22 = model.predict_classes(X_train2, batch_size=30)  # the model takes a single input
acc22 = ((res22 - data2.iloc[:, 4].values) == 0).sum() / len(res22)
acc22


Example No. 27
# Note: there is a bug in the original implementation of TimeDistributed in Keras
input_tensor = tf.placeholder(tf.int32, shape=(
    opts.batch_size, opts.sequence_length))
a = Input(batch_shape=(opts.batch_size, opts.sequence_length),
          tensor=input_tensor, name='input')
e = Embedding(opts.vocabulary_size,
              opts.embedding_size,
              input_length=opts.sequence_length, name='embedding')(a)
x = e
for i in range(1, opts.interaction_times + 1):
    x = PairwiseInteraction(gate_type=opts.gate_type,
                            activation=opts.activation_type,
                            dropout=opts.dropout,
                            name='interaction_layer_%d' % i)(x)
    if opts.highway:
        x = TimeDistributed(Highway())(x)
    x = KMaxTensorPooling(opts.sequence_length,
                          name='kmaxpooling_layer_%d' % i)(x)
x = Lambda(lambda t: tf.reshape(
    t, [-1, opts.sequence_length * opts.embedding_size]))(x)
e = Lambda(lambda t: tf.reshape(
    t, [-1, opts.sequence_length * opts.embedding_size]))(e)
x = merge([x, e], mode='concat', concat_axis=1)
x = Dense(128, activation='sigmoid')(x)
x = Dense(32, activation='sigmoid')(x)
x = Dense(1, activation='sigmoid', name='prob')(x)

model = Model(input=a, output=x)
model.compile(optimizer='nadam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
Example No. 28
    def _generate_model(self, lembedding, num_classes=2, ngrams=[1,2,3,4,5],
                        nfilters=64, rnn_type=GRU, rnn_dim=80, train_vectors=True):

        CHARACTERS_PER_WORD = lembedding.size_level1
        WORDS_PER_DOCUMENT = lembedding.size_level2
        EMBEDDING_DIM = lembedding.vector_box.vector_dim

        INPUT_SHAPE = (CHARACTERS_PER_WORD * WORDS_PER_DOCUMENT, )
        EMBEDDING_SHAPE = (WORDS_PER_DOCUMENT, CHARACTERS_PER_WORD, EMBEDDING_DIM)

        doc = Input(shape=(INPUT_SHAPE[0], ), dtype='int32')

        embedded = Sequential([
                Embedding(
                    input_dim=lembedding.vector_box.size, 
                    output_dim=EMBEDDING_DIM, 
                    input_length=INPUT_SHAPE[0]
                    ), 
                Reshape(EMBEDDING_SHAPE)
            ])(doc)

        def sub_model(n):
            return Sequential([
                    Convolution1D(nfilters, n, 
                        activation='relu', 
                        input_shape=EMBEDDING_SHAPE[1:]
                        ), 
                    Lambda(
                        lambda x: K.max(x, axis=1), 
                        output_shape=(nfilters,)
                        )
                ])

        rep = Dropout(0.5)(
            merge(
                [TimeDistributed(sub_model(n))(embedded) for n in ngrams], 
                mode='concat', 
                concat_axis=-1
            )
        )

        out = Dropout(0.5)(
            merge(
                [rnn_type(rnn_dim)(rep), rnn_type(rnn_dim, go_backwards=True)(rep)], 
                mode='concat', 
                concat_axis=-1
            )
        )

        mapping = [
                Highway(activation='relu'),
                Dropout(0.5),
                Dense(64, activation='relu'),
                Dropout(0.4)
            ]

        for f in mapping:
            out = f(out)

        if num_classes == 2:
            out = Dense(1, activation='sigmoid')(out)
            model = Model(input=doc, output=out)
            if self.optimizer is None:
                self.optimizer = 'rmsprop'
            model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        else:
            out = Dense(num_classes, activation='softmax')(out)
            model = Model(input=doc, output=out)
            if self.optimizer is None:
                self.optimizer = 'adam'
            model.compile(loss='categorical_crossentropy', optimizer=self.optimizer, metrics=["accuracy"])
        
        return model
Example No. 29
    return (x-np.min(x))/(np.max(x)-np.min(x))

part=8
thre=1
## The correct value is 256
recog=Sequential()
recog.add(Dense(64,activation='relu',input_shape=(784,),init='glorot_uniform'))
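# NOTE: the two assignments below are aliases, not copies; recog_left,
# recog_right and recog all name the same Sequential object, so every
# .add() call in this block appends to one shared model.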

recog_left=recog
recog_left.add(Dense(64,input_shape=(64,),activation='relu'))

recog_right=recog
recog_right.add(Dense(64,input_shape=(64,),activation='relu'))
recog_right.add(Lambda(lambda x: x + K.exp(x / 2) * K.random_normal(shape=(1, 64), mean=0.,
                              std=epsilon_std), output_shape=(64,)))
recog_right.add(Highway())
recog_right.add(Activation('sigmoid'))

recog1=Sequential()
recog1.add(Merge([recog_left,recog_right],mode = 'ave'))
recog1.add(Dense(784))

#### HERE***
recog11=Sequential()
layer=Dense(64,init='glorot_uniform',input_shape=(784,))
layer.trainable=False
recog11.add(layer)
layer2=Dense(784, activation='sigmoid',init='glorot_uniform')
layer2.trainable=False
recog11.add(layer2)
recog11.layers[0].W.set_value(np.ones((784,64)).astype(np.float32))
Example No. 30
def main(MODEL_FILE):

    test_dict = io.load('./data/test_dict_IPConv.h5')
    train_dict = io.load('./data/train_dict_IPConv.h5')

    X_train = train_dict['X']
    y_train = train_dict['y']
    n_features = X_train.shape[2]

    X_test = test_dict['X']
    y_test = test_dict['y']
    ip3d = test_dict['ip3d']  # this is a df

    print('Building model...')

    if (MODEL_FILE == 'CRNN'):
        graph = build_graph(n_features)

        model = Sequential()

        model.add(graph)

        model.add(Dense(64))

    elif (MODEL_FILE == 'RNN'):

        graph = build_graph_noCNN(n_features)

        model = Sequential()
        model.add(graph)

        model.add(Dense(64))

    model.add(Dropout(0.4))

    model.add(Highway(activation='relu'))

    model.add(Dropout(0.4))  #3
    model.add(Dense(4))

    model.add(Activation('softmax'))

    print('Compiling model...')
    model.compile('adam', 'categorical_crossentropy')
    model.summary()

    print('Training:')
    try:
        model.fit(X_train,
                  y_train,
                  batch_size=512,
                  callbacks=[
                      EarlyStopping(verbose=True,
                                    patience=20,
                                    monitor='val_loss'),
                      ModelCheckpoint(MODEL_FILE + '-progress',
                                      monitor='val_loss',
                                      verbose=True,
                                      save_best_only=True)
                  ],
                  nb_epoch=200,
                  validation_split=0.2,
                  show_accuracy=True)

    except KeyboardInterrupt:
        print('Training ended early.')

    # -- load in best network
    model.load_weights(MODEL_FILE + '-progress')

    print('Saving weights...')
    model.save_weights('./weights/ip3d-replacement_' + MODEL_FILE + '.h5',
                       overwrite=True)

    print('Testing...')
    yhat = model.predict(X_test, verbose=True, batch_size=512)

    print('Plotting ROC...')
    fg = plot_ROC(y_test, yhat, ip3d, MODEL_FILE)
    #plt.show()
    fg.savefig('./plots/roc_' + MODEL_FILE + '.pdf')