def run(gParameters):

    fpath = fetch_data(gParameters)
    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()

    learning_rate = gParameters['learning_rate']
    batch_size = gParameters['batch_size']
    epochs = gParameters['epochs']
    dropout = gParameters['dropout']
    optimizer = gParameters['optimizer']
    wv_len = gParameters['wv_len']
    filter_sizes = gParameters['filter_sizes']
    filter_sets = gParameters['filter_sets']
    num_filters = gParameters['num_filters']
    emb_l2 = gParameters['emb_l2']
    w_l2 = gParameters['w_l2']

    train_x = np.load(fpath + '/train_X.npy')
    train_y = np.load(fpath + '/train_Y.npy')
    test_x = np.load(fpath + '/test_X.npy')
    test_y = np.load(fpath + '/test_Y.npy')

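    # Re-encode the labels in each task column as contiguous integer class indices (0..n_classes-1).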
    for task in range(len(train_y[0, :])):
        cat = np.unique(train_y[:, task])
        train_y[:, task] = [np.where(cat == x)[0][0] for x in train_y[:, task]]
        test_y[:, task] = [np.where(cat == x)[0][0] for x in test_y[:, task]]

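    # Build one (filter size, filter count) pair per filter set; each set shifts the base filter size by k.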
    run_filter_sizes = []
    run_num_filters = []

    for k in range(filter_sets):
        run_filter_sizes.append(filter_sizes + k)
        run_num_filters.append(num_filters)

    ret = run_cnn(gParameters,
                  train_x,
                  train_y,
                  test_x,
                  test_y,
                  learning_rate=learning_rate,
                  batch_size=batch_size,
                  epochs=epochs,
                  dropout=dropout,
                  optimizer=optimizer,
                  wv_len=wv_len,
                  filter_sizes=run_filter_sizes,
                  num_filters=run_num_filters,
                  emb_l2=emb_l2,
                  w_l2=w_l2)

    return ret
def run(gParameters):

    # Construct extension to save model
    ext = p1b2.extension_from_parameters(gParameters, '.keras')
    candle.verify_path(gParameters['save_path'])
    prefix = '{}{}'.format(gParameters['save_path'], ext)
    logfile = gParameters['logfile'] if gParameters[
        'logfile'] else prefix + '.log'
    #candle.set_up_logger(logfile, p1b2.logger, gParameters['verbose'])
    #p1b2.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    (X_train, y_train), (X_test, y_test) = p1b2.load_data2(gParameters, seed)

    print("Shape X_test: ", X_test.shape)
    print("Shape y_test: ", y_test.shape)

    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))
    print("Range y_test --> Min: ", np.min(y_test), ", max: ", np.max(y_test))

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    # load json and create model
    json_file = open('p1b2.model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model_json = model_from_json(loaded_model_json)

    # load weights into new model
    loaded_model_json.load_weights('p1b2.model.h5')
    print("Loaded model from disk")

    # evaluate loaded model on test data
    loaded_model_json.compile(loss=gParameters['loss'],
                              optimizer=optimizer,
                              metrics=['accuracy'])
    y_pred = loaded_model_json.predict(X_test)
    scores = p1b2.evaluate_accuracy_one_hot(y_pred, y_test)
    print('Evaluation on test data:', scores)
def run(gParameters):

    #print( gParameters )

    fpath = fetch_data(gParameters)
    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()

    learning_rate = gParameters['learning_rate']
    batch_size = gParameters['batch_size']
    epochs = gParameters['epochs']
    dropout = gParameters['dropout']

    optimizer = gParameters['optimizer']
    if optimizer == 0:
        optimizer = 'adam'
    elif optimizer == 1:
        optimizer = 'adadelta'
    elif optimizer == 2:
        optimizer = 'sgd'
    elif optimizer == 3:
        optimizer = 'rmsprop'

    wv_len = gParameters['wv_len']
    attention_size = gParameters['attention_size']

    train_x = np.load(fpath + '/train_X.npy')
    train_y = np.load(fpath + '/train_Y.npy')
    test_x = np.load(fpath + '/test_X.npy')
    test_y = np.load(fpath + '/test_Y.npy')
    

    
    num_classes = []
    for task in range(len(train_y[0, :])):
        cat = np.unique(train_y[:, task])
        num_classes.append(len(cat))
        train_y[:, task] = [np.where(cat == x)[0][0] for x in train_y[:, task]]
        test_y[:, task] = [np.where(cat == x)[0][0] for x in test_y[:, task]]
    num_tasks = len(num_classes)
    

    max_vocab = np.max(train_x)
    max_vocab2 = np.max(test_x)
    if max_vocab2 > max_vocab:
        max_vocab = max_vocab2

    vocab_size = max_vocab + 1

    vocab = np.random.rand(vocab_size, wv_len)

    train_samples = train_x.shape[0]
    test_samples = test_x.shape[0]

    max_lines = 50
    max_words = 30

    train_x = train_x.reshape((train_x.shape[0], max_lines, max_words))
    test_x = test_x.reshape((test_x.shape[0], max_lines, max_words))


    #optional masking
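    # Each document gets a random number of non-empty lines, and within each of those lines a
    # random number of leading word positions is zeroed out. The resulting mask is assembled
    # here but is not passed to model.train() in this example.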
    min_lines = 30
    min_words = 5
    mask = []
    for i in range(train_samples+test_samples):
        doc_mask = np.ones((1,max_lines,max_words))
        num_lines = np.random.randint(min_lines,max_lines)
        for j in range(num_lines):
            num_words = np.random.randint(min_words,max_words)
            doc_mask[0,j,:num_words] = 0
        mask.append(doc_mask)

    mask = np.concatenate(mask,0)

    # train model
    model = hcan(vocab, num_classes, max_lines, max_words,
                 attention_size=attention_size,
                 dropout_rate=dropout,
                 lr=learning_rate,
                 optimizer=optimizer)

    ret = model.train(
        train_x,
        [
            np.array(train_y[:, 0]),
            np.array(train_y[:, 1]),
            np.array(train_y[:, 2]),
            np.array(train_y[:, 3])
        ],
        batch_size=batch_size, epochs=epochs,
        validation_data=[
            test_x,
            [
                np.array(test_y[:, 0]),
                np.array(test_y[:, 1]),
                np.array(test_y[:, 2]),
                np.array(test_y[:, 3])
            ]
        ])

    return ret
def run(params):

    args = candle.ArgumentStruct(**params)
    seed = args.rng_seed
    candle.set_seed(seed)

    # # Construct extension to save model
    # ext = p1b1.extension_from_parameters(params, '.keras')
    # candle.verify_path(params['save_path'])
    # prefix = '{}{}'.format(params['save_path'], ext)
    # logfile = params['logfile'] if params['logfile'] else prefix+'.log'
    # candle.set_up_logger(logfile, p1b1.logger, params['verbose'])
    #p1b1.logger.info('Params: {}'.format(params))

    # Get default parameters for initialization and optimizer functions
    keras_defaults = candle.keras_default_config()

    # Load dataset
    x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels = p1b1.load_data(
        params, seed)

    # cache_file = 'data_l1000_cache.h5'
    # save_cache(cache_file, x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels)
    # x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels = load_cache(cache_file)

    # p1b1.logger.info("Shape x_train: {}".format(x_train.shape))
    # p1b1.logger.info("Shape x_val:   {}".format(x_val.shape))
    #p1b1.logger.info("Shape x_test:  {}".format(x_test.shape))

    # p1b1.logger.info("Range x_train: [{:.3g}, {:.3g}]".format(np.min(x_train), np.max(x_train)))
    # p1b1.logger.info("Range x_val:   [{:.3g}, {:.3g}]".format(np.min(x_val), np.max(x_val)))
    #p1b1.logger.info("Range x_test:  [{:.3g}, {:.3g}]".format(np.min(x_test), np.max(x_test)))

    # p1b1.logger.debug('Class labels')
    # for i, label in enumerate(y_labels):
    #     p1b1.logger.debug('  {}: {}'.format(i, label))

    # clf = build_type_classifier(x_train, y_train, x_val, y_val)

    n_classes = len(y_labels)
    cond_train = y_train
    cond_val = y_val
    cond_test = y_test

    input_dim = x_train.shape[1]
    cond_dim = cond_train.shape[1]
    latent_dim = params['latent_dim']

    activation = params['activation']
    dropout = params['dropout']
    dense_layers = params['dense']
    dropout_layer = AlphaDropout if params['alpha_dropout'] else Dropout

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(params['initialization'],
                                                   keras_defaults, seed)
    initializer_bias = candle.build_initializer('constant', keras_defaults, 0.)

    if dense_layers is not None:
        if type(dense_layers) != list:
            dense_layers = list(dense_layers)
    else:
        dense_layers = []

    # Encoder Part
    x_input = Input(shape=(input_dim, ))
    cond_input = Input(shape=(cond_dim, ))
    h = x_input
    if params['model'] == 'cvae':
        h = keras.layers.concatenate([x_input, cond_input])

    for i, layer in enumerate(dense_layers):
        if layer > 0:
            x = h
            h = Dense(layer,
                      activation=activation,
                      kernel_initializer=initializer_weights,
                      bias_initializer=initializer_bias)(h)
            if params['residual']:
                try:
                    h = keras.layers.add([h, x])
                except ValueError:
                    pass
            if params['batch_normalization']:
                h = BatchNormalization()(h)
            if dropout > 0:
                h = dropout_layer(dropout)(h)

    if params['model'] == 'ae':
        encoded = Dense(latent_dim,
                        activation=activation,
                        kernel_initializer=initializer_weights,
                        bias_initializer=initializer_bias)(h)
    else:
        epsilon_std = params['epsilon_std']
        z_mean = Dense(latent_dim, name='z_mean')(h)
        z_log_var = Dense(latent_dim, name='z_log_var')(h)
        encoded = z_mean

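        # VAE loss: per-feature reconstruction cross-entropy plus the KL divergence of q(z|x) from a unit Gaussian.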
        def vae_loss(x, x_decoded_mean):
            xent_loss = binary_crossentropy(x, x_decoded_mean)
            kl_loss = -0.5 * K.sum(
                1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return K.mean(xent_loss + kl_loss / input_dim)

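        # Reparameterization trick: draw z = mu + sigma * epsilon, with epsilon ~ N(0, epsilon_std).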
        def sampling(args):
            z_mean_, z_log_var_ = args
            batch_size = K.shape(z_mean_)[0]
            epsilon = K.random_normal(shape=(batch_size, latent_dim),
                                      mean=0.,
                                      stddev=epsilon_std)
            return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

        z = Lambda(sampling, output_shape=(latent_dim, ))([z_mean, z_log_var])
        if params['model'] == 'cvae':
            z_cond = keras.layers.concatenate([z, cond_input])

    # Build autoencoder model
    if params['model'] == 'cvae':
        # encoder = Model([x_input, cond_input], encoded)
        # decoder = Model([decoder_input, cond_input], decoded)
        # model = Model([x_input, cond_input], decoder([z, cond_input]))
        loss = vae_loss
        metrics = [xent, corr, mse]
    elif params['model'] == 'vae':
        # encoder = Model(x_input, encoded)
        # decoder = Model(decoder_input, decoded)
        # model = Model(x_input, decoder(z))
        loss = vae_loss
        metrics = [xent, corr, mse]
    else:
        # encoder = Model(x_input, encoded)
        # decoder = Model(decoder_input, decoded)
        # model = Model(x_input, decoder(encoded))
        loss = params['loss']
        metrics = [xent, corr]

    # Define optimizer
    # optimizer = candle.build_optimizer(params['optimizer'],
    #                                             params['learning_rate'],
    #                                             keras_defaults)
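    # Instantiate the optimizer by class name with its default config, then override its learning rate below if one was given.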
    optimizer = optimizers.deserialize({
        'class_name': params['optimizer'],
        'config': {}
    })
    base_lr = params['base_lr'] or K.get_value(optimizer.lr)
    if params['learning_rate']:
        K.set_value(optimizer.lr, params['learning_rate'])

    if params['model'] == 'cvae':
        # inputs = [x_train, cond_train]
        # val_inputs = [x_val, cond_val]
        test_inputs = [x_test, cond_test]
    else:
        # inputs = x_train
        # val_inputs = x_val
        test_inputs = x_test

    test_outputs = x_test

    model_name = params['model_name']

    # load json and create model
    json_file = open('{}.{}.model.json'.format(model_name, params['model']),
                     'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model_json = model_from_json(loaded_model_json)

    # load weights into new model
    loaded_model_json.load_weights('{}.{}.weights.h5'.format(
        model_name, params['model']))
    print("Loaded model from disk")

    # evaluate loaded model on test data
    loaded_model_json.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    x_pred = loaded_model_json.predict(test_inputs)
    scores = p1b1.evaluate_autoencoder(x_pred, x_test)
    # p1b1.logger.info('\nEvaluation on test data: {}'.format(scores))
    print('Evaluation on test data: {}'.format(scores))

    # load encoder
    encoder = load_model('{}.{}.encoder.h5'.format(model_name,
                                                   params['model']))
    print("Loaded encoder from disk")

    x_test_encoded = encoder.predict(test_inputs,
                                     batch_size=params['batch_size'])
    y_test_classes = np.argmax(y_test, axis=1)
    candle.plot_scatter(x_test_encoded, y_test_classes,
                        '{}.{}.latent'.format(model_name, params['model']))

    if params['tsne']:
        tsne = TSNE(n_components=2, random_state=seed)
        x_test_encoded_tsne = tsne.fit_transform(x_test_encoded)
        candle.plot_scatter(
            x_test_encoded_tsne, y_test_classes,
            '{}.{}.latent.tsne'.format(model_name, params['model']))
#model.add(Activation('relu'))
#model.add(MaxPooling1D(pool_size=1))
#model.add(Conv1D(filters=128, kernel_size=10, strides=1, padding='valid'))
#model.add(Activation('relu'))
#model.add(MaxPooling1D(pool_size=10))
#model.add(Flatten())
#model.add(Dense(200))
#model.add(Activation('relu'))
#model.add(Dropout(0.1))
#model.add(Dense(20))
#model.add(Activation('relu'))
#model.add(Dropout(0.1))
#model.add(Dense(CLASSES))
#model.add(Activation('softmax'))

kerasDefaults = candle.keras_default_config()

# Define optimizer
optimizer = candle.build_optimizer(candle_params['optimizer'],
                                   candle_params['learning_rate'],
                                   kerasDefaults)

model.summary()
model.compile(loss=candle_params['loss'],
              optimizer=optimizer,
              metrics=[candle_params['metrics']])

output_dir = candle_params['save']

if not os.path.exists(output_dir):
    os.makedirs(output_dir)
def run(gParameters):
    """
    Runs the model using the specified set of parameters

    Args:
       gParameters: a Python dictionary containing the parameters
       (e.g. epochs) to run the model with.
    """
    #
    if 'dense' in gParameters:
        dval = gParameters['dense']
        if type(dval) != list:
            res = list(dval)
            #try:
            #is_str = isinstance(dval, basestring)
            #except NameError:
            #is_str = isinstance(dval, str)
            #if is_str:
            #res = str2lst(dval)
            gParameters['dense'] = res
        print(gParameters['dense'])

    if 'conv' in gParameters:
        #conv_list = p1_common.parse_conv_list(gParameters['conv'])
        #cval = gParameters['conv']
        #try:
        #is_str = isinstance(cval, basestring)
        #except NameError:
        #is_str = isinstance(cval, str)
        #if is_str:
        #res = str2lst(cval)
        #gParameters['conv'] = res
        print('Conv input', gParameters['conv'])
    # print('Params:', gParameters)
    # Construct extension to save model
    ext = benchmark.extension_from_parameters(gParameters, '.keras')
    logfile = gParameters['logfile'] if gParameters[
        'logfile'] else gParameters['output_dir'] + ext + '.log'

    fh = logging.FileHandler(logfile)
    fh.setFormatter(
        logging.Formatter("[%(asctime)s %(process)d] %(message)s",
                          datefmt="%Y-%m-%d %H:%M:%S"))
    fh.setLevel(logging.DEBUG)

    sh = logging.StreamHandler()
    sh.setFormatter(logging.Formatter(''))
    sh.setLevel(logging.DEBUG if gParameters['verbose'] else logging.INFO)

    benchmark.logger.setLevel(logging.DEBUG)
    benchmark.logger.addHandler(fh)
    benchmark.logger.addHandler(sh)
    benchmark.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()
    seed = gParameters['rng_seed']

    # Build dataset loader object
    loader = benchmark.DataLoader(
        seed=seed,
        dtype=gParameters['data_type'],
        val_split=gParameters['val_split'],
        test_cell_split=gParameters['test_cell_split'],
        cell_features=gParameters['cell_features'],
        drug_features=gParameters['drug_features'],
        feature_subsample=gParameters['feature_subsample'],
        scaling=gParameters['scaling'],
        scramble=gParameters['scramble'],
        min_logconc=gParameters['min_logconc'],
        max_logconc=gParameters['max_logconc'],
        subsample=gParameters['subsample'],
        category_cutoffs=gParameters['category_cutoffs'])

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = candle.build_initializer('constant', kerasDefaults, 0.)

    activation = gParameters['activation']

    # Define model architecture
    gen_shape = None
    out_dim = 1

    model = Sequential()
    if 'dense' in gParameters:  # Build dense layers
        for layer in gParameters['dense']:
            if layer:
                model.add(
                    Dense(layer,
                          input_dim=loader.input_dim,
                          kernel_initializer=initializer_weights,
                          bias_initializer=initializer_bias))
                if gParameters['batch_normalization']:
                    model.add(BatchNormalization())
                model.add(Activation(gParameters['activation']))
                if gParameters['dropout']:
                    model.add(Dropout(gParameters['dropout']))
    else:  # Build convolutional layers
        gen_shape = 'add_1d'
        layer_list = list(range(0, len(gParameters['conv'])))
        lc_flag = False
        if 'locally_connected' in gParameters:
            lc_flag = True

        for l, i in enumerate(layer_list):
            if i == 0:
                add_conv_layer(model,
                               gParameters['conv'][i],
                               input_dim=loader.input_dim,
                               locally_connected=lc_flag)
            else:
                add_conv_layer(model,
                               gParameters['conv'][i],
                               locally_connected=lc_flag)
            if gParameters['batch_normalization']:
                model.add(BatchNormalization())
            model.add(Activation(gParameters['activation']))
            if gParameters['pool']:
                model.add(MaxPooling1D(pool_size=gParameters['pool']))
        model.add(Flatten())

    model.add(Dense(out_dim))

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    # Compile and display model
    model.compile(loss=gParameters['loss'], optimizer=optimizer)
    model.summary()
    benchmark.logger.debug('Model: {}'.format(model.to_json()))

    train_gen = benchmark.DataGenerator(
        loader,
        batch_size=gParameters['batch_size'],
        shape=gen_shape,
        name='train_gen',
        cell_noise_sigma=gParameters['cell_noise_sigma']).flow()
    val_gen = benchmark.DataGenerator(loader,
                                      partition='val',
                                      batch_size=gParameters['batch_size'],
                                      shape=gen_shape,
                                      name='val_gen').flow()
    val_gen2 = benchmark.DataGenerator(loader,
                                       partition='val',
                                       batch_size=gParameters['batch_size'],
                                       shape=gen_shape,
                                       name='val_gen2').flow()
    test_gen = benchmark.DataGenerator(loader,
                                       partition='test',
                                       batch_size=gParameters['batch_size'],
                                       shape=gen_shape,
                                       name='test_gen').flow()

    train_steps = int(loader.n_train / gParameters['batch_size'])
    val_steps = int(loader.n_val / gParameters['batch_size'])
    test_steps = int(loader.n_test / gParameters['batch_size'])

    if 'train_steps' in gParameters:
        train_steps = gParameters['train_steps']
    if 'val_steps' in gParameters:
        val_steps = gParameters['val_steps']
    if 'test_steps' in gParameters:
        test_steps = gParameters['test_steps']

    checkpointer = ModelCheckpoint(filepath=gParameters['output_dir'] +
                                   '.model' + ext + '.h5',
                                   save_best_only=True)
    progbar = MyProgbarLogger(train_steps * gParameters['batch_size'])
    loss_history = MyLossHistory(
        progbar=progbar,
        val_gen=val_gen2,
        test_gen=test_gen,
        val_steps=val_steps,
        test_steps=test_steps,
        metric=gParameters['loss'],
        category_cutoffs=gParameters['category_cutoffs'],
        ext=ext,
        pre=gParameters['output_dir'])

    # Seed random generator for training
    np.random.seed(seed)

    candleRemoteMonitor = candle.CandleRemoteMonitor(params=gParameters)

    history = model.fit_generator(
        train_gen,
        train_steps,
        epochs=gParameters['epochs'],
        validation_data=val_gen,
        validation_steps=val_steps,
        verbose=0,
        callbacks=[checkpointer, loss_history, progbar, candleRemoteMonitor],
    )

    benchmark.logger.removeHandler(fh)
    benchmark.logger.removeHandler(sh)

    return history
def run(gParameters):

    origin = gParameters['data_url']
    train_data = gParameters['train_data']
    data_loc = candle.fetch_file(origin + train_data,
                                 untar=True,
                                 md5_hash=None,
                                 subdir='Pilot3')

    print('Data downloaded and stored at: ' + data_loc)
    data_path = os.path.dirname(data_loc)
    print(data_path)

    kerasDefaults = candle.keras_default_config()

    rnn_size = gParameters['rnn_size']
    n_layers = gParameters['n_layers']
    learning_rate = gParameters['learning_rate']
    dropout = gParameters['dropout']
    recurrent_dropout = gParameters['recurrent_dropout']
    n_epochs = gParameters['epochs']
    data_train = data_path + '/data.pkl'
    verbose = gParameters['verbose']
    savedir = gParameters['output_dir']
    do_sample = gParameters['do_sample']
    temperature = gParameters['temperature']
    primetext = gParameters['primetext']
    length = gParameters['length']

    # load data from pickle
    f = open(data_train, 'rb')

    if (sys.version_info > (3, 0)):
        classes = pickle.load(f, encoding='latin1')
        chars = pickle.load(f, encoding='latin1')
        char_indices = pickle.load(f, encoding='latin1')
        indices_char = pickle.load(f, encoding='latin1')

        maxlen = pickle.load(f, encoding='latin1')
        step = pickle.load(f, encoding='latin1')

        X_ind = pickle.load(f, encoding='latin1')
        y_ind = pickle.load(f, encoding='latin1')
    else:
        classes = pickle.load(f)
        chars = pickle.load(f)
        char_indices = pickle.load(f)
        indices_char = pickle.load(f)

        maxlen = pickle.load(f)
        step = pickle.load(f)

        X_ind = pickle.load(f)
        y_ind = pickle.load(f)

    f.close()

    [s1, s2] = X_ind.shape
    print(X_ind.shape)
    print(y_ind.shape)
    print(maxlen)
    print(len(chars))

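    # Expand the integer index arrays into one-hot boolean tensors:
    # X[i, t] marks the character at position t of sample i, y[i] marks the character to predict.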
    X = np.zeros((s1, s2, len(chars)), dtype=bool)
    y = np.zeros((s1, len(chars)), dtype=bool)

    for i in range(s1):
        for t in range(s2):
            X[i, t, X_ind[i, t]] = 1
        y[i, y_ind[i]] = 1

    # build the model: a single LSTM
    if verbose:
        print('Build model...')

    model = Sequential()

    # for rnn_size in rnn_sizes:
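    # Stack n_layers LSTMs; every layer except the last returns full sequences so the next LSTM receives a sequence input.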
    for k in range(n_layers):
        if k < n_layers - 1:
            ret_seq = True
        else:
            ret_seq = False

        if k == 0:
            model.add(
                LSTM(rnn_size,
                     input_shape=(maxlen, len(chars)),
                     return_sequences=ret_seq,
                     dropout=dropout,
                     recurrent_dropout=recurrent_dropout))
        else:
            model.add(
                LSTM(rnn_size,
                     dropout=dropout,
                     recurrent_dropout=recurrent_dropout,
                     return_sequences=ret_seq))

    model.add(Dense(len(chars)))
    model.add(Activation(gParameters['activation']))

    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    model.compile(loss=gParameters['loss'], optimizer=optimizer)

    if verbose:
        model.summary()

    for iteration in range(1, n_epochs + 1):
        if verbose:
            print()
            print('-' * 50)
            print('Iteration', iteration)

        history = LossHistory()
        model.fit(X, y, batch_size=100, epochs=1, callbacks=[history])

        loss = history.losses[-1]
        if verbose:
            print(loss)

        dirname = savedir
        if len(dirname) > 0 and not dirname.endswith('/'):
            dirname = dirname + '/'

        if not os.path.exists(dirname):
            os.makedirs(dirname)

        # serialize model to JSON
        model_json = model.to_json()
        with open(
                dirname + "/model_" + str(iteration) + "_" +
                "{:f}".format(loss) + ".json", "w") as json_file:
            json_file.write(model_json)

        # serialize weights to HDF5
        model.save_weights(dirname + "/model_" + str(iteration) + "_" +
                           "{:f}".format(loss) + ".h5")

        if verbose:
            print("Checkpoint saved.")

        if do_sample:
            outtext = open(dirname + "/example_" + str(iteration) + "_" +
                           "{:f}".format(loss) + ".txt",
                           "w",
                           encoding='utf-8')

            diversity = temperature

            outtext.write('----- diversity:' + str(diversity) + "\n")

            generated = ''
            seedstr = primetext

            outtext.write('----- Generating with seed: "' + seedstr + '"' +
                          "\n")

            sentence = " " * maxlen

            # class_index = 0
            generated += sentence
            outtext.write(generated)

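            # Feed the prime text one character at a time, sliding a window of the last maxlen characters as model input.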
            for c in seedstr:
                sentence = sentence[1:] + c
                x = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(sentence):
                    x[0, t, char_indices[char]] = 1.

                preds = model.predict(x, verbose=verbose)[0]
                next_index = sample(preds, diversity)
                next_char = indices_char[next_index]

                generated += c

                outtext.write(c)

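            # Generate `length` characters by repeatedly sampling the model's next-character distribution at the chosen temperature.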
            for i in range(length):
                x = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(sentence):
                    x[0, t, char_indices[char]] = 1.

                preds = model.predict(x, verbose=verbose)[0]
                next_index = sample(preds, diversity)
                next_char = indices_char[next_index]

                generated += next_char
                sentence = sentence[1:] + next_char

            if (sys.version_info > (3, 0)):
                outtext.write(generated + '\n')
            else:
                outtext.write(generated.decode('utf-8').encode('utf-8') + '\n')

            outtext.close()
def run(gParameters):
    print('gParameters: ', gParameters)

    EPOCH = gParameters['epochs']
    BATCH = gParameters['batch_size']
    nb_classes = gParameters['classes']
    DR = gParameters['drop']
    ACTIVATION = gParameters['activation']
    kerasDefaults = candle.keras_default_config()
    kerasDefaults['momentum_sgd'] = gParameters['momentum']
    OPTIMIZER = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)
    PL = 6213  # 38 + 60483
    PS = 6212  # 60483

    X_train, Y_train, X_test, Y_test = load_data(nb_classes, PL, gParameters)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Y_train shape:', Y_train.shape)
    print('Y_test shape:', Y_test.shape)

    inputs = Input(shape=(PS, ))

    x = Dense(2000, activation=ACTIVATION)(inputs)
    x = Dense(1000, activation=ACTIVATION)(x)

    for i in range(gParameters['connections']):
        x = f(x, gParameters, distance=gParameters['distance'])

    x = Dropout(DR)(x)

    x = Dense(500, activation=ACTIVATION)(x)
    x = Dropout(DR)(x)
    x = Dense(250, activation=ACTIVATION)(x)
    x = Dropout(DR)(x)
    x = Dense(125, activation=ACTIVATION)(x)
    x = Dropout(DR)(x)
    x = Dense(62, activation=ACTIVATION)(x)
    x = Dropout(DR)(x)
    x = Dense(30, activation=ACTIVATION)(x)
    x = Dropout(DR)(x)
    outputs = Dense(2, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer=OPTIMIZER,
                  metrics=['accuracy'])

    # set up a bunch of callbacks to do work during model training.
    checkpointer = ModelCheckpoint(filepath='t29res.autosave.model.h5',
                                   verbose=0,
                                   save_weights_only=False,
                                   save_best_only=True)
    csv_logger = CSVLogger('t29res.training.log')
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.4,
                                  patience=10,
                                  verbose=1,
                                  mode='auto',
                                  min_delta=0.0001,
                                  cooldown=3,
                                  min_lr=0.000000001)
    callbacks = [checkpointer, csv_logger, reduce_lr]

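    # Learning-rate warm-up: ramp the optimizer lr linearly up to the configured value over the first five epochs.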
    def warmup_scheduler(epoch):
        lr = gParameters['learning_rate']
        if epoch <= 4:
            K.set_value(model.optimizer.lr, (lr * (epoch + 1) / 5))
        print('Epoch {}: lr={}'.format(epoch, K.get_value(model.optimizer.lr)))
        return K.get_value(model.optimizer.lr)

    if 'warmup_lr' in gParameters:

        warmup_lr = LearningRateScheduler(warmup_scheduler)
        print("adding LearningRateScheduler")
        callbacks.append(warmup_lr)

    history = model.fit(X_train,
                        Y_train,
                        batch_size=BATCH,
                        epochs=EPOCH,
                        verbose=1,
                        validation_data=(X_test, Y_test),
                        callbacks=callbacks)

    score = model.evaluate(X_test, Y_test, verbose=0)

    # summarize history for accuracy
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('Model Accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    plt.savefig('t29res.accuracy.png', bbox_inches='tight')
    plt.savefig('t29res.accuracy.pdf', bbox_inches='tight')

    plt.close()

    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')

    plt.savefig('t29res.loss.png', bbox_inches='tight')
    plt.savefig('t29res.loss.pdf', bbox_inches='tight')

    print('Test val_loss:', score[0])
    print('Test accuracy:', score[1])

    # serialize model to JSON
    model_json = model.to_json()
    with open("t29res.model.json", "w") as json_file:
        json_file.write(model_json)

    # serialize model to YAML
    model_yaml = model.to_yaml()
    with open("t29res.model.yaml", "w") as yaml_file:
        yaml_file.write(model_yaml)

    # serialize weights to HDF5
    model.save_weights("t29res.model.h5")
    print("Saved model to disk")

    # load json and create model
    json_file = open('t29res.model.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model_json = model_from_json(loaded_model_json)

    # load yaml and create model
    yaml_file = open('t29res.model.yaml', 'r')
    loaded_model_yaml = yaml_file.read()
    yaml_file.close()
    loaded_model_yaml = model_from_yaml(loaded_model_yaml)

    # load weights into new model
    loaded_model_json.load_weights("t29res.model.h5")
    print("Loaded json model from disk")

    # evaluate json loaded model on test data
    loaded_model_json.compile(loss='binary_crossentropy',
                              optimizer=gParameters['optimizer'],
                              metrics=['accuracy'])
    score_json = loaded_model_json.evaluate(X_test, Y_test, verbose=0)

    print('json Validation loss:', score_json[0])
    print('json Validation accuracy:', score_json[1])
    print("json %s: %.2f%%" %
          (loaded_model_json.metrics_names[1], score_json[1] * 100))

    # load weights into new model
    loaded_model_yaml.load_weights("t29res.model.h5")
    print("Loaded yaml model from disk")

    # evaluate loaded model on test data
    loaded_model_yaml.compile(loss='binary_crossentropy',
                              optimizer=gParameters['optimizer'],
                              metrics=['accuracy'])
    score_yaml = loaded_model_yaml.evaluate(X_test, Y_test, verbose=0)

    print('yaml Validation loss:', score_yaml[0])
    print('yaml Validation accuracy:', score_yaml[1])
    print("yaml %s: %.2f%%" %
          (loaded_model_yaml.metrics_names[1], score_yaml[1] * 100))

    # predict using loaded yaml model on test and training data
    predict_yaml_train = loaded_model_yaml.predict(X_train)
    predict_yaml_test = loaded_model_yaml.predict(X_test)

    print('Yaml_train_shape:', predict_yaml_train.shape)
    print('Yaml_test_shape:', predict_yaml_test.shape)

    predict_yaml_train_classes = np.argmax(predict_yaml_train, axis=1)
    predict_yaml_test_classes = np.argmax(predict_yaml_test, axis=1)

    np.savetxt("predict_yaml_train.csv",
               predict_yaml_train,
               delimiter=",",
               fmt="%.3f")
    np.savetxt("predict_yaml_test.csv",
               predict_yaml_test,
               delimiter=",",
               fmt="%.3f")

    np.savetxt("predict_yaml_train_classes.csv",
               predict_yaml_train_classes,
               delimiter=",",
               fmt="%d")
    np.savetxt("predict_yaml_test_classes.csv",
               predict_yaml_test_classes,
               delimiter=",",
               fmt="%d")

    return history
def run(params):

    args = candle.ArgumentStruct(**params)
    seed = args.rng_seed
    candle.set_seed(seed)
    
    # Construct extension to save model
    ext = p1b1.extension_from_parameters(params, '.keras')
    candle.verify_path(params['save_path'])
    prefix = '{}{}'.format(params['save_path'], ext)
    logfile = params['logfile'] if params['logfile'] else prefix+'.log'
    candle.set_up_logger(logfile, p1b1.logger, params['verbose'])
    p1b1.logger.info('Params: {}'.format(params))

    # Get default parameters for initialization and optimizer functions
    keras_defaults = candle.keras_default_config()

    # Load dataset
    x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels = p1b1.load_data(params, seed)

    # cache_file = 'data_l1000_cache.h5'
    # save_cache(cache_file, x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels)
    # x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels = load_cache(cache_file)

    p1b1.logger.info("Shape x_train: {}".format(x_train.shape))
    p1b1.logger.info("Shape x_val:   {}".format(x_val.shape))
    p1b1.logger.info("Shape x_test:  {}".format(x_test.shape))

    p1b1.logger.info("Range x_train: [{:.3g}, {:.3g}]".format(np.min(x_train), np.max(x_train)))
    p1b1.logger.info("Range x_val:   [{:.3g}, {:.3g}]".format(np.min(x_val), np.max(x_val)))
    p1b1.logger.info("Range x_test:  [{:.3g}, {:.3g}]".format(np.min(x_test), np.max(x_test)))

    p1b1.logger.debug('Class labels')
    for i, label in enumerate(y_labels):
        p1b1.logger.debug('  {}: {}'.format(i, label))

    # clf = build_type_classifier(x_train, y_train, x_val, y_val)

    n_classes = len(y_labels)
    cond_train = y_train
    cond_val = y_val
    cond_test = y_test

    input_dim = x_train.shape[1]
    cond_dim = cond_train.shape[1]
    latent_dim = params['latent_dim']

    activation = params['activation']
    dropout = params['dropout']
    dense_layers = params['dense']
    dropout_layer = AlphaDropout if params['alpha_dropout'] else Dropout

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(params['initialization'], keras_defaults, seed)
    initializer_bias = candle.build_initializer('constant', keras_defaults, 0.)

    if dense_layers is not None:
        if type(dense_layers) != list:
            dense_layers = list(dense_layers)
    else:
        dense_layers = []

    # Encoder Part
    x_input = Input(shape=(input_dim,))
    cond_input = Input(shape=(cond_dim,))
    h = x_input
    if params['model'] == 'cvae':
        h = keras.layers.concatenate([x_input, cond_input])

    for i, layer in enumerate(dense_layers):
        if layer > 0:
            x = h
            h = Dense(layer, activation=activation,
                      kernel_initializer=initializer_weights,
                      bias_initializer=initializer_bias)(h)
            if params['residual']:
                try:
                    h = keras.layers.add([h, x])
                except ValueError:
                    pass
            if params['batch_normalization']:
                h = BatchNormalization()(h)
            if dropout > 0:
                h = dropout_layer(dropout)(h)

    if params['model'] == 'ae':
        encoded = Dense(latent_dim, activation=activation,
                        kernel_initializer=initializer_weights,
                        bias_initializer=initializer_bias)(h)
    else:
        epsilon_std = params['epsilon_std']
        z_mean = Dense(latent_dim, name='z_mean')(h)
        z_log_var = Dense(latent_dim, name='z_log_var')(h)
        encoded = z_mean

        def vae_loss(x, x_decoded_mean):
            xent_loss = binary_crossentropy(x, x_decoded_mean)
            kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
            return K.mean(xent_loss + kl_loss/input_dim)

        def sampling(args):
            z_mean_, z_log_var_ = args
            batch_size = K.shape(z_mean_)[0]
            epsilon = K.random_normal(shape=(batch_size, latent_dim),
                                      mean=0., stddev=epsilon_std)
            return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

        z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
        if params['model'] == 'cvae':
            z_cond = keras.layers.concatenate([z, cond_input])

    # Decoder Part
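    # Mirror the encoder: walk the dense layer sizes in reverse, widening back toward input_dim.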
    decoder_input = Input(shape=(latent_dim,))
    h = decoder_input
    if params['model'] == 'cvae':
        h = keras.layers.concatenate([decoder_input, cond_input])

    for i, layer in reversed(list(enumerate(dense_layers))):
        if layer > 0:
            x = h
            h = Dense(layer, activation=activation,
                      kernel_initializer=initializer_weights,
                      bias_initializer=initializer_bias)(h)
            if params['residual']:
                try:
                    h = keras.layers.add([h, x])
                except ValueError:
                    pass
            if params['batch_normalization']:
                h = BatchNormalization()(h)
            if dropout > 0:
                h = dropout_layer(dropout)(h)

    decoded = Dense(input_dim, activation='sigmoid',
                    kernel_initializer=initializer_weights,
                    bias_initializer=initializer_bias)(h)

    # Build autoencoder model
    if params['model'] == 'cvae':
        encoder = Model([x_input, cond_input], encoded)
        decoder = Model([decoder_input, cond_input], decoded)
        model = Model([x_input, cond_input], decoder([z, cond_input]))
        loss = vae_loss
        metrics = [xent, corr, mse]
    elif params['model'] == 'vae':
        encoder = Model(x_input, encoded)
        decoder = Model(decoder_input, decoded)
        model = Model(x_input, decoder(z))
        loss = vae_loss
        metrics = [xent, corr, mse]
    else:
        encoder = Model(x_input, encoded)
        decoder = Model(decoder_input, decoded)
        model = Model(x_input, decoder(encoded))
        loss = params['loss']
        metrics = [xent, corr]

    model.summary()
    decoder.summary()

    if params['cp']:
        model_json = model.to_json()
        with open(prefix+'.model.json', 'w') as f:
            print(model_json, file=f)

    # Define optimizer
    # optimizer = candle.build_optimizer(params['optimizer'],
    #                                             params['learning_rate'],
    #                                             keras_defaults)
    optimizer = optimizers.deserialize({'class_name': params['optimizer'], 'config': {}})
    base_lr = params['base_lr'] or K.get_value(optimizer.lr)
    if params['learning_rate']:
        K.set_value(optimizer.lr, params['learning_rate'])

    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    # calculate trainable and non-trainable params
    params.update(candle.compute_trainable_params(model))

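    # Learning-rate warm-up: over the first five epochs, interpolate linearly from base_lr to the target lr.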
    def warmup_scheduler(epoch):
        lr = params['learning_rate'] or base_lr * params['batch_size']/100
        if epoch <= 5:
            K.set_value(model.optimizer.lr, (base_lr * (5-epoch) + lr * epoch) / 5)
        p1b1.logger.debug('Epoch {}: lr={}'.format(epoch, K.get_value(model.optimizer.lr)))
        return K.get_value(model.optimizer.lr)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
    warmup_lr = LearningRateScheduler(warmup_scheduler)
    checkpointer = ModelCheckpoint(params['save_path']+ext+'.weights.h5', save_best_only=True, save_weights_only=True)
    tensorboard = TensorBoard(log_dir="tb/tb{}".format(ext))
    candle_monitor = candle.CandleRemoteMonitor(params=params)
    timeout_monitor = candle.TerminateOnTimeOut(params['timeout'])
    history_logger = LoggingCallback(p1b1.logger.debug)

    callbacks = [candle_monitor, timeout_monitor, history_logger]
    if params['reduce_lr']:
        callbacks.append(reduce_lr)
    if params['warmup_lr']:
        callbacks.append(warmup_lr)
    if params['cp']:
        callbacks.append(checkpointer)
    if params['tb']:
        callbacks.append(tensorboard)

    x_val2 = np.copy(x_val)
    np.random.shuffle(x_val2)
    start_scores = p1b1.evaluate_autoencoder(x_val, x_val2)
    p1b1.logger.info('\nBetween random pairs of validation samples: {}'.format(start_scores))

    if params['model'] == 'cvae':
        inputs = [x_train, cond_train]
        val_inputs = [x_val, cond_val]
        test_inputs = [x_test, cond_test]
    else:
        inputs = x_train
        val_inputs = x_val
        test_inputs = x_test

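    # Autoencoder targets are the inputs themselves (reconstruct x from x).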
    outputs = x_train
    val_outputs = x_val
    test_outputs = x_test

    history = model.fit(inputs, outputs,
                        verbose=2,
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        callbacks=callbacks,
                        validation_data=(val_inputs, val_outputs))

    if params['cp']:
        encoder.save(prefix+'.encoder.h5')
        decoder.save(prefix+'.decoder.h5')

    candle.plot_history(prefix, history, 'loss')
    candle.plot_history(prefix, history, 'corr', 'streaming pearson correlation')

    # Evalute model on test set
    x_pred = model.predict(test_inputs)
    scores = p1b1.evaluate_autoencoder(x_pred, x_test)
    p1b1.logger.info('\nEvaluation on test data: {}'.format(scores))

    x_test_encoded = encoder.predict(test_inputs, batch_size=params['batch_size'])
    y_test_classes = np.argmax(y_test, axis=1)
    candle.plot_scatter(x_test_encoded, y_test_classes, prefix+'.latent')

    if params['tsne']:
        tsne = TSNE(n_components=2, random_state=seed)
        x_test_encoded_tsne = tsne.fit_transform(x_test_encoded)
        candle.plot_scatter(x_test_encoded_tsne, y_test_classes, prefix+'.latent.tsne')

    # diff = x_pred - x_test
    # plt.hist(diff.ravel(), bins='auto')
    # plt.title("Histogram of Errors with 'auto' bins")
    # plt.savefig('histogram_keras.png')

    # generate synthetic data
    # epsilon_std = 1.0
    # for i in range(1000):
    #     z_sample = np.random.normal(size=(1, 2)) * epsilon_std
    #     x_decoded = decoder.predict(z_sample)

    p1b1.logger.handlers = []

    return history
def run(gParameters):

    print ('Params:', gParameters)

    file_train = gParameters['train_data']
    file_test = gParameters['test_data']
    url = gParameters['data_url']

    train_file = candle.get_file(file_train, url+file_train, cache_subdir='Pilot1')
    test_file = candle.get_file(file_test, url+file_test, cache_subdir='Pilot1')

    X_train, Y_train, X_test, Y_test = bmk.load_data(train_file, test_file, gParameters)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Y_train shape:', Y_train.shape)
    print('Y_test shape:', Y_test.shape)

    x_train_len = X_train.shape[1]

    # this reshaping is critical for the Conv1D to work

    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    model = Sequential()

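    # gParameters['conv'] is a flat list of (filters, kernel_size, stride) triplets;
    # walk it three entries at a time and stop at the first non-positive value.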
    layer_list = list(range(0, len(gParameters['conv']), 3))
    for l, i in enumerate(layer_list):
        filters = gParameters['conv'][i]
        filter_len = gParameters['conv'][i+1]
        stride = gParameters['conv'][i+2]
        print(int(i/3), filters, filter_len, stride)
        if gParameters['pool']:
            pool_list = gParameters['pool']
            if type(pool_list) != list:
                pool_list = list(pool_list)

        if filters <= 0 or filter_len <= 0 or stride <= 0:
            break
        if 'locally_connected' in gParameters:
            model.add(LocallyConnected1D(filters, filter_len, strides=stride, padding='valid', input_shape=(x_train_len, 1)))
        else:
            # input layer
            if i == 0:
                model.add(Conv1D(filters=filters, kernel_size=filter_len, strides=stride, padding='valid', input_shape=(x_train_len, 1)))
            else:
                model.add(Conv1D(filters=filters, kernel_size=filter_len, strides=stride, padding='valid'))
        model.add(Activation(gParameters['activation']))
        if gParameters['pool']:
            model.add(MaxPooling1D(pool_size=pool_list[int(i / 3)]))

    model.add(Flatten())

    for layer in gParameters['dense']:
        if layer:
            model.add(Dense(layer))
            model.add(Activation(gParameters['activation']))
            if gParameters['dropout']:
                model.add(Dropout(gParameters['dropout']))
    model.add(Dense(gParameters['classes']))
    model.add(Activation(gParameters['out_activation']))

#Reference case
#model.add(Conv1D(filters=128, kernel_size=20, strides=1, padding='valid', input_shape=(P, 1)))
#model.add(Activation('relu'))
#model.add(MaxPooling1D(pool_size=1))
#model.add(Conv1D(filters=128, kernel_size=10, strides=1, padding='valid'))
#model.add(Activation('relu'))
#model.add(MaxPooling1D(pool_size=10))
#model.add(Flatten())
#model.add(Dense(200))
#model.add(Activation('relu'))
#model.add(Dropout(0.1))
#model.add(Dense(20))
#model.add(Activation('relu'))
#model.add(Dropout(0.1))
#model.add(Dense(CLASSES))
#model.add(Activation('softmax'))

    kerasDefaults = candle.keras_default_config()

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                                gParameters['learning_rate'],
                                                kerasDefaults)

    model.summary()
    model.compile(loss=gParameters['loss'],
                  optimizer=optimizer,
                  metrics=[gParameters['metrics']])

    output_dir = gParameters['output_dir']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # calculate trainable and non-trainable params
    gParameters.update(candle.compute_trainable_params(model))

    # set up a bunch of callbacks to do work during model training..
    model_name = gParameters['model_name']
    path = '{}/{}.autosave.model.h5'.format(output_dir, model_name)
    checkpointer = ModelCheckpoint(filepath=path, verbose=1, save_weights_only=False, save_best_only=True)

    csv_logger = CSVLogger('{}/training.log'.format(output_dir))
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=1, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
    candleRemoteMonitor = candle.CandleRemoteMonitor(params=gParameters)
    timeoutMonitor = candle.TerminateOnTimeOut(gParameters['timeout'])
    history = model.fit(X_train, Y_train,
                    batch_size=gParameters['batch_size'],
                    epochs=gParameters['epochs'],
                    verbose=1,
                    validation_data=(X_test, Y_test),
                    callbacks = [csv_logger, reduce_lr, candleRemoteMonitor, timeoutMonitor])

    score = model.evaluate(X_test, Y_test, verbose=0)

    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    # serialize model to JSON
    model_json = model.to_json()
    with open("{}/{}.model.json".format(output_dir, model_name), "w") as json_file:
        json_file.write(model_json)

    # serialize model to YAML
    model_yaml = model.to_yaml()
    with open("{}/{}.model.yaml".format(output_dir, model_name), "w") as yaml_file:
        yaml_file.write(model_yaml)

    # serialize weights to HDF5
    model.save_weights("{}/{}.weights.h5".format(output_dir, model_name))
    print("Saved model to disk")

    # load json and create model
    json_file = open('{}/{}.model.json'.format(output_dir, model_name), 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model_json = model_from_json(loaded_model_json)


    # load yaml and create model
    yaml_file = open('{}/{}.model.yaml'.format(output_dir, model_name), 'r')
    loaded_model_yaml = yaml_file.read()
    yaml_file.close()
    loaded_model_yaml = model_from_yaml(loaded_model_yaml)


    # load weights into new model
    loaded_model_json.load_weights('{}/{}.weights.h5'.format(output_dir, model_name))
    print("Loaded json model from disk")

    # evaluate json loaded model on test data
    loaded_model_json.compile(loss=gParameters['loss'],
        optimizer=gParameters['optimizer'],
        metrics=[gParameters['metrics']])
    score_json = loaded_model_json.evaluate(X_test, Y_test, verbose=0)

    print('json Test score:', score_json[0])
    print('json Test accuracy:', score_json[1])

    print("json %s: %.2f%%" % (loaded_model_json.metrics_names[1], score_json[1]*100))

    # load weights into new model
    loaded_model_yaml.load_weights('{}/{}.weights.h5'.format(output_dir, model_name))
    print("Loaded yaml model from disk")

    # evaluate loaded model on test data
    loaded_model_yaml.compile(loss=gParameters['loss'],
        optimizer=gParameters['optimizer'],
        metrics=[gParameters['metrics']])
    score_yaml = loaded_model_yaml.evaluate(X_test, Y_test, verbose=0)

    print('yaml Test score:', score_yaml[0])
    print('yaml Test accuracy:', score_yaml[1])

    print("yaml %s: %.2f%%" % (loaded_model_yaml.metrics_names[1], score_yaml[1]*100))

    return history
def run(params):
    args = candle.ArgumentStruct(**params)
    seed = args.rng_seed
    candle.set_seed(seed)

    # Construct extension to save model
    ext = attn.extension_from_parameters(params, 'keras')
    candle.verify_path(params['save_path'])
    prefix = '{}{}'.format(params['save_path'], ext)
    logfile = params['logfile'] if params['logfile'] else prefix + '.log'
    root_fname = 'Agg_attn_bin'
    candle.set_up_logger(logfile, attn.logger, params['verbose'])
    attn.logger.info('Params: {}'.format(params))

    # Get default parameters for initialization and optimizer functions
    keras_defaults = candle.keras_default_config()

    ##
    X_train, _Y_train, X_val, _Y_val, X_test, _Y_test = attn.load_data(
        params, seed)

    # move this inside the load_data function
    Y_train = _Y_train['AUC']
    Y_test = _Y_test['AUC']
    Y_val = _Y_val['AUC']

    Y_train_neg, Y_train_pos = np.bincount(Y_train)
    Y_test_neg, Y_test_pos = np.bincount(Y_test)
    Y_val_neg, Y_val_pos = np.bincount(Y_val)

    Y_train_total = Y_train_neg + Y_train_pos
    Y_test_total = Y_test_neg + Y_test_pos
    Y_val_total = Y_val_neg + Y_val_pos

    total = Y_train_total + Y_test_total + Y_val_total
    neg = Y_train_neg + Y_test_neg + Y_val_neg
    pos = Y_train_pos + Y_test_pos + Y_val_pos

    print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.
          format(total, pos, 100 * pos / total))

    nb_classes = params['dense'][-1]

    Y_train = np_utils.to_categorical(Y_train, nb_classes)
    Y_test = np_utils.to_categorical(Y_test, nb_classes)
    Y_val = np_utils.to_categorical(Y_val, nb_classes)

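    # Compute balanced class weights from the integer training labels to offset the class imbalance reported above.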
    y_integers = np.argmax(Y_train, axis=1)
    class_weights = compute_class_weight('balanced', np.unique(y_integers),
                                         y_integers)
    d_class_weights = dict(enumerate(class_weights))

    print('X_train shape:', X_train.shape)
    print('X_val shape:', X_val.shape)
    print('X_test shape:', X_test.shape)

    print('Y_train shape:', Y_train.shape)
    print('Y_val shape:', Y_val.shape)
    print('Y_test shape:', Y_test.shape)

    # save the data
    # data_train = {"X": X_train, "y": Y_train}
    # data_val = {"X": X_val, "y": Y_val}
    # data_test = {"X": X_test, "y": Y_test}

    h5f = h5py.File('training_attn.h5', 'w')
    h5f.create_dataset('X_train', data=X_train)
    h5f.create_dataset('Y_train', data=Y_train)
    h5f.create_dataset('X_val', data=X_val)
    h5f.create_dataset('Y_val', data=Y_val)
    h5f.create_dataset('X_test', data=X_test)
    h5f.create_dataset('Y_test', data=Y_test)
    h5f.close()
def run(params):
    args = candle.ArgumentStruct(**params)
    seed = args.rng_seed
    candle.set_seed(seed)

    # Construct extension to save model
    ext = attn.extension_from_parameters(params, "keras")
    candle.verify_path(params["save_path"])
    prefix = "{}{}".format(params["save_path"], ext)
    logfile = params["logfile"] if params["logfile"] else prefix + ".log"
    root_fname = "Agg_attn_bin"
    candle.set_up_logger(logfile, attn.logger, params["verbose"])
    attn.logger.info("Params: {}".format(params))

    # Get default parameters for initialization and optimizer functions
    keras_defaults = candle.keras_default_config()

    ##
    X_train, _Y_train, X_val, _Y_val, X_test, _Y_test = attn.load_data(
        params, seed)

    # move this inside the load_data function
    Y_train = _Y_train["AUC"]
    Y_test = _Y_test["AUC"]
    Y_val = _Y_val["AUC"]

    Y_train_neg, Y_train_pos = np.bincount(Y_train)
    Y_test_neg, Y_test_pos = np.bincount(Y_test)
    Y_val_neg, Y_val_pos = np.bincount(Y_val)

    Y_train_total = Y_train_neg + Y_train_pos
    Y_test_total = Y_test_neg + Y_test_pos
    Y_val_total = Y_val_neg + Y_val_pos

    total = Y_train_total + Y_test_total + Y_val_total
    neg = Y_train_neg + Y_test_neg + Y_val_neg
    pos = Y_train_pos + Y_test_pos + Y_val_pos

    print("Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n".
          format(total, pos, 100 * pos / total))

    nb_classes = params["dense"][-1]

    Y_train = np_utils.to_categorical(Y_train, nb_classes)
    Y_test = np_utils.to_categorical(Y_test, nb_classes)
    Y_val = np_utils.to_categorical(Y_val, nb_classes)
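    # 'balanced' class weights are derived below from the argmax of the one-hot labels;
    # d_class_weights is later passed to model.fit(class_weight=...) to offset imbalance.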

    y_integers = np.argmax(Y_train, axis=1)
    class_weights = compute_class_weight("balanced",
                                         classes=np.unique(y_integers),
                                         y=y_integers)
    d_class_weights = dict(enumerate(class_weights))

    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)

    print("Y_train shape:", Y_train.shape)
    print("Y_test shape:", Y_test.shape)

    PS = X_train.shape[1]
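    # PS is the width of the input feature vector; build_attention_model (defined
    # elsewhere in this benchmark) is assumed to build the Keras attention network over it.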
    model = build_attention_model(params, PS)

    # parallel_model = multi_gpu_model(model, gpus=4)
    # parallel_model.compile(loss='mean_squared_error',
    #         optimizer=SGD(lr=0.0001, momentum=0.9),
    #              metrics=['mae',r2])
    kerasDefaults = candle.keras_default_config()
    if params["momentum"]:
        kerasDefaults["momentum_sgd"] = params["momentum"]

    optimizer = candle.build_optimizer(params["optimizer"],
                                       params["learning_rate"], kerasDefaults)

    model.compile(
        loss=params["loss"],
        optimizer=optimizer,
        #                       SGD(lr=0.00001, momentum=0.9),
        #             optimizer=Adam(lr=0.00001),
        #             optimizer=RMSprop(lr=0.0001),
        #             optimizer=Adadelta(),
        metrics=[
            "acc",
            tf.keras.metrics.AUC(name="auroc", curve="ROC"),
            tf.keras.metrics.AUC(name="aucpr", curve="PR"),
        ],
    )
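    # The AUC metrics are given explicit names ('auroc', 'aucpr') so the callback
    # monitors ('val_auroc') and the history-plotting keys below can refer to them.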

    # set up a bunch of callbacks to do work during model training..

    checkpointer = ModelCheckpoint(
        filepath=params["save_path"] + root_fname + ".autosave.model.h5",
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
    )
    csv_logger = CSVLogger("{}/{}.training.log".format(params["save_path"],
                                                       root_fname))
    reduce_lr = ReduceLROnPlateau(
        monitor="val_auroc",
        factor=0.20,
        patience=40,
        verbose=1,
        mode="auto",
        min_delta=0.0001,
        cooldown=3,
        min_lr=0.000000001,
    )
    early_stop = EarlyStopping(monitor="val_auroc",
                               patience=200,
                               verbose=1,
                               mode="auto")
    candle_monitor = candle.CandleRemoteMonitor(params=params)

    timeout_monitor = candle.TerminateOnTimeOut(params["timeout"])
    tensorboard = TensorBoard(log_dir="tb/tb{}".format(ext))

    history_logger = LoggingCallback(attn.logger.debug)

    callbacks = [candle_monitor, timeout_monitor, csv_logger, history_logger]

    if params["reduce_lr"]:
        callbacks.append(reduce_lr)

    if params["use_cp"]:
        callbacks.append(checkpointer)
    if params["use_tb"]:
        callbacks.append(tensorboard)
    if params["early_stop"]:
        callbacks.append(early_stop)

    epochs = params["epochs"]
    batch_size = params["batch_size"]
    history = model.fit(
        X_train,
        Y_train,
        class_weight=d_class_weights,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=(X_val, Y_val),
        callbacks=callbacks,
    )

    # diagnostic plots
    if "loss" in history.history.keys():
        candle.plot_history(params["save_path"] + root_fname, history, "loss")
    if "acc" in history.history.keys():
        candle.plot_history(params["save_path"] + root_fname, history, "acc")
    if "auroc" in history.history.keys():
        candle.plot_history(params["save_path"] + root_fname, history, "auroc")
    if "aucpr" in history.history.keys():
        candle.plot_history(params["save_path"] + root_fname, history, "aucpr")

    # Evaluate model
    score = model.evaluate(X_test, Y_test, verbose=0)
    Y_predict = model.predict(X_test)

    evaluate_model(params, root_fname, nb_classes, Y_test, _Y_test, Y_predict,
                   pos, total, score)

    save_and_test_saved_model(params, model, root_fname, X_train, X_test,
                              Y_test)

    attn.logger.handlers = []

    return history
def run_cnn(GP,
            train_x,
            train_y,
            test_x,
            test_y,
            learning_rate=0.01,
            batch_size=10,
            epochs=10,
            dropout=0.5,
            optimizer='adam',
            wv_len=300,
            filter_sizes=[3, 4, 5],
            num_filters=[300, 300, 300],
            emb_l2=0.001,
            w_l2=0.01):

    max_vocab = np.max(train_x)
    max_vocab2 = np.max(test_x)
    if max_vocab2 > max_vocab:
        max_vocab = max_vocab2

    wv_mat = np.random.randn(max_vocab + 1, wv_len).astype('float32') * 0.1
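    # wv_mat is a randomly initialized word-embedding matrix of shape
    # (vocab_size, wv_len), scaled down so the embedding layer starts from small weights.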

    num_classes = []
    num_classes.append(np.max(train_y[:, 0]) + 1)
    num_classes.append(np.max(train_y[:, 1]) + 1)
    num_classes.append(np.max(train_y[:, 2]) + 1)
    num_classes.append(np.max(train_y[:, 3]) + 1)

    kerasDefaults = candle.keras_default_config()
    optimizer_run = candle.build_optimizer(optimizer, learning_rate,
                                           kerasDefaults)

    cnn = keras_mt_shared_cnn.init_export_network(num_classes=num_classes,
                                                  in_seq_len=1500,
                                                  vocab_size=len(wv_mat),
                                                  wv_space=wv_len,
                                                  filter_sizes=filter_sizes,
                                                  num_filters=num_filters,
                                                  concat_dropout_prob=dropout,
                                                  emb_l2=emb_l2,
                                                  w_l2=w_l2,
                                                  optimizer=optimizer_run)

    print(cnn.summary())
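    # The multi-task CNN exposes one named input ('Input') and four named output heads
    # ('Dense0'..'Dense3'); the validation data below is keyed by those layer names.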

    validation_data = ({
        'Input': test_x
    }, {
        'Dense0': test_y[:, 0],
        'Dense1': test_y[:, 1],
        'Dense2': test_y[:, 2],
        'Dense3': test_y[:, 3]
    })

    # candleRemoteMonitor = CandleRemoteMonitor(params= GP)
    # timeoutMonitor = TerminateOnTimeOut(TIMEOUT)

    candleRemoteMonitor = candle.CandleRemoteMonitor(params=GP)
    timeoutMonitor = candle.TerminateOnTimeOut(GP['timeout'])

    history = cnn.fit(x=np.array(train_x),
                      y=[
                          np.array(train_y[:, 0]),
                          np.array(train_y[:, 1]),
                          np.array(train_y[:, 2]),
                          np.array(train_y[:, 3])
                      ],
                      batch_size=batch_size,
                      epochs=epochs,
                      verbose=2,
                      validation_data=validation_data,
                      callbacks=[candleRemoteMonitor, timeoutMonitor])

    return history
def run(gParameters):

    fpath = fetch_data(gParameters)
    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()

    # Construct structures common to all folds
#    shared_nnet_spec = []
#    elem = gParameters['shared_nnet_spec'].split( ',' )
#    for el in elem:
#        shared_nnet_spec.append( int( el ) )

#    individual_nnet_spec = []
#    indiv = gParameters['ind_nnet_spec'].split( ':' )
#    for ind in indiv:
#        indiv_nnet_spec = []
#        elem = ind.split( ',' )
#        for el in elem:
#            indiv_nnet_spec.append( int( el ) )
#        individual_nnet_spec.append( indiv_nnet_spec )

    shared_nnet_spec = gParameters['shared_nnet_spec']
    individual_nnet_spec = gParameters['ind_nnet_spec']

    # Construct features common to all folds
    features = []
    feat = gParameters['feature_names'].split(':')
    for f in feat:
        features.append(f)

    n_feat = len(feat)
    print('Feature names:')
    for i in range(n_feat):
        print(features[i])

    # initialize arrays for all the features
    truth_array = [[] for _ in range(n_feat)]
    pred_array = [[] for _ in range(n_feat)]
    avg_loss = 0.0

    # stdout display level
    verbose = True

    # per fold
    for fold in range(gParameters['n_fold']):

        # build data
        X_train, Y_train, X_test, Y_test = bmk.build_data(
            len(individual_nnet_spec), fold, fpath)

        # build model
        input_dim = len(X_train[0][0])
        models, labels_train, labels_test = build_model(gParameters, kerasDefaults,
                                                        shared_nnet_spec, individual_nnet_spec,
                                                        input_dim, Y_train, Y_test, verbose)

        # train model
        models = train_model(gParameters, models,
                             X_train, labels_train,
                             X_test, labels_test,
                             fold, verbose)

        # evaluate model
        ret = evaluate_model(X_test, Y_test, labels_test, models)

        for i in range(n_feat):
            truth_array[i].extend(ret[i][0])
            pred_array[i].extend(ret[i][1])

        avg_loss += ret[-1]

    avg_loss /= float(gParameters['n_fold'])

    for task in range(n_feat):
        print('Task', task + 1, ':', features[task], '- Macro F1 score',
              f1_score(truth_array[task], pred_array[task], average='macro'))
        print('Task', task + 1, ':', features[task], '- Micro F1 score',
              f1_score(truth_array[task], pred_array[task], average='micro'))

    return avg_loss
def run(params):
    args = candle.ArgumentStruct(**params)
    seed = args.rng_seed
    candle.set_seed(seed)

    # Construct extension to save model
    ext = extension_from_parameters(params, 'keras')
    candle.verify_path(params['save_path'])
    prefix = '{}{}'.format(params['save_path'], ext)
    logfile = params['logfile'] if params['logfile'] else prefix + '.log'
    root_fname = 'Agg_attn_abs_bin'
    candle.set_up_logger(logfile, attn.logger, params['verbose'])
    attn.logger.info('Params: {}'.format(params))

    # Get default parameters for initialization and optimizer functions
    keras_defaults = candle.keras_default_config()

    ##
    X_train, _Y_train, X_val, _Y_val, X_test, _Y_test = attn.load_data(
        params, seed)

    # move this inside the load_data function
    Y_train = _Y_train['AUC']
    Y_test = _Y_test['AUC']
    Y_val = _Y_val['AUC']

    Y_train_neg, Y_train_pos = np.bincount(Y_train)
    Y_test_neg, Y_test_pos = np.bincount(Y_test)
    Y_val_neg, Y_val_pos = np.bincount(Y_val)

    Y_train_total = Y_train_neg + Y_train_pos
    Y_test_total = Y_test_neg + Y_test_pos
    Y_val_total = Y_val_neg + Y_val_pos

    total = Y_train_total + Y_test_total + Y_val_total
    neg = Y_train_neg + Y_test_neg + Y_val_neg
    pos = Y_train_pos + Y_test_pos + Y_val_pos

    print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
        total, pos, 100 * pos / total))

    nb_classes = params['dense'][-1]

    # Convert classes to categorical with an extra slot for the abstaining class
    Y_train, Y_test, Y_val = candle.modify_labels(nb_classes+1, Y_train, Y_test, Y_val)

    # Disable class weight (for initial testing of the abstention classifier)
    #y_integers = np.argmax(Y_train, axis=1)
    #class_weights = compute_class_weight('balanced', np.unique(y_integers), y_integers)
    #d_class_weights = dict(enumerate(class_weights))

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Y_train shape:', Y_train.shape)
    print('Y_test shape:', Y_test.shape)

    PS = X_train.shape[1]
    model = build_attention_model(params, PS)
    model = candle.add_model_output(model, mode='abstain', num_add=1, activation='sigmoid')
    print('Model after modifying layer for abstention')
    model.summary()
    
    # Configure abstention model
    mask_ = np.zeros(nb_classes+1)
    mask_[-1] = 1
    # mu auto-tunes during training, so the exact starting value is not critical in the
    # long term; however, starting far from the target can require many epochs to converge.
    mu0 = 0.5

    candle.abstention_variable_initialization(mu0, mask_, nb_classes)
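    # candle.abstention_variable_initialization is assumed to register mu0 and mask_
    # as the shared state consumed by candle.abstention_loss and the
    # AbstentionAdapt_Callback configured further down.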

    #parallel_model = multi_gpu_model(model, gpus=4)
    #parallel_model.compile(loss='mean_squared_error',
    #         optimizer=SGD(lr=0.0001, momentum=0.9),
    #              metrics=['mae',r2])
    kerasDefaults = candle.keras_default_config()
    if params['momentum']:
        kerasDefaults['momentum_sgd'] = params['momentum']

    optimizer = candle.build_optimizer(params['optimizer'], params['learning_rate'], kerasDefaults)

    # compile model with abstention loss
    model.compile(loss=candle.abstention_loss,
                  optimizer=optimizer,
                  metrics=['acc', tf_auc, candle.abs_acc,
                           candle.acc_class1, candle.abs_acc_class1])

    # set up a bunch of callbacks to do work during model training..
    checkpointer = ModelCheckpoint(filepath=params['save_path'] + root_fname + '.autosave.model.h5',
                                   verbose=1,
                                   save_weights_only=False,
                                   save_best_only=True)
    csv_logger = CSVLogger('{}/{}.training.log'.format(params['save_path'], root_fname))
    reduce_lr = ReduceLROnPlateau(monitor='val_tf_auc',
                                  factor=0.20,
                                  patience=40,
                                  verbose=1,
                                  mode='auto',
                                  min_delta=0.0001,
                                  cooldown=3,
                                  min_lr=0.000000001)
    early_stop = EarlyStopping(monitor='val_tf_auc',
                               patience=200,
                               verbose=1,
                               mode='auto')
    candle_monitor = candle.CandleRemoteMonitor(params=params)

    timeout_monitor = candle.TerminateOnTimeOut(params['timeout'])
    tensorboard = TensorBoard(log_dir="tb/tb{}".format(ext))

    history_logger = candle.LoggingCallback(attn.logger.debug)

    abstention_cbk = candle.AbstentionAdapt_Callback(monitor='val_abs_acc_class1',
                                                     scale_factor=params['abs_scale_factor'],
                                                     target_acc=params['target_abs_acc'])

    callbacks = [candle_monitor, timeout_monitor, csv_logger,
                 history_logger, abstention_cbk]

    if params['reduce_lr']:
        callbacks.append(reduce_lr)

    if params['use_cp']:
        callbacks.append(checkpointer)
    if params['use_tb']:
        callbacks.append(tensorboard)
    if params['early_stop']:
        callbacks.append(early_stop)

    epochs = params['epochs']
    batch_size = params['batch_size']
    history = model.fit(X_train, Y_train,  # class_weight=d_class_weights,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(X_val, Y_val),
                        callbacks=callbacks)

    # diagnostic plots
    if 'loss' in history.history.keys():
        candle.plot_history(params['save_path'] + root_fname, history, 'loss')
    if 'acc' in history.history.keys():
        candle.plot_history(params['save_path'] + root_fname, history, 'acc')
    if 'abs_acc' in history.history.keys():
        candle.plot_history(params['save_path'] + root_fname, history, 'abs_acc')
    # Plot mu evolution
    fname = params['save_path'] + root_fname + '.mu.png'
    xlabel = 'Epochs'
    ylabel = 'Abstention Weight mu'
    title = 'mu Evolution'
    attnviz.plot_array(abstention_cbk.muvalues, xlabel, ylabel, title, fname)

    # Evaluate model
    score = model.evaluate(X_test, Y_test, verbose=0)
    Y_predict = model.predict(X_test)
    evaluate_abstention(params, root_fname, nb_classes, Y_test, _Y_test,
                        Y_predict, pos, total, score)

    save_and_test_saved_model(params, model, root_fname, X_train, X_test, Y_test)

    attn.logger.handlers = []

    return history
def run(GP):

    # set the seed
    if GP['rng_seed']:
        np.random.seed(GP['rng_seed'])
    else:
        np.random.seed(np.random.randint(10000))

    # Set paths
    if not os.path.isdir(GP['home_dir']):
        print('Keras home directory not found:', GP['home_dir'])
        sys.exit(1)
    sys.path.append(GP['home_dir'])

    # Set up logging
    args = candle.ArgumentStruct(**GP)
    #    set_seed(args.rng_seed)
    #    ext = extension_from_parameters(args)
    candle.verify_path(args.save_path)
    prefix = args.save_path  # + ext
    logfile = args.logfile if args.logfile else prefix + '.log'
    candle.set_up_logger(logfile, logger, False)  # args.verbose
    logger.info('Params: {}'.format(GP))

    import p2b1 as hf
    reload(hf)

    # import keras_model_utils as KEU
    # reload(KEU)
    # reload(p2ck)
    # reload(p2ck.optimizers)
    maps = hf.autoencoder_preprocess()

    from keras.optimizers import SGD, RMSprop, Adam
    from keras.datasets import mnist
    from keras.callbacks import LearningRateScheduler, ModelCheckpoint
    from keras import callbacks
    from keras.layers.advanced_activations import ELU
    from keras.preprocessing.image import ImageDataGenerator

    #    GP=hf.ReadConfig(opts.config_file)
    batch_size = GP['batch_size']
    learning_rate = GP['learning_rate']
    kerasDefaults = candle.keras_default_config()

    # #### Read Data ########
    import helper
    (data_files, fields) = p2b1.get_list_of_data_files(GP)
    # Read from local directoy
    # (data_files, fields) = helper.get_local_files('/p/gscratchr/brainusr/datasets/cancer/pilot2/3k_run16_10us.35fs-DPPC.20-DIPC.60-CHOL.20.dir/')
    # (data_files, fields) = helper.get_local_files('3k_run16', '/p/lscratchf/brainusr/datasets/cancer/pilot2/')

    # Define datagenerator
    datagen = hf.ImageNoiseDataGenerator(corruption_level=GP['noise_factor'])
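    # ImageNoiseDataGenerator is assumed to corrupt inputs for denoising-autoencoder
    # style training, with 'noise_factor' setting the corruption level.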

    # get data dimension ##
    num_samples = 0
    for f in data_files:

        # Separate different arrays from the data
        (X, nbrs, resnums) = helper.get_data_arrays(f)

        num_samples += X.shape[0]

    (X, nbrs, resnums) = helper.get_data_arrays(data_files[0])
    print('\nData chunk shape: ', X.shape)

    molecular_hidden_layers = GP['molecular_num_hidden']
    if not molecular_hidden_layers:
        X_train = hf.get_data(X, case=GP['case'])
        input_dim = X_train.shape[1]
    else:
        # computing input dimension for outer AE
        input_dim = X.shape[1] * molecular_hidden_layers[-1]

    print('\nState AE input/output dimension: ', input_dim)

    # get data dimension for molecular autoencoder
    molecular_nbrs = int(GP['molecular_nbrs'])
    num_molecules = X.shape[1]
    num_beads = X.shape[2]

    if GP['nbr_type'] == 'relative':
        # relative x, y, z positions
        num_loc_features = 3
        loc_feat_vect = ['rel_x', 'rel_y', 'rel_z']
    elif GP['nbr_type'] == 'invariant':
        # relative distance and angle
        num_loc_features = 2
        loc_feat_vect = ['rel_dist', 'rel_angle']
    else:
        print('Invalid nbr_type:', GP['nbr_type'])
        sys.exit(1)

    if not GP['type_bool']:
        # only consider molecular location coordinates
        num_type_features = 0
        type_feat_vect = []
    else:
        num_type_features = 5
        type_feat_vect = list(fields.keys())[3:8]

    num_features = num_loc_features + num_type_features + num_beads
    dim = np.prod([num_beads, num_features, molecular_nbrs + 1])
    bead_kernel_size = num_features
    molecular_input_dim = dim
    mol_kernel_size = num_beads
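    # dim flattens (num_beads x num_features) values for the molecule plus each of its
    # molecular_nbrs neighbors into a single input vector for the molecular autoencoder.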

    feature_vector = loc_feat_vect + type_feat_vect + list(fields.keys())[8:]

    print('\nMolecular AE input/output dimension: ', molecular_input_dim)

    print(
        '\nData Format:\n[Frames (%s), Molecules (%s), Beads (%s), %s (%s)]' %
        (num_samples, num_molecules, num_beads, feature_vector, num_features))

    # ## Define Model, Solver and Compile ##########
    print('\nDefine the model and compile')
    opt = candle.build_optimizer(GP['optimizer'], learning_rate, kerasDefaults)
    model_type = 'mlp'
    memo = '%s_%s' % (GP['base_memo'], model_type)

    # ####### Define Molecular Model, Solver and Compile #########
    molecular_nonlinearity = GP['molecular_nonlinearity']

    len_molecular_hidden_layers = len(molecular_hidden_layers)
    conv_bool = GP['conv_bool']
    full_conv_bool = GP['full_conv_bool']
    if conv_bool:
        molecular_model, molecular_encoder = AE_models.conv_dense_mol_auto(
            bead_k_size=bead_kernel_size,
            mol_k_size=mol_kernel_size,
            weights_path=None,
            input_shape=(1, molecular_input_dim, 1),
            nonlinearity=molecular_nonlinearity,
            hidden_layers=molecular_hidden_layers,
            l2_reg=GP['l2_reg'],
            drop=float(GP['dropout']))
    elif full_conv_bool:
        molecular_model, molecular_encoder = AE_models.full_conv_mol_auto(
            bead_k_size=bead_kernel_size,
            mol_k_size=mol_kernel_size,
            weights_path=None,
            input_shape=(1, molecular_input_dim, 1),
            nonlinearity=molecular_nonlinearity,
            hidden_layers=molecular_hidden_layers,
            l2_reg=GP['l2_reg'],
            drop=float(GP['dropout']))

    else:
        molecular_model, molecular_encoder = AE_models.dense_auto(
            weights_path=None,
            input_shape=(molecular_input_dim, ),
            nonlinearity=molecular_nonlinearity,
            hidden_layers=molecular_hidden_layers,
            l2_reg=GP['l2_reg'],
            drop=float(GP['dropout']))

    if GP['loss'] == 'mse':
        loss_func = 'mse'
    elif GP['loss'] == 'custom':
        loss_func = helper.combined_loss
    else:
        raise ValueError('Unsupported loss: {}'.format(GP['loss']))

    molecular_model.compile(
        optimizer=opt,
        loss=loss_func,
        metrics=['mean_squared_error', 'mean_absolute_error'])
    print('\nModel Summary: \n')
    molecular_model.summary()
    # #### set up callbacks and cooling for the molecular_model ##########
    drop = GP['dropout']
    mb_epochs = GP['epochs']
    initial_lrate = GP['learning_rate']
    epochs_drop = 1 + int(np.floor(mb_epochs / 3))
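    # Step-decay schedule: lr = initial_lrate * drop ** floor((1 + epoch) / epochs_drop),
    # i.e. the rate is cut roughly every third of training; note that GP['dropout']
    # doubles as the decay factor here.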

    def step_decay(epoch):
        # initial_lrate, epochs_drop and drop are read from the enclosing run() scope;
        # declaring them 'global' here would raise NameError since they are not module-level.
        lrate = initial_lrate * np.power(drop,
                                         np.floor((1 + epoch) / epochs_drop))
        return lrate

    lr_scheduler = LearningRateScheduler(step_decay)
    history = callbacks.History()
    # callbacks=[history,lr_scheduler]

    history_logger = candle.LoggingCallback(logger.debug)
    candleRemoteMonitor = candle.CandleRemoteMonitor(params=GP)
    timeoutMonitor = candle.TerminateOnTimeOut(TIMEOUT)
    callbacks = [history, history_logger, candleRemoteMonitor, timeoutMonitor]
    loss = 0.

    # ### Save the Model to disk
    if GP['save_path'] is not None:
        save_path = GP['save_path']
        if not os.path.exists(save_path):
            os.makedirs(save_path)
    else:
        save_path = '.'

    model_json = molecular_model.to_json()
    with open(save_path + '/model.json', "w") as json_file:
        json_file.write(model_json)

    encoder_json = molecular_encoder.to_json()
    with open(save_path + '/encoder.json', "w") as json_file:
        json_file.write(encoder_json)

    print('Saved model to disk')

    # ### Train the Model
    if GP['train_bool']:
        ct = hf.Candle_Molecular_Train(
            molecular_model,
            molecular_encoder,
            data_files,
            mb_epochs,
            callbacks,
            batch_size=batch_size,
            nbr_type=GP['nbr_type'],
            save_path=GP['save_path'],
            len_molecular_hidden_layers=len_molecular_hidden_layers,
            molecular_nbrs=molecular_nbrs,
            conv_bool=conv_bool,
            full_conv_bool=full_conv_bool,
            type_bool=GP['type_bool'],
            sampling_density=GP['sampling_density'])
        frame_loss, frame_mse = ct.train_ac()
    else:
        frame_mse = []
        frame_loss = []

    return frame_loss, frame_mse
def run(gParameters):

    # Construct extension to save model
    ext = p1b2.extension_from_parameters(gParameters, '.keras')
    candle.verify_path(gParameters['save_path'])
    prefix = '{}{}'.format(gParameters['save_path'], ext)
    logfile = gParameters['logfile'] if gParameters[
        'logfile'] else prefix + '.log'
    candle.set_up_logger(logfile, p1b2.logger, gParameters['verbose'])
    p1b2.logger.info('Params: {}'.format(gParameters))

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = candle.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    #(X_train, y_train), (X_test, y_test) = p1b2.load_data(gParameters, seed)
    #(X_train, y_train), (X_val, y_val), (X_test, y_test) = p1b2.load_data_one_hot(gParameters, seed)
    (X_train, y_train), (X_test, y_test) = p1b2.load_data2(gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    #print ("Shape X_val: ", X_val.shape)
    print("Shape X_test: ", X_test.shape)
    print("Shape y_train: ", y_train.shape)
    #print ("Shape y_val: ", y_val.shape)
    print("Shape y_test: ", y_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ",
          np.max(X_train))
    #print ("Range X_val --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test --> Min: ", np.min(X_test), ", max: ", np.max(X_test))
    print("Range y_train --> Min: ", np.min(y_train), ", max: ",
          np.max(y_train))
    #print ("Range y_val --> Min: ", np.min(y_val), ", max: ", np.max(y_val))
    print("Range y_test --> Min: ", np.min(y_test), ", max: ", np.max(y_test))

    input_dim = X_train.shape[1]
    input_vector = Input(shape=(input_dim, ))
    output_dim = y_train.shape[1]

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = candle.build_initializer('constant', kerasDefaults, 0.)

    activation = gParameters['activation']

    # Define MLP architecture
    layers = gParameters['dense']

    if layers is not None:
        if not isinstance(layers, list):
            layers = list(layers)
        for i, l in enumerate(layers):
            if i == 0:
                x = Dense(l,
                          activation=activation,
                          kernel_initializer=initializer_weights,
                          bias_initializer=initializer_bias,
                          kernel_regularizer=l2(gParameters['reg_l2']),
                          activity_regularizer=l2(
                              gParameters['reg_l2']))(input_vector)
            else:
                x = Dense(l,
                          activation=activation,
                          kernel_initializer=initializer_weights,
                          bias_initializer=initializer_bias,
                          kernel_regularizer=l2(gParameters['reg_l2']),
                          activity_regularizer=l2(gParameters['reg_l2']))(x)
            if gParameters['dropout']:
                x = Dropout(gParameters['dropout'])(x)
        output = Dense(output_dim,
                       activation=activation,
                       kernel_initializer=initializer_weights,
                       bias_initializer=initializer_bias)(x)
    else:
        output = Dense(output_dim,
                       activation=activation,
                       kernel_initializer=initializer_weights,
                       bias_initializer=initializer_bias)(input_vector)

    # Build MLP model
    mlp = Model(outputs=output, inputs=input_vector)
    p1b2.logger.debug('Model: {}'.format(mlp.to_json()))

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    # Compile and display model
    mlp.compile(loss=gParameters['loss'],
                optimizer=optimizer,
                metrics=['accuracy'])
    mlp.summary()

    # Seed random generator for training
    np.random.seed(seed)

    history = mlp.fit(X_train,
                      y_train,
                      batch_size=gParameters['batch_size'],
                      epochs=gParameters['epochs'],
                      validation_split=gParameters['val_split']
                      #validation_data=(X_val, y_val)
                      )

    best_val_loss = "{:.5f}".format(min(history.history['val_loss']))
    best_val_acc = "{:.5f}".format(max(history.history['val_acc']))
    print('best_val_loss =', best_val_loss, 'best_val_acc =', best_val_acc)

    # model save
    # save_filepath = "model_mlp_W_" + ext
    # mlp.save_weights(save_filepath)

    model_json = mlp.to_json()
    with open(prefix + '.model.json', 'w') as f:
        print(model_json, file=f)
    mlp.save_weights(prefix + '.weights.h5')

    # Evaluate model on test set
    y_pred = mlp.predict(X_test)
    scores = p1b2.evaluate_accuracy_one_hot(y_pred, y_test)
    print('Evaluation on test data:', scores)
def run(gParameters):

    print('Params:', gParameters)

    file_train = gParameters['train_data']
    file_test = gParameters['test_data']
    url = gParameters['data_url']

    train_file = candle.get_file(file_train,
                                 url + file_train,
                                 cache_subdir='Pilot1')
    test_file = candle.get_file(file_test,
                                url + file_test,
                                cache_subdir='Pilot1')

    X_train, Y_train, X_test, Y_test = load_data(train_file, test_file,
                                                 gParameters)

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    print('Y_train shape:', Y_train.shape)
    print('Y_test shape:', Y_test.shape)

    x_train_len = X_train.shape[1]

    # this reshaping is critical for the Conv1D to work

    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)
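    # After expand_dims the arrays have shape (samples, features, 1), the
    # (steps, channels) layout that Conv1D expects.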

    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    # Without the K.clear_session() call below, ALW found on 2020-11-15 that Supervisor
    # jobs using canonically CANDLE-compliant model scripts die as soon as a particular
    # task is used a second time, with an exception like the following:
    # InvalidArgumentError() ...
    # File "<string>", line 23, in <module>
    # File "/gpfs/alpine/med106/world-shared/candle/2020-11-11/checkouts/Supervisor/workflows/common/python/model_runner.py", line 241, in run_model
    #     result, history = run(hyper_parameter_map, obj_return)
    # File "/gpfs/alpine/med106/world-shared/candle/2020-11-11/checkouts/Supervisor/workflows/common/python/model_runner.py", line 169, in run
    #     history = pkg.run(params)
    # File "/gpfs/alpine/med106/world-shared/candle/2020-11-11/checkouts/Benchmarks/Pilot1/NT3/nt3_candle_wrappers_baseline_keras2.py", line 211, in run
    #     callbacks=[csv_logger, reduce_lr, candleRemoteMonitor, timeoutMonitor])
    # File "/gpfs/alpine/world-shared/med106/sw/condaenv-200408/lib/python3.6/site-packages/keras/engine/training.py", line 1178, in fit
    #     validation_freq=validation_freq)
    # File "/gpfs/alpine/world-shared/med106/sw/condaenv-200408/lib/python3.6/site-packages/keras/engine/training_arrays.py", line 204, in fit_loop
    #     outs = fit_function(ins_batch)
    # File "/gpfs/alpine/world-shared/med106/sw/condaenv-200408/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2979, in __call__
    #     return self._call(inputs)
    # File "/gpfs/alpine/world-shared/med106/sw/condaenv-200408/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2933, in _call
    #     session)
    # File "/gpfs/alpine/world-shared/med106/sw/condaenv-200408/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2885, in _make_callable
    #     callable_fn = session._make_callable_from_options(callable_opts)
    # File "/gpfs/alpine/world-shared/med106/sw/condaenv-200408/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1505, in _make_callable_from_options
    #     return BaseSession._Callable(self, callable_options)
    # File "/gpfs/alpine/world-shared/med106/sw/condaenv-200408/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1460, in __init__
    #     session._session, options_ptr)
    K.clear_session()

    model = Sequential()

    layer_list = list(range(0, len(gParameters['conv']), 3))
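    # gParameters['conv'] is read as a flat list of triples: (filters, kernel_size,
    # stride) for each convolutional block.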
    for i in layer_list:
        filters = gParameters['conv'][i]
        filter_len = gParameters['conv'][i + 1]
        stride = gParameters['conv'][i + 2]
        print(int(i / 3), filters, filter_len, stride)
        if gParameters['pool']:
            pool_list = gParameters['pool']
            if not isinstance(pool_list, list):
                pool_list = list(pool_list)

        if filters <= 0 or filter_len <= 0 or stride <= 0:
            break
        if 'locally_connected' in gParameters:
            model.add(
                LocallyConnected1D(filters,
                                   filter_len,
                                   strides=stride,
                                   padding='valid',
                                   input_shape=(x_train_len, 1)))
        else:
            # input layer
            if i == 0:
                model.add(
                    Conv1D(filters=filters,
                           kernel_size=filter_len,
                           strides=stride,
                           padding='valid',
                           input_shape=(x_train_len, 1)))
            else:
                model.add(
                    Conv1D(filters=filters,
                           kernel_size=filter_len,
                           strides=stride,
                           padding='valid'))
        model.add(Activation(gParameters['activation']))
        if gParameters['pool']:
            model.add(MaxPooling1D(pool_size=pool_list[int(i / 3)]))

    model.add(Flatten())

    for layer in gParameters['dense']:
        if layer:
            model.add(Dense(layer))
            model.add(Activation(gParameters['activation']))
            if gParameters['dropout']:
                model.add(Dropout(gParameters['dropout']))
    model.add(Dense(gParameters['classes']))
    model.add(Activation(gParameters['out_activation']))

    # Reference case
    # model.add(Conv1D(filters=128, kernel_size=20, strides=1, padding='valid', input_shape=(P, 1)))
    # model.add(Activation('relu'))
    # model.add(MaxPooling1D(pool_size=1))
    # model.add(Conv1D(filters=128, kernel_size=10, strides=1, padding='valid'))
    # model.add(Activation('relu'))
    # model.add(MaxPooling1D(pool_size=10))
    # model.add(Flatten())
    # model.add(Dense(200))
    # model.add(Activation('relu'))
    # model.add(Dropout(0.1))
    # model.add(Dense(20))
    # model.add(Activation('relu'))
    # model.add(Dropout(0.1))
    # model.add(Dense(CLASSES))
    # model.add(Activation('softmax'))

    kerasDefaults = candle.keras_default_config()

    # Define optimizer
    optimizer = candle.build_optimizer(gParameters['optimizer'],
                                       gParameters['learning_rate'],
                                       kerasDefaults)

    model.summary()
    model.compile(loss=gParameters['loss'],
                  optimizer=optimizer,
                  metrics=[gParameters['metrics']])

    output_dir = gParameters['output_dir']

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # calculate trainable and non-trainable params
    gParameters.update(candle.compute_trainable_params(model))

    # set up a bunch of callbacks to do work during model training..
    model_name = gParameters['model_name']
    path = '{}/{}.autosave.model.h5'.format(output_dir, model_name)
    # checkpointer = ModelCheckpoint(filepath=path, verbose=1, save_weights_only=False, save_best_only=True)
    csv_logger = CSVLogger('{}/training.log'.format(output_dir))
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=10,
                                  verbose=1,
                                  mode='auto',
                                  epsilon=0.0001,
                                  cooldown=0,
                                  min_lr=0)
    candleRemoteMonitor = candle.CandleRemoteMonitor(params=gParameters)
    timeoutMonitor = candle.TerminateOnTimeOut(gParameters['timeout'])
    history = model.fit(
        X_train,
        Y_train,
        batch_size=gParameters['batch_size'],
        epochs=gParameters['epochs'],
        verbose=1,
        validation_data=(X_test, Y_test),
        callbacks=[csv_logger, reduce_lr, candleRemoteMonitor, timeoutMonitor])

    score = model.evaluate(X_test, Y_test, verbose=0)

    if False:
        print('Test score:', score[0])
        print('Test accuracy:', score[1])
        # serialize model to JSON
        model_json = model.to_json()
        with open("{}/{}.model.json".format(output_dir, model_name),
                  "w") as json_file:
            json_file.write(model_json)

        # serialize model to YAML
        model_yaml = model.to_yaml()
        with open("{}/{}.model.yaml".format(output_dir, model_name),
                  "w") as yaml_file:
            yaml_file.write(model_yaml)

        # serialize weights to HDF5
        model.save_weights("{}/{}.weights.h5".format(output_dir, model_name))
        print("Saved model to disk")

        # load json and create model
        json_file = open('{}/{}.model.json'.format(output_dir, model_name),
                         'r')
        loaded_model_json = json_file.read()
        json_file.close()
        loaded_model_json = model_from_json(loaded_model_json)

        # load yaml and create model
        yaml_file = open('{}/{}.model.yaml'.format(output_dir, model_name),
                         'r')
        loaded_model_yaml = yaml_file.read()
        yaml_file.close()
        loaded_model_yaml = model_from_yaml(loaded_model_yaml)

        # load weights into new model
        loaded_model_json.load_weights('{}/{}.weights.h5'.format(
            output_dir, model_name))
        print("Loaded json model from disk")

        # evaluate json loaded model on test data
        loaded_model_json.compile(loss=gParameters['loss'],
                                  optimizer=gParameters['optimizer'],
                                  metrics=[gParameters['metrics']])
        score_json = loaded_model_json.evaluate(X_test, Y_test, verbose=0)

        print('json Test score:', score_json[0])
        print('json Test accuracy:', score_json[1])

        print("json %s: %.2f%%" %
              (loaded_model_json.metrics_names[1], score_json[1] * 100))

        # load weights into new model
        loaded_model_yaml.load_weights('{}/{}.weights.h5'.format(
            output_dir, model_name))
        print("Loaded yaml model from disk")

        # evaluate loaded model on test data
        loaded_model_yaml.compile(loss=gParameters['loss'],
                                  optimizer=gParameters['optimizer'],
                                  metrics=[gParameters['metrics']])
        score_yaml = loaded_model_yaml.evaluate(X_test, Y_test, verbose=0)

        print('yaml Test score:', score_yaml[0])
        print('yaml Test accuracy:', score_yaml[1])

        print("yaml %s: %.2f%%" %
              (loaded_model_yaml.metrics_names[1], score_yaml[1] * 100))

    return history
def run(params):
    args = candle.ArgumentStruct(**params)
    seed = args.rng_seed
    candle.set_seed(seed)

    # Construct extension to save model
    ext = adrp.extension_from_parameters(params, ".keras")
    candle.verify_path(params["save_path"])
    prefix = "{}{}".format(params["save_path"], ext)
    logfile = params["logfile"] if params["logfile"] else prefix + ".log"
    candle.set_up_logger(logfile, adrp.logger, params["verbose"])
    adrp.logger.info("Params: {}".format(params))

    # Get default parameters for initialization and optimizer functions
    keras_defaults = candle.keras_default_config()

    ##
    X_train, Y_train, X_test, Y_test, PS = adrp.load_data(params, seed)

    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)

    print("Y_train shape:", Y_train.shape)
    print("Y_test shape:", Y_test.shape)

    # Initialize weights and learning rule
    initializer_weights = candle.build_initializer(params["initialization"],
                                                   keras_defaults, seed)
    initializer_bias = candle.build_initializer("constant", keras_defaults,
                                                0.0)

    activation = params["activation"]
    # TODO: set output_dim
    output_dim = 1

    # TODO: Use dense_layers for creating inputs/outputs
    dense_layers = params["dense"]

    inputs = Input(shape=(PS, ))
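    # Build a plain MLP over the PS input features: one Dense (plus optional Dropout)
    # block per entry in params['dense'], ending in a single unit for the regression target.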

    if dense_layers is not None:
        if not isinstance(dense_layers, list):
            dense_layers = list(dense_layers)
        for i, l in enumerate(dense_layers):
            if i == 0:
                x = Dense(
                    l,
                    activation=activation,
                    kernel_initializer=initializer_weights,
                    bias_initializer=initializer_bias,
                )(inputs)
            else:
                x = Dense(
                    l,
                    activation=activation,
                    kernel_initializer=initializer_weights,
                    bias_initializer=initializer_bias,
                )(x)
            if params["dropout"]:
                x = Dropout(params["dropout"])(x)
        output = Dense(
            output_dim,
            activation=activation,
            kernel_initializer=initializer_weights,
            bias_initializer=initializer_bias,
        )(x)
    else:
        output = Dense(
            output_dim,
            activation=activation,
            kernel_initializer=initializer_weights,
            bias_initializer=initializer_bias,
        )(inputs)

    model = Model(inputs=inputs, outputs=output)

    model.summary()

    kerasDefaults = candle.keras_default_config()
    if params["momentum"]:
        kerasDefaults["momentum_sgd"] = params["momentum"]

    optimizer = candle.build_optimizer(params["optimizer"],
                                       params["learning_rate"], kerasDefaults)

    model.compile(
        loss=params["loss"],
        optimizer=optimizer,
        metrics=["mae", r2],
    )

    # set up a bunch of callbacks to do work during model training..

    checkpointer = ModelCheckpoint(
        filepath=params["save_path"] + "agg_adrp.autosave.model.h5",
        verbose=1,
        save_weights_only=False,
        save_best_only=True,
    )
    csv_logger = CSVLogger(params["save_path"] + "agg_adrp.training.log")
    reduce_lr = ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.75,
        patience=20,
        mode="auto",
        epsilon=0.0001,
        cooldown=3,
        min_lr=0.000000001,
    )
    early_stop = EarlyStopping(monitor="val_loss",
                               patience=100,
                               verbose=1,
                               mode="auto")

    # history = parallel_model.fit(X_train, Y_train,
    epochs = params["epochs"]
    batch_size = params["batch_size"]

    history = model.fit(
        X_train,
        Y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,
        validation_data=(X_test, Y_test),
        callbacks=[checkpointer, csv_logger, reduce_lr, early_stop],
    )

    score = model.evaluate(X_test, Y_test, verbose=0)

    print(score)

    print(history.history.keys())

    # see the big function below; it creates plots etc.
    # TODO: Break post_process into multiple functions
    post_process(params, X_train, X_test, Y_test, score, history, model)

    adrp.logger.handlers = []

    return history