def test_localaveragepooling2D(tmpdir):
    """Exercise LocalAveragePooling2D(3) on two small hand-built inputs.

    The first input is constant along the pooled axis, so the test asserts
    that the prediction equals the first 8 of the 10 input positions.
    The second input holds the values 0, 1, 2 (channel 0) and 1, 2, 3
    (channel 1) along the pooled axis, and the test asserts that a single
    output position with the values 1 and 2 is produced.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    # Case 1: channels hold the constants 1, 2 and 3 at every position.
    flat_input = np.ones((1, 10, 1, 3)) + np.arange(3)

    flat_in_layer = Input((10, 1, 3))
    flat_out_layer = LocalAveragePooling2D(3)(flat_in_layer)
    flat_model = Janggu(flat_in_layer, flat_out_layer)
    flat_pred = flat_model.predict(flat_input)

    np.testing.assert_equal(flat_pred, flat_input[:, :8, :, :])

    # Case 2: values ramp 0, 1, 2 along axis 1; channel 1 is shifted by one.
    ramp_input = np.arange(3.0).reshape(1, 3, 1, 1) + np.arange(2)

    ramp_in_layer = Input((3, 1, 2))
    ramp_out_layer = LocalAveragePooling2D(3)(ramp_in_layer)
    ramp_model = Janggu(ramp_in_layer, ramp_out_layer)
    ramp_pred = ramp_model.predict(ramp_input)

    np.testing.assert_equal(ramp_pred.shape, (1, 1, 1, 2))
    for channel, expected in enumerate((1, 2)):
        np.testing.assert_equal(ramp_pred[0, 0, 0, channel], expected)
def objective(params):
    """Build, train and score one model configuration.

    Parameters
    ----------
    params : dict
        Configuration of the run. The keys read here are 'val_chrom',
        'inputs', 'pretrained' (only when 'inputs' is 'epi_dna'), 'opt'
        and 'epochs'. The whole dict is also forwarded to ``get_data``
        and, for non-joint models, to ``get_model``.

    Returns
    -------
    dict
        ``{'status': 'fail'}`` if a ValueError is raised while loading the
        data or constructing the model. Otherwise a dict with
        ``'status': 'ok'``, the validation loss under ``'loss'``, the
        training history, the train/val/test correlations, the model
        config and weights, and the parameters that were used.

    Notes
    -----
    Relies on the module-level names ``test_chrom`` and ``main_logger``,
    which are defined outside this block.
    """
    print(params)

    try:
        full_data = get_data(params)
        # Hold out the test chromosome first, then split the validation
        # chromosome off what remains.
        fit_data, test = split_train_test(full_data, [test_chrom])
        train, val = split_train_test(fit_data, [params['val_chrom']])

        K.clear_session()

        if params['inputs'] != 'epi_dna':
            model = Janggu.create(
                get_model, params, fit_data[0], fit_data[1],
                name='cage_promoters_{}'.format(params['inputs']))
        else:
            # Joint model: concatenate the second-to-last layers of the
            # stored DNA-only and epigenetics-only models and put a fresh
            # single-unit output layer on top.
            dna_model = Janggu.create_by_name('cage_promoters_dna_only')
            epi_model = Janggu.create_by_name('cage_promoters_epi_only')

            concat = Concatenate()
            merged = concat([
                dna_model.kerasmodel.layers[-2].output,
                epi_model.kerasmodel.layers[-2].output,
            ])
            head = Dense(1, name='geneexpr')(merged)

            # NOTE(review): the list concatenation assumes that
            # epi_model.kerasmodel.input is a list -- confirm.
            joint_inputs = [dna_model.kerasmodel.input] \
                + epi_model.kerasmodel.input
            model = Janggu(joint_inputs, head,
                           name='cage_promoters_epi_dna')

            if not params['pretrained']:
                # Rebuild the network from its JSON description so that
                # training starts from freshly initialized weights.
                rebuilt = model_from_json(model.kerasmodel.to_json())
                model = Janggu(rebuilt.inputs, rebuilt.outputs,
                               name='cage_promoters_epi_dna_randominit')
    except ValueError:
        main_logger.exception('objective:')
        return {'status': 'fail'}

    model.compile(optimizer=get_opt(params['opt']),
                  loss='mae',
                  metrics=['mse'])

    # validation_data receives the validation chromosome name in a list;
    # presumably Janggu splits it off fit_data internally -- confirm
    # against the Janggu.fit documentation.
    hist = model.fit(
        fit_data[0], fit_data[1],
        epochs=params['epochs'],
        batch_size=64,
        validation_data=[params['val_chrom']],
        callbacks=[EarlyStopping(patience=5, restore_best_weights=True)])

    # Report the last recorded value of every tracked metric.
    banner = '#' * 40
    print(banner)
    for metric, values in hist.history.items():
        print('{}: {}'.format(metric, values[-1]))
    print(banner)

    # Fixed processing order: train, then val, then test.
    splits = (('train', train), ('val', val), ('test', test))

    preds = {}
    for tag, part in splits:
        preds[tag] = model.predict(part[0])

    mae_val = None
    for tag, part in splits:
        scores = model.evaluate(
            part[0], part[1],
            callbacks=['var_explained', 'mse', 'mae', 'cor'],
            datatags=[tag])
        if tag == 'val':
            # The first entry of the evaluation result is reported as
            # the loss of this run.
            mae_val = scores[0]

    # Correlation between the first label column and the first
    # prediction column, per split.
    cors = {}
    for tag, part in splits:
        observed = part[1][:][:, 0]
        predicted = preds[tag][:, 0]
        cors[tag] = np.corrcoef(observed, predicted)[0, 1]
    cor_train, cor_val, cor_test = cors['train'], cors['val'], cors['test']

    model.summary()
    main_logger.info('cor [train/val/test]: {:.2f}/{:.2f}/{:.2f}'.format(
        cor_train, cor_val, cor_test))

    return {
        'loss': mae_val,
        'status': 'ok',
        'all_losses': hist.history,
        'cor_train': cor_train,
        'cor_val': cor_val,
        'cor_test': cor_test,
        'model_config': model.kerasmodel.to_json(),
        'model_weights': model.kerasmodel.get_weights(),
        'concrete_params': params
    }
newjointmodel.outputs, name='randominit_dnase_dna_joint_model_{}_{}'.format( dnasename, dnaname)) newjointmodel.compile(optimizer=get_opt('amsgrad'), loss='binary_crossentropy', metrics=['acc']) hist = newjointmodel.fit( train_data[0], train_data[1], epochs=shared_space['epochs'], batch_size=64, validation_data=val_data, callbacks=[EarlyStopping(patience=5, restore_best_weights=True)]) pred_test = newjointmodel.predict(test_data[0]) pred_val = newjointmodel.predict(val_data[0]) auprc_val = average_precision_score(val_data[1][:], pred_val) auprc_test = average_precision_score(test_data[1][:], pred_test) print('auprc_val: {:.2%}'.format(auprc_val)) print('auprc_test: {:.2%}'.format(auprc_test)) auprc_rand_val.append(auprc_val) auprc_rand_test.append(auprc_test) df = pd.DataFrame({'auprc_val': auprc_rand_val, 'auprc_test': auprc_rand_test}) df.to_csv(os.path.join(os.environ['JANGGU_OUTPUT'], "dnase_dna_use_randominit_submodels.tsv"), sep='\t')