def test_create_model(rnn):
    got = model.create_model(model.Options(attention=True,
                                           rnn=rnn)).get_config()
    got = json.loads(json.dumps(got))
    tfversion = version.parse(tf.__version__)
    with pathlib.Path(__file__).with_suffix('.json').open('r') as file:
        expected = json.load(file)[f"{tfversion.major}.{tfversion.minor}"]
    assert got == expected[rnn]
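
# NOTE: the `rnn` argument above is presumably injected by a parametrize
# decorator that was lost when this file was flattened; a plausible
# (hypothetical) form would be:
# @pytest.mark.parametrize("rnn", ["LSTM", "GRU"])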
def run(self):
    """Run the command provided by the user."""
    options = dgmodel.Options(
        min_mss_len=self.args.min_mss_length,
        batch_size=self.args.batch_size,
        xdrop_len=self.args.xdrop_length,
    )
    getattr(self, self.args.command)(self.args, options)
def test_update_options(testcase, expected):
    opt = dgmodel.Options()
    for key, value in testcase.items():
        try:
            assert opt[key] != expected[key]
            assert opt[key] != value
        except KeyError:
            pass
    got = dgopt._update_options(opt, testcase)  # pylint: disable=protected-access
    for key, value in expected.items():
        assert got[key] == value
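
# The (testcase, expected) pairs for test_update_options likely come from a
# parametrize decorator as well; a hypothetical example pair illustrating the
# contract (keys present in `testcase` must show up in the returned Options):
# @pytest.mark.parametrize("testcase,expected",
#                          [({"units": 48}, {"units": 48})])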
def test_get_optimizer(optimizer, rho, momentum, epsilon, learning_rate):
    opt = model.Options(optimizer=optimizer,
                        rho=rho,
                        learning_rate=learning_rate,
                        momentum=momentum,
                        epsilon=epsilon)
    got = model._get_optimizer(opt)  # pylint: disable=protected-access
    if optimizer not in ('Adam', 'RMSprop'):
        assert got == optimizer
        return
    assert isinstance(got, tf.keras.optimizers.Optimizer)
    got = got.get_config()
    assert got["learning_rate"] == learning_rate
    assert got["epsilon"] == epsilon
    if optimizer == 'RMSprop':
        assert got["rho"] == rho
        assert got["momentum"] == momentum
    elif optimizer == 'Adam':
        assert got["beta_2"] == rho
        assert got["beta_1"] == momentum
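
# Hedged reference for what model._get_optimizer presumably does, inferred
# only from the assertions above (this sketch is an assumption, not the
# project's actual implementation): names other than 'Adam'/'RMSprop' are
# passed through unchanged, while those two are built from the Options values.
def _reference_get_optimizer(opt):
    if opt.optimizer == "Adam":
        # For Adam, rho maps onto beta_2 and momentum onto beta_1.
        return tf.keras.optimizers.Adam(learning_rate=opt.learning_rate,
                                        beta_1=opt.momentum,
                                        beta_2=opt.rho,
                                        epsilon=opt.epsilon)
    if opt.optimizer == "RMSprop":
        return tf.keras.optimizers.RMSprop(learning_rate=opt.learning_rate,
                                           rho=opt.rho,
                                           momentum=opt.momentum,
                                           epsilon=opt.epsilon)
    # Any other name is returned as-is for Keras to resolve by string.
    return opt.optimizer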
def test_predict(monkeypatch, tmpdir, mss_bool):
    # helper functions:
    def dgpred_predict_dummy(model, data_iterator, output_shape, step_size):
        assert isinstance(model, tf.keras.Model)
        assert isinstance(data_iterator, tf.data.Dataset)
        assert isinstance(output_shape[0], int) and isinstance(
            output_shape[1], int)
        assert step_size == 3
        return np.ones((100, 5))

    def apply_mss_dummy(prediction, options):
        assert isinstance(prediction, np.ndarray)
        assert isinstance(options, dgmodel.Options)
        assert mss_bool
        return np.ones((100, 5))

    def softmax_dummy(prediction):
        assert isinstance(prediction, np.ndarray)
        assert not mss_bool
        return np.ones((100, 5))

    monkeypatch.setattr(dgpred, "predict", dgpred_predict_dummy)
    monkeypatch.setattr(dgpred, "apply_mss", apply_mss_dummy)
    monkeypatch.setattr(dgpred, "softmax", softmax_dummy)

    # variables to give for testing
    opt = dgmodel.Options(project_root_dir=str(tmpdir),
                          n_batches=1,
                          n_epochs=1,
                          batch_size=10,
                          vecsize=10)
    dnasequence = "".join(
        np.random.choice(["N", "A", "C", "G", "T"], size=(100)))
    model = dgmodel.create_model(opt)
    dgparser._predict(  # pylint: disable=protected-access
        dnasequence=dnasequence,
        model=model,
        options=opt,
        step_size=3,
        use_mss=mss_bool)
def test_get_brnn_layer(rnn, units, dropout, attention):
    opt = model.Options(rnn=rnn,
                        units=units,
                        dropout=dropout,
                        attention=attention)
    got = model._get_brnn_layer(opt)  # pylint: disable=protected-access
    assert isinstance(got, tf.keras.layers.RNN)
    if rnn == "LSTM":
        assert got.name == "BLSTM"
    else:
        assert got.name == "BGRU"
    output = got(tf.zeros((10, 6, 2)))
    if attention and rnn != "LSTM":
        assert len(output) == 2
        assert output[0].shape == (10, 6, units)
        assert output[1].shape == (10, units)
    else:
        assert output.shape == (10, 6, units)
    got = got.get_config()
    assert got["dropout"] == dropout
    assert got["return_sequences"]
    assert got["return_state"] == (attention if rnn != "LSTM" else False)
def test_build_and_optimize(tmpdir, monkeypatch, is_failed):
    opt = dgmodel.Options(project_root_dir=str(tmpdir),
                          n_batches=1,
                          n_epochs=1,
                          batch_size=10,
                          vecsize=10)
    train = dgprep.Data(fwd=np.zeros((5, 100)), truelbl=np.zeros((5, 100)))
    valid = dgprep.Data(fwd=np.zeros((5, 100)), truelbl=np.zeros((5, 100)))
    optdict = {"test": 10}
    mcc = np.nan if is_failed == "loss" else 1
    monkeypatch.setattr(dgpred, "calculate_metrics",
                        lambda *_, **unused: (None, {"MCC": mcc}))
    monkeypatch.setattr(dgpred, "predict_complete",
                        lambda *_, **unused: np.ones((100, 5)))
    monkeypatch.setattr(tf.keras.Model, "fit", lambda *_, **unused: None)
    if is_failed == "exception":
        def _func(*_, **unused):
            raise ValueError
    else:
        _func = lambda *_, **unused: None
    monkeypatch.setattr(dgpred, "filter_segments", _func)
    got = dgopt.build_and_optimize(train_data=train,
                                   val_data=valid,
                                   step_size=10,
                                   options=opt,
                                   options_dict=optdict)
    print(got["Metrics"])
    expected = hyperopt.STATUS_OK if is_failed == "no" else hyperopt.STATUS_FAIL
    assert got["status"] == expected
    expected = -1 if is_failed == "no" else np.inf
    assert got["loss"] == expected
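
# build_and_optimize returns a hyperopt-compatible result dict ("loss",
# "status", "Metrics", ...), so it can serve directly as an fmin objective.
# A minimal usage sketch; the search space below is hypothetical and not
# taken from this repository:
def _fmin_sketch(train, valid, opt):
    from hyperopt import fmin, hp, tpe
    space = {"units": hp.choice("units", [16, 32, 64])}
    return fmin(fn=lambda params: dgopt.build_and_optimize(
                    train_data=train, val_data=valid, step_size=10,
                    options=opt, options_dict=params),
                space=space,
                algo=tpe.suggest,
                max_evals=5)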
def test_predict(self, monkeypatch, dummyfasta):
    fastafile, expected_sequence = dummyfasta(1)
    sys.argv = ["deepgrp", "predict", "model.hdf5", str(fastafile)]
    dgparsclass = dgparser.CommandLineParser().parse_args()

    # helper functions
    def load_model_dummy(model, custom_objects):
        assert model == "model.hdf5"
        assert isinstance(custom_objects, dict)
        # assert isinstance(custom_objects["ReverseComplement"],
        #                   dgmodel.ReverseComplement)
        # create random dummy model
        inputs = tf.keras.Input(shape=(3, ))
        x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
        outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)
        model = tf.keras.Model(inputs=inputs, outputs=outputs)
        return model

    def _predict_dummy(dnasequence, model, options, step_size, use_mss):
        assert dnasequence == expected_sequence["chr1"]
        assert isinstance(model, tf.keras.Model)
        assert isinstance(options, dgmodel.Options)
        assert isinstance(step_size, int)
        assert isinstance(use_mss, bool)
        # np.long was removed in NumPy 1.24; np.int64 is the equivalent dtype
        return np.random.randint(low=0, high=1000, size=(100),
                                 dtype=np.int64), np.random.randint(10)

    # monkeypatching
    monkeypatch.setattr(tf.keras.models, "load_model", load_model_dummy)
    monkeypatch.setattr(dgparser, "_predict", _predict_dummy)

    # create dummy options
    opt = dgmodel.Options(project_root_dir=str(fastafile.parent),
                          n_batches=1,
                          n_epochs=1,
                          batch_size=10,
                          vecsize=10)
    dgparsclass.predict(dgparsclass.args, opt)
def test_train(self, monkeypatch, tmp_path):
    # create dummy parameter file
    dummy_dir = tmp_path.joinpath("dummy_files")
    dummy_logdir = tmp_path.joinpath("dummy_logdir")
    dummy_dir.mkdir()
    dummy_logdir.mkdir()
    parameter = dummy_dir.joinpath("parameter.toml")
    with parameter.open("w") as file:
        toml.dump({"dummy": "parameter"}, file)

    # create training and validation files
    trainfile = dummy_dir.joinpath("chr1.fa.gz.npz")
    validfile = dummy_dir.joinpath("chr1.fa.npz")
    fwd = np.zeros((5, 100))
    np.savez(trainfile, fwd=fwd)
    np.savez(validfile, fwd=fwd)

    # create dummy parser
    sys.argv = [
        "deepgrp", "train",
        str(parameter),
        str(trainfile),
        str(validfile), "bedfile", "--logdir",
        str(dummy_logdir), "--modelfile", "model.hdf5"
    ]
    dgparsclass = dgparser.CommandLineParser().parse_args()

    # helper functions
    def get_toml_dummy(file):
        assert pathlib.Path(file.name) == parameter
        return dgmodel.Options()

    def from_dict_dummy(_, dictionary):
        assert isinstance(dictionary, dict)
        return dgmodel.Options()

    def preprocess_y_dummy(filename, chromosom, length, repeats_to_search):
        assert isinstance(filename, str)
        assert filename == "bedfile"
        assert chromosom == "chr1"
        assert length == 100
        assert isinstance(repeats_to_search, list)
        return "np.ndarray_PREPROCESS_Y"

    def drop_start_end_n_dummy(fwd, array):
        assert isinstance(fwd, np.ndarray)
        assert array == "np.ndarray_PREPROCESS_Y"
        return np.zeros((5, 100)), np.zeros((5, 100))

    def create_model_dummy(parameter):
        assert isinstance(parameter, dgmodel.Options)
        # create random dummy model
        inputs = tf.keras.Input(shape=(3, ))
        x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
        outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)
        model = tf.keras.Model(inputs=inputs, outputs=outputs)
        return model

    def training_dummy(data, options, model, logdir):
        assert pathlib.Path(logdir) == dummy_logdir
        assert isinstance(model, tf.keras.models.Model)
        assert isinstance(data[0], dgpreprocess.Data) and \
            isinstance(data[1], dgpreprocess.Data)
        assert isinstance(options, dgmodel.Options)

    def model_save_dummy(self, filename):
        assert isinstance(self, tf.keras.Model)
        assert filename == "model.hdf5"

    # overwrite Options loading
    monkeypatch.setattr(dgmodel.Options, "from_toml", get_toml_dummy)
    monkeypatch.setattr(dgmodel.Options, "fromdict", from_dict_dummy)
    # overwrite preprocessing functions
    monkeypatch.setattr(dgpreprocess, "preprocess_y", preprocess_y_dummy)
    monkeypatch.setattr(dgpreprocess, "drop_start_end_n",
                        drop_start_end_n_dummy)
    # overwrite training
    monkeypatch.setattr(dgmodel, "create_model", create_model_dummy)
    monkeypatch.setattr(dgtrain, "training", training_dummy)
    monkeypatch.setattr(tf.keras.Model, "save", model_save_dummy)

    dgparsclass.train(dgparsclass.args, dgmodel.Options())
def test_create_logdir(tmp_path):
    opt = model.Options(project_root_dir=str(tmp_path))
    got = model.create_logdir(opt)
    assert got == str(tmp_path.joinpath("tf_logs", "run-19900305121503"))
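
# test_create_logdir only passes with a fixed clock (note the
# run-19900305121503 suffix), so the real suite presumably freezes time in a
# shared fixture. A minimal sketch of such a freeze, assuming create_logdir
# names the directory via time.strftime("run-%Y%m%d%H%M%S"):
@pytest.fixture
def _frozen_time(monkeypatch):
    import time
    # 1990-03-05 12:15:03, matching the expected logdir name above
    monkeypatch.setattr(time, "strftime", lambda *_: "run-19900305121503")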
def test_fromdict(self, init_args, expected):
    got = model.Options()
    got.fromdict(init_args)
    for attribute, value in expected.items():
        assert getattr(got, attribute) == value