Code example #1
File: test_model.py Project: fhausmann/deepgrp
def test_create_model(rnn):
    got = model.create_model(model.Options(attention=True,
                                           rnn=rnn)).get_config()
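    # round-trip through JSON so the config only contains JSON types (e.g. tuples become lists) before comparing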
    got = json.loads(json.dumps(got))
    tfversion = version.parse(tf.__version__)
    with pathlib.Path(__file__).with_suffix('.json').open('r') as file:
        expected = json.load(file)[f"{tfversion.major}.{tfversion.minor}"]
    assert got == expected[rnn]
Code example #2
File: __main__.py Project: fhausmann/deepgrp
    def run(self):
        """Run the command provided by the user."""
        options = dgmodel.Options(
            min_mss_len=self.args.min_mss_length,
            batch_size=self.args.batch_size,
            xdrop_len=self.args.xdrop_length,
        )
        getattr(self, self.args.command)(self.args, options)
Code example #3
def test_update_options(testcase, expected):
    opt = dgmodel.Options()
    for key, value in testcase.items():
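        # sanity check: each option to be updated should start out different from its target value (unknown keys are skipped)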
        try:
            assert opt[key] != expected[key]
            assert opt[key] != value
        except KeyError:
            pass
    got = dgopt._update_options(opt, testcase)  # pylint: disable=protected-access
    for key, value in expected.items():
        assert got[key] == value
Code example #4
File: test_model.py Project: fhausmann/deepgrp
def test_get_opimizer(optimizer, rho, momentum, epsilon, learning_rate):
    opt = model.Options(optimizer=optimizer,
                        rho=rho,
                        learning_rate=learning_rate,
                        momentum=momentum,
                        epsilon=epsilon)
    got = model._get_optimizer(opt)  # pylint: disable=protected-access
    if optimizer not in ('Adam', 'RMSprop'):
        assert got == optimizer
        return
    assert isinstance(got, tf.keras.optimizers.Optimizer)
    got = got.get_config()
    assert got["learning_rate"] == learning_rate
    assert got["epsilon"] == epsilon
    if optimizer == 'RMSprop':
        assert got["rho"] == rho
        assert got["momentum"] == momentum
    elif optimizer == "Adam":
        assert got["beta_2"] == rho
        assert got["beta_1"] == momentum
Code example #5
def test_predict(monkeypatch, tmpdir, mss_bool):
    # helper functions:
    def dgpred_predict_dummy(model, data_iterator, output_shape, step_size):
        assert isinstance(model, tf.keras.Model)
        assert isinstance(data_iterator, tf.data.Dataset)
        assert isinstance(output_shape[0], int) and isinstance(
            output_shape[1], int)
        assert step_size == 3
        return np.ones((100, 5))

    def apply_mss_dummy(prediction, options):
        assert isinstance(prediction, np.ndarray)
        assert isinstance(options, dgmodel.Options)
        assert mss_bool
        return np.ones((100, 5))

    def softmax_dummy(prediction):
        assert isinstance(prediction, np.ndarray)
        assert not mss_bool
        return np.ones((100, 5))

    monkeypatch.setattr(dgpred, "predict", dgpred_predict_dummy)
    monkeypatch.setattr(dgpred, "apply_mss", apply_mss_dummy)
    monkeypatch.setattr(dgpred, "softmax", softmax_dummy)

    # variables to give for testing
    opt = dgmodel.Options(project_root_dir=str(tmpdir),
                          n_batches=1,
                          n_epochs=1,
                          batch_size=10,
                          vecsize=10)
    dnasequence = "".join(
        np.random.choice(["N", "A", "C", "G", "T"], size=(100)))
    model = dgmodel.create_model(opt)
    dgparser._predict(  # pylint: disable=protected-access
        dnasequence=dnasequence,
        model=model,
        options=opt,
        step_size=3,
        use_mss=mss_bool)
Code example #6
File: test_model.py Project: fhausmann/deepgrp
def test_get_brnn_layer(rnn, units, dropout, attention):
    opt = model.Options(rnn=rnn,
                        units=units,
                        dropout=dropout,
                        attention=attention)
    got = model._get_brnn_layer(opt)  # pylint: disable=protected-access
    assert isinstance(got, tf.keras.layers.RNN)
    if rnn == "LSTM":
        assert got.name == "BLSTM"
    else:
        assert got.name == "BGRU"
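    # with attention and a GRU, return_state is enabled, so the layer also returns its final state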
    output = got(tf.zeros((10, 6, 2)))
    if attention and rnn != 'LSTM':
        assert len(output) == 2
        assert output[0].shape == (10, 6, units)
        assert output[1].shape == (10, units)
    else:
        assert output.shape == (10, 6, units)
    got = got.get_config()
    assert got["dropout"] == dropout
    assert got["return_sequences"]
    assert got["return_state"] == (attention if rnn != "LSTM" else False)
Code example #7
def test_build_and_optimize(tmpdir, monkeypatch, is_failed):

    opt = dgmodel.Options(project_root_dir=str(tmpdir),
                          n_batches=1,
                          n_epochs=1,
                          batch_size=10,
                          vecsize=10)
    train = dgprep.Data(fwd=np.zeros((5, 100)), truelbl=np.zeros((5, 100)))
    valid = dgprep.Data(fwd=np.zeros((5, 100)), truelbl=np.zeros((5, 100)))
    optdict = {"test": 10}
    mcc = np.nan if is_failed == "loss" else 1
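    # a NaN MCC simulates a failed optimization run for the "loss" failure case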

    monkeypatch.setattr(dgpred, "calculate_metrics", lambda *_, **unused:
                        (None, {
                            "MCC": mcc
                        }))
    monkeypatch.setattr(dgpred, "predict_complete",
                        lambda *_, **unused: np.ones((100, 5)))
    monkeypatch.setattr(tf.keras.Model, "fit", lambda *_, **unused: None)
    if is_failed == "exception":

        def _func(*_, **unused):
            raise ValueError
    else:
        _func = lambda *_, **unused: None
    monkeypatch.setattr(dgpred, "filter_segments", _func)
    got = dgopt.build_and_optimize(train_data=train,
                                   val_data=valid,
                                   step_size=10,
                                   options=opt,
                                   options_dict=optdict)

    print(got["Metrics"])
    expected = hyperopt.STATUS_OK if is_failed == "no" else hyperopt.STATUS_FAIL
    assert got["status"] == expected
    expected = -1 if is_failed == "no" else np.inf
    assert got["loss"] == expected
Code example #8
    def test_predict(self, monkeypatch, dummyfasta):

        fastafile, expected_sequence = dummyfasta(1)
        sys.argv = ["deepgrp", "predict", "model.hdf5", str(fastafile)]
        dgparsclass = dgparser.CommandLineParser().parse_args()

        # helper functions
        def load_model_dummy(model, custom_objects):
            assert model == "model.hdf5"
            assert isinstance(custom_objects, dict)
            #assert isinstance(custom_objects["ReverseComplement"], dgmodel.ReverseComplement)
            # create random dummy model
            inputs = tf.keras.Input(shape=(3, ))
            x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
            outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)
            model = tf.keras.Model(inputs=inputs, outputs=outputs)
            return model

        def _predict_dummy(dnasequence, model, options, step_size, use_mss):
            assert dnasequence == expected_sequence["chr1"]
            assert isinstance(model, tf.keras.Model)
            assert isinstance(options, dgmodel.Options)
            assert isinstance(step_size, int)
            assert isinstance(use_mss, bool)
            return np.random.randint(low=0, high=1000, size=(100), dtype=np.int_), \
                    np.random.randint(10)

        #monkeypatching
        monkeypatch.setattr(tf.keras.models, "load_model", load_model_dummy)
        monkeypatch.setattr(dgparser, "_predict", _predict_dummy)
        # create dummy opt
        opt = dgmodel.Options(project_root_dir=str(fastafile.parent),
                              n_batches=1,
                              n_epochs=1,
                              batch_size=10,
                              vecsize=10)
        dgparsclass.predict(dgparsclass.args, opt)
Code example #9
    def test_train(self, monkeypatch, tmp_path):
        # create dummy parameter file
        dummy_dir = tmp_path.joinpath("dummy_files")
        dummy_logdir = tmp_path.joinpath("dummy_logdir")
        dummy_dir.mkdir()
        dummy_logdir.mkdir()
        parameter = dummy_dir.joinpath("parameter.toml")
        with parameter.open("w") as file:
            toml.dump({"dummy": "parameter"}, file)
        # create training and validation files
        trainfile = dummy_dir.joinpath("chr1.fa.gz.npz")
        validfile = dummy_dir.joinpath("chr1.fa.npz")
        fwd = np.zeros((5, 100))
        np.savez(trainfile, fwd=fwd)
        np.savez(validfile, fwd=fwd)
        # create dummy parser
        sys.argv = [
            "deepgrp", "train",
            str(parameter),
            str(trainfile),
            str(validfile), "bedfile", "--logdir",
            str(dummy_logdir), "--modelfile", "model.hdf5"
        ]
        dgparsclass = dgparser.CommandLineParser().parse_args()

        # helper functions
        def get_toml_dummy(file):
            assert pathlib.Path(file.name) == parameter
            return dgmodel.Options()

        def from_dict_dummy(_, dictionary):
            assert isinstance(dictionary, dict)
            return dgmodel.Options()

        def preprocess_y_dummy(filename, chromosom, length, repeats_to_search):
            assert isinstance(filename, str)
            assert filename == "bedfile"
            assert chromosom == "chr1"
            assert length == 100
            assert isinstance(repeats_to_search, list)
            return "np.ndarray_PREPROCESS_Y"

        def drop_start_end_n_dummy(fwd, array):
            assert isinstance(fwd, np.ndarray)
            assert array == "np.ndarray_PREPROCESS_Y"
            return np.zeros((5, 100)), np.zeros((5, 100))

        def create_model_dummy(parameter):
            assert isinstance(parameter, dgmodel.Options)
            # create random dummy model
            inputs = tf.keras.Input(shape=(3, ))
            x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
            outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)
            model = tf.keras.Model(inputs=inputs, outputs=outputs)
            return model

        def training_dummy(data, options, model, logdir):
            assert pathlib.Path(logdir) == dummy_logdir
            assert isinstance(model, tf.keras.models.Model)
            assert isinstance(data[0], dgpreprocess.Data) and \
                    isinstance(data[1], dgpreprocess.Data)
            assert isinstance(options, dgmodel.Options)

        def model_save_dummy(self, filename):
            assert isinstance(self, tf.keras.Model)
            assert filename == "model.hdf5"

        #Overwriting functions
        monkeypatch.setattr(dgmodel.Options, "from_toml", get_toml_dummy)
        monkeypatch.setattr(dgmodel.Options, "fromdict", from_dict_dummy)

        #overwrite preprocessing functions
        monkeypatch.setattr(dgpreprocess, "preprocess_y", preprocess_y_dummy)
        monkeypatch.setattr(dgpreprocess, "drop_start_end_n",
                            drop_start_end_n_dummy)

        # Training
        monkeypatch.setattr(dgmodel, "create_model", create_model_dummy)
        monkeypatch.setattr(dgtrain, "training", training_dummy)
        monkeypatch.setattr(tf.keras.Model, "save", model_save_dummy)
        dgparsclass.train(dgparsclass.args, dgmodel.Options())
Code example #10
File: test_model.py Project: fhausmann/deepgrp
def test_create_logdir(tmp_path):
    opt = model.Options(project_root_dir=(str(tmp_path)))
    got = model.create_logdir(opt)
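    # the fixed run timestamp presumably relies on a time-mocking fixture not shown here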
    assert got == str(tmp_path.joinpath("tf_logs", "run-19900305121503"))
Code example #11
File: test_model.py Project: fhausmann/deepgrp
    def test_fromdict(self, init_args, expected):
        got = model.Options()
        got.fromdict(init_args)
        for attribute, value in expected.items():
            assert getattr(got, attribute) == value