Esempio n. 1
0
def test_model_shape(config):
    """Train a SOM with ``node_map=2`` and verify the leading model dimensions.

    After training, the first two entries of the trained model's ``shape``
    must equal ``(2, 2)``.
    """
    storage = SomStorageAdapter(config=config, feedback_strategy=None)
    adapter = SomModelAdapter(storage_adapter=storage)
    train_job = SomTrainJob(node_map=2, model_adapter=adapter)
    # Neither return value is inspected here; the unpacking is kept so the
    # two-item return of execute() is still exercised.
    _result, _dist = train_job.execute()
    trained_shape = adapter.model.model.shape
    assert trained_shape[:2] == (2, 2)
Esempio n. 2
0
def test_output_length(config):
    """Verify that training on Hadoop_2k.json yields exactly 2000 outputs."""
    storage = SomStorageAdapter(config=config, feedback_strategy=None)
    adapter = SomModelAdapter(storage_adapter=storage)
    train_job = SomTrainJob(node_map=2, model_adapter=adapter)
    _result, distances = train_job.execute()
    expected_count = 2000
    assert len(distances) == expected_count
Esempio n. 3
0
def test_output_values(config):
    """Verify that the training distances on Hadoop_2k.json sum to at most 2000.

    NOTE(review): this bounds only the total of the distances, not each
    individual value. If the intent is that every distance is <= 1, a
    per-value check would be needed -- confirm the intended contract.
    """
    storage = SomStorageAdapter(config=config, feedback_strategy=None)
    adapter = SomModelAdapter(storage_adapter=storage)
    train_job = SomTrainJob(node_map=2, model_adapter=adapter)
    _result, distances = train_job.execute()
    total_distance = sum(distances)
    assert total_distance <= 2000
Esempio n. 4
0
def test_vocab_length(config):
    """Verify the size of the processed "message" vocabulary on Hadoop_2k.json."""
    storage = SomStorageAdapter(config=config, feedback_strategy=None)
    adapter = SomModelAdapter(storage_adapter=storage)
    train_job = SomTrainJob(node_map=2, model_adapter=adapter)
    _result, _dist = train_job.execute()

    # The word2vec model trained for the "message" field holds the vocabulary.
    message_model = adapter.w2v_model.model["message"]
    vocabulary = message_model.wv.vocab
    assert len(vocabulary) == 141
Esempio n. 5
0
def test_loss_value(config):
    """Check that the latest training loss stays below the expected ceiling.

    Trains the model, reads the latest training loss of the "message"
    word2vec model once, prints it for diagnostics, and asserts that it is
    strictly less than 320000.0.
    """
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    tc.execute()

    # Read the loss a single time so the printed value is guaranteed to be
    # the same one the assertion checks.
    tl = model_adapter.w2v_model.model["message"].get_latest_training_loss()
    print(tl)
    assert tl < 320000.0
Esempio n. 6
0
def test_log_similarity(config):
    """Check that known log lines keep their expected nearest neighbours.

    After training, each probe log token is looked up in the "message"
    word2vec model and the expected related token must appear among its
    three most similar entries.
    """
    storage_adapter = SomStorageAdapter(config=config, feedback_strategy=None)
    model_adapter = SomModelAdapter(storage_adapter=storage_adapter)
    tc = SomTrainJob(node_map=2, model_adapter=model_adapter)
    tc.execute()

    word_vectors = model_adapter.w2v_model.model["message"].wv

    log_1 = 'INFOmainorgapachehadoopmapreducevappMRAppMasterExecutingwithtokens'
    answer_1 = 'INFOmainorgapachehadoopmapreducevappMRAppMasterCreatedMRAppMasterforapplicationappattempt'

    # Query the model once per probe and keep the top three tokens, instead
    # of re-running the same similarity search for every index.
    match_1 = [token for token, _score in word_vectors.most_similar(log_1)[:3]]
    assert answer_1 in match_1

    log_2 = 'ERRORRMCommunicatorAllocatororgapachehadoopmapreducevapprmRMContainerAllocatorERRORINCONTACTINGRM'
    answer_2 = 'WARNLeaseRenewermsrabimsrasaorgapachehadoophdfsLeaseRenewerFailedtorenewleaseforDFSClient' \
               'NONMAPREDUCEforsecondsWillretryshortly'
    match_2 = [token for token, _score in word_vectors.most_similar(log_2)[:3]]
    print(match_2[0])
    assert answer_2 in match_2
    def test_train_command(self):
        """Validate that a queued training job is run by the pipeline.

        A training step is added to a pipeline configured for local input
        and stdout output; the pipeline's length and its ``count`` are
        compared against ``TASKS_IN_QUEUE`` before and after execution.
        """
        pipeline = Pipeline()

        settings = Configuration()
        settings.STORAGE_DATASOURCE = "local"
        settings.STORAGE_DATASINK = "stdout"
        settings.LS_INPUT_PATH = "validation_data/Hadoop_2k.json"

        storage = SomStorageAdapter(config=settings, feedback_strategy=None)
        adapter = SomModelAdapter(storage)
        train_job = SomTrainJob(node_map=2, model_adapter=adapter)

        pipeline.add_steps(train_job)
        # The step is queued, but nothing has been executed yet.
        self.assertEqual(len(pipeline), TASKS_IN_QUEUE)
        self.assertNotEqual(pipeline.count, TASKS_IN_QUEUE)

        pipeline.execute_steps()
        # After execution the count must match the number of queued tasks.
        self.assertEqual(pipeline.count, TASKS_IN_QUEUE)

        pipeline.clear()