def main():
    """Run the full pipeline: load data, fit ideal functions, map test data, plot.

    File locations are read from ``config/data.json``; intermediate and final
    results are persisted to the database, then visualized.
    """
    # Set maximal threads to eight to avoid computation problems
    os.environ['NUMEXPR_MAX_THREADS'] = "8"

    # Configure logger (implicit string concatenation instead of '+')
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s - %(name)s - '
                               '%(levelname)s - %(message)s',
                        filename='applog.log',
                        filemode='w')
    # Get logger with name of module
    logger = logging.getLogger(__name__)
    logger.info("Starting program.")

    # Read config data
    cfg = utils.load_config(logger, "config/data.json")

    # Set paths to where data is stored; names are retrieved from config file
    logger.info("Reading config file...")
    train_file = utils.read_string_from_config(logger=logger, cfg=cfg,
                                               key="trainFile")
    ideal_file = utils.read_string_from_config(logger=logger, cfg=cfg,
                                               key="idealFile")
    test_file = utils.read_string_from_config(logger=logger, cfg=cfg,
                                              key="testFile")
    # Hoisted: the base directory was previously computed once per path.
    base_dir = os.path.dirname(__file__)
    train_path = os.path.join(base_dir, train_file)
    ideal_path = os.path.join(base_dir, ideal_file)
    test_path = os.path.join(base_dir, test_file)
    logger.info("Reading config file...Done")

    # Create database instance
    database = db.Database(cfg=cfg)

    # Save training data and ideal functions in db
    training_data = utils.read_data(logger=logger, path=train_path)
    ideal_data = utils.read_data(logger=logger, path=ideal_path)
    database.update_training_table(data=training_data)
    database.update_ideal_functions_table(data=ideal_data)

    # Initiate training process (snake_case locals per PEP 8)
    training = data.TrainingData(training_data=training_data,
                                 ideal_data=ideal_data)
    training_result = training.find_ideal_functions()

    # Initiate mapping process
    test = data.TestData(ideal_data=ideal_data,
                         result_data=training_result,
                         test_filepath=test_path)
    test_result = test.map_to_functions()

    # Save mapping results in db
    database.update_result_table(data=test_result)

    # Create plotter instance with results
    plotter = plot.Plotter(ideal_data=ideal_data,
                           training_data=training_data,
                           training_result=training_result,
                           deviations=training.deviations(),
                           test_result=test_result)
    # Plot results
    plotter.plot_results()
    # Show results
    plotter.show_results()
    logger.info("Program has finished.")
def _init_test_data_class():
    """Build a ``data.TrainingData`` from the ideal and training CSV files."""
    log = logging.getLogger(__name__)
    # Read ideal data first, then training data (same order as before),
    # and feed both straight into the TrainingData constructor.
    ideal = utils.read_data(logger=log, path=get_ideal_path())
    training = utils.read_data(logger=log, path=get_train_path())
    return data.TrainingData(ideal_data=ideal, training_data=training)
def ssa_detection_test_main(self, spl, source, test_name, pass_condition,
                            test_id, sourcetype):
    """Deploy an SSA detection pipeline, send test events and verify results.

    Compiles/validates the SPL, creates and activates a pipeline, ingests the
    test data, then polls the detection-testing index until results arrive or
    the maximum execution time is reached.

    Returns:
        The ``self.test_results`` dict with a human-readable ``"msg"`` entry.

    Raises:
        AssertionError: on any failed stage (compile, validate, create,
            activate, ingest) or when the pass condition is not satisfied.
    """
    self.execution_passed = True
    self.wait_time(SLEEP_TIME_CREATE_INDEX)
    # Determine up front whether this detection actually emits to SSA.
    check_ssa_spl = check_source_sink(spl)
    spl = manipulate_spl(self.api.env, spl, test_id)
    assert spl is not None, "fail to manipulate spl file"
    upl = self.api.compile_spl(spl)
    assert upl is not None, "failed to compile spl"
    validated_upl = self.api.validate_upl(upl)
    assert validated_upl is not None, "failed to validate upl"
    pipeline_id = self.api.create_pipeline(validated_upl)
    assert pipeline_id is not None, "failed to create a pipeline"
    _pipeline_status = self.api.pipeline_status(pipeline_id)
    assert _pipeline_status == "CREATED", \
        f"Current status of pipeline {pipeline_id} should be CREATED"
    self.created_pipelines.append(pipeline_id)
    pipeline_activated = self.api.activate_pipeline(pipeline_id)
    assert pipeline_activated, f"pipeline {pipeline_id} should be activated."
    self.activated_pipelines.append(pipeline_id)
    self.wait_time(SLEEP_TIME_ACTIVATE_PIPELINE)

    if not check_ssa_spl:
        # NOTE(review): the returned msg still reads "successful" even though
        # the test is skipped as deprecated — preserved as-is; confirm intent.
        msg = f"Detection test successful for {test_name}"
        LOGGER.warning(
            "Test not completed. Detection seems deprecated, and will not "
            "send messages to SSA"
        )
        self.test_results["msg"] = msg
        return self.test_results

    events = read_data(source, sourcetype)
    # Lazy %-args: formatting is deferred to the logging framework.
    LOGGER.info("Sending (%d) events", len(events))
    assert len(events) > 0, "No events to send, skip to next test."
    data_uploaded = self.api.ingest_data(events, sourcetype)
    assert data_uploaded, "Failed to upload test data"
    self.wait_time(SLEEP_TIME_SEND_DATA)

    # The query is invariant across retries — build it once, outside the loop.
    query = f"from indexes('detection_testing') | search test_id=\"{test_id}\" "
    search_results = False
    max_execution_time_reached = False
    while not (search_results or max_execution_time_reached):
        # wait_time returns True once the max execution time budget is spent.
        max_execution_time_reached = self.wait_time(WAIT_CYCLE)
        LOGGER.info("Executing search query: %s", query)
        sid = self.api.submit_search_job('mc', query)
        assert sid is not None, "Failed to create a Search Job"
        # Poll until the search job reports completion.
        job_finished = False
        while not job_finished:
            self.wait_time(WAIT_CYCLE)
            job_finished = self.api.check_search_job_finished(sid)
        results = self.api.get_search_job_results(sid)
        search_results = len(results) > 0
        if not search_results:
            LOGGER.info(
                "Search didn't return any results. Retrying in %ss, "
                "max execution time left %ss",
                WAIT_CYCLE, self.max_execution_time
            )

    if not results:
        LOGGER.warning("Search job didn't return any results")
    LOGGER.info('Received %s result(s)', len(results))
    test_passed = assert_results(pass_condition, results)
    assert test_passed, f"Pass condition {pass_condition} not satisfied"
    msg = f"Detection test successful for {test_name}"
    LOGGER.info(msg)
    self.test_results["msg"] = msg
    return self.test_results
def test_read_data(self):
    """Smoke test: reading a well-formed CSV file must not raise."""
    utils.read_data(logging.getLogger(__name__), "data/ideal.csv")
def test_read_data_wrong_fileformat(self):
    """Reading a file that is not CSV must raise a KeyError."""
    log = logging.getLogger(__name__)
    # Callable form of assertRaises instead of the context-manager form.
    self.assertRaises(KeyError, utils.read_data, log, "config/data.json")
def test_read_data_not_found(self):
    """Reading a non-existent file must raise FileNotFoundError."""
    log = logging.getLogger(__name__)
    # Callable form of assertRaises instead of the context-manager form.
    self.assertRaises(FileNotFoundError, utils.read_data, log,
                      "data/not_available.csv")