Example #1
    def test_and_run_multiple_pipeline_jobs(self):

        with open("./test_pipeline_build_custom.json") as f:
            pipeline_structure = json.load(f)

        pipeline_name = "test custom pipeline"
        sys.path.insert(
            0, self.config["local_pipeline_import_path"][pipeline_name])

        pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection,
                                         self.meta_data)
        pipeline_obj.load_steps_into_db(pipeline_structure)

        # First run of the pipeline.
        jobs_obj1 = pipeline.Jobs("Test custom job", self.connection,
                                  self.meta_data)
        jobs_obj1.create_jobs_to_run("test custom pipeline")

        jobs_obj1.run_job()

        with open("test_output_custom.json", "r") as f:
            output1 = json.load(f)

        # Second, identical run; both runs should produce output of the
        # same size.
        jobs_obj2 = pipeline.Jobs("Test custom job", self.connection,
                                  self.meta_data)
        jobs_obj2.create_jobs_to_run("test custom pipeline")

        jobs_obj2.run_job()

        with open("test_output_custom.json", "r") as f:
            output2 = json.load(f)

        self.assertEqual(len(output1), len(output2))
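All of these examples rely on a test fixture that supplies self.connection, self.meta_data, and self.config. A minimal setUp sketch under those assumptions follows; the config file name, its connection_uri key, and the schema handling are hypothetical, since the snippets themselves only show that config exposes "local_pipeline_import_path" and "external_data_connections".

import json
import unittest

import sqlalchemy as sa

import pipeline  # the module exercised by these tests


class PipelineJobTests(unittest.TestCase):

    def setUp(self):
        # Hypothetical config file; only the two keys used by the tests
        # are known to exist in it.
        with open("./test_config.json") as f:
            self.config = json.load(f)

        # Hypothetical connection URI key; any SQLAlchemy URL would work.
        engine = sa.create_engine(self.config["connection_uri"])
        self.connection = engine.connect()

        # Example #7 reads self.meta_data.schema, so the fixture presumably
        # builds a MetaData bound to a schema name.
        self.meta_data = sa.MetaData(schema=self.config.get("schema"))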
Example #2
    def test_create_and_run_rest_job(self):

        with open("./test_pipeline_build_rest.json") as f:
            pipeline_structure = json.load(f)

        pipeline_name = "test rest scoring pipeline"

        pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection,
                                         self.meta_data)
        pipeline_obj.load_steps_into_db(pipeline_structure)

        jobs_obj = pipeline.Jobs("Test rest scoring job", self.connection,
                                 self.meta_data)
        jobs_obj.create_jobs_to_run("test rest scoring pipeline")

        jobs_obj.run_job()
Example #3
    def test_create_and_run_custom_job(self):

        with open("./test_pipeline_build_custom.json") as f:
            pipeline_structure = json.load(f)

        pipeline_name = "test custom pipeline"
        sys.path.insert(
            0, self.config["local_pipeline_import_path"][pipeline_name])

        pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection,
                                         self.meta_data)
        pipeline_obj.load_steps_into_db(pipeline_structure)

        jobs_obj = pipeline.Jobs("Test custom job", self.connection,
                                 self.meta_data)
        jobs_obj.create_jobs_to_run("test custom pipeline")

        jobs_obj.run_job()
Example #4
    def test_create_and_run_jobs(self):

        with open("./test_pipeline_build.json") as f:
            pipeline_structure = json.load(f)

        pipeline_obj = pipeline.Pipeline("test pipeline", self.connection,
                                         self.meta_data)
        pipeline_obj.load_steps_into_db(pipeline_structure)

        jobs_obj = pipeline.Jobs("Test job", self.connection, self.meta_data)
        jobs_obj.create_jobs_to_run("test pipeline")

        jobs_obj.run_job()

        with open("./test_output.json") as f:
            pipeline_results = json.load(f)

        self.assertEqual(2, len(pipeline_results))
Example #5
    def test_open_scoring_pipeline(self):

        with open("./test_pipeline_build_openscoring.json") as f:
            pipeline_structure = json.load(f)

        pipeline_name = "test open_scoring"

        pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection,
                                         self.meta_data)
        pipeline_obj.load_steps_into_db(pipeline_structure)

        jobs_obj = pipeline.Jobs("Test open_scoring job", self.connection,
                                 self.meta_data)
        jobs_obj.create_jobs_to_run("test open_scoring")

        jobs_obj.run_job()

        # TODO: write assertions for the open scoring output.
        self.fail("TODO: write assertions for the open scoring output")
Example #6
    def test_and_run_pipeline_with_load_from_db(self):

        sqlite_file_name = os.path.join(os.path.curdir, "files", "test.db3")

        if os.path.exists(sqlite_file_name):
            os.remove(sqlite_file_name)

        import sqlalchemy as sa

        engine = sa.create_engine("sqlite:///" + sqlite_file_name)
        connection = engine.connect()

        query_string = """
                CREATE TABLE test_summary_file
                (eid INTEGER, person_id VARCHAR(255), month_of_birth VARCHAR(255), year_of_birth VARCHAR(255),
                admit_date DATEIME,discharge_date DATETIME,drg VARCHAR(255), "group" VARCHAR(255))
                ;
        INSERT INTO test_summary_file
        (eid,person_id,month_of_birth,year_of_birth,admit_date,discharge_date,drg, "group")
          VALUES
          (1000,'100x','09','1990','2014-01-01','2014-01-02','701','1')
          ;

        INSERT INTO test_summary_file
        (eid,person_id,month_of_birth,year_of_birth,admit_date,discharge_date,drg,"group") 
        VALUES  
          (2000, 'x200','10','1980', '2015-02-01', '2015-01-31','', 2)
          ;

        CREATE TABLE test_summary_dx_list (
          eid INTEGER, seq_id INTEGER, poa VARCHAR(255), code VARCHAR(255)
          );

        INSERT INTO test_summary_dx_list (eid,seq_id,poa,code) VALUES 
          (1000,1,'1','N10');

        INSERT INTO test_summary_dx_list (eid,seq_id,poa,code) VALUES   
          (1000,2,'1','E119');

        INSERT INTO test_summary_dx_list (eid,seq_id,poa,code) VALUES  
          (1000,3,'0','K219');

                """

        # Splitting on ";" leaves a trailing empty fragment; skip blanks.
        for statement in query_string.split(";"):
            if statement.strip():
                connection.execute(statement)

        connection.close()

        with open("./test_pipeline_build_from_db.json") as f:
            pipeline_structure = json.load(f)

        pipeline_name = "test loading from db"
        sys.path.insert(
            0, self.config["local_pipeline_import_path"][pipeline_name])

        pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection,
                                         self.meta_data)
        pipeline_obj.load_steps_into_db(pipeline_structure)

        jobs_obj = pipeline.Jobs("Test custom job",
                                 self.connection,
                                 self.meta_data,
                                 external_data_connections_dict=self.
                                 config["external_data_connections"])
        jobs_obj.create_jobs_to_run("test loading from db")

        jobs_obj.run_job()

        with open("test_output_custom.json", "r") as f:
            output = json.load(f)

        self.assertEqual(2, len(output))
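The external_data_connections mapping handed to pipeline.Jobs above is never shown in these snippets. Given the SQLite file the test creates, one plausible shape, keyed by pipeline name, might be the following; both the key and the value layout are guesses, not the library's documented format.

# Hypothetical shape of config["external_data_connections"]; the key and
# the nesting are inferred from how this test wires in ./files/test.db3.
external_data_connections = {
    "test loading from db": {"connection_uri": "sqlite:///./files/test.db3"}
}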
Example #7
    def test_archive_data_transformations(self):

        with open("./test_pipeline_build.json") as f:
            pipeline_structure = json.load(f)

        pipeline_obj_1 = pipeline.Pipeline("test pipeline", self.connection,
                                           self.meta_data)
        pipeline_obj_1.load_steps_into_db(pipeline_structure)

        jobs_obj_1 = pipeline.Jobs("Test job", self.connection, self.meta_data)
        jobs_obj_1.create_jobs_to_run("test pipeline")

        jobs_obj_1.run_job()

        # Count the data transformations produced by the run.
        num_dts_1 = len(
            list(
                self.connection.execute(
                    "select * from %s.data_transformations dts" %
                    (self.meta_data.schema, ))))

        # Archiving should move every row out of data_transformations.
        ap_obj = pipeline.ArchivePipeline("test pipeline", self.connection,
                                          self.meta_data)
        ap_obj.archive_steps()

        num_dts_2 = len(
            list(
                self.connection.execute(
                    "select * from %s.data_transformations dts" %
                    (self.meta_data.schema, ))))

        self.assertTrue(num_dts_1 > 0)
        self.assertEqual(0, num_dts_2)

        # The archived table should now hold everything that was moved out.
        num_adts_2 = len(
            list(
                self.connection.execute(
                    "select * from %s.archived_data_transformations dts" %
                    (self.meta_data.schema, ))))

        self.assertEqual(num_dts_1, num_adts_2)

        jobs_obj_2 = pipeline.Jobs("Test job 2", self.connection,
                                   self.meta_data)
        jobs_obj_2.create_jobs_to_run("test pipeline")
        jobs_obj_2.run_job()

        # Archive only the transformations belonging to step 8.
        ap_obj2 = pipeline.ArchivePipeline("test pipeline", self.connection,
                                           self.meta_data)
        ap_obj2.archive_steps(steps=[8])

        num_dts_3 = len(
            list(
                self.connection.execute(
                    "select * from %s.data_transformations dts" %
                    (self.meta_data.schema, ))))
        self.assertEqual(0, num_dts_3)

        num_adts_3 = len(
            list(
                self.connection.execute(
                    "select * from %s.archived_data_transformations dts" %
                    (self.meta_data.schema, ))))

        self.assertEqual(15, num_adts_3)
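Example #7 repeats the same count-all-rows pattern several times, pulling every row to the client just to measure the length. A small helper that pushes the counting into SQL keeps the test shorter; this is a sketch, not part of the library.

def count_rows(connection, schema, table):
    # Hypothetical helper: let the database count rows instead of
    # materializing every row client-side as the test above does.
    result = connection.execute(
        "select count(*) from %s.%s" % (schema, table))
    return result.scalar()

# e.g. num_dts_1 = count_rows(self.connection, self.meta_data.schema,
#                             "data_transformations")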