def test_and_run_multiple_pipeline_jobs(self):
    """Run the same custom pipeline twice and check the outputs are the same size."""
    with open("./test_pipeline_build_custom.json") as f:
        pipeline_structure = json.load(f)

    pipeline_name = "test custom pipeline"
    sys.path.insert(0, self.config["local_pipeline_import_path"][pipeline_name])

    pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection, self.meta_data)
    pipeline_obj.load_steps_into_db(pipeline_structure)

    # First run
    jobs_obj1 = pipeline.Jobs("Test custom job", self.connection, self.meta_data)
    jobs_obj1.create_jobs_to_run("test custom pipeline")
    jobs_obj1.run_job()

    with open("test_output_custom.json", "r") as f:
        output1 = json.load(f)

    # Second run of the same pipeline
    jobs_obj2 = pipeline.Jobs("Test custom job", self.connection, self.meta_data)
    jobs_obj2.create_jobs_to_run("test custom pipeline")
    jobs_obj2.run_job()

    with open("test_output_custom.json", "r") as f:
        output2 = json.load(f)

    self.assertEqual(len(output1), len(output2))

def test_create_and_run_rest_job(self):
    with open("./test_pipeline_build_rest.json") as f:
        pipeline_structure = json.load(f)

    pipeline_name = "test rest scoring pipeline"
    pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection, self.meta_data)
    pipeline_obj.load_steps_into_db(pipeline_structure)

    jobs_obj = pipeline.Jobs("Test rest scoring job", self.connection, self.meta_data)
    jobs_obj.create_jobs_to_run("test rest scoring pipeline")
    jobs_obj.run_job()

def test_create_and_run_custom_job(self):
    with open("./test_pipeline_build_custom.json") as f:
        pipeline_structure = json.load(f)

    pipeline_name = "test custom pipeline"
    sys.path.insert(0, self.config["local_pipeline_import_path"][pipeline_name])

    pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection, self.meta_data)
    pipeline_obj.load_steps_into_db(pipeline_structure)

    jobs_obj = pipeline.Jobs("Test custom job", self.connection, self.meta_data)
    jobs_obj.create_jobs_to_run("test custom pipeline")
    jobs_obj.run_job()

def test_create_and_run_jobs(self):
    with open("./test_pipeline_build.json") as f:
        pipeline_structure = json.load(f)

    pipeline_obj = pipeline.Pipeline("test pipeline", self.connection, self.meta_data)
    pipeline_obj.load_steps_into_db(pipeline_structure)

    jobs_obj = pipeline.Jobs("Test job", self.connection, self.meta_data)
    jobs_obj.create_jobs_to_run("test pipeline")
    jobs_obj.run_job()

    with open("./test_output.json") as f:
        pipeline_results = json.load(f)

    self.assertEqual(2, len(pipeline_results))

def test_open_scoring_pipeline(self):
    with open("./test_pipeline_build_openscoring.json") as f:
        pipeline_structure = json.load(f)

    pipeline_name = "test open_scoring"
    pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection, self.meta_data)
    pipeline_obj.load_steps_into_db(pipeline_structure)

    jobs_obj = pipeline.Jobs("Test open_scoring job", self.connection, self.meta_data)
    jobs_obj.create_jobs_to_run("test open_scoring")
    jobs_obj.run_job()

    # TODO: Write test
    self.fail("Assertions for the open scoring pipeline are not yet written")

def test_and_run_pipeline_with_load_from_db(self):
    # Build a throwaway SQLite database for the pipeline to read from
    sqlite_file_name = os.path.join(os.path.curdir, "files", "test.db3")
    if os.path.exists(sqlite_file_name):
        os.remove(sqlite_file_name)

    import sqlalchemy as sa
    engine = sa.create_engine("sqlite:///./files/test.db3")
    connection = engine.connect()

    query_string = """
    CREATE TABLE test_summary_file (
        eid INTEGER,
        person_id VARCHAR(255),
        month_of_birth VARCHAR(255),
        year_of_birth VARCHAR(255),
        admit_date DATETIME,
        discharge_date DATETIME,
        drg VARCHAR(255),
        "group" VARCHAR(255)
    );
    INSERT INTO test_summary_file (eid, person_id, month_of_birth, year_of_birth, admit_date, discharge_date, drg, "group")
        VALUES (1000, '100x', '09', '1990', '2014-01-01', '2014-01-02', '701', '1');
    INSERT INTO test_summary_file (eid, person_id, month_of_birth, year_of_birth, admit_date, discharge_date, drg, "group")
        VALUES (2000, 'x200', '10', '1980', '2015-02-01', '2015-01-31', '', '2');
    CREATE TABLE test_summary_dx_list (
        eid INTEGER,
        seq_id INTEGER,
        poa VARCHAR(255),
        code VARCHAR(255)
    );
    INSERT INTO test_summary_dx_list (eid, seq_id, poa, code) VALUES (1000, 1, '1', 'N10');
    INSERT INTO test_summary_dx_list (eid, seq_id, poa, code) VALUES (1000, 2, '1', 'E119');
    INSERT INTO test_summary_dx_list (eid, seq_id, poa, code) VALUES (1000, 3, '0', 'K219');
    """

    # The DBAPI executes one statement at a time, so split on ";" and skip blanks
    for statement in query_string.split(";"):
        if statement.strip():
            connection.execute(statement)
    connection.close()

    with open("./test_pipeline_build_from_db.json") as f:
        pipeline_structure = json.load(f)

    pipeline_name = "test loading from db"
    sys.path.insert(0, self.config["local_pipeline_import_path"][pipeline_name])

    pipeline_obj = pipeline.Pipeline(pipeline_name, self.connection, self.meta_data)
    pipeline_obj.load_steps_into_db(pipeline_structure)

    jobs_obj = pipeline.Jobs(
        "Test custom job", self.connection, self.meta_data,
        external_data_connections_dict=self.config["external_data_connections"])
    jobs_obj.create_jobs_to_run("test loading from db")
    jobs_obj.run_job()

    with open("test_output_custom.json", "r") as f:
        output = json.load(f)

    self.assertEqual(2, len(output))

def test_archive_data_transformations(self):
    with open("./test_pipeline_build.json") as f:
        pipeline_structure = json.load(f)

    pipeline_obj_1 = pipeline.Pipeline("test pipeline", self.connection, self.meta_data)
    pipeline_obj_1.load_steps_into_db(pipeline_structure)

    jobs_obj_1 = pipeline.Jobs("Test job", self.connection, self.meta_data)
    jobs_obj_1.create_jobs_to_run("test pipeline")
    jobs_obj_1.run_job()

    num_dts_1 = len(list(self.connection.execute(
        "select * from %s.data_transformations dts" % (self.meta_data.schema,))))

    # Archiving every step should empty data_transformations ...
    ap_obj = pipeline.ArchivePipeline("test pipeline", self.connection, self.meta_data)
    ap_obj.archive_steps()

    num_dts_2 = len(list(self.connection.execute(
        "select * from %s.data_transformations dts" % (self.meta_data.schema,))))

    self.assertTrue(num_dts_1 > 0)
    self.assertEqual(0, num_dts_2)

    # ... and move every row into archived_data_transformations
    num_adts_2 = len(list(self.connection.execute(
        "select * from %s.archived_data_transformations dts" % (self.meta_data.schema,))))
    self.assertEqual(num_dts_1, num_adts_2)

    # Re-run the pipeline, then archive only step 8
    jobs_obj_2 = pipeline.Jobs("Test job 2", self.connection, self.meta_data)
    jobs_obj_2.create_jobs_to_run("test pipeline")
    jobs_obj_2.run_job()

    ap_obj2 = pipeline.ArchivePipeline("test pipeline", self.connection, self.meta_data)
    ap_obj2.archive_steps(steps=[8])

    num_dts_3 = len(list(self.connection.execute(
        "select * from %s.data_transformations dts" % (self.meta_data.schema,))))
    self.assertEqual(0, num_dts_3)

    num_adts_3 = len(list(self.connection.execute(
        "select * from %s.archived_data_transformations dts" % (self.meta_data.schema,))))
    self.assertEqual(15, num_adts_3)
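
# Hypothetical helper (not part of the original suite): the repeated
# len(list(self.connection.execute(...))) row counts in
# test_archive_data_transformations could be factored out into a sketch
# like the one below. It assumes the same self.connection and
# self.meta_data attributes these tests already use; the method name
# and signature are assumptions, not existing API.
def _count_rows(self, table_name):
    """Count rows in a schema-qualified table, e.g. self._count_rows("data_transformations")."""
    result = self.connection.execute(
        "select count(*) from %s.%s" % (self.meta_data.schema, table_name))
    return list(result)[0][0]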