def get_nih(db):
    """Load the cleaned NIH CSV, annotate it, and persist both versions.

    The frame read from ``nih_cleaned.csv`` is passed to
    ``db.insert(..., "tmp_nih", "dbo")``.  The result of ``annotate`` on that
    frame is passed to ``db.insert(..., "tmp_nih_metamap", "dbo")`` and then
    written to ``nih_metamap.csv`` with ``write_df``.

    Args:
        db: Object exposing ``insert(frame, name, schema_or_owner)``.
            NOTE(review): the last two arguments look like a table name and
            a schema -- confirm against the Database class.
    """
    source_csv = from_root("temp\\test_data\\nih_cleaned.csv")
    raw_frame = pd.read_csv(source_csv)
    db.insert(raw_frame, "tmp_nih", "dbo")

    annotated_frame = annotate(raw_frame)
    db.insert(annotated_frame, "tmp_nih_metamap", "dbo")

    write_df(from_root("temp\\test_data\\nih_metamap.csv"), annotated_frame)
def classify_culture(db, to_module, l1ml_module, l1s_module, l2_module):
    """Classify the culture test extract with each module and save the join.

    The frame returned by ``db.extract`` for ``test_culture.sql`` is passed to
    ``classify`` on every module, in parameter order.  The per-module results
    are inner-merged on ``test_key``/``result_key``, then handed to
    ``db.insert(..., "tmp_culture_predictions", "dbo")`` and written to
    ``culture.csv`` with ``write_df``.

    Args:
        db: Object exposing ``extract(path)`` and ``insert(frame, name, schema)``.
        to_module, l1ml_module, l1s_module, l2_module: Objects exposing
            ``classify(frame)``; each result must support
            ``merge(other, how=..., on=...)`` (presumably pandas DataFrames).
    """
    join_keys = ["test_key", "result_key"]
    frame = db.extract(from_root("temp\\sql\\test_culture.sql"))

    # Classification order matches the original: TO, L1-ML, L1-S, L2.
    classifiers = (to_module, l1ml_module, l1s_module, l2_module)
    predictions = [module.classify(frame) for module in classifiers]

    # Fold the remaining results onto the first one, left to right.
    merged = predictions[0]
    for extra in predictions[1:]:
        merged = merged.merge(extra, how="inner", on=join_keys)

    db.insert(merged, "tmp_culture_predictions", "dbo")
    write_df(from_root("temp\\predictions\\culture.csv"), merged)
def classify_random(db, to_module, l1ml_module, l1s_module, l2_module):
    """Classify the random test extract with each module and save the join.

    The frame returned by ``db.extract`` for ``test_random.sql`` is passed to
    ``classify`` on every module, in parameter order.  The per-module results
    are inner-merged on ``test_key``/``result_key``, then handed to
    ``db.insert(..., "tmp_random_predictions", "dbo")`` and written to
    ``random.csv`` with ``write_df``.

    Args:
        db: Object exposing ``extract(path)`` and ``insert(frame, name, schema)``.
        to_module, l1ml_module, l1s_module, l2_module: Objects exposing
            ``classify(frame)``; each result must support
            ``merge(other, how=..., on=...)`` (presumably pandas DataFrames).
    """
    join_keys = ["test_key", "result_key"]
    frame = db.extract(from_root("temp\\sql\\test_random.sql"))

    # Classification order matches the original: TO, L1-ML, L1-S, L2.
    classifiers = (to_module, l1ml_module, l1s_module, l2_module)
    predictions = [module.classify(frame) for module in classifiers]

    # Fold the remaining results onto the first one, left to right.
    merged = predictions[0]
    for extra in predictions[1:]:
        merged = merged.merge(extra, how="inner", on=join_keys)

    db.insert(merged, "tmp_random_predictions", "dbo")
    write_df(from_root("temp\\predictions\\random.csv"), merged)
def classify_nih(db, tp_module, to_module, l1ml_module, l1s_module, l2_module):
    """Classify the NIH test extract with each module and save the join.

    The frame returned by ``db.extract`` for ``test_nih.sql`` is passed to
    ``classify`` on every module, in parameter order (``tp_module`` first).
    The per-module results are inner-merged on ``test_key``/``result_key``,
    then handed to ``db.insert(..., "tmp_nih_predictions", "dbo")`` and
    written to ``nih.csv`` with ``write_df``.

    Args:
        db: Object exposing ``extract(path)`` and ``insert(frame, name, schema)``.
        tp_module, to_module, l1ml_module, l1s_module, l2_module: Objects
            exposing ``classify(frame)``; each result must support
            ``merge(other, how=..., on=...)`` (presumably pandas DataFrames).
    """
    join_keys = ["test_key", "result_key"]
    frame = db.extract(from_root("temp\\sql\\test_nih.sql"))

    # Classification order matches the original: TP, TO, L1-ML, L1-S, L2.
    classifiers = (tp_module, to_module, l1ml_module, l1s_module, l2_module)
    predictions = [module.classify(frame) for module in classifiers]

    # Fold the remaining results onto the first one, left to right.
    merged = predictions[0]
    for extra in predictions[1:]:
        merged = merged.merge(extra, how="inner", on=join_keys)

    db.insert(merged, "tmp_nih_predictions", "dbo")
    write_df(from_root("temp\\predictions\\nih.csv"), merged)
def main():
    """Classify the test extracts with every saved module and store the output.

    Steps, in order:

    1. Extract the four input frames through ``Database.get_instance()``.
    2. Load the pickled modules, including the two "organisms false" variants.
    3. Run ``classify`` on each module (and again with ``return_all=True`` for
       the level-1 symbolic and level-2 modules).
    4. Outer-merge the results on ``test_key``/``result_key`` into three
       frames, write each to CSV, and insert the main one via
       ``db.insert(..., "predictions", "dbo")``.
    """
    join_keys = ["test_key", "result_key"]

    def outer_join(first, *rest):
        # Left-to-right chain of outer merges on the shared key columns.
        combined = first
        for frame in rest:
            combined = combined.merge(frame, how="outer", on=join_keys)
        return combined

    # --------------------------------------------------------------------------
    # Input frames
    db = Database.get_instance()
    tp_df = db.extract(from_root("sql\\test\\test_performed.sql"))
    to_df = db.extract(from_root("sql\\test\\test_outcome.sql"))
    l1_df = db.extract(from_root("sql\\test\\level_1.sql"))
    l2_df = db.extract(from_root("sql\\test\\level_2.sql"))
    print("Finished loading the DataFrames.")

    # --------------------------------------------------------------------------
    # Saved modules
    tp_module = TestPerformedModule.load_from_file(
        from_root("pkl\\test_performed_module.pkl")
    )
    to_module = TestOutcomeModule.load_from_file(
        from_root("pkl\\test_outcome_module.pkl")
    )
    l1ml_module = Level1MLModule.load_from_file(
        from_root("pkl\\level_1_ml_module.pkl")
    )
    # NOTE(review): these two call load_from_file on a freshly constructed
    # instance rather than on the class, unlike the loads above -- kept as-is.
    l1s_module = Level1SymbolicModule(to_module).load_from_file(
        from_root("pkl\\level_1_symbolic_module.pkl")
    )
    l2_module = Level2Module(l1ml_module).load_from_file(
        from_root("pkl\\level_2_module.pkl")
    )
    tp_module_org_false = TestPerformedModule.load_from_file(
        from_root("pkl\\test_performed_organisms_false_module.pkl")
    )
    to_module_org_false = TestOutcomeModule.load_from_file(
        from_root("pkl\\test_outcome_organisms_false_module.pkl")
    )
    print("Finished loading modules.")

    # --------------------------------------------------------------------------
    # Classification
    tp_results = tp_module.classify(tp_df)
    to_results = to_module.classify(to_df)
    l1ml_results = l1ml_module.classify(l1_df)
    l1s_results = l1s_module.classify(l1_df)
    l2_results = l2_module.classify(l2_df)

    tp_org_false_results = tp_module_org_false.classify(tp_df)
    to_org_false_results = to_module_org_false.classify(to_df)

    l1s_retall_results = l1s_module.classify(l1_df, return_all=True)
    l2_retall_results = l2_module.classify(l2_df, return_all=True)
    print("Finished classifying the DataFrames.")

    # --------------------------------------------------------------------------
    # Combine, then write to CSV and the database
    results = outer_join(
        tp_results, to_results, l1ml_results, l1s_results, l2_results
    )
    org_false_results = outer_join(tp_org_false_results, to_org_false_results)
    retall_results = outer_join(l1s_retall_results, l2_retall_results)

    write_df(from_root("results\\predictions.csv"), results)
    write_df(from_root("results\\predictions_org_false.csv"), org_false_results)
    write_df(from_root("results\\predictions_retall.csv"), retall_results)
    db.insert(results, "predictions", "dbo")
    print("Finished writing results to CSV and database.")
def get_culture(db):
    """Sample 100 rows of the culture extract and persist the sample.

    The frame returned by ``db.extract`` for ``get_test_culture.sql`` is
    reduced with ``.sample(n=100)``; the sample is passed to
    ``db.insert(..., "tmp_culture", "dbo")`` and written to ``culture.csv``
    with ``write_df``.

    Args:
        db: Object exposing ``extract(path)`` and ``insert(frame, name, schema)``.
            NOTE(review): ``extract`` presumably returns a pandas DataFrame,
            in which case the sample is random and unseeded -- confirm.
    """
    query_path = from_root("temp\\sql\\get_test_culture.sql")
    sampled = db.extract(query_path).sample(n=100)

    db.insert(sampled, "tmp_culture", "dbo")
    write_df(from_root("temp\\test_data\\culture.csv"), sampled)
def get_random(db):
    """Sample 100 rows of the random extract and persist the sample.

    The frame returned by ``db.extract`` for ``get_test_random.sql`` is
    reduced with ``.sample(n=100)``; the sample is passed to
    ``db.insert(..., "tmp_random", "dbo")`` and written to ``random.csv``
    with ``write_df``.

    Args:
        db: Object exposing ``extract(path)`` and ``insert(frame, name, schema)``.
            NOTE(review): ``extract`` presumably returns a pandas DataFrame,
            in which case the sample is random and unseeded -- confirm.
    """
    query_path = from_root("temp\\sql\\get_test_random.sql")
    sampled = db.extract(query_path).sample(n=100)

    db.insert(sampled, "tmp_random", "dbo")
    write_df(from_root("temp\\test_data\\random.csv"), sampled)