def setUp(self):
    """Build the run-parameter fixtures and expected output file names.

    Two parameter dictionaries are prepared: ``run_parameters`` carries no
    phenotype entry, while ``run_parameters_empty_phenotype`` additionally
    sets ``phenotype_name_full_path`` to an empty string.
    """
    logger.init()

    # Both parameter sets point at the same spreadsheet and network files.
    spreadsheet = "../../data/spreadsheets/TEST_1_gene_expression_positive_real_number.tsv"
    network = "../../data/networks/TEST_1_gene_gene.edge"

    # Names of the files the tests refer to as ETL / MAP / UNMAPPED outputs.
    self.file_ETL = "TEST_1_gene_expression_positive_real_number_ETL.tsv"
    self.file_MAP = "TEST_1_gene_expression_positive_real_number_MAP.tsv"
    self.file_UNMAPPED = "TEST_1_gene_expression_positive_real_number_User_To_Ensembl.tsv"

    self.run_parameters = {
        "spreadsheet_name_full_path": spreadsheet,
        "gg_network_name_full_path": network,
        "results_directory": "./",
        "redis_credential": {
            "host": "knowredis.knoweng.org",
            "port": 6379,
            "password": "******",
        },
        "source_hint": "",
        "taxonid": "9606",
        "pipeline_type": "samples_clustering_pipeline",
    }

    self.run_parameters_empty_phenotype = {
        "spreadsheet_name_full_path": spreadsheet,
        "gg_network_name_full_path": network,
        "phenotype_name_full_path": "",
        "results_directory": "./",
        "redis_credential": {
            "host": "knowredis.knoweng.org",
            "port": 6379,
            "password": "******",
        },
        "source_hint": "",
        "taxonid": "9606",
        "pipeline_type": "samples_clustering_pipeline",
    }
def data_cleanup():
    """Load the run parameters named on the command line and run the pipeline.

    ``sys.argv`` is handed to ``get_run_directory_and_file``; the resulting
    parameters are passed to ``run_pipelines`` together with the ``SELECT``
    entry for their ``pipeline_type``.

    Raises:
        RuntimeError: when any step raises. The original message is first
            appended to ``logger.logging`` with an ``ERROR:`` prefix.
    """
    try:
        logger.init()
        directory, filename = get_run_directory_and_file(sys.argv)
        parameters = get_run_parameters(directory, filename)
        pipeline = SELECT[parameters['pipeline_type']]
        run_pipelines(parameters, pipeline)
    except Exception as exc:
        message = str(exc)
        logger.logging.append("ERROR: {}".format(message))
        raise RuntimeError(message)
def checker():
    """Load the run parameters named on the command line and run the checker.

    ``sys.argv`` is handed to ``get_run_directory_and_file``; the resulting
    parameters are used to build a ``Checker`` whose ``condition_check``
    method is then invoked.

    Raises:
        RuntimeError: when any step raises. The original message is first
            appended to ``logger.logging`` with an ``ERROR:`` prefix.
    """
    try:
        logger.init()
        directory, filename = get_run_directory_and_file(sys.argv)
        parameters = get_run_parameters(directory, filename)
        Checker(parameters).condition_check()
    except Exception as exc:
        message = str(exc)
        logger.logging.append("ERROR: {}".format(message))
        raise RuntimeError(message)
def network_prepper():
    """Load the run parameters named on the command line and run the
    ``network_prepper_pipeline``.

    ``sys.argv`` is handed to ``get_run_directory_and_file``; the resulting
    parameters are passed to ``run_pipelines``.

    On failure the error message is appended to ``logger.logging`` and, when
    the run parameters were already loaded, the log is written to
    ``<results_directory>/log_network_prepper_pipeline.yml``.

    Raises:
        RuntimeError: when any step raises; the original exception is
            attached as the cause.
    """
    # Bind the name up front: the error handler below reads it, and a failure
    # before get_run_parameters() returns would otherwise leave it undefined,
    # turning the intended RuntimeError into an UnboundLocalError.
    run_parameters = None
    try:
        logger.init()
        run_directory, run_file = get_run_directory_and_file(sys.argv)
        run_parameters = get_run_parameters(run_directory, run_file)
        run_pipelines(run_parameters, "network_prepper_pipeline")
    except Exception as err:
        logger.logging.append("ERROR: {}".format(str(err)))
        # try to write the log -- only possible once the results directory
        # is known, i.e. after the run parameters were loaded
        if run_parameters is not None:
            logger.generate_logging(
                False,
                logger.logging,
                run_parameters["results_directory"] + "/log_" + "network_prepper_pipeline" + ".yml")
        raise RuntimeError(str(err)) from err
def setUp(self):
    """Prepare the spreadsheet path, its tab-separated text and the matching frame.

    ``f_context`` holds a header row plus three gene rows; ``golden_output``
    is a data frame with the same row labels, column labels and values.
    """
    logger.init()

    self.run_dir = "./run_file"
    self.user_spreadsheet = "user_spreadsheet.tsv"
    self.spreadsheet_path = "/".join([self.run_dir, self.user_spreadsheet])

    # Header line followed by one line per gene, each newline-terminated.
    text_rows = [
        "\ta\tb\tc\n",
        "ENSG00000000003\t1\t0\t1\n",
        "ENSG00001000205\t0\t0\t1\n",
        "ENSG00000700034\t1\t1\t1\n",
    ]
    self.f_context = "".join(text_rows)

    gene_ids = ["ENSG00000000003", "ENSG00001000205", "ENSG00000700034"]
    sample_names = ["a", "b", "c"]
    self.golden_output = pd.DataFrame(
        [[1, 0, 1], [0, 0, 1], [1, 1, 1]],
        index=gene_ids,
        columns=sample_names,
    )
def setUp(self):
    """Create an input frame with one missing value and two reference frames.

    ``input_df`` has ``None`` in row ``aa`` / column ``c``.
    ``golden_output_remove`` is the same data without row ``aa``;
    ``golden_output_average`` has ``0.5`` in place of the missing value.
    """
    logger.init()

    all_rows = ["aa", "bb", "cc"]
    headers = ["a", "b", "c"]

    self.input_df = pd.DataFrame(
        [[1, 1, None], [2, 0, 0], [4, 1, 1]],
        index=all_rows,
        columns=headers,
    )

    self.golden_output_remove = pd.DataFrame(
        [[2, 0, 0], [4, 1, 1]],
        index=["bb", "cc"],
        columns=headers,
    )

    self.golden_output_average = pd.DataFrame(
        [[1, 1, 0.5], [2, 0, 0], [4, 1, 1]],
        index=all_rows,
        columns=headers,
    )
def setUp(self):
    """Create three frames that differ in how many column labels are ``None``.

    ``input_df`` has two named columns, ``input_df_nan`` has one ``None``
    column label and ``input_df_fail`` has ``None`` for both labels. All
    three share the same row labels, which contain one repeated gene id.
    """
    logger.init()

    gene_ids = ["ENSG00001027003", "ENSG00001027003", "ENSG00008000303"]

    self.input_df = pd.DataFrame(
        [[1, 2], [0, 10], [1, 9]],
        index=gene_ids,
        columns=["a", "b"],
    )

    self.input_df_nan = pd.DataFrame(
        [[1, 0], [0, 10], [1, 1]],
        index=gene_ids,
        columns=["a", None],
    )

    self.input_df_fail = pd.DataFrame(
        [[1, 0], [0, 10], [1, 1]],
        index=gene_ids,
        columns=[None, None],
    )
def setUp(self):
    """Create four frames with different kinds of cell values.

    ``input_df`` holds non-negative integers, ``input_df_nan`` contains a
    ``None`` cell, ``input_df_text`` contains string cells and
    ``input_df_negative`` contains negative numbers. Row and column labels
    are identical across all four.
    """
    logger.init()

    gene_ids = ["ENSG00001027003", "ENSG00001027003", "ENSG00008000303"]
    headers = ["a", "b"]

    self.input_df = pd.DataFrame(
        [[1, 2], [0, 10], [1, 9]], index=gene_ids, columns=headers)

    self.input_df_nan = pd.DataFrame(
        [[1, 0], [0, None], [1, 1]], index=gene_ids, columns=headers)

    self.input_df_text = pd.DataFrame(
        [["text", 0], [0, "text"], [1, 1]], index=gene_ids, columns=headers)

    self.input_df_negative = pd.DataFrame(
        [[-1, 0], [0, -2], [1, 1]], index=gene_ids, columns=headers)
def setUp(self):
    """Build the run parameters and expected output file names.

    The parameters reference the binary gene-expression test spreadsheet and
    set ``pipeline_type`` to ``geneset_characterization_pipeline``.
    """
    logger.init()

    # Names of the files the tests refer to as ETL / MAP / UNMAPPED outputs.
    self.file_ETL = "TEST_1_gene_expression_binary_ETL.tsv"
    self.file_MAP = "TEST_1_gene_expression_binary_MAP.tsv"
    self.file_UNMAPPED = "TEST_1_gene_expression_binary_User_To_Ensembl.tsv"

    redis_settings = {
        "host": "knowredis.knoweng.org",
        "port": 6379,
        "password": "******",
    }

    self.run_parameters = {
        "spreadsheet_name_full_path": "../../data/spreadsheets/TEST_1_gene_expression_binary.tsv",
        "results_directory": "./",
        "source_hint": "",
        "taxonid": "9606",
        "impute": "average",
        "redis_credential": redis_settings,
        "pipeline_type": "geneset_characterization_pipeline",
    }
def setUp(self):
    """Create frames with differently formed row labels plus run parameters.

    ``input_df_good`` uses three ``ENSG...`` labels, ``input_df_bad``
    replaces the last one with ``S00000005`` and ``input_df_cannot_map``
    uses no ``ENSG...`` label at all. ``golden_output_good`` has the same
    content as ``input_df_good``.
    """
    logger.init()

    headers = ["a", "b"]
    ensembl_ids = ["ENSG00000000003", "ENSG00000000457", "ENSG00000000005"]

    self.input_df_good = pd.DataFrame(
        [[1, 0], [0, 0], [1, 1]],
        index=ensembl_ids,
        columns=headers,
    )

    self.input_df_bad = pd.DataFrame(
        [[1, 0], [0, 0], [1, 1]],
        index=["ENSG00000000003", "ENSG00000000457", "S00000005"],
        columns=headers,
    )

    self.input_df_cannot_map = pd.DataFrame(
        [[1, 0], [0, 0], [1, 1]],
        index=["000000003", "000457", "S00000005"],
        columns=headers,
    )

    redis_settings = {
        "host": "knowredis.knoweng.org",
        "port": 6379,
        "password": "******",
    }
    self.run_parameters = {
        "spreadsheet_name_full_path": "../data/spreadsheets/example.tsv",
        "results_directory": "./",
        "redis_credential": redis_settings,
        "source_hint": "",
        "taxonid": "9606",
    }

    self.output_mapping = "./example_MAP.tsv"

    self.golden_output_good = pd.DataFrame(
        [[1, 0], [0, 0], [1, 1]],
        index=ensembl_ids,
        columns=headers,
    )
def setUp(self):
    """Create frames with and without a repeated row label, plus references.

    ``input_df_dup`` repeats ``ENSG00000000003`` in its index;
    ``input_df_nodup`` has three distinct labels. ``golden_output_dedup``
    holds the first two rows of the duplicated frame, and
    ``golden_output_nodup`` has the same content as ``input_df_nodup``.
    """
    logger.init()

    headers = ["a", "b"]
    unique_ids = ["ENSG00001027003", "ENSG00000002008", "ENSG00008000303"]

    self.input_df_dup = pd.DataFrame(
        [[1, 0], [0, 0], [1, 1]],
        index=["ENSG00000000003", "ENSG00001027003", "ENSG00000000003"],
        columns=headers,
    )

    self.input_df_nodup = pd.DataFrame(
        [[1, 0], [0, 0], [1, 1]],
        index=unique_ids,
        columns=headers,
    )

    self.golden_output_dedup = pd.DataFrame(
        [[1, 0], [0, 0]],
        index=["ENSG00000000003", "ENSG00001027003"],
        columns=headers,
    )

    self.golden_output_nodup = pd.DataFrame(
        [[1, 0], [0, 0], [1, 1]],
        index=unique_ids,
        columns=headers,
    )
def setUp(self):
    """Create the phenotype frames used as test inputs.

    The frames cover: 0/1 values only, a value of ``3`` among 0/1 values,
    a ``None`` cell, a single real-valued column containing a negative
    number, and two real-valued columns.
    """
    logger.init()

    binary_headers = ["a", "b"]
    five_samples = ["a", "b", "c", "d", "e"]

    self.input_phenotype_df = pd.DataFrame(
        [[1, 0], [0, 0], [1, 1], [0, 1], [0, 0]],
        index=five_samples,
        columns=binary_headers,
    )

    self.input_phenotype_df_bad_value = pd.DataFrame(
        [[1, 0], [3, 0], [1, 1], [0, 1], [0, 0]],
        index=five_samples,
        columns=binary_headers,
    )

    self.input_phenotype_df_nan = pd.DataFrame(
        [[1, 0], [0, None], [0, 1], [1, 0], [0, 1], [1, 1]],
        index=["a", "b", "c", "d", "e", "f"],
        columns=binary_headers,
    )

    self.input_phenotype_df_negative = pd.DataFrame(
        [[1.1], [-2.2], [3.3]],
        index=["a", "b", "f"],
        columns=["drug1"],
    )

    self.input_phenotype_df_pearson = pd.DataFrame(
        [[1.1, 0.1], [-2.2, 1.2], [3.3, 2.3]],
        index=["d", "e", "f"],
        columns=["drug1", "drug2"],
    )
def setUp(self):
    """Build the run parameters and expected output file names.

    The parameters reference the real-number gene-expression spreadsheet and
    the pearson phenotype spreadsheet, with ``correlation_measure`` set to
    ``pearson``.
    """
    logger.init()

    # Names of the files the tests refer to as spreadsheet / phenotype outputs.
    self.file_ETL = "TEST_1_gene_expression_real_number_ETL.tsv"
    self.file_MAP = "TEST_1_gene_expression_real_number_MAP.tsv"
    self.file_UNMAPPED = "TEST_1_gene_expression_real_number_User_To_Ensembl.tsv"
    self.phenotype_ETL = "TEST_1_phenotype_pearson_ETL.tsv"

    redis_settings = {
        "host": "knowredis.knoweng.org",
        "port": 6379,
        "password": "******",
    }

    self.run_parameters = {
        "spreadsheet_name_full_path": "../../data/spreadsheets/TEST_1_gene_expression_real_number.tsv",
        "phenotype_name_full_path": "../../data/spreadsheets/TEST_1_phenotype_pearson.tsv",
        "results_directory": "./",
        "source_hint": "",
        "taxonid": "9606",
        "impute": "average",
        "pipeline_type": "samples_clustering_pipeline",
        "correlation_measure": "pearson",
        "redis_credential": redis_settings,
    }
def setUp(self):
    """Create a gene frame, a one-row phenotype frame and run parameters.

    ``input_df_good`` is indexed by three ``ENSG...`` labels;
    ``input_phenotype`` has a single ``drug1`` row over columns ``a``-``c``.
    """
    logger.init()

    self.input_df_good = pd.DataFrame(
        [[1, 0], [0, 0], [1, 1]],
        index=["ENSG00000000003", "ENSG00000000457", "ENSG00000000005"],
        columns=["a", "b"],
    )

    self.input_phenotype = pd.DataFrame(
        [[1.1, 2.2, 3.3]],
        index=["drug1"],
        columns=["a", "b", "c"],
    )

    redis_settings = {
        "host": "knowredis.knoweng.org",
        "port": 6379,
        "password": "******",
    }

    # NOTE(review): the phenotype path contains a space after ".." and the
    # pipeline type is spelled "priorization"; both are kept exactly as they
    # were -- confirm whether they are intentional.
    self.run_parameters = {
        "spreadsheet_name_full_path": "../data/spreadsheets/example.tsv",
        "phenotype_name_full_path": ".. /data/spreadsheets/phenotype.tsv",
        "results_directory": "./",
        "redis_credential": redis_settings,
        "source_hint": "",
        "taxonid": "9606",
        "pipeline_type": "gene_priorization_pipeline",
        "input_data_type": "",
    }
columns=['a', 'b', 'c'])
        # NOTE(review): the statement above and the two assignments below are
        # the tail of a set-up routine whose beginning is outside this view.
        # Reference frames: one without row 'aa', one with 0.5 in row 'aa',
        # column 'c'.
        self.golden_output_remove = pd.DataFrame([[2, 0, 0], [4, 1, 1]], index=["bb", 'cc'],
                                                 columns=['a', 'b', 'c'])
        self.golden_output_average = pd.DataFrame(
            [[1, 1, 0.5], [2, 0, 0], [4, 1, 1]], index=['aa', "bb", 'cc'],
            columns=['a', 'b', 'c'])

    def tearDown(self):
        """Delete the ``input_df`` attribute after each test."""
        del self.input_df

    def test_impute_na_average(self):
        """``impute_na`` with "average" must match ``golden_output_average``."""
        ret = SpreadSheet.impute_na(self.input_df, "average")
        npytest.assert_array_equal(self.golden_output_average, ret)

    def test_impute_na_remove(self):
        """``impute_na`` with "remove" must match ``golden_output_remove``."""
        ret = SpreadSheet.impute_na(self.input_df, "remove")
        npytest.assert_array_equal(self.golden_output_remove, ret)

    def test_impute_na_bad_option(self):
        """``impute_na`` with the option "bad" must return data equal to the input."""
        ret = SpreadSheet.impute_na(self.input_df, "bad")
        npytest.assert_array_equal(self.input_df, ret)


if __name__ == '__main__':
    # Initialise the logger, then run every test in this module.
    logger.init()
    unittest.main()