def setUp(self):
        """Call ``logger.init()`` and build the run-parameter fixtures.

        Two dictionaries are created for the ``samples_clustering_pipeline``
        type: ``run_parameters`` has no phenotype entry, while
        ``run_parameters_empty_phenotype`` carries an empty
        ``"phenotype_name_full_path"``.  Three file names derived from the
        spreadsheet name are stored on ``file_ETL``, ``file_MAP`` and
        ``file_UNMAPPED``.
        """
        logger.init()

        stem = "TEST_1_gene_expression_positive_real_number"
        spreadsheet = "../../data/spreadsheets/" + stem + ".tsv"
        network = "../../data/networks/TEST_1_gene_gene.edge"

        def redis_credential():
            # A new dict on every call keeps the two fixtures independent.
            return {
                "host": "knowredis.knoweng.org",
                "port": 6379,
                "password": "******",
            }

        self.run_parameters = {
            "spreadsheet_name_full_path": spreadsheet,
            "gg_network_name_full_path": network,
            "results_directory": "./",
            "redis_credential": redis_credential(),
            "source_hint": "",
            "taxonid": '9606',
            "pipeline_type": "samples_clustering_pipeline",
        }

        self.run_parameters_empty_phenotype = {
            "spreadsheet_name_full_path": spreadsheet,
            "gg_network_name_full_path": network,
            "phenotype_name_full_path": "",
            "results_directory": "./",
            "redis_credential": redis_credential(),
            "source_hint": "",
            "taxonid": '9606',
            "pipeline_type": "samples_clustering_pipeline",
        }

        self.file_ETL = stem + "_ETL.tsv"
        self.file_MAP = stem + "_MAP.tsv"
        self.file_UNMAPPED = stem + "_User_To_Ensembl.tsv"
Beispiel #2
0
def data_cleanup():
    """Run the pipeline selected by the run file given on the command line.

    The run directory and run file are taken from ``sys.argv``, the run
    parameters are loaded from them, and ``run_pipelines`` is called with the
    ``SELECT`` entry for ``run_parameters['pipeline_type']``.

    Raises:
        RuntimeError: if any step fails.  Its message is the text of the
            original exception, which is also appended to ``logger.logging``
            and attached as ``__cause__``.
    """
    try:
        logger.init()
        run_directory, run_file = get_run_directory_and_file(sys.argv)
        run_parameters = get_run_parameters(run_directory, run_file)
        run_pipelines(run_parameters, SELECT[run_parameters['pipeline_type']])
    except Exception as err:
        logger.logging.append("ERROR: {}".format(str(err)))
        # Chain explicitly so the traceback reports the original failure as
        # the direct cause instead of an error raised during handling.
        raise RuntimeError(str(err)) from err
def checker():
    """Build a ``Checker`` from the command-line run file and run its check.

    The run directory and run file are taken from ``sys.argv``, the run
    parameters are loaded from them, and ``Checker(run_parameters)
    .condition_check()`` is invoked.

    Raises:
        RuntimeError: if any step fails.  Its message is the text of the
            original exception, which is also appended to ``logger.logging``
            and attached as ``__cause__``.
    """
    try:
        logger.init()
        run_directory, run_file = get_run_directory_and_file(sys.argv)
        run_parameters = get_run_parameters(run_directory, run_file)
        obj = Checker(run_parameters)
        obj.condition_check()

    except Exception as err:
        logger.logging.append("ERROR: {}".format(str(err)))
        # Chain explicitly so the traceback reports the original failure as
        # the direct cause instead of an error raised during handling.
        raise RuntimeError(str(err)) from err
def network_prepper():
    """Run ``network_prepper_pipeline`` for the command-line run file.

    The run directory and run file are taken from ``sys.argv``, the run
    parameters are loaded from them, and ``run_pipelines`` is called with the
    pipeline name ``"network_prepper_pipeline"``.

    On failure the error text is appended to ``logger.logging`` and, when the
    run parameters were loaded, the log is written to
    ``<results_directory>/log_network_prepper_pipeline.yml``.

    Raises:
        RuntimeError: if any step fails.  Its message is the text of the
            original exception, which is attached as ``__cause__``.
    """
    # Bound before the try block: the handler below reads it, and it would
    # otherwise be undefined when loading the run file itself is what failed.
    run_parameters = None
    try:
        logger.init()
        run_directory, run_file = get_run_directory_and_file(sys.argv)
        run_parameters = get_run_parameters(run_directory, run_file)
        run_pipelines(run_parameters, "network_prepper_pipeline")
    except Exception as err:
        logger.logging.append("ERROR: {}".format(str(err)))
        # try to write the log; without run parameters there is no results
        # directory to write it to, so only the RuntimeError is reported.
        if run_parameters is not None:
            logger.generate_logging(
                False, logger.logging, run_parameters["results_directory"] +
                "/log_" + "network_prepper_pipeline" + ".yml")
        raise RuntimeError(str(err)) from err
Beispiel #5
0
    def setUp(self):
        """Call ``logger.init()`` and define the spreadsheet fixtures.

        Stores the run directory, the spreadsheet file name and their joined
        path, the tab-separated text of a 3x3 spreadsheet (``f_context``) and
        a DataFrame holding the same values (``golden_output``).
        """
        logger.init()

        self.run_dir = "./run_file"
        self.user_spreadsheet = "user_spreadsheet.tsv"
        self.spreadsheet_path = "/".join(
            (self.run_dir, self.user_spreadsheet))

        # One entry per text line; each line is newline-terminated below.
        text_lines = (
            "\ta\tb\tc",
            "ENSG00000000003\t1\t0\t1",
            "ENSG00001000205\t0\t0\t1",
            "ENSG00000700034\t1\t1\t1",
        )
        self.f_context = "".join(line + "\n" for line in text_lines)

        gene_ids = ['ENSG00000000003', "ENSG00001000205", 'ENSG00000700034']
        sample_names = ['a', 'b', 'c']
        values = [
            [1, 0, 1],
            [0, 0, 1],
            [1, 1, 1],
        ]
        self.golden_output = pd.DataFrame(
            values, index=gene_ids, columns=sample_names)
    def setUp(self):
        """Call ``logger.init()`` and build the missing-value fixtures.

        ``input_df`` holds a ``None`` in row ``aa``.  ``golden_output_remove``
        is the same table without row ``aa``; ``golden_output_average`` keeps
        all rows and has 0.5 in place of the ``None``.
        """
        logger.init()

        column_names = ['a', 'b', 'c']

        def frame(values, row_labels):
            # All three fixtures share the same column labels.
            return pd.DataFrame(
                values, index=row_labels, columns=column_names)

        self.input_df = frame(
            [[1, 1, None],
             [2, 0, 0],
             [4, 1, 1]],
            ['aa', "bb", 'cc'])

        self.golden_output_remove = frame(
            [[2, 0, 0],
             [4, 1, 1]],
            ["bb", 'cc'])

        self.golden_output_average = frame(
            [[1, 1, 0.5],
             [2, 0, 0],
             [4, 1, 1]],
            ['aa', "bb", 'cc'])
Beispiel #7
0
    def setUp(self):
        """Call ``logger.init()`` and build the column-label fixtures.

        All three frames share the same row labels.  ``input_df`` has the
        column labels ``a`` and ``b``, ``input_df_nan`` has one ``None``
        column label and ``input_df_fail`` has only ``None`` column labels.
        """
        logger.init()

        gene_ids = ['ENSG00001027003', "ENSG00001027003", 'ENSG00008000303']

        def frame(values, column_labels):
            # Only the cell values and the column labels vary per fixture.
            return pd.DataFrame(
                values, index=gene_ids, columns=column_labels)

        self.input_df = frame(
            [[1, 2], [0, 10], [1, 9]],
            ['a', 'b'])

        self.input_df_nan = frame(
            [[1, 0], [0, 10], [1, 1]],
            ['a', None])

        self.input_df_fail = frame(
            [[1, 0], [0, 10], [1, 1]],
            [None, None])
    def setUp(self):
        """Call ``logger.init()`` and build the cell-value fixtures.

        The four frames share row and column labels and differ only in their
        cells: plain non-negative numbers (``input_df``), one ``None``
        (``input_df_nan``), string cells (``input_df_text``) and negative
        numbers (``input_df_negative``).
        """
        logger.init()

        gene_ids = ['ENSG00001027003', "ENSG00001027003", 'ENSG00008000303']
        column_names = ['a', 'b']

        def frame(values):
            # Labels are identical for every fixture; only cells change.
            return pd.DataFrame(
                values, index=gene_ids, columns=column_names)

        self.input_df = frame([[1, 2], [0, 10], [1, 9]])
        self.input_df_nan = frame([[1, 0], [0, None], [1, 1]])
        self.input_df_text = frame([["text", 0], [0, "text"], [1, 1]])
        self.input_df_negative = frame([[-1, 0], [0, -2], [1, 1]])
    def setUp(self):
        """Call ``logger.init()`` and build the run-parameter fixture.

        ``run_parameters`` describes a ``geneset_characterization_pipeline``
        run on the binary gene-expression spreadsheet with ``"impute"`` set
        to ``"average"``.  Three file names derived from the spreadsheet name
        are stored on ``file_ETL``, ``file_MAP`` and ``file_UNMAPPED``.
        """
        logger.init()

        stem = "TEST_1_gene_expression_binary"
        redis_credential = {
            "host": "knowredis.knoweng.org",
            "port": 6379,
            "password": "******",
        }

        self.run_parameters = {
            "spreadsheet_name_full_path":
                "../../data/spreadsheets/" + stem + ".tsv",
            "results_directory": "./",
            "source_hint": "",
            "taxonid": '9606',
            "impute": "average",
            "redis_credential": redis_credential,
            "pipeline_type": "geneset_characterization_pipeline",
        }

        self.file_ETL = stem + "_ETL.tsv"
        self.file_MAP = stem + "_MAP.tsv"
        self.file_UNMAPPED = stem + "_User_To_Ensembl.tsv"
    def setUp(self):
        """Call ``logger.init()`` and build the gene-name fixtures.

        Four frames with identical cells and column labels are created; they
        differ only in their row labels (``input_df_good``, ``input_df_bad``,
        ``input_df_cannot_map`` and ``golden_output_good``, which repeats the
        labels of ``input_df_good``).  ``run_parameters`` and the mapping file
        name ``output_mapping`` are stored as well.
        """
        logger.init()

        column_names = ['a', 'b']

        def frame(row_labels):
            # Every fixture carries the same 3x2 block of cells.
            cells = [[1, 0],
                     [0, 0],
                     [1, 1]]
            return pd.DataFrame(
                cells, index=row_labels, columns=column_names)

        self.input_df_good = frame(
            ['ENSG00000000003', "ENSG00000000457", 'ENSG00000000005'])
        self.input_df_bad = frame(
            ['ENSG00000000003', "ENSG00000000457", 'S00000005'])
        self.input_df_cannot_map = frame(
            ['000000003', "000457", 'S00000005'])

        redis_credential = {
            "host": "knowredis.knoweng.org",
            "port": 6379,
            "password": "******",
        }
        self.run_parameters = {
            "spreadsheet_name_full_path": "../data/spreadsheets/example.tsv",
            "results_directory": "./",
            "redis_credential": redis_credential,
            "source_hint": "",
            "taxonid": '9606',
        }

        self.output_mapping = "./example_MAP.tsv"

        self.golden_output_good = frame(
            ['ENSG00000000003', "ENSG00000000457", 'ENSG00000000005'])
    def setUp(self):
        """Call ``logger.init()`` and build the duplicate-row fixtures.

        ``input_df_dup`` repeats one row label, ``input_df_nodup`` does not.
        ``golden_output_dedup`` is a two-row frame with one row per label of
        ``input_df_dup``; ``golden_output_nodup`` equals ``input_df_nodup``.
        """
        logger.init()

        column_names = ['a', 'b']

        def frame(values, row_labels):
            # Column labels are the same for all four fixtures.
            return pd.DataFrame(
                values, index=row_labels, columns=column_names)

        self.input_df_dup = frame(
            [[1, 0], [0, 0], [1, 1]],
            ['ENSG00000000003', "ENSG00001027003", 'ENSG00000000003'])

        self.input_df_nodup = frame(
            [[1, 0], [0, 0], [1, 1]],
            ['ENSG00001027003', "ENSG00000002008", 'ENSG00008000303'])

        self.golden_output_dedup = frame(
            [[1, 0], [0, 0]],
            ["ENSG00000000003", 'ENSG00001027003'])

        self.golden_output_nodup = frame(
            [[1, 0], [0, 0], [1, 1]],
            ['ENSG00001027003', "ENSG00000002008", 'ENSG00008000303'])
    def setUp(self):
        """Call ``logger.init()`` and build the phenotype fixtures.

        Three two-column frames (``a``/``b``) hold 0/1 values, a value of 3,
        and a ``None`` respectively.  Two further frames with ``drug``
        columns hold positive and negative real numbers.
        """
        logger.init()

        def frame(values, row_labels, column_labels):
            # Thin wrapper so each fixture below reads as data only.
            return pd.DataFrame(
                values, index=row_labels, columns=column_labels)

        binary_columns = ['a', 'b']
        five_rows = ['a', "b", 'c', 'd', 'e']

        self.input_phenotype_df = frame(
            [[1, 0], [0, 0], [1, 1], [0, 1], [0, 0]],
            five_rows, binary_columns)

        self.input_phenotype_df_bad_value = frame(
            [[1, 0], [3, 0], [1, 1], [0, 1], [0, 0]],
            five_rows, binary_columns)

        self.input_phenotype_df_nan = frame(
            [[1, 0], [0, None], [0, 1], [1, 0], [0, 1], [1, 1]],
            ['a', "b", 'c', 'd', 'e', 'f'], binary_columns)

        self.input_phenotype_df_negative = frame(
            [[1.1], [-2.2], [3.3]],
            ['a', 'b', 'f'], ['drug1'])

        self.input_phenotype_df_pearson = frame(
            [[1.1, 0.1], [-2.2, 1.2], [3.3, 2.3]],
            ['d', 'e', 'f'], ['drug1', 'drug2'])
Beispiel #13
0
 def setUp(self):
     """Call ``logger.init()`` and build the pearson run-parameter fixture.

     ``run_parameters`` describes a ``samples_clustering_pipeline`` run with
     a real-number spreadsheet, a phenotype file, ``"impute"`` set to
     ``"average"`` and ``"correlation_measure"`` set to ``'pearson'``.
     File names derived from the spreadsheet and phenotype names are stored
     on ``file_ETL``, ``file_MAP``, ``file_UNMAPPED`` and ``phenotype_ETL``.
     """
     logger.init()

     data_dir = "../../data/spreadsheets/"
     spreadsheet_stem = "TEST_1_gene_expression_real_number"
     phenotype_stem = "TEST_1_phenotype_pearson"
     redis_credential = {
         "host": "knowredis.knoweng.org",
         "port": 6379,
         "password": "******",
     }

     self.run_parameters = {
         "spreadsheet_name_full_path": data_dir + spreadsheet_stem + ".tsv",
         "phenotype_name_full_path": data_dir + phenotype_stem + ".tsv",
         "results_directory": "./",
         "source_hint": "",
         "taxonid": '9606',
         "impute": "average",
         "pipeline_type": "samples_clustering_pipeline",
         "correlation_measure": 'pearson',
         "redis_credential": redis_credential,
     }

     self.file_ETL = spreadsheet_stem + "_ETL.tsv"
     self.file_MAP = spreadsheet_stem + "_MAP.tsv"
     self.file_UNMAPPED = spreadsheet_stem + "_User_To_Ensembl.tsv"
     self.phenotype_ETL = phenotype_stem + "_ETL.tsv"
Beispiel #14
0
    def setUp(self):
        """Call ``logger.init()`` and build the spreadsheet/phenotype fixtures.

        ``input_df_good`` is a 3x2 frame indexed by Ensembl-style gene names
        with columns ``a`` and ``b``; ``input_phenotype`` is a single-row
        frame (``drug1``) with columns ``a``, ``b`` and ``c``.
        ``run_parameters`` holds the settings passed along with them.
        """
        logger.init()
        self.input_df_good = pd.DataFrame(
            [[1, 0], [0, 0], [1, 1]],
            index=['ENSG00000000003', "ENSG00000000457", 'ENSG00000000005'],
            columns=['a', 'b'])
        self.input_phenotype = pd.DataFrame([[1.1, 2.2, 3.3]],
                                            index=['drug1'],
                                            columns=['a', 'b', 'c'])

        self.run_parameters = {
            "spreadsheet_name_full_path": "../data/spreadsheets/example.tsv",
            # The stray space after ".." was removed so this path uses the
            # same "../data/spreadsheets/" prefix as the spreadsheet path.
            "phenotype_name_full_path": "../data/spreadsheets/phenotype.tsv",
            "results_directory": "./",
            "redis_credential": {
                "host": "knowredis.knoweng.org",
                "port": 6379,
                "password": "******"
            },
            "source_hint": "",
            "taxonid": '9606',
            # NOTE(review): spelled "priorization" here - confirm this is the
            # pipeline key the code under test expects before changing it.
            "pipeline_type": "gene_priorization_pipeline",
            "input_data_type": ""
        }
                                     columns=['a', 'b', 'c'])

        self.golden_output_remove = pd.DataFrame([[2, 0, 0], [4, 1, 1]],
                                                 index=["bb", 'cc'],
                                                 columns=['a', 'b', 'c'])

        self.golden_output_average = pd.DataFrame(
            [[1, 1, 0.5], [2, 0, 0], [4, 1, 1]],
            index=['aa', "bb", 'cc'],
            columns=['a', 'b', 'c'])

    def tearDown(self):
        """Remove the ``input_df`` attribute from the test instance."""
        delattr(self, "input_df")

    def test_impute_na_average(self):
        # The "average" option must reproduce golden_output_average.
        imputed = SpreadSheet.impute_na(self.input_df, "average")
        expected = self.golden_output_average
        npytest.assert_array_equal(expected, imputed)

    def test_impute_na_remove(self):
        # The "remove" option must reproduce golden_output_remove.
        imputed = SpreadSheet.impute_na(self.input_df, "remove")
        expected = self.golden_output_remove
        npytest.assert_array_equal(expected, imputed)

    def test_impute_na_bad_option(self):
        # With the option "bad" the result is compared against the input
        # frame itself.
        returned = SpreadSheet.impute_na(self.input_df, "bad")
        expected = self.input_df
        npytest.assert_array_equal(expected, returned)


# Run the tests in this file when it is executed as a script.
if __name__ == '__main__':
    # logger.init() is called once here before unittest.main() runs the
    # tests (each setUp calls it again).
    logger.init()
    unittest.main()