Example #1
0
    def test_query_columns_with_dots(self):
        """Filter on the dotted column "TVC.counts" with "<" and then ">"."""
        cases = (
            ("TVC.counts < 3", (8, 99)),
            ("TVC.counts > 3", (1, 99)),
        )
        for condition, expected_shape in cases:
            # The filtering of df is always in place, so every case starts
            # from a freshly loaded DataFrame.
            fresh = ff.load(self.tab2_file)
            filtered = ff.dffilter([condition], fresh)
            self.assertEqual(filtered.shape, expected_shape)
Example #2
0
    def test_reverse_the_filter_condition_similar_ords_and_or_json(self):
        """Filter DOT.column.tab with conditions read from slashb.json.

        The conditions are loaded from the JSON test file and handed to
        ff.dffilter unchanged; the result is expected to have shape (4, 99).
        """
        # Use a context manager so the JSON file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(file_test("slashb.json")) as handle:
            conds = json.load(handle)
        df = ff.load(file_test("DOT.column.tab"))

        new_df = ff.dffilter(conds, df)

        self.assertEqual(new_df.shape, (4, 99))
Example #3
0
    def test_num_columns_that_fails_cast_to_str_coerced_into_nan(self):
        """Select rows where CG46 differs from itself; expect one row."""
        # NaN is not equal to itself, so this self-inequality is a trick to
        # check whether the column has a NaN.
        nan_only = ['CG46 != CG46']
        loaded = ff.load(self.tab4_file)

        result = ff.dffilter(nan_only, loaded)

        self.assertEqual(result.shape, (1, 76))
Example #4
0
    def test_reverse_the_filter_condition_similar_words_and_or(self):
        """Apply a not_contains condition using a word boundary and "|"."""
        pattern = r'ExonicFunc.refGene not_contains \bsynonymous SNV|deletion'
        table = ff.load(file_test("DOT.column.tab"))

        remaining = ff.dffilter([pattern], table)

        self.assertEqual(remaining.shape, (7, 99))
Example #5
0
    def test_DF_can_be_filtered_with_OR_string_fields(self):
        """Combine a numeric "|" condition with a "contains a|b" condition."""
        frequency_extremes = 'PopFreqMax < 0.01 | PopFreqMax > 0.99'
        exonic_or_intronic = 'Func.refGene contains exonic|intronic'
        table = ff.load(self.tab_file)

        result = ff.dffilter([frequency_extremes, exonic_or_intronic], table)

        self.assertEqual(result.shape, (53, 151))
Example #6
0
    def test_cleansing_filter_number_starting_column(self):
        """Check ff.clean on a column name that starts with a digit.

        "1000G_ALL" must be present in the loaded DataFrame; after ff.clean
        both the returned name and the DataFrame column must be "_1000G_ALL".
        """
        column = "1000G_ALL"
        df = ff.load(file_test("DOT.column.tab"))

        # assertIn reports the missing member and the container on failure,
        # whereas assertTrue(x in y) only reports "False is not true".
        self.assertIn(column, df.columns)

        column, df = ff.clean(column, df)

        self.assertEqual("_1000G_ALL", column)
        self.assertIn("_1000G_ALL", df.columns)
Example #7
0
    def test_cleansing_filter_and_df(self):
        """Check ff.clean on a column name that contains a dot.

        "TVC.counts" must be present in the loaded DataFrame; after ff.clean
        both the returned name and the DataFrame column must be "TVC_counts".
        """
        column = "TVC.counts"
        df = ff.load(file_test("DOT.column.tab"))

        # assertIn reports the missing member and the container on failure,
        # whereas assertTrue(x in y) only reports "False is not true".
        self.assertIn(column, df.columns)

        column, df = ff.clean(column, df)

        self.assertEqual("TVC_counts", column)
        self.assertIn("TVC_counts", df.columns)
Example #8
0
    def test_DF_can_be_filtered_by_one_condition_query(self):
        """Filter with the single condition 'Ref == "G"'; expect 60 rows."""
        table = ff.load(self.tab_file)
        only_g = ['Ref == "G"']

        filtered = ff.dffilter(only_g, table)

        self.assertEqual(filtered.shape, (60, 151))
Example #9
0
    def test_DF_has_loaded_correctly(self):
        """The "MetaLR_score" column must be loaded with dtype float64."""
        loaded = ff.load(self.tab_file)
        score_dtype = loaded["MetaLR_score"].dtype

        self.assertEqual(score_dtype, np.dtype("float64"))
Example #10
0
    def test_load_with_weird_chars(self):
        """ff.load on tab_file returns a DataFrame of shape (249, 151)."""
        df = ff.load(self.tab_file)

        # assertIsInstance is the idiomatic type check and gives a clearer
        # failure message than comparing type() objects for equality.
        self.assertIsInstance(df, pd.DataFrame)
        # The DataFrame has 151 columns and 249 data rows.
        self.assertEqual(df.shape, (249, 151))
Example #11
0
    def test_can_load_gzipped_tab_as_DF(self):
        """ff.load on tab_file + ".gz" returns a (249, 151) DataFrame."""
        df = ff.load(self.tab_file + ".gz")

        # assertIsInstance is the idiomatic type check and gives a clearer
        # failure message than comparing type() objects for equality.
        self.assertIsInstance(df, pd.DataFrame)
        # The DataFrame has 151 columns and 249 data rows.
        self.assertEqual(df.shape, (249, 151))
Example #12
0
    def test_thousand_genomes_column_filtering(self):
        """Filter on "1000G_ALL", a column name that starts with a digit."""
        table = ff.load(file_test("DOT.column.tab"))
        above_threshold = ['1000G_ALL > 0.2']

        kept = ff.dffilter(above_threshold, table)

        self.assertEqual(kept.shape, (1, 99))
Example #13
0
    def test_reverse_the_filter_condition_multiple(self):
        """Apply not_contains with the two-word value "frameshift deletion"."""
        table = ff.load(file_test("DOT.column.tab"))
        negated = ['ExonicFunc.refGene not_contains frameshift deletion']

        kept = ff.dffilter(negated, table)

        self.assertEqual(kept.shape, (8, 99))
Example #14
0
    def test_reverse_the_filter_condition_single(self):
        """Apply not_contains with the single-word value "intronic"."""
        table = ff.load(file_test("DOT.column.tab"))
        negated = ['Func.refGene not_contains intronic']

        kept = ff.dffilter(negated, table)

        self.assertEqual(kept.shape, (4, 99))
Example #15
0
    def test_num_columns_with_commas(self):
        """Filter floats_comma.tab on 'ExAC_ALL <= 0.1'; expect (5, 2)."""
        # The fixture lives in "test_files" next to this test module.
        fixtures_dir = join(dirname(__file__), "test_files")
        table = ff.load(join(fixtures_dir, "floats_comma.tab"))

        kept = ff.dffilter(['ExAC_ALL <= 0.1'], table)

        self.assertEqual(kept.shape, (5, 2))
Example #16
0
    def test_DF_can_be_filtered_by_two_conditions(self):
        """Pass an equality and a "contains" condition together."""
        ref_is_a = 'Ref == "A"'
        is_exonic = 'Func.refGene contains exonic'
        table = ff.load(self.tab_file)

        kept = ff.dffilter([ref_is_a, is_exonic], table)

        self.assertEqual(kept.shape, (13, 151))
Example #17
0
    def test_multiple_weirdness_can_function(self):
        """Pass "< 3" and "> 3" on TVC.counts together; expect zero rows."""
        table = ff.load(self.tab2_file)
        below = "TVC.counts < 3"
        above = "TVC.counts > 3"

        kept = ff.dffilter([below, above], table)

        self.assertEqual(kept.shape, (0, 99))
Example #18
0
    def test_DF_can_be_filtered_by_numeric_AND_string_conditions(self):
        """Pass a numeric comparison and a "contains" condition together."""
        rare = 'PopFreqMax < 0.01'
        is_exonic = 'Func.refGene contains exonic'
        table = ff.load(self.tab_file)

        kept = ff.dffilter([rare, is_exonic], table)

        self.assertEqual(kept.shape, (2, 151))
Example #19
0
    def test_news_filter_2017_03(self):
        """Filter DOT.column.tab on 'gnomAD_exome_ALL > 0.99'; expect 2 rows."""
        table = ff.load(file_test("DOT.column.tab"))
        very_common = ['gnomAD_exome_ALL > 0.99']

        kept = ff.dffilter(very_common, table)

        self.assertEqual(kept.shape, (2, 99))
Example #20
0
    def test_non_existent_columns_doesnt_break_code(self):
        """Filter on a column named "Imaginary"; expect a (9, 99) result."""
        # NOTE(review): per the test name, "Imaginary" is not a column of
        # tab2_file and (9, 99) is presumably the unfiltered shape - confirm
        # against the fixture.
        table = ff.load(self.tab2_file)
        unknown_column = ["Imaginary < 3"]

        kept = ff.dffilter(unknown_column, table)

        self.assertEqual(kept.shape, (9, 99))
Example #21
0
    def test_NaN_in_str_fields_dont_break_the_filter(self):
        """Apply a "contains" filter on Func.refGene to tab3_file."""
        # NOTE(review): per the test name, tab3_file presumably has NaN
        # values in its string columns - confirm against the fixture.
        table = ff.load(self.tab3_file)
        exonic_or_intronic = ['Func.refGene contains exonic|intronic']

        kept = ff.dffilter(exonic_or_intronic, table)

        self.assertEqual(kept.shape, (1, 76))