예제 #1
0
    def test_slice_and_exclude_rids(self):
        """Check that ``exclude_rid`` removes rows from an explicit ``rid`` selection.

        Requests rids "a", "c" and "d" while excluding "d", and expects the
        sliced data and row metadata to equal positions 0 and 2 of the
        fixture, with the column metadata equal to the fixture's.
        """
        out_gct = sg.slice_gctoo(self.in_gct, rid=["a", "c", "d"], exclude_rid=["d"])

        # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
        # pandas.testing is the supported public home of assert_frame_equal.
        pd.testing.assert_frame_equal(out_gct.data_df, self.in_gct.data_df.iloc[[0, 2], :])
        pd.testing.assert_frame_equal(out_gct.row_metadata_df, self.in_gct.row_metadata_df.iloc[[0, 2], :])
        pd.testing.assert_frame_equal(out_gct.col_metadata_df, self.in_gct.col_metadata_df)
예제 #2
0
    def test_slice_bools(self):
        """Check slicing with boolean row and column masks.

        The row mask keeps positions 0 and 2 and the column mask keeps only
        position 2, so the expected data frame has a single column and the
        expected column metadata has a single row.
        """
        out_gct = sg.slice_gctoo(self.in_gct, row_bool=[True, False, True, False], col_bool=[False, False, True])

        # Outputs should be dataframes even if there is only 1 index or column,
        # so the scalar-position iloc selections are wrapped back into DataFrames.
        # pandas.testing replaces pandas.util.testing, which was removed in pandas 2.0.
        pd.testing.assert_frame_equal(out_gct.data_df, pd.DataFrame(self.in_gct.data_df.iloc[[0, 2], 2]))
        pd.testing.assert_frame_equal(out_gct.row_metadata_df, self.in_gct.row_metadata_df.iloc[[0, 2], :])
        pd.testing.assert_frame_equal(out_gct.col_metadata_df, pd.DataFrame(self.in_gct.col_metadata_df.iloc[2, :]).T)
예제 #3
0
def main():
    """Command-line entry point: read a gct, slice it, and write the result.

    Parses ``sys.argv[1:]`` with ``build_parser()``, reads the file at
    ``args.in_gct_path``, slices it by the requested row/column ids and
    exclusions, and writes the result to ``args.out_name`` using
    ``wgx.write`` when ``args.use_gctx`` is set and ``wg.write`` otherwise.

    Raises:
        AssertionError: if slicing leaves no data.
    """
    # Get args
    args = build_parser().parse_args(sys.argv[1:])
    setup_logger.setup(verbose=args.verbose)

    # Read the input gct
    in_gct = parse.parse(args.in_gct_path)

    # Read in each of the command line arguments
    rid = _read_arg(args.rid)
    cid = _read_arg(args.cid)
    exclude_rid = _read_arg(args.exclude_rid)
    exclude_cid = _read_arg(args.exclude_cid)

    # Slice the gct
    out_gct = sg.slice_gctoo(in_gct,
                             rid=rid,
                             cid=cid,
                             exclude_rid=exclude_rid,
                             exclude_cid=exclude_cid)

    # Raise explicitly instead of using an ``assert`` statement so the check
    # is not stripped when Python runs with -O; the exception type and
    # message are kept as before for backward compatibility.
    if not out_gct.data_df.size > 0:
        raise AssertionError("Slicing yielded an empty gct!")

    # Write the output gct
    if args.use_gctx:
        wgx.write(out_gct, args.out_name)
    else:
        wg.write(out_gct,
                 args.out_name,
                 data_null="NaN",
                 metadata_null="NA",
                 filler_null="NA")
예제 #4
0
 def test_slice_cid_and_col_bool(self):
     """Supplying both ``cid`` and ``col_bool`` must raise an AssertionError."""
     # cid and col_bool should not both be provided
     conflicting_kwargs = {"cid": ["e", "f", "g"], "col_bool": [True, True, False]}

     with self.assertRaises(AssertionError) as raised:
         sg.slice_gctoo(self.in_gct, **conflicting_kwargs)

     # The error text is expected to name the two conflicting arguments.
     message = str(raised.exception)
     self.assertIn("cid and col_bool", message)
예제 #5
0
    def test_gctx_parsing(self):
        """Exercise ``parse.parse`` on the mini gctx fixture across its options.

        Covers, in order: a default parse compared against the in-memory
        fixture, the ``convert_neg_666`` flag, selection by rid/cid, selection
        by ridx/cidx, row-metadata-only and column-metadata-only parses, and
        ``make_multiindex``.
        """
        gctx_path = "functional_tests/mini_gctoo_for_testing.gctx"
        vehicle_id = 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'

        def check_same_gctoo(expected, actual):
            # Compare the data matrix, then row metadata, then column metadata.
            for attr in ("data_df", "row_metadata_df", "col_metadata_df"):
                pandas_testing.assert_frame_equal(getattr(expected, attr),
                                                  getattr(actual, attr))

        # parse in gctx, no other arguments
        built = mini_gctoo_for_testing.make()
        parsed = parse.parse(gctx_path)
        check_same_gctoo(built, parsed)

        # check convert_neg_666 worked correctly: with the default parse the
        # mfc_plate_id column is expected to be entirely null
        plate_ids_all_null = parsed.col_metadata_df["mfc_plate_id"].isnull().all()
        self.assertTrue(plate_ids_all_null)

        # parse w/o convert_neg_666: the same column must not be entirely null
        parsed_unconverted = parse.parse(gctx_path, convert_neg_666=False)
        unconverted_all_null = (
            parsed_unconverted.col_metadata_df["mfc_plate_id"].isnull().all())
        self.assertFalse(unconverted_all_null)

        # parsing w/rids & cids specified
        wanted_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', vehicle_id]
        wanted_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10']
        sliced_by_id = slice_gctoo.slice_gctoo(built,
                                               rid=wanted_rids,
                                               cid=wanted_cids)
        parsed_by_id = parse.parse(gctx_path, rid=wanted_rids, cid=wanted_cids)
        check_same_gctoo(sliced_by_id, parsed_by_id)

        # parsing w/ridx & cidx specified; the reference slice passes cid as a
        # bare string rather than a list, as the original test did
        sliced_single = slice_gctoo.slice_gctoo(built,
                                                rid=[vehicle_id],
                                                cid=vehicle_id)
        parsed_by_position = parse.parse(gctx_path, ridx=[4], cidx=[4])
        check_same_gctoo(sliced_single, parsed_by_position)

        # parsing row metadata only
        row_meta_only = parse.parse(gctx_path, row_meta_only=True)
        pandas_testing.assert_frame_equal(row_meta_only, built.row_metadata_df)

        # parsing col metadata only
        col_meta_only = parse.parse(gctx_path, col_meta_only=True)
        pandas_testing.assert_frame_equal(col_meta_only, built.col_metadata_df)

        # parsing w/multiindex
        with_multiindex = parse.parse(gctx_path, make_multiindex=True)
        self.assertTrue(with_multiindex.multi_index_df is not None)
예제 #6
0
    def test_parse(self):
        """Exercise ``parse_gctx.parse`` against in-memory slices of the fixture.

        Covers, in order: a whole-file parse, string rid/cid selection,
        numeric-looking string ids (via a temporary gctx written to the
        working directory), ridx/cidx selection, mixed rid/cidx and ridx/cid
        selection, and the row/column metadata-only modes.
        """
        fixture_path = "functional_tests/mini_gctoo_for_testing.gctx"
        int_indexed_path = "int_indexed_mini_gctoo.gctx"

        # parse whole thing
        mg1 = mini_gctoo_for_testing.make()
        mg2 = parse_gctx.parse(fixture_path)

        pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df)
        pandas_testing.assert_frame_equal(mg1.row_metadata_df,
                                          mg2.row_metadata_df)
        pandas_testing.assert_frame_equal(mg1.col_metadata_df,
                                          mg2.col_metadata_df)

        # test with string rid/cid
        test_rids = [
            'LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33',
            'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'
        ]
        test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10']
        mg3 = slice_gctoo.slice_gctoo(mg1, rid=test_rids, cid=test_cids)
        mg4 = parse_gctx.parse(fixture_path, rid=test_rids, cid=test_cids)
        pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df)
        pandas_testing.assert_frame_equal(mg3.row_metadata_df,
                                          mg4.row_metadata_df)
        pandas_testing.assert_frame_equal(mg3.col_metadata_df,
                                          mg4.col_metadata_df)

        # first, make & write out temp version of mini_gctoo with int rids/cids
        new_mg = mini_gctoo_for_testing.make(convert_neg_666=False)
        int_indexed_data_df = new_mg.data_df.copy()
        int_indexed_data_df.index = [str(i) for i in range(0, 6)]
        int_indexed_data_df.columns = [str(i) for i in range(10, 16)]

        int_indexed_row_meta = new_mg.row_metadata_df.copy()
        int_indexed_row_meta.index = int_indexed_data_df.index

        int_indexed_col_meta = new_mg.col_metadata_df.copy()
        int_indexed_col_meta.index = int_indexed_data_df.columns

        int_indexed_gctoo = GCToo.GCToo(data_df=int_indexed_data_df,
                                        row_metadata_df=int_indexed_row_meta,
                                        col_metadata_df=int_indexed_col_meta)

        write_gctx.write(int_indexed_gctoo, int_indexed_path)

        try:
            # test with numeric (repr as string) rid/cid
            mg5 = GCToo.GCToo(data_df=int_indexed_data_df,
                              row_metadata_df=int_indexed_row_meta,
                              col_metadata_df=int_indexed_col_meta)
            mg5 = slice_gctoo.slice_gctoo(
                mg5,
                row_bool=[True, False, True, False, True, False],
                col_bool=[True, False, False, True, True, True])

            # Name the axes explicitly — presumably so they match the names on
            # the frames returned by parse_gctx.parse (TODO confirm).
            mg5.data_df.index.name = "rid"
            mg5.data_df.columns.name = "cid"

            mg5.row_metadata_df.index.name = "rid"
            mg5.row_metadata_df.columns.name = "rhd"

            mg5.col_metadata_df.index.name = "cid"
            mg5.col_metadata_df.columns.name = "chd"

            mg6 = parse_gctx.parse(int_indexed_path,
                                   rid=["0", "2", "4"],
                                   cid=["10", "13", "14", "15"],
                                   convert_neg_666=False)
        finally:
            # Always delete the temporary file, even when slicing or parsing
            # raises, so a failing run does not leave it in the working dir.
            os.remove(int_indexed_path)

        pandas_testing.assert_frame_equal(mg5.data_df, mg6.data_df)
        pandas_testing.assert_frame_equal(mg5.row_metadata_df,
                                          mg6.row_metadata_df)
        pandas_testing.assert_frame_equal(mg5.col_metadata_df,
                                          mg6.col_metadata_df)

        # test with ridx/cidx
        mg7 = slice_gctoo.slice_gctoo(
            mg1,
            rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
            cid='LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666')
        mg8 = parse_gctx.parse(fixture_path, ridx=[4], cidx=[4])

        pandas_testing.assert_frame_equal(mg7.data_df, mg8.data_df)
        pandas_testing.assert_frame_equal(mg7.row_metadata_df,
                                          mg8.row_metadata_df)
        pandas_testing.assert_frame_equal(mg7.col_metadata_df,
                                          mg8.col_metadata_df)

        # test with rid/cidx
        mg9 = parse_gctx.parse(fixture_path,
                               rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
                               cidx=[4])

        pandas_testing.assert_frame_equal(mg7.data_df, mg9.data_df)
        pandas_testing.assert_frame_equal(mg7.row_metadata_df,
                                          mg9.row_metadata_df)
        pandas_testing.assert_frame_equal(mg7.col_metadata_df,
                                          mg9.col_metadata_df)

        # test with ridx/cid
        mg10 = parse_gctx.parse(fixture_path,
                                ridx=[4],
                                cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'])

        pandas_testing.assert_frame_equal(mg7.data_df, mg10.data_df)
        pandas_testing.assert_frame_equal(mg7.row_metadata_df,
                                          mg10.row_metadata_df)
        pandas_testing.assert_frame_equal(mg7.col_metadata_df,
                                          mg10.col_metadata_df)

        # test with row_meta_only
        mg11 = parse_gctx.parse(fixture_path, row_meta_only=True)
        pandas_testing.assert_frame_equal(mg11, mg1.row_metadata_df)

        # test with col_meta_only
        mg12 = parse_gctx.parse(fixture_path, col_meta_only=True)
        pandas_testing.assert_frame_equal(mg12, mg1.col_metadata_df)