Example #1
0
def main(args):
    """Parse ``args.filename`` with ``parse_gct`` and write it with ``write_gctx``.

    Args:
        args: parsed command-line arguments. This function reads
            ``args.filename`` (input path) and ``args.output_filepath``
            (output name, or None to derive one from the parsed object's
            ``src``).
    """
    in_gctoo = parse_gct.parse(args.filename, convert_neg_666=False)
    logger.debug("Original out name: {}".format(in_gctoo.src))

    # `is None` is the identity test PEP 8 asks for; `== None` defers to
    # whatever __eq__ the operand defines.
    if args.output_filepath is None:
        # Default name: last "/"-separated component of src, truncated at its
        # first ".". The bound .split is used so non-`str` text types work too.
        out_name = in_gctoo.src.split("/")[-1].split(".")[0]
    else:
        out_name = args.output_filepath

    write_gctx.write(in_gctoo, out_name)
Example #2
0
def main():
    """Command-line entry point: parse a file with ``parse_gct``, write with ``write_gctx``.

    Arguments are read from ``sys.argv[1:]`` through ``build_parser()``.
    ``args.filename`` is the input path; ``args.output_filepath`` is the
    output name, or None to derive one from the parsed object's ``src``.
    """
    args = build_parser().parse_args(sys.argv[1:])
    setup_logger.setup(verbose=args.verbose)
    in_gctoo = parse_gct.parse(args.filename, convert_neg_666=False)
    logger.debug("Original out name: {}".format(in_gctoo.src))

    # `is None` is the identity test PEP 8 asks for; `== None` defers to
    # whatever __eq__ the operand defines.
    if args.output_filepath is None:
        # Default name: last "/"-separated component of src, truncated at its
        # first ".". The bound .split is used so non-`str` text types work too.
        out_name = in_gctoo.src.split("/")[-1].split(".")[0]
    else:
        out_name = args.output_filepath

    write_gctx.write(in_gctoo, out_name)
Example #3
0
def main():
    """Command-line entry point: concatenate several parsed files and write the result.

    Input files come from ``args.input_filepaths`` when given, otherwise from
    ``get_file_list(args.file_wildcard)``. With exactly one file nothing is
    concatenated and the function returns without writing.

    Raises:
        Exception: if the wildcard search finds no files, or if
            ``args.concat_direction`` is neither "horiz" nor "vert".
    """
    # get args
    args = build_parser().parse_args(sys.argv[1:])
    setup_logger.setup(verbose=args.verbose)

    # Get files directly
    if args.input_filepaths is not None:
        files = args.input_filepaths

    # Or find them
    else:
        files = get_file_list(args.file_wildcard)

        # No files found
        if len(files) == 0:
            msg = "No files were found. args.file_wildcard: {}".format(
                args.file_wildcard)
            logger.error(msg)
            raise Exception(msg)

    # Only 1 file found
    if len(files) == 1:
        logger.warning(
            "Only 1 file found. No concatenation needs to be done, exiting")
        return

    # More than 1 file found: parse each one
    gctoos = [parse.parse(f) for f in files]

    # Create concatenated gctoo object
    if args.concat_direction == "horiz":
        out_gctoo = hstack(gctoos, args.fields_to_remove, args.reset_ids)

    elif args.concat_direction == "vert":
        out_gctoo = vstack(gctoos, args.fields_to_remove, args.reset_ids)

    else:
        # Without this branch out_gctoo would stay unbound and the write step
        # below would fail with an opaque UnboundLocalError.
        msg = "Unsupported args.concat_direction: {}".format(
            args.concat_direction)
        logger.error(msg)
        raise Exception(msg)

    # Write out_gctoo to file
    logger.info("Writing to output file args.out_name:  {}".format(
        args.out_name))

    # NOTE: any out_type other than "gctx" or "gct" writes nothing.
    if args.out_type == "gctx":
        write_gctx.write(out_gctoo, args.out_name)

    elif args.out_type == "gct":
        write_gct.write(out_gctoo,
                        args.out_name,
                        filler_null=args.filler_null,
                        metadata_null=args.metadata_null,
                        data_null=args.data_null)
Example #4
0
    def test_write_src(self):
        """Check the src attribute that write_gctx.write stores in the output file."""
        # case 1: gctoo obj doesn't have src
        mini1 = mini_gctoo_for_testing.make()
        mini1.src = None
        try:
            write_gctx.write(mini1, "no_src_example")
            # Open read-only and let the context manager close the handle
            # even if the attribute lookup raises.
            with h5py.File("no_src_example.gctx", "r") as hdf5_file:
                hdf5_src1 = hdf5_file.attrs[write_gctx.src_attr]
            self.assertEqual(hdf5_src1, "no_src_example.gctx")
        finally:
            # Clean up on failure too; the existence check keeps a failed
            # write from being masked by a missing-file error here.
            if os.path.exists("no_src_example.gctx"):
                os.remove("no_src_example.gctx")

        # case 2: gctoo obj does have src
        mini2 = mini_gctoo_for_testing.make()
        try:
            write_gctx.write(mini2, "with_src_example.gctx")
            with h5py.File("with_src_example.gctx", "r") as hdf5_file:
                hdf5_src2 = hdf5_file.attrs[write_gctx.src_attr]
            self.assertEqual(hdf5_src2, "mini_gctoo.gctx")
        finally:
            if os.path.exists("with_src_example.gctx"):
                os.remove("with_src_example.gctx")
Example #5
0
    def test_write_version(self):
        """Check that the written version attribute is always write_gctx.version_number."""
        #TODO @oana refactor this test so it just calls the write_version method
        # case 1: gctoo obj doesn't have version
        mini1 = mini_gctoo_for_testing.make()
        mini1.version = None
        fn = "no_version_provided_example.gctx"
        try:
            write_gctx.write(mini1, fn)
            # Open read-only and let the context manager close the handle
            # even if the attribute lookup raises.
            with h5py.File(fn, "r") as hdf5_file:
                hdf5_v1 = hdf5_file.attrs[write_gctx.version_attr]
            self.assertEqual(hdf5_v1, write_gctx.version_number)
        finally:
            # Clean up on failure too; the existence check keeps a failed
            # write from being masked by a missing-file error here.
            if os.path.exists(fn):
                os.remove(fn)

        # case 2: gctoo obj does have version, but it is not used when writing
        mini2 = mini_gctoo_for_testing.make()
        mini2.version = "MY_VERSION"
        fn = "with_version_provided_example.gctx"
        try:
            write_gctx.write(mini2, fn)
            with h5py.File(fn, "r") as hdf5_file:
                hdf5_v2 = hdf5_file.attrs[write_gctx.version_attr]
            self.assertEqual(hdf5_v2, write_gctx.version_number)
        finally:
            if os.path.exists(fn):
                os.remove(fn)
Example #6
0
    def test_parse(self):
        """Compare parse_gctx.parse against in-memory objects for several selections.

        Covers: the whole file, string rid/cid, integer rid/cid (via a
        temporary file written by this test), ridx/cidx, rid/cidx and
        ridx/cid.
        """
        fixture_path = "functional_tests/mini_gctoo_for_testing.gctx"
        vehicle_id = 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'

        # parse whole thing
        mg1 = mini_gctoo_for_testing.make()
        mg2 = parse_gctx.parse(fixture_path)

        assert_frame_equal(mg1.data_df, mg2.data_df)
        assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df)
        assert_frame_equal(mg1.col_metadata_df, mg2.col_metadata_df)

        # test with string rid/cid
        test_rids = [
            'LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33',
            vehicle_id
        ]
        test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10']
        mg3 = slice_gct.slice_gctoo(mg1, rid=test_rids, cid=test_cids)
        mg4 = parse_gctx.parse(fixture_path,
                               rid=test_rids,
                               cid=test_cids)
        assert_frame_equal(mg3.data_df, mg4.data_df)
        assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df)
        assert_frame_equal(mg3.col_metadata_df, mg4.col_metadata_df)

        # first, make & write out temp version of mini_gctoo with int rids/cids
        new_mg = mini_gctoo_for_testing.make(convert_neg_666=False)
        int_indexed_data_df = new_mg.data_df.copy()
        int_indexed_data_df.index = range(0, 6)
        int_indexed_data_df.columns = range(10, 16)

        int_indexed_row_meta = new_mg.row_metadata_df.copy()
        int_indexed_row_meta.index = range(0, 6)

        int_indexed_col_meta = new_mg.col_metadata_df.copy()
        int_indexed_col_meta.index = range(10, 16)

        int_indexed_gctoo = GCToo.GCToo(data_df=int_indexed_data_df,
                                        row_metadata_df=int_indexed_row_meta,
                                        col_metadata_df=int_indexed_col_meta)

        temp_path = "int_indexed_mini_gctoo.gctx"
        try:
            write_gctx.write(int_indexed_gctoo, temp_path)

            # test with numeric (repr as string) rid/cid
            mg5 = GCToo.GCToo(data_df=int_indexed_data_df,
                              row_metadata_df=int_indexed_row_meta,
                              col_metadata_df=int_indexed_col_meta)
            mg5 = slice_gct.slice_gctoo(
                mg5,
                row_bool=[True, False, True, False, True, False],
                col_bool=[True, False, False, True, True, True])

            mg5.data_df.index.name = "rid"
            mg5.data_df.columns.name = "cid"

            mg5.row_metadata_df.index.name = "rid"
            mg5.row_metadata_df.columns.name = "rhd"

            mg5.col_metadata_df.index.name = "cid"
            mg5.col_metadata_df.columns.name = "chd"

            mg6 = parse_gctx.parse(temp_path,
                                   rid=[0, 2, 4],
                                   cid=[10, 13, 14, 15],
                                   convert_neg_666=False)
        finally:
            # Remove the temp file even when slicing or parsing raises; the
            # existence check keeps a failed write from being masked here.
            if os.path.exists(temp_path):
                os.remove(temp_path)

        assert_frame_equal(mg5.data_df, mg6.data_df)
        assert_frame_equal(mg5.row_metadata_df, mg6.row_metadata_df)
        assert_frame_equal(mg5.col_metadata_df, mg6.col_metadata_df)

        # test with ridx/cidx
        # NOTE(review): cid is a bare string here while rid is a list; kept
        # as-is -- confirm slice_gctoo is meant to accept both forms.
        mg7 = slice_gct.slice_gctoo(
            mg1,
            rid=[vehicle_id],
            cid=vehicle_id)
        mg8 = parse_gctx.parse(fixture_path,
                               ridx=[4],
                               cidx=[4])

        assert_frame_equal(mg7.data_df, mg8.data_df)
        assert_frame_equal(mg7.row_metadata_df, mg8.row_metadata_df)
        assert_frame_equal(mg7.col_metadata_df, mg8.col_metadata_df)

        # test with rid/cidx
        mg9 = parse_gctx.parse(fixture_path,
                               rid=[vehicle_id],
                               cidx=[4])

        assert_frame_equal(mg7.data_df, mg9.data_df)
        assert_frame_equal(mg7.row_metadata_df, mg9.row_metadata_df)
        assert_frame_equal(mg7.col_metadata_df, mg9.col_metadata_df)

        # test with ridx/cid
        mg10 = parse_gctx.parse(fixture_path,
                                ridx=[4],
                                cid=[vehicle_id])

        assert_frame_equal(mg7.data_df, mg10.data_df)
        assert_frame_equal(mg7.row_metadata_df, mg10.row_metadata_df)
        assert_frame_equal(mg7.col_metadata_df, mg10.col_metadata_df)