def main(args):
    """Parse the file named by ``args.filename`` and write it out via write_gctx.

    Args:
        args: parsed command-line namespace. Reads ``args.filename`` (input
            path) and ``args.output_filepath`` (output name, or None to derive
            one from the parsed object's ``src``).
    """
    in_gctoo = parse_gct.parse(args.filename, convert_neg_666=False)
    logger.debug("Original out name: {}".format(in_gctoo.src))

    if args.output_filepath is None:
        # Default output name: last "/"-separated component of src, cut at
        # its first "." (so "dir/a.b.gct" becomes "a").
        out_name = in_gctoo.src.split("/")[-1].split(".")[0]
    else:
        out_name = args.output_filepath

    write_gctx.write(in_gctoo, out_name)
def main():
    """Command-line entry point: parse an input file and write it via write_gctx.

    Arguments are read from ``sys.argv[1:]`` using ``build_parser()``. The
    parsed namespace supplies ``verbose``, ``filename`` and
    ``output_filepath``; when ``output_filepath`` is None the output name is
    derived from the parsed object's ``src``.
    """
    args = build_parser().parse_args(sys.argv[1:])
    setup_logger.setup(verbose=args.verbose)

    in_gctoo = parse_gct.parse(args.filename, convert_neg_666=False)
    logger.debug("Original out name: {}".format(in_gctoo.src))

    if args.output_filepath is None:
        # Default output name: last "/"-separated component of src, cut at
        # its first "." (so "dir/a.b.gct" becomes "a").
        out_name = in_gctoo.src.split("/")[-1].split(".")[0]
    else:
        out_name = args.output_filepath

    write_gctx.write(in_gctoo, out_name)
def main():
    """Command-line entry point: concatenate several parsed files into one.

    Arguments are read from ``sys.argv[1:]`` using ``build_parser()``. Input
    files come from ``args.input_filepaths`` when it is given, otherwise from
    ``get_file_list(args.file_wildcard)``. The parsed objects are combined
    with ``hstack`` ("horiz") or ``vstack`` ("vert") and written as "gctx" or
    "gct" according to ``args.out_type``.

    Returns:
        None. Returns early, without writing anything, when exactly one input
        file is found.

    Raises:
        Exception: if no input files are found.
        ValueError: if ``args.concat_direction`` is neither "horiz" nor "vert".
    """
    # get args
    args = build_parser().parse_args(sys.argv[1:])
    setup_logger.setup(verbose=args.verbose)

    # Get files directly, or find them
    if args.input_filepaths is not None:
        files = args.input_filepaths
    else:
        files = get_file_list(args.file_wildcard)

    # No files found
    if not files:
        msg = "No files were found. args.file_wildcard: {}".format(
            args.file_wildcard)
        logger.error(msg)
        raise Exception(msg)

    # Only 1 file found
    if len(files) == 1:
        logger.warning(
            "Only 1 file found. No concatenation needs to be done, exiting")
        return

    # Reject an unknown direction before doing any parsing; previously this
    # surfaced only afterwards, as an unbound-variable error on out_gctoo.
    if args.concat_direction not in ("horiz", "vert"):
        msg = "Unrecognized args.concat_direction: {}".format(
            args.concat_direction)
        logger.error(msg)
        raise ValueError(msg)

    # More than 1 file found: parse each one
    gctoos = [parse.parse(f) for f in files]

    # Create concatenated gctoo object
    if args.concat_direction == "horiz":
        out_gctoo = hstack(gctoos, args.fields_to_remove, args.reset_ids)
    else:
        out_gctoo = vstack(gctoos, args.fields_to_remove, args.reset_ids)

    # Write out_gctoo to file
    logger.info("Writing to output file args.out_name: {}".format(
        args.out_name))

    if args.out_type == "gctx":
        write_gctx.write(out_gctoo, args.out_name)
    elif args.out_type == "gct":
        write_gct.write(out_gctoo, args.out_name,
                        filler_null=args.filler_null,
                        metadata_null=args.metadata_null,
                        data_null=args.data_null)
    else:
        # Still not an error (nothing was written before either), but make
        # the no-op visible instead of silent.
        logger.warning(
            "Unrecognized args.out_type: {}; no output file was written".format(
                args.out_type))
def test_write_src(self):
    """Check the src attribute stored in the file written by write_gctx.write.

    Case 1 clears ``src`` on the object and expects the stored attribute to be
    the output file name. Case 2 leaves ``src`` as produced by
    ``mini_gctoo_for_testing.make()`` and expects "mini_gctoo.gctx".
    Temporary files are removed even when a step fails.
    """
    # case 1: gctoo obj doesn't have src
    no_src_path = "no_src_example.gctx"
    mini1 = mini_gctoo_for_testing.make()
    mini1.src = None
    try:
        # The name is passed without an extension; the test reads the result
        # back from the ".gctx" path.
        write_gctx.write(mini1, "no_src_example")
        with h5py.File(no_src_path, "r") as hdf5_file:
            hdf5_src1 = hdf5_file.attrs[write_gctx.src_attr]
        self.assertEqual(hdf5_src1, "no_src_example.gctx")
    finally:
        if os.path.exists(no_src_path):
            os.remove(no_src_path)

    # case 2: gctoo obj does have src
    with_src_path = "with_src_example.gctx"
    mini2 = mini_gctoo_for_testing.make()
    try:
        write_gctx.write(mini2, with_src_path)
        with h5py.File(with_src_path, "r") as hdf5_file:
            hdf5_src2 = hdf5_file.attrs[write_gctx.src_attr]
        self.assertEqual(hdf5_src2, "mini_gctoo.gctx")
    finally:
        if os.path.exists(with_src_path):
            os.remove(with_src_path)
def test_write_version(self):
    """Check the version attribute stored in the file written by write_gctx.write.

    In both cases the stored attribute is expected to equal
    ``write_gctx.version_number``, whether the object's ``version`` is None
    or a custom value. Temporary files are removed even when a step fails.
    """
    # TODO @oana refactor this test so it just calls the write_version method

    # case 1: gctoo obj doesn't have version
    mini1 = mini_gctoo_for_testing.make()
    mini1.version = None
    fn = "no_version_provided_example.gctx"
    try:
        write_gctx.write(mini1, fn)
        with h5py.File(fn, "r") as hdf5_file:
            hdf5_v1 = hdf5_file.attrs[write_gctx.version_attr]
        self.assertEqual(hdf5_v1, write_gctx.version_number)
    finally:
        if os.path.exists(fn):
            os.remove(fn)

    # case 2: gctoo obj does have version, but it is not used when writing
    mini2 = mini_gctoo_for_testing.make()
    mini2.version = "MY_VERSION"
    fn = "with_version_provided_example.gctx"
    try:
        write_gctx.write(mini2, fn)
        with h5py.File(fn, "r") as hdf5_file:
            hdf5_v2 = hdf5_file.attrs[write_gctx.version_attr]
        self.assertEqual(hdf5_v2, write_gctx.version_number)
    finally:
        if os.path.exists(fn):
            os.remove(fn)
def test_parse(self):
    """Compare parse_gctx.parse results against in-memory equivalents.

    Covers: parsing the whole file, subsetting by string rid/cid, subsetting
    an int-indexed temporary file by rid/cid, and subsetting by ridx/cidx,
    rid/cidx and ridx/cid. The temporary int-indexed file is removed even
    when writing, slicing or parsing fails.
    """

    def assert_gctoos_equal(expected, actual):
        # Compare data, row metadata and column metadata, in that order.
        assert_frame_equal(expected.data_df, actual.data_df)
        assert_frame_equal(expected.row_metadata_df, actual.row_metadata_df)
        assert_frame_equal(expected.col_metadata_df, actual.col_metadata_df)

    gctx_path = "functional_tests/mini_gctoo_for_testing.gctx"
    int_indexed_path = "int_indexed_mini_gctoo.gctx"

    # parse whole thing
    mg1 = mini_gctoo_for_testing.make()
    mg2 = parse_gctx.parse(gctx_path)
    assert_gctoos_equal(mg1, mg2)

    # test with string rid/cid
    test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33',
                 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']
    test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10']
    mg3 = slice_gct.slice_gctoo(mg1, rid=test_rids, cid=test_cids)
    mg4 = parse_gctx.parse(gctx_path, rid=test_rids, cid=test_cids)
    assert_gctoos_equal(mg3, mg4)

    # first, make & write out temp version of mini_gctoo with int rids/cids
    new_mg = mini_gctoo_for_testing.make(convert_neg_666=False)
    int_indexed_data_df = new_mg.data_df.copy()
    int_indexed_data_df.index = range(0, 6)
    int_indexed_data_df.columns = range(10, 16)

    int_indexed_row_meta = new_mg.row_metadata_df.copy()
    int_indexed_row_meta.index = range(0, 6)

    int_indexed_col_meta = new_mg.col_metadata_df.copy()
    int_indexed_col_meta.index = range(10, 16)

    int_indexed_gctoo = GCToo.GCToo(data_df=int_indexed_data_df,
                                    row_metadata_df=int_indexed_row_meta,
                                    col_metadata_df=int_indexed_col_meta)

    try:
        write_gctx.write(int_indexed_gctoo, int_indexed_path)

        # test with numeric (repr as string) rid/cid
        mg5 = GCToo.GCToo(data_df=int_indexed_data_df,
                          row_metadata_df=int_indexed_row_meta,
                          col_metadata_df=int_indexed_col_meta)
        mg5 = slice_gct.slice_gctoo(
            mg5,
            row_bool=[True, False, True, False, True, False],
            col_bool=[True, False, False, True, True, True])

        # Name the axes of the expected frames before comparing
        # (presumably to match the names the parser assigns -- confirm).
        mg5.data_df.index.name = "rid"
        mg5.data_df.columns.name = "cid"

        mg5.row_metadata_df.index.name = "rid"
        mg5.row_metadata_df.columns.name = "rhd"

        mg5.col_metadata_df.index.name = "cid"
        mg5.col_metadata_df.columns.name = "chd"

        mg6 = parse_gctx.parse(int_indexed_path, rid=[0, 2, 4],
                               cid=[10, 13, 14, 15], convert_neg_666=False)
    finally:
        # Clean up the temp file whether or not the steps above succeeded.
        if os.path.exists(int_indexed_path):
            os.remove(int_indexed_path)

    assert_gctoos_equal(mg5, mg6)

    # test with ridx/cidx
    mg7 = slice_gct.slice_gctoo(
        mg1,
        rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
        cid='LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666')
    mg8 = parse_gctx.parse(gctx_path, ridx=[4], cidx=[4])
    assert_gctoos_equal(mg7, mg8)

    # test with rid/cidx
    mg9 = parse_gctx.parse(gctx_path,
                           rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
                           cidx=[4])
    assert_gctoos_equal(mg7, mg9)

    # test with ridx/cid
    mg10 = parse_gctx.parse(gctx_path,
                            ridx=[4],
                            cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'])
    assert_gctoos_equal(mg7, mg10)