Beispiel #1
0
def main():
    """Concatenate several input files into one GCToo object and write it out.

    Command-line arguments are parsed with build_parser(). The attributes
    read from the parsed args are:
        verbose: passed to setup_logger.setup
        input_filepaths: explicit list of files; when None, files are
            looked up with get_file_list(args.file_wildcard) instead
        file_wildcard: pattern handed to get_file_list
        concat_direction: "horiz" (uses hstack) or "vert" (uses vstack)
        fields_to_remove, reset_ids: forwarded to hstack / vstack
        out_type: "gctx" (uses write_gctx.write) or "gct" (uses
            write_gct.write)
        out_name: output file name
        filler_null, metadata_null, data_null: forwarded to write_gct.write

    Returns:
        None. When exactly one input file is present, a warning is logged
        and nothing is written.

    Raises:
        Exception: if the wildcard search finds no files.
        ValueError: if concat_direction or out_type is not one of the
            supported values.
    """
    # get args
    args = build_parser().parse_args(sys.argv[1:])
    setup_logger.setup(verbose=args.verbose)

    # Get files directly
    if args.input_filepaths is not None:
        files = args.input_filepaths

    # Or find them
    else:
        files = get_file_list(args.file_wildcard)

        # No files found
        if len(files) == 0:
            msg = "No files were found. args.file_wildcard: {}".format(
                args.file_wildcard)
            logger.error(msg)
            raise Exception(msg)

    # Only 1 file found
    if len(files) == 1:
        logger.warning(
            "Only 1 file found. No concatenation needs to be done, exiting")
        return

    # Check the options up front, before any file is parsed. Otherwise an
    # unexpected concat_direction would leave out_gctoo unbound, and an
    # unexpected out_type would log a write that never happens.
    if args.concat_direction not in ("horiz", "vert"):
        msg = ("concat_direction must be 'horiz' or 'vert'. "
               "args.concat_direction: {}").format(args.concat_direction)
        logger.error(msg)
        raise ValueError(msg)

    if args.out_type not in ("gctx", "gct"):
        msg = ("out_type must be 'gctx' or 'gct'. "
               "args.out_type: {}").format(args.out_type)
        logger.error(msg)
        raise ValueError(msg)

    # More than 1 file found: parse each file into a list
    gctoos = [parse.parse(f) for f in files]

    # Create concatenated gctoo object
    if args.concat_direction == "horiz":
        out_gctoo = hstack(gctoos, args.fields_to_remove, args.reset_ids)
    else:
        out_gctoo = vstack(gctoos, args.fields_to_remove, args.reset_ids)

    # Write out_gctoo to file
    logger.info("Writing to output file args.out_name:  {}".format(
        args.out_name))

    if args.out_type == "gctx":
        write_gctx.write(out_gctoo, args.out_name)
    else:
        write_gct.write(out_gctoo,
                        args.out_name,
                        filler_null=args.filler_null,
                        metadata_null=args.metadata_null,
                        data_null=args.data_null)
Beispiel #2
0
        (data_df, row_df,
         col_df) = GCToo.multi_index_df_to_component_dfs(mi_df)

        self.assertTrue(col_df.equals(e_col_metadata_df))
        self.assertTrue(row_df.equals(e_row_metadata_df))
        self.assertTrue(data_df.equals(e_data_df))

        # edge case: if the index (or column) of the multi-index has only one
        # level, it becomes a regular index
        mi_df_index_plain = pd.MultiIndex.from_arrays([["D", "E"]],
                                                      names=["rid"])
        mi_df2 = pd.DataFrame([[1, 3, 5], [7, 11, 13]],
                              index=mi_df_index_plain,
                              columns=mi_df_columns)

        # row df should be empty
        e_row_df2 = pd.DataFrame(index=["D", "E"])

        (data_df2, row_df2,
         col_df2) = GCToo.multi_index_df_to_component_dfs(mi_df2)
        self.assertTrue(row_df2.equals(e_row_df2))
        self.assertTrue(col_df2.equals(e_col_metadata_df))
        self.assertTrue(data_df2.equals(e_data_df))


# Script entry point: only runs when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    # Set up logging with verbose=True before any test runs
    setup_GCToo_logger.setup(verbose=True)

    # Discover and run the unittest test cases defined in this module
    unittest.main()
Beispiel #3
0
"""
import logging
import setup_GCToo_logger as setup_logger
import os
import numpy as np
import pandas as pd
import h5py
import GCToo

# Module authorship metadata
__author__ = "Oana Enache"
__email__ = "*****@*****.**"

# Instantiate the module-level logger under the name shared via setup_logger
logger = logging.getLogger(setup_logger.LOGGER_NAME)
# when not in debug mode, probably best to set verbose=False
# NOTE: this call executes at import time, as a module-level side effect
setup_logger.setup(verbose=False)

# Node name / path constants used by this module.
# NOTE(review): presumably these are HDF5 dataset and group paths inside a
# gctx file (h5py is imported above) — confirm against their uses in parse().
version_node = "version"
rid_node = "/0/META/ROW/id"
cid_node = "/0/META/COL/id"
data_node = "/0/DATA/0/matrix"
row_meta_group_node = "/0/META/ROW"
col_meta_group_node = "/0/META/COL"


def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None):
    """
	Primary method of script. Reads in path to a gctx file and parses into GCToo object.

	Input:
		Mandatory:
Beispiel #4
0
            "rids in concatenated_meta_df do not agree with rids in data_df.")

        # Reset rids in concatenated_meta_df
        reset_ids_in_meta_df(concatenated_meta_df)

        # Replace rids in data_df with the new ones from concatenated_meta_df
        # (just an array of unique integers, zero-indexed)
        data_df.index = pd.Index(concatenated_meta_df.index.values)


def reset_ids_in_meta_df(meta_df):
    """Replace the index of meta_df with a fresh default index, in place.

    The previous index values are kept as a column called "old_id", and the
    new index is given the same name the old index had. Nothing is returned.

    Args:
        meta_df (pandas df): metadata dataframe; mutated by this call
    """
    saved_name = meta_df.index.name

    # reset_index names the new column after the index, so label the index
    # "old_id" before moving it into the columns
    meta_df.index.name = "old_id"
    meta_df.reset_index(drop=False, inplace=True)

    # The fresh index comes back unnamed; restore the original name
    meta_df.index.name = saved_name


# Script entry point: only runs when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    # Parse the command-line arguments (everything after the program name)
    args = build_parser().parse_args(sys.argv[1:])
    # Set up logging, with verbosity taken from the parsed arguments
    setup_logger.setup(verbose=args.verbose)

    # Hand the parsed arguments to main
    main(args)
Beispiel #5
0
                set(mini_gctoo.row_metadata_df[c])))
            self.assertTrue(
                set(mini_gctoo.row_metadata_df[c]) == set(
                    mini_gctoo_row_metadata[c]),
                "Values in column {} differ between expected metadata and written row metadata!"
                .format(c))

        # check col metadata
        self.assertTrue(
            set(mini_gctoo.col_metadata_df.columns) == set(
                mini_gctoo_col_metadata.columns),
            "Mismatch between expected col metadata columns {} and column values written to file: {}"
            .format(mini_gctoo.col_metadata_df.columns,
                    mini_gctoo_col_metadata.columns))
        self.assertTrue(
            set(mini_gctoo.col_metadata_df.index) == set(
                mini_gctoo.col_metadata_df.index),
            "Mismatch between expect col metadata index {} and index values written to file: {}"
            .format(mini_gctoo.col_metadata_df.index,
                    mini_gctoo_col_metadata.index))
        for c in list(mini_gctoo.col_metadata_df.columns):
            self.assertTrue(
                set(mini_gctoo.col_metadata_df[c]) == set(
                    mini_gctoo_col_metadata[c]),
                "Values in column {} differ between expected metadata and written col metadata!"
                .format(c))


# Script entry point: only runs when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    # Set up logging with verbose=True before any test runs
    setup_logger.setup(verbose=True)
    # Discover and run the unittest test cases defined in this module
    unittest.main()