コード例 #1
0
    # Initialise
    skip_first_line = options.skip_first_line
    first_line_is_header = options.first_line_is_header
    fix_chromosome = options.fix_chromosome
    bedgraph_header = options.header
    user_selected = str(options.selection).split(',')

    # Get the input data
    data = TabFile(filen,
                   skip_first_line=skip_first_line,
                   first_line_is_header=first_line_is_header)
    print "Read in %d lines" % len(data)
    if first_line_is_header:
        print "Header:"
        for col in data.header():
            print "\t%s" % col

    # Output file
    output_root = os.path.splitext(os.path.basename(filen))[0]

    # Selected columns
    if len(user_selected) == 0:
        print "No columns selected for output."
        sys.exit()
    print "Selected columns = %s" % ' '.join(user_selected)
    # Assume user counts columns starting from one and adjust to count from zero
    # Also check that the requested column exists and set up file names based on
    # user input
    selected = []
    col_lookup = {}
コード例 #2
0
ファイル: make_macs2_xls.py プロジェクト: gwmei/genomics
class MacsXLS:
    """Class for reading and manipulating XLS output from MACS

    Reads the XLS output file from the MACS peak caller and
    processes and stores the information for subsequent manipulation
    and output.

    To read in data from a MACS output file:

    >>> macs = MacsXLS("macs.xls")

    This reads in the data and prepends an additional 'order'
    column (a list of numbers from one to the number of data
    lines).

    To get the MACS version (as a string):

    >>> macs.macs_version
    '2.0.10'

    To access the 'header' information (as a Python list):

    >>> macs.header

    To see the column names (as a Python list):

    >>> macs.columns

    The data is stored as a TabFile object; to access the data
    use the 'data' property, e.g.

    >>> for line in macs.data:
    ...    print("Chr %s Start %s End %s" % (line['chr'],line['start'],line['end']))

    To sort the data on a particular column use the 'sort_on'
    method, e.g.

    >>> macs.sort_on('chr')

    (Note that the order column is always recalculated after
    sorting.)

    """

    def __init__(self,filen=None,fp=None,name=None):
        """Create a new MacsXLS instance

        Arguments:
          filen: name of the file to read the MACS output from.
            If None then fp argument must be supplied instead.
          fp: file-like object opened for reading. If None then
            filen argument must be supplied instead. If both filen
            and fp are supplied then fp will be used preferentially.
            A supplied fp is never closed by this class.
          name: optional name to associate with the data. If None
            then the value from the "# name = ..." header line (if
            present) is used instead.

        Raises:
          Exception: if no MACS version line is found in the header,
            or if no column names line follows the header.

        """
        # Prefixes of the header lines that information is extracted from
        version_prefix = "# This file is generated by MACS version "
        name_prefix = "# name = "
        command_line_prefix = "# Command line: "
        # Store data
        self.__filen = filen
        self.__name = name
        self.__macs_version = None
        self.__command_line = None
        self.__header = []
        self.__data = None
        # Open file, if necessary (only a handle opened here gets closed)
        opened_here = fp is None
        if opened_here:
            fp = open(filen,'r')
        try:
            # Iterate over header and data lines
            for line in fp:
                line = line.strip()
                if line.startswith('#') or line == '':
                    # Header line
                    self.__header.append(line)
                    # Detect/extract data from header
                    if line.startswith(version_prefix):
                        # MACS version is the 9th whitespace-separated token
                        self.__macs_version = line.split()[8]
                    elif self.__name is None and line.startswith(name_prefix):
                        # Look for 'name' if none set
                        self.__name = line[len(name_prefix):]
                    elif line.startswith(command_line_prefix):
                        # Look for command line
                        self.__command_line = line[len(command_line_prefix):]
                elif self.__data is None:
                    # First line of actual data should be the column names
                    columns = line.split('\t')
                    # Insert an additional column called 'order'
                    columns.insert(0,"order")
                    # Set up TabFile to handle actual data
                    self.__data = TabFile(column_names=columns)
                else:
                    # Assume it's actual data and store it; the leading
                    # tab leaves an empty slot for the 'order' column
                    self.__data.append(tabdata="\t%s" % line)
        finally:
            # Close the file handle if we opened it, even if parsing failed
            if opened_here:
                fp.close()
        # Check that we actually got a version line
        if self.macs_version is None:
            raise Exception("Failed to extract MACS version, not a MACS output file?")
        # Check that we actually got a column names line; without one
        # there is no data table to populate or sort
        if self.__data is None:
            raise Exception("Failed to find column names line, truncated MACS output file?")
        # Populate the 'order' column
        self.update_order()

    @property
    def filen(self):
        """Return the source file name

        This is the 'filen' value passed to the constructor (which
        may be None).

        """
        return self.__filen

    @property
    def name(self):
        """Return the name property

        Either the 'name' passed to the constructor, or else the
        value taken from the "# name = ..." header line (None if
        neither was available).

        """
        return self.__name

    @property
    def macs_version(self):
        """Return the MACS version extracted from the file

        """
        return self.__macs_version

    @property
    def command_line(self):
        """Return the command line string extracted from the header

        This is the value associated with the "# Command line: ..."
        header line.

        Will be 'None' if no matching header line is found, else is
        the string following the ':'.

        """
        return self.__command_line

    @property
    def columns(self):
        """Return the column names for the MACS data

        Returns a list of the column names from the data
        extracted from the file.

        """
        return self.__data.header()

    @property
    def columns_as_xls_header(self):
        """Returns the column name list, with hash prepended

        Only the first column name has the '#' prepended.

        """
        return ['#'+self.columns[0]] + self.columns[1:]

    @property
    def header(self):
        """Return the header data from the file

        Returns a list of lines comprising the header
        extracted from the file (lines starting with '#', plus
        any blank lines).

        """
        return self.__header

    @property
    def data(self):
        """Return the data from the file

        Returns a TabFile object comprising the data
        extracted from the file.

        """
        return self.__data

    @property
    def with_broad_option(self):
        """Returns True if MACS was run with --broad option

        If --broad wasn't detected then returns False.

        """
        if self.macs_version.startswith('1.'):
            # Not an option in MACS 1.*
            return False
        try:
            # Was --broad specified in the command line?
            return '--broad' in self.command_line.split()
        except AttributeError:
            # No command line? Check for 'abs_summit' column
            return 'abs_summit' not in self.columns

    def sort_on(self,column,reverse=True):
        """Sort data on specified column

        Sorts the data in-place, by the specified column.

        By default data is sorted in descending order; set
        'reverse' argument to False to sort values in ascending
        order instead

        Note that the 'order' column is automatically updated
        after each sorting operation.

        Arguments:
          column: name of the column to sort on
          reverse: if True (default) then sort in descending
            order (i.e. largest to smallest). Otherwise sort in
            ascending order.

        """
        # Sort the data
        self.__data.sort(lambda line: line[column],reverse=reverse)
        # Update the 'order' column
        self.update_order()

    def update_order(self):
        """Set/update the values in the 'order' column

        Numbers the data lines from one upwards according to
        their current position in the data.

        """
        for i in range(0,len(self.__data)):
            self.__data[i]['order'] = i+1