Esempio n. 1
0
    def loadPeaksFromFile(self,peaks_file,columns=None):
        """Read peaks data from a file and populate the object

        The file is read as tab-delimited text. Lines starting with
        '#' are skipped, as are lines with too few fields or where
        the start or end field does not consist solely of digits (a
        warning is logged for each of these). A ``Peak`` is created
        for every remaining line and appended to ``self.peaks``.

        Arguments:
          peaks_file (str): the name of the input file to read peaks
            data from.
          columns (tuple): (optional) tuple with 3 integers
            indicating which columns to use from the input
            ``peaks_file`` for the chromosome, start and end
            columns (if not the first three columns). The
            columns should be numbered from 1.

        Raises:
          PeakRangeError: re-raised (after the offending line has
            been logged) if creating a ``Peak`` raises it.

        """
        # Handle columns
        if columns is None:
            columns = (1,2,3)
        # Convert the 1-based column numbers into 0-based indices
        chrom = columns[0] - 1
        start = columns[1] - 1
        end = columns[2] - 1
        # Minimum number of fields that a line must supply
        ncols = max(chrom,start,end) + 1
        # Read in from file: the 'with' block ensures the file is
        # closed on normal completion and when an exception propagates
        with open(peaks_file,'rU') as fp:
            for line in fp:
                # Skip lines that start with a # symbol
                if line.startswith('#'):
                    logging.debug("Peaks file: skipped line: %s" %
                                  line.strip())
                    continue
                # Lines are tab-delimited
                items = line.strip().split('\t')
                if len(items) < ncols:
                    logging.warning("Peaks file: skipped line: %s" %
                                    line.strip())
                    logging.warning("Insufficient number of fields (%d): "
                                    "need at least %d" % (len(items),ncols))
                    continue
                # Check that items in 'start' and 'end' columns are digits
                bad_fields = [i for i in (start,end)
                              if not items[i].isdigit()]
                if bad_fields:
                    logging.warning("Peaks file: skipped line: %s" %
                                    line.strip())
                    # Indicate problem field(s)
                    logging.warning("                         %s" %
                                    make_errline(line,bad_fields))
                    logging.warning("Expected integer at indicated positions")
                    continue
                # Store in a new Peak object
                try:
                    peak = Peak(items[chrom],
                                items[start],
                                items[end])
                except PeakRangeError as ex:
                    logging.error("Peaks file: bad line: %s" % line.strip())
                    logging.error("                      %s" %
                                  make_errline(line,(start,end)))
                    logging.error("%s" % ex)
                    # Bare raise keeps the original traceback intact
                    raise
                self.peaks.append(peak)
Esempio n. 2
0
    def loadFeaturesFromFile(self,features_file):
        """Read features from a file and populate the object

        The file is read as tab-delimited text with at least 5
        columns (ID, chr, start, end, strand). Lines starting with
        '#' and lines with fewer than 5 fields are skipped. A first
        line with invalid values is assumed to be a header and is
        ignored; invalid values on any other line are logged and
        recorded as a critical error, which is only raised once the
        whole file has been read. If a sixth column is present and
        holds 0 or 1 then it is stored as the feature's ``flag``
        (otherwise ``flag`` is set to None).

        Arguments:
          features_file: the name of the input file to read features from.

        Returns:
          This object (``self``).

        Raises:
          Exception: if one or more lines had bad start/end/strand
            values, or a start position not lower than the end.

        """
        # Local flags etc
        line_index = 0
        critical_error = False
        # Read in data from file: the 'with' block ensures the file
        # is closed even if an exception is raised while reading
        with open(features_file,'rU') as fp:
            for line in fp:
                # Increment index
                line_index += 1
                # Skip lines starting with #
                if line.startswith('#'):
                    logging.debug("Feature file: skipped line: %s" %
                                  line.strip())
                    continue
                # Lines are tab-delimited and have at least 5 columns:
                # ID  chr  start  end  strand
                items = line.strip().split('\t')
                if len(items) < 5:
                    logging.warning("Feature file: skipped line: %s" %
                                    line.strip())
                    logging.warning("Insufficient number of fields (%d)" %
                                    len(items))
                    continue
                # Check line is valid i.e. start and stop should be
                # numbers, strand should be + or -
                problem_fields = [i for i in (2,3)
                                  if not items[i].isdigit()]
                if items[4] not in ('+','-'):
                    problem_fields.append(4)
                if problem_fields:
                    # If this is the first line then assume it's a
                    # header and ignore
                    if line_index == 1:
                        logging.warning("%s: first line ignored as header: %s" %
                                        (features_file,line.strip()))
                    else:
                        # Indicate problem field(s)
                        logging.error("%s: critical error line %d: bad values:" %
                                      (features_file,line_index))
                        logging.error("%s" % line.strip())
                        logging.error("%s" % make_errline(line.strip(),
                                                          problem_fields))
                        # This is a critical error: update flag
                        critical_error = True
                    # Continue to next line
                    continue
                if int(items[2]) >= int(items[3]):
                    # Start position is same or higher than end
                    logging.error("%s: critical error line %d: 'end' comes before 'start':" %
                                  (features_file,line_index))
                    logging.error("%s" % line.strip())
                    logging.error("%s" % make_errline(line.strip(),(2,3)))
                    # This is a critical error: update flag but
                    # continue reading
                    critical_error = True
                    continue
                # Store in a new Feature object
                feature = Feature(items[0],
                                  items[1],
                                  items[2],
                                  items[3],
                                  items[4],
                                  source_file=features_file)
                # Additional flag
                if len(items) >= 6:
                    # Is column 6 a flag? Only 0 and 1 are accepted
                    try:
                        flag_value = int(items[5])
                    except ValueError:
                        flag_value = None
                    else:
                        if flag_value not in (0,1):
                            flag_value = None
                    # Store value
                    feature.flag = flag_value
                # Store data
                self.features.append(feature)
        # Deal with postponed critical errors
        if critical_error:
            raise Exception("critical error(s) in '%s'" % features_file)
        # Store the source file
        self.source_file = features_file
        # Return a reference to this object
        return self
Esempio n. 3
0
    def loadPeaksFromFile(self,peaks_file,columns=None,id_column=None):
        """Read peaks data from a file and populate the object

        The file is read as tab-delimited text. Lines starting with
        '#' are skipped, as are lines with too few fields or where
        the start or end field does not consist solely of digits (a
        warning is logged for each of these). A ``Peak`` is created
        for every remaining line and appended to ``self.peaks``.

        Arguments:
          peaks_file (str): the name of the input file to read peaks
            data from.
          columns (tuple): (optional) tuple with 3 integers
            indicating which columns to use from the input
            ``peaks_file`` for the chromosome, start and end
            columns (if not the first three columns). The
            columns should be numbered from 1.
          id_column (int): (optional) specify a column in the
            file which contains the ID for the peak. The
            columns are assumed to be numbered from 1.

        Raises:
          PeakRangeError: re-raised (after the offending line has
            been logged) if creating a ``Peak`` raises it.

        """
        # Handle columns
        if columns is None:
            columns = (1,2,3)
        # Convert the 1-based column numbers into 0-based indices
        chrom = columns[0] - 1
        start = columns[1] - 1
        end = columns[2] - 1
        # Minimum number of fields that a line must supply
        ncols = max(chrom,start,end) + 1
        # Include optional ID column
        if id_column is not None:
            ncols = max(ncols,id_column)
            id_column = id_column - 1
        # Read in from file: the 'with' block ensures the file is
        # closed on normal completion and when an exception propagates
        with open(peaks_file,'rU') as fp:
            for line in fp:
                # Skip lines that start with a # symbol
                if line.startswith('#'):
                    logging.debug("Peaks file: skipped line: %s" %
                                  line.strip())
                    continue
                # Lines are tab-delimited
                items = line.strip().split('\t')
                if len(items) < ncols:
                    logging.warning("Peaks file: skipped line: %s" %
                                    line.strip())
                    logging.warning("Insufficient number of fields (%d): "
                                    "need at least %d" % (len(items),ncols))
                    continue
                # Check that items in 'start' and 'end' columns are digits
                bad_fields = [i for i in (start,end)
                              if not items[i].isdigit()]
                if bad_fields:
                    logging.warning("Peaks file: skipped line: %s" %
                                    line.strip())
                    # Indicate problem field(s)
                    logging.warning("                         %s" %
                                    make_errline(line,bad_fields))
                    logging.warning("Expected integer at indicated positions")
                    continue
                # Optional ID: when no ID column was requested then
                # id_column is None and indexing raises TypeError
                try:
                    id_ = items[id_column]
                except TypeError:
                    id_ = None
                # Store in a new Peak object
                try:
                    peak = Peak(items[chrom],
                                items[start],
                                items[end],
                                id=id_,
                                source_file=peaks_file)
                except PeakRangeError as ex:
                    logging.error("Peaks file: bad line: %s" % line.strip())
                    logging.error("                      %s" %
                                  make_errline(line,(start,end)))
                    logging.error("%s" % ex)
                    # Bare raise keeps the original traceback intact
                    raise
                self.peaks.append(peak)
    def loadFeaturesFromFile(self,features_file):
        """Read features from a file and populate the object

        The file is read as tab-delimited text with at least 5
        columns (ID, chr, start, end, strand). Lines starting with
        '#' and lines with fewer than 5 fields are skipped. A first
        line with invalid values is assumed to be a header and is
        ignored; invalid values on any other line are logged and
        recorded as a critical error, which is only raised once the
        whole file has been read. If a sixth column is present and
        holds 0 or 1 then it is stored as the feature's ``flag``
        (otherwise ``flag`` is set to None).

        Arguments:
          features_file: the name of the input file to read features from.

        Returns:
          This object (``self``).

        Raises:
          Exception: if one or more lines had bad start/end/strand
            values, or a start position not lower than the end.

        """
        # Local flags etc
        line_index = 0
        critical_error = False
        # Read in data from file: the 'with' block ensures the file
        # is closed even if an exception is raised while reading
        with open(features_file,'rU') as fp:
            for line in fp:
                # Increment index
                line_index += 1
                # Skip lines starting with #
                if line.startswith('#'):
                    logging.debug("Feature file: skipped line: %s" %
                                  line.strip())
                    continue
                # Lines are tab-delimited and have at least 5 columns:
                # ID  chr  start  end  strand
                items = line.strip().split('\t')
                if len(items) < 5:
                    logging.warning("Feature file: skipped line: %s" %
                                    line.strip())
                    logging.warning("Insufficient number of fields (%d)" %
                                    len(items))
                    continue
                # Check line is valid i.e. start and stop should be
                # numbers, strand should be + or -
                problem_fields = [i for i in (2,3)
                                  if not items[i].isdigit()]
                if items[4] not in ('+','-'):
                    problem_fields.append(4)
                if problem_fields:
                    # If this is the first line then assume it's a
                    # header and ignore
                    if line_index == 1:
                        logging.warning("%s: first line ignored as header: %s" %
                                        (features_file,line.strip()))
                    else:
                        # Indicate problem field(s)
                        logging.error("%s: critical error line %d: bad values:" %
                                      (features_file,line_index))
                        logging.error("%s" % line.strip())
                        logging.error("%s" % make_errline(line.strip(),
                                                          problem_fields))
                        # This is a critical error: update flag
                        critical_error = True
                    # Continue to next line
                    continue
                if int(items[2]) >= int(items[3]):
                    # Start position is same or higher than end
                    logging.error("%s: critical error line %d: 'end' comes before 'start':" %
                                  (features_file,line_index))
                    logging.error("%s" % line.strip())
                    logging.error("%s" % make_errline(line.strip(),(2,3)))
                    # This is a critical error: update flag but
                    # continue reading
                    critical_error = True
                    continue
                # Store in a new Feature object
                feature = Feature(items[0],
                                  items[1],
                                  items[2],
                                  items[3],
                                  items[4])
                # Additional flag
                if len(items) >= 6:
                    # Is column 6 a flag? Only 0 and 1 are accepted
                    try:
                        flag_value = int(items[5])
                    except ValueError:
                        flag_value = None
                    else:
                        if flag_value not in (0,1):
                            flag_value = None
                    # Store value
                    feature.flag = flag_value
                # Store data
                self.features.append(feature)
        # Deal with postponed critical errors
        if critical_error:
            raise Exception("critical error(s) in '%s'" % features_file)
        # Return a reference to this object
        return self