def loadPeaksFromFile(self,peaks_file,columns=None):
    """Read peaks data from a file and populate the object

    The file is read as tab-delimited text. Lines are handled as
    follows:

    - lines starting with '#' are skipped (logged at debug level);
    - lines with fewer fields than required by ``columns`` are
      skipped with a warning;
    - lines where the 'start' or 'end' field is not made up entirely
      of digits are skipped with a warning;
    - all other lines are used to build a ``Peak`` which is appended
      to ``self.peaks``.

    Arguments:
      peaks_file (str): the name of the input file to read
        peaks data from.
      columns (tuple): (optional) tuple with 3 integers
        indicating which columns to use from the input
        ``peaks_file`` for the chromosome, start and end
        columns (if not the first three columns). The
        columns should be numbered from 1.

    Raises:
      PeakRangeError: if creating a ``Peak`` from a line raises
        this exception; the offending line is logged and the
        exception is re-raised.

    """
    # Handle columns: convert from 1-based (user-facing) to 0-based
    if columns is None:
        columns = (1,2,3)
    chrom = columns[0] - 1
    start = columns[1] - 1
    end = columns[2] - 1
    # Minimum number of fields a line must have to be usable
    ncols = max(chrom,start,end) + 1
    # Read in from file ('rU' = universal newlines); the 'with'
    # block ensures the file is closed even if an exception escapes
    with open(peaks_file,'rU') as fp:
        for line in fp:
            stripped = line.strip()
            # Skip lines that start with a # symbol
            if line.startswith('#'):
                logging.debug("Peaks file: skipped line: %s" % stripped)
                continue
            # Lines are tab-delimited
            items = stripped.split('\t')
            if len(items) < ncols:
                logging.warning("Peaks file: skipped line: %s" % stripped)
                logging.warning("Insufficient number of fields (%d): need at "
                                "least %d" % (len(items),ncols))
                continue
            # Check that items in 'start' and 'end' columns are digits
            bad_fields = [i for i in (start,end)
                          if not items[i].isdigit()]
            if bad_fields:
                logging.warning("Peaks file: skipped line: %s" % stripped)
                # Indicate problem field(s)
                logging.warning(" %s" % make_errline(line,bad_fields))
                logging.warning("Expected integer at indicated positions")
                continue
            # Store in a new Peak object
            try:
                peak = Peak(items[chrom],
                            items[start],
                            items[end])
            except PeakRangeError as ex:
                logging.error("Peaks file: bad line: %s" % stripped)
                logging.error(" %s" % make_errline(line,(start,end)))
                logging.error("%s" % ex)
                # Bare raise keeps the original traceback intact
                raise
            self.peaks.append(peak)
def loadFeaturesFromFile(self,features_file):
    """Read features from a file and populate the object

    The file is read as tab-delimited text with at least 5 columns
    (ID, chr, start, end, strand). Lines are handled as follows:

    - lines starting with '#' are skipped (logged at debug level);
    - lines with fewer than 5 fields are skipped with a warning;
    - if the very first line of the file has invalid start/end/strand
      values then it is assumed to be a header and ignored;
    - any other line with invalid values, or with start >= end, is
      logged as a critical error; reading continues so that all
      problems are reported, then an exception is raised at the end;
    - valid lines are used to build a ``Feature`` which is appended
      to ``self.features``. If a sixth column is present then the
      feature's ``flag`` attribute is set to its value if that is
      the integer 0 or 1, or to None otherwise.

    Arguments:
      features_file: the name of the input file to read features from.

    Returns:
      A reference to this object. ``self.source_file`` is set to
      ``features_file`` on success.

    Raises:
      Exception: if one or more critical errors were found in the
        file.

    """
    # Set when a bad line is seen; the exception is postponed until
    # the whole file has been read
    critical_error = False
    # Read in data from file ('rU' = universal newlines); the 'with'
    # block ensures the file is closed even if an exception escapes
    with open(features_file,'rU') as fp:
        for line_index,line in enumerate(fp,1):
            stripped = line.strip()
            # Skip lines starting with #
            if line.startswith('#'):
                logging.debug("Feature file: skipped line: %s" % stripped)
                continue
            # Lines are tab-delimited and have at least 5 columns:
            # ID  chr  start  end  strand
            items = stripped.split('\t')
            if len(items) < 5:
                logging.warning("Feature file: skipped line: %s" % stripped)
                logging.warning("Insufficient number of fields (%d)" %
                                len(items))
                continue
            # Check line is valid i.e. start and stop should be
            # numbers, strand should be + or -
            problem_fields = [i for i in (2,3) if not items[i].isdigit()]
            if items[4] not in ('+','-'):
                problem_fields.append(4)
            if problem_fields:
                # If this is the first line then assume it's a header
                # and ignore
                if line_index == 1:
                    logging.warning("%s: first line ignored as header: %s" %
                                    (features_file,stripped))
                else:
                    # Indicate problem field(s)
                    logging.error("%s: critical error line %d: bad values:" %
                                  (features_file,line_index))
                    logging.error("%s" % stripped)
                    logging.error("%s" % make_errline(stripped,
                                                      problem_fields))
                    # This is a critical error: update flag
                    critical_error = True
                # Continue to next line
                continue
            if int(items[2]) >= int(items[3]):
                # Start position is same or higher than end
                logging.error("%s: critical error line %d: 'end' comes before 'start':" %
                              (features_file,line_index))
                logging.error("%s" % stripped)
                logging.error("%s" % make_errline(stripped,(2,3)))
                # This is a critical error: update flag but continue
                # reading
                critical_error = True
                continue
            # Store in a new Feature object
            feature = Feature(items[0],
                              items[1],
                              items[2],
                              items[3],
                              items[4],
                              source_file=features_file)
            # Additional flag
            if len(items) >= 6:
                # Is column 6 a flag? Only integer 0 or 1 is accepted
                try:
                    flag_value = int(items[5])
                except ValueError:
                    flag_value = None
                else:
                    if flag_value not in (0,1):
                        flag_value = None
                # Store value
                feature.flag = flag_value
            # Store data
            self.features.append(feature)
    # Deal with postponed critical errors
    if critical_error:
        raise Exception("critical error(s) in '%s'" % features_file)
    # Store the source file
    self.source_file = features_file
    # Return a reference to this object
    return self
def loadPeaksFromFile(self,peaks_file,columns=None,id_column=None):
    """Read peaks data from a file and populate the object

    The file is read as tab-delimited text. Lines are handled as
    follows:

    - lines starting with '#' are skipped (logged at debug level);
    - lines with fewer fields than required by ``columns`` and
      ``id_column`` are skipped with a warning;
    - lines where the 'start' or 'end' field is not made up entirely
      of digits are skipped with a warning;
    - all other lines are used to build a ``Peak`` (with
      ``source_file`` set to ``peaks_file``) which is appended to
      ``self.peaks``.

    Arguments:
      peaks_file (str): the name of the input file to read
        peaks data from.
      columns (tuple): (optional) tuple with 3 integers
        indicating which columns to use from the input
        ``peaks_file`` for the chromosome, start and end
        columns (if not the first three columns). The
        columns should be numbered from 1.
      id_column (int): (optional) specify a column in the
        file which contains the ID for the peak. The
        columns are assumed to be numbered from 1. If not
        supplied then peaks are created with an ID of None.

    Raises:
      PeakRangeError: if creating a ``Peak`` from a line raises
        this exception; the offending line is logged and the
        exception is re-raised.

    """
    # Handle columns: convert from 1-based (user-facing) to 0-based
    if columns is None:
        columns = (1,2,3)
    chrom = columns[0] - 1
    start = columns[1] - 1
    end = columns[2] - 1
    # Minimum number of fields a line must have to be usable
    ncols = max(chrom,start,end) + 1
    # Include optional ID column
    if id_column is not None:
        ncols = max(ncols,id_column)
        id_column = id_column - 1
    # Read in from file ('rU' = universal newlines); the 'with'
    # block ensures the file is closed even if an exception escapes
    with open(peaks_file,'rU') as fp:
        for line in fp:
            stripped = line.strip()
            # Skip lines that start with a # symbol
            if line.startswith('#'):
                logging.debug("Peaks file: skipped line: %s" % stripped)
                continue
            # Lines are tab-delimited
            items = stripped.split('\t')
            if len(items) < ncols:
                logging.warning("Peaks file: skipped line: %s" % stripped)
                logging.warning("Insufficient number of fields (%d): need at "
                                "least %d" % (len(items),ncols))
                continue
            # Check that items in 'start' and 'end' columns are digits
            bad_fields = [i for i in (start,end)
                          if not items[i].isdigit()]
            if bad_fields:
                logging.warning("Peaks file: skipped line: %s" % stripped)
                # Indicate problem field(s)
                logging.warning(" %s" % make_errline(line,bad_fields))
                logging.warning("Expected integer at indicated positions")
                continue
            # Optional ID: test for None explicitly rather than relying
            # on a TypeError from indexing with None
            if id_column is not None:
                id_ = items[id_column]
            else:
                id_ = None
            # Store in a new Peak object
            try:
                peak = Peak(items[chrom],
                            items[start],
                            items[end],
                            id=id_,
                            source_file=peaks_file)
            except PeakRangeError as ex:
                logging.error("Peaks file: bad line: %s" % stripped)
                logging.error(" %s" % make_errline(line,(start,end)))
                logging.error("%s" % ex)
                # Bare raise keeps the original traceback intact
                raise
            self.peaks.append(peak)
def loadFeaturesFromFile(self,features_file):
    """Read features from a file and populate the object

    The file is read as tab-delimited text with at least 5 columns
    (ID, chr, start, end, strand). Lines are handled as follows:

    - lines starting with '#' are skipped (logged at debug level);
    - lines with fewer than 5 fields are skipped with a warning;
    - if the very first line of the file has invalid start/end/strand
      values then it is assumed to be a header and ignored;
    - any other line with invalid values, or with start >= end, is
      logged as a critical error; reading continues so that all
      problems are reported, then an exception is raised at the end;
    - valid lines are used to build a ``Feature`` which is appended
      to ``self.features``. If a sixth column is present then the
      feature's ``flag`` attribute is set to its value if that is
      the integer 0 or 1, or to None otherwise.

    Arguments:
      features_file: the name of the input file to read features from.

    Returns:
      A reference to this object.

    Raises:
      Exception: if one or more critical errors were found in the
        file.

    """
    # Set when a bad line is seen; the exception is postponed until
    # the whole file has been read
    critical_error = False
    # Read in data from file ('rU' = universal newlines); the 'with'
    # block ensures the file is closed even if an exception escapes
    with open(features_file,'rU') as fp:
        for line_index,line in enumerate(fp,1):
            stripped = line.strip()
            # Skip lines starting with #
            if line.startswith('#'):
                logging.debug("Feature file: skipped line: %s" % stripped)
                continue
            # Lines are tab-delimited and have at least 5 columns:
            # ID  chr  start  end  strand
            items = stripped.split('\t')
            if len(items) < 5:
                logging.warning("Feature file: skipped line: %s" % stripped)
                logging.warning("Insufficient number of fields (%d)" %
                                len(items))
                continue
            # Check line is valid i.e. start and stop should be
            # numbers, strand should be + or -
            problem_fields = [i for i in (2,3) if not items[i].isdigit()]
            if items[4] not in ('+','-'):
                problem_fields.append(4)
            if problem_fields:
                # If this is the first line then assume it's a header
                # and ignore
                if line_index == 1:
                    logging.warning("%s: first line ignored as header: %s" %
                                    (features_file,stripped))
                else:
                    # Indicate problem field(s)
                    logging.error("%s: critical error line %d: bad values:" %
                                  (features_file,line_index))
                    logging.error("%s" % stripped)
                    logging.error("%s" % make_errline(stripped,
                                                      problem_fields))
                    # This is a critical error: update flag
                    critical_error = True
                # Continue to next line
                continue
            if int(items[2]) >= int(items[3]):
                # Start position is same or higher than end
                logging.error("%s: critical error line %d: 'end' comes before 'start':" %
                              (features_file,line_index))
                logging.error("%s" % stripped)
                logging.error("%s" % make_errline(stripped,(2,3)))
                # This is a critical error: update flag but continue
                # reading
                critical_error = True
                continue
            # Store in a new Feature object
            feature = Feature(items[0],
                              items[1],
                              items[2],
                              items[3],
                              items[4])
            # Additional flag
            if len(items) >= 6:
                # Is column 6 a flag? Only integer 0 or 1 is accepted
                try:
                    flag_value = int(items[5])
                except ValueError:
                    flag_value = None
                else:
                    if flag_value not in (0,1):
                        flag_value = None
                # Store value
                feature.flag = flag_value
            # Store data
            self.features.append(feature)
    # Deal with postponed critical errors
    if critical_error:
        raise Exception("critical error(s) in '%s'" % features_file)
    # Return a reference to this object
    return self