Beispiel #1
0
Datei: gff.py Projekt: bow/track
 def parse(self):
     """Read the nine-column file at ``self.path`` and feed ``self.handler``.

     Lines come from ``iterate_lines(self.path)``. Lines starting with
     ``"browser "`` are skipped; a line starting with ``"track "`` is parsed
     into a key/value dictionary and makes the next data line declare a new
     track. Every other line is split into columns, converted in place and
     passed to ``self.handler.newFeature(chrom, columns)``.

     Problems are reported through ``self.handler.error(message, path,
     line_number)``; what happens after such a report is decided by the
     handler, not by this method.
     """
     declared = []
     header = {}
     for lineno, text in iterate_lines(self.path):
         # Browser configuration lines carry nothing we store #
         if text.startswith("browser "):
             continue
         # A <track> line: remember its attributes and force a new track #
         if text.startswith("track "):
             try:
                 header = dict(pair.split('=', 1) for pair in shlex.split(text[6:]))
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, lineno)
             declared = []
             continue
         # Tab separated first, any whitespace as a fallback #
         cols = text.split('\t')
         if len(cols) == 1:
             cols = text.split()
         # The first column is the chromosome, eight more must remain #
         chrom = cols.pop(0)
         if len(cols) != 8:
             self.handler.error("The track%s doesn't have nine columns", self.path, lineno)
         # Declare the track on its first data line #
         if not declared:
             self.handler.newTrack(header, self.name)
             declared = all_fields[:len(cols)]
             self.handler.defineFields(declared)
         # Source (0) and name (1): a dot means "empty" #
         for idx in (0, 1):
             if cols[idx] == '.':
                 cols[idx] = ''
         # Interval bounds must be integers and describe a positive length #
         try:
             cols[2] = int(cols[2])
             cols[3] = int(cols[3])
         except ValueError:
             self.handler.error("The track%s has non integers as interval bounds", self.path, lineno)
         if cols[3] <= cols[2]:
             self.handler.error("The track%s has negative or null intervals", self.path, lineno)
         # Score: a dot or an empty string becomes 0.0 #
         if cols[4] in ('.', ''):
             cols[4] = 0.0
         try:
             cols[4] = float(cols[4])
         except ValueError:
             self.handler.error("The track%s has non floats as score values", self.path, lineno)
         # Strand #
         cols[5] = strand_to_int(cols[5])
         # Frame: a dot becomes None, anything else must be an integer #
         if cols[6] == '.':
             cols[6] = None
         else:
             try:
                 cols[6] = int(cols[6])
             except ValueError:
                 self.handler.error("The track%s has non integers as frame value", self.path, lineno)
         # Group or attribute column: a dot means "empty" #
         if cols[7] == '.':
             cols[7] = ''
         # Hand the converted line over #
         self.handler.newFeature(chrom, cols)
Beispiel #2
0
Datei: util.py Projekt: bow/track
def parse_chr_file(self, path):
    """Build a chromosome dictionary from the file at *path*.

    Each line yielded by ``iterate_lines(path)`` must hold exactly two
    columns (tab separated, or whitespace separated when the line has no
    tab): a chromosome name and an integer length.

    Returns a dictionary mapping every name to ``{'length': <int>}``.

    Raises ``Exception`` when a line does not have two columns, when a
    length is not an integer, or when no line was read at all.
    """
    lengths = {}
    for lineno, text in iterate_lines(path):
        columns = text.split('\t')
        if len(columns) == 1:
            columns = text.split()
        if len(columns) != 2:
            raise Exception("The file '" + path + ":" + str(lineno) + "' does not seam to be a valid chromosome file.")
        name, raw_length = columns
        try:
            lengths[name] = {'length': int(raw_length)}
        except ValueError:
            raise Exception("The file '" + path + ":" + str(lineno) + "' has non-integer as chromosome lengths.")
    if lengths:
        return lengths
    raise Exception("The file '" + path + "' does not seam to contain any information.")
Beispiel #3
0
 def parse(self):
     """Read the four-column file at ``self.path`` and feed ``self.handler``.

     Lines come from ``iterate_lines(self.path)``. ``"browser "`` lines are
     skipped; a ``"track "`` line is parsed into a key/value dictionary and
     makes the next data line declare a new track. A data line holds a
     chromosome followed by start, end and score; the three values are
     converted in place and passed to ``self.handler.newFeature``.

     Problems are reported through ``self.handler.error(message, path,
     line_number)``; what happens afterwards is up to the handler.
     """
     header = {}
     pending_declaration = True
     for lineno, text in iterate_lines(self.path):
         # Browser configuration lines carry nothing we store #
         if text.startswith("browser "):
             continue
         # A <track> line: remember its attributes and force a new track #
         if text.startswith("track "):
             try:
                 header = dict(pair.split('=', 1) for pair in shlex.split(text[6:]))
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, lineno)
             pending_declaration = True
             continue
         # Tab separated first, any whitespace as a fallback #
         cols = text.split('\t')
         if len(cols) == 1:
             cols = text.split()
         # The first column is the chromosome, three more must remain #
         chrom = cols.pop(0)
         if len(cols) != 3:
             self.handler.error("The track%s doesn't have four columns", self.path, lineno)
         # Declare the track on its first data line #
         # NOTE(review): the fields are defined before the track is declared
         # here - confirm the handler accepts this order.
         if pending_declaration:
             pending_declaration = False
             self.handler.defineFields(all_fields)
             self.handler.newTrack(header, self.name)
         # Interval bounds must be integers #
         try:
             cols[0] = int(cols[0])
             cols[1] = int(cols[1])
         except ValueError:
             self.handler.error("The track%s has non integers as interval bounds", self.path, lineno)
         # Score: a dot or an empty string becomes 0.0 #
         if cols[2] in ('.', ''):
             cols[2] = 0.0
         try:
             cols[2] = float(cols[2])
         except ValueError:
             self.handler.error("The track%s has non floats as score values", self.path, lineno)
         # Hand the converted line over #
         self.handler.newFeature(chrom, cols)
Beispiel #4
0
def parse_chr_file(self, path):
    """Read a chromosome file and return a dictionary.

    The lines of *path* are obtained from ``iterate_lines``. Each one is
    split on tabs (or on any whitespace when it contains no tab) and must
    give a chromosome name followed by an integer length.

    Returns ``{name: {'length': length}, ...}``.

    Raises ``Exception`` for a line without exactly two columns, for a
    non-integer length, and for a file that yields no line at all.
    """
    def reject(lineno, reason):
        # Build the message only when it is needed, then abort the parsing.
        raise Exception("The file '" + path + ":" + str(lineno) + reason)

    table = {}
    for lineno, text in iterate_lines(path):
        fields = text.split('\t')
        if len(fields) == 1:
            fields = text.split()
        if len(fields) != 2:
            reject(lineno, "' does not seam to be a valid chromosome file.")
        name = fields[0]
        try:
            size = int(fields[1])
        except ValueError:
            reject(lineno, "' has non-integer as chromosome lengths.")
        table[name] = {'length': size}
    if not table:
        raise Exception("The file '" + path +
                        "' does not seam to contain any information.")
    return table
Beispiel #5
0
 def parse(self):
     """Read the file at ``self.path`` as single positions with tag counts.

     One track is declared up front with ``{'int_to_float':'score'}`` and
     ``all_fields``. Each data line gives chromosome, name, position, strand
     and an integer score; the position ``pos`` becomes the interval
     ``[pos-1, pos)``. Lines with a zero score are dropped. Consecutive lines
     sharing chromosome, name, strand and score whose interval starts where
     the previous one ended are merged into one feature before being passed
     to ``self.handler.newFeature``.

     Problems are reported through ``self.handler.error(message, path,
     line_number)``; what happens afterwards is up to the handler.
     """
     # The feature being accumulated, not yet handed to the handler #
     prev_chrom = prev_name = prev_start = prev_end = prev_strand = prev_score = None
     # A single track for the whole file #
     self.handler.newTrack({'int_to_float':'score'}, self.name)
     self.handler.defineFields(all_fields)
     for lineno, text in iterate_lines(self.path):
         # Header lines are not used by this format #
         if text.startswith("browser "):
             continue
         if text.startswith("track "):
             continue
         # Tab separated first, any whitespace as a fallback #
         cols = text.split('\t')
         if len(cols) == 1:
             cols = text.split()
         if len(cols) < 5:
             self.handler.error("The track%s has less than five columns", self.path, lineno)
         chrom = cols.pop(0)
         name = cols[0]
         # A position is stored as the one-base interval ending on it #
         try:
             pos = int(cols[1])
         except ValueError:
             self.handler.error("The track%s has non integers as position", self.path, lineno)
         start = pos - 1
         end = pos
         strand = strand_to_int(cols[2])
         try:
             score = int(cols[3])
         except ValueError:
             self.handler.error("The track%s has non integers as tag count values", self.path, lineno)
         # Null scores are not stored #
         if score == 0:
             continue
         # Extend the pending feature when this line continues it #
         same_group = (prev_chrom, prev_name, prev_strand, prev_score) == (chrom, name, strand, score)
         if same_group and start == prev_end:
             prev_end = end
             continue
         # Otherwise flush the pending feature and start a new one #
         if prev_chrom:
             self.handler.newFeature(prev_chrom, (prev_name, prev_start, prev_end, prev_strand, prev_score))
         prev_chrom, prev_name, prev_start = chrom, name, start
         prev_end, prev_strand, prev_score = end, strand, score
     # Flush whatever is still pending #
     if prev_chrom:
         self.handler.newFeature(prev_chrom, (prev_name, prev_start, prev_end, prev_strand, prev_score))
Beispiel #6
0
 def parse(self):
     """Read the GTF file at ``self.path`` and feed ``self.handler``.

     Lines come from ``iterate_lines(self.path)``. ``"browser "`` lines are
     skipped; a ``"track "`` line is parsed into a key/value dictionary and
     makes the next data line declare a new track. A data line must have
     nine columns; everything after the eighth column is joined with spaces
     and treated as the attribute column. The attribute column is split into
     key/value pairs whose keys extend ``all_fields`` and whose values are
     appended to the feature, in the order they appear on the line.

     Problems in the fixed columns are reported through
     ``self.handler.error(message, path, line_number)``; what happens
     afterwards is up to the handler.

     Raises ``AssertionError`` when the attribute column does not start with
     ``gene_id`` followed by ``transcript_id``.
     """
     # Initial variables #
     info = {}
     declare_track = True
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict(
                     [p.split('=', 1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error(
                     "The track%s seems to have an invalid <track> header line",
                     self.path, number)
             declare_track = True
             continue
         # Split the lines #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # A whitespace split also cuts the attribute column: glue it back #
         if len(items) > 8: items = items[0:8] + [' '.join(items[8:])]
         # Chromosome #
         chrom = items.pop(0)
         # Length is nine #
         if len(items) != 8:
             self.handler.error("The track%s doesn't have nine columns",
                                self.path, number)
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             self.handler.newTrack(info, self.name)
         # Source field #
         if items[0] == '.': items[0] = ''
         # Name field #
         if items[1] == '.': items[1] = ''
         # Start and end fields #
         try:
             items[2] = int(items[2])
             items[3] = int(items[3])
         except ValueError:
             self.handler.error(
                 "The track%s has non integers as interval bounds",
                 self.path, number)
         # Strand field #
         items[5] = strand_to_int(items[5])
         # Frame field #
         if items[6] == '.': items[6] = None
         else:
             try:
                 items[6] = int(items[6])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as frame value",
                     self.path, number)
         # The last special column: alternating keys and values #
         raw_attr = items.pop()
         attr = shlex.split(raw_attr)
         # An odd number of tokens raises IndexError on the last key #
         attr = [(attr[i], attr[i + 1].strip(';'))
                 for i in xrange(0, len(attr), 2)]
         # Not using dict to preserve annotation order #
         keys, values = [x[0] for x in attr], [x[1] for x in attr]
         # GTF attribute column must have annotations starting with "gene_id" and "transcript_id" #
         # Raised explicitly (same exception type as before) so the check
         # survives "python -O" and the message shows the offending column.
         if keys[:2] != ["gene_id", "transcript_id"]:
             raise AssertionError(
                 "Invalid attribute column: %r. Valid attributes begin with "
                 "\"gene_id\" and \"transcript_id\"" % (raw_attr,))
         self.handler.defineFields(all_fields + keys)
         items += values
         # Yield it #
         self.handler.newFeature(chrom, items)
Beispiel #7
0
 def parse(self):
     """Parse the nine-column file at ``self.path`` for ``self.handler``.

     ``iterate_lines(self.path)`` provides the lines. ``"browser "`` lines
     are ignored. A ``"track "`` line supplies the attributes of the next
     track and resets the field definition, so the following data line calls
     ``newTrack`` and ``defineFields``. Each data line is split, its columns
     are normalised in place (dots to empty values, bounds and frame to
     ``int``, score to ``float``, strand through ``strand_to_int``) and the
     result goes to ``self.handler.newFeature(chrom, columns)``.

     Every problem is reported with ``self.handler.error(message, path,
     line_number)``; the handler decides what follows.
     """
     track_fields = []
     track_info = {}
     for line_no, raw in iterate_lines(self.path):
         # Nothing to do with browser lines #
         if raw.startswith("browser "):
             continue
         # Track header: keep its attributes, forget the current fields #
         if raw.startswith("track "):
             try:
                 pairs = [token.split('=', 1) for token in shlex.split(raw[6:])]
                 track_info = dict(pairs)
             except ValueError:
                 self.handler.error(
                     "The track%s seems to have an invalid <track> header line",
                     self.path, line_no)
             track_fields = []
             continue
         # Columns: tabs when present, otherwise any whitespace #
         row = raw.split('\t')
         if len(row) == 1:
             row = raw.split()
         # Chromosome comes first, eight columns must follow #
         chrom = row.pop(0)
         if len(row) != 8:
             self.handler.error("The track%s doesn't have nine columns",
                                self.path, line_no)
         # First data line of a track declares it #
         if not track_fields:
             self.handler.newTrack(track_info, self.name)
             track_fields = all_fields[:len(row)]
             self.handler.defineFields(track_fields)
         # Source field #
         if row[0] == '.':
             row[0] = ''
         # Name field #
         if row[1] == '.':
             row[1] = ''
         # Start and end fields #
         try:
             row[2] = int(row[2])
             row[3] = int(row[3])
         except ValueError:
             self.handler.error(
                 "The track%s has non integers as interval bounds",
                 self.path, line_no)
         if not row[3] > row[2]:
             self.handler.error(
                 "The track%s has negative or null intervals", self.path,
                 line_no)
         # Score field: missing values count as 0.0 #
         if row[4] == '' or row[4] == '.':
             row[4] = 0.0
         try:
             row[4] = float(row[4])
         except ValueError:
             self.handler.error(
                 "The track%s has non floats as score values", self.path,
                 line_no)
         # Strand field #
         row[5] = strand_to_int(row[5])
         # Frame field: integer unless it is a dot #
         if row[6] != '.':
             try:
                 row[6] = int(row[6])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as frame value",
                     self.path, line_no)
         else:
             row[6] = None
         # Group or attribute field #
         if row[7] == '.':
             row[7] = ''
         # Pass the feature on #
         self.handler.newFeature(chrom, row)
Beispiel #8
0
 def parse(self):
     """Read the fixedStep/variableStep file at ``self.path`` for ``self.handler``.

     Lines come from ``iterate_lines(self.path)``. ``"browser "`` lines are
     skipped and a ``"track "`` line starts a new track. A ``variableStep``
     or ``fixedStep`` directive line sets the parameters (chrom, span, and
     for fixedStep start and step) applied to the data lines after it.

     Each data line becomes a ``[start, end, score]`` feature. Features
     with a score of exactly 0.0 are dropped. A feature on the same
     chromosome as the previous one is merged into it when ``floats_eq``
     accepts the two scores and ``overlapping`` accepts the two intervals.
     Features are passed to ``self.handler.newFeature(chrom, feature)``.

     Problems are reported through ``self.handler.error(message, path,
     line_number)``; what happens afterwards is up to the handler.
     """
     # Initial variables #
     info   = {}
     params = {}
     declare_track = True
     # The feature being accumulated, not yet handed to the handler #
     last_feature  = None
     last_chrom    = None
     # Line loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict([p.split('=',1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
             declare_track = True
             continue
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             # Flush the pending feature so it stays in the previous track #
             if last_feature:
                 self.handler.newFeature(last_chrom, last_feature)
                 last_feature = None
                 last_chrom   = None
             self.handler.newTrack(info, self.name)
             self.handler.defineFields(all_fields)
         # Directive line #
         if line.startswith("variableStep") or line.startswith("fixedStep"):
             # Prefixing 'mode=' turns the directive keyword into a key/value pair too #
             params = dict([p.split('=',1) for p in shlex.split('mode=' + line)])
             if not params.get('chrom', False):
                 self.handler.error("The track%s doesn't specify a chromosome.", self.path, number)
             # The span defaults to 1 when the directive does not give one #
             try:
                 params['span'] = int(params.get('span', 1))
             except ValueError:
                 self.handler.error("The track%s has a non integer as span value.", self.path, number)
             if params['span'] < 1:
                 self.handler.error("The track%s has a negative or null span value.", self.path, number)
             # Note the trailing space: unlike the test above, this one needs it #
             if line.startswith("fixedStep "):
                 if not 'start' in params:
                     self.handler.error("The track%s has a fixedStep directive without a start.", self.path, number)
                 try:
                     params['start'] = int(params['start'])
                 except ValueError:
                     self.handler.error("The track%s has a non integer as start value.", self.path, number)
                 # The step defaults to 1 when the directive does not give one #
                 try:
                     params['step'] = int(params.get('step',1))
                 except ValueError:
                     self.handler.error("The track%s has a non integer as step value.", self.path, number)
                 if params['step'] < 1:
                     self.handler.error("The track%s has a negative or null step value.", self.path, number)
             continue
         # Not a directive line #
         if not params:
             self.handler.error("The track%s is missing a fixedStep or variableStep directive.", self.path, number)
         # Fixed #
         if params['mode'] == 'fixedStep':
             # The whole line is the score; 'line' is rebound to that float #
             try:
                 line = float(line)
             except ValueError:
                 self.handler.error("The track%s has non floats as score values.", self.path, number)
             chrom   = params['chrom']
             feature = [params['start'], params['start'] + params['span'], line]
             # The next data line starts one step further #
             params['start'] += params['step']
         # Variable #
         elif params['mode'] == 'variableStep':
             # Two columns, position and score; 'line' is rebound to that list #
             line = line.split('\t')
             if len(line) == 1: line = line[0].split()
             try:
                 line[0] = int(line[0])
                 line[1] = float(line[1])
             except ValueError:
                 self.handler.error("The track%s has invalid values.", self.path, number)
             except IndexError:
                 self.handler.error("The track%s has missing values.", self.path, number)
             chrom   = params['chrom']
             feature = [line[0], line[0] + params['span'], line[1]]
         # Ignore null scores #
         if feature[2] == 0.0: continue
         # Merge adjacent features with same scores #
         # For instance ['chr1', 10, 11, 9.8] and ['chr1', 11, 12, 9.8] should merge.
         if last_feature:
             if last_chrom == chrom:
                 # A feature starting before the previous one ends is reported #
                 if last_feature[1] > feature[0]:
                     self.handler.error("The track%s has a start or span larger than its end or step.", self.path, number)
                 if floats_eq(last_feature[2], feature[2]) and overlapping(last_feature[0], last_feature[1], feature[0], feature[1]):
                     last_feature[0] = min(last_feature[0], feature[0])
                     last_feature[1] = max(last_feature[1], feature[1])
                     continue
             # No merge happened: the pending feature is complete #
             self.handler.newFeature(last_chrom, last_feature)
         last_feature = feature
         last_chrom   = chrom
     # Flush whatever is still pending #
     if last_feature: self.handler.newFeature(last_chrom, last_feature)
Beispiel #9
0
Datei: gtf.py Projekt: bow/track
 def parse(self):
     """Read the GTF file at ``self.path`` and feed ``self.handler``.

     Lines come from ``iterate_lines(self.path)``. ``"browser "`` lines are
     skipped; a ``"track "`` line is parsed into a key/value dictionary and
     makes the next data line declare a new track. A data line must have
     nine columns; everything after the eighth column is joined with spaces
     and treated as the attribute column. That column is split into
     key/value pairs: the keys extend ``all_fields`` and the values are
     appended to the feature, both in the order found on the line.

     Problems in the fixed columns are reported through
     ``self.handler.error(message, path, line_number)``; what happens
     afterwards is up to the handler.

     Raises ``AssertionError`` when the attribute column does not start with
     ``gene_id`` followed by ``transcript_id``.
     """
     # Initial variables #
     info   = {}
     declare_track = True
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict([p.split('=',1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
             declare_track = True
             continue
         # Split the lines #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # A whitespace split also cuts the attribute column: glue it back #
         if len(items) > 8: items = items[0:8] + [' '.join(items[8:])]
         # Chromosome #
         chrom = items.pop(0)
         # Length is nine #
         if len(items) != 8:
             self.handler.error("The track%s doesn't have nine columns", self.path, number)
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             self.handler.newTrack(info, self.name)
         # Source field #
         if items[0] == '.': items[0] = ''
         # Name field #
         if items[1] == '.': items[1] = ''
         # Start and end fields #
         try:
             items[2] = int(items[2])
             items[3] = int(items[3])
         except ValueError:
             self.handler.error("The track%s has non integers as interval bounds", self.path, number)
         # Strand field #
         items[5] = strand_to_int(items[5])
         # Frame field #
         if items[6] == '.': items[6] = None
         else:
             try:
                 items[6] = int(items[6])
             except ValueError:
                 self.handler.error("The track%s has non integers as frame value", self.path, number)
         # The last special column: alternating keys and values #
         raw_attr = items.pop()
         attr = shlex.split(raw_attr)
         # An odd number of tokens raises IndexError on the last key #
         attr = [(attr[i],attr[i+1].strip(';')) for i in xrange(0,len(attr),2)]
         # Not using dict to preserve annotation order #
         keys, values = [x[0] for x in attr], [x[1] for x in attr]
         # GTF attribute column must have annotations starting with "gene_id" and "transcript_id" #
         # Raised explicitly (same exception type as before) so the check
         # survives "python -O" and the message shows the offending column.
         if keys[:2] != ["gene_id", "transcript_id"]:
             raise AssertionError(
                 "Invalid attribute column: %r. Valid attributes begin with "
                 "\"gene_id\" and \"transcript_id\"" % (raw_attr,))
         self.handler.defineFields(all_fields + keys)
         items += values
         # Yield it #
         self.handler.newFeature(chrom, items)
Beispiel #10
0
Datei: bed.py Projekt: bow/track
 def parse(self):
     """Read the BED file at ``self.path`` and feed ``self.handler``.

     Lines come from ``iterate_lines(self.path)``. ``"browser "`` lines are
     skipped; a ``"track "`` line is parsed into a key/value dictionary and
     makes the next data line declare a new track, whose fields are the
     first ``len(columns)`` entries of ``all_fields``.

     After the chromosome, start and end are required; name, score, strand,
     thick start and thick end are converted only when the line has them.
     The converted columns are passed to ``self.handler.newFeature``.

     Problems are reported through ``self.handler.error(message, path,
     line_number)``; what happens afterwards is up to the handler.
     """
     # Initial variables #
     fields = []
     info   = {}
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict([p.split('=',1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
             fields = []
             continue
         # Split the lines #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # Chromosome #
         chrom = items.pop(0)
         # Have we started a track already ? #
         if not fields:
             self.handler.newTrack(info, self.name)
             fields = all_fields[0:len(items)]
             self.handler.defineFields(fields)
         # Start and end fields #
         try:
             items[0] = int(items[0])
             items[1] = int(items[1])
         except ValueError:
             self.handler.error("The track%s has non integers as interval bounds", self.path, number)
         except IndexError:
             self.handler.error("The track%s has less than two columns", self.path, number)
         # All following fields are optional: only touch the ones present. #
         # Explicit length checks keep an IndexError raised inside a helper
         # from being mistaken for a missing column.
         count = len(items)
         # Name field #
         if count > 2 and items[2] == '.': items[2] = ''
         # Score field #
         if count > 3:
             if items[3] == '.' or items[3] == '': items[3] = 0.0
             try:
                 items[3] = float(items[3])
             except ValueError:
                 self.handler.error("The track%s has non floats as score values", self.path, number)
         # Strand field #
         if count > 4:
             items[4] = strand_to_int(items[4])
         # Thick starts: a position, hence an integer as the message says #
         if count > 5:
             try:
                 items[5] = int(items[5])
             except ValueError:
                 self.handler.error("The track%s has non integers as thick starts", self.path, number)
         # Thick ends #
         if count > 6:
             try:
                 items[6] = int(items[6])
             except ValueError:
                 self.handler.error("The track%s has non integers as thick ends", self.path, number)
         # Too many fields (the chromosome has already been removed) #
         if count > 11:
             self.handler.error("The track%s has more than twelve columns", self.path, number)
         # Yield it - deliberately not in a "finally", so a line rejected
         # by the handler is not also stored as a feature.
         self.handler.newFeature(chrom, items)
Beispiel #11
0
 def parse(self):
     """Read the GTF file at ``self.path`` and feed ``self.handler``.

     Lines come from ``iterate_lines(self.path)``. ``"browser "`` lines are
     skipped; a ``"track "`` line is parsed into a key/value dictionary and
     makes the next data line declare a new track. A data line must have
     nine columns; everything after the eighth column is joined with spaces
     and treated as the attribute column.

     The attribute column is read as alternating keys and values. The keys
     extend ``all_fields`` and the values are appended to the feature, both
     in the order the keys first appear on the line; when a key is repeated
     its last value is kept.

     Problems are reported through ``self.handler.error(message, path,
     line_number)``; what happens afterwards is up to the handler.
     """
     # Initial variables #
     info = {}
     declare_track = True
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict(
                     [p.split('=', 1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error(
                     "The track%s seems to have an invalid <track> header line",
                     self.path, number)
             declare_track = True
             continue
         # Split the lines #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # A whitespace split also cuts the attribute column: glue it back #
         if len(items) > 8: items = items[0:8] + [' '.join(items[8:])]
         # Chromosome #
         chrom = items.pop(0)
         # Length is nine #
         if len(items) != 8:
             self.handler.error("The track%s doesn't have nine columns",
                                self.path, number)
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             self.handler.newTrack(info, self.name)
         # Source field #
         if items[0] == '.': items[0] = ''
         # Name field #
         if items[1] == '.': items[1] = ''
         # Start and end fields #
         try:
             items[2] = int(items[2])
             items[3] = int(items[3])
         except ValueError:
             self.handler.error(
                 "The track%s has non integers as interval bounds",
                 self.path, number)
         # Score field #
         if items[4] == '.' or items[4] == '': items[4] = 0.0
         try:
             items[4] = float(items[4])
         except ValueError:
             self.handler.error(
                 "The track%s has non floats as score values", self.path,
                 number)
         # Strand field #
         items[5] = strand_to_int(items[5])
         # Frame field #
         if items[6] == '.': items[6] = None
         else:
             try:
                 items[6] = int(items[6])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as frame value",
                     self.path, number)
         # The last special column: alternating keys and values #
         tokens = shlex.split(items.pop())
         # Parallel lists instead of a plain dict: the field order then
         # follows the file rather than the hash order of the keys, and the
         # concatenation below does not depend on keys() returning a list.
         keys, values = [], []
         position = {}
         # An odd number of tokens raises IndexError on the last key #
         for i in range(0, len(tokens), 2):
             key, value = tokens[i], tokens[i + 1].strip(';')
             if key in position:
                 # Repeated key: the last value wins, as with a dict #
                 values[position[key]] = value
             else:
                 position[key] = len(keys)
                 keys.append(key)
                 values.append(value)
         self.handler.defineFields(all_fields + keys)
         items += values
         # Yield it #
         self.handler.newFeature(chrom, items)
Beispiel #12
0
 def parse(self):
     """Read the fixedStep/variableStep file at ``self.path`` for ``self.handler``.

     Lines come from ``iterate_lines(self.path)``. ``"browser "`` lines are
     skipped and a ``"track "`` line starts a new track. A ``variableStep``
     or ``fixedStep`` directive line sets the parameters (chrom, span, and
     for fixedStep start and step) applied to the data lines after it.

     Each data line becomes a ``[start, end, score]`` feature of ``span``
     bases. In fixedStep mode successive features start ``step`` bases
     apart. Features with a score of exactly 0.0 are dropped. A feature on
     the same chromosome as the previous one is merged into it when
     ``floats_eq`` accepts the two scores and ``overlapping`` accepts the
     two intervals. Features go to ``self.handler.newFeature(chrom, feature)``.

     Problems are reported through ``self.handler.error(message, path,
     line_number)``; what happens afterwards is up to the handler.
     """
     # Initial variables #
     info = {}
     params = {}
     declare_track = True
     # The feature being accumulated, not yet handed to the handler #
     last_feature = None
     last_chrom = None
     # Line loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict(
                     [p.split('=', 1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error(
                     "The track%s seems to have an invalid <track> header line",
                     self.path, number)
             declare_track = True
             continue
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             # Flush the pending feature so it stays in the previous track #
             if last_feature:
                 self.handler.newFeature(last_chrom, last_feature)
                 last_feature = None
                 last_chrom = None
             self.handler.newTrack(info, self.name)
             self.handler.defineFields(all_fields)
         # Directive line #
         if line.startswith("variableStep") or line.startswith("fixedStep"):
             # Prefixing 'mode=' turns the directive keyword into a key/value pair too #
             params = dict(
                 [p.split('=', 1) for p in shlex.split('mode=' + line)])
             if not params.get('chrom', False):
                 self.handler.error(
                     "The track%s doesn't specify a chromosome.", self.path,
                     number)
             # The span defaults to 1 when the directive does not give one #
             try:
                 params['span'] = int(params.get('span', 1))
             except ValueError:
                 self.handler.error(
                     "The track%s has a non integer as span value.",
                     self.path, number)
             if params['span'] < 1:
                 self.handler.error(
                     "The track%s has a negative or null span value.",
                     self.path, number)
             # Note the trailing space: unlike the test above, this one needs it #
             if line.startswith("fixedStep "):
                 if 'start' not in params:
                     self.handler.error(
                         "The track%s has a fixedStep directive without a start.",
                         self.path, number)
                 try:
                     params['start'] = int(params['start'])
                 except ValueError:
                     self.handler.error(
                         "The track%s has a non integer as start value.",
                         self.path, number)
                 # The step defaults to 1 when the directive does not give one #
                 try:
                     params['step'] = int(params.get('step', 1))
                 except ValueError:
                     self.handler.error(
                         "The track%s has a non integer as step value.",
                         self.path, number)
                 if params['step'] < 1:
                     self.handler.error(
                         "The track%s has a negative or null step value.",
                         self.path, number)
             continue
         # Not a directive line #
         if not params:
             self.handler.error(
                 "The track%s is missing a fixedStep or variableStep directive.",
                 self.path, number)
         # Fixed #
         if params['mode'] == 'fixedStep':
             # The whole line is the score; 'line' is rebound to that float #
             try:
                 line = float(line)
             except ValueError:
                 self.handler.error(
                     "The track%s has non floats as score values.",
                     self.path, number)
             chrom = params['chrom']
             feature = [
                 params['start'], params['start'] + params['span'], line
             ]
             # The step is the distance between the starts of successive
             # items; the span only sets their width, so it must not be
             # added to the advance.
             params['start'] += params['step']
         # Variable #
         elif params['mode'] == 'variableStep':
             # Two columns, position and score; 'line' is rebound to that list #
             line = line.split('\t')
             if len(line) == 1: line = line[0].split()
             try:
                 line[0] = int(line[0])
                 line[1] = float(line[1])
             except ValueError:
                 self.handler.error("The track%s has invalid values.",
                                    self.path, number)
             except IndexError:
                 self.handler.error("The track%s has missing values.",
                                    self.path, number)
             chrom = params['chrom']
             feature = [line[0], line[0] + params['span'], line[1]]
         # Ignore null scores #
         if feature[2] == 0.0: continue
         # Merge adjacent features with same scores #
         # For instance ['chr1', 10, 11, 9.8] and ['chr1', 11, 12, 9.8] should merge.
         if last_feature:
             if last_chrom == chrom:
                 # A feature starting before the previous one ends is reported #
                 if last_feature[1] > feature[0]:
                     self.handler.error(
                         "The track%s has a start or span larger than its end or step.",
                         self.path, number)
                 if floats_eq(last_feature[2], feature[2]) and overlapping(
                         last_feature[0], last_feature[1], feature[0],
                         feature[1]):
                     last_feature[0] = min(last_feature[0], feature[0])
                     last_feature[1] = max(last_feature[1], feature[1])
                     continue
             # No merge happened: the pending feature is complete #
             self.handler.newFeature(last_chrom, last_feature)
         last_feature = feature
         last_chrom = chrom
     # Flush whatever is still pending #
     if last_feature: self.handler.newFeature(last_chrom, last_feature)
Beispiel #13
0
 def parse(self):
     """Parse a nine-column, GFF-style track file and feed it to the handler.

     Lines come from ``iterate_lines(self.path)``. Lines starting with
     ``"browser "`` are skipped. A line starting with ``"track "`` is read as
     a list of ``key=value`` pairs (shell-style quoting); the resulting dict
     is passed to ``self.handler.newTrack`` when the next feature line is
     seen. Every other line is a feature: it is split on tabs (or on any
     whitespace when it contains no tab), columns past the ninth are folded
     back into the ninth, and the columns are converted in place before
     ``self.handler.defineFields`` and ``self.handler.newFeature`` are called.

     The ninth column is read as alternating ``key value`` tokens, with
     surrounding ``;`` characters stripped from each value. A column that
     cannot be tokenised, or that does not hold complete pairs, is reported
     through ``self.handler.error`` like every other malformed input instead
     of escaping as a bare ``ValueError`` or ``IndexError``.

     Problems are reported with ``self.handler.error(message, path, number)``.
     NOTE(review): the code below assumes that call aborts parsing (raises);
     confirm against the handler implementation.
     """
     # Initial variables #
     info = {}
     declare_track = True
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 # Skip the leading "track " and read key=value pairs #
                 info = dict([p.split('=', 1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
             declare_track = True
             continue
         # Split the lines #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # Anything past the eighth column belongs to the attributes column #
         if len(items) > 8: items = items[0:8] + [' '.join(items[8:])]
         # Chromosome #
         chrom = items.pop(0)
         # Length is nine (eight once the chromosome is removed) #
         if len(items) != 8:
             self.handler.error("The track%s doesn't have nine columns", self.path, number)
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             self.handler.newTrack(info, self.name)
         # Source field #
         if items[0] == '.': items[0] = ''
         # Name field #
         if items[1] == '.': items[1] = ''
         # Start and end fields #
         try:
             items[2] = int(items[2])
             items[3] = int(items[3])
         except ValueError:
             self.handler.error("The track%s has non integers as interval bounds", self.path, number)
         # Score field #
         if items[4] == '.' or items[4] == '': items[4] = 0.0
         try:
             items[4] = float(items[4])
         except ValueError:
             self.handler.error("The track%s has non floats as score values", self.path, number)
         # Strand field #
         items[5] = strand_to_int(items[5])
         # Frame field #
         if items[6] == '.': items[6] = None
         else:
             try:
                 items[6] = int(items[6])
             except ValueError:
                 self.handler.error("The track%s has non integers as frame value", self.path, number)
         # The last special column #
         try:
             tokens = shlex.split(items.pop())
         except ValueError:
             # shlex could not tokenise the column (e.g. unbalanced quotes) #
             tokens = None
         # Keys and values alternate, so an odd token count means a key without a value #
         if tokens is None or len(tokens) % 2:
             self.handler.error("The track%s has an invalid attributes column", self.path, number)
             # Only reached if the handler did not abort: drop the malformed line #
             continue
         attr = dict((key, value.strip(';')) for key, value in zip(tokens[0::2], tokens[1::2]))
         # list() keeps the concatenation valid when keys() returns a view #
         self.handler.defineFields(all_fields + list(attr.keys()))
         items.extend(attr.values())
         # Yield it #
         self.handler.newFeature(chrom, items)
Beispiel #14
0
 def parse(self):
     """Parse an interval track file (BED-like columns) and feed the handler.

     Lines come from ``iterate_lines(self.path)``. Lines starting with
     ``"browser "`` are skipped. A line starting with ``"track "`` is read as
     a list of ``key=value`` pairs (shell-style quoting) and starts a new
     track: the next feature line triggers ``self.handler.newTrack`` and
     ``self.handler.defineFields``, using as many names from ``all_fields``
     as that line has columns after the chromosome.

     Every other line is a feature, split on tabs (or on any whitespace when
     it contains no tab). The first column is the chromosome, and the next
     two are required integer bounds. The name, score, strand, thick start
     and thick end columns are optional and are converted in place when
     present. The feature is handed to ``self.handler.newFeature`` only
     after all conversions ran without an error escaping, so a line that
     triggered a raising ``self.handler.error`` is never emitted.

     Problems are reported with ``self.handler.error(message, path, number)``.
     NOTE(review): whether that call raises is not visible here; if it
     returns, parsing continues with the partially converted line.
     """
     # Initial variables #
     fields = []
     info = {}
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 # Skip the leading "track " and read key=value pairs #
                 info = dict(
                     [p.split('=', 1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error(
                     "The track%s seems to have an invalid <track> header line",
                     self.path, number)
             # An empty field list makes the next feature declare a new track #
             fields = []
             continue
         # Split the lines #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # Chromosome #
         chrom = items.pop(0)
         # Have we started a track already ? #
         if not fields:
             self.handler.newTrack(info, self.name)
             fields = all_fields[0:len(items)]
             self.handler.defineFields(fields)
         # Start and end fields #
         try:
             items[0] = int(items[0])
             items[1] = int(items[1])
         except ValueError:
             self.handler.error(
                 "The track%s has non integers as interval bounds",
                 self.path, number)
         except IndexError:
             self.handler.error("The track%s has less than two columns",
                                self.path, number)
         # All following fields are optional #
         try:
             # Name field #
             if items[2] == '.': items[2] = ''
             # Score field #
             if items[3] == '.' or items[3] == '': items[3] = 0.0
             try:
                 items[3] = float(items[3])
             except ValueError:
                 self.handler.error(
                     "The track%s has non floats as score values",
                     self.path, number)
             # Strand field #
             items[4] = strand_to_int(items[4])
             # Thick starts #
             # NOTE(review): converted with float() although the message
             # says "non integers" -- confirm which type consumers expect.
             try:
                 items[5] = float(items[5])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as thick starts",
                     self.path, number)
             # Thick ends #
             try:
                 items[6] = float(items[6])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as thick ends",
                     self.path, number)
             # Too many fields #
             if len(items) > 11:
                 self.handler.error(
                     "The track%s has more than twelve columns", self.path,
                     number)
         # All index errors are ignored since the fields above three are optional #
         except IndexError:
             pass
         # Emit the feature here rather than in a `finally` clause, so that a
         # line rejected by a raising error handler is not emitted first.
         self.handler.newFeature(chrom, items)