Beispiel #1
0
Datei: gff.py Projekt: bow/track
 def parse(self):
     """Read a GFF file line by line and forward its content to ``self.handler``.

     Lines starting with ``browser `` are skipped. A line starting with
     ``track `` is parsed into a dictionary of ``key=value`` pairs and makes
     the next data line open a new track. Every other line is split into a
     chromosome followed by eight columns, which are normalised in place and
     handed to ``self.handler.newFeature``.

     Problems are reported through ``self.handler.error`` together with the
     file path and the line number supplied by ``iterate_lines``.
     """
     # Field names declared for the current track; empty until a track is opened #
     declared = []
     # Attributes of the most recent <track> header #
     header = {}
     for lineno, text in iterate_lines(self.path):
         # Browser directives carry no features #
         if text.startswith("browser "):
             continue
         # Track header: remember its attributes and force a new track #
         if text.startswith("track "):
             try:
                 header = dict(pair.split('=', 1) for pair in shlex.split(text[6:]))
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, lineno)
             declared = []
             continue
         # Tab separated first, any whitespace as a fallback #
         cols = text.split('\t')
         if len(cols) == 1:
             cols = text.split()
         # The first column is the chromosome, eight more must follow #
         chrom = cols.pop(0)
         if len(cols) != 8:
             self.handler.error("The track%s doesn't have nine columns", self.path, lineno)
         # Open the track on its first data line #
         if not declared:
             self.handler.newTrack(header, self.name)
             declared = all_fields[0:len(cols)]
             self.handler.defineFields(declared)
         # Source and name: a lone dot means "empty" #
         for idx in (0, 1):
             if cols[idx] == '.':
                 cols[idx] = ''
         # Interval bounds, converted one after the other #
         try:
             for idx in (2, 3):
                 cols[idx] = int(cols[idx])
         except ValueError:
             self.handler.error("The track%s has non integers as interval bounds", self.path, lineno)
         if cols[3] <= cols[2]:
             self.handler.error("The track%s has negative or null intervals", self.path, lineno)
         # Score: missing values become zero #
         if cols[4] in ('.', ''):
             cols[4] = 0.0
         else:
             try:
                 cols[4] = float(cols[4])
             except ValueError:
                 self.handler.error("The track%s has non floats as score values", self.path, lineno)
         # Strand #
         cols[5] = strand_to_int(cols[5])
         # Frame: a lone dot means "no frame" #
         if cols[6] != '.':
             try:
                 cols[6] = int(cols[6])
             except ValueError:
                 self.handler.error("The track%s has non integers as frame value", self.path, lineno)
         else:
             cols[6] = None
         # Group or attribute column #
         if cols[7] == '.':
             cols[7] = ''
         # Hand the feature over #
         self.handler.newFeature(chrom, cols)
Beispiel #2
0
 def parse(self):
     """Read per-position tag counts and forward merged features to ``self.handler``.

     A single track is declared before reading. Each data line provides a
     chromosome, a name, a position, a strand and an integer score; the
     position ``p`` becomes the interval ``(p-1, p)``. Lines with a score of
     zero are dropped. Consecutive lines that share chromosome, name, strand
     and score and whose interval starts where the previous one ended are
     merged into one feature before being passed to
     ``self.handler.newFeature``.

     Problems are reported through ``self.handler.error``.
     """
     # The feature currently being extended; nothing is pending at first #
     prev_chrom = prev_name = prev_start = prev_end = prev_strand = prev_score = None
     # One track for the whole file #
     self.handler.newTrack({'int_to_float':'score'}, self.name)
     self.handler.defineFields(all_fields)
     for lineno, text in iterate_lines(self.path):
         # Header lines carry no data #
         if text.startswith(("browser ", "track ")):
             continue
         # Tab separated first, any whitespace as a fallback #
         cols = text.split('\t')
         if len(cols) == 1:
             cols = text.split()
         if len(cols) < 5:
             self.handler.error("The track%s has less than five columns", self.path, lineno)
         # Chromosome and name #
         chrom = cols.pop(0)
         name = cols[0]
         # Position #
         try:
             pos = int(cols[1])
         except ValueError:
             self.handler.error("The track%s has non integers as position", self.path, lineno)
         # A position turns into an interval of length one ending on it #
         end = pos
         start = end - 1
         # Strand #
         strand = strand_to_int(cols[2])
         # Score #
         try:
             score = int(cols[3])
         except ValueError:
             self.handler.error("The track%s has non integers as tag count values", self.path, lineno)
         # Positions without tags are not reported #
         if score == 0:
             continue
         # Extend the pending feature when this position continues it #
         same_run = (prev_chrom, prev_name, prev_strand, prev_score) == (chrom, name, strand, score)
         if same_run and start == prev_end:
             prev_end = end
             continue
         # Otherwise flush the pending feature and start a new one #
         if prev_chrom:
             self.handler.newFeature(prev_chrom, (prev_name, prev_start, prev_end, prev_strand, prev_score))
         prev_chrom, prev_name, prev_start, prev_end, prev_strand, prev_score = chrom, name, start, end, strand, score
     # Flush whatever is still pending after the last line #
     if prev_chrom:
         self.handler.newFeature(prev_chrom, (prev_name, prev_start, prev_end, prev_strand, prev_score))
Beispiel #3
0
 def parse(self):
     """Read per-position tag counts and forward one feature per line to ``self.handler``.

     A single track is declared before reading. Each line of the file at
     ``self.path`` is split on tabs into a chromosome, a name, a position, a
     strand and an integer score; the position ``p`` is reported as the
     interval ``(p-1, p)``.

     Problems are reported through ``self.handler.error`` with the zero-based
     index produced by ``enumerate``.
     """
     # One track for the whole file #
     self.handler.newTrack({'int_to_float':'score'}, self.name)
     self.handler.defineFields(all_fields)
     with open(self.path) as stream:
         for index, row in enumerate(stream):
             # The first column is the chromosome, the rest describes the tag #
             parts = row.split('\t')
             chrom, cols = parts[0], parts[1:]
             if len(cols) < 4:
                 self.handler.error("The track%s doesn't have five columns", self.path, index)
             # Name #
             name = cols[0]
             # Position #
             try:
                 pos = int(cols[1])
             except ValueError:
                 self.handler.error("The track%s has non integers as position", self.path, index)
             # Strand #
             strand = strand_to_int(cols[2])
             # Score #
             try:
                 score = int(cols[3])
             except ValueError:
                 self.handler.error("The track%s has non integers as tag count values", self.path, index)
             # Hand the feature over #
             feature = (name, pos - 1, pos, strand, score)
             self.handler.newFeature(chrom, feature)
Beispiel #4
0
 def parse(self):
     """Parse a GTF file and forward its content to ``self.handler``.

     Lines starting with ``browser `` are skipped. A line starting with
     ``track `` is parsed into a dictionary of ``key=value`` pairs and makes
     the next data line declare a new track. Every other line is split into
     a chromosome followed by eight columns. The last column is expanded
     into one extra field per attribute, in file order, and the resulting
     row is handed to ``self.handler.newFeature``.

     Malformed lines are reported through ``self.handler.error``.

     Raises:
         AssertionError: if the attribute column does not start with
             ``gene_id`` followed by ``transcript_id``.
         IndexError: if the attribute column holds an odd number of tokens
             (a key without a value).
     """
     # Initial variables #
     info = {}
     declare_track = True
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict(
                     [p.split('=', 1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error(
                     "The track%s seems to have an invalid <track> header line",
                     self.path, number)
             # The next data line has to declare a new track #
             declare_track = True
             continue
         # Split the lines: tabs first, any whitespace as a fallback #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # Everything past the eighth column is joined back into one attribute column #
         if len(items) > 8: items = items[0:8] + [' '.join(items[8:])]
         # Chromosome #
         chrom = items.pop(0)
         # Length is nine #
         if len(items) != 8:
             self.handler.error("The track%s doesn't have nine columns",
                                self.path, number)
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             self.handler.newTrack(info, self.name)
         # Source field #
         if items[0] == '.': items[0] = ''
         # Name field #
         if items[1] == '.': items[1] = ''
         # Start and end fields #
         try:
             items[2] = int(items[2])
             items[3] = int(items[3])
         except ValueError:
             self.handler.error(
                 "The track%s has non integers as interval bounds",
                 self.path, number)
         # Strand field #
         items[5] = strand_to_int(items[5])
         # Frame field #
         if items[6] == '.': items[6] = None
         else:
             try:
                 items[6] = int(items[6])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as frame value",
                     self.path, number)
         # The last special column: alternating keys and values #
         raw_attr = items.pop()
         tokens = shlex.split(raw_attr)
         attr = [(tokens[i], tokens[i + 1].strip(';'))
                 for i in range(0, len(tokens), 2)]
         # Not using dict to preserve annotation order #
         keys, values = [x[0] for x in attr], [x[1] for x in attr]
         # GTF attribute column must have annotations starting with "gene_id" and "transcript_id" #
         # Raised explicitly so the check survives "python -O" and the message shows the column #
         if keys[:2] != ["gene_id", "transcript_id"]:
             raise AssertionError(
                 "Invalid attribute column: %r. Valid attributes begin "
                 "with \"gene_id\" and \"transcript_id\"" % (raw_attr,))
         self.handler.defineFields(all_fields + keys)
         items += values
         # Yield it #
         self.handler.newFeature(chrom, items)
Beispiel #5
0
 def parse(self):
     """Read a nine-column feature file and forward it to ``self.handler``.

     Lines starting with ``browser `` are skipped. A line starting with
     ``track `` is parsed into a dictionary of ``key=value`` pairs and makes
     the next data line open a new track. Every other line is split into a
     chromosome followed by eight columns (source, name, start, end, score,
     strand, frame, group), which are normalised in place and handed to
     ``self.handler.newFeature``.

     Problems are reported through ``self.handler.error`` together with the
     file path and the line number supplied by ``iterate_lines``.
     """
     # Field names declared for the current track; empty until one is opened #
     declared = []
     # Attributes of the most recent <track> header #
     header = {}
     for lineno, text in iterate_lines(self.path):
         # Browser directives carry no features #
         if text.startswith("browser "):
             continue
         # Track header: remember its attributes and force a new track #
         if text.startswith("track "):
             try:
                 header = dict(
                     pair.split('=', 1) for pair in shlex.split(text[6:]))
             except ValueError:
                 self.handler.error(
                     "The track%s seems to have an invalid <track> header line",
                     self.path, lineno)
             declared = []
             continue
         # Tab separated first, any whitespace as a fallback #
         cols = text.split('\t')
         if len(cols) == 1:
             cols = text.split()
         # The first column is the chromosome, eight more must follow #
         chrom = cols.pop(0)
         if len(cols) != 8:
             self.handler.error(
                 "The track%s doesn't have nine columns", self.path, lineno)
         # Open the track on its first data line #
         if not declared:
             self.handler.newTrack(header, self.name)
             declared = all_fields[0:len(cols)]
             self.handler.defineFields(declared)
         # Source and name: a lone dot means "empty" #
         for idx in (0, 1):
             if cols[idx] == '.':
                 cols[idx] = ''
         # Interval bounds, converted one after the other #
         try:
             for idx in (2, 3):
                 cols[idx] = int(cols[idx])
         except ValueError:
             self.handler.error(
                 "The track%s has non integers as interval bounds",
                 self.path, lineno)
         if cols[3] <= cols[2]:
             self.handler.error(
                 "The track%s has negative or null intervals",
                 self.path, lineno)
         # Score: missing values become zero #
         if cols[4] in ('.', ''):
             cols[4] = 0.0
         else:
             try:
                 cols[4] = float(cols[4])
             except ValueError:
                 self.handler.error(
                     "The track%s has non floats as score values",
                     self.path, lineno)
         # Strand #
         cols[5] = strand_to_int(cols[5])
         # Frame: a lone dot means "no frame" #
         if cols[6] != '.':
             try:
                 cols[6] = int(cols[6])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as frame value",
                     self.path, lineno)
         else:
             cols[6] = None
         # Group or attribute column #
         if cols[7] == '.':
             cols[7] = ''
         # Hand the feature over #
         self.handler.newFeature(chrom, cols)
Beispiel #6
0
Datei: gtf.py Projekt: bow/track
 def parse(self):
     """Parse a GTF file and forward its content to ``self.handler``.

     Lines starting with ``browser `` are skipped. A line starting with
     ``track `` is parsed into a dictionary of ``key=value`` pairs and makes
     the next data line declare a new track. Every other line is split into
     a chromosome followed by eight columns. The last column is expanded
     into one extra field per attribute, in file order, and the resulting
     row is handed to ``self.handler.newFeature``.

     Malformed lines are reported through ``self.handler.error``.

     Raises:
         AssertionError: if the attribute column does not start with
             ``gene_id`` followed by ``transcript_id``.
         IndexError: if the attribute column holds an odd number of tokens
             (a key without a value).
     """
     # Initial variables #
     info   = {}
     declare_track = True
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict([p.split('=',1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
             # The next data line has to declare a new track #
             declare_track = True
             continue
         # Split the lines: tabs first, any whitespace as a fallback #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # Everything past the eighth column is joined back into one attribute column #
         if len(items) > 8: items = items[0:8] + [' '.join(items[8:])]
         # Chromosome #
         chrom = items.pop(0)
         # Length is nine #
         if len(items) != 8:
             self.handler.error("The track%s doesn't have nine columns", self.path, number)
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             self.handler.newTrack(info, self.name)
         # Source field #
         if items[0] == '.': items[0] = ''
         # Name field #
         if items[1] == '.': items[1] = ''
         # Start and end fields #
         try:
             items[2] = int(items[2])
             items[3] = int(items[3])
         except ValueError:
             self.handler.error("The track%s has non integers as interval bounds", self.path, number)
         # Strand field #
         items[5] = strand_to_int(items[5])
         # Frame field #
         if items[6] == '.': items[6] = None
         else:
             try:
                 items[6] = int(items[6])
             except ValueError:
                 self.handler.error("The track%s has non integers as frame value", self.path, number)
         # The last special column: alternating keys and values #
         raw_attr = items.pop()
         tokens = shlex.split(raw_attr)
         attr = [(tokens[i],tokens[i+1].strip(';')) for i in range(0,len(tokens),2)]
         # Not using dict to preserve annotation order #
         keys, values = [x[0] for x in attr], [x[1] for x in attr]
         # GTF attribute column must have annotations starting with "gene_id" and "transcript_id" #
         # Raised explicitly so the check survives "python -O" and the message shows the column #
         if keys[:2] != ["gene_id", "transcript_id"]:
             raise AssertionError("Invalid attribute column: %r. Valid attributes begin with "
                                  "\"gene_id\" and \"transcript_id\"" % (raw_attr,))
         self.handler.defineFields(all_fields + keys)
         items += values
         # Yield it #
         self.handler.newFeature(chrom, items)
Beispiel #7
0
Datei: bed.py Projekt: bow/track
 def parse(self):
     """Parse a BED file and forward its content to ``self.handler``.

     Lines starting with ``browser `` are skipped. A line starting with
     ``track `` is parsed into a dictionary of ``key=value`` pairs and makes
     the next data line open a new track, whose fields are the first
     ``len(columns)`` entries of ``all_fields``. Every other line is split
     into a chromosome followed by start and end (converted to ``int``) and
     optional name, score, strand, thick start and thick end columns, which
     are normalised in place only when present.

     Problems are reported through ``self.handler.error``. The feature is
     passed to ``self.handler.newFeature`` only after every check has
     returned, so a row is never emitted while an exception is propagating.
     """
     # Initial variables #
     fields = []
     info   = {}
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict([p.split('=',1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
             # An empty field list makes the next data line open a new track #
             fields = []
             continue
         # Split the lines: tabs first, any whitespace as a fallback #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # Chromosome #
         chrom = items.pop(0)
         # Have we started a track already ? #
         if not fields:
             self.handler.newTrack(info, self.name)
             fields = all_fields[0:len(items)]
             self.handler.defineFields(fields)
         # Start and end fields #
         try:
             items[0] = int(items[0])
             items[1] = int(items[1])
         except ValueError:
             self.handler.error("The track%s has non integers as interval bounds", self.path, number)
         except IndexError:
             self.handler.error("The track%s has less than two columns", self.path, number)
         # All following fields are optional: only the columns present are touched #
         count = len(items)
         # Name field #
         if count > 2 and items[2] == '.': items[2] = ''
         # Score field #
         if count > 3:
             if items[3] == '.' or items[3] == '': items[3] = 0.0
             try:
                 items[3] = float(items[3])
             except ValueError:
                 self.handler.error("The track%s has non floats as score values", self.path, number)
         # Strand field #
         if count > 4: items[4] = strand_to_int(items[4])
         # Thick starts #
         # NOTE(review): converted with float() although the message says "integers" - confirm intent #
         if count > 5:
             try:
                 items[5] = float(items[5])
             except ValueError:
                 self.handler.error("The track%s has non integers as thick starts", self.path, number)
         # Thick ends #
         if count > 6:
             try:
                 items[6] = float(items[6])
             except ValueError:
                 self.handler.error("The track%s has non integers as thick ends", self.path, number)
         # Too many fields #
         if count > 11:
             self.handler.error("The track%s has more than twelve columns", self.path, number)
         # Yield it #
         self.handler.newFeature(chrom, items)
Beispiel #8
0
 def parse(self):
     """Parse a nine-column file with key/value attributes for ``self.handler``.

     Lines starting with ``browser `` are skipped. A line starting with
     ``track `` is parsed into a dictionary of ``key=value`` pairs and makes
     the next data line declare a new track. Every other line is split into
     a chromosome followed by eight columns (source, name, start, end,
     score, strand, frame, attributes). The attribute column is expanded
     into one extra field per distinct key, in order of first appearance;
     when a key is repeated its last value is kept.

     Malformed lines are reported through ``self.handler.error``.

     Raises:
         IndexError: if the attribute column holds an odd number of tokens
             (a key without a value).
     """
     # Initial variables #
     info = {}
     declare_track = True
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict(
                     [p.split('=', 1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error(
                     "The track%s seems to have an invalid <track> header line",
                     self.path, number)
             # The next data line has to declare a new track #
             declare_track = True
             continue
         # Split the lines: tabs first, any whitespace as a fallback #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # Everything past the eighth column is joined back into one attribute column #
         if len(items) > 8: items = items[0:8] + [' '.join(items[8:])]
         # Chromosome #
         chrom = items.pop(0)
         # Length is nine #
         if len(items) != 8:
             self.handler.error("The track%s doesn't have nine columns",
                                self.path, number)
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             self.handler.newTrack(info, self.name)
         # Source field #
         if items[0] == '.': items[0] = ''
         # Name field #
         if items[1] == '.': items[1] = ''
         # Start and end fields #
         try:
             items[2] = int(items[2])
             items[3] = int(items[3])
         except ValueError:
             self.handler.error(
                 "The track%s has non integers as interval bounds",
                 self.path, number)
         # Score field #
         if items[4] == '.' or items[4] == '': items[4] = 0.0
         try:
             items[4] = float(items[4])
         except ValueError:
             self.handler.error(
                 "The track%s has non floats as score values", self.path,
                 number)
         # Strand field #
         items[5] = strand_to_int(items[5])
         # Frame field #
         if items[6] == '.': items[6] = None
         else:
             try:
                 items[6] = int(items[6])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as frame value",
                     self.path, number)
         # The last special column: alternating keys and values #
         tokens = shlex.split(items.pop())
         # Keys are kept in a list so the declared fields follow the file order #
         # instead of the arbitrary order of a plain dict #
         keys, lookup = [], {}
         for i in range(0, len(tokens), 2):
             key, value = tokens[i], tokens[i + 1].strip(';')
             if key not in lookup: keys.append(key)
             lookup[key] = value
         self.handler.defineFields(all_fields + keys)
         items += [lookup[key] for key in keys]
         # Yield it #
         self.handler.newFeature(chrom, items)
Beispiel #9
0
 def parse(self):
     """Parse a nine-column file with key/value attributes for ``self.handler``.

     Lines starting with ``browser `` are skipped. A line starting with
     ``track `` is parsed into a dictionary of ``key=value`` pairs and makes
     the next data line declare a new track. Every other line is split into
     a chromosome followed by eight columns (source, name, start, end,
     score, strand, frame, attributes). The attribute column is expanded
     into one extra field per distinct key, in order of first appearance;
     when a key is repeated its last value is kept.

     Malformed lines are reported through ``self.handler.error``.

     Raises:
         IndexError: if the attribute column holds an odd number of tokens
             (a key without a value).
     """
     # Initial variables #
     info   = {}
     declare_track = True
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict([p.split('=',1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error("The track%s seems to have an invalid <track> header line", self.path, number)
             # The next data line has to declare a new track #
             declare_track = True
             continue
         # Split the lines: tabs first, any whitespace as a fallback #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # Everything past the eighth column is joined back into one attribute column #
         if len(items) > 8: items = items[0:8] + [' '.join(items[8:])]
         # Chromosome #
         chrom = items.pop(0)
         # Length is nine #
         if len(items) != 8:
             self.handler.error("The track%s doesn't have nine columns", self.path, number)
         # Have we started a track already ? #
         if declare_track:
             declare_track = False
             self.handler.newTrack(info, self.name)
         # Source field #
         if items[0] == '.': items[0] = ''
         # Name field #
         if items[1] == '.': items[1] = ''
         # Start and end fields #
         try:
             items[2] = int(items[2])
             items[3] = int(items[3])
         except ValueError:
             self.handler.error("The track%s has non integers as interval bounds", self.path, number)
         # Score field #
         if items[4] == '.' or items[4] == '': items[4] = 0.0
         try:
             items[4] = float(items[4])
         except ValueError:
             self.handler.error("The track%s has non floats as score values", self.path, number)
         # Strand field #
         items[5] = strand_to_int(items[5])
         # Frame field #
         if items[6] == '.': items[6] = None
         else:
             try:
                 items[6] = int(items[6])
             except ValueError:
                 self.handler.error("The track%s has non integers as frame value", self.path, number)
         # The last special column: alternating keys and values #
         tokens = shlex.split(items.pop())
         # Keys are kept in a list so the declared fields follow the file order #
         # instead of the arbitrary order of a plain dict #
         keys, lookup = [], {}
         for i in range(0, len(tokens), 2):
             key, value = tokens[i], tokens[i+1].strip(';')
             if key not in lookup: keys.append(key)
             lookup[key] = value
         self.handler.defineFields(all_fields + keys)
         items += [lookup[key] for key in keys]
         # Yield it #
         self.handler.newFeature(chrom, items)
Beispiel #10
0
 def parse(self):
     """Parse a BED-style file and forward its content to ``self.handler``.

     Lines starting with ``browser `` are skipped. A line starting with
     ``track `` is parsed into a dictionary of ``key=value`` pairs and makes
     the next data line open a new track, whose fields are the first
     ``len(columns)`` entries of ``all_fields``. Every other line is split
     into a chromosome followed by start and end (converted to ``int``) and
     optional name, score, strand, thick start and thick end columns, which
     are normalised in place only when present.

     Problems are reported through ``self.handler.error``. The feature is
     passed to ``self.handler.newFeature`` only after every check has
     returned, so a row is never emitted while an exception is propagating.
     """
     # Initial variables #
     fields = []
     info = {}
     # Main loop #
     for number, line in iterate_lines(self.path):
         # Ignored lines #
         if line.startswith("browser "): continue
         # Track headers #
         if line.startswith("track "):
             try:
                 info = dict(
                     [p.split('=', 1) for p in shlex.split(line[6:])])
             except ValueError:
                 self.handler.error(
                     "The track%s seems to have an invalid <track> header line",
                     self.path, number)
             # An empty field list makes the next data line open a new track #
             fields = []
             continue
         # Split the lines: tabs first, any whitespace as a fallback #
         items = line.split('\t')
         if len(items) == 1: items = line.split()
         # Chromosome #
         chrom = items.pop(0)
         # Have we started a track already ? #
         if not fields:
             self.handler.newTrack(info, self.name)
             fields = all_fields[0:len(items)]
             self.handler.defineFields(fields)
         # Start and end fields #
         try:
             items[0] = int(items[0])
             items[1] = int(items[1])
         except ValueError:
             self.handler.error(
                 "The track%s has non integers as interval bounds",
                 self.path, number)
         except IndexError:
             self.handler.error("The track%s has less than two columns",
                                self.path, number)
         # All following fields are optional: only the columns present are touched #
         count = len(items)
         # Name field #
         if count > 2 and items[2] == '.': items[2] = ''
         # Score field #
         if count > 3:
             if items[3] == '.' or items[3] == '': items[3] = 0.0
             try:
                 items[3] = float(items[3])
             except ValueError:
                 self.handler.error(
                     "The track%s has non floats as score values",
                     self.path, number)
         # Strand field #
         if count > 4: items[4] = strand_to_int(items[4])
         # Thick starts #
         # NOTE(review): converted with float() although the message says "integers" - confirm intent #
         if count > 5:
             try:
                 items[5] = float(items[5])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as thick starts",
                     self.path, number)
         # Thick ends #
         if count > 6:
             try:
                 items[6] = float(items[6])
             except ValueError:
                 self.handler.error(
                     "The track%s has non integers as thick ends",
                     self.path, number)
         # Too many fields #
         if count > 11:
             self.handler.error(
                 "The track%s has more than twelve columns", self.path,
                 number)
         # Yield it #
         self.handler.newFeature(chrom, items)