Beispiel #1
0
def BEDIterator(handle):
    """Generate a SeqFeature for every record line read from ``handle``.

    Despite the function's name, each record is expected to have the nine
    tab-separated, GFF3-style columns ``ref, source, type, start, end,
    score, strand, frame, attributes``, where ``attributes`` is a
    ``;``-separated list of ``key=value`` pairs.

    handle - input file; must provide ``readline()``.  Its ``name``
             attribute, when present, is used in error messages.

    Blank lines and lines starting with ``#`` are skipped wherever they
    occur; iteration stops at end of file.

    For every record a SeqFeature is yielded whose location is built from
    ``int(start)`` and ``int(end)`` exactly as given in the file, with
    ``type``, ``strand`` (looked up in ``_gff3_strand_to_numeric``),
    ``ref`` and ``ref_db`` (the source column) set.  ``id`` and ``name``
    come from the ``ID`` and ``Name`` attributes (None when absent) and
    the full attribute dictionary is attached as ``attributes``.  The
    score and frame columns are read but not stored.

    Raises FormatError when a record does not have exactly nine columns.
    """
    line_no = 0
    while True:
        raw_line = handle.readline()
        if not raw_line:
            # readline() returns an empty string only at end of file; a
            # blank line still carries its newline, so test *before*
            # stripping to avoid mistaking a blank line for EOF.
            return
        line_no += 1
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        try:
            (ref, source, feature_type, start, end,
             _score, strand, _frame, attributes) = line.split("\t")
        except ValueError:
            raise FormatError(
                "Problem with line %d in %s.  Line was\n%s" %
                (line_no, getattr(handle, "name", "<unknown>"), line))

        attr_dict = {}
        for pair in attributes.split(";"):
            pair = pair.strip()
            # Ignore empty entries (";;", trailing ";") and the "." used
            # as a placeholder for an empty column.
            if not pair or pair == ".":
                continue
            # Split on the first "=" only so values may contain "=".
            # A token without "=" is kept as a key with an empty value.
            key, _, value = pair.partition("=")
            attr_dict[key] = value

        result = SeqFeature(
            location=FeatureLocation(int(start), int(end)),
            type=feature_type,
            strand=_gff3_strand_to_numeric[strand],
            ref=ref,
            ref_db=source)
        result.id = attr_dict.get("ID", None)
        result.name = attr_dict.get("Name", None)
        result.attributes = attr_dict  # not an official property of SeqFeature.
        yield result
Beispiel #2
0
def GTFIterator(handle):
    """Generate a SeqFeature for every GTF record line in ``handle``.

    handle - iterable of text lines (e.g. an open file).  Its ``name``
             attribute, when present, is used in error messages.

    Each record must have the nine tab-separated columns ``ref, source,
    type, start, end, score, strand, frame, attributes``.  The attributes
    column is a ``;``-separated list of ``key "value"`` pairs; surrounding
    double quotes are removed from the values.  Blank lines and lines
    starting with ``#`` are skipped; iteration stops at end of input.

    For every record a SeqFeature is yielded whose location is built from
    ``int(start)`` and ``int(end)`` exactly as given in the file, with
    ``type``, ``strand`` (looked up in ``_gff3_strand_to_numeric``),
    ``ref`` and ``ref_db`` (the source column) set.  Both ``id`` and
    ``name`` are the ``transcript_id`` attribute (None when absent).  The
    attribute dictionary is attached as ``attributes`` and the raw frame
    column as ``frame``.  The score column is read but not stored.

    Raises ValueError when a record does not have exactly nine columns.
    """
    source_name = getattr(handle, 'name', '<unknown>')
    # Iterating the handle ends cleanly at EOF; calling handle.next() by
    # hand leaks StopIteration out of the generator (an error under
    # PEP 479) and is unavailable on Python 3 file objects.
    for line_no, line in enumerate(handle, 1):
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        try:
            (ref, source, feature_type, start, end,
             _score, strand, frame, attributes) = line.split('\t')
        except ValueError:
            raise ValueError(
                'Problem with line %d in %s.  Line was\n%s' %
                (line_no, source_name, line))

        attr_dict = {}
        for pair in attributes.split(';'):
            pair = pair.strip()
            if not pair:
                # Empty entries (";;", trailing ";") carry no data and
                # must not overwrite a real transcript_id.
                continue
            pieces = pair.split(None, 1)
            if len(pieces) == 2:
                attr_dict[pieces[0]] = pieces[1].strip('"')
            else:
                # A bare token without a value is treated as the
                # transcript id, unless one was given explicitly.
                attr_dict.setdefault('transcript_id', pair)

        result = SeqFeature(
            location=FeatureLocation(int(start), int(end)),
            type=feature_type,
            strand=_gff3_strand_to_numeric[strand],
            ref=ref,
            ref_db=source)
        result.name = result.id = attr_dict.get('transcript_id', None)
        result.attributes = attr_dict  # not an official property of SeqFeature.
        result.frame = frame
        yield result