Example #1
0
 def __init__(
     self,
     seq,
     id=UNKNOWN_ID,
     name=UNKNOWN_NAME,
     description=UNKNOWN_DESCRIPTION,
     dbxrefs=None,
     features=None,
     annotations=None,
     letter_annotations=None,
     qual=None,
 ):
     """Initialise the record from a sequence and optional metadata.

     When ``id`` is left at its default but ``name`` is not, the name is
     reused as the id. ``seq`` must be an instance of ``Seq`` or
     ``UnknownSeq`` (as bound in this module); otherwise ``ValueError`` is
     raised. ``qual`` is stored as ``self.qual`` only when it is not None.
     """
     # Fall back to the name when no explicit id was supplied.
     record_id = name if (id == UNKNOWN_ID and name != UNKNOWN_NAME) else id

     # A plain Biopython Seq is not enough here: we rely on our own repr.
     if not isinstance(seq, (Seq, UnknownSeq)):
         raise ValueError("seq should be a franklin Seq")

     base_kwargs = dict(
         id=record_id,
         name=name,
         description=description,
         dbxrefs=dbxrefs,
         features=features,
         annotations=annotations,
         letter_annotations=letter_annotations,
     )
     SeqRecord.__init__(self, seq, **base_kwargs)

     if qual is None:
         return
     self.qual = qual
Example #2
0
 def _init_with_SeqRecord(self, record):
     """Initialise self by copying the standard fields of ``record``.

     The seq, id, name, description, dbxrefs, features, annotations and
     letter_annotations attributes of ``record`` are handed to
     ``SeqRecord.__init__`` as-is (no copies are made here).
     """
     copied_fields = ('seq', 'id', 'name', 'description', 'dbxrefs',
                      'features', 'annotations', 'letter_annotations')
     init_kwargs = {}
     for field in copied_fields:
         init_kwargs[field] = getattr(record, field)
     SeqRecord.__init__(self, **init_kwargs)
    def __init__(self, seq, id="<unknown id>", name="<unknown name>", description="<unknown description>",
                 dbxrefs=None, features=None, annotations=None, letter_annotations=None):
        """Create the record, trimming ``seq`` using coordinates parsed from ``id``.

        ``id`` is split on '|' and read positionally: GeneID, TranscriptID,
        GeneName, ExonRank, a flag field (``ConstExon`` is False when it is
        empty, True otherwise), FPUTRend, TPUTRstart, exonStart and exonEnd.
        The first five fields are required (IndexError otherwise); each of
        the four numeric fields is stored as an int, or as '' when it is
        missing or not an integer.

        The stored sequence is ``str(seq)`` sliced to
        [FPUTRend - exonStart : exonEnd - exonStart] when FPUTRend is set,
        else to [0 : TPUTRstart - exonStart] when TPUTRstart is set, else
        left whole.

        ``dbxrefs``, ``features``, ``annotations`` and ``letter_annotations``
        are accepted but not forwarded: the underlying SeqRecord is always
        initialised with None for them.
        """
        self.id = id
        self.tempseq = seq
        fields = self.id.split('|')

        def _int_or_blank(index):
            # Missing or non-numeric fields are represented by ''. Only the
            # two expected failures are caught so that unrelated errors
            # (and KeyboardInterrupt/SystemExit) are no longer swallowed.
            try:
                return int(fields[index])
            except (IndexError, ValueError):
                return ''

        self.GeneID = fields[0]
        self.TranscriptID = fields[1]
        self.GeneName = fields[2]
        self.ExonRank = fields[3]
        self.ConstExon = fields[4] != ''
        self.FPUTRend = _int_or_blank(5)
        self.TPUTRstart = _int_or_blank(6)
        self.exonStart = _int_or_blank(7)
        self.exonEnd = _int_or_blank(8)

        full_sequence = str(self.tempseq)
        # NOTE: the slices below need exonStart (and exonEnd) to be ints
        # whenever a UTR boundary is present; a '' there raises TypeError.
        if self.FPUTRend != '':
            sequence = full_sequence[(self.FPUTRend - self.exonStart):(self.exonEnd - self.exonStart)]
        elif self.TPUTRstart != '':
            sequence = full_sequence[0:(self.TPUTRstart - self.exonStart)]
        else:
            sequence = full_sequence

        SeqRecord.__init__(self, Seq(sequence, IUPAC.unambiguous_dna), id, name, description, dbxrefs=None,
                           features=None, annotations=None, letter_annotations=None)
Example #4
0
 def __init__(self, seq, seq_id, **kwargs):
     """Initialise the record with ``seq`` and ``seq_id`` plus extra attributes.

     ``self.frame`` is the result of ``find_frame(seq)``, or None when that
     call raises ``ValueError``. ``frequency`` starts as None and
     ``mutations`` as an empty list. Every keyword argument is written
     straight into the instance ``__dict__`` under its own name, so it can
     override any of the attributes set above.
     """
     SeqRecord.__init__(self, seq=seq, id=seq_id)

     try:
         detected_frame = find_frame(seq)
     except ValueError:
         detected_frame = None
     self.frame = detected_frame

     self.frequency = None
     self.mutations = []

     # Applied last so caller-supplied values win over the defaults.
     self.__dict__.update(kwargs)
Example #5
0
    def __init__(self, readSeq, readID, qualStr, R='R1'):
        """Initialise the read and run ``sim_fastq_entry``.

        ``readSeq`` and ``readID`` are passed positionally to
        ``SeqRecord.__init__``; ``qualStr`` and ``R`` are stored on the
        instance. When ``R`` equals 'R2' the stored sequence is replaced by
        its reverse complement before ``self.sim_fastq_entry()`` is called.
        """
        SeqRecord.__init__(self, readSeq, readID)
        self.qualStr, self.R = qualStr, R

        is_second_read = self.R == 'R2'
        if is_second_read:
            self.seq = self.seq.reverse_complement()

        self.sim_fastq_entry()
Example #6
0
 def _init_with_SeqRecord(self, record):
     """Initialise self from ``record``, converting list phred scores to text.

     If ``record.letter_annotations`` has a 'phred_quality' entry that is a
     list, it is replaced by a string in which each score ``q`` becomes
     ``chr(q + 33)``. The replacement is written back into
     ``record.letter_annotations``, so the caller's record is modified too.
     All standard SeqRecord fields of ``record`` are then passed to
     ``SeqRecord.__init__``.
     """
     letter_annotations = record.letter_annotations
     # The builtin ``list`` is used instead of ``types.ListType``: the two
     # are the same object on Python 2, and only ``list`` exists on Python 3.
     if 'phred_quality' in letter_annotations and isinstance(letter_annotations['phred_quality'], list):
         qual = ''.join(chr(q + 33) for q in letter_annotations['phred_quality'])
         record.letter_annotations['phred_quality'] = qual

     # Initialize self using existing SeqRecord object
     SeqRecord.__init__(self, seq=record.seq, id=record.id,
                         name=record.name, description=record.description,
                         dbxrefs=record.dbxrefs, features=record.features,
                         annotations=record.annotations,
                         letter_annotations=record.letter_annotations)
Example #7
0
    def __init__(self, seq_record, cut_site, **kwargs):
        """
        Build a recognition sequence from `seq_record` and `cut_site`.

        `seq_record` is either a SeqRecord, whose fields are reused, or any
        object whose str() is the recognition sequence; in that case the
        text is upper-cased and the optional `id`, `name`, `description`,
        `letter_annotations`, `annotations`, `features` and `dbxrefs` are
        taken from the keyword arguments. `cut_site` is the one-based index
        of the last base before the cut site.

        E.g., NotI:
            5'---GC     GGCCGC---3'
            3'---CGCCGG     CG---5'
            noti = RecognitionSeq('GCGGCCGC', 2)

        E.g., EcoRV:
            5'---GAT  ATC---3'
            3'---CTA  TAG---5'
            ecorv = RecognitionSeq('GATATC', 3)

        Raises InvalidRecognitionSeqError when a base other than A, C, G or
        T is present, or when `cut_site` lies outside 0..len(self).
        """
        if isinstance(seq_record, SeqRecord):
            init_kwargs = dict(
                seq=seq_record.seq,
                id=seq_record.id,
                name=seq_record.name,
                description=seq_record.description,
                letter_annotations=seq_record.letter_annotations,
                annotations=seq_record.annotations,
                features=seq_record.features,
                dbxrefs=seq_record.dbxrefs)
        else:
            init_kwargs = dict(
                seq=Seq(str(seq_record).upper(),
                        alphabet=IUPAC.unambiguous_dna),
                id=kwargs.get('id', '<unknown id>'),
                name=kwargs.get('name', '<unknown name>'),
                description=kwargs.get('description',
                                       '<unknown description>'),
                letter_annotations=kwargs.get('letter_annotations', None),
                annotations=kwargs.get('annotations', None),
                features=kwargs.get('features', None),
                dbxrefs=kwargs.get('dbxrefs', None))
        SeqRecord.__init__(self, **init_kwargs)

        # Report the first base that is not one of the four plain DNA bases.
        plain_bases = ('A', 'C', 'G', 'T')
        upper_bases = (letter.upper() for letter in str(self.seq))
        offending = next(
            (letter for letter in upper_bases if letter not in plain_bases),
            None)
        if offending is not None:
            raise InvalidRecognitionSeqError(
                "Invalid base {0!r}:\n\t".format(offending) +
                "RecognitionSeq only supports unambiguous DNA")

        self.cut_site = int(cut_site)
        if not 0 <= self.cut_site <= len(self):
            raise InvalidRecognitionSeqError(
                "Invalid cut_site {0} for recognition sequence {1}".format(
                    self.cut_site, str(self.seq)))
        self.overhang = len(self) - (2 * self.cut_site)
Example #8
0
    def __init__(self, seq_record, cut_site, **kwargs):
        """
        Create a recognition sequence with its cut position.

        `seq_record` may be a SeqRecord (its seq, id, name, description,
        letter_annotations, annotations, features and dbxrefs are reused)
        or anything whose str() gives the recognition sequence, which is
        then upper-cased; in the second case the metadata comes from the
        keyword arguments of the same names. `cut_site` is the one-based
        index of the last base before the cut site.

        E.g., NotI:
            5'---GC     GGCCGC---3'
            3'---CGCCGG     CG---5'
            noti = RecognitionSeq('GCGGCCGC', 2)

        E.g., EcoRV:
            5'---GAT  ATC---3'
            3'---CTA  TAG---5'
            ecorv = RecognitionSeq('GATATC', 3)

        InvalidRecognitionSeqError is raised for any base outside A/C/G/T
        and for a `cut_site` below 0 or above the sequence length.
        """
        from_record = isinstance(seq_record, SeqRecord)
        if not from_record:
            SeqRecord.__init__(
                self,
                seq=Seq(str(seq_record).upper(),
                        alphabet=IUPAC.unambiguous_dna),
                id=kwargs.get('id', '<unknown id>'),
                name=kwargs.get('name', '<unknown name>'),
                description=kwargs.get('description',
                                       '<unknown description>'),
                letter_annotations=kwargs.get('letter_annotations', None),
                annotations=kwargs.get('annotations', None),
                features=kwargs.get('features', None),
                dbxrefs=kwargs.get('dbxrefs', None))
        else:
            SeqRecord.__init__(
                self,
                seq=seq_record.seq,
                id=seq_record.id,
                name=seq_record.name,
                description=seq_record.description,
                letter_annotations=seq_record.letter_annotations,
                annotations=seq_record.annotations,
                features=seq_record.features,
                dbxrefs=seq_record.dbxrefs)

        # Only the four plain DNA letters are accepted (case-insensitively).
        for letter in str(self.seq):
            normalised = letter.upper()
            if normalised in ('A', 'C', 'G', 'T'):
                continue
            raise InvalidRecognitionSeqError(
                "Invalid base {0!r}:\n\t".format(normalised) +
                "RecognitionSeq only supports unambiguous DNA")

        self.cut_site = int(cut_site)
        site_in_range = 0 <= self.cut_site <= len(self)
        if not site_in_range:
            raise InvalidRecognitionSeqError(
                "Invalid cut_site {0} for recognition sequence {1}".format(
                    self.cut_site, str(self.seq)))
        self.overhang = len(self) - (2 * self.cut_site)
Example #9
0
 def __init__(self,
              seq_record,
              start_site,
              end_site,
              overhang,
              five_prime_terminus=False,
              three_prime_terminus=False,
              **kwargs):
     """Create a fragment spanning ``start_site``..``end_site`` (inclusive).

     ``seq_record`` is either a SeqRecord, whose fields are reused, or any
     object whose str() is the fragment sequence; in that case the text is
     upper-cased and the optional ``id``, ``name``, ``description``,
     ``letter_annotations``, ``annotations``, ``features`` and ``dbxrefs``
     are read from the keyword arguments.

     ``start_site``, ``end_site`` and ``overhang`` are coerced with
     ``int()``. The two terminus flags are stored unchanged.

     Raises InvalidFragmentError when the start lies after the end, or when
     ``end - start + 1`` differs from the sequence length.
     """
     # Coerce before comparing: the raw arguments may be numeric strings,
     # which would otherwise compare lexicographically ("9" > "10").
     start = int(start_site)
     end = int(end_site)
     if start > end:
         raise InvalidFragmentError(
             "A Fragment's start position ({0}) cannot be greater than "
             "its end position ({1})".format(start, end))
     if isinstance(seq_record, SeqRecord):
         SeqRecord.__init__(
             self,
             seq=seq_record.seq,
             id=seq_record.id,
             name=seq_record.name,
             description=seq_record.description,
             letter_annotations=seq_record.letter_annotations,
             annotations=seq_record.annotations,
             features=seq_record.features,
             dbxrefs=seq_record.dbxrefs)
     else:
         SeqRecord.__init__(self,
                            seq=Seq(str(seq_record).upper(),
                                    alphabet=IUPAC.ambiguous_dna),
                            id=kwargs.get('id', '<unknown id>'),
                            name=kwargs.get('name', '<unknown name>'),
                            description=kwargs.get('description',
                                                   '<unknown description>'),
                            letter_annotations=kwargs.get(
                                'letter_annotations', None),
                            annotations=kwargs.get('annotations', None),
                            features=kwargs.get('features', None),
                            dbxrefs=kwargs.get('dbxrefs', None))
     self.start = start
     self.end = end
     # Both coordinates are inclusive, hence the +1.
     self.length = self.end - self.start + 1
     if self.length != len(self):
         raise InvalidFragmentError(
             "The fragment's sequence length ({0}) does not match its "
             "start ({1}) and end ({2}) sites".format(
                 len(self), self.start, self.end))
     self.overhang = int(overhang)
     self.five_prime_terminus = five_prime_terminus
     self.three_prime_terminus = three_prime_terminus
Example #10
0
    def __init__(self, seq_obj, position=None, footprint=None, tail=None):
        """Wrap ``seq_obj`` and attach position, footprint and tail.

        The alphabet of ``seq_obj.seq`` is set to ``ambiguous_dna`` in
        place, so the object passed in is modified. Its seq, id, name,
        description, dbxrefs, features, annotations and letter_annotations
        are then handed to ``SeqRecord.__init__``.
        """
        self.position, self.footprint, self.tail = position, footprint, tail

        seq_obj.seq.alphabet = ambiguous_dna

        record_fields = ("seq", "id", "name", "description", "dbxrefs",
                         "features", "annotations", "letter_annotations")
        init_kwargs = {}
        for field in record_fields:
            init_kwargs[field] = getattr(seq_obj, field)
        SeqRecord.__init__(self, **init_kwargs)
Example #11
0
 def __init__(self,
              seq_record,
              start_site,
              end_site,
              overhang,
              five_prime_terminus=False,
              three_prime_terminus=False,
              **kwargs):
     """Create a fragment covering ``start_site`` to ``end_site`` inclusive.

     ``seq_record`` may be a SeqRecord (its seq, id, name, description,
     letter_annotations, annotations, features and dbxrefs are reused) or
     anything whose str() is the fragment sequence, which is upper-cased;
     in the second case the metadata comes from the keyword arguments of
     the same names.

     ``start_site``, ``end_site`` and ``overhang`` are converted with
     ``int()``; the terminus flags are stored as given.

     Raises InvalidFragmentError if the start is greater than the end, or
     if ``end - start + 1`` does not equal the sequence length.
     """
     # Convert first so the ordering check is numeric even when the
     # coordinates arrive as strings (where "9" > "10" lexicographically).
     start = int(start_site)
     end = int(end_site)
     if start > end:
         raise InvalidFragmentError(
             "A Fragment's start position ({0}) cannot be greater than "
             "its end position ({1})".format(start, end))
     if isinstance(seq_record, SeqRecord):
         SeqRecord.__init__(self,
                 seq=seq_record.seq,
                 id=seq_record.id,
                 name=seq_record.name,
                 description=seq_record.description,
                 letter_annotations=seq_record.letter_annotations,
                 annotations=seq_record.annotations,
                 features=seq_record.features,
                 dbxrefs=seq_record.dbxrefs)
     else:
         SeqRecord.__init__(self,
                 seq=Seq(str(seq_record).upper(),
                         alphabet=IUPAC.ambiguous_dna),
                 id=kwargs.get('id', '<unknown id>'),
                 name=kwargs.get('name', '<unknown name>'),
                 description=kwargs.get('description',
                         '<unknown description>'),
                 letter_annotations=kwargs.get('letter_annotations',
                         None),
                 annotations=kwargs.get('annotations', None),
                 features=kwargs.get('features', None),
                 dbxrefs=kwargs.get('dbxrefs', None))
     self.start = start
     self.end = end
     # Inclusive coordinates, hence the +1.
     self.length = self.end - self.start + 1
     if self.length != len(self):
         raise InvalidFragmentError(
             "The fragment's sequence length ({0}) does not match its "
             "start ({1}) and end ({2}) sites".format(len(self),
                     self.start, self.end))
     self.overhang = int(overhang)
     self.five_prime_terminus = five_prime_terminus
     self.three_prime_terminus = three_prime_terminus
Example #12
0
    def __init__(self, sequence, **kwargs):
        """Initialise the record and derive GC content and coordinates.

        ``sequence`` and all keyword arguments are passed on to
        ``SeqRecord.__init__``. ``gc_content`` is the percentage of G and C
        in the upper-cased sequence, rounded to zero decimals (0.0 for an
        empty sequence). ``start`` is ``int()`` of the ``id`` keyword
        argument (-1 when absent) and ``stop`` is
        ``start + len(sequence) - 1``. ``score`` starts at -1 and ``genes``
        as an empty list.
        """
        SeqRecord.__init__(self, sequence, **kwargs)

        sequence = upper(sequence)
        gc_total = sequence.count('G') + sequence.count('C')
        length = len(sequence)

        # make GC content as a percent; an empty sequence has no bases to
        # count, so report 0 rather than dividing by zero
        if length:
            content = gc_total / float(length)
            content = content * 100
        else:
            content = 0.0
        self.gc_content = round(content, 0)

        # set the start stop in parent seq
        self.start = int(kwargs.get('id', -1))
        self.stop = self.start + length - 1

        # set up score
        self.score = -1
        self.genes = []
Example #13
0
    def __init__(self,
                 seq_obj,
                 position=None,
                 footprint=None,
                 tail=None):
        """Initialise from ``seq_obj`` and record position, footprint and tail.

        ``seq_obj.seq.alphabet`` is overwritten with ``ambiguous_dna`` (the
        caller's object is changed), after which the standard SeqRecord
        fields of ``seq_obj`` are passed to ``SeqRecord.__init__``.
        """
        self.position = position
        self.footprint = footprint
        self.tail = tail

        sequence = seq_obj.seq
        sequence.alphabet = ambiguous_dna

        SeqRecord.__init__(
            self,
            seq=seq_obj.seq,
            id=seq_obj.id,
            name=seq_obj.name,
            description=seq_obj.description,
            dbxrefs=seq_obj.dbxrefs,
            features=seq_obj.features,
            annotations=seq_obj.annotations,
            letter_annotations=seq_obj.letter_annotations,
        )
Example #14
0
 def __init__(self, *args, **kw):
     """Initialize ImmuneChain

     This is performed either with a prebuilt SeqRecord object (given as
     the first positional argument or as the `record` keyword) or as a
     native SeqRecord object, in which case all arguments are passed to
     SeqRecord.__init__ unchanged.
     """
     if len(args) > 0 and isinstance(args[0], SeqRecord):   # pre-built SeqRecord
         self._init_with_SeqRecord(args[0])
     elif 'record' in kw:          # pre-built SeqRecord
         # `in` replaces dict.has_key(), which no longer exists on Python 3.
         self._init_with_SeqRecord(kw['record'])
     else:   # native SeqRecord init
         SeqRecord.__init__(self, *args, **kw)

     # precompute hash on features for performance
     self._update_feature_dict()

     # load `source` feature qualifiers into annotations and delete `source`
     # feature, if it exists
     self._process_source_feature()

     # define a set for uniq tags; setdefault also creates an empty 'tags'
     # list in the annotations when none is present
     self._tags = set(self.annotations.setdefault('tags', []))
Example #15
0
 def __init__(self, *args, **kw):
     """Initialize ImmuneChain

     The instance is built either from a prebuilt SeqRecord object (first
     positional argument, or the `record` keyword) or, failing that, as a
     native SeqRecord with every argument forwarded to SeqRecord.__init__.
     """
     if len(args) > 0 and isinstance(args[0], SeqRecord):   # pre-built SeqRecord
         self._init_with_SeqRecord(args[0])
     elif 'record' in kw:          # pre-built SeqRecord
         # Membership test instead of dict.has_key(), which Python 3 removed.
         self._init_with_SeqRecord(kw['record'])
     else:   # native SeqRecord init
         SeqRecord.__init__(self, *args, **kw)

     # precompute hash on features for performance
     self._update_feature_dict()

     # load `source` feature qualifiers into annotations and delete `source`
     # feature, if it exists
     self._process_source_feature()

     # define a set for uniq tags; an empty 'tags' list is stored in the
     # annotations if the key was missing
     self._tags = set(self.annotations.setdefault('tags', []))
Example #16
0
 def __init__(self, kmer_str, freq_dict=None):
     """Create a record for ``kmer_str`` and look up its frequency.

     The k-mer text is used both as the sequence and as the id, with an
     empty description. ``self.freq`` is taken from ``freq_dict`` (1 when
     the k-mer is not a key); it is also 1 when ``freq_dict`` is None or
     empty.
     """
     SeqRecord.__init__(self, Seq(kmer_str), id=kmer_str, description="")
     self.freq = freq_dict.get(kmer_str, 1) if freq_dict else 1
Example #17
0
 def __init__(self, rec):
   """Initialise self from the fields of an existing record ``rec``.

   The seq, id, name, description, dbxrefs, features and annotations of
   ``rec`` are passed on; ``rec.letter_annotations`` is not.
   """
   SeqRecord.__init__(
       self,
       seq=rec.seq,
       id=rec.id,
       name=rec.name,
       description=rec.description,
       dbxrefs=rec.dbxrefs,
       features=rec.features,
       annotations=rec.annotations,
   )