Beispiel #1
0
    def __init__(self, query_name, query_sequence, header=None):
        '''
        Prepares an empty result holder for one query sequence.

        @param:    query_name
        @pdef:     identifier of the query sequence
        @ptype:    {String}

        @param:    query_sequence
        @pdef:     residues of the query sequence
        @ptype:    {String}

        @param:    header
        @pdef:     main data about the blast execution
        @pdefault: _None_
        @ptype:    {BlastHeader}
        '''
        # Helper objects are created in the same order as they are stored.
        query = Sequence(query_name, query_sequence)
        hit_filter = HitFilter()

        self._query, self._header, self._filter = query, header, hit_filter

        # Hit bookkeeping starts empty.
        self._lastiteration = 0      # Keep the last iteration
        self._hits = []              # List of BlastHit objects
        self._correctedHits = False

        # Not available at construction time.
        self._filter_hits = self._query_index = None
    def sequence(self):
        '''
        Protein sequence, wrapped in a new {Sequence} built from the entry
        name and the stored sequence data.

        @return: {Sequence}
        '''
        name, residues = self.entry_name, self._sequence
        return Sequence(name, residues)
Beispiel #3
0
 def build(file_name, sequenceID, sequence, force=False):
     '''
     Writes a single sequence to a new FASTA file and returns a Fasta object
     created from that file.

     @param:    file_name
     @pdef:     name of the fasta file to create
     @ptype:    {String}

     @param:    sequenceID
     @pdef:     identifier given to the sequence
     @ptype:    {String}

     @param:    sequence
     @pdef:     sequence content, handed to Sequence as is
     @ptype:    presumably {String}; whatever Sequence accepts

     @param:    force
     @pdef:     forwarded to File as its overwrite option
     @pdefault: _False_
     @ptype:    {Boolean}

     @return: {Fasta}
     '''
     newFasta = File(file_name, 'w', overwrite=force)
     newSeq = Sequence(seqID=sequenceID, sequence=sequence)
     file_dsc = newFasta.descriptor
     try:
         file_dsc.write(newSeq.format('FASTA'))
     finally:
         # Always release the handle, even if formatting or writing fails.
         newFasta.close()
     return Fasta(fasta_file=newFasta.full)
    def build(file_name, sequence_id, sequence, force=None):
        '''
        Creates a Fasta object and a FASTA file from a sequence.

        The file is closed even when formatting or writing the sequence
        fails, so no open handle is left behind on error.

        @param:    file_name
        @pdef:     name of the fasta file (with path, if necessary)
        @ptype:    {String}

        @param:    sequence_id
        @pdef:     name of the sequence
        @ptype:    {String}

        @param:    sequence
        @pdef:     sequence
        @ptype:    {String} or {List}

        @param:    force
        @pdef:     overwrite previous files with the same name
        @pdefault: _SBIglobals.overwrite_
        @ptype:    {Boolean}

        @return: {Fasta}
        '''
        newFasta = File(file_name, 'w', overwrite=force)
        newSeq = Sequence(sequence_id=sequence_id, sequence=sequence)
        try:
            newFasta.write(newSeq.format('FASTA'))
        finally:
            # Always release the file, whether or not the write succeeded.
            newFasta.close()
        return Fasta(fasta_file=newFasta.full, auto_load=0)
Beispiel #5
0
    def __init__(self,
                 sequences,
                 sequenceInits,
                 identities=None,
                 positives=None,
                 gaps=None):
        '''
        Builds the alignment from its sequences and their start positions.

        @param:    sequences
        @pdef:     aligned sequences; each item is wrapped in a Sequence
        @ptype:    {List}

        @param:    sequenceInits
        @pdef:     one init per sequence, handed to _set_index()
        @ptype:    {List}

        @param:    identities
        @pdef:     stored as given
        @pdefault: _None_

        @param:    positives
        @pdef:     stored as given
        @pdefault: _None_

        @param:    gaps
        @pdef:     stored as given
        @pdefault: _None_

        @raises: {AttributeError} if sequences or sequenceInits is not a
                 {List}, or if their lengths differ.
        @raises: {SAE} (code 1) if _search_segments() reports a failure.
        '''
        must_be_lists = (
            (sequences, 'Sequences must be added in a list\n'),
            (sequenceInits, 'Sequence inits must be added in a list\n'),
        )
        for candidate, complaint in must_be_lists:
            if not isinstance(candidate, list):
                raise AttributeError(complaint)
        if len(sequences) != len(sequenceInits):
            raise AttributeError(
                'One Sequence Init is required for each sequence\n')

        self._seq = [Sequence(sequence=aliseq) for aliseq in sequences]
        self._num_seq = len(self._seq)
        self._segment = []

        self._idx = self._set_index(sequenceInits)

        # Segmentation has to succeed before the alignment is usable.
        segmented = self._search_segments()
        self._segmentation_ok = segmented
        if not segmented:
            raise SAE(code=1)

        self._identities, self._positives, self._gaps = (identities,
                                                         positives, gaps)
        self._alipatt = None
        self._aliptmeth = None

        self._aligned_aa = self._get_aligned_aa()
Beispiel #6
0
 def load(self):
     '''
     Reads the file descriptor line by line and stores every record in
     memory.

     A line starting with '>' opens a new Sequence, which is also indexed by
     its id in the sequence finder. Any other non-blank line is appended to
     the most recently opened Sequence. The file is closed at the end.
     '''
     for raw in self.file.descriptor:
         if raw.startswith('>'):
             record = Sequence(seqID=raw.lstrip('>').strip())
             self._sequences.append(record)
             self._seqfinder[record.id] = len(self._sequences) - 1
             continue

         residues = raw.strip()
         if residues:
             self._sequences[-1].append(residues)
     self.file.close()
Beispiel #7
0
 def live_show(self):
     '''
     Yields the sequences of the file one by one, without storing them.

     A line starting with '>' opens a new Sequence; the previous one, if
     any, is yielded at that point. Other non-blank lines are appended to
     the current Sequence. The file is closed before the last Sequence is
     yielded. A file without any '>' line yields nothing.

     @yields: {Sequence}
     '''
     n = 0
     for line in self.file.descriptor:
         if line.startswith('>'):
             if n > 0:
                 yield s
             n += 1
             s = Sequence(seqID=line.lstrip('>').strip())
         elif len(line.strip()) > 0:
             s.append(line.strip())
     self.file.close()
     # Only emit the trailing record if one was started; otherwise 's' was
     # never bound and yielding it would raise UnboundLocalError.
     if n > 0:
         yield s
Beispiel #8
0
 def get_sequence(pdb):
     '''
     Collects one Sequence per requested chain of a PDB entry.

     pdb[0] is the entry identifier. It names both a working directory
     (created if missing) and the '<id>.pdb' file inside it, which is
     downloaded from files.rcsb.org when not already present. When pdb has a
     second item, it lists the chains to use; otherwise every chain
     identifier of the parsed structure is used.

     @param:    pdb
     @pdef:     entry identifier, optionally followed by the chain ids
     @ptype:    indexable, e.g. {List} or {Tuple}

     @return: {List} of {Sequence}, named '<id>_<chain>'
     '''
     pdb_id = pdb[0]
     sys.stdout.write(pdb_id + '\n')

     if not os.path.isdir(pdb_id):
         os.mkdir(pdb_id)

     pdbf = os.path.join(pdb_id, '{}.pdb'.format(pdb_id))
     if not os.path.isfile(pdbf):
         url = 'http://files.rcsb.org/view/{}.pdb'.format(pdb_id)
         wget.download(url, out=pdbf)

     structure = PDB(pdbf)
     if len(pdb) > 1:
         qchains = pdb[1]
     else:
         qchains = structure.chain_identifiers

     seqs = []
     for chain in qchains:
         print('\t--' + chain + '--')
         label = '{}_{}'.format(pdb_id, chain)
         residues = structure.get_chain_by_id(chain).protein_sequence
         seqs.append(Sequence(label, residues))
     return seqs
    def load(self):
        '''
        Reads every sequence of the file into memory.

        Does nothing when the sequences are already loaded. Otherwise each
        '>' line creates a Sequence that is stored at the position registered
        for its id, and each following non-blank line is appended to it.
        '''
        if self.is_loaded:
            return

        self._sequences = [None] * self._total_sequences
        self.file.open()
        for raw in self.file.read():
            if raw.startswith('>'):
                seqID = raw.lstrip('>').strip()
                self._sequences[self._sequenceID[seqID]] = Sequence(
                    sequence_id=seqID)
                continue

            residues = raw.strip()
            if residues:
                # Belongs to the record opened by the last header line.
                self._sequences[self._sequenceID[seqID]].append(residues)
        self.file.close()
        self._loaded = True
    def live_show(self):
        '''
        Yields the different sequences in the file without actually storing
        them to memory.

        When the sequences are already loaded, they are yielded from memory
        and the file is not touched. Otherwise the file is read line by line.
        A file that contains no '>' line yields nothing.

        @yields: {Sequence}
        '''
        if self.is_loaded:
            for s in self.sequences:
                yield s

        else:
            n, s = 0, None
            self.file.open()
            for line in self.file.read():
                if line.startswith('>'):
                    if n > 0:
                        yield s
                    n += 1
                    s = Sequence(sequence_id=line.lstrip('>').strip())
                elif len(line.strip()) > 0:
                    s.append(line.strip())
            self.file.close()
            # Emit the trailing record only if a header was actually seen,
            # so callers never receive a placeholder instead of a Sequence.
            if n > 0:
                yield s
Beispiel #11
0
    def __init__(self,
                 sequences,
                 sequence_inits,
                 identities=None,
                 positives=None,
                 gaps=None):
        '''
        Builds the alignment from its sequences and their start positions.

        @param:    sequences
        @pdef:     sequences that take part in the alignment.
        @ptype:    {List} of {String} or {Sequence}

        @param:    sequence_inits
        @pdef:     first position number of each aligned sequence (a sequence
                   does not need to start its alignment at 1)
        @ptype:    {List} of {Integer}

        @param:    identities
        @pdef:     number of identities in the alignment
        @pdefault: _None_
        @ptype:    {Integer}

        @param:    positives
        @pdef:     number of positives in the alignment
        @pdefault: _None_
        @ptype:    {Integer}

        @param:    gaps
        @pdef:     number of gaps in the alignment
        @pdefault: _None_
        @ptype:    {Integer}

        @raises: {AttributeError} if sequences or sequence_inits is not a
                 {List}, if their lengths differ, or if an item of sequences
                 is neither a {String} nor a {Sequence}.
        @raises: {SeqAliError} if the segmentation of the alignment fails.
        '''
        must_be_lists = (
            (sequences, 'Sequences must be added in a list\n'),
            (sequence_inits, 'Sequence inits must be added in a list\n'),
        )
        for candidate, complaint in must_be_lists:
            if not isinstance(candidate, list):
                raise AttributeError(complaint)
        if len(sequences) != len(sequence_inits):
            raise AttributeError('One init is required for each sequence\n')

        self._error = SeqAliError()
        self._seq = []

        for aliseq in sequences:
            if not isinstance(aliseq, Sequence):
                # Plain strings get wrapped; anything else is rejected.
                if not isinstance(aliseq, basestring):
                    raise AttributeError(
                        'sequences must be specified as strings or Sequence objects.'
                    )
                aliseq = Sequence(sequence=aliseq)
            self._seq.append(aliseq)

        self._num_seq = len(self._seq)
        self._segment = []

        self._idx = sequence_inits

        seq2ali = self._built_seq2ali(sequence_inits)
        self._seq2ali = seq2ali
        self._staticSA = seq2ali

        segmented = self._search_segments()
        self._segmentation_ok = segmented
        if not segmented:
            raise self._error.wrong_segmentation()

        self._identities, self._positives, self._gaps = (identities,
                                                         positives, gaps)
        self._alipatt = None
        self._aliptmeth = None

        self._aligned_aa = self._get_aligned_aa()
Beispiel #12
0
    def retrieve(self, seqID, allbut=False, prefix_size=None):
        '''
        Returns the sequence(s) identified by seqID.

        When nothing is held in memory (len(self) == 0) the file is scanned;
        otherwise the sequences are taken from memory through the sequence
        finder. NOTE: the in-memory lookups do an exact id match and take no
        account of allbut or prefix_size.

        @param:    seqID
        @pdef:     identifier, or identifiers, of the wanted sequences. A
                   collection given by the caller is never modified.
        @ptype:    {String}, {List} or {Set}

        @param:    allbut
        @pdef:     when scanning the file for several identifiers, return
                   every sequence EXCEPT the given ones.
        @pdefault: _False_
        @ptype:    {Boolean}

        @param:    prefix_size
        @pdef:     when scanning the file, compare only this many leading
                   characters of each identifier in the file.
        @pdefault: _None_
        @ptype:    {Integer}

        @return: {Sequence} for a single identifier, {List} of {Sequence}
                 for a {List} or {Set} of identifiers.

        @raises: {KeyError} if an identifier is not among the loaded
                 sequences.
        @raises: {TypeError} if seqID is not a {String}, {List} or {Set}.
        '''
        SBIglobals.alert('debug', self,
                         'Getting sequence for {0}'.format(seqID))

        if isinstance(seqID, basestring):
            if len(self) != 0:
                if seqID not in self._seqfinder:
                    raise KeyError(seqID)
                return self._sequences[self._seqfinder[seqID]]

            sequence = Sequence()
            read = False
            for line in self.file.descriptor:
                if line.startswith('>'):
                    full_id = line.lstrip('>').split()[0].strip()
                    if prefix_size is not None:
                        sid = full_id[:prefix_size]
                    else:
                        sid = full_id
                    if sid == seqID:
                        sequence.id = full_id
                        read = True
                    elif read:
                        # First header after the wanted record: done.
                        break
                elif read and len(line.strip()) > 0:
                    sequence.append(line.strip())
            self.file.close()
            return sequence

        if isinstance(seqID, (list, set)):
            # Work on a private copy: matched ids are removed while scanning
            # and the caller's collection must stay untouched.
            wanted = set(seqID)
            sequence = []
            if len(self) == 0:
                read = False
                for line in self.file.descriptor:
                    if line.startswith('>'):
                        full_id = line.lstrip('>').split()[0].strip()
                        if prefix_size is not None:
                            sid = full_id[:prefix_size]
                        else:
                            sid = full_id
                        if (not allbut and sid in wanted) or \
                           (allbut and sid not in wanted):
                            newSeq = Sequence(seqID=full_id)
                            sequence.append(newSeq)
                            if not allbut and prefix_size is None:
                                wanted.remove(newSeq.id)
                            read = True
                        elif read:
                            read = False
                        elif len(wanted) == 0:
                            # Nothing left to look for.
                            break
                    elif read and len(line.strip()) > 0:
                        sequence[-1].append(line.strip())
                self.file.close()
            else:
                for queryID in wanted:
                    if queryID not in self._seqfinder:
                        raise KeyError(queryID)
                    sequence.append(self._sequences[self._seqfinder[queryID]])
            return sequence

        raise TypeError('seqID must be a string, a list or a set')