def __init__(self, query_name, query_sequence, header=None):
    '''
    Prepare an empty result container for one blast query.

    @param:    query_name
    @pdef:     identifier of the query sequence
    @ptype:    {String}

    @param:    query_sequence
    @pdef:     residues of the query sequence
    @ptype:    {String}

    @param:    header
    @pdef:     main data about the blast execution
    @pdefault: _None_
    @ptype:    {BlastHeader}
    '''
    # Description of what was searched and how.
    query = Sequence(query_name, query_sequence)
    self._query = query
    self._header = header
    self._filter = HitFilter()

    # Hit bookkeeping; everything starts out empty.
    self._hits = []            # List of BlastHit objects
    self._lastiteration = 0    # Keep the last iteration
    self._correctedHits = False
    self._filter_hits = None
    self._query_index = None
def sequence(self):
    '''
    Protein sequence, wrapped in a freshly built object on every call.

    @return: {Sequence}
    '''
    name = self.entry_name
    residues = self._sequence
    return Sequence(name, residues)
def build(file_name, sequenceID, sequence, force=False):
    '''
    Writes a single sequence to a new FASTA file and returns a Fasta object
    built from that file.

    @param:    file_name
    @pdef:     name of the fasta file to create
    @ptype:    {String}

    @param:    sequenceID
    @pdef:     identifier given to the sequence
    @ptype:    {String}

    @param:    sequence
    @pdef:     sequence to write
    @ptype:    {String}

    @param:    force
    @pdef:     forwarded to File as its overwrite option
    @pdefault: _False_
    @ptype:    {Boolean}

    @return: {Fasta}
    '''
    newFasta = File(file_name, 'w', overwrite=force)
    try:
        newSeq = Sequence(seqID=sequenceID, sequence=sequence)
        newFasta.descriptor.write(newSeq.format('FASTA'))
    finally:
        # Release the handle even when formatting or writing fails.
        newFasta.close()
    return Fasta(fasta_file=newFasta.full)
def build(file_name, sequence_id, sequence, force=None):
    '''
    Creates a Fasta object and a FASTA file from a sequence.

    @param:    file_name
    @pdef:     name of the fasta file (with path, if necessary)
    @ptype:    {String}

    @param:    sequence_id
    @pdef:     name of the sequence
    @ptype:    {String}

    @param:    sequence
    @pdef:     sequence
    @ptype:    {String} or {List}

    @param:    force
    @pdef:     overwrite previous files with the same name
    @pdefault: _SBIglobals.overwrite_
    @ptype:    {Boolean}

    @return: {Fasta}
    '''
    newFasta = File(file_name, 'w', overwrite=force)
    try:
        newSeq = Sequence(sequence_id=sequence_id, sequence=sequence)
        newFasta.write(newSeq.format('FASTA'))
    finally:
        # Release the handle even when formatting or writing fails.
        newFasta.close()
    # The returned object is created with auto_load=0, exactly as before.
    return Fasta(fasta_file=newFasta.full, auto_load=0)
def __init__(self, sequences, sequenceInits, identities=None,
             positives=None, gaps=None):
    '''
    Builds the alignment from raw sequences and their index information.

    @param:    sequences
    @pdef:     sequences of the alignment; each one is wrapped in a Sequence
    @ptype:    {List}

    @param:    sequenceInits
    @pdef:     one entry per sequence, handed to _set_index
               (presumably the first position of each sequence; confirm)
    @ptype:    {List}

    @param:    identities
    @pdef:     stored as given
    @pdefault: _None_

    @param:    positives
    @pdef:     stored as given
    @pdefault: _None_

    @param:    gaps
    @pdef:     stored as given
    @pdefault: _None_

    @raises: {AttributeError} if sequences or sequenceInits is not a {List}
             or if both lists differ in length.
    @raises: SAE(code=1) if _search_segments reports a failure.
    '''
    must_be_lists = (
        (sequences, 'Sequences must be added in a list\n'),
        (sequenceInits, 'Sequence inits must be added in a list\n'),
    )
    for value, complaint in must_be_lists:
        if not isinstance(value, list):
            raise AttributeError(complaint)
    if len(sequences) != len(sequenceInits):
        raise AttributeError(
            'One Sequence Init is required for each sequence\n')

    self._seq = [Sequence(sequence=raw) for raw in sequences]
    self._num_seq = len(self._seq)

    # The following steps read the state set above, so order matters.
    self._segment = []
    self._idx = self._set_index(sequenceInits)
    segmented = self._search_segments()
    self._segmentation_ok = segmented
    if not segmented:
        raise SAE(code=1)

    self._identities = identities
    self._positives = positives
    self._gaps = gaps

    self._alipatt = None
    self._aliptmeth = None
    self._aligned_aa = self._get_aligned_aa()
def load(self):
    '''
    Reads every sequence of the file into memory.

    Each '>' line starts a new Sequence that is appended to _sequences and
    indexed by its id in _seqfinder; every other non-blank line is appended
    to the most recently added sequence. The file is closed when reading
    ends, whether it succeeded or failed.
    '''
    descriptor = self.file.descriptor
    try:
        for line in descriptor:
            if line.startswith('>'):
                self._sequences.append(
                    Sequence(seqID=line.lstrip('>').strip()))
                # Map the id to the position the sequence was stored at.
                self._seqfinder[self._sequences[-1].id] = len(
                    self._sequences) - 1
                continue
            content = line.strip()
            if content:
                self._sequences[-1].append(content)
    finally:
        # Do not leave the handle open when a line cannot be processed.
        self.file.close()
def live_show(self):
    '''
    Yields the sequences of the file one by one, without keeping them all
    in memory.

    The file is closed once its last line has been read (before the final
    sequence is handed out) and also when the generator is closed or fails
    half way. A file without any '>' header yields nothing.

    @raises: {ValueError} if sequence data appears before the first header.
    @yields: {Sequence}
    '''
    current = None  # sequence being filled; None until a header is seen
    descriptor = self.file.descriptor
    try:
        for line in descriptor:
            if line.startswith('>'):
                # A new header completes the previous sequence, if any.
                if current is not None:
                    yield current
                current = Sequence(seqID=line.lstrip('>').strip())
                continue
            content = line.strip()
            if not content:
                continue
            if current is None:
                raise ValueError(
                    'Sequence data found before the first FASTA header')
            current.append(content)
    finally:
        self.file.close()
    if current is not None:
        yield current
def get_sequence(pdb):
    '''
    Collects the protein sequence of the requested chains of a PDB entry.

    The entry is stored as <id>/<id>.pdb; the directory is created and the
    file downloaded from files.rcsb.org only when they do not exist yet.
    The PDB id and every processed chain are printed as progress.

    @param:    pdb
    @pdef:     pdb[0] is the PDB identifier; pdb[1], when present, holds the
               chains to process. Without it, all chain identifiers of the
               structure are used.
    @ptype:    indexable (presumably {List} or {Tuple}; confirm with caller)

    @return: {List} of {Sequence}, named <id>_<chain>
    '''
    pdb_id = pdb[0]
    sys.stdout.write(pdb_id + '\n')

    # Local cache: one directory per PDB entry.
    if not os.path.isdir(pdb_id):
        os.mkdir(pdb_id)
    local_copy = os.path.join(pdb_id, '{}.pdb'.format(pdb_id))
    if not os.path.isfile(local_copy):
        url = 'http://files.rcsb.org/view/{}.pdb'.format(pdb_id)
        wget.download(url, out=local_copy)

    structure = PDB(local_copy)
    if len(pdb) > 1:
        chains = pdb[1]
    else:
        chains = structure.chain_identifiers

    collected = []
    for chain in chains:
        print('\t--' + chain + '--')
        label = '{}_{}'.format(pdb_id, chain)
        residues = structure.get_chain_by_id(chain).protein_sequence
        collected.append(Sequence(label, residues))
    return collected
def load(self):
    '''
    Uploads to memory all the sequences from the file.

    Does nothing when the sequences are already loaded. Otherwise every
    sequence is stored at the position that _sequenceID assigns to its
    identifier. The file is closed whether reading succeeds or fails, and
    the object is only marked as loaded after a complete read.

    @raises: {ValueError} if sequence data appears before the first header.
    '''
    if self.is_loaded:
        return

    self._sequences = [None] * self._total_sequences
    current = None  # sequence being filled; None until a header is seen
    self.file.open()
    try:
        for line in self.file.read():
            if line.startswith('>'):
                seqID = line.lstrip('>').strip()
                current = Sequence(sequence_id=seqID)
                self._sequences[self._sequenceID[seqID]] = current
                continue
            content = line.strip()
            if not content:
                continue
            if current is None:
                raise ValueError(
                    'Sequence data found before the first FASTA header')
            current.append(content)
    finally:
        # Do not leave the handle open when a line cannot be processed.
        self.file.close()
    self._loaded = True
def live_show(self):
    '''
    Yields the different sequences in the file without actually storing
    them to memory.

    When the sequences are already loaded they are served from memory and
    the file is not touched. Otherwise the file is opened and read line by
    line; it is closed once its last line has been read (before the final
    sequence is handed out) and also when the generator is closed or fails
    half way. A file without any '>' header yields nothing.

    @raises: {ValueError} if sequence data appears before the first header.
    @yields: {Sequence}
    '''
    if self.is_loaded:
        for loaded in self.sequences:
            yield loaded
        return

    current = None  # sequence being filled; None until a header is seen
    self.file.open()
    try:
        for line in self.file.read():
            if line.startswith('>'):
                # A new header completes the previous sequence, if any.
                if current is not None:
                    yield current
                current = Sequence(sequence_id=line.lstrip('>').strip())
                continue
            content = line.strip()
            if not content:
                continue
            if current is None:
                raise ValueError(
                    'Sequence data found before the first FASTA header')
            current.append(content)
    finally:
        self.file.close()
    if current is not None:
        yield current
def __init__(self, sequences, sequence_inits, identities=None,
             positives=None, gaps=None):
    '''
    Builds the alignment from its sequences and their starting numbers.

    @param:    sequences
    @pdef:     sequences to add to the alignment.
    @ptype:    {List} of {String} or {Sequence}

    @param:    sequence_inits
    @pdef:     initial number of each sequence of the alignment.
               (not all sequences start alignment at 1)
    @ptype:    {List} of {Integer}

    @param:    identities
    @pdef:     number of identities in the alignment
    @pdefault: _None_
    @ptype:    {Integer}

    @param:    positives
    @pdef:     number of positives in the alignment
    @pdefault: _None_
    @ptype:    {Integer}

    @param:    gaps
    @pdef:     number of gaps in the alignment
    @pdefault: _None_
    @ptype:    {Integer}

    @raises: {AttributeError} if sequences or sequence_inits is not a
             {List}, if both lists differ in length, or if sequences
             holds anything other than {String} or {Sequence}.
    @raises: {SeqAliError} if the fragmentation of the alignment encounters
             some problem.
    '''
    must_be_lists = (
        (sequences, 'Sequences must be added in a list\n'),
        (sequence_inits, 'Sequence inits must be added in a list\n'),
    )
    for value, complaint in must_be_lists:
        if not isinstance(value, list):
            raise AttributeError(complaint)
    if len(sequences) != len(sequence_inits):
        raise AttributeError('One init is required for each sequence\n')

    self._error = SeqAliError()

    self._seq = []
    for item in sequences:
        # Sequence objects are stored as they are; strings get wrapped.
        if isinstance(item, Sequence):
            self._seq.append(item)
            continue
        if not isinstance(item, basestring):
            raise AttributeError(
                'sequences must be specified as strings or Sequence objects.'
            )
        self._seq.append(Sequence(sequence=item))
    self._num_seq = len(self._seq)

    # The following steps read the state set above, so order matters.
    self._segment = []
    self._idx = sequence_inits
    self._seq2ali = self._built_seq2ali(sequence_inits)
    self._staticSA = self._seq2ali
    segmented = self._search_segments()
    self._segmentation_ok = segmented
    if not segmented:
        raise self._error.wrong_segmentation()

    self._identities = identities
    self._positives = positives
    self._gaps = gaps

    self._alipatt = None
    self._aliptmeth = None
    self._aligned_aa = self._get_aligned_aa()
def retrieve(self, seqID, allbut=False, prefix_size=None):
    '''
    Gets one or several sequences by identifier.

    When the object holds no sequences (len(self) == 0) the file is scanned
    and then closed; otherwise the answer comes from the sequences already
    in memory. The identifier of a header is its first whitespace-separated
    word after the leading '>'.

    NOTE: allbut and prefix_size are only honoured when the file is
    scanned; the in-memory lookups ignore them.

    @param:    seqID
    @pdef:     identifier(s) of the sequence(s) to retrieve. A {List} is
               treated as a {Set}; the given collection is never modified.
    @ptype:    {String}, {List} or {Set}

    @param:    allbut
    @pdef:     with several identifiers, select every sequence _except_
               the given ones
    @pdefault: _False_
    @ptype:    {Boolean}

    @param:    prefix_size
    @pdef:     compare only this many leading characters of each header
               identifier against the requested identifier(s)
    @pdefault: _None_
    @ptype:    {Integer}

    @raises: {KeyError} if a requested identifier is not among the
             sequences in memory.
    @raises: {TypeError} if seqID is not a {String}, {List} or {Set}.

    @return: {Sequence} for a single identifier (an empty one when the scan
             finds no match); {List} of {Sequence} otherwise.
    '''
    SBIglobals.alert('debug', self, 'Getting sequence for {0}'.format(seqID))

    if isinstance(seqID, basestring):
        if len(self) != 0:
            if seqID not in self._seqfinder:
                raise KeyError(seqID)
            return self._sequences[self._seqfinder[seqID]]

        sequence = Sequence()
        read = False
        descriptor = self.file.descriptor
        try:
            for line in descriptor:
                if line.startswith('>'):
                    full_id = line.lstrip('>').split()[0].strip()
                    if prefix_size is not None:
                        sid = full_id[:prefix_size]
                    else:
                        sid = full_id
                    if sid == seqID:
                        sequence.id = full_id
                        read = True
                    elif read:
                        # Header of the next entry: the match is complete.
                        break
                elif read and len(line.strip()) > 0:
                    sequence.append(line.strip())
        finally:
            self.file.close()
        return sequence

    if not isinstance(seqID, (list, set)):
        raise TypeError('seqID must be a string, a list or a set')

    # Work on a private copy: identifiers are removed from it as they are
    # found, and the caller's collection must stay untouched.
    wanted = set(seqID)
    sequence = []

    if len(self) != 0:
        for queryID in wanted:
            if queryID not in self._seqfinder:
                raise KeyError(queryID)
            sequence.append(self._sequences[self._seqfinder[queryID]])
        return sequence

    read = False
    descriptor = self.file.descriptor
    try:
        for line in descriptor:
            if line.startswith('>'):
                full_id = line.lstrip('>').split()[0].strip()
                if prefix_size is not None:
                    sid = full_id[:prefix_size]
                else:
                    sid = full_id
                if (not allbut and sid in wanted) or \
                   (allbut and sid not in wanted):
                    newSeq = Sequence(seqID=full_id)
                    sequence.append(newSeq)
                    # With exact matching each identifier is expected once,
                    # so it can be dropped from the pending set.
                    if not allbut and prefix_size is None:
                        wanted.remove(newSeq.id)
                    read = True
                elif read:
                    read = False
                elif len(wanted) == 0:
                    # Nothing left to look for: stop scanning the file.
                    break
            elif read and len(line.strip()) > 0:
                sequence[-1].append(line.strip())
    finally:
        self.file.close()
    return sequence