def seqScanning(Uniprot_coord):
    '''Returns a list of SAVs (single amino acid variants).

    If the string 'Uniprot_coord' is just a Uniprot ID, the list will
    contain all possible amino acid substitutions at all positions in the
    sequence. If 'Uniprot_coord' also includes a specific position
    (1-based), the list will only contain all possible amino acid variants
    at that position.

    Each SAV is a string of the form '<acc> <position> <wt_aa> <new_aa>'.

    Raises:
        AssertionError: if the argument is not a string, or does not
            consist of one or two whitespace-separated fields.
        ValueError: if the position field is not an integer.
        IndexError: if the position lies outside the sequence.
    '''
    # NOTE: these checks are kept as assertions so that the exception type
    # seen by existing callers does not change.
    assert isinstance(Uniprot_coord, str), "Must be a string."
    coord = Uniprot_coord.strip().split()
    assert 0 < len(coord) < 3, \
        "Invalid format. Examples: 'Q9BW27' or 'Q9BW27 10'."
    acc = coord[0]
    # parse the optional position before querying, so that malformed input
    # fails without a network round trip
    resid = int(coord[1]) if len(coord) == 2 else None
    Uniprot_record = pd.queryUniprot(acc)
    sequence = Uniprot_record['sequence 0'].replace("\n", "")
    if resid is None:
        positions = range(len(sequence))
    else:
        # without this check, position 0 (or a negative one) would wrap
        # around to the end of the sequence and produce mislabelled SAVs
        if not 1 <= resid <= len(sequence):
            raise IndexError(
                "Position {} is outside the sequence of {} (1-{}).".format(
                    resid, acc, len(sequence)))
        positions = [resid - 1]
    SAV_list = []
    for i in positions:
        wt_aa = sequence[i]
        SAV_list.extend(
            ' '.join([acc, str(i + 1), wt_aa, aa])
            for aa in 'ACDEFGHIKLMNPQRSTVWY' if aa != wt_aa
        )
    return SAV_list
def recoverPickle(self, filename=None, folder=None, days=30, **kwargs):
    """Restore this object's attributes from a previously saved pickle.

    Args:
        filename: name of the pickle file inside 'folder'. If None, the
            name 'UniprotMap-<acc>.pkl' is tried first with 'self.acc' and
            then with the accession returned by a Uniprot query.
        folder: directory containing the pickle. If None, the 'pickles'
            subfolder of the 'rhapsody_local_folder' setting is used, or
            the current directory when that setting is not defined.
        days: maximum accepted age of the pickle, in days.
        **kwargs: accepted for compatibility; not used here.

    Raises:
        IOError: if the pickle file does not exist.
        ValueError: if the accession number stored in the pickle does not
            match the one of this object.
        RuntimeError: if the pickle is older than 'days' days.
    """
    acc = self.uniq_acc
    if acc is None:
        # assume acc is equal to uniq_acc
        acc = self.acc
    if folder is None:
        folder = SETTINGS.get('rhapsody_local_folder')
        if folder is None:
            folder = '.'
        else:
            folder = os.path.join(folder, 'pickles')
    if filename is None:
        # assume acc is equal to uniq_acc
        acc = self.acc
        filename = 'UniprotMap-' + acc + '.pkl'
        pickle_path = os.path.join(folder, filename)
        if not os.path.isfile(pickle_path):
            # import unique accession number
            acc = pd.queryUniprot(self.acc)['accession 0']
            filename = 'UniprotMap-' + acc + '.pkl'
            pickle_path = os.path.join(folder, filename)
    else:
        pickle_path = os.path.join(folder, filename)
    # check if pickle exists
    if not os.path.isfile(pickle_path):
        raise IOError("File '{}' not found".format(filename))
    # load pickle; the context manager guarantees the handle is closed.
    # NOTE(review): unpickling executes arbitrary code, so 'folder' must
    # only ever point to trusted, locally generated files.
    with open(pickle_path, "rb") as pickle_file:
        recovered_self = pickle.load(pickle_file)
    if acc not in [recovered_self.acc, recovered_self.uniq_acc]:
        raise ValueError('Accession number in recovered pickle (%s) '
                         % recovered_self.uniq_acc + 'does not match.')
    # check timestamp and ignore pickles that are too old
    try:
        t_old = datetime.datetime.strptime(
            recovered_self.timestamp, "%Y-%m-%d %H:%M:%S.%f")
    except ValueError:
        # str(datetime) drops the fractional part when microseconds are
        # exactly zero, so also accept a timestamp without it
        t_old = datetime.datetime.strptime(
            recovered_self.timestamp, "%Y-%m-%d %H:%M:%S")
    # naive UTC "now", comparable with the naive UTC timestamp stored above
    t_now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    Delta_t = datetime.timedelta(days=days)
    if t_old + Delta_t < t_now:
        raise RuntimeError(
            'Pickle {} was too old and was ignored.'.format(filename))
    # copy the recovered state onto this instance
    self.fullRecord = recovered_self.fullRecord
    self.uniq_acc = recovered_self.uniq_acc
    self.sequence = recovered_self.sequence
    self.PDBrecords = recovered_self.PDBrecords
    self.PDBmappings = recovered_self.PDBmappings
    self.customPDBmappings = recovered_self.customPDBmappings
    self._align_algo_args = recovered_self._align_algo_args
    self._align_algo_kwargs = recovered_self._align_algo_kwargs
    self.timestamp = recovered_self.timestamp
    self.Pfam = recovered_self.Pfam
    LOGGER.info("Pickle '{}' recovered.".format(filename))
    return
def _print_fasta_file(Uniprot_accs, filename='custom_sequences.fasta'):
    """Write the sequences of the given Uniprot accessions to a FASTA file.

    Each record is stored under a temporary accession number of the form
    '<acc>-<YYYYMMDD>', built from today's date.

    Args:
        Uniprot_accs: iterable of Uniprot accession numbers.
        filename: path of the FASTA file to (over)write.

    Returns:
        A tuple (filename, new_accs), where 'new_accs' maps every original
        accession number to the temporary one used in the file.
    """
    date = datetime.date.today().strftime('%Y%m%d')
    new_accs = {}
    with open(filename, 'w', 1) as f:
        for acc in Uniprot_accs:
            new_acc = f"{acc}-{date}"
            # fetch the record first, so that a failed query does not leave
            # a header without sequence in the file
            record = queryUniprot(acc)
            # drop any line breaks embedded in the record and terminate the
            # header and sequence lines explicitly; otherwise header and
            # residues (and consecutive records) could end up on one line
            sequence = record['sequence 0'].replace("\n", "")
            f.write(f">{new_acc}\n{sequence}\n")
            # store new temporary accession numbers
            new_accs[acc] = new_acc
    return filename, new_accs
def queryUniprot(*args, n_attempts=3, dt=1, **kwargs):
    """Redefine prody function to check for no internet connection.

    Before forwarding the query, www.uniprot.org is contacted up to
    'n_attempts' times, sleeping for an increasing multiple of 'dt' seconds
    after every failure. If all attempts fail, one last unguarded attempt
    is made so that the underlying connection error reaches the caller.

    Args:
        *args, **kwargs: forwarded unchanged to prody's queryUniprot.
        n_attempts: number of guarded connection attempts.
        dt: base waiting time, in seconds, between attempts.

    Returns:
        Whatever prody's queryUniprot returns for the given arguments.
    """
    attempt = 0
    while attempt < n_attempts:
        try:
            openURL('http://www.uniprot.org/')
            break
        except Exception:
            # Only ordinary errors are retried: KeyboardInterrupt and
            # SystemExit must propagate so the wait loop can be interrupted.
            LOGGER.info(f'Attempt {attempt} to contact www.uniprot.org failed')
            attempt += 1
            time.sleep((attempt + 1) * dt)
    else:
        # every guarded attempt failed: try once more and let the error out
        openURL('http://www.uniprot.org/')
    return pd.queryUniprot(*args, **kwargs)
def refresh(self):
    """Refresh imported Uniprot records and mappings, and delete
    precomputed alignments.

    The Uniprot record for 'self.acc' is downloaded again and every
    attribute derived from it is reset: official accession number,
    sequence, PDB records and mappings, custom PDB mappings, alignment
    settings, timestamp and Pfam data.
    """
    # import Uniprot record and official accession number
    rec = queryUniprot(self.acc)
    self.fullRecord = rec
    self.uniq_acc = rec['accession 0']
    # import main sequence and PDB records
    self.sequence = rec['sequence 0'].replace("\n", "")
    self.PDBrecords = [
        value for key, value in rec.items()
        if key.startswith('dbRef') and 'PDB' in value
    ]
    # parse PDB records into PDB mappings, easier to access
    self._initiatePDBmappings()
    # set remaining attributes
    self.customPDBmappings = []
    self._align_algo_args = ['localxs', -0.5, -0.1]
    self._align_algo_kwargs = {'one_alignment_only': True}
    # Format the naive UTC time explicitly: str(datetime) omits the
    # fractional part when microseconds are zero, which the
    # "%Y-%m-%d %H:%M:%S.%f" parser in recoverPickle cannot read.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    stamp = now.strftime("%Y-%m-%d %H:%M:%S.%f")
    self._timestamp = stamp
    # recoverPickle reads and assigns the public 'timestamp' attribute, so
    # keep it populated as well.
    # NOTE(review): if the class already exposes 'timestamp' as a property
    # backed by '_timestamp', this second assignment is redundant - confirm.
    self.timestamp = stamp
    self.Pfam = None
    return