Esempio n. 1
0
def seqScanning(Uniprot_coord):
    '''Return the list of single amino acid variants (SAVs) for a sequence.

    Args:
        Uniprot_coord: a string holding a Uniprot accession, optionally
            followed by a whitespace-separated, 1-based sequence position.
            Examples: 'Q9BW27' or 'Q9BW27 10'.

    Returns:
        A list of strings formatted as '<acc> <position> <wt_aa> <new_aa>',
        with 1-based positions. If only an accession is given, every
        position of the sequence is scanned; if a position is given, only
        the substitutions at that position are listed. The wild-type
        residue itself is never listed as a substitution.

    Raises:
        AssertionError: if the input is not a string, or does not consist
            of exactly one or two whitespace-separated fields.
        ValueError: if the position field is not an integer.
        IndexError: if the position lies outside the sequence.
    '''
    assert isinstance(Uniprot_coord, str), "Must be a string."
    coord = Uniprot_coord.strip().split()
    # an empty string would otherwise slip through and crash on coord[0]
    assert 0 < len(coord) < 3, \
        "Invalid format. Examples: 'Q9BW27' or 'Q9BW27 10'."
    acc = coord[0]
    # parse the position before querying Uniprot, so that malformed input
    # fails without a network round trip
    site = int(coord[1]) if len(coord) == 2 else None
    Uniprot_record = pd.queryUniprot(acc)
    sequence = Uniprot_record['sequence   0'].replace("\n", "")
    if site is None:
        # all positions are requested
        positions = range(len(sequence))
    else:
        # a single position is requested; reject 0 and negative values
        # explicitly, because they would be accepted as negative indices
        # and silently pick a residue counted from the end of the sequence
        if not 1 <= site <= len(sequence):
            raise IndexError(
                "Position {} is outside of sequence '{}' (length {}).".format(
                    site, acc, len(sequence)))
        positions = [site - 1]
    SAV_list = []
    for i in positions:
        wt_aa = sequence[i]
        SAV_list.extend(
            ' '.join([acc, str(i + 1), wt_aa, aa])
            for aa in 'ACDEFGHIKLMNPQRSTVWY' if aa != wt_aa
        )
    return SAV_list
Esempio n. 2
0
 def recoverPickle(self, filename=None, folder=None, days=30, **kwargs):
     """Restore this object's attributes from a previously saved pickle.

     Args:
         filename: name of the pickle file inside 'folder'. If None, the
             name 'UniprotMap-<acc>.pkl' is tried with 'self.acc' first
             and, if that file does not exist, with the accession number
             returned by a Uniprot query for 'self.acc'.
         folder: directory containing the pickle. If None, the 'pickles'
             subfolder of the 'rhapsody_local_folder' setting is used, or
             the current directory when that setting is not defined.
         days: maximum accepted age of the pickle, in days.
         **kwargs: accepted but not used by this method.

     Raises:
         IOError: if the pickle file does not exist.
         ValueError: if the accession number stored in the pickle matches
             neither its 'acc' nor its 'uniq_acc' attribute.
         RuntimeError: if the pickle is older than 'days' days.
     """
     if folder is None:
         folder = SETTINGS.get('rhapsody_local_folder')
         if folder is None:
             folder = '.'
         else:
             folder = os.path.join(folder, 'pickles')
     if filename is None:
         # first guess: the pickle is named after the accession as provided
         acc = self.acc
         filename = 'UniprotMap-' + acc + '.pkl'
         pickle_path = os.path.join(folder, filename)
         if not os.path.isfile(pickle_path):
             # second guess: named after the accession reported by Uniprot
             acc = pd.queryUniprot(self.acc)['accession   0']
             filename = 'UniprotMap-' + acc + '.pkl'
             pickle_path = os.path.join(folder, filename)
     else:
         # explicit filename: validate the content against the unique
         # accession number if known, otherwise against the provided one
         acc = self.uniq_acc
         if acc is None:
             acc = self.acc
         pickle_path = os.path.join(folder, filename)
     # check if pickle exists
     if not os.path.isfile(pickle_path):
         raise IOError("File '{}' not found".format(filename))
     # load pickle, making sure the file handle is closed afterwards
     # NOTE(review): unpickling can execute arbitrary code; this is only
     # safe if the pickle folder holds files written by trusted code.
     with open(pickle_path, "rb") as pickle_file:
         recovered_self = pickle.load(pickle_file)
     if acc not in [recovered_self.acc, recovered_self.uniq_acc]:
         raise ValueError(
             'Accession number in recovered pickle ({}) does not match.'
             .format(recovered_self.uniq_acc))
     # check timestamp and ignore pickles that are too old
     # (both timestamps are compared as naive datetimes, current time in UTC)
     date_format = "%Y-%m-%d %H:%M:%S.%f"
     t_old = datetime.datetime.strptime(recovered_self.timestamp,
                                        date_format)
     t_now = datetime.datetime.utcnow()
     Delta_t = datetime.timedelta(days=days)
     if t_old + Delta_t < t_now:
         raise RuntimeError(
             'Pickle {} was too old and was ignored.'.format(filename))
     # copy the recovered state onto this instance
     self.fullRecord = recovered_self.fullRecord
     self.uniq_acc = recovered_self.uniq_acc
     self.sequence = recovered_self.sequence
     self.PDBrecords = recovered_self.PDBrecords
     self.PDBmappings = recovered_self.PDBmappings
     self.customPDBmappings = recovered_self.customPDBmappings
     self._align_algo_args = recovered_self._align_algo_args
     self._align_algo_kwargs = recovered_self._align_algo_kwargs
     self.timestamp = recovered_self.timestamp
     self.Pfam = recovered_self.Pfam
     LOGGER.info("Pickle '{}' recovered.".format(filename))
     return
Esempio n. 3
0
def _print_fasta_file(Uniprot_accs, filename='custom_sequences.fasta'):
    date = datetime.date.today().strftime('%Y%m%d')
    new_accs = {}
    with open(filename, 'w', 1) as f:
        for acc in Uniprot_accs:
            new_acc = f"{acc}-{date}"
            f.write(f">{new_acc}")
            record = queryUniprot(acc)
            sequence = record['sequence   0']
            f.write(sequence)
            # store new temporary accession numbers
            new_accs[acc] = new_acc
    return filename, new_accs
Esempio n. 4
0
def queryUniprot(*args, n_attempts=3, dt=1, **kwargs):
    """
    Redefine prody function to check for no internet connection.

    Before running the actual query, www.uniprot.org is contacted up to
    'n_attempts' times, sleeping for an increasing amount of time after
    every failure (2*dt, 3*dt, ... seconds). If all attempts fail, the
    site is contacted one last time without catching the error, so that
    the underlying exception reaches the caller.

    Args:
        *args, **kwargs: forwarded unchanged to 'pd.queryUniprot'.
        n_attempts: number of connection attempts whose failure is
            tolerated (and logged).
        dt: base waiting time, in seconds, between attempts.

    Returns:
        Whatever 'pd.queryUniprot(*args, **kwargs)' returns.
    """
    for attempt in range(n_attempts):
        try:
            _ = openURL('http://www.uniprot.org/')
            break
        except Exception:
            # 'Exception' rather than a bare 'except', so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit are not swallowed and
            # can still interrupt the retry loop
            LOGGER.info(f'Attempt {attempt} to contact www.uniprot.org failed')
            time.sleep((attempt + 2) * dt)
    else:
        # every attempt failed: try once more and let the error propagate
        _ = openURL('http://www.uniprot.org/')
    return pd.queryUniprot(*args, **kwargs)
Esempio n. 5
0
 def refresh(self):
     """Re-import the Uniprot record for 'self.acc' and reset derived data.

     The full record, the official accession number, the main sequence
     (with newlines removed) and the PDB cross-references are fetched
     again; PDB mappings are rebuilt via '_initiatePDBmappings()'; custom
     PDB mappings, alignment settings, timestamp and Pfam data are reset
     to their initial values.
     """
     # fetch the Uniprot record and the official accession number
     record = queryUniprot(self.acc)
     self.fullRecord = record
     self.uniq_acc = record['accession   0']

     # main sequence, stripped of line breaks
     raw_sequence = record['sequence   0']
     self.sequence = raw_sequence.replace("\n", "")

     # collect the database cross-references that mention PDB
     pdb_records = []
     for key in record.keys():
         if not key.startswith('dbRef'):
             continue
         entry = record[key]
         if 'PDB' in entry:
             pdb_records.append(entry)
     self.PDBrecords = pdb_records

     # turn PDB records into PDB mappings, which are easier to access
     self._initiatePDBmappings()

     # reset everything else to its initial state
     self.customPDBmappings = []
     self._align_algo_args = ['localxs', -0.5, -0.1]
     self._align_algo_kwargs = {'one_alignment_only': True}
     now_utc = datetime.datetime.utcnow()
     self._timestamp = str(now_utc)
     self.Pfam = None