def wwPDBServer(*key):
    """Set/get `wwPDB`_ FTP/HTTP server location used for downloading PDB
    structures.  Use one of the following keywords for setting a server:

    +---------------------------+-----------------------------+
    | wwPDB FTP server          | *Key* (case insensitive)    |
    +===========================+=============================+
    | RCSB PDB (USA) (default)  | RCSB, USA, US               |
    +---------------------------+-----------------------------+
    | PDBe (Europe)             | PDBe, Europe, Euro, EU      |
    +---------------------------+-----------------------------+
    | PDBj (Japan)              | PDBj, Japan, Jp             |
    +---------------------------+-----------------------------+

    .. _wwPDB: http://www.wwpdb.org/

    When called without arguments, the stored key is returned (**None** if
    nothing is stored).  When called with a single key, the lower-cased key
    is stored, settings are saved, and nothing is returned.

    :raises TypeError: if more than one key is given, or the key has no
        ``lower`` method (i.e. it is not a string)
    :raises ValueError: if the key is not a known server identifier"""

    # Query mode: no positional argument at all.
    if not key:
        return SETTINGS.get('wwpdb', None)

    if len(key) != 1:
        raise TypeError('one wwPDB server identifier is expected, {0} given'
                        .format(len(key)))

    try:
        server = key[0].lower()
    except AttributeError:
        raise TypeError('key must be a string')

    if server not in WWPDB_FTP_SERVERS:
        raise ValueError('{0} is not a valid wwPDB server identifier'
                         .format(repr(server)))

    SETTINGS['wwpdb'] = server
    SETTINGS.save()
    LOGGER.info('wwPDB server is set to {}.'
                .format(WWPDB_FTP_SERVERS[server][0]))
def backupFile(filename, backup=None, backup_ext='.BAK', **kwargs):
    """Rename *filename* with *backup_ext* appended to its name for backup
    purposes, if *backup* is **True** or if automatic backups is turned on
    using :func:`.confProDy`.  Default extension :file:`.BAK` is used when
    one is not set using :func:`.confProDy`.  If *filename* does not exist,
    no action will be taken and *filename* will be returned.  If file is
    successfully renamed, new filename will be returned.  If the file cannot
    be renamed, it is left in place and *filename* is returned.

    :arg filename: path of the file to back up
    :arg backup: truthy to force a backup; otherwise the ``backup`` package
        setting decides
    :arg backup_ext: extension appended to *filename*; the default value
        ``'.BAK'`` is replaced by the ``backup_ext`` package setting
    :raises TypeError: if *filename* cannot be tested with ``isfile``

    Extra keyword arguments are accepted and ignored."""

    try:
        exists = isfile(filename)
    except Exception as err:
        raise TypeError('filename must be a string ({0})'.format(str(err)))

    if not exists:
        return filename

    # Settings are needed only when the caller did not force a backup or
    # left the extension at its default; the import is kept function-local,
    # as in the original, and is skipped when nothing has to be looked up.
    settings = None
    if not backup or backup_ext == '.BAK':
        from caviar.prody_parser import SETTINGS as settings

    if not (backup or settings.get('backup', False)):
        return filename

    if backup_ext == '.BAK':
        backup_ext = settings.get('backup_ext', '.BAK')
    bak = filename + backup_ext

    if isfile(bak):
        try:
            os.remove(bak)
        except OSError:
            # Best effort: rename below may still replace the stale backup.
            pass

    try:
        os.rename(filename, bak)
    except OSError:
        # Nothing was renamed, so the original name is still the valid one.
        return filename
    return bak
def changeDefinitions(**kwargs):
    """Merge keyword arguments into the definitions stored in settings.

    The mapping stored under ``DEFINITIONS_KEY`` (an empty one when absent)
    is updated with *kwargs* and written back, the value under
    ``TIMESTAMP_KEY`` is set to the current time in whole seconds, settings
    are saved, and :func:`updateDefinitions` is called."""

    stored = SETTINGS.get(DEFINITIONS_KEY, {})
    stored.update(kwargs)
    SETTINGS[DEFINITIONS_KEY] = stored

    stamp = int(time())
    SETTINGS[TIMESTAMP_KEY] = stamp
    SETTINGS.save()

    updateDefinitions()
def pathPDBMirror(path=None, format=None):
    """Returns or specify PDB mirror path to be used by :func:`.fetchPDB`.
    To release the current mirror, pass an invalid path, e.g. ``path=''``.
    If you are keeping a partial mirror, such as PDB files in
    :file:`/data/structures/divided/pdb/` folder, specify *format*, which is
    ``'pdb'`` in this case.

    When *path* is **None**, the stored mirror is reported: the path alone
    if no format is stored, otherwise a ``(path, format)`` tuple.  Nothing
    is returned if no mirror is stored or the stored path is not a
    directory (a warning is logged in the latter case).

    :raises IOError: if *path* is not a directory and there is no stored
        mirror to release"""

    # --- query -----------------------------------------------------------
    if path is None:
        mirror = SETTINGS.get('pdb_mirror_path')
        mirror_format = SETTINGS.get('pdb_mirror_format', None)
        if not mirror:
            return
        if not isdir(mirror):
            LOGGER.warning(
                'PDB mirror path {0} is not a accessible.'.format(
                    repr(mirror)))
            return
        if mirror_format is None:
            return mirror
        return mirror, mirror_format

    # --- set -------------------------------------------------------------
    if isdir(path):
        mirror = abspath(path)
        LOGGER.info('Local PDB mirror path is set: {0}'.format(repr(mirror)))
        SETTINGS['pdb_mirror_path'] = mirror
        SETTINGS['pdb_mirror_format'] = format
        SETTINGS.save()
        return

    # --- release ---------------------------------------------------------
    released = SETTINGS.pop('pdb_mirror_path')
    if not released:
        raise IOError('{0} is not a valid path.'.format(repr(path)))
    LOGGER.info('PDB mirror {0} is released.'.format(repr(released)))
    SETTINGS.save()
def pathPDBFolder(folder=None, divided=False):
    """Returns or specify local PDB folder for storing PDB files downloaded
    from `wwPDB <http://www.wwpdb.org/>`_ servers.  Files stored in this
    folder can be accessed via :func:`.fetchPDB` from any working directory.
    To release the current folder, pass an invalid path, e.g. ``folder=''``.

    If *divided* is **True**, the divided folder structure of wwPDB servers
    will be assumed when reading from and writing to the local folder.  For
    example, a structure with identifier **1XYZ** will be present as
    :file:`pdblocalfolder/yz/pdb1xyz.pdb.gz`.  If *divided* is **False**,
    a plain folder structure will be expected and adopted when saving files.
    For example, the same structure will be present as
    :file:`pdblocalfolder/1xyz.pdb.gz`.

    Finally, in either case, lower case letters will be used and compressed
    files will be stored.

    When *folder* is **None**, a ``(folder, divided)`` tuple is returned for
    the stored folder; nothing is returned if no folder is stored or the
    stored path is not a directory (a warning is logged in the latter case).

    :raises IOError: if *folder* is not a directory and there is no stored
        folder to release"""

    # --- query -----------------------------------------------------------
    if folder is None:
        stored = SETTINGS.get('pdb_local_folder')
        if not stored:
            return
        if isdir(stored):
            return stored, SETTINGS.get('pdb_local_divided', True)
        LOGGER.warn('PDB local folder {0} is not a accessible.'.format(
            repr(stored)))
        return

    # --- set -------------------------------------------------------------
    if isdir(folder):
        resolved = abspath(folder)
        LOGGER.info('Local PDB folder is set: {0}'.format(repr(resolved)))
        layout_note = ('wwPDB divided folder structure will be assumed.'
                       if divided else
                       'A plain folder structure will be assumed.')
        LOGGER.info(layout_note)
        SETTINGS['pdb_local_folder'] = resolved
        SETTINGS['pdb_local_divided'] = bool(divided)
        SETTINGS.save()
        return

    # --- release ---------------------------------------------------------
    released = SETTINGS.pop('pdb_local_folder')
    if not released:
        raise IOError('{0} is not a valid path.'.format(repr(folder)))
    LOGGER.info('PDB folder {0} is released.'.format(repr(released)))
    SETTINGS.pop('pdb_local_divided')
    SETTINGS.save()
def listNonstdAAProps(resname):
    """Returns properties of non-standard amino acid *resname*, as a sorted
    list.

    .. ipython:: python

       listNonstdAAProps('PTR')

    :raises ValueError: if *resname* is not among the non-standard residue
        names"""

    try:
        props = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)[resname]
    except KeyError:
        raise ValueError('{0} is not a non-standard residue name'.format(
            repr(resname)))
    return sorted(props)
def delNonstdAminoacid(resname):
    """Delete non-standard amino acid *resname*.

    .. ipython:: python

       delNonstdAminoacid('PTR')
       flagDefinition('nonstdaa')

    Default set of non-standard amino acids can be restored as follows:

    .. ipython:: python

       flagDefinition(reset='nonstdaa')

    :raises ValueError: if *resname* is not among the non-standard residue
        names"""

    # Work on a shallow copy.  When no user table is stored, the lookup
    # presumably hands back the module-level NONSTANDARD object itself (as
    # dict.get would); popping from it in place would alter the defaults
    # that a later reset is expected to restore.
    nonstd = dict(SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD))
    try:
        nonstd.pop(resname)
    except KeyError:
        raise ValueError('{0} is not a non-standard residue name'.format(
            repr(resname)))
    updateNonstandard(nonstd)
def addNonstdAminoacid(resname, *properties):
    """Add non-standard amino acid *resname* with *properties* selected from:

      * {props}

    .. ipython:: python

       addNonstdAminoacid('PTR', 'acidic', 'aromatic', 'cyclic', 'large',
                          'polar', 'surface')

    Default set of non-standard amino acids can be restored as follows:

    .. ipython:: python

       flagDefinition(reset='nonstdaa')

    A :exc:`ValueError` is raised when two properties from the same category
    are given together, or when a property does not belong to any
    category."""

    resname = str(resname)
    if len(resname) > 4:
        LOGGER.warn('Residue name {0} is unusually long.'.format(
            repr(resname)))

    # Validate: at most one property per category, nothing outside them.
    propset = set(properties)
    for members in CATEGORIES.values():
        intersection = members.intersection(propset)
        if len(intersection) > 1:
            # Sorted so that the message does not depend on set ordering.
            raise ValueError('amino acid properties {0} cannot be '
                             'present together'.format(', '.join(
                                 sorted(repr(prp) for prp in intersection))))
        propset -= intersection
    if propset:
        raise ValueError('amino acid property {0} is not valid'.format(
            repr(propset.pop())))

    # Work on a shallow copy.  When no user table is stored, the lookup
    # presumably hands back the module-level NONSTANDARD object itself (as
    # dict.get would); assigning into it in place would alter the defaults
    # that a later reset is expected to restore.
    nonstd = dict(SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD))
    nonstd[resname] = set(properties)
    updateNonstandard(nonstd)
def updateDefinitions():
    """Update definitions and set some global variables.  This function must
    be called at the end of the module.

    Rebuilds the module-level ``DEFINITIONS`` mapping from ``DEFAULTS``,
    overridden per key by the user mapping stored in settings, and then
    rebinds ``AMINOACIDS``, ``BACKBONE`` and ``TIMESTAMP``."""

    global DEFINITIONS, AMINOACIDS, BACKBONE, TIMESTAMP
    DEFINITIONS = {}
    # NOTE(review): changeDefinitions writes under DEFINITIONS_KEY;
    # presumably that constant equals this literal -- confirm.
    user = SETTINGS.get('flag_definitions', {})

    # nucleics
    nucleic = set()
    for key in ['nucleobase', 'nucleoside', 'nucleotide']:
        aset = set(user.get(key, DEFAULTS[key]))
        nucleic.update(aset)
        DEFINITIONS[key] = aset
    DEFINITIONS['nucleic'] = nucleic

    # heteros
    for key in [
            'water', 'lipid', 'ion', 'sugar', 'heme', 'at', 'cg', 'purine',
            'pyrimidine'
    ]:
        DEFINITIONS[key] = set(user.get(key, DEFAULTS[key]))

    # backbone: look up the user override under the same key as the default.
    # (Previously the leftover loop variable, 'pyrimidine', was used, so a
    # user pyrimidine definition would have replaced the backbone sets.)
    DEFINITIONS['backbone'] = DEFINITIONS['bb'] = set(
        user.get('bb', DEFAULTS['bb']))
    DEFINITIONS['backbonefull'] = DEFINITIONS['bbfull'] = set(
        user.get('bbfull', DEFAULTS['bbfull']))

    # element regex
    for key in ['hydrogen', 'carbon', 'nitrogen', 'oxygen', 'sulfur']:
        DEFINITIONS[key] = recompile(user.get(key, DEFAULTS[key]))

    try:
        nonstd = SETTINGS[NONSTANDARD_KEY]
    except KeyError:
        # No user table: use the defaults and the prepared category sets.
        nonstd = NONSTANDARD
        DEFINITIONS.update(CATEGORIZED)
    else:
        # User table: rebuild each category from defaults, then add the
        # user's residues to every property they were given.
        for cat in CATEGORIES:
            for key in CATEGORIES[cat]:
                DEFINITIONS[key] = set(DEFAULTS[key])

        DEFINITIONS['charged'] = set(DEFINITIONS['acidic'])
        DEFINITIONS['charged'].update(DEFINITIONS['basic'])

        for resi, props in nonstd.items():
            for prop in props:
                DEFINITIONS[prop].add(resi)

    DEFINITIONS['stdaa'] = DEFAULTS['stdaa']
    DEFINITIONS['nonstdaa'] = set(nonstd)
    AMINOACIDS = set(DEFINITIONS['stdaa'])
    AMINOACIDS.update(DEFINITIONS['nonstdaa'])
    DEFINITIONS['protein'] = DEFINITIONS['aminoacid'] = AMINOACIDS

    BACKBONE = DEFINITIONS['bb']

    TIMESTAMP = SETTINGS.get(TIMESTAMP_KEY, 0)
def resetDefinitions(flag):
    """Discard user changes to definitions and refresh the module state.

    * ``'all'`` removes both the stored definitions and the stored
      non-standard amino acid table.
    * ``'nonstdaa'`` removes only the stored non-standard amino acid table.
    * Any other *flag* removes just that entry from the stored definitions;
      if there is no such entry, nothing is changed or saved.

    After a removal, the value under ``TIMESTAMP_KEY`` is set to the current
    time in whole seconds, settings are saved, and :func:`updateDefinitions`
    is called."""

    if flag == 'all':
        SETTINGS.pop(DEFINITIONS_KEY, None)
        SETTINGS.pop(NONSTANDARD_KEY, None)
    elif flag == 'nonstdaa':
        SETTINGS.pop(NONSTANDARD_KEY, None)
    else:
        # Remove only this flag.  Popping the whole mapping out of the
        # settings (as was done before) silently dropped every other user
        # definition along with it.
        defs = SETTINGS.get(DEFINITIONS_KEY, {})
        try:
            defs.pop(flag)
        except KeyError:
            return
        SETTINGS[DEFINITIONS_KEY] = defs

    SETTINGS[TIMESTAMP_KEY] = int(time())
    SETTINGS.save()
    updateDefinitions()
def updateNonstandard(nonstd):
    """Store *nonstd* as the non-standard amino acid table.

    The table is written under ``NONSTANDARD_KEY``, the value under
    ``TIMESTAMP_KEY`` is set to the current time in whole seconds, settings
    are saved, and :func:`updateDefinitions` is called."""

    stamp = int(time())
    SETTINGS[NONSTANDARD_KEY] = nonstd
    SETTINGS[TIMESTAMP_KEY] = stamp
    SETTINGS.save()

    updateDefinitions()
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, XML file will be saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be saved
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will contain data that is present in the XML
    file and all Ligand Expo XML files do not contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from caviar.prody_parser import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)

    :raises TypeError: if *cci* is not a string
    :raises ValueError: if *cci* is neither an existing file nor an
        alphanumeric identifier of at most four characters
    :raises IOError: if the XML file cannot be retrieved online"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')

    # ------------------------------------------------------------------
    # Locate the XML: local file, cached copy, or download.
    # ------------------------------------------------------------------
    if isfile(cci):
        with openFile(cci) as inp:
            xml = inp.read()
        url = None
        path = cci
        # Strip up to two extensions (e.g. '.xml.gz') to get the identifier.
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be 3-letters long and alphanumeric or '
                         'a valid filename')
    else:
        xml = None
        cci = cci.upper()
        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml = inp.read()
        else:
            folder = None
            path = None
        url = ('http://files.rcsb.org/ligands/download/{0}'
               '.xml'.format(cci.upper()))

    if not xml:
        try:
            inp = openURL(url)
        except IOError:
            raise IOError(
                'XML file for ligand {0} is not found online'.format(cci))
        try:
            xml = inp.read()
        finally:
            inp.close()
        if PY3K:
            xml = xml.decode()
        if filename:
            out = openFile(filename, mode='w', folder=folder)
            try:
                out.write(xml)
            finally:
                out.close()
        if SETTINGS.get('ligand_xml_save'):
            with openFile(xmlgz, 'w') as out:
                out.write(xml)

    # ------------------------------------------------------------------
    # Parse general component data.
    # ------------------------------------------------------------------
    # ElementTree rather than cElementTree: the latter was removed from the
    # standard library.  The 'as' form binds only ET, so the local variable
    # 'xml' holding the document text is not shadowed.
    import xml.etree.ElementTree as ET

    root = ET.XML(xml)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    ns = root.tag[:root.tag.rfind('}') + 1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text

    # The field may be absent; convert only when there is something to
    # convert instead of failing on float(None).
    weight = dict_.get('formula_weight')
    if weight is not None:
        dict_['formula_weight'] = float(weight)

    # Identifiers and descriptors.  Explicit None checks are used because
    # the truth value of an element depends on its number of children.
    identifiers_and_descriptors = []
    for category in ('pdbx_chem_comp_identifierCategory',
                     'pdbx_chem_comp_descriptorCategory'):
        results = root.find(ns + category)
        if results is not None:
            identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    dict_['audits'] = [
        (audit.get('action_type'), audit.get('date'))
        for audit in list(root.find(ns + 'pdbx_chem_comp_auditCategory'))
    ]

    # ------------------------------------------------------------------
    # Parse atoms.
    # ------------------------------------------------------------------
    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)
    # NOTE(review): residue numbers are allocated with the dtype of the
    # 'charge' field; presumably a residue-number field dtype was intended
    # -- confirm before changing.
    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    # Builtin bool: the np.bool alias is no longer available in NumPy.
    leaving_atom_flags = np.zeros(n_atoms, bool)
    aromatic_flags = np.zeros(n_atoms, bool)
    stereo_configs = np.zeros(n_atoms, bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = {child.tag[len_ns:]: child.text for child in list(atom)}

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        # Key spelling corrected from 'pdbx_atomatic_flag', which never
        # matched and left every aromatic flag False.
        aromatic_flags[i] = data.get('pdbx_aromatic_flag') == 'Y'
        # NOTE(review): stereo configuration is compared against 'Y'; if
        # the field carries other codes this flag is always False --
        # verify against the PDBx dictionary.
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    # ------------------------------------------------------------------
    # Build the model and ideal atom groups.
    # ------------------------------------------------------------------
    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model

    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    # ------------------------------------------------------------------
    # Parse bonds; unknown atom names are reported once each.
    # ------------------------------------------------------------------
    bonds = []
    warned = set()
    bond_category = root.find(ns + 'chem_comp_bondCategory')
    bond_elements = [] if bond_category is None else list(bond_category)
    for bond in bond_elements:
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)

    return dict_