コード例 #1
0
ファイル: wwpdb.py プロジェクト: jr-marchand/caviar
def wwPDBServer(*key):
    """Get or set the `wwPDB`_ FTP/HTTP server used for downloading PDB
    structures.

    Called without arguments, the currently configured server key is
    returned (**None** when nothing has been configured).  Called with a
    single keyword from the table below, that server is stored in the
    package settings, the settings are saved, and the choice is logged:

    +---------------------------+-----------------------------+
    | wwPDB FTP server          | *Key* (case insensitive)    |
    +===========================+=============================+
    | RCSB PDB (USA) (default)  | RCSB, USA, US               |
    +---------------------------+-----------------------------+
    | PDBe (Europe)             | PDBe, Europe, Euro, EU      |
    +---------------------------+-----------------------------+
    | PDBj (Japan)              | PDBj, Japan, Jp             |
    +---------------------------+-----------------------------+

    :raises TypeError: when the key is not a string, or when more than one
        positional argument is given
    :raises ValueError: when the key is not a known server identifier

    .. _wwPDB: http://www.wwpdb.org/"""

    count = len(key)
    if count == 0:
        # getter mode
        return SETTINGS.get('wwpdb', None)
    if count != 1:
        raise TypeError('one wwPDB server identifier is expected, {0} given'
                        .format(count))

    # keys are matched case-insensitively; anything without .lower() is
    # rejected as a non-string
    try:
        server = key[0].lower()
    except AttributeError:
        raise TypeError('key must be a string')

    if server not in WWPDB_FTP_SERVERS:
        raise ValueError('{0} is not a valid wwPDB server identifier'
                         .format(repr(server)))

    SETTINGS['wwpdb'] = server
    SETTINGS.save()
    LOGGER.info('wwPDB server is set to {}.'
                .format(WWPDB_FTP_SERVERS[server][0]))
コード例 #2
0
def backupFile(filename, backup=None, backup_ext='.BAK', **kwargs):
    """Rename *filename* with *backup_ext* appended to its name for backup
    purposes, if *backup* is **True** or if automatic backups is turned on
    using :func:`.confProDy`.  Default extension :file:`.BAK` is used when
    one is not set using :func:`.confProDy`.

    If *filename* does not exist or backups are not requested, no action is
    taken and *filename* is returned.  If the file is successfully renamed,
    the new filename is returned.  If renaming fails, the file is left in
    place and *filename* is returned, so the returned name always refers to
    where the data actually is.

    Extra keyword arguments are accepted and ignored.

    :raises TypeError: when *filename* cannot be tested for existence"""

    try:
        exists = isfile(filename)
    except Exception as err:
        raise TypeError('filename must be a string ({0})'.format(str(err)))

    # imported here rather than at module level, as in the original code
    from caviar.prody_parser import SETTINGS
    if not (exists and (backup or SETTINGS.get('backup', False))):
        return filename

    # an explicitly passed extension wins; the default defers to settings
    if backup_ext == '.BAK':
        backup_ext = SETTINGS.get('backup_ext', '.BAK')
    bak = filename + backup_ext

    if isfile(bak):
        try:
            os.remove(bak)
        except OSError:
            # best effort: the rename below may still be able to overwrite
            pass
    try:
        os.rename(filename, bak)
    except OSError:
        # backup could not be made; report the name the file still has
        return filename
    return bak
コード例 #3
0
def changeDefinitions(**kwargs):
    """Merge the given keyword arguments into the flag definitions stored
    in the package settings, stamp the settings with the current time, save
    them, and rebuild the definitions via :func:`updateDefinitions`."""

    definitions = SETTINGS.get(DEFINITIONS_KEY, {})
    for flag, value in kwargs.items():
        definitions[flag] = value

    SETTINGS[DEFINITIONS_KEY] = definitions
    SETTINGS[TIMESTAMP_KEY] = int(time())
    SETTINGS.save()
    updateDefinitions()
コード例 #4
0
ファイル: localpdb.py プロジェクト: jr-marchand/caviar
def pathPDBMirror(path=None, format=None):
    """Return or specify the PDB mirror path to be used by :func:`.fetchPDB`.

    With no *path*, the stored mirror is looked up: the path alone is
    returned when no format is stored, otherwise a ``(path, format)`` tuple.
    **None** is returned when no mirror is stored, or (after a warning) when
    the stored path is not an existing directory.

    With a *path* that is an existing directory, its absolute form and
    *format* are stored and the settings are saved.  To release the current
    mirror, pass an invalid path, e.g. ``path=''``; :exc:`IOError` is raised
    when there is no current mirror to release.

    If you are keeping a partial mirror, such as PDB files in
    :file:`/data/structures/divided/pdb/` folder, specify *format*, which is
    ``'pdb'`` in this case."""

    if path is None:
        # getter mode
        mirror = SETTINGS.get('pdb_mirror_path')
        mirror_format = SETTINGS.get('pdb_mirror_format', None)
        if not mirror:
            return
        if not isdir(mirror):
            LOGGER.warning(
                'PDB mirror path {0} is not a accessible.'.format(
                    repr(mirror)))
            return
        if mirror_format is None:
            return mirror
        return mirror, mirror_format

    if isdir(path):
        # setter mode
        path = abspath(path)
        LOGGER.info('Local PDB mirror path is set: {0}'.format(repr(path)))
        SETTINGS['pdb_mirror_path'] = path
        SETTINGS['pdb_mirror_format'] = format
        SETTINGS.save()
        return

    # release mode: an invalid path drops the stored mirror, if any
    released = SETTINGS.pop('pdb_mirror_path')
    if not released:
        raise IOError('{0} is not a valid path.'.format(repr(path)))
    LOGGER.info('PDB mirror {0} is released.'.format(
        repr(released)))
    SETTINGS.save()
コード例 #5
0
ファイル: localpdb.py プロジェクト: jr-marchand/caviar
def pathPDBFolder(folder=None, divided=False):
    """Return or specify the local PDB folder for storing PDB files
    downloaded from `wwPDB <http://www.wwpdb.org/>`_ servers.  Files stored
    in this folder can be accessed via :func:`.fetchPDB` from any working
    directory.  To release the current folder, pass an invalid path, e.g.
    ``folder=''``; :exc:`IOError` is raised when there is no current folder
    to release.

    With no *folder*, a ``(folder, divided)`` tuple is returned for the
    stored folder, or **None** when no folder is stored or (after a warning)
    the stored folder is not an existing directory.

    If *divided* is **True**, the divided folder structure of wwPDB servers
    will be assumed when reading from and writing to the local folder.  For
    example, a structure with identifier **1XYZ** will be present as
    :file:`pdblocalfolder/yz/pdb1xyz.pdb.gz`.

    If *divided* is **False**, a plain folder structure will be expected and
    adopted when saving files.  For example, the same structure will be
    present as :file:`pdblocalfolder/1xyz.pdb.gz`.

    Finally, in either case, lower case letters will be used and compressed
    files will be stored."""

    if folder is None:
        # getter mode
        stored = SETTINGS.get('pdb_local_folder')
        if not stored:
            return
        if not isdir(stored):
            LOGGER.warn('PDB local folder {0} is not a accessible.'.format(
                repr(stored)))
            return
        return stored, SETTINGS.get('pdb_local_divided', True)

    if isdir(folder):
        # setter mode
        folder = abspath(folder)
        LOGGER.info('Local PDB folder is set: {0}'.format(repr(folder)))
        layout_message = ('wwPDB divided folder structure will be assumed.'
                          if divided else
                          'A plain folder structure will be assumed.')
        LOGGER.info(layout_message)
        SETTINGS['pdb_local_folder'] = folder
        SETTINGS['pdb_local_divided'] = bool(divided)
        SETTINGS.save()
        return

    # release mode: an invalid path drops the stored folder, if any
    released = SETTINGS.pop('pdb_local_folder')
    if not released:
        raise IOError('{0} is not a valid path.'.format(repr(folder)))
    LOGGER.info('PDB folder {0} is released.'.format(
        repr(released)))
    SETTINGS.pop('pdb_local_divided')
    SETTINGS.save()
コード例 #6
0
def listNonstdAAProps(resname):
    """Return a sorted list of the properties of non-standard amino acid
    *resname*.  :exc:`ValueError` is raised when *resname* is not a known
    non-standard residue name.

    .. ipython:: python

       listNonstdAAProps('PTR')"""

    table = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
    try:
        props = table[resname]
    except KeyError:
        raise ValueError('{0} is not a non-standard residue name'.format(
            repr(resname)))
    return sorted(props)
コード例 #7
0
def delNonstdAminoacid(resname):
    """Delete non-standard amino acid *resname*.  :exc:`ValueError` is
    raised when *resname* is not a known non-standard residue name.

    .. ipython:: python

       delNonstdAminoacid('PTR')
       flagDefinition('nonstdaa')

    Default set of non-standard amino acids can be restored as follows:

    .. ipython:: python

       flagDefinition(reset='nonstdaa')"""

    # Work on a copy: when no user setting exists, SETTINGS.get hands back
    # the module-level NONSTANDARD default itself, and popping from it in
    # place would alter the defaults that a later reset falls back to.
    nonstd = dict(SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD))
    try:
        nonstd.pop(resname)
    except KeyError:
        raise ValueError('{0} is not a non-standard residue name'.format(
            repr(resname)))
    else:
        updateNonstandard(nonstd)
コード例 #8
0
def addNonstdAminoacid(resname, *properties):
    """Add non-standard amino acid *resname* with *properties* selected from:

      * {props}

    At most one property may be given from each property category, and
    every property must belong to some category; :exc:`ValueError` is raised
    otherwise.  A warning is logged when *resname* is longer than four
    characters.

    .. ipython:: python

       addNonstdAminoacid('PTR', 'acidic', 'aromatic', 'cyclic', 'large',
       'polar', 'surface')

    Default set of non-standard amino acids can be restored as follows:

    .. ipython:: python

       flagDefinition(reset='nonstdaa')"""

    # NOTE(review): the docstring placeholder above is presumably filled in
    # with str.format elsewhere in the module, so the docstring must not
    # contain any other curly braces -- confirm before editing it.

    resname = str(resname)
    if len(resname) > 4:
        LOGGER.warn('Residue name {0} is unusually long.'.format(
            repr(resname)))

    # Strip the recognised properties category by category; anything left
    # over afterwards is not a valid property.
    propset = set(properties)
    for val in CATEGORIES.values():
        intersection = val.intersection(propset)
        if len(intersection) > 1:
            raise ValueError('amino acid properties {0} cannot be '
                             'present together'.format(', '.join(
                                 [repr(prp) for prp in intersection])))
        propset -= intersection
    if propset:
        raise ValueError('amino acid property {0} is not valid'.format(
            repr(propset.pop())))

    # Work on a copy: when no user setting exists, SETTINGS.get hands back
    # the module-level NONSTANDARD default itself, and adding to it in place
    # would alter the defaults that a later reset falls back to.
    nonstd = dict(SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD))
    nonstd[resname] = set(properties)
    updateNonstandard(nonstd)
コード例 #9
0
def updateDefinitions():
    """Update definitions and set some global variables.  This function must be
    called at the end of the module.

    Rebuilds the module-level :data:`DEFINITIONS` mapping from the defaults
    in :data:`DEFAULTS`, overridden per flag by user definitions stored in
    the package settings, and then refreshes :data:`AMINOACIDS`,
    :data:`BACKBONE` and :data:`TIMESTAMP`."""

    global DEFINITIONS, AMINOACIDS, BACKBONE, TIMESTAMP
    DEFINITIONS = {}
    user = SETTINGS.get('flag_definitions', {})

    # nucleics: 'nucleic' is the union of the three nucleic sub-flags
    nucleic = set()
    for key in ['nucleobase', 'nucleoside', 'nucleotide']:
        aset = set(user.get(key, DEFAULTS[key]))
        nucleic.update(aset)
        DEFINITIONS[key] = aset
    DEFINITIONS['nucleic'] = nucleic

    # heteros
    for key in [
            'water', 'lipid', 'ion', 'sugar', 'heme', 'at', 'cg', 'purine',
            'pyrimidine'
    ]:
        DEFINITIONS[key] = set(user.get(key, DEFAULTS[key]))

    # backbone: look the override up under the flag's own key.  The loop
    # variable from the block above must not be reused here, otherwise a
    # user definition of 'pyrimidine' would replace the backbone names.
    # NOTE(review): overrides stored under the long aliases 'backbone' /
    # 'backbonefull' are not consulted -- confirm how callers key them.
    DEFINITIONS['backbone'] = DEFINITIONS['bb'] = set(
        user.get('bb', DEFAULTS['bb']))
    DEFINITIONS['backbonefull'] = DEFINITIONS['bbfull'] = set(
        user.get('bbfull', DEFAULTS['bbfull']))

    # element regex
    for key in ['hydrogen', 'carbon', 'nitrogen', 'oxygen', 'sulfur']:
        DEFINITIONS[key] = recompile(user.get(key, DEFAULTS[key]))

    try:
        nonstd = SETTINGS[NONSTANDARD_KEY]
    except KeyError:
        # no user-defined non-standard residues: use the precomputed sets
        nonstd = NONSTANDARD
        DEFINITIONS.update(CATEGORIZED)
    else:
        # start every category flag from its defaults ...
        for cat in CATEGORIES:
            for key in CATEGORIES[cat]:
                DEFINITIONS[key] = set(DEFAULTS[key])

        # NOTE(review): 'charged' is derived before the user's residues are
        # added below, so it only picks up default acidic/basic residues --
        # confirm this is intended.
        DEFINITIONS['charged'] = set(DEFINITIONS['acidic'])
        DEFINITIONS['charged'].update(DEFINITIONS['basic'])

        # ... then register each user residue under each of its properties
        for resi, props in nonstd.items():
            for prop in props:
                DEFINITIONS[prop].add(resi)

    DEFINITIONS['stdaa'] = DEFAULTS['stdaa']
    DEFINITIONS['nonstdaa'] = set(nonstd)
    AMINOACIDS = set(DEFINITIONS['stdaa'])
    AMINOACIDS.update(DEFINITIONS['nonstdaa'])
    DEFINITIONS['protein'] = DEFINITIONS['aminoacid'] = AMINOACIDS

    BACKBONE = DEFINITIONS['bb']

    TIMESTAMP = SETTINGS.get(TIMESTAMP_KEY, 0)
コード例 #10
0
def resetDefinitions(flag):
    """Discard user customizations for *flag* and rebuild the definitions.

    ``'all'`` removes every user flag definition together with the user
    non-standard amino acid table, and ``'nonstdaa'`` removes only the
    latter.  Any other value removes the user definition stored for that
    single flag, leaving other user definitions untouched; nothing happens
    when no user definition exists for it.

    Whenever something is removed, the settings timestamp is refreshed, the
    settings are saved and :func:`updateDefinitions` is called."""

    if flag == 'all' or flag == 'nonstdaa':
        if flag == 'all':
            SETTINGS.pop(DEFINITIONS_KEY, None)
        SETTINGS.pop(NONSTANDARD_KEY, None)
    else:
        # Fetch (rather than pop) the user definitions so that resetting
        # one flag does not throw away every other customized flag.
        definitions = SETTINGS.get(DEFINITIONS_KEY, {})
        try:
            definitions.pop(flag)
        except KeyError:
            # nothing was customized for this flag
            return
        SETTINGS[DEFINITIONS_KEY] = definitions

    SETTINGS[TIMESTAMP_KEY] = int(time())
    SETTINGS.save()
    updateDefinitions()
コード例 #11
0
def updateNonstandard(nonstd):
    """Store *nonstd* as the non-standard amino acid table in the package
    settings, stamp the settings with the current time, save them, and
    rebuild the definitions via :func:`updateDefinitions`."""

    stamp = int(time())
    SETTINGS[NONSTANDARD_KEY] = nonstd
    SETTINGS[TIMESTAMP_KEY] = stamp
    SETTINGS.save()
    updateDefinitions()
コード例 #12
0
ファイル: pdbligands.py プロジェクト: jr-marchand/caviar
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, a freshly downloaded XML file will be
    saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be saved
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will contain data that is present in the XML
    file and all Ligand Expo XML files do not contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    :raises TypeError: when *cci* is not a string
    :raises ValueError: when *cci* is neither an existing file nor an
        alphanumeric identifier of at most four characters
    :raises IOError: when the XML file cannot be opened online

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from caviar.prody_parser import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')

    if isfile(cci):
        # *cci* is a local XML file; derive the identifier from its name
        with openFile(cci) as inp:
            xml_text = inp.read()
        url = None
        path = cci
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be 3-letters long and alphanumeric or '
                         'a valid filename')
    else:
        xml_text = None
        cci = cci.upper()
        save_xml = SETTINGS.get('ligand_xml_save')
        if save_xml:
            # try the local cache of compressed XML files first
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml_text = inp.read()
        else:
            folder = None
            path = None

        url = ('http://files.rcsb.org/ligands/download/{0}'
               '.xml'.format(cci))
        if not xml_text:
            try:
                inp = openURL(url)
            except IOError:
                raise IOError(
                    'XML file for ligand {0} is not found online'.format(cci))
            try:
                xml_text = inp.read()
            finally:
                # release the connection even when reading fails
                inp.close()
            if PY3K:
                xml_text = xml_text.decode()

            if filename:
                with openFile(filename, mode='w', folder=folder) as out:
                    out.write(xml_text)
            if save_xml:
                with openFile(xmlgz, 'w') as out:
                    out.write(xml_text)

    # cElementTree was removed in Python 3.9; ElementTree is available on
    # every supported version.
    import xml.etree.ElementTree as ET

    root = ET.XML(xml_text)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    # every tag is prefixed with the '{namespace}' of the root element
    ns = root.tag[:root.tag.rfind('}') + 1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    # copy the fields of the first chem_comp entry, dropping the namespace
    # and any 'pdbx_' prefix from the keys
    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text
    # the field is optional, so only convert it when it is present
    formula_weight = dict_.get('formula_weight')
    if formula_weight is not None:
        dict_['formula_weight'] = float(formula_weight)

    identifiers_and_descriptors = (
        _categoryChildren(root, ns + 'pdbx_chem_comp_identifierCategory') +
        _categoryChildren(root, ns + 'pdbx_chem_comp_descriptorCategory'))
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    dict_['audits'] = [
        (audit.get('action_type'), audit.get('date'))
        for audit in _categoryChildren(
            root, ns + 'pdbx_chem_comp_auditCategory')
    ]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    # NOTE(review): residue numbers are created with the dtype of the
    # 'charge' field; this looks like a copy/paste slip -- confirm whether a
    # residue-number field dtype should be used instead.
    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    # the ``np.bool`` alias no longer exists in current NumPy releases
    leaving_atom_flags = np.zeros(n_atoms, bool)
    aromatic_flags = np.zeros(n_atoms, bool)
    stereo_configs = np.zeros(n_atoms, bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = {child.tag[len_ns:]: child.text for child in atom}

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        aromatic_flags[i] = data.get('pdbx_aromatic_flag') == 'Y'
        # NOTE(review): the PDBx dictionary appears to use R/S/N for this
        # item, in which case comparing with 'Y' never matches -- confirm
        # the intended meaning before changing it.
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model
    # the ideal group shares everything with the model except title/coords
    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    for bond in _categoryChildren(root, ns + 'chem_comp_bondCategory'):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            # warn once per unknown atom name and skip the bond
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_


def _categoryChildren(root, tag):
    """Return the child elements of the first *tag* element under *root* as
    a list, or an empty list when *root* has no such element."""

    category = root.find(tag)
    # Compare with None explicitly: an element without children is falsy,
    # and truth-testing elements is deprecated.
    if category is None:
        return []
    return list(category)